Program Listing for File fasta_processor.hpp

Return to documentation for file (src/cpp/fasta_processor.hpp)

#pragma once
#include <string>
#include <vector>
#include <fstream>
#include <cctype>
#include "factorizer.hpp"

namespace noLZSS {

/**
 * @brief Plain aggregate returned by process_nucleotide_fasta() and
 *        process_amino_acid_fasta().
 *
 * The exact meaning of each field is set by the function definitions, which
 * are not part of this header; the per-field notes below are inferred from the
 * names and should be confirmed against the implementation.
 */
struct FastaProcessResult {
    /// Sequence data as one string (presumably all records combined — confirm).
    std::string sequence;
    /// Sequence count. Zero-initialized so a default-constructed result never
    /// exposes an indeterminate value (reading one is undefined behaviour).
    size_t num_sequences = 0;
    /// Per-sequence identifiers (presumably taken from FASTA header lines — confirm).
    std::vector<std::string> sequence_ids;
    /// Per-sequence lengths (presumably parallel to sequence_ids — confirm).
    std::vector<size_t> sequence_lengths;
    /// Per-sequence positions (presumably start offsets into `sequence` — confirm).
    std::vector<size_t> sequence_positions;
};

/**
 * @brief Plain aggregate holding two vectors of strings: sequences and their ids.
 *
 * No function declared in this header returns or accepts this type; it is
 * presumably used by parsing helpers defined elsewhere — confirm.
 */
struct FastaParseResult {
    /// Sequence strings (presumably one entry per FASTA record — confirm).
    std::vector<std::string> sequences;
    /// Identifier strings (presumably parallel to `sequences` — confirm).
    std::vector<std::string> sequence_ids;
};

/**
 * @brief Plain aggregate returned by the factorize_fasta_multiple_dna_* functions.
 *
 * `Factor` is not declared in this header; it presumably comes from
 * "factorizer.hpp" — confirm.
 */
struct FastaFactorizationResult {
    /// The factors produced by the factorization.
    std::vector<Factor> factors;
    /// NOTE(review): presumably indices into `factors` that mark sentinel
    /// (sequence-separator) factors — confirm against the implementation.
    std::vector<uint64_t> sentinel_factor_indices;
};

/**
 * @brief Declaration only; the definition lives outside this header.
 *
 * Judging by the name, processes a FASTA file containing nucleotide sequences —
 * validation rules and error behaviour are not visible here; confirm against
 * the implementation.
 *
 * @param fasta_path Path of the FASTA file, passed by const reference.
 * @return A FastaProcessResult by value.
 */
FastaProcessResult process_nucleotide_fasta(const std::string& fasta_path);

/**
 * @brief Declaration only; the definition lives outside this header.
 *
 * Judging by the name, processes a FASTA file containing amino-acid (protein)
 * sequences — validation rules and error behaviour are not visible here;
 * confirm against the implementation.
 *
 * @param fasta_path Path of the FASTA file, passed by const reference.
 * @return A FastaProcessResult by value.
 */
FastaProcessResult process_amino_acid_fasta(const std::string& fasta_path);

/**
 * @brief Declaration only; the definition lives outside this header.
 *
 * Judging by the name, factorizes the DNA sequences of a multi-sequence FASTA
 * file. NOTE(review): "w_rc" presumably means "with reverse complement" —
 * confirm against the implementation.
 *
 * @param fasta_path Path of the FASTA file, passed by const reference.
 * @return A FastaFactorizationResult by value.
 */
FastaFactorizationResult factorize_fasta_multiple_dna_w_rc(const std::string& fasta_path);

/**
 * @brief Declaration only; the definition lives outside this header.
 *
 * Judging by the name, factorizes the DNA sequences of a multi-sequence FASTA
 * file. NOTE(review): "no_rc" presumably means "without reverse complement" —
 * confirm against the implementation.
 *
 * @param fasta_path Path of the FASTA file, passed by const reference.
 * @return A FastaFactorizationResult by value.
 */
FastaFactorizationResult factorize_fasta_multiple_dna_no_rc(const std::string& fasta_path);

/**
 * @brief Declaration only; the definition lives outside this header.
 *
 * Judging by the name, factorizes the DNA sequences of a multi-sequence FASTA
 * file and writes the factors to a binary file. NOTE(review): "w_rc"
 * presumably means "with reverse complement"; the binary layout is not visible
 * here — confirm against the implementation.
 *
 * @param fasta_path Path of the input FASTA file, passed by const reference.
 * @param out_path   Path of the output file, passed by const reference.
 * @return A size_t; presumably the number of factors written — confirm.
 */
size_t write_factors_binary_file_fasta_multiple_dna_w_rc(const std::string& fasta_path, const std::string& out_path);

/**
 * @brief Declaration only; the definition lives outside this header.
 *
 * Judging by the name, factorizes the DNA sequences of a multi-sequence FASTA
 * file and writes the factors to a binary file. NOTE(review): "no_rc"
 * presumably means "without reverse complement"; the binary layout is not
 * visible here — confirm against the implementation.
 *
 * @param fasta_path Path of the input FASTA file, passed by const reference.
 * @param out_path   Path of the output file, passed by const reference.
 * @return A size_t; presumably the number of factors written — confirm.
 */
size_t write_factors_binary_file_fasta_multiple_dna_no_rc(const std::string& fasta_path, const std::string& out_path);

} // namespace noLZSS