
.. _program_listing_file_src_cpp_factorizer.hpp:

Program Listing for File factorizer.hpp
=======================================

|exhale_lsh| :ref:`Return to documentation for file <file_src_cpp_factorizer.hpp>` (``src/cpp/factorizer.hpp``)

.. |exhale_lsh| unicode:: U+021B0 .. UPWARDS ARROW WITH TIP LEFTWARDS

.. code-block:: cpp

   #pragma once
   #include <cstddef>
   #include <cstdint>
   #include <string>
   #include <string_view>
   #include <vector>

   namespace noLZSS {

   // Constants and utility functions for reverse complement handling
   constexpr uint64_t RC_MASK = (1ULL << 63);

   struct PreparedSequenceResult {
       std::string prepared_string;
       size_t original_length;
       std::vector<size_t> sentinel_positions;
   };

   struct FactorFileHeader {
       char magic[8] = {'n', 'o', 'L', 'Z', 'S', 'S', 'v', '1'};
       uint64_t num_factors;
       uint64_t num_sequences;
       uint64_t num_sentinels;
       uint64_t header_size;
   };

   inline bool is_rc(uint64_t ref) { return (ref & RC_MASK) != 0; }

   inline uint64_t rc_end(uint64_t ref) { return (ref & ~RC_MASK); }

   // Utility functions for DNA sequence preparation
   PreparedSequenceResult prepare_multiple_dna_sequences_w_rc(const std::vector<std::string>& sequences);

   PreparedSequenceResult prepare_multiple_dna_sequences_no_rc(const std::vector<std::string>& sequences);

   struct Factor {
       uint64_t start;
       uint64_t length;
       uint64_t ref;
   };

   // Core factorization functions
   std::vector<Factor> factorize(std::string_view text);

   std::vector<Factor> factorize_file(const std::string& path, size_t reserve_hint = 0);

   // Counting functions
   size_t count_factors(std::string_view text);

   size_t count_factors_file(const std::string& path);

   // Binary output
   size_t write_factors_binary_file(const std::string& in_path, const std::string& out_path);

   // DNA-aware factorization functions with reverse complement support
   std::vector<Factor> factorize_dna_w_rc(std::string_view text);

   std::vector<Factor> factorize_file_dna_w_rc(const std::string& path, size_t reserve_hint = 0);

   size_t count_factors_dna_w_rc(std::string_view text);

   size_t count_factors_file_dna_w_rc(const std::string& path);

   size_t write_factors_binary_file_dna_w_rc(const std::string& in_path, const std::string& out_path);

   // Template functions for advanced usage
   template<class Sink>
   size_t factorize_stream_dna_w_rc(std::string_view text, Sink&& sink);

   template<class Sink>
   size_t factorize_file_stream_dna_w_rc(const std::string& path, Sink&& sink);

   // Multiple DNA sequences factorization functions with reverse complement support
   std::vector<Factor> factorize_multiple_dna_w_rc(std::string_view text);

   std::vector<Factor> factorize_file_multiple_dna_w_rc(const std::string& path, size_t reserve_hint = 0);

   size_t count_factors_multiple_dna_w_rc(std::string_view text);

   size_t count_factors_file_multiple_dna_w_rc(const std::string& path);

   size_t write_factors_binary_file_multiple_dna_w_rc(const std::string& in_path, const std::string& out_path);

   // Template functions for advanced usage with multiple sequences
   template<class Sink>
   size_t factorize_stream_multiple_dna_w_rc(std::string_view text, Sink&& sink);

   template<class Sink>
   size_t factorize_file_stream_multiple_dna_w_rc(const std::string& path, Sink&& sink);

   } // namespace noLZSS