From b6947b38425c46945e3e149623243755c8df855d Mon Sep 17 00:00:00 2001 From: "C. J. Howard" Date: Sat, 19 Dec 2020 22:37:58 +0800 Subject: [PATCH] Add functions for finding start and stop codons --- src/game/genetics/translate.hpp | 113 +++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/src/game/genetics/translate.hpp b/src/game/genetics/translate.hpp index 44987a9..327fa52 100644 --- a/src/game/genetics/translate.hpp +++ b/src/game/genetics/translate.hpp @@ -27,6 +27,7 @@ namespace dna { +/// Standard genetic code translation table. constexpr char* standard_code = "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" // Amino acid "---M------**--*----M---------------M----------------------------" // Start/stop @@ -42,6 +43,116 @@ constexpr char* standard_code = * @param d_first Beginning of the destination range. * @return Output iterator to the amino acid in the destination range, one past the last amino acid translated. */ +template +OutputIt translate(InputIt1 first, InputIt1 last, InputIt2 t_first, OutputIt d_first); + +/** + * Finds the first start codon in a sequence of bases. + * + * @param first,last Range of bases to search. + * @param t_first Beginning of the translation table. + * @return Iterator to the first base of the first start codon in the sequence, or @p last if no start codon is found. + */ +template +ForwardIt1 find_start(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first); + +/** + * Finds the first stop codon in a sequence of codons. + * + * @param first,last Range of codons to search. + * @param t_first Beginning of the translation table. + * @return Iterator to the first base of the first stop codon in the sequence, or @p last if no stop codon is found. 
/**
 * Finds the first in-frame stop codon in a sequence of bases.
 *
 * Forward declaration; the definition follows find_start() below.
 *
 * @param first,last Range of bases to search, read as consecutive three-base codons starting at @p first.
 * @param t_first Beginning of the translation table.
 * @return Iterator to the first base of the first stop codon in the sequence, or @p last if no stop codon is found.
 */
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_stop(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first);

/**
 * Finds the first start codon in a sequence of bases.
 *
 * Every base offset is tried (a sliding window of three bases), so the match is not
 * restricted to a reading frame.
 *
 * The table is read as consecutive rows of 64 entries: the start/stop flags at offset 64
 * and the first, second and third codon bases at offsets 128, 192 and 256. A table column
 * is a start codon when its flag is neither '-' nor '*'.
 *
 * @param first,last Range of bases to search.
 * @param t_first Beginning of the translation table (at least 320 entries are read).
 * @return Iterator to the first base of the first start codon in the sequence, or @p last if
 *         no start codon is found, including when the range holds fewer than three bases.
 */
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_start(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first)
{
	// A codon needs three bases. Test against `last` before every increment so that no
	// iterator is ever advanced beyond the end of the range.
	if (first == last)
		return last;
	ForwardIt1 second = first;
	++second;
	if (second == last)
		return last;
	ForwardIt1 third = second;
	++third;

	// Locate the table rows used by the search.
	ForwardIt2 start_first = t_first;
	std::advance(start_first, 64);
	ForwardIt2 base1_first = start_first;
	std::advance(base1_first, 64);
	ForwardIt2 base2_first = base1_first;
	std::advance(base2_first, 64);
	ForwardIt2 base3_first = base2_first;
	std::advance(base3_first, 64);

	while (third != last)
	{
		ForwardIt2 start = start_first;
		ForwardIt2 base1 = base1_first;
		ForwardIt2 base2 = base2_first;
		ForwardIt2 base3 = base3_first;

		// Compare the current window against each of the 64 table columns.
		for (std::uint_fast8_t i = 64; i; --i)
		{
			if (*start != '-' && *start != '*' && *first == *base1 && *second == *base2 && *third == *base3)
				return first;

			++start;
			++base1;
			++base2;
			++base3;
		}

		// Slide the window forward by a single base.
		first = second;
		second = third;
		++third;
	}

	return last;
}

/**
 * Finds the first in-frame stop codon in a sequence of bases.
 *
 * The range is consumed three bases at a time starting at @p first; a trailing group of
 * one or two bases is ignored.
 *
 * The table is read as consecutive rows of 64 entries: the amino acids at offset 0 and the
 * first, second and third codon bases at offsets 128, 192 and 256. A table column is a stop
 * codon when its amino acid entry is '*'.
 *
 * @param first,last Range of bases to search.
 * @param t_first Beginning of the translation table (at least 320 entries are read).
 * @return Iterator to the first base of the first stop codon in the sequence, or @p last if
 *         no stop codon is found.
 */
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_stop(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first)
{
	// Locate the table rows used by the search.
	ForwardIt2 base1_first = t_first;
	std::advance(base1_first, 128);
	ForwardIt2 base2_first = base1_first;
	std::advance(base2_first, 64);
	ForwardIt2 base3_first = base2_first;
	std::advance(base3_first, 64);

	while (first != last)
	{
		// Form the next codon, stopping as soon as the range runs out so that no iterator
		// is ever advanced beyond `last`.
		ForwardIt1 second = first;
		++second;
		if (second == last)
			break;
		ForwardIt1 third = second;
		++third;
		if (third == last)
			break;

		ForwardIt2 aa = t_first;
		ForwardIt2 base1 = base1_first;
		ForwardIt2 base2 = base2_first;
		ForwardIt2 base3 = base3_first;

		// Compare the current codon against each of the 64 table columns.
		for (std::uint_fast8_t i = 64; i; --i)
		{
			if (*aa == '*' && *first == *base1 && *second == *base2 && *third == *base3)
				return first;

			++aa;
			++base1;
			++base2;
			++base3;
		}

		// Step to the first base of the next codon.
		first = third;
		++first;
	}

	return last;
}
d_first) { @@ -64,7 +175,7 @@ OutputIt translate(InputIt1 first, InputIt1 last, InputIt2 t_first, OutputIt d_f InputIt2 base2 = base2_first; InputIt2 base3 = base3_first; - for (std::uint_fast8_t i = 0; i < 64; ++i) + for (std::uint_fast8_t i = 64; i; --i) { if (*first == *base1 && *second == *base2 && *third == *base3) {