/*
 * Copyright (C) 2020  Christopher J. Howard
 *
 * This file is part of Antkeeper source code.
 *
 * Antkeeper source code is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Antkeeper source code is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Antkeeper source code.  If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef ANTKEEPER_DNA_TRANSLATE_HPP
#define ANTKEEPER_DNA_TRANSLATE_HPP

#include <algorithm>
#include <cstdint>
#include <iterator>

namespace dna {

/**
 * DNA translation table for the standard genetic code.
 *
 * The table is five consecutive rows of 64 characters, one column per codon:
 *
 * - row 0 (offset   0): amino acid produced by the codon, `*` for a stop codon;
 * - row 1 (offset  64): start/stop flags, `M` marks a start codon, `*` a stop codon, `-` neither;
 * - row 2 (offset 128): first base of the codon;
 * - row 3 (offset 192): second base of the codon;
 * - row 4 (offset 256): third base of the codon.
 *
 * Declared as pointer-to-const because a string literal may not be bound to a non-const `char*`.
 */
constexpr const char* standard_code =
	"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" // Amino acid
	"---M------**--*----M---------------M----------------------------" // Start/stop
	"TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" // Base 1
	"TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" // Base 2
	"TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"; // Base 3

/**
 * Translates codons into amino acids until a stop codon is read or the end of the sequence is reached.
 *
 * The sequence is read three bases at a time starting at @p first. A codon that matches no column of the
 * translation table produces no output and is skipped. One or two trailing bases that do not form a
 * complete codon are ignored.
 *
 * @param first,last Range of codons to translate. The iterator is copied and advanced independently, so
 *                   it must support multi-pass traversal.
 * @param t_first Beginning of the translation table, laid out like dna::standard_code (five rows of 64
 *                elements). Must support multi-pass traversal.
 * @param d_first Beginning of the destination range.
 * @return Output iterator to the element in the destination range, one past the last amino acid written.
 */
template <class InputIt1, class InputIt2, class OutputIt>
OutputIt translate(InputIt1 first, InputIt1 last, InputIt2 t_first, OutputIt d_first);

/**
 * Finds the first start codon in a sequence of bases.
 *
 * Every base offset is examined (the search window advances one base at a time), so the result is not
 * restricted to any particular reading frame.
 *
 * @param first,last Range of bases to search.
 * @param t_first Beginning of the translation table, laid out like dna::standard_code.
 * @return Iterator to the first base of the first start codon in the sequence, or @p last if no start
 *         codon is found.
 */
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_start(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first);

/**
 * Finds the first stop codon in a sequence of codons.
 *
 * Only in-frame codons are examined (the search advances three bases at a time from @p first). One or
 * two trailing bases that do not form a complete codon are ignored.
 *
 * @param first,last Range of codons to search.
 * @param t_first Beginning of the translation table, laid out like dna::standard_code.
 * @return Iterator to the first base of the first stop codon in the sequence, or @p last if no stop
 *         codon is found.
 */
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_stop(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first);

template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_start(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first)
{
	// A codon needs three bases. Check after every step so that no iterator is ever
	// incremented past `last`, which would be undefined behaviour.
	if (first == last)
		return last;
	ForwardIt1 second = first;
	++second;
	if (second == last)
		return last;
	ForwardIt1 third = second;
	++third;

	// Locate the table rows that are needed: start/stop flags and the three base rows.
	ForwardIt2 start_first = t_first;
	std::advance(start_first, 64);
	ForwardIt2 base1_first = start_first;
	std::advance(base1_first, 64);
	ForwardIt2 base2_first = base1_first;
	std::advance(base2_first, 64);
	ForwardIt2 base3_first = base2_first;
	std::advance(base3_first, 64);

	while (third != last)
	{
		ForwardIt2 start = start_first;
		ForwardIt2 base1 = base1_first;
		ForwardIt2 base2 = base2_first;
		ForwardIt2 base3 = base3_first;

		// Walk all 64 table columns in parallel looking for a start-flagged codon equal to the window.
		for (std::uint_fast8_t i = 64; i; --i)
		{
			if (*start != '-' && *start != '*' && *first == *base1 && *second == *base2 && *third == *base3)
				return first;

			++start;
			++base1;
			++base2;
			++base3;
		}

		// Slide the three-base window forward by a single base.
		first = second;
		second = third;
		++third;
	}

	return last;
}

template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_stop(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first)
{
	// Locate the three base rows; the amino acid row (which carries the `*` stop marker) starts at t_first.
	ForwardIt2 base1_first = t_first;
	std::advance(base1_first, 128);
	ForwardIt2 base2_first = base1_first;
	std::advance(base2_first, 64);
	ForwardIt2 base3_first = base2_first;
	std::advance(base3_first, 64);

	while (first != last)
	{
		// Locate the second and third base of the current codon, giving up as soon as the range
		// runs out so that no iterator is ever incremented past `last`.
		ForwardIt1 second = first;
		++second;
		if (second == last)
			break;
		ForwardIt1 third = second;
		++third;
		if (third == last)
			break;

		ForwardIt2 aa = t_first;
		ForwardIt2 base1 = base1_first;
		ForwardIt2 base2 = base2_first;
		ForwardIt2 base3 = base3_first;

		// Walk all 64 table columns in parallel looking for a stop codon equal to the current codon.
		for (std::uint_fast8_t i = 64; i; --i)
		{
			if (*aa == '*' && *first == *base1 && *second == *base2 && *third == *base3)
				return first;

			++aa;
			++base1;
			++base2;
			++base3;
		}

		// Step to the first base of the next in-frame codon.
		first = third;
		++first;
	}

	return last;
}

template <class InputIt1, class InputIt2, class OutputIt>
OutputIt translate(InputIt1 first, InputIt1 last, InputIt2 t_first, OutputIt d_first)
{
	// Locate the three base rows; the amino acid row starts at t_first.
	InputIt2 base1_first = t_first;
	std::advance(base1_first, 128);
	InputIt2 base2_first = base1_first;
	std::advance(base2_first, 64);
	InputIt2 base3_first = base2_first;
	std::advance(base3_first, 64);

	while (first != last)
	{
		// Locate the second and third base of the current codon, giving up as soon as the range
		// runs out so that no iterator is ever incremented past `last`.
		InputIt1 second = first;
		++second;
		if (second == last)
			break;
		InputIt1 third = second;
		++third;
		if (third == last)
			break;

		InputIt2 aa = t_first;
		InputIt2 base1 = base1_first;
		InputIt2 base2 = base2_first;
		InputIt2 base3 = base3_first;

		// Walk all 64 table columns in parallel looking for the current codon.
		for (std::uint_fast8_t i = 64; i; --i)
		{
			if (*first == *base1 && *second == *base2 && *third == *base3)
			{
				// A stop codon terminates translation and is not written to the output.
				if (*aa == '*')
					return d_first;

				*(d_first++) = *aa;
				break;
			}

			++aa;
			++base1;
			++base2;
			++base3;
		}

		// Step to the first base of the next codon.
		first = third;
		++first;
	}

	return d_first;
}

} // namespace dna

#endif // ANTKEEPER_DNA_TRANSLATE_HPP