💿🐜 Antkeeper source code https://antkeeper.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

205 lines
5.5 KiB

  1. /*
  2. * Copyright (C) 2020 Christopher J. Howard
  3. *
  4. * This file is part of Antkeeper source code.
  5. *
  6. * Antkeeper source code is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Antkeeper source code is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef ANTKEEPER_DNA_TRANSLATE_HPP
  20. #define ANTKEEPER_DNA_TRANSLATE_HPP
  21. #include <algorithm>
  22. #include <cstdint>
  23. #include <iterator>
  24. namespace dna
  25. {
  /**
   * DNA translation table for the standard genetic code.
   *
   * The table is a single string made of five consecutive rows of 64 characters each.
   * Column @c i across the rows describes one codon:
   *
   * - row 0 (offset 0): amino acid produced by the codon, with '*' marking a stop codon
   * - row 1 (offset 64): start/stop marker ('M' for a start codon, '*' for a stop codon, '-' otherwise)
   * - row 2 (offset 128): first base of the codon
   * - row 3 (offset 192): second base of the codon
   * - row 4 (offset 256): third base of the codon
   *
   * The pointee is const: a string literal must never be reachable through a pointer to
   * non-const char.
   */
  constexpr const char* standard_code =
      "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" // Amino acid
      "---M------**--*----M---------------M----------------------------" // Start/stop
      "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" // Base 1
      "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" // Base 2
      "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"; // Base 3
  /**
   * Translates a sequence of bases into amino acids, reading one codon (three bases) at a time.
   *
   * Translation stops at the first stop codon or when fewer than three bases remain.
   * Although the template parameters are named as input iterators, both the sequence and the
   * table are traversed through copies of the iterators, so they must support multiple passes.
   *
   * @param first,last Range of codons to translate.
   * @param t_first Beginning of the translation table.
   * @param d_first Beginning of the destination range.
   * @return Output iterator to the element in the destination range, one past the last amino acid written.
   */
  template <typename InputIt1, typename InputIt2, typename OutputIt>
  OutputIt translate(
      InputIt1 first,
      InputIt1 last,
      InputIt2 t_first,
      OutputIt d_first);
  /**
   * Locates the first start codon in a sequence of bases.
   *
   * The search is not restricted to a reading frame: every position in the sequence is
   * considered as a possible first base of a start codon.
   *
   * @param first,last Range of bases to search.
   * @param t_first Beginning of the translation table.
   * @return Iterator to the first base of the first start codon found, or @p last if there is none.
   */
  template <typename ForwardIt1, typename ForwardIt2>
  ForwardIt1 find_start(
      ForwardIt1 first,
      ForwardIt1 last,
      ForwardIt2 t_first);
  /**
   * Locates the first stop codon in a sequence of codons.
   *
   * The sequence is read in frame, three bases at a time, starting at @p first.
   *
   * @param first,last Range of codons to search.
   * @param t_first Beginning of the translation table.
   * @return Iterator to the first base of the first stop codon found, or @p last if there is none.
   */
  template <typename ForwardIt1, typename ForwardIt2>
  ForwardIt1 find_stop(
      ForwardIt1 first,
      ForwardIt1 last,
      ForwardIt2 t_first);
  /**
   * Finds the first start codon in a sequence of bases.
   *
   * Every three-base window of the sequence is examined, advancing one base at a time, and
   * compared against each of the 64 codons of the translation table. A codon counts as a
   * start codon when its entry in the start/stop row is neither '-' nor '*'.
   *
   * @param first,last Range of bases to search.
   * @param t_first Beginning of the translation table, laid out as five consecutive rows of
   *        64 entries: amino acid, start/stop marker, base 1, base 2, base 3.
   * @return Iterator to the first base of the first start codon in the sequence, or @p last
   *         if no start codon is found or the sequence holds fewer than three bases.
   */
  template <class ForwardIt1, class ForwardIt2>
  ForwardIt1 find_start(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first)
  {
      // Compare against `last` before every increment so that no iterator is ever advanced
      // past the end of the range (undefined behavior, and a crash for node-based iterators).
      if (first == last)
          return last;
      ForwardIt1 second = first;
      ++second;
      if (second == last)
          return last;
      ForwardIt1 third = second;
      ++third;

      // Locate the rows of the table that are needed; the amino acid row is skipped.
      ForwardIt2 start_first = t_first;
      std::advance(start_first, 64);
      ForwardIt2 base1_first = start_first;
      std::advance(base1_first, 64);
      ForwardIt2 base2_first = base1_first;
      std::advance(base2_first, 64);
      ForwardIt2 base3_first = base2_first;
      std::advance(base3_first, 64);

      while (third != last)
      {
          ForwardIt2 start = start_first;
          ForwardIt2 base1 = base1_first;
          ForwardIt2 base2 = base2_first;
          ForwardIt2 base3 = base3_first;

          // Linear scan over the 64 codons of the table.
          for (std::uint_fast8_t i = 64; i; --i)
          {
              if (*start != '-' && *start != '*' && *first == *base1 && *second == *base2 && *third == *base3)
                  return first;

              ++start;
              ++base1;
              ++base2;
              ++base3;
          }

          // Slide the window forward by a single base.
          first = second;
          second = third;
          ++third;
      }

      return last;
  }
  /**
   * Finds the first stop codon in a sequence of codons.
   *
   * The sequence is read in frame, three bases at a time, starting at @p first. Each codon is
   * compared against the 64 codons of the translation table; a codon counts as a stop codon
   * when its entry in the amino acid row is '*'. Trailing bases that do not form a complete
   * codon are ignored.
   *
   * @param first,last Range of codons to search.
   * @param t_first Beginning of the translation table, laid out as five consecutive rows of
   *        64 entries: amino acid, start/stop marker, base 1, base 2, base 3.
   * @return Iterator to the first base of the first stop codon in the sequence, or @p last
   *         if no stop codon is found.
   */
  template <class ForwardIt1, class ForwardIt2>
  ForwardIt1 find_stop(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first)
  {
      // Locate the base rows of the table; the amino acid row begins at t_first itself.
      ForwardIt2 base1_first = t_first;
      std::advance(base1_first, 128);
      ForwardIt2 base2_first = base1_first;
      std::advance(base2_first, 64);
      ForwardIt2 base3_first = base2_first;
      std::advance(base3_first, 64);

      while (first != last)
      {
          // Build the codon one base at a time, comparing against `last` before every
          // increment so that no iterator is ever advanced past the end of the range.
          ForwardIt1 second = first;
          ++second;
          if (second == last)
              break;
          ForwardIt1 third = second;
          ++third;
          if (third == last)
              break;

          ForwardIt2 aa = t_first;
          ForwardIt2 base1 = base1_first;
          ForwardIt2 base2 = base2_first;
          ForwardIt2 base3 = base3_first;

          // Linear scan over the 64 codons of the table.
          for (std::uint_fast8_t i = 64; i; --i)
          {
              if (*aa == '*' && *first == *base1 && *second == *base2 && *third == *base3)
                  return first;

              ++aa;
              ++base1;
              ++base2;
              ++base3;
          }

          // Step to the first base of the next codon; `third` is dereferenceable here,
          // so advancing once more yields at most `last`.
          first = third;
          ++first;
      }

      return last;
  }
  /**
   * Translates codons into amino acids until a stop codon is read or the end of the sequence is reached.
   *
   * The sequence is read in frame, three bases at a time, starting at @p first. Each codon is
   * looked up among the 64 codons of the translation table and its amino acid is written to
   * the destination. A codon whose amino acid entry is '*' ends the translation and is not
   * written. A codon that does not appear in the table produces no output and is skipped.
   * Trailing bases that do not form a complete codon are ignored.
   *
   * Despite the template parameter names, both the sequence and the table are traversed
   * through copies of the iterators, so they must support multiple passes.
   *
   * @param first,last Range of codons to translate.
   * @param t_first Beginning of the translation table, laid out as five consecutive rows of
   *        64 entries: amino acid, start/stop marker, base 1, base 2, base 3.
   * @param d_first Beginning of the destination range.
   * @return Output iterator to the element in the destination range, one past the last amino acid translated.
   */
  template <class InputIt1, class InputIt2, class OutputIt>
  OutputIt translate(InputIt1 first, InputIt1 last, InputIt2 t_first, OutputIt d_first)
  {
      // Locate the base rows of the table; the amino acid row begins at t_first itself.
      InputIt2 base1_first = t_first;
      std::advance(base1_first, 128);
      InputIt2 base2_first = base1_first;
      std::advance(base2_first, 64);
      InputIt2 base3_first = base2_first;
      std::advance(base3_first, 64);

      while (first != last)
      {
          // Build the codon one base at a time, comparing against `last` before every
          // increment so that no iterator is ever advanced past the end of the range.
          InputIt1 second = first;
          ++second;
          if (second == last)
              break;
          InputIt1 third = second;
          ++third;
          if (third == last)
              break;

          InputIt2 aa = t_first;
          InputIt2 base1 = base1_first;
          InputIt2 base2 = base2_first;
          InputIt2 base3 = base3_first;

          // Linear scan over the 64 codons of the table.
          for (std::uint_fast8_t i = 64; i; --i)
          {
              if (*first == *base1 && *second == *base2 && *third == *base3)
              {
                  // A stop codon terminates the translation without emitting anything.
                  if (*aa == '*')
                      return d_first;

                  *d_first = *aa;
                  ++d_first;
                  break;
              }

              ++aa;
              ++base1;
              ++base2;
              ++base3;
          }

          // Step to the first base of the next codon; `third` is dereferenceable here,
          // so advancing once more yields at most `last`.
          first = third;
          ++first;
      }

      return d_first;
  }
  173. } // namespace dna
  174. #endif // ANTKEEPER_DNA_TRANSLATE_HPP