💿🐜 Antkeeper source code https://antkeeper.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

94 lines
2.7 KiB

  1. /*
  2. * Copyright (C) 2020 Christopher J. Howard
  3. *
  4. * This file is part of Antkeeper source code.
  5. *
  6. * Antkeeper source code is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Antkeeper source code is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef ANTKEEPER_TRANSLATE_HPP
  20. #define ANTKEEPER_TRANSLATE_HPP
  21. #include <algorithm>
  22. #include <cstdint>
  23. #include <iterator>
  namespace dna
  {

  /**
   * Standard genetic code translation table.
   *
   * The table consists of five consecutive rows of 64 characters each, with one
   * column per codon:
   *
   * - offset 0:   amino acid produced by the codon (`*` marks a stop codon),
   * - offset 64:  start/stop markers,
   * - offset 128: first base of the codon,
   * - offset 192: second base of the codon,
   * - offset 256: third base of the codon.
   *
   * The pointee is `const` because string literals are read-only.
   */
  constexpr const char* standard_code =
      "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" // Amino acid
      "---M------**--*----M---------------M----------------------------" // Start/stop
      "TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" // Base 1
      "TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" // Base 2
      "TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"; // Base 3

  /**
   * Translates codons into amino acids until a stop codon is read or the end of
   * the sequence is reached.
   *
   * The sequence is consumed three elements at a time. Each triplet is looked up
   * in the translation table by comparing it against the three base rows; the
   * amino acid in the matching column is written to the destination. A column
   * whose amino acid is `*` ends the translation without writing anything for
   * that codon. Triplets with no matching column are skipped, and a trailing
   * group of fewer than three elements is ignored. No iterator is ever advanced
   * past @p last.
   *
   * @param first,last Range of codons to translate. The iterators are copied and
   *                   re-read, so they must support multi-pass traversal.
   * @param t_first Beginning of the translation table, laid out as five rows of
   *                64 elements: amino acids, start/stop markers, base 1, base 2
   *                and base 3. The start/stop row is not consulted.
   * @param d_first Beginning of the destination range.
   * @return Output iterator to the element in the destination range, one past the
   *         last amino acid written.
   */
  template <class InputIt1, class InputIt2, class OutputIt>
  OutputIt translate(InputIt1 first, InputIt1 last, InputIt2 t_first, OutputIt d_first)
  {
      // Locate the three base rows; the start/stop row at offset 64 is skipped over.
      InputIt2 base1_first = t_first;
      std::advance(base1_first, 128);
      InputIt2 base2_first = base1_first;
      std::advance(base2_first, 64);
      InputIt2 base3_first = base2_first;
      std::advance(base3_first, 64);

      while (first != last)
      {
          // Build the codon one element at a time, checking against the end of
          // the range before every increment so that no iterator passes `last`.
          InputIt1 second = first;
          ++second;
          if (second == last)
              break;

          InputIt1 third = second;
          ++third;
          if (third == last)
              break;

          // Scan the 64 table columns for the one whose bases match this codon.
          InputIt2 aa = t_first;
          InputIt2 base1 = base1_first;
          InputIt2 base2 = base2_first;
          InputIt2 base3 = base3_first;
          for (std::uint_fast8_t i = 0; i < 64; ++i)
          {
              if (*first == *base1 && *second == *base2 && *third == *base3)
              {
                  // Stop codon: end the translation without emitting it.
                  if (*aa == '*')
                      return d_first;

                  *d_first++ = *aa;
                  break;
              }

              ++aa;
              ++base1;
              ++base2;
              ++base3;
          }

          // `third` is dereferenceable here, so stepping once past it is valid
          // and lands either on the next codon or exactly on `last`.
          first = third;
          ++first;
      }

      return d_first;
  }

  } // namespace dna
  80. #endif // ANTKEEPER_TRANSLATE_HPP