💿🐜 Antkeeper source code https://antkeeper.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

136 lines
3.7 KiB

  1. /*
  2. * Copyright (C) 2020 Christopher J. Howard
  3. *
  4. * This file is part of Antkeeper source code.
  5. *
  6. * Antkeeper source code is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Antkeeper source code is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef ANTKEEPER_DNA_FRAME_HPP
  20. #define ANTKEEPER_DNA_FRAME_HPP
  21. #include <iterator>
  22. namespace dna {
  23. /**
  24. * Finds the first start codon in a sequence.
  25. *
  26. * @param first,last Range of elements to search.
  27. * @param n Number of elements per codon.
  28. * @param p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
  29. * @return Iterator to the first element in the start codon, or @p last if no start codon was found.
  30. */
  31. template <class InputIt, class Size, class BinaryPredicate>
  32. InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p)
  33. {
  34. auto length = std::distance(first, last);
  35. if (length >= n)
  36. {
  37. InputIt next = first;
  38. std::advance(next, n);
  39. do
  40. {
  41. if (p(first, next))
  42. return first;
  43. ++first;
  44. ++next;
  45. --length;
  46. }
  47. while (length >= n);
  48. }
  49. return last;
  50. }
  51. /**
  52. * Searches the range `[first, last)` for a sequence of @p n elements which satifies predicate @p p.
  53. *
  54. * @param first,last Range of elements to search.
  55. * @param n Number of elements in the sequence.
  56. * @param stride Number of elements between searches.
  57. * @param p
  58. */
  59. template <class InputIt, class Size, class BinaryPredicate>
  60. InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p)
  61. {
  62. if (auto length = std::distance(first, last); length >= n)
  63. {
  64. Size offset = n + stride;
  65. InputIt next = first;
  66. std::advance(next, n);
  67. do
  68. {
  69. if (p(first, next))
  70. return first;
  71. if (length < offset)
  72. break;
  73. std::advance(first, stride);
  74. std::advance(next, stride);
  75. length -= offset;
  76. }
  77. while (1);
  78. }
  79. return last;
  80. }
  81. /**
  82. * Finds the first stop codon in a sequence.
  83. *
  84. * @param first,last Range of elements to search.
  85. * @param n Number of elements per codon.
  86. * @param p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
  87. * @return Iterator to the first element in the stop codon, or @p last if no stop codon was found.
  88. */
  89. template <class InputIt, class Size, class BinaryPredicate>
  90. InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p)
  91. {
  92. for (auto length = std::distance(first, last); length >= n; length -= n)
  93. {
  94. InputIt next = first;
  95. std::advance(next, n);
  96. if (p(first, next))
  97. return first;
  98. first = next;
  99. }
  100. return last;
  101. }
  102. /**
  103. * Finds the first open reading frame (ORF) in a range of elements.
  104. *
  105. * @param[in,out] first Iterator to the beginning of the sequence, which will point to th
  106. *
  107. * @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
  108. * @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
  109. */
  110. template <class InputIt, class Size, class BinaryPredicate1, class BinaryPredicate2>
  111. void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p)
  112. {
  113. first = find_start(first, last, n, start_p);
  114. if (first != last)
  115. last = find_stop(first, last, n, stop_p);
  116. }
  117. } // namespace dna
  118. #endif // ANTKEEPER_DNA_FRAME_HPP