💿🐜 Antkeeper source code https://antkeeper.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

357 lines
10 KiB

  1. /*
  2. * Copyright (C) 2021 Christopher J. Howard
  3. *
  4. * This file is part of Antkeeper source code.
  5. *
  6. * Antkeeper source code is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Antkeeper source code is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef ANTKEEPER_GENETICS_SEQUENCE_HPP
  20. #define ANTKEEPER_GENETICS_SEQUENCE_HPP
  21. #include "base.hpp"
  22. #include "codon.hpp"
  23. #include "translation-table.hpp"
  24. #include <algorithm>
  25. #include <iterator>
  26. #include <random>
  27. namespace genetics {
  28. namespace sequence {
  29. /**
  30. * Open reading frame (ORF), defined by a start codon and stop codon, with the distance between divisible by three.
  31. *
  32. * @tparam Iterator Sequence iterator type.
  33. */
  34. template <class Iterator>
  35. struct orf
  36. {
  37. /// Iterator to the first base of the start codon.
  38. Iterator start;
  39. /// Iterator to the first base of the stop codon.
  40. Iterator stop;
  41. };
  42. /**
  43. * Exchanges elements between two ranges, starting at a random offset.
  44. *
  45. * @param first1,last1 First range of elements to crossover.
  46. * @param first2 Beginning of the second range of elements to crossover.
  47. * @param g Uniform random bit generator.
  48. * @return Iterator to the start of the crossover in the second range.
  49. */
  50. template <class ForwardIt1, class ForwardIt2, class URBG>
  51. ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g);
  52. /**
  53. * Exchanges elements between two ranges multiple times, starting at a random offset each time.
  54. *
  55. * @param first1,last1 First range of elements to crossover.
  56. * @param first2 Beginning of the second range of elements to crossover.
  57. * @param count Number of times to crossover.
  58. * @param g Uniform random bit generator.
  59. */
  60. template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
  61. void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g);
  62. /**
  63. * Searches a sequence for an open reading frame (ORF).
  64. *
  65. * @param first,last Range of elements to search.
  66. * @param table Genetic code translation table.
  67. * @return First ORF in the sequence, or `{last, last}` if no ORF was found.
  68. */
  69. template <class ForwardIt>
  70. orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table);
  71. /**
  72. * Applies the given function to a randomly selected element in a range.
  73. *
  74. * @param first,last Range of elements to mutate.
  75. * @param unary_op Unary operation function object that will be applied.
  76. * @param g Uniform random bit generator.
  77. * @return Iterator to the mutated element.
  78. */
  79. template <class ForwardIt, class UnaryOperation, class URBG>
  80. ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g);
  81. /**
  82. * Applies the given function to a random selection of elements in a range.
  83. *
  84. * @param first,last Range of elements to mutate.
  85. * @param count Number of elements to mutate.
  86. * @param unary_op Unary operation function object that will be applied.
  87. * @param g Uniform random bit generator.
  88. */
  89. template <class ForwardIt, class Size, class UnaryOperation, class URBG>
  90. void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g);
  91. /**
  92. * Searches a sequence of IUPAC base symbols for a pattern matching a search string of IUPAC degenerate base symbols.
  93. *
  94. * @param first,last Sequence of IUPAC base symbols to search.
  95. * @param s_first,s_last Search string of IUPAC degenerate base symbols.
  96. * @param stride Distance between consecutive searches.
  97. * @return Iterator to the beginning of the first subsequence matching `[s_first, s_last)` in the sequence `[first, last)`. If no such occurrence is found, @p last is returned.
  98. */
  99. template <class ForwardIt1, class ForwardIt2>
  100. ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride);
  101. /**
  102. * Transcribes a sequence of IUPAC base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`.
  103. *
  104. * @param first,last Range of elements to transcribe.
  105. * @param d_first Beginning of the destination range.
  106. * @return Output iterator to the element past the last element transcribed.
  107. */
  108. template <class InputIt, class OutputIt>
  109. OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first);
  110. /**
  111. * Translates a sequence of codons into amino acids.
  112. *
  113. * @param first,last Open reading frame.
  114. * @param d_first Beginning of destination range.
  115. * @param table Genetic code translation table.
  116. * @return Output iterator to the element past the last element translated.
  117. */
  118. template <class InputIt, class OutputIt>
  119. OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table);
  120. namespace dna
  121. {
  122. /**
  123. * Generates the complementary sequence for a sequence of IUPAC degenerate DNA base symbols.
  124. *
  125. * @param first,last Range of elements to complement.
  126. * @param d_first Beginning of the destination range.
  127. * @return Output iterator to the element past the last element complemented.
  128. */
  129. template <class InputIt, class OutputIt>
  130. OutputIt complement(InputIt first, InputIt last, OutputIt d_first);
  131. }
  132. namespace rna
  133. {
  134. /**
  135. * Generates the complementary sequence for a sequence of IUPAC degenerate RNA base symbols.
  136. *
  137. * @param first,last Range of elements to complement.
  138. * @param d_first Beginning of the destination range.
  139. * @return Output iterator to the element past the last element complemented.
  140. */
  141. template <class InputIt, class OutputIt>
  142. OutputIt complement(InputIt first, InputIt last, OutputIt d_first);
  143. }
  144. template <class ForwardIt1, class ForwardIt2, class URBG>
  145. ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
  146. {
  147. typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
  148. std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);
  149. difference_t pos = distribution(g);
  150. std::advance(first1, pos);
  151. std::advance(first2, pos);
  152. std::swap_ranges(first1, last1, first2);
  153. return first2;
  154. }
  155. template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
  156. void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
  157. {
  158. typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
  159. std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);
  160. ForwardIt1 crossover1, crossover2;
  161. while (count)
  162. {
  163. crossover1 = first1;
  164. crossover2 = first2;
  165. difference_t pos = distribution(g);
  166. std::advance(crossover1, pos);
  167. std::advance(crossover2, pos);
  168. std::swap_ranges(crossover1, last1, crossover2);
  169. --count;
  170. }
  171. }
  172. template <class ForwardIt>
  173. orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table)
  174. {
  175. ForwardIt second;
  176. ForwardIt third;
  177. orf<ForwardIt> result;
  178. auto distance = std::distance(first, last);
  179. if (distance >= 3)
  180. {
  181. second = first;
  182. ++second;
  183. third = second;
  184. ++third;
  185. do
  186. {
  187. if (codon::is_start(*first, *second, *third, table.starts))
  188. {
  189. result.start = first;
  190. distance -= 3;
  191. break;
  192. }
  193. first = second;
  194. second = third;
  195. ++third;
  196. --distance;
  197. }
  198. while (third != last);
  199. }
  200. for (; distance >= 3; distance -= 3)
  201. {
  202. first = ++third;
  203. second = ++third;
  204. ++third;
  205. if (codon::is_stop(*first, *second, *third, table.aas))
  206. {
  207. result.stop = first;
  208. return result;
  209. }
  210. }
  211. return {last, last};
  212. }
  213. template <class ForwardIt, class UnaryOperation, class URBG>
  214. ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
  215. {
  216. typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
  217. if (first == last)
  218. return first;
  219. std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
  220. std::advance(first, distribution(g));
  221. *first = unary_op(*first);
  222. return first;
  223. }
  224. template <class ForwardIt, class Size, class UnaryOperation, class URBG>
  225. void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
  226. {
  227. typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
  228. if (first == last)
  229. return first;
  230. std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
  231. ForwardIt mutation;
  232. while (count)
  233. {
  234. mutation = first;
  235. std::advance(mutation, distribution(g));
  236. *mutation = unary_op(*mutation);
  237. --count;
  238. }
  239. }
  240. template <class ForwardIt1, class ForwardIt2>
  241. ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride)
  242. {
  243. for (auto distance = std::distance(first, last); distance > 0; distance -= stride)
  244. {
  245. ForwardIt1 it = first;
  246. for (ForwardIt2 s_it = s_first; ; ++it, ++s_it)
  247. {
  248. if (s_it == s_last)
  249. return first;
  250. if (it == last)
  251. return last;
  252. if (!base::compare(*it, *s_it))
  253. break;
  254. }
  255. if (distance > stride)
  256. std::advance(first, stride);
  257. }
  258. return last;
  259. }
  260. template <class InputIt, class OutputIt>
  261. inline OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first)
  262. {
  263. return std::transform(first, last, d_first, base::transcribe);
  264. }
  265. template <class InputIt, class OutputIt>
  266. OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table)
  267. {
  268. auto length = std::distance(first, last);
  269. if (length >= 3)
  270. {
  271. InputIt second = first;
  272. ++second;
  273. InputIt third = second;
  274. ++third;
  275. *(d_first++) = codon::translate(*first, *second, *third, table.starts);
  276. for (length -= 3; length >= 3; length -= 3)
  277. {
  278. first = ++third;
  279. second = ++third;
  280. ++third;
  281. *(d_first++) = codon::translate(*first, *second, *third, table.aas);
  282. }
  283. }
  284. return d_first;
  285. }
  286. namespace dna
  287. {
  288. template <class InputIt, class OutputIt>
  289. inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
  290. {
  291. return std::transform(first, last, d_first, base::dna::complement);
  292. }
  293. }
  294. namespace rna
  295. {
  296. template <class InputIt, class OutputIt>
  297. inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
  298. {
  299. return std::transform(first, last, d_first, base::rna::complement);
  300. }
  301. }
  302. } // namespace sequence
  303. } // namespace genetics
  304. #endif // ANTKEEPER_GENETICS_SEQUENCE_HPP