/* * Copyright (C) 2021 Christopher J. Howard * * This file is part of Antkeeper source code. * * Antkeeper source code is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Antkeeper source code is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Antkeeper source code. If not, see . */ #ifndef ANTKEEPER_GENETICS_SEQUENCE_HPP #define ANTKEEPER_GENETICS_SEQUENCE_HPP #include "base.hpp" #include "codon.hpp" #include #include #include namespace genetics { /// Functions and structures related to sequences of IUPAC degenerate base symbols. namespace sequence { /** * Open reading frame (ORF), defined by a start codon and stop codon, with the distance between divisible by three. * * @tparam Iterator Sequence iterator type. */ template struct orf { /// Iterator to the first base of the start codon. Iterator start; /// Iterator to the first base of the stop codon. Iterator stop; }; /** * Exchanges elements between two ranges, starting at a random offset. * * @param first1,last1 First range of elements to crossover. * @param first2 Beginning of the second range of elements to crossover. * @param g Uniform random bit generator. * @return Iterator to the start of the crossover in the second range. */ template ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g); /** * Exchanges elements between two ranges multiple times, starting at a random offset each time. * * @param first1,last1 First range of elements to crossover. * @param first2 Beginning of the second range of elements to crossover. * @param count Number of times to crossover. * @param g Uniform random bit generator. */ template void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g); /** * Searches a sequence for an open reading frame (ORF). * * @param first,last Range of elements to search. * @param table Genetic code translation table. * @return First ORF in the sequence, or `{last, last}` if no ORF was found. */ template orf find_orf(ForwardIt first, ForwardIt last, const codon::table& table); /** * Applies the given function to a randomly selected element in a range. * * @param first,last Range of elements to mutate. * @param unary_op Unary operation function object that will be applied. * @param g Uniform random bit generator. * @return Iterator to the mutated element. */ template ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g); /** * Applies the given function to a random selection of elements in a range. * * @param first,last Range of elements to mutate. * @param count Number of elements to mutate. * @param unary_op Unary operation function object that will be applied. * @param g Uniform random bit generator. */ template void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g); /** * Searches a sequence of IUPAC base symbols for a pattern matching a search string of IUPAC degenerate base symbols. * * @param first,last Sequence of IUPAC base symbols to search. * @param s_first,s_last Search string of IUPAC degenerate base symbols. * @param stride Distance between consecutive searches. * @return Iterator to the beginning of the first subsequence matching `[s_first, s_last)` in the sequence `[first, last)`. If no such occurrence is found, @p last is returned. */ template ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits::difference_type stride); /** * Transcribes a sequence of IUPAC base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`. * * @param first,last Range of elements to transcribe. * @param d_first Beginning of the destination range. * @return Output iterator to the element past the last element transcribed. */ template OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first); /** * Translates a sequence of codons into amino acids. * * @param first,last Open reading frame. * @param d_first Beginning of destination range. * @param table Genetic code translation table. * @return Output iterator to the element past the last element translated. */ template OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const codon::table& table); /// Functions which operate on sequences of IUPAC degenerate **DNA** base symbols. namespace dna { /** * Generates the complementary sequence for a sequence of IUPAC degenerate DNA base symbols. * * @param first,last Range of elements to complement. * @param d_first Beginning of the destination range. * @return Output iterator to the element past the last element complemented. */ template OutputIt complement(InputIt first, InputIt last, OutputIt d_first); } /// Functions which operate on sequences of IUPAC degenerate **RNA** base symbols. namespace rna { /** * Generates the complementary sequence for a sequence of IUPAC degenerate RNA base symbols. * * @param first,last Range of elements to complement. * @param d_first Beginning of the destination range. * @return Output iterator to the element past the last element complemented. */ template OutputIt complement(InputIt first, InputIt last, OutputIt d_first); } template ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g) { typedef typename std::iterator_traits::difference_type difference_t; std::uniform_int_distribution distribution(0, std::distance(first1, last1) - 1); difference_t pos = distribution(g); std::advance(first1, pos); std::advance(first2, pos); std::swap_ranges(first1, last1, first2); return first2; } template void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g) { typedef typename std::iterator_traits::difference_type difference_t; std::uniform_int_distribution distribution(0, std::distance(first1, last1) - 1); ForwardIt1 crossover1, crossover2; while (count) { crossover1 = first1; crossover2 = first2; difference_t pos = distribution(g); std::advance(crossover1, pos); std::advance(crossover2, pos); std::swap_ranges(crossover1, last1, crossover2); --count; } } template orf find_orf(ForwardIt first, ForwardIt last, const codon::table& table) { ForwardIt second; ForwardIt third; orf result; auto distance = std::distance(first, last); if (distance >= 3) { second = first; ++second; third = second; ++third; do { if (codon::is_start(*first, *second, *third, table.starts)) { result.start = first; distance -= 3; break; } first = second; second = third; ++third; --distance; } while (third != last); } for (; distance >= 3; distance -= 3) { first = ++third; second = ++third; ++third; if (codon::is_stop(*first, *second, *third, table.aas)) { result.stop = first; return result; } } return {last, last}; } template ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g) { typedef typename std::iterator_traits::difference_type difference_t; if (first == last) return first; std::uniform_int_distribution distribution(0, std::distance(first, last) - 1); std::advance(first, distribution(g)); *first = unary_op(*first); return first; } template void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g) { typedef typename std::iterator_traits::difference_type difference_t; if (first == last) return first; std::uniform_int_distribution distribution(0, std::distance(first, last) - 1); ForwardIt mutation; while (count) { mutation = first; std::advance(mutation, distribution(g)); *mutation = unary_op(*mutation); --count; } } template ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits::difference_type stride) { for (auto distance = std::distance(first, last); distance > 0; distance -= stride) { ForwardIt1 it = first; for (ForwardIt2 s_it = s_first; ; ++it, ++s_it) { if (s_it == s_last) return first; if (it == last) return last; if (!base::compare(*it, *s_it)) break; } if (distance > stride) std::advance(first, stride); } return last; } template inline OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first) { return std::transform(first, last, d_first, base::transcribe); } template OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const codon::table& table) { auto length = std::distance(first, last); if (length >= 3) { InputIt second = first; ++second; InputIt third = second; ++third; *(d_first++) = codon::translate(*first, *second, *third, table.starts); for (length -= 3; length >= 3; length -= 3) { first = ++third; second = ++third; ++third; *(d_first++) = codon::translate(*first, *second, *third, table.aas); } } return d_first; } namespace dna { template inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first) { return std::transform(first, last, d_first, base::dna::complement); } } namespace rna { template inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first) { return std::transform(first, last, d_first, base::rna::complement); } } } // namespace sequence } // namespace genetics #endif // ANTKEEPER_GENETICS_SEQUENCE_HPP