@ -0,0 +1,86 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#include "codon.hpp" | |||||
namespace genetics { | |||||
namespace codon { | |||||
/**
 * Maps a nucleobase symbol to its position in a translation table.
 *
 * `U` and `T` share index 0, `C` is 1, `A` is 2 and `G` is 3.
 *
 * @param base IUPAC code of nucleobase, either `U`, `T`, `C`, `A`, or `G`.
 * @return Index of the nucleobase in the range `[0, 3]`, or the negative value `~3` for any other symbol.
 */
static inline int base_index(char base)
{
	if (base == 'U' || base == 'T')
		return 0;
	if (base == 'C')
		return 1;
	if (base == 'A')
		return 2;
	if (base == 'G')
		return 3;

	// Every other symbol is non-standard.
	return ~3;
}
/** | |||||
* Returns the index of a codon for use with a translation table. | |||||
* | |||||
* @param base1 IUPAC code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param base2 IUPAC code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param base3 IUPAC code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @return Index of codon, or a negative value if a non-standard nucleobase was supplied. | |||||
*/ | |||||
static inline int codon_index(char base1, char base2, char base3) | |||||
{ | |||||
int i = base_index(base1); | |||||
int j = base_index(base2); | |||||
int k = base_index(base3); | |||||
return (i << 4) | (j << 2) | k; | |||||
} | |||||
inline char translate(char base1, char base2, char base3, const char* aas) | |||||
{ | |||||
int index = codon_index(base1, base2, base3); | |||||
if (index < 0) | |||||
return '-'; | |||||
return aas[index]; | |||||
} | |||||
bool is_start(char base1, char base2, char base3, const char* starts) | |||||
{ | |||||
char aa = translate(base1, base2, base3, starts); | |||||
return ((aa != '-') && (aa != '*')); | |||||
} | |||||
bool is_stop(char base1, char base2, char base3, const char* aas) | |||||
{ | |||||
char aa = translate(base1, base2, base3, aas); | |||||
return (aa == '*'); | |||||
} | |||||
} // namespace codon
} // namespace genetics |
@ -0,0 +1,62 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_GENETICS_CODON_HPP | |||||
#define ANTKEEPER_GENETICS_CODON_HPP | |||||
namespace genetics { | |||||
namespace codon { | |||||
/** | |||||
* Returns `true` if a codon is a start codon. | |||||
* | |||||
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param starts String of 64 IUPAC amino acid codes, ordered to match corresponding start codon indices. | |||||
* @return `true` if the codon is a start codon, `false` otherwise. | |||||
*/ | |||||
bool is_start(char base1, char base2, char base3, const char* starts); | |||||
/** | |||||
* Returns `true` if a codon is a stop codon. | |||||
* | |||||
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices. | |||||
* @return `true` if the codon is a stop codon, `false` otherwise. | |||||
*/ | |||||
bool is_stop(char base1, char base2, char base3, const char* aas); | |||||
/** | |||||
* Translates a codon into an amino acid. | |||||
* | |||||
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||||
* @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices. | |||||
* @return IUPAC amino acid code of corresponding amino acid, or `-` if an invalid codon was supplied. | |||||
*/ | |||||
char translate(char base1, char base2, char base3, const char* aas); | |||||
} // namespace codon
} // namespace genetics | |||||
#endif // ANTKEEPER_GENETICS_CODON_HPP |
@ -1,87 +0,0 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_DNA_CROSSOVER_HPP | |||||
#define ANTKEEPER_DNA_CROSSOVER_HPP | |||||
#include <algorithm> | |||||
#include <iterator> | |||||
#include <random> | |||||
namespace dna { | |||||
/** | |||||
* Exchanges elements between two ranges, starting at a random offset. | |||||
* | |||||
* @param first1,last1 First range of elements to crossover. | |||||
* @param first2 Beginning of the second range of elements to crossover. | |||||
* @param g Uniform random bit generator. | |||||
* @return Iterator to the start of the crossover in the second range. | |||||
*/ | |||||
template <class ForwardIt1, class ForwardIt2, class URBG> | |||||
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g); | |||||
/** | |||||
* Exchanges elements between two ranges multiple times, starting at a random offset each time. | |||||
* | |||||
* @param first1,last1 First range of elements to crossover. | |||||
* @param first2 Beginning of the second range of elements to crossover. | |||||
* @param count Number of times to crossover. | |||||
* @param g Uniform random bit generator. | |||||
*/ | |||||
template <class ForwardIt1, class ForwardIt2, class Size, class URBG> | |||||
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g); | |||||
/**
 * Swaps the tails of two ranges, starting at a single randomly chosen offset.
 *
 * An offset in `[0, distance(first1, last1) - 1]` is drawn from @p g, then every
 * element from that offset to the end of the first range is swapped with the
 * element at the same offset in the second range.
 *
 * @param first1,last1 First range of elements to crossover.
 * @param first2 Beginning of the second range, which must be at least as long as the first.
 * @param g Uniform random bit generator.
 * @return Iterator to the start of the crossover in the second range, or @p first2 unchanged if the first range is empty.
 */
template <class ForwardIt1, class ForwardIt2, class URBG>
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;

	// An empty range has no valid offset; uniform_int_distribution(0, -1) would
	// violate the distribution's precondition.
	if (first1 == last1)
		return first2;

	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);
	const difference_t pos = distribution(g);

	std::advance(first1, pos);
	std::advance(first2, pos);
	std::swap_ranges(first1, last1, first2);

	return first2;
}
/**
 * Performs @p count independent tail swaps between two ranges.
 *
 * Each iteration draws a fresh offset in `[0, distance(first1, last1) - 1]` from
 * @p g and swaps every element from that offset to the end of the first range
 * with the element at the same offset in the second range.
 *
 * @param first1,last1 First range of elements to crossover.
 * @param first2 Beginning of the second range, which must be at least as long as the first.
 * @param count Number of times to crossover; non-positive counts do nothing.
 * @param g Uniform random bit generator.
 */
template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;

	// An empty range has no valid offset; uniform_int_distribution(0, -1) would
	// violate the distribution's precondition.
	if (first1 == last1)
		return;

	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);

	for (; count > 0; --count)
	{
		// Each cursor uses the iterator type of its own range, so the two
		// ranges may come from different container types.
		ForwardIt1 crossover1 = first1;
		ForwardIt2 crossover2 = first2;

		const difference_t pos = distribution(g);
		std::advance(crossover1, pos);
		std::advance(crossover2, pos);
		std::swap_ranges(crossover1, last1, crossover2);
	}
}
} // namespace dna | |||||
#endif // ANTKEEPER_DNA_CROSSOVER_HPP |
@ -1,136 +0,0 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_DNA_FRAME_HPP | |||||
#define ANTKEEPER_DNA_FRAME_HPP | |||||
#include <iterator> | |||||
namespace dna { | |||||
/**
 * Finds the first start codon in a sequence.
 *
 * A window of @p n elements is slid across the range one element at a time and
 * passed to @p p as an iterator pair `(window_begin, window_end)`. The range is
 * traversed more than once, so the iterators must support multiple passes.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon.
 * @param p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
 * @return Iterator to the first element in the start codon, or @p last if no start codon was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
	auto length = std::distance(first, last);
	const auto width = static_cast<decltype(length)>(n);

	if (length < width)
		return last;

	InputIt next = first;
	std::advance(next, width);

	for (;;)
	{
		if (p(first, next))
			return first;

		// Stop before stepping when no complete window remains, so that
		// `next` is never incremented past `last`.
		--length;
		if (length < width)
			break;

		++first;
		++next;
	}

	return last;
}
/**
 * Searches the range `[first, last)` for a sequence of @p n elements which satisfies predicate @p p.
 *
 * Windows of @p n elements are tested at offsets `0`, `stride`, `2 * stride`, ...
 * for as long as a complete window fits inside the range.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements in the sequence.
 * @param stride Number of elements between the starts of consecutive searches. If it is not positive, only the first window is tested.
 * @param p Binary predicate called with an iterator pair delimiting a window of @p n elements; returns `true` if the window matches.
 * @return Iterator to the first element of the first matching window, or @p last if no window matched.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p)
{
	using difference_t = typename std::iterator_traits<InputIt>::difference_type;

	const difference_t width = static_cast<difference_t>(n);
	const difference_t step = static_cast<difference_t>(stride);

	difference_t length = std::distance(first, last);
	if (length < width)
		return last;

	InputIt next = first;
	std::advance(next, width);

	for (;;)
	{
		if (p(first, next))
			return first;

		// Another window fits only if `width` elements remain after stepping.
		// A non-positive step would never make progress.
		if (step < 1 || length - step < width)
			break;

		std::advance(first, step);
		std::advance(next, step);

		// Only `step` elements were consumed by moving the window.
		length -= step;
	}

	return last;
}
/**
 * Finds the first stop codon in a sequence.
 *
 * The range is read in consecutive, non-overlapping windows of @p n elements,
 * beginning at @p first; each window is handed to @p p as an iterator pair.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon.
 * @param p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
 * @return Iterator to the first element in the stop codon, or @p last if no stop codon was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
	auto remaining = std::distance(first, last);

	while (remaining >= n)
	{
		InputIt codon_end = first;
		std::advance(codon_end, n);

		if (p(first, codon_end))
			return first;

		// Jump to the next codon in the same reading frame.
		first = codon_end;
		remaining -= n;
	}

	return last;
}
/**
 * Finds the first open reading frame (ORF) in a range of elements.
 *
 * @param[in,out] first On entry, iterator to the beginning of the sequence. On return, the result of find_start(): the first element of the start codon, or @p last if none was found.
 * @param[in,out] last On entry, iterator to the end of the sequence. If a start codon was found, on return it holds the result of find_stop() searched from the start codon; otherwise it is left unchanged.
 * @param n Number of elements per codon.
 * @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
 * @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
 */
template <class InputIt, class Size, class BinaryPredicate1, class BinaryPredicate2>
void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p)
{
	first = find_start(first, last, n, start_p);

	const bool has_start = (first != last);
	if (has_start)
	{
		// Look for the stop codon in the frame established by the start codon.
		last = find_stop(first, last, n, stop_p);
	}
}
} // namespace dna | |||||
#endif // ANTKEEPER_DNA_FRAME_HPP |
@ -0,0 +1,28 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_GENETICS_HPP | |||||
#define ANTKEEPER_GENETICS_HPP | |||||
#include "base.hpp" | |||||
#include "codon.hpp" | |||||
#include "protein.hpp" | |||||
#include "sequence.hpp" | |||||
#endif // ANTKEEPER_GENETICS_HPP |
@ -1,82 +0,0 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_DNA_MUTATE_HPP | |||||
#define ANTKEEPER_DNA_MUTATE_HPP | |||||
#include <algorithm> | |||||
#include <iterator> | |||||
#include <random> | |||||
namespace dna { | |||||
/** | |||||
* Applies the given function to a randomly selected element in a range. | |||||
* | |||||
* @param first,last Range of elements to mutate. | |||||
* @param unary_op Unary operation function object that will be applied. | |||||
* @param g Uniform random bit generator. | |||||
* @return Iterator to the mutated element. | |||||
*/ | |||||
template <class ForwardIt, class UnaryOperation, class URBG> | |||||
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g); | |||||
/** | |||||
* Applies the given function to a random selection of elements in a range. | |||||
* | |||||
* @param first,last Range of elements to mutate. | |||||
* @param count Number of elements to mutate. | |||||
* @param unary_op Unary operation function object that will be applied. | |||||
* @param g Uniform random bit generator. | |||||
*/ | |||||
template <class ForwardIt, class Size, class UnaryOperation, class URBG> | |||||
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g); | |||||
/**
 * Applies the given function to one randomly selected element in a range.
 *
 * An index in `[0, distance(first, last) - 1]` is drawn from @p g and the element
 * at that index is replaced by the result of calling @p unary_op on it.
 *
 * @param first,last Range of elements to mutate.
 * @param unary_op Unary operation function object that will be applied.
 * @param g Uniform random bit generator.
 * @return Iterator to the mutated element, or @p first if the range is empty.
 */
template <class ForwardIt, class UnaryOperation, class URBG>
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;

	// Nothing to mutate; uniform_int_distribution(0, -1) would violate the
	// distribution's precondition.
	if (first == last)
		return first;

	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
	std::advance(first, distribution(g));
	*first = unary_op(*first);

	return first;
}
/**
 * Applies the given function to a random selection of elements in a range.
 *
 * Each of the @p count iterations draws an index in
 * `[0, distance(first, last) - 1]` from @p g and replaces the element at that
 * index by the result of @p unary_op; the same element may be picked more than once.
 *
 * @param first,last Range of elements to mutate.
 * @param count Number of mutations to perform; non-positive counts do nothing.
 * @param unary_op Unary operation function object that will be applied.
 * @param g Uniform random bit generator.
 */
template <class ForwardIt, class Size, class UnaryOperation, class URBG>
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;

	// Nothing to mutate; uniform_int_distribution(0, -1) would violate the
	// distribution's precondition.
	if (first == last)
		return;

	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);

	for (; count > 0; --count)
	{
		ForwardIt mutation = first;
		std::advance(mutation, distribution(g));
		*mutation = unary_op(*mutation);
	}
}
} // namespace dna | |||||
#endif // ANTKEEPER_DNA_MUTATE_HPP |
@ -0,0 +1,31 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_GENETICS_PROTEIN_HPP | |||||
#define ANTKEEPER_GENETICS_PROTEIN_HPP | |||||
namespace genetics { | |||||
namespace protein { | |||||
} // namespace protein | |||||
} // namespace genetics | |||||
#endif // ANTKEEPER_GENETICS_PROTEIN_HPP |
@ -0,0 +1,357 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_GENETICS_SEQUENCE_HPP | |||||
#define ANTKEEPER_GENETICS_SEQUENCE_HPP | |||||
#include "base.hpp" | |||||
#include "codon.hpp" | |||||
#include "translation-table.hpp" | |||||
#include <algorithm> | |||||
#include <iterator> | |||||
#include <random> | |||||
namespace genetics { | |||||
namespace sequence { | |||||
/**
 * Open reading frame (ORF): a region of a sequence delimited by a start codon
 * and a stop codon, where the distance between the two is divisible by three.
 *
 * @tparam Iterator Sequence iterator type.
 */
template <class Iterator>
struct orf
{
	/// Position of the first base of the start codon.
	Iterator start;

	/// Position of the first base of the stop codon.
	Iterator stop;
};
/** | |||||
* Exchanges elements between two ranges, starting at a random offset. | |||||
* | |||||
* @param first1,last1 First range of elements to crossover. | |||||
* @param first2 Beginning of the second range of elements to crossover. | |||||
* @param g Uniform random bit generator. | |||||
* @return Iterator to the start of the crossover in the second range. | |||||
*/ | |||||
template <class ForwardIt1, class ForwardIt2, class URBG> | |||||
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g); | |||||
/** | |||||
* Exchanges elements between two ranges multiple times, starting at a random offset each time. | |||||
* | |||||
* @param first1,last1 First range of elements to crossover. | |||||
* @param first2 Beginning of the second range of elements to crossover. | |||||
* @param count Number of times to crossover. | |||||
* @param g Uniform random bit generator. | |||||
*/ | |||||
template <class ForwardIt1, class ForwardIt2, class Size, class URBG> | |||||
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g); | |||||
/** | |||||
* Searches a sequence for an open reading frame (ORF). | |||||
* | |||||
* @param first,last Range of elements to search. | |||||
* @param table Genetic code translation table. | |||||
* @return First ORF in the sequence, or `{last, last}` if no ORF was found. | |||||
*/ | |||||
template <class ForwardIt> | |||||
orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table); | |||||
/** | |||||
* Applies the given function to a randomly selected element in a range. | |||||
* | |||||
* @param first,last Range of elements to mutate. | |||||
* @param unary_op Unary operation function object that will be applied. | |||||
* @param g Uniform random bit generator. | |||||
* @return Iterator to the mutated element. | |||||
*/ | |||||
template <class ForwardIt, class UnaryOperation, class URBG> | |||||
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g); | |||||
/** | |||||
* Applies the given function to a random selection of elements in a range. | |||||
* | |||||
* @param first,last Range of elements to mutate. | |||||
* @param count Number of elements to mutate. | |||||
* @param unary_op Unary operation function object that will be applied. | |||||
* @param g Uniform random bit generator. | |||||
*/ | |||||
template <class ForwardIt, class Size, class UnaryOperation, class URBG> | |||||
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g); | |||||
/** | |||||
* Searches a sequence of IUPAC base symbols for a pattern matching a search string of IUPAC degenerate base symbols. | |||||
* | |||||
* @param first,last Sequence of IUPAC base symbols to search. | |||||
* @param s_first,s_last Search string of IUPAC degenerate base symbols. | |||||
* @param stride Distance between consecutive searches. | |||||
* @return Iterator to the beginning of the first subsequence matching `[s_first, s_last)` in the sequence `[first, last)`. If no such occurrence is found, @p last is returned. | |||||
*/ | |||||
template <class ForwardIt1, class ForwardIt2> | |||||
ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride); | |||||
/** | |||||
* Transcribes a sequence of IUPAC base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`. | |||||
* | |||||
* @param first,last Range of elements to transcribe. | |||||
* @param d_first Beginning of the destination range. | |||||
* @return Output iterator to the element past the last element transcribed. | |||||
*/ | |||||
template <class InputIt, class OutputIt> | |||||
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first); | |||||
/** | |||||
* Translates a sequence of codons into amino acids. | |||||
* | |||||
* @param first,last Open reading frame. | |||||
* @param d_first Beginning of destination range. | |||||
* @param table Genetic code translation table. | |||||
* @return Output iterator to the element past the last element translated. | |||||
*/ | |||||
template <class InputIt, class OutputIt> | |||||
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table); | |||||
namespace dna | |||||
{ | |||||
/** | |||||
* Generates the complementary sequence for a sequence of IUPAC degenerate DNA base symbols. | |||||
* | |||||
* @param first,last Range of elements to complement. | |||||
* @param d_first Beginning of the destination range. | |||||
* @return Output iterator to the element past the last element complemented. | |||||
*/ | |||||
template <class InputIt, class OutputIt> | |||||
OutputIt complement(InputIt first, InputIt last, OutputIt d_first); | |||||
} | |||||
namespace rna | |||||
{ | |||||
/** | |||||
* Generates the complementary sequence for a sequence of IUPAC degenerate RNA base symbols. | |||||
* | |||||
* @param first,last Range of elements to complement. | |||||
* @param d_first Beginning of the destination range. | |||||
* @return Output iterator to the element past the last element complemented. | |||||
*/ | |||||
template <class InputIt, class OutputIt> | |||||
OutputIt complement(InputIt first, InputIt last, OutputIt d_first); | |||||
} | |||||
/**
 * Swaps the tails of two ranges, starting at a single randomly chosen offset.
 *
 * An offset in `[0, distance(first1, last1) - 1]` is drawn from @p g, then every
 * element from that offset to the end of the first range is swapped with the
 * element at the same offset in the second range.
 *
 * @param first1,last1 First range of elements to crossover.
 * @param first2 Beginning of the second range, which must be at least as long as the first.
 * @param g Uniform random bit generator.
 * @return Iterator to the start of the crossover in the second range, or @p first2 unchanged if the first range is empty.
 */
template <class ForwardIt1, class ForwardIt2, class URBG>
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;

	// An empty range has no valid offset; uniform_int_distribution(0, -1) would
	// violate the distribution's precondition.
	if (first1 == last1)
		return first2;

	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);
	const difference_t pos = distribution(g);

	std::advance(first1, pos);
	std::advance(first2, pos);
	std::swap_ranges(first1, last1, first2);

	return first2;
}
/**
 * Performs @p count independent tail swaps between two ranges.
 *
 * Each iteration draws a fresh offset in `[0, distance(first1, last1) - 1]` from
 * @p g and swaps every element from that offset to the end of the first range
 * with the element at the same offset in the second range.
 *
 * @param first1,last1 First range of elements to crossover.
 * @param first2 Beginning of the second range, which must be at least as long as the first.
 * @param count Number of times to crossover; non-positive counts do nothing.
 * @param g Uniform random bit generator.
 */
template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;

	// An empty range has no valid offset; uniform_int_distribution(0, -1) would
	// violate the distribution's precondition.
	if (first1 == last1)
		return;

	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);

	for (; count > 0; --count)
	{
		// Each cursor uses the iterator type of its own range, so the two
		// ranges may come from different container types.
		ForwardIt1 crossover1 = first1;
		ForwardIt2 crossover2 = first2;

		const difference_t pos = distribution(g);
		std::advance(crossover1, pos);
		std::advance(crossover2, pos);
		std::swap_ranges(crossover1, last1, crossover2);
	}
}
template <class ForwardIt> | |||||
orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table) | |||||
{ | |||||
ForwardIt second; | |||||
ForwardIt third; | |||||
orf<ForwardIt> result; | |||||
auto distance = std::distance(first, last); | |||||
if (distance >= 3) | |||||
{ | |||||
second = first; | |||||
++second; | |||||
third = second; | |||||
++third; | |||||
do | |||||
{ | |||||
if (codon::is_start(*first, *second, *third, table.starts)) | |||||
{ | |||||
result.start = first; | |||||
distance -= 3; | |||||
break; | |||||
} | |||||
first = second; | |||||
second = third; | |||||
++third; | |||||
--distance; | |||||
} | |||||
while (third != last); | |||||
} | |||||
for (; distance >= 3; distance -= 3) | |||||
{ | |||||
first = ++third; | |||||
second = ++third; | |||||
++third; | |||||
if (codon::is_stop(*first, *second, *third, table.aas)) | |||||
{ | |||||
result.stop = first; | |||||
return result; | |||||
} | |||||
} | |||||
return {last, last}; | |||||
} | |||||
/**
 * Replaces one randomly chosen element of a range with the result of applying
 * @p unary_op to it.
 *
 * @param first,last Range of elements to mutate.
 * @param unary_op Unary operation function object that will be applied.
 * @param g Uniform random bit generator.
 * @return Iterator to the mutated element, or @p first if the range is empty.
 */
template <class ForwardIt, class UnaryOperation, class URBG>
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
{
	using offset_t = typename std::iterator_traits<ForwardIt>::difference_type;

	if (first != last)
	{
		// Choose an index uniformly among the elements of the range.
		const offset_t last_index = std::distance(first, last) - 1;
		std::uniform_int_distribution<offset_t> pick(0, last_index);
		const offset_t index = pick(g);

		std::advance(first, index);
		*first = unary_op(*first);
	}

	return first;
}
/**
 * Applies the given function to a random selection of elements in a range.
 *
 * Each of the @p count iterations draws an index in
 * `[0, distance(first, last) - 1]` from @p g and replaces the element at that
 * index by the result of @p unary_op; the same element may be picked more than once.
 *
 * @param first,last Range of elements to mutate.
 * @param count Number of mutations to perform; non-positive counts do nothing.
 * @param unary_op Unary operation function object that will be applied.
 * @param g Uniform random bit generator.
 */
template <class ForwardIt, class Size, class UnaryOperation, class URBG>
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;

	// Nothing to mutate. The function returns void, so no value is returned here.
	if (first == last)
		return;

	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);

	for (; count > 0; --count)
	{
		ForwardIt mutation = first;
		std::advance(mutation, distribution(g));
		*mutation = unary_op(*mutation);
	}
}
template <class ForwardIt1, class ForwardIt2> | |||||
ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride) | |||||
{ | |||||
for (auto distance = std::distance(first, last); distance > 0; distance -= stride) | |||||
{ | |||||
ForwardIt1 it = first; | |||||
for (ForwardIt2 s_it = s_first; ; ++it, ++s_it) | |||||
{ | |||||
if (s_it == s_last) | |||||
return first; | |||||
if (it == last) | |||||
return last; | |||||
if (!base::compare(*it, *s_it)) | |||||
break; | |||||
} | |||||
if (distance > stride) | |||||
std::advance(first, stride); | |||||
} | |||||
return last; | |||||
} | |||||
template <class InputIt, class OutputIt> | |||||
inline OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first) | |||||
{ | |||||
return std::transform(first, last, d_first, base::transcribe); | |||||
} | |||||
template <class InputIt, class OutputIt> | |||||
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table) | |||||
{ | |||||
auto length = std::distance(first, last); | |||||
if (length >= 3) | |||||
{ | |||||
InputIt second = first; | |||||
++second; | |||||
InputIt third = second; | |||||
++third; | |||||
*(d_first++) = codon::translate(*first, *second, *third, table.starts); | |||||
for (length -= 3; length >= 3; length -= 3) | |||||
{ | |||||
first = ++third; | |||||
second = ++third; | |||||
++third; | |||||
*(d_first++) = codon::translate(*first, *second, *third, table.aas); | |||||
} | |||||
} | |||||
return d_first; | |||||
} | |||||
namespace dna | |||||
{ | |||||
template <class InputIt, class OutputIt> | |||||
inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first) | |||||
{ | |||||
return std::transform(first, last, d_first, base::dna::complement); | |||||
} | |||||
} | |||||
namespace rna | |||||
{ | |||||
template <class InputIt, class OutputIt> | |||||
inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first) | |||||
{ | |||||
return std::transform(first, last, d_first, base::rna::complement); | |||||
} | |||||
} | |||||
} // namespace sequence | |||||
} // namespace genetics | |||||
#endif // ANTKEEPER_GENETICS_SEQUENCE_HPP |
@ -1,46 +0,0 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_DNA_TRANSCRIBE_HPP | |||||
#define ANTKEEPER_DNA_TRANSCRIBE_HPP | |||||
#include "nucleobase.hpp" | |||||
#include <algorithm> | |||||
namespace dna { | |||||
/** | |||||
* Transcribes a range of IUPAC degenerate base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`. | |||||
* | |||||
* @param first,last Range of elements to transcribe. | |||||
* @param d_first Beginning of the destination range. | |||||
* @return Output iterator to the element past the last element transcribed. | |||||
*/ | |||||
template <class InputIt, class OutputIt> | |||||
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first); | |||||
template <class InputIt, class OutputIt> | |||||
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first) | |||||
{ | |||||
return std::transform(first, last, d_first, base::transcribe); | |||||
} | |||||
} // namespace dna | |||||
#endif // ANTKEEPER_DNA_TRANSCRIBE_HPP |
@ -1,55 +0,0 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_DNA_TRANSLATE_HPP | |||||
#define ANTKEEPER_DNA_TRANSLATE_HPP | |||||
#include <iterator> | |||||
namespace dna {

/**
 * Divides a range into consecutive subranges of @p n elements, then applies the given function to each subrange and stores the result in another range.
 *
 * Trailing elements that do not fill a whole subrange are ignored. If @p n is not positive, or fewer than @p n elements are available, nothing is written.
 *
 * @param first,last Range of elements to translate.
 * @param d_first Beginning of the destination range.
 * @param n Number of elements by which to divide the range.
 * @param binary_op Binary operation function object that is called with the begin and end iterators of each subrange of @p n elements.
 * @return Output iterator to the element past the last element translated.
 */
template <class InputIt, class OutputIt, class Size, class BinaryOperation>
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op);

template <class InputIt, class OutputIt, class Size, class BinaryOperation>
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op)
{
	typedef typename std::iterator_traits<InputIt>::difference_type difference_t;
	
	// Work in the iterator's signed difference type so that an unsigned Size cannot turn a negative length into a huge positive one when the two are compared.
	const difference_t step = static_cast<difference_t>(n);
	
	// A step of zero would never shrink the remaining length, so the loop below would not terminate.
	if (step <= 0)
		return d_first;
	
	for (difference_t length = std::distance(first, last); length >= step; length -= step)
	{
		// [first, next) is the current subrange of `step` elements.
		InputIt next = first;
		std::advance(next, step);
		
		*(d_first++) = binary_op(first, next);
		first = next;
	}
	
	return d_first;
}

} // namespace dna
#endif // ANTKEEPER_DNA_TRANSLATE_HPP |
@ -0,0 +1,48 @@ | |||||
/* | |||||
* Copyright (C) 2020 Christopher J. Howard | |||||
* | |||||
* This file is part of Antkeeper source code. | |||||
* | |||||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* Antkeeper source code is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP | |||||
#define ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP | |||||
namespace genetics {

/**
 * Pair of lookup strings describing a genetic code.
 *
 * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
 */
struct translation_table
{
	/// 64 amino acid symbols, one per codon, in TCAG order.
	const char* aas;
	
	/// 64 symbols, one per codon, in TCAG order. Any symbol other than `-` or `*` marks a start codon and names its amino acid.
	const char* starts;
};

/// Translation table for the standard genetic code.
constexpr translation_table standard_code
{
	"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
	"---M------**--*----M---------------M----------------------------"
};

} // namespace genetics
#endif // ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP |