@ -0,0 +1,86 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#include "codon.hpp" | |||
namespace genetics { | |||
namespace codon { | |||
/**
 * Maps a nucleobase symbol to its position within a translation table.
 *
 * @param base IUPAC code of the nucleobase: `U` or `T`, `C`, `A`, or `G` (uppercase only).
 * @return `0` for `U`/`T`, `1` for `C`, `2` for `A`, `3` for `G`, or the negative value `~3` for any other symbol.
 */
static inline int base_index(char base)
{
	if (base == 'U' || base == 'T')
		return 0;
	if (base == 'C')
		return 1;
	if (base == 'A')
		return 2;
	if (base == 'G')
		return 3;
	
	// Negative sentinel: the symbol is not one of the standard nucleobases.
	return ~3;
}
/** | |||
* Returns the index of a codon for use with a translation table. | |||
* | |||
* @param base1 IUPAC code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param base2 IUPAC code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param base3 IUPAC code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @return Index of codon, or a negative value if a non-standard nucleobase was supplied. | |||
*/ | |||
static inline int codon_index(char base1, char base2, char base3) | |||
{ | |||
int i = base_index(base1); | |||
int j = base_index(base2); | |||
int k = base_index(base3); | |||
return (i << 4) | (j << 2) | k; | |||
} | |||
inline char translate(char base1, char base2, char base3, const char* aas) | |||
{ | |||
int index = codon_index(base1, base2, base3); | |||
if (index < 0) | |||
return '-'; | |||
return aas[index]; | |||
} | |||
bool is_start(char base1, char base2, char base3, const char* starts) | |||
{ | |||
char aa = translate(base1, base2, base3, starts); | |||
return ((aa != '-') && (aa != '*')); | |||
} | |||
bool is_stop(char base1, char base2, char base3, const char* aas) | |||
{ | |||
char aa = translate(base1, base2, base3, aas); | |||
return (aa == '*'); | |||
} | |||
} // namespace codon
} // namespace genetics |
@ -0,0 +1,62 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_GENETICS_CODON_HPP | |||
#define ANTKEEPER_GENETICS_CODON_HPP | |||
namespace genetics { | |||
namespace codon { | |||
/** | |||
* Returns `true` if a codon is a start codon. | |||
* | |||
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param starts String of 64 IUPAC amino acid codes, ordered to match corresponding start codon indices. | |||
* @return `true` if the codon is a start codon, `false` otherwise. | |||
*/ | |||
bool is_start(char base1, char base2, char base3, const char* starts); | |||
/** | |||
* Returns `true` if a codon is a stop codon. | |||
* | |||
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices. | |||
* @return `true` if the codon is a stop codon, `false` otherwise. | |||
*/ | |||
bool is_stop(char base1, char base2, char base3, const char* aas); | |||
/** | |||
* Translates a codon into an amino acid. | |||
* | |||
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. | |||
* @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices. | |||
* @return IUPAC amino acid code of corresponding amino acid, or `-` if an invalid codon was supplied. | |||
*/ | |||
char translate(char base1, char base2, char base3, const char* aas); | |||
} // namespace codon
} // namespace genetics | |||
#endif // ANTKEEPER_GENETICS_CODON_HPP |
@ -1,87 +0,0 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_DNA_CROSSOVER_HPP | |||
#define ANTKEEPER_DNA_CROSSOVER_HPP | |||
#include <algorithm> | |||
#include <iterator> | |||
#include <random> | |||
namespace dna { | |||
/** | |||
* Exchanges elements between two ranges, starting at a random offset. | |||
* | |||
* @param first1,last1 First range of elements to crossover. | |||
* @param first2 Beginning of the second range of elements to crossover. | |||
* @param g Uniform random bit generator. | |||
* @return Iterator to the start of the crossover in the second range. | |||
*/ | |||
template <class ForwardIt1, class ForwardIt2, class URBG> | |||
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g); | |||
/** | |||
* Exchanges elements between two ranges multiple times, starting at a random offset each time. | |||
* | |||
* @param first1,last1 First range of elements to crossover. | |||
* @param first2 Beginning of the second range of elements to crossover. | |||
* @param count Number of times to crossover. | |||
* @param g Uniform random bit generator. | |||
*/ | |||
template <class ForwardIt1, class ForwardIt2, class Size, class URBG> | |||
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g); | |||
/**
 * Swaps the tails of two ranges, starting at one randomly chosen offset.
 *
 * The offset is drawn uniformly from `[0, distance(first1, last1) - 1]`; every element from that
 * offset up to @p last1 is swapped with the element at the same offset in the second range.
 * An empty first range is left untouched and @p first2 is returned.
 *
 * @return Iterator to the start of the crossover in the second range.
 */
template <class ForwardIt1, class ForwardIt2, class URBG>
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
{
	using difference_t = typename std::iterator_traits<ForwardIt1>::difference_type;
	
	// An empty range has no valid crossover point, and uniform_int_distribution requires min <= max.
	const difference_t length = std::distance(first1, last1);
	if (length <= 0)
		return first2;
	
	std::uniform_int_distribution<difference_t> distribution(0, length - 1);
	const difference_t pos = distribution(g);
	
	std::advance(first1, pos);
	std::advance(first2, pos);
	std::swap_ranges(first1, last1, first2);
	
	return first2;
}
/**
 * Performs @p count tail swaps between two ranges, drawing a fresh random offset for each one.
 *
 * Each offset is drawn uniformly from `[0, distance(first1, last1) - 1]`. Nothing happens if the
 * first range is empty or @p count is not positive.
 */
template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
{
	using difference_t = typename std::iterator_traits<ForwardIt1>::difference_type;
	
	// An empty range has no valid crossover point, and uniform_int_distribution requires min <= max.
	const difference_t length = std::distance(first1, last1);
	if (length <= 0)
		return;
	
	std::uniform_int_distribution<difference_t> distribution(0, length - 1);
	
	for (; count > 0; --count)
	{
		const difference_t pos = distribution(g);
		
		// Each cursor keeps its own iterator type so the two ranges may come from different containers.
		ForwardIt1 crossover1 = first1;
		ForwardIt2 crossover2 = first2;
		std::advance(crossover1, pos);
		std::advance(crossover2, pos);
		
		std::swap_ranges(crossover1, last1, crossover2);
	}
}
} // namespace dna | |||
#endif // ANTKEEPER_DNA_CROSSOVER_HPP |
@ -1,136 +0,0 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_DNA_FRAME_HPP | |||
#define ANTKEEPER_DNA_FRAME_HPP | |||
#include <iterator> | |||
namespace dna { | |||
/**
 * Finds the first start codon in a sequence, testing every window of @p n elements one element apart.
 *
 * The range is traversed more than once, so the iterators must support multi-pass traversal.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon.
 * @param p Binary predicate called as `p(sub_first, sub_last)`, which returns `true` if the subrange of length @p n is a start codon.
 * @return Iterator to the first element in the start codon, or @p last if no start codon was found, the range is shorter than @p n, or @p n is not positive.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
	using difference_t = typename std::iterator_traits<InputIt>::difference_type;
	
	const difference_t width = static_cast<difference_t>(n);
	difference_t length = std::distance(first, last);
	if (width < 1 || length < width)
		return last;
	
	InputIt next = first;
	std::advance(next, width);
	
	for (;;)
	{
		if (p(first, next))
			return first;
		
		// The window already ends at `last`; stepping again would increment `next` past the end.
		if (length == width)
			break;
		
		++first;
		++next;
		--length;
	}
	
	return last;
}
/**
 * Searches the range `[first, last)` for a subrange of @p n elements which satisfies predicate @p p.
 *
 * Candidate subranges start at offsets `0, stride, 2 * stride, ...` from @p first; only candidates
 * that fit entirely inside the range are tested.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements in the sequence.
 * @param stride Number of elements between the starts of consecutive candidate subranges. If not positive, only the first candidate is tested.
 * @param p Binary predicate called as `p(sub_first, sub_last)`, which returns `true` if the subrange is a match.
 * @return Iterator to the first element of the first matching subrange, or @p last if no match was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p)
{
	using difference_t = typename std::iterator_traits<InputIt>::difference_type;
	
	const difference_t width = static_cast<difference_t>(n);
	const difference_t step = static_cast<difference_t>(stride);
	difference_t length = std::distance(first, last);
	if (width < 1 || length < width)
		return last;
	
	InputIt next = first;
	std::advance(next, width);
	
	for (;;)
	{
		if (p(first, next))
			return first;
		
		// Stop when no progress is possible or the next candidate would not fit in the range.
		if (step < 1 || length - step < width)
			break;
		
		std::advance(first, step);
		std::advance(next, step);
		
		// `first` moved by `step` elements, so exactly that many fewer elements remain.
		length -= step;
	}
	
	return last;
}
/**
 * Locates the first stop codon in a sequence, reading it as consecutive, non-overlapping codons.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon.
 * @param p Binary predicate called as `p(codon_first, codon_last)`, which returns `true` if the subrange of length @p n is a stop codon.
 * @return Iterator to the first element in the stop codon, or @p last if no stop codon was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
	auto remaining = std::distance(first, last);
	
	// Only whole codons are examined; a trailing partial codon is ignored.
	while (remaining >= n)
	{
		InputIt codon_end = first;
		std::advance(codon_end, n);
		
		if (p(first, codon_end))
			return first;
		
		first = codon_end;
		remaining -= n;
	}
	
	return last;
}
/**
 * Finds the first open reading frame (ORF) in a range of elements, narrowing the given iterators in place.
 *
 * @param[in,out] first On entry, the beginning of the range to search. On return, the result of `find_start`: the first element of the start codon, or @p last if no start codon was found.
 * @param[in,out] last On entry, the end of the range to search. On return, if a start codon was found, the result of `find_stop` searching from that start codon; otherwise unchanged.
 * @param n Number of elements per codon.
 * @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
 * @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
 */
template <class InputIt, class Size, class BinaryPredicate1, class BinaryPredicate2>
void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p)
{
	first = find_start(first, last, n, start_p);
	
	// Without a start codon there is no frame in which to look for a stop codon.
	if (first == last)
		return;
	
	last = find_stop(first, last, n, stop_p);
}
} // namespace dna | |||
#endif // ANTKEEPER_DNA_FRAME_HPP |
@ -0,0 +1,28 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_GENETICS_HPP | |||
#define ANTKEEPER_GENETICS_HPP | |||
#include "base.hpp" | |||
#include "codon.hpp" | |||
#include "protein.hpp" | |||
#include "sequence.hpp" | |||
#endif // ANTKEEPER_GENETICS_HPP |
@ -1,82 +0,0 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_DNA_MUTATE_HPP | |||
#define ANTKEEPER_DNA_MUTATE_HPP | |||
#include <algorithm> | |||
#include <iterator> | |||
#include <random> | |||
namespace dna { | |||
/** | |||
* Applies the given function to a randomly selected element in a range. | |||
* | |||
* @param first,last Range of elements to mutate. | |||
* @param unary_op Unary operation function object that will be applied. | |||
* @param g Uniform random bit generator. | |||
* @return Iterator to the mutated element. | |||
*/ | |||
template <class ForwardIt, class UnaryOperation, class URBG> | |||
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g); | |||
/** | |||
* Applies the given function to a random selection of elements in a range. | |||
* | |||
* @param first,last Range of elements to mutate. | |||
* @param count Number of elements to mutate. | |||
* @param unary_op Unary operation function object that will be applied. | |||
* @param g Uniform random bit generator. | |||
*/ | |||
template <class ForwardIt, class Size, class UnaryOperation, class URBG> | |||
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g); | |||
/**
 * Applies @p unary_op to one element of the range, chosen uniformly at random, and stores the result back in place.
 *
 * @return Iterator to the mutated element, or @p last if the range is empty.
 */
template <class ForwardIt, class UnaryOperation, class URBG>
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
{
	using difference_t = typename std::iterator_traits<ForwardIt>::difference_type;
	
	// An empty range has nothing to mutate, and uniform_int_distribution requires min <= max.
	if (first == last)
		return last;
	
	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
	std::advance(first, distribution(g));
	*first = unary_op(*first);
	
	return first;
}
/**
 * Applies @p unary_op to @p count randomly selected elements of the range, one draw per mutation.
 *
 * Positions are drawn independently, so the same element may be mutated more than once.
 * Nothing happens if the range is empty or @p count is not positive.
 */
template <class ForwardIt, class Size, class UnaryOperation, class URBG>
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
{
	using difference_t = typename std::iterator_traits<ForwardIt>::difference_type;
	
	// An empty range has nothing to mutate, and uniform_int_distribution requires min <= max.
	if (first == last)
		return;
	
	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
	
	for (; count > 0; --count)
	{
		ForwardIt mutation = first;
		std::advance(mutation, distribution(g));
		*mutation = unary_op(*mutation);
	}
}
} // namespace dna | |||
#endif // ANTKEEPER_DNA_MUTATE_HPP |
@ -0,0 +1,31 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_GENETICS_PROTEIN_HPP | |||
#define ANTKEEPER_GENETICS_PROTEIN_HPP | |||
namespace genetics { | |||
namespace protein { | |||
} // namespace protein | |||
} // namespace genetics | |||
#endif // ANTKEEPER_GENETICS_PROTEIN_HPP |
@ -0,0 +1,357 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_GENETICS_SEQUENCE_HPP | |||
#define ANTKEEPER_GENETICS_SEQUENCE_HPP | |||
#include "base.hpp" | |||
#include "codon.hpp" | |||
#include "translation-table.hpp" | |||
#include <algorithm> | |||
#include <iterator> | |||
#include <random> | |||
namespace genetics { | |||
namespace sequence { | |||
/**
 * Open reading frame (ORF): a stretch of a sequence delimited by a start codon and a stop codon,
 * with the distance between the two divisible by three.
 *
 * @tparam Iterator Sequence iterator type.
 */
template <typename Iterator>
struct orf
{
	Iterator start; ///< Iterator to the first base of the start codon.
	Iterator stop; ///< Iterator to the first base of the stop codon.
};
/** | |||
* Exchanges elements between two ranges, starting at a random offset. | |||
* | |||
* @param first1,last1 First range of elements to crossover. | |||
* @param first2 Beginning of the second range of elements to crossover. | |||
* @param g Uniform random bit generator. | |||
* @return Iterator to the start of the crossover in the second range. | |||
*/ | |||
template <class ForwardIt1, class ForwardIt2, class URBG> | |||
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g); | |||
/** | |||
* Exchanges elements between two ranges multiple times, starting at a random offset each time. | |||
* | |||
* @param first1,last1 First range of elements to crossover. | |||
* @param first2 Beginning of the second range of elements to crossover. | |||
* @param count Number of times to crossover. | |||
* @param g Uniform random bit generator. | |||
*/ | |||
template <class ForwardIt1, class ForwardIt2, class Size, class URBG> | |||
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g); | |||
/** | |||
* Searches a sequence for an open reading frame (ORF). | |||
* | |||
* @param first,last Range of elements to search. | |||
* @param table Genetic code translation table. | |||
* @return First ORF in the sequence, or `{last, last}` if no ORF was found. | |||
*/ | |||
template <class ForwardIt> | |||
orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table); | |||
/** | |||
* Applies the given function to a randomly selected element in a range. | |||
* | |||
* @param first,last Range of elements to mutate. | |||
* @param unary_op Unary operation function object that will be applied. | |||
* @param g Uniform random bit generator. | |||
* @return Iterator to the mutated element. | |||
*/ | |||
template <class ForwardIt, class UnaryOperation, class URBG> | |||
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g); | |||
/** | |||
* Applies the given function to a random selection of elements in a range. | |||
* | |||
* @param first,last Range of elements to mutate. | |||
* @param count Number of elements to mutate. | |||
* @param unary_op Unary operation function object that will be applied. | |||
* @param g Uniform random bit generator. | |||
*/ | |||
template <class ForwardIt, class Size, class UnaryOperation, class URBG> | |||
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g); | |||
/** | |||
* Searches a sequence of IUPAC base symbols for a pattern matching a search string of IUPAC degenerate base symbols. | |||
* | |||
* @param first,last Sequence of IUPAC base symbols to search. | |||
* @param s_first,s_last Search string of IUPAC degenerate base symbols. | |||
* @param stride Distance between consecutive searches. | |||
* @return Iterator to the beginning of the first subsequence matching `[s_first, s_last)` in the sequence `[first, last)`. If no such occurrence is found, @p last is returned. | |||
*/ | |||
template <class ForwardIt1, class ForwardIt2> | |||
ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride); | |||
/** | |||
* Transcribes a sequence of IUPAC base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`. | |||
* | |||
* @param first,last Range of elements to transcribe. | |||
* @param d_first Beginning of the destination range. | |||
* @return Output iterator to the element past the last element transcribed. | |||
*/ | |||
template <class InputIt, class OutputIt> | |||
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first); | |||
/** | |||
* Translates a sequence of codons into amino acids. | |||
* | |||
* @param first,last Open reading frame. | |||
* @param d_first Beginning of destination range. | |||
* @param table Genetic code translation table. | |||
* @return Output iterator to the element past the last element translated. | |||
*/ | |||
template <class InputIt, class OutputIt> | |||
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table); | |||
namespace dna | |||
{ | |||
/** | |||
* Generates the complementary sequence for a sequence of IUPAC degenerate DNA base symbols. | |||
* | |||
* @param first,last Range of elements to complement. | |||
* @param d_first Beginning of the destination range. | |||
* @return Output iterator to the element past the last element complemented. | |||
*/ | |||
template <class InputIt, class OutputIt> | |||
OutputIt complement(InputIt first, InputIt last, OutputIt d_first); | |||
} | |||
namespace rna | |||
{ | |||
/** | |||
* Generates the complementary sequence for a sequence of IUPAC degenerate RNA base symbols. | |||
* | |||
* @param first,last Range of elements to complement. | |||
* @param d_first Beginning of the destination range. | |||
* @return Output iterator to the element past the last element complemented. | |||
*/ | |||
template <class InputIt, class OutputIt> | |||
OutputIt complement(InputIt first, InputIt last, OutputIt d_first); | |||
} | |||
/**
 * Swaps the tails of two ranges, starting at one randomly chosen offset.
 *
 * The offset is drawn uniformly from `[0, distance(first1, last1) - 1]`; every element from that
 * offset up to @p last1 is swapped with the element at the same offset in the second range.
 * An empty first range is left untouched and @p first2 is returned.
 *
 * @return Iterator to the start of the crossover in the second range.
 */
template <class ForwardIt1, class ForwardIt2, class URBG>
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
{
	using difference_t = typename std::iterator_traits<ForwardIt1>::difference_type;
	
	// An empty range has no valid crossover point, and uniform_int_distribution requires min <= max.
	const difference_t length = std::distance(first1, last1);
	if (length <= 0)
		return first2;
	
	std::uniform_int_distribution<difference_t> distribution(0, length - 1);
	const difference_t pos = distribution(g);
	
	std::advance(first1, pos);
	std::advance(first2, pos);
	std::swap_ranges(first1, last1, first2);
	
	return first2;
}
/**
 * Performs @p count tail swaps between two ranges, drawing a fresh random offset for each one.
 *
 * Each offset is drawn uniformly from `[0, distance(first1, last1) - 1]`. Nothing happens if the
 * first range is empty or @p count is not positive.
 */
template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
{
	using difference_t = typename std::iterator_traits<ForwardIt1>::difference_type;
	
	// An empty range has no valid crossover point, and uniform_int_distribution requires min <= max.
	const difference_t length = std::distance(first1, last1);
	if (length <= 0)
		return;
	
	std::uniform_int_distribution<difference_t> distribution(0, length - 1);
	
	for (; count > 0; --count)
	{
		const difference_t pos = distribution(g);
		
		// Each cursor keeps its own iterator type so the two ranges may come from different containers.
		ForwardIt1 crossover1 = first1;
		ForwardIt2 crossover2 = first2;
		std::advance(crossover1, pos);
		std::advance(crossover2, pos);
		
		std::swap_ranges(crossover1, last1, crossover2);
	}
}
template <class ForwardIt> | |||
orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table) | |||
{ | |||
ForwardIt second; | |||
ForwardIt third; | |||
orf<ForwardIt> result; | |||
auto distance = std::distance(first, last); | |||
if (distance >= 3) | |||
{ | |||
second = first; | |||
++second; | |||
third = second; | |||
++third; | |||
do | |||
{ | |||
if (codon::is_start(*first, *second, *third, table.starts)) | |||
{ | |||
result.start = first; | |||
distance -= 3; | |||
break; | |||
} | |||
first = second; | |||
second = third; | |||
++third; | |||
--distance; | |||
} | |||
while (third != last); | |||
} | |||
for (; distance >= 3; distance -= 3) | |||
{ | |||
first = ++third; | |||
second = ++third; | |||
++third; | |||
if (codon::is_stop(*first, *second, *third, table.aas)) | |||
{ | |||
result.stop = first; | |||
return result; | |||
} | |||
} | |||
return {last, last}; | |||
} | |||
/**
 * Picks one element of the range uniformly at random and replaces it with the result of @p unary_op.
 *
 * @return Iterator to the mutated element; for an empty range, @p first is returned unchanged.
 */
template <class ForwardIt, class UnaryOperation, class URBG>
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
{
	using offset_t = typename std::iterator_traits<ForwardIt>::difference_type;
	
	if (first != last)
	{
		const offset_t span = std::distance(first, last);
		std::uniform_int_distribution<offset_t> pick(0, span - 1);
		
		std::advance(first, pick(g));
		*first = unary_op(*first);
	}
	
	return first;
}
/**
 * Applies @p unary_op to @p count randomly selected elements of the range, one draw per mutation.
 *
 * Positions are drawn independently, so the same element may be mutated more than once.
 * Nothing happens if the range is empty or @p count is not positive.
 */
template <class ForwardIt, class Size, class UnaryOperation, class URBG>
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
{
	using difference_t = typename std::iterator_traits<ForwardIt>::difference_type;
	
	// Plain `return`: this function is void, so returning an iterator here would not compile.
	if (first == last)
		return;
	
	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
	
	for (; count > 0; --count)
	{
		ForwardIt mutation = first;
		std::advance(mutation, distribution(g));
		*mutation = unary_op(*mutation);
	}
}
template <class ForwardIt1, class ForwardIt2> | |||
ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride) | |||
{ | |||
for (auto distance = std::distance(first, last); distance > 0; distance -= stride) | |||
{ | |||
ForwardIt1 it = first; | |||
for (ForwardIt2 s_it = s_first; ; ++it, ++s_it) | |||
{ | |||
if (s_it == s_last) | |||
return first; | |||
if (it == last) | |||
return last; | |||
if (!base::compare(*it, *s_it)) | |||
break; | |||
} | |||
if (distance > stride) | |||
std::advance(first, stride); | |||
} | |||
return last; | |||
} | |||
template <class InputIt, class OutputIt> | |||
inline OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first) | |||
{ | |||
return std::transform(first, last, d_first, base::transcribe); | |||
} | |||
template <class InputIt, class OutputIt> | |||
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table) | |||
{ | |||
auto length = std::distance(first, last); | |||
if (length >= 3) | |||
{ | |||
InputIt second = first; | |||
++second; | |||
InputIt third = second; | |||
++third; | |||
*(d_first++) = codon::translate(*first, *second, *third, table.starts); | |||
for (length -= 3; length >= 3; length -= 3) | |||
{ | |||
first = ++third; | |||
second = ++third; | |||
++third; | |||
*(d_first++) = codon::translate(*first, *second, *third, table.aas); | |||
} | |||
} | |||
return d_first; | |||
} | |||
namespace dna | |||
{ | |||
template <class InputIt, class OutputIt> | |||
inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first) | |||
{ | |||
return std::transform(first, last, d_first, base::dna::complement); | |||
} | |||
} | |||
namespace rna | |||
{ | |||
template <class InputIt, class OutputIt> | |||
inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first) | |||
{ | |||
return std::transform(first, last, d_first, base::rna::complement); | |||
} | |||
} | |||
} // namespace sequence | |||
} // namespace genetics | |||
#endif // ANTKEEPER_GENETICS_SEQUENCE_HPP |
@ -1,46 +0,0 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_DNA_TRANSCRIBE_HPP | |||
#define ANTKEEPER_DNA_TRANSCRIBE_HPP | |||
#include "nucleobase.hpp" | |||
#include <algorithm> | |||
namespace dna { | |||
/** | |||
* Transcribes a range of IUPAC degenerate base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`. | |||
* | |||
* @param first,last Range of elements to transcribe. | |||
* @param d_first Beginning of the destination range. | |||
* @return Output iterator to the element past the last element transcribed. | |||
*/ | |||
template <class InputIt, class OutputIt> | |||
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first); | |||
template <class InputIt, class OutputIt> | |||
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first) | |||
{ | |||
return std::transform(first, last, d_first, base::transcribe); | |||
} | |||
} // namespace dna | |||
#endif // ANTKEEPER_DNA_TRANSCRIBE_HPP |
@ -1,55 +0,0 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_DNA_TRANSLATE_HPP | |||
#define ANTKEEPER_DNA_TRANSLATE_HPP | |||
#include <iterator> | |||
namespace dna {

/**
 * Divides a range into consecutive subranges of @p n elements, then applies the given function to each subrange and stores the result in another range.
 *
 * Trailing elements that do not fill a complete subrange are ignored. If @p n is not positive, nothing is translated and @p d_first is returned unchanged.
 *
 * @note The length of the range is measured before it is traversed, so the input iterators must support multiple passes over the range.
 *
 * @param first,last Range of elements to translate.
 * @param d_first Beginning of the destination range.
 * @param n Number of elements by which to divide the range.
 * @param binary_op Binary operation function object that will be applied to each subrange of @p n elements. It is called with two iterators: the beginning of the subrange and the position one past its end.
 * @return Output iterator to the element past the last element translated.
 */
template <class InputIt, class OutputIt, class Size, class BinaryOperation>
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op)
{
	using difference_type = typename std::iterator_traits<InputIt>::difference_type;

	// Convert once so the length comparison below is never a mixed signed/unsigned comparison.
	const difference_type step = static_cast<difference_type>(n);

	// A non-positive step would never consume any input, so the loop would not terminate.
	if (step <= 0)
	{
		return d_first;
	}

	for (difference_type remaining = std::distance(first, last); remaining >= step; remaining -= step)
	{
		InputIt next = first;
		std::advance(next, step);

		*d_first = binary_op(first, next);
		++d_first;

		first = next;
	}

	return d_first;
}

} // namespace dna
#endif // ANTKEEPER_DNA_TRANSLATE_HPP |
@ -0,0 +1,48 @@ | |||
/* | |||
* Copyright (C) 2020 Christopher J. Howard | |||
* | |||
* This file is part of Antkeeper source code. | |||
* | |||
* Antkeeper source code is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* Antkeeper source code is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP | |||
#define ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP | |||
namespace genetics {

/**
 * Table describing a genetic code as two parallel strings indexed by codon.
 *
 * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
 */
struct translation_table
{
	/// Amino acid symbol for each of the 64 codons, listed in TCAG order.
	const char* aas;

	/// Start symbol for each of the 64 codons, listed in TCAG order. Any symbol other than `-` or `*` marks a start codon and names its amino acid.
	const char* starts;
};

/// The standard genetic code.
constexpr translation_table standard_code
{
	"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
	"---M------**--*----M---------------M----------------------------"
};

} // namespace genetics
#endif // ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP |