diff --git a/src/game/genetics/nucleobase.cpp b/src/game/genetics/base.cpp similarity index 68% rename from src/game/genetics/nucleobase.cpp rename to src/game/genetics/base.cpp index 4c33858..d176b6a 100644 --- a/src/game/genetics/nucleobase.cpp +++ b/src/game/genetics/base.cpp @@ -17,10 +17,9 @@ * along with Antkeeper source code. If not, see . */ -#include "nucleobase.hpp" -#include +#include "base.hpp" -namespace dna { +namespace genetics { namespace base { /** @@ -29,9 +28,9 @@ namespace base { * @param symbol IUPAC degenerate base symbol. * @return Bit mask representing the possible bases represented by the symbol. */ -static std::uint8_t decode(char symbol) +static inline unsigned char decode(char symbol) { - static constexpr std::uint8_t bases[26] = + static constexpr unsigned char bases[25] = { 0b0001, // A 0b1110, // B @@ -58,34 +57,43 @@ static std::uint8_t decode(char symbol) 0b1001, // W 0, // X 0b1010, // Y - 0, // Z }; - return (symbol < 'A' || symbol > 'Z') ? 0 : bases[symbol - 'A']; + return (symbol < 'A' || symbol >= 'Z') ? 0 : bases[symbol - 'A']; } -char complement_rna(char symbol) +int compare(char a, char b) { - static constexpr char* complements = "TVGHZZCDZZMZKNZZZYSAABWZRZ"; - return (symbol < 'A' || symbol > 'Z') ? 'Z' : complements[symbol - 'A']; + static constexpr int popcount[16] = + { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 + }; + + return popcount[decode(a) & decode(b)]; } -char complement_dna(char symbol) +char transcribe(char symbol) { - static constexpr char* complements = "UVGHZZCDZZMZKNZZZYSAABWZRZ"; - return (symbol < 'A' || symbol > 'Z') ? 'Z' : complements[symbol - 'A']; + return (symbol == 'T') ? 'U' : (symbol == 'U') ? 'T' : symbol; } -char transcribe(char symbol) +namespace dna { - return (symbol == 'T') ? 'U' : (symbol == 'U') ? 'T' : symbol; + char complement(char symbol) + { + static constexpr char* complements = "UVGHZZCDZZMZKNZZZYSAABWZR"; + return (symbol < 'A' || symbol >= 'Z') ? 'Z' : complements[symbol - 'A']; + } } -int compare(char a, char b) +namespace rna { - std::uint8_t bases = decode(a) & decode(b); - return (bases & 1) + (bases >> 1 & 1) + (bases >> 2 & 1) + (bases >> 3 & 1); + char complement(char symbol) + { + static constexpr char* complements = "TVGHZZCDZZMZKNZZZYSAABWZR"; + return (symbol < 'A' || symbol >= 'Z') ? 'Z' : complements[symbol - 'A']; + } } } // namespace base -} // namespace dna +} // namespace genetics diff --git a/src/game/genetics/nucleobase.hpp b/src/game/genetics/base.hpp similarity index 69% rename from src/game/genetics/nucleobase.hpp rename to src/game/genetics/base.hpp index d99e31d..b0dfd51 100644 --- a/src/game/genetics/nucleobase.hpp +++ b/src/game/genetics/base.hpp @@ -17,27 +17,20 @@ * along with Antkeeper source code. If not, see . */ -#ifndef ANTKEEPER_DNA_NUCLEOBASE_HPP -#define ANTKEEPER_DNA_NUCLEOBASE_HPP +#ifndef ANTKEEPER_GENETICS_BASE_HPP +#define ANTKEEPER_GENETICS_BASE_HPP -namespace dna { +namespace genetics { namespace base { /** - * Returns the DNA complement of an IUPAC degenerate base symbol. - * - * @param symbol IUPAC degenerate base symbol. - * @return IUPAC degenerate base symbol of DNA complement. - */ -char complement_dna(char symbol); - -/** - * Returns the RNA complement of an IUPAC degenerate base symbol. + * Returns the number of bases that are represented by both IUPAC degenerate base symbols. * - * @param symbol IUPAC degenerate base symbol. - * @return IUPAC degenerate base symbol of RNA complement. + * @param a First IUPAC degenerate base symbol. + * @param b Second IUPAC degenerate base symbol. + * @return Number of bases represented by both symbols. */ -char complement_rna(char symbol); +int compare(char a, char b); /** * Transcribes an IUPAC degenerate base symbol between DNA and RNA, swapping `T` for `U` or `U` for `T`. @@ -47,16 +40,29 @@ char complement_rna(char symbol); */ char transcribe(char symbol); -/** - * Returns the number of bases that are represented by both IUPAC degenerate base symbols. - * - * @param a First IUPAC degenerate base symbol. - * @param b Second IUPAC degenerate base symbol. - * @return Number of bases represented by both symbols. - */ -int compare(char a, char b); +namespace dna +{ + /** + * Returns the DNA complement of an IUPAC degenerate base symbol. + * + * @param symbol IUPAC degenerate base symbol. + * @return IUPAC degenerate base symbol of DNA complement. + */ + char complement(char symbol); +} + +namespace rna +{ + /** + * Returns the RNA complement of an IUPAC degenerate base symbol. + * + * @param symbol IUPAC degenerate base symbol. + * @return IUPAC degenerate base symbol of RNA complement. + */ + char complement(char symbol); +} } // namespace base -} // namespace dna +} // namespace genetics -#endif // ANTKEEPER_DNA_NUCLEOBASE_HPP +#endif // ANTKEEPER_GENETICS_BASE_HPP diff --git a/src/game/genetics/codon.cpp b/src/game/genetics/codon.cpp new file mode 100644 index 0000000..f938dad --- /dev/null +++ b/src/game/genetics/codon.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2020 Christopher J. Howard + * + * This file is part of Antkeeper source code. + * + * Antkeeper source code is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Antkeeper source code is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Antkeeper source code. If not, see . + */ + +#include "codon.hpp" + +namespace genetics { +namespace codon { + +/** + * Returns the index of a nucleobase for use with a translation table. + * + * @param base IUPAC code of nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @return Index of the nucleobase, or a negative value if a non-standard nucleobase was supplied. + */ +static inline int base_index(char base) +{ + switch (base) + { + case 'U': + case 'T': + return 0; + case 'C': + return 1; + case 'A': + return 2; + case 'G': + return 3; + } + + return ~3; +} + +/** + * Returns the index of a codon for use with a translation table. + * + * @param base1 IUPAC code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param base2 IUPAC code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param base3 IUPAC code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @return Index of codon, or a negative value if a non-standard nucleobase was supplied. + */ +static inline int codon_index(char base1, char base2, char base3) +{ + int i = base_index(base1); + int j = base_index(base2); + int k = base_index(base3); + return (i << 4) | (j << 2) | k; +} + +inline char translate(char base1, char base2, char base3, const char* aas) +{ + int index = codon_index(base1, base2, base3); + if (index < 0) + return '-'; + return aas[index]; +} + +bool is_start(char base1, char base2, char base3, const char* starts) +{ + char aa = translate(base1, base2, base3, starts); + return ((aa != '-') && (aa != '*')); +} + +bool is_stop(char base1, char base2, char base3, const char* aas) +{ + char aa = translate(base1, base2, base3, aas); + return (aa == '*'); +} + +} // namspace codon +} // namespace genetics diff --git a/src/game/genetics/codon.hpp b/src/game/genetics/codon.hpp new file mode 100644 index 0000000..5e799b0 --- /dev/null +++ b/src/game/genetics/codon.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2020 Christopher J. Howard + * + * This file is part of Antkeeper source code. + * + * Antkeeper source code is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Antkeeper source code is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Antkeeper source code. If not, see . + */ + +#ifndef ANTKEEPER_GENETICS_CODON_HPP +#define ANTKEEPER_GENETICS_CODON_HPP + +namespace genetics { +namespace codon { + +/** + * Returns `true` if a codon is a start codon. + * + * @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param starts String of 64 IUPAC amino acid codes, ordered to match corresponding start codon indices. + * @return `true` if the codon is a start codon, `false` otherwise. + */ +bool is_start(char base1, char base2, char base3, const char* starts); + +/** + * Returns `true` if a codon is a stop codon. + * + * @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices. + * @return `true` if the codon is a stop codon, `false` otherwise. + */ +bool is_stop(char base1, char base2, char base3, const char* aas); + +/** + * Translates a codon into an amino acid. + * + * @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`. + * @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices. + * @return IUPAC amino acid code of corresponding amino acid, or `-` if an invalid codon was supplied. + */ +char translate(char base1, char base2, char base3, const char* aas); + +} // namspace codon +} // namespace genetics + +#endif // ANTKEEPER_GENETICS_CODON_HPP diff --git a/src/game/genetics/crossover.hpp b/src/game/genetics/crossover.hpp deleted file mode 100644 index 30a61a3..0000000 --- a/src/game/genetics/crossover.hpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2020 Christopher J. Howard - * - * This file is part of Antkeeper source code. - * - * Antkeeper source code is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Antkeeper source code is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Antkeeper source code. If not, see . - */ - -#ifndef ANTKEEPER_DNA_CROSSOVER_HPP -#define ANTKEEPER_DNA_CROSSOVER_HPP - -#include -#include -#include - -namespace dna { - -/** - * Exchanges elements between two ranges, starting at a random offset. - * - * @param first1,last1 First range of elements to crossover. - * @param first2 Beginning of the second range of elements to crossover. - * @param g Uniform random bit generator. - * @return Iterator to the start of the crossover in the second range. - */ -template -ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g); - -/** - * Exchanges elements between two ranges multiple times, starting at a random offset each time. - * - * @param first1,last1 First range of elements to crossover. - * @param first2 Beginning of the second range of elements to crossover. - * @param count Number of times to crossover. - * @param g Uniform random bit generator. - */ -template -void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g); - -template -ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g) -{ - typedef typename std::iterator_traits::difference_type difference_t; - std::uniform_int_distribution distribution(0, std::distance(first1, last1) - 1); - difference_t pos = distribution(g); - std::advance(first1, pos); - std::advance(first2, pos); - std::swap_ranges(first1, last1, first2); - return first2; -} - -template -void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g) -{ - typedef typename std::iterator_traits::difference_type difference_t; - - std::uniform_int_distribution distribution(0, std::distance(first1, last1) - 1); - ForwardIt1 crossover1, crossover2; - - while (count) - { - crossover1 = first1; - crossover2 = first2; - - difference_t pos = distribution(g); - std::advance(crossover1, pos); - std::advance(crossover2, pos); - std::swap_ranges(crossover1, last1, crossover2); - - --count; - } -} - -} // namespace dna - -#endif // ANTKEEPER_DNA_CROSSOVER_HPP diff --git a/src/game/genetics/frame.hpp b/src/game/genetics/frame.hpp deleted file mode 100644 index 7bed2bb..0000000 --- a/src/game/genetics/frame.hpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (C) 2020 Christopher J. Howard - * - * This file is part of Antkeeper source code. - * - * Antkeeper source code is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Antkeeper source code is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Antkeeper source code. If not, see . - */ - -#ifndef ANTKEEPER_DNA_FRAME_HPP -#define ANTKEEPER_DNA_FRAME_HPP - -#include - -namespace dna { - -/** - * Finds the first start codon in a sequence. - * - * @param first,last Range of elements to search. - * @param n Number of elements per codon. - * @param p Binary predicate which returns `true` if a subrange of length @p n is a start codon. - * @return Iterator to the first element in the start codon, or @p last if no start codon was found. - */ -template -InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p) -{ - auto length = std::distance(first, last); - - if (length >= n) - { - InputIt next = first; - std::advance(next, n); - - do - { - if (p(first, next)) - return first; - - ++first; - ++next; - --length; - } - while (length >= n); - } - - return last; -} - -/** - * Searches the range `[first, last)` for a sequence of @p n elements which satifies predicate @p p. - * - * @param first,last Range of elements to search. - * @param n Number of elements in the sequence. - * @param stride Number of elements between searches. - * @param p - */ -template -InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p) -{ - if (auto length = std::distance(first, last); length >= n) - { - Size offset = n + stride; - InputIt next = first; - std::advance(next, n); - - do - { - if (p(first, next)) - return first; - - if (length < offset) - break; - - std::advance(first, stride); - std::advance(next, stride); - length -= offset; - } - while (1); - } - - return last; -} - -/** - * Finds the first stop codon in a sequence. - * - * @param first,last Range of elements to search. - * @param n Number of elements per codon. - * @param p Binary predicate which returns `true` if a subrange of length @p n is a stop codon. - * @return Iterator to the first element in the stop codon, or @p last if no stop codon was found. - */ -template -InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p) -{ - for (auto length = std::distance(first, last); length >= n; length -= n) - { - InputIt next = first; - std::advance(next, n); - if (p(first, next)) - return first; - first = next; - } - - return last; -} - -/** - * Finds the first open reading frame (ORF) in a range of elements. - * - * @param[in,out] first Iterator to the beginning of the sequence, which will point to th - * - * @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon. - * @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon. - */ -template -void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p) -{ - first = find_start(first, last, n, start_p); - if (first != last) - last = find_stop(first, last, n, stop_p); -} - -} // namespace dna - -#endif // ANTKEEPER_DNA_FRAME_HPP diff --git a/src/game/genetics/genetics.hpp b/src/game/genetics/genetics.hpp new file mode 100644 index 0000000..e4a7a78 --- /dev/null +++ b/src/game/genetics/genetics.hpp @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2020 Christopher J. Howard + * + * This file is part of Antkeeper source code. + * + * Antkeeper source code is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Antkeeper source code is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Antkeeper source code. If not, see . + */ + +#ifndef ANTKEEPER_GENETICS_HPP +#define ANTKEEPER_GENETICS_HPP + +#include "base.hpp" +#include "codon.hpp" +#include "protein.hpp" +#include "sequence.hpp" + +#endif // ANTKEEPER_GENETICS_HPP diff --git a/src/game/genetics/mutate.hpp b/src/game/genetics/mutate.hpp deleted file mode 100644 index 6f71dd0..0000000 --- a/src/game/genetics/mutate.hpp +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (C) 2020 Christopher J. Howard - * - * This file is part of Antkeeper source code. - * - * Antkeeper source code is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Antkeeper source code is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Antkeeper source code. If not, see . - */ - -#ifndef ANTKEEPER_DNA_MUTATE_HPP -#define ANTKEEPER_DNA_MUTATE_HPP - -#include -#include -#include - -namespace dna { - -/** - * Applies the given function to a randomly selected element in a range. - * - * @param first,last Range of elements to mutate. - * @param unary_op Unary operation function object that will be applied. - * @param g Uniform random bit generator. - * @return Iterator to the mutated element. - */ -template -ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g); - -/** - * Applies the given function to a random selection of elements in a range. - * - * @param first,last Range of elements to mutate. - * @param count Number of elements to mutate. - * @param unary_op Unary operation function object that will be applied. - * @param g Uniform random bit generator. - */ -template -void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g); - -template -ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g) -{ - typedef typename std::iterator_traits::difference_type difference_t; - - std::uniform_int_distribution distribution(0, std::distance(first, last) - 1); - std::advance(first, distribution(g)); - *first = unary_op(*first); - - return first; -} - -template -void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g) -{ - typedef typename std::iterator_traits::difference_type difference_t; - - std::uniform_int_distribution distribution(0, std::distance(first, last) - 1); - ForwardIt mutation; - - while (count) - { - mutation = first; - std::advance(mutation, distribution(g)); - *mutation = unary_op(*mutation); - --count; - } -} - -} // namespace dna - -#endif // ANTKEEPER_DNA_MUTATE_HPP diff --git a/src/game/genetics/protein.hpp b/src/game/genetics/protein.hpp new file mode 100644 index 0000000..eab430c --- /dev/null +++ b/src/game/genetics/protein.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2020 Christopher J. Howard + * + * This file is part of Antkeeper source code. + * + * Antkeeper source code is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Antkeeper source code is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Antkeeper source code. If not, see . + */ + +#ifndef ANTKEEPER_GENETICS_PROTEIN_HPP +#define ANTKEEPER_GENETICS_PROTEIN_HPP + +namespace genetics { +namespace protein { + + + +} // namespace protein +} // namespace genetics + +#endif // ANTKEEPER_GENETICS_PROTEIN_HPP diff --git a/src/game/genetics/sequence.hpp b/src/game/genetics/sequence.hpp new file mode 100644 index 0000000..5c6900f --- /dev/null +++ b/src/game/genetics/sequence.hpp @@ -0,0 +1,357 @@ +/* + * Copyright (C) 2020 Christopher J. Howard + * + * This file is part of Antkeeper source code. + * + * Antkeeper source code is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Antkeeper source code is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Antkeeper source code. If not, see . + */ + +#ifndef ANTKEEPER_GENETICS_SEQUENCE_HPP +#define ANTKEEPER_GENETICS_SEQUENCE_HPP + +#include "base.hpp" +#include "codon.hpp" +#include "translation-table.hpp" +#include +#include +#include + +namespace genetics { +namespace sequence { + +/** + * Open reading frame (ORF), defined by a start codon and stop codon, with the distance between divisible by three. + * + * @tparam Iterator Sequence iterator type. + */ +template +struct orf +{ + /// Iterator to the first base of the start codon. + Iterator start; + + /// Iterator to the first base of the stop codon. + Iterator stop; +}; + +/** + * Exchanges elements between two ranges, starting at a random offset. + * + * @param first1,last1 First range of elements to crossover. + * @param first2 Beginning of the second range of elements to crossover. + * @param g Uniform random bit generator. + * @return Iterator to the start of the crossover in the second range. + */ +template +ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g); + +/** + * Exchanges elements between two ranges multiple times, starting at a random offset each time. + * + * @param first1,last1 First range of elements to crossover. + * @param first2 Beginning of the second range of elements to crossover. + * @param count Number of times to crossover. + * @param g Uniform random bit generator. + */ +template +void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g); + +/** + * Searches a sequence for an open reading frame (ORF). + * + * @param first,last Range of elements to search. + * @param table Genetic code translation table. + * @return First ORF in the sequence, or `{last, last}` if no ORF was found. + */ +template +orf find_orf(ForwardIt first, ForwardIt last, const translation_table& table); + +/** + * Applies the given function to a randomly selected element in a range. + * + * @param first,last Range of elements to mutate. + * @param unary_op Unary operation function object that will be applied. + * @param g Uniform random bit generator. + * @return Iterator to the mutated element. + */ +template +ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g); + +/** + * Applies the given function to a random selection of elements in a range. + * + * @param first,last Range of elements to mutate. + * @param count Number of elements to mutate. + * @param unary_op Unary operation function object that will be applied. + * @param g Uniform random bit generator. + */ +template +void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g); + +/** + * Searches a sequence of IUPAC base symbols for a pattern matching a search string of IUPAC degenerate base symbols. + * + * @param first,last Sequence of IUPAC base symbols to search. + * @param s_first,s_last Search string of IUPAC degenerate base symbols. + * @param stride Distance between consecutive searches. + * @return Iterator to the beginning of the first subsequence matching `[s_first, s_last)` in the sequence `[first, last)`. If no such occurrence is found, @p last is returned. + */ +template +ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits::difference_type stride); + +/** + * Transcribes a sequence of IUPAC base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`. + * + * @param first,last Range of elements to transcribe. + * @param d_first Beginning of the destination range. + * @return Output iterator to the element past the last element transcribed. + */ +template +OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first); + +/** + * Translates a sequence of codons into amino acids. + * + * @param first,last Open reading frame. + * @param d_first Beginning of destination range. + * @param table Genetic code translation table. + * @return Output iterator to the element past the last element translated. + */ +template +OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table); + +namespace dna +{ + /** + * Generates the complementary sequence for a sequence of IUPAC degenerate DNA base symbols. + * + * @param first,last Range of elements to complement. + * @param d_first Beginning of the destination range. + * @return Output iterator to the element past the last element complemented. + */ + template + OutputIt complement(InputIt first, InputIt last, OutputIt d_first); +} + +namespace rna +{ + /** + * Generates the complementary sequence for a sequence of IUPAC degenerate RNA base symbols. + * + * @param first,last Range of elements to complement. + * @param d_first Beginning of the destination range. + * @return Output iterator to the element past the last element complemented. + */ + template + OutputIt complement(InputIt first, InputIt last, OutputIt d_first); +} + +template +ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g) +{ + typedef typename std::iterator_traits::difference_type difference_t; + std::uniform_int_distribution distribution(0, std::distance(first1, last1) - 1); + difference_t pos = distribution(g); + std::advance(first1, pos); + std::advance(first2, pos); + std::swap_ranges(first1, last1, first2); + return first2; +} + +template +void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g) +{ + typedef typename std::iterator_traits::difference_type difference_t; + + std::uniform_int_distribution distribution(0, std::distance(first1, last1) - 1); + ForwardIt1 crossover1, crossover2; + + while (count) + { + crossover1 = first1; + crossover2 = first2; + + difference_t pos = distribution(g); + std::advance(crossover1, pos); + std::advance(crossover2, pos); + std::swap_ranges(crossover1, last1, crossover2); + + --count; + } +} + +template +orf find_orf(ForwardIt first, ForwardIt last, const translation_table& table) +{ + ForwardIt second; + ForwardIt third; + orf result; + + auto distance = std::distance(first, last); + + if (distance >= 3) + { + second = first; + ++second; + third = second; + ++third; + + do + { + if (codon::is_start(*first, *second, *third, table.starts)) + { + result.start = first; + distance -= 3; + break; + } + + first = second; + second = third; + ++third; + --distance; + } + while (third != last); + } + + for (; distance >= 3; distance -= 3) + { + first = ++third; + second = ++third; + ++third; + + if (codon::is_stop(*first, *second, *third, table.aas)) + { + result.stop = first; + return result; + } + } + + return {last, last}; +} + +template +ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g) +{ + typedef typename std::iterator_traits::difference_type difference_t; + + if (first == last) + return first; + + std::uniform_int_distribution distribution(0, std::distance(first, last) - 1); + std::advance(first, distribution(g)); + *first = unary_op(*first); + + return first; +} + +template +void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g) +{ + typedef typename std::iterator_traits::difference_type difference_t; + + if (first == last) + return first; + + std::uniform_int_distribution distribution(0, std::distance(first, last) - 1); + ForwardIt mutation; + + while (count) + { + mutation = first; + std::advance(mutation, distribution(g)); + *mutation = unary_op(*mutation); + --count; + } +} + +template +ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits::difference_type stride) +{ + for (auto distance = std::distance(first, last); distance > 0; distance -= stride) + { + ForwardIt1 it = first; + for (ForwardIt2 s_it = s_first; ; ++it, ++s_it) + { + if (s_it == s_last) + return first; + + if (it == last) + return last; + + if (!base::compare(*it, *s_it)) + break; + } + + if (distance > stride) + std::advance(first, stride); + } + + return last; +} + +template +inline OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first) +{ + return std::transform(first, last, d_first, base::transcribe); +} + +template +OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table) +{ + auto length = std::distance(first, last); + + if (length >= 3) + { + InputIt second = first; + ++second; + InputIt third = second; + ++third; + + *(d_first++) = codon::translate(*first, *second, *third, table.starts); + + for (length -= 3; length >= 3; length -= 3) + { + first = ++third; + second = ++third; + ++third; + + *(d_first++) = codon::translate(*first, *second, *third, table.aas); + } + } + + return d_first; +} + +namespace dna +{ + template + inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first) + { + return std::transform(first, last, d_first, base::dna::complement); + } +} + +namespace rna +{ + template + inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first) + { + return std::transform(first, last, d_first, base::rna::complement); + } +} + +} // namespace sequence +} // namespace genetics + +#endif // ANTKEEPER_GENETICS_SEQUENCE_HPP diff --git a/src/game/genetics/transcribe.hpp b/src/game/genetics/transcribe.hpp deleted file mode 100644 index b87da6e..0000000 --- a/src/game/genetics/transcribe.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2020 Christopher J. Howard - * - * This file is part of Antkeeper source code. - * - * Antkeeper source code is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Antkeeper source code is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Antkeeper source code. If not, see . - */ - -#ifndef ANTKEEPER_DNA_TRANSCRIBE_HPP -#define ANTKEEPER_DNA_TRANSCRIBE_HPP - -#include "nucleobase.hpp" -#include - -namespace dna { - -/** - * Transcribes a range of IUPAC degenerate base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`. - * - * @param first,last Range of elements to transcribe. - * @param d_first Beginning of the destination range. - * @return Output iterator to the element past the last element transcribed. - */ -template -OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first); - -template -OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first) -{ - return std::transform(first, last, d_first, base::transcribe); -} - -} // namespace dna - -#endif // ANTKEEPER_DNA_TRANSCRIBE_HPP diff --git a/src/game/genetics/translate.hpp b/src/game/genetics/translate.hpp deleted file mode 100644 index 02ef693..0000000 --- a/src/game/genetics/translate.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2020 Christopher J. Howard - * - * This file is part of Antkeeper source code. - * - * Antkeeper source code is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Antkeeper source code is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Antkeeper source code. If not, see . - */ - -#ifndef ANTKEEPER_DNA_TRANSLATE_HPP -#define ANTKEEPER_DNA_TRANSLATE_HPP - -#include - -namespace dna { - -/** - * Divides a range into consecutive subranges of @p n elements, then applies the given function to each subrange and stores the result in another range. - * - * @param first,last Range of elements to translate. - * @param d_first Beginning of the destination range. - * @param n Number of elements by which to divide the range. - * @param binary_op Binary operation function object that will be applied to each subrange of @p n elements. - * @return Output iterator to the element past the last element translated. - */ -template -OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op); - -template -OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op) -{ - for (auto length = std::distance(first, last); length >= n; length -= n) - { - InputIt next = first; - std::advance(next, n); - *(d_first++) = binary_op(first, next); - first = next; - } - - return d_first; -} - -} // namespace dna - -#endif // ANTKEEPER_DNA_TRANSLATE_HPP diff --git a/src/game/genetics/translation-table.hpp b/src/game/genetics/translation-table.hpp new file mode 100644 index 0000000..7045ed0 --- /dev/null +++ b/src/game/genetics/translation-table.hpp @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2020 Christopher J. Howard + * + * This file is part of Antkeeper source code. + * + * Antkeeper source code is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Antkeeper source code is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Antkeeper source code. If not, see . + */ + +#ifndef ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP +#define ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP + +namespace genetics { + +/** + * Genetic code translation table. + * + * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi + */ +struct translation_table +{ + /// String of 64 IUPAC amino acid base symbols, in TCAG order. + const char* aas; + + /// String of 64 IUPAC amino acid base symbols, in TCAG order, where symbols other than `-` and `*` indicate a start codon and its amino acid. + const char* starts; +}; + +/// Translation table for standard genetic code. +constexpr translation_table standard_code = +{ + "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", + "---M------**--*----M---------------M----------------------------", +}; + +} // namespace genetics + +#endif // ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP