diff --git a/src/game/genetics/nucleobase.cpp b/src/game/genetics/base.cpp
similarity index 68%
rename from src/game/genetics/nucleobase.cpp
rename to src/game/genetics/base.cpp
index 4c33858..d176b6a 100644
--- a/src/game/genetics/nucleobase.cpp
+++ b/src/game/genetics/base.cpp
@@ -17,10 +17,9 @@
* along with Antkeeper source code. If not, see .
*/
-#include "nucleobase.hpp"
-#include
+#include "base.hpp"
-namespace dna {
+namespace genetics {
namespace base {
/**
@@ -29,9 +28,9 @@ namespace base {
* @param symbol IUPAC degenerate base symbol.
* @return Bit mask representing the possible bases represented by the symbol.
*/
-static std::uint8_t decode(char symbol)
+static inline unsigned char decode(char symbol)
{
- static constexpr std::uint8_t bases[26] =
+ static constexpr unsigned char bases[25] =
{
0b0001, // A
0b1110, // B
@@ -58,34 +57,43 @@ static std::uint8_t decode(char symbol)
0b1001, // W
0, // X
0b1010, // Y
- 0, // Z
};
- return (symbol < 'A' || symbol > 'Z') ? 0 : bases[symbol - 'A'];
+ return (symbol < 'A' || symbol >= 'Z') ? 0 : bases[symbol - 'A'];
}
-char complement_rna(char symbol)
+int compare(char a, char b)
{
- static constexpr char* complements = "TVGHZZCDZZMZKNZZZYSAABWZR";
- return (symbol < 'A' || symbol > 'Z') ? 'Z' : complements[symbol - 'A'];
+ static constexpr int popcount[16] = // number of set bits in each 4-bit value 0..15
+ {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
+ };
+
+ return popcount[decode(a) & decode(b)]; // AND the two base masks, then count the bases present in both
}
-char complement_dna(char symbol)
+char transcribe(char symbol)
{
- static constexpr char* complements = "UVGHZZCDZZMZKNZZZYSAABWZR";
- return (symbol < 'A' || symbol > 'Z') ? 'Z' : complements[symbol - 'A'];
+ return (symbol == 'T') ? 'U' : (symbol == 'U') ? 'T' : symbol; // swap T<->U; every other symbol is returned unchanged
}
-char transcribe(char symbol)
+namespace dna
{
- return (symbol == 'T') ? 'U' : (symbol == 'U') ? 'T' : symbol;
+ char complement(char symbol) // DNA complement: A<->T, C<->G; degenerate symbols map to the symbol for the complementary base set
+ {
+ static constexpr const char* complements = "TVGHZZCDZZMZKNZZZYSAABWZR"; // entry i is the complement of 'A' + i ('A' pairs with 'T' in DNA); 'Z' is the placeholder for letters with no complement
+ return (symbol < 'A' || symbol >= 'Z') ? 'Z' : complements[symbol - 'A']; // the table covers 'A'..'Y' only, so 'Z' and anything out of range yield 'Z'
+ }
}
-int compare(char a, char b)
+namespace rna
{
- std::uint8_t bases = decode(a) & decode(b);
- return (bases & 1) + (bases >> 1 & 1) + (bases >> 2 & 1) + (bases >> 3 & 1);
+ char complement(char symbol) // RNA complement: A<->U, C<->G; degenerate symbols map to the symbol for the complementary base set
+ {
+ static constexpr const char* complements = "UVGHZZCDZZMZKNZZZYSAABWZR"; // entry i is the complement of 'A' + i ('A' pairs with 'U' in RNA); 'Z' is the placeholder for letters with no complement
+ return (symbol < 'A' || symbol >= 'Z') ? 'Z' : complements[symbol - 'A']; // the table covers 'A'..'Y' only, so 'Z' and anything out of range yield 'Z'
+ }
}
} // namespace base
-} // namespace dna
+} // namespace genetics
diff --git a/src/game/genetics/nucleobase.hpp b/src/game/genetics/base.hpp
similarity index 69%
rename from src/game/genetics/nucleobase.hpp
rename to src/game/genetics/base.hpp
index d99e31d..b0dfd51 100644
--- a/src/game/genetics/nucleobase.hpp
+++ b/src/game/genetics/base.hpp
@@ -17,27 +17,20 @@
* along with Antkeeper source code. If not, see .
*/
-#ifndef ANTKEEPER_DNA_NUCLEOBASE_HPP
-#define ANTKEEPER_DNA_NUCLEOBASE_HPP
+#ifndef ANTKEEPER_GENETICS_BASE_HPP
+#define ANTKEEPER_GENETICS_BASE_HPP
-namespace dna {
+namespace genetics {
namespace base {
/**
- * Returns the DNA complement of an IUPAC degenerate base symbol.
- *
- * @param symbol IUPAC degenerate base symbol.
- * @return IUPAC degenerate base symbol of DNA complement.
- */
-char complement_dna(char symbol);
-
-/**
- * Returns the RNA complement of an IUPAC degenerate base symbol.
+ * Returns the number of bases that are represented by both IUPAC degenerate base symbols.
*
- * @param symbol IUPAC degenerate base symbol.
- * @return IUPAC degenerate base symbol of RNA complement.
+ * @param a First IUPAC degenerate base symbol.
+ * @param b Second IUPAC degenerate base symbol.
+ * @return Number of bases represented by both symbols.
*/
-char complement_rna(char symbol);
+int compare(char a, char b);
/**
* Transcribes an IUPAC degenerate base symbol between DNA and RNA, swapping `T` for `U` or `U` for `T`.
@@ -47,16 +40,29 @@ char complement_rna(char symbol);
*/
char transcribe(char symbol);
-/**
- * Returns the number of bases that are represented by both IUPAC degenerate base symbols.
- *
- * @param a First IUPAC degenerate base symbol.
- * @param b Second IUPAC degenerate base symbol.
- * @return Number of bases represented by both symbols.
- */
-int compare(char a, char b);
+namespace dna
+{
+ /**
+ * Returns the DNA complement of an IUPAC degenerate base symbol.
+ *
+ * @param symbol IUPAC degenerate base symbol.
+ * @return IUPAC degenerate base symbol of DNA complement.
+ */
+ char complement(char symbol);
+}
+
+namespace rna
+{
+ /**
+ * Returns the RNA complement of an IUPAC degenerate base symbol.
+ *
+ * @param symbol IUPAC degenerate base symbol.
+ * @return IUPAC degenerate base symbol of RNA complement.
+ */
+ char complement(char symbol);
+}
} // namespace base
-} // namespace dna
+} // namespace genetics
-#endif // ANTKEEPER_DNA_NUCLEOBASE_HPP
+#endif // ANTKEEPER_GENETICS_BASE_HPP
diff --git a/src/game/genetics/codon.cpp b/src/game/genetics/codon.cpp
new file mode 100644
index 0000000..f938dad
--- /dev/null
+++ b/src/game/genetics/codon.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2020 Christopher J. Howard
+ *
+ * This file is part of Antkeeper source code.
+ *
+ * Antkeeper source code is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Antkeeper source code is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Antkeeper source code. If not, see .
+ */
+
+#include "codon.hpp"
+
+namespace genetics {
+namespace codon {
+
+/**
+ * Returns the index of a nucleobase for use with a translation table.
+ *
+ * @param base IUPAC code of nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @return Index of the nucleobase, or a negative value if a non-standard nucleobase was supplied.
+ */
+static inline int base_index(char base)
+{
+ switch (base)
+ {
+ case 'U': // U and T share index 0, so RNA and DNA codons map to the same table slot
+ case 'T':
+ return 0;
+ case 'C':
+ return 1;
+ case 'A':
+ return 2;
+ case 'G':
+ return 3;
+ }
+
+ return ~3; // ~3 is -4 in two's complement: the negative "non-standard base" result
+}
+
+/**
+ * Returns the index of a codon for use with a translation table.
+ *
+ * @param base1 IUPAC code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param base2 IUPAC code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param base3 IUPAC code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @return Index of codon, or a negative value if a non-standard nucleobase was supplied.
+ */
+static inline int codon_index(char base1, char base2, char base3)
+{
+ int i = base_index(base1); // each index is 0..3, or negative for a non-standard base
+ int j = base_index(base2);
+ int k = base_index(base3);
+ return ((i | j | k) < 0) ? -1 : ((i << 4) | (j << 2) | k); // reject invalid bases first: left-shifting a negative int is undefined before C++20; valid codons pack to 0..63
+}
+
+char translate(char base1, char base2, char base3, const char* aas) // deliberately not `inline`: codon.hpp declares it for use from other translation units, so the definition must be emitted here
+{
+ int index = codon_index(base1, base2, base3);
+ if (index < 0)
+ return '-'; // a non-standard base was supplied
+ return aas[index]; // aas must hold at least 64 codes (see codon.hpp)
+}
+
+bool is_start(char base1, char base2, char base3, const char* starts)
+{
+ char aa = translate(base1, base2, base3, starts); // look the codon up in the start table; '-' also comes back for an invalid codon
+ return ((aa != '-') && (aa != '*')); // any entry other than '-' or '*' counts as a start codon
+}
+
+bool is_stop(char base1, char base2, char base3, const char* aas)
+{
+ char aa = translate(base1, base2, base3, aas); // an invalid codon yields '-', which is not a stop
+ return (aa == '*'); // only a '*' table entry counts as a stop codon
+}
+
+} // namespace codon
+} // namespace genetics
diff --git a/src/game/genetics/codon.hpp b/src/game/genetics/codon.hpp
new file mode 100644
index 0000000..5e799b0
--- /dev/null
+++ b/src/game/genetics/codon.hpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2020 Christopher J. Howard
+ *
+ * This file is part of Antkeeper source code.
+ *
+ * Antkeeper source code is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Antkeeper source code is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Antkeeper source code. If not, see .
+ */
+
+#ifndef ANTKEEPER_GENETICS_CODON_HPP
+#define ANTKEEPER_GENETICS_CODON_HPP
+
+namespace genetics {
+namespace codon {
+
+/**
+ * Returns `true` if a codon is a start codon.
+ *
+ * @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param starts String of 64 IUPAC amino acid codes, ordered to match corresponding start codon indices.
+ * @return `true` if the codon is a start codon, `false` otherwise.
+ */
+bool is_start(char base1, char base2, char base3, const char* starts);
+
+/**
+ * Returns `true` if a codon is a stop codon.
+ *
+ * @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices.
+ * @return `true` if the codon is a stop codon, `false` otherwise.
+ */
+bool is_stop(char base1, char base2, char base3, const char* aas);
+
+/**
+ * Translates a codon into an amino acid.
+ *
+ * @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`.
+ * @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices.
+ * @return IUPAC amino acid code of corresponding amino acid, or `-` if an invalid codon was supplied.
+ */
+char translate(char base1, char base2, char base3, const char* aas);
+
+} // namespace codon
+} // namespace genetics
+
+#endif // ANTKEEPER_GENETICS_CODON_HPP
diff --git a/src/game/genetics/crossover.hpp b/src/game/genetics/crossover.hpp
deleted file mode 100644
index 30a61a3..0000000
--- a/src/game/genetics/crossover.hpp
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2020 Christopher J. Howard
- *
- * This file is part of Antkeeper source code.
- *
- * Antkeeper source code is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Antkeeper source code is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Antkeeper source code. If not, see .
- */
-
-#ifndef ANTKEEPER_DNA_CROSSOVER_HPP
-#define ANTKEEPER_DNA_CROSSOVER_HPP
-
-#include
-#include
-#include
-
-namespace dna {
-
-/**
- * Exchanges elements between two ranges, starting at a random offset.
- *
- * @param first1,last1 First range of elements to crossover.
- * @param first2 Beginning of the second range of elements to crossover.
- * @param g Uniform random bit generator.
- * @return Iterator to the start of the crossover in the second range.
- */
-template
-ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g);
-
-/**
- * Exchanges elements between two ranges multiple times, starting at a random offset each time.
- *
- * @param first1,last1 First range of elements to crossover.
- * @param first2 Beginning of the second range of elements to crossover.
- * @param count Number of times to crossover.
- * @param g Uniform random bit generator.
- */
-template
-void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g);
-
-template
-ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
-{
- typedef typename std::iterator_traits::difference_type difference_t;
- std::uniform_int_distribution distribution(0, std::distance(first1, last1) - 1);
- difference_t pos = distribution(g);
- std::advance(first1, pos);
- std::advance(first2, pos);
- std::swap_ranges(first1, last1, first2);
- return first2;
-}
-
-template
-void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
-{
- typedef typename std::iterator_traits::difference_type difference_t;
-
- std::uniform_int_distribution distribution(0, std::distance(first1, last1) - 1);
- ForwardIt1 crossover1, crossover2;
-
- while (count)
- {
- crossover1 = first1;
- crossover2 = first2;
-
- difference_t pos = distribution(g);
- std::advance(crossover1, pos);
- std::advance(crossover2, pos);
- std::swap_ranges(crossover1, last1, crossover2);
-
- --count;
- }
-}
-
-} // namespace dna
-
-#endif // ANTKEEPER_DNA_CROSSOVER_HPP
diff --git a/src/game/genetics/frame.hpp b/src/game/genetics/frame.hpp
deleted file mode 100644
index 7bed2bb..0000000
--- a/src/game/genetics/frame.hpp
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (C) 2020 Christopher J. Howard
- *
- * This file is part of Antkeeper source code.
- *
- * Antkeeper source code is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Antkeeper source code is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Antkeeper source code. If not, see .
- */
-
-#ifndef ANTKEEPER_DNA_FRAME_HPP
-#define ANTKEEPER_DNA_FRAME_HPP
-
-#include
-
-namespace dna {
-
-/**
- * Finds the first start codon in a sequence.
- *
- * @param first,last Range of elements to search.
- * @param n Number of elements per codon.
- * @param p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
- * @return Iterator to the first element in the start codon, or @p last if no start codon was found.
- */
-template
-InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p)
-{
- auto length = std::distance(first, last);
-
- if (length >= n)
- {
- InputIt next = first;
- std::advance(next, n);
-
- do
- {
- if (p(first, next))
- return first;
-
- ++first;
- ++next;
- --length;
- }
- while (length >= n);
- }
-
- return last;
-}
-
-/**
- * Searches the range `[first, last)` for a sequence of @p n elements which satifies predicate @p p.
- *
- * @param first,last Range of elements to search.
- * @param n Number of elements in the sequence.
- * @param stride Number of elements between searches.
- * @param p
- */
-template
-InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p)
-{
- if (auto length = std::distance(first, last); length >= n)
- {
- Size offset = n + stride;
- InputIt next = first;
- std::advance(next, n);
-
- do
- {
- if (p(first, next))
- return first;
-
- if (length < offset)
- break;
-
- std::advance(first, stride);
- std::advance(next, stride);
- length -= offset;
- }
- while (1);
- }
-
- return last;
-}
-
-/**
- * Finds the first stop codon in a sequence.
- *
- * @param first,last Range of elements to search.
- * @param n Number of elements per codon.
- * @param p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
- * @return Iterator to the first element in the stop codon, or @p last if no stop codon was found.
- */
-template
-InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p)
-{
- for (auto length = std::distance(first, last); length >= n; length -= n)
- {
- InputIt next = first;
- std::advance(next, n);
- if (p(first, next))
- return first;
- first = next;
- }
-
- return last;
-}
-
-/**
- * Finds the first open reading frame (ORF) in a range of elements.
- *
- * @param[in,out] first Iterator to the beginning of the sequence, which will point to th
- *
- * @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
- * @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
- */
-template
-void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p)
-{
- first = find_start(first, last, n, start_p);
- if (first != last)
- last = find_stop(first, last, n, stop_p);
-}
-
-} // namespace dna
-
-#endif // ANTKEEPER_DNA_FRAME_HPP
diff --git a/src/game/genetics/genetics.hpp b/src/game/genetics/genetics.hpp
new file mode 100644
index 0000000..e4a7a78
--- /dev/null
+++ b/src/game/genetics/genetics.hpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2020 Christopher J. Howard
+ *
+ * This file is part of Antkeeper source code.
+ *
+ * Antkeeper source code is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Antkeeper source code is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Antkeeper source code. If not, see .
+ */
+
+#ifndef ANTKEEPER_GENETICS_HPP
+#define ANTKEEPER_GENETICS_HPP
+
+#include "base.hpp"
+#include "codon.hpp"
+#include "protein.hpp"
+#include "sequence.hpp"
+
+#endif // ANTKEEPER_GENETICS_HPP
diff --git a/src/game/genetics/mutate.hpp b/src/game/genetics/mutate.hpp
deleted file mode 100644
index 6f71dd0..0000000
--- a/src/game/genetics/mutate.hpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2020 Christopher J. Howard
- *
- * This file is part of Antkeeper source code.
- *
- * Antkeeper source code is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Antkeeper source code is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Antkeeper source code. If not, see .
- */
-
-#ifndef ANTKEEPER_DNA_MUTATE_HPP
-#define ANTKEEPER_DNA_MUTATE_HPP
-
-#include
-#include
-#include
-
-namespace dna {
-
-/**
- * Applies the given function to a randomly selected element in a range.
- *
- * @param first,last Range of elements to mutate.
- * @param unary_op Unary operation function object that will be applied.
- * @param g Uniform random bit generator.
- * @return Iterator to the mutated element.
- */
-template
-ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g);
-
-/**
- * Applies the given function to a random selection of elements in a range.
- *
- * @param first,last Range of elements to mutate.
- * @param count Number of elements to mutate.
- * @param unary_op Unary operation function object that will be applied.
- * @param g Uniform random bit generator.
- */
-template
-void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g);
-
-template
-ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
-{
- typedef typename std::iterator_traits::difference_type difference_t;
-
- std::uniform_int_distribution distribution(0, std::distance(first, last) - 1);
- std::advance(first, distribution(g));
- *first = unary_op(*first);
-
- return first;
-}
-
-template
-void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
-{
- typedef typename std::iterator_traits::difference_type difference_t;
-
- std::uniform_int_distribution distribution(0, std::distance(first, last) - 1);
- ForwardIt mutation;
-
- while (count)
- {
- mutation = first;
- std::advance(mutation, distribution(g));
- *mutation = unary_op(*mutation);
- --count;
- }
-}
-
-} // namespace dna
-
-#endif // ANTKEEPER_DNA_MUTATE_HPP
diff --git a/src/game/genetics/protein.hpp b/src/game/genetics/protein.hpp
new file mode 100644
index 0000000..eab430c
--- /dev/null
+++ b/src/game/genetics/protein.hpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2020 Christopher J. Howard
+ *
+ * This file is part of Antkeeper source code.
+ *
+ * Antkeeper source code is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Antkeeper source code is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Antkeeper source code. If not, see .
+ */
+
+#ifndef ANTKEEPER_GENETICS_PROTEIN_HPP
+#define ANTKEEPER_GENETICS_PROTEIN_HPP
+
+namespace genetics {
+namespace protein {
+
+
+
+} // namespace protein
+} // namespace genetics
+
+#endif // ANTKEEPER_GENETICS_PROTEIN_HPP
diff --git a/src/game/genetics/sequence.hpp b/src/game/genetics/sequence.hpp
new file mode 100644
index 0000000..5c6900f
--- /dev/null
+++ b/src/game/genetics/sequence.hpp
@@ -0,0 +1,357 @@
+/*
+ * Copyright (C) 2020 Christopher J. Howard
+ *
+ * This file is part of Antkeeper source code.
+ *
+ * Antkeeper source code is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Antkeeper source code is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Antkeeper source code. If not, see .
+ */
+
+#ifndef ANTKEEPER_GENETICS_SEQUENCE_HPP
+#define ANTKEEPER_GENETICS_SEQUENCE_HPP
+
+#include "base.hpp"
+#include "codon.hpp"
+#include "translation-table.hpp"
+#include <algorithm>
+#include <iterator>
+#include <random>
+
+namespace genetics {
+namespace sequence {
+
+/**
+ * Open reading frame (ORF), defined by a start codon and stop codon, with the distance between divisible by three.
+ *
+ * @tparam Iterator Sequence iterator type.
+ */
+template <class Iterator>
+struct orf
+{
+ /// Iterator to the first base of the start codon.
+ Iterator start;
+
+ /// Iterator to the first base of the stop codon.
+ Iterator stop;
+};
+
+/**
+ * Exchanges elements between two ranges, starting at a random offset.
+ *
+ * @param first1,last1 First range of elements to crossover.
+ * @param first2 Beginning of the second range of elements to crossover.
+ * @param g Uniform random bit generator.
+ * @return Iterator to the start of the crossover in the second range.
+ */
+template <class ForwardIt1, class ForwardIt2, class URBG>
+ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g);
+
+/**
+ * Exchanges elements between two ranges multiple times, starting at a random offset each time.
+ *
+ * @param first1,last1 First range of elements to crossover.
+ * @param first2 Beginning of the second range of elements to crossover.
+ * @param count Number of times to crossover.
+ * @param g Uniform random bit generator.
+ */
+template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
+void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g);
+
+/**
+ * Searches a sequence for an open reading frame (ORF).
+ *
+ * @param first,last Range of elements to search.
+ * @param table Genetic code translation table.
+ * @return First ORF in the sequence, or `{last, last}` if no ORF was found.
+ */
+template <class ForwardIt>
+orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table);
+
+/**
+ * Applies the given function to a randomly selected element in a range.
+ *
+ * @param first,last Range of elements to mutate.
+ * @param unary_op Unary operation function object that will be applied.
+ * @param g Uniform random bit generator.
+ * @return Iterator to the mutated element.
+ */
+template <class ForwardIt, class UnaryOperation, class URBG>
+ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g);
+
+/**
+ * Applies the given function to a random selection of elements in a range.
+ *
+ * @param first,last Range of elements to mutate.
+ * @param count Number of elements to mutate.
+ * @param unary_op Unary operation function object that will be applied.
+ * @param g Uniform random bit generator.
+ */
+template <class ForwardIt, class Size, class UnaryOperation, class URBG>
+void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g);
+
+/**
+ * Searches a sequence of IUPAC base symbols for a pattern matching a search string of IUPAC degenerate base symbols.
+ *
+ * @param first,last Sequence of IUPAC base symbols to search.
+ * @param s_first,s_last Search string of IUPAC degenerate base symbols.
+ * @param stride Distance between consecutive searches.
+ * @return Iterator to the beginning of the first subsequence matching `[s_first, s_last)` in the sequence `[first, last)`. If no such occurrence is found, @p last is returned.
+ */
+template <class ForwardIt1, class ForwardIt2>
+ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride);
+
+/**
+ * Transcribes a sequence of IUPAC base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`.
+ *
+ * @param first,last Range of elements to transcribe.
+ * @param d_first Beginning of the destination range.
+ * @return Output iterator to the element past the last element transcribed.
+ */
+template <class InputIt, class OutputIt>
+OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first);
+
+/**
+ * Translates a sequence of codons into amino acids.
+ *
+ * @param first,last Open reading frame.
+ * @param d_first Beginning of destination range.
+ * @param table Genetic code translation table.
+ * @return Output iterator to the element past the last element translated.
+ */
+template <class InputIt, class OutputIt>
+OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table);
+
+namespace dna
+{
+ /**
+ * Generates the complementary sequence for a sequence of IUPAC degenerate DNA base symbols.
+ *
+ * @param first,last Range of elements to complement.
+ * @param d_first Beginning of the destination range.
+ * @return Output iterator to the element past the last element complemented.
+ */
+ template <class InputIt, class OutputIt>
+ OutputIt complement(InputIt first, InputIt last, OutputIt d_first);
+}
+
+namespace rna
+{
+ /**
+ * Generates the complementary sequence for a sequence of IUPAC degenerate RNA base symbols.
+ *
+ * @param first,last Range of elements to complement.
+ * @param d_first Beginning of the destination range.
+ * @return Output iterator to the element past the last element complemented.
+ */
+ template <class InputIt, class OutputIt>
+ OutputIt complement(InputIt first, InputIt last, OutputIt d_first);
+}
+
+template <class ForwardIt1, class ForwardIt2, class URBG>
+ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
+{
+ typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
+ if (first1 == last1) return first2; // empty range: nothing to exchange, and a [0, -1] distribution would be undefined
+ difference_t pos = std::uniform_int_distribution<difference_t>(0, std::distance(first1, last1) - 1)(g); // random crossover offset within the first range
+ std::advance(first1, pos);
+ std::advance(first2, pos);
+ std::swap_ranges(first1, last1, first2); // exchange the tails of both ranges from the offset onward
+ return first2; // start of the crossover in the second range
+}
+
+template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
+void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
+{
+ typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
+ if (first1 == last1) return; // empty range: a [0, -1] distribution would be undefined
+ std::uniform_int_distribution<difference_t> distribution(0, std::distance(first1, last1) - 1);
+ ForwardIt1 crossover1; ForwardIt2 crossover2; // each cursor uses the iterator type of its own range
+
+ while (count)
+ {
+ crossover1 = first1;
+ crossover2 = first2;
+
+ difference_t pos = distribution(g); // a fresh random offset for every crossover
+ std::advance(crossover1, pos);
+ std::advance(crossover2, pos);
+ std::swap_ranges(crossover1, last1, crossover2); // exchange the tails from the offset onward
+
+ --count;
+ }
+}
+
+template <class ForwardIt>
+orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table)
+{
+ ForwardIt second;
+ ForwardIt third;
+ orf<ForwardIt> result;
+
+ auto distance = std::distance(first, last); // bases not yet consumed
+
+ if (distance >= 3)
+ {
+ second = first;
+ ++second;
+ third = second;
+ ++third;
+
+ do // slide a three-base window one base at a time until a start codon is found
+ {
+ if (codon::is_start(*first, *second, *third, table.starts))
+ {
+ result.start = first;
+ distance -= 3; // the start codon itself is consumed
+ break;
+ }
+
+ first = second;
+ second = third;
+ ++third;
+ --distance;
+ }
+ while (third != last); // leaving this way means no start was found and distance == 2
+ }
+
+ for (; distance >= 3; distance -= 3) // step codon by codon, in frame with the start codon
+ {
+ first = ++third;
+ second = ++third;
+ ++third;
+
+ if (codon::is_stop(*first, *second, *third, table.aas))
+ {
+ result.stop = first;
+ return result;
+ }
+ }
+
+ return {last, last}; // no start codon, or no in-frame stop codon after it
+}
+
+template <class ForwardIt, class UnaryOperation, class URBG>
+ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
+{
+ typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
+
+ if (first == last) // empty range: nothing to mutate
+ return first;
+
+ std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
+ std::advance(first, distribution(g)); // move to a uniformly chosen element
+ *first = unary_op(*first);
+
+ return first; // iterator to the mutated element
+}
+
+template <class ForwardIt, class Size, class UnaryOperation, class URBG>
+void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
+{
+ typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
+
+ if (first == last) // empty range: nothing to mutate
+ return; // a void function must not return a value
+
+ std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
+ ForwardIt mutation;
+
+ while (count)
+ {
+ mutation = first;
+ std::advance(mutation, distribution(g)); // each pick is independent, so an element may be mutated more than once
+ *mutation = unary_op(*mutation);
+ --count;
+ }
+}
+
+template <class ForwardIt1, class ForwardIt2>
+ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride)
+{
+ for (auto distance = std::distance(first, last); distance > 0; distance -= stride) // stride must be positive, otherwise the loop never makes progress
+ {
+ ForwardIt1 it = first;
+ for (ForwardIt2 s_it = s_first; ; ++it, ++s_it)
+ {
+ if (s_it == s_last) // whole search string matched at this position
+ return first;
+
+ if (it == last) // sequence ended before the search string did; no later position can match
+ return last;
+
+ if (!base::compare(*it, *s_it)) // the symbols share no base: mismatch at this position
+ break;
+ }
+
+ if (distance > stride) // only advance while a full stride still fits inside the range
+ std::advance(first, stride);
+ }
+
+ return last;
+}
+
+template <class InputIt, class OutputIt>
+inline OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first)
+{
+ return std::transform(first, last, d_first, base::transcribe); // element-wise T<->U swap
+}
+
+template <class InputIt, class OutputIt>
+OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table)
+{
+ auto length = std::distance(first, last); // the range is traversed again below, so the iterators must be multi-pass
+
+ if (length >= 3)
+ {
+ InputIt second = first;
+ ++second;
+ InputIt third = second;
+ ++third;
+
+ *(d_first++) = codon::translate(*first, *second, *third, table.starts); // the first codon is looked up in the start table
+
+ for (length -= 3; length >= 3; length -= 3) // trailing bases that do not form a full codon are ignored
+ {
+ first = ++third;
+ second = ++third;
+ ++third;
+
+ *(d_first++) = codon::translate(*first, *second, *third, table.aas);
+ }
+ }
+
+ return d_first;
+}
+
+namespace dna
+{
+ template <class InputIt, class OutputIt>
+ inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
+ {
+ return std::transform(first, last, d_first, base::dna::complement); // element-wise DNA complement; the order of elements is preserved
+ }
+}
+
+namespace rna
+{
+ template <class InputIt, class OutputIt>
+ inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
+ {
+ return std::transform(first, last, d_first, base::rna::complement); // element-wise RNA complement; the order of elements is preserved
+ }
+}
+
+} // namespace sequence
+} // namespace genetics
+
+#endif // ANTKEEPER_GENETICS_SEQUENCE_HPP
diff --git a/src/game/genetics/transcribe.hpp b/src/game/genetics/transcribe.hpp
deleted file mode 100644
index b87da6e..0000000
--- a/src/game/genetics/transcribe.hpp
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2020 Christopher J. Howard
- *
- * This file is part of Antkeeper source code.
- *
- * Antkeeper source code is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Antkeeper source code is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Antkeeper source code. If not, see .
- */
-
-#ifndef ANTKEEPER_DNA_TRANSCRIBE_HPP
-#define ANTKEEPER_DNA_TRANSCRIBE_HPP
-
-#include "nucleobase.hpp"
-#include
-
-namespace dna {
-
-/**
- * Transcribes a range of IUPAC degenerate base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`.
- *
- * @param first,last Range of elements to transcribe.
- * @param d_first Beginning of the destination range.
- * @return Output iterator to the element past the last element transcribed.
- */
-template
-OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first);
-
-template
-OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first)
-{
- return std::transform(first, last, d_first, base::transcribe);
-}
-
-} // namespace dna
-
-#endif // ANTKEEPER_DNA_TRANSCRIBE_HPP
diff --git a/src/game/genetics/translate.hpp b/src/game/genetics/translate.hpp
deleted file mode 100644
index 02ef693..0000000
--- a/src/game/genetics/translate.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2020 Christopher J. Howard
- *
- * This file is part of Antkeeper source code.
- *
- * Antkeeper source code is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Antkeeper source code is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Antkeeper source code. If not, see .
- */
-
-#ifndef ANTKEEPER_DNA_TRANSLATE_HPP
-#define ANTKEEPER_DNA_TRANSLATE_HPP
-
-#include
-
-namespace dna {
-
-/**
- * Divides a range into consecutive subranges of @p n elements, then applies the given function to each subrange and stores the result in another range.
- *
- * @param first,last Range of elements to translate.
- * @param d_first Beginning of the destination range.
- * @param n Number of elements by which to divide the range.
- * @param binary_op Binary operation function object that will be applied to each subrange of @p n elements.
- * @return Output iterator to the element past the last element translated.
- */
-template
-OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op);
-
-template
-OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op)
-{
- for (auto length = std::distance(first, last); length >= n; length -= n)
- {
- InputIt next = first;
- std::advance(next, n);
- *(d_first++) = binary_op(first, next);
- first = next;
- }
-
- return d_first;
-}
-
-} // namespace dna
-
-#endif // ANTKEEPER_DNA_TRANSLATE_HPP
diff --git a/src/game/genetics/translation-table.hpp b/src/game/genetics/translation-table.hpp
new file mode 100644
index 0000000..7045ed0
--- /dev/null
+++ b/src/game/genetics/translation-table.hpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2020 Christopher J. Howard
+ *
+ * This file is part of Antkeeper source code.
+ *
+ * Antkeeper source code is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Antkeeper source code is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP
+#define ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP
+
+namespace genetics {
+
+/**
+ * Genetic code translation table: a pair of 64-symbol strings, one symbol per codon.
+ *
+ * @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
+ */
+struct translation_table
+{
+ /// String of 64 IUPAC amino acid symbols, one per codon, in TCAG order.
+ const char* aas;
+
+ /// String of 64 symbols, one per codon, in TCAG order, where symbols other than `-` and `*` indicate a start codon and its amino acid.
+ const char* starts;
+};
+
+/// Translation table for the standard genetic code: amino acid string first, start-codon string second.
+constexpr translation_table standard_code
+{
+	/* aas    */ "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
+	/* starts */ "---M------**--*----M---------------M----------------------------",
+};
+
+} // namespace genetics
+
+#endif // ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP