Browse Source

Rename dna namespace to genetics, and revise genetic algorithms

master
C. J. Howard 3 years ago
parent
commit
4760fc1920
13 changed files with 670 additions and 450 deletions
  1. +27
    -19
      src/game/genetics/base.cpp
  2. +31
    -25
      src/game/genetics/base.hpp
  3. +86
    -0
      src/game/genetics/codon.cpp
  4. +62
    -0
      src/game/genetics/codon.hpp
  5. +0
    -87
      src/game/genetics/crossover.hpp
  6. +0
    -136
      src/game/genetics/frame.hpp
  7. +28
    -0
      src/game/genetics/genetics.hpp
  8. +0
    -82
      src/game/genetics/mutate.hpp
  9. +31
    -0
      src/game/genetics/protein.hpp
  10. +357
    -0
      src/game/genetics/sequence.hpp
  11. +0
    -46
      src/game/genetics/transcribe.hpp
  12. +0
    -55
      src/game/genetics/translate.hpp
  13. +48
    -0
      src/game/genetics/translation-table.hpp

src/game/genetics/nucleobase.cpp → src/game/genetics/base.cpp View File

@ -17,10 +17,9 @@
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "nucleobase.hpp"
#include <cstdint>
#include "base.hpp"
namespace dna {
namespace genetics {
namespace base { namespace base {
/** /**
@ -29,9 +28,9 @@ namespace base {
* @param symbol IUPAC degenerate base symbol. * @param symbol IUPAC degenerate base symbol.
* @return Bit mask representing the possible bases represented by the symbol. * @return Bit mask representing the possible bases represented by the symbol.
*/ */
static std::uint8_t decode(char symbol)
static inline unsigned char decode(char symbol)
{ {
static constexpr std::uint8_t bases[26] =
static constexpr unsigned char bases[25] =
{ {
0b0001, // A 0b0001, // A
0b1110, // B 0b1110, // B
@ -58,34 +57,43 @@ static std::uint8_t decode(char symbol)
0b1001, // W 0b1001, // W
0, // X 0, // X
0b1010, // Y 0b1010, // Y
0, // Z
}; };
return (symbol < 'A' || symbol > 'Z') ? 0 : bases[symbol - 'A'];
return (symbol < 'A' || symbol >= 'Z') ? 0 : bases[symbol - 'A'];
} }
char complement_rna(char symbol)
int compare(char a, char b)
{ {
static constexpr char* complements = "TVGHZZCDZZMZKNZZZYSAABWZRZ";
return (symbol < 'A' || symbol > 'Z') ? 'Z' : complements[symbol - 'A'];
static constexpr int popcount[16] =
{
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
};
return popcount[decode(a) & decode(b)];
} }
char complement_dna(char symbol)
char transcribe(char symbol)
{ {
static constexpr char* complements = "UVGHZZCDZZMZKNZZZYSAABWZRZ";
return (symbol < 'A' || symbol > 'Z') ? 'Z' : complements[symbol - 'A'];
return (symbol == 'T') ? 'U' : (symbol == 'U') ? 'T' : symbol;
} }
char transcribe(char symbol)
namespace dna
{ {
return (symbol == 'T') ? 'U' : (symbol == 'U') ? 'T' : symbol;
// Looks up the complement of an IUPAC degenerate base symbol in a table
// indexed by `symbol - 'A'`. Symbols outside `A`..`Y` yield `Z`.
// NOTE(review): `A` maps to `U` here although this function lives in
// namespace `dna` — confirm the DNA/RNA tables are not swapped.
char complement(char symbol)
{
	// A string literal may only bind to a pointer-to-const since C++11.
	static constexpr const char* complements = "UVGHZZCDZZMZKNZZZYSAABWZR";
	return (symbol < 'A' || symbol >= 'Z') ? 'Z' : complements[symbol - 'A'];
}
} }
int compare(char a, char b)
namespace rna
{ {
std::uint8_t bases = decode(a) & decode(b);
return (bases & 1) + (bases >> 1 & 1) + (bases >> 2 & 1) + (bases >> 3 & 1);
// Looks up the complement of an IUPAC degenerate base symbol in a table
// indexed by `symbol - 'A'`. Symbols outside `A`..`Y` yield `Z`.
// NOTE(review): `A` maps to `T` here although this function lives in
// namespace `rna` — confirm the DNA/RNA tables are not swapped.
char complement(char symbol)
{
	// A string literal may only bind to a pointer-to-const since C++11.
	static constexpr const char* complements = "TVGHZZCDZZMZKNZZZYSAABWZR";
	return (symbol < 'A' || symbol >= 'Z') ? 'Z' : complements[symbol - 'A'];
}
} }
} // namespace base } // namespace base
} // namespace dna
} // namespace genetics

src/game/genetics/nucleobase.hpp → src/game/genetics/base.hpp View File

@ -17,27 +17,20 @@
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifndef ANTKEEPER_DNA_NUCLEOBASE_HPP
#define ANTKEEPER_DNA_NUCLEOBASE_HPP
#ifndef ANTKEEPER_GENETICS_BASE_HPP
#define ANTKEEPER_GENETICS_BASE_HPP
namespace dna {
namespace genetics {
namespace base { namespace base {
/** /**
* Returns the DNA complement of an IUPAC degenerate base symbol.
*
* @param symbol IUPAC degenerate base symbol.
* @return IUPAC degenerate base symbol of DNA complement.
*/
char complement_dna(char symbol);
/**
* Returns the RNA complement of an IUPAC degenerate base symbol.
* Returns the number of bases that are represented by both IUPAC degenerate base symbols.
* *
* @param symbol IUPAC degenerate base symbol.
* @return IUPAC degenerate base symbol of RNA complement.
* @param a First IUPAC degenerate base symbol.
* @param b Second IUPAC degenerate base symbol.
* @return Number of bases represented by both symbols.
*/ */
char complement_rna(char symbol);
int compare(char a, char b);
/** /**
* Transcribes an IUPAC degenerate base symbol between DNA and RNA, swapping `T` for `U` or `U` for `T`. * Transcribes an IUPAC degenerate base symbol between DNA and RNA, swapping `T` for `U` or `U` for `T`.
@ -47,16 +40,29 @@ char complement_rna(char symbol);
*/ */
char transcribe(char symbol); char transcribe(char symbol);
/**
* Returns the number of bases that are represented by both IUPAC degenerate base symbols.
*
* @param a First IUPAC degenerate base symbol.
* @param b Second IUPAC degenerate base symbol.
* @return Number of bases represented by both symbols.
*/
int compare(char a, char b);
namespace dna
{
/**
* Returns the DNA complement of an IUPAC degenerate base symbol.
*
* @param symbol IUPAC degenerate base symbol.
* @return IUPAC degenerate base symbol of DNA complement.
*/
char complement(char symbol);
}
namespace rna
{
/**
* Returns the RNA complement of an IUPAC degenerate base symbol.
*
* @param symbol IUPAC degenerate base symbol.
* @return IUPAC degenerate base symbol of RNA complement.
*/
char complement(char symbol);
}
} // namespace base } // namespace base
} // namespace dna
} // namespace genetics
#endif // ANTKEEPER_DNA_NUCLEOBASE_HPP
#endif // ANTKEEPER_GENETICS_BASE_HPP

+ 86
- 0
src/game/genetics/codon.cpp View File

@ -0,0 +1,86 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#include "codon.hpp"
namespace genetics {
namespace codon {
/**
 * Maps a nucleobase symbol to its position along one axis of a translation table.
 *
 * @param base IUPAC code of nucleobase, either `U`, `T`, `C`, `A`, or `G`.
 * @return `0` for `U` or `T`, `1` for `C`, `2` for `A`, `3` for `G`, or the negative value `~3` for any other symbol.
 */
static inline int base_index(char base)
{
	if (base == 'U' || base == 'T')
		return 0;
	if (base == 'C')
		return 1;
	if (base == 'A')
		return 2;
	if (base == 'G')
		return 3;
	
	// Non-standard nucleobase: negative sentinel.
	return ~3;
}
/**
 * Returns the index of a codon for use with a translation table.
 *
 * The three per-base indices (two bits each) are packed as `base1:base2:base3`, giving a value in `[0, 63]`.
 *
 * @param base1 IUPAC code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`.
 * @param base2 IUPAC code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`.
 * @param base3 IUPAC code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`.
 * @return Index of codon, or a negative value if a non-standard nucleobase was supplied.
 */
static inline int codon_index(char base1, char base2, char base3)
{
	const int i = base_index(base1);
	const int j = base_index(base2);
	const int k = base_index(base3);
	
	// Reject invalid bases before packing: left-shifting a negative int is
	// undefined behavior prior to C++20.
	if ((i | j | k) < 0)
		return -1;
	
	return (i << 4) | (j << 2) | k;
}
// Translates a codon into the amino acid stored at its index in `aas`,
// or returns `-` when any of the three bases is not a standard nucleobase.
//
// Deliberately NOT `inline`: the function is declared without `inline` in
// codon.hpp and is called from other translation units, so it needs a single
// out-of-line definition here.
char translate(char base1, char base2, char base3, const char* aas)
{
	const int index = codon_index(base1, base2, base3);
	if (index < 0)
		return '-';
	return aas[index];
}
// A codon is a start codon when its entry in the `starts` table is neither
// the invalid-codon marker `-` nor the `*` symbol.
bool is_start(char base1, char base2, char base3, const char* starts)
{
	switch (translate(base1, base2, base3, starts))
	{
		case '-':
		case '*':
			return false;
		default:
			return true;
	}
}
// A codon is a stop codon when it translates to the `*` symbol in `aas`.
bool is_stop(char base1, char base2, char base3, const char* aas)
{
	return translate(base1, base2, base3, aas) == '*';
}
} // namespace codon
} // namespace genetics

+ 62
- 0
src/game/genetics/codon.hpp View File

@ -0,0 +1,62 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_GENETICS_CODON_HPP
#define ANTKEEPER_GENETICS_CODON_HPP
namespace genetics {
namespace codon {
/**
* Returns `true` if a codon is a start codon.
*
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param starts String of 64 IUPAC amino acid codes, ordered to match corresponding start codon indices.
* @return `true` if the codon is a start codon, `false` otherwise.
*/
bool is_start(char base1, char base2, char base3, const char* starts);
/**
* Returns `true` if a codon is a stop codon.
*
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices.
* @return `true` if the codon is a stop codon, `false` otherwise.
*/
bool is_stop(char base1, char base2, char base3, const char* aas);
/**
* Translates a codon into an amino acid.
*
* @param base1 IUPAC base code of first nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param base2 IUPAC base code of second nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param base3 IUPAC base code of third nucleobase, either `U`, `T`, `C`, `A`, or `G`.
* @param aas String of 64 IUPAC amino acid codes, ordered to match corresponding codon indices.
* @return IUPAC amino acid code of corresponding amino acid, or `-` if an invalid codon was supplied.
*/
char translate(char base1, char base2, char base3, const char* aas);
} // namespace codon
} // namespace genetics
#endif // ANTKEEPER_GENETICS_CODON_HPP

+ 0
- 87
src/game/genetics/crossover.hpp View File

@ -1,87 +0,0 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_DNA_CROSSOVER_HPP
#define ANTKEEPER_DNA_CROSSOVER_HPP
#include <algorithm>
#include <iterator>
#include <random>
namespace dna {
/**
* Exchanges elements between two ranges, starting at a random offset.
*
* @param first1,last1 First range of elements to crossover.
* @param first2 Beginning of the second range of elements to crossover.
* @param g Uniform random bit generator.
* @return Iterator to the start of the crossover in the second range.
*/
template <class ForwardIt1, class ForwardIt2, class URBG>
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g);
/**
* Exchanges elements between two ranges multiple times, starting at a random offset each time.
*
* @param first1,last1 First range of elements to crossover.
* @param first2 Beginning of the second range of elements to crossover.
* @param count Number of times to crossover.
* @param g Uniform random bit generator.
*/
template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g);
// Swaps the tails of two ranges, starting at a uniformly chosen offset in
// `[0, distance(first1, last1) - 1]`. Returns an iterator to the start of the
// swapped tail in the second range, or `first2` unchanged when the first
// range is empty.
template <class ForwardIt1, class ForwardIt2, class URBG>
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
	
	// An empty range has no crossover point; constructing the distribution
	// with bounds (0, -1) would be undefined behavior.
	const difference_t length = std::distance(first1, last1);
	if (length <= 0)
		return first2;
	
	std::uniform_int_distribution<difference_t> distribution(0, length - 1);
	const difference_t pos = distribution(g);
	std::advance(first1, pos);
	std::advance(first2, pos);
	std::swap_ranges(first1, last1, first2);
	return first2;
}
// Performs `count` tail swaps between two ranges, each starting at an
// independently chosen uniform offset. Does nothing when the first range is
// empty.
template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
	
	// An empty range has no crossover point; constructing the distribution
	// with bounds (0, -1) would be undefined behavior.
	const difference_t length = std::distance(first1, last1);
	if (length <= 0)
		return;
	
	std::uniform_int_distribution<difference_t> distribution(0, length - 1);
	while (count)
	{
		// Each iterator keeps its own range's type so that the two ranges
		// may come from different containers.
		ForwardIt1 crossover1 = first1;
		ForwardIt2 crossover2 = first2;
		const difference_t pos = distribution(g);
		std::advance(crossover1, pos);
		std::advance(crossover2, pos);
		std::swap_ranges(crossover1, last1, crossover2);
		--count;
	}
}
} // namespace dna
#endif // ANTKEEPER_DNA_CROSSOVER_HPP

+ 0
- 136
src/game/genetics/frame.hpp View File

@ -1,136 +0,0 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_DNA_FRAME_HPP
#define ANTKEEPER_DNA_FRAME_HPP
#include <iterator>
namespace dna {
/**
 * Finds the first start codon in a sequence, testing every window of @p n elements one element apart.
 *
 * The range is traversed more than once (`std::distance`, then the scan), so the iterators must be multi-pass.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon.
 * @param p Binary predicate called as `p(window_begin, window_end)`, which returns `true` if the subrange of length @p n is a start codon.
 * @return Iterator to the first element in the start codon, or @p last if no start codon was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
	auto length = std::distance(first, last);
	if (length < n)
		return last;
	
	InputIt next = first;
	std::advance(next, n);
	for (;;)
	{
		if (p(first, next))
			return first;
		
		// Stop before stepping once the window already ends at `last`, so
		// that `next` is never incremented past the end of the range.
		if (--length < n)
			break;
		
		++first;
		++next;
	}
	return last;
}
/**
 * Searches the range `[first, last)` for a sequence of @p n elements which satisfies predicate @p p.
 *
 * Windows of @p n elements are tested at offsets `0, stride, 2 * stride, ...` for as long as a full window fits in the range.
 * The range is traversed more than once, so the iterators must be multi-pass.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements in the sequence.
 * @param stride Number of elements between searches. If not positive, only the window at @p first is tested.
 * @param p Binary predicate called as `p(window_begin, window_end)`, which returns `true` if the window matches.
 * @return Iterator to the first element of the first matching window, or @p last if no window matched.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p)
{
	if (auto length = std::distance(first, last); length >= n)
	{
		// Minimum remaining length needed for another full window after a step.
		const Size offset = n + stride;
		
		InputIt next = first;
		std::advance(next, n);
		
		for (;;)
		{
			if (p(first, next))
				return first;
			
			// A non-positive stride can never make progress.
			if (!(stride > 0) || length < offset)
				break;
			
			std::advance(first, stride);
			std::advance(next, stride);
			
			// Only `stride` elements were consumed by the step (not
			// `n + stride`); subtracting more would skip trailing windows.
			length -= stride;
		}
	}
	return last;
}
/**
 * Finds the first stop codon in a sequence, testing consecutive non-overlapping codons of @p n elements beginning at @p first.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon.
 * @param p Binary predicate called as `p(codon_begin, codon_end)`, which returns `true` if the subrange of length @p n is a stop codon.
 * @return Iterator to the first element in the stop codon, or @p last if no stop codon was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
	auto remaining = std::distance(first, last);
	while (remaining >= n)
	{
		InputIt codon_end = first;
		std::advance(codon_end, n);
		
		if (p(first, codon_end))
			return first;
		
		// Step to the next in-frame codon.
		first = codon_end;
		remaining -= n;
	}
	return last;
}
/**
 * Finds the first open reading frame (ORF) in a range of elements.
 *
 * @param[in,out] first Iterator to the beginning of the sequence. On return it holds the result of `find_start`: the first element of the start codon, or @p last if no start codon was found.
 * @param[in,out] last Iterator to the end of the sequence. If a start codon was found, on return it holds the result of `find_stop` searched in-frame from the start codon: the first element of the stop codon, or the unchanged end of the sequence if no stop codon was found.
 * @param n Number of elements per codon.
 * @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
 * @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
 */
template <class InputIt, class Size, class BinaryPredicate1, class BinaryPredicate2>
void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p)
{
	const InputIt start = find_start(first, last, n, start_p);
	first = start;
	
	// Without a start codon there is no frame in which to look for a stop.
	if (start == last)
		return;
	
	last = find_stop(start, last, n, stop_p);
}
} // namespace dna
#endif // ANTKEEPER_DNA_FRAME_HPP

+ 28
- 0
src/game/genetics/genetics.hpp View File

@ -0,0 +1,28 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_GENETICS_HPP
#define ANTKEEPER_GENETICS_HPP
#include "base.hpp"
#include "codon.hpp"
#include "protein.hpp"
#include "sequence.hpp"
#endif // ANTKEEPER_GENETICS_HPP

+ 0
- 82
src/game/genetics/mutate.hpp View File

@ -1,82 +0,0 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_DNA_MUTATE_HPP
#define ANTKEEPER_DNA_MUTATE_HPP
#include <algorithm>
#include <iterator>
#include <random>
namespace dna {
/**
* Applies the given function to a randomly selected element in a range.
*
* @param first,last Range of elements to mutate.
* @param unary_op Unary operation function object that will be applied.
* @param g Uniform random bit generator.
* @return Iterator to the mutated element.
*/
template <class ForwardIt, class UnaryOperation, class URBG>
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g);
/**
* Applies the given function to a random selection of elements in a range.
*
* @param first,last Range of elements to mutate.
* @param count Number of elements to mutate.
* @param unary_op Unary operation function object that will be applied.
* @param g Uniform random bit generator.
*/
template <class ForwardIt, class Size, class UnaryOperation, class URBG>
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g);
// Applies `unary_op` to one uniformly chosen element of `[first, last)` and
// returns an iterator to it. Returns `first` (== `last`) when the range is
// empty.
template <class ForwardIt, class UnaryOperation, class URBG>
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
	
	// An empty range has nothing to mutate; constructing the distribution
	// with bounds (0, -1) would be undefined behavior.
	if (first == last)
		return first;
	
	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
	std::advance(first, distribution(g));
	*first = unary_op(*first);
	return first;
}
// Applies `unary_op` to `count` uniformly chosen elements of `[first, last)`.
// Positions are drawn independently, so the same element may be mutated more
// than once. Does nothing when the range is empty.
template <class ForwardIt, class Size, class UnaryOperation, class URBG>
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
	
	// An empty range has nothing to mutate; constructing the distribution
	// with bounds (0, -1) would be undefined behavior.
	if (first == last)
		return;
	
	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
	while (count)
	{
		ForwardIt mutation = first;
		std::advance(mutation, distribution(g));
		*mutation = unary_op(*mutation);
		--count;
	}
}
} // namespace dna
#endif // ANTKEEPER_DNA_MUTATE_HPP

+ 31
- 0
src/game/genetics/protein.hpp View File

@ -0,0 +1,31 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_GENETICS_PROTEIN_HPP
#define ANTKEEPER_GENETICS_PROTEIN_HPP
namespace genetics {
namespace protein {
} // namespace protein
} // namespace genetics
#endif // ANTKEEPER_GENETICS_PROTEIN_HPP

+ 357
- 0
src/game/genetics/sequence.hpp View File

@ -0,0 +1,357 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_GENETICS_SEQUENCE_HPP
#define ANTKEEPER_GENETICS_SEQUENCE_HPP
#include "base.hpp"
#include "codon.hpp"
#include "translation-table.hpp"
#include <algorithm>
#include <iterator>
#include <random>
namespace genetics {
namespace sequence {
/**
 * Open reading frame (ORF): a pair of iterators marking a start codon and a stop codon, with the distance between them divisible by three.
 *
 * @tparam Iterator Sequence iterator type.
 */
template <typename Iterator>
struct orf
{
	/// Points at the first base of the start codon.
	Iterator start;
	
	/// Points at the first base of the stop codon.
	Iterator stop;
};
/**
* Exchanges elements between two ranges, starting at a random offset.
*
* @param first1,last1 First range of elements to crossover.
* @param first2 Beginning of the second range of elements to crossover.
* @param g Uniform random bit generator.
* @return Iterator to the start of the crossover in the second range.
*/
template <class ForwardIt1, class ForwardIt2, class URBG>
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g);
/**
* Exchanges elements between two ranges multiple times, starting at a random offset each time.
*
* @param first1,last1 First range of elements to crossover.
* @param first2 Beginning of the second range of elements to crossover.
* @param count Number of times to crossover.
* @param g Uniform random bit generator.
*/
template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g);
/**
* Searches a sequence for an open reading frame (ORF).
*
* @param first,last Range of elements to search.
* @param table Genetic code translation table.
* @return First ORF in the sequence, or `{last, last}` if no ORF was found.
*/
template <class ForwardIt>
orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table);
/**
* Applies the given function to a randomly selected element in a range.
*
* @param first,last Range of elements to mutate.
* @param unary_op Unary operation function object that will be applied.
* @param g Uniform random bit generator.
* @return Iterator to the mutated element.
*/
template <class ForwardIt, class UnaryOperation, class URBG>
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g);
/**
* Applies the given function to a random selection of elements in a range.
*
* @param first,last Range of elements to mutate.
* @param count Number of elements to mutate.
* @param unary_op Unary operation function object that will be applied.
* @param g Uniform random bit generator.
*/
template <class ForwardIt, class Size, class UnaryOperation, class URBG>
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g);
/**
* Searches a sequence of IUPAC base symbols for a pattern matching a search string of IUPAC degenerate base symbols.
*
* @param first,last Sequence of IUPAC base symbols to search.
* @param s_first,s_last Search string of IUPAC degenerate base symbols.
* @param stride Distance between consecutive searches.
* @return Iterator to the beginning of the first subsequence matching `[s_first, s_last)` in the sequence `[first, last)`. If no such occurrence is found, @p last is returned.
*/
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride);
/**
* Transcribes a sequence of IUPAC base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`.
*
* @param first,last Range of elements to transcribe.
* @param d_first Beginning of the destination range.
* @return Output iterator to the element past the last element transcribed.
*/
template <class InputIt, class OutputIt>
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first);
/**
* Translates a sequence of codons into amino acids.
*
* @param first,last Open reading frame.
* @param d_first Beginning of destination range.
* @param table Genetic code translation table.
* @return Output iterator to the element past the last element translated.
*/
template <class InputIt, class OutputIt>
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table);
namespace dna
{
/**
* Generates the complementary sequence for a sequence of IUPAC degenerate DNA base symbols.
*
* @param first,last Range of elements to complement.
* @param d_first Beginning of the destination range.
* @return Output iterator to the element past the last element complemented.
*/
template <class InputIt, class OutputIt>
OutputIt complement(InputIt first, InputIt last, OutputIt d_first);
}
namespace rna
{
/**
* Generates the complementary sequence for a sequence of IUPAC degenerate RNA base symbols.
*
* @param first,last Range of elements to complement.
* @param d_first Beginning of the destination range.
* @return Output iterator to the element past the last element complemented.
*/
template <class InputIt, class OutputIt>
OutputIt complement(InputIt first, InputIt last, OutputIt d_first);
}
// Swaps the tails of two ranges, starting at a uniformly chosen offset in
// `[0, distance(first1, last1) - 1]`. Returns an iterator to the start of the
// swapped tail in the second range, or `first2` unchanged when the first
// range is empty.
template <class ForwardIt1, class ForwardIt2, class URBG>
ForwardIt2 crossover(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
	
	// An empty range has no crossover point; constructing the distribution
	// with bounds (0, -1) would be undefined behavior.
	const difference_t length = std::distance(first1, last1);
	if (length <= 0)
		return first2;
	
	std::uniform_int_distribution<difference_t> distribution(0, length - 1);
	const difference_t pos = distribution(g);
	std::advance(first1, pos);
	std::advance(first2, pos);
	std::swap_ranges(first1, last1, first2);
	return first2;
}
// Performs `count` tail swaps between two ranges, each starting at an
// independently chosen uniform offset. Does nothing when the first range is
// empty.
template <class ForwardIt1, class ForwardIt2, class Size, class URBG>
void crossover_n(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, Size count, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt1>::difference_type difference_t;
	
	// An empty range has no crossover point; constructing the distribution
	// with bounds (0, -1) would be undefined behavior.
	const difference_t length = std::distance(first1, last1);
	if (length <= 0)
		return;
	
	std::uniform_int_distribution<difference_t> distribution(0, length - 1);
	while (count)
	{
		// Each iterator keeps its own range's type so that the two ranges
		// may come from different containers.
		ForwardIt1 crossover1 = first1;
		ForwardIt2 crossover2 = first2;
		const difference_t pos = distribution(g);
		std::advance(crossover1, pos);
		std::advance(crossover2, pos);
		std::swap_ranges(crossover1, last1, crossover2);
		--count;
	}
}
// Searches `[first, last)` for an open reading frame in two phases:
//   1. slide a three-base window one base at a time until
//      `codon::is_start` accepts it (using `table.starts`);
//   2. from the codon following the start codon, step three bases at a time
//      until `codon::is_stop` accepts a codon (using `table.aas`).
// Returns `{last, last}` if either phase fails. The range is walked with
// `std::distance` and then again by the scan, so it must be multi-pass.
template <class ForwardIt>
orf<ForwardIt> find_orf(ForwardIt first, ForwardIt last, const translation_table& table)
{
// `first`, `second`, `third` track the three bases of the current codon.
ForwardIt second;
ForwardIt third;
orf<ForwardIt> result;
// Number of bases from `first` to `last`; kept in step with the scan below.
auto distance = std::distance(first, last);
if (distance >= 3)
{
second = first;
++second;
third = second;
++third;
// Phase 1: look for a start codon at every offset.
do
{
if (codon::is_start(*first, *second, *third, table.starts))
{
result.start = first;
// Discount the start codon itself; `distance` now counts the bases after it.
distance -= 3;
break;
}
// Slide the window forward by one base.
first = second;
second = third;
++third;
--distance;
}
while (third != last);
}
// Phase 2: `third` still points at the last base of the start codon, so each
// iteration advances all three iterators onto the next in-frame codon.
// If phase 1 found nothing, fewer than three bases remain in `distance`
// and this loop does not run.
for (; distance >= 3; distance -= 3)
{
first = ++third;
second = ++third;
++third;
if (codon::is_stop(*first, *second, *third, table.aas))
{
result.stop = first;
return result;
}
}
// No start codon, or no in-frame stop codon after it.
return {last, last};
}
// Picks one element of `[first, last)` uniformly at random, replaces it with
// `unary_op(element)`, and returns an iterator to it. An empty range is left
// untouched and `first` is returned.
template <class ForwardIt, class UnaryOperation, class URBG>
ForwardIt mutate(ForwardIt first, ForwardIt last, UnaryOperation unary_op, URBG&& g)
{
	using offset_t = typename std::iterator_traits<ForwardIt>::difference_type;
	
	if (first != last)
	{
		std::uniform_int_distribution<offset_t> pick(0, std::distance(first, last) - 1);
		const offset_t offset = pick(g);
		std::advance(first, offset);
		*first = unary_op(*first);
	}
	return first;
}
// Applies `unary_op` to `count` uniformly chosen elements of `[first, last)`.
// Positions are drawn independently, so the same element may be mutated more
// than once. Does nothing when the range is empty.
template <class ForwardIt, class Size, class UnaryOperation, class URBG>
void mutate_n(ForwardIt first, ForwardIt last, Size count, UnaryOperation unary_op, URBG&& g)
{
	typedef typename std::iterator_traits<ForwardIt>::difference_type difference_t;
	
	// Nothing to mutate. The function returns `void`, so this must be a
	// plain `return` (returning an iterator here fails to compile).
	if (first == last)
		return;
	
	std::uniform_int_distribution<difference_t> distribution(0, std::distance(first, last) - 1);
	while (count)
	{
		ForwardIt mutation = first;
		std::advance(mutation, distribution(g));
		*mutation = unary_op(*mutation);
		--count;
	}
}
// Tries to match the search string `[s_first, s_last)` at offsets
// `0, stride, 2 * stride, ...` of `[first, last)`. Two symbols match when
// `base::compare` reports a non-zero number of shared bases. Returns the
// start of the first match, or `last` if there is none or the sequence ends
// in the middle of a match attempt.
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 search(ForwardIt1 first, ForwardIt1 last, ForwardIt2 s_first, ForwardIt2 s_last, typename std::iterator_traits<ForwardIt1>::difference_type stride)
{
	auto remaining = std::distance(first, last);
	while (remaining > 0)
	{
		ForwardIt1 cursor = first;
		ForwardIt2 pattern = s_first;
		bool mismatch = false;
		while (!mismatch)
		{
			// Whole search string consumed: match found at `first`.
			if (pattern == s_last)
				return first;
			
			// Sequence exhausted before the search string.
			if (cursor == last)
				return last;
			
			if (base::compare(*cursor, *pattern) == 0)
			{
				mismatch = true;
			}
			else
			{
				++cursor;
				++pattern;
			}
		}
		
		// Only step when the step stays inside the sequence.
		if (remaining > stride)
			std::advance(first, stride);
		remaining -= stride;
	}
	return last;
}
// Writes `base::transcribe` of every symbol in `[first, last)` to the
// destination range and returns the iterator past the last symbol written.
template <class InputIt, class OutputIt>
inline OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first)
{
	for (; first != last; ++first, ++d_first)
		*d_first = base::transcribe(*first);
	return d_first;
}
// Translates consecutive codons of `[first, last)` into amino acids. The
// first codon is looked up in `table.starts`, every following codon in
// `table.aas`; trailing bases that do not form a full codon are ignored.
// The range is walked by `std::distance` and again by the scan, so it must
// be multi-pass.
template <class InputIt, class OutputIt>
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, const translation_table& table)
{
	auto remaining = std::distance(first, last);
	if (remaining < 3)
		return d_first;
	
	// First codon: translated with the start-codon table.
	InputIt second = first;
	++second;
	InputIt third = second;
	++third;
	*(d_first++) = codon::translate(*first, *second, *third, table.starts);
	remaining -= 3;
	
	// Remaining codons: `third` sits on the last base of the previous codon,
	// so three pre-increments move all iterators onto the next codon.
	while (remaining >= 3)
	{
		first = ++third;
		second = ++third;
		++third;
		*(d_first++) = codon::translate(*first, *second, *third, table.aas);
		remaining -= 3;
	}
	return d_first;
}
namespace dna
{
    /**
     * Writes the result of applying `base::dna::complement` to each element of a range into a destination range.
     *
     * @param first,last Range of elements to complement.
     * @param d_first Beginning of the destination range.
     * @return Output iterator to the element past the last element written.
     */
    template <class InputIt, class OutputIt>
    inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
    {
        OutputIt d_last = std::transform(first, last, d_first, base::dna::complement);
        return d_last;
    }
}
namespace rna
{
    /**
     * Writes the result of applying `base::rna::complement` to each element of a range into a destination range.
     *
     * @param first,last Range of elements to complement.
     * @param d_first Beginning of the destination range.
     * @return Output iterator to the element past the last element written.
     */
    template <class InputIt, class OutputIt>
    inline OutputIt complement(InputIt first, InputIt last, OutputIt d_first)
    {
        OutputIt d_last = std::transform(first, last, d_first, base::rna::complement);
        return d_last;
    }
}
} // namespace sequence
} // namespace genetics
#endif // ANTKEEPER_GENETICS_SEQUENCE_HPP

+ 0
- 46
src/game/genetics/transcribe.hpp View File

@ -1,46 +0,0 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_DNA_TRANSCRIBE_HPP
#define ANTKEEPER_DNA_TRANSCRIBE_HPP
#include "nucleobase.hpp"
#include <algorithm>
namespace dna {

/**
 * Transcribes a range of IUPAC degenerate base symbols between DNA and RNA, swapping `T` for `U` or `U` for `T`.
 *
 * Each element is passed through `base::transcribe` and the result is written to the destination range.
 *
 * @param first,last Range of elements to transcribe.
 * @param d_first Beginning of the destination range.
 * @return Output iterator to the element past the last element transcribed.
 */
template <class InputIt, class OutputIt>
OutputIt transcribe(InputIt first, InputIt last, OutputIt d_first)
{
    OutputIt d_last = std::transform(first, last, d_first, base::transcribe);
    return d_last;
}

} // namespace dna
#endif // ANTKEEPER_DNA_TRANSCRIBE_HPP

+ 0
- 55
src/game/genetics/translate.hpp View File

@ -1,55 +0,0 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_DNA_TRANSLATE_HPP
#define ANTKEEPER_DNA_TRANSLATE_HPP
#include <iterator>
namespace dna {

/**
 * Divides a range into consecutive subranges of @p n elements, then applies the given function to each subrange and stores the result in another range.
 *
 * Trailing elements that do not fill a complete subrange of @p n elements are ignored. If @p n is not positive, nothing is translated and @p d_first is returned unchanged.
 *
 * @param first,last Range of elements to translate. The range is measured with `std::distance` and then traversed, so it must support more than one pass.
 * @param d_first Beginning of the destination range.
 * @param n Number of elements by which to divide the range.
 * @param binary_op Binary operation function object that will be applied to each subrange of @p n elements. It is called with the iterators to the beginning and to the end of the subrange.
 * @return Output iterator to the element past the last element translated.
 */
template <class InputIt, class OutputIt, class Size, class BinaryOperation>
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op);

template <class InputIt, class OutputIt, class Size, class BinaryOperation>
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op)
{
    // A zero subrange size would never shrink `length`, looping forever; a negative one would walk backwards.
    if (!(n > 0))
        return d_first;

    for (auto length = std::distance(first, last); length >= n; length -= n)
    {
        // `next` marks the end of the current subrange and the start of the following one.
        InputIt next = first;
        std::advance(next, n);

        *(d_first++) = binary_op(first, next);
        first = next;
    }

    return d_first;
}

} // namespace dna
#endif // ANTKEEPER_DNA_TRANSLATE_HPP

+ 48
- 0
src/game/genetics/translation-table.hpp View File

@ -0,0 +1,48 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP
#define ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP
namespace genetics {
/**
* Genetic code translation table.
*
* Holds two parallel strings of 64 symbols each, laid out in the format used by the NCBI genetic code listings.
* Neither string is owned by this structure; both members are plain pointers to character data.
*
* @see https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
*/
struct translation_table
{
/// String of 64 IUPAC amino acid symbols, in TCAG order. In the standard table below, `*` occupies the same positions in this string as in `starts`.
const char* aas;
/// String of 64 IUPAC amino acid symbols, in TCAG order, where symbols other than `-` and `*` indicate a start codon and its amino acid.
const char* starts;
};
/// Translation table for standard genetic code. The first string initializes `aas` and the second initializes `starts`.
constexpr translation_table standard_code =
{
"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
"---M------**--*----M---------------M----------------------------",
};
} // namespace genetics
#endif // ANTKEEPER_GENETICS_TRANSLATION_TABLE_HPP

Loading…
Cancel
Save