Browse Source

Add nucleobase functions

master
C. J. Howard 3 years ago
parent
commit
8f943ff4ad
7 changed files with 304 additions and 228 deletions
  1. +1
    -2
      src/game/genetics/crossover.hpp
  2. +136
    -0
      src/game/genetics/frame.hpp
  3. +1
    -2
      src/game/genetics/mutate.hpp
  4. +91
    -0
      src/game/genetics/nucleobase.cpp
  5. +62
    -0
      src/game/genetics/nucleobase.hpp
  6. +0
    -58
      src/game/genetics/transcribe.hpp
  7. +13
    -166
      src/game/genetics/translate.hpp

+ 1
- 2
src/game/genetics/crossover.hpp View File

@ -24,8 +24,7 @@
#include <iterator>
#include <random>
namespace dna
{
namespace dna {
/**
* Exchanges elements between two ranges, starting at a random offset.

+ 136
- 0
src/game/genetics/frame.hpp View File

@ -0,0 +1,136 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_DNA_FRAME_HPP
#define ANTKEEPER_DNA_FRAME_HPP
#include <iterator>
namespace dna {
/**
* Finds the first start codon in a sequence.
*
* Every offset in the range is considered as a possible start position; a
* candidate is the subrange of @p n elements beginning at that offset.
*
* @param first,last Range of elements to search.
* @param n Number of elements per codon.
* @param p Binary predicate, called as `p(begin, end)` with the iterators delimiting a candidate subrange of length @p n, which returns `true` if that subrange is a start codon.
* @return Iterator to the first element in the start codon, or @p last if no start codon was found.
*/
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
	auto length = std::distance(first, last);
	
	// The range cannot hold even a single codon.
	if (length < n)
		return last;
	
	InputIt next = first;
	std::advance(next, n);
	
	while (!p(first, next))
	{
		// Check that another full codon fits *before* moving the window, so
		// that `next` is never incremented past `last`.
		--length;
		if (length < n)
			return last;
		
		++first;
		++next;
	}
	
	return first;
}
/**
* Searches the range `[first, last)` for a sequence of @p n elements which satisfies predicate @p p.
*
* Candidate subranges begin at offsets `0`, `stride`, `2 * stride`, ... from
* @p first. Only candidates that fit entirely inside `[first, last)` are tested.
*
* @param first,last Range of elements to search.
* @param n Number of elements in the sequence.
* @param stride Number of elements between the starts of consecutive candidates. If not positive, only the candidate at @p first is tested.
* @param p Binary predicate, called as `p(begin, end)` with the iterators delimiting a candidate subrange of length @p n, which returns `true` if that subrange is a match.
* @return Iterator to the first element of the first matching subrange, or @p last if no match was found.
*/
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p)
{
	if (auto length = std::distance(first, last); length >= n)
	{
		// Minimum remaining length for a further candidate to fit after a step.
		const Size offset = n + stride;
		
		InputIt next = first;
		std::advance(next, n);
		
		while (true)
		{
			if (p(first, next))
				return first;
			
			// A non-positive stride would re-test the same subrange forever;
			// otherwise stop once the next candidate would run past `last`.
			if (!(stride > 0) || length < offset)
				break;
			
			std::advance(first, stride);
			std::advance(next, stride);
			
			// Only `stride` elements were stepped over, not `n + stride`.
			length -= stride;
		}
	}
	
	return last;
}
/**
* Finds the first stop codon in a sequence.
*
* The range is read as consecutive, non-overlapping codons of @p n elements
* starting at @p first; trailing elements that do not fill a whole codon are
* not tested.
*
* @param first,last Range of elements to search.
* @param n Number of elements per codon.
* @param p Binary predicate, called as `p(begin, end)` with the iterators delimiting a codon of length @p n, which returns `true` if that codon is a stop codon.
* @return Iterator to the first element in the stop codon, or @p last if no stop codon was found.
*/
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
	auto remaining = std::distance(first, last);
	
	while (remaining >= n)
	{
		// Delimit the codon beginning at `first`.
		InputIt codon_end = first;
		std::advance(codon_end, n);
		
		if (p(first, codon_end))
			return first;
		
		// Step to the next codon in the same reading frame.
		first = codon_end;
		remaining -= n;
	}
	
	return last;
}
/**
* Finds the first open reading frame (ORF) in a range of elements.
*
* The start codon is located with `find_start`; if one is found, the stop
* codon is then located with `find_stop`, beginning at the start codon.
*
* @param[in,out] first Iterator to the beginning of the sequence. On return, refers to the first element of the start codon, or equals @p last if no start codon was found.
* @param[in,out] last Iterator to the end of the sequence. On return, refers to the first element of the stop codon, or is left unchanged if no start codon or no stop codon was found.
* @param n Number of elements per codon.
* @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
* @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
*/
template <class InputIt, class Size, class BinaryPredicate1, class BinaryPredicate2>
void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p)
{
	first = find_start(first, last, n, start_p);
	
	// No start codon: there is no frame in which to look for a stop codon.
	if (first == last)
		return;
	
	last = find_stop(first, last, n, stop_p);
}
} // namespace dna
#endif // ANTKEEPER_DNA_FRAME_HPP

+ 1
- 2
src/game/genetics/mutate.hpp View File

@ -24,8 +24,7 @@
#include <iterator>
#include <random>
namespace dna
{
namespace dna {
/**
* Applies the given function to a randomly selected element in a range.

+ 91
- 0
src/game/genetics/nucleobase.cpp View File

@ -0,0 +1,91 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#include "nucleobase.hpp"
#include <cstdint>
namespace dna {
namespace base {
/**
* Decodes an IUPAC degenerate base symbol into a bit mask of the bases it can stand for.
*
* Bit 0 is set for `A`, bit 1 for `C`, bit 2 for `G` and bit 3 for `T`/`U`.
*
* @param symbol IUPAC degenerate base symbol.
* @return Bit mask of the possible bases, or `0` if @p symbol is not a recognized uppercase symbol.
*/
static std::uint8_t decode(char symbol)
{
	switch (symbol)
	{
		// Single bases
		case 'A': return 0b0001;
		case 'C': return 0b0010;
		case 'G': return 0b0100;
		case 'T': return 0b1000;
		case 'U': return 0b1000;
		
		// Two possible bases
		case 'M': return 0b0011;
		case 'R': return 0b0101;
		case 'S': return 0b0110;
		case 'W': return 0b1001;
		case 'Y': return 0b1010;
		case 'K': return 0b1100;
		
		// Three possible bases
		case 'V': return 0b0111;
		case 'H': return 0b1011;
		case 'D': return 0b1101;
		case 'B': return 0b1110;
		
		// Any base
		case 'N': return 0b1111;
		
		// Everything else represents no base.
		default: return 0;
	}
}
/**
* Returns the RNA complement of an IUPAC degenerate base symbol.
*
* @param symbol IUPAC degenerate base symbol.
* @return IUPAC degenerate base symbol of the RNA complement (so `A` maps to `U`), or `Z` if @p symbol is not an uppercase letter or has no complement.
*/
char complement_rna(char symbol)
{
	// Indexed by `symbol - 'A'`; `Z` marks letters that are not base symbols.
	// The result is RNA, so the complement of `A` is `U` rather than `T`.
	static constexpr const char* complements = "UVGHZZCDZZMZKNZZZYSAABWZRZ";
	return (symbol < 'A' || symbol > 'Z') ? 'Z' : complements[symbol - 'A'];
}
/**
* Returns the DNA complement of an IUPAC degenerate base symbol.
*
* @param symbol IUPAC degenerate base symbol.
* @return IUPAC degenerate base symbol of the DNA complement (so `A` maps to `T`), or `Z` if @p symbol is not an uppercase letter or has no complement.
*/
char complement_dna(char symbol)
{
	// Indexed by `symbol - 'A'`; `Z` marks letters that are not base symbols.
	// The result is DNA, so the complement of `A` is `T` rather than `U`.
	static constexpr const char* complements = "TVGHZZCDZZMZKNZZZYSAABWZRZ";
	return (symbol < 'A' || symbol > 'Z') ? 'Z' : complements[symbol - 'A'];
}
/**
* Transcribes a base symbol between DNA and RNA by swapping `T` and `U`.
*
* @param symbol Base symbol to transcribe.
* @return `U` for `T`, `T` for `U`, and @p symbol itself for anything else.
*/
char transcribe(char symbol)
{
	switch (symbol)
	{
		case 'T': return 'U';
		case 'U': return 'T';
		default: return symbol;
	}
}
/**
* Counts the bases that two IUPAC degenerate base symbols have in common.
*
* @param a First IUPAC degenerate base symbol.
* @param b Second IUPAC degenerate base symbol.
* @return Number of bits set in both decoded base masks.
*/
int compare(char a, char b)
{
	const std::uint8_t shared = decode(a) & decode(b);
	
	// Tally the four low bits of the shared mask.
	int count = 0;
	for (int bit = 0; bit < 4; ++bit)
		count += (shared >> bit) & 1;
	
	return count;
}
} // namespace base
} // namespace dna

+ 62
- 0
src/game/genetics/nucleobase.hpp View File

@ -0,0 +1,62 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_DNA_NUCLEOBASE_HPP
#define ANTKEEPER_DNA_NUCLEOBASE_HPP
namespace dna {
namespace base {
/**
* Returns the DNA complement of an IUPAC degenerate base symbol.
*
* Only uppercase symbols are recognized.
*
* @param symbol IUPAC degenerate base symbol.
* @return IUPAC degenerate base symbol of DNA complement, or `Z` if @p symbol is outside `A`-`Z` or is a letter with no complement.
*/
char complement_dna(char symbol);
/**
* Returns the RNA complement of an IUPAC degenerate base symbol.
*
* Only uppercase symbols are recognized.
*
* @param symbol IUPAC degenerate base symbol.
* @return IUPAC degenerate base symbol of RNA complement, or `Z` if @p symbol is outside `A`-`Z` or is a letter with no complement.
*/
char complement_rna(char symbol);
/**
* Transcribes an IUPAC degenerate base symbol between DNA and RNA, swapping `T` for `U` or `U` for `T`.
*
* The comparison is case-sensitive and no validation is performed: any
* character other than uppercase `T` or `U` is passed through unchanged.
*
* @param symbol IUPAC degenerate base symbol.
* @return `U` if @p symbol was `T`, `T` if @p symbol was `U`, or `symbol` if @p symbol was neither `T` nor `U`.
*/
char transcribe(char symbol);
/**
* Returns the number of bases that are represented by both IUPAC degenerate base symbols.
*
* `T` and `U` are treated as the same base. Characters that are not uppercase
* letters, and letters that are not base symbols, represent no bases, so
* comparing against them yields `0`.
*
* @param a First IUPAC degenerate base symbol.
* @param b Second IUPAC degenerate base symbol.
* @return Number of bases represented by both symbols, from `0` to `4`.
*/
int compare(char a, char b);
} // namespace base
} // namespace dna
#endif // ANTKEEPER_DNA_NUCLEOBASE_HPP

+ 0
- 58
src/game/genetics/transcribe.hpp View File

@ -1,58 +0,0 @@
/*
* Copyright (C) 2020 Christopher J. Howard
*
* This file is part of Antkeeper source code.
*
* Antkeeper source code is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Antkeeper source code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ANTKEEPER_DNA_TRANSCRIBE_HPP
#define ANTKEEPER_DNA_TRANSCRIBE_HPP
#include <algorithm>
namespace dna
{
/**
* Transcribes DNA into RNA, replacing all occurrences of `T` with `U`.
*
* The replacement is done in place; elements other than `T` are left untouched.
*
* @param first,last Range of DNA bases to transcribe.
*/
template <class ForwardIt>
void transcribe(ForwardIt first, ForwardIt last);
/**
* Transcribes RNA back into DNA, replacing all occurrences of `U` with `T`.
*
* The replacement is done in place; elements other than `U` are left untouched.
*
* @param first,last Range of RNA bases to transcribe.
*/
template <class ForwardIt>
void untranscribe(ForwardIt first, ForwardIt last);
// Rewrites every `T` in `[first, last)` as `U`, in place.
template <class ForwardIt>
void transcribe(ForwardIt first, ForwardIt last)
{
	std::replace_if
	(
		first,
		last,
		[](const auto& base) { return base == 'T'; },
		'U'
	);
}
// Rewrites every `U` in `[first, last)` as `T`, in place.
template <class ForwardIt>
void untranscribe(ForwardIt first, ForwardIt last)
{
	std::replace_if
	(
		first,
		last,
		[](const auto& base) { return base == 'U'; },
		'T'
	);
}
} // namespace dna
#endif // ANTKEEPER_DNA_TRANSCRIBE_HPP

+ 13
- 166
src/game/genetics/translate.hpp View File

@ -20,181 +20,28 @@
#ifndef ANTKEEPER_DNA_TRANSLATE_HPP
#define ANTKEEPER_DNA_TRANSLATE_HPP
#include <algorithm>
#include <cstdint>
#include <iterator>
namespace dna
{
/// DNA translation table for standard genetic code.
constexpr char* standard_code =
"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" // Amino acid
"---M------**--*----M---------------M----------------------------" // Start/stop
"TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG" // Base 1
"TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG" // Base 2
"TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"; // Base 3
namespace dna {
/**
* Translates codons into amino acids until a stop codon is read or the end of the sequence is reached.
* Divides a range into consecutive subranges of @p n elements, then applies the given function to each subrange and stores the result in another range.
*
* @param first,last Range of codons to translate.
* @param t_first Beginning of the translation table.
* @param first,last Range of elements to translate.
* @param d_first Beginning of the destination range.
* @return Output iterator to the amino acid in the destination range, one past the last amino acid translated.
*/
template <class InputIt1, class InputIt2, class OutputIt>
OutputIt translate(InputIt1 first, InputIt1 last, InputIt2 t_first, OutputIt d_first);
/**
* Finds the first start codon in a sequence of bases.
*
* @param first,last Range of bases to search.
* @param t_first Beginning of the translation table.
* @return Iterator to the first base of the first start codon in the sequence, or @p last if no start codon is found.
* @param n Number of elements by which to divide the range.
* @param binary_op Binary operation function object that will be applied to each subrange of @p n elements.
* @return Output iterator to the element past the last element translated.
*/
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_start(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first);
/**
* Finds the first stop codon in a sequence of codons.
*
* @param first,last Range of codons to search.
* @param t_first Beginning of the translation table.
* @return Iterator to the first base of the first stop codon in the sequence, or @p last if no stop codon is found.
*/
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_stop(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first);
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_start(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first)
template <class InputIt, class OutputIt, class Size, class BinaryOperation>
OutputIt translate(InputIt first, InputIt last, OutputIt d_first, Size n, BinaryOperation binary_op)
{
ForwardIt1 second = first;
++second;
ForwardIt1 third = second;
++third;
ForwardIt2 start_first = t_first;
std::advance(start_first, 64);
ForwardIt2 base1_first = start_first;
std::advance(base1_first, 64);
ForwardIt2 base2_first = base1_first;
std::advance(base2_first, 64);
ForwardIt2 base3_first = base2_first;
std::advance(base3_first, 64);
if (first != last && second != last)
{
while (third != last)
{
ForwardIt2 start = start_first;
ForwardIt2 base1 = base1_first;
ForwardIt2 base2 = base2_first;
ForwardIt2 base3 = base3_first;
for (std::uint_fast8_t i = 64; i; --i)
{
if (*start != '-' && *start != '*' && *first == *base1 && *second == *base2 && *third == *base3)
return first;
++start;
++base1;
++base2;
++base3;
}
first = second;
second = third;
++third;
}
}
return last;
}
template <class ForwardIt1, class ForwardIt2>
ForwardIt1 find_stop(ForwardIt1 first, ForwardIt1 last, ForwardIt2 t_first)
{
ForwardIt1 second = first;
++second;
ForwardIt1 third = second;
++third;
ForwardIt2 base1_first = t_first;
std::advance(base1_first, 128);
ForwardIt2 base2_first = base1_first;
std::advance(base2_first, 64);
ForwardIt2 base3_first = base2_first;
std::advance(base3_first, 64);
while (first != last && second != last && third != last)
{
ForwardIt2 aa = t_first;
ForwardIt2 base1 = base1_first;
ForwardIt2 base2 = base2_first;
ForwardIt2 base3 = base3_first;
for (std::uint_fast8_t i = 64; i; --i)
{
if (*aa == '*' && *first == *base1 && *second == *base2 && *third == *base3)
return first;
++aa;
++base1;
++base2;
++base3;
}
first = ++third;
second = ++third;
++third;
}
return last;
}
template <class InputIt1, class InputIt2, class OutputIt>
OutputIt translate(InputIt1 first, InputIt1 last, InputIt2 t_first, OutputIt d_first)
{
InputIt1 second = first;
++second;
InputIt1 third = second;
++third;
InputIt2 base1_first = t_first;
std::advance(base1_first, 128);
InputIt2 base2_first = base1_first;
std::advance(base2_first, 64);
InputIt2 base3_first = base2_first;
std::advance(base3_first, 64);
while (first != last && second != last && third != last)
for (auto length = std::distance(first, last); length >= n; length -= n)
{
InputIt2 aa = t_first;
InputIt2 base1 = base1_first;
InputIt2 base2 = base2_first;
InputIt2 base3 = base3_first;
for (std::uint_fast8_t i = 64; i; --i)
{
if (*first == *base1 && *second == *base2 && *third == *base3)
{
if (*aa == '*')
return d_first;
*(d_first++) = *aa;
break;
}
++aa;
++base1;
++base2;
++base3;
}
first = ++third;
second = ++third;
++third;
InputIt next = first;
std::advance(next, n);
*(d_first++) = binary_op(first, next);
first = next;
}
return d_first;

Loading…
Cancel
Save