/*
 * Copyright (C) 2020 Christopher J. Howard
 *
 * This file is part of Antkeeper source code.
 *
 * Antkeeper source code is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Antkeeper source code is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#ifndef ANTKEEPER_DNA_FRAME_HPP
|
|
#define ANTKEEPER_DNA_FRAME_HPP
|
|
|
|
#include <iterator>
|
|
|
|
namespace dna {
|
|
|
|
/**
 * Finds the first start codon in a sequence.
 *
 * Every subrange of @p n consecutive elements is tested in order, moving forward one element at a time, so a start codon may begin at any offset within the range.
 *
 * @param first,last Range of elements to search. The range is traversed more than once, so the iterators must be at least forward iterators.
 * @param n Number of elements per codon.
 * @param p Binary predicate, called as `p(begin, end)` with the bounds of a subrange of length @p n, which returns `true` if that subrange is a start codon.
 * @return Iterator to the first element in the start codon, or @p last if no start codon was found, if the range holds fewer than @p n elements, or if @p n is negative.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
    using difference_type = typename std::iterator_traits<InputIt>::difference_type;

    const difference_type length = std::distance(first, last);

    // Compare in the iterator's own (signed) difference type to avoid mixed signed/unsigned comparisons.
    const difference_type codon_length = static_cast<difference_type>(n);

    // A negative codon length cannot be searched for, and a range shorter than one codon cannot contain one.
    if (codon_length < 0 || length < codon_length)
        return last;

    // `next` marks the end of the candidate codon which begins at `first`.
    InputIt next = first;
    std::advance(next, codon_length);

    for (;;)
    {
        if (p(first, next))
            return first;

        // The candidate ending at `last` is the final one; stop here so that
        // neither iterator is ever incremented past the end of the range.
        if (next == last)
            break;

        ++first;
        ++next;
    }

    return last;
}
|
|
|
|
/**
 * Searches the range `[first, last)` for a sequence of @p n elements which satisfies predicate @p p.
 *
 * Candidate sequences begin at offsets `0`, `stride`, `2 * stride`, ... from @p first. Only candidates which lie entirely within the range are tested.
 *
 * @param first,last Range of elements to search. The range is traversed more than once, so the iterators must be at least forward iterators.
 * @param n Number of elements in the sequence.
 * @param stride Number of elements between the starts of consecutive candidate sequences. If @p stride is not positive, only the candidate beginning at @p first is tested.
 * @param p Binary predicate, called as `p(begin, end)` with the bounds of a candidate sequence of length @p n, which returns `true` if the candidate is the sequence being searched for.
 * @return Iterator to the first element of the first candidate which satisfies @p p, or @p last if no candidate satisfies it, if the range holds fewer than @p n elements, or if @p n is negative.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p)
{
    using difference_type = typename std::iterator_traits<InputIt>::difference_type;

    // Work in the iterator's own (signed) difference type to avoid mixed signed/unsigned arithmetic.
    const difference_type count = static_cast<difference_type>(n);
    const difference_type step = static_cast<difference_type>(stride);

    // Number of elements between the current `first` and `last`.
    difference_type remaining = std::distance(first, last);

    if (count < 0 || remaining < count)
        return last;

    // `next` marks the end of the candidate sequence which begins at `first`.
    InputIt next = first;
    std::advance(next, count);

    for (;;)
    {
        if (p(first, next))
            return first;

        // Continue only if a whole candidate still fits `step` elements further on.
        // Written as a subtraction so that `count + step` is never computed.
        // A non-positive step would never make progress, so it ends the search.
        if (step <= 0 || remaining - count < step)
            break;

        std::advance(first, step);
        std::advance(next, step);

        // The window moved forward by `step` elements, so that is how much of the range was consumed.
        remaining -= step;
    }

    return last;
}
|
|
|
|
/**
 * Finds the first stop codon in a sequence.
 *
 * The range is read codon by codon: consecutive, non-overlapping subranges of @p n elements, beginning at @p first. Any elements left over after the last whole codon are not tested.
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon.
 * @param p Binary predicate, called as `p(begin, end)` with the bounds of a codon, which returns `true` if that codon is a stop codon.
 * @return Iterator to the first element in the stop codon, or @p last if no stop codon was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
    // Number of elements not yet consumed by whole codons.
    auto remaining = std::distance(first, last);

    while (remaining >= n)
    {
        // Locate the end of the codon which begins at `first`.
        InputIt codon_end = first;
        std::advance(codon_end, n);

        if (p(first, codon_end))
            return first;

        // Not a stop codon; move on to the next codon in the frame.
        first = codon_end;
        remaining -= n;
    }

    return last;
}
|
|
|
|
/**
 * Finds the first open reading frame (ORF) in a range of elements.
 *
 * The range is first searched for a start codon with `find_start`. If one is found, the range from that start codon to @p last is then searched for a stop codon with `find_stop`.
 *
 * @param[in,out] first Iterator to the beginning of the sequence to search. On return, set to the result of the start codon search: the first element of the start codon, or @p last if no start codon was found.
 * @param[in,out] last Iterator to the end of the sequence to search. Left untouched if no start codon was found; otherwise set to the result of the stop codon search.
 * @param n Number of elements per codon.
 * @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
 * @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
 */
template <class InputIt, class Size, class BinaryPredicate1, class BinaryPredicate2>
void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p)
{
    first = find_start(first, last, n, start_p);

    // Without a start codon there is no frame in which to look for a stop codon.
    if (first == last)
        return;

    last = find_stop(first, last, n, stop_p);
}
|
|
|
|
} // namespace dna
|
|
|
|
#endif // ANTKEEPER_DNA_FRAME_HPP
|