/*
 * Copyright (C) 2020  Christopher J. Howard
 *
 * This file is part of Antkeeper source code.
 *
 * Antkeeper source code is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Antkeeper source code is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Antkeeper source code.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef ANTKEEPER_DNA_FRAME_HPP
#define ANTKEEPER_DNA_FRAME_HPP

#include <iterator>

namespace dna {

/**
 * Finds the first start codon in a sequence.
 *
 * A window of @p n elements is tested at every element offset, beginning at @p first, until the
 * predicate accepts a window or fewer than @p n elements remain.
 *
 * @tparam InputIt Iterator type. The range is measured with `std::distance` and then traversed
 *                 again, so the iterator must be multi-pass (forward iterator or stronger).
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon. If @p n is not positive, @p last is returned.
 * @param p Binary predicate, called as `p(begin, end)` with iterators delimiting a subrange of
 *          length @p n, which returns `true` if that subrange is a start codon.
 * @return Iterator to the first element in the start codon, or @p last if no start codon was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_start(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
    if (!(n > 0))
        return last;

    auto remaining = std::distance(first, last);
    if (remaining < n)
        return last;

    // [first, next) is the window currently being tested.
    InputIt next = first;
    std::advance(next, n);

    for (;;)
    {
        if (p(first, next))
            return first;

        // Decide whether another window fits *before* sliding, so that `next` is never
        // incremented past `last`.
        if (--remaining < n)
            return last;

        ++first;
        ++next;
    }
}

/**
 * Searches the range `[first, last)` for a sequence of @p n elements which satisfies predicate @p p.
 *
 * Windows of @p n elements are tested at offsets `0, stride, 2 * stride, ...` from @p first, for as
 * long as a complete window fits inside the range.
 *
 * @tparam InputIt Iterator type. The range is measured with `std::distance` and then traversed
 *                 again, so the iterator must be multi-pass (forward iterator or stronger).
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements in the sequence. If @p n is not positive, @p last is returned.
 * @param stride Number of elements between searches. If @p stride is not positive, only the
 *               window starting at @p first is tested.
 * @param p Binary predicate, called as `p(begin, end)` with iterators delimiting a subrange of
 *          length @p n, which returns `true` if that subrange is the sequence being searched for.
 * @return Iterator to the first element of the first matching subrange, or @p last if no match was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_sequence(InputIt first, InputIt last, Size n, Size stride, BinaryPredicate p)
{
    if (!(n > 0))
        return last;

    if (auto length = std::distance(first, last); length >= n)
    {
        // Number of elements that must remain, counted from `first`, for one more complete
        // window to fit after moving forward by `stride`.
        const Size offset = n + stride;

        // [first, next) is the window currently being tested.
        InputIt next = first;
        std::advance(next, n);

        for (;;)
        {
            if (p(first, next))
                return first;

            // A non-positive stride would never move the window forward; also stop when the
            // next window would not fit.
            if (!(stride > 0) || length < offset)
                break;

            std::advance(first, stride);
            std::advance(next, stride);

            // The window moved by `stride` elements, so that is how much of the range was consumed.
            length -= stride;
        }
    }

    return last;
}

/**
 * Finds the first stop codon in a sequence.
 *
 * The range is read as consecutive, non-overlapping codons of @p n elements starting at @p first;
 * trailing elements which do not form a complete codon are ignored.
 *
 * @tparam InputIt Iterator type. The range is measured with `std::distance` and then traversed
 *                 again, so the iterator must be multi-pass (forward iterator or stronger).
 *
 * @param first,last Range of elements to search.
 * @param n Number of elements per codon. If @p n is not positive, @p last is returned.
 * @param p Binary predicate, called as `p(begin, end)` with iterators delimiting a subrange of
 *          length @p n, which returns `true` if that subrange is a stop codon.
 * @return Iterator to the first element in the stop codon, or @p last if no stop codon was found.
 */
template <class InputIt, class Size, class BinaryPredicate>
InputIt find_stop(InputIt first, InputIt last, Size n, BinaryPredicate p)
{
    // A non-positive codon length would never consume the range.
    if (!(n > 0))
        return last;

    for (auto length = std::distance(first, last); length >= n; length -= n)
    {
        InputIt next = first;
        std::advance(next, n);

        if (p(first, next))
            return first;

        first = next;
    }

    return last;
}

/**
 * Finds the first open reading frame (ORF) in a range of elements.
 *
 * The start codon is located with find_start(); the stop codon is then located with find_stop(),
 * reading codons of @p n elements beginning at the start codon.
 *
 * @param[in,out] first Iterator to the beginning of the sequence, which will point to the first
 *                      element of the start codon, or be equal to @p last if no start codon was found.
 * @param[in,out] last Iterator to the end of the sequence, which will point to the first element
 *                     of the stop codon. It is left unchanged if no start codon or no stop codon
 *                     was found.
 * @param n Number of elements per codon.
 * @param start_p Binary predicate which returns `true` if a subrange of length @p n is a start codon.
 * @param stop_p Binary predicate which returns `true` if a subrange of length @p n is a stop codon.
 */
template <class InputIt, class Size, class BinaryPredicate1, class BinaryPredicate2>
void find_orf(InputIt& first, InputIt& last, Size n, BinaryPredicate1 start_p, BinaryPredicate2 stop_p)
{
    first = find_start(first, last, n, start_p);

    if (first != last)
        last = find_stop(first, last, n, stop_p);
}

} // namespace dna

#endif // ANTKEEPER_DNA_FRAME_HPP