💿🐜 Antkeeper source code https://antkeeper.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

100 lines
3.6 KiB

  1. /*
  2. * Copyright (C) 2021 Christopher J. Howard
  3. *
  4. * This file is part of Antkeeper source code.
  5. *
  6. * Antkeeper source code is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Antkeeper source code is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef ANTKEEPER_GENETICS_PROTEIN_HPP
  20. #define ANTKEEPER_GENETICS_PROTEIN_HPP
#include "amino-acid.hpp"
#include <iterator>
#include <type_traits>
  23. namespace genetics {
  24. /// Functions which operate on sequences of IUPAC amino acid symbols.
  25. namespace protein {
  26. /**
  27. * Returns the percent identity between two proteins.
  28. *
  29. * @param first1,last1 Range of IUPAC amino acids which constitute the first protein.
  30. * @param first2 Beginning of the range of IUPAC amino acids which constitute the second protein.
  31. * @return Percent identity between the two proteins.
  32. */
  33. template <class T, class ForwardIt1, class ForwardIt2>
  34. T identity(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2);
  35. /**
  36. * Scores two proteins using a substitution matrix.
  37. *
  38. * @param first1,last1 Range of IUPAC amino acid codes which constitute the first protein.
  39. * @param first2 Beginning of the range of IUPAC amino acid codes which constitute the second protein.
  40. * @param matrix Substitution matrix.
  41. * @return Score of the two proteins.
  42. */
  43. template <class ForwardIt1, class ForwardIt2, class Matrix>
  44. typename std::remove_all_extents<Matrix>::type score(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, const Matrix& matrix);
  45. /**
  46. * Returns the percent similarity between two proteins.
  47. *
  48. * @param first1,last1 Range of IUPAC amino acids which constitute the first protein.
  49. * @param first2 Beginning of the range of IUPAC amino acids which constitute the second protein.
  50. * @return Percent similarity between the two proteins.
  51. */
  52. template <class T, class ForwardIt1, class ForwardIt2, class Matrix>
  53. typename T similarity(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, const Matrix& matrix);
  54. template <class T, class ForwardIt1, class ForwardIt2>
  55. T identity(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2)
  56. {
  57. auto length = std::distance(first1, last1);
  58. T sum = 0;
  59. for (; first1 != last1; ++first1, ++first2)
  60. if (*first1 == *first2)
  61. ++sum;
  62. return sum / static_cast<T>(length);
  63. }
  64. template <class ForwardIt1, class ForwardIt2, class Matrix>
  65. typename std::remove_all_extents<Matrix>::type score(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, const Matrix& matrix)
  66. {
  67. typename std::remove_all_extents<Matrix>::type result = 0;
  68. for (; first1 != last1; ++first1, ++first2)
  69. result += amino_acid::score(*first1, *first2, matrix);
  70. return result;
  71. }
  72. template <class T, class ForwardIt1, class ForwardIt2, class Matrix>
  73. typename T similarity(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, const Matrix& matrix)
  74. {
  75. T length = static_cast<T>(std::distance(first1, last1));
  76. T positive_count = T(0);
  77. for (; first1 != last1; ++first1, ++first2)
  78. if (amino_acid::score(*first1, *first2, matrix) > 0)
  79. ++positive_count;
  80. return positive_count / length;
  81. }
  82. } // namespace protein
  83. } // namespace genetics
  84. #endif // ANTKEEPER_GENETICS_PROTEIN_HPP