💿🐜 Antkeeper source code https://antkeeper.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

98 lines
3.5 KiB

  1. /*
  2. * Copyright (C) 2021 Christopher J. Howard
  3. *
  4. * This file is part of Antkeeper source code.
  5. *
  6. * Antkeeper source code is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Antkeeper source code is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #ifndef ANTKEEPER_GENETICS_PROTEIN_HPP
  20. #define ANTKEEPER_GENETICS_PROTEIN_HPP
  #include "amino-acid.hpp"
  #include <iterator>
  #include <type_traits>
  23. namespace genetics {
  24. namespace protein {
  /**
   * Measures how many aligned positions of two proteins hold the same amino acid.
   *
   * The two proteins are walked in lockstep and compared element by element with `==`.
   * The result is the number of equal positions divided by the length of the first
   * range, i.e. a fraction rather than a value scaled to 100.
   *
   * @tparam T Arithmetic type in which the result is computed and returned.
   * @tparam ForwardIt1 Iterator type of the first protein.
   * @tparam ForwardIt2 Iterator type of the second protein.
   *
   * @param first1,last1 Range of IUPAC amino acids which constitute the first protein.
   * @param first2 Beginning of the range of IUPAC amino acids which constitute the second
   *               protein. No end iterator is taken for this range, so it must provide
   *               at least as many elements as `[first1, last1)`.
   * @return Number of matching positions divided by the length of the first protein.
   */
  template <class T, class ForwardIt1, class ForwardIt2>
  T identity(
    ForwardIt1 first1,
    ForwardIt1 last1,
    ForwardIt2 first2);
  /**
   * Accumulates a substitution-matrix score over two aligned proteins.
   *
   * Each pair of aligned amino acids is scored with `amino_acid::score` and the
   * per-position scores are added together.
   *
   * @tparam ForwardIt1 Iterator type of the first protein.
   * @tparam ForwardIt2 Iterator type of the second protein.
   * @tparam Matrix Substitution matrix type. The return type is `Matrix` with all array
   *                extents removed.
   *
   * @param first1,last1 Range of IUPAC amino acid codes which constitute the first protein.
   * @param first2 Beginning of the range of IUPAC amino acid codes which constitute the
   *               second protein. No end iterator is taken for this range, so it must
   *               provide at least as many elements as `[first1, last1)`.
   * @param matrix Substitution matrix, passed through to `amino_acid::score`.
   * @return Sum of the per-position scores.
   */
  template <class ForwardIt1, class ForwardIt2, class Matrix>
  typename std::remove_all_extents<Matrix>::type score(
    ForwardIt1 first1,
    ForwardIt1 last1,
    ForwardIt2 first2,
    const Matrix& matrix);
  /**
   * Measures how many aligned positions of two proteins score positively under a
   * substitution matrix.
   *
   * Each pair of aligned amino acids is scored with `amino_acid::score`; a position
   * counts as similar when that score is greater than zero. The result is the number
   * of similar positions divided by the length of the first range, i.e. a fraction
   * rather than a value scaled to 100.
   *
   * @tparam T Arithmetic type in which the result is returned.
   * @tparam ForwardIt1 Iterator type of the first protein.
   * @tparam ForwardIt2 Iterator type of the second protein.
   * @tparam Matrix Substitution matrix type.
   *
   * @param first1,last1 Range of IUPAC amino acids which constitute the first protein.
   * @param first2 Beginning of the range of IUPAC amino acids which constitute the second
   *               protein. No end iterator is taken for this range, so it must provide
   *               at least as many elements as `[first1, last1)`.
   * @param matrix Substitution matrix, passed through to `amino_acid::score`.
   * @return Number of positively scoring positions divided by the length of the first protein.
   */
  template <class T, class ForwardIt1, class ForwardIt2, class Matrix>
  T similarity(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, const Matrix& matrix);
  /**
   * Computes the fraction of aligned positions at which two proteins hold the same
   * amino acid.
   *
   * @tparam T Arithmetic type in which the result is returned.
   * @tparam ForwardIt1 Iterator type of the first protein.
   * @tparam ForwardIt2 Iterator type of the second protein.
   *
   * @param first1,last1 Range of IUPAC amino acids which constitute the first protein.
   * @param first2 Beginning of the range of IUPAC amino acids which constitute the second
   *               protein. It is advanced in lockstep with the first range and never
   *               checked against an end, so it must provide at least as many elements.
   * @return Number of matching positions divided by the length of the first protein,
   *         or zero if the first protein is empty.
   */
  template <class T, class ForwardIt1, class ForwardIt2>
  T identity(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2)
  {
    // An empty protein has no positions to compare; return zero instead of dividing by zero.
    if (first1 == last1)
      return T(0);

    using difference_type = typename std::iterator_traits<ForwardIt1>::difference_type;
    const difference_type length = std::distance(first1, last1);

    // Tally matches in an integer type so the count stays exact even when T is a
    // narrow floating-point type that cannot represent every large integer.
    difference_type match_count = 0;
    for (; first1 != last1; ++first1, ++first2)
    {
      if (*first1 == *first2)
        ++match_count;
    }

    return static_cast<T>(match_count) / static_cast<T>(length);
  }
  63. template <class ForwardIt1, class ForwardIt2, class Matrix>
  64. typename std::remove_all_extents<Matrix>::type score(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, const Matrix& matrix)
  65. {
  66. typename std::remove_all_extents<Matrix>::type result = 0;
  67. for (; first1 != last1; ++first1, ++first2)
  68. result += amino_acid::score(*first1, *first2, matrix);
  69. return result;
  70. }
  71. template <class T, class ForwardIt1, class ForwardIt2, class Matrix>
  72. typename T similarity(ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, const Matrix& matrix)
  73. {
  74. T length = static_cast<T>(std::distance(first1, last1));
  75. T positive_count = T(0);
  76. for (; first1 != last1; ++first1, ++first2)
  77. if (amino_acid::score(*first1, *first2, matrix) > 0)
  78. ++positive_count;
  79. return positive_count / length;
  80. }
  81. } // namespace protein
  82. } // namespace genetics
  83. #endif // ANTKEEPER_GENETICS_PROTEIN_HPP