💿🐜 Antkeeper source code https://antkeeper.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

99 lines
2.3 KiB

  1. /*
  2. * Copyright (C) 2021 Christopher J. Howard
  3. *
  4. * This file is part of Antkeeper source code.
  5. *
  6. * Antkeeper source code is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * Antkeeper source code is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with Antkeeper source code. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. #include "base.hpp"
  20. namespace genetics {
  21. namespace base {
  22. /**
  23. * Decodes an IUPAC degenerate base symbol into a bit mask representing the possible bases represented by the symbol.
  24. *
  25. * @param symbol IUPAC degenerate base symbol.
  26. * @return Bit mask representing the possible bases represented by the symbol.
  27. */
  28. static inline unsigned char decode(char symbol)
  29. {
  30. static constexpr unsigned char bases[25] =
  31. {
  32. 0b0001, // A
  33. 0b1110, // B
  34. 0b0010, // C
  35. 0b1101, // D
  36. 0, // E
  37. 0, // F
  38. 0b0100, // G
  39. 0b1011, // H
  40. 0, // I
  41. 0, // J
  42. 0b1100, // K
  43. 0, // L
  44. 0b0011, // M
  45. 0b1111, // N
  46. 0, // O
  47. 0, // P
  48. 0, // Q
  49. 0b0101, // R
  50. 0b0110, // S
  51. 0b1000, // T
  52. 0b1000, // U
  53. 0b0111, // V
  54. 0b1001, // W
  55. 0, // X
  56. 0b1010, // Y
  57. };
  58. return (symbol < 'A' || symbol >= 'Z') ? 0 : bases[symbol - 'A'];
  59. }
  60. int compare(char a, char b)
  61. {
  62. static constexpr int popcount[16] =
  63. {
  64. 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
  65. };
  66. return popcount[decode(a) & decode(b)];
  67. }
  68. char transcribe(char symbol)
  69. {
  70. return (symbol == 'T') ? 'U' : (symbol == 'U') ? 'T' : symbol;
  71. }
  72. namespace dna
  73. {
  74. char complement(char symbol)
  75. {
  76. static constexpr char* complements = "TVGHZZCDZZMZKNZZZYSAABWZR";
  77. return (symbol < 'A' || symbol >= 'Z') ? 'Z' : complements[symbol - 'A'];
  78. }
  79. }
  80. namespace rna
  81. {
  82. char complement(char symbol)
  83. {
  84. static constexpr char* complements = "UVGHZZCDZZMZKNZZZYSAABWZR";
  85. return (symbol < 'A' || symbol >= 'Z') ? 'Z' : complements[symbol - 'A'];
  86. }
  87. }
  88. } // namespace base
  89. } // namespace genetics