1use std::collections::{HashMap, HashSet};
21use std::sync::OnceLock;
22
/// Returns the accepted `(min, max)` distance window for a bond between elements `a` and `b`.
///
/// Element symbols are matched exactly and must be upper-case (`"C"`, `"CL"`, `"SE"`, ...).
/// Every tabulated window has the form `(low - 2.0 * dev_low, high + 2.0 * dev_high)`.
/// NOTE(review): the figures are presumably reference bond lengths in ångström together
/// with their standard deviations — confirm against the data source.
///
/// The lookup is order-independent: a bond length does not depend on which atom is named
/// first, so `("C", "B")` yields the same window as `("B", "C")`. The table stores each
/// pair once; when the pair is not found as given, the swapped order is tried.
///
/// # Panics
///
/// Panics with `Unknown atom pair: {a} and {b}` when the pair is tabulated in neither order.
#[rustfmt::skip]
pub(crate) fn default_distance_range(a: &str, b: &str) -> (f32, f32) {
    /// Looks the pair up exactly as written in the table; `None` when it is not listed.
    fn tabulated(a: &str, b: &str) -> Option<(f32, f32)> {
        let range: (f32, f32) = match (a, b) {
            ("B",  "C" ) => (1.556 - 2.0*0.015, 1.556 + 2.0*0.015),
            ("BR", "C" ) => (1.875 - 2.0*0.029, 1.966 + 2.0*0.029),
            ("BR", "O" ) => (1.581 - 2.0*0.007, 1.581 + 2.0*0.007),
            ("C",  "C" ) => (1.174 - 2.0*0.011, 1.588 + 2.0*0.025),
            ("C",  "CL") => (1.713 - 2.0*0.011, 1.849 + 2.0*0.011),
            ("C",  "F" ) => (1.320 - 2.0*0.009, 1.428 + 2.0*0.009),
            ("C",  "H" ) => (1.059 - 2.0*0.030, 1.099 + 2.0*0.007),
            ("C",  "I" ) => (2.095 - 2.0*0.015, 2.162 + 2.0*0.015),
            ("C",  "N" ) => (1.325 - 2.0*0.009, 1.552 + 2.0*0.023),
            ("C",  "O" ) => (1.187 - 2.0*0.011, 1.477 + 2.0*0.008),
            ("C",  "P" ) => (1.791 - 2.0*0.006, 1.855 + 2.0*0.019),
            ("C",  "S" ) => (1.630 - 2.0*0.014, 1.863 + 2.0*0.015),
            ("C",  "SE") => (1.893 - 2.0*0.013, 1.970 + 2.0*0.032),
            ("C",  "SI") => (1.837 - 2.0*0.012, 1.888 + 2.0*0.023),
            ("CL", "O" ) => (1.414 - 2.0*0.026, 1.414 + 2.0*0.026),
            ("CL", "P" ) => (1.997 - 2.0*0.035, 2.008 + 2.0*0.035),
            ("CL", "S" ) => (2.072 - 2.0*0.023, 2.072 + 2.0*0.023),
            ("CL", "SI") => (2.072 - 2.0*0.009, 2.072 + 2.0*0.009),
            ("F",  "N" ) => (1.406 - 2.0*0.016, 1.406 + 2.0*0.016),
            ("F",  "P" ) => (1.495 - 2.0*0.016, 1.579 + 2.0*0.025),
            ("F",  "S" ) => (1.640 - 2.0*0.011, 1.640 + 2.0*0.011),
            ("F",  "SI") => (1.588 - 2.0*0.014, 1.694 + 2.0*0.013),
            ("H",  "N" ) => (1.009 - 2.0*0.022, 1.033 + 2.0*0.022),
            ("H",  "O" ) => (0.967 - 2.0*0.010, 1.015 + 2.0*0.017),
            ("I",  "O" ) => (2.144 - 2.0*0.028, 2.144 + 2.0*0.028),
            ("N",  "N" ) => (1.124 - 2.0*0.015, 1.454 + 2.0*0.021),
            ("N",  "O" ) => (1.210 - 2.0*0.011, 1.463 + 2.0*0.012),
            ("N",  "P" ) => (1.571 - 2.0*0.013, 1.697 + 2.0*0.015),
            ("N",  "S" ) => (1.541 - 2.0*0.022, 1.710 + 2.0*0.019),
            ("N",  "SI") => (1.711 - 2.0*0.019, 1.748 + 2.0*0.022),
            ("O",  "P" ) => (1.449 - 2.0*0.007, 1.689 + 2.0*0.024),
            ("O",  "S" ) => (1.423 - 2.0*0.008, 1.580 + 2.0*0.015),
            ("O",  "SI") => (1.622 - 2.0*0.014, 1.680 + 2.0*0.008),
            ("P",  "P" ) => (2.214 - 2.0*0.022, 2.214 + 2.0*0.022),
            ("P",  "S" ) => (1.913 - 2.0*0.014, 1.954 + 2.0*0.005),
            ("P",  "SE") => (2.093 - 2.0*0.019, 2.093 + 2.0*0.019),
            ("P",  "SI") => (2.264 - 2.0*0.019, 2.264 + 2.0*0.019),
            ("S",  "S" ) => (1.897 - 2.0*0.012, 2.070 + 2.0*0.022),
            ("S",  "SE") => (2.193 - 2.0*0.015, 2.193 + 2.0*0.015),
            ("S",  "SI") => (2.145 - 2.0*0.020, 2.145 + 2.0*0.020),
            ("SE", "SE") => (2.340 - 2.0*0.024, 2.340 + 2.0*0.024),
            // Listed as SI/SE (not alphabetical); the swapped lookup below covers SE/SI.
            ("SI", "SE") => (2.359 - 2.0*0.012, 2.359 + 2.0*0.012),
            _ => return None,
        };
        Some(range)
    }

    // Try the order the caller supplied first, then the mirrored pair, so the result is
    // the same regardless of which atom is named first.
    tabulated(a, b)
        .or_else(|| tabulated(b, a))
        .unwrap_or_else(|| panic!("Unknown atom pair: {} and {}", a, b))
}
74
/// Process-wide cache for the bond table returned by [`get_bonds_canonical20`].
static AA_BONDS: OnceLock<HashMap<&'static str, Vec<(&'static str, &'static str, i32)>>> =
    OnceLock::new();

/// Returns the intra-residue bond table for the 20 canonical amino acids.
///
/// The map is keyed by three-letter residue name (`"ALA"`, `"ARG"`, ...). Each value lists
/// the residue's bonds as `(atom_name, atom_name, order_code)` tuples, hydrogens and the
/// terminal `OXT`/`HXT` atoms included. Order codes `1` and `2` are used throughout; `5` and
/// `6` occur only on the ring bonds of HIS, PHE, TRP and TYR.
/// NOTE(review): `5`/`6` presumably encode aromatic single/double bonds — confirm against
/// the bond-type definition used by the callers.
///
/// The table is built once on first call and the same reference is returned afterwards.
#[rustfmt::skip]
pub(crate) fn get_bonds_canonical20() -> &'static HashMap<&'static str, Vec<(&'static str, &'static str, i32)>> {
    // Static description of every residue; copied into owned `Vec`s when the map is built.
    const RESIDUE_BONDS: &[(&str, &[(&str, &str, i32)])] = &[
        ("ALA", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","HB1",1), ("CB","HB2",1), ("CB","HB3",1), ("CA","N",1), ("H","N",1),
            ("H2","N",1), ("HXT","OXT",1),
        ]),
        ("ARG", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD","HD2",1), ("CD","HD3",1),
            ("CD","NE",1), ("CD","CG",1), ("CG","HG2",1), ("CG","HG3",1), ("CZ","NH1",1),
            ("CZ","NH2",2), ("CA","N",1), ("H","N",1), ("H2","N",1), ("CZ","NE",1),
            ("HE","NE",1), ("HH11","NH1",1), ("HH12","NH1",1), ("HH21","NH2",1),
            ("HH22","NH2",1), ("HXT","OXT",1),
        ]),
        ("ASN", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CG","ND2",1), ("CG","OD1",2),
            ("CA","N",1), ("H","N",1), ("H2","N",1), ("HD21","ND2",1), ("HD22","ND2",1),
            ("HXT","OXT",1),
        ]),
        ("ASP", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CG","OD1",2), ("CG","OD2",1),
            ("CA","N",1), ("H","N",1), ("H2","N",1), ("HD2","OD2",1), ("HXT","OXT",1),
        ]),
        ("CYS", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","HB2",1), ("CB","HB3",1), ("CB","SG",1), ("CA","N",1), ("H","N",1),
            ("H2","N",1), ("HXT","OXT",1), ("HG","SG",1),
        ]),
        ("GLN", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD","NE2",1), ("CD","OE1",2),
            ("CD","CG",1), ("CG","HG2",1), ("CG","HG3",1), ("CA","N",1), ("H","N",1),
            ("H2","N",1), ("HE21","NE2",1), ("HE22","NE2",1), ("HXT","OXT",1),
        ]),
        ("GLU", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD","OE1",2), ("CD","OE2",1),
            ("CD","CG",1), ("CG","HG2",1), ("CG","HG3",1), ("CA","N",1), ("H","N",1),
            ("H2","N",1), ("HE2","OE2",1), ("HXT","OXT",1),
        ]),
        ("GLY", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","HA2",1), ("CA","HA3",1),
            ("CA","N",1), ("H","N",1), ("H2","N",1), ("HXT","OXT",1),
        ]),
        ("HIS", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD2","HD2",1), ("CD2","NE2",5),
            ("CE1","HE1",1), ("CE1","NE2",5), ("CD2","CG",6), ("CG","ND1",5), ("CA","N",1),
            ("H","N",1), ("H2","N",1), ("CE1","ND1",6), ("HD1","ND1",1), ("HE2","NE2",1),
            ("HXT","OXT",1),
        ]),
        ("ILE", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG1",1), ("CB","CG2",1), ("CB","HB",1), ("CD1","HD11",1), ("CD1","HD12",1),
            ("CD1","HD13",1), ("CD1","CG1",1), ("CG1","HG12",1), ("CG1","HG13",1),
            ("CG2","HG21",1), ("CG2","HG22",1), ("CG2","HG23",1), ("CA","N",1), ("H","N",1),
            ("H2","N",1), ("HXT","OXT",1),
        ]),
        ("LEU", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD1","HD11",1), ("CD1","HD12",1),
            ("CD1","HD13",1), ("CD2","HD21",1), ("CD2","HD22",1), ("CD2","HD23",1),
            ("CD1","CG",1), ("CD2","CG",1), ("CG","HG",1), ("CA","N",1), ("H","N",1),
            ("H2","N",1), ("HXT","OXT",1),
        ]),
        ("LYS", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD","CE",1), ("CD","HD2",1),
            ("CD","HD3",1), ("CE","HE2",1), ("CE","HE3",1), ("CE","NZ",1), ("CD","CG",1),
            ("CG","HG2",1), ("CG","HG3",1), ("CA","N",1), ("H","N",1), ("H2","N",1),
            ("HZ1","NZ",1), ("HZ2","NZ",1), ("HZ3","NZ",1), ("HXT","OXT",1),
        ]),
        ("MET", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CE","HE1",1), ("CE","HE2",1),
            ("CE","HE3",1), ("CG","HG2",1), ("CG","HG3",1), ("CG","SD",1), ("CA","N",1),
            ("H","N",1), ("H2","N",1), ("HXT","OXT",1), ("CE","SD",1),
        ]),
        ("PHE", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD1","CE1",5), ("CD1","HD1",1),
            ("CD2","CE2",6), ("CD2","HD2",1), ("CE1","CZ",6), ("CE1","HE1",1), ("CE2","CZ",5),
            ("CE2","HE2",1), ("CD1","CG",6), ("CD2","CG",5), ("CZ","HZ",1), ("CA","N",1),
            ("H","N",1), ("H2","N",1), ("HXT","OXT",1),
        ]),
        ("PRO", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD","HD2",1), ("CD","HD3",1),
            ("CD","CG",1), ("CG","HG2",1), ("CG","HG3",1), ("CA","N",1), ("CD","N",1),
            ("H","N",1), ("HXT","OXT",1),
        ]),
        ("SER", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","HB2",1), ("CB","HB3",1), ("CB","OG",1), ("CA","N",1), ("H","N",1),
            ("H2","N",1), ("HG","OG",1), ("HXT","OXT",1),
        ]),
        ("THR", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG2",1), ("CB","HB",1), ("CB","OG1",1), ("CG2","HG21",1), ("CG2","HG22",1),
            ("CG2","HG23",1), ("CA","N",1), ("H","N",1), ("H2","N",1), ("HG1","OG1",1),
            ("HXT","OXT",1),
        ]),
        ("TRP", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD1","HD1",1), ("CD1","NE1",5),
            ("CD2","CE2",6), ("CD2","CE3",5), ("CE2","CZ2",5), ("CE3","CZ3",6), ("CE3","HE3",1),
            ("CD1","CG",6), ("CD2","CG",5), ("CH2","HH2",1), ("CH2","CZ2",6), ("CZ2","HZ2",1),
            ("CH2","CZ3",5), ("CZ3","HZ3",1), ("CA","N",1), ("H","N",1), ("H2","N",1),
            ("CE2","NE1",5), ("HE1","NE1",1), ("HXT","OXT",1),
        ]),
        ("TYR", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG",1), ("CB","HB2",1), ("CB","HB3",1), ("CD1","CE1",5), ("CD1","HD1",1),
            ("CD2","CE2",6), ("CD2","HD2",1), ("CE1","CZ",6), ("CE1","HE1",1), ("CE2","CZ",5),
            ("CE2","HE2",1), ("CD1","CG",6), ("CD2","CG",5), ("CZ","OH",1), ("CA","N",1),
            ("H","N",1), ("H2","N",1), ("HH","OH",1), ("HXT","OXT",1),
        ]),
        ("VAL", &[
            ("C","O",2), ("C","OXT",1), ("C","CA",1), ("CA","CB",1), ("CA","HA",1),
            ("CB","CG1",1), ("CB","CG2",1), ("CB","HB",1), ("CG1","HG11",1), ("CG1","HG12",1),
            ("CG1","HG13",1), ("CG2","HG21",1), ("CG2","HG22",1), ("CG2","HG23",1),
            ("CA","N",1), ("H","N",1), ("H2","N",1), ("HXT","OXT",1),
        ]),
    ];

    AA_BONDS.get_or_init(|| {
        RESIDUE_BONDS
            .iter()
            .map(|&(residue, bonds)| (residue, bonds.to_vec()))
            .collect()
    })
}
214
/// Lazily initialised set of amino-acid residue names; filled by `get_amino_acids`.
static AMINO_ACIDS: OnceLock<HashSet<&'static str>> = OnceLock::new();
/// Lazily initialised set of carbohydrate residue names; filled by `get_carbohydrates`.
static CARBOHYDRATES: OnceLock<HashSet<&'static str>> = OnceLock::new();
/// Lazily initialised set of nucleotide residue names; filled by `get_nucleotides`.
static NUCLEOTIDES: OnceLock<HashSet<&'static str>> = OnceLock::new();
218
219fn get_amino_acids() -> &'static HashSet<&'static str> {
220 AMINO_ACIDS.get_or_init(|| include_str!("ccddata/amino_acids.txt").lines().collect())
221}
222
223fn get_carbohydrates() -> &'static HashSet<&'static str> {
224 CARBOHYDRATES.get_or_init(|| include_str!("ccddata/carbohydrates.txt").lines().collect())
225}
226
227fn get_nucleotides() -> &'static HashSet<&'static str> {
228 NUCLEOTIDES.get_or_init(|| include_str!("ccddata/nucleotides.txt").lines().collect())
229}
230
231pub(crate) fn is_amino_acid(symbol: &str) -> bool {
232 get_amino_acids().contains(symbol)
233}
234
235pub(crate) fn is_carbohydrate(symbol: &str) -> bool {
236 get_carbohydrates().contains(symbol)
237}
238
239pub(crate) fn is_nucleotide(symbol: &str) -> bool {
240 get_nucleotides().contains(symbol)
241}
242
#[cfg(test)]
mod tests {
    use super::*;

    /// Each classifier accepts two names from its embedded list and rejects `"ZZZ"`.
    #[test]
    fn test_residue_checking() {
        for name in ["ALA", "ARG"] {
            assert!(is_amino_acid(name));
        }
        assert!(!is_amino_acid("ZZZ"));

        for name in ["045", "05L"] {
            assert!(is_carbohydrate(name));
        }
        assert!(!is_carbohydrate("ZZZ"));

        for name in ["02I", "05A"] {
            assert!(is_nucleotide(name));
        }
        assert!(!is_nucleotide("ZZZ"));
    }
}