From 985ca4dce7e0b72eea7e47f65a250c65162ff9d9 Mon Sep 17 00:00:00 2001
From: Augustin Zidek
Date: Wed, 4 Mar 2020 09:45:08 +0000
Subject: [PATCH] Fix documentation for the deletion_probability feature.

PiperOrigin-RevId: 298798686
---
 alphafold_casp13/README.md | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/alphafold_casp13/README.md b/alphafold_casp13/README.md
index 64cece0..3db25c1 100644
--- a/alphafold_casp13/README.md
+++ b/alphafold_casp13/README.md
@@ -248,14 +248,26 @@ def sequence_to_onehot(sequence):
 
 #### `deletion_probability`
 
-The fraction of sequences that had a deletion (denoteby by a lowercase letter
-in the A3M format) at this position. Example:
+The fraction of sequences that had an insert state (denoted by a lowercase
+letter in the A3M format) at this position. We used the following code to
+compute it from the HHBlits MSA in the A3M format:
 
-```
-MSA = A c r k
-      A C r k
-      A C R k
-deletion_probability = [[0], [1/3], [2/3], [1]]
+```python
+deletion_matrix = []
+for msa_sequence in hhblits_a3m_sequences:
+  deletion_vec = []
+  deletion_count = 0
+  for j in msa_sequence:
+    if j.islower():
+      deletion_count += 1
+    else:
+      deletion_vec.append(deletion_count)
+      deletion_count = 0
+  deletion_matrix.append(deletion_vec)
+
+deletion_matrix = np.array(deletion_matrix)
+deletion_matrix[deletion_matrix != 0] = 1.0
+deletion_probability = deletion_matrix.sum(axis=0) / len(deletion_matrix)
 ```
 
 #### `gap_matrix`