mirror of
https://github.com/google-deepmind/deepmind-research.git
synced 2026-02-06 03:32:18 +08:00
98 lines
3.5 KiB
Python
98 lines
3.5 KiB
Python
# Copyright 2019 DeepMind Technologies Limited
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Write contact map predictions to a tf.io.gfile.
|
|
|
|
Either write a binary contact map as an RR format text file, or a
|
|
histogram prediction as a pickle of a dict containing a numpy array.
|
|
"""
|
|
|
|
import os
|
|
|
|
import numpy as np
|
|
import six.moves.cPickle as pickle
|
|
import tensorflow as tf # pylint: disable=g-explicit-tensorflow-version-import
|
|
|
|
|
|
RR_FORMAT = """PFRMAT RR
|
|
TARGET {}
|
|
AUTHOR DM-ORIGAMI-TEAM
|
|
METHOD {}
|
|
MODEL 1
|
|
{}
|
|
"""
|
|
|
|
|
|
def save_rr_file(filename, probs, domain, sequence,
|
|
method='dm-contacts-resnet'):
|
|
"""Save a contact probability matrix as an RR file."""
|
|
assert len(sequence) == probs.shape[0]
|
|
assert len(sequence) == probs.shape[1]
|
|
with tf.io.gfile.GFile(filename, 'w') as f:
|
|
f.write(RR_FORMAT.format(domain, method, sequence))
|
|
for i in range(probs.shape[0]):
|
|
for j in range(i + 1, probs.shape[1]):
|
|
f.write('{:d} {:d} {:d} {:d} {:f}\n'.format(
|
|
i + 1, j + 1, 0, 8, probs[j, i]))
|
|
f.write('END\n')
|
|
|
|
|
|
def save_torsions(torsions_dir, filebase, sequence, torsions_probs):
|
|
"""Save Torsions to a file as pickle of a dict."""
|
|
filename = os.path.join(torsions_dir, filebase + '.torsions')
|
|
t_dict = dict(probs=torsions_probs, sequence=sequence)
|
|
with tf.io.gfile.GFile(filename, 'w') as fh:
|
|
pickle.dump(t_dict, fh, protocol=2)
|
|
|
|
|
|
def save_distance_histogram(
|
|
filename, probs, domain, sequence, min_range, max_range, num_bins):
|
|
"""Save a distance histogram prediction matrix as a pickle file."""
|
|
dh_dict = {
|
|
'min_range': min_range,
|
|
'max_range': max_range,
|
|
'num_bins': num_bins,
|
|
'domain': domain,
|
|
'sequence': sequence,
|
|
'probs': probs.astype(np.float32)}
|
|
save_distance_histogram_from_dict(filename, dh_dict)
|
|
|
|
|
|
def save_distance_histogram_from_dict(filename, dh_dict):
|
|
"""Save a distance histogram prediction matrix as a pickle file."""
|
|
fields = ['min_range', 'max_range', 'num_bins', 'domain', 'sequence', 'probs']
|
|
missing_fields = [f for f in fields if f not in dh_dict]
|
|
assert not missing_fields, 'Fields {} missing from dictionary'.format(
|
|
missing_fields)
|
|
assert len(dh_dict['sequence']) == dh_dict['probs'].shape[0]
|
|
assert len(dh_dict['sequence']) == dh_dict['probs'].shape[1]
|
|
assert dh_dict['num_bins'] == dh_dict['probs'].shape[2]
|
|
assert dh_dict['min_range'] >= 0.0
|
|
assert dh_dict['max_range'] > 0.0
|
|
with tf.io.gfile.GFile(filename, 'wb') as fw:
|
|
pickle.dump(dh_dict, fw, protocol=2)
|
|
|
|
|
|
def contact_map_from_distogram(distogram_dict):
|
|
"""Split the boundary bin."""
|
|
num_bins = distogram_dict['probs'].shape[-1]
|
|
bin_size_angstrom = distogram_dict['max_range'] / num_bins
|
|
threshold_cts = (8.0 - distogram_dict['min_range']) / bin_size_angstrom
|
|
threshold_bin = int(threshold_cts) # Round down
|
|
pred_contacts = np.sum(distogram_dict['probs'][:, :, :threshold_bin], axis=-1)
|
|
if threshold_bin < threshold_cts: # Add on the fraction of the boundary bin.
|
|
pred_contacts += distogram_dict['probs'][:, :, threshold_bin] * (
|
|
threshold_cts - threshold_bin)
|
|
return pred_contacts
|
|
|