Spaces:
Sleeping
Sleeping
File size: 2,087 Bytes
87e7d05 9b98ab0 87e7d05 9b98ab0 87e7d05 9b98ab0 87e7d05 9b98ab0 87e7d05 9b98ab0 87e7d05 9b98ab0 87e7d05 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
"""
This file provides a predict function for Tox21.
As input it takes a list of SMILES strings and outputs a nested dictionary with
SMILES and target names as keys.
"""
# ---------------------------------------------------------------------------------------
# Dependencies
from collections import defaultdict
import numpy as np
from src.model import Tox21XGBClassifier
from src.preprocess import create_descriptors
from src.utils import TASKS
# ---------------------------------------------------------------------------------------
def predict(smiles_list: list[str]) -> dict[str, dict[str, float]]:
    """Apply the classifier to a list of SMILES strings.

    One prediction is produced per input SMILES for every task in ``TASKS``.
    Any molecule that could not be cleaned (as flagged by the mask returned
    from ``create_descriptors``) gets prediction=0.5 for every task.

    Args:
        smiles_list (list[str]): list of SMILES strings

    Returns:
        dict: nested prediction dictionary, following
            {'<smiles>': {'<target>': <pred>}}. It is a plain ``dict``, so
            looking up an unknown SMILES raises ``KeyError``. A SMILES that
            occurs more than once in the input yields a single entry holding
            the values of its last occurrence. An empty input yields ``{}``.
    """
    print(f"Received {len(smiles_list)} SMILES strings")
    # len() rather than truthiness: also safe for array-like inputs.
    if len(smiles_list) == 0:
        # Nothing to featurize; skip descriptor creation and model loading.
        return {}

    # preprocessing pipeline
    features, is_clean = create_descriptors(smiles_list)
    print(f"Created {features.shape[1]} descriptors for the molecules.")
    # Force a boolean ndarray: mask indexing below is only correct for bools
    # (a list would fail, a 0/1 integer array would be treated as indices).
    is_clean = np.asarray(is_clean, dtype=bool)

    # setup model
    model = Tox21XGBClassifier(seed=42)
    model_dir = "assets/"
    model.load_model(model_dir)
    print(f"Loaded model and feature processors from {model_dir}")

    # make predictions
    predictions: dict[str, dict[str, float]] = {}
    for target in TASKS:
        # Each task starts from its own copy of the feature matrix.
        X = features.copy()
        feature_processors = model.feature_processors[target]
        task_features = feature_processors["selector"].transform(X)
        task_features = feature_processors["scaler"].transform(task_features)

        # Start from the 0.5 fallback, then overwrite the clean positions.
        # NOTE(review): this assumes model.predict returns one value per
        # clean molecule, i.e. `features` holds rows for clean molecules
        # only -- confirm against create_descriptors.
        preds = np.full(is_clean.shape, 0.5, dtype=np.float64)
        preds[is_clean] = model.predict(target, task_features)

        for smiles, pred in zip(smiles_list, preds):
            # float() converts the numpy scalar to a built-in Python float.
            predictions.setdefault(smiles, {})[target] = float(pred)
    return predictions
|