Spaces:
Runtime error
Runtime error
| import os | |
| import argparse | |
| import json | |
| from tqdm import tqdm | |
| # conda install -c conda-forge -c bioconda foldseek | |
def _read_structure_fasta(fasta_path):
    """Parse the FASTA file written by ``foldseek convert2fasta``.

    Each record becomes ``{"name": ..., "foldseek_seq": ...}``. The name is
    the first whitespace-delimited token of the header (without ``>``), cut
    at its first ``.``. Sequence lines belonging to one record are
    concatenated, so wrapped (multi-line) records are read in full, and a
    header with no sequence line yields an empty string instead of consuming
    the next header.
    """
    records = []
    name = None
    chunks = []
    with open(fasta_path, "r") as f:
        for line in tqdm(f):
            line = line.strip()
            if not line:
                continue
            if line.startswith(">"):
                # A new header closes the previous record, if any.
                if name is not None:
                    records.append({"name": name, "foldseek_seq": "".join(chunks)})
                name = line.split()[0][1:].split('.')[0]
                chunks = []
            elif name is not None:
                chunks.append(line)
    # Flush the final record; no header follows it to trigger the append.
    if name is not None:
        records.append({"name": name, "foldseek_seq": "".join(chunks)})
    return records


def get_foldseek_structure_seq(pdb_dir, rm_tmp=True):
    """Run foldseek on ``pdb_dir`` and return its structure sequences.

    Executes, in order::

        foldseek createdb <pdb_dir> tmp_db/tmp_db
        foldseek lndb tmp_db/tmp_db_h tmp_db/tmp_db_ss_h
        foldseek convert2fasta tmp_db/tmp_db_ss tmp_db/tmp_db_ss.fasta

    and then parses the resulting FASTA file.

    Args:
        pdb_dir: Input path handed to ``foldseek createdb``.
        rm_tmp: When true, the ``tmp_db`` working directory (created in the
            current working directory) is removed before returning, even if
            one of the foldseek steps failed.

    Returns:
        A list of ``{"name": str, "foldseek_seq": str}`` dicts, one per
        FASTA record, in file order.

    Raises:
        FileNotFoundError: If the ``foldseek`` executable is not on PATH.
        subprocess.CalledProcessError: If a foldseek step exits non-zero.
    """
    # Imported locally so this function does not depend on edits to the
    # module's import block.
    import shutil
    import subprocess

    tmp_dir = "tmp_db"
    db = os.path.join(tmp_dir, "tmp_db")
    fasta_path = db + "_ss.fasta"

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact.
    commands = [
        ["foldseek", "createdb", pdb_dir, db],
        ["foldseek", "lndb", db + "_h", db + "_ss_h"],
        ["foldseek", "convert2fasta", db + "_ss", fasta_path],
    ]

    os.makedirs(tmp_dir, exist_ok=True)
    try:
        for cmd in commands:
            # check=True stops at the first failing step instead of letting
            # it surface later as a missing FASTA file.
            subprocess.run(cmd, check=True)
        return _read_structure_fasta(fasta_path)
    finally:
        if rm_tmp:
            # Equivalent of `rm -rf tmp_db`, without spawning a shell.
            shutil.rmtree(tmp_dir, ignore_errors=True)
def str2bool(value):
    """Convert a command-line token to a bool for use as argparse ``type=``.

    ``type=bool`` is not usable here: ``bool("False")`` is ``True`` because
    any non-empty string is truthy, so ``--rm_tmp False`` could never turn
    the flag off.

    Accepts (case-insensitively) true/t/yes/y/1 and false/f/no/n/0. An empty
    string maps to ``False``, which is what ``type=bool`` produced for it.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in {"true", "t", "yes", "y", "1"}:
        return True
    if token in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value (true/false), got {value!r}"
    )


if __name__ == '__main__':
    # CLI: run foldseek on --pdb_dir and write one JSON object per line
    # to --out_file.
    parser = argparse.ArgumentParser()
    # Both paths are mandatory; without them the run can only fail later.
    parser.add_argument("--pdb_dir", type=str, required=True)
    parser.add_argument("--out_file", type=str, required=True)
    parser.add_argument("--rm_tmp", type=str2bool, default=True)
    args = parser.parse_args()

    results = get_foldseek_structure_seq(args.pdb_dir, args.rm_tmp)

    with open(args.out_file, "w") as f:
        f.write("\n".join(json.dumps(r) for r in results))