Hi,
I had recently encountered a similar problem. There is also another issue: when running 8 jobs concurrently in the same directory, each thread needs some input files (in my understanding) which are deleted or modified by other threads, and that leads to deadlock. The solution to this problem is to run each thread in a different directory. I am pasting my scripts to save you the trouble. The script will find how many CPUs are available on your machine and spawn a job on each one. To launch the loop modeling, type "python loop_parallel.py". Of course you will need to modify the scripts according to your needs, and I think the MODELLER libraries must be in your PYTHONPATH (read the installation instructions for how to do this). I will make them neater and ask the MODELLER caretaker to put them in the script library.
Script name "loop_parallel.py":
#!/usr/bin/env python
"""Spawn one MODELLER slave per CPU, each building loop models in its own
results_<n> directory so concurrent jobs never share input files."""
from modeller import *
from modeller.scripts import complete_pdb
from modeller.parallel import *
# Load in my task from build_model_class.py (note: it must live in a separate
# Python module like this, in order for Python's pickle module to work
# correctly when the task is shipped to the slave processes)
from build_model_class import Build_Model
import sys, re, os, shutil
import multiprocessing

# Base name of the PDB file holding the initial loop conformation; a copy is
# placed in every per-slave results directory.
INITIAL_CONFORMATION = "initial_loop1_conformation"

log.verbose()
env = environ()
env.io.hetatm = True
CURRENT_DIR = os.getcwd()
# directories for input atom files
env.io.atom_files_directory = ['.']

# One slave per available CPU, each with its own private working directory
# (avoids the deadlock caused by slaves deleting each other's input files).
j = job()
num_cpus = multiprocessing.cpu_count()
for cpu in range(num_cpus):
    j.append(local_slave())
    results_dir = os.path.join(CURRENT_DIR, "results_%d" % (cpu + 1))
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)
    shutil.copy(INITIAL_CONFORMATION + ".pdb", results_dir + "/")

# Partition the loop-model numbers into equal, non-overlapping [start, end]
# ranges, one per slave, so every task writes distinct model files.  The
# task count follows num_cpus (the original hard-coded 8, which broke on
# machines with a different CPU count).
start = 1
offset = 374  # each task builds offset+1 loop models
for n in range(1, num_cpus + 1):
    end = start + offset
    print(start, end)
    j.queue_task(Build_Model(INITIAL_CONFORMATION, "p110a_loop1_model",
                             start, end,
                             os.path.join(CURRENT_DIR, "results_%d" % n)))
    start = end + 1

results = j.run_all_tasks()
#!/usr/bin/env python
from modeller import *
from modeller.automodel import *
from modeller.parallel import *
from modeller.scripts import complete_pdb
import os, sys, glob
import multiprocessing
class MyLoopModel(loopmodel):
    """Loop model that refines residues 148-165 while keeping the rest of
    the structure fixed as a single rigid body."""

    def select_loop_atoms(self):
        # This routine picks the residues to be refined by loop modeling.
        return selection(self.residue_range('148:', '165:'))

    def special_restraints(self, aln):
        rsr = self.restraints
        # Keep the whole model, except the loop atoms, as one rigid body so
        # that only the selected loop can move during refinement.
        wholeSel = selection(self) - self.select_loop_atoms()
        r = rigid_body(wholeSel)
        rsr.rigid_bodies.append(r)
class Build_Model(task):
    """A MODELLER parallel task that builds loop models in a private
    output directory, so concurrent slaves do not clash on input files."""

    def run(self, initial_structure, output_model_name, start, end, OUT_DIR):
        """Build loop models numbered start..end for *initial_structure*.

        NOTE(review): os.chdir() changes the slave process's working
        directory for the remainder of its life, so each task must be
        given its own OUT_DIR.
        """
        os.chdir(OUT_DIR)
        log.verbose()
        env = environ()
        env.io.hetatm = False
        # scale up sheet hydrogen bond weights
        #env.schedule_scale = physical.values(default=1.0, h_bond=100.0)
        # directories for input atom files
        env.io.atom_files_directory = ["."]
        a = MyLoopModel(env,
                        inimodel=initial_structure,
                        sequence=output_model_name)  # code of the target
        a.loop.starting_model = start  # First loop model
        a.loop.ending_model = end      # Last loop model
        a.loop.md_level = refine.very_slow  # Loop model refinement level
        a.make()
        # Cluster all of the output models, and output an averaged cluster
        # structure, both optimized (cluster.opt) and unoptimized (cluster.ini)
        a.cluster(cluster_cut=1.5)
        ### remove redundant files to save disk space
        #filepatterns = [".rsr", ".V*", ".D*", ".ini", ".sch"]
        #for filepattern in filepatterns:
        #    for filename in glob.glob(initial_structure + filepattern):
        #        os.remove(filename)
        return
# -*- coding: utf-8 -*-
# Example for: selection.assess_dope()
# Score every loop model found under results_*/ with DOPE-HR and record the
# scores in <output_model_name>_dopehr_scores.txt.
from modeller import *
from modeller.scripts import complete_pdb
import glob

output_model_name = "p110a_loop1_model"

env = environ()
env.libs.topology.read(file='$(LIB)/top_heav.lib')
env.libs.parameters.read(file='$(LIB)/par.lib')

# 'with' guarantees the score file is closed even if a model fails to load
# (the original left fout open and relied on interpreter exit to flush it).
with open(output_model_name + "_dopehr_scores.txt", 'w') as fout:
    # Read each model previously generated by Modeller's automodel class
    for modelname in glob.glob("results_*/" + output_model_name + ".BL*.pdb"):
        print("Evaluating model ", modelname)
        mdl = complete_pdb(env, modelname)
        # Select all atoms in the first chain
        atmsel = selection(mdl.chains[0])
        score = atmsel.assess_dopehr()
        print("DOPE-HR score for model " + modelname + " is " + str(score))
        fout.write("DOPE-HR score for model " + modelname + " is " +
                   str(score) + "\n")
Hi,
Let's suppose we are sending a protein to refine four of its loops, eight repeats (32 loops).
If I spawn eight Slaves (job().append()), a single model is sent to each slave and each slave computes the four loops.
Now suppose we are sending the same protein, to refine four of its loops just one time (4 loops). Spawning the same eight Slaves, the model is sent to just one Slave, and that slave computes all four loops alone (i.e. MODELLER is not parallelizing).
My question is: is there any difference between a) sending the eight jobs from within the MODELLER script or b) sending one job after another to a third-party queue software, initializing the environment with
env = environ(0 - random.randrange(2, 50000)) #A different random seed each time.
or is modeller taking into account previous results?
Thanks in advance,
--David Rodríguez Díaz, PhD Student
Fundación Pública Galega de Medicina Xenómica (SERGAS)
Santiago de Compostela (Spain)
http://webspersoais.usc.es/persoais/david.rodriguez.diaz
_______________________________________________
modeller_usage mailing list
modeller_usage@salilab.org
https://salilab.org/mailman/listinfo/modeller_usage
======================================================================
Thomas Evangelidis
PhD student
Biomedical Research Foundation, Academy of Athens
4 Soranou Ephessiou , 115 27
Athens, Greece
email: tevang@bioacademy.gr
website:
https://sites.google.com/site/thomasevangelidishomepage/