Exemplo n.º 1
0
    def __init__(self, hmmdb=hmmDatabase, verbose=1, log=StdLog(),
                 debug=False):
        """
        Store the search settings and reserve names for the temporary files.

        @param hmmdb: Pfam hmm database
        @type  hmmdb: str
        @param verbose: verbosity level (default: 1)
        @type  verbose: 1|0
        @param log: Log file for messages [STDOUT]
        @type  log: Biskit.LogFile
        @param debug: don't cleanup temporary files [False]
        @type  debug: 1|0
        """
        self.hmmdb = hmmdb
        self.verbose = verbose
        ## a false-ish log falls back to a fresh STDOUT log
        self.log = log if log else StdLog()
        self.tempDir = settings.tempDirShared
        self.debug = debug

        self.fastaID = ''

        ## NOTE(review): mktemp only picks unused names, it does not create
        ## the files -- the names can be taken by someone else in the meantime
        scratch = self.tempDir
        self.hmmFile = tempfile.mktemp('.hmm', dir=scratch)
        self.fastaFile = tempfile.mktemp('.fasta', dir=scratch)
        self.sub_fastaFile = tempfile.mktemp('_sub.fasta', dir=scratch)
Exemplo n.º 2
0
    def __init__(self, outFolder='.', clusterLimit=50, verbose=0, log=None):
        """
        Remember the project settings, reset all result fields and prepare
        the project folders.

        @param outFolder: project folder (results are put into subfolder) ['.']
        @type  outFolder: str
        @param clusterLimit: maximal number of returned sequence clusters
                             (default: 50)
        @type  clusterLimit: int
        @param verbose: verbosity flag, stored as self.verbose; the original
                        documentation describes it as 'keep temporary files'
                        (default: 0)
        @type  verbose: 1|0
        @param log: log file instance, if None, STDOUT is used (default: None)
        @type  log: LogFile
        """
        self.outFolder = T.absfile(outFolder)

        self.verbose = verbose
        self.log = log if log else StdLog()

        ## results, all empty until a search has been performed:
        ##   record_dic      - dict ID - Bio.Fasta.Record
        ##   clusters        - list of lists of ids
        ##   bestOfCluster   - list of non-redundant ids
        self.record_dic = self.clusters = self.bestOfCluster = None

        ## upper limit for the number of clusters to return
        self.clusterLimit = clusterLimit

        ## current number of clusters
        self.clustersCurrent = None

        self.prepareFolders()
Exemplo n.º 3
0
    def __init__( self, outFolder='.', clusterLimit=20, verbose=1,
                  log=None, silent=0 ):
        """
        Initialize the parent SequenceSearcher, compile the regular
        expressions used for parsing identifiers and PDB resolution remarks,
        and store the search settings.

        @param outFolder: project folder (results are put into subfolder) ['.']
        @type  outFolder: str
        @param clusterLimit: maximal number of returned sequence clusters
                             (default: 20)
        @type  clusterLimit: int
        @param verbose: verbosity flag, stored as self.verbose; the original
                        documentation describes it as 'keep temporary files'
                        (default: 1)
        @type  verbose: 1|0
        @param log: log file instance, if None, STDOUT is used (default: None)
        @type  log: LogFile
        @param silent: flag stored as self.silent; documented as "don't print
                       messages to STDOUT" (default: 0)
        @type  silent: 1|0
        """
        ## the parent is always initialized with verbose=1 and without log;
        ## verbose, log and clusterLimit are (re-)assigned further below
        SequenceSearcher.__init__( self, outFolder=outFolder, verbose=1 )

        ## raw strings: '\|' and '\.' are regex escapes, not string escapes
        self.ex_pdb   = re.compile( r'pdb\|([A-Z0-9]{4})\|([A-Z]*)' )
        self.ex_gb    = re.compile( r'gi\|([0-9]+)\|' )

        self.ex_resolution = re.compile(
            r'REMARK   2 RESOLUTION\. *([0-9\.]+|NOT APPLICABLE)' )

        self.prepareFolders()

        self.verbose = verbose
        self.silent = silent

        self.log = log or StdLog()

        ## the maximal number of clusters to return
        self.clusterLimit = clusterLimit
Exemplo n.º 4
0
    def __init__(self, aln_info, target_id='target', log=None, verbose=0):
        """
        Collect the template identifiers and their sequence identities to
        the target from a CheckIdentities result.

        @param aln_info: CheckIdentities instance; its go() method is called
                         if it does not yet carry a result
        @type  aln_info: Biskit.Mod.CheckIdentities
        @param target_id: PIR id of the target sequence in the alignment
        @type  target_id: str
        @param log: log file instance, if None, STDOUT is used (default: None)
        @type  log: LogFile
        @param verbose: verbosity flag, stored as self.verbose (default: 0)
        @type  verbose: 1|0

        @raise ValueError: if target_id is not among the keys of the
                           alignment info
        """
        self.log = log or StdLog()
        self.verbose = verbose

        ## calculate the identities on demand
        if not aln_info.result:
            aln_info.go()

        self.aln = aln_info.result

        self.target_id = target_id

        #: extract array of template identifiers
        ## copy the keys into a real list first -- keys() is not guaranteed
        ## to return an object supporting remove()
        template_ids = list(self.aln.keys())
        template_ids.remove(target_id)
        self.templates = N.array(template_ids)

        #: extract array of sequence identities to target
        self.identities = N.array(
            [self.aln[k]['info_ID'][target_id] for k in self.templates])

        #: will hold mask for kicking out templates (1..keep template; 0..dispose)
        self.filter_mask = N.ones(len(self.templates))
Exemplo n.º 5
0
    def prepare(self):
        """
        Create a temporary project folder containing a copy of the test
        target sequence and set up the log for the test.
        """
        import tempfile
        import shutil
        from Biskit.LogFile import LogFile

        self.query = '%s/Mod/project/target.fasta' % T.testRoot()
        self.outfolder = tempfile.mkdtemp('_test_TemplateSearcher')
        shutil.copy(self.query, self.outfolder)

        ## the log file is opened for writing even if it is replaced by a
        ## STDOUT log right afterwards (local test run)
        self.f_out = '%s/TemplateSearcher.log' % self.outfolder
        self.l = LogFile(self.f_out, mode='w')
        if self.local:
            self.l = StdLog()

        ## location of the copied query sequence
        self.f_target = '%s/target.fasta' % self.outfolder
Exemplo n.º 6
0
 def __init__(self, reslibrary=None, log=None, verbose=False):
     """
     Store the residue library (creating a default one if needed), the
     verbosity flag and the log.

     @param reslibrary: Collection of AmberResidueType instances to be
                        matched against the residues of input models;
                        a new AmberResidueLibrary is created if this is
                        None or otherwise false
     @type  reslibrary: AmberResidueLibrary

     @param log: log (default STDOUT)
     @type  log: Biskit.LogFile

     @param verbose: give status updates and warnings to log
     @type  verbose: bool
     """
     if reslibrary:
         self.reslib = reslibrary
     else:
         ## the library receives log exactly as passed in (possibly None)
         self.reslib = AmberResidueLibrary(log=log, verbose=verbose)

     self.verbose = verbose
     self.log = log if log else StdLog()
Exemplo n.º 7
0
    def interfaceArea( self, profiles=0, log=StdLog(), verbose=1 ):
        """
        Calculate the difference between the surface area of the
        complex vs. its free components in square angstrom.

        SurfaceRacer profiles ('AS', 'MS', 'relAS', 'relMS') are added to
        the models returned by self.rec(), self.lig() and self.model().
        The calculation is performed regardless of the verbose setting;
        verbose only controls the progress messages.

        @param profiles: option to return the lig, rec and com profiles
                          rather than the value (both absolute and relative
                          values are returned)
        @type  profiles: 1|0 (default: 0)
        @param log: log file [STDOUT]
        @type  log: Biskit.LogFile
        @param verbose: give progress report [1]
        @type  verbose: bool | int

        @return: AS area, MS area OR
                 a dictionary of lig, rec and com profiles
        @rtype: (float, float) or dict
        """
        rcom = self.rec()
        lcom = self.lig()
        ccom = self.model()
        result = {}

        def getSurface( model, key ):
            ## add surface profiles to model and collect them under
            ## '<key>_<profile name>' in result
            if verbose:
                log.write("Calculating SurfaceRacer data for %s..."%key)

            d = PDBDope( model )
            d.addSurfaceRacer( probe=1.4 )

            if verbose:
                log.writeln('Done.')

            for name in ( 'AS', 'MS', 'relAS', 'relMS' ):
                result['%s_%s' % (key, name)] = model.profile( name )

        getSurface( rcom, 'rec' )
        getSurface( lcom, 'lig' )
        getSurface( ccom, 'com' )

        if profiles:
            return result

        ## buried area = free receptor + free ligand - complex
        area_AS = sum(result['rec_AS']) + sum(result['lig_AS']) - \
                  sum(result['com_AS'])
        area_MS = sum(result['rec_MS']) + sum(result['lig_MS']) - \
                  sum(result['com_MS'])

        return area_AS, area_MS
Exemplo n.º 8
0
    def __init__(self, topofiles=F_RESTYPES, log=None, verbose=False):
        """
        Create empty residue indices and fill them from the given
        topology files.

        @param topofiles: list of topology file names
                          (default all_*in in Biskit/data/amber/residues)
        @type  topofiles: [ str ]
        @param log: optional LogFile instance (default STDOUT)
        @type  log: Biskit.LogFile
        @param verbose: add messages to log (default False)
        @type  verbose: bool
        """
        ## aindex: residue types indexed by atom key
        ## topoindex: residue types indexed by topo and resname
        self.aindex, self.topoindex = {}, {}

        self.log = log if log else StdLog()
        self.verbose = verbose

        for topo in topofiles:
            self.addTopology(topo)
Exemplo n.º 9
0
    def __init__(self, com, protonate=True, addcharge=True,
                 indi=4.0, exdi=80.0, salt=0.15, ionrad=2, prbrad=1.4,
                 bndcon=4, scale=2.3, perfil=60,
                 template=None, topologies=None, f_charges=None,
                 verbose=True, debug=False, log=None,
                 tempdir=None, cwd=None, **kw):
        """
        Store the complex and all parameters for the Delphi calculation.

        @param com: complex to analyze
        @type  com: Biskit.Complex
        @param protonate: (re-)build hydrogen atoms with reduce program (True)
                          see L{Biskit.Reduce}
        @type  protonate: bool
        @param addcharge: Assign partial charges from Amber topologies (True)
        @type  addcharge: bool
        @param indi: interior dielectric (4.0)
        @param exdi: exterior dielectric (80.0)
        @param salt: salt conc. in M (0.15)
        @param ionrad: ion radius (2)
        @param prbrad: probe radius (1.4)
        @param bndcon: boundary condition (4, delphi default is 2)
        @param scale:  grid spacing (2.3)
        @param perfil: grid fill factor in % (for automatic grid, 60)

        @param template: delphi command file template [None=use default]
        @type  template: str
        @param topologies: alternative list of residue charge/topology files
                           [default: amber/residues/all*]
        @type  topologies: [ str ]
        @param f_charges: alternative delphi charge file
                          [default: create custom]
        @type  f_charges: str

        @param verbose: print progress messages to log (True)
        @type  verbose: bool
        @param debug: keep all temporary files (False)
        @type  debug: bool
        @param log: program log, None -> STDOUT (None)
        @type  log: Biskit.LogFile
        @param tempdir: stored as self.tempdir (None)
        @param cwd: stored as self.cwd (None)

        @param kw: additional key=value parameters; every pair is copied
                   into the instance name space. The original documentation
                   lists f_radii (alternative delphi atom radii file), node
                   and nice here -- none of them is an explicit parameter.
        @type  kw: key=value pairs
        """
        self.com, self.delphicom = com, None

        self.protonate, self.addcharge = protonate, addcharge

        ## DELPHI run parameters
        self.indi, self.exdi = indi, exdi        # interior / exterior dielectric
        self.salt = salt                         # salt conc. in M
        self.ionrad, self.prbrad = ionrad, prbrad  # ion and probe radius
        self.bndcon = bndcon                     # boundary condition

        ## DELPHI parameters for custom grid
        self.scale, self.perfil = scale, perfil  # grid spacing, fill factor in %

        ## DELPHI parameter file and Amber residue definitions or charge file
        self.template = template
        self.topologies = topologies
        self.f_charges = f_charges

        ## pump everything else into name space, too; attributes assigned
        ## above can be overridden by kw, the ones assigned below can not
        self.__dict__.update(kw)

        ## globally valid grid, not yet prepared
        self.grid = None

        self.verbose = verbose
        self.log = log if log else StdLog()
        self.debug = debug
        self.tempdir, self.cwd = tempdir, cwd

        ## raw results assigned by run()
        self.ezero = self.esalt = None
Exemplo n.º 10
0
    def __init__(self,
                 rec=None,
                 lig=None,
                 com=None,
                 out=None,
                 cr=None,
                 var='s',
                 vrange=[0],
                 jack=0,
                 zfilter=None,
                 clean=0,
                 all=1,
                 exrec=[],
                 exlig=[],
                 excom=[],
                 hosts=cpus_all,
                 niceness=nice_dic,
                 w=0,
                 a=1,
                 debug=0,
                 restart=0,
                 **kw):
        """
        Store all options, pre-process the trajectories and set up the
        protocols (unless restart is set), then initialize the job master.

        @param rec: free rec trajectory              [required]
        @type  rec: str
        @param lig: free lig trajectory              [required]
        @type  lig: str
        @param com: complex trajectory               [required]
        @type  com: str
        @param out: file name for pickled result     [required]
        @type  out: str
        @param cr: chains of receptor in complex trajectory [n_chains rec]
        @type  cr: [int]

        @param var: name of variable option [ s ]
        @type  var: str
        @param vrange: set of values used for variable option
                       OR 'start:stop:step', string convertible to
                       range() input
        @type  vrange: [any]
        @param jack: set up leave-one-trajectory-out jackknife test
                     (default: 0) (replaces var with 'ex1' and vrange with
                     range(1,n_members+1))
        @type  jack: [0|1]

        @param zfilter: kick out outlier trajectories using z-score threshold
                        on RMSD trace (default: None->don't)
        @type  zfilter: float
        @param clean: remove pickled ref models and member trajectories
                      (default: 0)
        @type  clean: 0|1
        @param all: skip single member trajs (default: 1)
        @type  all: 0|1

        @param exrec: exclude certain members of receptor ensemble    [[]]
        @type  exrec: [int]
        @param exlig: exclude certain members of ligand  ensemble     [[]]
        @type  exlig: [int]
        @param excom: exclude certain members of complex ensemble     [[]]
        @type  excom: [int]

        @param hosts: nodes to be used (default: all known)
        @type  hosts: [str]
        @param niceness: passed on to the job master as niceness
        @param w: show X window for each slave (passed on as show_output) [0]
        @type  w: 0|1
        @param a: add hosts to PVM (passed on as add_hosts) [1]
        @type  a: 0|1
        @param debug: don't delete output files (default: 0)
        @type  debug: 1|0
        @param restart: skip trajectory processing and protocol setup
                        (default: 0)
        @type  restart: 1|0

        @param kw: additional key=value parameters for AmberEntropist,
                   AmberCrdEntropist, Executor and Master.
        @type  kw: key=value pairs
        ::
          ... parameters for AmberEntropist
          cast    - 1|0, equalize free and bound atom content [1]
          s,e     - int, start and stop frame                 [0, to end]
          atoms   - [ str ], names of atoms to consider       [all]
          protein - 1|0, remove non-protein atoms             [0..don't]
          step    - int, frame offset                         [no offset]
          thin    - float, use randomly distributed fraction of frames [all]
                    (similar to step but perhaps better for entropy
                    calculations)
          ex      - [int] OR ([int],[int]), exclude member trajectories   [[]]
          ex_n    - int, exclude last n members  OR...                  [None]
          ex3     - int, exclude |ex3|rd triple of trajectories           [0]
                    (index starts with 1! 0 to exclude nothing)

          ... parameters for AmberCrdEntropist
          f_template - str, alternative ptraj input template  [default]

          ... parameters for Executor:
          log      - Biskit.LogFile, program log (None->STDOUT)       [None]
          verbose  - 0|1, print progress messages to log     [log != STDOUT]
        """
        ## normal and error output
        self.fout = T.absfile(out)
        self.ferror = os.path.dirname(self.fout) + '/AmberEntropy_errors.log'
        self.debug = debug

        self.log = StdLog()

        ## input files and variable option
        self.rec = T.absfile(rec, 0)
        self.lig = T.absfile(lig, 0)
        self.com = T.absfile(com, 0)
        self.cr = cr
        self.cl = None
        self.var = var
        self.vrange = self.__vrange(vrange)
        self.jack = jack
        self.zfilter = zfilter
        self.n_members = None
        self.clean = clean
        self.all = all

        ## members to exclude, outliers will be added if zfilter is not None.
        ## Work on private copies: these lists are extended in place later,
        ## which must neither leak into the caller's lists nor into the
        ## shared default arguments.
        self.ex_frec = list(exrec) if exrec is not None else []
        self.ex_flig = list(exlig) if exlig is not None else []
        self.ex_com = list(excom) if excom is not None else []

        ## reserve for loaded reference models
        self.ref_frec = self.ref_flig = None
        self.ref_brec = self.ref_blig = self.ref_com = None

        ## reserve for extracted member trajectories (one list each)
        self.members_frec, self.members_flig = [], []
        self.members_brec, self.members_blig = [], []
        self.members_com = []

        ## options to be passed on to AmberEntropist
        self.options = kw

        if not restart:
            ## Load trajectories, dump references, identify outliers
            self.processTrajs()

            ## prepare dict of protocols for AmberEntropist
            self.protocols = self.protocols_var_range(**kw)
            self.saveProtocols()

        ## NOTE(review): with restart set, self.protocols is not assigned in
        ## this method -- confirm that it is provided elsewhere
        TrackingJobMaster.__init__(self,
                                   self.protocols,
                                   chunk_size=1,
                                   hosts=hosts,
                                   niceness=niceness,
                                   slave_script=slave_path,
                                   show_output=w,
                                   add_hosts=a)

        print("JobMaster initialized.")
Exemplo n.º 11
0
class AmberEntropyMaster(TrackingJobMaster):
    """
    Run many AmberEntropist calculations on many nodes. The Master has
    a standard set of 13 protocols to run on rec, lig, and com
    trajectories, as well as on every single member trajectory - in
    total 113.  It accepts one variable parameter, e.g. s(tart). Each
    protocol is then run for all values of the variable parameter. A
    protocol is simply a set of options that are passed on to the
    AmberEntropist (which is run from within AmberEntropySlave).
    Comparing the different protocols allows to more or less separate
    random from real correlations, rigid body from intermolecular
    vibrations, etc.

    Results are put into a tree-shaped dictionary of dictionaries. The
    first dimension/key is the member index -- None for the complete
    ensemble trajectory, 0 for the first member, etc. The second
    dimension/key is the name of the protocol, e.g. 'com_split' for
    the complex trajectory with separately fitted receptor and
    ligand. The last dimension contains the different values obtained
    from the ptraj run, e.g. 'S_total' points to the total entropy in
    cal/mol/K, 'contributions' to the entropy contribution of each
    mode, 'T' to the assumed temperature, 'vibes' gives the number of
    vibrations with too low frequencies (according to ptraj). All these
    are lists of values - one for each value of the variable option.

    Example::
             * r[None]['fcom']['S_vibes'][0] -> float
               first vibr. Entropy of free fake complex for complete ensemble
             * r[0]['com']['S_total'] -> [ float, float, .. ]
               the total entropies of the complex calculated for the first
               ensemble member and the different values of the variable option
    """
    def __init__(self,
                 rec=None,
                 lig=None,
                 com=None,
                 out=None,
                 cr=None,
                 var='s',
                 vrange=[0],
                 jack=0,
                 zfilter=None,
                 clean=0,
                 all=1,
                 exrec=[],
                 exlig=[],
                 excom=[],
                 hosts=cpus_all,
                 niceness=nice_dic,
                 w=0,
                 a=1,
                 debug=0,
                 restart=0,
                 **kw):
        """
        Store all options, pre-process the trajectories and set up the
        protocols (unless restart is set), then initialize the job master.

        @param rec: free rec trajectory              [required]
        @type  rec: str
        @param lig: free lig trajectory              [required]
        @type  lig: str
        @param com: complex trajectory               [required]
        @type  com: str
        @param out: file name for pickled result     [required]
        @type  out: str
        @param cr: chains of receptor in complex trajectory [n_chains rec]
        @type  cr: [int]

        @param var: name of variable option [ s ]
        @type  var: str
        @param vrange: set of values used for variable option
                       OR 'start:stop:step', string convertible to
                       range() input
        @type  vrange: [any]
        @param jack: set up leave-one-trajectory-out jackknife test
                     (default: 0) (replaces var with 'ex1' and vrange with
                     range(1,n_members+1))
        @type  jack: [0|1]

        @param zfilter: kick out outlier trajectories using z-score threshold
                        on RMSD trace (default: None->don't)
        @type  zfilter: float
        @param clean: remove pickled ref models and member trajectories
                      (default: 0)
        @type  clean: 0|1
        @param all: skip single member trajs (default: 1)
        @type  all: 0|1

        @param exrec: exclude certain members of receptor ensemble    [[]]
        @type  exrec: [int]
        @param exlig: exclude certain members of ligand  ensemble     [[]]
        @type  exlig: [int]
        @param excom: exclude certain members of complex ensemble     [[]]
        @type  excom: [int]

        @param hosts: nodes to be used (default: all known)
        @type  hosts: [str]
        @param niceness: passed on to the job master as niceness
        @param w: show X window for each slave (passed on as show_output) [0]
        @type  w: 0|1
        @param a: add hosts to PVM (passed on as add_hosts) [1]
        @type  a: 0|1
        @param debug: don't delete output files (default: 0)
        @type  debug: 1|0
        @param restart: skip trajectory processing and protocol setup
                        (default: 0)
        @type  restart: 1|0

        @param kw: additional key=value parameters for AmberEntropist,
                   AmberCrdEntropist, Executor and Master.
        @type  kw: key=value pairs
        ::
          ... parameters for AmberEntropist
          cast    - 1|0, equalize free and bound atom content [1]
          s,e     - int, start and stop frame                 [0, to end]
          atoms   - [ str ], names of atoms to consider       [all]
          protein - 1|0, remove non-protein atoms             [0..don't]
          step    - int, frame offset                         [no offset]
          thin    - float, use randomly distributed fraction of frames [all]
                    (similar to step but perhaps better for entropy
                    calculations)
          ex      - [int] OR ([int],[int]), exclude member trajectories   [[]]
          ex_n    - int, exclude last n members  OR...                  [None]
          ex3     - int, exclude |ex3|rd triple of trajectories           [0]
                    (index starts with 1! 0 to exclude nothing)

          ... parameters for AmberCrdEntropist
          f_template - str, alternative ptraj input template  [default]

          ... parameters for Executor:
          log      - Biskit.LogFile, program log (None->STDOUT)       [None]
          verbose  - 0|1, print progress messages to log     [log != STDOUT]
        """
        ## normal and error output
        self.fout = T.absfile(out)
        self.ferror = os.path.dirname(self.fout) + '/AmberEntropy_errors.log'
        self.debug = debug

        self.log = StdLog()

        ## input files and variable option
        self.rec = T.absfile(rec, 0)
        self.lig = T.absfile(lig, 0)
        self.com = T.absfile(com, 0)
        self.cr = cr
        self.cl = None
        self.var = var
        self.vrange = self.__vrange(vrange)
        self.jack = jack
        self.zfilter = zfilter
        self.n_members = None
        self.clean = clean
        self.all = all

        ## members to exclude, outliers will be added if zfilter is not None.
        ## Work on private copies: these lists are extended in place later,
        ## which must neither leak into the caller's lists nor into the
        ## shared default arguments.
        self.ex_frec = list(exrec) if exrec is not None else []
        self.ex_flig = list(exlig) if exlig is not None else []
        self.ex_com = list(excom) if excom is not None else []

        ## reserve for loaded reference models
        self.ref_frec = self.ref_flig = None
        self.ref_brec = self.ref_blig = self.ref_com = None

        ## reserve for extracted member trajectories (one list each)
        self.members_frec, self.members_flig = [], []
        self.members_brec, self.members_blig = [], []
        self.members_com = []

        ## options to be passed on to AmberEntropist
        self.options = kw

        if not restart:
            ## Load trajectories, dump references, identify outliers
            self.processTrajs()

            ## prepare dict of protocols for AmberEntropist
            self.protocols = self.protocols_var_range(**kw)
            self.saveProtocols()

        ## NOTE(review): with restart set, self.protocols is not assigned in
        ## this method -- confirm that it is provided elsewhere
        TrackingJobMaster.__init__(self,
                                   self.protocols,
                                   chunk_size=1,
                                   hosts=hosts,
                                   niceness=niceness,
                                   slave_script=slave_path,
                                   show_output=w,
                                   add_hosts=a)

        print("JobMaster initialized.")

    def __vrange(self, v):
        """
        Interpret the vrange option.

        A list is converted element by element; a string containing ':'
        is split at the colons and the converted fields are handed to
        N0.arange; any other value is converted on its own and returned
        as a single number (not wrapped into a list).

        @param v: vrange option
        @type  v: lst OR str

        @return: range option
        @rtype: [int] OR [float]
        """
        convert = self.__float_int

        if type(v) is list:
            return [convert(item) for item in v]

        if type(v) is str and ':' in v:
            fields = [convert(field) for field in v.split(':')]
            return N0.arange(*fields)

        return convert(v)

    def __float_int(self, v):
        """
        Convert v to int if it has no fractional part, to float otherwise.

        @param v: value
        @type  v: any

        @return: converted value
        @rtype: int OR float
        """
        number = float(v)

        if number % 1. == 0:
            return int(number)

        return number

    def loadTraj(self, fname, outliers=None, refname=None):
        """
        Load trajectory from file, reduce it to the atoms selected by the
        protein mask of its reference, determine outlier members and dump
        reference and member trajectories if needed.

        @param fname: path to trajectory
        @type  fname: str
        @param outliers: members already marked for exclusion; further
                         outliers are identified if self.zfilter is set
                         (default: None -> empty list)
        @type  outliers: [int] OR None
        @param refname: file name for the pickled reference; the reference
                        is not dumped if this is None (default: None)
        @type  refname: str

        @return: t, outliers, members -- members is None unless self.all
                 is switched off
        @rtype: trajectory, [int], [str] OR None
        """
        ## a fresh list per call; the result is modified by callers
        if outliers is None:
            outliers = []

        self.log.add('Loading ' + fname)
        t = T.load(fname)

        t.ref.addChainId()
        t = t.compressAtoms(t.ref.maskProtein())

        outliers = self.getOutliers(t, outliers)

        if refname:
            self.dumpMissing(t.ref, refname)

        members = None
        if not self.all:
            ## name the member files after the trajectory they are taken
            ## from; naming them after self.rec would let the ligand and
            ## complex runs pick up the receptor's member files
            members = self.dumpMembers(t, fname)

        return t, outliers, members

    def processTrajs(self):
        """
        Extract reference model and member trajectories from rec, lig, and
        com trajectories. Identify outlier member trajectories, if requested.
        Afterwards the member counts are equalized and, if self.jack is set,
        the jackknife test is prepared.
        """
        ## free receptor
        self.ref_frec = self.nameRef(self.rec)

        traj, self.ex_frec, self.members_frec = self.loadTraj(
            self.rec, self.ex_frec, self.ref_frec)

        n_rec = traj.n_members
        ## default receptor chains: all chains of the free receptor
        if not self.cr:
            self.cr = range(traj.ref.lenChains(breaks=0))
        del traj

        ## free ligand
        self.ref_flig = self.nameRef(self.lig)

        traj, self.ex_flig, self.members_flig = self.loadTraj(
            self.lig, self.ex_flig, self.ref_flig)

        n_lig = traj.n_members
        del traj

        ## complex: reference files are named after the complex trajectory
        base = T.stripSuffix(T.absfile(self.com, resolveLinks=0))
        self.ref_com = base + '_ref.complex'
        self.ref_blig = base + '_blig.model'
        self.ref_brec = base + '_brec.model'

        traj, self.ex_com, self.members_com = self.loadTraj(
            self.com, self.ex_com)

        n_com = traj.n_members

        ## default ligand chains: everything that is not receptor
        if not self.cl:
            self.cl = MU.difference(range(traj.ref.lenChains()), self.cr)

        bound_rec = traj.ref.takeChains(self.cr, breaks=0)
        bound_lig = traj.ref.takeChains(self.cl, breaks=0)

        del traj
        self.dumpMissing(Complex(bound_rec, bound_lig), self.ref_com)
        self.dumpMissing(bound_rec, self.ref_brec)
        self.dumpMissing(bound_lig, self.ref_blig)

        self.equalizeMemberCount(n_rec, n_lig, n_com)

        if self.jack:
            self.prepareJackknife()

    def equalizeMemberCount(self, n_rec, n_lig, n_com):
        """
        Ensure we keep equal number of members trajectories from frec,
        flig, and com. Members with the lowest indices that are not yet
        excluded are appended (in place) to the respective exclusion list
        until all three ensembles retain the same number of members.
        The retained member count is stored in self.n_members.

        @param n_rec: number of receptor trajectories
        @type  n_rec: int
        @param n_lig: number of ligand trajectories
        @type  n_lig: int
        @param n_com: number of complex trajectories
        @type  n_com: int
        """
        excluded = (self.ex_frec, self.ex_flig, self.ex_com)
        totals = (n_rec, n_lig, n_com)

        ## number of members left in each ensemble after exclusion
        remaining = [total - len(ex) for ex, total in zip(excluded, totals)]

        ## lowest number of members after exclusion
        n_min = min(remaining)

        self.log.add('excluding non-outliers to match member count: ')

        for tag, ex, left in zip(('rec', 'lig', 'com'), excluded, remaining):
            candidate = 0

            while left > n_min:
                self.log.write('%s: ' % tag)
                if candidate not in ex:
                    ex.append(candidate)
                    left -= 1
                    self.log.write('%i, ' % candidate)
                candidate += 1

            self.log.add('')

        self.n_members = n_min

    def prepareJackknife(self):
        """
        Prepare leave-one-trajectory-out jackknife test: the variable
        option becomes 'ex1' and runs over 0..n_members.
        """
        n_values = self.n_members + 1  ## value 0: exclude nothing
        self.vrange = range(n_values)
        self.var = 'ex1'

    def nameRef(self, fname):
        """
        Derive the file name of the pickled reference model from a
        trajectory file name.

        @param fname: trajectory file name
        @type  fname: str

        @return: absolute file name without suffix + '_ref.model'
        @rtype: str
        """
        return T.stripSuffix(T.absfile(fname, resolveLinks=0)) + '_ref.model'

    def nameRefCom(self, fname):
        """
        Derive the file name of the pickled reference complex from a
        trajectory file name.

        @param fname: trajectory file name
        @type  fname: str

        @return: absolute file name without suffix + '_ref.complex'
        @rtype: str
        """
        return T.stripSuffix(T.absfile(fname, resolveLinks=0)) + '_ref.complex'

    def dumpMissing(self, o, fname):
        """
        Pickle *o* to path *fname*, if it is not already there. An existing
        file is left untouched.

        @param o: object to dump
        @type  o: any
        @param fname: file name
        @type  fname: str

        @return: file name
        @rtype: str
        """
        if not os.path.exists(fname):
            self.log.add('Saving ' + fname)
            T.dump(o, fname)
            return fname

        self.log.add('using existing ' + fname)
        return fname

    def getOutliers(self, traj, outlaws=None):
        """
        Identify member trajectories that have moved much further than normal.
        Nothing is calculated if self.zfilter is not set; the list of
        outlaws is then returned as it is.

        @param traj: Trajectory to analyze
        @type  traj: Trajectory
        @param outlaws: members already marked for exclusion
                        (default: None -> new empty list)
        @type  outlaws: [int]

        @return: member indices of outlier trajectories (plus outlaws)
        @rtype: [int]
        """
        ## never hand out a shared default list -- callers extend the result
        if outlaws is None:
            outlaws = []

        if not self.zfilter:
            return outlaws

        outliers = N0.nonzero(
            traj.outliers(z=self.zfilter, mask=traj.ref.maskCA(), step=10))
        self.log.add('identified %i outliers with z-threshold %3.1f' %\
                     ( len(outliers), self.zfilter ) )

        return MU.union(outliers, outlaws)

    def dumpMembers(self, traj, fname):
        """
        Dump ensemble member trajectories. Each member is pickled to
        '<fname without suffix>_member_<NN>.traj' unless that file
        already exists.

        @param traj: Trajectory to dump
        @type  traj: Trajectory
        @param fname: trajectory file name - used to derive name for members
        @type  fname: str

        @return: list of trajectory files
        @rtype: [str]
        """
        stem = T.stripSuffix(T.absfile(fname, resolveLinks=0))

        files = []
        for idx in range(traj.n_members):
            target = stem + '_member_%02i.traj' % idx

            if not os.path.exists(target):
                self.log.write('saving ' + target + '...')
                T.dump(traj.takeMember(idx), target)
                self.log.add('done')
            else:
                self.log.add('using existing ' + target)

            files.append(target)

        return files

    def getInitParameters(self, slave_tid):
        """
        Collect the parameters that are handed over to a slave once.

        The nice value is looked up by host name in self.niceness; hosts
        without their own entry get the 'default' entry or, failing that, 0.

        @param slave_tid: slave task id
        @type  slave_tid: int

        @return: dictionary with init parameters
        @rtype: {param:value}
        """
        host = self.hostnameFromTID(slave_tid)

        fallback = self.niceness.get('default', 0)
        nice = self.niceness.get(host, fallback)

        return dict(ferror=self.ferror, debug=self.debug, nice=nice, host=host)

    def cleanup(self):
        """
        Tidy up: remove the cached files, but only if self.clean is set.
        """
        if not self.clean:
            return

        self.cleanCache()

    def cleanCache(self):
        """
        Remove left-over cache files: the five reference files followed by
        all member trajectory files. The outcome of every removal attempt is
        written to the log.

        Member lists that are None or were never assigned count as empty.
        """
        fs = [
            self.ref_frec, self.ref_flig, self.ref_com, self.ref_brec,
            self.ref_blig
        ]

        ## loadTraj (further down in this file) hands back None instead of a
        ## list when single-member trajectories are skipped, and members_com
        ## is only assigned once trajectories were processed -> concatenating
        ## the raw attributes would fail, so treat None / missing as empty.
        for attr in ('members_frec', 'members_flig', 'members_brec',
                     'members_blig', 'members_com'):
            fs.extend(getattr(self, attr, None) or [])

        for f in fs:
            self.log.add('removing %s: %i' % (f, T.tryRemove(f)))

    def saveProtocols(self):
        """
        Pickle self.protocols to '<result file name w/o suffix>_protocols.dat'.
        """
        stem = T.stripSuffix(T.absfile(self.fout))
        f_prot = stem + '_protocols.dat'

        self.log.write('Saving parameters to %s...' % f_prot)
        T.dump(self.protocols, f_prot)

    def done(self):
        """
        Assemble the result tree (see getResult) and pickle it to self.fout.
        """
        result = self.getResult()

        destination = self.fout
        self.log.add("Saving result to %s..." % destination)
        T.dump(result, destination)
        self.log.add("Done")

    ##
    ## Assemble the protocols for many AmberEntropist runs
    ##
    def __cpupdate(self, d1, d2):
        """
        Return a shallow copy of *d1* that is updated with the entries of
        *d2*. Neither *d1* nor *d2* is modified.
        """
        merged = copy.copy(d1)
        merged.update(d2)

        return merged

    def protocols_standard(self,
                           trec,
                           tlig,
                           tcom,
                           ex_frec=None,
                           ex_flig=None,
                           ex_com=None,
                           doshift=1,
                           **options):
        """
        Create the parameter sets for AmberEntropist that cover the
        calculation of rec, lig, com and fake-complex (fcom) entropies with
        and without splitting of the complex and with shuffling or shifting
        of frames. Nine sets are returned, ten if *doshift* is set:
        frec, flig, brec, blig, fcom, fcom_shuff, com, com_split,
        com_split_shuff and (optionally) com_split_shift.

        @param trec: free receptor trajectory ('traj' of the frec set)
        @type  trec: str
        @param tlig: free ligand trajectory ('traj' of the flig set)
        @type  tlig: str
        @param tcom: complex trajectory ('traj' of the brec, blig, com sets)
        @type  tcom: str
        @param ex_frec: value for 'ex' in the frec set (default: None)
        @type  ex_frec: [int]
        @param ex_flig: value for 'ex' in the flig set (default: None)
        @type  ex_flig: [int]
        @param ex_com: value for 'ex' in all complex sets (default: None)
        @type  ex_com: [int]
        @param doshift: also create the 'com_split_shift' set (default: 1)
        @type  doshift: 1|0
        @param options: additional options (like cast, s, e, atoms, thin, step)
                        that are the same in all parameter sets
        @type  options: key=value

        @return: each value of the returned dict contains a set of
                 arguments for one AmberEntropist run
        @rtype: dict of dict
        """
        merge = self.__cpupdate

        ## defaults shared by all parameter sets, overridden by **options
        base = dict(ref=None, cast=1, chains=None, split=0, shift=0,
                    shuffle=0, ex_n=0, ex3=None, thin=None, step=1,
                    ss=0, se=None, atoms=None)
        base.update(options)

        protocols = {}

        ## free trajectories are paired with the bound reference ...
        protocols['frec'] = merge(
            base, dict(traj=trec, ref=self.ref_brec, ex=ex_frec))
        protocols['flig'] = merge(
            base, dict(traj=tlig, ref=self.ref_blig, ex=ex_flig))

        ## ... and the chains taken from the complex with the free reference
        protocols['brec'] = merge(
            base,
            dict(traj=tcom, ref=self.ref_frec, ex=ex_com, chains=self.cr))
        protocols['blig'] = merge(
            base,
            dict(traj=tcom, ref=self.ref_flig, ex=ex_com, chains=self.cl))

        ## fake complex from the two free trajectories
        ## (a 'fcom_shift' variant is disabled)
        fake_complex = merge(
            base,
            dict(traj='%s+%s' % (trec, tlig),
                 ex=(ex_frec, ex_flig),
                 ref=self.ref_com,
                 split=1))
        protocols['fcom'] = fake_complex
        protocols['fcom_shuff'] = merge(fake_complex, {'shuffle': 1})

        ## real complex
        real_complex = merge(
            base,
            dict(traj=tcom,
                 ex=ex_com,
                 ref='%s+%s' % (self.ref_frec, self.ref_flig)))
        protocols['com'] = real_complex

        ## split variants of the real complex; the border is the first ligand
        ## chain ('com_shuff' and 'com_shift' variants are disabled)
        variants = [('com_split', {}), ('com_split_shuff', {'shuffle': 1})]
        if doshift:
            variants.append(('com_split_shift', {'shift': 1}))

        for name, extra in variants:
            changes = {'split': 1, 'border': self.cl[0]}
            changes.update(extra)
            protocols[name] = merge(real_complex, changes)

        return protocols

    def protocols_single_all(self, **options):
        """
        Set of protocols for the all-member trajectories and, unless self.all
        is set, for each single-member trajectory as well. The single-member
        sets are created with doshift=0 and without member exclusion.

        @param options: additional options (like cast, s, e, atoms, thin, step)
                        that are the same in all parameter sets
        @type  options: key=value

        @return: each value of the returned dict contains a set of arguments
                 for one AmberEntropist run, each key is a tuple of the
                 member index and the protocol name, e.g. (0, 'fcom_shuff').
                 The set of protocols for all-member trajectories has member
                 index None.
        @rtype: dict of dict
        """
        collected = {}

        ## all-member protocols are filed under member index None
        ensemble = self.protocols_standard(self.rec, self.lig, self.com,
                                           self.ex_frec, self.ex_flig,
                                           self.ex_com, **options)
        for name in ensemble:
            collected[(None, name)] = ensemble[name]

        if self.all:
            return collected

        ## single-member protocols are filed under their member index
        for i in range(len(self.members_frec)):
            single = self.protocols_standard(self.members_frec[i],
                                             self.members_flig[i],
                                             self.members_com[i],
                                             doshift=0,
                                             **options)
            collected.update(((i, name), p) for name, p in single.items())

        return collected

    def protocols_var_range(self, **options):
        """
        Complete set of protocols: the protocols from protocols_single_all
        are generated once for every value in self.vrange, with the variable
        option self.var set to that value.

        @param options: additional options that are the same in all sets
        @type  options: key=value

        @return: protocols indexed by (value, member index, protocol name)
        @rtype: dict of dict
        """
        self.log.add('variable option %s with %i values' %
                     (self.var, len(self.vrange)))

        collected = {}
        for value in self.vrange:
            ## private copy of the keyword dict so that each value gets its
            ## own, independent option set
            params = dict(options)
            params[self.var] = value

            for key, protocol in self.protocols_single_all(**params).items():
                collected[(value, ) + key] = protocol

        return collected

    ##
    ## Re-organize results
    ##
    def dictionate(self, d):
        """
        Take dict with tuple keys (value, int_member, str_protocol) and build
        a tree-like dict of dicts in which the values of d can be accessed
        like::
          d[value][int_member][str_protocol]

        The entries are grouped by the first item of their key; the groups
        are then converted recursively until only single-item keys are left.
        An empty dict is returned as an empty dict.

        @param d: the raw results accumulated from the slave nodes
        @type d: dict

        @return: tree-like dict ordered by variable value, member, protocol
        @rtype: dict of dict of dict of dict
        """
        r = {}

        ## nothing to convert (and no first key to look at)
        if not d:
            return r

        ## a real list can be indexed regardless of what keys() returns
        keys = list(d.keys())

        ## only convert single value tuple keys into non-tuple keys
        if len(keys[0]) == 1:
            for k in keys:
                r[k[0]] = d[k]
            return r

        ## group all entries by their first key item in a single pass,
        ## keeping the rest of the key for the next level
        for k in keys:
            r.setdefault(k[0], {})[k[1:]] = d[k]

        for x in list(r):
            r[x] = self.dictionate(r[x])

        return r

    def getResult(self, **arg):
        """
        Collapse the results for different values of the variable parameter
        into lists and put the results into a tree ala::
          r[ member_index ][ protocol_name ][ result_field ] -> [ values ]

        The tree additionally gets the entries 'var', 'vrange' and
        'protocols' and is also stored in self.result_tree. Member/protocol
        combinations that cannot be assembled for all values are reported as
        a warning and left incomplete.

        @param arg: ignored
        @type  arg: key=value

        @return: tree-like dict ordered by member, protocol, result field
        @rtype: dict of dict of dict of lists
        """
        tree = self.dictionate(self.result)

        vvalues = sorted(tree.keys())

        ## the member / protocol combinations are taken from the first value
        ## of the variable option; without any result there are none
        sub_keys = []
        if vvalues:
            sub_keys = [k for k in self.result.keys() if k[0] == vvalues[0]]

        r = {}
        for v, member, protcl in sub_keys:

            try:
                if member not in r:
                    r[member] = {}

                r[member][protcl] = {}

                run_dic = tree[v][member][protcl]

                ## separate name for the inner loop so that *v* is not
                ## silently re-bound
                for k in run_dic.keys():
                    r[member][protcl][k] = [tree[x][member][protcl][k]
                                            for x in vvalues]
            except Exception:
                ## not a bare except: interrupts and exit requests must pass
                EHandler.warning('missing result: ' + str(T.lastError()))

        r['var'] = self.var
        r['vrange'] = self.vrange
        r['protocols'] = self.protocols

        self.result_tree = r
        return r
Exemplo n.º 12
0
    def conservationScore( self, cons_type='cons_ent', ranNr=150,
                           log=StdLog(), verbose=1 ):
        """
        Score of conserved residue pairs in the interaction surface.
        Optionally, normalized by random surface contacts.

        If a conservation profile cannot be obtained for receptor or ligand,
        a profile of ones is used instead. Missing surface masks are
        calculated with PDBDope.

        @param cons_type: precalculated conservation profile name,
                          see L{Biskit.PDBDope}.
        @type  cons_type: str
        @param ranNr: number of random matrices to use (default: 150),
                      0 normalizes by the number of contacts instead
        @type  ranNr: int
        @param log: log file [STDOUT]
        @type  log: Biskit.LogFile
        @param verbose: give progress report [1]
        @type  verbose: bool | int

        @return: conservation score
        @rtype: float
        """
        try:
            recCons = self.rec().profile( cons_type, updateMissing=1 )
        except Exception:
            if verbose:
                log.add('\n'+'*'*30+'\nNO HHM PROFILE FOR RECEPTOR\n'+\
                        '*'*30+'\n')
            recCons = N.ones( self.rec().lenResidues() )
        try:
            ligCons = self.lig().profile( cons_type, updateMissing=1 )
        except Exception:
            if verbose:
                log.add(\
                            '\n'+'*'*30+'\nNO HHM PROFILE FOR LIGAND\n'+'*'*30+'\n')
            ligCons = N.ones( self.lig().lenResidues() )

        if self.rec().profile( 'surfMask' ):
            recSurf = self.rec().profile( 'surfMask' )
        else:
            d = PDBDope(self.rec())
            d.addSurfaceMask()
            ## the mask has to be fetched after it was calculated, otherwise
            ## recSurf is undefined further down
            ## NOTE(review): assumes addSurfaceMask() stores the profile
            ## under the name 'surfMask' -- confirm
            recSurf = self.rec().profile( 'surfMask' )

        if self.lig().profile( 'surfMask' ):
            ligSurf = self.lig().profile( 'surfMask' )
        else:
            d = PDBDope(self.lig())
            d.addSurfaceMask()
            ## see receptor above
            ligSurf = self.lig().profile( 'surfMask' )

        surfMask = N.ravel(N.outerproduct( recSurf, ligSurf ))

        ## residue pairs for which both partners have a zero conservation
        ## value are excluded from the contacts
        missing = N.outerproduct( N.equal( recCons, 0), N.equal(ligCons,0))

        cont = self.resContacts() * N.logical_not(missing)

        consMat = N.outerproduct( recCons, ligCons )

        score = cont* consMat

        # get a random score
        if ranNr != 0:
            ## report progress through the log and verbosity that were passed
            ## in, not through instance attributes
            if verbose:
                log.write('.')
            ranMat =  mathUtils.random2DArray( cont, ranNr, mask=surfMask )
            random_score = N.sum(N.sum( ranMat * consMat ))/( ranNr*1.0 )
            return N.sum(N.sum(score))/random_score

        else:
            return N.sum(N.sum(score))/ N.sum(N.sum(cont))
Exemplo n.º 13
0
    def __init__(self, rec=None, lig=None, com=None, out=None,
                 cr=None, var='s', vrange=[0], jack=0,
                 zfilter=None, clean=0, all=1,
                 exrec=[], exlig=[], excom=[],
                 hosts=cpus_all,
                 niceness=nice_dic,
                 w=0, a=1, debug=0,
                 restart=0,
                 **kw ):
        """
        @param rec: free rec trajectory              [required]
        @type  rec: str
        @param lig: free lig trajectory              [required]
        @type  lig: str
        @param com: complex trajectory               [required]
        @type  com: str
        @param out: file name for pickled result     [required]
        @type  out: str
        @param cr: chains of receptor in complex trajectory [n_chains rec]
        @type  cr: [int]

        @param var: name of variable option [ s ]
        @type  var: str
        @param vrange: set of values used for variable option
                       OR 'start:stop:step', string convertible to
                       range() input
        @type  vrange: [any]
        @param jack: set up leave-one-trajectory-out jackknife test
                     (default: 0) (replaces var with 'ex1' and vrange with
                     range(n_members+1), 0 meaning: exclude nothing)
        @type  jack: [0|1]

        @param zfilter: kick out outlier trajectories using z-score threshold
                        on RMSD trace (default: None->don't)
        @type  zfilter: float
        @param clean: remove pickled ref models and member trajectories
                      (default: 0)
        @type  clean: 0|1
        @param all: skip single member trajs (default: 1)
        @type  all: 0|1

        @param exrec: exclude certain members of receptor ensemble    [[]]
        @type  exrec: [int]
        @param exlig: exclude certain members of ligand  ensemble     [[]]
        @type  exlig: [int]
        @param excom: exclude certain members of complex ensemble     [[]]
        @type  excom: [int]

        @param hosts: nodes to be used (default: all known)
        @type  hosts: [str]
        @param niceness: nice values by host name, passed on to the Master
        @type  niceness: dict
        @param debug: don't delete output files (default: 0)
        @type  debug: 1|0
        @param restart: skip trajectory processing and protocol setup
                        (default: 0)
        @type  restart: 1|0

        @param kw: additional key=value parameters for AmberEntropist,
                   AmberCrdEntropist, Executor and Master.
        @type  kw: key=value pairs
        ::
          ... parameters for AmberEntropist
          cast    - 1|0, equalize free and bound atom content [1]
          s,e     - int, start and stop frame                 [0, to end]
          atoms   - [ str ], names of atoms to consider       [all]
          protein - 1|0, remove non-protein atoms             [0..don't]
          step    - int, frame offset                         [no offset]
          thin    - float, use randomly distributed fraction of frames [all]
                    (similar to step but perhaps better for entropy
                    calculations)
          ex      - [int] OR ([int],[int]), exclude member trajectories   [[]]
          ex_n    - int, exclude last n members  OR...                  [None]
          ex3     - int, exclude |ex3|rd triple of trajectories           [0]
                    (index starts with 1! 0 to exclude nothing)

          ... parameters for AmberCrdEntropist
          f_template - str, alternative ptraj input template  [default]

          ... parameters for Executor:
          log      - Biskit.LogFile, program log (None->STOUT)        [None]
          verbose  - 0|1, print progress messages to log     [log != STDOUT]

          ... parameters for Master
          w        - 0|1, show X window for each slave [0]
          a        - 0|1, add hosts to PVM [1]
        """
        ## normal and error output
        self.fout = T.absfile( out )
        self.ferror = os.path.dirname(self.fout) +'/AmberEntropy_errors.log'
        self.debug = debug

        self.log = StdLog()

        ## input files and variable option
        self.rec = T.absfile( rec, 0 )
        self.lig = T.absfile( lig, 0 )
        self.com = T.absfile( com, 0 )
        self.cr  = cr
        self.cl  = None
        self.var    = var
        self.vrange = self.__vrange( vrange )
        self.jack = jack
        self.zfilter = zfilter
        self.n_members = None
        self.clean = clean
        self.all = all

        ## members to exclude, outliers will be added if zfilter != None
        ## Private copies: these lists can be extended in place while the
        ## trajectories are processed, which must neither leak into the
        ## shared [] defaults of this signature nor into the caller's lists.
        self.ex_frec = list( exrec ) if exrec is not None else []
        self.ex_flig = list( exlig ) if exlig is not None else []
        self.ex_com  = list( excom ) if excom is not None else []

        ## reserve for loaded reference models
        self.ref_frec = self.ref_flig = None
        self.ref_brec = self.ref_blig = self.ref_com  = None

        ## reserve for extracted member trajectories
        ## (one separate list per attribute, not one shared list object)
        self.members_frec, self.members_flig = [], []
        self.members_brec, self.members_blig = [], []
        self.members_com = []

        ## options to be passed on to AmberEntropist
        self.options = kw

        if not restart:
            ## Load trajectories, dump references, identify outliers
            self.processTrajs()

            ## prepare dict of protocols for AmberEntropist
            self.protocols = self.protocols_var_range( **kw )
            self.saveProtocols()

        ## NOTE(review): with restart set, self.protocols is not assigned in
        ## this method; presumably it is restored elsewhere -- confirm
        TrackingJobMaster.__init__(self, self.protocols,
                                   chunk_size=1,
                                   hosts=hosts,
                                   niceness=niceness,
                                   slave_script=slave_path,
                                   show_output=w,
                                   add_hosts=a)

        ## parenthesised form gives the same output with the print statement
        ## and the print function
        print("JobMaster initialized.")
Exemplo n.º 14
0
class AmberEntropyMaster(TrackingJobMaster):
    """
    Run many AmberEntropist calculations on many nodes. The Master has
    a standard set of 13 protocols to run on rec, lig, and com
    trajectories, as well as on every single member trajectory - in
    total 113.  It accepts one variable parameter, e.g. s(tart). Each
    protocol is then run for all values of the variable parameter. A
    protocol is simply a set of options that are passed on to the
    AmberEntropist (which is run from within AmberEntropySlave).
    Comparing the different protocols allows to more or less separate
    random from real correlations, rigid body from intermolecular
    vibrations, etc.

    Results are put into a tree-shaped dictionary of dictionaries. The
    first dimension/key is the member index -- None for the complete
    ensemble trajectory, 0 for the first member, etc. The second
    dimension/key is the name of the protocol, e.g. 'com_split' for
    the complex trajectory with seperately fitted receptor and
    ligand. The last dimension contains the different values obtained
    from the ptraj run, e.g. 'S_total' points to the total entropy in
    cal/mol/K, 'contributions' to the entropy contribution of each
    mode, 'T' to the assumed temperature, 'vibes' gives the number of
    vibrations with too low frequencies (according to ptraj). All these
    are lists of values - one for each value of the variable option.

    Example::
             * r[None]['fcom']['S_vibes'][0] -> float
               first vibr. Entropy of free fake complex for complete ensemble
             * r[0]['com']['S_total'] -> [ float, float, .. ]
               the total entropies of the complex calculated for the first
               ensemble member and the different values of the variable option
    """

    def __init__(self, rec=None, lig=None, com=None, out=None,
                 cr=None, var='s', vrange=[0], jack=0,
                 zfilter=None, clean=0, all=1,
                 exrec=[], exlig=[], excom=[],
                 hosts=cpus_all,
                 niceness=nice_dic,
                 w=0, a=1, debug=0,
                 restart=0,
                 **kw ):
        """
        @param rec: free rec trajectory              [required]
        @type  rec: str
        @param lig: free lig trajectory              [required]
        @type  lig: str
        @param com: complex trajectory               [required]
        @type  com: str
        @param out: file name for pickled result     [required]
        @type  out: str
        @param cr: chains of receptor in complex trajectory [n_chains rec]
        @type  cr: [int]

        @param var: name of variable option [ s ]
        @type  var: str
        @param vrange: set of values used for variable option
                       OR 'start:stop:step', string convertible to
                       range() input
        @type  vrange: [any]
        @param jack: set up leave-one-trajectory-out jackknife test
                     (default: 0) (replaces var with 'ex1' and vrange with
                     range(n_members+1), 0 meaning: exclude nothing)
        @type  jack: [0|1]

        @param zfilter: kick out outlier trajectories using z-score threshold
                        on RMSD trace (default: None->don't)
        @type  zfilter: float
        @param clean: remove pickled ref models and member trajectories
                      (default: 0)
        @type  clean: 0|1
        @param all: skip single member trajs (default: 1)
        @type  all: 0|1

        @param exrec: exclude certain members of receptor ensemble    [[]]
        @type  exrec: [int]
        @param exlig: exclude certain members of ligand  ensemble     [[]]
        @type  exlig: [int]
        @param excom: exclude certain members of complex ensemble     [[]]
        @type  excom: [int]

        @param hosts: nodes to be used (default: all known)
        @type  hosts: [str]
        @param niceness: nice values by host name, passed on to the Master
                         and used by getInitParameters
        @type  niceness: dict
        @param debug: don't delete output files (default: 0)
        @type  debug: 1|0
        @param restart: skip trajectory processing and protocol setup
                        (default: 0)
        @type  restart: 1|0

        @param kw: additional key=value parameters for AmberEntropist,
                   AmberCrdEntropist, Executor and Master.
        @type  kw: key=value pairs
        ::
          ... parameters for AmberEntropist
          cast    - 1|0, equalize free and bound atom content [1]
          s,e     - int, start and stop frame                 [0, to end]
          atoms   - [ str ], names of atoms to consider       [all]
          protein - 1|0, remove non-protein atoms             [0..don't]
          step    - int, frame offset                         [no offset]
          thin    - float, use randomly distributed fraction of frames [all]
                    (similar to step but perhaps better for entropy
                    calculations)
          ex      - [int] OR ([int],[int]), exclude member trajectories   [[]]
          ex_n    - int, exclude last n members  OR...                  [None]
          ex3     - int, exclude |ex3|rd triple of trajectories           [0]
                    (index starts with 1! 0 to exclude nothing)

          ... parameters for AmberCrdEntropist
          f_template - str, alternative ptraj input template  [default]

          ... parameters for Executor:
          log      - Biskit.LogFile, program log (None->STOUT)        [None]
          verbose  - 0|1, print progress messages to log     [log != STDOUT]

          ... parameters for Master
          w        - 0|1, show X window for each slave [0]
          a        - 0|1, add hosts to PVM [1]
        """
        ## normal and error output
        self.fout = T.absfile( out )
        self.ferror = os.path.dirname(self.fout) +'/AmberEntropy_errors.log'
        self.debug = debug

        self.log = StdLog()

        ## input files and variable option
        self.rec = T.absfile( rec, 0 )
        self.lig = T.absfile( lig, 0 )
        self.com = T.absfile( com, 0 )
        self.cr  = cr
        self.cl  = None
        self.var    = var
        self.vrange = self.__vrange( vrange )
        self.jack = jack
        self.zfilter = zfilter
        self.n_members = None
        self.clean = clean
        self.all = all

        ## members to exclude, outliers will be added if zfilter != None
        ## Private copies: equalizeMemberCount appends to these lists in
        ## place, which must neither leak into the shared [] defaults of
        ## this signature nor into the caller's lists.
        self.ex_frec = list( exrec ) if exrec is not None else []
        self.ex_flig = list( exlig ) if exlig is not None else []
        self.ex_com  = list( excom ) if excom is not None else []

        ## reserve for loaded reference models
        self.ref_frec = self.ref_flig = None
        self.ref_brec = self.ref_blig = self.ref_com  = None

        ## reserve for extracted member trajectories
        ## (one separate list per attribute, not one shared list object;
        ## members_com is read by cleanCache even if processTrajs never ran)
        self.members_frec, self.members_flig = [], []
        self.members_brec, self.members_blig = [], []
        self.members_com = []

        ## options to be passed on to AmberEntropist
        self.options = kw

        if not restart:
            ## Load trajectories, dump references, identify outliers
            self.processTrajs()

            ## prepare dict of protocols for AmberEntropist
            self.protocols = self.protocols_var_range( **kw )
            self.saveProtocols()

        ## NOTE(review): with restart set, self.protocols is not assigned in
        ## this method; presumably it is restored elsewhere -- confirm
        TrackingJobMaster.__init__(self, self.protocols,
                                   chunk_size=1,
                                   hosts=hosts,
                                   niceness=niceness,
                                   slave_script=slave_path,
                                   show_output=w,
                                   add_hosts=a)

        ## parenthesised form gives the same output with the print statement
        ## and the print function
        print("JobMaster initialized.")


    def __vrange( self, v ):
        """
        Interpret the vrange option -> [ int ] or [ float ]

        Accepted are a list or tuple of values, a 'start:stop:step' string
        (handed to N.arange) or a single value, which is wrapped into a
        one-element list.

        @param v: vrange option
        @type  v: list OR tuple OR str OR number

        @return: range option
        @rtype: [int] OR [float]
        """
        if isinstance( v, (list, tuple) ):
            return [ self.__float_int(x) for x in v ]

        if isinstance( v, str ) and ':' in v:
            v = tuple( [ self.__float_int(x) for x in v.split(':') ] )
            return N.arange( *v )

        ## single value: the result is iterated and measured with len() by
        ## protocols_var_range, so it has to be a sequence, too
        return [ self.__float_int( v ) ]


    def __float_int( self, v ):
        """
        Convert *v* to a number: int if the value has no fractional part,
        float otherwise.

        @param v: value (anything float() accepts)
        @type  v: any

        @return: converted value
        @rtype: int OR float
        """
        number = float( v )

        if number % 1. == 0:
            return int( number )

        return number


    def loadTraj( self, fname, outliers=None, refname=None  ):
        """
        Load trajectory from file, reduce it to protein atoms, identify
        outlier members and, if requested, dump the reference model and the
        single member trajectories.

        @param fname: path to trajectory
        @type  fname: str
        @param outliers: members already marked for exclusion; outliers
                         identified by z-score are added if self.zfilter is
                         set (default: None -> no pre-selected members)
        @type  outliers: [int]
        @param refname: file name for the pickled reference model
                        (default: None -> reference is not dumped)
        @type  refname: str

        @return: t, outliers, members -- the trajectory, the members to
                 exclude, and the member trajectory files (None if
                 self.all is set)
        @rtype: trajectory, [int], [str] OR None
        """
        ## fresh list per call instead of a shared default argument; the
        ## returned list is stored and extended by the caller
        if outliers is None:
            outliers = []

        self.log.add('Loading ' + fname )
        t = T.load( fname )

        t.ref.addChainId()
        t = t.compressAtoms( t.ref.maskProtein() )

        outliers = self.getOutliers( t, outliers )

        if refname:
            self.dumpMissing( t.ref, refname )

        members = None
        if not self.all:
            ## member files are named after the trajectory being loaded;
            ## naming them all after self.rec would make the ligand and
            ## complex calls pick up the receptor's member files
            members = self.dumpMembers( t, fname )

        return t, outliers, members


    def processTrajs( self ):
        """
        Load the rec, lig and com trajectories one after the other, dump
        their reference models and (via loadTraj) member trajectories, and
        collect the members to exclude. Afterwards the member counts are
        equalized and, if self.jack is set, the jackknife test is prepared.
        """
        ## free receptor
        self.ref_frec = self.nameRef( self.rec )
        traj, self.ex_frec, self.members_frec = self.loadTraj(
            self.rec, self.ex_frec, self.ref_frec )

        n_rec = traj.n_members
        if not self.cr:
            ## no receptor chains given -> all chains of the free receptor
            self.cr = range( traj.ref.lenChains( breaks=0 ) )
        del traj

        ## free ligand
        self.ref_flig = self.nameRef( self.lig )
        traj, self.ex_flig, self.members_flig = self.loadTraj(
            self.lig, self.ex_flig, self.ref_flig )

        n_lig = traj.n_members
        del traj

        ## complex: three reference files derived from the trajectory name
        base = T.stripSuffix( T.absfile( self.com, resolveLinks=0 ) )
        self.ref_com  = base + '_ref.complex'
        self.ref_blig = base + '_blig.model'
        self.ref_brec = base + '_brec.model'

        traj, self.ex_com, self.members_com = self.loadTraj(
            self.com, self.ex_com )

        n_com = traj.n_members

        if not self.cl:
            ## ligand chains: every chain of the complex that is not receptor
            self.cl = MU.difference( range(traj.ref.lenChains()), self.cr )

        rec = traj.ref.takeChains( self.cr, breaks=0 )
        lig = traj.ref.takeChains( self.cl, breaks=0 )
        del traj

        self.dumpMissing( Complex( rec, lig ), self.ref_com )
        self.dumpMissing( rec, self.ref_brec )
        self.dumpMissing( lig, self.ref_blig )

        self.equalizeMemberCount( n_rec, n_lig, n_com )

        if self.jack:
            self.prepareJackknife()


    def equalizeMemberCount( self, n_rec, n_lig, n_com ):
        """
        Ensure that the same number of member trajectories is kept from
        frec, flig, and com: the exclusion lists (self.ex_frec, self.ex_flig,
        self.ex_com) are extended in place, starting with member 0, until
        each ensemble retains as many members as the smallest one. The
        common count is stored in self.n_members.

        @param n_rec: number of receptor trajectories
        @type  n_rec: int
        @param n_lig: number of ligand trajectories
        @type  n_lig: int
        @param n_com: number of complex trajectories
        @type  n_com: int
        """
        excluded = ( self.ex_frec, self.ex_flig, self.ex_com )
        totals   = ( n_rec, n_lig, n_com )
        labels   = ( 'rec', 'lig', 'com' )

        ## number of members remaining after exclusion, per ensemble
        remaining = [ total - len( ex )
                      for ex, total in zip( excluded, totals ) ]

        ## lowest number of members after exclusion
        n_min = min( remaining )

        self.log.add('excluding non-outliers to match member count: ')

        for label, ex, n in zip( labels, excluded, remaining ):
            candidate = 0

            while n > n_min:
                self.log.write( '%s: ' % label )
                if candidate not in ex:
                    ex.append( candidate )
                    n -= 1
                    self.log.write('%i, ' % candidate )
                candidate += 1

            self.log.add('')

        self.n_members = n_min


    def prepareJackknife( self ):
        """
        Prepare leave-one-trajectory-out jackknife test: the variable option
        becomes 'ex1' and takes the values 0 .. n_members.
        """
        ## one value per member plus 0, which stands for: exclude nothing
        n_values = self.n_members + 1

        self.vrange = range( n_values )
        self.var = 'ex1'


    def nameRef( self, fname ):
        """
        Derive the name of the reference model file from a trajectory name.

        @param fname: trajectory file name
        @type  fname: str

        @return: *fname* passed through T.absfile (links not resolved) and
                 T.stripSuffix, with '_ref.model' appended
        @rtype: str
        """
        stem = T.stripSuffix( T.absfile( fname, resolveLinks=0 ) )

        return stem + '_ref.model'


    def nameRefCom( self, fname ):
        """
        Derive the name of the reference complex file from a trajectory name.

        @param fname: trajectory file name
        @type  fname: str

        @return: *fname* passed through T.absfile (links not resolved) and
                 T.stripSuffix, with '_ref.complex' appended
        @rtype: str
        """
        stem = T.stripSuffix( T.absfile( fname, resolveLinks=0 ) )

        return stem + '_ref.complex'


    def dumpMissing( self, o, fname ):
        """
        Pickle object *o* into file *fname* unless that file exists already.

        @param o: object to dump
        @type  o: any
        @param fname: target file name
        @type  fname: str

        @return: the unchanged file name
        @rtype: str
        """
        if not os.path.exists( fname ):
            ## nothing cached under this name yet -> create the pickle
            self.log.add('Saving ' + fname )
            T.dump( o, fname )
            return fname

        self.log.add('using existing ' + fname )
        return fname


    def getOutliers( self, traj, outlaws=None ):
        """
        Identify member trajectories that have moved much further than normal.

        If no z-score threshold (self.zfilter) is set, the given *outlaws*
        list is handed back unchanged.

        @param traj: Trajectory to analyze
        @type  traj: Trajectory
        @param outlaws: members already marked for exclusion
                        (default: None -> new empty list)
        @type  outlaws: [int]

        @return: member indices of outlier trajectories (plus outlaws)
        @rtype: [int]
        """
        ## A fresh list per call: the returned list ends up in self.ex_* and
        ## is extended in place by equalizeMemberCount, which would otherwise
        ## modify a shared default argument.
        if outlaws is None:
            outlaws = []

        if not self.zfilter:
            return outlaws

        outliers = N.nonzero( traj.outliers( z=self.zfilter,
                                             mask=traj.ref.maskCA(), step=10) )
        self.log.add('identified %i outliers with z-threshold %3.1f' %
                     ( len(outliers), self.zfilter ) )

        return MU.union( outliers, outlaws )


    def dumpMembers( self, traj, fname  ):
        """
        Pickle every member trajectory of an ensemble into its own file.
        Files that already exist are re-used and not overwritten.

        @param traj: ensemble trajectory to split up
        @type  traj: Trajectory
        @param fname: trajectory file name - the member file names are derived
                      from it ('<fname w/o suffix>_member_NN.traj')
        @type  fname: str

        @return: list of member trajectory files
        @rtype: [str]
        """
        base = T.stripSuffix( T.absfile( fname, resolveLinks=0 ) )

        files = []
        for index in range( traj.n_members ):
            target = base + '_member_%02i.traj' % index
            files.append( target )

            if os.path.exists( target ):
                self.log.add('using existing ' + target )
                continue

            self.log.write('saving ' + target + '...')
            T.dump( traj.takeMember( index ), target )
            self.log.add('done')

        return files


    def getInitParameters(self, slave_tid):
        """
        Assemble the parameters that are handed over to a slave once.

        The nice value is looked up by the slave's host name in
        C{self.niceness}, falling back to the 'default' entry and finally
        to 0.

        @param slave_tid: slave task id
        @type  slave_tid: int

        @return: dictionary with init parameters (keys 'ferror', 'debug',
                 'nice' and 'host')
        @rtype: {param:value}
        """
        host = self.hostnameFromTID( slave_tid )
        fallback = self.niceness.get( 'default', 0 )

        params = { 'ferror':self.ferror, 'debug':self.debug }
        params['nice'] = self.niceness.get( host, fallback )
        params['host'] = host

        return params


    def cleanup( self ):
        """
        Tidy up: remove the cache files, but only if C{self.clean} is set.
        """
        if not self.clean:
            return

        self.cleanCache()

    def cleanCache( self ):
        """
        Remove left-over cache files: the five reference files followed by
        all member files (free rec / lig, bound rec / lig, complex).
        The outcome of every removal attempt is written to the log.
        """
        doomed = [ self.ref_frec, self.ref_flig, self.ref_com,
                   self.ref_brec, self.ref_blig ]

        for batch in ( self.members_frec + self.members_flig,
                       self.members_brec + self.members_blig,
                       self.members_com ):
            doomed.extend( batch )

        for path in doomed:
            ## T.tryRemove result is reported as integer (%i)
            status = T.tryRemove( path )
            self.log.add('removing %s: %i' % (path, status) )


    def saveProtocols( self ):
        """
        Dump C{self.protocols} to a file next to the result file; the file
        name is derived from C{self.fout} (suffix stripped, with
        '_protocols.dat' appended).
        """
        prefix = T.stripSuffix( T.absfile( self.fout ) )
        target = prefix + '_protocols.dat'

        self.log.write( 'Saving parameters to %s...' % target )
        T.dump( self.protocols, target )


    def done(self):
        """
        Collect the result tree (see L{getResult}) and dump it to
        C{self.fout}.
        """
        ## result is assembled before anything is announced in the log
        result_tree = self.getResult()

        self.log.add( "Saving result to %s..." % self.fout )
        T.dump( result_tree, self.fout )
        self.log.add( "Done" )


    ##
    ## Assemble the protocols for many AmberEntropist runs
    ##
    def __cpupdate( self, d1, d2 ):
        """
        Merge two dictionaries without modifying either of them.

        @param d1: base dictionary (is shallow-copied)
        @type  d1: dict
        @param d2: entries that are added to, or override those of, d1
        @type  d2: dict

        @return: shallow copy of *d1* updated with the content of *d2*
        @rtype: dict
        """
        merged = copy.copy( d1 )
        merged.update( d2 )

        return merged

    def protocols_standard( self, trec, tlig, tcom,
                            ex_frec=None, ex_flig=None, ex_com=None,
                            doshift=1,
                            **options ):
        """
        Create the parameter sets for AmberEntropist that cover the
        calculation of rec, lig, com and fcom entropies with and without
        splitting of the complex, with and without shifting and shuffling of
        frames. Ten sets are generated -- nine if *doshift* is switched off.
        The 'fcom_shift', 'com_shuff' and 'com_shift' protocols are
        currently disabled.

        @param trec: trajectory of the free receptor
                     (presumably a file name -- it is formatted with '%s')
        @type  trec: str
        @param tlig: trajectory of the free ligand
        @type  tlig: str
        @param tcom: trajectory of the complex
        @type  tcom: str
        @param ex_frec: value of the 'ex' option for the free receptor
                        (default: None)
        @type  ex_frec: any
        @param ex_flig: value of the 'ex' option for the free ligand
                        (default: None)
        @type  ex_flig: any
        @param ex_com: value of the 'ex' option for the complex
                       (default: None)
        @type  ex_com: any
        @param doshift: also create the 'com_split_shift' protocol
                        (default: 1)
        @type  doshift: 1|0
        @param options: additional options (like cast, s, e, atoms, thin, step)
                        that are the same in all parameter sets
        @type  options: key=value

        @return: each value of the returned dict contains a set of
                 arguments for one AmberEntropist run
        @rtype: dict of dict
        """
        merge = self.__cpupdate

        ## defaults shared by all protocols, overridden by caller's options
        base = { 'ref':None, 'cast':1, 'chains':None,
                 'split':0, 'shift':0, 'shuffle':0, 'ex_n':0, 'ex3':None,
                 'thin':None, 'step':1, 'ss':0, 'se':None, 'atoms':None }
        base.update( options )

        prots = {}

        ## free and bound receptor / ligand; free trajectories use the bound
        ## reference and vice versa
        prots['frec'] = merge( base, dict( traj=trec, ref=self.ref_brec,
                                           ex=ex_frec ) )
        prots['flig'] = merge( base, dict( traj=tlig, ref=self.ref_blig,
                                           ex=ex_flig ) )
        prots['brec'] = merge( base, dict( traj=tcom, ref=self.ref_frec,
                                           ex=ex_com, chains=self.cr ) )
        prots['blig'] = merge( base, dict( traj=tcom, ref=self.ref_flig,
                                           ex=ex_com, chains=self.cl ) )

        ## fake complex combined from free receptor and free ligand
        fake_com = merge( base, dict( traj='%s+%s'%(trec, tlig),
                                      ex=(ex_frec, ex_flig),
                                      ref=self.ref_com, split=1 ) )
        prots['fcom'] = fake_com
        prots['fcom_shuff'] = merge( fake_com, {'shuffle':1 } )

        ## real complex, referenced to the combined free references
        real_com = merge( base, dict( traj=tcom, ex=ex_com,
                          ref='%s+%s' % (self.ref_frec, self.ref_flig) ) )
        prots['com'] = real_com

        ## the complex is split at the first entry of self.cl
        border = self.cl[0]

        prots['com_split'] = merge( real_com, dict( split=1, border=border ) )
        prots['com_split_shuff'] = merge( real_com,
                                  dict( split=1, shuffle=1, border=border ) )
        if doshift:
            prots['com_split_shift'] = merge( real_com,
                                    dict( split=1, shift=1, border=border ) )

        return prots


    def protocols_single_all( self, **options ):
        """
        Set of protocols for the all-member trajectories AND, unless
        C{self.all} is set, for every single-member trajectory, each with
        the different shuffle, shift, split settings generated by
        L{protocols_standard}. Single-member protocols are created without
        shifting (doshift=0).

        @param options: additional options (like cast, s, e, atoms, thin, step)
                        that are the same in all parameter sets
        @type  options: key=value

        @return: each value of the returned dict contains a set of arguments
                 for one AmberEntropist run, each key is a tuple of the
                 member index and the protocol name, i.e. (0, 'fcom_shuff')
                 The set of protocols for all-member trajectories has member
                 index None.
        @rtype: dict of dict
        """
        collected = {}

        ## all-member protocols are filed under member index None
        shared = self.protocols_standard( self.rec, self.lig, self.com,
                                          self.ex_frec, self.ex_flig,
                                          self.ex_com, **options )
        for name, prot in shared.items():
            collected[ (None, name) ] = prot

        if self.all:
            return collected

        ## single-member protocols are filed under their member index
        for i, frec in enumerate( self.members_frec ):
            single = self.protocols_standard( frec,
                                              self.members_flig[i],
                                              self.members_com[i], doshift=0,
                                              **options )
            for name, prot in single.items():
                collected[ (i, name) ] = prot

        return collected


    def protocols_var_range( self, **options ):
        """
        Complete set of protocols also considering the different values of
        the variable option: L{protocols_single_all} is evaluated once for
        each value in C{self.vrange} assigned to the option C{self.var}.

        @param options: additional options that are the same in all
                        parameter sets
        @type  options: key=value

        @return: protocols indexed by tuples (value, member index,
                 protocol name)
        @rtype: dict of dict
        """
        msg = 'variable option %s with %i values' \
              % (self.var, len(self.vrange))
        self.log.add( msg )

        combined = {}
        for value in self.vrange:
            ## caller's options stay untouched, only the copy gets the value
            opts = copy.copy( options )
            opts[ self.var ] = value

            per_member = self.protocols_single_all( **opts )

            ## prefix each (member, protocol) key with the current value
            combined.update( [ ((value,) + key, prot)
                               for key, prot in per_member.items() ] )

        return combined

    ##
    ## Re-organize results
    ##
    def dictionate( self, d ):
        """
        Take dict with tuple keys (value, int_member, str_protocol) and build
        a tree-like dict of dicts in which the values of d can be accessed
        like::
          d[value][int_member][str_protocol]

        Keys may be tuples of any length >= 1 but are expected to all have
        the same length; keys of length 1 are converted into non-tuple keys.
        An empty dict yields an empty dict. *d* itself is not modified.

        @param d: the raw results accumulated from the slave nodes
        @type d: dict

        @return: tree-like dict ordered by variable value, member, protocol
        @rtype: dict of dict of dict of dict
        """
        r = {}

        ## single pass: walk (and create on demand) one nesting level for
        ## each leading key item, then file the value under the last item
        for key in d:
            node = r
            for item in key[:-1]:
                node = node.setdefault( item, {} )

            node[ key[-1] ] = d[ key ]

        return r


    def getResult( self, **arg ):
        """
        Collapse the results for different values of the variable parameter
        into lists and put the results into a tree ala::
          r[ member_index ][ protocol_name ][ result_field ] -> [ values ]

        The lists are ordered like the sorted values of the variable
        parameter. The tree additionally contains the entries 'var',
        'vrange' and 'protocols' and is also stored in C{self.result_tree}.
        A run that is missing for any of the values only triggers a warning;
        its protocol entry is then left empty or incomplete. An empty
        C{self.result} yields a tree with only the three extra entries.

        @param arg: not used
        @type  arg: key=value

        @return: tree-like dict ordered by member, protocol, result field
        @rtype: dict of dict of dict of lists
        """
        ## nothing to re-organize without results
        tree = {}
        if self.result:
            tree = self.dictionate( self.result )

        ## sorted() also accepts dict views, unlike keys().sort()
        vvalues = sorted( tree.keys() )

        ## the (value, member, protocol) keys recorded for the first value
        ## define which members and protocols are reported
        sub_keys = []
        if vvalues:
            sub_keys = [ k for k in self.result.keys()
                         if k[0] == vvalues[0] ]

        r = {}
        for v0, member, protcl in sub_keys:

            try:
                if not member in r:
                    r[member] = {}

                r[member][protcl] = {}

                run_dic = tree[v0][member][protcl]

                for k in run_dic.keys():
                    r[member][protcl][k] = [ tree[v][member][protcl][k] \
                                             for v in vvalues ]

            ## best effort -- but don't swallow KeyboardInterrupt / SystemExit
            except Exception:
                EHandler.warning('missing result: ' + str(T.lastError()))

        r['var'] = self.var
        r['vrange']= self.vrange
        r['protocols'] = self.protocols

        self.result_tree = r
        return r