Example #1
def main():
    args = docopt.docopt(__doc__)
    workspace = ws.workspace_from_dir(args['<workspace>'])
    script_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '..', 'rifdock',
            'cluster_sge.py')
    if not os.path.exists(script_path):
        raise Exception("Error: {} does not exist.".format(script_path))
    if args['--target']:
        targets = [workspace.target_rifdock_path(args['--target'])]
    else:
        targets = workspace.all_rifdock_workspaces

    for target in targets:

        rif_workspace = ws.workspace_from_dir(target)
        scaffolds = workspace.scaffolds
        ntasks = len(scaffolds)

        cmd = workspace.python_path, script_path
        cmd += target,

        if args['--task']:
            cmd += '--task', args['--task']
            ntasks = 1
            if args['--local']:
                utils.run_command(cmd)
                sys.exit()

        if args['--local']:
            for n in range(1, ntasks + 1):
                local_cmd = deepcopy(cmd)
                if not args['--task']:
                    local_cmd += '--task', str(n),
                utils.run_command(local_cmd)
        else:
            script_name = 'cluster'
            print('Submitting jobs for {}'.format(target))
            # submit.submit(rif_workspace, cmd, distributor='sge',
            #               make_dirs=args['--make-dirs'],
            #               test_run=args['--test-run'], clear=args['--clear'],
            #               ntasks=ntasks)
            if args['--clear']:
                rif_workspace.clear_cluster_outputs()
            print('Submitting the following command to SGE:')
            print(' '.join(cmd))
            # Call big_jobs.submit directly, so that it doesn't care
            # about unclaimed inputs
            big_jobs.submit(
                    rif_workspace, cmd,
                    nstruct=ntasks,
                    max_runtime=args['--max-runtime'],
                    max_memory=args['--max-memory'],
                    test_run=False,
                    job_name=script_name,
                    create_job_info=False,
                    )
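
A note on the command-building idiom used throughout these examples: cmd is a plain tuple, so every += must append another tuple, which is why the trailing commas matter. A minimal, self-contained sketch (the interpreter and script paths are made up):

cmd = ('/usr/bin/python3', 'cluster_sge.py')  # hypothetical paths
cmd += 'path/to/target',   # trailing comma makes a one-element tuple
cmd += '--task', '3'       # a two-element tuple concatenates the same way
# cmd += '--local'         # TypeError: can only concatenate tuple (not "str") to tuple
print(' '.join(cmd))       # /usr/bin/python3 cluster_sge.py path/to/target --task 3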
Example #2
def initiate():

    workspace = ws.workspace_from_dir(sys.argv[1])
    workspace.cd_to_root()

    try:
        print('Trying qsub')
        # Collect info about the currently running SGE array job.
        print_debug_header()
        job_info = read_job_info(workspace.job_info_path(os.environ['JOB_ID']))
        job_info['job_id'] = int(os.environ['JOB_ID'])
        job_info['task_id'] = int(os.environ['SGE_TASK_ID']) - 1
    except Exception:
        try:
            print('Trying slurm')
            # If not qsub, slurm?
            job_info = read_job_info(workspace.slurm_cmd_file)
            print('Read job info')
            job_info['task_id'] = int(sys.argv[2])
            print('Assigned task id')
        except Exception:
            print('Trying local')
            # Apparently this is a local job.
            # TODO: Need a better way to get job info for local jobs.
            job_info = {
                'inputs': [x for x in workspace.unclaimed_inputs],
                'nstruct': 1,
                'test_run': False,
                'task_id': None
            }

    return workspace, job_info
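
The fallback chain above (qsub, then Slurm, then local) keys off scheduler environment variables. A simplified sketch of just the task-index detection, without the workspace machinery; note the original reads the Slurm index from sys.argv[2], while SLURM_ARRAY_TASK_ID shown here is the scheduler's own variable:

import os
import sys

def detect_task_id():
    # SGE array jobs export JOB_ID and SGE_TASK_ID (1-based).
    if 'SGE_TASK_ID' in os.environ:
        return int(os.environ['SGE_TASK_ID']) - 1
    # Slurm array jobs export SLURM_ARRAY_TASK_ID.
    if 'SLURM_ARRAY_TASK_ID' in os.environ:
        return int(os.environ['SLURM_ARRAY_TASK_ID'])
    # Local run: take the index from the command line, if given.
    if len(sys.argv) > 2:
        return int(sys.argv[2])
    return None  # local job with no array index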
Example #3
def main():
    args = docopt.docopt(__doc__)
    workspace = ws.workspace_from_dir(args['<workspace>'])
    script_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               '..', 'rifdock', 'run_sge.py')
    if not os.path.exists(script_path):
        raise Exception("Error: {} does not exist.".format(script_path))
    if args['--target']:
        targets = [workspace.target_rifdock_path(args['--target'])]
    else:
        targets = workspace.all_rifdock_workspaces

    for target in targets:

        rif_workspace = ws.workspace_from_dir(target)
        inputs = rif_workspace.unclaimed_inputs
        ntasks = len(inputs)

        cmd = workspace.python_path, script_path
        cmd += target,

        if args['--task']:
            cmd += '--task', args['--task']
            ntasks = 1

        if args['--local']:
            for n in range(1, ntasks + 1):
                # Copy per iteration so '--task' flags don't accumulate.
                local_cmd = deepcopy(cmd)
                if not args['--task']:
                    local_cmd += '--task', str(n),
                utils.run_command(local_cmd)
        else:
            cmd += '--sge',
            print('Submitting jobs for {}'.format(target))
            submit.submit(
                rif_workspace,
                cmd,
                distributor='sge',
                make_dirs=args['--make-dirs'],
                test_run=args['--test-run'],
                clear=args['--clear'],
            )
Example #4
def main():
    args = docopt.docopt(__doc__)
    workspace = ws.workspace_from_dir(args['<workspace>'])
    pdbfolder = args['<pdbfolder>']

    script_path = os.path.join(
            os.path.abspath(os.path.dirname(os.path.realpath(__file__))),
            '..', 'matching', 'scan_pdb_folder.py')
    if not os.path.exists(script_path):
        raise Exception("Error: {} does not exist.".format(script_path))

    if args['--sge']:
        args['--local'] = False
    if args['--clear']:
        workspace.clear_database()

    cmd = workspace.python_path, script_path
    cmd += workspace.root_dir, args['<pdbfolder>']
    argpass = ['--ntasks']
    for arg in argpass:
        cmd += arg, str(args[arg])
    if args['--recursive']:
        cmd += '--recursive',
    if args['--split-chains']:
        cmd += '--split-chains',
    ntasks = int(args['--ntasks'])

    if args['--local']:
        if args['--task']:
            local_cmd = deepcopy(cmd)
            local_cmd += '--task', args['--task']
            utils.run_command(local_cmd)
        else:
            for n in range(1, ntasks + 1):
                local_cmd = deepcopy(cmd)
                local_cmd += '--task', str(n)
                utils.run_command(local_cmd)

    else:
        script_name = 'scan_helices'
        print('Submitting the following command to SGE:')
        print(' '.join(cmd))
        big_jobs.submit(workspace,
                        cmd,
                        nstruct=ntasks,
                        max_runtime=args['--max-runtime'],
                        max_memory=args['--max-memory'],
                        test_run=False,
                        create_job_info=False)
Example #5
def main():
    args = docopt.docopt(__doc__)
    print(args)
    df = pd.DataFrame()
    init('-ignore_unrecognized_res')

    if 'SGE_TASK_ID' in os.environ:
        idx = int(os.environ['SGE_TASK_ID']) - 1
    else:
        idx = int(args['--task']) - 1

    # Here '--ntasks' acts as the size of each task's block of files.
    ntasks = int(args['--ntasks'])
    start = idx * ntasks
    stop = idx * ntasks + ntasks - 1
    print('START: {}'.format(start))
    print('STOP: {}'.format(stop))
    folder = args['<pdbfolder>']
    workspace = ws.workspace_from_dir(args['<workspace>'])

    exts = ('.pdb', '.pdb.gz')
    if args['--recursive']:
        files = [
            str(p) for p in Path(folder).rglob('*') if p.name.endswith(exts)
        ]
    else:
        files = [
            str(p) for p in Path(folder).iterdir() if p.name.endswith(exts)
        ]

    print(files)

    # Process only this task's slice of the file list.
    for f in files[start:stop + 1]:
        pose = pose_from_file(f)
        scanner = PoseScanner(pose)
        posename = os.path.basename(f).split('.')[0]
        helices = pd.DataFrame(
            scanner.scan_pose_helices(name=posename,
                                      split_chains=args['--split-chains']))
        df = pd.concat([df, helices], ignore_index=True)

    today = date.today()
    outdir = os.path.join(workspace.project_params_dir, 'database')
    os.makedirs(outdir, exist_ok=True)
    out = os.path.join(
        outdir, 'helixdf_custom_{0}.'.format(today.strftime("%m-%d-%Y")))
    df.to_pickle(out + 'pkl')
    # Comment this out later - no need for csv in final
    df.to_csv(out + 'csv')
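
For clarity, the slice arithmetic above partitions the file list into contiguous blocks, one block per array task. A worked example with dummy file names:

idx = 2                               # e.g. SGE_TASK_ID=3, zero-based
ntasks = 10                           # block size per task
start = idx * ntasks                  # 20
stop = idx * ntasks + ntasks - 1      # 29
files = ['model_{}.pdb'.format(i) for i in range(100)]  # dummy list
print(files[start:stop + 1])          # this task's ten files, 20..29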
Example #6
def main():
    args = docopt.docopt(__doc__)
    workspace = ws.workspace_from_dir(args['<workspace>'])
    script_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               '..', 'matching', 'matcher.py')
    if not os.path.exists(script_path):
        raise Exception("Error: {} does not exist.".format(script_path))

    # with open(workspace.settings, 'r') as stream:
    #     try:
    #         settings = yaml.safe_load(stream)
    #     except yaml.YAMLError as exc:
    #         print(exc)

    settings = workspace.settings
    check_overwrite = ['--length', '--angstroms', '--degrees']
    # For these settings, use command-line arguments over inputs from
    # settings file.
    for setting in check_overwrite:
        if args[setting]:
            settings['match'][setting] = args[setting]

    db_origin = 'default'
    if not args['--database']:
        # This is the default database path. If it has not been
        # modified, make sure to append the subdirectory which
        # corresponds to the actual database. This code may need to be
        # modified in the future depending on what other database types
        # end up in the default package.
        if not workspace.is_default_database(settings['match']['--database']):
            # Database is not default, therefore do not default to
            # project-params.
            database = settings['match']['--database']
            db_origin = 'custom settings'
        else:
            # If the settings database is default, see if there is a
            # non-default database in the workspace (usually
            # project_params)
            if not workspace.is_default_database(workspace.database_path):
                database = workspace.database_path
                db_origin = 'project_params/database'
            # Otherwise, use the default settings database.
            else:
                database = settings['match']['--database']

    else:
        database = args['--database']
        db_origin = 'command argument'

    dbexp = 'bins_.*A_.*D'
    match = False
    for subfolder in os.listdir(database):
        if re.match(dbexp, subfolder):
            match = True
            break

    if not match:
        if args['--length']:
            db_subdir = 'length'
        else:
            db_subdir = 'standard'
        database = os.path.join(database, db_subdir)

    if not os.path.exists(database):
        sys.exit("Could not find database at {}. Make sure your database "\
                "path is correct. Database determined via "\
                "{}.".format(database, db_origin))

    else:
        print("Matching using database at {}, determined via "\
                "{}".format(database, db_origin))

    if args['--target']:
        targets = [os.path.abspath(args['--target'])]
    else:
        targets = workspace.all_rifdock_workspaces

    for target in targets:
        match_workspace = ws.MatchWorkspace(workspace.root_dir, target)
        match_workspace.make_dirs()

        if args['--clear']:
            match_workspace.clear_outputs()

        if args['--make-dirs']:
            continue

        cmd = match_workspace.python_path, script_path
        cmd += 'match', match_workspace.focus_dir
        # cmd += match_workspace.target_clusters(target),
        for setting in settings['match']:
            if setting != '--database':
                cmd += setting, settings['match'][setting]
            else:
                cmd += setting, database
        if args['--tasks']:
            cmd += '--tasks', args['--tasks']

        if args['--scaffold']:
            cmd += '--scaffold', args['--scaffold']

        if args['--local']:
            cmd += '--local',
            utils.run_command(cmd)
            continue
        else:
            script_name = 'matcher'
            print('Submitting jobs for {}'.format(target))
            big_jobs.submit(
                match_workspace,
                cmd,
                nstruct=args['--tasks'],
                max_memory=args['--max-memory'],
                max_runtime=args['--max-runtime'],
                test_run=False,
                job_name=script_name,
                create_job_info=False,
            )
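
The database-selection comments above describe a precedence order that is easy to lose in the nesting. A distilled sketch of the same logic (not the package's API, just the decision as a standalone function):

def resolve_database(cli_db, settings_db, workspace_db, is_default):
    """Pick a database path: CLI flag first, then a customized settings
    path, then a customized workspace path, then the packaged default."""
    if cli_db:
        return cli_db, 'command argument'
    if not is_default(settings_db):
        return settings_db, 'custom settings'
    if not is_default(workspace_db):
        return workspace_db, 'project_params/database'
    return settings_db, 'default'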
Example #7
def main():
    args = docopt.docopt(__doc__)
    print(args)

    if args['--settings']:
        # Deprecated; settings are handled by the submission command.
        import yaml
        runtype = 'bin' if args['bin'] else 'match'
        with open(args['--settings']) as f:
            settings = yaml.safe_load(f)
        print(settings)
        for option in settings[runtype]:
            args[option] = settings[runtype][option]
        print(args)

    dbpath = os.path.join(
        args['--database'], "bins_{}A_{}D".format(float(args['--angstroms']),
                                                  float(args['--degrees'])))
    if args['bin']:
        lookup = HelixBin(pd.read_pickle(args['<helix_dataframe>']),
                          exposed_cutoff=0.3,
                          length_cutoff=10.8,
                          angstroms=float(args['--angstroms']),
                          degrees=float(args['--degrees']),
                          verbose=args['--verbose'])
        lookup.bin_db(outdir=dbpath, bin_length=args['--length'])
    if args['match']:
        # import scan_helices
        from helix.matching import scan_helices
        workspace = ws.workspace_from_dir(args['<match_workspace>'])
        # Import pdb
        if args['--scaffold']:
            pdbfolders = [workspace.scaffold_clusters(args['--scaffold'])]
        else:
            pdbfolders = workspace.all_scaffold_clusters
        init()

        if not args['--scaffold'] and \
                os.path.exists(workspace.all_scaffold_dataframe):
            all_helices = pd.read_pickle(workspace.all_scaffold_dataframe)
        else:
            all_helices = []
            for pdbfolder in pdbfolders:
                # helicepath = os.path.join(pdbfolder, 'query_helices.pkl')
                helicepath = workspace.scaffold_dataframe(pdbfolder)
                if os.path.exists(helicepath):
                    helices = pd.read_pickle(helicepath)
                else:
                    folder_helices = []
                    import glob
                    gz = glob.glob(pdbfolder + '/*.pdb.gz')
                    dotpdb = glob.glob(pdbfolder + '/*.pdb')
                    gz.extend(dotpdb)
                    pdbs = sorted(gz)
                    for path in pdbs:
                        # First chain is the docked helix
                        pose = pose_from_file(path).split_by_chain(1)

                        # Scan pdb helices
                        scanner = scan_helices.PoseScanner(pose)
                        helices = scanner.scan_pose_helices(name='query',
                                                            split_chains=False,
                                                            path=path)
                        folder_helices.extend(helices)
                    helices = pd.DataFrame(folder_helices)
                    helices.to_pickle(helicepath)
                # Append in both branches so cached dataframes count too.
                all_helices.append(helices)
            all_helices = pd.concat(all_helices, ignore_index=True)
            if not args['--scaffold']:
                # Only cache the combined dataframe when every scaffold
                # was scanned.
                all_helices.to_pickle(workspace.all_scaffold_dataframe)

        print("HELICES")
        print(all_helices)
        print(all_helices['vector'])

        # Bin pdb helices
        query = HelixBin(all_helices,
                         exposed_cutoff=0.3,
                         length_cutoff=10.8,
                         angstroms=float(args['--angstroms']),
                         degrees=float(args['--degrees']),
                         verbose=args['--verbose'])
        query_bins = query.bin_db(bin_length=args['--length'])
        print('QUERY BINS')
        print(query_bins)

        # Match
        # name = os.path.basename(path).split('.')[0]
        name = 'query'
        print('Database:')
        print(dbpath)
        matcher = HelixLookup(dbpath,
                              query_bins,
                              name=name,
                              verbose=args['--verbose'])
        if args['--local']:
            matcher.submit_local(workspace.output_dir)
        elif args['--tasks']:
            matcher.submit_cluster(workspace.output_dir, int(args['--tasks']))
        else:
            matcher.submit_cluster(workspace.output_dir, 1)
Example #8
import pymol
import sys, os, glob
from helix import workspace as ws

workspace = ws.workspace_from_dir(sys.argv[1])
if 'SGE_TASK_ID' in os.environ:
    task = int(os.environ['SGE_TASK_ID']) - 1
else:
    task = int(sys.argv[2]) - 1
targets = workspace.targets
target = targets[task]
workspace = ws.RIFWorkspace(workspace.root_dir, target)
# folders = sorted(glob.glob(parent + '/*_output'))
# folder = folders[task - 1]
print('Aligning for folder {}'.format(target))
pdbs = sorted(glob.glob(workspace.focus_dir + '/*/docked_full/*.pdb.gz'))

for pdb in pdbs:
    print('Aligning {} to {}'.format(pdb, workspace.target_path))
    pymol.cmd.reinitialize()
    pymol.cmd.load(workspace.target_path, 'target')
    pymol.cmd.load(pdb, 'mobile')
    pymol.cmd.align('mobile and not chain A', 'target')
    pymol.cmd.save(pdb, 'mobile')
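
pymol.cmd.align returns its alignment statistics as a 7-item result whose first element is the RMSD after refinement; the loop above discards it, but printing it is a cheap sanity check. A sketch using the same object names as above:

result = pymol.cmd.align('mobile and not chain A', 'target')
print('RMSD {:.2f} A over {} atoms'.format(result[0], result[1]))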
Example #9
def main():
    args = docopt.docopt(__doc__)
    workspace = ws.workspace_from_dir(args['<workspace>'])
    script_path = os.path.join(
        os.path.abspath(os.path.dirname(os.path.realpath(__file__))), '..',
        'matching', 'matcher.py')

    settings = workspace.settings
    check_overwrite = ['--length', '--angstroms', '--degrees']
    # For these settings, use command-line arguments over inputs from
    # settings file.
    for setting in check_overwrite:
        if args[setting]:
            settings['match'][setting] = args[setting]

    db_origin = 'default'
    if not args['--database']:
        # This is the default database path. If it has not been
        # modified, make sure to append the subdirectory which
        # corresponds to the actual database. This code may need to be
        # modified in the future depending on what other database types
        # end up in the default package.
        if not workspace.is_default_database(settings['match']['--database']):
            print(settings['match']['--database'])
            # Database is not default, therefore do not default to
            # project-params.
            database = settings['match']['--database']
            db_origin = 'custom settings'
        else:
            # If the settings database is default, see if there is a
            # non-default database in the workspace (usually
            # project_params)
            if not workspace.is_default_database(workspace.database_path):
                database = workspace.database_path
                db_origin = 'project_params/database'
            # Otherwise, use the default settings database.
            else:
                database = settings['match']['--database']

    else:
        database = args['--database']
        db_origin = 'command argument'

    print('Database at {} being used based on {}'.format(database, db_origin))

    if args['--length']:
        out = os.path.join(database, 'length')
    else:
        out = os.path.join(database, 'standard')

    os.makedirs(out, exist_ok=True)
    picklepath = glob.glob(os.path.join(database, 'helixdf*.pkl'))
    if len(picklepath) > 1:
        print("Multiple helix dataframes found in {0}! Please consolidate "\
                "them or remove extraneous pkl files beginning with "\
                "'helixdf'.".format(database))
        sys.exit()
    elif len(picklepath) == 0:
        print("No helix dataframe ('helixdf*.pkl') found in the provided "\
                "database path ({0}). Please run the 'helix scan' command "\
                "before binning.".format(database))
        sys.exit()

    helixdf = picklepath[0]

    cmd = workspace.python_path, script_path
    cmd += 'bin', helixdf
    cmd += '--database', out
    for setting in settings['match']:
        if setting != '--database':
            cmd += setting, settings['match'][setting]

    if args['--ntasks']:
        cmd += '--tasks', args['--ntasks']

    if args['--local']:
        cmd += '--local',
        utils.run_command(cmd)

    else:
        script_name = 'helixbin'
        big_jobs.submit(workspace,
                        cmd,
                        nstruct=args['--ntasks'],
                        max_memory=args['--max-memory'],
                        max_runtime=args['--max-runtime'],
                        test_run=False,
                        job_name=script_name,
                        create_job_info=False)
Example #10
    def calculate_rmsd(self, design_1, design_2):
        # assert len(design_1.backbone_coords) and len(design_2.backbone_coords)
        difference = design_1.backbone_coords - design_2.backbone_coords
        num_atoms = design_1.backbone_coords.shape[0]

        return np.sqrt(np.sum(difference**2) / num_atoms)


if __name__ == '__main__':
    # folders = sorted(glob.glob(sys.argv[1] + '/*_output'))
    args = docopt.docopt(__doc__)
    workspace = ws.workspace_from_dir(args['<rif_workspace>'])
    assert type(workspace).__name__ == 'RIFWorkspace'

    if 'SGE_TASK_ID' in os.environ:
        task = int(os.environ['SGE_TASK_ID']) - 1
    elif args['--task']:
        task = int(args['--task']) - 1
    else:
        task = 0
    # targets = workspace.targets
    helices = workspace.scaffolds
    helix = helices[task]
    # folders = workspace.patches
    # workspace = ws.RIFWorkspace(workspace.root_dir, targets[task])

    # for helixlength in [3,4,6,8]:
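
The calculate_rmsd method in this example is the standard root-mean-square deviation over backbone atoms: the squared coordinate differences are summed and divided by the atom count before taking the square root. A self-contained check with dummy coordinates:

import numpy as np

a = np.zeros((3, 3))                  # three backbone atoms at the origin
b = a + np.array([1.0, 0.0, 0.0])     # same atoms shifted 1 A along x
difference = a - b
rmsd = np.sqrt(np.sum(difference**2) / a.shape[0])
print(rmsd)                           # 1.0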
Example #11
def main():
    init()
    args = docopt.docopt(__doc__)
    root_workspace = ws.workspace_from_dir(args['<workspace>'])
    # posefile = os.path.abspath(args['<target_pdb>'])
    if args['--target']:
        targets = [args['--target']]
    else:
        targets = root_workspace.targets
    chainmap = None
    if args['--chainmap']:
        with open(args['--chainmap']) as file:
            chainmap = yaml.safe_load(file)
    for target in targets:
        workspace = ws.RIFWorkspace(args['<workspace>'], target)
        workspace.make_dirs()
        pose = pose_from_file(workspace.initial_target_path)
        chain = None
        if chainmap:
            chain = chainmap[workspace.focus_name]
        elif args['--chain']:
            chain = args['--chain']

        if chain:
            print('MAKING PATCHES FOR CHAIN {}'.format(chain))
            poses = []
            for i in range(1, pose.num_chains() + 1):
                chainpose = pose.split_by_chain(i)
                info = chainpose.pdb_info().pose2pdb(1)
                if info.split(' ')[1] in chain and chainpose.residue(1).is_protein():
                    if chainpose.size() < 5:
                        raise ValueError('Error: chain {} too small.'.format(chain))
                    poses.append(chainpose)
            pose = poses[0]
            if len(poses) > 1:
                for chainpose in poses[1:]:
                    append_pose_to_pose(pose, chainpose)

        else:
            pose = pose.split_by_chain(1)
        reslist = []
        for res in range(1, pose.size() + 1):
            if pose.residue(res).is_protein():
                reslist.append(res)
        print('POSE SIZE')
        print(pose.size())
        patches = Patches(pose)
        patches.set_reslist(reslist)
        patches.determine_surface_residues()
        print(patches.reslist)
        patches.map_residues()
        print(patches.resmap)
        # parent_folder = os.path.abspath(os.path.join(args['<output_folder>']))
        target_pdb = workspace.target_path
        i = 1
        for res in patches.reslist:
            patch_folder = os.path.join(workspace.focus_dir, 'patch_{}'.format(i))
            i += 1
            if not os.path.exists(patch_folder):
                os.makedirs(patch_folder, exist_ok=True)
            # print(patches.nearest_n_residues(res, 100,
            #       cutoff=float(args['--patchsize']), pymol=True))
            write_to_file(
                patches.nearest_n_residues(
                    res, 100, cutoff=float(args['--patchsize'])),
                patch_folder)
            write_flags(patch_folder, target_pdb)

        pose.dump_pdb(target_pdb)
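
The chain-selection block above relies on PyRosetta's pose2pdb, which maps a pose residue number to a "PDB-number chain" string. A minimal standalone sketch of the same idiom; the input path and chain letter 'B' are hypothetical:

from pyrosetta import init, pose_from_file

init('-ignore_unrecognized_res')
pose = pose_from_file('input.pdb')    # hypothetical input
keep = []
for i in range(1, pose.num_chains() + 1):
    chainpose = pose.split_by_chain(i)
    # pose2pdb(1) returns e.g. '25 B ': PDB residue number, then chain
    chain_letter = chainpose.pdb_info().pose2pdb(1).split(' ')[1]
    if chain_letter == 'B' and chainpose.residue(1).is_protein():
        keep.append(chainpose)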
Example #12
def main():
    workspace = ws.workspace_from_dir(sys.argv[1])
    init('-ignore_unrecognized_res')
    df = pd.DataFrame()
    # idx/num were undefined here; derive them like the other workers.
    # The argv positions below are assumptions.
    if 'SGE_TASK_ID' in os.environ:
        idx = int(os.environ['SGE_TASK_ID']) - 1
    else:
        idx = int(sys.argv[2]) - 1
    num = int(sys.argv[3])  # lines per task
    start = idx * num
    stop = idx * num + num - 1
    print('START: {}'.format(start))
    print('STOP: {}'.format(stop))
    # with gzip.open('test_files/nrpdb.gz', 'rb') as f:
    with gzip.open('test_files/nr_custom.gz', 'rb') as f:
        lines = f.readlines()[start:stop]
    errors = []
    for line in lines:
        line = line.decode('utf-8')
        if not line.startswith('#'):
            try:
                print('Opening from line {}'.format(line))
                sys.stdout.flush()
                pose, pdb = get_pose(str(line))
                if pose:
                    scanner = PoseScanner(pose)
                    helices = pd.DataFrame(
                        scanner.scan_pose_helices(name=pdb,
                                                  split_chains=False))

                    df = pd.concat([df, helices], ignore_index=True)
            except Exception as e:
                print("Error scanning line: \n{}".format(line))
                print('Error was:')
                print(e)
                sys.stdout.flush()
                errors.append(line)
    # for subdir in sorted(os.listdir(pdb_prefix))[idx*num:idx*num + num - 1]:
    #     for f in os.listdir(os.path.join(pdb_prefix, subdir)):
    #         if f.endswith('.ent.gz'):
    #             print('Scanning {}'.format(f))
    #             path = os.path.join(pdb_prefix, subdir, f)
    #             pdb = f[3:7]
    #             try:
    #                 pose = pose_from_file(path)
    #                 scanner = PoseScanner(pose)
    #                 helices = pd.DataFrame(
    #                     scanner.scan_pose_helices(name=pdb))
    #                 df = pd.concat([df, helices], ignore_index=True)
    #             except:
    #                 print("Error scanning {}".format(f))

    os.makedirs('nr_dataframes/errors/', exist_ok=True)
    df.to_pickle('nr_dataframes/{}.pkl'.format(idx))
    df.to_csv('nr_dataframes/{}.csv'.format(idx))

    errorlog = os.path.join('nr_dataframes', 'errors',
                            'helix_scan.e{}'.format(idx))
    with open(errorlog, 'w') as f:
        for err in errors:
            f.write(err + '\n')