Example #1
    def handle(self, *args, **options):

        tmp = os.path.abspath(options['tmp'])
        if not os.path.exists(tmp):
            os.makedirs(tmp)
        qs = PDB.objects.filter(code=options["pdb"]) if options["pdb"] else PDB.objects.all()
        total = qs.count()
        utils = PDBs(options["pdbs_dir"])

        with tqdm(qs, total=total) as pbar:
            for pdb in pbar:
                pbar.set_description(pdb.code)

                try:
                    fpocket2sql = FPocket2SQL()
                    fpocket2sql.create_or_get_pocket_properties()
                    fpocket2sql.load_pdb(pdb.code)
                    fpocket2sql.run_fpocket(options['tmp'], pdb_path=utils.pdb_path(pdb.code),
                                            pockets_path=utils.pdb_pockets_path(pdb.code),
                                            force=options["force"])
                    fpocket2sql.load_pockets()
                    # res.delete_dir()


                except IOError as ex:
                    traceback.print_exc()
                    self.stderr.write("error processing pockets from %s: %s" % (pdb.code, str(ex)))

                except Exception as ex:
                    traceback.print_exc()
                    raise CommandError(ex)
Example #2
 def __init__(self, pdb_dir="/data/databases/pdb/"):
     self.utils = PDBs(pdb_dir)
     self.seqs_path = "/tmp/seq.faa"
     self.aln_path = "/tmp/msa.faa"
     self.ref_seq = None
     self.pdbfile = None
     self.pdb_data = defaultdict(dict)
Example #3
    def handle(self, *args, **options):

        if options["verbose"] == 1:
            import logging
            logging.basicConfig(level=logging.DEBUG)

        pdbs = PDBs(options["pdbs_dir"])
        pdbs.url_pdb_entries = options["entries_url"]
        if not os.path.exists(options["entries_path"]):
            pdbs.download_pdb_entries()

        pdbio = PDBIO(options['pdbs_dir'] + "/", options['entries_path'],
                      options['tmp'])
        pdbio.init()

        try:
            pdbs.update_pdb(options['code'])
            pdbio.process_pdb(options['code'],
                              force=options['force'],
                              pocket_path=pdbs.pdb_pockets_path(
                                  options['code']),
                              pdb_path=pdbs.pdb_path(options['code']))

        except IOError as ex:
            traceback.print_exc()
            self.stderr.write("error processing pockets from %s: %s" %
                              (options['code'], str(ex)))
        except Exception as ex:
            traceback.print_exc()
            raise CommandError(ex)
Example #4
    def pdbs_seq_for_modelling(self,
                               out_fasta=None,
                               pdbsIter=None,
                               reuse_previours=None):
        if pdbsIter is None:
            pdbsIter = PDBs(self.pdb_dir)
        if not out_fasta:
            out_fasta = self.pdb_dir + "processed/seqs_from_pdb.fasta"

        pdb_codes = {x.lower(): 1 for x in self.entries_df().IDCODE}

        reuse = defaultdict(lambda: [])
        if reuse_previours:
            for x in bpio.parse(reuse_previours, "fasta"):
                pdb = x.id.split("_")[0]
                reuse[pdb].append(x)
        reuse = dict(reuse)

        pdblist = list(pdbsIter)
        with open(out_fasta, "w") as out_fasta_handle:
            for (pdb, pdb_file_path) in tqdm(pdblist):
                if pdb in pdb_codes:
                    if pdb in reuse:
                        bpio.write(reuse[pdb], out_fasta_handle, "fasta")
                    else:
                        self.seq_from_pdb(out_fasta_handle, pdb, pdb_file_path)
Example #5
    def load_pdb_pocket(self, pdb, pdb_dir="/data/databases/pdb/"):
        utils = PDBs(pdb_dir)
        if not os.path.exists(utils.pdb_pockets_path(pdb)):
            utils.update_pdb(pdb)
            fpocket = FPocket(utils.pdb_path(pdb))
            result = fpocket.hunt_pockets()
            mkdir(os.path.dirname(utils.pdb_pockets_path(pdb)))
            result.save(utils.pdb_pockets_path(pdb))
        with open(utils.pdb_pockets_path(pdb)) as h:
            result = json.load(h)

        self.pdb_data[pdb]["pockets"] = result
        return self.pdb_data[pdb]["pockets"]
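
A minimal usage sketch of the cached-pocket loader above. It assumes (as in Example #13) that load_pdb_pocket belongs to StructureAnnotator and that the class can be instantiated without arguments; the PDB code and directory are placeholders.

# Hedged sketch: constructor signature, PDB code and pdb_dir are assumptions.
annotator = StructureAnnotator()
pockets = annotator.load_pdb_pocket("4zu4", pdb_dir="/data/databases/pdb/")
print(len(pockets))  # list of pocket dicts parsed from the cached fpocket JSON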
Example #6
 def add_arguments(self, parser):
     pdbs = PDBs()
     parser.add_argument('--code', required=True, help="4 letter PDB code")
     parser.add_argument('--tmp', default="data/tmp/load_pdb")
     parser.add_argument('--pdbs_dir',
                         default="/data/databases/pdb/divided/")
     parser.add_argument('--entries_path',
                         default="/data/databases/pdb/entries.idx")
     parser.add_argument('--entries_url', default=pdbs.url_pdb_entries)
Example #7
 def add_arguments(self, parser):
     pdbs = PDBs()
     parser.add_argument('--pdbs_dir', default="data/pdb/")
     parser.add_argument('--entries_path', default=None)
     parser.add_argument(
         '--only_annotated',
         action='store_false',
         help="by default only cross referenced pdbs are downloaded")
     parser.add_argument('--entries_url', default=pdbs.url_pdb_entries)
Example #8
 def add_arguments(self, parser):
     pdbs = PDBs()
     parser.add_argument('--code', required=True, help="4 letter PDB code")
     parser.add_argument('--tmp', default="data/tmp/load_pdb")
     parser.add_argument('--pdbs_dir', default="/data/databases/pdb/")
     parser.add_argument('--entries_path',
                         default="/data/databases/pdb/entries.idx")
     parser.add_argument('--entries_url', default=pdbs.url_pdb_entries)
     parser.add_argument('--force', action="store_true")
     parser.add_argument('--verbose', default=0, choices=[0, 1], type=int)
Example #9
    def handle(self, *args, **options):

        pdbs = PDBs()
        pdbs.url_pdb_entries = options["entries_url"]
        if not os.path.exists(options["entries_path"]):
            pdbs.download_pdb_entries()

        pdbio = PDBIO(options['pdbs_dir'] + "/", options['entries_path'],
                      options['tmp'])
        pdbio.init()

        try:
            pdbio.process_pdb(options['code'])

        except IOError as ex:
            traceback.print_exc()
            self.stderr.write("error processing pockets from %s: %s" %
                              (options['code'], str(ex)))
        except Exception as ex:
            traceback.print_exc()
            raise CommandError(ex)
Example #10
    def handle(self, *args, **options):
        pdbs_utils = PDBs(pdb_dir=options['pdbs_dir'])
        pdbs_utils.url_pdb_entries = options["entries_url"]
        if not options['entries_path']:
            options['entries_path'] = options['pdbs_dir'] + "/entries.idx"
        # (re)download the entries index if it is missing or older than a week
        if not os.path.exists(options["entries_path"]) or (
                datetime.now() - datetime.fromtimestamp(
                    os.path.getctime(options["entries_path"]))).days > 7:
            pdbs_utils.download_pdb_entries()

        pdb2sql = PDB2SQL(options['pdbs_dir'], options['entries_path'])
        pdb2sql.load_entries()
        if options["only_annotated"]:
            self.stderr.write("only_annotated option activated by default")
            from bioseq.models.Dbxref import Dbxref
            pdbs = [(x.accession.lower(),
                     pdbs_utils.pdb_path(x.accession.lower()))
                    for x in Dbxref.objects.filter(dbname="PDB")]
        else:
            pdbs = list(tqdm(iterpdbs(options['pdbs_dir'])))
        # 4zux 42 mer 2lo7("5my5","/data/databases/pdb/divided/my/pdb5my5.ent")
        # ("4zu4", "/data/databases/pdb/divided/zu/pdb4zu4.ent")

        with tqdm(pdbs) as pbar:
            for code, pdb_path in pbar:
                code = code.lower()

                if PDBsWS.is_obsolete(code):
                    self.stderr.write(f"{code} entry is obsolete")
                    continue

                try:
                    pdb_path = pdbs_utils.update_pdb(code)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    self.stderr.write("PDB %s could not be downloaded" % code)
                    continue

                if PDB.objects.filter(code=code).exists():
                    self.stderr.write("PDB %s already exists" % code)
                    continue

                pbar.set_description(code)
                try:
                    pdb2sql.create_pdb_entry(code, pdb_path)
                    pdb2sql.update_entry_data(code, pdb_path)
                except KeyboardInterrupt:
                    raise
                except Exception as ex:
                    import traceback
                    traceback.print_exc()
                    raise CommandError(ex)
Example #11
    def handle(self, *args, **options):
        pdbs = PDBs(pdb_dir=options['pdbs_dir'])
        pdbs.url_pdb_entries = options["entries_url"]
        if not os.path.exists(options["entries_path"]):
            pdbs.download_pdb_entries()


        pdb2sql = PDB2SQL(options['pdbs_dir'], options['entries_path'])
        pdb2sql.load_entries()
        if options["only_annotated"]:
            self.stderr.write("only_annotated option activated by default")
            from bioseq.models.Dbxref import Dbxref
            pdbs = [(x.accession.lower(), pdbs.pdb_path(x.accession.lower()))
                    for x in Dbxref.objects.filter(dbname="PDB")]
        else:
            pdbs = list(tqdm(iterpdbs(options['pdbs_dir'])))
        # 4zux 42 mer 2lo7("5my5","/data/databases/pdb/divided/my/pdb5my5.ent")
        # ("4zu4", "/data/databases/pdb/divided/zu/pdb4zu4.ent")

        with tqdm(pdbs) as pbar:
            for code, pdb_path in pbar:
                code = code.lower()
                try:
                    pdb_path = pdb2sql.download(code)
                except Exception:
                    self.stderr.write("PDB %s could not be downloaded" % code)
                    continue

                if PDB.objects.filter(code=code).exists():
                    self.stderr.write("PDB %s already exists" % code)
                    continue

                pbar.set_description(code)
                try:
                    pdb2sql.create_pdb_entry(code, pdb_path)
                    pdb2sql.update_entry_data(code, pdb_path)
                except Exception as ex:
                    raise CommandError(ex)
Example #12
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-i", "--data_path", default='/data/databases/pdb/')
    parser.add_argument(
        "-o",
        "--output_path",
        default='/data/databases/pdb/processed/domain_analisis')

    args = parser.parse_args()

    domains = defaultdict(lambda: [])
    for seq in bpio.parse(args.data_path + "/processed/domains.fasta",
                          "fasta"):
        domains["_".join(seq.id.split("_")[0:2])].append(seq.id.split("_"))

    for (code, pdb_path) in tqdm(PDBs(pdb_dir=args.data_path)):

        pdb_model = PDB(code=code)
        pdb_model.save()

        p = PDBParser(PERMISSIVE=True, QUIET=True)
        try:
            for chain in p.get_structure(code, pdb_path).get_chains():
                chains_dir = args.output_path + "/chains/" + code[1:3] + "/"
                mkdir(chains_dir)
                cs = ChainSplitter(chains_dir)
                process_chain(pdb_path, code, chain.id, pdb_model)

                for (_, _, res_start, res_end, dn, dn_start,
                     dn_end) in domains[code + "_" + chain.id]:
                    # 1r9d_A_2_787_PF02901.14_8_648
Example #13
class StructureVariant:
    def __init__(self, pdb_dir="/data/databases/pdb/"):
        self.utils = PDBs(pdb_dir)
        self.seqs_path = "/tmp/seq.faa"
        self.aln_path = "/tmp/msa.faa"
        self.ref_seq = None
        self.pdbfile = None
        self.pdb_data = defaultdict(dict)

    def load_msa(self, input_sequence, pdb_code, pdb_chain=None):
        pdb_code = pdb_code.lower()
        self.utils.update_pdb(pdb_code)
        self.ref_seq = bpio.read(input_sequence, "fasta")
        self.pdbfile = PDBFile(pdb_code, self.utils.pdb_path(pdb_code))
        with open(self.seqs_path, "w") as h:
            bpio.write(self.ref_seq, h, "fasta")
            bpio.write(self.pdbfile.seq(selected_chain=pdb_chain), h, "fasta")

        cmd = docker_wrap_command(
            f'mafft --quiet --localpair --maxiterate 1000 {self.seqs_path} > {self.aln_path} '
        )
        execute(cmd)

        self.msa = MSAMap.from_msa(self.aln_path)
        self.res_map = self.pdbfile.residues_map(pdb_chain)

    def residues_from_pos(self, pos):
        pos_data = []
        for sample in self.msa.samples():
            if sample != self.ref_seq.id:
                pdb, chain = sample.split("_")[:2]
                if self.msa.exists_pos(self.ref_seq.id, pos, sample):
                    msa_pos = self.msa.pos_seq_msa_map[self.ref_seq.id][pos]
                    sample_pos = self.msa.pos_from_seq(self.ref_seq.id, pos,
                                                       sample)
                    line = {
                        "pos": pos + 1,
                        "ref": self.msa.seqs[self.ref_seq.id][msa_pos],
                        "alt": self.msa.seqs[sample][msa_pos],
                        "pdb": pdb,
                        "chain": chain,
                        "resid": str(self.res_map[chain][sample_pos][1]),
                        "icode": str(self.res_map[chain][sample_pos][2]),
                        "pdb_pos": sample_pos
                    }
                    pos_data.append(line)
        return pos_data

    def residues_from_aln_seq(self, input_sequence, pdb_code, pdb_chain=None):
        self.load_msa(input_sequence, pdb_code, pdb_chain)
        variants = [
            (k, v)
            for k, v in sorted(self.msa.variants(self.ref_seq.id).items(),
                               key=lambda x: int(x[0].split("_")[1]))
        ]

        output = []
        for ref_pos, alt_samples in variants:
            ref, pos = ref_pos.split("_")
            pos = int(pos)
            for alt, samples in alt_samples.items():
                if alt != self.msa.gap_code:
                    pos_data = self.residues_from_pos(pos)
                    output += pos_data
        return pd.DataFrame(output)

    def annotate_resid(self, pdb: str, resid: str,
                       structure_annotator: StructureAnnotator):
        pdb = pdb.lower()
        data = {}
        if pdb not in self.pdb_data:
            self.load_pdb_ann(pdb, structure_annotator)

        if str(resid) in self.pdb_data[pdb]["binding"]:
            data["binding"] = self.pdb_data[pdb]["binding"][str(resid)]
        if str(resid) in self.pdb_data[pdb]["pockets"]:
            data["pockets"] = self.pdb_data[pdb]["pockets"][str(resid)]
        return data

    def load_pdb_ann(self, pdb, structure_annotator: StructureAnnotator):
        binding_data = structure_annotator.load_pdb_binding_data(pdb)
        binding_dict = defaultdict(list)
        for site in binding_data:
            for site_res in site["site_residues"]:
                res = str(site_res["residue_number"]) + (site_res.get(
                    "author_insertion_code", "") or "")
                binding_dict[res].append({
                    "site_id": site["site_id"],
                    "details": site["details"],
                    "ligands": [
                        {c: x[c] for c in ["chain_id", "author_residue_number", "chem_comp_id"]}
                        for x in site["site_residues"]
                        if x["chem_comp_id"] in binding_dict and x["chem_comp_id"] != "HOH"
                    ]
                })
        self.pdb_data[pdb]["binding"] = binding_dict
        pockets_data = structure_annotator.load_pdb_pocket(
            pdb, self.utils.pdb_dir)
        pockets_dict = defaultdict(list)
        for pocket in pockets_data:
            for residue in set(pocket["residues"]):
                pockets_dict[residue].append({
                    "pocket_num": pocket["number"],
                    "druggabilitty": pocket["properties"]['Druggability Score']
                })
        self.pdb_data[pdb]["pockets"] = dict(pockets_dict)

    def annotate_residue_list(self, df,
                              structure_annotator: StructureAnnotator):
        """
        
        :param df: columns=["pdb", "chain", "resid", "alt", "ref", "pos"] or generated by residues_from_aln_seq
        :return: 
        """

        output = {}
        for i, r in df.iterrows():
            key = f'{r.pdb}_{r.chain}_{r.resid}_{r.alt}'
            output[key] = self.annotate_resid(r.pdb, str(r.resid), structure_annotator)

        return output
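
For orientation, a hedged end-to-end sketch of StructureVariant as defined above; the FASTA path, the PDB code/chain, and the argument-free StructureAnnotator() constructor are assumptions.

# Hedged sketch: file name, PDB code/chain and StructureAnnotator() signature are assumptions.
sv = StructureVariant(pdb_dir="/data/databases/pdb/")
df = sv.residues_from_aln_seq("reference.fasta", "4zu4", pdb_chain="A")  # DataFrame of mapped variant residues
annotations = sv.annotate_residue_list(df, StructureAnnotator())  # dict keyed by "<pdb>_<chain>_<resid>_<alt>"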
Example #14

def old_or_inexistent(filepath, period=30):
    return not os.path.exists(filepath) or ((
        (time.time() - os.path.getatime(filepath)) / 60 / 60 / 24) > period)


#os.environ["http_proxy"] = "http://proxy.fcen.uba.ar:8080"
#os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080"

mkdir("/data/pdb/")
download_file("ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx",
              "/data/pdb/entries.idx",
              ovewrite=True)

pdbs = PDBs("/data/pdb/")
pdbs.download_pdb_seq_ses()
pdbs.update_pdb_dir()
mkdir("/data/pdb/processed/")
pdbs.pdbs_seq_for_modelling()
execute("makeblastdb -dbtype prot -in /data/pdb/processed/seqs_from_pdb.fasta")

if old_or_inexistent("/data/uniprot/uniref/uniref90/uniref90.fasta"):
    mkdir("/data/uniprot/uniref/uniref90")
    download_file(
        "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz",
        "/data/uniprot/uniref/uniref90/uniref90.fasta.gz",
        ovewrite=True)
    execute("gunzip /data/uniprot/uniref/uniref90/uniref90.fasta.gz")

if old_or_inexistent("/data/uniprot/uniref/uniref90/uniref90.fasta.pal"):
Example #15
def main(argv=None):  # IGNORE:C0111
    '''Command line options.'''

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)



    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-v", "--verbose", dest="verbose", action="count",
                        help="set verbosity level [default: %(default)s]")

    # parser.add_argument("-dir", "--structs_dir", required = True )
    parser.add_argument("-db", "--database_name", default='pdb')
    parser.add_argument("-host", "--db_host", default='127.0.0.1')

    parser.add_argument( "--csa", default='/data/databases/csa/csa.txt')
    parser.add_argument( "--hmm", default='/data/databases/pdb/pdb_seq_res.hmm')
    parser.add_argument( "--pdbs", default='/data/databases/pdb/')
    parser.add_argument( "--distances", default='/data/databases/pdb/processed/distances.tbl')


    args = parser.parse_args()


    #         pdbs = PDBs()
    #         pdbs.update('/data/pdb/divided/')

    BioMongoDB(args.database_name) #args.db_host

    # update_quaternary()
    #         # clusters cd hit
    #         update_clusters()
    #
    # residues near ligands --> metal drug/cofactor

    if not os.path.exists(args.csa):
        sys.stderr.write("%s not found. Download it from %s" % (
            args.csa,
            "http://www.ebi.ac.uk/thornton-srv/databases/CSA/downloads/CSA_2_0_121113.txt"
        ))
        sys.exit(1)

    if not os.path.exists(args.pdbs):
        sys.stderr.write("%s not found. Specify the path to the pdbs/divided directory" %
                         args.pdbs)
        sys.exit(1)
    if not os.path.exists(args.distances):
        sys.stderr.write("%s not found. Run the extended_domain.py script to create it." %
                         args.distances)
        sys.exit(1)


    pdbUtils = PDBs(pdb_dir=args.pdbs)
    print("Update Quaternary")
    update_quaternary(pdbUtils)
    print("Update CSA")
    update_csa(args.csa)
    print("Update CYS/TYR")
    free_cys_tyr(pdbUtils)


    print("Update Importan Pfam")
    important_pfam(args.hmm)
    print("Update Binding residues")
    update_binding_residues(args.distances)
    _log.info("update pdb properties finished!!")
Example #16
    parser.add_argument("-p", "--dbpass", required=True)
    parser.add_argument("-i", "--pdb_dir", default="/data/databases/pdb/")
    parser.add_argument("-db", "--dbname", default="pdbdb")
    parser.add_argument("-u", "--dbuser", default="root")

    args = parser.parse_args()
    from peewee import MySQLDatabase

    mysql_db = MySQLDatabase(args.dbname,
                             user=args.dbuser,
                             password=args.dbpass)
    mysql_db.close()
    sqldb.initialize(mysql_db)

    pdb_utils = PDBs(pdb_dir=args.pdb_dir)
    df = pdb_utils.entries_df()
    pdbs = list(pdb_utils)
    with tqdm(pdbs) as pbar:
        for (code, pdb_path) in pbar:
            mysql_db.connect(reuse_if_open=True)
            pbar.set_description(code)
            try:
                entry = df[df.IDCODE == code.upper()].iloc[0]
            except IndexError:
                continue

            pdb_model = PDB(code=code, experiment=str(entry.EXPERIMENT))

            try:
                resolution = float(entry.RESOLUTION)
Example #17
    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument("-p", "--dbpass", required=True)
    parser.add_argument("-i", "--pdb_dir", default="/data/databases/pdb/")
    parser.add_argument("-db", "--dbname", default="pdbdb")
    parser.add_argument("-u", "--dbuser", default="root")

    args = parser.parse_args()
    from peewee import MySQLDatabase

    mysql_db = MySQLDatabase(args.dbname,
                             user=args.dbuser,
                             password=args.dbpass)

    sqldb.initialize(mysql_db)

    pdb_utils = PDBs(pdb_dir=args.pdb_dir)
    props = {x.name: x for x in Property.select()}
    pdbs = list(pdb_utils)
    with tqdm(pdbs) as pbar:
        for (code, pdb_path) in pbar:

            pdb_model = PDB.select().where(PDB.code == code).first()

            p = PDBParser(PERMISSIVE=True, QUIET=True)
            try:
                for chain in p.get_structure(code, pdb_path).get_chains():
                    chains_dir = args.pdb_dir + "/chains/" + code[1:3] + "/"
                    mkdir(chains_dir)
                    cs = ChainSplitter(chains_dir)
                    process_chain(pdb_path, code, chain.id, pdb_model, props)
Example #18
 def add_arguments(self, parser):
     pdbs = PDBs()
     parser.add_argument('--pdbs_dir', default="data/pdb/")
     parser.add_argument('--entries_path', default="data/pdb/entries.idx")
     parser.add_argument('--only_annotated', action='store_false')
     parser.add_argument('--entries_url', default=pdbs.url_pdb_entries)
Example #19
        return self.pdbs_dir + pdb[1:3] + "/pdb" + pdb + ".ent"

    @staticmethod
    def sequence_from_residues(residues):
        return "".join([
            protein_letters_3to1[res.get_resname()[0] +
                                 res.get_resname()[1:3].lower()]
            for res in residues
        ])


if __name__ == '__main__':
    from SNDG import init_log

    import argparse
    from SNDG.Structure.PDBs import PDBs
    parser = argparse.ArgumentParser(description='PDB Update utils')

    init_log()
    pdbs = PDBs(pdb_dir="/data/databases/pdb/")
    #os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080"
    # pdbs.download_pdb_seq_ses()
    pdbs.download_pdb_entries()

    pdbs.update_pdb_dir()
    # from SNDG.Structure.PDBs import PDBs
    # pdbs = PDBs(pdb_dir="/data/databases/pdb/")
    # pdbs.pdbs_seq_for_modelling("/data/databases/pdb/processed/seqs_from_pdb.fasta")
    #pepe = pdbs.entries_df()
    #print pepe
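
A hedged sketch of calling the sequence_from_residues static method above with Bio.PDB; the PDB code and file path are placeholders, and non-standard residue names would raise a KeyError.

# Hedged sketch: code/path are placeholders; only standard residues are kept.
from Bio.PDB import PDBParser

structure = PDBParser(PERMISSIVE=True, QUIET=True).get_structure(
    "1xxx", "/data/databases/pdb/divided/xx/pdb1xxx.ent")
chain_a = [res for res in structure[0]["A"] if res.id[0] == " "]  # drop waters/heteroatoms
print(PDBs.sequence_from_residues(chain_a))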
Example #20
                        "--verbose",
                        dest="verbose",
                        action="count",
                        help="set verbosity level [default: %(default)s]")

    parser.add_argument("-host", "--db_host", default='127.0.0.1')
    parser.add_argument("-db", "--db_name", default='tdr')
    parser.add_argument("--pdb_entries",
                        default='/data/databases/pdb/entries.idx')
    parser.add_argument("--pdbs", default='/data/databases/pdb/')

    args = parser.parse_args()

    BioMongoDB(args.db_name)

    pdbUtils = PDBs(pdb_dir=args.pdbs)

    db = MongoClient(args.db_host)["pdb"]
    col_name = "pdb"

    if not os.path.exists(args.pdb_entries):
        sys.stderr.write("%s does not exists" % args.pdb_entries)
        sys.exit(1)
    """
    collection = SeqCollection.objects(name=col_name)
    if len(collection):
        collection = collection.get()
    else:
        collection = SeqCollection(name=col_name, description="Protein Data Bank", organism="?")
        collection.save()
    """
Example #21
    parser = argparse.ArgumentParser(description='PDB utils')

    subparsers = parser.add_subparsers(help='commands', description='valid subcommands', dest='command')

    update_pdb = subparsers.add_parser('update', help='List contents')
    update_pdb.add_argument('-i', '--pdbs_dir', help="pdbs_directory", default="/data/databases/pdb/")

    # update_pdb = subparsers.add_parser('getpdb', help='List contents')
    # update_pdb.add_argument('-i', '--pdb_code', help="4 letter code", required=True)
    # update_pdb.add_argument('-o', '--ouput_file', help="output file")


    args = parser.parse_args()
    if args.command == "update":
        # remember to configure ftp
        pdbs = PDBs(pdb_dir=args.pdbs_dir)
        pdbs.download_pdb_entries()
        pdbs.update_pdb_dir()
        pdbs.download_pdb_seq_ses()
        sys.exit(0)


    # os.environ["ftp_proxy"] = "http://proxy.fcen.uba.ar:8080"
    # pdbs.download_pdb_seq_ses()

    # from SNDG.Structure.PDBs import PDBs
    # pdbs = PDBs(pdb_dir="/data/databases/pdb/")
    # pdbs.pdbs_seq_for_modelling("/data/databases/pdb/processed/seqs_from_pdb.fasta")
    # pepe = pdbs.entries_df()
    # print pepe
Example #22
                        help='pdb files directory')
    parser.add_argument('--tmp_dir',
                        default=mkdtemp(),
                        help='temporal directory')
    parser.add_argument('--cpus', default=1, type=int, help='cpu cores to use')
    # parser.add_argument('--max_alns', default=3, type=int, help='max different templates to use')
    parser.add_argument('-t',
                        "--templates_to_use",
                        default=3,
                        type=int,
                        help='max amount of templates to use.')

    args = parser.parse_args()
    pdbs_dir = args.pdbs_dir + ("/" if args.pdbs_dir[-1] != "/" else "")
    mkdir(f'{pdbs_dir}/divided')
    pdb_utils = PDBs(pdbs_dir)

    # pbar = tqdm(args.alns)
    sys.stderr.write(str(args))
    sys.stderr.write('reading alignment file\n')
    alns = [{
        "aln_file": x,
        "templates2use": args.templates_to_use,
        "output_dir": args.output_dir,
        "tmp_dir": args.tmp_dir
    } for x in args.alns]
    mkdir(args.output_dir)
    assert os.path.exists(
        args.output_dir), f'"{args.output_dir}" could not be created'

    sys.stderr.write('processing alignment files\n')
Example #23
                        help="set verbosity level [default: %(default)s]")

    parser.add_argument("-host", "--db_host", default='127.0.0.1')
    parser.add_argument("-db", "--db_name", default='tdr')
    parser.add_argument( "--pdb_entries", default='/data/databases/pdb/entries.idx')
    parser.add_argument( "--pdbs", default='/data/databases/pdb/')
    parser.add_argument( "--pdb_timeout", default=60,type=int)
    parser.add_argument( "--review_pockets", action="store_true")
    parser.add_argument("--organism_name", default=None)


    args = parser.parse_args()

    mdb = BioMongoDB(args.db_name, host=args.db_host)

    pdbUtils = PDBs(pdb_dir=args.pdbs)

    db = MongoClient(args.db_host)["pdb"]
    col_name = "pdb"

    if not os.path.exists(args.pdb_entries):
        sys.stderr.write("%s does not exists" % args.pdb_entries)
        sys.exit(1)

    """
    collection = SeqCollection.objects(name=col_name)
    if len(collection):
        collection = collection.get()
    else:
        collection = SeqCollection(name=col_name, description="Protein Data Bank", organism="?")
        collection.save()