Exemplo n.º 1
0
def insert_property_from_file(
    library_name,
    library_ver,
    ID_tag,
    filename,
    ):
    """Store the properties found in an SDF-style file (for JOELib Property).

    The input file holds parsed name-value pairs in SDF format; each record
    is expected to contain only the ID tag and property tags.

    For every record, the compound with that ID is looked up in the library
    returned by ``get_library(library_name, library_ver)`` and one
    ``Property`` row is saved per remaining tag.  Start time, end time and
    elapsed time are printed.

    Lookup failures (ID tag absent from a record, compound or property field
    not found) are not handled here and propagate to the caller.
    """

    import datetime
    begin = datetime.datetime.now()
    print('starts at: %s' % begin)

    library = get_library(library_name, library_ver)
    for sdf in sdfiterator.sdf_iter(filename):
        tagdict = get_sdf_tags(sdf)

        # Take the ID out so that only property tags are left to iterate.
        cid = tagdict.pop(ID_tag)
        compound = Compound.objects.get(library=library, cid=cid)

        for (tag, value) in tagdict.items():
            field = PropertyField.objects.get(source_tag=tag)
            Property(field=field, value=value, compound=compound).save()

    end = datetime.datetime.now()
    print('finished at: %s' % end)
    print('time lapsed: %s' % (end - begin))

    return
Exemplo n.º 2
0
def format_qsar_property_file(
    lib_name,
    lib_ver,
    input,
    output,
    sdffile,
    id_tag,
    ):
    """Prefix every data line of a property file with the matching cid.

    The cerius2 program does not give the correct cid for some libraries, so
    the cid is taken from ``sdffile`` instead.  The first line of ``input``
    is copied to ``output`` unchanged; after that, the n-th record of
    ``sdffile`` supplies the cid written (followed by a tab) in front of the
    n-th remaining line of ``input``.

    Note: ``input`` and ``sdffile`` must hold the same number of compounds.
    If ``input`` runs out first, the remaining cids are written with an
    empty line after the tab.
    """

    # The result is not used below; the call is kept because it presumably
    # fails for an unknown library -- TODO confirm against get_library.
    get_library(lib_name, lib_ver)

    # Context managers close both files even if a record lacks id_tag.
    with open(input) as fp:
        with open(output, 'w') as result:
            result.write(fp.readline())
            for sdf in sdfiterator.sdf_iter(sdffile):
                cid = get_sdf_tags(sdf)[id_tag]
                result.write('%s\t%s' % (cid, fp.readline()))

    return
Exemplo n.º 3
0
def list_all_cid_from_sdf(sdffile, ID_tag, outfile):
    """Write the value of ``ID_tag`` of every record in ``sdffile`` to
    ``outfile``, one per line.

    A record without ``ID_tag`` raises from the tag lookup; the output file
    is closed in that case too.
    """

    with open(outfile, 'w') as fp:
        for sdf in sdfiterator.sdf_iter(sdffile):
            fp.write('%s\n' % get_sdf_tags(sdf)[ID_tag])

    return
Exemplo n.º 4
0
def prepare_property_file(
    library_name,
    library_ver,
    ID,
    input,
    output,
    ):
    """Write SQL INSERT statements for the properties in an SDF-style file
    (for JOELib property).

    input: parsed name-value pairs in SDF format; each record is expected to
        hold only the ID tag and property values.
    output: result file that receives one INSERT statement per property.

    Records whose compound cannot be fetched from the library are reported
    on stdout and skipped.  A tag with no matching ``PropertyField`` is
    reported and the ``KeyError`` is re-raised.  Start time, end time and
    elapsed time are printed.
    """

    import datetime
    begin = datetime.datetime.now()
    print('now begin at: %s' % begin)

    library = get_library(library_name, library_ver)

    # Load the tag -> field id mapping once instead of querying per property.
    fields = {}
    for f in PropertyField.objects.all():
        fields[f.source_tag] = f.id

    # The context manager closes the output even when a field is missing.
    with open(output, 'w') as fp:
        for sdf in sdfiterator.sdf_iter(input):
            tagdict = get_sdf_tags(sdf)
            cid = tagdict.pop(ID)
            try:
                c_id = Compound.objects.get(library=library, cid=cid).id
            except Exception:
                # Best effort: show what the lookup matched and move on.
                # ``Exception`` (not a bare except) so that Ctrl-C and
                # SystemExit still stop the run.
                print('-------exception!!!-----------')
                print(Compound.objects.filter(library=library, cid=cid))
                continue

            for (tag, value) in tagdict.items():
                try:
                    field_id = fields[tag]
                except KeyError:
                    print('-------field exception!!!-----------')
                    print('cid:%s, c_id:%s' % (cid, c_id))
                    print('tag:%s\n\n' % tag)
                    raise
                # NOTE(review): the statement is built by plain string
                # interpolation and ``value`` is neither quoted nor escaped,
                # so non-numeric values yield invalid (or unsafe) SQL.
                # Left as is to keep the generated file unchanged.
                query = \
                    """INSERT INTO compounddb_property (compound_id, value, field_id) VALUES (%s, %s, %s);""" \
                    % (c_id, value, field_id)
                fp.write('%s\n' % query)

    end = datetime.datetime.now()
    print('finished at: %s' % end)
    print('time lapsed: %s' % (end - begin))

    return
def batch_sdf_to_smiles(sdfs):
    """Convert each record of the SDF text ``sdfs`` with ``sdf_to_smiles``.

    Returns a ``(buf, err)`` tuple: ``buf`` is the concatenation of the
    conversion results in record order, ``err`` is the number of records
    for which ``sdf_to_smiles`` raised ``InputError`` (those are left out).
    """
    from sdfiterator import sdf_iter
    from cStringIO import StringIO

    pieces = []
    failures = 0
    for record in sdf_iter(StringIO(sdfs)):
        try:
            pieces.append(sdf_to_smiles(record))
        except InputError:
            failures += 1
    return (''.join(pieces), failures)
Exemplo n.º 6
0
def batch_sdf_to_smiles(sdfs):
    """Convert each record of the SDF text ``sdfs`` with ``sdf_to_smiles``.

    Returns a ``(buf, err)`` tuple: ``buf`` is the concatenation of the
    conversion results in record order, ``err`` is the number of records
    for which ``sdf_to_smiles`` raised ``InputError`` (those are left out).
    """
    from sdfiterator import sdf_iter
    from cStringIO import StringIO

    pieces = []
    failures = 0
    for record in sdf_iter(StringIO(sdfs)):
        try:
            pieces.append(sdf_to_smiles(record))
        except InputError:
            failures += 1
    return (''.join(pieces), failures)
Exemplo n.º 7
0
def check_or_update_compounds(
    filename,
    library,
    namekey,
    idkey,
    check_only=True,
    ):
    """Check whether a library needs updating or, optionally, build the update.

    Every record of ``filename`` (read as latin1) is compared with the SDF
    stored for the compound of the same cid in ``library``.

    With ``check_only`` true (the default) nothing is written: True is
    returned as soon as a record differs from the stored SDF or has no
    matching compound in ``library``; False is returned if no such record
    is found.

    With ``check_only`` false a new library is first created from
    ``library.header``.  Changed or new records are inserted into it and
    unchanged compounds are linked to it.  The return value is then always
    False.
    """

    # When updating, create a new library first.
    if not check_only:
        new_lib = create_library_w_header(header=library.header)

    infile = codecs.open(filename, 'r', 'latin1')
    try:
        for sdf in sdfiterator.sdf_iter(infile):
            moldata = parse_annotation(sdf, namekey, idkey)
            cid = moldata[idkey]

            # Only the lookup itself is guarded, so a DoesNotExist raised
            # further down is not mistaken for "compound is new".
            try:
                c = library.compound_set.get(cid=cid)
            except Compound.DoesNotExist:
                c = None

            if c is not None:
                sdf_old = SDFFile.objects.get(compound=c).sdffile
                if sdf == sdf_old:
                    # Same compound: link it to the new library.
                    if not check_only:
                        link_compound_to_library(c, new_lib)
                    continue

            # From here on the record is new or changed.  In check-only
            # mode there is no new library to insert into, so just report.
            if check_only:
                return True

            # "Update" really means inserting the compound into new_lib.
            insert_single_compound(moldata, sdf, new_lib, namekey, idkey)
    finally:
        infile.close()

    return False
Exemplo n.º 8
0
    def newdb(self, filepath, dbpath=None):
        """Store every SDF found in ``filepath`` in a new database.

        The database is opened at ``dbpath`` (``filepath + '.db'`` when not
        given) with flag 'n'; records are stored under the string keys
        '1', '2', ... in iteration order.  When done, a database already
        held in ``self.db`` is closed and ``self.db`` is set to the new one.
        """

        target = (filepath + '.db') if dbpath is None else dbpath
        info('opening %s for writing' % target)
        # NOTE(review): the 'n' flag suggests ``open`` is a dbm-style open
        # imported by the module, not the builtin -- confirm.
        fresh = open(target, 'n')
        for (position, record) in enumerate(sdf_iter(filepath), 1):
            key = str(position)
            info(key)
            fresh[key] = record
        info('writing %s finished' % target)

        if not self.db:
            info('binding to %s' % target)
        else:
            info('rebinding to %s' % target)
            self.db.close()
        self.db = fresh
Exemplo n.º 9
0
def format_sdf_for_qsar(sdffile, output, ID_tag):
    """Rewrite an SDF so that the first line of every record is its cid.

    Cerius2 uses the first line of an SDF record as the ID, and some SDF
    files leave that line blank.  For each record of ``sdffile`` this writes
    to ``output``: the cid (value of ``ID_tag``), the record from its second
    line up to the first 'M  END', the 'M  END' line, a single
    ``> <ID_tag>`` data item holding the cid, and the '$$$$' terminator.
    Other data items of the record are not copied.
    """

    # The context manager closes the output even if a record lacks ID_tag
    # or has no second line.
    with open(output, 'w') as fp:
        for sdf in sdfiterator.sdf_iter(sdffile):
            cid = get_sdf_tags(sdf)[ID_tag]

            fp.write('%s\n' % cid)
            # Drop the original first line; keep everything before 'M  END'.
            fp.write(sdf.split('\n', 1)[1].split('M  END')[0])
            fp.write('M  END\n')
            fp.write('> <%s>\n%s\n\n' % (ID_tag, cid))
            fp.write('$$$$\n')

    return
Exemplo n.º 10
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""make sdf smaller by keeping only the MOL. Also, compounds with bonds
fewer than the limit will be skipped

Command line: INPUT OUTPUT [LIMIT]
    INPUT   path handed to sdf_iter
    OUTPUT  file to write, or '-' for standard output
    LIMIT   minimum bond count to keep a compound (default 0); only read
            when exactly three arguments are given
"""

import sys

from sdfiterator import sdf_iter

limit = 0

inp = sys.argv[1]
if sys.argv[2] == '-':
    outp = sys.stdout
else:
    outp = open(sys.argv[2], 'w')
if len(sys.argv) == 4:
    limit = int(sys.argv[3])

try:
    # NOTE(review): the second argument presumably makes sdf_iter yield
    # only the MOL part of each record -- confirm in sdfiterator.
    for (cnt, sdf) in enumerate(sdf_iter(inp, True)):
        # Fourth line of the record, columns 3-5: read as the bond count.
        bonds_cnt = int((sdf.split('\n')[3])[3:6])
        if bonds_cnt >= limit:
            outp.write(sdf)
        else:
            sys.stderr.write('skipping %s\n' % cnt)
finally:
    # Flush and close a real output file; leave stdout alone.
    if outp is not sys.stdout:
        outp.close()
Exemplo n.º 11
0
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""make sdf smaller by keeping only the MOL. Also, compounds with bonds
fewer than the limit will be skipped

Command line: INPUT OUTPUT [LIMIT]
    INPUT   path handed to sdf_iter
    OUTPUT  file to write, or '-' for standard output
    LIMIT   minimum bond count to keep a compound (default 0); only read
            when exactly three arguments are given
"""

import sys

from sdfiterator import sdf_iter

limit = 0

inp = sys.argv[1]
if sys.argv[2] == '-':
    outp = sys.stdout
else:
    outp = open(sys.argv[2], 'w')
if len(sys.argv) == 4:
    limit = int(sys.argv[3])

try:
    # NOTE(review): the second argument presumably makes sdf_iter yield
    # only the MOL part of each record -- confirm in sdfiterator.
    for (cnt, sdf) in enumerate(sdf_iter(inp, True)):
        # Fourth line of the record, columns 3-5: read as the bond count.
        bonds_cnt = int((sdf.split('\n')[3])[3:6])
        if bonds_cnt >= limit:
            outp.write(sdf)
        else:
            sys.stderr.write('skipping %s\n' % cnt)
finally:
    # Flush and close a real output file; leave stdout alone.
    if outp is not sys.stdout:
        outp.close()
Exemplo n.º 12
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""remove sdf with 0 bonds

Command line: INPUT OUTPUT
    INPUT   path handed to sdf_iter
    OUTPUT  file that receives every record whose bond count is not 0
"""

import sys

from sdfiterator import sdf_iter

inp = sys.argv[1]
outp = open(sys.argv[2], 'w')

try:
    for (cnt, sdf) in enumerate(sdf_iter(inp)):
        # Fourth line of the record, columns 3-5: read as the bond count.
        bonds_cnt = int((sdf.split('\n')[3])[3:6])
        if bonds_cnt != 0:
            outp.write(sdf)
        else:
            # cnt is an int: format it rather than concatenating, which
            # raised TypeError on the first skipped record.
            print('skipping %s' % cnt)
finally:
    outp.close()