Exemplo n.º 1
0
def process():
    """Copy an SDF file while leaving out molecules found in an exclusion file.

    The command line (``sys.argv``) supplies three paths, in this order:
    the input SDF file, the SDF file holding the molecules to exclude and
    the output SDF file. When fewer than three arguments are given, a usage
    message is printed to stderr and the process exits with status 2.

    Every molecule of the exclusion file is prepared with ``setupMolecule``
    and its ``Chem.calcHashCode`` value is remembered. Input molecules are
    prepared the same way; those whose hash code was remembered are reported
    on stderr and skipped, all others are written to the output file.
    A progress line is emitted every 10000 molecules and a summary at the end.
    """
    if len(sys.argv) <= 3:
        print('Usage:',
              sys.argv[0],
              '[input.sdf] [exclude-molecules.sdf] [output.sdf]',
              file=sys.stderr)
        sys.exit(2)

    def report(message):
        # All diagnostics of this tool go to stderr.
        print(message, file=sys.stderr)

    in_stream = Base.FileIOStream(sys.argv[1], 'r')
    exclude_stream = Base.FileIOStream(sys.argv[2], 'r')
    out_stream = Base.FileIOStream(sys.argv[3], 'w')

    mol_reader = Chem.SDFMoleculeReader(in_stream)
    exclude_reader = Chem.SDFMoleculeReader(exclude_stream)
    mol_writer = Chem.SDFMolecularGraphWriter(out_stream)
    molecule = Chem.BasicMolecule()

    # The multi-conformer import/export parameters are switched off for
    # both readers and for the writer.
    Chem.setMultiConfImportParameter(mol_reader, False)
    Chem.setMultiConfImportParameter(exclude_reader, False)
    Chem.setMultiConfExportParameter(mol_writer, False)

    stats = Stats()
    stats.read = 0
    stats.dropped = 0

    # Pass 1: collect the hash codes of all molecules that must be excluded.
    excluded_hashes = set()

    while exclude_reader.read(molecule):
        setupMolecule(molecule)
        excluded_hashes.add(Chem.calcHashCode(molecule))

    # Pass 2: copy every input molecule whose hash code is not excluded.
    while mol_reader.read(molecule):
        setupMolecule(molecule)

        if Chem.calcHashCode(molecule) not in excluded_hashes:
            mol_writer.write(molecule)
        else:
            stats.dropped += 1
            report('Dropped Molecule ' + str(stats.read) + ': ' +
                   Chem.generateSMILES(molecule) + ' ' +
                   Chem.getName(molecule))

        stats.read += 1

        if not stats.read % 10000:
            report('Processed ' + str(stats.read) + ' Molecules...')

    report('')
    report('-- Summary --')
    report('Molecules processed: ' + str(stats.read))
    report('Molecules dropped: ' + str(stats.dropped))
Exemplo n.º 2
0
def CDPLmolFromSdf(sdf_path, conformation):
    """Read the first molecule of an SDF file into a CDPL molecule.

    Args:
        sdf_path (str): Path to the SDF file.
        conformation (bool): If true, the molecule that was read is handed to
            ``_CDPLgenerateConformation`` and the result of that call is
            returned instead (the original documentation describes this as
            generating one 3D conformation according to MMFF94 - the helper
            is defined elsewhere, so confirm there).

    Returns:
        The ``Chem.BasicMolecule`` that was read (or whatever
        ``_CDPLgenerateConformation`` returns for it), or ``False`` when no
        molecule could be read from the file.
    """
    mol = Chem.BasicMolecule()
    ifs = Base.FileIOStream(sdf_path, 'r')
    sdf_reader = Chem.SDFMoleculeReader(ifs)

    if not sdf_reader.read(mol):
        # The path is passed as a lazy logging argument, so the message needs
        # a placeholder for it; without one the logging module reports a
        # formatting error and the path is lost.
        # NOTE(review): assumes ``log`` is a standard ``logging`` logger.
        log.error("COULD NOT READ SDF: %s", sdf_path)
        return False
    if conformation:
        return _CDPLgenerateConformation(mol)
    return mol
Exemplo n.º 3
0
        def process(sdf_file, psd_file_path):
            """Build a pharmacophore screening database from an SDF file.

            Every molecule read from ``sdf_file`` (with the multi-conformer
            import parameter set to True) is prepared with ``setupMolecule``
            and handed to a ``Pharm.PSDScreeningDBCreator`` that is opened on
            ``psd_file_path`` in ``CREATE`` mode. A progress line with the
            time needed for the last 100 molecules is printed after every
            100 molecules, followed by a summary of the creator's counters.

            Args:
                sdf_file: Path of the SDF file to read.
                psd_file_path: Path of the screening database to create.
            """
            ifs = Base.FileIOStream(sdf_file, 'r')
            reader = Chem.SDFMoleculeReader(ifs)
            mol = Chem.BasicMolecule()

            Chem.setMultiConfImportParameter(reader, True)

            psd_creator = Pharm.PSDScreeningDBCreator(
                psd_file_path, Pharm.PSDScreeningDBCreator.CREATE, True)

            # time.clock() was removed in Python 3.8; prefer perf_counter()
            # and only fall back to clock() on interpreters that lack it.
            timer = getattr(time, 'perf_counter', None) or time.clock

            i = 0
            t0 = timer()

            try:
                while reader.read(mol):
                    setupMolecule(mol)

                    psd_creator.process(mol)
                    i += 1

                    if i % 100 == 0:
                        # Single-argument print calls behave the same with
                        # the print statement and the print function.
                        print('Processed ' + str(i) + ' molecules (' +
                              str(timer() - t0) + ' s elapsed)...')
                        t0 = timer()

                    mol.clear()

                print('')
                print('-- Summary --')
                print('Molecules processed: ' + str(psd_creator.numProcessed))
                print('Molecules rejected: ' + str(psd_creator.numRejected))
                print('Molecules deleted: ' + str(psd_creator.numDeleted))
                print('Molecules inserted: ' + str(psd_creator.numInserted))
            finally:
                # Close the database even if reading or processing fails.
                psd_creator.close()
Exemplo n.º 4
0
import CDPL.Base as Base
import CDPL.Chem as Chem
import CDPL.Math as Math


def process():
    """Read a training set and the matching experimental logP values.

    Command line arguments (``sys.argv``): the training-set SDF file, a text
    file from which one logP value is read per molecule, and the path of the
    regression coefficient output file. With fewer than three arguments a
    usage message is printed to stderr and the process exits with status 2.

    For every molecule read from the SDF file, one line of the logP file is
    parsed as a float, then components, SSSR, ring flags and implicit
    hydrogen counts are computed on the molecule.

    NOTE(review): the visible part of this function ends after these
    preparation calls; ``coeff_os``, ``mlr_model``, ``xlogp_calc``,
    ``histo`` and ``exp_logp`` are not used within it, presumably because
    the remainder of the loop is not shown - confirm against the full source.
    """
    if len(sys.argv) < 4:
        print('Usage:', sys.argv[0], 'training-set.sdf logP-data regression-coeff-file', file=sys.stderr)
        sys.exit(2)

    struct_is = Base.FileIOStream(sys.argv[1], 'r')
    exp_logp_is = Base.FileIOStream(sys.argv[2], 'r')
    coeff_os = Base.FileIOStream(sys.argv[3], 'w')

    mlr_model = Math.DMLRModel()
    sdf_reader = Chem.SDFMoleculeReader(struct_is)
    mol = Chem.BasicMolecule()
    xlogp_calc = Chem.XLogPCalculator()

    histo = Math.DVector()
    histo.resize(Chem.XLogPCalculator.FEATURE_VECTOR_SIZE)

    Chem.setMultiConfImportParameter(sdf_reader, False)

    while sdf_reader.read(mol):
        # The logP file is consumed in lock-step with the SDF file:
        # one line per molecule read.
        exp_logp = float(exp_logp_is.readline())

        Chem.perceiveComponents(mol, False)
        Chem.perceiveSSSR(mol, False)
        Chem.setRingFlags(mol, False)
        Chem.calcImplicitHydrogenCounts(mol, False)
Exemplo n.º 5
0
def cleanStructures():
    """Run ``processMolecule`` over an SDF file and split the results.

    Command line arguments (``sys.argv``): input SDF file, output SDF file,
    SDF file for dropped molecules, the record index to start at and,
    optionally, the maximum number of molecules to handle. With fewer than
    four arguments a usage message is printed to stderr and the process
    exits with status 2.

    Each molecule is passed to ``processMolecule`` together with the
    statistics object. A truthy result is written to the output file;
    otherwise the unmodified molecule is written to the dropped file and
    reported on stderr. A progress line is emitted whenever the absolute
    record index reaches a multiple of 10000, and a summary at the end.
    """
    if len(sys.argv) <= 4:
        print('Usage:',
              sys.argv[0],
              '[input.sdf] [output.sdf] [dropped.sdf] [start_index] [[count]]',
              file=sys.stderr)
        sys.exit(2)

    def report(message):
        # All diagnostics of this tool go to stderr.
        print(message, file=sys.stderr)

    in_stream = Base.FileIOStream(sys.argv[1], 'r')
    out_stream = Base.FileIOStream(sys.argv[2], 'w')
    dropped_stream = Base.FileIOStream(sys.argv[3], 'w')

    offset = int(sys.argv[4])
    # A count of 0 means "no limit".
    count = int(sys.argv[5]) if len(sys.argv) > 5 else 0

    reader = Chem.SDFMoleculeReader(in_stream)
    kept_writer = Chem.SDFMolecularGraphWriter(out_stream)
    dropped_writer = Chem.SDFMolecularGraphWriter(dropped_stream)
    mol = Chem.BasicMolecule()

    stats = Stats()
    for counter in ('read', 'dropped', 'modified'):
        setattr(stats, counter, 0)

    # The multi-conformer import/export parameters are switched off for
    # the reader and for both writers.
    Chem.setMultiConfImportParameter(reader, False)
    Chem.setMultiConfExportParameter(kept_writer, False)
    Chem.setMultiConfExportParameter(dropped_writer, False)

    if offset > 0:
        report('Skipping Molecules to Start Index ' + str(offset))
        reader.setRecordIndex(offset)

    # stats.read holds the absolute record index, so it starts at the offset.
    stats.read = offset

    while reader.read(mol):
        cleaned = processMolecule(mol, stats)

        if not cleaned:
            stats.dropped += 1
            dropped_writer.write(mol)
            report('Dropped Molecule ' + str(stats.read) + ': ' +
                   generateSMILES(mol) + ' ' + Chem.getName(mol))
        else:
            kept_writer.write(cleaned)

        stats.read += 1
        handled = stats.read - offset

        if stats.read % 10000 == 0:
            report('Processed ' + str(handled) + ' Molecules...')

        # Stop once the optional molecule limit has been reached.
        if 0 < count <= handled:
            break

    report('')
    report('-- Summary --')
    report('Molecules processed: ' + str(stats.read - offset))
    report('Molecules dropped: ' + str(stats.dropped))
    report('Molecules modified: ' + str(stats.modified))