Exemplo n.º 1
0
def query(r, d, query_sdf, ref_iddb, embedOnly=False):
    """Run a query against the reference database and write ``<query>.out``.

    ``query_sdf`` and ``ref_iddb`` are resolved against the current working
    directory. Results go to ``<basename of query_sdf>.out`` (opened with a
    relative path before the working directory is changed); intermediate
    files are created in a temporary directory.

    Args:
        r, d: Values used to locate the ``run-<r>-<d>`` working directory and
            its ``matrix.<r>-<d>`` file; also forwarded to ``batchQuery`` /
            ``solvePuzzle``.
        query_sdf: Path of the query file.
        ref_iddb: Path of the reference file. ``<ref_iddb>.distmat.coord`` is
            used as the coordinate file.
        embedOnly: When true the ``batchQuery`` path is always taken and the
            flag is forwarded to it.

    Returns:
        None.

    Raises:
        StandardError: if the query file, the reference file or the
            ``run-<r>-<d>`` working directory does not exist.

    Errors raised while the query itself runs are reported with
    ``print_exc()`` and are not re-raised. Whatever happens, the output file
    is closed, the original working directory is restored and the temporary
    directory is removed.
    """
    from shutil import rmtree

    current_dir = os.path.abspath(".")
    query_sdf = os.path.join(current_dir, query_sdf)
    ref_iddb = os.path.join(current_dir, ref_iddb)
    work_dir = os.path.join(current_dir, 'run-%s-%s' % (r, d))

    # Validate all inputs before creating anything on disk.
    if not os.path.isfile(query_sdf):
        raise StandardError("query file " + query_sdf + " not found")
    if not os.path.isfile(ref_iddb):
        raise StandardError("reference file " + ref_iddb + " not found")
    if not os.path.isdir(work_dir):
        raise StandardError("working directory " + work_dir + " not found")

    temp_dir = mkdtemp()
    query_base = os.path.splitext(os.path.basename(query_sdf))[0]

    query_cdb = os.path.join(temp_dir, query_base + ".cdb")

    matrix_file = os.path.join(work_dir, "matrix.%s-%s" % (r, d))
    coord_file = ref_iddb + ".distmat.coord"

    try:
        #TODO move this to non-batch branch and check
        _parsing_time, num_compounds = createQueryCdb(query_sdf, query_cdb)

        info("found %s compounds" % num_compounds)
        ref_db = gen_subdb(ref_iddb, None, DB_SUBSET, CDB)
        with open(CDB + ".names") as names_file:
            names = [line.strip() for line in names_file]

        # Opened before chdir() so the relative name resolves against the
        # caller's directory rather than the temporary one.
        with open(query_base + ".out", 'w') as f:
            os.chdir(temp_dir)
            if embedOnly or num_compounds > 1:
                batchQuery(f, r, d, ref_db, query_sdf, coord_file,
                           matrix_file, names, embedOnly)
            else:
                puzzle_file = os.path.join(temp_dir, "puzzle")

                refineResult = refine(
                    QueryFile(query_cdb),
                    lshSearch(
                        matrix_file,
                        solvePuzzle(r, d, ref_db, query_cdb, coord_file,
                                    puzzle_file)))
                # seq_id is used as a 1-based index into the names list.
                for seq_id, dist in refineResult:
                    f.write('%s %s\n' % (names[int(seq_id) - 1], dist))

                info("refine result: " + str(refineResult))
    except Exception:
        # Best effort: report the failure but do not propagate it.
        # KeyboardInterrupt/SystemExit are deliberately not caught here.
        print_exc()
    finally:
        # Runs on success and on failure, so a failed query no longer leaves
        # the process inside temp_dir or the directory behind on disk.
        os.chdir(current_dir)
        #warning("NOT CLEANING UP")
        rmtree(temp_dir, ignore_errors=True)
Exemplo n.º 2
0
def query(r,d,query_sdf,ref_iddb,embedOnly=False):
	"""Run a query against the reference database and write ``<query>.out``.

	``query_sdf`` and ``ref_iddb`` are resolved against the current working
	directory. Results go to ``<basename of query_sdf>.out`` (opened with a
	relative path before the working directory is changed); intermediate
	files are created in a temporary directory.

	Args:
		r, d: Values used to locate the ``run-<r>-<d>`` working directory
			and its ``matrix.<r>-<d>`` file; also forwarded to
			``batchQuery`` / ``solvePuzzle``.
		query_sdf: Path of the query file.
		ref_iddb: Path of the reference file. ``<ref_iddb>.distmat.coord``
			is used as the coordinate file.
		embedOnly: When true the ``batchQuery`` path is always taken and
			the flag is forwarded to it.

	Returns:
		None.

	Raises:
		StandardError: if the query file, the reference file or the
			``run-<r>-<d>`` working directory does not exist.

	Errors raised while the query itself runs are reported with
	``print_exc()`` and are not re-raised. Whatever happens, the output file
	is closed, the original working directory is restored and the temporary
	directory is removed.
	"""
	from shutil import rmtree

	current_dir = os.path.abspath(".")
	query_sdf = os.path.join(current_dir,query_sdf)
	ref_iddb = os.path.join(current_dir,ref_iddb)
	work_dir = os.path.join(current_dir,'run-%s-%s' % (r, d))

	# Validate all inputs before creating anything on disk.
	if not os.path.isfile(query_sdf):
		raise StandardError("query file "+query_sdf+" not found")
	if not os.path.isfile(ref_iddb):
		raise StandardError("reference file "+ref_iddb+" not found")
	if not os.path.isdir(work_dir):
		raise StandardError("working directory "+work_dir+" not found")

	temp_dir = mkdtemp()
	query_base = os.path.splitext(os.path.basename(query_sdf))[0]

	query_cdb = os.path.join(temp_dir,query_base+".cdb")

	matrix_file = os.path.join(work_dir,"matrix.%s-%s" % (r,d))
	coord_file = ref_iddb+".distmat.coord"

	try:
		#TODO move this to non-batch branch and check
		_parsing_time,num_compounds = createQueryCdb(query_sdf,query_cdb)

		info("found %s compounds" % num_compounds)
		ref_db = gen_subdb(ref_iddb,None,DB_SUBSET,CDB)
		with open(CDB+".names") as names_file:
			names = [line.strip() for line in names_file]

		# Opened before chdir() so the relative name resolves against the
		# caller's directory rather than the temporary one.
		with open(query_base+".out",'w') as f:
			os.chdir(temp_dir)
			if embedOnly or num_compounds > 1:
				batchQuery(f,r,d,ref_db,query_sdf,
						coord_file,matrix_file,names,embedOnly)
			else:
				puzzle_file = os.path.join(temp_dir,"puzzle")

				refineResult = refine(
					QueryFile(query_cdb),
					lshSearch(
						matrix_file,
						solvePuzzle(r,d,ref_db,query_cdb,
							coord_file,puzzle_file)))
				# seq_id is used as a 1-based index into the names list.
				for seq_id,dist in refineResult:
					f.write('%s %s\n' %(names[int(seq_id)-1],dist))

				info("refine result: "+str(refineResult))
	except Exception:
		# Best effort: report the failure but do not propagate it.
		# KeyboardInterrupt/SystemExit are deliberately not caught here.
		print_exc()
	finally:
		# Runs on success and on failure, so a failed query no longer leaves
		# the process inside temp_dir or the directory behind on disk.
		os.chdir(current_dir)
		#warning("NOT CLEANING UP")
		rmtree(temp_dir,ignore_errors=True)
Exemplo n.º 3
0
        sys.stderr.write("Invalid reference coordinate file: no such file.")
        sys.stderr.write("Missing:\n")
        sys.stderr.write(ref_coord)
        sys.exit(1)
    
    f = file(ref_coord)
    d = len(f.readline().split())
    for i, _ in enumerate(f): pass
    r = 2 + i
    sys.stderr.write("r = %d d = %d\n" % (r, d))

    db_builder = DB_BUILDER
    db2db_distance = DB2DB_DISTANCE

    if opts.m:
        db_builder += ('.' + opts.m)
        db2db_distance += ('.' + opts.m)

   # this assumes absolute paths, which we don't need to have
    #if not os.path.isfile(db_builder):
        #sys.stderr.write("Cannot find database builder to parse your input")
        #sys.stderr.write("\nI cannot find: ")
        #sys.stderr.write(db_builder)
        #sys.exit(1)

    ref_real_db = gen_subdb(ref_db_path, opts.m,DB_SUBSET,CDB)
 
    print embed(args[0], r, d, ref_real_db, ref_coord, db_builder, db2db_distance)

# vim:tw=78:ts=4:sw=4:expandtab
Exemplo n.º 4
0
        sys.stderr.write(ref_coord)
        sys.exit(1)

    f = file(ref_coord)
    d = len(f.readline().split())
    for i, _ in enumerate(f):
        pass
    r = 2 + i
    sys.stderr.write("r = %d d = %d\n" % (r, d))

    db_builder = DB_BUILDER
    db2db_distance = DB2DB_DISTANCE

    if opts.m:
        db_builder += ('.' + opts.m)
        db2db_distance += ('.' + opts.m)

# this assumes absolute paths, which we don't need to have
#if not os.path.isfile(db_builder):
#sys.stderr.write("Cannot find database builder to parse your input")
#sys.stderr.write("\nI cannot find: ")
#sys.stderr.write(db_builder)
#sys.exit(1)

    ref_real_db = gen_subdb(ref_db_path, opts.m, DB_SUBSET, CDB)

    print embed(args[0], r, d, ref_real_db, ref_coord, db_builder,
                db2db_distance)

# vim:tw=78:ts=4:sw=4:expandtab