Example #1
import logging
import multiprocessing
import tempfile

# temp_dir, unique_files, clean, split_smi_library, CountLines and mp_helper
# are defined elsewhere in the module this snippet was taken from.
def mp_helper_special_mode(one_fragment,
                           fragment_file_chunks,
                           options,
                           chunk_size=100):

    iteration_depth = options.iteration_depth
    repeats = options.repeats

    results = list()
    # one_fragment is a list containing a single fragment file
    fragments = one_fragment

    def mp_callback(res):
        r, f = res
        if r:
            results.append(r)
        if f:
            fragments.append(f)

    for repeat_number in range(repeats):
        logging.info(
            'Run %s of %s. You can change the repeat count with --repeats.' %
            (repeat_number + 1, repeats))
        for counter, fragment_file_two in enumerate(fragment_file_chunks):
            combined_fragments = tempfile.NamedTemporaryFile(
                dir=temp_dir, prefix='tmp_fragments_', delete=False)
            unique_files(fragments, combined_fragments, '1')

            #logging.debug('fragments (%s)' % ( open(combined_fragments.name).read().strip() ))
            if counter > 0:
                clean(fragments)
            fragments = [combined_fragments.name]
            #logging.debug('Linecount: %s (%s) - loop-counter (%s)' % ( CountLines( combined_fragments.name ), combined_fragments.name, counter))

            splitted_files = split_smi_library(combined_fragments.name,
                                               chunk_size)
            logging.debug(
                'Fragments to process: %s (%s); Files to process: %s (%s)' %
                (CountLines(combined_fragments.name), combined_fragments.name,
                 len(splitted_files), counter))

            pool = multiprocessing.Pool(options.processors)
            for fragment_file in splitted_files:
                pool.apply_async(mp_helper,
                                 args=(fragment_file, fragment_file_two,
                                       iteration_depth),
                                 callback=mp_callback)
            pool.close()
            pool.join()
            clean(splitted_files)

    if results:
        combined_results = tempfile.NamedTemporaryFile(dir=temp_dir,
                                                       prefix='tmp_results_',
                                                       delete=False)
        unique_files(results, combined_results, '2')
        clean(results)
        return combined_results.name
    else:
        return False
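
The pattern that drives this example is multiprocessing.Pool.apply_async with a callback: each chunk file is dispatched to a worker, and the callback runs in the parent process, so it can safely append to the plain results and fragments lists. Below is a minimal, self-contained sketch of that pattern; worker and collect are hypothetical stand-ins for mp_helper and mp_callback, not the project's code.

import multiprocessing

def worker(chunk):
    # Hypothetical stand-in for mp_helper: return a (result, fragment)
    # pair, either element of which may be falsy.
    total = sum(chunk)
    return (total, total if total % 2 else None)

if __name__ == '__main__':
    results = []
    fragments = []

    def collect(res):
        # Mirrors mp_callback above; callbacks execute in the parent
        # process, so appending to ordinary lists is safe.
        r, f = res
        if r:
            results.append(r)
        if f:
            fragments.append(f)

    pool = multiprocessing.Pool(2)
    for chunk in ([1, 2], [2, 2], [3, 4]):
        pool.apply_async(worker, args=(chunk,), callback=collect)
    pool.close()
    pool.join()
    print(results, fragments)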
Example #2
import os
import sys
from multiprocessing import Pool

import cheminfolib

# start_parser is defined elsewhere in the module this snippet was taken from.
def run(compound_path, procs, schema, conn_string, filetype):
    clean_up_files = False
    if os.path.isdir(compound_path):
        paths = []
        for root, dirs, files in os.walk(compound_path):
            for compound_file in files:
                path = os.path.join(root, compound_file)
                paths.append((path, schema, conn_string, filetype))
        paths.sort()
    else:
        if filetype in ["smi", "inchi"]:
            clean_up_files = True
            compound_count = cheminfolib.CountLines(compound_path)
            sys.stdout.write(
                "Splitting input file (%s) with %s molecules into files of %s molecules each.\n"
                % (compound_path, compound_count, int(compound_count / procs)))
            paths = cheminfolib.split_smi_library(compound_path,
                                                  structures_in_one_file=int(
                                                      compound_count / procs))
            paths = [(path, schema, conn_string, filetype) for path in paths]
        if filetype == "sdf":
            paths = [compound_path]

    pool = Pool(processes=procs)
    sys.stdout.write("Process initialized with %s processes.\n" % procs)
    result = pool.map_async(start_parser, paths)
    result.get()
    if clean_up_files:
        for path in paths:
            os.remove(path[0])
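
Both branches feed the same split-then-map_async flow: the input is rewritten as roughly equal chunk files, each chunk goes to one pool worker, and the chunk files are removed afterwards. The sketch below approximates that flow with stand-ins: split_lines mimics what cheminfolib.split_smi_library appears to do, and count_molecules plays the role of start_parser.

import os
import tempfile
from multiprocessing import Pool

def split_lines(path, lines_per_file):
    # Rough stand-in for cheminfolib.split_smi_library: write one
    # temp file per lines_per_file input lines, return the paths.
    paths = []
    with open(path) as src:
        lines = src.readlines()
    for i in range(0, len(lines), lines_per_file):
        out = tempfile.NamedTemporaryFile(mode='w', suffix='.smi',
                                          delete=False)
        out.writelines(lines[i:i + lines_per_file])
        out.close()
        paths.append(out.name)
    return paths

def count_molecules(path):
    # Stand-in for start_parser: just count the lines in one chunk.
    with open(path) as handle:
        return sum(1 for _ in handle)

if __name__ == '__main__':
    src = tempfile.NamedTemporaryFile(mode='w', suffix='.smi', delete=False)
    src.write('CCO ethanol\nC methane\nCC ethane\nCCC propane\n')
    src.close()
    chunks = split_lines(src.name, 2)
    pool = Pool(processes=2)
    print(pool.map_async(count_molecules, chunks).get())
    pool.close()
    pool.join()
    for path in chunks + [src.name]:
        os.remove(path)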
Example #3
def run(compound_path, procs, schema, conn_string, filetype):
    clean_up_files = False
    if os.path.isdir( compound_path ):
        paths = []
        for root, dirs, files in os.walk(compound_path):
            for filename in files:
                path = os.path.join( root, filename )
                paths.append( (path, schema, conn_string, filetype) )
        paths.sort()
    else:
        if filetype in ['smi', 'inchi']:
            clean_up_files = True
            compound_count = cheminfolib.CountLines( compound_path )
            sys.stdout.write('Splitting input file (%s) with %s molecules into files of %s molecules each.\n' % (compound_path, compound_count, int(compound_count / procs)))
            paths = cheminfolib.split_smi_library( compound_path, structures_in_one_file = int(compound_count / procs) )
            paths = [(path, schema, conn_string, filetype) for path in paths]
        if filetype == 'sdf':
            paths = [compound_path]

    pool = Pool(processes = procs)
    sys.stdout.write('Pool initialized with %s processes.\n' % procs)
    result = pool.map_async(start_parser, paths)
    result.get()
    if clean_up_files:
        for path in paths:
            os.remove( path[0] )
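
The directory branch shared by Examples #2 and #3 reduces to a small helper: walk the tree and emit one work tuple per file, sorted for a stable processing order. A sketch under that reading, with collect_jobs as a hypothetical name and the tuple layout taken from the examples above:

import os

def collect_jobs(compound_path, schema, conn_string, filetype):
    # One (path, schema, conn_string, filetype) tuple per file found
    # anywhere under compound_path, as in the directory branch above.
    jobs = []
    for root, dirs, files in os.walk(compound_path):
        for name in files:
            jobs.append((os.path.join(root, name),
                         schema, conn_string, filetype))
    jobs.sort()
    return jobs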
Example #4
        log_level = logging.INFO

    logging.basicConfig(level=log_level)
    test(options)

    temp_dir = tempfile.mkdtemp(dir=options.tempdir)
    unique_compounds = set()
    multiple_merge_compounds = set()
    trash_list = list()
    result_files = list()

    unique_input, unique_input_non_fragments = filter_input_files(
        options.input_path)
    # adding the non-fragments to the results
    # result_files.append( unique_input_non_fragments.name )
    splitted_files = split_smi_library(unique_input.name, 1)
    trash_list.extend(splitted_files)

    # If we have two input files, merge one against the other
    if options.second_input:
        logging.info("Merging file %s against file %s." %
                     (options.input_path, options.second_input))
        unique_input2, unique_input_non_fragments2 = filter_input_files(
            options.second_input)
        # adding the non-fragments to the results

        # result_files.append( unique_input_non_fragments2.name )
        splitted_files2 = split_smi_library(unique_input2.name, 1)
        trash_list.extend(splitted_files2)

        for counter, fragment_file_one in enumerate(splitted_files):
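
The snippet above relies on helpers that are only referenced by name: filter_input_files evidently returns a deduplicated fragment file plus a non-fragment file, and split_smi_library with a chunk size of 1 yields one file per fragment. Assuming one SMILES string per line, the deduplication half can be approximated with a set and a named temp file; filter_unique_lines below is a hypothetical sketch, not the project's actual helper.

import tempfile

def filter_unique_lines(input_path, temp_dir):
    # Hypothetical approximation of filter_input_files: copy each
    # distinct input line into a temp file and return its path.
    seen = set()
    out = tempfile.NamedTemporaryFile(mode='w', dir=temp_dir,
                                      prefix='tmp_unique_', delete=False)
    with open(input_path) as src:
        for line in src:
            if line not in seen:
                seen.add(line)
                out.write(line)
    out.close()
    return out.name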