Exemplo n.º 1
0
def approximator(molecules,
                 options,
                 sort_order=None,
                 frameworks=None,
                 ensemble=None):
    """
    Recursively rank queries, extending the ensemble on every call until it
    holds ``options.ensemble_size`` members.

    :param molecules: passed to ``classification.get_sort_order`` (when no
        sort order is supplied) and to ``rank_queries``.
    :param options: must expose ``ensemble_size``; also forwarded to
        ``rank_queries`` and ``output.write_ensemble``.
    :param sort_order: optional sort order; derived from ``molecules`` when
        falsy.
    :param frameworks: not used by this function itself; only forwarded to
        the recursive call. Defaults to a fresh empty list.
    :param ensemble: the ensemble built so far. Defaults to a fresh empty
        list.
    :return: 1 once ``len(ensemble) == options.ensemble_size``.
    """

    # Use None sentinels instead of mutable default lists: a default list is
    # created once and shared by every call, so any in-place change made by a
    # callee would silently carry over to later, unrelated invocations.
    if frameworks is None:
        frameworks = []
    if ensemble is None:
        ensemble = []

    # set variables
    ensemble_size = options.ensemble_size

    if not sort_order:
        sort_order = classification.get_sort_order(molecules)

    # construct ensemble
    print("Performing calculations for ensemble size {s}".format(
        s=(len(ensemble) + 1)))
    ensemble = rank_queries(molecules, ensemble, sort_order, options)

    # write stats & ensemble
    output.write_ensemble(list(ensemble), options)

    # NOTE(review): the recursion only stops on an exact size match; this
    # presumably relies on rank_queries growing the ensemble by one member
    # per call -- confirm against rank_queries.
    if len(ensemble) == ensemble_size:
        return 1
    return approximator(molecules, options, sort_order, frameworks,
                        ensemble)
Exemplo n.º 2
0
def approximator(molecules, options, sort_order=None, frameworks=None, ensemble=None):
    """
    Recursively rank queries, extending the ensemble on every call until it
    holds ``options.ensemble_size`` members.

    :param molecules: passed to ``classification.get_sort_order`` (when no
        sort order is supplied) and to ``rank_queries``.
    :param options: must expose ``ensemble_size``; also forwarded to
        ``rank_queries`` and ``output.write_ensemble``.
    :param sort_order: optional sort order; derived from ``molecules`` when
        falsy.
    :param frameworks: not used by this function itself; only forwarded to
        the recursive call. Defaults to a fresh empty list.
    :param ensemble: the ensemble built so far. Defaults to a fresh empty
        list.
    :return: 1 once ``len(ensemble) == options.ensemble_size``.
    """

    # Mutable default lists are shared between calls; start from None and
    # create a fresh list per invocation instead.
    if frameworks is None:
        frameworks = []
    if ensemble is None:
        ensemble = []

    # set variables
    ensemble_size = options.ensemble_size

    if not sort_order:
        sort_order = classification.get_sort_order(molecules)

    # construct ensemble
    print("Performing calculations for ensemble size {s}".format(s=(len(ensemble) + 1)))
    ensemble = rank_queries(molecules, ensemble, sort_order, options)

    # write stats & ensemble
    output.write_ensemble(list(ensemble), options)

    # NOTE(review): termination requires an exact size match; presumably
    # rank_queries adds one member per call -- confirm against rank_queries.
    if len(ensemble) == ensemble_size:
        return 1
    return approximator(molecules, options, sort_order, frameworks, ensemble)
Exemplo n.º 3
0
def optimizor(molecules, sort_order, ensemble_size, options):
    """
    Score every ensemble of the given size and report the best one.

    The candidate ensembles come from ``make_ensemble_list``. They are
    evaluated either in this process or, when more than one processor is
    requested/available, in worker processes that push their partial results
    onto a shared queue. The best ensemble is written out through
    ``output.write_ensemble``.

    :return: tuple ``(auc_list, ef_list)`` with one entry per evaluated
        ensemble.
    """
    workers = options.ncpu
    score_field = options.score_field

    # exhaustive list of candidate ensembles
    candidates = make_ensemble_list(molecules, score_field, ensemble_size)

    # fall back to the machine's processor count when none was requested
    if not workers:
        workers = multiprocessing.cpu_count()

    if workers <= 1:
        print("Determining the performance of {d} ensembles using {n} processor".format(d=len(candidates), n=workers))
        results = evaluate(molecules, candidates, sort_order, options)
    else:
        print("Determining the performance of {d} ensembles using {n} processors".format(d=len(candidates), n=workers))

        # never use more workers than there are candidates
        workers = min(workers, len(candidates))

        result_queue = multiprocessing.Queue()
        processes = []
        for chunk in chunker(candidates, workers):
            worker = multiprocessing.Process(
                target=evaluate,
                args=(molecules, chunk, sort_order, options, result_queue))
            processes.append(worker)
            worker.start()

        # one queue item is collected per started worker, before joining
        results = {}
        for _ in processes:
            results.update(result_queue.get())

        for worker in processes:
            worker.join()

    # pick and persist the best performing ensemble
    best = screener.find_best_ensemble(results, options)
    output.write_ensemble(list(best), options)

    # temp 2/9/15: collect auc values and ef at fpf = 0.001 for a histogram
    ef_keys_per_result = [results[key].ef.keys() for key in results.keys()]
    nd = max(ef_keys_per_result[0])
    n = int(round(0.001 * nd))
    ef_list = [results[key].get_prop(n, 'ef') for key in results.keys()]
    auc_list = [results[key].get_prop('auc') for key in results.keys()]
    return auc_list, ef_list
Exemplo n.º 4
0
def optimizor(molecules, sort_order, ensemble_size, options):
    """
    Evaluate all ensembles of a fixed size and write out the best performer.

    Candidates are produced by ``make_ensemble_list`` and scored by
    ``evaluate``, either directly or spread over worker processes that report
    back through a ``multiprocessing.Queue``.

    :return: tuple ``(auc_list, ef_list)`` with one entry per evaluated
        ensemble.
    """
    n_procs = options.ncpu
    score_field = options.score_field

    # every possible ensemble of the requested size
    all_ensembles = make_ensemble_list(molecules, score_field, ensemble_size)

    # default to all available processors
    if not n_procs:
        n_procs = multiprocessing.cpu_count()

    if n_procs <= 1:
        message = "Determining the performance of {d} ensembles using {n} processor"
        print(message.format(d=len(all_ensembles), n=n_procs))
        results = evaluate(molecules, all_ensembles, sort_order, options)
    else:
        message = "Determining the performance of {d} ensembles using {n} processors"
        print(message.format(d=len(all_ensembles), n=n_procs))

        # cap the worker count at the number of ensembles
        n_procs = min(n_procs, len(all_ensembles))

        queue = multiprocessing.Queue()
        jobs = []
        for piece in chunker(all_ensembles, n_procs):
            job = multiprocessing.Process(
                target=evaluate,
                args=(molecules, piece, sort_order, options, queue))
            jobs.append(job)
            job.start()

        # merge one partial result per job, then wait for the jobs to finish
        results = {}
        for _ in jobs:
            results.update(queue.get())
        for job in jobs:
            job.join()

    # peel away and write out the best performing ensemble
    winner = screener.find_best_ensemble(results, options)
    output.write_ensemble(list(winner), options)

    # temp 2/9/15: auc values and ef at fpf = 0.001, used to build a histogram
    ef_key_lists = [list(results[name].ef.keys()) for name in results]
    nd = max(ef_key_lists[0])
    n = int(round(0.001 * nd))
    ef_list = [results[name].get_prop(n, 'ef') for name in results]
    auc_list = [results[name].get_prop('auc') for name in results]
    return auc_list, ef_list
Exemplo n.º 5
0
def approximator(molecules, options, sort_order=None):
    """
    Rank the queries once, then build up an ensemble step by step.

    :param molecules: passed to ``classification.get_sort_order`` (when no
        sort order is supplied) and to ``rank_queries``.
    :param options: must expose ``ensemble_size``; also forwarded to
        ``rank_queries``, ``construct_ensemble`` and
        ``output.write_ensemble``.
    :param sort_order: optional sort order; derived from ``molecules`` when
        falsy.
    :return: 0 as soon as the ensemble holds ``options.ensemble_size``
        members. NOTE(review): in the code visible here nothing is returned
        explicitly if ``results`` becomes falsy first -- confirm whether that
        is intended.
    """
    ensemble_size = options.ensemble_size

    if not sort_order:
        sort_order = classification.get_sort_order(molecules)

    print("Performing calculations")
    results = rank_queries(molecules, sort_order, options)

    ensemble = []
    # NOTE(review): the loop condition only changes if construct_ensemble
    # consumes entries from `results` in place -- presumably it does; confirm.
    while results:
        ensemble = construct_ensemble(results, ensemble, options)
        # the ensemble is written out after every construction step
        output.write_ensemble(ensemble, options)
        if len(ensemble) == ensemble_size:
            return 0