def calculate_performance(molecules, ensemble, sort_order, options):
    """
    determine the virtual screening performance of the input ensemble, and
    return the results in an ensemble storage object.

    :param molecules: molecule collection handed to
        classification.make_score_structure
    :param ensemble: the ensemble to evaluate; stored under the 'ensemble'
        property of the returned object
    :param sort_order: string. either 'asc' (for binding energy estimates) or
        'dsc' (for similarity scores)
    :param options: options object handed to classification.make_fpfList
    :return: EnsembleStorage holding the 'ensemble' and 'auc' properties plus
        one entry, stored with the 'ef' tag, per fpf value that produced a
        non-empty enrichment factor structure
    """
    storage = EnsembleStorage()
    storage.set_prop('ensemble', ensemble)

    # score structure for this ensemble; both metrics below are built from it
    scores = classification.make_score_structure(molecules, ensemble)

    # area under the curve
    auc_input = classification.make_auc_structure(scores)
    storage.set_prop(
        'auc',
        classification.calculate_auc(auc_input, sort_order, 'no stats'),
    )

    # enrichment factors, one per fpf value
    for raw_fpf in classification.make_fpfList(options, scores):
        fpf_value = float(raw_fpf)
        ef_input = classification.make_ef_structure(
            scores, fpf_value, sort_order)
        if not ef_input:
            # nothing to compute for this fpf value
            continue
        ef_result = classification.calculate_ef(ef_input, fpf_value)
        storage.set_prop(ef_result[0], ef_result[1], 'ef')

    return storage
def calculate_performance(molecules, ensemble, sort_order, options):
    """
    evaluate one ensemble and collect its virtual screening metrics in an
    ensemble storage object.

    :param molecules: molecule collection, forwarded to
        classification.make_score_structure
    :param ensemble: ensemble under evaluation; recorded as the 'ensemble'
        property of the result
    :param sort_order: string. either 'asc' (for binding energy estimates) or
        'dsc' (for similarity scores)
    :param options: options object, forwarded to classification.make_fpfList
    :return: EnsembleStorage with 'ensemble', 'auc' and, for every fpf value
        whose enrichment factor structure is non-empty, an entry stored with
        the 'ef' tag
    """
    result = EnsembleStorage()
    result.set_prop('ensemble', ensemble)

    # the score structure is the common input of the auc and ef calculations
    score_data = classification.make_score_structure(molecules, ensemble)

    # auc value
    auc_value = classification.calculate_auc(
        classification.make_auc_structure(score_data),
        sort_order,
        'no stats',
    )
    result.set_prop('auc', auc_value)

    # enrichment factors; each fpf is converted to float as it is consumed
    fpf_values = classification.make_fpfList(options, score_data)
    for fraction in map(float, fpf_values):
        ef_data = classification.make_ef_structure(
            score_data, fraction, sort_order)
        if ef_data:
            enrichment = classification.calculate_ef(ef_data, fraction)
            name = enrichment[0]
            value = enrichment[1]
            result.set_prop(name, value, 'ef')

    return result
def rank_queries(molecules, ensemble, sort_order, options):
    """
    rank queries by value added to existing ensemble

    Each key of ``molecules[0].scores`` that is not already in ``ensemble`` is
    appended to a copy of ``ensemble``. The extended ensemble is scored (auc
    plus one enrichment factor per fpf value) and the best extended ensemble,
    as selected by screener.find_best_ensemble, is returned.

    :param molecules: indexable collection of molecules; the first one must
        expose a ``scores`` mapping whose keys are the candidate queries
    :param ensemble: list or tuple of the queries selected so far; it is not
        modified
    :param sort_order: forwarded to classification.calculate_auc and
        classification.make_ef_structure
    :param options: forwarded to classification.make_fpfList and
        screener.find_best_ensemble
    :return: the best performing extended ensemble, as a list
    """
    # Freeze the current ensemble once. Tuple concatenation below works for
    # list and tuple inputs alike and never touches the caller's object.
    base_ensemble = tuple(ensemble)

    # generate query list
    query_list = [
        x for x in molecules[0].scores.keys() if x not in ensemble
    ]

    results = {}
    for query in query_list:
        es = EnsembleStorage()

        # generate test_ensemble (a tuple, so it can key the results dict)
        test_ensemble = base_ensemble + (query,)
        es.set_prop('ensemble', test_ensemble)

        # calculate its performance
        score_structure = classification.make_score_structure(
            molecules, test_ensemble)

        # determine auc value
        auc_structure = classification.make_auc_structure(score_structure)
        auc = classification.calculate_auc(
            auc_structure, sort_order, 'no stats')
        es.set_prop('auc', auc)

        # determine one enrichment factor per fpf value
        for fpf in classification.make_fpfList(options, score_structure):
            fpf = float(fpf)
            ef_structure = classification.make_ef_structure(
                score_structure, fpf, sort_order)
            if ef_structure:
                ef = classification.calculate_ef(ef_structure, fpf)
                es.set_prop(ef[0], ef[1], 'ef')

        # append results to metric list
        results[test_ensemble] = es

    # peel away the best performing ensemble
    best_ensemble = screener.find_best_ensemble(results, options)
    return list(best_ensemble)
def rank_queries(molecules, ensemble, sort_order, options):
    """
    rank queries by value added to existing ensemble

    For every key of ``molecules[0].scores`` not yet present in ``ensemble``,
    a trial ensemble (``ensemble`` plus that query) is built and scored with
    an auc value and one enrichment factor per fpf value. The trial ensemble
    picked by screener.find_best_ensemble is returned.

    :param molecules: indexable collection of molecules; ``molecules[0]`` must
        provide a ``scores`` mapping keyed by query
    :param ensemble: list or tuple of already selected queries; left untouched
    :param sort_order: passed on to classification.calculate_auc and
        classification.make_ef_structure
    :param options: passed on to classification.make_fpfList and
        screener.find_best_ensemble
    :return: list containing the members of the best trial ensemble
    """
    # The selected queries are the same for every trial, so convert them once.
    # Using a tuple here also lets callers pass a tuple, which the previous
    # slice-and-append copy could not handle.
    selected = tuple(ensemble)

    # generate query list
    query_list = [
        x for x in molecules[0].scores.keys() if x not in ensemble
    ]

    results = {}
    for query in query_list:
        es = EnsembleStorage()

        # generate test_ensemble; tuples are hashable and key the results
        test_ensemble = selected + (query,)
        es.set_prop('ensemble', test_ensemble)

        # calculate its performance
        score_structure = classification.make_score_structure(
            molecules, test_ensemble)

        # determine auc value
        auc_structure = classification.make_auc_structure(score_structure)
        auc = classification.calculate_auc(
            auc_structure, sort_order, 'no stats')
        es.set_prop('auc', auc)

        # enrichment factors: skip fpf values with an empty ef structure
        for fpf in classification.make_fpfList(options, score_structure):
            fpf = float(fpf)
            ef_structure = classification.make_ef_structure(
                score_structure, fpf, sort_order)
            if ef_structure:
                ef = classification.calculate_ef(ef_structure, fpf)
                es.set_prop(ef[0], ef[1], 'ef')

        # append results to metric list
        results[test_ensemble] = es

    # peel away the best performing ensemble
    best_ensemble = screener.find_best_ensemble(results, options)
    return list(best_ensemble)
def rank_queries(molecules, sort_order, options):
    """
    score every query on its own, as a single-member ensemble

    Each key of ``molecules[0].scores`` is wrapped in a 1-tuple and evaluated
    with an auc value and one enrichment factor per fpf value.

    :param molecules: indexable collection of molecules; the first one must
        expose a ``scores`` mapping whose keys are the queries
    :param sort_order: forwarded to classification.calculate_auc and
        classification.make_ef_structure
    :param options: forwarded to classification.make_fpfList
    :return: dict mapping each ``(query,)`` tuple to the EnsembleStorage
        holding its 'ensemble', 'auc' and 'ef' entries
    """
    results = {}

    # Take a single snapshot of the keys; the loop then does not depend on
    # the scores mapping staying unchanged while the queries are evaluated.
    for query in list(molecules[0].scores.keys()):
        # single-member ensemble; a tuple so it can be used as a dict key
        formatted_query = (query,)

        es = EnsembleStorage()
        es.set_prop('ensemble', formatted_query)

        score_structure = classification.make_score_structure(
            molecules, formatted_query)

        # determine auc value
        auc_structure = classification.make_auc_structure(score_structure)
        auc = classification.calculate_auc(
            auc_structure, sort_order, 'no stats')
        es.set_prop('auc', auc)

        # determine one enrichment factor per fpf value
        for fpf in classification.make_fpfList(options, score_structure):
            fpf = float(fpf)
            ef_structure = classification.make_ef_structure(
                score_structure, fpf, sort_order)
            if ef_structure:
                ef = classification.calculate_ef(ef_structure, fpf)
                es.set_prop(ef[0], ef[1], 'ef')

        results[formatted_query] = es

    return results