Python CalculateCoElutionScores.CalculateCoElutionScores примеры использования

Язык программирования: Python

Метод/Функция: CalculateCoElutionScores

Примеров на hotexamples.com: 7

Python CalculateCoElutionScores.CalculateCoElutionScores - 7 примеров найдено. Это лучшие примеры Python кода для CalculateCoElutionScores.CalculateCoElutionScores, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

CalculateCoElutionScores(7)

CLF_Wrapper(4)

loadData(4)

loadEData(2)

Apex(1)

Poisson(1)

loadScoreData(1)

lineCount(1)

Wcc(1)

STRING(1)

MutualInformation(1)

Pearson(1)

Bayes(1)

Jaccard(1)

Genemania(1)

ExternalEvidence(1)

Euclidiean(1)

ElutionData(1)

CLF_Wrapper_Hari(1)

trainML(1)

Пример #1

Показать файл

def calc_feature_combination(args):
    """Benchmark one feature combination by n-fold cross validation.

    ``args`` is a 15-item sequence unpacked in this order: feature
    bit-string, search-engine label, input directory (not used here),
    "True"/"False" random-forest flag, number of folds, "True"/"False"
    overlap flag, "True"/"False" local flag, cutoff (divided by 100 before
    use), number of cores, score table path, mode ("exp", "comb" or "fa"),
    annotation source, annotation file, reference complexes file and the
    output prefix.

    The header and score strings returned by ``n_fold_cross_validation``
    are echoed to stdout and written to ``<output prefix>.eval.txt``.
    The process exits silently when the bit-string is "00000000" and with
    a message when the mode is not one of the three supported values.
    """
    (feature_combination, se, input_dir, use_rf, num_folds, overlap, local,
     cutoff, num_cores, scoreF, mode, anno, faF, ref_complexes,
     output_dir) = args

    # All values arrive as strings; convert them in the original order.
    cutoff = float(cutoff) / 100
    num_folds = int(num_folds)

    # An all-zero bit-string selects no feature at all.
    if feature_combination == "00000000":
        sys.exit()
    selected = get_fs_comb(feature_combination)
    num_cores = int(num_cores)
    use_rf = (use_rf == "True")
    overlap = (overlap == "True")
    local = (local == "True")

    clf_name = "RF" if use_rf else "SVM"
    clf = CS.CLF_Wrapper(num_cores, use_rf)

    gold_standard = Goldstandard_from_cluster_File(ref_complexes)

    # Scores are read from the precomputed table rather than recalculated.
    scoreCalc = CS.CalculateCoElutionScores(
        selected, "", scoreF, num_cores=num_cores, cutoff=cutoff)
    scoreCalc.readTable(scoreF, gold_standard)
    feature_names = [feature.name for feature in selected]
    feature_comb = feature_selector(feature_names, scoreCalc)

    print(feature_comb.scoreCalc.scores.shape)
    print(scoreCalc.scores.shape)

    if mode == "comb":
        # Experimental features plus functional annotation.
        feature_comb.add_fun_anno(utils.get_FA_data(anno, faF))
    elif mode == "fa":
        # Functional annotation replaces the experimental features.
        feature_comb = utils.get_FA_data(anno, faF)
        print(type(feature_comb))
    elif mode != "exp":
        print("not support this mode")
        sys.exit()

    scores, head = n_fold_cross_validation(
        num_folds, gold_standard, feature_comb, clf, output_dir, overlap,
        local)

    outFH = open(output_dir + ".eval.txt", "w")
    header_line = "FS\tSE\tCLF\t" + head
    print(header_line)
    score_line = "%s\t%s\t%s\t" % (feature_combination, se, clf_name) + scores
    print(score_line)

    # Same two lines go to the evaluation file.
    outFH.write(header_line + "\n")
    outFH.write(score_line + "\n")
    outFH.close()

Пример #2

Показать файл

def ppi_fs(args):
    """Evaluate a feature combination at the PPI level.

    ``args`` is a 7-item sequence: feature bit-string, score table path,
    "True"/"False" random-forest flag, search-engine label, number of
    cores, reference complexes file and output prefix.

    The result of ``utils.bench_by_PPI_clf`` (called with 10 as its first
    argument) is printed and written, below a fixed header line, to
    ``<output prefix>.ppi_eva.txt``.
    """
    fsc, scoreF, use_rf, se, num_cores, refComplexesF, output_dir = args
    num_cores = int(num_cores)
    use_rf = (use_rf == "True")

    clf_name = "RF" if use_rf else "SVM"
    clf = CS.CLF_Wrapper(num_cores, use_rf)

    features = get_fs_comb(fsc)
    all_gs = Goldstandard_from_cluster_File(refComplexesF)
    # The returned protein set is not used below; the call itself is kept.
    all_gs.get_proteins()

    # cutoff=-1 is passed straight through to the score calculator.
    scoreCalc = CS.CalculateCoElutionScores(
        features, "", scoreF, num_cores=num_cores, cutoff=-1)
    scoreCalc.readTable(scoreF, all_gs)
    print(scoreCalc.scores.shape)

    selector = feature_selector([feature.name for feature in features],
                                scoreCalc)

    print("The size of chopped matrix for selected features")
    print(np.shape(selector.get_scoreCalc().get_all_scores()))

    known_ppis = set(selector.ppiToIndex.keys())
    print("training ppis: %i" % len(known_ppis))

    train_gold = all_gs.return_gold_standard_complexes(known_ppis)

    print("Train_gold comp:%i" % len(train_gold.complexes.complexes))

    print("Num valid ppis in pos: %i" % len(train_gold.positive))
    print("Num valid ppis in neg: %i" % len(train_gold.negative))

    # Evaluate classifier
    evaluation_results = utils.bench_by_PPI_clf(10, selector, train_gold, clf)

    print(evaluation_results)

    result_fields = "\t".join(str(value) for value in evaluation_results)
    outFH = open("%s.ppi_eva.txt" % (output_dir), "w")
    outFH.write("FS\tSE\tCLF\tFM\tauPR\tauROC\n%s\t%s\t%s\t%s" %
                (fsc, se, clf_name, result_fields) + "\n")
    outFH.close()

Пример #3

Показать файл

def cut(args):
    """Write the selected feature columns of a score table to a new file.

    ``args`` is a 3-item sequence: feature bit-string, path of the score
    table to read, and path of the table to write.  The output starts
    with the tab-joined header of the feature selector's score
    calculator, followed by one ``edge<TAB>scores...`` line per entry
    returned by ``feature_comb.get_next()``; entries with an empty edge
    or an empty score list are skipped.

    Exits silently via ``sys.exit()`` when the bit-string is "00000000".
    """
    fc, scoreF, outF = args
    if fc == "00000000":
        sys.exit()
    this_scores = get_fs_comb(fc)
    scoreCalc = CS.CalculateCoElutionScores("", "", "", "", cutoff=0.5)

    # The table is read against a gold standard with no positive or
    # negative pairs.
    empty_gs = GS.Goldstandard_from_Complexes()
    empty_gs.positive = set()
    empty_gs.negative = set()
    scoreCalc.readTable(scoreF, empty_gs)
    print(scoreCalc.to_predict)

    feature_comb = feature_selector([fs.name for fs in this_scores], scoreCalc)
    feature_comb.open()
    # try/finally + with: both the selector and the output file are
    # released even if reading or writing fails part-way through.
    try:
        with open(outF, "w") as outFH:
            outFH.write("\t".join(feature_comb.scoreCalc.header) + "\n")
            for _ in range(feature_comb.to_predict):
                edge, edge_scores = feature_comb.get_next()
                if edge == "" or edge_scores == []:
                    continue
                outFH.write("%s\t%s\n" %
                            (edge, "\t".join(map(str, edge_scores))))
    finally:
        feature_comb.close()

Пример #4

Показать файл

def calc_scores(args):
    """Calculate co-elution scores for a set of protein pairs.

    ``args`` comes in two layouts.  With a leading "-ref" it is
    ``("-ref", reference file, feature bit-string, cores, cutoff,
    elution directory, output file)`` and the pairs to score are the
    union of the reference's positive and negative sets (their count is
    printed).  Otherwise it is the same sequence without the first two
    items.  When no pairs were collected, all pairs reported by the
    score calculator are scored.
    """
    if args[0] == "-ref":
        _flag, refF, fs, numcores, cutoff, e_dir, outF = args
        reference = Goldstandard_from_cluster_File(refF)
        pairs = list(reference.positive | reference.negative)
        print(len(pairs))
    else:
        fs, numcores, cutoff, e_dir, outF = args
        pairs = []

    numcores = int(numcores)
    cutoff = float(cutoff)

    features = get_fs_comb(fs)
    # Only the elution data is needed; the first returned value is unused.
    _found, edatas = utils.load_data(e_dir, features)
    scoreCalc = CS.CalculateCoElutionScores(
        features, edatas, outF, num_cores=numcores, cutoff=cutoff)

    # No (or an empty) reference: fall back to every pair.
    if len(pairs) == 0:
        pairs = scoreCalc.getAllPairs()
    scoreCalc.calculateScores(pairs)

Пример #5

Показать файл

 def filter_scoreCalc(self, scoreCalc):
     """Build a reduced copy of ``scoreCalc`` holding only the kept columns.

     The header is indexed with ``self.to_keep_header`` and every score
     row is passed through ``self.filter_score``.  A row is dropped when
     either of its two tab-separated proteins is missing from
     ``self.valprots`` (unless ``self.valprots`` equals ``[]``) or when
     ``filter_score`` returns ``[]``.  Kept rows are renumbered from 0 and
     the score matrix is trimmed to the number of kept rows.
     """
     subset = CS.CalculateCoElutionScores("", "", "", 1)
     subset.scoreF = scoreCalc.scoreF

     kept_header = np.array(scoreCalc.header)[self.to_keep_header]
     subset.header = list(kept_header)

     # Allocate for the worst case (every PPI kept); trimmed at the end.
     n_rows = len(scoreCalc.ppiToIndex.keys())
     n_cols = len(self.to_keep_score)
     subset.scores = np.zeros((n_rows, n_cols))

     kept = 0
     for row in range(scoreCalc.scores.shape[0]):
         ppi = scoreCalc.IndexToPpi[row]
         protA, protB = ppi.split("\t")
         outside = (protA not in self.valprots
                    or protB not in self.valprots)
         if outside and self.valprots != []:
             continue
         row_scores = self.filter_score(scoreCalc.scores[row, :])
         if row_scores == []:
             continue
         # Register the pair under its new, compacted row number.
         subset.ppiToIndex[ppi] = kept
         subset.IndexToPpi[kept] = ppi
         subset.scores[kept, :] = row_scores
         kept += 1

     subset.scores = subset.scores[0:kept, :]
     return subset

Пример #6

Показать файл

Файл: main.py Проект: shaoxiuma/EPIC

def main():
    """Run the EPIC pipeline from the command line.

    Steps, in order: parse the command line, load the elution data, build
    the reference (gold standard) from a taxid, a cluster file or a PPI
    file, calculate or read the co-elution scores, benchmark the
    classifier by cross validation, predict the interaction network,
    cluster it and evaluate the predicted clusters against the reference.

    Paths used, all starting with ``<output_dir>/<output_prefix>``:
    ``.ref_complexes.txt``, ``.pred.txt`` and ``.eval.txt`` are written
    here; ``.scores.txt`` is handed to the score calculator and
    ``.clust.txt`` to ``utils.predict_clusters``.

    Exits via ``sys.exit()`` when no feature is selected, when no
    reference is supplied, or when a PPI reference is combined with a
    taxid or cluster reference.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--feature_selection",
        type=str,
        help=
        "Select which features to use. This is an 8 position long array of 0 and 1, where each position determines which co-elution feature to use. Features sorted by position are: MI, Bayes, Euclidean, WCC, Jaccard, PCCN, PCC, and Apex.  Each default=11101001",
        default="11101001")
    parser.add_argument(
        "input_dir",
        type=str,
        help="Directory containing the elution files for each experiment")

    parser.add_argument(
        "-t",
        "--taxid",
        type=str,
        help="TAXID to automatically download reference from GO,CORUM,INtACT",
        default="")
    parser.add_argument(
        "-c",
        "--cluster",
        type=str,
        help="Path to file containing protein clsuter reference",
        default="")
    parser.add_argument("-p",
                        "--ppi",
                        type=str,
                        help="path to ppi File",
                        default="")

    parser.add_argument("output_dir",
                        type=str,
                        help="Directory containing the output files")
    parser.add_argument("-o",
                        "--output_prefix",
                        type=str,
                        help="Prefix name for all output Files",
                        default="Out")

    parser.add_argument(
        "-M",
        "--classifier",
        type=str,
        help="Select which classifier to use. Values: RF SVM, default RF",
        default="RF")
    parser.add_argument("-n",
                        "--num_cores",
                        type=int,
                        help="Number of cores to be used, default 1",
                        default=1)

    parser.add_argument(
        "-m",
        "--mode",
        type=str,
        help=
        "Run EPIC with experimental, functional, or both evidences. Values: EXP, FA, COMB, default: EXP  ",
        default="EXP")
    parser.add_argument(
        "-f",
        "--fun_anno_source",
        type=str,
        help=
        "Where to get functional annotaiton from. Values: STRING or GM or FILE, default= GM",
        default="GM")
    parser.add_argument(
        "-F",
        "--fun_anno_file",
        type=str,
        help=
        "Path to File containing functional annotation. This flag needs to be set when using FILE as fun_anno_source.",
    )
    parser.add_argument("-r",
                        "--co_elution_cutoff",
                        type=float,
                        help="Co-elution score cutoff. default 0.5",
                        default=0.5)
    parser.add_argument(
        "-R",
        "--classifier_cutoff",
        type=float,
        help="Classifier confidence valye cutoff. default = 0.5",
        default=0.5)
    parser.add_argument(
        "-e",
        "--elution_max_count",
        type=int,
        help=
        "Removies protein that have a maximal peptide count less than the given value. default = 1",
        default=1)
    parser.add_argument(
        "-E",
        "--frac_count",
        type=int,
        help=
        "Number of fracrions a protein needs to be measured in. default = 2",
        default=2)

    parser.add_argument(
        "-P",
        "--precalcualted_score_file",
        type=str,
        help=
        "Path to precalulated scorefile to read scores from for faster rerunning of EPIC. default = None",
        default="NONE")

    args = parser.parse_args()

    # Mode and annotation source are matched case-insensitively.
    args.mode = args.mode.upper()
    args.fun_anno_source = args.fun_anno_source.upper()

    # Create feature combination
    if args.feature_selection == "00000000":
        print("Select at least one feature")
        sys.exit()

    this_scores = utils.get_fs_comb(args.feature_selection)
    print("\t".join([fs.name for fs in this_scores]))

    # Initialize CLF; anything other than "RF" selects the non-RF classifier.
    use_rf = args.classifier == "RF"
    clf = CS.CLF_Wrapper(args.num_cores, use_rf)

    # Load elution data
    foundprots, elution_datas = utils.load_data(args.input_dir,
                                                this_scores,
                                                fc=args.frac_count,
                                                mfc=args.elution_max_count)

    # Generate reference data set
    gs = ""
    if ((args.taxid != "" and args.ppi != "")
            or (args.cluster != "" and args.ppi != "")):
        print("Refernce from cluster and PPI are nor compatiple. Please supply ppi or complex reference, not both!")
        sys.exit()

    if args.taxid == "" and args.ppi == "" and args.cluster == "":
        print("Please supply a reference by setting taxid, cluster, or ppi tag")
        sys.exit()

    gs_clusters = []
    if (args.taxid != "" and args.cluster == "" and args.ppi == ""):
        print("Loading clusters from GO, CORUM, and Intact")
        gs_clusters.extend(utils.get_reference_from_net(args.taxid))

    if args.cluster != "":
        print("Loading complexes from file")
        if args.mode == "FA":
            gs_clusters.append(GS.FileClusters(args.cluster, "all"))
        else:
            gs_clusters.append(GS.FileClusters(args.cluster, foundprots))

    if args.ppi != "":
        # Fixed: this used to read args.reference, an option the parser
        # never defines, so every --ppi run failed with AttributeError.
        print("Reading PPI file from %s" % args.ppi)
        gs = Goldstandard_from_PPI_File(args.ppi, foundprots)

    print(gs_clusters)
    if len(gs_clusters) > 0:
        gs = utils.create_goldstandard(gs_clusters, args.taxid, foundprots)

    # Common prefix for the output file paths.
    output_dir = args.output_dir + os.sep + args.output_prefix

    with open(output_dir + ".ref_complexes.txt", "w") as refFH:
        for comp in gs.complexes.complexes:
            refFH.write("%s\t%s\n" % (",".join(comp), ",".join(
                gs.complexes.complexes[comp])))

    scoreCalc = CS.CalculateCoElutionScores(this_scores,
                                            elution_datas,
                                            output_dir + ".scores.txt",
                                            num_cores=args.num_cores,
                                            cutoff=args.co_elution_cutoff)
    if args.precalcualted_score_file == "NONE":
        scoreCalc.calculate_coelutionDatas(gs)
    else:
        scoreCalc.readTable(args.precalcualted_score_file, gs)

    print(scoreCalc.scores.shape)

    # Keep only reference pairs that actually have scores, then rebalance.
    functionalData = ""
    gs.positive = set(gs.positive & set(scoreCalc.ppiToIndex.keys()))
    gs.negative = set(gs.negative & set(scoreCalc.ppiToIndex.keys()))
    gs.rebalance()

    print(len(gs.positive))
    print(len(gs.negative))

    if args.mode != "EXP":
        print("Loading functional data")
        functionalData = utils.get_FA_data(args.fun_anno_source, args.taxid,
                                           args.fun_anno_file)
        print("Dimension of fun anno " + str(functionalData.scores.shape))

    print("Start benchmarking")

    if args.mode == "EXP":
        utils.cv_bench_clf(scoreCalc,
                           clf,
                           gs,
                           output_dir,
                           format="pdf",
                           verbose=True,
                           folds=5)

    if args.mode == "COMB":
        # Work on a copy so scoreCalc itself stays purely experimental.
        tmp_sc = copy.deepcopy(scoreCalc)
        tmp_sc.add_fun_anno(functionalData)
        utils.cv_bench_clf(tmp_sc,
                           clf,
                           gs,
                           output_dir,
                           format="pdf",
                           verbose=True,
                           folds=5)

    if args.mode == "FA":
        utils.cv_bench_clf(functionalData,
                           clf,
                           gs,
                           output_dir,
                           format="pdf",
                           verbose=True,
                           folds=5)

    # PPI evaluation
    # NOTE(review): this call passes args.output_dir, not the prefixed
    # output_dir used by the calls above; confirm that is intended.
    print(utils.cv_bench_clf(scoreCalc,
                             clf,
                             gs,
                             args.output_dir,
                             verbose=False,
                             format="pdf",
                             folds=5))

    network = utils.make_predictions(scoreCalc,
                                     args.mode,
                                     clf,
                                     gs,
                                     fun_anno=functionalData)

    # Predict protein interaction: keep edges whose third tab-separated
    # field reaches the classifier cutoff.
    with open("%s.pred.txt" % (output_dir), "w") as outFH:
        final_network = [
            PPI for PPI in network
            if float(PPI.split("\t")[2]) >= args.classifier_cutoff
        ]
        outFH.write("\n".join(final_network) + "\n")

    # Predicting clusters
    utils.predict_clusters("%s.pred.txt" % (output_dir),
                           "%s.clust.txt" % (output_dir))

    # Evaluating predicted clusters
    pred_clusters = GS.Clusters(False)
    pred_clusters.read_file("%s.clust.txt" % (output_dir))
    overlapped_complexes_with_reference = gs.get_complexes(
    ).get_overlapped_complexes_set(pred_clusters)
    print("# of complexes in reference dataset: " + str(
        len(overlapped_complexes_with_reference)))
    clust_scores, header, composite_score = utils.clustering_evaluation(
        gs.complexes, pred_clusters, "", False)
    with open("%s.eval.txt" % (output_dir), "w") as outFH:
        header = header.split("\t")
        clust_scores = clust_scores.split("\t")
        # One "<metric>\t<value>" line per metric, to stdout and the file.
        for i, head in enumerate(header):
            line = "%s\t%s" % (head, clust_scores[i])
            print(line)
            outFH.write(line + "\n")

Пример #7

Показать файл

def exp_comb(args):
    """Benchmark random combinations of elution experiments.

    ``args`` is a 12-item sequence: feature bit-string, number of "all*"
    files to sample (``i``), number of other files to sample (``j``),
    number of iterations, input directory, number of cores, reference
    complexes file, score table path, mode ("comb" adds functional
    annotation), functional annotation file, "True"/"False" PPI flag and
    output prefix.

    Each iteration samples ``i`` + ``j`` ``*.txt`` files from the input
    directory, restricts the score table to the proteins found in them and
    runs either a PPI-level or a cluster-level cross validation.  One line
    per iteration is written to ``<output prefix>.<i>_<j>.all.eval.txt``.

    Exits via ``sys.exit()`` when the bit-string is "00000000", when both
    ``i`` and ``j`` are 0, or when more files are requested than exist.
    """
    FS, i, j, num_iter, input_dir, num_cores, ref_complexes, scoreF, mode, fun_anno_F, ppi, output_dir = args
    i, j, num_iter, num_cores = map(int, [i, j, num_iter, num_cores])
    # Fixed: the original line was the bare expression `ppi == "True"`,
    # so the flag stayed a string and "False" still selected the PPI branch.
    ppi = ppi == "True"

    # Second-to-last component of input_dir when split on the path
    # separator (the last directory name if the path ends with one).
    search_engine = input_dir.split(os.path.sep)[-2]

    def get_eData_comb(data_dir, num_iex, num_beads):
        """Sample num_iex "all*" files and num_beads other *.txt files."""
        # A real list, so it can be scanned twice on any Python version.
        all_exp = [str(f) for f in glob.glob(data_dir + "*.txt")]
        iex_exp = [
            f for f in all_exp if f.split(os.sep)[-1].startswith("all")
        ]
        beads_exp = [
            f for f in all_exp if not f.split(os.sep)[-1].startswith("all")
        ]
        # Check the helper's own arguments rather than the enclosing i/j.
        if num_iex > len(iex_exp):
            print("i is to large")
            sys.exit()
        if num_beads > len(beads_exp):
            print("j is to large")
            sys.exit()

        sel_iex = rnd.sample(iex_exp, num_iex)
        sel_beads = rnd.sample(beads_exp, num_beads)
        return sel_iex + sel_beads

    # EPIC paramters
    if FS == "00000000":
        sys.exit()
    this_scores = get_fs_comb(FS)
    clf = CS.CLF_Wrapper(num_cores, True)

    ref_gs = Goldstandard_from_cluster_File(ref_complexes)

    scoreCalc = CS.CalculateCoElutionScores(this_scores,
                                            "",
                                            scoreF,
                                            num_cores=num_cores,
                                            cutoff=0.5)
    scoreCalc.readTable(scoreF, ref_gs)

    # the supplied functional evidence data needs to have the correct header row...
    functionalData = ""
    if mode == "comb":
        functionalData = utils.get_FA_data("FILE", fun_anno_F)

    if i == 0 and j == 0:
        sys.exit()

    out_head = ""
    all_scores = []

    for _ in range(num_iter):

        # Unseeded draw for the file sample, then reset to a fixed seed.
        rnd.seed()
        this_eprofiles = get_eData_comb(input_dir, i, j)
        this_eprofiles_fnames = [
            f.rsplit(os.sep, 1)[1] for f in this_eprofiles
        ]
        rnd.seed(1)

        print(this_eprofiles_fnames)

        this_foundprots, _unused = utils.load_data(this_eprofiles, [])
        print(len(this_foundprots))

        feature_comb = feature_selector(
            [fs.name for fs in this_scores],
            scoreCalc,
            valprots=this_foundprots,
            elution_file_names=this_eprofiles_fnames)
        if mode == "comb":
            feature_comb.add_fun_anno(functionalData)

        scores = ""
        head = ""

        if ppi:
            print("Running PPI cross fold")
            ppi_ref = ref_gs.return_gold_standard_complexes(
                set(feature_comb.scoreCalc.ppiToIndex.keys()))
            fmeasure, auc_pr, auc_roc = utils.bench_by_PPI_clf(
                10, feature_comb, ppi_ref, clf)
            scores = "\t".join(map(str, [fmeasure, auc_pr, auc_roc]))
            head = "\tFM\taucPR\taucROC"
        else:
            print("Running Cluster cross fold")
            scores, head = n_fold_cross_validation(2,
                                                   ref_gs,
                                                   feature_comb,
                                                   clf,
                                                   "%s_%i_%i" %
                                                   (output_dir, i, j),
                                                   overlap=True,
                                                   local=False)

        out_head = head
        all_scores.append(
            "%s\t%s\t%i\t%i\t%s\t%i\t%s" %
            (FS, mode, i, j, search_engine, len(this_foundprots), scores))
        print(head)
        print(scores)

    # NOTE(review): each data row carries a mode column right after FS
    # that the header row below does not name; left unchanged in case
    # downstream readers depend on the current layout.
    with open(output_dir + ".%i_%i.all.eval.txt" % (i, j), "w") as outFH:
        outFH.write(
            "FS\tNum_iex\tNum_beads\tSearch_engine\tNum_Prots\t%s" % out_head
            + "\n")
        for score in all_scores:
            outFH.write("%s" % (score) + "\n")