Пример #1
0
def main_predict(model_initializer, args):
    """Score the input data with a stored model and write three report files.

    The data comes from ``model_initializer.load_data(args)`` and the model is
    restored from ``args.model_file``. Three files are written to
    ``args.output_dir_path`` through ``save_output``:

    * ``predictions.txt``     -- one decision value per line;
    * ``classifications.txt`` -- ``1`` when the value is above zero,
      ``-1`` otherwise;
    * ``info.txt``            -- the value with four decimals, a tab, and the
      matching item yielded by ``model.get_info``.
    """
    from itertools import izip, tee

    # Two independent views of the same stream: one is scored, the other is
    # handed to ``get_info``.
    scoring_data, info_data = tee(model_initializer.load_data(args))

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    # NOTE(review): the scores are iterated three times below, so this
    # presumably is a re-iterable sequence rather than a generator -- confirm.
    scores = classifier.decision_function(scoring_data)

    score_lines = [str(score) + "\n" for score in scores]
    save_output(text=score_lines,
                output_dir_path=args.output_dir_path,
                out_file_name='predictions.txt')

    label_lines = [str(1 if score > 0 else -1) + "\n" for score in scores]
    save_output(text=label_lines,
                output_dir_path=args.output_dir_path,
                out_file_name='classifications.txt')

    descriptions = classifier.get_info(info_data)
    info_lines = ["%.4f\t%s\n" % (score, description)
                  for score, description in izip(scores, descriptions)]
    save_output(text=info_lines,
                output_dir_path=args.output_dir_path,
                out_file_name='info.txt')
Пример #2
0
def main_feature(model_initializer, args):
    """Vectorize the input data with a stored model and save the result.

    The data comes from ``model_initializer.load_data(args)``; the model is
    restored from ``args.model_file`` and its parameters are logged. The
    matrix produced by ``model._data_matrix`` is written with ``store_matrix``
    under the name ``data_matrix`` in ``args.output_dir_path``, using
    ``args.output_format``.
    """
    data = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    store_matrix(matrix=classifier._data_matrix(data),
                 output_dir_path=args.output_dir_path,
                 out_file_name='data_matrix',
                 output_format=args.output_format)
Пример #3
0
def main_estimate(model_initializer, args):
    """Run the stored model's ``estimate`` on positive and negative test data.

    The positive and negative sets come from
    ``model_initializer.load_positive_data(args)`` and
    ``model_initializer.load_negative_data(args)``; the model is restored
    from ``args.model_file`` and its parameters are logged.

    The two values returned by ``estimate`` are unpacked but neither used
    nor returned here, so this function returns ``None``.
    """
    positives = model_initializer.load_positive_data(args)
    negatives = model_initializer.load_negative_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    # The unpacking doubles as a check that exactly two values come back.
    apr, rocauc = classifier.estimate(positives, negatives)
Пример #4
0
def main_matrix(model_initializer, args):
    """Compute a linear-kernel Gram matrix of the input data and save it.

    The data from ``model_initializer.load_data(args)`` is vectorized by the
    model restored from ``args.model_file``. The pairwise linear kernel of
    that matrix with itself is written with ``store_matrix`` under the name
    ``Gram_matrix`` in ``args.output_dir_path``, using ``args.output_format``.
    """
    data = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    features = classifier._data_matrix(data)
    # NOTE(review): ``metrics`` is presumably ``sklearn.metrics`` imported at
    # module level -- confirm.
    gram = metrics.pairwise.pairwise_kernels(features, metric='linear')
    store_matrix(matrix=gram,
                 output_dir_path=args.output_dir_path,
                 out_file_name='Gram_matrix',
                 output_format=args.output_format)
Пример #5
0
def main_feature(model_initializer, args):
    """Save the feature matrix that a stored model builds for the input data.

    Steps: load the data with ``model_initializer.load_data(args)``, restore
    the model from ``args.model_file``, log its parameters, vectorize the
    data with ``model._data_matrix`` and pass the result to ``store_matrix``
    (file name ``data_matrix``, directory ``args.output_dir_path``, format
    ``args.output_format``).
    """
    graphs = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    estimator = ActiveLearningBinaryClassificationModel()
    estimator.load(args.model_file)
    logger.info(estimator.get_parameters())

    data_matrix = estimator._data_matrix(graphs)
    storage_options = {
        'matrix': data_matrix,
        'output_dir_path': args.output_dir_path,
        'out_file_name': 'data_matrix',
        'output_format': args.output_format,
    }
    store_matrix(**storage_options)
Пример #6
0
def main_matrix(model_initializer, args):
    """Save the linear-kernel Gram matrix of the vectorized input data.

    Steps: load the data with ``model_initializer.load_data(args)``, restore
    the model from ``args.model_file``, log its parameters, vectorize the
    data with ``model._data_matrix``, compute the pairwise linear kernel of
    that matrix and pass it to ``store_matrix`` (file name ``Gram_matrix``,
    directory ``args.output_dir_path``, format ``args.output_format``).
    """
    graphs = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    estimator = ActiveLearningBinaryClassificationModel()
    estimator.load(args.model_file)
    logger.info(estimator.get_parameters())

    data_matrix = estimator._data_matrix(graphs)
    # The kernel is evaluated first, as the leading keyword argument, before
    # any of the ``args`` attributes are read.
    store_matrix(
        matrix=metrics.pairwise.pairwise_kernels(data_matrix, metric='linear'),
        output_dir_path=args.output_dir_path,
        out_file_name='Gram_matrix',
        output_format=args.output_format)
Пример #7
0
def main_predict(model_initializer, args):
    """Classify the input data with a stored model and write one report file.

    The data comes from ``model_initializer.load_data(args)`` and the model is
    restored from ``args.model_file``; its parameters are logged. For every
    ``(margin, graph_info)`` pair yielded by
    ``model.decision_function_info(iterator, key='id')`` one line is written
    to ``predictions.txt`` in ``args.output_dir_path``::

        <prediction>\\t<margin>\\t<graph_info>

    where ``prediction`` is ``1`` for a margin above zero and ``-1``
    otherwise.
    """
    # The data is passed to the model directly. An earlier version split it
    # with ``itertools.tee`` but never consumed the second branch, which makes
    # ``tee`` buffer every item for the lifetime of the function.
    iterator = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    model = ActiveLearningBinaryClassificationModel()
    model.load(args.model_file)
    logger.info(model.get_parameters())

    text = []
    for margin, graph_info in model.decision_function_info(iterator, key='id'):
        prediction = 1 if margin > 0 else -1
        text.append("%d\t%s\t%s\n" % (prediction, margin, graph_info))
    save_output(text=text,
                output_dir_path=args.output_dir_path,
                out_file_name='predictions.txt')
Пример #8
0
def main_predict(model_initializer, args):
    """Write sign, margin and info for every input item to predictions.txt.

    The model is restored from ``args.model_file`` and its parameters are
    logged. ``model.decision_function_info(data, key='id')`` is expected to
    yield ``(margin, graph_info)`` pairs; each becomes one tab-separated line
    ``prediction, margin, graph_info`` where ``prediction`` is ``1`` if the
    margin is above zero and ``-1`` otherwise. The lines are handed to
    ``save_output`` with ``args.output_dir_path`` as the target directory.
    """
    # No ``itertools.tee`` here: the second branch it produced was never read,
    # so ``tee`` would only have kept every consumed item buffered in memory.
    data = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    model = ActiveLearningBinaryClassificationModel()
    model.load(args.model_file)
    logger.info(model.get_parameters())

    text = [
        "%d\t%s\t%s\n" % (1 if margin > 0 else -1, margin, graph_info)
        for margin, graph_info in model.decision_function_info(data, key='id')
    ]
    save_output(text=text,
                output_dir_path=args.output_dir_path,
                out_file_name='predictions.txt')
Пример #9
0
def main_predict(model_initializer, args):
    """Produce predictions.txt, classifications.txt and info.txt for the data.

    The input from ``model_initializer.load_data(args)`` is duplicated with
    ``itertools.tee``: one copy is scored by ``model.decision_function``, the
    other is passed to ``model.get_info``. The model itself is restored from
    ``args.model_file``. All files are written to ``args.output_dir_path``:

    * ``predictions.txt``     -- ``str(value)`` per line;
    * ``classifications.txt`` -- ``1`` if the value is above zero else ``-1``;
    * ``info.txt``            -- ``"%.4f"`` of the value, a tab, then the
      corresponding ``get_info`` item.
    """
    from itertools import izip, tee

    graphs, graphs_for_info = tee(model_initializer.load_data(args))

    from eden.model import ActiveLearningBinaryClassificationModel
    estimator = ActiveLearningBinaryClassificationModel()
    estimator.load(args.model_file)
    logger.info(estimator.get_parameters())

    # NOTE(review): iterated three times below; presumably an array-like and
    # not a one-shot generator -- confirm.
    margins = estimator.decision_function(graphs)

    # Raw decision values.
    save_output(text=[str(margin) + "\n" for margin in margins],
                output_dir_path=args.output_dir_path,
                out_file_name='predictions.txt')

    # Sign of each decision value; zero counts as negative.
    signs = [1 if margin > 0 else -1 for margin in margins]
    save_output(text=[str(sign) + "\n" for sign in signs],
                output_dir_path=args.output_dir_path,
                out_file_name='classifications.txt')

    # Decision value paired with the per-item info.
    info_stream = estimator.get_info(graphs_for_info)
    paired_lines = ["%.4f\t%s\n" % pair for pair in izip(margins, info_stream)]
    save_output(text=paired_lines,
                output_dir_path=args.output_dir_path,
                out_file_name='info.txt')
Пример #10
0
    def get_stem(seqs, window_c, model_c_name, window_d, model_d_name,
                 flank_size_l, flank_size_r):
        """Build stem-region candidates from model-scored C-box/D-box hits.

        Pipeline:

        1. ``get_Cbox(seqs, window_c)`` proposes C-box candidates; they are
           scored by the model stored at ``model_c_name`` and only those with
           a score ``>= 0`` are kept.
        2. ``get_Dbox(kept, window_d)`` proposes D-box candidates for the
           survivors; they are scored by the model stored at
           ``model_d_name``. No score filter is applied at this stage.
        3. For each scored D-box candidate the stem string is assembled as
           ``nts[pos_c-10 : pos_c+6] + "&" + nts[pos_d-1 : pos_d+13]``, with
           both slices clamped to the bounds of the sequence.

        ``flank_size_l`` and ``flank_size_r`` are accepted but not used by
        this function; the flank widths are the fixed offsets shown above.

        Returns a list of ``[[header, pos_c, pos_d], stem_string]`` entries.
        """

        def _score_in_batches(model, candidates, batch_size=10000):
            # Score candidates in bounded chunks so that only ``batch_size``
            # items are submitted to the model at a time; returns a list of
            # (candidate, score) pairs in input order.
            scored = []
            batch = []
            for candidate in candidates:
                batch.append(candidate)
                if len(batch) == batch_size:
                    scored.extend(zip(batch, model.decision_function(batch)))
                    batch = []
            if batch:
                scored.extend(zip(batch, model.decision_function(batch)))
            return scored

        # 1) C-box finder.
        seqs_c = get_Cbox(seqs, window_c)

        # 2) Submit the C-box candidates to their model.
        from eden.model import ActiveLearningBinaryClassificationModel
        model = ActiveLearningBinaryClassificationModel()
        model.load(model_c_name)
        seqs_c_pred = _score_in_batches(model, seqs_c)

        # Discard candidates with a negative score.
        seqs_c = [cand for cand in seqs_c_pred if cand[1] >= 0.0]

        # 3) D-box finder, then submit the D-box candidates to their model.
        seqs_cd = get_Dbox(seqs_c, window_d)
        model = ActiveLearningBinaryClassificationModel()
        model.load(model_d_name)
        seqs_d_pred = _score_in_batches(model, seqs_cd)

        # 4) Get the stem region from the sequences. Each scored entry has
        # the shape:
        # (([[(header, seq), pos_c], cand_c, pred_c, pos_d], 'UAAxCUGAyGAU'), 77.000434164559792)
        stem_cands = []
        for ([[(header, nts), pos_c], cand_c, pred_c, pos_d], cand_d), pred_d in seqs_d_pred:
            c_pos = int(pos_c)
            d_pos = int(pos_d)
            # Up to 10 positions before the C-box position, never before the
            # start of the sequence.
            left = nts[max(0, c_pos - 10):c_pos + 6]
            # Slicing clamps the upper bound to len(nts), which covers the
            # case where the D-box flank would run past the sequence end.
            right = nts[d_pos - 1:d_pos + 3 + 10]
            stem_cands.append([[header, pos_c, pos_d], left + "&" + right])

        return stem_cands