def main_predict(model_initializer, args):
    """Score the input data with a stored model and write three report files.

    Files written through ``save_output`` into ``args.output_dir_path``:

    * ``predictions.txt``     -- one ``str(score)`` per line;
    * ``classifications.txt`` -- ``1`` when the score is > 0, ``-1`` otherwise;
    * ``info.txt``            -- ``"%.4f<TAB>info"`` pairing each score with the
      matching item yielded by ``model.get_info``.

    Args:
        model_initializer: object providing ``load_data(args)``.
        args: options object; ``model_file`` and ``output_dir_path`` are read.
    """
    from itertools import tee

    from eden.model import ActiveLearningBinaryClassificationModel

    iterator = model_initializer.load_data(args)
    # Two independent passes over the data are needed: one to compute the
    # scores and one to extract the per-instance info.
    iterator, iterator_ = tee(iterator)

    model = ActiveLearningBinaryClassificationModel()
    model.load(args.model_file)
    logger.info(model.get_parameters())

    # The scores are traversed three times below; materialize them once so
    # every pass sees the same values even if decision_function hands back a
    # one-shot iterable.
    predictions = list(model.decision_function(iterator))

    save_output(text=[str(p) + "\n" for p in predictions],
                output_dir_path=args.output_dir_path,
                out_file_name='predictions.txt')

    save_output(text=[str(1 if p > 0 else -1) + "\n" for p in predictions],
                output_dir_path=args.output_dir_path,
                out_file_name='classifications.txt')

    # Built-in zip replaces itertools.izip, which does not exist on Python 3;
    # the result is collected into a list either way.
    info_iterator = model.get_info(iterator_)
    save_output(text=["%.4f\t%s\n" % (p, info)
                      for p, info in zip(predictions, info_iterator)],
                output_dir_path=args.output_dir_path,
                out_file_name='info.txt')
def main_feature(model_initializer, args):
    """Build the data matrix for the input with a stored model and save it.

    The matrix returned by the model's ``_data_matrix`` is handed to
    ``store_matrix`` under the file name ``data_matrix``.

    Args:
        model_initializer: object providing ``load_data(args)``.
        args: options object; ``model_file``, ``output_dir_path`` and
            ``output_format`` are read.
    """
    data_stream = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    store_matrix(matrix=classifier._data_matrix(data_stream),
                 output_dir_path=args.output_dir_path,
                 out_file_name='data_matrix',
                 output_format=args.output_format)
def main_estimate(model_initializer, args):
    """Run the stored model's ``estimate`` on positive and negative test data.

    Args:
        model_initializer: object providing ``load_positive_data(args)`` and
            ``load_negative_data(args)``.
        args: options object; ``model_file`` is read.

    Returns:
        None. The two values returned by ``estimate`` are not used further in
        this function.
    """
    positives = model_initializer.load_positive_data(args)
    negatives = model_initializer.load_negative_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    # Unpacking is kept so that a result that is not a pair still fails here.
    apr_score, rocauc_score = classifier.estimate(positives, negatives)
def main_matrix(model_initializer, args):
    """Compute the pairwise linear-kernel matrix of the input and save it.

    The data matrix produced by the stored model is passed to
    ``metrics.pairwise.pairwise_kernels`` with ``metric='linear'`` and the
    result is handed to ``store_matrix`` under the file name ``Gram_matrix``.

    Args:
        model_initializer: object providing ``load_data(args)``.
        args: options object; ``model_file``, ``output_dir_path`` and
            ``output_format`` are read.
    """
    data_stream = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    feature_matrix = classifier._data_matrix(data_stream)
    # NOTE(review): `metrics` is a module-level name, presumably
    # sklearn.metrics -- confirm against the file's imports.
    gram = metrics.pairwise.pairwise_kernels(feature_matrix, metric='linear')
    store_matrix(matrix=gram,
                 output_dir_path=args.output_dir_path,
                 out_file_name='Gram_matrix',
                 output_format=args.output_format)
def main_feature(model_initializer, args):
    """Build the data matrix for the input with a stored model and save it.

    The matrix returned by the model's ``_data_matrix`` is handed to
    ``store_matrix`` under the file name ``data_matrix``.

    Args:
        model_initializer: object providing ``load_data(args)``.
        args: options object; ``model_file``, ``output_dir_path`` and
            ``output_format`` are read.
    """
    data_stream = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    store_matrix(matrix=classifier._data_matrix(data_stream),
                 output_dir_path=args.output_dir_path,
                 out_file_name='data_matrix',
                 output_format=args.output_format)
def main_matrix(model_initializer, args):
    """Compute the pairwise linear-kernel matrix of the input and save it.

    The data matrix produced by the stored model is passed to
    ``metrics.pairwise.pairwise_kernels`` with ``metric='linear'`` and the
    result is handed to ``store_matrix`` under the file name ``Gram_matrix``.

    Args:
        model_initializer: object providing ``load_data(args)``.
        args: options object; ``model_file``, ``output_dir_path`` and
            ``output_format`` are read.
    """
    data_stream = model_initializer.load_data(args)

    from eden.model import ActiveLearningBinaryClassificationModel
    classifier = ActiveLearningBinaryClassificationModel()
    classifier.load(args.model_file)
    logger.info(classifier.get_parameters())

    feature_matrix = classifier._data_matrix(data_stream)
    # NOTE(review): `metrics` is a module-level name, presumably
    # sklearn.metrics -- confirm against the file's imports.
    gram = metrics.pairwise.pairwise_kernels(feature_matrix, metric='linear')
    store_matrix(matrix=gram,
                 output_dir_path=args.output_dir_path,
                 out_file_name='Gram_matrix',
                 output_format=args.output_format)
def main_predict(model_initializer, args):
    """Score the input data with a stored model and write ``predictions.txt``.

    Each output line has the form ``"<class>\\t<margin>\\t<info>\\n"`` where
    ``<class>`` is ``1`` when the margin is > 0 and ``-1`` otherwise, and the
    ``(margin, info)`` pairs come from
    ``model.decision_function_info(iterator, key='id')``.

    Args:
        model_initializer: object providing ``load_data(args)``.
        args: options object; ``model_file`` and ``output_dir_path`` are read.
    """
    from eden.model import ActiveLearningBinaryClassificationModel

    # The data is consumed in a single pass. The previous tee() produced a
    # second branch that was never read, which forced every consumed item to
    # be buffered for the lifetime of this call. iter() keeps handing the
    # model a one-shot iterator, as before.
    iterator = iter(model_initializer.load_data(args))

    model = ActiveLearningBinaryClassificationModel()
    model.load(args.model_file)
    logger.info(model.get_parameters())

    text = []
    for margin, graph_info in model.decision_function_info(iterator, key='id'):
        prediction = 1 if margin > 0 else -1
        text.append("%d\t%s\t%s\n" % (prediction, margin, graph_info))
    save_output(text=text,
                output_dir_path=args.output_dir_path,
                out_file_name='predictions.txt')
def main_predict(model_initializer, args):
    """Score the input data with a stored model and write ``predictions.txt``.

    Each output line has the form ``"<class>\\t<margin>\\t<info>\\n"`` where
    ``<class>`` is ``1`` when the margin is > 0 and ``-1`` otherwise, and the
    ``(margin, info)`` pairs come from
    ``model.decision_function_info(iterator, key='id')``.

    Args:
        model_initializer: object providing ``load_data(args)``.
        args: options object; ``model_file`` and ``output_dir_path`` are read.
    """
    from eden.model import ActiveLearningBinaryClassificationModel

    # The data is consumed in a single pass. The previous tee() produced a
    # second branch that was never read, which forced every consumed item to
    # be buffered for the lifetime of this call. iter() keeps handing the
    # model a one-shot iterator, as before.
    iterator = iter(model_initializer.load_data(args))

    model = ActiveLearningBinaryClassificationModel()
    model.load(args.model_file)
    logger.info(model.get_parameters())

    text = []
    for margin, graph_info in model.decision_function_info(iterator, key='id'):
        prediction = 1 if margin > 0 else -1
        text.append("%d\t%s\t%s\n" % (prediction, margin, graph_info))
    save_output(text=text,
                output_dir_path=args.output_dir_path,
                out_file_name='predictions.txt')
def main_predict(model_initializer, args):
    """Score the input data with a stored model and write three report files.

    Files written through ``save_output`` into ``args.output_dir_path``:

    * ``predictions.txt``     -- one ``str(score)`` per line;
    * ``classifications.txt`` -- ``1`` when the score is > 0, ``-1`` otherwise;
    * ``info.txt``            -- ``"%.4f<TAB>info"`` pairing each score with the
      matching item yielded by ``model.get_info``.

    Args:
        model_initializer: object providing ``load_data(args)``.
        args: options object; ``model_file`` and ``output_dir_path`` are read.
    """
    from itertools import tee

    from eden.model import ActiveLearningBinaryClassificationModel

    iterator = model_initializer.load_data(args)
    # Two independent passes over the data are needed: one to compute the
    # scores and one to extract the per-instance info.
    iterator, iterator_ = tee(iterator)

    model = ActiveLearningBinaryClassificationModel()
    model.load(args.model_file)
    logger.info(model.get_parameters())

    # The scores are traversed three times below; materialize them once so
    # every pass sees the same values even if decision_function hands back a
    # one-shot iterable.
    predictions = list(model.decision_function(iterator))

    save_output(text=[str(p) + "\n" for p in predictions],
                output_dir_path=args.output_dir_path,
                out_file_name='predictions.txt')

    save_output(text=[str(1 if p > 0 else -1) + "\n" for p in predictions],
                output_dir_path=args.output_dir_path,
                out_file_name='classifications.txt')

    # Built-in zip replaces itertools.izip, which does not exist on Python 3;
    # the result is collected into a list either way.
    info_iterator = model.get_info(iterator_)
    save_output(text=["%.4f\t%s\n" % (p, info)
                      for p, info in zip(predictions, info_iterator)],
                output_dir_path=args.output_dir_path,
                out_file_name='info.txt')
def _score_in_batches(model, candidates, batch_size=10000):
    """Pair every candidate with its ``model.decision_function`` score.

    Candidates are submitted to the model in chunks of at most ``batch_size``
    items so that only a bounded number of samples is scored at once (the
    original author introduced this limit to keep memory use in check).

    Returns:
        A list of ``(candidate, score)`` tuples in input order.
    """
    scored = []
    batch = []
    for candidate in candidates:
        batch.append(candidate)
        if len(batch) == batch_size:
            # extend() works whether zip returns a list (Python 2) or an
            # iterator (Python 3) and avoids re-copying the accumulated list.
            scored.extend(zip(batch, model.decision_function(batch)))
            batch = []
    if batch:
        scored.extend(zip(batch, model.decision_function(batch)))
    return scored


def get_stem(seqs, window_c, model_c_name, window_d, model_d_name, flank_size_l, flank_size_r):
    """Extract stem-region candidates around predicted C-box / D-box pairs.

    Steps:
      1. ``get_Cbox(seqs, window_c)`` proposes C-box candidates, which are
         scored with the model stored in ``model_c_name``; candidates scoring
         below 0 are discarded.
      2. ``get_Dbox(kept, window_d)`` proposes D-box candidates, which are
         scored with the model stored in ``model_d_name``. These scores are
         not used for filtering.
      3. For every scored D-box entry the sequence is cut into a left part
         (up to 10 positions before ``pos_c`` through ``pos_c + 6``) and a
         right part (``pos_d - 1`` through ``pos_d + 13``), joined by ``"&"``.

    Args:
        seqs: input passed unchanged to ``get_Cbox``.
        window_c: second argument for ``get_Cbox``.
        model_c_name: file of the stored C-box model.
        window_d: second argument for ``get_Dbox``.
        model_d_name: file of the stored D-box model.
        flank_size_l: currently unused; the flank sizes are hard-coded.
        flank_size_r: currently unused; the flank sizes are hard-coded.

    Returns:
        A list of ``[[header, pos_c, pos_d], "<left>&<right>"]`` entries.
    """
    from eden.model import ActiveLearningBinaryClassificationModel

    # 1) C-box finder, then 2) submit the C-box candidates to the model.
    model = ActiveLearningBinaryClassificationModel()
    model.load(model_c_name)
    seqs_c_pred = _score_in_batches(model, get_Cbox(seqs, window_c))

    # Discard candidates with a prediction < 0.
    seqs_c = [cand for cand in seqs_c_pred if cand[1] >= 0.0]

    # D-box finder, then submit the D-box candidates to their own model.
    model = ActiveLearningBinaryClassificationModel()
    model.load(model_d_name)
    seqs_d_pred = _score_in_batches(model, get_Dbox(seqs_c, window_d))

    # Get the stem region from the sequences. Entry layout:
    # (([[(header, seq), pos_c], cand_c, pred_c, pos_d], 'UAAxCUGAyGAU'), 77.000434164559792)
    stem_cands = []
    for ([[(header, nts), pos_c], _cand_c, _pred_c, pos_d], _cand_d), _pred_d in seqs_d_pred:
        c_pos = int(pos_c)
        d_pos = int(pos_d)
        # Left flank: never start before the beginning of the sequence.
        left = nts[max(c_pos - 10, 0):c_pos + 6]
        # Right flank: slicing already stops at the end of the sequence, so
        # no explicit length check is needed.
        right = nts[d_pos - 1:d_pos + 3 + 10]
        stem_cands.append([[header, pos_c, pos_d], left + "&" + right])
    return stem_cands