def main():
    """ The main executable function """
    # Score every interval of a BED file for one TF with a previously trained
    # model and write the scores as column 4 of a gzipped, tab-separated BED.
    parser = make_argument_parser()
    args = parser.parse_args()

    input_dir = args.inputdir      # directory with the genome-wide input tracks
    model_dir = args.modeldir      # directory holding the trained model + metadata
    tf = args.factor               # transcription factor to score
    bed_file = args.bed            # intervals to score
    output_file = args.outputfile  # destination (written gzip-compressed)

    print 'Loading genome'
    genome = utils.load_genome()
    print 'Loading model'
    model_tfs, model_bigwig_names, features, model = utils.load_model(
        model_dir)
    # The sequence window length is baked into the model's input shape;
    # expose it to utils so feature generation uses the same L.
    L = model.input_shape[0][1]
    utils.L = L
    # The requested TF must be one the model was trained on.
    assert tf in model_tfs
    assert 'bigwig' in features
    use_meta = 'meta' in features
    use_gencode = 'gencode' in features
    print 'Loading test data'
    is_sorted = True
    bigwig_names, meta_names, datagen_bed, nonblacklist_bools = utils.load_beddata(
        genome, bed_file, use_meta, use_gencode, input_dir, is_sorted)
    # Input track names must match those used at training time, in order.
    assert bigwig_names == model_bigwig_names
    if use_meta:
        # Metadata feature names recorded at training time must match too.
        model_meta_file = model_dir + '/meta.txt'
        assert os.path.isfile(model_meta_file)
        model_meta_names = np.loadtxt(model_meta_file, dtype=str)
        # np.loadtxt returns a 0-d array when the file holds a single entry;
        # normalize both cases to a plain list of names before comparing.
        if len(model_meta_names.shape) == 0:
            model_meta_names = [str(model_meta_names)]
        else:
            model_meta_names = list(model_meta_names)
        assert meta_names == model_meta_names
    print 'Generating predictions'
    model_tf_index = model_tfs.index(tf)
    model_predicts = model.predict_generator(datagen_bed,
                                             val_samples=len(datagen_bed),
                                             pickle_safe=True)
    # A multi-task model emits one column per TF; select the requested one.
    if len(model_tfs) > 1:
        model_tf_predicts = model_predicts[:, model_tf_index]
    else:
        model_tf_predicts = model_predicts
    # nonblacklist_bools marks the rows that were actually scored; the
    # remaining (presumably blacklisted) rows keep a score of 0 -- TODO
    # confirm against utils.load_beddata.
    final_scores = np.zeros(len(nonblacklist_bools))
    final_scores[nonblacklist_bools] = model_tf_predicts
    print 'Saving predictions'
    df = pandas.read_csv(bed_file, sep='\t', header=None)
    df[3] = final_scores  # write scores into the 4th BED column
    df.to_csv(output_file, sep='\t', compression='gzip',
              float_format='%.3e', header=False, index=False)
def main():
    """ The main executable function """
    # Load a trained model, build features for the intervals of a BED file
    # (optionally restricted to one chromosome), and hand everything to
    # output_results() which writes into a freshly created output directory.
    parser = make_argument_parser()
    args = parser.parse_args()
    input_dir = args.inputdir
    model_dir = args.modeldir
    bed_file = args.bed
    chrom = args.chrom  # restrict to this chromosome (may be None)
    # --outputdir must not already exist; --outputdirc may be clobbered.
    if args.outputdir is None:
        clobber = True
        output_dir = args.outputdirc
    else:
        clobber = False
        output_dir = args.outputdir
    try:  # adapted from dreme.py by T. Bailey
        os.makedirs(output_dir)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            if not clobber:
                print >> sys.stderr, (
                    'output directory (%s) already exists '
                    'but you specified not to clobber it') % output_dir
                sys.exit(1)
            else:
                print >> sys.stderr, ('output directory (%s) already exists '
                                      'so it will be clobbered') % output_dir
    print 'Loading genome'
    genome = utils.load_genome()
    print 'Loading model'
    model_tfs, model_bigwig_names, features, model = utils.load_model(
        model_dir)
    # Window length is fixed by the model's input shape; share it with utils.
    L = model.input_shape[0][1]
    utils.L = L
    use_meta = 'meta' in features
    use_gencode = 'gencode' in features
    print 'Loading BED data'
    is_sorted = False
    bigwig_names, meta_names, datagen_bed, nonblacklist_bools = utils.load_beddata(
        genome, bed_file, use_meta, use_gencode, input_dir, is_sorted, chrom)
    # Input track names must match those used at training time, in order.
    assert bigwig_names == model_bigwig_names
    if use_meta:
        # Metadata feature names recorded at training time must match too.
        model_meta_file = model_dir + '/meta.txt'
        assert os.path.isfile(model_meta_file)
        model_meta_names = np.loadtxt(model_meta_file, dtype=str)
        # np.loadtxt returns a 0-d array for a single-entry file; normalize
        # both cases to a plain list before comparing.
        if len(model_meta_names.shape) == 0:
            model_meta_names = [str(model_meta_names)]
        else:
            model_meta_names = list(model_meta_names)
        assert meta_names == model_meta_names
    output_results(bigwig_names, datagen_bed, model, output_dir)
def main():
    """The main executable function.

    scFAN multi-task *testing* pipeline: parse arguments, load the genome,
    ChIP labels and bigWig tracks, build the feature generators, score the
    held-out windows with a trained model via test(), and return the
    prediction matrix (windows x TFs).
    """
    parser = make_argument_parser()
    args = parser.parse_args()

    input_dirs = args.inputdirs
    tf = args.factor
    valid_chroms = args.validchroms
    valid_input_dirs = args.validinputdirs
    test_chroms = args.testchroms
    epochs = args.epochs
    patience = args.patience
    learningrate = args.learningrate
    seed = args.seed
    utils.set_seed(seed)
    dropout_rate = args.dropout
    L = args.seqlen
    w = args.motifwidth
    utils.L = L
    utils.w = w
    # BUGFIX: use floor division so utils.w2 stays an int under Python 3
    # (w/2 yields a float). This mirrors the Python 2 integer-division
    # semantics of the original FactorNet code this script derives from;
    # w2 is presumably used as an index/offset -- confirm in utils.
    utils.w2 = w // 2
    negatives = args.negatives
    assert negatives > 0
    meta = args.meta
    gencode = args.gencode
    motif = args.motif
    num_motifs = args.kernels
    num_recurrent = args.recurrent
    num_dense = args.dense
    features = ['bigwig']
    if tf:
        print('Single-task training:', tf)
        # BUGFIX: the original never assigned singleTask on this branch, so
        # giving --factor raised NameError at the first later use. NOTE: the
        # single-task path is stubbed out in scFAN and still cannot build
        # feature generators below.
        singleTask = True
    else:
        print('Multi-task training')
        singleTask = False
        # Multi-task mode cannot use any metadata features.
        assert not meta
        assert not gencode
    # --outputdir must be new; --outputdirc is allowed to already exist.
    if args.outputdir is None:
        clobber = True
        output_dir = args.outputdirc
    else:
        clobber = False
        output_dir = args.outputdir
    try:  # adapted from dreme.py and train.py by T. Bailey & Daniel Quang
        os.makedirs(output_dir)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            if not clobber:
                # BUGFIX: was print(sys.stderr, msg), a broken py2->py3
                # conversion that printed the file object to stdout; write
                # the message to stderr instead.
                print(('output directory (%s) already exists '
                       'but you specified not to clobber it') % output_dir,
                      file=sys.stderr)
                sys.exit(1)
            else:
                print(('output directory (%s) already exists '
                       'so it will be clobbered') % output_dir,
                      file=sys.stderr)
    print('Loading genome')
    genome = utils.load_genome()
    if valid_input_dirs:
        print('You specified at least one validation input directory')
        assert singleTask  # this option only works for single-task training
    print('Loading ChIP labels')
    if singleTask:  # did not use in scFAN
        num_tfs = 1
    else:
        # multi-task training only supports one cell line
        assert len(input_dirs) == 1
        input_dir = input_dirs[0]
        tfs, positive_windows, y_positive, nonnegative_regions_bed = \
            utils.load_chip_multiTask(input_dir)
        num_tfs = len(tfs)
    print('Loading bigWig data')
    bigwig_names, bigwig_files_list = utils.load_bigwigs(input_dirs)
    num_bigwigs = len(bigwig_names)
    if valid_input_dirs:
        valid_bigwig_names, valid_bigwig_files_list = \
            utils.load_bigwigs(valid_input_dirs)
        assert valid_bigwig_names == bigwig_names
    if not singleTask:
        bigwig_files = bigwig_files_list[0]
    if meta:  # did not use in scFAN
        print('Loading metadata features')
    else:
        # meta option was not selected: pass empty metadata features through.
        meta_list = [[] for _ in bigwig_files_list]
        if valid_input_dirs:
            valid_meta_list = [[] for _ in valid_bigwig_files_list]
    print('Making features')
    if singleTask:  # did not use in scFAN
        print('single Task feature')
    else:
        datagen_train, datagen_valid, datagen_test, data_valid, data_test = \
            utils.make_features_multiTask(positive_windows, y_positive,
                                          nonnegative_regions_bed,
                                          bigwig_files, bigwig_names,
                                          genome, epochs, valid_chroms,
                                          test_chroms)
    print('Building model')
    # Fall back to the validation generator when there are no test windows.
    if len(data_test) == 0:
        model_predicts, model_tfs = test(output_dir, datagen_valid)
        data_test = data_valid
    else:
        model_predicts, model_tfs = test(output_dir, datagen_test)
    # test and evaluate: uncomment this part for evaluation
    '''
    test_label_all = []
    for _, item in enumerate(data_test):
        test_label_all.append(item[-1])
    test_label_all = np.array(test_label_all)
    atac = []
    atacpr = []
    atacrc = []
    for index in range(len(model_tfs)):
        truth = test_label_all[:, index]
        pred = model_predicts[:, index]
        atac.append(roc_auc_score(truth, pred))
        atacpr.append(average_precision_score(truth, pred))
        try:
            precision, recall, thresholds = precision_recall_curve(truth, pred)
            atacrc.append(recall[np.where(precision == 0.5)[0][0]])
        except:
            atacrc.append(0)
    print("Average AUC ROC (ATAC)", np.mean(atac))
    print("Average AUPR ROC (ATAC)", np.mean(atacpr))
    print("Average Recall ROC (ATAC)", np.mean(atacrc))
    '''
    return model_predicts
def main():
    """The main executable function.

    scFAN multi-task *training* pipeline: parse arguments, load the genome,
    ChIP labels and bigWig tracks, build the feature generators and model,
    record the run configuration (chip.txt, feature.txt, bigwig.txt,
    model.json) in the output directory, then train.
    """
    parser = make_argument_parser()
    args = parser.parse_args()

    input_dirs = args.inputdirs
    tf = args.factor
    valid_chroms = args.validchroms
    valid_input_dirs = args.validinputdirs
    test_chroms = args.testchroms
    epochs = args.epochs
    patience = args.patience
    learningrate = args.learningrate
    seed = args.seed
    utils.set_seed(seed)
    dropout_rate = args.dropout
    L = args.seqlen
    w = args.motifwidth
    utils.L = L
    utils.w = w
    # BUGFIX: use floor division so utils.w2 stays an int under Python 3
    # (w/2 yields a float). This mirrors the Python 2 integer-division
    # semantics of the original FactorNet code this script derives from;
    # w2 is presumably used as an index/offset -- confirm in utils.
    utils.w2 = w // 2
    negatives = args.negatives
    assert negatives > 0
    meta = args.meta
    gencode = args.gencode
    motif = args.motif
    num_motifs = args.kernels
    num_recurrent = args.recurrent
    num_dense = args.dense
    features = ['bigwig']
    if tf:
        print('Single-task training:', tf)
        # BUGFIX: the original never assigned singleTask on this branch, so
        # giving --factor raised NameError at the first later use. NOTE: the
        # single-task path is stubbed out in scFAN and still cannot build
        # feature generators below.
        singleTask = True
    else:
        print('Multi-task training')
        singleTask = False
        # Multi-task mode cannot use any metadata features.
        assert not meta
        assert not gencode
    # --outputdir must be new; --outputdirc is allowed to already exist.
    if args.outputdir is None:
        clobber = True
        output_dir = args.outputdirc
    else:
        clobber = False
        output_dir = args.outputdir
    try:  # adapted from dreme.py and train.py by T. Bailey & Daniel Quang
        os.makedirs(output_dir)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            if not clobber:
                # BUGFIX: was print(sys.stderr, msg), a broken py2->py3
                # conversion that printed the file object to stdout; write
                # the message to stderr instead.
                print(('output directory (%s) already exists '
                       'but you specified not to clobber it') % output_dir,
                      file=sys.stderr)
                sys.exit(1)
            else:
                print(('output directory (%s) already exists '
                       'so it will be clobbered') % output_dir,
                      file=sys.stderr)
    print('Loading genome')
    genome = utils.load_genome()
    if valid_input_dirs:
        print('You specified at least one validation input directory')
        assert singleTask  # this option only works for single-task training
    print('Loading ChIP labels')
    if singleTask:  # did not use in scFAN
        num_tfs = 1
    else:
        # multi-task training only supports one cell line
        assert len(input_dirs) == 1
        input_dir = input_dirs[0]
        tfs, positive_windows, y_positive, nonnegative_regions_bed = \
            utils.load_chip_multiTask(input_dir)
        num_tfs = len(tfs)
    print('Loading bigWig data')
    bigwig_names, bigwig_files_list = utils.load_bigwigs(input_dirs)
    num_bigwigs = len(bigwig_names)
    if valid_input_dirs:
        valid_bigwig_names, valid_bigwig_files_list = \
            utils.load_bigwigs(valid_input_dirs)
        assert valid_bigwig_names == bigwig_names
    if not singleTask:
        bigwig_files = bigwig_files_list[0]
    if meta:  # did not use in scFAN
        print('Loading metadata features')
    else:
        # meta option was not selected: pass empty metadata features through.
        meta_list = [[] for _ in bigwig_files_list]
        if valid_input_dirs:
            valid_meta_list = [[] for _ in valid_bigwig_files_list]
    print('Making features')
    if singleTask:  # did not use in scFAN
        print('single Task feature')
    else:
        datagen_train, datagen_valid, datagen_test, data_valid, data_test = \
            utils.make_features_multiTask(positive_windows, y_positive,
                                          nonnegative_regions_bed,
                                          bigwig_files, bigwig_names,
                                          genome, epochs, valid_chroms,
                                          test_chroms)
    print('Building model')
    if num_recurrent == 0:
        print('You specified 0 LSTM units. Omitting BLSTM layer')
    if num_recurrent < 0:
        print('You specified less than 0 LSTM units. Replacing BLSTM layer with global max-pooling layer')
    if meta or gencode:
        # NOTE(review): meta_names is never assigned in this script (metadata
        # loading is stubbed out above), so this branch would raise NameError
        # if --meta were actually used -- confirm before enabling meta.
        num_meta = 0
        if meta:
            num_meta = len(meta_names)
        if gencode:
            num_meta += 6
        model = utils.make_meta_model(num_tfs, num_bigwigs, num_meta,
                                      num_motifs, num_recurrent, num_dense,
                                      dropout_rate)
    else:
        # Alternative architectures kept for reference:
        # model = utils.make_model(num_tfs, num_bigwigs, num_motifs, num_recurrent, num_dense, dropout_rate)
        # model = utils.DeepSEA(num_tfs, num_recurrent, num_bigwigs)
        model = utils.scFANet(num_tfs, num_recurrent, num_bigwigs)
    if motif:
        assert singleTask  # this option only works with single-task training
    # Record the configuration of this run in the output directory, closing
    # each file deterministically via context managers.
    with open(output_dir + '/chip.txt', 'w') as output_tf_file:
        if singleTask:  # did not use in scFAN
            output_tf_file.write("%s\n" % tf)
        else:
            for tf in tfs:
                output_tf_file.write("%s\n" % tf)
    with open(output_dir + '/feature.txt', 'w') as output_feature_file:
        for feature in features:
            output_feature_file.write("%s\n" % feature)
    with open(output_dir + '/bigwig.txt', 'w') as output_bw_file:
        for bw in bigwig_names:
            output_bw_file.write("%s\n" % bw)
    model_json = model.to_json()
    with open(output_dir + '/model.json', 'w') as output_json_file:
        output_json_file.write(model_json)
    train(datagen_train, datagen_valid, model, epochs, patience,
          learningrate, output_dir)
def main(): """ The main executable function """ parser = make_argument_parser() args = parser.parse_args() input_dirs = args.inputdirs tf = args.factor valid_chroms = args.validchroms valid_input_dirs = args.validinputdirs test_chroms = args.testchroms epochs = args.epochs patience = args.patience learningrate = args.learningrate seed = args.seed utils.set_seed(seed) dropout_rate = args.dropout L = args.seqlen w = args.motifwidth utils.L = L utils.w = w utils.w2 = w / 2 negatives = args.negatives assert negatives > 0 meta = args.meta gencode = args.gencode motif = args.motif num_motifs = args.kernels num_recurrent = args.recurrent num_dense = args.dense features = ['bigwig'] if tf: print 'Single-task training:', tf singleTask = True if meta: print 'Including metadata features' features.append('meta') if gencode: print 'Including genome annotations' features.append('gencode') else: print 'Multi-task training' singleTask = False #Cannot use any metadata features assert not meta assert not gencode if args.outputdir is None: clobber = True output_dir = args.outputdirc else: clobber = False output_dir = args.outputdir try: # adapted from dreme.py by T. 
Bailey os.makedirs(output_dir) except OSError as exc: if exc.errno == errno.EEXIST: if not clobber: print >> sys.stderr, ( 'output directory (%s) already exists ' 'but you specified not to clobber it') % output_dir sys.exit(1) else: print >> sys.stderr, ('output directory (%s) already exists ' 'so it will be clobbered') % output_dir print 'Loading genome' genome = utils.load_genome() if valid_input_dirs: print 'You specified at least one validation input directory' assert singleTask # This option only works for single-task training print 'Loading ChIP labels' if singleTask: chip_bed_list, nonnegative_regions_bed_list = \ utils.load_chip_singleTask(input_dirs, tf) if valid_input_dirs: valid_chip_bed_list, valid_nonnegative_regions_bed_list = \ utils.load_chip_singleTask(valid_input_dirs, tf) num_tfs = 1 else: assert len( input_dirs) == 1 # multi-task training only supports one cell line input_dir = input_dirs[0] tfs, positive_windows, y_positive, nonnegative_regions_bed = \ utils.load_chip_multiTask(input_dir) num_tfs = len(tfs) print 'Loading bigWig data' bigwig_names, bigwig_files_list = utils.load_bigwigs(input_dirs) num_bigwigs = len(bigwig_names) if valid_input_dirs: valid_bigwig_names, valid_bigwig_files_list = utils.load_bigwigs( valid_input_dirs) assert valid_bigwig_names == bigwig_names if not singleTask: bigwig_files = bigwig_files_list[0] if meta: print 'Loading metadata features' meta_names, meta_list = utils.load_meta(input_dirs) if valid_input_dirs: valid_meta_names, valid_meta_list = utils.load_load( valid_input_dirs) assert valid_meta_names == meta_names else: # meta option was not selected, pass empty metadata features to the functions meta_list = [[] for bigwig_files in bigwig_files_list] if valid_input_dirs: valid_meta_list = [[] for bigwig_files in valid_bigwig_files_list] print 'Making features' if singleTask: if not valid_input_dirs: #validation directories not used, must pass placeholder values valid_chip_bed_list = None 
valid_nonnegative_regions_bed_list = None valid_bigwig_files_list = None valid_meta_list = None datagen_train, datagen_valid = \ utils.make_features_singleTask(chip_bed_list, nonnegative_regions_bed_list, bigwig_files_list, bigwig_names, meta_list, gencode, genome, epochs, negatives, valid_chroms, test_chroms, valid_chip_bed_list, valid_nonnegative_regions_bed_list, valid_bigwig_files_list, valid_meta_list) else: datagen_train, datagen_valid = \ utils.make_features_multiTask(positive_windows, y_positive, nonnegative_regions_bed, bigwig_files, bigwig_names, genome, epochs, valid_chroms, test_chroms) print 'Building model' if num_recurrent == 0: print 'You specified 0 LSTM units. Omitting BLSTM layer' if num_recurrent < 0: print 'You specified less than 0 LSTM units. Replacing BLSTM layer with global max-pooling layer' if meta or gencode: num_meta = 0 if meta: num_meta = len(meta_names) if gencode: num_meta += 6 model = utils.make_meta_model(num_tfs, num_bigwigs, num_meta, num_motifs, num_recurrent, num_dense, dropout_rate) else: model = utils.make_model(num_tfs, num_bigwigs, num_motifs, num_recurrent, num_dense, dropout_rate) if motif: assert singleTask # This option only works with single-task training motifs_db = utils.load_motif_db('resources/HOCOMOCOv9.meme') if tf in motifs_db: print 'Injecting canonical motif' pwm = motifs_db[tf] pwm += 0.001 pwm = pwm / pwm.sum(axis=1)[:, np.newaxis] pwm = np.log2(pwm / 0.25) utils.inject_pwm(model, pwm) output_tf_file = open(output_dir + '/chip.txt', 'w') if singleTask: output_tf_file.write("%s\n" % tf) else: for tf in tfs: output_tf_file.write("%s\n" % tf) output_tf_file.close() output_feature_file = open(output_dir + '/feature.txt', 'w') for feature in features: output_feature_file.write("%s\n" % feature) output_feature_file.close() output_bw_file = open(output_dir + '/bigwig.txt', 'w') for bw in bigwig_names: output_bw_file.write("%s\n" % bw) output_bw_file.close() if meta: output_meta_file = open(output_dir + 
'/meta.txt', 'w') for meta_name in meta_names: output_meta_file.write("%s\n" % meta_name) output_meta_file.close() model_json = model.to_json() output_json_file = open(output_dir + '/model.json', 'w') output_json_file.write(model_json) output_json_file.close() train(datagen_train, datagen_valid, model, epochs, patience, learningrate, output_dir)