if batch_size != 32: name += '_bs=' + str(batch_size) epsilon = args.e if epsilon != 0.05: name += '_eps=' + str(epsilon) num_steps = args.steps if num_steps != 20: name += '_steps=' + str(num_steps) if args.name: name += '_' + args.name print(name) results_path = helper.make_directory('../results', args.o) #----------------------------------------------------------------- # load data data_path = '../data' filepath = os.path.join(data_path, 'synthetic_code_dataset.h5') x_train, y_train, x_valid, y_valid, x_test, y_test, model_test = helper.load_data( filepath) N, L, A = x_train.shape num_labels = y_train.shape[1] #----------------------------------------------------------------- # create model model = genome_model.model(input_shape=(L, A), num_labels=1, activation=activation,
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import argparse
import helper
import tfomics
from model_zoo import cnn_p as genome_model

#-----------------------------------------------------------------
# experiment settings

base_name = 'cnn_local'
batch_size = 32
num_trials = 5
pool_size = 25
results_path = helper.make_directory('../results_test', 'synthetic')

# load data
# NOTE(review): `os` is used below but no `import os` is visible in this
# excerpt -- confirm it is imported above the first visible line.
data_path = '../data'
filepath = os.path.join(data_path, 'synthetic_code_dataset.h5')
(x_train, y_train,
 x_valid, y_valid,
 x_test, y_test,
 model_test) = helper.load_data(filepath)

N, L, A = x_train.shape
num_labels = y_train.shape[1]

# Sweep over the regularised and unregularised configurations.
for reg in [True, False]:
    if reg:
        # per-layer dropout rates and batch-norm switches for the regularised run
        dropout = [0.2, 0.2, 0.5]
        bn = [True, True, True]
    else:
        dropout = [0, 0, 0]
import os
import numpy as np
from tensorflow.keras import backend as K
from residualbind import ResidualBind
import helper

#---------------------------------------------------------------------------------------
# run configuration

normalization = 'log_norm'   # 'log_norm' or 'clip_norm'
ss_type = 'seq'              # 'seq', 'pu', or 'struct'

data_path = '../data/RNAcompete_2013/rnacompete2013.h5'
results_path = helper.make_directory('../results', 'rnacompete_2013')
# sub-directory named "<normalization>_<ss_type>"
save_path = helper.make_directory(results_path, '_'.join((normalization, ss_type)))

#---------------------------------------------------------------------------------------
# loop over different RNA binding proteins

pearsonr_scores = []
experiments = helper.get_experiment_names(data_path)
for rbp_index, experiment in enumerate(experiments):
    print('Analyzing: ' + experiment)

    # load rbp dataset (selected by its position in the experiment list)
    train, valid, test = helper.load_rnacompete_data(
        data_path,
        ss_type=ss_type,
        normalization=normalization,
        rbp_index=rbp_index)

    # load residualbind model
    # input shape = shape of the training inputs with the leading axis dropped
    input_shape = list(train['inputs'].shape)[1:]
#---------------------------------------------------------------------------------------------------
# sweep definition: every model is paired with every secondary-structure type

models = ['clip_conv_net', 'clip_residualbind']
ss_types = ['seq', 'pu']
window = 200

# training parameters
batch_size = 100
num_epochs = 200

# dataset path (absolute, machine-specific location)
dataset_path = '/media/peter/storage/encode_eclip/eclip_datasets'

# set results path
results_path = helper.make_directory('../../results', 'encode_eclip')

# get list of .h5 files in dataset path
file_names = helper.get_file_names(dataset_path)

# loop through models
for model in models:

    # model results path: <results_path>/<model>
    model_path = helper.make_directory(results_path, model)

    # loop through secondary structure types
    for ss_type in ss_types:

        # per-structure-type results path: <model_path>/<ss_type>
        sstype_path = helper.make_directory(model_path, ss_type)
import numpy as np
import logomaker
from six.moves import cPickle
import matplotlib.pyplot as plt
from scipy import stats
from residualbind import ResidualBind, GlobalImportance
import helper
import explain

#---------------------------------------------------------------------
# run configuration

normalization = 'log_norm'   # 'log_norm' or 'clip_norm'
ss_type = 'seq'              # 'seq', 'pu', or 'struct'
data_path = '../data/RNAcompete_2013/rnacompete2013.h5'

# NOTE(review): `os` is used below but no `import os` is visible in this
# excerpt -- confirm it is imported above the first visible line.
# results_path / save_path are only composed here (os.path.join creates
# nothing on disk); the three output folders go through helper.make_directory.
results_path = os.path.join('../results', 'rnacompete_2013')
save_path = os.path.join(results_path, '_'.join((normalization, ss_type)))
plot_path = helper.make_directory(save_path, 'plots')
motif_path = helper.make_directory(save_path, 'motifs')
kmer_path = helper.make_directory(save_path, 'kmer_motifs')

alphabet = 'ACGU'

#---------------------------------------------------------------------------------------
# get experiment names
experiments = helper.get_experiment_names(data_path)

# accumulators filled across the loop over RNA binding proteins
multiple_sites_all = []
gcbias_all = []
hairpin_all = []

# loop over different RNA binding proteins
for rbp_index, experiment in enumerate(experiments):
    print(rbp_index, experiment)
from six.moves import cPickle
import matplotlib.pyplot as plt
from scipy import stats
from residualbind import ResidualBind, GlobalImportance
import helper
import explain

#---------------------------------------------------------------------
# run configuration

# one of: 'profile', 'random', 'dinuc', 'quartile1', 'quartile2', 'quartile3', 'quartile4'
null_model = 'profile'
normalization = 'log_norm'   # 'log_norm' or 'clip_norm'
ss_type = 'seq'              # 'seq', 'pu', or 'struct'
data_path = '../data/RNAcompete_2013/rnacompete2013.h5'

# NOTE(review): `os` is used below but no `import os` is visible in this
# excerpt -- confirm it is imported above the first visible line.
# results_path / save_path are only composed here (os.path.join creates
# nothing on disk); output folder names carry the null-model tag as a suffix.
results_path = os.path.join('../results', 'rnacompete_2013')
save_path = os.path.join(results_path, normalization + '_' + ss_type)
plot_path = helper.make_directory(save_path, 'plots_' + null_model)
motif_path = helper.make_directory(save_path, 'motifs_' + null_model)
kmer_path = helper.make_directory(save_path, 'kmer_motifs_' + null_model)

alphabet = 'ACGU'

#---------------------------------------------------------------------------------------
# get experiment names
experiments = helper.get_experiment_names(data_path)

# accumulators filled across the loop over RNA binding proteins
multiple_sites_all = []
gcbias_all = []
hairpin_all = []

# loop over different RNA binding proteins
for rbp_index, experiment in enumerate(experiments):
    print(rbp_index, experiment)