def create_rev_com_genome(source_genome_path, write_dir):
    """Write a reverse-complemented copy of a genome array directory.

    Each array returned by ``load_directory(source_genome_path)`` is
    transposed, reversed along both axes, cast to ``float32`` and written
    with the ``bcolz`` array writer to ``<write_dir>/<genome_name>/<key>``,
    where ``genome_name`` is the last component of ``source_genome_path``.
    A ``metadata.json`` file recording the written shapes, the array type
    and the source path is stored in the same output directory.

    Args:
        source_genome_path: Directory readable by ``load_directory``.
            Trailing slashes are ignored.
        write_dir: Parent directory for the output. It is created,
            together with the ``<genome_name>`` subdirectory, when missing.
    """
    source_genome_path = source_genome_path.rstrip('/')
    genome_name = source_genome_path.split('/')[-1]
    write_dir = os.path.join(write_dir, genome_name)
    # makedirs rather than mkdir: the caller-supplied parent directory may
    # not exist yet, and mkdir would fail in that case.
    if not os.path.isdir(write_dir):
        os.makedirs(write_dir)

    source_genome_dict = load_directory(source_genome_path, in_memory=True)
    file_shapes = {}
    for key, chrom_data in source_genome_dict.items():
        # NOTE(review): assumes the transposed array is
        # (channels, positions) with channels ordered so that reversing
        # them maps every base onto its complement -- TODO confirm.
        data_arr = chrom_data.__dict__['_arr'][:].transpose()
        # Reversing the first axis takes the complement; reversing the
        # second axis reverses the whole chromosome.
        rev_comp_data_arr = data_arr[::-1, ::-1]
        _array_writer['bcolz'](rev_comp_data_arr.astype(np.float32),
                               os.path.join(write_dir, key))
        file_shapes[key] = rev_comp_data_arr.shape
        print("Created chromosome %s \n" % (key))

    # Write the metadata.json file.
    print("Writing metadata file \n")
    with open(os.path.join(write_dir, 'metadata.json'), 'w') as fp:
        json.dump(
            {
                'file_shapes': file_shapes,
                'type': 'array_{}'.format('bcolz'),
                'source': source_genome_path
            }, fp)
def create_complemented_genome(source_genome_path, write_dir):
    """Write a complemented (not reversed) copy of a genome array directory.

    Each array returned by ``load_directory(source_genome_path)`` is
    transposed, reversed along its first axis only, cast to ``float32`` and
    written with the ``bcolz`` array writer to
    ``<write_dir>/<genome_name>/<key>``, where ``genome_name`` is the last
    component of ``source_genome_path``.  A ``metadata.json`` file recording
    the written shapes, the array type and the source path is stored in the
    same output directory.

    Args:
        source_genome_path: Directory readable by ``load_directory``.
            Trailing slashes are ignored.
        write_dir: Parent directory for the output. It is created,
            together with the ``<genome_name>`` subdirectory, when missing.
    """
    source_genome_path = source_genome_path.rstrip('/')
    source_genome_dict = load_directory(source_genome_path, in_memory=True)
    genome_name = source_genome_path.split('/')[-1]
    write_dir = os.path.join(write_dir, genome_name)
    # makedirs rather than mkdir: the caller-supplied parent directory may
    # not exist yet, and mkdir would fail in that case.
    if not os.path.isdir(write_dir):
        os.makedirs(write_dir)

    file_shapes = {}
    for key, chrom_data in source_genome_dict.items():
        # NOTE(review): after the transpose the shape is expected to be
        # (4, N), per the original author's comment -- TODO confirm.
        data_arr = chrom_data.__dict__['_arr'][:].transpose()
        # Take the complement by flipping the first (base) axis only; the
        # order along the second axis is left untouched.
        comp_data_arr = data_arr[::-1]
        _array_writer['bcolz'](comp_data_arr.astype(np.float32),
                               os.path.join(write_dir, key))
        file_shapes[key] = comp_data_arr.shape
        print("Created chromosome %s \n" % (key))

    print("Writing metadata file \n")
    # Write the metadata.json file.
    with open(os.path.join(write_dir, 'metadata.json'), 'w') as fp:
        json.dump(
            {
                'file_shapes': file_shapes,
                'type': 'array_{}'.format('bcolz'),
                'source': source_genome_path
            }, fp)
# Unpack the parsed command-line options used by this script.
frag = args.frag
histone = args.histone
model_path = args.model_path
save_dir = args.save_dir
cuda = args.cuda
# Select which GPU(s) CUDA exposes to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = cuda

# Logging directories
srv_dir = os.path.join("/srv", "www", "kundaje", "jesikmin",
                       "test_experiments", save_dir)
if not os.path.exists(srv_dir):
    os.makedirs(srv_dir)

data = Data_Directories()
# NOTE(review): `day` is not assigned in this section; presumably it is
# set earlier in the script -- confirm.
X_test = load_directory(data.input_atac[day][frag],
                        in_memory=True)['chr22']._arr
# Replace NaNs, then add a leading axis of length 1.
X_test = np.expand_dims(np.nan_to_num(X_test), axis=0)
# Single-argument print(...) calls behave identically on Python 2 and
# also parse on Python 3.
print("Finished fetching X_test")
print(X_test.shape)
print("Dimension of ATAC-seq signal (input): {}".format(X_test[0].shape))

# NOTE(review): unlike X_test, this uses the loaded 'chr22' entry directly
# (no `._arr`, no nan_to_num) -- confirm this is intended.
y_test = load_directory('/srv/scratch/jesikmin/output/bcolz/',
                        in_memory=True)['chr22']
y_test = np.expand_dims(y_test, axis=0)
y_test = np.expand_dims(y_test, axis=2)
print("Finished fetching Y_test")
print(y_test.shape)
print("Dimension of ChIP-seq signal (output): {}".format(y_test[0].shape))

''' Generator only '''
help= "bigwig prefix. Example: a `outfile` prefix results in a `outfile.bw` bigwig file" ) args = parser.parse_args() return args # parse args args = parse_args() print(args) bigwig = '{}.bw'.format(args.output_prefix) # load data directory logger.info("Loading genomelake data..") data = load_directory(args.data_dir, in_memory=True) file_shapes = {} for chrom, chrom_data in data.items(): logger.info("Chrom " + str(chrom) + "...") for _channel_idx in range(5): channel = np.copy(chrom_data._arr[:, _channel_idx]) output_path = os.path.join( "/srv/scratch/jesikmin/temp/" + str(_channel_idx), chrom) os.makedirs(output_path) _array_writer['bcolz'](channel.astype(np.float32), output_path) file_shapes[chrom] = (chrom_data._arr.shape[0], ) for idx in range(5): with open( os.path.join("/srv/scratch/jesikmin/temp/" + str(idx), 'metadata.json'), 'w') as fpp: