def process_in_each_thread(line, name, apply_cmvn):
    """Convert one list-file line into a single .tfrecords file.

    line: "utt_id inputs_path labels_path" (or "utt_id inputs_path" for the
        'test' partition, which has no labels).
    name: partition name ('train'/'valid'/'test'); also the output subdirectory.
    apply_cmvn: if True, normalize features with precomputed mean/stddev stats.
    """
    # Test lines carry no label path.
    if name != 'test':
        utt_id, inputs_path, labels_path = line.strip().split()
    else:
        utt_id, inputs_path = line.strip().split()
    tfrecords_name = os.path.join(FLAGS.output_dir, name, utt_id + ".tfrecords")
    with tf.python_io.TFRecordWriter(tfrecords_name) as writer:
        tf.logging.info(
            "Writing utterance %s to %s" % (utt_id, tfrecords_name))
        inputs = read_binary_file(inputs_path, FLAGS.input_dim).astype(np.float64)
        if name != 'test':
            labels = read_binary_file(labels_path, FLAGS.output_dim).astype(np.float64)
        else:
            labels = None
        if apply_cmvn:
            if name != 'test':
                # NOTE(review): assumes the first 3 chars of utt_id encode the
                # speaker id and a per-speaker "<spk>_cmvn.npz" exists — confirm.
                spk_id = utt_id[:3]
                cmvn = np.load(os.path.join(FLAGS.output_dir,
                                            "{}_cmvn.npz".format(spk_id)))
            else:
                # Test utterances are normalized with the train-set statistics.
                cmvn = np.load(os.path.join(FLAGS.output_dir, "train_cmvn.npz"))
            inputs = (inputs - cmvn["mean_inputs"]) / cmvn["stddev_inputs"]
            if labels is not None:
                labels = (labels - cmvn["mean_labels"]) / cmvn["stddev_labels"]
        ex = make_sequence_example(inputs, labels)
        writer.write(ex.SerializeToString())
def get_train_data(sequence_length=100):
    """Prepare input/output training data for the neural network.

    Slides a window of `sequence_length` notes over the note stream; each
    window is an input sample and the following note is its target.

    Returns:
        (network_input, network_output, vocab_size) where network_input has
        shape (n_patterns, sequence_length, 1) normalized to [0, 1] and
        network_output is one-hot encoded.

    Side effects: pickles the prepared arrays to metadata_dir.
    """
    network_input = []
    network_output = []
    notes = read_binary_file(str(data_dir / "notes.pkl"))
    # Embedding lookup (note -> integer id).
    # TODO use keras Embedding layer instead
    note_to_int = read_binary_file(metadata_dir / "note_to_int.pkl")
    # Dict keys are already unique, so the former len(set(note_to_int))
    # was a redundant copy. (Also dropped the unused `pitch_names` local.)
    vocab_size = len(note_to_int)
    # Create input sequences and the corresponding outputs.
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])
    n_patterns = len(network_input)
    # Reshape into (samples, timesteps, features) for LSTM layers.
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # Normalize ids into [0, 1].
    network_input = network_input / float(vocab_size)
    network_output = np_utils.to_categorical(network_output)
    with open(metadata_dir / "sequence_in.pkl", "wb") as f:
        pickle.dump(network_input, f)
    with open(metadata_dir / "sequence_out.pkl", "wb") as f:
        pickle.dump(network_output, f)
    return network_input, network_output, vocab_size
def prepare_validation_data(files, sequence_length):
    """Build normalized validation sequences from a list of note files.

    files: iterable of paths readable by read_binary_file; their note lists
        are concatenated into one stream.
    sequence_length: window size; each window predicts the next note.

    Returns:
        (sequence_in, sequence_out): inputs of shape
        (n, sequence_length, 1) scaled to [0, 1], and one-hot targets.
    """
    note_to_int = read_binary_file(metadata_dir / "note_to_int.pkl")
    notes = []
    for file_name in files:
        notes.extend(read_binary_file(file_name))
    sequence_in = []
    sequence_out = []
    # Map notes to integer ids while windowing, instead of the original
    # two-pass build-then-mutate-in-place index loops (same result).
    for i in range(len(notes) - sequence_length):
        sequence_in.append([note_to_int[n] for n in notes[i:i + sequence_length]])
        sequence_out.append(note_to_int[notes[i + sequence_length]])
    # (samples, timesteps, features) for the LSTM, normalized by vocab size.
    sequence_in = np.reshape(sequence_in,
                             (len(sequence_in), sequence_length, 1))
    sequence_in = sequence_in / float(len(note_to_int))
    sequence_out = np_utils.to_categorical(sequence_out,
                                           num_classes=len(note_to_int))
    return sequence_in, sequence_out
def calculate_cmvn(file_list, spk_id):
    """Compute per-dimension mean/stddev over all utterances of one speaker
    and save them to "<output_dir>/<spk_id>_cmvn.npz".

    file_list: lines of "utt_id inputs_path labels_path".
    spk_id: speaker identifier used only to name the output file.

    Note: an empty file_list raises (the accumulators are created on the
    first utterance).
    """
    inputs_frame_count, labels_frame_count = 0, 0
    for line in file_list:
        utt_id, inputs_path, labels_path = line.strip().split()
        tf.logging.info("Reading utterance %s" % utt_id)
        inputs = read_binary_file(inputs_path, FLAGS.input_dim)
        labels = read_binary_file(labels_path, FLAGS.output_dim)
        if inputs_frame_count == 0:    # create numpy array for accumulating
            # Running sums of x and x^2 (per dimension) for mean/variance.
            ex_inputs = np.sum(inputs, axis=0)
            ex2_inputs = np.sum(inputs**2, axis=0)
            ex_labels = np.sum(labels, axis=0)
            ex2_labels = np.sum(labels**2, axis=0)
        else:
            ex_inputs += np.sum(inputs, axis=0)
            ex2_inputs += np.sum(inputs**2, axis=0)
            ex_labels += np.sum(labels, axis=0)
            ex2_labels += np.sum(labels**2, axis=0)
        inputs_frame_count += len(inputs)
        labels_frame_count += len(labels)
    # stddev via E[x^2] - E[x]^2; floor tiny values to avoid divide-by-zero
    # when these stats are later used for normalization.
    mean_inputs = ex_inputs / inputs_frame_count
    stddev_inputs = np.sqrt(ex2_inputs / inputs_frame_count - mean_inputs**2)
    stddev_inputs[stddev_inputs < 1e-20] = 1e-20
    mean_labels = ex_labels / labels_frame_count
    stddev_labels = np.sqrt(ex2_labels / labels_frame_count - mean_labels**2)
    stddev_labels[stddev_labels < 1e-20] = 1e-20
    cmvn_name = os.path.join(FLAGS.output_dir, spk_id + "_cmvn.npz")
    np.savez(cmvn_name,
             mean_inputs=mean_inputs,
             stddev_inputs=stddev_inputs,
             mean_labels=mean_labels,
             stddev_labels=stddev_labels)
    tf.logging.info("Wrote to %s" % cmvn_name)
def generate_sloth_input(self):
    """Return the SHA-512 hash of the concatenated image and tweet bytes."""
    image_bytes = utils.read_binary_file(self.image_filepath)
    tweet_bytes = utils.read_binary_file(self.tweets_filepath)
    combined = bytearray(image_bytes + tweet_bytes)
    return utils.hash_sha512(combined)
def read_data(args, raw):
    """Prepare aligned label/cmp pairs under <cur_file>/<raw>/prepared_*.

    For each label file, loads the matching .cmp features, makes the frame
    counts agree (truncating the cmp, or padding it by repeating its last
    frame), then writes both out in binary form. Skips all work if both
    output directories already exist.
    """
    label_dir = os.path.join(cur_file, raw, 'prepared_label')
    cmp_dir = os.path.join(cur_file, raw, 'prepared_cmp')
    if os.path.exists(label_dir) and os.path.exists(cmp_dir):
        logger.info('Raw data has been prepared.')
        return
    if not os.path.exists(label_dir):
        os.mkdir(label_dir)
    if not os.path.exists(cmp_dir):
        os.mkdir(cmp_dir)
    label_files = os.listdir(args.label_dir)
    cmp_files = os.listdir(args.cmp_dir)  # NOTE(review): unused; cmp files are looked up by label basename
    # Do frame alignment
    for line in label_files:
        filename, _ = os.path.splitext(line.strip())
        logger.info('processing ' + filename)
        sys.stdout.flush()
        label_mat = np.loadtxt(os.path.join(args.label_dir,
                                            filename + '.lab'))
        # dtype/dimension differ per model type (float64 for plain acoustic,
        # float32 for the mgc variant); an unknown model_type would leave
        # cmp_mat undefined and raise below.
        if args.model_type == 'acoustic':
            cmp_mat = read_binary_file(os.path.join(args.cmp_dir,
                                                    filename + ".cmp"),
                                       dimension=hparams['target_channels'],
                                       dtype=np.float64)
        elif args.model_type == 'acoustic_mgc':
            cmp_mat = read_binary_file(
                os.path.join(args.cmp_dir, filename + ".cmp"),
                dimension=hparams['mgc_target_channels'],
                dtype=np.float32)
        # Align frame counts to the label matrix: truncate, or pad by
        # repeating the final cmp frame.
        if label_mat.shape[0] <= cmp_mat.shape[0]:
            cmp_mat = cmp_mat[:label_mat.shape[0], :]
        else:
            frame_diff = label_mat.shape[0] - cmp_mat.shape[0]
            rep = np.repeat(cmp_mat[-1:, :], frame_diff, axis=0)
            cmp_mat = np.concatenate([cmp_mat, rep], axis=0)
        write_binary_file(label_mat, os.path.join(label_dir,
                                                  filename + '.lab'))
        if args.model_type == 'acoustic':
            write_binary_file(cmp_mat,
                              os.path.join(cmp_dir, filename + '.cmp'),
                              dtype=np.float64)
        elif args.model_type == 'acoustic_mgc':
            write_binary_file(cmp_mat,
                              os.path.join(cmp_dir, filename + '.cmp'),
                              dtype=np.float32)
def scale_pdf(filedata):
    """
    Scales the given PDF filedata down and returns the compressed PDF data.
    Returns the original filedata whenever scaling fails or does not help.
    """
    # If there are more than 50 pages, it's going to take forever to scale -
    # don't even bother trying.
    pagecount = utils.pdf_count_pages(filedata)
    if pagecount > 50:
        al.error("Abandon PDF scaling - has > 50 pages (%s found)" % pagecount,
                 "media.scale_pdf")
        return filedata
    # delete=False: the files must outlive these handles so the external
    # scaler can operate on them; we are responsible for unlinking them.
    inputfile = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    outputfile = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    inputfile.write(filedata)
    inputfile.flush()
    inputfile.close()
    outputfile.close()
    # If something went wrong during the scaling, use the original data.
    if not scale_pdf_file(inputfile.name, outputfile.name):
        # BUGFIX: this early return previously leaked both temp files.
        os.unlink(inputfile.name)
        os.unlink(outputfile.name)
        return filedata
    compressed = utils.read_binary_file(outputfile.name)
    os.unlink(inputfile.name)
    os.unlink(outputfile.name)
    # If something has gone wrong and the scaled one has no size, return the original
    if len(compressed) == 0:
        return filedata
    # If the original is smaller than the scaled one, return the original
    if len(compressed) > len(filedata):
        return filedata
    return compressed
def scale_all_animal_images(dbo):
    """
    Goes through all animal images in the database and scales
    them to the current incoming media scaling factor.
    """
    mp = dbo.query("SELECT ID, MediaName FROM media WHERE MediaMimeType = 'image/jpeg' AND LinkTypeID = 0")
    for i, m in enumerate(mp):
        filepath = dbo.query_string("SELECT Path FROM dbfs WHERE Name = ?", [m.MEDIANAME])
        name = str(m.MEDIANAME)
        # delete=False: the scaler needs real paths; we unlink them ourselves.
        inputfile = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
        outputfile = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
        odata = dbfs.get_string(dbo, name)
        inputfile.write(odata)
        inputfile.flush()
        inputfile.close()
        outputfile.close()
        # i + 1: report 1-based progress ("1 of N" instead of "0 of N").
        al.debug("scaling %s (%d of %d)" % (name, i + 1, len(mp)), "media.scale_all_animal_images", dbo)
        try:
            scale_image_file(inputfile.name, outputfile.name, configuration.incoming_media_scaling(dbo))
        except Exception as err:
            al.error("failed scaling image, doing nothing: %s" % err, "media.scale_all_animal_images", dbo)
            # BUGFIX: the continue previously leaked both temp files.
            os.unlink(inputfile.name)
            os.unlink(outputfile.name)
            continue
        data = utils.read_binary_file(outputfile.name)
        os.unlink(inputfile.name)
        os.unlink(outputfile.name)
        # Update the image file data
        dbfs.put_string(dbo, name, filepath, data)
        dbo.update("media", m.ID, { "MediaSize": len(data) })
    al.debug("scaled %d images" % len(mp), "media.scale_all_animal_images", dbo)
def check_saved_embedding_variables(args, embedding_variable_name,
                                    use_hashtable=True, gpu_num=None,
                                    atol=1e-4, rtol=1e-4):
    """Verify that the SOK-saved embedding variable matches the TF one.

    Loads the SOK keys/values dumped to ./embedding_variables, sorts them by
    key, gathers the corresponding rows from the restored TF variable, and
    asserts element-wise closeness at (atol, rtol).

    Raises:
        ValueError: if the two parameter sets are not numerically close.
    """
    # Hoisted from mid-function; the pointless `atol, rtol = atol, rtol`
    # self-assignment was removed.
    import numpy as np

    filepath = r"./embedding_variables"
    sok_keys_filename = os.path.join(filepath,
                                     embedding_variable_name + r"_keys.file")
    # Keys default to int64 unless args declares a 32-bit key dtype.
    element_type = "long long"
    if hasattr(args, "key_dtype"):
        element_type = "long long" if args.key_dtype == "int64" else "unsigned int"
    sok_keys = utils.read_binary_file(sok_keys_filename,
                                      element_type=element_type)
    sok_values_filename = os.path.join(
        filepath, embedding_variable_name + r"_values.file")
    sok_values = utils.read_binary_file(sok_values_filename,
                                        element_type="float")
    sorted_sok_keys, sorted_sok_values = utils.sort_embedding_variables_by_key(
        sok_keys, sok_values,
        embedding_vec_size=args.embedding_vec_size,
        use_hashtable=use_hashtable, gpu_num=gpu_num)
    tf_values_filename = os.path.join(filepath, r"tf_variable.file")
    tf_values = utils.restore_from_file(tf_values_filename)
    valid_tf_values = utils.get_valid_tf_values(sorted_sok_keys, tf_values[0])
    sorted_sok_values = np.reshape(
        sorted_sok_values,
        newshape=(sorted_sok_keys.size, args.embedding_vec_size))
    allclose = np.allclose(sorted_sok_values, valid_tf_values,
                           atol=atol, rtol=rtol)
    if not allclose:
        raise ValueError(
            f"The Variable from SOK: \n{sorted_sok_values}, \nis not near to that from TF: \n{valid_tf_values}"
            f" \n at atol: {atol}, rtol: {rtol}")
    print(
        "[INFO]: the saved parameters are consistent between sparse operation kit and TensorFlow"
    )
def check_saved_embedding_variables(args, embedding_variable_names,
                                    use_hashtable=True, gpu_num=None,
                                    atol=1e-4, rtol=1e-4):
    """Verify each SOK-saved embedding variable against its TF counterpart.

    For every name in embedding_variable_names, loads the SOK key/value
    dumps from ./embedding_variables, sorts by key, gathers the matching
    rows of the restored TF variable, and checks element-wise closeness.

    Raises:
        ValueError: if any pair is not numerically close at (atol, rtol).
    """
    filepath = r"./embedding_variables"
    for i, embedding_variable_name in enumerate(embedding_variable_names):
        sok_keys_filename = os.path.join(
            filepath, embedding_variable_name + r"_keys.file")
        # Keys default to int64 unless args declares a 32-bit key dtype.
        element_type = "long long"
        if hasattr(args, "key_dtype"):
            element_type = "long long" if args.key_dtype == "int64" else "unsigned int"
        sok_keys = utils.read_binary_file(sok_keys_filename,
                                          element_type=element_type)
        sok_values_filename = os.path.join(
            filepath, embedding_variable_name + r"_values.file")
        sok_values = utils.read_binary_file(sok_values_filename,
                                            element_type="float")
        sorted_sok_keys, sorted_sok_values = utils.sort_embedding_variables_by_key(
            sok_keys, sok_values,
            embedding_vec_size=args.embedding_vec_size[i],
            use_hashtable=use_hashtable, gpu_num=gpu_num)
        tf_values_filename = os.path.join(filepath,
                                          r"tf_variable_" + str(i) + r".file")
        tf_values = utils.restore_from_file(tf_values_filename)
        valid_tf_values = utils.get_valid_tf_values(sorted_sok_keys,
                                                    tf_values[0])
        vec_size = args.embedding_vec_size[i]
        newshape = tuple([sorted_sok_keys.size, vec_size])
        sorted_sok_values = np.reshape(sorted_sok_values, newshape=newshape)
        allclose = np.allclose(sorted_sok_values, valid_tf_values,
                               atol=atol, rtol=rtol)
        if not allclose:
            # BUGFIX: error message previously misspelled "rtol" as "rotl".
            raise ValueError(
                f"\n{sorted_sok_values} \nis not near to \n{valid_tf_values} "
                f"\nat rtol={rtol}, atol={atol}")
    print(
        "[INFO]: the saved parameters are consistent between sparse operation kit and TensorFlow"
    )
def put_file(dbo, name, path, filepath):
    """ Reads the file from filepath and stores it with name/path.
    Returns the id of the new dbfs row. """
    check_create_path(dbo, path)
    s = utils.read_binary_file(filepath)
    # Insert the metadata row first to get an id, then attach the bytes.
    dbfsid = dbo.insert("dbfs", {"Name": name, "Path": path})
    o = DBFSStorage(dbo)
    o.put(dbfsid, name, s)
    return dbfsid
def prepare_normalizer(list_paths, dim):
    """Fit a StandardScaler over the frames of every readable file.

    list_paths: iterable of feature-file paths.
    dim: per-frame dimensionality passed to read_binary_file.

    Missing files are reported and skipped (best-effort behavior kept).

    Returns:
        A fitted sklearn StandardScaler.
    """
    dataset = []
    for file_path in list_paths:
        try:
            data, _ = read_binary_file(file_path, dim)
            dataset.append(data)
        except FileNotFoundError as err:
            # BUGFIX: previously printed the exception *class* object
            # (`print(FileNotFoundError)`), hiding which path was missing.
            print(err)
    dataset = np.concatenate(dataset)
    scaler = StandardScaler().fit(dataset)
    del dataset  # free the concatenated frames promptly
    return scaler
def calculate_cmvn(name):
    """Calculate mean and var."""
    # Reads "<config_dir>/<name>.lst" (lines of "utt_id inputs_path
    # labels_path"), accumulates per-dimension sums of x and x^2, and saves
    # mean/stddev stats to "<output_dir>/<name>_cmvn.npz".
    # Note: an empty list file raises (accumulators are created on the
    # first utterance).
    tf.logging.info("Calculating mean and var of %s" % name)
    config_filename = open(os.path.join(FLAGS.config_dir, name + '.lst'))
    inputs_frame_count, labels_frame_count = 0, 0
    for line in config_filename:
        utt_id, inputs_path, labels_path = line.strip().split()
        tf.logging.info("Reading utterance %s" % utt_id)
        inputs = read_binary_file(inputs_path, FLAGS.input_dim)
        labels = read_binary_file(labels_path, FLAGS.output_dim)
        if inputs_frame_count == 0:    # create numpy array for accumulating
            ex_inputs = np.sum(inputs, axis=0)
            ex2_inputs = np.sum(inputs**2, axis=0)
            ex_labels = np.sum(labels, axis=0)
            ex2_labels = np.sum(labels**2, axis=0)
        else:
            ex_inputs += np.sum(inputs, axis=0)
            ex2_inputs += np.sum(inputs**2, axis=0)
            ex_labels += np.sum(labels, axis=0)
            ex2_labels += np.sum(labels**2, axis=0)
        inputs_frame_count += len(inputs)
        labels_frame_count += len(labels)
    # stddev via E[x^2] - E[x]^2; floor tiny values so later normalization
    # never divides by zero.
    mean_inputs = ex_inputs / inputs_frame_count
    stddev_inputs = np.sqrt(ex2_inputs / inputs_frame_count - mean_inputs**2)
    stddev_inputs[stddev_inputs < 1e-20] = 1e-20
    mean_labels = ex_labels / labels_frame_count
    stddev_labels = np.sqrt(ex2_labels / labels_frame_count - mean_labels**2)
    stddev_labels[stddev_labels < 1e-20] = 1e-20
    cmvn_name = os.path.join(FLAGS.output_dir, name + "_cmvn.npz")
    np.savez(cmvn_name,
             mean_inputs=mean_inputs,
             stddev_inputs=stddev_inputs,
             mean_labels=mean_labels,
             stddev_labels=stddev_labels)
    config_filename.close()
    tf.logging.info("Wrote to %s" % cmvn_name)
def main():
    """Split each .cmp feature file into .sp / .lf0 / .ap component files."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--cmp_dir', default='')
    parser.add_argument('--output', default='./splited_cmp/', type=str,
                        help='path to output cmp')
    parser.add_argument('--model_type', default='')
    args = parser.parse_args()
    logging.basicConfig(
        format='%(asctime)s %(filename)s %(levelname)s %(message)s',
        datefmt='%a, %d %b %Y %H:%M:%S',
        level=logging.DEBUG,
        stream=sys.stdout)
    if not os.path.exists(args.output):
        os.mkdir(args.output)
    cmp_file = os.listdir(args.cmp_dir)
    if args.model_type == 'acoustic':
        for cmp_filename in cmp_file:
            cmp = read_binary_file(os.path.join(args.cmp_dir, cmp_filename),
                                   dimension=hparams['target_channels'],
                                   dtype=np.float64)
            # Spectrum block: leading spec channels plus trailing energy.
            sp = np.zeros(cmp.shape)
            sp[:, :hparams['spec_units']] = cmp[:, :hparams['spec_units']]
            # NOTE(review): single-column assignment; if energy_units > 1
            # this was probably meant to be the slice [-energy_units:] —
            # kept as-is, confirm against the feature layout.
            sp[:, -hparams['energy_units']] = cmp[:, -hparams['energy_units']]
            lf0 = cmp[:, hparams['spec_units']:hparams['spec_units'] +
                      hparams['lf0_units']]
            uv = cmp[:, hparams['spec_units'] + hparams['lf0_units']:
                     hparams['spec_units'] + hparams['lf0_units'] +
                     hparams['uv_units']]
            cap = cmp[:, hparams['spec_units'] + hparams['lf0_units'] +
                      hparams['uv_units']:hparams['cap_units'] +
                      hparams['spec_units'] + hparams['lf0_units'] +
                      hparams['uv_units']]
            # Zero out f0 in unvoiced frames.
            lf0[uv == 0] = 0
            base = os.path.splitext(cmp_filename)[0]
            write_binary_file(sp, os.path.join(args.output, base + '.sp'),
                              dtype=np.float64)
            # BUGFIX: the original wrote `sp` to all three outputs; the
            # computed lf0 and cap arrays were never written.
            write_binary_file(lf0, os.path.join(args.output, base + '.lf0'),
                              dtype=np.float64)
            write_binary_file(cap, os.path.join(args.output, base + '.ap'),
                              dtype=np.float64)
def test(netG, opt):
    """Generate pulse signals for every .cmp file in opt.testdata_dir.

    For each acoustic-feature file, feeds Gaussian noise plus the 47-dim
    acoustic conditioning through the generator netG and writes the raw
    pulses next to the input as "<name>.pls".
    """
    assert opt.netG != ''
    test_dir = opt.testdata_dir
    for f in os.listdir(test_dir):
        fname, ext = os.path.splitext(f)
        if ext == '.cmp':
            print(fname)
            cmp_file = os.path.join(test_dir, f)
            ac_data = read_binary_file(cmp_file, dim=47)
            ac_data = torch.FloatTensor(ac_data)
            # One nz-dim noise vector per acoustic frame.
            noise = torch.FloatTensor(ac_data.size(0), nz)
            if opt.cuda:
                ac_data, noise = ac_data.cuda(), noise.cuda()
            # NOTE(review): torch.autograd.Variable is deprecated (no-op on
            # modern torch); kept for compatibility with the original code.
            ac_data = Variable(ac_data)
            noise = Variable(noise)
            noise.data.normal_(0, 1)
            generated_pulses = netG(noise, ac_data)
            generated_pulses = generated_pulses.data.cpu().numpy()
            # One pulse row per input frame.
            generated_pulses = generated_pulses.reshape(ac_data.size(0), -1)
            out_file = os.path.join(test_dir, fname + '.pls')
            with open(out_file, 'wb') as fid:
                generated_pulses.tofile(fid)
def read_binary_file(self, file_subpath):
    """Read the file at *file_subpath*, resolved against this object's root."""
    full_path = self.to_full_path(file_subpath)
    return read_binary_file(full_path)
def get_image_file_data(dbo, mode, iid, seq=0, justdate=False):
    """ Gets an image
    mode: animal | media | animalthumb | person | personthumb | dbfs
    iid: The id of the animal for animal/thumb mode or the media record
        or a template path for dbfs mode
    seq: If the mode is animal or person, returns image X for that person/animal
         The first image is always the preferred photo and seq is 1-based.
    if justdate is True, returns the last modified date
    if justdate is False, returns a tuple containing the last modified date and image data
    """
    def nopic():
        # Sentinel returned when no picture is available.
        NOPIC_DATE = datetime.datetime(2011, 1, 1)
        if justdate:
            return NOPIC_DATE
        return (NOPIC_DATE, "NOPIC")
    # DRY: thumb_nopic was a byte-identical copy of nopic; keep the name
    # as an alias so thumb_mrec reads the same as before.
    thumb_nopic = nopic
    def mrec(mm):
        # First media row -> (date, data); empty result -> sentinel.
        if len(mm) == 0:
            return nopic()
        if justdate:
            return mm[0].DATE
        return (mm[0].DATE, dbfs.get_string(dbo, mm[0].MEDIANAME))
    def thumb_mrec(mm):
        # Same as mrec but scales the data down to a thumbnail.
        if len(mm) == 0:
            return thumb_nopic()
        if justdate:
            return mm[0].DATE
        return (mm[0].DATE, scale_thumbnail(dbfs.get_string(dbo, mm[0].MEDIANAME)))
    if mode == "animal":
        if seq == 0:
            return mrec(get_web_preferred(dbo, ANIMAL, int(iid)))
        else:
            return mrec(get_media_by_seq(dbo, ANIMAL, int(iid), seq))
    elif mode == "person":
        if seq == 0:
            return mrec(get_web_preferred(dbo, PERSON, int(iid)))
        else:
            return mrec(get_media_by_seq(dbo, PERSON, int(iid), seq))
    elif mode == "animalthumb":
        return thumb_mrec(get_web_preferred(dbo, ANIMAL, int(iid)))
    elif mode == "personthumb":
        return thumb_mrec(get_web_preferred(dbo, PERSON, int(iid)))
    elif mode == "media":
        return mrec(get_media_by_id(dbo, int(iid)))
    elif mode == "dbfs":
        if justdate:
            return dbo.now()
        else:
            if str(iid).startswith("/"):
                # Complete path was given
                return (dbo.now(), dbfs.get_string_filepath(dbo, str(iid)))
            else:
                # Only name was given
                return (dbo.now(), dbfs.get_string(dbo, str(iid)))
    elif mode == "nopic":
        # NOTE(review): existence is checked for "nopic.jpg" but the fetch
        # uses "/reports/nopic.jpg" — looks intentional (dbfs name vs path)
        # but worth confirming.
        if dbfs.file_exists(dbo, "nopic.jpg"):
            return (dbo.now(), dbfs.get_string_filepath(dbo,
                                                        "/reports/nopic.jpg"))
        else:
            return (dbo.now(), utils.read_binary_file(dbo.installpath + "media/reports/nopic.jpg"))
    else:
        return nopic()
def decode(args, model, device):
    """Run the acoustic model over the test list and write vocoder features.

    Reads label files listed in config_<name>/test.lst, runs the model,
    de-normalizes the outputs with the training CMVN stats, and splits the
    resulting feature matrix into per-stream files under args.output
    (.sp/.lf0/.ap for 'acoustic', .mgc/.lf0/.bap for 'acoustic_mgc').
    """
    model.eval()
    data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            'data_' + args.name)
    config_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              'config_' + args.name)
    data_list = open(os.path.join(config_dir, 'test.lst'), 'r').readlines()
    cmvn = np.load(os.path.join(data_dir, "train_cmvn.npz"))
    if not os.path.exists(args.output):
        os.mkdir(args.output)
    if args.model_type == 'acoustic':
        for input_name in data_list:
            input_name = input_name.split(' ')[0] + '.lab'
            logging.info(f'decode {input_name} ...')
            input = read_binary_file(os.path.join(
                os.path.join(data_dir, 'test', 'label'), input_name),
                dimension=hparams['in_channels'])
            input = torch.from_numpy(input).to(device)
            input = input.unsqueeze(0)  # add batch dimension
            output, uv_output = model(input)
            output = output.squeeze()
            # Voiced/unvoiced decision: probability of class 0; frames with
            # p > 0.5 are marked unvoiced (uv = 0).
            uv_output = F.softmax(uv_output, dim=-1)[:, :, 0]
            uv_output = uv_output.squeeze()
            uv = torch.ones(uv_output.shape).to(device)
            uv[uv_output > 0.5] = 0.0
            uv = uv.unsqueeze(-1)
            # Prepend uv so the de-normalized layout matches the cmvn stats.
            output = torch.cat((uv, output), -1)
            output = output.cpu().squeeze().detach().numpy()
            uv = uv.cpu().squeeze().detach().numpy()
            output = output * cmvn['stddev_labels'] + cmvn["mean_labels"]
            # Channel layout after uv (col 0):
            # cap | energy | spec | lf0 — sp is rebuilt as spec followed by
            # energy. Offsets below are all shifted by +1 for the uv column.
            cap = output[:, 1:hparams['cap_units']]
            sp = np.concatenate(
                (output[:, hparams['cap_units'] + hparams['energy_units'] +
                        1:hparams['cap_units'] + hparams['energy_units'] +
                        hparams['spec_units'] + 1],
                 output[:, hparams['cap_units'] + 1:hparams['cap_units'] +
                        hparams['energy_units'] + 1]),
                axis=-1)
            lf0 = output[:, hparams['cap_units'] + hparams['energy_units'] +
                         hparams['spec_units'] + 1:hparams['cap_units'] +
                         hparams['energy_units'] + hparams['spec_units'] +
                         hparams['lf0_units'] + 1]
            # Unvoiced frames get a large negative log-f0 sentinel.
            lf0[uv == 0] = -1.0e+10
            write_binary_file(sp, os.path.join(
                args.output,
                os.path.splitext(input_name)[0] + '.sp'), dtype=np.float64)
            write_binary_file(lf0, os.path.join(
                args.output,
                os.path.splitext(input_name)[0] + '.lf0'), dtype=np.float32)
            write_binary_file(cap, os.path.join(
                args.output,
                os.path.splitext(input_name)[0] + '.ap'), dtype=np.float64)
    elif args.model_type == 'acoustic_mgc':
        for input_name in data_list:
            input_name = input_name.split(' ')[0] + '.lab'
            logging.info(f'decode {input_name} ...')
            input = read_binary_file(os.path.join(
                os.path.join(data_dir, 'test', 'label'), input_name),
                dimension=hparams['in_channels'])
            input = torch.from_numpy(input).to(device)
            input = input.unsqueeze(0)  # add batch dimension
            output, uv_output = model(input)
            output = output.squeeze()
            # Voiced/unvoiced decision as above.
            uv_output = F.softmax(uv_output, dim=-1)[:, :, 0]
            uv_output = uv_output.squeeze()
            uv = torch.ones(uv_output.shape).to(device)
            uv[uv_output > 0.5] = 0.0
            uv = uv.unsqueeze(-1)
            # Layout: mgc | uv | (lf0 + bap tail) — uv spliced in between.
            output = torch.cat(
                (output[:, :hparams['mgc_units']], uv,
                 output[:, -(hparams['bap_units'] + hparams['lf0_units']):]),
                -1)
            output = output.cpu().squeeze().detach().numpy()
            uv = uv.cpu().squeeze().detach().numpy()
            output = output * cmvn['stddev_labels'] + cmvn["mean_labels"]
            mgc = output[:, :hparams['mgc_units']]
            lf0 = output[:, hparams['mgc_units'] + 1:hparams['mgc_units'] +
                         hparams['lf0_units'] + 1]
            bap = output[:, -(hparams['bap_units']):]
            write_binary_file(
                mgc,
                os.path.join(args.output,
                             os.path.splitext(input_name)[0] + '.mgc'))
            write_binary_file(
                lf0,
                os.path.join(args.output,
                             os.path.splitext(input_name)[0] + '.lf0'))
            write_binary_file(
                bap,
                os.path.join(args.output,
                             os.path.splitext(input_name)[0] + '.bap'))
# Fit normalizers over the generated and reference file lists, then compare
# synthesized MGC features against their natural counterparts file by file.
with open('gen_files.list', 'r') as fid:
    gen_files_list = [l.strip() for l in fid.readlines()]
with open('ref_files.list', 'r') as fid:
    ref_files_list = [l.strip() for l in fid.readlines()]
gen_normalizer = prepare_normalizer(gen_files_list, dim=40)
ref_normalizer = prepare_normalizer(ref_files_list, dim=40)
print(gen_normalizer.mean_)
# syn_dir_path / nat_dir_path are defined elsewhere in this file.
assert len(list(syn_dir_path.glob('*.mgc'))) > 0
for file in syn_dir_path.glob('*.mgc'):
    print(file)
    syn_mgc_ori, _ = read_binary_file(file, dim=40)
    # Random frame offset; NOTE(review): currently only printed, not used.
    rnd = random.randint(0, syn_mgc_ori.shape[0] - 40)
    print(rnd)
    syn_mgc = gen_normalizer.transform(syn_mgc_ori)
    syn_mgc = syn_mgc.T
    print(syn_mgc.shape)
    # Reshape to NCHW-style (1, 1, dims, frames).
    syn_mgc = syn_mgc.reshape(1, 1, syn_mgc.shape[0], syn_mgc.shape[1])
    # Natural reference with the same basename; left un-normalized here.
    nat_mgc, _ = read_binary_file(nat_dir_path.joinpath(file.name), dim=40)
    # nat_mgc = ref_normalizer.transform(nat_mgc)
    # nat_sp = mgc2sp(nat_mgc.astype('float64'), alpha=0.42, gamma=0.0, fftlen=1024).T
    if 0:  # debug dump, disabled
        np.savetxt(file.name + 'a', nat_mgc.flatten())
    nat_mgc = nat_mgc.T
    print(nat_mgc.shape)
def get(self, dbfsid, url):
    """ Returns the file data for url """
    # Strip the scheme prefix and resolve under this database's store folder.
    relative_name = url.replace("file:", "")
    filepath = "%s/%s/%s" % (DBFS_FILESTORAGE_FOLDER,
                             self.dbo.database,
                             relative_name)
    return utils.read_binary_file(filepath)