def get_abundance():
    w = 4566.52
    d = read_cog('cog.dat', w=w)
    make_linelist(w=w, id=26.0, ep=3.30, loggf=-2.164, ew=d[:, 1])
    batch(atmosphere_model='out.atm')
    os.system('MOOGSILENT > /dev/null')
    params = (5777, 4.44, 0.0, 1.0)
    m = Readmoog(params=params)
    df = m.all_table()
    return df

def make_train_inputs(self, input_seq, target_seq):
    '''Prepare the given sequences so that they can be used for training the model.'''
    inputs_, inputs_length_ = utils.batch(input_seq)
    targets_, targets_length_ = utils.batch(target_seq)
    return {
        self.encoder_inputs: inputs_,
        self.encoder_inputs_length: inputs_length_,
        self.decoder_targets: targets_,
        self.decoder_targets_length: targets_length_,
    }

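# NOTE: the utils.batch helper used above is not included in this collection.
# Judging from the call pattern (a list of variable-length sequences in, a padded
# matrix plus per-sequence lengths out), a minimal sketch could look like the
# following. The time-major layout, the int32 dtype and the pad value 0 are
# assumptions, not the original API.
import numpy as np

def batch_sketch(sequences, pad_value=0):
    """Pad `sequences` into one [max_time, batch_size] matrix and return it
    together with the original length of each sequence, mirroring the
    `inputs_, inputs_length_ = utils.batch(input_seq)` usage above."""
    lengths = [len(seq) for seq in sequences]
    max_time = max(lengths)
    padded = np.full((max_time, len(sequences)), pad_value, dtype=np.int32)
    for i, seq in enumerate(sequences):
        padded[:len(seq), i] = seq
    return padded, lengths
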
def benchmark():
    global device
    get_files()
    print("Running Benchmark..")
    time = datetime.datetime.now()

    emb = load_embedding('RWSGwn.emb', length_normalize=False, delete_duplicates=True)
    time = print_time('Loading embedding from Disk to RAM step', time)

    emb.length_normalize()
    time = print_time(
        'Embedding length normalization step (' + CPUcolor + 'CPU' + RESETcolor + ')',
        time)

    vocab_to_search = emb.words
    for i in range(100):
        for word in vocab_to_search:
            v = emb.word_to_vector(word)
    time = print_time(
        'Searching for vocabulary step (' + CPUcolor + 'CPU' + RESETcolor + ')',
        time)

    m = emb.vectors
    M = emb.vectors
    for i_batch, mb in enumerate(batch(m, batch_size)):
        _ = matrix_dot(mb, M)
    time = print_time(
        'Matrix dot product step '
        + ('(' + CPUcolor + 'CPU' + RESETcolor + ')' if device == 'CPU'
           else '(' + GPUcolor + 'GPU' + RESETcolor + ')'),
        time)

    for i_batch, mb in enumerate(batch(m, batch_size)):
        _ = cosine_knn(mb, M, 10)
    time = print_time(
        'Searching for nearest neighbors step '
        + ('(' + CPUcolor + 'CPU' + RESETcolor + ')' if device == 'CPU'
           else '(' + GPUcolor + 'GPU' + RESETcolor + ')'),
        time)

    emb.export('temp.emb')
    time = print_time('Exporting embedding from RAM to Disk step', time)
    os.remove("temp.emb")

    print()
    print("Benchmark is over.")

async def main(git_url, folder_path):
    timeout = aiohttp.ClientTimeout(total=60)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        await init_git(session, git_url, folder_path)

        # get blob hashes
        proc = await asyncio.subprocess.create_subprocess_exec(
            'git', 'ls-files', '--stage',
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await proc.communicate()
        index = stdout.decode()
        hashes_paths = re.findall(r'([\w]{40})\s+\d+\s+([\w\/\.\-_#]+)', index)

        # download, decode and store the files
        loop = asyncio.get_running_loop()
        for hps in batch(hashes_paths, size=20):
            tasks = [
                loop.create_task(fetch_file(hp, git_url, session))
                for hp in hps
            ]
            await asyncio.gather(*tasks)

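# NOTE: several snippets in this collection (batch(hashes_paths, size=20) above,
# batch(m, batch_size) in the benchmark, utils.batch(data, 100000) in the
# anonymizers) rely on a chunking helper whose source is not shown. A minimal
# generator consistent with that usage is sketched below; the name batch_sketch
# and the islice-based implementation are assumptions.
from itertools import islice

def batch_sketch(iterable, size):
    """Yield successive lists of at most `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk

# e.g. process the (hash, path) pairs 20 at a time, as above:
# for hps in batch_sketch(hashes_paths, size=20):
#     ...
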
def finetune(net, optimizer, criterion, trainset, log, path, iters=100,
             epochs=None, batch_size=2, gpu=True, scale=0.5):
    net.train()
    bce_meter = AverageMeter()
    dir_img = 'data/train/'
    dir_mask = 'data/train_masks/'

    if epochs is None:
        # Fine-tune using iterations of mini-batches
        epochs = 1
    else:
        # Fine-tune using entire epochs
        iters = None

    for e in range(epochs):
        # reset the generators
        train = get_imgs_and_masks(trainset, dir_img, dir_mask, scale)
        with tqdm(total=len(trainset)) as progress_bar:
            for i, b in enumerate(batch(train, batch_size)):
                imgs = np.array([i[0] for i in b]).astype(np.float32)
                true_masks = np.array([i[1] for i in b])
                imgs = torch.from_numpy(imgs)
                true_masks = torch.from_numpy(true_masks)
                if gpu:
                    imgs = imgs.cuda()
                    true_masks = true_masks.cuda()

                masks_pred = net(imgs).squeeze()
                loss = criterion(masks_pred, true_masks)
                bce_meter.update(loss.item(), batch_size)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=e, BCE=bce_meter.avg)

                if i == 0 and e == 0:
                    log.info("FLOPs after pruning: \n{}".format(
                        flops_count(net, imgs.shape[2:])))
                if i == iters:
                    # Stop finetuning after sufficient mini-batches
                    break

    log.info("Finished finetuning")
    log.info("Finetuned loss: {}".format(bce_meter.avg))
    torch.save(net.state_dict(), path)
    log.info('Saving finetuned to {}...'.format(path))

def strategy(self, x, **kwargs):
    flat_x = flatten(x)
    k = kwargs.get('k')
    # self.flat_x records the true position of shuffled data
    self.flat_x, flat_x = shuffle(self.flat_x, flat_x)
    pooling = batch(flat_x, k)
    return self.layer(pooling, r=[1, 1], op=[1, 1])

def anonymize(self, infer=False, include_rest=True):
    if infer:
        self.reader.infer_providers()

    # Rather than a map of values per field, we create a map of values per type.
    # This ensures fields are consistently mapped across fields in a document as
    # well as across values.
    self.field_maps = {key: {} for key in self.provider_map.keys()}

    data = self.reader.get_data(list(self.field_maps.keys()),
                                self.reader.suppressed_fields, include_rest)
    exclude = set(self.reader.suppressed_fields)

    count = 0
    file_name = "documents-%s"
    i = 0
    for batchiter in utils.batch(data, 100000):
        tmp = []
        for item in batchiter:
            if include_rest:
                tmp.append(
                    json.dumps(
                        self.__anon_doc_include_all(
                            item, self.reader.masked_fields, exclude)))
            else:
                tmp.append(
                    json.dumps(
                        self.__anon_doc(item, self.reader.masked_fields,
                                        exclude)))
        self.writer.write_data(tmp, file_name=file_name % i)
        count += len(tmp)
        logging.info(f"{count} documents complete")
        i += 1

def forward(self, jets):
    levels, children, n_inners, contents = batch(jets)
    n_levels = len(levels)
    embeddings = []

    for i, nodes in enumerate(levels[::-1]):
        j = n_levels - 1 - i
        inner = nodes[:n_inners[j]]
        outer = nodes[n_inners[j]:]

        u_k = self.fc_u(contents[j])
        u_k = self.activation(u_k)  # eq. (3) in Louppe's paper

        if len(inner) > 0:
            zero = torch.zeros(1).long()
            one = torch.ones(1).long()
            if torch.cuda.is_available():
                zero = zero.cuda()
                one = one.cuda()
            h_L = embeddings[-1][children[inner, zero]]
            h_R = embeddings[-1][children[inner, one]]
            h = torch.cat((h_L, h_R, u_k[:n_inners[j]]), 1)
            h = self.fc_h(h)
            h = self.activation(h)
            embeddings.append(torch.cat((h, u_k[n_inners[j]:]), 0))
        else:
            embeddings.append(u_k)

    return embeddings[-1].view((len(jets), -1))

def get_feed_data(self, x, t, y, p=None, e=None, class_weights=None, is_training=True):
    x_m, x_sizes, xwordm, pm = utils.batch_posmask(x, p)
    t_m, t_sizes, twordm = utils.batch(t)
    fd = {
        self.inputs: x_m,
        self.targets: t_m,
        self.text_word_lengths: x_sizes,
        self.target_word_lengths: t_sizes,
        self.textwm: xwordm,
        self.targetwm: twordm,
        self.posmask: pm
    }
    if y is not None:
        fd[self.labels] = y
    if e is not None:
        fd[self.embedding_matrix] = e
    fd[self.is_training] = is_training
    return fd

def train(hp, dataset, ops):
    (train_x, train_y), (test_x, test_y) = dataset
    (model, trainer, saver, init_ops) = ops

    with tf.Session(config=utils.tf_config(ALLOW_GPU_MEM_GROWTH)) as sess:
        # Initialize parameters and create summary writer
        best_acc = 0.
        sess.run(init_ops)
        hp['save_dir'] = utils.replace_dir(hp['save_dir'])
        summary_writer = tf.summary.FileWriter(hp['save_dir'], sess.graph)

        for epoch in range(hp['epochs']):
            print('\n' + '-' * 80 + '\nEpoch %03d/%03d' % (epoch + 1, hp['epochs']))

            # Train and evaluate model
            train_loss = 0
            for step, batch_size, (batch_x, batch_y) in utils.batch(
                    hp['batch_size'], train_x, train_y, shuffle=True):
                if hp['extended_summary'] and step == 0 and epoch % EXTENDED_SUMMARY_EVAL_PERIOD == 0:
                    loss, summary = trainer.fit(sess, batch_x, batch_y, extended_summary=True)
                    summary_writer.add_summary(summary, global_step=epoch)
                else:
                    loss = trainer.fit(sess, batch_x, batch_y)
                train_loss += loss * batch_size / len(train_x)
            validloss, validacc = _valid(trainer, sess, test_x, test_y, hp)
            print('\ttrain_loss=%2.5f\tvalid_loss=%2.5f\tvalid_acc=%3.4f'
                  % (train_loss, validloss, validacc))

            # Summarize metrics
            utils.add_summary_values(summary_writer, global_step=epoch,
                                     train_loss=train_loss, validloss=validloss,
                                     validacc=validacc)

            # Save model if accuracy improved
            if validacc > best_acc:
                print('\tBest accuracy encountered so far, saving model...')
                best_acc = validacc
                saver.save(sess, hp['save_dir'])
    return best_acc

def _valid(trainer, sess, test_x, test_y, hp):
    tot_loss, tot_acc = (0, 0)
    for step, batch_size, (batch_x, batch_y) in utils.batch(hp['batch_size'], test_x, test_y):
        probs, classes, loss = trainer.evaluate(sess, batch_x, batch_y)
        tot_acc += np.sum(np.equal(classes, batch_y)) / len(test_x)
        tot_loss += loss * batch_size / len(test_x)
    return (tot_loss, tot_acc)

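# NOTE: the utils.batch used by train() and _valid() above is called as
# utils.batch(hp['batch_size'], x, y, shuffle=...) and yields
# (step, actual_batch_size, (batch_x, batch_y)) triples. Its source is not part
# of this collection; the sketch below is one plausible shape of such an
# iterator (the shuffling strategy and the NumPy fancy-indexing requirement on
# x and y are assumptions).
import numpy as np

def batch_sketch(batch_size, x, y, shuffle=False):
    """Yield (step, actual_batch_size, (batch_x, batch_y)) mini-batches."""
    indices = np.arange(len(x))
    if shuffle:
        np.random.shuffle(indices)
    for step, start in enumerate(range(0, len(x), batch_size)):
        idx = indices[start:start + batch_size]
        yield step, len(idx), (x[idx], y[idx])
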
def make_inference_inputs(self, input_seq):
    '''Prepare the given sequences so that they can be used for inference with the model.'''
    inputs_, inputs_length_ = utils.batch(input_seq)
    return {
        self.encoder_inputs: inputs_,
        self.encoder_inputs_length: inputs_length_,
    }

def train_one_epoch(df, batch_size, optimizer, text_encoder, meta_model,
                    counts_model, multimode_model, train_text_encoder=False,
                    repeat_first_batch=False, binary_classification=True):
    """Trains (forward and backward passes) the model for one epoch and returns metrics.

    Args:
        df: Training data used to build the batch iterator
        batch_size (int): Mini-batch size
        optimizer (tf.keras.optimizers.Optimizer): Optimizer used for the update steps
        text_encoder, meta_model, counts_model, multimode_model: Model components
            passed through to train_one_step
        train_text_encoder (bool): Whether the text encoder is also updated
        binary_classification (bool): If True, train against the binary labels

    Returns:
        Mean loss and mean accuracy over the epoch.
    """
    train_ds = utils.batch(df, batch_size, repeat_first_batch=repeat_first_batch)
    losses = []
    accuracies = []
    try:
        while True:
            (statements_tf, justifications_tf, meta_tf, counts_tf,
             binary_labels_tf, labels_tf) = next(train_ds)
            if binary_classification:
                _labels = binary_labels_tf
            else:
                _labels = labels_tf
            loss, accuracy = train_one_step(
                optimizer, text_encoder, meta_model, counts_model,
                multimode_model, statements_tf, justifications_tf, meta_tf,
                counts_tf, _labels, train_text_encoder, binary_classification)
            losses.append(loss.numpy())
            accuracies.append(accuracy)
    except StopIteration:
        return np.mean(losses), np.mean(accuracies)

def make_train_inputs(self, input_seq, target_seq):
    max_input_length = self.cfg.get('max_input_length')
    max_output_length = self.cfg.get('max_output_length')

    input_seq_batched, _ = utils.batch(
        input_seq, max_sequence_length=max_input_length)
    target_seq_batched, _ = utils.batch(
        target_seq, max_sequence_length=max_output_length)

    feed_dict = {
        self.encoder_inputs[t]: input_seq_batched[t]
        for t in range(max_input_length)
    }
    feed_dict.update({
        self.labels[t]: target_seq_batched[t]
        for t in range(max_output_length)
    })
    feed_dict[self.keep_prob] = 1.0
    return feed_dict

def data_loader(args):
    train_data, train_labels = utils.get_raw_data(args.train_file)  # get a list of raw sentences
    val_data, val_labels = utils.get_raw_data(args.dev_file)

    args.catogories = ['EnterSports', 'Military', 'Economics', 'Technology', 'Government']
    args.cat_dict = dict(zip(args.catogories, range(len(args.catogories))))

    word_vocab, num_total_words = utils.build_dict(train_data)
    trainlabels_to_idx = [args.cat_dict[label] for label in train_labels]
    vallabels_to_idx = [args.cat_dict[label] for label in val_labels]

    train_data, train_labels = utils.encode(train_data, trainlabels_to_idx, word_vocab)
    val_data, val_labels = utils.encode(val_data, vallabels_to_idx, word_vocab)
    train_data = utils.pad_features(train_data, max_len=args.max_features)
    val_data = utils.pad_features(val_data, max_len=args.max_features)

    train_set = utils.batch(train_data.copy(), train_labels.copy(), args.batch_size)
    val_set = utils.batch(val_data.copy(), val_labels.copy(), args.batch_size)
    return train_set, val_set, num_total_words

def _get_sentence_embeddings(description, embed_batch_size=10):
    embeddings_list = []
    sentences = [
        re.sub(r'\[\d+\]', '', sent).strip().lower()
        for sent in sent_tokenize(description)
    ]
    for sentence_batch in batch(sentences, embed_batch_size):
        embeddings = embed_sentences(sentence_batch)
        embeddings_list.append(embeddings)
    return torch.cat(embeddings_list)

def anonymize(self, infer=False, include_rest=False):
    """Core method for anonymizing data.

    It uses the reader and writer class methods to retrieve and store data. In
    the process we define mappings of unmasked values to masked values and
    anonymize fields using self.faker.
    """
    # First, infer mappings based on indices and overwrite the config.
    if infer:
        self.reader.infer_providers()

    # Next, create masking maps that will be used for lookups when anonymizing data.
    self.field_maps = self.reader.create_mappings()
    for field, map in self.field_maps.items():
        for value, _ in map.items():
            mask_str = self.reader.masked_fields[field]
            if mask_str != 'infer':
                mask = self.provider_map[mask_str]
                map[value] = mask(value)

    # Get a generator object from the reader.
    total = self.reader.get_count()
    logging.info("total number of records {}...".format(total))
    data = self.reader.get_data(list(self.field_maps.keys()),
                                self.reader.suppressed_fields, include_rest)

    # Batch process the data and write it out to JSON in chunks.
    count = 0
    for batchiter in utils.batch(data, 10000):
        tmp = []
        for item in batchiter:
            bulk = {
                "index": {
                    "_index": item.meta['index'],
                    "_type": 'doc'
                }
            }
            tmp.append(json.dumps(bulk))
            item = utils.flatten_nest(item.to_dict())
            for field, v in item.items():
                if self.field_maps[field]:
                    item[field] = self.field_maps[field][item[field]]
            tmp.append(json.dumps(utils.flatten_nest(item)))
        self.writer.write_data(tmp)
        count += len(tmp) / 2  # there is a bulk action row for every document
        logging.info("{} % complete...".format(count / total * 100))

def make_inference_inputs(self, input_seq):
    '''Prepare the given sequences so that they can be used for inference with the model.'''
    max_input_length = self.cfg.get('max_input_length')
    input_seq_batched, _ = utils.batch(
        input_seq, max_sequence_length=max_input_length)
    feed_dict = {
        self.encoder_inputs[t]: input_seq_batched[t]
        for t in range(max_input_length)
    }
    feed_dict[self.keep_prob] = 1.0
    return feed_dict

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--embedding', required=True)
    parser.add_argument('-l', '--search_words', required=True)
    parser.add_argument('-o', '--output', required=True)
    parser.add_argument('-b', '--batch_size', type=int, default=1024)
    parser.add_argument('-k', '--num_nearest_neighbor', type=int, default=10)
    args = parser.parse_args()

    emb = load_embedding(args.embedding, vocabulary=None, lower=False,
                         length_normalize=True, normalize_dimensionwise=False,
                         delete_duplicates=True)

    words_2_search = vocab_from_path(args.search_words)
    m = emb.words_to_matrix(words_2_search)
    M = emb.words_to_matrix(emb.words)

    nn = []
    for i_batch, mb in enumerate(batch(m, args.batch_size)):
        string = ("<" + str(datetime.datetime.now()) + "> "
                  + 'Calculating nn words '
                  + str(int(100 * (i_batch * args.batch_size) / len(m))) + '%')
        print(string, end="\r")
        result = cosine_knn(mb, M, args.num_nearest_neighbor)
        for i_result, indexes in enumerate(result):
            nn.append(["\"" + emb.words[i] + "\"" for i in indexes])

    with open(args.output, 'w+', encoding='utf-8') as file:
        for word, nns in zip(words_2_search, nn):
            print(word + ': ' + ' '.join(nns), file=file)

def get_feed_data(self, x, c, y=None, a=None, p=None, e=None,
                  class_weights=None, is_training=True):
    x_m, x_sizes, xwordm = utils.batch(x)
    p_m = utils.batch_loc(p)
    fd = {
        self.inputs: x_m,
        self.text_word_lengths: x_sizes,
        self.textwm: xwordm,
        self.category: c
    }
    if y is not None:
        fd[self.labels] = y
    if e is not None:
        fd[self.embedding_matrix] = e
    if a is not None:
        fd[self.aspcat] = a
    if p is not None:
        fd[self.location] = p_m
    fd[self.is_training] = is_training
    return fd

    x.rotate(uniform(0,360))
    x.scale(0.5)
    x.c = uniform(-1,1)
    x.f = uniform(-1,1)
    x.weight = 0.25
    x.color_speed = 0.9
    x.color = 0

    xf = f.add_final()
    xf.linear = 0
    xf.spherical = uniform(.5,1.5)
    xf.julia = uniform(.25,1.25)
    xf.color_speed = 0
    xf.c = uniform(0,1.5) - .75
    xf.f = uniform(0,1.5) - .75
    xf.rotate(uniform(0,360))

    f.reframe()
    return f

if __name__ == "__main__":
    lst = batch(julia_wires, 20)
    save_flames("parameters/julia_wires.flame", *lst)

    x.weight = 7
    x.color = 1
    x.color_speed = 0.01

    # Final X
    fx = f.add_final()
    fx.a = fx.e = -uniform(0.5,1.0)
    fx.d = 0.373412
    fx.b = -fx.d
    fx.c = uniform(0,1)
    fx.f = randint(-1,1) - uniform(0,1)
    fx.rotate(uniform(0,360))
    fx.linear = 0
    setattr(fx, variation_list[this_var], 1.0)
    fx.color_speed = 0

    f.highlight_power = 1.0
    f.gamma = 2
    f.brightness = 5
    f.reframe()
    return f

if __name__ == "__main__":
    lst = batch(gnarlcomplex, 20)
    save_flames("parameters/gnarlcomplex.flame", *lst)

    x.wedge_sph_hole = uniform(-0.5,0.5)
    x.wedge_sph_count = 2
    x.wedge_sph_swirl = 0
    x.weight = uniform(0.05, 0.1)
    x.color_speed = 0.75
    x.opacity = 0
    x.color = 1
    x.animate = 1

    # fiddle: randomly change post of third
    if uniform(0,1) > 0.5:
        x.post.c = uniform(-0.5, 0.5)
        x.post.f = uniform(-0.5, 0.5)

    f.xform[0].chaos[:] = 0,1,1
    f.xform[1].chaos[:] = 1,0,1
    f.xform[2].chaos[:] = 1,0,1

    f.gamma = 3
    f.brightness = 25
    f.scale = 20
    return f

if __name__ == "__main__":
    lst = batch(gnarloscope, 20)
    save_flames("parameters/gnarloscopes.flame", *lst)

    x.waves2 = 1
    x.waves2_freqx = randint(-15,-1)
    x.waves2_scalex = 0.01 + uniform(0,0.01)
    x.waves2_freqy = randint(14,30)
    x.waves2_scaley = 0.02 + uniform(0,0.01)
    x.color_speed = 0.01

    # Final X
    fx = f.add_final()
    fx.linear = 0
    fx.a = 1.5
    fx.e = 1.5
    fx.foci = 1
    fx.color = uniform(0,1)
    fx.color_speed = 0

    f.highlight_power = 1.0
    f.gamma = 2
    f.brightness = 10
    f.scale = 20
    return f

if __name__ == "__main__":
    lst = batch(gnarlbasefoci, 20)
    save_flames("parameters/gnarlbasefoci.flame", *lst)

    x.weight = 0.5
    x.color_speed = 1
    x.color = random.uniform(0,1)
    x.animate = 1

    # Third Xform
    if random.uniform(0,1) > 0.5:
        x = f.add_xform()
        x.e = 0.015  # or is it e
        x.rotate(random.uniform(-45,135))
        x.c = random.uniform(-1,1)
        x.f = random.uniform(-1,1)
        x.weight = 0.5
        x.color_speed = 1
        x.color = random.uniform(0,1)
        x.animate = 1

    f.scale = 9
    f.gamma = 4
    f.brightness = 35
    f.reframe()
    return f

if __name__ == "__main__":
    lst = batch(gnarl, 20)
    save_flames("parameters/gnarls.flame", *lst)

    x = f.add_xform(color_speed=0.95, color=ed_color)
    x.linear = 0
    x.edisc = ed_weight
    x.c += uniform(-1,1)
    x.f += uniform(-1,1)
    x.rotate(uniform(0,360))

    x = f.add_xform(color_speed=0.95, color=ed_color)
    x.linear = 0
    x.julian = ed_weight
    x.julian_power = 50
    x.julian_dist = -1

    x = f.add_xform(color_speed=0.95, color=ed_color)
    x.linear = 0
    x.julian = ed_weight + 0.08 + uniform(0,0.04)
    x.julian_power = 50
    x.julian_dist = -1

    f.scale = uniform(12,16)
    f.brightness = 35
    return f

if __name__ == "__main__":
    lst = batch(edisc_julian, 20)
    save_flames("parameters/edisc_julians.flame", *lst)

    x = flame.add_xform(linear=0, julian=random.uniform(0.725, 1),
                        julian_power=-2, julian_dist=1,
                        color=random.uniform(0, 1), color_speed=.25, weight=5)
    x.c = 0.25 + random.uniform(0, 0.2)
    x.scale(0.25)
    x.post.scale(1.5)

    # Add additional julians
    for i in range(random.randint(2, 5)):
        x = flame.add_xform(linear=0, color=random.uniform(0, 1))
        x.c = random.uniform(-0.5, 0.5)
        x.scale(random.uniform(0.5, .8))
        x.julian_power = (2 ** random.randrange(2, 6))
        x.julian_dist = random.randrange(-3, 3)
        if not x.julian_dist:
            x.julian_dist = random.uniform(-0.1, 0.1)
            x.julian_power /= 2
        x.julian = random.uniform(0.25, 4 - abs(x.julian_dist))

    return flame

if __name__ == "__main__":
    lst = batch(grand_julian, 20)
    save_flames("parameters/grand_julian.flame", *lst)

    # Choose a random xform. Delete it and create a new one
    # with the same weight, using variations in special_vars
    # (choosing 3 of them).
    delx = random.randint(0, len(f.xform)-1)
    delxw = f.xform[delx].weight
    delcol = f.xform[delx].color
    f.xform[delx].delete()
    Xform.random(f, xv=special_vars, n=3, xw=delxw, col=delcol)

    # Add one more all-linear xform with weight 0.5
    # with the magic rotation & offset.
    lastx = f.add_xform(weight=0.5, color_speed=0)
    lastx.rotate(random.uniform(rand_angle_min, rand_angle_max))
    lastx.c = random.uniform(-0.5, 0.5)
    lastx.f = random.uniform(-0.5, 0.5)

    # reframe and name the flame
    f.reframe()
    return f

if __name__ == "__main__":
    lst = utils.batch(lolpolpolpo, 20)
    save_flames("parameters/lolpolpolpo_batch.flame", *lst)
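
# NOTE: in the __main__ blocks of these flame scripts, batch()/utils.batch()
# takes a flame-generating function and a count, and save_flames() writes the
# resulting flames out. The helper itself is not reproduced in this collection;
# a sketch of one plausible implementation follows (the name batch_sketch and
# the naming/numbering of the generated flames are assumptions).
def batch_sketch(generator, n):
    """Call `generator` n times and return the generated flames, named by index."""
    flames = []
    for i in range(n):
        flame = generator()
        flame.name = "%s_%03d" % (generator.__name__, i)
        flames.append(flame)
    return flames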