def action_config():
    if utils.needs_user():
        return flask.redirect(flask.url_for('setup'))
    if ('logged_in' not in flask.session) or (not flask.session['logged_in']):
        return flask.redirect(flask.url_for('login'))
    error = None
    if flask.request.method == 'POST':
        if "delete" in flask.request.form:
            action_id = int(flask.request.form['id'])
            utils.delete_action(action_id)
            flask.flash('Action Deleted')
        else:
            code = flask.request.form['code']
            cmd = flask.request.form['cmd']
            reason = flask.request.form['reason']
            if "edit" in flask.request.form:
                action_id = int(flask.request.form['edit'])
                utils.modify_action(action_id, code, cmd, reason)
                flask.flash('Action Edited')
            else:
                utils.create_action(code, cmd, reason)
                flask.flash('Action Created')
    actions = utils.get_actions()
    return flask.render_template('action_config.j2', error=error,
                                 actions=actions, commands=Alarm.ACTIONS)
def generate_training_data(self, docs, batch_size=1000):
    logger = logging.getLogger('progress_logger')
    while 1:
        x_train = []
        y_train = []
        random.seed()
        random.shuffle(docs)
        doc_len = len(docs)
        for i, doc in enumerate(docs):
            for paragraph in range(doc.get_amount_of_paragraphs()):
                entities = doc.get_entities(paragraph=paragraph)
                relations = doc.get_relations(paragraph=paragraph)
                for (configuration, action) in oracle.get_training_sequence(entities, relations, doc):
                    feature = ConfigurationVector(configuration, doc).get_vector()
                    x_train.append(feature)
                    y_train.append(utils.get_actions()[action])
                    if len(x_train) == batch_size:
                        logger.info("{i} out of len {l}".format(i=i, l=doc_len))
                        yield (np.vstack(x_train), np.vstack(y_train))
                        x_train = []
                        y_train = []
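A hedged usage sketch of how an endless (x_batch, y_batch) generator like the one above is typically consumed in Keras; the toy generator and tiny Sequential model below are placeholders, not the project's actual network or feature sizes.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

FEATURE_SIZE, NUM_ACTIONS, BATCH = 64, 4, 32  # toy sizes, assumed for illustration


def toy_generator():
    # Endless (x_batch, y_batch) generator with the same contract as generate_training_data().
    while True:
        x = np.random.rand(BATCH, FEATURE_SIZE)
        y = np.eye(NUM_ACTIONS)[np.random.randint(0, NUM_ACTIONS, BATCH)]  # one-hot targets
        yield x, y


model = Sequential([
    Dense(128, activation='relu', input_dim=FEATURE_SIZE),
    Dense(NUM_ACTIONS, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy')
# fit_generator pulls batches from the infinite generator; steps_per_epoch bounds one epoch.
model.fit_generator(toy_generator(), steps_per_epoch=100, epochs=2, verbose=0)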
def web_index(request):
    result = {'user': None}
    #if True:
    try:
        user, token = check_auth(request)
        result['user'] = user
        result['projects'] = build_index_projects(user, limit=25)
        #result['projects'] = get_user_projects(user)
        #result['ticket_assignments'] = get_ticket_assignments(user, limit=25)
        #result['task_assignments'] = get_task_assignments(user, limit=25)
        actions = get_actions(user, limit=25)
        result['actions'] = actions
    except:
        pass
    return result
def home_contribute(request):
    list_utensils = get_utensils()
    list_actions = get_actions()
    c = {
        "utensils": list_utensils,
        "actions": list_actions,
    }
    return render_to_response('recette/home_contribute.html', c,
                              RequestContext(request))
def in_beam_search(configuration, nn, golden_sequence, k, beam=2):
    """
    Returns all beams the model predicts up until golden sequence falls outside of beam.
    Beam = 1 generalizes to Greedy search.
    :param k: max length of a sequence
    :param golden_sequence: training sequence
    :param beam: size of beam
    :param nn: Neural network returning probability distribution
    :param configuration: Starting configuration
    :return: All paths in beam
    """
    dead_nodes = []
    live_nodes = [Node(None, configuration, None, 0, True)]
    actions = utils.get_actions()
    in_beam = True
    l = 0
    gold_output = []
    while live_nodes and in_beam and l < k:
        l += 1
        in_beam = False
        try:
            (next_golden_config, next_golden_action) = next(golden_sequence)
        except StopIteration:
            break
        new_nodes = []
        for node in live_nodes:
            distribution = nn.predict(node.configuration)
            for i, prob in enumerate(distribution[0]):
                action = list(actions.keys())[list(actions.values()).index(i)]
                if not node.configuration.empty_buffer() and node.configuration.action_possible(action):
                    conf_copy = cPickle.loads(cPickle.dumps(node.configuration, -1))
                    # applies action to config
                    getattr(conf_copy, action)()
                    new_nodes.append(Node(node, conf_copy, action, score(node, prob)))
        new_nodes.sort(key=lambda x: x.score)
        end = min(beam, len(new_nodes))
        live_nodes = new_nodes[:end]
        for node in list(live_nodes):  # iterate over a copy: finished nodes are removed below
            if node.configuration.empty_buffer():
                dead_nodes.append(node)
                beam -= 1
                live_nodes.remove(node)
        gold_output.append(Node(None, next_golden_config, next_golden_action, 0))
        for node in live_nodes:
            if node.action == next_golden_action and node.parent.golden:
                node.golden = True
                in_beam = True
                break
    beam_sequences = []
    for node in dead_nodes + live_nodes:
        beam_sequences.append(to_list(node))
    return gold_output, beam_sequences
def check_some_order():
    order_id = 1530281
    from concurent_utils import get_features
    from utils import get_actions
    info = get_orders_info([order_id]).reset_index()
    actions = get_actions(info)
    all_times = pd.concat((actions.dt_order_placed,
                           pd.Series([timedelta(seconds=stime) for stime in times])))
    all_times = all_times.sort_values(0).drop_duplicates().reset_index(drop=True)
    order_features = pd.concat([get_features(actions, dt_order_placed)
                                for k, dt_order_placed in all_times.iteritems()])
def intersting_points(live_game, kind='fastBreak'):
    '''
    Kinds available: fastBreak, secondChancePoints, pointsFromTurnover
    '''
    actions = utils.get_actions(live_game)
    points = Counter()
    for a in actions:
        if a['type'] == 'shot':
            print a['parameters']
            if a['parameters'][kind]:
                print 'relevant ', 'made:', made_shot(a)
                points[a['teamId']] += a['parameters']['points'] * made_shot(a)
    return points
def next_step(self, configuration):
    # The next step is the best decision according to self.network if it is possible to do that action,
    # otherwise it is the next best one.
    distribution = self.network.predict(configuration)
    actions = utils.get_actions()
    distribution = distribution.tolist()[0]
    en = list(enumerate(distribution))
    en.sort(key=lambda tup: tup[1])
    for (ind, val) in en[::-1]:
        action = list(actions.keys())[list(actions.values()).index(ind)]
        if configuration.action_possible(action):
            return action
    print("This should not print")
    return None
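A minimal, self-contained sketch of the same "best feasible action" selection, with an assumed action table and feasibility rule standing in for utils.get_actions() and configuration.action_possible(); inverting the dict once is a tidier alternative to the list(actions.keys())[...] idiom used above.

import numpy as np

actions = {"shift": 0, "reduce": 1, "left_arc": 2, "right_arc": 3}   # assumed mapping
index_to_action = {v: k for k, v in actions.items()}                 # invert once


def next_feasible_action(distribution, is_possible):
    # Walk indices from most to least probable; return the first legal action.
    for ind in np.argsort(distribution)[::-1]:
        action = index_to_action[int(ind)]
        if is_possible(action):
            return action
    return None


print(next_feasible_action(np.array([0.04, 0.15, 0.13, 0.68]),
                           lambda a: a != "right_arc"))   # -> "reduce"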
def next_step(self, configuration):
    indices = list(range(4))  # TODO: not hardcoded
    distribution = [0.04, 0.15, 0.13, 0.68]
    actions = utils.get_actions()
    for i in range(4):
        # numpy.random.choice takes weights via p=; renormalize after deletions so they sum to 1
        total = sum(distribution)
        probs = [p / total for p in distribution]
        ind = numpy.random.choice(indices, 1, p=probs)[0]
        print(ind)
        action = list(actions.keys())[list(actions.values()).index(ind)]
        if configuration.action_possible(action):
            print(action)
            return action
        else:
            # drop the infeasible action and resample from the remaining ones
            x = indices.index(ind)
            del indices[x]
            del distribution[x]
    return None
def get_orders_features_by_times(order_ids, times, include_own_actions=True):
    from concurent_utils import get_features, process_partial_df
    from utils import get_actions
    info = get_orders_info(order_ids).reset_index()
    actions = get_actions(info)
    if include_own_actions:
        all_times = pd.concat((actions.dt_order_placed,
                               pd.Series([timedelta(seconds=stime) for stime in times])))
    else:
        all_times = pd.Series([timedelta(seconds=stime) for stime in times])
    all_times = all_times.sort_values(0).drop_duplicates().reset_index(drop=True)

    def get_order_features(df):
        return pd.concat([get_features(df, dt_order_placed)
                          for k, dt_order_placed in all_times.iteritems()])

    orders_features = actions.groupby('order_id').apply(get_order_features)
    return orders_features.reset_index().set_index('order_id')
def beam_search(configuration, nn, beam=2):
    """
    Returns best sequence within beam. Beam = 1 generalizes to Greedy search.
    :param beam: size of beam
    :param nn: Neural network returning probability distribution
    :param configuration: Starting configuration
    :return: End node
    """
    dead_nodes = []
    live_nodes = [Node(None, configuration, None, 0)]
    actions = utils.get_actions()
    while live_nodes:
        new_nodes = []
        for node in live_nodes:
            distribution = nn.predict(node.configuration)
            for i, prob in enumerate(distribution[0]):
                action = list(actions.keys())[list(actions.values()).index(i)]
                if node.configuration.action_possible(action):
                    conf_copy = cPickle.loads(cPickle.dumps(node.configuration, -1))
                    # applies action to config
                    getattr(conf_copy, action)()
                    new_nodes.append(Node(node, conf_copy, action, score(node, prob)))
            node.configuration = None
        new_nodes.sort(key=lambda x: x.score)
        end = min(beam, len(new_nodes))
        live_nodes = new_nodes[:end]
        for node in list(live_nodes):  # iterate over a copy: finished nodes are removed below
            if node.configuration.empty_buffer():
                dead_nodes.append(node)
                beam -= 1
                live_nodes.remove(node)
    best = max(dead_nodes, key=lambda x: x.score)
    return best
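The two beam-search routines above keep a fixed number of live Node paths and extend each with every feasible action scored by the network. Below is a self-contained toy sketch of the same idea, with a hard-coded transition table standing in for nn.predict() and log-probability sums as path scores; all names here are illustrative, not the project's.

import math

TRANSITIONS = {                      # toy "model": P(next action | last action)
    "<s>":    {"shift": 0.7, "reduce": 0.3},
    "shift":  {"shift": 0.4, "reduce": 0.6},
    "reduce": {"shift": 0.2, "reduce": 0.8},
}


def toy_beam_search(beam=2, max_len=3):
    live = [(0.0, ["<s>"])]                      # (score, path)
    for _ in range(max_len):
        candidates = []
        for score, path in live:
            for action, prob in TRANSITIONS[path[-1]].items():
                candidates.append((score + math.log(prob), path + [action]))
        candidates.sort(key=lambda x: x[0], reverse=True)
        live = candidates[:beam]                 # keep only the best `beam` paths
    return live


for score, path in toy_beam_search():
    print(round(score, 3), path[1:])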
def extract_reactions(content: Tag) -> List[Action]:
    return get_actions(content, "Reacciones")
def extract_legendary_actions(content: Tag) -> LegendaryActions:
    legendary_actions = get_actions(content, "Acciones legendarias", True)
    if not legendary_actions:
        return {}
    list_, help_ = legendary_actions
    return {"help": help_, "list": list_}
def forward(self, images, captions, lengths, img_lengths, img_txts, img_spans,
            txt_spans, labels, ids=None, epoch=None, *args):
    self.niter += 1
    self.logger.update('Eit', self.niter)
    self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

    img_lengths = torch.tensor(img_lengths).long() if isinstance(img_lengths, list) else img_lengths
    lengths = torch.tensor(lengths).long() if isinstance(lengths, list) else lengths
    if torch.cuda.is_available():
        images = images.cuda()
        captions = captions.cuda()
        lengths = lengths.cuda()
        img_lengths = img_lengths.cuda()
    bsize = captions.size(0)

    img_emb, nll_img, kl_img, span_margs_img, argmax_spans_img, trees_img, lprobs_img = \
        self.forward_img_parser(images, img_lengths)
    ll_loss_img = nll_img.sum()
    kl_loss_img = kl_img.sum()

    txt_emb, nll_txt, kl_txt, span_margs_txt, argmax_spans_txt, trees_txt, lprobs_txt = \
        self.forward_txt_parser(captions, lengths)
    ll_loss_txt = nll_txt.sum()
    kl_loss_txt = kl_txt.sum()

    contrastive_loss = self.forward_loss(img_emb, txt_emb, img_lengths, lengths,
                                         argmax_spans_img, argmax_spans_txt,
                                         span_margs_img, span_margs_txt)
    mt_loss = contrastive_loss.sum()

    loss_img = self.vse_lm_alpha * (ll_loss_img + kl_loss_img) / bsize
    loss_txt = self.vse_lm_alpha * (ll_loss_txt + kl_loss_txt) / bsize
    loss_mt = self.vse_mt_alpha * mt_loss / bsize
    loss = loss_img + loss_txt + loss_mt

    self.optimizer.zero_grad()
    loss.backward()
    if self.grad_clip > 0:
        clip_grad_norm_(self.all_params, self.grad_clip)
    self.optimizer.step()

    self.logger.update('Loss_img', loss_img.item(), bsize)
    self.logger.update('Loss_txt', loss_txt.item(), bsize)
    self.logger.update('KL-Loss_img', kl_loss_img.item() / bsize, bsize)
    self.logger.update('KL-Loss_txt', kl_loss_txt.item() / bsize, bsize)
    self.logger.update('LL-Loss_img', ll_loss_img.item() / bsize, bsize)
    self.logger.update('LL-Loss_txt', ll_loss_txt.item() / bsize, bsize)

    self.n_word_img += (img_lengths + 1).sum().item()
    self.n_word_txt += (lengths + 1).sum().item()
    self.n_sent += bsize

    for b in range(bsize):
        max_img_len = img_lengths[b].item()
        pred_img = [(a[0], a[1]) for a in argmax_spans_img[b] if a[0] != a[1]]
        pred_set_img = set(pred_img[:-1])
        gold_img = [(img_spans[b][i][0].item(), img_spans[b][i][1].item())
                    for i in range(max_img_len - 1)]
        gold_set_img = set(gold_img[:-1])
        utils.update_stats(pred_set_img, [gold_set_img], self.all_stats_img)

        max_txt_len = lengths[b].item()
        pred_txt = [(a[0], a[1]) for a in argmax_spans_txt[b] if a[0] != a[1]]
        pred_set_txt = set(pred_txt[:-1])
        gold_txt = [(txt_spans[b][i][0].item(), txt_spans[b][i][1].item())
                    for i in range(max_txt_len - 1)]
        gold_set_txt = set(gold_txt[:-1])
        utils.update_stats(pred_set_txt, [gold_set_txt], self.all_stats_txt)

    # if self.niter % self.log_step == 0:
    p_norm, g_norm = self.norms()
    all_f1_img = utils.get_f1(self.all_stats_img)
    all_f1_txt = utils.get_f1(self.all_stats_txt)
    train_kl_img = self.logger.meters["KL-Loss_img"].sum
    train_ll_img = self.logger.meters["LL-Loss_img"].sum
    train_kl_txt = self.logger.meters["KL-Loss_txt"].sum
    train_ll_txt = self.logger.meters["LL-Loss_txt"].sum
    info = '|Pnorm|: {:.6f}, |Gnorm|: {:.2f}, ReconPPL-Img: {:.2f}, KL-Img: {:.2f}, ' + \
           'PPLBound-Img: {:.2f}, CorpusF1-Img: {:.2f}, ' + \
           'ReconPPL-Txt: {:.2f}, KL-Txt: {:.2f}, ' + \
           'PPLBound-Txt: {:.2f}, CorpusF1-Txt: {:.2f}, ' + \
           'Speed: {:.2f} sents/sec'
    info = info.format(
        p_norm, g_norm,
        np.exp(train_ll_img / self.n_word_img), train_kl_img / self.n_sent,
        np.exp((train_ll_img + train_kl_img) / self.n_word_img), all_f1_img[0],
        np.exp(train_ll_txt / self.n_word_txt), train_kl_txt / self.n_sent,
        np.exp((train_ll_txt + train_kl_txt) / self.n_word_txt), all_f1_txt[0],
        self.n_sent / (time.time() - self.s_time))

    pred_action_img = utils.get_actions(trees_img[0])
    sent_s_img = img_txts[0]
    pred_t_img = utils.get_tree(pred_action_img, sent_s_img)
    gold_t_img = utils.span_to_tree(img_spans[0].tolist(), img_lengths[0].item())
    gold_action_img = utils.get_actions(gold_t_img)
    gold_t_img = utils.get_tree(gold_action_img, sent_s_img)
    info += "\nPred T Image: {}\nGold T Image: {}".format(pred_t_img, gold_t_img)

    pred_action_txt = utils.get_actions(trees_txt[0])
    sent_s_txt = [self.vocab.idx2word[wid] for wid in captions[0].cpu().tolist()]
    pred_t_txt = utils.get_tree(pred_action_txt, sent_s_txt)
    gold_t_txt = utils.span_to_tree(txt_spans[0].tolist(), lengths[0].item())
    gold_action_txt = utils.get_actions(gold_t_txt)
    gold_t_txt = utils.get_tree(gold_action_txt, sent_s_txt)
    info += "\nPred T Text: {}\nGold T Text: {}".format(pred_t_txt, gold_t_txt)

    return info
def extract_actions(content: Tag) -> List[Dict[str, str]]:
    return get_actions(content, "Acciones")
earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10,
                                              verbose=1, mode='auto')
csv_logger = keras.callbacks.CSVLogger('training.log')


def negativeActivation(x):
    return -x


def vectorize_config(x, doc):
    return np.asarray(ConfigurationVector(x, doc).get_vector())[np.newaxis]


actions = utils.get_actions()


def vectorize_action(action):
    em = np.zeros(len(actions))
    em[actions[action]] = 1
    return em[np.newaxis]


class GlobalNormNN(Classifier):
    """
    k: maximum length of sequence
    b: beam size
    """
    k = 100
    b = 2
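A quick check of the one-hot encoding above, using an assumed four-action mapping in place of utils.get_actions(); the returned array has shape (1, len(actions)), ready to be stacked into a training batch.

import numpy as np

actions = {"shift": 0, "reduce": 1, "left_arc": 2, "right_arc": 3}  # assumed mapping


def vectorize_action(action):
    em = np.zeros(len(actions))
    em[actions[action]] = 1
    return em[np.newaxis]


print(vectorize_action("left_arc"))         # [[0. 0. 1. 0.]]
print(vectorize_action("left_arc").shape)   # (1, 4)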
def get_all_actions(request):
    """ Returns all actions in JSON format. """
    return HttpResponse(json.dumps(get_actions(json=True)))
def main(instructions=None, params=None, do_one_iteration=False):
    if not instructions:
        return
    if not params:
        params = __import__('params')

    data_json = "data.json"
    actions_fname = os.path.abspath(__file__).rsplit("/", 1)[0] + "/actions.txt"
    u.copy_jecs()
    logger_name = u.setup_logger()
    logger = logging.getLogger(logger_name)

    time_stats = []
    if os.path.isfile(data_json):
        with open(data_json, "r") as fhin:
            data = json.load(fhin)
            if "time_stats" in data:
                time_stats = data["time_stats"]

    all_samples = []
    for i in range(5000):
        if u.proxy_hours_left() < 60 and not params.FORSAKE_HEAVENLY_PROXY:
            u.proxy_renew()

        data = {"samples": [], "last_updated": None, "time_stats": time_stats}

        # read instructions file. if new sample found, add it to list
        # for existing samples, try to update params (xsec, kfact, etc.)
        for samp in u.read_samples(instructions):
            samp["params"] = params
            if samp not in all_samples:
                s = Samples.Sample(**samp)
                all_samples.append(s)
            else:
                all_samples[all_samples.index(samp)].update_params(samp)

        n_done = 0
        n_samples = len(all_samples)
        for isample, s in enumerate(all_samples):
            try:
                stat = s.get_status()
                typ = s.get_type()

                # grab actions from a text file and act on them, consuming (removing) them if successful
                for dataset_name, action in u.get_actions(actions_fname=actions_fname,
                                                          dataset_name=s["dataset"]):
                    if s.handle_action(action):
                        u.consume_actions(dataset_name=s["dataset"], action=action,
                                          actions_fname=actions_fname)

                if not s.pass_tsa_prechecks():
                    continue

                if typ == "CMS3":
                    if stat == "new":
                        s.crab_submit()
                    elif stat == "crab":
                        s.crab_parse_status()
                        if s.is_crab_done():
                            s.make_miniaod_map()
                            s.make_merging_chunks()
                            s.submit_merge_jobs()
                    elif stat == "postprocessing":
                        if s.is_merging_done():
                            if s.check_output():
                                s.make_metadata()
                                s.copy_files()
                            else:
                                s.submit_merge_jobs()
                    elif stat == "done":
                        s.do_done_stuff()
                        n_done += 1
                elif typ == "BABY":
                    if stat == "new":
                        s.set_baby_inputs()
                        s.submit_baby_jobs()
                    elif stat == "condor" or stat == "postprocessing":
                        if params.open_datasets:
                            s.check_new_merged_for_babies()
                        if not params.open_datasets and s.is_babymaking_done():
                            s.set_status("done")
                        else:
                            # s.sweep_babies()
                            s.sweep_babies_parallel()
                            s.submit_baby_jobs()
                    elif stat == "done":
                        if params.open_datasets:
                            s.check_new_merged_for_babies()
                        else:
                            s.do_done_stuff()
                            n_done += 1

                s.save()
                data["samples"].append(s.get_slimmed_dict())
            except Exception, err:
                logger.info("send an (angry?) email to Nick with the Traceback below!!")
                logger.info(traceback.format_exc())

        breakdown_crab = u.sum_dicts([samp["crab"]["breakdown"] for samp in data["samples"]
                                      if "crab" in samp and "breakdown" in samp["crab"]])
        # breakdown_baby = u.sum_dicts([{"baby_"+key:samp["baby"].get(key,0) for key in ["running", "sweepRooted"]} for samp in data["samples"] if samp["type"] == "BABY"])
        breakdown_baby = u.sum_dicts([{"running_babies": samp["baby"]["running"],
                                       "sweepRooted_babies": samp["baby"]["sweepRooted"]}
                                      for samp in data["samples"] if samp["type"] == "BABY"])
        tot_breakdown = u.sum_dicts([breakdown_crab, breakdown_baby])
        data["last_updated"] = u.get_timestamp()
        data["time_stats"].append((u.get_timestamp(), tot_breakdown))
        data["log"] = u.get_last_n_lines(fname=params.log_file, N=100)
        with open(data_json, "w") as fhout:
            data["samples"] = sorted(data["samples"],
                                     key=lambda x: x.get("status", "done") == "done")
            json.dump(data, fhout, sort_keys=True, indent=4)
        u.copy_json(params)

        if params.exit_when_done and (n_done == n_samples):
            print ">>> All %i samples are done. Exiting." % n_samples
            break

        if not do_one_iteration:
            sleep_time = 60 if i < 2 else 2 * 600
            logger.debug("sleeping for %i seconds..." % sleep_time)
            u.smart_sleep(sleep_time, files_to_watch=[actions_fname, instructions])
        else:
            break
    AND t0.test_order = 0
    AND t0.site_id != 31
    ORDER BY RAND()"""

df0 = pd.read_sql(sql=sql_query, con=db_engine_edusson_replica)
df = df0.copy()
df = df0[:50000]
print('DF len', len(df))
# df = df[df.order_id.isin([1088491, 1058728, 1505552, 1494183])]

################################ train ##########################################################
print('Get actions')
stime = time()
actions_df = get_actions(df)
actions_df = actions_df.set_index('order_id')
# actions_df['is_paid_order'] = df.set_index('order_id').is_paid_order
# actions_df = actions_df.join(df.set_index('order_id').is_paid_order).drop_duplicates('order_id')
actions_df = actions_df.join(
    df.drop_duplicates('order_id').set_index('order_id').is_paid_order)
print('Done', time() - stime, 'len', len(actions_df))

times_count = 21
times_df = actions_df[actions_df.dt_order_placed != timedelta(seconds=0)].dt_order_placed.quantile(
    q=np.linspace(0, 1, times_count))
times_df = times_df.reset_index()[:times_count - 1].dt_order_placed
times_df.iloc[0] = timedelta()
# times_df.to_pickle('times_df.pkl')
def main(instructions=None, params=None, do_one_iteration=False):
    if not instructions:
        return
    if not params:
        params = __import__('params')

    data_json = "data.json"
    actions_fname = os.path.abspath(__file__).rsplit("/", 1)[0] + "/actions.txt"
    u.copy_jecs()
    logger_name = u.setup_logger()
    logger = logging.getLogger(logger_name)

    time_stats = []
    if os.path.isfile(data_json):
        with open(data_json, "r") as fhin:
            data = json.load(fhin)
            if "time_stats" in data:
                time_stats = data["time_stats"]

    all_samples = []
    for i in range(5000):
        if u.proxy_hours_left() < 60 and not params.FORSAKE_HEAVENLY_PROXY:
            u.proxy_renew()

        data = {"samples": [], "last_updated": None, "time_stats": time_stats}

        # read instructions file. if new sample found, add it to list
        # for existing samples, try to update params (xsec, kfact, etc.)
        for samp in u.read_samples(instructions):
            samp["params"] = params
            if samp not in all_samples:
                s = Samples.Sample(**samp)
                all_samples.append(s)
            else:
                all_samples[all_samples.index(samp)].update_params(samp)

        n_done = 0
        n_samples = len(all_samples)
        for isample, s in enumerate(all_samples):
            try:
                stat = s.get_status()
                typ = s.get_type()

                # grab actions from a text file and act on them, consuming (removing) them if successful
                for dataset_name, action in u.get_actions(actions_fname=actions_fname,
                                                          dataset_name=s["dataset"]):
                    if s.handle_action(action):
                        u.consume_actions(dataset_name=s["dataset"], action=action,
                                          actions_fname=actions_fname)

                if not s.pass_tsa_prechecks():
                    continue

                if typ == "CMS3":
                    if stat == "new":
                        s.crab_submit()
                    elif stat == "crab":
                        s.crab_parse_status()
                        if s.is_crab_done():
                            s.make_miniaod_map()
                            s.make_merging_chunks()
                            s.submit_merge_jobs()
                    elif stat == "postprocessing":
                        if s.is_merging_done():
                            if s.check_output():
                                s.make_metadata()
                                s.copy_files()
                            else:
                                s.submit_merge_jobs()
                    elif stat == "done":
                        s.do_done_stuff()
                        n_done += 1
                elif typ == "BABY":
                    if stat == "new":
                        s.set_baby_inputs()
                        s.submit_baby_jobs()
                    elif stat == "condor" or stat == "postprocessing":
                        if params.open_datasets:
                            s.check_new_merged_for_babies()
                        if not params.open_datasets and s.is_babymaking_done():
                            s.set_status("done")
                        else:
                            s.sweep_babies()
                            s.submit_baby_jobs()
                    elif stat == "done":
                        if params.open_datasets:
                            s.check_new_merged_for_babies()
                        else:
                            s.do_done_stuff()
                            n_done += 1

                s.save()
                data["samples"].append(s.get_slimmed_dict())
            except Exception, err:
                logger.info("send an (angry?) email to Nick with the Traceback below!!")
                logger.info(traceback.format_exc())

        breakdown_crab = u.sum_dicts([samp["crab"]["breakdown"] for samp in data["samples"]
                                      if "crab" in samp and "breakdown" in samp["crab"]])
        # breakdown_baby = u.sum_dicts([{"baby_"+key:samp["baby"].get(key,0) for key in ["running", "sweepRooted"]} for samp in data["samples"] if samp["type"] == "BABY"])
        breakdown_baby = u.sum_dicts([{"running_babies": samp["baby"]["running"],
                                       "sweepRooted_babies": samp["baby"]["sweepRooted"]}
                                      for samp in data["samples"] if samp["type"] == "BABY"])
        tot_breakdown = u.sum_dicts([breakdown_crab, breakdown_baby])
        data["last_updated"] = u.get_timestamp()
        data["time_stats"].append((u.get_timestamp(), tot_breakdown))
        data["log"] = u.get_last_n_lines(fname=params.log_file, N=100)
        with open(data_json, "w") as fhout:
            data["samples"] = sorted(data["samples"],
                                     key=lambda x: x.get("status", "done") == "done")
            json.dump(data, fhout, sort_keys=True, indent=4)
        u.copy_json(params)

        if params.exit_when_done and (n_done == n_samples):
            print ">>> All %i samples are done. Exiting." % n_samples
            break

        if not do_one_iteration:
            sleep_time = 60 if i < 2 else 2 * 600
            logger.debug("sleeping for %i seconds..." % sleep_time)
            u.smart_sleep(sleep_time, files_to_watch=[actions_fname, instructions])
        else:
            break
                                     dt.minute, dt.second)
        start_date_string = start_date_py.strftime("'%Y/%m/%d %H:%M:%S'")
    except StandardError, e:
        print "Failed get start date", e

    get_count = request.POST.get('get_count') == 'true'
    target_id = None
    group_by_date = request.POST.get('group_by_date') == 'true'
    date_pattern = request.POST.get('date_pattern')
    reverse = request.POST.get('reverse') == 'true'

    actions = get_actions(operation_id, reverse, get_count, user_id, target_id,
                          start_date_string, end_date_string, group_by_date, date_pattern)

    # Operate with the query results
    results = {}
    if get_count:
        results["count"] = actions
        count_results = results
    else:
        results["count"] = actions
        count_results = results
        # count_results = {}
        # for action in actions:
        #     key = str(action[4])  # target_id
    AND order_additional.device_type_id_create = 1
    AND t0.order_id NOT IN (SELECT order_id FROM es_order_reassign_history)
    AND t0.test_order = 0
    AND t0.site_id != 31
    ORDER BY RAND()"""

df0 = pd.read_sql(sql=sql_query, con=db_engine_edusson_replica)
df = df0.copy()
df = df0[:10000]
print('DF len', len(df))

################################ train ##########################################################
print('Get actions')
stime = time()
actions_df = get_actions(df)
actions_df = actions_df.set_index('order_id')
actions_df['is_paid_order'] = df.set_index('order_id').is_paid_order
print('Done', time() - stime)

times = [timedelta(minutes=i) for i in range(6)]


def get_order_features(df):
    # return pd.concat([get_features(df, dt_order_placed) for dt_order_placed in times])
    return pd.concat([get_features(df, dt_order_placed)
                      for dt_order_placed in df.dt_order_placed])
                          pretrain_length]  # Observe
        next_state = utils.state_gen(state_in, action, obs)  # Go to next state
        reward = obs  # Reward
        total_rewards += reward  # Total Reward
        exp_memory.add((state_in, action, reward, next_state))  # Add in exp memory
        state_in = next_state
        history_input = next_state

        if (time > state_size or episode != 0):  # If sufficient minibatch is available
            batch = exp_memory.sample(batch_size)  # Sample without replacement
            states = utils.get_states(batch)  # Get state, action, reward and next state from memory
            actions = utils.get_actions(batch)
            rewards = utils.get_rewards(batch)
            next_state = utils.get_next_states(batch)

            feed_dict = {q_network.input_in: next_state}
            actuals_Q = sess.run(q_network.out_layer, feed_dict=feed_dict)  # Get the Q values for next state
            actuals = rewards + gamma * np.max(actuals_Q, axis=1)  # Make it actuals with discount factor
            actuals = actuals.reshape(batch_size)

            # Feed in here to get loss and optimise it
            loss, _ = sess.run([q_network.Q_loss, q_network.opt],
def main(args):
    print('loading model from ' + args.model_file)
    checkpoint = torch.load(args.model_file)
    model = checkpoint['model']
    word2idx = checkpoint['word2idx']
    cuda.set_device(args.gpu)
    model.eval()
    model.cuda()
    corpus_f1 = [0., 0., 0.]
    sent_f1 = []
    pred_out = open(args.out_file, "w")
    gold_out = open(args.gold_out_file, "w")
    with torch.no_grad():
        for j, gold_tree in enumerate(open(args.data_file, "r")):
            tree = gold_tree.strip()
            action = get_actions(tree)
            tags, sent, sent_lower = get_tags_tokens_lowercase(tree)
            sent_orig = sent[::]
            if args.lowercase == 1:
                sent = sent_lower
            gold_span, binary_actions, nonbinary_actions = get_nonbinary_spans(action)
            length = len(sent)
            if args.replace_num == 1:
                sent = [clean_number(w) for w in sent]
            if length == 1:
                continue  # we ignore length 1 sents. this doesn't change F1 since we discard trivial spans
            sent_idx = [word2idx["<s>"]] + [word2idx[w] if w in word2idx else word2idx["<unk>"]
                                            for w in sent]
            sents = torch.from_numpy(np.array(sent_idx)).unsqueeze(0)
            sents = sents.cuda()
            ll_word_all, ll_action_p_all, ll_action_q_all, actions_all, q_entropy = model(
                sents, samples=1, is_temp=1, has_eos=False)
            _, binary_matrix, argmax_spans = model.q_crf._viterbi(model.scores)
            tree = get_tree_from_binary_matrix(binary_matrix[0], len(sent))
            actions = utils.get_actions(tree)
            pred_span = [(a[0], a[1]) for a in argmax_spans[0]]
            pred_span_set = set(pred_span[:-1])  # the last span in the list is always the
            gold_span_set = set(gold_span[:-1])  # trivial sent-level span so we ignore it
            tp, fp, fn = get_stats(pred_span_set, gold_span_set)
            corpus_f1[0] += tp
            corpus_f1[1] += fp
            corpus_f1[2] += fn
            binary_matrix = binary_matrix[0].cpu().numpy()
            pred_tree = {}
            for i in range(length):
                tag = tags[i]  # need gold tags so evalb correctly ignores punctuation
                pred_tree[i] = "(" + tag + " " + sent_orig[i] + ")"
            for k in np.arange(1, length):
                for s in np.arange(length):
                    t = s + k
                    if t > length - 1:
                        break
                    if binary_matrix[s][t] == 1:
                        nt = "NT-1"
                        span = "(" + nt + " " + pred_tree[s] + " " + pred_tree[t] + ")"
                        pred_tree[s] = span
                        pred_tree[t] = span
            pred_tree = pred_tree[0]
            pred_out.write(pred_tree.strip() + "\n")
            gold_out.write(gold_tree.strip() + "\n")
            print(pred_tree)
            # sent-level F1 is based on L83-89 from https://github.com/yikangshen/PRPN/test_phrase_grammar.py
            overlap = pred_span_set.intersection(gold_span_set)
            prec = float(len(overlap)) / (len(pred_span_set) + 1e-8)
            reca = float(len(overlap)) / (len(gold_span_set) + 1e-8)
            if len(gold_span_set) == 0:
                reca = 1.
            if len(pred_span_set) == 0:
                prec = 1.
            f1 = 2 * prec * reca / (prec + reca + 1e-8)
            sent_f1.append(f1)
    pred_out.close()
    gold_out.close()
    tp, fp, fn = corpus_f1
    prec = tp / (tp + fp)
    recall = tp / (tp + fn)
    corpus_f1 = 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.
    print('Corpus F1: %.2f, Sentence F1: %.2f' %
          (corpus_f1 * 100, np.mean(np.array(sent_f1)) * 100))
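For reference, the corpus-level F1 computed at the end of main() is the usual harmonic mean of precision and recall over the aggregated span counts. A tiny self-contained sketch with made-up counts, mirroring the formulas above:

def corpus_f1_from_counts(tp, fp, fn):
    # precision/recall over aggregated true positives, false positives, false negatives
    prec = tp / (tp + fp) if tp + fp > 0 else 0.
    recall = tp / (tp + fn) if tp + fn > 0 else 0.
    return 2 * prec * recall / (prec + recall) if prec + recall > 0 else 0.


print('Corpus F1: %.2f' % (corpus_f1_from_counts(420., 55., 80.) * 100))  # toy counts -> 86.15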