def generateParams(filters: list):
    paramsStr = ''
    # Flatten the filters into a list of key strings
    strList = arrays.mapcat(filters, lambda item: item['key'])
    # Get groups of indexes whose keys are similar
    indexs = DataUtils.enumSimilarityGroup(strList)
    # Walk each group; if it is a time field, keep its index group, e.g. [[1, 2], [4, 5]]
    tempList = []
    for index in range(len(indexs)):
        key = strList[indexs[index][0]].lower()
        if 'time' in key or 'date' in key:
            tempList.append(indexs[index])
    tarList = DataUtils.convertTimeGroup(filters, tempList)
    for item in tarList:
        # string type -> plain input
        if item['type'] == 'string':
            paramsStr = paramsStr + generateInput(item)
        # numeric type that looks like a date -> date picker
        elif item['type'] in ['integer', 'long', 'number'] and DataUtils.isLikeDate(item['key']):
            paramsStr = paramsStr + generateDate(item)
        # select type
        elif DataUtils.isSelectType(item):
            paramsStr = paramsStr + generateSelect(item)
        # unknown types are all rendered as plain inputs
        else:
            paramsStr = paramsStr + generateInput(item)
    return paramsStr
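# Hedged usage sketch for generateParams above. The filter descriptors are
# invented for illustration; real filters come from the parsed API metadata,
# and whether 'status' actually hits the select branch depends on what
# DataUtils.isSelectType checks.
example_filters = [
    {'key': 'userName', 'type': 'string'},                 # -> generateInput
    {'key': 'createDate', 'type': 'long'},                  # looks like a date -> generateDate
    {'key': 'status', 'type': 'integer', 'enum': [0, 1]},   # possibly -> generateSelect
]
print(generateParams(example_filters))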
def on_pushButton_2_clicked(self):
    # Output directory
    savePath = self.lineEdit_2.text()
    # Generate the TSX for TableContent
    fieldsIndex = self.tableWidget_3.selectedIndexes()
    fieldsIndex = DataUtils.getSelectIndexs(fieldsIndex, 2)
    fieldsData = DataUtils.getSelectFilter(fieldsIndex, self.fields)
    fieldsData = DataUtils.convertSelectFields(fieldsData)
    CreaterTools.generateContent(fieldsData, savePath)
    self.label_7.setText(u'Status: Content generated successfully!')
    self.label_7.repaint()
def on_pushButton_3_clicked(self):
    # Output directory
    savePath = self.lineEdit_2.text()
    # Generate the TSX for the Manage section
    paramsItems = self.tableWidget_2.selectedIndexes()
    paramsIndexs = DataUtils.getSelectIndexs(paramsItems)
    filteredData = DataUtils.getSelectFilter(paramsIndexs, self.params)
    filteredData = DataUtils.convertSelectFilter(filteredData)
    CreaterTools.generateManage(savePath, filteredData, self.currentItem)
    self.label_7.setText('Status: Manage generated successfully!')
    self.label_7.repaint()
def on_pushButton_clicked(self):
    # Output directory
    savePath = self.lineEdit_2.text()
    # Generate the TSX for the query-parameter filter form
    paramsItems = self.tableWidget_2.selectedIndexes()
    paramsIndexs = DataUtils.getSelectIndexs(paramsItems)
    filteredData = DataUtils.getSelectFilter(paramsIndexs, self.params)
    filteredData = DataUtils.convertSelectFilter(filteredData)
    CreaterTools.generateFilterForm(filteredData, savePath)
    self.label_7.setText(u'Status: FilterForm generated successfully!')
    self.label_7.repaint()
def getInterfaceCount(tags):
    # Count one row per valid HTTP method of every child endpoint.
    rowIndex = 0
    for row in range(len(tags)):
        for childIndex in range(len(tags[row]['child'])):
            rowData = tags[row]['child'][childIndex]
            methodTypes = DataUtils.getValidMethod(rowData)
            rowIndex += len(methodTypes)
    return rowIndex
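# DataUtils.getValidMethod is not shown in these snippets. Judging from how
# getInterfaceCount and buildListData use it, it presumably picks out the HTTP
# verbs present on a Swagger/OpenAPI path item. A minimal stand-in under that
# assumption (not the project's actual implementation):
HTTP_METHODS = ('get', 'post', 'put', 'delete', 'patch', 'head', 'options')

def get_valid_methods(rowData: dict) -> list:
    # rowData example: {'path': '/users', 'get': {...}, 'post': {...}}
    return [method for method in HTTP_METHODS if method in rowData]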
def train(args):
    graph_file = './data/%s/%s.npz' % (args.name, args.name)
    graph_file = graph_file.replace('.npz', '_train.npz')
    data_loader = DataUtils(graph_file)
    n = args.n_trials
    res_hom, res_het = [0] * n, [0] * n
    tm = [0] * n
    for i in range(n):
        tm[i] = TrialManager(args=copy.deepcopy(args), ind=i, data_loader=data_loader)

    import tensorflow
    tf = tensorflow.compat.v1
    sess = tf.Session()
    tf.global_variables_initializer().run(session=sess)

    losses = []
    with sess.as_default():
        for b in range(1, args.num_batches + 1):
            fd = {}
            to_comp = []
            for to_comp1, fd1 in map(train_batch_command, tm):
                to_comp.extend(to_comp1)
                for k, v in fd1.items():
                    fd[k] = v
            res = sess.run(to_comp, feed_dict=fd)
            losses.append(res[0::2])

            if (b % 25) == 0:
                losses = np.array(losses)
                for i in range(n):
                    res, val_hom_auc = tm[i].test()
                    best_test_hom_auc, best_test_het_auc = res['hom'], res['het']
                    res_hom[i], res_het[i] = best_test_hom_auc * 100, best_test_het_auc * 100
                    print(
                        f'batch:{b:8} - '
                        f'time:{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} - '
                        f'loss:{np.mean(losses[:, i]):.4f} - '
                        f'val(hom):{val_hom_auc*100:.4f} - '
                        f'test(by best val):[hom:{best_test_hom_auc:.4f},het:{best_test_het_auc:.4f}]'
                    )
                losses = []
    print('finished')

    def stats(x):
        return f'{np.mean(x):.2f}, {np.std(x) / np.sqrt(len(x)):.2f}'

    print('hom', stats(res_hom), [f'{xx:.2f}' for xx in res_hom])
    print('het', stats(res_het), [f'{xx:.2f}' for xx in res_het])
def _distance_edge_server_base_station(self, edge_server: EdgeServer,
                                        base_station: BaseStation) -> float:
    """
    Calculate distance between given edge server and base station
    :param edge_server:
    :param base_station:
    :return: distance (km)
    """
    if edge_server.base_station_id:
        return self.distances[edge_server.base_station_id][base_station.id]
    return DataUtils.calc_distance(edge_server.latitude, edge_server.longitude,
                                   base_station.latitude, base_station.longitude)
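# DataUtils.calc_distance is not included here; given the km unit in the
# docstring and the latitude/longitude arguments, it is presumably a
# great-circle (haversine) distance. An illustrative stand-in -- the project's
# actual implementation may use a different formula or Earth radius:
import math

def calc_distance_km(lat_a: float, lng_a: float, lat_b: float, lng_b: float) -> float:
    earth_radius_km = 6371.0
    lat_a, lng_a, lat_b, lng_b = map(math.radians, (lat_a, lng_a, lat_b, lng_b))
    d_lat, d_lng = lat_b - lat_a, lng_b - lng_a
    h = math.sin(d_lat / 2) ** 2 + math.cos(lat_a) * math.cos(lat_b) * math.sin(d_lng / 2) ** 2
    return 2 * earth_radius_km * math.asin(math.sqrt(h))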
def caculateImportance(source_id):
    # Connect to the config database and fetch the data-source record for source_id
    user = "******"
    password = '******'
    write_db = 'pp_conf'
    host = 'localhost'
    # source_id = getSourceId()
    conn_conf = DataUtils.getConfigDBConn()
    sql = "select name, url, sqld, user, pwd from isf_data_source_conf where uuid='" + str(source_id) + "'"
    df_conf = pd.read_sql(sql, con=conn_conf)
    df_conf.head()
    name = df_conf.name[0]
    url = df_conf.url[0]
    sql = df_conf.sqld[0]
    print("name is: ", name, "url is: ", url)
    url_parts = url.split('/')
    host = url_parts[0].split(":")[0]
    port = int(url_parts[0].split(":")[1])
    db = url_parts[1]
    conf = df_conf.head(1)
    user = df_conf.user[0]
    password = df_conf.pwd[0]
    print('host is: ', host, 'port is: ', port, 'db: ', db,
          'user: ', '******', 'password: ', '******')
    # NOTE: the connection call was partially redacted in the source (everything
    # between the user argument and charset='utf8' was masked). Reconstructed
    # here so the snippet parses; pymysql (import pymysql) is an assumption.
    conn = pymysql.connect(host=host, port=port, user=user, password=password,
                           db=db, charset='utf8')
    df = pd.read_sql(sql, con=conn)
    df = df.drop(["REPORT_DATE"], axis=1)
    df = df.astype(float)
    json = df.corr()['QLI'].to_json()
    # Write the factor importance back to the config database
    insert_sql = "update isf_forecast_factor set factor_impact='" + json + "' where ds_conf_id=" + str(source_id)
    print(insert_sql)
    cursor = conn_conf.cursor()
    cursor.execute(insert_sql)
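# The queries above are built by string concatenation. A hedged sketch of the
# same two statements with bound parameters, assuming the connection returned
# by DataUtils.getConfigDBConn() is a MySQL DB-API connection (paramstyle %s);
# it reuses conn_conf, source_id and json from the snippet above:
select_sql = "select name, url, sqld, user, pwd from isf_data_source_conf where uuid = %s"
df_conf = pd.read_sql(select_sql, con=conn_conf, params=(str(source_id),))

update_sql = "update isf_forecast_factor set factor_impact = %s where ds_conf_id = %s"
cursor = conn_conf.cursor()
cursor.execute(update_sql, (json, str(source_id)))
conn_conf.commit()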
def get_data(csv_fname, video_fname, avg_fname,
             num_frames=None, start_frame=0,
             OBJECTS=['person'], resol=(50, 50),
             center=True, dtype='float32', train_ratio=0.6):
    def print_class_numbers(Y, nb_classes):
        classes = np_utils.probas_to_classes(Y)
        for i in xrange(nb_classes):
            print 'class %d: %d' % (i, np.sum(classes == i))

    print '\tParsing %s, extracting %s' % (csv_fname, str(OBJECTS))
    # Binary matrix: whether each frame contains one of the target objects
    all_counts = DataUtils.get_binary(csv_fname, limit=num_frames,
                                      OBJECTS=OBJECTS, start=start_frame)
    print '\tRetrieving all frames from %s' % video_fname
    all_frames = VideoUtils.get_all_frames(len(all_counts), video_fname,
                                           scale=resol, start=start_frame)
    print '\tSplitting data into training and test sets'
    X_train, X_test, Y_train, Y_test = to_test_train(avg_fname, all_frames, all_counts)
    nb_classes = all_counts.max() + 1

    print '(train) positive examples: %d, total examples: %d' % \
        (np.count_nonzero(np_utils.probas_to_classes(Y_train)), len(Y_train))
    print_class_numbers(Y_train, nb_classes)
    print '(test) positive examples: %d, total examples: %d' % \
        (np.count_nonzero(np_utils.probas_to_classes(Y_test)), len(Y_test))
    print_class_numbers(Y_test, nb_classes)

    print 'shape of image: ' + str(all_frames[0].shape)
    print 'number of classes: %d' % (nb_classes)

    data = (X_train, Y_train, X_test, Y_test)
    return data, nb_classes
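# np_utils.probas_to_classes comes from an older Keras release and has since
# been removed. If this snippet has to run against a newer stack, a small
# stand-in along these lines should behave the same way (argmax over the class
# axis, or a 0.5 threshold for a single probability column). Hedged
# replacement, not part of the original project:
import numpy as np

def probas_to_classes(y_proba):
    if y_proba.ndim > 1 and y_proba.shape[-1] > 1:
        return np.argmax(y_proba, axis=-1)
    return (np.ravel(y_proba) > 0.5).astype('int32')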
def buildListData(tags, keyWord=''):
    listData = []
    for row in range(len(tags)):
        for childIndex in range(len(tags[row]['child'])):
            rowData = tags[row]['child'][childIndex]
            path = rowData['path']
            methodTypes = DataUtils.getValidMethod(rowData)
            for methodType in methodTypes:
                itemData = objects.clone_deep(tags[row]['child'][childIndex][methodType])
                itemData = objects.assign(itemData, {
                    'path': path,
                    'type': methodType
                })
                listData.append(itemData)
    return listData
class Solver():
    ''' Do training, validation and testing. '''

    def __init__(self, args):
        self.args = args
        with open(args.config, 'r') as stream:
            config = yaml.load(stream, Loader=yaml.SafeLoader)
        self.config = config[self.args.task]

        if args.model_type == 'transformer':
            self.data_utils = DataUtils(self.config, args.train, args.task)
        elif args.model_type == 'bert':
            assert args.task == 'seq2seq'
            self.data_utils = bert_utils(self.config, args.train, args.task)

        if args.train and args.save_checkpoints:
            self.model_dir = make_save_dir(
                os.path.join(args.model_dir, args.task, args.exp_name))

        self._disable_comet = args.disable_comet
        self._model_type = args.model_type
        self._save_checkpoints = args.save_checkpoints

        ###### loading .... ######
        print("====================")
        print("start to build model")
        print('====================')
        vocab_size = self.data_utils.vocab_size
        print("Vocab Size: %d" % (vocab_size))
        self.model = self.make_model(src_vocab=vocab_size,
                                     tgt_vocab=vocab_size,
                                     config=self.config['model'])

    def make_model(self, src_vocab, tgt_vocab, config):
        "Helper: Construct a model from hyperparameters."
        if self._model_type == 'transformer':
            model = make_transformer_model(src_vocab, tgt_vocab, config)
        elif self._model_type == 'bert':
            tokenizer = BertTokenizer.from_pretrained('bert-base-chinese',
                                                      padding_side='left')
            num_added_tokens = tokenizer.add_tokens(self.data_utils.all_tokens)
            print('We have added %d tokens to the bert tokenizer.' % num_added_tokens)
            self.data_utils.set_tokenizer(tokenizer)
            model = BERT(BertModel.from_pretrained('bert-base-chinese'),
                         self.config['max_len'], config['d_bert'],
                         self.data_utils.vocab_size)
        return model.cuda()

    def train(self):
        if not self._disable_comet:
            # logging
            COMET_PROJECT_NAME = 'weibo-stc'
            COMET_WORKSPACE = 'timchen0618'

            self.exp = Experiment(
                project_name=COMET_PROJECT_NAME,
                workspace=COMET_WORKSPACE,
                auto_output_logging='simple',
                auto_metric_logging=None,
                display_summary=False,
            )

            self.exp.add_tag(self.args.task)
            if self.args.task != 'pure_seq2seq':
                if self.args.processed:
                    self.exp.add_tag('processed')
                else:
                    self.exp.add_tag('unprocessed')
            if self.args.sampler_label != 'none':
                self.exp.add_tag(self.args.sampler_label)
            if self._model_type == 'bert':
                self.exp.add_tag('BERT')
            self.exp.set_name(self.args.exp_name)
            self.exp.log_parameters(self.config)
            self.exp.log_parameters(self.config['model'])

        # if finetune, load pretrain
        if self.args.task == 'finetune':
            lr = 5e-7
            state_dict = torch.load(self.args.load_model)['state_dict']
            print('loading model from %s ...' % self.args.load_model)
            self.model.load_state_dict(state_dict)
        else:
            lr = self.config['lr_init']
            if self.args.load_model is not None:
                state_dict = torch.load(self.args.load_model,
                                        map_location='cuda:%d' % self.args.gpuid)['state_dict']
                print('loading model from %s ...' % self.args.load_model)
                self.model.load_state_dict(state_dict)
            if self.args.pretrain_embedding:
                self.model.load_embedding(self.args.pretrain_embedding)

        # Optimizer and some info for logging.
        if self.config['optimizer'] == 'adam':
            optim = torch.optim.Adam(self.model.parameters(), lr=lr,
                                     betas=(0.9, 0.98), eps=1e-9, weight_decay=0)
        elif self.config['optimizer'] == 'adamw':
            optim = torch.optim.AdamW(self.model.parameters(), lr=lr,
                                      betas=(0.9, 0.98), eps=1e-9)
        else:
            raise NotImplementedError

        total_loss = []
        p_gen_list = []
        start = time.time()
        step = self.args.start_step
        print('starting from step %d' % step)

        for epoch in range(self.config['num_epoch']):
            self.model.train()
            train_data = self.data_utils.data_yielder(valid=False)

            for batch in train_data:
                # print('-'*30)
                # Whether do noam learning rate scheduling
                if self.config['noam_decay']:
                    if step % 5 == 1:
                        lr = self.config['lr'] * (
                            1 / (self.config['model']['d_model']**0.5)) * min(
                                (1 / (step)**0.5),
                                (step) * (1 / (self.config['warmup_steps']**1.5)))
                        if self.args.task == 'finetune':
                            lr /= self.config['lr_decay']
                        for param_group in optim.param_groups:
                            param_group['lr'] = lr

                tgt_mask = batch['tgt_mask'].long()
                y = batch['y'].long()

                if self._model_type == 'bert':
                    inp = batch['src']['input_ids'].cuda()
                    out = self.model.forward(inp)
                    pred = tens2np(out.topk(1, dim=-1)[1].squeeze())
                    p_gen_list.append(0.0)
                else:
                    tgt = batch['tgt'].long()
                    src = batch['src'].long()
                    src_mask = batch['src_mask'].long()

                    # Forwarding (with mask or not)
                    if self.config['pos_masking']:
                        out, p_gen = self.model.forward_with_mask(
                            src, tgt, src_mask, tgt_mask, batch['posmask'])
                    elif self.args.task == 'joint_gen' and self.config['greedy']:
                        out = self.model.forward_with_ss(
                            src, src_mask, tgt, self.config['max_decode_step'],
                            self.data_utils.bos)
                        # print('out', out.size())
                        p_gen = torch.zeros((1, 1))
                    else:
                        out, p_gen = self.model.forward(src, tgt, src_mask, tgt_mask)

                    # Info for printing
                    pred = tens2np(out.topk(1, dim=-1)[1].squeeze())
                    p_gen = p_gen.mean()
                    p_gen_list.append(p_gen.item())

                loss = self.model.loss_compute(out, y, self.data_utils.pad)
                loss.backward()
                optim.step()
                optim.zero_grad()
                total_loss.append(tens2np(loss))

                # print out info
                if step % self.config['print_every_step'] == 0:
                    elapsed = time.time() - start
                    print("Epoch Step: %d Loss: %f P_gen:%f Time: %f Lr: %4.6f" %
                          (step, np.mean(total_loss),
                           sum(p_gen_list) / len(p_gen_list), elapsed, lr))
                    if self._model_type == 'bert':
                        source_text = tens2np(inp.long())
                        target_text = tens2np(batch['y'].long())
                    elif self._model_type == 'transformer':
                        source_text = tens2np(batch['src'].long())
                        target_text = tens2np(batch['tgt'].long())
                    print('src:', self.data_utils.id2sent(source_text[0]))
                    print('tgt:', self.data_utils.id2sent(target_text[0]))
                    print('pred:', self.data_utils.id2sent(pred[0]))

                    # If using transformer, we want to see greedy decoding result
                    if self._model_type == 'transformer':
                        if self.config['pos_masking']:
                            greedy_text = self.model.greedy_decode(
                                src.long()[:1], src_mask[:1],
                                self.config['max_len'], self.data_utils.bos,
                                batch['posmask'][:1])
                        else:
                            greedy_text = self.model.greedy_decode(
                                src.long()[:1], src_mask[:1],
                                self.config['max_len'], self.data_utils.bos)
                        greedy_text = tens2np(greedy_text)
                        print('pred_greedy:', self.data_utils.id2sent(greedy_text[0]))

                    # logging statistics
                    if not self._disable_comet:
                        self.exp.log_metric('Train Loss', np.mean(total_loss), step=step)
                        self.exp.log_metric('Lr', lr, step=step)
                    print()
                    start = time.time()
                    total_loss = []
                    p_gen_list = []

                # Do validation
                if step % self.config['valid_every_step'] == self.config['valid_every_step'] - 1:
                    self.validate(step)

                step += 1

    @torch.no_grad()
    def validate(self, step):
        print('*********************************')
        print('           Validation            ')
        print('*********************************')
        fw = open(self.args.w_valid_file, 'w')
        val_yielder = self.data_utils.data_yielder(valid=True)
        self.model.eval()
        total_loss = []

        # Validate one batch, writing valid hypothesis to file
        for batch in val_yielder:
            if self._model_type == 'bert':
                inp = batch['src']['input_ids'].cuda()
                out = self.model.forward(inp)
            else:
                # model is transformer
                batch['src'] = batch['src'].long()
                batch['tgt'] = batch['tgt'].long()
                if self.config['pos_masking']:
                    out, _ = self.model.forward_with_mask(
                        batch['src'], batch['tgt'], batch['src_mask'],
                        batch['tgt_mask'], batch['posmask'])
                else:
                    out, _ = self.model.forward(batch['src'], batch['tgt'],
                                                batch['src_mask'], batch['tgt_mask'])

            loss = self.model.loss_compute(out, batch['y'].long(), self.data_utils.pad)
            total_loss.append(loss.item())

            if self.config['pos_masking']:
                out = self.model.greedy_decode(batch['src'].long(),
                                               batch['src_mask'],
                                               self.config['max_len'],
                                               self.data_utils.bos,
                                               batch['posmask'])
            else:
                out = self.model.greedy_decode(batch['src'].long(),
                                               batch['src_mask'],
                                               self.config['max_len'],
                                               self.data_utils.bos)

            # Writing sentences to hypothesis file
            for l in out:
                sentence = self.data_utils.id2sent(l[1:], True)
                fw.write(sentence)
                fw.write("\n")
        fw.close()

        # Calculate BLEU score and log to comet if needed
        bleus = cal_bleu(self.args.w_valid_file, self.args.w_valid_tgt_file)
        if not self._disable_comet:
            self.exp.log_metric('BLEU-1', bleus[0], step=step)
            self.exp.log_metric('BLEU-2', bleus[1], step=step)
            self.exp.log_metric('BLEU-3', bleus[2], step=step)
            self.exp.log_metric('BLEU-4', bleus[3], step=step)
            self.exp.log_metric('Valid Loss', sum(total_loss) / len(total_loss), step=step)

        print('=============================================')
        print('Validation Result -> Loss : %6.6f' % (sum(total_loss) / len(total_loss)))
        print('=============================================')
        self.model.train()

        # Saving model checkpoints
        if self._save_checkpoints:
            print('saving!!!!')
            model_name = str(int(step / 1000)) + 'k_' + '%6.6f__%4.4f_%4.4f_' % (
                sum(total_loss) / len(total_loss), bleus[0], bleus[3]) + 'model.pth'
            state = {'step': step, 'state_dict': self.model.state_dict()}
            torch.save(state, os.path.join(self.model_dir, model_name))

    @torch.no_grad()
    def test(self):
        # Prepare model
        path = self.args.load_model
        state_dict = torch.load(path)['state_dict']
        self.model.load_state_dict(state_dict)

        # file path for prediction
        pred_dir = make_save_dir(self.args.pred_dir)
        filename = self.args.filename
        outfile = open(os.path.join(pred_dir, self.args.task, filename), 'w')

        # Start decoding
        data_yielder = self.data_utils.data_yielder()
        total_loss = []
        start = time.time()

        # If beam search, create sequence generator object
        self._beam_search = self.config['eval']['beam_size'] > 1
        # self._beam_search = True
        if self._beam_search:
            seq_gen = SequenceGenerator(
                self.model,
                self.data_utils,
                beam_size=self.config['eval']['beam_size'],
                no_repeat_ngram_size=self.config['eval']['block_ngram'])

        self.model.eval()
        step = 0
        # Run one batch
        for batch in data_yielder:
            step += 1
            if step % 10 == 1:
                print('Step ', step)

            # Decoding according to scheme
            if self._beam_search:
                out = seq_gen.generate(batch,
                                       pos_masking=self.config['pos_masking'],
                                       bos_token=self.data_utils.bos)
            else:
                max_length = self.config['max_len']
                if self.config['pos_masking']:
                    out = self.model.greedy_decode(batch['src'].long(),
                                                   batch['src_mask'],
                                                   max_length,
                                                   self.data_utils.bos,
                                                   batch['posmask'])
                else:
                    if self.args.task == 'joint_gen':
                        max_length = self.config['max_decode_step']
                    out = self.model.greedy_decode(batch['src'].long(),
                                                   batch['src_mask'],
                                                   max_length,
                                                   self.data_utils.bos)

            # Write sentences to file
            for l in out:
                if self._beam_search:
                    sentence = self.data_utils.id2sent(l[0]['tokens'][:-1], True)
                else:
                    sentence = self.data_utils.id2sent(l[1:], True)
                outfile.write(sentence)
                outfile.write("\n")
        outfile.close()
def train(args):
    graph_file = '/Users/bhagya/PycharmProjects/Old data/line-master data/%s/%s.npz' % (args.name, args.name)
    graph_file = graph_file.replace('.npz', '_train.npz') if not args.is_all else graph_file
    data_loader = DataUtils(graph_file, args.is_all)
    suffix = args.proximity
    args.X = data_loader.X if args.suf != 'oh' else sp.identity(data_loader.X.shape[0])
    if not args.is_all:
        args.val_edges = data_loader.val_edges
        args.val_ground_truth = data_loader.val_ground_truth

    m = args.model
    name = m + '_' + args.name
    if m == 'lace':
        model = LACE(args)
    elif m == 'glace':
        model = GLACE(args)

    with tf.Session() as sess:
        print('-------------------------- ' + m + ' --------------------------')
        if model.val_set:
            print('batches\tloss\tval_auc\tval_ap\tsampling time\ttraining_time\tdatetime')
        else:
            print('batches\tloss\tsampling time\ttraining_time\tdatetime')

        tf.global_variables_initializer().run()
        sampling_time, training_time = 0, 0

        for b in range(args.num_batches):
            t1 = time.time()
            u_i, u_j, label, w = data_loader.fetch_next_batch(batch_size=args.batch_size, K=args.K)
            feed_dict = {model.u_i: u_i, model.u_j: u_j, model.label: label}
            t2 = time.time()
            sampling_time += t2 - t1

            loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
            training_time += time.time() - t2

            if model.val_set:
                if b % 50 == 0:
                    val_energy = sess.run(model.neg_val_energy)
                    val_auc, val_ap = score_link_prediction(data_loader.val_ground_truth, val_energy)
                    print('%d\t%f\t%f\t%f\t%0.2f\t%0.2f\t%s' %
                          (b, loss, val_auc, val_ap, sampling_time, training_time,
                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
                    sampling_time, training_time = 0, 0
            else:
                if b % 50 == 0:
                    print('%d\t%f\t%0.2f\t%0.2f\t%s' %
                          (b, loss, sampling_time, training_time,
                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
                    sampling_time, training_time = 0, 0

            if b % 50 == 0 or b == (args.num_batches - 1):
                if m == 'glace':
                    mu, sigma = sess.run([model.embedding, model.sigma])
                    pickle.dump({'mu': data_loader.embedding_mapping(mu),
                                 'sigma': data_loader.embedding_mapping(sigma)},
                                open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if args.is_all else '', suffix), 'wb'))
                    # if model.val_set:
                    #     r = kl_link_pred(mu, sigma, test_edges)
                    #     print('{:.4f}, {:.4f}'.format(r[0], r[1]))
                else:
                    embedding = sess.run(model.embedding)
                    pickle.dump(data_loader.embedding_mapping(embedding),
                                open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if args.is_all else '', suffix), 'wb'))
import logging

from algorithms import *
from utils import DataUtils

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    data = DataUtils('data/基站经纬度.csv', 'data/上网信息输出表(日表)6月15号之后.csv')

    mip_placer = MIPServerPlacer(data.base_stations, data.distances)
    mip_placer.place_server(10, 2)
    print(mip_placer.objective_latency(), mip_placer.objective_workload())

    # kmeans_placer = KMeansServerPlacement(data.base_stations, data.distances)
    # kmeans_placer.place_server(300, 30)
    # print(kmeans_placer.objective_latency(), kmeans_placer.objective_workload())

    # top_k_placer = TopKServerPlacement(data.base_stations, data.distances)
    # top_k_placer.place_server(300, 30)
    # print(top_k_placer.objective_latency(), top_k_placer.objective_workload())

    # random_placer = RandomServerPlacement(data.base_stations, data.distances)
    # random_placer.place_server(300, 30)
    # print(random_placer.objective_latency(), random_placer.objective_workload())
def load_data(self):
    du = DataUtils(self.cfg)
    self.train = du.train
    self.dev = du.dev
def generateManage(path: str, filters, apiItem):
    fileName = path + os.sep + 'Manage.tsx'
    contentTpl = FileTools.readFile(tplPaths['manage'])
    # Find the index groups of time fields within the filters
    # Flatten the filters into a list of key strings
    strList = arrays.mapcat(filters, lambda item: item['key'])
    # Get groups of indexes whose keys are similar
    indexs = DataUtils.enumSimilarityGroup(strList)
    # Walk each group; if it is a time field, keep its index group, e.g. [[1, 2], [4, 5]]
    tempList = []
    for index in range(len(indexs)):
        key = strList[indexs[index][0]].lower()
        if 'time' in key or 'date' in key:
            tempList.append(indexs[index])
    keyMaps = []
    # Build the mapping between similar index groups and the actual form keys,
    # i.e. the formKey -> filterKey relationship
    keyMaps = DataUtils.buildMaps(tempList, filters)
    # Build the replacement code snippet in a loop
    timeTpl = ''
    for formKey in keyMaps:
        tempKey = keyMaps[formKey][0].lower()
        if 'to' in tempKey or 'end' in tempKey:
            timeTpl += 'filterDump.' + keyMaps[formKey][1] + '= getValue(filterDump,\'' + formKey + '.startTime\',undefined);'
            timeTpl += 'filterDump.' + keyMaps[formKey][0] + '= getValue(filterDump,\'' + formKey + '.endTime\',undefined);'
        else:
            timeTpl += 'filterDump.' + keyMaps[formKey][0] + '= getValue(filterDump,\'' + formKey + '.startTime\',undefined);'
            timeTpl += 'filterDump.' + keyMaps[formKey][1] + '= getValue(filterDump,\'' + formKey + '.endTime\',undefined);'
        timeTpl += 'delete filterDump.' + formKey + ';'
    print(timeTpl)
    # filter.contractStartDate = filter.signDate && filter.signDate.startTime;
    # filter.contractEndDate = filter.signDate && filter.signDate.endTime;
    # Perform the replacement
    # Find the pagination params
    # Perform the replacement
    # # Manage filter section replacement
    # REPLACE_MANAGE_FILTER = '##REPLACE_MANAGE_FILTER##'
    # # Manage api export name
    # REPLACE_MANAGE_API = '##REPLACE_MANAGE_API##'
    # # Manage api method name
    # REPLACE_MANAGE_API_METHOD = '##REPLACE_MANAGE_API_METHOD##'
    # # Pagination param - page number
    # MANAGE_PAGE_NO = '##MANAGE_PAGE_NO##'
    # # Pagination param - page size
    # MANAGE_PAGE_SIZE = '##MANAGE_PAGE_SIZE##'
    contentTpl = DataUtils.replaceManageTpl(contentTpl, filters, apiItem, timeTpl)
    FileTools.writeFile(fileName, contentTpl)
    contentTpl = PrettierTools.format(fileName)
    FileTools.writeFile(fileName, contentTpl)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--csv_in', required=True, help='CSV input filename')
    parser.add_argument('--csv_out_base', required=True,
                        help='CSV output filename--do NOT confuse with csv_in')
    parser.add_argument('--video_in', required=True, help='Video input filename')
    parser.add_argument(
        '--num_frames', type=int, default=1000,
        help='Number of frames to use to form training and test set. Default: 1000')
    parser.add_argument(
        '--frame_delay', type=int, default=15,
        help='Delta between current frame and previous frame to compare against. '
             'Must be greater than 0. Default: 15')
    parser.add_argument('--object', required=True, help='Object to detect.')
    parser.add_argument(
        '--scale', type=float, default=0.1,
        help='Scale factor applied to each frame. Default: 0.1')
    parser.add_argument(
        '--features', default='hog',
        help='Type of features: HOG (hog), SIFT (sift), Color Histogram (ch), '
             'or raw images (raw). Multiple values must be separated by comma (e.g., hog,ch). Default: hog')
    args = parser.parse_args()

    csv_out_base = args.csv_out_base
    video_in = args.video_in
    csv_in = args.csv_in
    if args.frame_delay <= 0:
        import sys
        print '--frame_delay must be greater than 0'
        sys.exit(1)
    print args

    features_to_try = args.features.strip().split(',')
    args_dict = args.__dict__
    del (args_dict['features'])
    del (args_dict['csv_out_base'])
    del (args_dict['video_in'])
    del (args_dict['csv_in'])
    init_header, init_row = zip(*sorted(list(args_dict.iteritems())))
    init_header, init_row = list(init_header), list(init_row)

    print 'Retrieving %d frames from %s' % (args.num_frames, video_in)
    video_frames = VideoUtils.get_all_frames(args.num_frames, video_in,
                                             scale=args.scale, interval=1)
    print 'Retrieving %d labels from %s' % (args.num_frames, csv_in)
    # 0 represents no difference between 2 frames, 1 represents difference
    Y_truth = DataUtils.get_differences(csv_in, args.object,
                                        limit=args.num_frames, interval=1,
                                        delay=args.frame_delay)

    header = init_header + [
        'feature', 'distance metric', 'threshold', 'filtration',
        'true positive ratio'
    ]
    rows = []
    for feature_type in features_to_try:
        row_with_feat = init_row[:]
        row_with_feat.append(feature_type)
        print feature_type
        feature_fn, get_distance_fn, dist_metrics_to_try = get_feature_and_dist_fns(feature_type)
        features = get_features(feature_fn, video_frames)
        for dist_metric in dist_metrics_to_try:
            recorder = StatsUtils.OutputRecorder(
                '%s_%s_%s.csv' % (csv_out_base, feature_type, dist_metric))
            row = row_with_feat[:]
            row.append(dist_metric)
            print dist_metric
            dists = get_distances(get_distance_fn(dist_metric), features, args.frame_delay)

            prev_thresh = None
            prev_metrics = None
            best_Y_preds = None
            thresholds_to_try = np.linspace(np.min(dists), np.max(dists), 250)
            for thresh in thresholds_to_try[1:]:
                Y_preds = dists > thresh
                metrics = evaluate_model(Y_preds, Y_truth)
                if metrics['false negative ratio'] > 0.01:
                    break
                prev_metrics = metrics
                prev_thresh = thresh
                best_Y_preds = Y_preds

            if not prev_metrics:
                prev_thresh = 0.0
                prev_metrics = {'filtration': 0.0, 'true positive ratio': 0.0}
            print prev_thresh, prev_metrics['filtration'], prev_metrics['true positive ratio']
            _row = row[:]
            _row.append(prev_thresh)
            for key in ['filtration', 'true positive ratio']:
                val = prev_metrics[key]
                _row.append(val)
            rows.append(_row)

            for i in xrange(args.frame_delay):
                recorder.add_row(False, args.object)
            if best_Y_preds is not None:
                for pred in best_Y_preds:
                    recorder.add_row(pred, args.object)
            recorder.output_csv()

    StatsUtils.output_csv(csv_out_base + '_summary.csv',
                          np.array(rows), np.array(header))