def __init__(self, hparams, train_size: int, class_weight: Optional[Tensor] = None):
    """Create the VAE, its optimizer/scheduler and loss, and set up logging.

    Reads from ``hparams``: ``learning_rate``, ``eps``, ``weight_decay``,
    ``scheduler`` (keyword arguments for ``ReduceLROnPlateau``), ``device``,
    ``out_device``, ``logdir`` and ``print_params``.

    ``train_size`` and ``class_weight`` are accepted but not used here.
    """
    # model
    self.model = VAE()

    # optimizer and learning-rate scheduler
    self.optimizer = torch.optim.Adam(
        self.model.parameters(),
        lr=hparams.learning_rate,
        eps=hparams.eps,
        weight_decay=hparams.weight_decay,
    )
    self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, **hparams.scheduler)

    # criterion: element-wise BCE on logits (no reduction)
    self.bce = nn.BCEWithLogitsLoss(reduction='none')

    # device placement; the helper returns the device passed to `summary`
    device_for_summary = self.__init_device(hparams.device, hparams.out_device)

    # TensorBoard writer; the text dumps below go into its log directory
    self.writer = SummaryWriter(logdir=hparams.logdir)
    log_root = self.writer.logdir

    # model summary, written only once per log directory
    # TODO: (40, 11) is a placeholder for the real dummy input size
    summary_file = Path(log_root, 'summary.txt')
    if not summary_file.exists():
        print_to_file(summary_file,
                      summary,
                      (self.model, (40, 11)),
                      {'device': device_for_summary})

    # hyperparameter dump, written only once per log directory
    hparam_file = Path(log_root, 'hparams.txt')
    if not hparam_file.exists():
        print_to_file(hparam_file, hparams.print_params)
def __init__(self, hp):
    """Create the WaveUNet model, loss, optimizer and logging artefacts.

    Reads from ``hp``: ``model['ch_double']``, ``learning_rate``,
    ``weight_decay``, ``device``, ``out_device``, ``logdir``,
    ``dummy_input`` and ``print_params``.
    """
    # TODO: model, criterion
    self.loss = nn.L1Loss(reduction='none')
    self.model = waveunet_residual.WaveUNet(ch_double=hp.model['ch_double'])

    # optimizer
    self.optimizer = AdamW(self.model.parameters(),
                           lr=hp.learning_rate,
                           weight_decay=hp.weight_decay)

    # device placement; the helper returns the device passed to `summary`
    device_for_summary = self._init_device(hp.device, hp.out_device)

    # TensorBoard writer; the text dumps below go into its log directory
    self.writer = SummaryWriter(log_dir=hp.logdir)
    log_root = self.writer.log_dir

    # model summary, written only once per log directory
    summary_file = Path(log_root, 'summary.txt')
    if not summary_file.exists():
        print_to_file(summary_file,
                      summary,
                      (self.model, hp.dummy_input),
                      {'device': device_for_summary})

    # hyper-parameter dump, written only once per log directory
    hp_file = Path(log_root, 'hp.txt')
    if not hp_file.exists():
        print_to_file(hp_file, hp.print_params)

    # names of the evaluation metrics
    self.metric = ['SegSNR', 'fwSegSNR', 'PESQ', 'STOI']
def post_enewstree_forum(post_url, src):
    """ Publish a new thread on an enewstree (Discuz) forum board.

    @param post_url: board URL, e.g.
        http://enewstree.com/discuz/forum.php?mod=forumdisplay&fid=47
    @type post_url: str
    @param src: user name, password, subject, thread content, etc.
        ``src['subject']`` and ``src['content']`` are decoded as UTF-8 here.
    @type src: dict
    @return: (thread URL, log text); the URL is '' when login fails, when
        the board id or form hash cannot be found, or when the subject is
        not present in the response page
    @rtype: (str, str)
    """
    logger = utils.RAPLogger(post_url)
    sess = utils.RAPSession(src)

    # Step 1: log in
    if not login_enewstree(post_url, sess, src):
        logger.error(' Login Error')
        return ('', str(logger))
    logger.info(' Login OK')

    # Step 2: open the "new thread" form to obtain the form hash.
    # A missing fid / formhash is reported through the normal error return
    # instead of surfacing as an IndexError.
    fid_match = re.search(r'fid=(\d*)', post_url)
    if fid_match is None:
        logger.error(' fid not found in post_url')
        return ('', str(logger))
    fid = fid_match.group(1)

    newthread_url = 'http://enewstree.com/discuz/forum.php?mod=post&action=newthread&fid=' + fid
    resp = sess.get(newthread_url)
    formhash_match = re.search(r'formhash=(.*?)"', resp.content)
    if formhash_match is None:
        logger.error(' formhash not found')
        return ('', str(logger))
    formhash = formhash_match.group(1)
    logger.info('formhash:' + formhash)

    # NOTE(review): the sec* / seccode* values below are hard-coded captcha
    # fields copied from a captured request - confirm the site accepts them.
    payload = {
        'formhash': formhash,
        'posttime': int(time.time()),
        'wysiwyg': '1',
        'subject': src['subject'].decode('utf8').encode(CHARSET, 'ignore'),
        'message': src['content'].decode('utf8').encode(CHARSET, 'ignore'),
        'allownoticeauthor': '1',
        'usesig': '1',
        'secqaahash': 'qSxqZkkW',
        'secanswer': '1776',
        'seccodehash': 'cSxqZkkW',
        'seccodemodid': 'forum::post',
        'seccodeverify': 'ec48',
        'save': '',
        'uploadalbum': '-2',
        'newalbum': '',
    }

    # Step 3: submit the new-thread form
    resp = sess.post(newthread_url + '&extra=&topicsubmit=yes',
                     data=payload,
                     headers={'Referer': post_url})
    utils.print_to_file(resp.content)

    # The post succeeded if the subject shows up in the response page
    if src['subject'].decode('utf8') not in resp.content.decode(CHARSET):
        logger.error(' Post Error')
        return ('', str(logger))
    logger.info(' Post OK')

    url = resp.url
    # print(x) with one argument prints the same text on Python 2 and 3
    print(url)
    return (url, str(logger))
def main(test_epoch: int):
    """Train (when ``test_epoch == -1``) and then evaluate on the test set.

    With ``test_epoch == -1`` the model is trained for ``hp.num_epochs``
    epochs, saving a checkpoint after every epoch, and the test uses the
    last epoch index. Any other value skips training and is passed straight
    to ``runner.test``.
    """
    train_loader, valid_loader, test_loader = data_manager.get_dataloader(hp)
    runner = Runner(hp)

    if test_epoch == -1:
        # TODO: add all the evaluation metrics
        loss_layout = {'loss': ['Multiline', ['loss/train', 'loss/valid']]}
        eval_layout = {
            'PESQ': ['Multiline', ['eval/PESQ_out', 'eval/PESQ_x']],
            'STOI': ['Multiline', ['eval/STOI_out', 'eval/STOI_x']],
            'SegSNR': ['Multiline', ['eval/SegSNR_out', 'eval/SegSNR_x']],
            'fwSegSNR': ['Multiline', ['eval/fwSegSNR_out', 'eval/fwSegSNR_x']],
        }
        runner.writer.add_custom_scalars({'train': loss_layout, 'valid': eval_layout})

        def log_loss(tag, value, step):
            """Record `value` under `tag` unless it is None or non-finite."""
            if value is None:
                return
            if torch.isfinite(torch.tensor(value)):
                runner.writer.add_scalar(tag, value, step)

        epoch = 0  # stays 0 when hp.num_epochs is 0
        print(f'Training on {runner.str_device}')
        for epoch in range(hp.num_epochs):
            # training
            train_loss, _ = runner.run(train_loader, 'train', epoch)
            log_loss('loss/train', train_loss, epoch)

            # checkpoint save
            torch.save(runner.model.module.state_dict(),
                       Path(runner.writer.log_dir, f'{epoch}.pt'))

            # validation
            valid_loss, _ = runner.run(valid_loader, 'valid', epoch)
            log_loss('loss/valid', valid_loss, epoch)

        print('Training Finished')
        # the test always uses the last trained epoch
        test_epoch = epoch

    # test
    test_eval_outs, test_eval_xs = runner.test(test_loader, test_epoch)

    # TODO: write test result
    str_metric = ['SegSNR', 'fwSegSNR', 'PESQ', 'STOI']
    print_eval_outs, print_eval_xs = {}, {}
    for name, out_value, x_value in zip(str_metric, test_eval_outs, test_eval_xs):
        print_eval_outs[name] = out_value
        print_eval_xs[name] = x_value

    print(f'Test - Input Eval: {print_eval_xs}')
    print(f'Test - Out Eval: {print_eval_outs}')

    # the result file is written only if it does not exist yet
    path_eval = Path(hp.logdir, f'test_{test_epoch}', 'test_eval.txt')
    if not path_eval.exists():
        print_to_file(path_eval, print_eval, (print_eval_xs, print_eval_outs))

    runner.writer.close()
def e_step():
    """E-step: rebuild the module-level ``h_ik`` from ``P_cs`` and ``P_c``.

    For every context ``i`` and every sense in ``P_cs[i]`` this stores
    ``h_ik[i][sense] = P_cs[i][sense] / P_c[i]``. A context whose ``P_c[i]``
    is zero gets 0.0 for every sense instead of raising ZeroDivisionError
    (``compute_likelihood`` sets a product to 0 when a feature is unseen).
    The result is also dumped to 'h_ik.json'.
    """
    h_ik.clear()
    print("E-step...")
    for i, context in enumerate(P_cs):
        responsibilities = {}
        for sense, likelihood in context.items():
            evidence = P_c[i]
            responsibilities[sense] = likelihood / evidence if evidence else 0.0
        h_ik.append(responsibilities)
    utils.print_to_file('h_ik.json', h_ik)
def select_k_best(X, y, k):
    """Return the names of the ``k`` best columns of ``X`` by chi2 score.

    :param X: feature table; its ``columns`` supply the feature names
    :param y: target passed to ``SelectKBest.fit``
    :param k: number of features to keep
    :return: list of selected column names. When ``X`` has ``k`` columns or
        fewer, all column names are returned unchanged and nothing is
        logged. This path used to return ``(name, 0)`` tuples, which did not
        match the normal return shape.
    """
    features = list(X.columns)
    if len(features) <= k:
        return features

    selector = SelectKBest(chi2, k=k)
    selector.fit(X, y)
    new_features = [
        name for name, keep in zip(features, selector.get_support()) if keep
    ]

    print_to_file('List of {} features selected by select k best'.format(k),
                  'select_k_best.txt')
    for feat in new_features:
        print_to_file(feat, 'select_k_best.txt')
    return new_features
def __init__(self, hparams, train_size: int, class_weight: Optional[Tensor] = None):
    """Create the UNet, criterion, optimizer/scheduler and logging artefacts.

    Reads from ``hparams``: ``model`` (keyword arguments for ``UNet``, also
    indexed as ``model['stride'][1]``), ``hop_size``, ``sample_rate``,
    ``learning_rate``, ``weight_decay``, ``batch_size``, ``scheduler``,
    ``device``, ``out_device`` and ``logdir``.

    ``train_size`` is passed to the scheduler as ``epoch_size``;
    ``class_weight`` is stored on the instance unchanged.
    """
    # model, criterion, and prediction
    self.model = UNet(ch_in=2, ch_out=1, **hparams.model)
    self.sigmoid = torch.nn.Sigmoid()
    self.criterion = torch.nn.BCEWithLogitsLoss(reduction='none')
    self.class_weight = class_weight

    # for prediction: seconds per frame, and frame indices derived from 6 / 12
    self.frame2time = hparams.hop_size / hparams.sample_rate
    self.T_6s = round(6 / self.frame2time) - 1
    self.T_12s = round(12 / self.frame2time) - 1
    self.metrics = ('precision', 'recall', 'F1')

    # optimizer and scheduler
    self.optimizer = AdamW(self.model.parameters(),
                           lr=hparams.learning_rate,
                           weight_decay=hparams.weight_decay)
    self.scheduler = CosineLRWithRestarts(self.optimizer,
                                          batch_size=hparams.batch_size,
                                          epoch_size=train_size,
                                          **hparams.scheduler)
    self.scheduler.step()
    self.f1_last_restart = -1

    # device placement; the helper returns the device passed to `summary`
    device_for_summary = self._init_device(hparams.device, hparams.out_device)

    # TensorBoard writer; the text dumps below go into its log directory
    self.writer = SummaryWriter(logdir=hparams.logdir)
    log_root = self.writer.logdir

    # model summary, written only once per log directory
    summary_file = Path(log_root, 'summary.txt')
    if not summary_file.exists():
        dummy_shape = (2, 128, 16 * hparams.model['stride'][1]**4)
        print_to_file(summary_file,
                      summary,
                      (self.model, dummy_shape),
                      {'device': device_for_summary})

    # hyperparameter dump: one "name: value" line per attribute of hparams
    hparam_file = Path(log_root, 'hparams.txt')
    if not hparam_file.exists():
        with hparam_file.open('w') as out:
            for name in vars(hparams):
                print(f'{name}: {getattr(hparams, name)}', file=out)
def __init__(self, path_state_dict=''):
    """Create the model, criterion and optimizer, optionally restoring state.

    :param path_state_dict: path of a checkpoint holding a
        ``(model_state, optimizer_state)`` pair; when empty nothing is loaded
    :raises Exception: when the checkpoint cannot be loaded into the model
        or optimizer; the underlying error is chained as ``__cause__``
    """
    self.model_name = hp.model_name
    # NOTE(review): eval() on a configuration value executes arbitrary code
    # if hp.model_name is ever user-controlled - consider a lookup table.
    module = eval(hp.model_name)
    self.model = module(**getattr(hp, hp.model_name))
    self.criterion = nn.MSELoss(reduction='none')
    self.optimizer = AdamW(self.model.parameters(),
                           lr=hp.learning_rate,
                           weight_decay=hp.weight_decay,
                           )

    self.__init_device(hp.device, hp.out_device)

    self.scheduler: Optional[CosineLRWithRestarts] = None
    self.max_epochs = hp.n_epochs
    self.loss_last_restart = float('inf')

    self.writer: Optional[CustomWriter] = None

    self.valid_eval_sample: Dict[str, Any] = dict()

    # Load State Dict
    if path_state_dict:
        st_model, st_optim = torch.load(path_state_dict, map_location=self.in_device)
        try:
            # a wrapped model exposes the real network as `.module`
            target = self.model.module if hasattr(self.model, 'module') else self.model
            target.load_state_dict(st_model)
            self.optimizer.load_state_dict(st_optim)
        except Exception as err:
            # `except Exception` (not a bare except) lets KeyboardInterrupt /
            # SystemExit through; `from err` keeps the original cause.
            raise Exception('The model is different from the state dict.') from err

    # model summary, written only once per log directory
    path_summary = hp.logdir / 'summary.txt'
    if not path_summary.exists():
        print_to_file(
            path_summary,
            summary,
            (self.model, hp.dummy_input_size),
            dict(device=self.str_device[:4])
        )

    # hyperparameter dump, rewritten on every construction
    with (hp.logdir / 'hparams.txt').open('w') as f:
        f.write(repr(hp))
def compute_likelihood():
    """Recompute ``P_cs`` and ``P_c`` and return the corpus log-likelihood.

    * ``P_cs[i][sense]`` is the product of ``P_fs[sense][lemma]`` over the
      sense-tagged tokens of sentence ``i`` whose lemma differs from the
      word being disambiguated; it becomes 0 as soon as a lemma is missing
      from ``P_fs[sense]``.
    * ``P_c[i]`` is the sum over senses of ``P_cs[i][sense] * P_s[sense]``.

    Both lists are dumped to JSON files. A KeyError raised while processing
    a word (unknown lemma or sense) is printed and that word is skipped.

    :return: ``log(np.prod(P_c))``
    """
    # P(c_i|s_k) = PROD_{f_j in c_i} P(f_j|s_k)
    P_cs.clear()
    P_c.clear()

    for text in corpus:
        for sentence in text:
            context_likelihoods = {}
            for word in (w for w in sentence if w['sense'] != ''):
                try:
                    for possible_sense in possible_senses[word['lemma']]:
                        product = 1
                        for feature in sentence:
                            if feature['sense'] == '' or feature['lemma'] == word['lemma']:
                                continue
                            feature_probs = P_fs[possible_sense]
                            lemma = feature['lemma']
                            # direct dict lookup instead of scanning every key
                            if lemma in feature_probs:
                                product *= feature_probs[lemma]
                            else:
                                product = 0
                        context_likelihoods[possible_sense] = product
                except KeyError:
                    print('KeyError: ' + word['lemma'])
            P_cs.append(context_likelihoods)

    utils.print_to_file('P_cs.json', P_cs)

    # P(c_i) = SUM_k P(c_i|s_k) P(s_k)
    for context in P_cs:
        evidence = 0
        for sense in context:
            evidence += context[sense] * P_s[sense]
        P_c.append(evidence)

    utils.print_to_file('P_c.json', P_c)

    # l(C) -> likelihood of the corpus
    # NOTE(review): the product of many probabilities can underflow to 0
    # before the log is taken; summing logs would avoid that - confirm
    # before changing, since it alters the reported value.
    prod_contexts = np.prod(P_c)
    likelihood = log(prod_contexts)
    print("Likelihood:", likelihood)

    return likelihood
def __init__(self, path_state_dict=''):
    """Create the model, criterions and optimizer, optionally restoring state.

    :param path_state_dict: path of a checkpoint holding a
        ``(model_state, optimizer_state)`` pair; when empty nothing is loaded
    :raises Exception: when loading the checkpoint raises RuntimeError
    """
    model_cls = eval(hp.model_name)
    self.model = model_cls(**getattr(hp, hp.model_name))

    # hp.criterion_names may be a single name or a sequence of names
    if isinstance(hp.criterion_names, str):
        criterion_names = (hp.criterion_names, )
    else:
        criterion_names = hp.criterion_names
    self.criterions = []
    for name in criterion_names:
        criterion_cls = eval(f'nn.{name}')
        self.criterions.append(criterion_cls(reduction='sum'))

    self.__init_device(hp.device, hp.out_device)

    self.writer: CustomWriter = None

    self.optimizer = optim.Adam(self.model.parameters(),
                                lr=hp.learning_rate,
                                weight_decay=hp.weight_decay)

    # Load State Dict
    if path_state_dict:
        st_model, st_optim = torch.load(path_state_dict, self.in_device)
        try:
            # DataParallel keeps the real network in `.module`
            if isinstance(self.model, nn.DataParallel):
                network = self.model.module
            else:
                network = self.model
            network.load_state_dict(st_model)
            self.optimizer.load_state_dict(st_optim)
        except RuntimeError:
            raise Exception('The model is different from the state dict.')

    # model summary, written only once per log directory
    summary_file = hp.logdir / 'summary.txt'
    if not summary_file.exists():
        print_to_file(summary_file,
                      summary,
                      (self.model, hp.dummy_input_size),
                      {'device': self.str_device[:4]})

    # hyperparameter dump, rewritten on every construction
    with (hp.logdir / 'hparams.txt').open('w') as out:
        out.write(repr(hp))
def mutal_information_filter(data, feature_list, features_map):
    """Drop features that share too much mutual information with others.

    A lower-triangular score matrix is filled for every pair of features of
    the same type, then the feature with the most scores above
    ``FILTER_THRESHOLD`` is removed repeatedly until no score exceeds it.
    Progress is logged to 'mutual.txt'.

    Fixes over the previous version:
      * ``mutual_info_regression`` was called without arguments and its
        result immediately overwritten; it is now used for continuous
        pairs and ``normalized_mutual_info_score`` for the others.
      * the columns were read through undefined names ``feature`` /
        ``feature2``.
      * ``data.drop`` returns a new frame; the result is now kept so the
        returned ``data`` reflects the removed columns.

    NOTE(review): ``mutual_info_regression`` is not normalised, so the same
    ``FILTER_THRESHOLD`` is applied to two different scales - confirm.

    :param data: table indexed by feature name (``data[name]``)
    :param feature_list: feature names; mutated in place
    :param features_map: name -> object with ``get_type()``; mutated in place
    :return: ``(data, feature_list, features_map)`` after filtering
    """
    print_to_file(' Computing Mutual Information Scores', 'mutual.txt')
    n_features = len(feature_list)
    scores = np.zeros((n_features, n_features))
    print_to_file(',{}'.format(','.join(feature_list)), 'mutual.txt')

    for i in range(n_features):
        name_i = feature_list[i]
        for j in range(i):
            name_j = feature_list[j]
            type_i = features_map[name_i].get_type()
            type_j = features_map[name_j].get_type()
            if type_i != type_j:
                continue
            if type_i == FeatureType.CONTINUOUS:
                # one-column X, target y -> a single score
                scores[i][j] = mutual_info_regression(data[[name_i]], data[name_j])[0]
            else:
                scores[i][j] = normalized_mutual_info_score(data[name_i], data[name_j])
        print_to_file(
            '{},{}'.format(name_i, ','.join([str(x) for x in scores[i]])),
            'mutual.txt')

    def count_over_threshold(matrix):
        """Per row, the number of scores above FILTER_THRESHOLD."""
        return [sum(1 for value in row if value > FILTER_THRESHOLD) for row in matrix]

    over_threshold = count_over_threshold(scores)
    while any(count > 0 for count in over_threshold):
        max_index = over_threshold.index(max(over_threshold))
        dropped = feature_list[max_index]
        print_to_file(
            f' Deleting feature number {max_index} which is {dropped}',
            'mutual.txt')
        scores = np.delete(scores, max_index, 0)
        scores = np.delete(scores, max_index, 1)
        data = data.drop(dropped, axis=1)
        features_map.pop(dropped)
        feature_list.pop(max_index)
        over_threshold = count_over_threshold(scores)

    print_to_file(
        f' Feature list after filter {feature_list} for total of {len(feature_list)} features',
        'mutual.txt')
    return data, feature_list, features_map
def sfs(X, y, features, feature_map, clf, outfile, eps=1e-8):
    """Greedy sequential forward selection driven by 5-fold CV score.

    Each round tries adding every remaining candidate to the already
    selected set and keeps the one with the highest mean
    ``cross_val_score``. The search stops when the best score improves on
    the previous round by less than ``eps``. A categorical feature
    contributes all of its ``sub_features`` columns.

    :return: the selected feature names, in selection order
    """

    def columns_of(name):
        """Columns of X that represent feature `name`."""
        entry = feature_map[name]
        if isinstance(entry, CategoricalFeature):
            return list(entry.sub_features)
        return [name]

    chosen = []          # selected feature names
    chosen_columns = []  # the X columns backing `chosen`
    history = []         # best score after each accepted round
    score_diff = 1
    previous_best = 0
    epoch = 0

    print_to_file('Sequential forward selection search:', outfile)
    remaining = features.copy()

    while score_diff > eps:
        epoch += 1
        winner, winner_score = None, 0

        # Forward search: score every remaining candidate
        for candidate in remaining:
            trial_columns = chosen_columns + columns_of(candidate)
            score = cross_val_score(estimator=clf,
                                    X=X[trial_columns],
                                    y=y,
                                    cv=5).mean()
            if score > winner_score:
                winner, winner_score = candidate, score

        score_diff = winner_score - previous_best
        if score_diff < eps:
            break

        previous_best = winner_score
        chosen.append(winner)
        history.append(previous_best)
        remaining.remove(winner)
        chosen_columns.extend(columns_of(winner))
        print(f'{epoch},', end='')

    # report the selected features, best score first
    ranked = sorted(zip(chosen, history), key=lambda pair: pair[1], reverse=True)
    print_to_file('Features and scores for Sequential forward search:', outfile)
    print_to_file('Feature,Score', outfile)
    for name, score in ranked:
        print_to_file('{},{}'.format(name, score), outfile)

    return chosen
def m_step():
    """M-step: re-estimate ``P_fs`` and ``P_s`` from ``h_ik``.

    * ``P_fs[sense][feature]`` becomes the total ``h_ik`` mass of ``sense``
      over the contexts containing ``feature``, divided by ``Z[feature]``,
      the same total summed over all senses that list the feature.
    * ``P_s[sense]`` becomes the total mass of ``sense`` divided by the
      total mass of all senses.

    Fixes over the previous version:
      * ``Z[feature]`` was reset to 0 for every (sense, feature) pair, so
        every estimate was ``sum / sum`` (1.0, or ZeroDivisionError when the
        sum was 0). ``Z`` is now accumulated across senses before dividing,
        and a feature with zero total mass gets probability 0.0.
      * contexts are matched to ``h_ik`` by their position in the flattened
        corpus (the order in which ``P_cs`` / ``h_ik`` are filled) rather
        than by the sentence index inside each text.
      * lemma sets are built once instead of inside the innermost loop.

    Both tables are dumped to JSON files.

    :raises ZeroDivisionError: when the total mass over all senses is zero
    """
    print("M-step...")

    # One lemma set per context, in the same flattened order as h_ik.
    context_lemmas = [
        {token['lemma'] for token in sentence}
        for text in corpus
        for sentence in text
    ]

    raw_mass = {}  # raw_mass[sense][feature]: mass before normalisation
    Z = {}         # Z[feature]: normaliser, summed over senses
    for sense in P_fs:
        raw_mass[sense] = {}
        for feature in P_fs[sense]:
            mass = 0
            for lemmas, responsibilities in zip(context_lemmas, h_ik):
                if feature in lemmas:
                    # a context carries no mass for senses it does not list
                    mass += responsibilities.get(sense, 0)
            raw_mass[sense][feature] = mass
            Z[feature] = Z.get(feature, 0) + mass

    for sense, masses in raw_mass.items():
        for feature, mass in masses.items():
            normaliser = Z[feature]
            P_fs[sense][feature] = mass / normaliser if normaliser else 0.0

    utils.print_to_file('P_fs.json', P_fs)

    # P(s_k): share of the total responsibility mass held by each sense
    sense_mass = {}
    for sense in P_fs:
        mass = 0
        for responsibilities in h_ik:
            mass += responsibilities.get(sense, 0)
        sense_mass[sense] = mass

    total = 0
    for mass in sense_mass.values():
        total += mass

    for sense, mass in sense_mass.items():
        P_s[sense] = mass / total

    utils.print_to_file('P_s.json', P_s)
def explore_repository(repo_name, tree_str='', files_dict=None, depth=0):
    """
    Main routine of the application.
    Recursively walks the repository.

    Folders are visited first (depth-first), then the files of the current
    folder. At depth 0 the result is also written with ``print_to_file``.

    :param repo_name: repository (or sub-folder) reference to fetch
    :param tree_str: tree text accumulated so far
    :param files_dict: per-extension statistics accumulated so far; a new
        dict is created when None
    :param depth: current recursion depth (0 for the repository root)
    :return: ``(tree_str, files_dict)``, or None when the content of
        ``repo_name`` could not be fetched
    """
    repository_content = pull_folder_content(repo_name)
    if not repository_content:
        return None

    if depth == 0:
        print('[+] Scraping no repositório ' + repo_name + ' iniciado...')
    if files_dict is None:
        files_dict = {}

    folders, files = extract_hrefs(repository_content)

    for folder in folders:
        tree_str += generate_str_with_spaces(depth,
                                             get_folder_or_file_name(folder),
                                             is_folder=True)
        explored = explore_repository(folder,
                                      tree_str=tree_str,
                                      files_dict=files_dict,
                                      depth=depth + 1)
        # A sub-folder that cannot be fetched returns None; keep what has
        # been collected so far instead of failing on the tuple unpacking.
        if explored is not None:
            tree_str, files_dict = explored

    for file_ref in files:
        filename, lines, bytes_, extension = explore_file(file_ref)
        files_dict = include_extension_in_files_dict(files_dict,
                                                     lines=lines,
                                                     bytes_=bytes_,
                                                     extension=extension)
        tree_str += generate_str_with_spaces(depth,
                                             filename,
                                             is_folder=False,
                                             loc=lines)

    if depth == 0:
        print_to_file(repo_name, tree_str, files_dict)
        print('[+] Scraping no repositório ' + repo_name + ' finalizado!')

    return tree_str, files_dict
def classify_reviews(self, folderpath):
    """Classify every file matching ``folderpath`` and report the totals.

    Each file is opened and passed to ``self.classify``; a positive result
    counts as positive, a negative one as negative and zero as
    undetermined. The summary is printed and appended to the report via
    ``utils.print_to_file("testResults.txt", ...)``.

    :param folderpath: glob pattern selecting the files to classify
    :return: None (returns early when no file matches)
    """
    filenames = glob.glob(folderpath)
    totalnumfiles = len(filenames)
    print("Testing data set")
    print("Number of files:{0}".format(totalnumfiles))
    if totalnumfiles == 0:
        print("No files to classify, exiting...")
        return

    negc = 0
    posc = 0
    undc = 0
    total = 0
    for f in filenames:
        with open(f, 'r') as infile:
            clsfy = self.classify(infile)
        total += clsfy
        if clsfy > 0:
            posc += 1
        elif clsfy < 0:
            negc += 1
        elif clsfy == 0:
            undc += 1

    # float() keeps the average fractional under Python 2, where int / int
    # floors the result.
    average = float(total) / totalnumfiles
    results = """
    Results...
    Average classification: {0}
    Percentage of positve classifications: %{1}
    Percentage of negative classifications: %{2}
    Percentage of undertermined classifications: %{3}
    """.format(average,
               utils.percentage(posc, totalnumfiles),
               utils.percentage(negc, totalnumfiles),
               utils.percentage(undc, totalnumfiles))
    # print(x) with one argument prints the same text on Python 2 and 3
    print(results)
    utils.print_to_file(
        "testResults.txt",
        "\nFolderpath: {0}\nNumber of Files: {1}\n{2}".format(
            folderpath, totalnumfiles, results))
def iterative_k_best(data, clf, eps=1e-8):
    """Grow ``k`` in ``select_k_best`` until the CV score stops improving.

    For k = 1, 2, ... the k best features are selected and ``clf`` is
    scored with 5-fold cross-validation on those columns only. The search
    stops when the score improves on the previous one by ``eps`` or less,
    or when every column has been used. Every evaluated (k, score,
    features) row is written to 'itk.csv'.

    Fixes over the previous version: the score was computed on the full
    ``X`` (so it never depended on ``k``) and ``last_score`` was never
    updated, which made the loop run forever whenever the score exceeded
    ``eps``.

    :param data: input passed to ``data_get_label`` to obtain ``X, y``
    :param clf: estimator passed to ``cross_val_score``
    :param eps: minimum score improvement required to keep growing ``k``
    """
    X, y = data_get_label(data)
    n_features = len(X.columns)

    last_score = 0
    res_for_k = []
    for k in range(1, n_features + 1):
        selected = select_k_best(X, y, k)
        # tolerate (name, score) pairs as well as bare column names
        features = [item[0] if isinstance(item, tuple) else item
                    for item in selected]
        new_score = cross_val_score(estimator=clf, X=X[features], y=y,
                                    cv=5).mean()
        res_for_k.append((new_score, features))
        if new_score - last_score <= eps:
            break
        last_score = new_score

    print_to_file('Feature sets and scores for iterative best k features:',
                  'itk.csv')
    print_to_file('K,Score,Features', 'itk.csv')
    for k, (score, features) in enumerate(res_for_k, start=1):
        print_to_file('{},{},{}'.format(k, score, ','.join(features)),
                      'itk.csv')
def new_mutual_info_filter(data, feature_list, features_map):
    """Score every feature against all others and drop strongly related ones.

    Row ``i`` of the score matrix holds the mutual information between
    feature ``i`` (as target) and each other feature (as predictor), using
    ``mutual_info_regression`` for continuous targets and
    ``mutual_info_classif`` otherwise. For every pair with a score above 3
    the higher-indexed feature is removed from ``feature_list``. Progress
    is logged to 'mutual.txt'.

    Fix over the previous version: the pairs were resolved by index against
    ``feature_list`` while it was being shortened, so later pairs named and
    removed the wrong features (or indexed past the end). Names are now
    taken from a snapshot made before any removal, pairs are processed in
    sorted order, and a feature is removed at most once.

    :param data: table indexed by feature name
    :param feature_list: feature names; mutated in place
    :param features_map: name -> object with ``get_type()``
    :return: ``(scores, feature_list)``; ``scores`` keeps its original size
    """
    features_num = len(feature_list)
    scores = np.zeros((features_num, features_num))
    print_to_file(' Computing Mutual Information Scores', 'mutual.txt')
    print_to_file(',{}'.format(','.join(feature_list)), 'mutual.txt')

    for i, target in enumerate(feature_list):
        predictors = feature_list.copy()
        predictors.remove(target)
        X = data[predictors]
        discrete_features = [
            features_map[feature].get_type() != FeatureType.CONTINUOUS
            for feature in predictors
        ]
        if features_map[target].get_type() == FeatureType.CONTINUOUS:
            f_scores = mutual_info_regression(
                X, data[target], discrete_features=discrete_features)
        else:
            f_scores = mutual_info_classif(X,
                                           data[target].astype(int),
                                           discrete_features=discrete_features)
        # f_scores has no entry for the target itself: shift indices past i
        for j in range(i):
            scores[i][j] = f_scores[j]
        for j in range(i + 1, features_num):
            scores[i][j] = f_scores[j - 1]
        print_to_file(
            '{},{}'.format(feature_list[i],
                           ','.join([str(x) for x in scores[i]])),
            'mutual.txt')

    print_to_file(
        'overall there are {} pairs of strongly corelated features'.format(
            len(scores[scores > 3]) / 2), 'mutual.txt')

    # unordered index pairs (low, high) whose score exceeds the threshold
    rows, cols = np.nonzero(scores > 3)
    pairs = sorted({(min(r, c), max(r, c)) for r, c in zip(rows, cols)})

    names = list(feature_list)  # snapshot: indices refer to this ordering
    for low, high in pairs:
        print_to_file(
            'Features {} and {} with score {}'.format(names[low],
                                                      names[high],
                                                      scores[low][high]),
            'mutual.txt')
        if names[high] in feature_list:
            print_to_file('Dropping feature {}'.format(names[high]),
                          'mutual.txt')
            feature_list.remove(names[high])

    return scores, feature_list
answers = 0
correct_answers = 0

# Inputs produced by the preprocessing stage
allwords = utils.read_from_file('allwords.json')
allwords_with_sensekey = utils.read_from_file('allwords_with_sensekey.json')
possible_senses = utils.read_from_file('possibile_senses.json')
corpus = utils.read_from_file('corpus.json')

print("Counting occurrencies...")

# count(w_j): occurrences of every word in the corpus,
# ordered by word, descending
count_w = list(
    reduce.reduceByKey(lambda x, y: x + y, [(w, 1) for w in allwords]))
count_w = sorted(count_w, key=lambda pair: pair[0], reverse=True)
utils.print_to_file('count_w.json', count_w)

# count(s_i, w_j): occurrences of every sense-tagged (word, sense) entry,
# ordered by count, descending
count_ws = list(
    reduce.reduceByKey(
        lambda x, y: x + y,
        [(w, 1)
         for w in utils.matrix_to_array(allwords_with_sensekey)
         if w[1] != '']))
count_ws = sorted(count_ws, key=lambda pair: pair[1], reverse=True)
utils.print_to_file('count_sw.json', count_ws)

# count(s_i): occurrences of every sense, ordered by sense, descending
count_s = list(
    reduce.reduceByKey(
        lambda x, y: x + y,
        [(w[1], 1)
         for w in utils.matrix_to_array(allwords_with_sensekey)
         if w[1] != '']))
count_s = sorted(count_s, key=lambda pair: pair[0], reverse=True)
utils.print_to_file('count_s.json', count_s)
suffix='Complete', length=50) allwords_with_sensekey_formatted = list( map( lambda w: { "word": w[0], "sense": w[1] }, filter(lambda w: w[1] != '', utils.matrix_to_array(allwords_with_sensekey)))) corpus.append([[{ "lemma": y[0], "sense": y[1] } for y in x] for x in allwords_with_sensekey]) for x in allwords_with_sensekey_formatted: try: possible_senses[x['word']].append(x['sense']) except KeyError: possible_senses[x['word']] = [x['sense']] possible_senses_copy = possible_senses.copy() for key in possible_senses_copy.keys(): possible_senses[key] = list(set(possible_senses[key])) utils.print_to_file('allwords.json', allwords) utils.print_to_file('allwords_with_sensekey.json', allwords_with_sensekey) utils.print_to_file('possibile_senses.json', possible_senses) utils.print_to_file('corpus.json', corpus)