import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

import use  # project helper module: noun_tokenize, savemodel, ...


def build_system1(texts_all, n=2):
    # ===================== create the model =======================
    tfidftransformer = TfidfVectorizer(ngram_range=(1, n))  # , max_df=0.6)
    texts_all_tf = tfidftransformer.fit_transform(texts_all)
    # id -> word mapping; dict.items() replaces the Python 2 .iteritems()
    # (built for inspection, not used below)
    vocab_map = {v: k for k, v in tfidftransformer.vocabulary_.items()}

    master_phrases_vectors = []
    for text_tf, text in zip(texts_all_tf, texts_all):
        text_tf = text_tf.todense()
        phrases = list(set(use.noun_tokenize(text)))
        # vocabulary indices of the terms that make up each phrase
        phrases_vectors = [list(tfidftransformer.transform([x])[0].indices)
                           for x in phrases]
        phrases_dict = {}
        for x, phrase in zip(phrases_vectors, phrases):
            # score a phrase by the mean tf-idf weight of its terms in this document
            x = np.array(text_tf).flatten()[x]
            phrases_dict[phrase] = np.mean(x)
        # a list of dictionaries with {phrase: score}
        master_phrases_vectors.append(phrases_dict)

    # ===================== save the model =================
    use.savemodel(master_phrases_vectors, 'master_phrases_vectors_1')
    use.savemodel(texts_all_tf, 'texts_all_tf_1')
    use.savemodel(tfidftransformer, 'tfidftransformer_1')
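# =====================================================================
# `use.noun_tokenize` is a project-specific helper. A minimal sketch of what it
# might look like, assuming NLTK's regexp chunker over a simple adjective+noun
# grammar -- the grammar and the function body here are illustrative
# assumptions, not the project's actual implementation:
import nltk


def noun_tokenize(text):
    """Extract candidate noun phrases from raw text (illustrative sketch)."""
    # requires the nltk 'punkt' and 'averaged_perceptron_tagger' data packages
    grammar = 'NP: {<JJ>*<NN.*>+}'  # optional adjectives followed by nouns
    chunker = nltk.RegexpParser(grammar)
    tree = chunker.parse(nltk.pos_tag(nltk.word_tokenize(text)))
    return [' '.join(word for word, tag in subtree.leaves())
            for subtree in tree.subtrees(filter=lambda t: t.label() == 'NP')]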
min_loss = float('Inf')
for epoch in range(1, n_epochs + 1):
    model.train()
    tr_loss, tr_acc = fit(model, train_dl, criteria, optimizer, device)
    model.eval()
    val_loss, val_acc = valid(model, valid_dl, criteria, optimizer, device)
    test_loss, test_acc = valid(model, test_dl, criteria, optimizer, device)
    log_value('Loss/train', tr_loss, epoch)
    log_value('Accuracy/train', tr_acc, epoch)
    log_value('Loss/valid', val_loss, epoch)
    log_value('Accuracy/valid', val_acc, epoch)
    log_value('Loss/test', test_loss, epoch)
    log_value('Accuracy/test', test_acc, epoch)
    # checkpoint whenever validation loss improves
    if val_loss < min_loss:
        savemodel(model, dir='siamese', device=device)
        min_loss = val_loss
    if epoch % id == 0:  # `id` shadows the builtin here; it holds the print interval
        print("epoch {} tr_loss {:.3f} acc {:.3f} valid_loss {:.3f} acc {:.3f} "
              "test_loss {:.3f} acc {:.3f}".format(
                  epoch, tr_loss, tr_acc, val_loss, val_acc,
                  test_loss, test_acc))
else:
    # for/else: runs once the loop finishes (there is no break), so after
    # training we reload the saved model and report final test metrics
    model = loadmodel(dir='siamese_lstm')
    print("loaded model\nevaluating....")
    test_loss, test_acc = valid(model, test_dl, criteria, optimizer, device)
    print("test_loss {:.3f} acc {:.3f}".format(test_loss, test_acc))
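# =====================================================================
# The loop above assumes `fit` and `valid` helpers. One plausible shape for the
# pair, assuming an ordinary single-input classification setup (a siamese model
# would consume pairs, so adapt the batch unpacking accordingly) -- the
# signatures mirror the calls above, the bodies are assumptions:
import torch


def fit(model, dl, criteria, optimizer, device):
    """One training epoch; returns mean loss and accuracy (illustrative)."""
    total_loss, correct, seen = 0.0, 0, 0
    for x, y in dl:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = criteria(out, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * y.size(0)
        correct += (out.argmax(dim=1) == y).sum().item()
        seen += y.size(0)
    return total_loss / seen, correct / seen


def valid(model, dl, criteria, optimizer, device):
    """Evaluation pass; `optimizer` is unused but kept for the shared signature."""
    total_loss, correct, seen = 0.0, 0, 0
    with torch.no_grad():
        for x, y in dl:
            x, y = x.to(device), y.to(device)
            out = model(x)
            total_loss += criteria(out, y).item() * y.size(0)
            correct += (out.argmax(dim=1) == y).sum().item()
            seen += y.size(0)
    return total_loss / seen, correct / seen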
# per-epoch tail of the training loop; the loop header is reconstructed for
# context and the train/valid steps are elided in this snippet
for epoch in range(start_epoch, start_epoch + EPOCHS):
    ...  # forward/backward passes producing train_acc, valid_acc and the losses

    scheduler.step(valid_acc)

    # save model
    state = {
        'epoch': epoch,  # last epoch
        'state_dict': net.state_dict(),
        'best_prec1': best_prec1,
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict(),
    }
    suffix = trial
    # carry the pruned structure along with the checkpoint
    if pruned:
        state['cfg'] = pruned_pkl['cfg']
        suffix += '_' + args.pruned.split('_')[-1][:-4]
    savemodel(state, is_best, checkpoint_freq, suffix, False)

    # print result
    if (epoch + 1) % 1 == 0:  # i.e. every epoch
        print('Epoch:{}/{}\nAccuracy(Train/Valid):{:.02f}/{:.02f}% '
              'Loss(Train/Valid):{:.3f}/{:.3f}'.format(
                  epoch, start_epoch + EPOCHS - 1,
                  train_acc, valid_acc, train_loss, valid_loss))

    # early stopping
    if early_stop and train_acc > 99.99:
        print('Early stop because train accuracy > 99.99.')
        break

end_training = time.time()
# variant of the same per-epoch tail, driven by a `para` config dict and a
# logger; loop header reconstructed for context
for epoch in range(start_epoch, start_epoch + para['epochs']):
    ...  # forward/backward passes producing train_acc, valid_acc and the losses

    # save model
    state = {
        'epoch': epoch,  # last epoch
        'state_dict': net.state_dict(),
        'best_prec1': best_prec1,
        'optimizer': optimizer.state_dict(),
        'scheduler': scheduler.state_dict()
    }
    state.update(para)
    suffix = para['trial']
    # save pruned structure
    if para['pruned']:
        state['cfg'] = pruned_pkl['cfg']
        suffix += '_' + args.pruned.split('_')[-1][:-4]
    save = savemodel(state, is_best, para['checkpoint_freq'], suffix, False)
    if save:
        log.log(save)

    # print result
    if (epoch + 1) % 1 == 0:  # i.e. every epoch
        log.log('Epoch:{}/{}\nAccuracy(Train/Valid):{:.02f}/{:.02f}% '
                'Loss(Train/Valid):{:.3f}/{:.3f}'.format(
                    epoch, start_epoch + para['epochs'] - 1,
                    train_acc, valid_acc, train_loss, valid_loss))

    # early stopping
    if para['early_stop'] and valid_acc > 99.5:
        log.log('Early stop because valid accuracy > 99.5.')
        break
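# =====================================================================
# `savemodel(state, is_best, checkpoint_freq, suffix, force)` is a project
# helper whose return value the variant above logs. A sketch under the common
# checkpoint pattern (periodic snapshots plus a rolling best copy); the
# directory, the filenames, and the meaning of the last flag are assumptions:
import os
import shutil

import torch


def savemodel(state, is_best, checkpoint_freq, suffix, force, save_dir='checkpoints'):
    """Checkpointing sketch: periodic snapshots plus a rolling best copy."""
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, 'model_{}.pkl'.format(suffix))
    saved = None
    # `force` (assumed): write regardless of the epoch schedule
    if force or is_best or (state['epoch'] + 1) % checkpoint_freq == 0:
        torch.save(state, path)
        saved = path
    if is_best:
        # keep an extra copy of the best model so later snapshots don't clobber it
        shutil.copyfile(path, os.path.join(save_dir,
                                           'model_{}_best.pkl'.format(suffix)))
    return saved  # the caller above logs the path when a write happened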
print("Training....") for epoch in range(1, n_epochs + 1): model.train() tr_loss, tr_acc = fit(model, train_dl, criteria, optimizer, device) model.eval() val_loss, val_acc = valid(model, valid_dl, criteria, optimizer, device) test_loss, test_acc = valid(model, test_dl, criteria, optimizer, device) log_value('Loss/train', tr_loss, epoch) log_value('Accuracy/train', tr_acc, epoch) log_value('Loss/valid', val_loss, epoch) log_value('Accuracy/valid', val_acc, epoch) log_value('Loss/test', test_loss, epoch) log_value('Accuracy/test', test_acc, epoch) if val_loss < min_loss: savemodel(model, dir='siamese') min_loss = val_loss if epoch % id == 0: print( "tr_loss {:.3f} acc {:.3f} valid_loss {:.3f} acc {:.3f} test_loss {:.3f} acc {:.3f}" .format(tr_loss, tr_acc, val_loss, val_acc, test_loss, test_acc)) else: model = loadmodel(dir='siamese_base') print("loaded model\n evaluating....") test_loss, test_acc = valid(model, test_dl, criteria, optimizer, device) print("test_loss {:.3f} acc {:.3f}".format(test_loss, test_acc))
# args.save <> save
# save_path = '{}_pruned{}.pkl'.format(model_path[:-4], int(percent * 100))
# torch.save({'cfg': cfg, 'model': newmodel.state_dict()}, save_path)
# print('Pruned model: {}'.format(save_path))

# checkpoint
# notes = model_path.split('/')[-1][:-4] + '_pruned{}.pkl'.format(percent)
suffix = trial + '_pruned{:.0f}'.format(percent)
savemodel(
    {
        'epoch': 0,  # last epoch
        'state_dict': newmodel.state_dict(),
        'best_prec1': 0.,
        'cfg': cfg  # the pruned channel configuration, needed to rebuild the net
    },
    True, 499, suffix, True)
print('Finished pruning.\n')

# %%
# test
# channels = torch.ones(512, 28, 28)
# idxs = [44, 122]
# # prune channels 44, 122
# for idx in range(512):
#     if idx in idxs:
#         channels[idx, :] = torch.zeros_like(channels[idx, :])
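# =====================================================================
# When this pruned checkpoint is loaded later (the `pruned` branches in the
# training snippets above), the saved 'cfg' must be used to rebuild the slimmer
# architecture *before* load_state_dict. A usage sketch -- the checkpoint path
# and the toy `build_convnet` constructor are illustrative stand-ins for
# whatever builder this repo actually uses:
import torch
import torch.nn as nn


def build_convnet(cfg):
    """Toy builder: a conv stack whose widths follow `cfg` (illustrative)."""
    layers, in_ch = [], 3
    for out_ch in cfg:
        layers += [nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1), nn.ReLU()]
        in_ch = out_ch
    return nn.Sequential(*layers)


checkpoint = torch.load('checkpoints/model_trial_pruned50.pkl')  # illustrative path
net = build_convnet(checkpoint['cfg'])         # rebuild the pruned structure first
net.load_state_dict(checkpoint['state_dict'])  # then restore the pruned weights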
def build_system2(texts_all):
    # ===================== create the model =======================
    # POS tag prefixes to retain in the graph (adjectives, nouns, adverbs)
    poss = ['JJ', 'NN', 'RB']
    texts_all_new = []
    # remove the unwanted POS tags from the texts; the substring test keeps
    # all variants of each tag (e.g. 'NN' also matches 'NNS' and 'NNP')
    for text in texts_all:
        text_new = []
        for word, tag in nltk.pos_tag(nltk.word_tokenize(text)):
            for pos in poss:
                if pos in tag:
                    text_new.append(word)
        texts_all_new.append(' '.join(text_new))

    # vectorize to create 1-grams and 2-grams
    tfidftransformer_1 = TfidfVectorizer(ngram_range=(1, 1))
    tfidftransformer_2 = TfidfVectorizer(ngram_range=(2, 2))
    texts_all_tf_2 = tfidftransformer_2.fit_transform(texts_all_new)
    texts_all_tf_1 = tfidftransformer_1.fit_transform(texts_all_new)
    # (the original refit tfidftransformer_1 here; fit_transform already did that)

    # create id -> word mappings
    vocab_map_2 = {v: k for k, v in tfidftransformer_2.vocabulary_.items()}
    vocab_map_1 = {v: k for k, v in tfidftransformer_1.vocabulary_.items()}

    master_phrases_vectors = []
    master_nerank_vectors = []
    for text, text_tf_1, text_tf_2 in zip(texts_all, texts_all_tf_1, texts_all_tf_2):
        # guard against empty documents so the output stays aligned with the titles
        if len(text_tf_1.indices) == 0 or len(text_tf_2.indices) == 0:
            master_phrases_vectors.append({})
            master_nerank_vectors.append({})
            continue
        # ne_rank (defined elsewhere in the project) scores each vocabulary id
        final_vec = ne_rank(text_tf_1, text_tf_2, tfidftransformer_1,
                            vocab_map_1, vocab_map_2)
        phrases = list(set(use.noun_tokenize(text)))
        phrases_vectors = [list(tfidftransformer_1.transform([x])[0].indices)
                           for x in phrases]
        phrases_dict = {}
        for x, phrase in zip(phrases_vectors, phrases):
            # score a phrase by summing the ne_rank scores of its terms
            phrases_dict[phrase] = np.sum([final_vec[y] for y in x if y in final_vec])
        master_phrases_vectors.append(phrases_dict)
        master_nerank_vectors.append(final_vec)

    # ===================== save the model =================
    use.savemodel(master_phrases_vectors, 'master_phrases_vectors_2')
    use.savemodel(tfidftransformer_1, 'tfidftransformer_1_2')
    use.savemodel(tfidftransformer_2, 'tfidftransformer_2_2')
    use.savemodel(master_nerank_vectors, 'master_nerank_vectors_2')
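# =====================================================================
# Downstream, the saved per-document dictionaries can be ranked directly.
# A usage sketch for pulling the top-k keyphrases of one document, assuming
# `use` exposes a loadmodel counterpart to the savemodel calls above:
master_phrases_vectors = use.loadmodel('master_phrases_vectors_2')


def top_phrases(phrases_dict, k=5):
    """Return the k highest-scoring phrases for one document."""
    return sorted(phrases_dict, key=phrases_dict.get, reverse=True)[:k]


print(top_phrases(master_phrases_vectors[0]))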