Example #1
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer


def build_system1(texts_all, n=2):

    # ===================== create the model =======================
    tfidftransformer = TfidfVectorizer(ngram_range=(1, n))  # , max_df=0.6)
    texts_all_tf = tfidftransformer.fit_transform(texts_all)
    # create id-to-word mapping
    vocab_map = {v: k for k, v in tfidftransformer.vocabulary_.items()}

    master_phrases_vectors = []
    for text_tf, text in zip(texts_all_tf, texts_all):
        text_tf = text_tf.todense()
        # candidate noun phrases, deduplicated
        phrases = use.noun_tokenize(text)
        phrases = list(set(phrases))
        # vocabulary indices of each phrase's terms
        phrases_vectors = [list(tfidftransformer.transform([x])[0].indices)
                           for x in phrases]
        phrases_dict = {}
        for x, phrase in zip(phrases_vectors, phrases):
            # score a phrase by the mean tf-idf of its terms in this text
            x = np.array(text_tf).flatten()[x]
            avg = np.mean(x)
            phrases_dict[phrase] = avg
        # a list of dictionaries with {phrase: score}
        master_phrases_vectors.append(phrases_dict)


    # ===================== save the model =================

    use.savemodel(master_phrases_vectors, 'master_phrases_vectors_1')
    use.savemodel(texts_all_tf, 'texts_all_tf_1')
    use.savemodel(tfidftransformer, 'tfidftransformer_1')
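
The `use` module above is project-local and not shown in these listings. Below is a minimal sketch of what pickle-backed `savemodel`/`loadmodel` helpers could look like, assuming a `<name>.pkl` filename scheme; both the scheme and the function bodies are assumptions, not the project's actual code.

import pickle


def savemodel(obj, name):
    # persist any picklable object; the .pkl suffix is an assumed convention
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f)


def loadmodel(name):
    # counterpart loader for objects saved by savemodel
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)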
Example #2
        min_loss = float('Inf')
        for epoch in range(1, n_epochs + 1):
            model.train()
            tr_loss, tr_acc = fit(model, train_dl, criteria, optimizer, device)
            model.eval()
            val_loss, val_acc = valid(model, valid_dl, criteria, optimizer,
                                      device)
            test_loss, test_acc = valid(model, test_dl, criteria, optimizer,
                                        device)

            log_value('Loss/train', tr_loss, epoch)
            log_value('Accuracy/train', tr_acc, epoch)
            log_value('Loss/valid', val_loss, epoch)
            log_value('Accuracy/valid', val_acc, epoch)
            log_value('Loss/test', test_loss, epoch)
            log_value('Accuracy/test', test_acc, epoch)
            if val_loss < min_loss:
                savemodel(model, dir='siamese', device=device)
                min_loss = val_loss
            # `id` shadows the builtin and is presumably the print interval
            # defined in the enclosing scope
            if epoch % id == 0:
                print(
                    "epoch {} tr_loss {:.3f} acc {:.3f} valid_loss {:.3f} acc {:.3f} test_loss {:.3f} acc {:.3f}"
                    .format(epoch, tr_loss, tr_acc, val_loss, val_acc,
                            test_loss, test_acc))
    else:
        model = loadmodel(dir='siamese_lstm')
        print("loaded model\n evaluating....")
        test_loss, test_acc = valid(model, test_dl, criteria, optimizer,
                                    device)
        print("test_loss {:.3f} acc {:.3f}".format(test_loss, test_acc))
Example #3
        scheduler.step(valid_acc)

        # save model
        state = {
            'epoch': epoch,  # last epoch
            'state_dict': net.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
        }
        suffix = trial
        if pruned:
            state['cfg'] = pruned_pkl['cfg']
            suffix += '_' + args.pruned.split('_')[-1][:-4]

        savemodel(state, is_best, checkpoint_freq, suffix, False)

        # print result (% 1 means every epoch; the modulus is a
        # print-frequency placeholder)
        if (epoch + 1) % 1 == 0:

            print(
                'Epoch:{}/{}\nAccuracy(Train/Valid):{:.02f}/{:.02f}% Loss(Train/Valid):{:.3f}/{:.3f}'
                .format(epoch, start_epoch + EPOCHS - 1, train_acc, valid_acc,
                        train_loss, valid_loss))

        # early stopping
        if early_stop and train_acc > 99.99:
            print('Early stop because train accuracy > 99.99.')
            break

    end_training = time.time()
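
The checkpoint-style `savemodel(state, is_best, checkpoint_freq, suffix, ...)` used in Examples #3, #4, and #6 is also project-local. A plausible sketch, assuming it writes a best-model file plus periodic checkpoints and returns the saved path (the filename scheme, the meaning of the final flag, and the return value are assumptions):

import torch


def savemodel(state, is_best, checkpoint_freq, suffix, verbose):
    # hypothetical checkpoint writer; filenames are assumed conventions
    saved = None
    if is_best:
        saved = 'model_best_{}.pth'.format(suffix)
        torch.save(state, saved)
    # also keep a periodic snapshot every checkpoint_freq epochs
    if (state['epoch'] + 1) % checkpoint_freq == 0:
        path = 'checkpoint_{}_{}.pth'.format(suffix, state['epoch'])
        torch.save(state, path)
        saved = saved or path
    if verbose and saved:
        print('saved', saved)
    return saved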
Example #4
        # save model
        state = {
            'epoch': epoch,  # last epoch
            'state_dict': net.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }
        state.update(para)
        suffix = para['trial']
        # save pruned structure
        if para['pruned']:
            state['cfg'] = pruned_pkl['cfg']
            suffix += '_' + args.pruned.split('_')[-1][:-4]

        save = savemodel(state, is_best, para['checkpoint_freq'], suffix,
                         False)
        if save:
            log.log(save)

        # print result
        if (epoch + 1) % 1 == 0:

            log.log(
                'Epoch:{}/{}\nAccuracy(Train/Valid):{:.02f}/{:.02f}% Loss(Train/Valid):{:.3f}/{:.3f}'
                .format(epoch, start_epoch + para['epochs'] - 1, train_acc,
                        valid_acc, train_loss, valid_loss))

        # early stopping
        if para['early_stop'] and valid_acc > 99.5:
            log.log('Early stop because valid accuracy > 99.5.')
            break
Example #5
        print("Training....")
        for epoch in range(1, n_epochs + 1):
            model.train()
            tr_loss, tr_acc = fit(model, train_dl, criteria, optimizer, device)
            model.eval()
            val_loss, val_acc = valid(model, valid_dl, criteria, optimizer,
                                      device)
            test_loss, test_acc = valid(model, test_dl, criteria, optimizer,
                                        device)

            log_value('Loss/train', tr_loss, epoch)
            log_value('Accuracy/train', tr_acc, epoch)
            log_value('Loss/valid', val_loss, epoch)
            log_value('Accuracy/valid', val_acc, epoch)
            log_value('Loss/test', test_loss, epoch)
            log_value('Accuracy/test', test_acc, epoch)
            if val_loss < min_loss:
                savemodel(model, dir='siamese')
                min_loss = val_loss
            if epoch % id == 0:  # `id`: print interval from the enclosing scope
                print(
                    "tr_loss {:.3f} acc {:.3f} valid_loss {:.3f} acc {:.3f} test_loss {:.3f} acc {:.3f}"
                    .format(tr_loss, tr_acc, val_loss, val_acc, test_loss,
                            test_acc))
    else:
        model = loadmodel(dir='siamese_base')
        print("loaded model\n evaluating....")
        test_loss, test_acc = valid(model, test_dl, criteria, optimizer,
                                    device)
        print("test_loss {:.3f} acc {:.3f}".format(test_loss, test_acc))
Example #6
# args.save <> save

# save_path = '{}_pruned{}.pkl'.format(model_path[:-4], int(percent*100))
# torch.save({'cfg': cfg, 'model': newmodel.state_dict()}, save_path)
# print('Pruned model: {}'.format(save_path))

# checkpoint
# notes = model_path.split('/')[-1][:-4] + '_pruned{}.pkl'.format(percent)
suffix = trial + '_pruned{:.0f}'.format(percent)

savemodel(
    {
        'epoch': 0,  # last epoch
        'state_dict': newmodel.state_dict(),
        'best_prec1': 0.,
        'cfg': cfg
    },
    True,
    499,
    suffix,
    True)

print('Finished pruning.\n')
#%%
# test
# channels = torch.ones(512, 28, 28)
# idxs = [44, 122]
# # prune channels 44, 122
# for idx in range(512):
#     if idx in idxs:
#         channels[idx, :] = torch.zeros_like(channels[idx, :])
Example #7
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer


def build_system2(texts_all):

    # ===================== create the model =======================

    # pos tags to retain in the graph
    poss = ['JJ', 'NN', 'RB']
    texts_all_new = []
    # remove the unwanted pos tags from the texts
    for text in texts_all:
        text_new = []
        t = nltk.pos_tag(nltk.word_tokenize(text))
        for a, b in t:
            # substring match, so 'NN' also keeps 'NNS', 'NNP', etc.
            for pos in poss:
                if pos in b:
                    text_new.append(a)
        text_new = ' '.join(text_new)
        texts_all_new.append(text_new)

    # vectorize to create 1-grams and 2-grams
    tfidftransformer_1 = TfidfVectorizer(ngram_range=(1, 1))
    tfidftransformer_2 = TfidfVectorizer(ngram_range=(2, 2))

    texts_all_tf_2 = tfidftransformer_2.fit_transform(texts_all_new)
    texts_all_tf_1 = tfidftransformer_1.fit_transform(texts_all_new)

    # create id-to-word mappings
    vocab_map_2 = {v: k for k, v in tfidftransformer_2.vocabulary_.items()}
    vocab_map_1 = {v: k for k, v in tfidftransformer_1.vocabulary_.items()}



    master_phrases_vectors = []
    master_nerank_vectors = []

    for text, text_tf_1, text_tf_2 in zip(texts_all, texts_all_tf_1,
                                          texts_all_tf_2):

        # skip empty vectorizations so the outputs stay aligned with the titles
        if len(text_tf_1.indices) == 0 or len(text_tf_2.indices) == 0:
            master_phrases_vectors.append({})
            master_nerank_vectors.append({})
            continue

        final_vec = ne_rank(text_tf_1, text_tf_2, tfidftransformer_1,
                            vocab_map_1, vocab_map_2)
        phrases = use.noun_tokenize(text)
        phrases = list(set(phrases))
        phrases_vectors = [list(tfidftransformer_1.transform([x])[0].indices)
                           for x in phrases]
        phrases_dict = {}
        for x, phrase in zip(phrases_vectors, phrases):
            # score a phrase by the summed ne_rank scores of its terms
            x = [final_vec[y] for y in x if y in final_vec]
            score = np.sum(x)
            phrases_dict[phrase] = score
        master_phrases_vectors.append(phrases_dict)
        master_nerank_vectors.append(final_vec)



    # ===================== save the model =================

    use.savemodel(master_phrases_vectors, 'master_phrases_vectors_2')
    use.savemodel(tfidftransformer_1, 'tfidftransformer_1_2')
    use.savemodel(tfidftransformer_2, 'tfidftransformer_2_2')
    use.savemodel(master_nerank_vectors, 'master_nerank_vectors_2')
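
Both builders extract candidate phrases with `use.noun_tokenize`, which is not shown in these listings. A minimal sketch using NLTK noun-phrase chunking; the chunk grammar and the function body are assumptions about what such a helper might do:

import nltk


def noun_tokenize(text):
    # hypothetical phrase extractor: chunk adjective/noun runs as candidates
    grammar = 'NP: {<JJ>*<NN.*>+}'
    parser = nltk.RegexpParser(grammar)
    tagged = nltk.pos_tag(nltk.word_tokenize(text))
    tree = parser.parse(tagged)
    return [' '.join(word for word, tag in subtree.leaves())
            for subtree in tree.subtrees(lambda t: t.label() == 'NP')]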