Example #1
import numpy as np

vectorizer = dataset.vectorizer
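
# `make_embeddings_matrix` is defined elsewhere in the project. A minimal
# sketch of what it might look like, assuming `med_w2v` is a gensim-style
# KeyedVectors model loaded earlier (only the names come from this snippet;
# the body is illustrative):
def make_embeddings_matrix(words):
    # One row per token, aligned with the vocabulary's token indices.
    embeddings = np.random.normal(scale=0.1,
                                  size=(len(words), med_w2v.vector_size))
    for index, word in enumerate(words):
        if word in med_w2v:  # keep the pretrained vector when one exists
            embeddings[index] = med_w2v[word]
    return embeddings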

# Create embeddings
embeddings = None
if args.use_med_embeddings:
    words = vectorizer.paper_vocab.token_to_idx.keys()
    embeddings = make_embeddings_matrix(words=words)
    print ("<Embeddings(words={0}, dim={1})>".format(np.shape(embeddings)[0], np.shape(embeddings)[1])) 

del med_w2v  # free the loaded word2vec model now that the embedding matrix is built

# Initialize model 
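# Inferred from the constructor arguments (not from the model source):
# PapersModel here appears to be a CNN text classifier, with token embeddings
# fed through parallel convolutions (filter_sizes, num_filters) into a hidden
# layer of size hidden_dim and out to the label classes.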
model = PapersModel(embedding_dim=args.embedding_dim, 
                    num_embeddings=len(vectorizer.paper_vocab), 
                    num_input_channels=args.embedding_dim, filter_sizes=args.filter_sizes,
                    num_channels=args.num_filters, hidden_dim=args.hidden_dim, 
                    num_classes=len(vectorizer.label_vocab), 
                    dropout_p=args.dropout_p, pretrained_embeddings=embeddings,
                    padding_idx=vectorizer.paper_vocab.mask_index)
print(model)  # printing the model shows its module tree


#%% Train
trainer = Trainer(dataset=dataset, model=model, 
                  model_state_file=args.model_state_file, 
                  save_dir=args.save_dir, 
                  device=args.device,
                  shuffle=args.shuffle, num_epochs=args.num_epochs, 
                  batch_size=args.batch_size, learning_rate=args.learning_rate, 
                  early_stopping_criteria=args.early_stopping_criteria)
Example #2
print("Using CUDA: {}".format(args.cuda))   



   

#%% Using embeddings

#%% Initialization
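# load_dataset_and_make_vectorizer presumably builds the vocabulary from the
# split dataframe, with `cutoff` acting as a minimum token frequency; the
# fitted vectorizer is then saved for reuse at inference time.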
dataset = PapersDataset.load_dataset_and_make_vectorizer(df=split_df, cutoff=args.cutoff)
dataset.save_vectorizer(args.vectorizer_file)
vectorizer = dataset.vectorizer
model = PapersModel(embedding_dim=args.embedding_dim, 
                    num_embeddings=len(vectorizer.title_vocab), 
                    num_input_channels=args.embedding_dim, 
                    num_channels=args.num_filters, hidden_dim=args.hidden_dim, 
                    num_classes=len(vectorizer.category_vocab), 
                    dropout_p=args.dropout_p, pretrained_embeddings=None, 
                    padding_idx=vectorizer.title_vocab.mask_index)
print(model)  # printing the model shows its module tree


#%% Train
trainer = Trainer(dataset=dataset, model=model, 
                  model_state_file=args.model_state_file, 
                  save_dir=args.save_dir, device=args.device,
                  shuffle=args.shuffle, num_epochs=args.num_epochs, 
                  batch_size=args.batch_size, learning_rate=args.learning_rate, 
                  early_stopping_criteria=args.early_stopping_criteria)
trainer.run_train_loop()
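
# All three examples read their settings from an `args` namespace. The field
# names below are collected from the snippets; every value is illustrative
# only, not a default confirmed by the original project.
from argparse import Namespace

args = Namespace(
    cuda=True, device="cuda",
    shuffle=True, num_epochs=20, batch_size=64,
    learning_rate=1e-3, early_stopping_criteria=5,
    cutoff=25, embedding_dim=100, hidden_dim=100, dropout_p=0.1,
    num_filters=100, filter_sizes=[2, 3, 4],                # CNN variant (Examples #1, #2)
    rnn_hidden_dim=128, num_layers=1, bidirectional=False,  # RNN variant (Example #3)
    use_med_embeddings=True,
    vectorizer_file="vectorizer.json",
    model_state_file="model.pth",
    save_dir="save",
)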
Example #3
import numpy as np

embeddings = None
if args.use_med_embeddings:
    words = vectorizer.paper_vocab.token_to_idx.keys()
    embeddings = make_embeddings_matrix(words=words)
    print("<Embeddings(words={0}, dim={1})>".format(
        np.shape(embeddings)[0],
        np.shape(embeddings)[1]))

del med_w2v  # the embedding matrix is built; the word2vec model is no longer needed

# Initialize model
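# Inferred from the constructor arguments: this PapersModel variant is
# RNN-based (rnn_hidden_dim, num_layers, bidirectional), unlike the CNN
# variant used in Examples #1 and #2.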
model = PapersModel(embedding_dim=args.embedding_dim,
                    num_embeddings=len(vectorizer.paper_vocab),
                    rnn_hidden_dim=args.rnn_hidden_dim,
                    hidden_dim=args.hidden_dim,
                    output_dim=len(vectorizer.label_vocab),
                    num_layers=args.num_layers,
                    bidirectional=args.bidirectional,
                    dropout_p=args.dropout_p,
                    pretrained_embeddings=embeddings,
                    padding_idx=vectorizer.paper_vocab.mask_index)
print(model)  # printing the model shows its module tree

###### Train ######
trainer = Trainer(dataset=dataset,
                  model=model,
                  model_state_file=args.model_state_file,
                  save_dir=args.save_dir,
                  device=args.device,
                  shuffle=args.shuffle,
                  num_epochs=args.num_epochs,
                  batch_size=args.batch_size,
                  learning_rate=args.learning_rate,
                  early_stopping_criteria=args.early_stopping_criteria)
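
# As in Example #2 above, the training loop would then be started with:
trainer.run_train_loop()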