示例#1
0
import torch
from torch import nn
from nlplay.data.cache import WordVectorsManager, WV, DS, DSManager
from nlplay.models.pytorch.classifiers.qrnn import QRNN
from nlplay.models.pytorch.pretrained import get_pretrained_vecs
from nlplay.features.text_cleaner import *
from nlplay.models.pytorch.trainer import PytorchModelTrainer
from nlplay.models.pytorch.dataset import DSGenerator
from nlplay.utils import utils

# Root logger: DEBUG verbosity, every record prefixed with a timestamp.
# NOTE(review): this snippet shows no explicit `import logging`; the name
# presumably arrives through the star import above -- confirm.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Input data files
# IMDB dataset manager; get_partition_paths() is unpacked as
# (train, test, validation) in that order.
ds = DSManager(DS.IMDB.value)
train_csv, test_csv, val_csv = ds.get_partition_paths()
# Word-vector manager for WV.GLOVE_EN6B_100; keep whatever location
# get_wv_path() reports for later use.
lm = WordVectorsManager(WV.GLOVE_EN6B_100.value)
pretrained_vec = lm.get_wv_path()

# Model Parameters
# Grouped by apparent role (inferred from the names only -- the code that
# consumes these values is outside this snippet).
# -- training schedule
num_epochs, batch_size, lr = 10, 64, 1e-3
# -- text features
ngram_range = (1, 1)
max_features, max_seq = 20000, 80
# -- network
embedding_size, dropout = 100, 0.3
# -- data loading
num_workers = 1
示例#2
0
import logging
import torch
from torch import nn
from nlplay.data.cache import DSManager, DS
from nlplay.features.text_cleaner import base_cleaner
from nlplay.models.pytorch.classifiers.linear import SMLinearModel
from nlplay.models.pytorch.dataset import CSRDatasetGenerator
from nlplay.models.pytorch.trainer import PytorchModelTrainer

# Root logger: DEBUG verbosity, every record prefixed with a timestamp.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Input data files
# IMDB dataset manager; get_partition_paths() is unpacked as
# (train, test, validation) in that order.
ds = DSManager(DS.IMDB.value)
train_csv, test_csv, val_csv = ds.get_partition_paths()

# Model Parameters
# Grouped by apparent role (inferred from the names only -- the code that
# consumes these values is outside this snippet).
# -- training schedule / optimiser
num_epochs, batch_size = 8, 512
learning_rate, weight_decay = 0.0075, 5e-6
# -- n-gram feature extraction
ngram_range = (1, 2)
min_df, max_df = 5, 0.87
max_features = 50000
sublinear_tf, stop_words = True, None
# -- data loading
num_workers = 1

# Data preparation
# NOTE: `ds` is rebound here. Up to this point it held the DSManager used to
# resolve the CSV partition paths; from now on it is the CSRDatasetGenerator.
ds = CSRDatasetGenerator()
train_ds, val_ds = ds.from_csv(train_file=train_csv, val_file=test_csv, ngram_range=ngram_range,
示例#3
0
import logging
import torch
from torch import nn
from nlplay.data.cache import DSManager, DS
from nlplay.features.text_cleaner import *
from nlplay.models.pytorch.classifiers.exam import EXAM
from nlplay.models.pytorch.dataset import DSGenerator
from nlplay.models.pytorch.trainer import PytorchModelTrainer

# Root logger: DEBUG verbosity, every record prefixed with a timestamp.
logging.basicConfig(
    level=logging.DEBUG,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(message)s",
)

# Input data files
# AG_NEWS dataset manager; get_partition_paths() is unpacked as
# (train, test, validation) in that order.
ds = DSManager(DS.AG_NEWS.value)
train_csv, test_csv, val_csv = ds.get_partition_paths()

# Inputs & Model Parameters
# Grouped by apparent role (inferred from the names only -- the code that
# consumes these values is outside this snippet).
# -- training schedule
num_epochs, batch_size, lr = 3, 16, 1e-4
# -- text features
ngram_range = (1, 1)
max_features, max_seq = 100000, 256
# -- network
region_size, embedding_size, dropout = 7, 128, 0.2
# -- data loading
num_workers = 1

# Data preparation
# NOTE: `ds` is rebound here. Up to this point it held the DSManager used to
# resolve the CSV partition paths; from now on it is the DSGenerator.
ds = DSGenerator()
import pandas as pd
from torch import nn
from torch.utils.data import TensorDataset
from nlplay.data.cache import DSManager, DS
from nlplay.features.text_cleaner import base_cleaner
from nlplay.models.pytorch.classifiers.charcnn import CharCNN_Zhang
from nlplay.models.pytorch.trainer import PytorchModelTrainer
from nlplay.models.pytorch.utils import char_vectorizer
from nlplay.utils.parlib import parallelApply

# Root logger: DEBUG verbosity, every record prefixed with a timestamp.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Input data files
# IMDB dataset manager; get_partition_paths() is unpacked as
# (train, test, validation) in that order.
ds = DSManager(DS.IMDB.value)
train_csv, test_csv, val_csv = ds.get_partition_paths()

# Vocabulary Setup
# Character inventory: lowercase ASCII letters, digits, ASCII punctuation and
# the newline character.
vocab = (list(string.ascii_lowercase) + list(string.digits) +
         list(string.punctuation) + ["\n"])
char2idx = {}
idx2char = {}
# De-duplicate, then sort. Iterating a bare set of strings follows hash order,
# and string hashes are randomised per interpreter run, so without the sort a
# character could receive a different index on every run (making saved models
# and cached encodings incompatible between runs).
vocab = sorted(set(vocab))
for idx, t in enumerate(vocab):
    # Keep both directions of the mapping in sync.
    char2idx[t] = idx
    idx2char[idx] = t

# Experiment parameters
# `vocab` has already been de-duplicated, so this counts distinct characters.
vocabulary_size = len(vocab)
max_seq, num_epochs = 1014, 100