def test_builtin_conf(app, status, warning):
    warnings = warning.getvalue()
    assert_in('master_doc', warnings,
              'override on builtin "master_doc" should raise a type warning')
    assert_not_in('language', warnings,
                  'explicitly permitted override on builtin "language" '
                  'should NOT raise a type warning')
    assert_not_in('primary_domain', warnings,
                  'override to None on builtin "primary_domain" '
                  'should NOT raise a type warning')
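
# For reference, `assert_in` / `assert_not_in` above behave like the
# nose.tools-style substring assertions. If the test utilities don't
# already provide them, minimal stand-ins could look like this sketch
# (the bodies below are an assumption, not the suite's actual helpers):
def assert_in(member, container, msg=None):
    # Fail with an informative message when `member` is missing.
    if member not in container:
        raise AssertionError(
            msg or '{!r} not found in {!r}'.format(member, container))


def assert_not_in(member, container, msg=None):
    # Fail when `member` unexpectedly appears in `container`.
    if member in container:
        raise AssertionError(
            msg or '{!r} unexpectedly found in {!r}'.format(member, container))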
def __init__(self, dataset, datapath, indexer, preprocessor):
    # A dataset tag looks like '<media>.<regime>', e.g. 'yelp.medium'
    dataset_media, dataset_regime = dataset.split('.')
    util.assert_in(dataset_media, ['yelp', 'twitter'])
    self.dataset_media = dataset_media
    self.dataset_regime = dataset_regime
    self.datapath = datapath
    self.pp = preprocessor
    if self.pp is None:
        # Fall back to the default preprocessor when none is supplied
        self.pp = Preprocessor()
    self.indexer = indexer
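
# Hypothetical usage sketch: the enclosing class name (`DataSet`), the
# dataset tag, the path, and `Indexer` below are illustrative assumptions,
# not names confirmed by this module:
#
#     indexer = Indexer()                  # maps tokens to integer ids
#     ds = DataSet('yelp.medium',          # media='yelp', regime='medium'
#                  datapath='data/yelp/',
#                  indexer=indexer,
#                  preprocessor=None)      # None falls back to Preprocessor()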
def __init__(self, dimensions=200, finetune=False, vocab_size=1000,
             pooling='max', activation='relu', kernel_sizes=(1, 2, 3),
             filters=5, dropout_rate=0.0, lr=1e-3, beta_1=0.9,
             beta_2=0.999, epsilon=1e-08, weight_decay=0.0,
             embeddings_matrix=None):
    """
    :param dimensions: int: dimension of each word vector
    :param finetune: bool: whether or not to finetune word embeddings
    :param vocab_size: int: size of the vocabulary; the embeddings layer will be this big
    :param pooling: ['max', 'average', 'logsumexp']: pooling operation for word vectors in a document
    :param activation: str: activation for the convolutional stack
    :param kernel_sizes: tuple: convolve using unigrams / bigrams / trigrams
    :param filters: int: number of filters for the convolutional layer
    :param dropout_rate: float: dropout probability shared across all the dropout layers
    :param lr: learning rate for the Adam optimiser
    :param beta_1: parameter for the Adam optimiser
    :param beta_2: parameter for the Adam optimiser
    :param epsilon: parameter for the Adam optimiser
    :param weight_decay: parameter for the Adam optimiser (L2 regularization weight, kernel_l2_regularization)
    :param embeddings_matrix: None or numpy.ndarray: embeddings matrix to initialise the model with
    """
    # Initialize torch model
    super(Net, self).__init__()

    # Validate arguments
    assert type(dimensions) == int, type(dimensions)
    assert type(finetune) == bool, type(finetune)
    assert type(vocab_size) == int, type(vocab_size)
    util.assert_in(pooling, ['max', 'average', 'logsumexp'])
    assert all(isinstance(x, int) for x in kernel_sizes), \
        '{} should all be ints'.format(str(kernel_sizes))
    assert isinstance(filters, int), type(filters)
    assert isinstance(dropout_rate, float)
    assert isinstance(lr, float)
    assert isinstance(beta_1, float)
    assert isinstance(beta_2, float)
    assert isinstance(epsilon, float)
    assert isinstance(weight_decay, float)
    if isinstance(embeddings_matrix, np.ndarray):
        assert (vocab_size, dimensions) == embeddings_matrix.shape, \
            "mismatched dimensions of embeddings_matrix"
    elif embeddings_matrix is None:
        pass
    else:
        raise TypeError("Unsupported embeddings_matrix type: " +
                        str(type(embeddings_matrix)))

    # Save hyperparameters (everything except the matrix itself and self)
    self.hyperparameters = {
        k: v
        for k, v in locals().items()
        if k not in ('embeddings_matrix', 'self')
    }
    logger.debug(self.to_json(indent=None))

    # Our layers
    # Pass the input through embeddings
    # https://discuss.pytorch.org/t/can-we-use-pre-trained-word-embeddings-for-weight-initialization-in-nn-embedding/1222/12
    self.embeddings = nn.Embedding(vocab_size, dimensions)
    self.embeddings.training = finetune
    if embeddings_matrix is not None:
        self.embeddings.weight.data.copy_(
            torch.FloatTensor(embeddings_matrix))

    # add dropout layer
    # self.dropout = nn.Dropout(p=dropout_rate)

    # Build the convolutional stack: one (pad, conv, dropout) branch
    # per kernel size, registered as attributes so PyTorch tracks them
    self.pad1_layers = []
    self.conv1_layers = []
    self.drop1_layers = []
    conv_stack = get_conv_stack(dimensions, filters, kernel_sizes,
                                dropout_rate)
    for i, (pad, conv, drop) in enumerate(conv_stack):
        setattr(self, 'pad1_' + str(i), pad)
        self.pad1_layers.append('pad1_' + str(i))
        setattr(self, 'conv1_' + str(i), conv)
        self.conv1_layers.append('conv1_' + str(i))
        setattr(self, 'drop1_' + str(i), drop)
        self.drop1_layers.append('drop1_' + str(i))
    self.conv1_stack_pooling = pooling
    self.conv1_stack_activation = activation
    self.fc = nn.Linear(len(kernel_sizes) * filters, 1)
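
# A minimal instantiation sketch, assuming the surrounding module provides
# `Net` and `np` as used above; the sizes and the random matrix are
# illustrative only (the shape must equal (vocab_size, dimensions) or the
# constructor's assert fires):
#
#     vocab_size, dimensions = 1000, 200
#     pretrained = np.random.randn(vocab_size, dimensions).astype('float32')
#     net = Net(dimensions=dimensions,
#               vocab_size=vocab_size,
#               kernel_sizes=(1, 2, 3),   # unigram / bigram / trigram branches
#               filters=5,
#               dropout_rate=0.5,
#               embeddings_matrix=pretrained)
#
# The final nn.Linear(len(kernel_sizes) * filters, 1) maps the pooled
# convolutional features to a single output score.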