def __init__(self, opts, vocab, char_vocab, label_vocab):
    """Build the Char-CNN: word/char embeddings, two parallel conv banks, and a 2-layer classifier head.

    Args:
        opts: hyper-parameter namespace (seed, embed sizes, kernel config,
            dropout rates, pre-trained embedding path, cuda flag, ...).
        vocab: word vocabulary (provides ``m_size`` and ``str2idx``).
        char_vocab: character vocabulary (provides ``m_size`` and ``str2idx``).
        label_vocab: label vocabulary (provides ``m_size``).
    """
    super(Char_CNN, self).__init__()

    # Make initialization reproducible on CPU and GPU.
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    torch.cuda.manual_seed(opts.seed)

    # ---- embedding parameters ----
    self.embed_dim = opts.embed_size
    self.char_embed_dim = opts.char_embed_size
    self.vocab_size = vocab.m_size
    self.char_num = char_vocab.m_size
    self.pre_embed_path = opts.pre_embed_path
    self.str2idx = vocab.str2idx
    self.char_str2idx = char_vocab.str2idx
    self.embed_uniform_init = opts.embed_uniform_init

    # ---- network parameters ----
    self.stride = opts.stride
    self.kernel_size = opts.kernel_size  # iterable of kernel heights K
    self.kernel_num = opts.kernel_num    # feature maps per kernel size
    self.label_num = label_vocab.m_size

    # ---- gpu option ----
    self.use_cuda = opts.use_cuda

    # ---- embeddings: word level and char level ----
    self.word_embeddings = nn.Embedding(self.vocab_size, self.embed_dim)
    self.char_embeddings = nn.Embedding(self.char_num, self.char_embed_dim)
    if opts.pre_embed_path != '':
        # Load pre-trained word vectors (helper zero-fills rows it has no vector for).
        embedding = Embedding.load_predtrained_emb_zero(
            self.pre_embed_path, self.str2idx)
        self.word_embeddings.weight.data.copy_(embedding)
    else:
        nn.init.uniform_(self.word_embeddings.weight.data,
                         -self.embed_uniform_init, self.embed_uniform_init)
        # NOTE(review): char embeddings are uniform-initialized only on this
        # branch; with pre-trained word vectors they keep PyTorch's default
        # init — confirm this asymmetry is intentional.
        nn.init.uniform_(self.char_embeddings.weight.data,
                         -self.embed_uniform_init, self.embed_uniform_init)

    # Conv bank over [word embedding ; char conv features]: input width is the
    # word dim plus one pooled feature vector per char kernel size.
    word_char_embed_dim = self.embed_dim + len(self.kernel_size) * self.kernel_num
    self.word_char_convs = nn.ModuleList([
        nn.Conv2d(1, self.kernel_num, (K, word_char_embed_dim),
                  stride=self.stride, padding=(K // 2, 0))
        for K in self.kernel_size
    ])
    # Conv bank over raw character embeddings.
    self.char_convs = nn.ModuleList([
        nn.Conv2d(1, self.kernel_num, (K, self.char_embed_dim),
                  stride=self.stride, padding=(K // 2, 0))
        for K in self.kernel_size
    ])

    # Classifier head: halve the concatenated feature width, then project to labels.
    infea = len(self.kernel_size) * self.kernel_num
    self.linear1 = nn.Linear(infea, infea // 2)
    self.linear2 = nn.Linear(infea // 2, self.label_num)

    # FIX: the original first stored the float dropout rates under these same
    # attribute names and then overwrote them with the modules; build the
    # Dropout modules directly from opts so the attributes are never shadowed
    # (final attribute values are identical).
    self.embed_dropout = nn.Dropout(opts.embed_dropout)
    self.fc_dropout = nn.Dropout(opts.fc_dropout)
product_sentences_prior = order_products_prior.sort_values([ 'order_id', 'add_to_cart_order' ]).groupby('order_id').apply(lambda order: order['product_id'].tolist()) product_sentences = product_sentences_prior.append( product_sentences_train).values return product_sentences if __name__ == '__main__': print('Generating sentences...') product_sentences = gen_product_sentences() print('Generating product_vector features...') embedding = Embedding(product_sentences) embedding.word_to_vector(size=100, window=5, min_count=2) embedding.reduce_dimension(n_components=2) product_vector_feat = embedding.return_dataframe(name='product_id') product_vector_feat['product_id'] = product_vector_feat[ 'product_id'].astype(np.int) product_vector_feat['product_id_vector_1'] = product_vector_feat[ 'product_id_vector_1'].astype(np.float) product_vector_feat['product_id_vector_2'] = product_vector_feat[ 'product_id_vector_1'].astype(np.float) product_vector_feat.set_index('product_id', inplace=True) pickle_dump(product_vector_feat, '{}/product_vector_feat.pkl'.format(config.feat_folder)) print('Done - product_vector features')