Exemplos de InputData.get_pairs em Python

Linguagem de programação: Python

Namespace / nome do pacote: input_data

Classe / Tipo: InputData

Método / Função: get_pairs

Exemplos em hotexamples.com: 4

InputData.get_pairs em Python - 4 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de input_data.InputData.get_pairs em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

InputData(30)

get_batch_pairs(15)

evaluate_pair_count(8)

get_neg_v_neg_sampling(6)

split_data(5)

evaluate_pairs_count(5)

function(5)

get_test_dataset_size(4)

next_pair_batch(4)

reset_scan(4)

get_pairs(4)

next_batch_scan(4)

get_negative_sampling(4)

df_dx1(3)

df_dx2(3)

get_cbow_pairs_by_neg_sampling(2)

get_pairs_by_neg_sampling(2)

get_pairs_by_huffman(2)

get_cbow_batch_all_pairs(2)

get_cbow_pairs_by_huffman(2)

next_batch(2)

readFile(2)

next_batch_evaluation_data(2)

preprocess_image(1)

random_pick(1)

prepeare_one_hot_input(1)

pop(1)

next_tt_scan(1)

readDir(1)

read_file(1)

sents_to_id_lists(1)

splitData(1)

test_data(1)

get_pair_count(1)

initPrefixBucketing(1)

generatePartitionedBatch(1)

chars_to_unknown(1)

dict(1)

encodeEvent(1)

encodeTrace(1)

estimate_pair_count(1)

generateNoPartitionBatch(1)

get_bacth_data(1)

get_tt_dataset_size(1)

get_cbow_batch_pairs(1)

get_neg_pairs(1)

get_node_pairs(1)

get_ns_batch(1)

build_train_input_fn(1)

get_ps_batch(1)

Métodos Frequentes

InputData (30)

get_batch_pairs (15)

evaluate_pair_count (8)

get_neg_v_neg_sampling (6)

split_data (5)

evaluate_pairs_count (5)

function (5)

get_test_dataset_size (4)

next_pair_batch (4)

reset_scan (4)

Métodos Frequentes

get_pairs (4)

next_batch_scan (4)

get_negative_sampling (4)

df_dx1 (3)

df_dx2 (3)

get_cbow_pairs_by_neg_sampling (2)

get_pairs_by_neg_sampling (2)

get_pairs_by_huffman (2)

get_cbow_batch_all_pairs (2)

get_cbow_pairs_by_huffman (2)

next_batch (2)

readFile (2)

next_batch_evaluation_data (2)

preprocess_image (1)

random_pick (1)

prepeare_one_hot_input (1)

pop (1)

next_tt_scan (1)

readDir (1)

read_file (1)

Métodos Frequentes

next_batch (2)

readFile (2)

next_batch_evaluation_data (2)

preprocess_image (1)

random_pick (1)

prepeare_one_hot_input (1)

pop (1)

next_tt_scan (1)

readDir (1)

read_file (1)

sents_to_id_lists (1)

splitData (1)

test_data (1)

get_pair_count (1)

initPrefixBucketing (1)

generatePartitionedBatch (1)

chars_to_unknown (1)

dict (1)

encodeEvent (1)

encodeTrace (1)

estimate_pair_count (1)

generateNoPartitionBatch (1)

get_bacth_data (1)

get_tt_dataset_size (1)

get_cbow_batch_pairs (1)

get_neg_pairs (1)

get_node_pairs (1)

get_ns_batch (1)

build_train_input_fn (1)

get_ps_batch (1)

Métodos Frequentes

sents_to_id_lists (1)

splitData (1)

test_data (1)

get_pair_count (1)

initPrefixBucketing (1)

generatePartitionedBatch (1)

chars_to_unknown (1)

dict (1)

encodeEvent (1)

encodeTrace (1)

estimate_pair_count (1)

generateNoPartitionBatch (1)

get_bacth_data (1)

get_tt_dataset_size (1)

get_cbow_batch_pairs (1)

get_neg_pairs (1)

get_node_pairs (1)

get_ns_batch (1)

build_train_input_fn (1)

get_ps_batch (1)

write (1)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: model_train.py Projeto: vllgle/nlp_baseline

class Word2Vec:
    """Skip-gram trainer that wires an ``InputData`` reader to a ``SkipGramModel``.

    NOTE(review): the source was whitespace-collapsed; statement nesting below
    is reconstructed from syntax and should be confirmed against the project.
    """

    def __init__(self, input_file_name, output_file_name):
        """Create the data reader, the model and its SGD optimizer.

        Args:
            input_file_name: Forwarded to ``InputData`` along with ``MIN_COUNT``.
            output_file_name: Destination handed to ``save_embedding`` once
                training has finished.
        """
        self.lr = LR
        self.output_file_name = output_file_name
        self.data = InputData(input_file_name, MIN_COUNT)
        self.model = SkipGramModel(self.data.word_count, EMB_DIMENSION)
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)

    @staticmethod
    def _split_pairs(pairs):
        """Return (first elements, second elements cast to int) of ``pairs``."""
        heads = [entry[0] for entry in pairs]
        tails = [int(entry[1]) for entry in pairs]
        return heads, tails

    def train(self):
        """Run one pass over the estimated number of batches, then save embeddings."""
        print("SkipGram Training......")
        total_pairs = self.data.evaluate_pairs_count(WINDOW_SIZE)
        print("pairs_count", total_pairs)
        total_batches = total_pairs / BATCH_SIZE
        print("batch_count", total_batches)

        progress = tqdm(range(int(total_batches)))
        for step in progress:
            batch = self.data.get_batch_pairs(BATCH_SIZE, WINDOW_SIZE)
            positives, negatives = self.data.get_pairs(batch)
            pos_u, pos_v = self._split_pairs(positives)
            neg_u, neg_v = self._split_pairs(negatives)

            self.optimizer.zero_grad()
            loss = self.model.forward(pos_u, pos_v, neg_u, neg_v)
            loss.backward()
            self.optimizer.step()

            # Only touch the learning rate when the number of processed pairs
            # is a multiple of 100000.
            if (step * BATCH_SIZE) % 100000 != 0:
                continue
            # NOTE(review): the factor is applied to the already-decayed
            # self.lr, so the decay compounds rather than being linear in
            # `step` -- confirm this is intended.
            self.lr = self.lr * (1.0 - 1.0 * step / total_batches)
            for group in self.optimizer.param_groups:
                group['lr'] = self.lr

        self.model.save_embedding(self.data.id2word_dict, self.output_file_name)

Exemplo n.º 2

0

Exibir arquivo

class Word2Vec:
    """CBOW trainer that wires an ``InputData`` reader to a ``CBOWModel``.

    NOTE(review): the source was whitespace-collapsed; statement nesting below
    is reconstructed from syntax and should be confirmed against the project.
    """

    def __init__(self, input_file_name, output_file_name):
        """Create the data reader, the model and its SparseAdam optimizer.

        Args:
            input_file_name: Forwarded to ``InputData`` along with ``MIN_COUNT``.
            output_file_name: Stored on the instance; the only call that used
                it (``save_embedding``) is disabled in ``train``.
        """
        self.output_file_name = output_file_name
        self.data = InputData(input_file_name, MIN_COUNT)
        self.model = CBOWModel(self.data.word_count, EMB_DIMENSION)
        self.lr = LR
        self.optimizer = optim.SparseAdam(self.model.parameters(), lr=self.lr)

    def train(self):
        """Train for 5000 steps, report timing/accuracy and save a t-SNE plot.

        Every 100 steps the loss is printed and ``self.model.predict`` is
        evaluated on the current batch; the best value seen is reported at the
        end. A 2-D t-SNE projection of ``u_embeddings`` is written to
        ``HS.png``.
        """
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed intervals.
        start = time.perf_counter()
        max_accuracy = 0
        for epoch in range(5000):
            all_pairs = self.data.get_batch_pairs(BATCH_SIZE, WINDOW_SIZE)
            pos_pairs, neg_pairs = self.data.get_pairs(all_pairs)

            # pos: the part whose Huffman code is 1 (per the original comment).
            pos_u = [pair[0] for pair in pos_pairs]
            pos_v = [int(pair[1]) for pair in pos_pairs]  # non-leaf nodes matching code 1
            # neg: the part whose Huffman code is 0 (per the original comment).
            neg_u = [pair[0] for pair in neg_pairs]
            neg_v = [int(pair[1]) for pair in neg_pairs]  # non-leaf nodes matching code 0

            self.optimizer.zero_grad()
            loss = self.model.forward(pos_u, pos_v, neg_u, neg_v)
            loss.backward()
            self.optimizer.step()  # apply the gradient update

            if epoch % 100 == 0:
                print("Epoch : %d, loss : %.02f" % (epoch, loss))
                ac = self.model.predict(all_pairs, self.data.huffman_tree)
                if ac > max_accuracy:
                    max_accuracy = ac

        end = time.perf_counter()
        print('time:%s seconds' % (end - start))
        print('accuracy:%.06f' % (max_accuracy))
        # Saving embeddings to self.output_file_name was disabled in the original:
        # self.model.save_embedding(self.data.id2word_dict, self.output_file_name)

        # Word-vector plot: project the embeddings to 2-D with t-SNE.
        tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=500)
        embed_two = tsne.fit_transform(
            self.model.u_embeddings.weight.cpu().detach().numpy())
        # Plot at most the first 200 ids; cap at the number of embedding rows
        # so a vocabulary smaller than 200 does not index out of range.
        label_count = min(200, len(embed_two))
        labels = [self.data.id2word_dict[i] for i in range(label_count)]
        plt.figure(figsize=(15, 12))
        for i, label in enumerate(labels):
            x, y = embed_two[i, :]
            plt.scatter(x, y)
            plt.annotate(label, (x, y), ha='center', va='top')
        plt.savefig('HS.png')

Exemplo n.º 3

0

Exibir arquivo

class Word2Vec:
    """Skip-gram trainer running a CUDA ``SkipGramModel`` for ``EPOCH`` epochs.

    NOTE(review): the source was whitespace-collapsed; statement nesting below
    (in particular which statements run per epoch versus once at the end) is
    reconstructed from syntax and should be confirmed against the project.
    """

    def __init__(self, input_file_name, output_file_name):
        """Create the data reader, the CUDA model and its SGD optimizer.

        Args:
            input_file_name: Forwarded to ``InputData`` along with ``MIN_COUNT``.
            output_file_name: Destination handed to ``save_embedding``.
        """
        self.output_file_name = output_file_name
        self.data = InputData(input_file_name, MIN_COUNT)
        self.model = SkipGramModel(self.data.word_count, EMB_DIMENSION).cuda()
        self.lr = LR
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)

    def train(self):
        """Train for ``EPOCH`` epochs, checkpointing the model state each epoch.

        The model ``state_dict`` is written to
        ``../results/url_with_location_skipgram_hs_wyz.pkl`` and the embeddings
        are saved through ``self.model.save_embedding``.
        """
        for _ in range(1, EPOCH + 1):
            print("SkipGram Training......")
            pairs_count = self.data.evaluate_pairs_count(WINDOW_SIZE)
            print("pairs_count", pairs_count)
            batch_count = pairs_count / BATCH_SIZE
            print("batch_count", batch_count)

            process_bar = tqdm(range(int(batch_count)))
            for i in process_bar:
                pos_pairs = self.data.get_batch_pairs(BATCH_SIZE, WINDOW_SIZE)
                pos_pairs, neg_pairs = self.data.get_pairs(pos_pairs)
                pos_u = [pair[0] for pair in pos_pairs]
                pos_v = [int(pair[1]) for pair in pos_pairs]
                neg_u = [pair[0] for pair in neg_pairs]
                neg_v = [int(pair[1]) for pair in neg_pairs]

                self.optimizer.zero_grad()
                loss = self.model.forward(pos_u, pos_v, neg_u, neg_v)
                loss.backward()
                self.optimizer.step()

                if i * BATCH_SIZE % 100000 == 0:
                    # NOTE(review): the factor multiplies the already-decayed
                    # self.lr (and carries over between epochs), so the decay
                    # compounds -- confirm this is intended.
                    self.lr = self.lr * (1.0 - 1.0 * i / batch_count)
                    for param_group in self.optimizer.param_groups:
                        param_group['lr'] = self.lr

                # Iterating over the tqdm object already advances the bar by
                # one per step; the extra manual update() the original made
                # here double-counted progress, so it is dropped.
                process_bar.set_postfix(loss=loss.data.cpu().numpy())

            print('\n')
            torch.save(self.model.state_dict(),
                       "../results/url_with_location_skipgram_hs_wyz.pkl")

        self.model.save_embedding(self.data.id2word_dict, self.output_file_name)

Exemplo n.º 4

0

Exibir arquivo

class Net2vec:
    """Network-embedding trainer built on ``InputData`` and ``NetModel``.

    NOTE(review): the source was whitespace-collapsed; statement nesting below
    is reconstructed from syntax and should be confirmed against the project.
    """

    def __init__(self,
                 input_user_file_name,
                 input_links_file_name,
                 output_file_name,
                 emb_dimension=100,
                 num_batch=30000,
                 batch_size=100,
                 initial_lr=0.025):
        """Initialize class parameters.

        Args:
            input_user_file_name: User data file.
            input_links_file_name: Relation (links) data file.
            output_file_name: File the embeddings are saved to.
            emb_dimension: Embedding vector dimension.
            num_batch: Number of batches to process.
            batch_size: Number of samples per batch.
            initial_lr: Initial learning rate.

        Returns:
            None.
        """
        # Load and preprocess the data.
        self.data = InputData(input_user_file_name, input_links_file_name)
        self.output_file_name = output_file_name
        # emb_size is the size of the embedding table: the number of vertices.
        self.emb_size = self.data.vertex_count
        self.emb_dimension = emb_dimension
        # batch_size is the amount of data used for each update.
        self.batch_size = batch_size
        self.initial_lr = initial_lr
        self.num_batch = num_batch
        # Build the model. The +1 is there because vertex ids start at 1
        # (per the original author's comment), so slot 0 is an unused
        # placeholder vector.
        self.NetModel = NetModel(self.emb_size + 1, self.emb_dimension)
        # Use CUDA acceleration when available.
        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.NetModel.cuda()
        # Parameters are updated with stochastic gradient descent.
        self.optimizer = optim.SGD(self.NetModel.parameters(),
                                   lr=self.initial_lr)

    def train(self):
        """Multiple training.

        Returns:
            None.
        """
        # Set up the progress bar.
        process_bar = tqdm(range(self.num_batch))
        lr = self.initial_lr
        for i in process_bar:
            # Returns u_i, u_j of the positive samples and u_i, u_j of the
            # negative samples; 5 is the number of negative samples drawn per
            # positive sample.
            u_i, u_j, neg_u, neg_v = self.data.get_pairs(self.batch_size, 5)
            pos_u = Variable(torch.LongTensor(u_i))
            pos_v = Variable(torch.LongTensor(u_j))
            neg_u = Variable(torch.LongTensor(neg_u))
            neg_v = Variable(torch.LongTensor(neg_v))
            if self.use_cuda:
                pos_u = pos_u.cuda()
                pos_v = pos_v.cuda()
                neg_u = neg_u.cuda()
                neg_v = neg_v.cuda()

            # Feed the positive and negative samples to the model; 2 selects
            # second-order proximities.
            loss = self.NetModel.forward(pos_u, pos_v, neg_u, neg_v, 2)
            # Clear accumulated gradients before the backward pass.
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # loss.item() replaces loss.data[0], which raises IndexError on
            # the 0-dim loss tensors produced by current PyTorch releases.
            process_bar.set_description(
                "Loss: %0.8f, lr: %0.6f" %
                (loss.item(), self.optimizer.param_groups[0]['lr']))

            # Adjust the learning rate.
            # NOTE(review): the condition is also true at i == 0, so the rate
            # is halved right after the first step -- confirm this is intended.
            if i % 1500000 == 0:
                lr = 0.5 * lr
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = lr

        # Save the learned parameters.
        self.NetModel.save_embedding(self.output_file_name, self.use_cuda)