def load_data(args, word_vocab, char_vocab, entities_context_data):
    """Assemble the corpus and the initial entity/relation embeddings.

    Args:
        args: Run configuration. This function reads ``data``,
            ``pretrained_emb``, ``embedding_size``, ``batch_size_gat``,
            ``valid_invalid_ratio_gat``, ``get_2hop`` and ``get_1hop``.
        word_vocab: Forwarded unchanged to ``Corpus``.
        char_vocab: Forwarded unchanged to ``Corpus``.
        entities_context_data: Forwarded unchanged to ``Corpus``.

    Returns:
        A 3-tuple ``(corpus, entity_embeddings, relation_embeddings)`` where
        both embedding tables are ``torch.FloatTensor`` objects.
    """
    # build_data yields eight values; the meaning of each is taken from the
    # names used at this call site (see build_data for the authoritative
    # description).
    (train_split, valid_split, test_split,
     entity2id, relation2id, head_tail_selector,
     train_entities, test_entities) = build_data(
        args.data, is_unweigted=False, directed=True)

    if not args.pretrained_emb:
        # Standard-normal initialisation, one row per entity / relation id.
        ent_table = np.random.randn(len(entity2id), args.embedding_size)
        rel_table = np.random.randn(len(relation2id), args.embedding_size)
        print("Initialised relations and entities randomly")
    else:
        entity_file = os.path.join(args.data, 'entity2vec.txt')
        relation_file = os.path.join(args.data, 'relation2vec.txt')
        ent_table, rel_table = init_embeddings(entity_file, relation_file)
        print("Initialised relations and entities from TransE")

    corpus = Corpus(
        args,
        train_split, valid_split, test_split,
        entity2id, relation2id, head_tail_selector,
        args.batch_size_gat, args.valid_invalid_ratio_gat,
        train_entities, test_entities,
        entities_context_data, word_vocab, char_vocab,
        args.get_2hop, args.get_1hop,
    )

    entity_tensor = torch.FloatTensor(ent_table)
    relation_tensor = torch.FloatTensor(rel_table)
    return corpus, entity_tensor, relation_tensor
def load_data(args):
    """Build the corpus, initial embeddings and (optionally) the 2-hop map.

    Args:
        args: Run configuration. This function reads ``data``,
            ``pretrained_emb``, ``embedding_size``, ``batch_size_gat``,
            ``valid_invalid_ratio_gat``, ``get_2hop`` and ``use_2hop``.

    Returns:
        A 4-tuple ``(corpus, entity_embeddings, relation_embeddings,
        node_neighbors_2hop)``. The embeddings are built with
        ``torch.cuda.FloatTensor``. ``node_neighbors_2hop`` is the object
        unpickled from ``<args.data>/2hop.pickle`` when ``args.use_2hop`` is
        truthy, and ``None`` otherwise.

    Side effects:
        When ``args.get_2hop`` is truthy, ``corpus.node_neighbors_2hop`` is
        pickled to ``<args.data>/2hop.pickle``.
    """
    (train_data, validation_data, test_data, entity2id, relation2id,
     headTailSelector, unique_entities_train) = build_data(
        args.data, is_unweigted=False, directed=True)

    if args.pretrained_emb:
        entity_embeddings, relation_embeddings = init_embeddings(
            os.path.join(args.data, 'entity2vec.txt'),
            os.path.join(args.data, 'relation2vec.txt'))
        print("Initialised relations and entities from TransE")
    else:
        entity_embeddings = np.random.randn(
            len(entity2id), args.embedding_size)
        relation_embeddings = np.random.randn(
            len(relation2id), args.embedding_size)
        print("Initialised relations and entities randomly")

    corpus = Corpus(args, train_data, validation_data, test_data,
                    entity2id, relation2id, headTailSelector,
                    args.batch_size_gat, args.valid_invalid_ratio_gat,
                    unique_entities_train, args.get_2hop)

    if args.get_2hop:
        file = args.data + "/2hop.pickle"
        with open(file, 'wb') as handle:
            pickle.dump(corpus.node_neighbors_2hop, handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

    # Bind the name up front: it is part of the return value even when
    # use_2hop is off, and leaving it unbound raised UnboundLocalError.
    node_neighbors_2hop = None
    if args.use_2hop:
        print("Opening node_neighbors pickle object")
        file = args.data + "/2hop.pickle"
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # args.data at directories whose contents are trusted.
        with open(file, 'rb') as handle:
            node_neighbors_2hop = pickle.load(handle)

    # NOTE(review): torch.cuda.FloatTensor presumably requires a CUDA-enabled
    # runtime; kept as-is because callers expect CUDA tensors.
    return (corpus,
            torch.cuda.FloatTensor(entity_embeddings),
            torch.cuda.FloatTensor(relation_embeddings),
            node_neighbors_2hop)
def load_data(args):
    """Build the corpus and the initial entity/relation embedding tensors.

    The id-to-name maps returned by ``build_data`` are stored on ``args`` as
    ``args.id2entity`` and ``args.id2relation``.

    Args:
        args: Run configuration. This function reads ``data``,
            ``pretrained_emb``, ``embedding_size``, ``batch_size_gat``,
            ``valid_invalid_ratio_gat`` and ``get_2hop``, and writes
            ``id2entity`` and ``id2relation``.

    Returns:
        A 3-tuple ``(corpus, entity_embeddings, relation_embeddings)`` where
        both embedding tables are ``torch.FloatTensor`` objects.
    """
    (train_split, valid_split, test_split,
     entity2id, relation2id,
     args.id2entity, args.id2relation,
     head_tail_selector,
     train_entities, train_relations) = build_data(
        args.data, is_unweigted=False, directed=False)

    print('Training size', len(train_split),
          'Val size', len(valid_split),
          'Test size', len(test_split))

    def _random_table(vocab):
        # One standard-normal row of width args.embedding_size per vocab id.
        return np.random.randn(len(vocab), args.embedding_size)

    if not args.pretrained_emb:
        ent_table = _random_table(entity2id)
        rel_table = _random_table(relation2id)
        print("Initialised relations and entities randomly")
    else:
        # Only an entity file is supplied; the relation path is None.
        ent_table, rel_table = init_embeddings(
            os.path.join(args.data, 'entity2vec.txt'), None)
        # An empty table (zero rows) is replaced by a random one.
        if ent_table.shape[0] == 0:
            ent_table = _random_table(entity2id)
        if rel_table.shape[0] == 0:
            rel_table = _random_table(relation2id)
        print("Initialised relations and entities from SSP")

    corpus = Corpus(
        args,
        train_split, valid_split, test_split,
        entity2id, relation2id, head_tail_selector,
        args.batch_size_gat, args.valid_invalid_ratio_gat,
        train_entities, train_relations,
        args.get_2hop,
    )

    entity_tensor = torch.FloatTensor(ent_table)
    relation_tensor = torch.FloatTensor(rel_table)
    return corpus, entity_tensor, relation_tensor
def load_data(args):
    """Build the three corpora and all initial embedding tensors.

    The corpora are named after the split prefixes used at the
    ``build_data`` call site: ``e2t``, ``ere`` and ``trt``.

    Args:
        args: Run configuration. This function reads ``data``,
            ``pretrained_emb``, ``entity_embedding_size``,
            ``type_embedding_size``, ``batch_size_gat``,
            ``valid_invalid_ratio_gat`` and ``get_2hop``.

    Returns:
        A 7-tuple ``(corpus_e2t, corpus_ere, corpus_trt, entity_embeddings,
        relation_embeddings, rdf_relation_embeddings, type_embeddings)``;
        the four embedding tables are ``torch.FloatTensor`` objects.
    """
    (train_data_e2t, validation_data_e2t, test_data_e2t,
     train_data_ere, validation_data_ere, test_data_ere,
     train_data_trt, validation_data_trt, test_data_trt,
     entity2id, relation2id, type2id, headTailSelector,
     unique_entities_train_e2t, unique_types_train_e2t,
     unique_entities_train_ere, unique_types_train_trt) = build_data(
        args.data, is_unweigted=False, directed=True)

    if args.pretrained_emb:
        entity_embeddings, relation_embeddings = init_embeddings(
            os.path.join(args.data, 'entity2vec.txt'),
            os.path.join(args.data, 'relation2vec.txt'))
        print("Initialised relations and entities from TransE")
    else:
        entity_embeddings = np.random.randn(
            len(entity2id), args.entity_embedding_size)
        relation_embeddings = np.random.randn(
            len(relation2id), args.entity_embedding_size)
        print("Initialised relations and entities randomly")

    # These two tables have no pretrained source in this function, so they
    # are always drawn randomly. They used to be created only in the
    # random-init branch, which left them unbound (UnboundLocalError at the
    # return) whenever args.pretrained_emb was set. Creating them here keeps
    # the draw order on the random path: entity, relation, rdf, type.
    rdf_relation_embeddings = np.random.randn(1, args.type_embedding_size)
    type_embeddings = np.random.randn(len(type2id), args.type_embedding_size)

    corpus_e2t = Corpus_e2t(
        args, train_data_e2t, validation_data_e2t, test_data_e2t,
        entity2id, type2id,
        args.batch_size_gat, args.valid_invalid_ratio_gat,
        unique_entities_train_e2t, unique_types_train_e2t, args.get_2hop)

    corpus_ere = Corpus(
        args, train_data_ere, validation_data_ere, test_data_ere,
        entity2id, relation2id, headTailSelector,
        args.batch_size_gat, args.valid_invalid_ratio_gat,
        unique_entities_train_ere, args.get_2hop)

    # The trt corpus reuses Corpus with the type vocabulary passed in the
    # entity-vocabulary position.
    corpus_trt = Corpus(
        args, train_data_trt, validation_data_trt, test_data_trt,
        type2id, relation2id, headTailSelector,
        args.batch_size_gat, args.valid_invalid_ratio_gat,
        unique_types_train_trt, args.get_2hop)

    return (corpus_e2t, corpus_ere, corpus_trt,
            torch.FloatTensor(entity_embeddings),
            torch.FloatTensor(relation_embeddings),
            torch.FloatTensor(rdf_relation_embeddings),
            torch.FloatTensor(type_embeddings))