def load_fb15k_data(dataset, sim_relations, keep_fraction=100):
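    """Load the FB15k-237 train/valid/test splits and convert them into
    DGL-ready triples indexed by the training vocabulary.

    `keep_fraction` controls how much of the training split is read;
    `sim_relations` is not used by this loader. Returns the node count,
    train/valid/test triples, relation count, valid/test labels, and the
    training network.
    """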
    train_network = FB15kReader(dataset)
    dev_network = FB15kReader(dataset)
    test_network = FB15kReader(dataset)

    train_network.read_network(data_dir="data/FB15k-237/",
                               keep_fraction=keep_fraction,
                               split="train")
    train_network.print_summary()
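    # Report the average node degree of the training graph.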
    node_list = train_network.graph.iter_nodes()
    node_degrees = [node.get_degree() for node in node_list]
    degree_counter = Counter(node_degrees)
    avg_degree = sum(k * v for k, v in degree_counter.items()) / sum(
        degree_counter.values())
    print("Average Degree: ", avg_degree)

    dev_network.read_network(data_dir="data/FB15k-237/",
                             split="valid",
                             train_network=train_network)
    test_network.read_network(data_dir="data/FB15k-237/",
                              split="test",
                              train_network=train_network)
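    # The training graph's node-to-id mapping serves as the shared vocabulary.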
    word_vocab = train_network.graph.node2id

    train_data, _ = reader_utils.prepare_batch_dgl(word_vocab, train_network,
                                                   train_network)
    test_data, test_labels = reader_utils.prepare_batch_dgl(
        word_vocab, test_network, train_network)
    valid_data, valid_labels = reader_utils.prepare_batch_dgl(
        word_vocab, dev_network, train_network)
    return (len(train_network.graph.nodes), train_data, valid_data, test_data,
            len(train_network.graph.relations), valid_labels, test_labels,
            train_network)


def load_cn_data(dataset, sim_relations, eval_accuracy=False):
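    """Load the ConceptNet train/valid1/valid2 splits, optionally add
    BERT-based similarity edges to the training graph, and convert each split
    into DGL-ready triples.

    When `eval_accuracy` is True, negative triples are kept in the evaluation
    splits.
    """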
    train_network = ConceptNetTSVReader(dataset)
    dev1_network = ConceptNetTSVReader(dataset)
    dev2_network = ConceptNetTSVReader(dataset)
    test_network = ConceptNetTSVReader(dataset)

    # Keep negative triples in the evaluation splits only when measuring
    # classification accuracy; otherwise read positive triples only.
    positive_only = not eval_accuracy

    train_network.read_network(data_dir="data/", split="train")

    train_network.print_summary()
    #node_list = train_network.graph.iter_nodes()
    #node_degrees = [node.get_degree() for node in node_list]
    #degree_counter = Counter(node_degrees)
    #avg_degree = sum([k*v for k,v in degree_counter.items()]) / sum([v for k,v in degree_counter.items()])
    #print("Average Degree: ", avg_degree)

    dev1_network.read_network(data_dir="data/",
                              split="valid1",
                              train_network=train_network,
                              positive_only=positive_only)
    dev2_network.read_network(data_dir="data/",
                              split="valid2",
                              train_network=train_network,
                              positive_only=positive_only)
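    # NOTE: the test reader loads the "valid2" split as well, so test_data
    # mirrors valid2_data below.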
    test_network.read_network(data_dir="data/",
                              split="valid2",
                              train_network=train_network,
                              positive_only=positive_only)

    # Add BERT-based similarity edges to the training graph
    if sim_relations:
        print("Adding sim edges..")
        train_network.add_sim_edges_bert()

    #word_vocab, word_freqs = reader_utils.create_vocab(train_network)
    word_vocab = train_network.graph.node2id
    train_data, _ = reader_utils.prepare_batch_dgl(word_vocab, train_network,
                                                   train_network)
    test_data, test_labels = reader_utils.prepare_batch_dgl(
        word_vocab, test_network, train_network)
    valid1_data, valid1_labels = reader_utils.prepare_batch_dgl(
        word_vocab, dev1_network, train_network)
    valid2_data, valid2_labels = reader_utils.prepare_batch_dgl(
        word_vocab, dev2_network, train_network)

    return (len(train_network.graph.nodes), train_data, valid1_data, test_data,
            len(train_network.graph.relations), valid1_labels, test_labels,
            train_network)


def load_cn_full_data(dataset, sim_relations):
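    """Load the full ConceptNet train/valid/test splits from the hard-coded
    data directory, optionally add BERT-based similarity edges, and convert
    each split into DGL-ready triples."""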

    train_network = ConceptNetFullReader(dataset)
    dev_network = ConceptNetFullReader(dataset)
    test_network = ConceptNetFullReader(dataset)

    train_network.read_network(
        data_dir="/net/efs.mosaic/chaitanyam/ConvE/data/", split="train")
    train_network.print_summary()
    node_list = train_network.graph.iter_nodes()
    node_degrees = [node.get_degree() for node in node_list]
    degree_counter = Counter(node_degrees)
    avg_degree = sum(k * v for k, v in degree_counter.items()) / sum(
        degree_counter.values())
    print("Average Degree: ", avg_degree)

    dev_network.read_network(data_dir="/net/efs.mosaic/chaitanyam/ConvE/data/",
                             split="valid",
                             train_network=train_network)
    test_network.read_network(
        data_dir="/net/efs.mosaic/chaitanyam/ConvE/data/",
        split="test",
        train_network=train_network)

    #node_names = []
    #for node in train_network.graph.iter_nodes():
    #    node_names.append(node.name)
    #with open("cn-full_node_names.txt", 'w') as f:
    #    f.writelines([n.split("/")[-2].replace("_", " ")+"\n" for n in node_names if n not in string.punctuation and not n.isdigit()])
    #import sys; sys.exit(0)

    if sim_relations:
        print("Adding sim edges..")
        train_network.add_sim_edges_bert()

    #word_vocab, word_freqs = reader_utils.create_vocab(train_network)
    word_vocab = train_network.graph.node2id
    train_data, _ = reader_utils.prepare_batch_dgl(word_vocab, train_network,
                                                   train_network)
    test_data, test_labels = reader_utils.prepare_batch_dgl(
        word_vocab, test_network, train_network)
    valid_data, valid_labels = reader_utils.prepare_batch_dgl(
        word_vocab, dev_network, train_network)

    return (len(train_network.graph.nodes), train_data, valid_data, test_data,
            len(train_network.graph.relations), valid_labels, test_labels,
            train_network)


def load_atomic_data(dataset, sim_relations):
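    """Load the ATOMIC train/valid/test splits, optionally add BERT-based
    similarity edges to the training graph, and convert each split into
    DGL-ready triples."""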
    train_network = AtomicTSVReader(dataset)
    dev_network = AtomicTSVReader(dataset)
    test_network = AtomicTSVReader(dataset)

    #train_network.read_network(data_dir="data/atomic-original/", split="train")
    train_network.read_network(data_dir="data/atomic/", split="train")
    train_network.print_summary()
    node_list = train_network.graph.iter_nodes()
    node_degrees = [node.get_degree() for node in node_list]
    degree_counter = Counter(node_degrees)
    avg_degree = sum(k * v for k, v in degree_counter.items()) / sum(
        degree_counter.values())
    print("Average Degree: ", avg_degree)

    #dev_network.read_network(data_dir="data/atomic-original/", split="valid", train_network=train_network)
    #test_network.read_network(data_dir="data/atomic-original/", split="test", train_network=train_network)

    dev_network.read_network(data_dir="data/atomic/",
                             split="valid",
                             train_network=train_network)
    test_network.read_network(data_dir="data/atomic/",
                              split="test",
                              train_network=train_network)
    word_vocab = train_network.graph.node2id

    #node_names = []
    #for node in train_network.graph.iter_nodes():
    #    node_names.append(node.name + "\n")
    #with open("atomic_node_names.txt", 'w') as f:
    #    f.writelines([reader_utils.preprocess_atomic_sentence(n.replace("-", " ")) for n in node_names])
    #import sys; sys.exit(0)

    # Add BERT-based similarity edges to the training graph
    if sim_relations:
        print("Adding sim edges..")
        train_network.add_sim_edges_bert()

    train_data, _ = reader_utils.prepare_batch_dgl(word_vocab, train_network,
                                                   train_network)
    test_data, test_labels = reader_utils.prepare_batch_dgl(
        word_vocab, test_network, train_network)
    valid_data, valid_labels = reader_utils.prepare_batch_dgl(
        word_vocab, dev_network, train_network)
    return (len(train_network.graph.nodes), train_data, valid_data, test_data,
            len(train_network.graph.relations), valid_labels, test_labels,
            train_network)


def load_data(dataset, reader_cls, data_dir, sim_relations):
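    """Generic loader: read the train/valid/test splits of `dataset` with
    `reader_cls` from `data_dir`, optionally add BERT-based similarity edges,
    and convert each split into DGL-ready triples."""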
    train_network = reader_cls(dataset)
    dev_network = reader_cls(dataset)
    test_network = reader_cls(dataset)

    train_network.read_network(data_dir=data_dir, split="train")
    train_network.print_summary()
    node_list = train_network.graph.iter_nodes()
    node_degrees = [node.get_degree() for node in node_list]
    degree_counter = Counter(node_degrees)
    avg_degree = sum(k * v for k, v in degree_counter.items()) / sum(
        degree_counter.values())
    print("Average Degree: ", avg_degree)

    dev_network.read_network(data_dir=data_dir,
                             split="valid",
                             train_network=train_network)
    test_network.read_network(data_dir=data_dir,
                              split="test",
                              train_network=train_network)

    word_vocab = train_network.graph.node2id

    # Add BERT-based similarity edges to the training graph
    if sim_relations:
        print("Adding sim edges..")
        train_network.add_sim_edges_bert()

    train_data, _ = reader_utils.prepare_batch_dgl(word_vocab, train_network,
                                                   train_network)
    test_data, test_labels = reader_utils.prepare_batch_dgl(
        word_vocab, test_network, train_network)
    valid_data, valid_labels = reader_utils.prepare_batch_dgl(
        word_vocab, dev_network, train_network)

    return train_data, valid_data, test_data, valid_labels, test_labels, train_network