Example #1
    def __init__(self,
                 data,
                 claims_dict,
                 batch_size=1,
                 split=None,
                 testFile="train.jsonl",
                 sparse_evidences=None):
        """
        Initializes the class.
        """

        if split:
            self.indicies = split
        else:
            self.indicies = list(range(len(data)))

        self.data = data[::-1]

        if sparse_evidences:
            self.evidence_to_sparse = sparse_evidences
        else:
            self.evidence_to_sparse = None

        use_cuda = True
        self.device = torch.device("cuda:0" if use_cuda else "cpu")
        self.encoder = utils.ClaimEncoder()
        self.claims_dict = claims_dict
        self.batch_size = batch_size
        _, _, _, _, self.claim_to_article = utils.extract_fever_jsonl_data(
            testFile)
Example #2
    def __init__(self,
                 data,
                 claims_dict,
                 data_sampling=10,
                 batch_size=32,
                 split=None,
                 randomize=True,
                 testFile="train.jsonl",
                 sparse_evidences=None):
        """
        Sets the initial arguments and creates an indices array that is
        used to randomize the dataset between epochs.
        """
        # Use the provided split as the index array; otherwise index all entries.
        if split:
            self.indicies = split
        else:
            self.indicies = list(range(len(data)))
        self.data = data[::-1]
        self.randomize = randomize
        # Optional mapping from evidence to precomputed sparse vectors.
        if sparse_evidences:
            self.evidence_to_sparse = sparse_evidences
        else:
            self.evidence_to_sparse = None
        use_cuda = True
        self.device = torch.device("cuda:0" if use_cuda else "cpu")
        self.data_sampling = data_sampling
        self.encoder = utils.ClaimEncoder()
        self.claims_dict = claims_dict
        self.batch_size = batch_size
        self.collate_fn = PadCollate()
        _, _, _, _, self.claim_to_article = utils.extract_fever_jsonl_data(
            testFile)
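
The docstring above says the `indicies` array exists so that the dataset order can be randomized between epochs, but the shuffling itself is not part of this `__init__` snippet. A minimal, self-contained sketch of that idea (the helper name and usage below are illustrative, not taken from the original class):

import random

# Hypothetical helper: the iteration order is controlled entirely by the index
# list, which is reshuffled at the start of each epoch when `randomize` is set.
def epoch_batches(indicies, batch_size, randomize=True):
    order = list(indicies)
    if randomize:
        random.shuffle(order)            # new order every epoch
    for start in range(0, len(order), batch_size):
        yield order[start:start + batch_size]

# Usage sketch: two epochs over 10 items in batches of 4.
data = list(range(10))
for epoch in range(2):
    for batch_idx in epoch_batches(range(len(data)), 4):
        batch = [data[i] for i in batch_idx]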
Example #3
    def __init__(self, data, batch_size=32, split=None):
        """
        Sets the initial arguments and creates an indices array that is
        used to randomize the dataset between epochs.
        """
        # Use the provided split as the index array; otherwise index all entries.
        if split:
            self.indicies = split
        else:
            self.indicies = list(range(len(data)))
        self.data = data
        encoder = utils.ClaimEncoder()
        self.batch_size = batch_size
        _, _, _, _, self.claim_to_article = utils.extract_fever_jsonl_data(
            "../train.jsonl")
Example #4
    def __init__(self, data, claims_dict, data_batch_size=10, batch_size=32, split=None):
        """
        Sets the initial arguments and creates an indices array that is
        used to randomize the dataset between epochs.
        """
        # Use the provided split as the index array; otherwise index all entries.
        if split:
            self.indicies = split
        else:
            self.indicies = list(range(len(data)))
        self.data = data
        use_cuda = True
        self.device = torch.device("cuda:0" if use_cuda else "cpu")
        self.data_batch_size = data_batch_size
        self.encoder = utils.ClaimEncoder()
        self.claims_dict = claims_dict
        self.batch_size = batch_size
        _, _, _, _, self.claim_to_article = utils.extract_fever_jsonl_data(
            "../train.jsonl")
Example #5
import pickle

import joblib
import keras
import numpy as np
from scipy import sparse
from tqdm import tqdm_notebook
from deep_semantic_similarity_model import create_model

import utils

train = joblib.load("train.pkl")

encoder = utils.ClaimEncoder()

claims, labels, article_list, claim_set, claim_to_article = utils.extract_fever_jsonl_data("../train.jsonl")

def stack_uneven(arrays, fill_value=0.):
    '''
    Fits arrays into a single numpy array, even if they are
    different sizes. `fill_value` is the value used for padding.

    Args:
        arrays: list of np arrays of various sizes
            (must be same rank, but not necessarily same size)
        fill_value (float, optional): value used to pad the gaps

    Returns:
        np.ndarray
    '''
    sizes = [a.shape for a in arrays]
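
The `stack_uneven` example above is cut off right after computing `sizes`. A hedged sketch of one way to finish the padding-and-stacking idea described in its docstring (this is not the original author's implementation):

import numpy as np

def stack_uneven_sketch(arrays, fill_value=0.):
    sizes = [a.shape for a in arrays]
    # Largest extent along each axis across all inputs (all arrays share a rank).
    max_sizes = np.max(list(zip(*sizes)), axis=-1)
    # One padded slot per input array, pre-filled with `fill_value`.
    result = np.full((len(arrays),) + tuple(max_sizes), fill_value)
    for i, a in enumerate(arrays):
        # Copy each array into the leading corner of its slot.
        result[i][tuple(slice(0, s) for s in a.shape)] = a
    return result

For example, `stack_uneven_sketch([np.ones((2, 3)), np.ones((1, 5))])` returns a `(2, 2, 5)` array whose unfilled cells are zero.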
# **Input**: This script requires the Lucene index and the JSON files to run.

# ## Setting up Lucene Query

import utils
import pickle
from tqdm import tqdm_notebook
from joblib import Parallel, delayed
from multiprocessing import cpu_count
import numpy as np
import subprocess
import string
import argparse

claims, labels, article_list, claim_set, claim_to_article = utils.extract_fever_jsonl_data(
    "dev_wo_nei_short.jsonl")

k = [1, 2, 5, 10, 20, 50, 100, 200, 300, 400]

parser = argparse.ArgumentParser(
    description='Learning the optimal convolution for network.')
parser.add_argument("--small",
                    action="store_true",
                    help="Verify that the script works on a small dataset.",
                    default=False)
parser.add_argument("--jar",
                    help="Location of the JAR to execute.",
                    default="untitled1.jar",
                    type=str)
args = parser.parse_args()
print(args)
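
The `--jar` argument and the `subprocess` import suggest the retrieval step shells out to the Lucene JAR for each claim. The command-line interface of `untitled1.jar` is not shown, so the following is only a sketch with placeholder arguments:

import subprocess

def query_lucene(jar_path, claim_text, top_k):
    # Hypothetical invocation; the real argument order and flags depend on the JAR.
    completed = subprocess.run(
        ["java", "-jar", jar_path, claim_text, str(top_k)],
        capture_output=True, text=True, check=True)
    # Assume the JAR prints one retrieved document title per line.
    return completed.stdout.splitlines()

Under those assumptions, something like `query_lucene(args.jar, claims[0], max(k))` would fetch up to 400 candidate articles for the first claim.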