Example #1
    def __init__(self, params, batcher, prepare=None):
        # parameters
        params = utils.dotdict(params)
        params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
        params.seed = 1111 if 'seed' not in params else params.seed

        params.batch_size = 128 if 'batch_size' not in params else params.batch_size
        params.nhid = 0 if 'nhid' not in params else params.nhid
        params.kfold = 5 if 'kfold' not in params else params.kfold

        if 'classifier' not in params or not params['classifier']:
            params.classifier = {'nhid': 0}

        assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

        self.params = params

        # batcher and prepare
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        self.list_tasks = [
            'CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC',
            'SICKRelatedness', 'SICKEntailment', 'STSBenchmark', 'SNLI',
            'ImageCaptionRetrieval', 'STS12', 'STS13', 'STS14', 'STS15',
            'STS16'
        ]
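
The constructor above only stores the configuration and the two callbacks; it is typically driven by building a params dict, a prepare function, and a batcher that maps a batch of tokenized sentences to fixed-size vectors. A minimal usage sketch, assuming the SentEval-style senteval.engine.SE API seen in the later examples (PATH_TO_DATA and the random batcher are illustrative assumptions, not part of the snippet above):

import numpy as np
import senteval

PATH_TO_DATA = '../data'  # placeholder; point this at the SentEval data directory

def prepare(params, samples):
    # Build vocabularies or load models needed by the batcher; nothing to do here.
    return

def batcher(params, batch):
    # One fixed-size vector per sentence; random vectors stand in for a real encoder.
    return np.random.rand(len(batch), 300)

params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5,
          'classifier': {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,
                         'tenacity': 3, 'epoch_size': 2}}
se = senteval.engine.SE(params, batcher, prepare)
results = se.eval(['MR', 'CR', 'STS14'])  # any subset of self.list_tasks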
Example #2
    def __init__(self, params, batcher, prepare=None):
        # parameters
        params = utils.dotdict(params)
        params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
        params.seed = 1111 if 'seed' not in params else params.seed

        params.batch_size = 128 if 'batch_size' not in params else params.batch_size
        params.nhid = 0 if 'nhid' not in params else params.nhid
        params.kfold = 5 if 'kfold' not in params else params.kfold

        if 'classifier' not in params or not params['classifier']:
            params.classifier = {'nhid': 0}

        assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

        self.params = params

        # batcher and prepare
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        self.list_tasks = [
            'BioC', 'CitationSA', 'ClinicalSA', 'BioASQ', 'PICO', 'PubMed20K',
            'RQE', 'ClinicalSTS', 'BIOSSES', 'MedNLI'
        ]
Example #3
    def __init__(self, params, batcher, prepare=None):
        # parameters
        params = utils.dotdict(params)
        params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
        params.seed = 1111 if 'seed' not in params else params.seed

        params.batch_size = 128 if 'batch_size' not in params else params.batch_size
        params.nhid = 0 if 'nhid' not in params else params.nhid
        params.kfold = 5 if 'kfold' not in params else params.kfold

        if 'classifier' not in params or not params['classifier']:
            params.classifier = {'nhid': 0}

        assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

        self.params = params

        # batcher and prepare
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        self.list_tasks = [
            'CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC',
            'SICKRelatedness', 'SICKEntailment', 'STSBenchmark', 'SNLI',
            'ImageCaptionRetrieval', 'STS12', 'STS13', 'STS14', 'STS15',
            'STS16', 'Length', 'WordContent', 'Depth', 'TopConstituents',
            'BigramShift', 'Tense', 'SubjNumber', 'ObjNumber', 'OddManOut',
            'CoordinationInversion', 'AmenitySimilarEvents'
        ]
Example #4
    def __init__(self, params, batcher, prepare=None):
        # parameters
        params = utils.dotdict(params)
        params.seed = 1111 if 'seed' not in params else params.seed

        params.batch_size = 128 if 'batch_size' not in params else params.batch_size
        self.params = params

        # batcher and prepare
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        self.list_tasks = ['STS12', 'STS13', 'STS14', 'STS15', 'STS16']
Example #5
def apply_logician(s1, s2, is_list=False, sick_model=False):

	# is_list : if you pass raw sentence strings directly, keep is_list=False;
	#           if you pass a list of lists of tokens, set is_list=True

	# sick_model: if True, use the SICK model for prediction;
	#             if False, use the SNLI model for prediction

	# Load InferSent model
	params_model = {'bsize': 64, 'word_emb_dim': 300, 'enc_lstm_dim': 2048,
					'pool_type': 'max', 'dpout_model': 0.0, 'version': V}
	model = InferSent(params_model)
	model.load_state_dict(torch.load(MODEL_PATH))
	model.set_w2v_path(PATH_TO_W2V)

	params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}
	params_senteval['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,
									 'tenacity': 3, 'epoch_size': 2}

	params_senteval['infersent'] = model.cuda()

	if not is_list:
		s1 = convert_str2lst(s1)
		s2 = convert_str2lst(s2)
	samples = s1 + s2
	params_senteval['batch_size'] = min(128, len(s1))
	params_senteval = utils.dotdict(params_senteval)
	params_senteval.usepytorch = True

	prepare(params_senteval, samples)

	emb_s1 = batcher(params_senteval, s1)
	emb_s2 = batcher(params_senteval, s2)
	if sick_model:
		testF = np.c_[np.abs(emb_s1 - emb_s2), emb_s1 * emb_s2]
		cp = torch.load('./saved_sick.pth')
		print('[Contradiction  Neutral  Entailment]')
	else:
		testF = np.c_[emb_s1, emb_s2, emb_s1 * emb_s2, np.abs(emb_s1 - emb_s2)]
		cp = torch.load('./saved_snli_augment_ordered.pth')
		print('[ Entailment  Neutral Contradiction ]')
	inputdim = testF.shape[1]
	nclasses = 3
	clf = nn.Sequential(nn.Linear(inputdim, nclasses),).cuda()
	clf.load_state_dict(cp)

	testF = torch.FloatTensor(testF).cuda()
	out = clf(testF)
	sf = nn.Softmax(1)
	probs = sf(out)
	return probs
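
A usage sketch for apply_logician, assuming the module-level constants (V, MODEL_PATH, PATH_TO_W2V, PATH_TO_DATA) and the helpers convert_str2lst, prepare and batcher are defined elsewhere in the file, as the function itself presumes; the example sentences are illustrative and the exact input shape accepted with is_list=False depends on convert_str2lst:

premise = 'A man is playing a guitar.'
hypothesis = 'Someone is making music.'
probs = apply_logician(premise, hypothesis, is_list=False, sick_model=False)
# probs is a softmax over the three NLI classes, in the order printed by the function
print(probs)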
Example #6
    def __init__(self, params, batcher, prepare=None):
        # parameters
        params = utils.dotdict(params)
        params.usepytorch = True if "usepytorch" not in params else params.usepytorch
        params.seed = 1111 if "seed" not in params else params.seed

        params.batch_size = 128 if "batch_size" not in params else params.batch_size
        params.nhid = 0 if "nhid" not in params else params.nhid
        params.kfold = 5 if "kfold" not in params else params.kfold

        if "classifier" not in params or not params["classifier"]:
            params.classifier = {"nhid": 0}

        if "nhid" not in params.classifier:
            raise ValueError(
                "Number of hidden units not set. Please set number of hidden units "
                + "in classifier config.")

        self.params = params

        # batcher and prepare
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        self.list_tasks = [
            # "Length",
            # "EntityDistance",
            # "ArgumentOrder",
            # "EntityExistsBetweenHeadTail",
            # "EntityCountORGBetweenHeadTail",
            # "EntityCountPERBetweenHeadTail",
            # "EntityCountDATEBetweenHeadTail",
            # "EntityCountMISCBetweenHeadTail",
            # "EntityCountLOCBetweenHeadTail",
            # "PosTagHeadLeft",
            # "PosTagHeadRight",
            # "PosTagTailLeft",
            # "PosTagTailRight",
            # "ArgTypeHead",
            # "ArgTypeTail",
            # "TreeDepth",
            # "SDPTreeDepth",
            # "ArgumentHeadGrammaticalRole",
            # "ArgumentTailGrammaticalRole",
            # Kirk's new code
            "ArgumentAddGrammarRole_Head",
            "ArgumentAddGrammarRole_Tail",
            "ArgumentGrammarRole_ControlHead",
            "ArgumentGrammarRole_ControlTail",
        ]
Example #7
    def __init__(self, params, batcher, prepare=None):
        params = utils.dotdict(params)
        params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
        params.seed = 1111 if 'seed' not in params else params.seed
        params.batch_size = 128 if 'batch_size' not in params else params.batch_size
        params.nhid = 0 if 'nhid' not in params else params.nhid
        params.kfold = 5 if 'kfold' not in params else params.kfold

        if 'classifier' not in params or not params['classifier']:
            params.classifier = {'nhid': 0}

        assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

        self.params = params
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None
        self.list_tasks = [
            'WCCRS_HOTELS', 'WCCRS_MEDICINE', 'SICKRelatedness',
            'SICKEntailment', '8TAGS'
        ]
Example #8
    def __init__(self, params, batcher, prepare=None):
        # parameters
        params = utils.dotdict(params)
        params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
        params.seed = 1111 if 'seed' not in params else params.seed

        params.batch_size = 128 if 'batch_size' not in params else params.batch_size
        params.nhid = 0 if 'nhid' not in params else params.nhid
        params.kfold = 5 if 'kfold' not in params else params.kfold

        if 'classifier' not in params or not params['classifier']:
            params.classifier = {'nhid': 0}

        assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

        self.params = params

        # batcher and prepare
        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        self.list_tasks = ['SST2', 'SST3', 'MRPC', 'ReadabilityCl', 'TagCl', 'PoemsCl', 'ProzaCl', 'TREC', 'STS', 'SICK']
Example #9
    def __init__(self, params, batcher, prepare=None):
        # setting default parameters
        params = utils.dotdict(params)
        params.usepytorch = True if 'usepytorch' not in params else params.usepytorch
        params.seed = 1111 if 'seed' not in params else params.seed

        params.batch_size = 128 if 'batch_size' not in params else params.batch_size
        params.nhid = 0 if 'nhid' not in params else params.nhid
        params.kfold = 5 if 'kfold' not in params else params.kfold

        if 'classifier' not in params or not params['classifier']:
            params.classifier = {'nhid': 0}

        assert 'nhid' in params.classifier, 'Set number of hidden units in classifier config!!'

        self.params = params

        # set up bilinear projection, with learnable matrix W
        params.bilinear = False if 'bilinear' not in params else params.bilinear

        self.batcher = batcher
        self.prepare = prepare if prepare else lambda x, y: None

        # sanity check
        # assert params.classifier in ['LogReg', 'MLP']
        # if params.classifier == 'MLP':
        #     assert params.nhid > 0, 'When using an MLP, \
        #         you need to set params.nhid>0'
        # if not params.usepytorch and params.classifier == 'MLP':
        #     assert False, 'No MLP implemented in scikit-learn'

        self.list_tasks = [
            'CR', 'MR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC',
            'SICKRelatedness', 'SICKEntailment', 'STSBenchmark', 'SNLI',
            'ImageCaptionRetrieval', 'STS12', 'STS13', 'STS14', 'STS15',
            'STS16', 'DIS', 'PDTB', 'DAT', 'PDTB_EX', 'PDTB_IMEX', 'ABSA_CH',
            'ABSA_SP', 'STS_SP'
        ]
Example #10
            for norm in [True]:
                for sim_name in [
                        'von_mises_correction_tic', 'von_mises_correction_aic'
                ]:
                    print('Similarity: {0}'.format(sim_name))
                    params_senteval = {
                        'task_path': PATH_TO_DATA,
                        'usepytorch': True,
                        'kfold': 10,
                        'padding': '.'
                    }

                    # Word2Vec Google News does not have a . embedding
                    if word_vec == 'word2vec_GN':
                        params_senteval['padding'] = 'the'

                    sim_params = dotdict({
                        'similarity': sim_name,
                        'word_vec': word_vec,
                        'word_count_path': word_count_path,
                        'norm': norm
                    })
                    params_senteval['sim_params'] = sim_params

                    se = senteval.engine.SE(params_senteval, batcher, prepare)
                    result = se.eval(transfer_tasks)
                    result_dict = {'param': dict(sim_params), 'eval': result}
                    results.append(result_dict)
                    with open(args.output_path, 'w') as f:
                        json.dump(results, f)
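
The repeated utils.dotdict(...) calls in these examples wrap a plain dict so its keys can also be read and written as attributes (params.kfold, params.seed, ...). A minimal sketch of what such a helper typically looks like (the actual implementation lives in senteval.utils):

class dotdict(dict):
    """dict whose keys are also accessible as attributes."""
    __getattr__ = dict.get          # missing keys return None instead of raising
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__

params = dotdict({'kfold': 10})
params.seed = 1111 if 'seed' not in params else params.seed
print(params.kfold, params.seed)  # -> 10 1111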
Example #11
File: probe.py Project: jwcmu/bgt
    #transfer_tasks = ['MR', 'CR', 'MPQA', 'SUBJ', 'SST2', 'SST5', 'TREC', 'MRPC',
    #                  'SICKEntailment', 'SICKRelatedness', 'STSBenchmark',
    #                  'Length', 'WordContent', 'Depth', 'TopConstituents',
    #                  'BigramShift', 'Tense', 'SubjNumber', 'ObjNumber']

    from embed import Embedder
    from fairseq import options
    import sentencepiece as spm
    from senteval import utils

    os.chdir(CODE_DIR)

    parser = options.get_generation_parser(interactive=True)
    options.add_embed_args(parser)
    args = options.parse_args_and_arch(parser)
    params = utils.dotdict(params_senteval)

    sp = spm.SentencePieceProcessor()
    sp.Load(args.sentencepiece)

    embedder = Embedder(args)

    params.batch_size = 32
    params.sp = sp
    params.embedder = embedder
    params.encoder = args.eval_encoder
    params.lang_emb = args.lang_emb

    results = {}
    s = ENLengthEval(PATH_TO_MY_DATA)
    s.do_prepare(params, prepare)
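
In SentEval-style probing code, do_prepare is usually followed by a run call that trains the probing classifier and returns the task metrics. A hedged sketch of that continuation (the batcher here is the one defined elsewhere in probe.py; the run(params, batcher) signature is assumed from the SentEval probing API, not shown in the snippet):

results['Length'] = s.run(params, batcher)  # assumed SentEval-style run(params, batcher) API
print(results)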