Exemplo n.º 1
0
 def context_texts(
     context_data_dicts: List[Dict[str, Any]]
 ) -> Tuple[List[str], List[str]]:
     '''
     Gathers the left and right text context of the first target
     occurrence in every given dictionary.

     :param context_data_dicts: A list of dictionaries that each contain a
                                `text` and `spans` field.
     :return: A tuple of two lists, the left contexts followed by the
              right contexts, each holding one string per dictionary.
     '''
     # `context` gives back one entry per target occurrence, so a target
     # mentioned twice in one text produces two left and two right
     # contexts. The paper this method follows does not say which
     # occurrence it used, hence only the first one is kept below.
     left_occurrences, right_occurrences = [
         [
             context(data, side, inc_target=self.include_target)
             for data in context_data_dicts
         ]
         for side in ('left', 'right')
     ]
     first_left = [occurrences[0] for occurrences in left_occurrences]
     first_right = [occurrences[0] for occurrences in right_occurrences]
     return first_left, first_right
Exemplo n.º 2
0
 def test_full_context(self):
     '''
     Tests :py:func:`bella.contexts.full_context`

     Requesting the 'full' context is expected to give back the whole
     text once per target occurrence.
     '''
     fake_news_text = ('This is a fake news article that is to represent a '
                       'Tweet!!!! and it was an awful News Article I think.')
     great_day_text = ('I had a great Day however I did not get much '
                       'work done in the day')
     expected_single = [
         ['This is a fake news article that is to represent a Tweet!!!!'],
         ['I had a great day however I did not get much work done'],
         ['I cycled in today and it was ok as it was not raining.'],
     ]
     expected_multi = [
         [fake_news_text, fake_news_text],
         [great_day_text, great_day_text],
     ]

     def check(samples, expected, template):
         # Compares the full context of every sample against the
         # expected value stored at the same position.
         for position, sample in enumerate(samples):
             text = sample['text']
             wanted = expected[position]
             found = context(sample, 'full')
             self.assertEqual(wanted, found,
                              msg=template.format(text, found, wanted))

     check(self.single_context, expected_single,
           'Cannot get the target for text {}, target found {} correct {}')
     check(self.multi_contexts, expected_multi,
           'Cannot get the targets for text {}, targets found {} correct {}')
Exemplo n.º 3
0
 def test_context(self):
     '''
     Tests :py:func:`bella.contexts._context`

     An unsupported context name must be rejected with a ValueError.
     '''
     failure_msg = ('Should only accept left, right '
                    'or target context words for parameters')
     unsupported_name = 'itself'
     with self.assertRaises(ValueError, msg=failure_msg):
         context(self.single_context[0], unsupported_name)
Exemplo n.º 4
0
    def test_right_context(self):
        '''
        Tests :py:func:`bella.contexts.right_context`

        Covers single and multiple target occurrences, each with and
        without the target itself included in the returned context.
        '''
        plain_template = ('Cannot get the right context of target {} text {} '
                          'which should be {} and not {}')
        with_target_template = ('Cannot get the right context of target {} '
                                'text {} including the target which should '
                                'be {} and not {}')

        def check(samples, expected, inc_target, template):
            # Compares the right context of every sample against the
            # expected value stored at the same position.
            for position, sample in enumerate(samples):
                text = sample['text']
                target = sample['target']
                wanted = expected[position]
                found = context(sample, 'right', inc_target=inc_target)
                failure = template.format(target, text, wanted, found)
                self.assertEqual(wanted, found, msg=failure)

        # One target occurrence per text
        check(self.single_context,
              [[' that is to represent a Tweet!!!!'],
               [' however I did not get much work done'],
               [' in today and it was ok as it was not raining.']],
              False, plain_template)
        check(self.single_context,
              [['news article that is to represent a Tweet!!!!'],
               ['day however I did not get much work done'],
               ['cycled in today and it was ok as it was not raining.']],
              True, with_target_template)

        # Several target occurrences per text
        check(self.multi_contexts,
              [[' that is to represent a Tweet!!!! and it was an awful News'
                ' Article I think.',
                ' I think.'],
               [' however I did not get much work done in the day', '']],
              False, plain_template)
        check(self.multi_contexts,
              [['news article that is to represent a Tweet!!!! and it was'
                ' an awful News Article I think.',
                'News Article I think.'],
               ['Day however I did not get much work done in the day',
                'day']],
              True, with_target_template)
Exemplo n.º 5
0
    def test_left_context(self):
        '''
        Tests :py:func:`bella.contexts.left_context`

        Covers single and multiple target occurrences, each with and
        without the target itself included in the returned context.
        '''
        plain_template = ('Cannot get the left context of target {} text {} '
                          'which should be {} and not {}')
        with_target_template = ('Cannot get the left context of target {} '
                                'text {} including the target which should '
                                'be {} and not {}')

        def check(samples, expected, inc_target, template):
            # Compares the left context of every sample against the
            # expected value stored at the same position.
            for position, sample in enumerate(samples):
                text = sample['text']
                target = sample['target']
                wanted = expected[position]
                found = context(sample, 'left', inc_target=inc_target)
                failure = template.format(target, text, wanted, found)
                self.assertEqual(wanted, found, msg=failure)

        # One target occurrence per text
        check(self.single_context,
              [['This is a fake '], ['I had a great '], ['I ']],
              False, plain_template)
        check(self.single_context,
              [['This is a fake news article'], ['I had a great day'],
               ['I cycled']],
              True, with_target_template)

        # Several target occurrences per text
        check(self.multi_contexts,
              [['This is a fake ',
                'This is a fake news article that is to'
                ' represent a Tweet!!!! and it was an awful '],
               ['I had a great ',
                'I had a great Day however I did not get '
                'much work done in the ']],
              False, plain_template)
        check(self.multi_contexts,
              [['This is a fake news article',
                'This is a fake news article '
                'that is to represent a Tweet!!!! and it was an awful '
                'News Article'],
               ['I had a great Day',
                'I had a great Day however I did not get '
                'much work done in the day']],
              True, with_target_template)
Exemplo n.º 6
0
def exchange_targets(target_data: Dict[str, Any],
                     target: str, new_target_id: str) -> Dict[str, Any]:
    '''
    Builds an augmented copy of a single Target data point in which the
    target has been swapped for the given target string.

    The input is deep copied and never modified. In the copy the `text`,
    `target`, `spans` and `target_id` fields are replaced, `augmented` is
    set to True, and `original_target_id` records the identifier the data
    point originally came from.

    :param target_data: The Target data dict whose text, spans, and target
                        fields are to be rewritten around the given target.
    :param target: The target string that replaces the existing target.
    :param new_target_id: A unique target identifier, required so that the
                          TargetCollection created for data augmentation
                          has a new identifier for each target. The
                          identifier of the target this one was created
                          from stays available through the
                          'original_target_id' field/key.
    :returns: The copied Target data dict with the target and its related
              data replaced with the given target.
    '''
    augmented = copy.deepcopy(target_data)
    # Only the first entry returned by `context` is used for each side
    before_target = context(augmented, 'left')[0]
    after_target = context(augmented, 'right')[0]

    # The new target sits directly after the left context in the new text
    span_start = len(before_target)
    span_end = span_start + len(target)

    # When augmenting an already augmented data point the identifier of
    # the very first data point is kept
    source_id = augmented.get('original_target_id', augmented['target_id'])

    augmented.update({
        'original_target_id': source_id,
        'text': before_target + target + after_target,
        'target': target,
        'spans': [(span_start, span_end)],
        'augmented': True,
        'target_id': new_target_id,
    })
    return augmented
Exemplo n.º 7
0
 def _json_to_instance(self, json_dict: JsonDict) -> Instance:
     """
     Converts JSON that looks like ``{"text": "...", "target": "...."}``
     into an Instance through the dataset reader's `text_to_instance`
     method.

     A `TargetDatasetReader` receives the text and target directly. Any
     other reader receives the first left and right contexts of the target
     together with the target, with the target included in the contexts
     when the reader's `incl_target` is set.
     """
     text = json_dict["text"]
     target = json_dict["target"]
     reader = self._dataset_reader
     if isinstance(reader, TargetDatasetReader):
         return reader.text_to_instance(text, target)

     # This allows the Target and TDLSTM models to use the same predictors.
     with_target = reader.incl_target
     left_side = context(json_dict, 'left', inc_target=with_target)[0]
     right_side = context(json_dict, 'right', inc_target=with_target)[0]
     return reader.text_to_instance(left_side, right_side, target)
Exemplo n.º 8
0
    def test_target_context(self):
        '''
        Tests :py:func:`bella.contexts.target_context`

        Requesting the 'target' context is expected to give back the target
        text of every occurrence.
        '''
        def check(samples, expected, template):
            # Compares the target context of every sample against the
            # expected value stored at the same position.
            for position, sample in enumerate(samples):
                text = sample['text']
                wanted = expected[position]
                found = context(sample, 'target')
                self.assertEqual(wanted, found,
                                 msg=template.format(text, found, wanted))

        check(self.single_context,
              [['news article'], ['day'], ['cycled']],
              'Cannot get the target for text {}, target found {} correct {}')
        check(self.multi_contexts,
              [['news article', 'News Article'], ['Day', 'day']],
              'Cannot get the targets for text {}, targets found {} correct {}')
Exemplo n.º 9
0
 def _read(self, file_path):
     '''
     Reads a file holding one JSON encoded target data point per line and
     yields one instance per non-blank line.

     :param file_path: Path of the file to read.
     :returns: A generator of the values returned by
               `self.text_to_instance`, each built from the first left and
               right contexts of the target, the target itself, the mapped
               sentiment, and the epoch numbers when the data point has an
               `epoch_number` field (otherwise None).
     '''
     with open(file_path, "r") as data_file:
         logger.info("Reading instances from lines in file at: %s",
                     file_path)
         for line in data_file:
             # Whitespace around a JSON document is insignificant, so
             # stripping all of it lets whitespace-only lines be skipped
             # instead of failing inside `json.loads`.
             line = line.strip()
             if not line:
                 continue
             target_data = json.loads(line)
             target = target_data['target']
             # Only the first entry returned by `context` is used
             left_text = context(target_data,
                                 'left',
                                 inc_target=self.incl_target)[0]
             right_text = context(target_data,
                                  'right',
                                  inc_target=self.incl_target)[0]
             epoch_numbers = None
             if 'epoch_number' in target_data:
                 epoch_numbers = list(target_data['epoch_number'])
             sentiment = self.sentiment_mapper[target_data['sentiment']]
             yield self.text_to_instance(left_text, right_text, target,
                                         sentiment, epoch_numbers)
Exemplo n.º 10
0
    def transform(self, target_dicts):
        '''
        Given a list of target dictionaries containing the spans of the
        targets and the texts that are about the targets, returns for each
        dictionary the contexts selected by `self.context`.

        :param target_dicts: list of dictionaries containing at least
                             `spans` and `text` keys.
        :type target_dicts: list
        :returns: a list with one entry per target dictionary, each entry
                  being the list of contexts that
                  :py:func:`contexts.context` returns for `self.context`
                  and `self.inc_target`.
        :rtype: list
        '''
        return [
            list(contexts.context(target_dict, self.context, self.inc_target))
            for target_dict in target_dicts
        ]