Exemplo n.º 1
0
    def init_y_annotations(self):
        """
        Initialise the joint y vector with data from manually annotated abstracts.

        Iterates over a ``LabeledAbstractReader``. For each study the abstract
        is passed through ``swap_num`` and ``tag_words(..., flatten=True)``;
        the first token whose tag entry contains ``'n'`` is parsed as an
        integer and stored in ``self.data["y_lookup_init"]`` under the study's
        ``Biview_id``. Studies without such a token are skipped. The number of
        stored entries is written to ``self.seed_abstracts`` and logged.

        Raises:
            TypeError: if the first tagged token does not match the expected
                number pattern (optional ``N``/``n``, optional ``=``, then an
                integer without a leading zero).
        """
        logging.info("Identifying seed data from annotated data")
        # NOTE(review): this progress bar is created but never advanced in this
        # method; kept because constructing it may have a visible side effect.
        p = progressbar.ProgressBar(len(self.biviewer), timer=True)
        annotation_viewer = LabeledAbstractReader()

        # Compiled once; the pattern is the same for every study.
        number_pattern = re.compile(r"[Nn]?=?([1-9]+[0-9]*)")

        counter = 0
        for study in annotation_viewer:
            study_id = int(study["Biview_id"])
            text = swap_num(
                annotation_viewer.get_biview_id(study_id)['abstract'])

            parsed_tags = tag_words(text, flatten=True)
            tagged_number = [w[0] for w in parsed_tags if 'n' in w[1]]
            if not tagged_number:
                # No annotated number in this abstract; nothing to seed.
                continue

            number = number_pattern.match(tagged_number[0])
            if number is None:
                # Format the message explicitly: exceptions do not interpolate
                # logging-style (fmt, arg) pairs.
                raise TypeError(
                    'Unable to convert tagged number %s to integer'
                    % (tagged_number[0],))

            self.data["y_lookup_init"][study_id] = int(number.group(1))
            counter += 1

        self.seed_abstracts = counter
        logging.info("%d seed abstracts found", counter)
Exemplo n.º 2
0
    def init_y_annotations(self):
        """
        Initialise the joint y vector with data from manually annotated abstracts.

        Iterates over a ``LabeledAbstractReader``. For each study the abstract
        is passed through ``swap_num`` and ``tag_words(..., flatten=True)``;
        the first token whose tag entry contains ``'n'`` is parsed as an
        integer and stored in ``self.data["y_lookup_init"]`` under the study's
        ``Biview_id``. Studies without such a token are skipped. The number of
        stored entries is written to ``self.seed_abstracts`` and logged.

        Raises:
            TypeError: if the first tagged token does not match the expected
                number pattern (optional ``N``/``n``, optional ``=``, then an
                integer without a leading zero).
        """
        logging.info("Identifying seed data from annotated data")
        # NOTE(review): this progress bar is created but never advanced in this
        # method; kept because constructing it may have a visible side effect.
        p = progressbar.ProgressBar(len(self.biviewer), timer=True)
        annotation_viewer = LabeledAbstractReader()

        # Compiled once; the pattern is the same for every study.
        number_pattern = re.compile(r"[Nn]?=?([1-9]+[0-9]*)")

        counter = 0
        for study in annotation_viewer:
            study_id = int(study["Biview_id"])
            abstract = annotation_viewer.get_biview_id(study_id)['abstract']
            text = swap_num(abstract)

            parsed_tags = tag_words(text, flatten=True)
            tagged_number = [w[0] for w in parsed_tags if 'n' in w[1]]
            if not tagged_number:
                # No annotated number in this abstract; nothing to seed.
                continue

            number = number_pattern.match(tagged_number[0])
            if number is None:
                # Format the message explicitly: exceptions do not interpolate
                # logging-style (fmt, arg) pairs.
                raise TypeError(
                    'Unable to convert tagged number %s to integer'
                    % (tagged_number[0],))

            self.data["y_lookup_init"][study_id] = int(number.group(1))
            counter += 1

        self.seed_abstracts = counter
        logging.info("%d seed abstracts found", counter)
Exemplo n.º 3
0
    def __init__(self, text, window_size):
        """
        Set up the instance from raw text or from pre-tagged input.

        Args:
            text: either a ``str``, which is passed through ``swap_num`` and
                then ``tag_words`` to produce ``self.tag_tuple_sents``, or a
                ``list``, which is used directly as ``self.tag_tuple_sents``.
            window_size: stored as ``self.w_pos_window``.
        """
        if isinstance(text, str):
            # NOTE(review): earlier code first assigned self.text the result of
            # a regex substitution deleting "<digits>,<digits>" groups, then
            # immediately overwrote it with swap_num(text), so the substitution
            # never took effect. The dead statement is dropped to keep the
            # effective behaviour; confirm whether comma-grouped numbers were
            # meant to be stripped before swap_num.
            self.text = swap_num(text)
            self.tag_tuple_sents = tag_words(self.text)
        elif isinstance(text, list):
            # Pre-tagged input: self.text is not set on this path.
            self.tag_tuple_sents = text
        # NOTE(review): any other type leaves tag_tuple_sents unassigned here,
        # so the lookup below fails unless the class defines it elsewhere.

        self.functions = self.set_functions(self.tag_tuple_sents)

        self.w_pos_window = window_size
        self.load_templates()
Exemplo n.º 4
0
    def __init__(self, text, window_size):
        """
        Set up the instance from raw text or from pre-tagged input.

        Args:
            text: either a ``str``, which is passed through ``swap_num`` and
                then ``tag_words`` to produce ``self.tag_tuple_sents``, or a
                ``list``, which is used directly as ``self.tag_tuple_sents``.
            window_size: stored as ``self.w_pos_window``.
        """
        if isinstance(text, str):
            # NOTE(review): earlier code first assigned self.text the result of
            # a regex substitution deleting "<digits>,<digits>" groups, then
            # immediately overwrote it with swap_num(text), so the substitution
            # never took effect. The dead statement is dropped to keep the
            # effective behaviour; confirm whether comma-grouped numbers were
            # meant to be stripped before swap_num.
            self.text = swap_num(text)
            self.tag_tuple_sents = tag_words(self.text)
        elif isinstance(text, list):
            # Pre-tagged input: self.text is not set on this path.
            self.tag_tuple_sents = text
        # NOTE(review): any other type leaves tag_tuple_sents unassigned here,
        # so the lookup below fails unless the class defines it elsewhere.

        self.functions = self.set_functions(self.tag_tuple_sents)

        self.w_pos_window = window_size
        self.load_templates()