示例#1
0
 def get_features(self, text, model_type='00'):
     """Return whatever ``self.get_lexical_features(text)`` produces.

     ``model_type`` is accepted but not consulted by this implementation.
     Any exception is reported through ``Util.standard_error`` and printed,
     and ``None`` is returned instead.
     """
     try:
         features = self.get_lexical_features(text)
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error get_features: {0}'.format(err))
         features = None
     return features
 def dependency_child(self, text):
     """Describe every token of ``text`` together with its syntactic children.

     The lower-cased text is parsed with ``self.analysis_pipe``.  One dict
     is produced per token; its ``'children'`` entry is a list of child
     descriptions, or ``None`` when the token has no children.  If an error
     occurs it is reported and the entries collected so far are returned.
     """
     result = []
     try:
         parsed = self.analysis_pipe(text.lower())
         for token in parsed:
             head = token.head
             described_children = [
                 {
                     'child': child,
                     'pos_': child.pos_,
                     'dep_': child.dep_,
                     'tag_': child.tag_,
                     'head.text': child.head.text,
                     'head.pos_': child.head.pos_,
                 }
                 for child in token.children
             ]
             result.append({
                 'chunk': token.text,
                 'text': token.text,
                 'pos_': token.pos_,
                 'dep_': token.dep_,
                 'tag_': token.tag_,
                 'head_text': head.text,
                 'head_pos': head.pos_,
                 # An empty child list is reported as None.
                 'children': described_children if described_children else None,
             })
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error dependency_child: {0}'.format(err))
     return result
示例#3
0
 def transform(self, text):
     """Return ``self.get_features(text)``, or ``None`` if that call raises.

     Failures are reported through ``Util.standard_error`` and printed.
     """
     try:
         transformed = self.get_features(text)
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error transform: {0}'.format(err))
         transformed = None
     return transformed
示例#4
0
 def __init__(self, lang):
     """Create the Snowball stemmer for ``lang`` and register the ``stem`` extension.

     ``lang`` is looked up in a two-entry table (``'es'`` and ``'en'``).
     Any failure, including an unknown language code, is reported through
     ``Util.standard_error`` and not re-raised.
     """
     try:
         snowball_names = {'es': 'spanish', 'en': 'english'}
         language_name = snowball_names[lang]
         self.stemmer = SnowballStemmer(language_name)
         Token.set_extension('stem', default='', force=True)
     except Exception:
         Util.standard_error(sys.exc_info())
 def tagger(self, text):
     """Return one attribute dict per token of the lower-cased ``text``.

     Each dict carries the token's text, lemma, stem (from the ``stem``
     extension), part of speech, tag, dependency label, shape and the
     ``is_alpha`` / ``is_stop`` / ``is_digit`` / ``is_punct`` flags.
     Returns ``None`` if anything fails; the error is reported and printed.
     """
     try:
         parsed = self.analysis_pipe(text.lower())
         return [
             {
                 'text': token.text,
                 'lemma': token.lemma_,
                 'stem': token._.stem,
                 'pos': token.pos_,
                 'tag': token.tag_,
                 'dep': token.dep_,
                 'shape': token.shape_,
                 'is_alpha': token.is_alpha,
                 'is_stop': token.is_stop,
                 'is_digit': token.is_digit,
                 'is_punct': token.is_punct,
             }
             for token in parsed
         ]
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error tagger: {0}'.format(err))
     return None
示例#6
0
    def train(self, file_output='predictive_sentiment', iteration=10, fold=10):
        """Run every configured feature model and keep the best classifier.

        For each entry of the module-level ``list_model`` the train and test
        texts are converted to normalised feature matrices, the per-model
        result rows are appended to a ``;``-separated CSV report under
        ``DIR_OUTPUT``, and the classifier with the highest F1 seen so far is
        pickled to ``DIR_MODELS``.

        :param file_output: base name used for the report and the model file.
        :param iteration: only used to build the report file name here.
        :param fold: only used to build the report file name here.
        :return: always ``None``; errors are reported and printed.
        """
        try:
            best_model = None
            best_classifier = None
            best_f1 = 0.0
            date_file = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
            file_report = '{0}_Fold{1}_Iteration{2}_report_{3}.csv'.format(file_output, fold, iteration, date_file)
            output = DIR_OUTPUT + file_report
            label = preprocessing.LabelEncoder()
            raw_x_train, raw_x_test, raw_y_train, raw_y_test = Util.import_dataset()

            # Fit the encoder once on every label so that train and test share
            # the same class -> integer mapping (re-fitting on the test labels
            # could silently produce a different mapping).
            label.fit(list(raw_y_train) + list(raw_y_test))
            y_train = label.transform(raw_y_train)
            y_test = label.transform(raw_y_test)

            with open(output, 'w') as out_csv:
                writer = csv.DictWriter(out_csv, fieldnames=fieldnames, delimiter=';', lineterminator='\n')
                writer.writeheader()

                for model_name, value in list_model.items():
                    print('{0}| Start Model: {1}|{0}'.format("#" * 15, model_name))
                    # Features are always extracted from the raw texts; the
                    # raw datasets must not be overwritten between models.
                    print('Get train features')
                    x_train = [self.fex.get_features(text=text, model_type=value) for text in tqdm(raw_x_train)]
                    x_train = preprocessing.normalize(x_train)

                    print('Get test features')
                    x_test = [self.fex.get_features(text=text, model_type=value) for text in tqdm(raw_x_test)]
                    x_test = preprocessing.normalize(x_test)

                    # TODO: create a function that receives the model
                    # (classification algorithm) together with x_train,
                    # y_train, x_test and y_test and returns the result rows.
                    # Until then there are no rows to report.
                    data_result = []

                    for row in data_result:
                        writer.writerow(row)
                    out_csv.flush()
                    print('Model {0} save successful!'.format(model_name))

                    for row in data_result:
                        f1_j = float(row['f1'])
                        classifier = row['classifier']
                        if f1_j > best_f1:
                            best_f1 = f1_j
                            best_model = row['model_name']
                            best_classifier = row['classifier_name']
                            # Persist the best classifier seen so far.
                            file_model = '{0}{1}_model.sav'.format(DIR_MODELS, file_output)
                            with open(file_model, 'wb') as outfile:
                                pickle.dump(classifier, outfile)
                            print('Model exported in {0}'.format(file_model))
                    print('{0}| End Model: {1}|{0}'.format("#" * 15, model_name))
            print('The best model is {0}, {1} with F1 score = {2}'.format(best_model, best_classifier, best_f1))
        except Exception as e:
            Util.standard_error(sys.exc_info())
            print('Error train: {0}'.format(e))
            return None
 def get_chunks(self, text):
     """Return the text of every noun chunk found in ``text``.

     The text is parsed with ``self.analysis_pipe``.  ``None`` is returned
     if parsing or chunk extraction fails; the error is reported and printed.
     """
     try:
         parsed = self.analysis_pipe(text)
         chunk_texts = []
         for chunk in parsed.noun_chunks:
             chunk_texts.append(chunk.text)
         return chunk_texts
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error get_chunks: {0}'.format(err))
     return None
示例#8
0
 def __call__(self, doc):
     """Store a stem on each content token of ``doc`` and return ``doc``.

     Punctuation, stop words and digit tokens are skipped; every other
     token gets its ``stem`` extension set to ``self.stemmer.stem(text)``.
     If an error occurs it is reported through ``Util.standard_error`` and
     the method falls through without returning ``doc``.
     """
     try:
         for token in doc:
             if token.is_punct or token.is_stop or token.is_digit:
                 continue
             token._.set('stem', self.stemmer.stem(token.text))
         return doc
     except Exception:
         Util.standard_error(sys.exc_info())
 def analysis_pipe(self, text):
     """Run ``self.nlp`` on ``text`` and return its result.

     Returns ``None`` when the pipeline raises; the error is reported
     through ``Util.standard_error`` and printed.
     """
     try:
         return self.nlp(text)
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error analysis_pipe: {0}'.format(err))
     return None
 def sentence_detection(self, text):
     """Split ``text`` into sentences and return them as stripped strings.

     Sentences come from the ``sents`` of the document produced by
     ``self.analysis_pipe``.  An empty list is returned if anything fails;
     the error is reported and printed.
     """
     try:
         parsed = self.analysis_pipe(text)
         return [sentence.string.strip() for sentence in parsed.sents]
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error sentence_detection: {0}'.format(err))
     return []
 def proper_encoding(text):
     """Return ``text`` reduced to plain ASCII.

     The text is NFD-normalised so accented letters split into a base
     letter plus combining marks, then every non-ASCII code point is
     dropped.  Returns ``None`` if the conversion fails; the error is
     reported and printed.
     """
     try:
         decomposed = unicodedata.normalize('NFD', text)
         ascii_bytes = decomposed.encode('ascii', 'ignore')
         return ascii_bytes.decode("utf-8")
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error proper_encoding: {0}'.format(err))
     return None
示例#12
0
 def __init__(self):
     """Build the machine-learning helper.

     Prints a start-up message and stores a ``FeatureExtraction`` instance
     created with ``lang='es'`` in ``self.fex``.  Construction errors are
     reported through ``Util.standard_error`` and printed, not raised.
     """
     try:
         print('Load Machine Learning....')
         extractor = FeatureExtraction(text_analysis=None, lang='es')
         self.fex = extractor
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error constructor: {0}'.format(err))
 def delete_special_patterns(self, text):
     """Replace symbols and punctuation in ``text`` with spaces and lower-case it.

     Four substitution passes are applied in order; every matched character
     becomes a single space, so runs of symbols leave runs of spaces.
     Returns ``None`` if a substitution fails; the error is reported and
     printed.
     """
     substitutions = (
         r'\©|\×|\⇔|\_|\»|\«|\~|\#|\$|\€|\Â|\�|\¬',  # special characters
         r'\,|\;|\:|\!|\¡|\’|\‘|\”|\“|\"|\'|\`',  # punctuation marks
         r'\}|\{|\[|\]|\(|\)|\<|\>|\?|\¿|\°|\|',  # brackets and parentheses
         r'\/|\-|\+|\*|\=|\^|\%|\&|\$',  # operators
     )
     try:
         cleaned = text
         for pattern in substitutions:
             cleaned = re.sub(pattern, ' ', cleaned)
         return cleaned.lower()
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error delete_special_patterns: {0}'.format(err))
     return None
 def language_detector(self, text):
     """Return the language of the first confidently detected sentence.

     The lower-cased text is parsed with ``self.analysis_pipe``; the first
     sentence whose ``language`` extension has a score above 0.8 decides
     the result.  Returns ``None`` when no sentence qualifies or when an
     error occurs (the error is reported and printed).
     """
     try:
         parsed = self.analysis_pipe(text.lower())
         confident = (
             sentence._.language['language']
             for sentence in parsed.sents
             if sentence._.language['score'] > 0.8
         )
         return next(confident, None)
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error language_detector: {0}'.format(err))
     return None
 def dependency(self, text):
     """Describe the noun chunks of the lower-cased ``text``.

     Returns one dict per noun chunk with the chunk object, its text, and
     the text and dependency label of its root.  If an error occurs it is
     reported and the entries collected so far are returned.
     """
     result = []
     try:
         parsed = self.analysis_pipe(text.lower())
         for noun_chunk in list(parsed.noun_chunks):
             chunk_text = noun_chunk.text
             root = noun_chunk.root
             result.append({
                 'chunk': noun_chunk,
                 'text': chunk_text,
                 'root_text': root.text,
                 'root_dep': root.dep_,
             })
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error dependency: {0}'.format(err))
     return result
 def load_sapcy(self, lang):
     """Load the spaCy model for ``lang`` and attach the custom components.

     ``'es'`` selects ``es_core_news_md``; any other value selects
     ``en_core_web_md``.  An ``Emoji`` component is added first in the
     pipeline and the ``Steaming`` component is added after the parser
     under the name ``stemmer``.

     Returns the pipeline.  If an error occurs it is reported and whatever
     has been loaded so far is returned (``None`` when loading itself
     failed).
     """
     result = None
     try:
         stemmer_component = Steaming(lang)  # initialise component
         model_name = 'es_core_news_md' if lang == 'es' else 'en_core_web_md'
         result = spacy.load(model_name)
         emoji_component = Emoji(result)
         result.add_pipe(emoji_component, first=True)
         result.add_pipe(stemmer_component, after='parser', name='stemmer')
         print('Language: {0}\nText Analysis: {1}'.format(lang, result.pipe_names))
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error load_sapcy: {0}'.format(err))
     return result
示例#17
0
 def weighted_position(tokens_text):
     """Score a token sequence by the positions of its tokens.

     For each 1-based position ``p`` in a sequence of length ``size`` the
     function accumulates ``1 / p`` into the first score and ``p / size``
     into the second.

     :param tokens_text: sequence of tokens; only its length matters.
     :return: ``(weighted_words, weighted_normalized)``; ``(0.0, 0.0)`` for
         an empty sequence, or ``None`` if the input cannot be processed.
     """
     result = None
     try:
         size = len(tokens_text)
         weighted_words = 0.0
         weighted_normalized = 0.0
         # Walk the positions directly.  Looking each token up with
         # list.index() returns its first occurrence, which gives repeated
         # tokens the wrong position and rescans the list every time.
         for position in range(1, size + 1):
             weighted_words += 1 / position
             weighted_normalized += position / size
         result = (weighted_words, weighted_normalized)
     except Exception as e:
         Util.standard_error(sys.exc_info())
         print('Error weighted_position: {0}'.format(e))
     return result
    def clean_text(self, text, **kwargs):
        """Normalise raw text for feature extraction.

        The text is lower-cased, selected entities are replaced by
        placeholders, digits are removed, symbols are stripped through
        ``self.delete_special_patterns``, single-character tokens are
        dropped and whitespace is collapsed.

        Keyword flags (each is honoured only when it is an actual ``bool``;
        anything else counts as ``False``):

        * ``url``, ``mention``, ``emoji``, ``hashtag`` -- replace the
          matching entities with ``[URL]``, ``[MENTION]``, ``[EMOJI]`` and
          ``[HASHTAG]``.
        * ``relabel`` -- keep the placeholders in the text; when false every
          placeholder is replaced by a space.
        * ``stopwords`` -- filter the text through ``self.stopwords``.

        :return: the cleaned string, or ``None`` if processing fails.
        """
        result = None
        try:
            def flag(name):
                value = kwargs.get(name)
                return value if isinstance(value, bool) else False

            labels = ['EMAIL', 'EMOJI', 'MENTION', 'HASHTAG', 'URL']
            url = flag('url')
            mention = flag('mention')
            emoji = flag('emoji')
            hashtag = flag('hashtag')
            relabel = flag('relabel')
            stopwords = flag('stopwords')

            text_out = str(text).lower()
            # NOTE(review): the pattern is anchored with ^...$, so only a text
            # that consists solely of an e-mail address is relabelled.
            text_out = re.sub(
                r'(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)',
                '[EMAIL]', text_out)
            if emoji:
                text_out = re.sub("[\U0001f000-\U000e007f]", '[EMOJI]', text_out)
            if url:
                text_out = re.sub(
                    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                    '[URL]', text_out)
            if mention:
                text_out = re.sub("@([A-Za-z0-9_]{1,40})", '[MENTION]', text_out)
            if hashtag:
                text_out = re.sub("#([A-Za-z0-9_]{1,40})", '[HASHTAG]', text_out)
            text_out = re.sub("[0-9]", '', text_out)

            if not relabel:
                # Drop every placeholder regardless of which flag produced it;
                # tying this step to ``mention`` left the other placeholders
                # in the output.
                for label in labels:
                    text_out = re.sub(r'\[' + label + r'\]', ' ', text_out)

            text_out = self.delete_special_patterns(text_out)
            if stopwords:
                text_out = self.stopwords(text_out)
            # removing any single letter on a string
            text_out = re.sub(r'((?<=^)|(?<= )).((?=$)|(?= ))', ' ',
                              text_out).strip()
            # condense multiple spaces with a single space
            text_out = re.sub(r'\s+', ' ', text_out).strip()
            # NOTE(review): after strip() the text can never equal ' ', so an
            # empty result is returned as '' rather than None -- confirm intent.
            result = text_out if text_out != ' ' else None
        except Exception as e:
            Util.standard_error(sys.exc_info())
            print('Error clean_text: {0}'.format(e))
        return result
示例#19
0
 def lexical_diversity(text):
     """Return the ratio of distinct characters to total characters in ``text``.

     Code points in the range U+10000..U+10FFFF and URL-like substrings are
     removed first and the remainder is lower-cased.  The ratio is rounded
     to four decimals.  Returns ``None`` if the computation fails (for
     example when nothing is left after cleaning); the error is reported
     and printed.
     """
     try:
         without_astral = re.sub(r"[\U00010000-\U0010ffff]", '', text)
         without_urls = re.sub(
             r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+'
             r'|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))',
             '', without_astral)
         normalised = without_urls.lower()
         distinct = len(set(normalised))
         total = len(normalised)
         return round((distinct / total), 4)
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error lexical_diversity: {0}'.format(err))
     return None
 def stopwords(self, text):
     """Return ``text`` with stop words removed.

     A blank ``Spanish`` pipeline is used when ``self.lang`` is ``'es'``
     and a blank ``English`` one otherwise.  The text is tokenised, every
     token whose vocabulary entry is flagged as a stop word is dropped and
     the remaining tokens are joined with single spaces.  Returns ``None``
     on failure; the error is reported and printed.
     """
     try:
         if self.lang == 'es':
             nlp = Spanish()
         else:
             nlp = English()
         words = [token.text for token in nlp(text)]
         kept = [word for word in words if not nlp.vocab[word].is_stop]
         return ' '.join(kept)
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error stopwords: {0}'.format(err))
         return None
 def dependency_all(self, text):
     """Describe the root token of every noun chunk in the lower-cased ``text``.

     Each entry holds the chunk object, the root's text, part of speech,
     dependency label, tag, lemma, stop/punctuation flags, the text and
     part of speech of the root's head, and a ``'children'`` list with the
     same kind of description for each child of the root.  If an error
     occurs it is reported and the entries collected so far are returned.
     """
     result = []
     try:
         parsed = self.analysis_pipe(text.lower())
         for chunk in parsed.noun_chunks:
             root = chunk.root
             entry = {
                 'chunk': chunk,
                 'text': root.text,
                 'pos_': root.pos_,
                 'dep_': root.dep_,
                 'tag_': root.tag_,
                 'lemma_': root.lemma_,
                 'is_stop': root.is_stop,
                 'is_punct': root.is_punct,
                 'head_text': root.head.text,
                 'head_pos': root.head.pos_,
             }
             children = []
             for child in root.children:
                 children.append({
                     'child': child,
                     'pos_': child.pos_,
                     'dep_': child.dep_,
                     'tag_': child.tag_,
                     'lemma_': child.lemma_,
                     'is_stop': child.is_stop,
                     'is_punct': child.is_punct,
                     'head.text': child.head.text,
                     'head.pos_': child.head.pos_,
                 })
             entry['children'] = children
             result.append(entry)
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error dependency_all: {0}'.format(err))
     return result
示例#22
0
 def pos_frequency(self, text):
     """Count nouns, verbs, adjectives and all other tokens in ``text``.

     Tokens come from ``self.ta.tagger(text)``; each one is counted under
     ``'NOUN'``, ``'VERB'`` or ``'ADJ'`` according to its ``'pos'`` entry,
     and under ``'ANOTHER'`` otherwise.  If an error occurs it is reported
     and the counts gathered so far are returned.
     """
     dict_token = {'NOUN': 0, 'VERB': 0, 'ADJ': 0, 'ANOTHER': 0}
     tracked = ('NOUN', 'VERB', 'ADJ')
     try:
         for token in self.ta.tagger(text):
             pos = token['pos']
             bucket = pos if pos in tracked else 'ANOTHER'
             dict_token[bucket] += 1
     except Exception as err:
         Util.standard_error(sys.exc_info())
         print('Error pos_frequency: {0}'.format(err))
     return dict_token
示例#23
0
class TestMessage(unittest.TestCase):
    """Tests for ``Util``: start-node and join checks and XML diagram generation.

    The ``mock_*`` and ``create_transition`` helpers build the diagram
    fixtures used by ``test_generate_diagram``.
    """

    def setUp(self):
        """Store a new ``Util`` instance on ``self.util``."""
        self.util = Util()

    @parameterized.expand([
        [{'A1': ActivityDiagramElement(name='A1', element_type=Util().START_NODE),
          'A2': ActivityDiagramElement(name='A2', element_type=Util().TRANSITION_NODE)}],
        [{'B1': ActivityDiagramElement(name='B1', element_type=Util().START_NODE),
          'B2': ActivityDiagramElement(name='B2', element_type=Util().DECISION_NODE),
          'B3': ActivityDiagramElement(name='B3', element_type=Util().MERGE_NODE)}],
        [{'C1': ActivityDiagramElement(name='C1', element_type=Util().START_NODE)}],
    ])
    def test_check_start_node_existence_true(self, nodes):
        """A node set that contains a start node passes the check."""
        response = self.util.check_start_node_existence(nodes, None)
        self.assertTrue(response)

    @parameterized.expand([
        [{'A1': ActivityDiagramElement(name='A1', element_type=Util().END_NODE),
          'A2': ActivityDiagramElement(name='A2', element_type=Util().TRANSITION_NODE)}],
        [{'B1': ActivityDiagramElement(name='B1', element_type=Util().MERGE_NODE),
          'B2': ActivityDiagramElement(name='B2', element_type=Util().DECISION_NODE),
          'B3': ActivityDiagramElement(name='B3', element_type=Util().MERGE_NODE)}],
        [{'C1': ActivityDiagramElement(name='C1', element_type=Util().ACTIVITY_NODE)}],
    ])
    def test_check_start_node_existence_false(self, nodes):
        """A node set without a start node raises ``OrderError``."""
        with self.assertRaises(OrderError):
            self.util.check_start_node_existence(nodes, None)

    @parameterized.expand([
        [{'A1': ActivityDiagramElement(name='A1', element_type=Util().START_NODE),
          'A2': ActivityDiagramElement(name='A2', element_type=Util().DECISION_NODE)}],
        [{'B1': ActivityDiagramElement(name='B1', element_type=Util().START_NODE),
          'B2': ActivityDiagramElement(name='B2', element_type=Util().DECISION_NODE),
          'B3': ActivityDiagramElement(name='B3', element_type=Util().DECISION_NODE)}],
        [{'C1': ActivityDiagramElement(name='C1', element_type=Util().DECISION_NODE)}],
    ])
    def test_check_join_possibility(self, nodes):
        """A node set that contains a decision node passes the join check."""
        response = self.util.check_join_possibility(nodes)
        self.assertTrue(response)

    @parameterized.expand([
        [{'A1': ActivityDiagramElement(name='A1', element_type=Util().START_NODE),
          'A2': ActivityDiagramElement(name='A2', element_type=Util().MERGE_NODE)}],
        [{'B1': ActivityDiagramElement(name='B1', element_type=Util().START_NODE),
          'B2': ActivityDiagramElement(name='B2', element_type=Util().MERGE_NODE),
          'B3': ActivityDiagramElement(name='B3', element_type=Util().MERGE_NODE)}],
        [{'C1': ActivityDiagramElement(name='C1', element_type=Util().START_NODE)}],
    ])
    def test_check_join_possibility_false(self, nodes):
        """A node set without a decision node raises ``OrderError``."""
        with self.assertRaises(OrderError):
            self.util.check_join_possibility(nodes)

    @parameterized.expand([
        [False, False, False, ('<ActivityDiagram name="ActivityDiagram1">\n'
                                '    <ActivityDiagramElements>\n'
                                '        <StartNode name="StartNode1"/>\n'
                                '        <DecisionNode name="DecisionNode1"/>\n'
                                '        <MergeNode name="MergeNode1"/>\n'
                                '        <EndNode name="EndNode1"/>\n'
                                '    </ActivityDiagramElements>\n'
                                '    <ActivityDiagramTransitions>\n'
                                '        <Transition name="FirstTr" prob="0.5"/>\n'
                                '        <Transition name="Tr2" prob="0.5"/>\n'
                                '        <Transition name="LastTr" prob="0.5"/>\n'
                                '    </ActivityDiagramTransitions>\n'
                                '</ActivityDiagram>\n')],
        [True, False, False, ('<ActivityDiagram name="ActivityDiagram1">\n'
                                '    <ActivityDiagramElements>\n'
                                '        <StartNode name="StartNode1"/>\n'
                                '        <DecisionNode name="DecisionNode1"/>\n'
                                '        <MergeNode name="MergeNode1"/>\n'
                                '        <Activity name="ActivityNode1"/>\n'
                                '        <EndNode name="EndNode1"/>\n'
                                '    </ActivityDiagramElements>\n'
                                '    <ActivityDiagramTransitions>\n'
                                '        <Transition name="FirstTr" prob="0.5"/>\n'
                                '        <Transition name="Tr2" prob="0.5"/>\n'
                                '        <Transition name="Tr3" prob="0.5"/>\n'
                                '        <Transition name="LastTr" prob="0.5"/>\n'
                                '    </ActivityDiagramTransitions>\n'
                                '</ActivityDiagram>\n'
                                '<SequenceDiagrams>\n'
                                '    <Lifelines>\n'
                                '        <Lifeline name="LifeLineX"/>\n'
                                '    </Lifelines>\n'
                                '</SequenceDiagrams>\n')],
        [True, True, False, ('<ActivityDiagram name="ActivityDiagram1">\n'
                                '    <ActivityDiagramElements>\n'
                                '        <StartNode name="StartNode1"/>\n'
                                '        <DecisionNode name="DecisionNode1"/>\n'
                                '        <MergeNode name="MergeNode1"/>\n'
                                '        <Activity name="ActivityNode1"/>\n'
                                '        <EndNode name="EndNode1"/>\n'
                                '    </ActivityDiagramElements>\n'
                                '    <ActivityDiagramTransitions>\n'
                                '        <Transition name="FirstTr" prob="0.5"/>\n'
                                '        <Transition name="Tr2" prob="0.5"/>\n'
                                '        <Transition name="Tr3" prob="0.5"/>\n'
                                '        <Transition name="LastTr" prob="0.5"/>\n'
                                '    </ActivityDiagramTransitions>\n'
                                '</ActivityDiagram>\n'
                                '<SequenceDiagrams>\n'
                                '    <Lifelines>\n'
                                '        <Lifeline name="LifeLineX"/>\n'
                                '    </Lifelines>\n'
                                '    <Fragments>\n'
                                '        <Optional name="Fragment1" representedBy="SequenceDiagram"/>\n'
                                '    </Fragments>\n'
                                '    <SequenceDiagram name="SequenceDiagram">\n'
                                '        <Message name="MensagemX" prob="0.5" source="LifeLineX" target="LifeLineX"/>\n'
                                '        <Fragment name="Fragment1"/>\n'
                                '    </SequenceDiagram>\n'
                                '</SequenceDiagrams>\n')],
        [True, True, True, ('<ActivityDiagram name="ActivityDiagram1">\n'
                                '    <ActivityDiagramElements>\n'
                                '        <StartNode name="StartNode1"/>\n'
                                '        <DecisionNode name="DecisionNode1"/>\n'
                                '        <MergeNode name="MergeNode1"/>\n'
                                '        <Activity name="ActivityNode1"/>\n'
                                '        <EndNode name="EndNode1"/>\n'
                                '    </ActivityDiagramElements>\n'
                                '    <ActivityDiagramTransitions>\n'
                                '        <Transition name="FirstTr" prob="0.5"/>\n'
                                '        <Transition name="Tr2" prob="0.5"/>\n'
                                '        <Transition name="Tr3" prob="0.5"/>\n'
                                '        <Transition name="LastTr" prob="0.5"/>\n'
                                '    </ActivityDiagramTransitions>\n'
                                '</ActivityDiagram>\n'
                                '<SequenceDiagrams>\n'
                                '    <Lifelines>\n'
                                '        <Lifeline name="LifeLineX"/>\n'
                                '    </Lifelines>\n'
                                '    <Fragments>\n'
                                '        <Optional name="Fragment1" representedBy="SequenceDiagram"/>\n'
                                '        <Optional name="Fragment2" representedBy="SequenceDiagram"/>\n'
                                '    </Fragments>\n'
                                '    <SequenceDiagram name="SequenceDiagram">\n'
                                '        <Message name="MensagemX" prob="0.5" source="LifeLineX" target="LifeLineX"/>\n'
                                '        <Fragment name="Fragment1"/>\n'
                                '    </SequenceDiagram>\n'
                                '    <SequenceDiagram name="SequenceDiagram">\n'
                                '        <Message name="MensagemX" prob="0.5" source="LifeLineX" target="LifeLineX"/>\n'
                                '        <Fragment name="Fragment2"/>\n'
                                '    </SequenceDiagram>\n'
                                '</SequenceDiagrams>\n')],
    ])
    def test_generate_diagram(self, has_activity, has_fragment,
                              has_multiple_fragments, result_diagram):
        """The XML written to ``xmls/<name>.xml`` matches the expected text."""
        activity_diagram = self.mock_activity_diagram(has_activity, has_fragment,
                                                      has_multiple_fragments)
        self.util.generate_diagram(activity_diagram)
        diagram_path = f'xmls/{activity_diagram.name}.xml'
        try:
            # The context manager closes the file even when the assertion
            # below fails.
            with open(diagram_path, 'r') as diagram_file:
                diagram_from_file = diagram_file.read()
            self.assertEqual(diagram_from_file, result_diagram)
        finally:
            # Always remove the generated file so a failing case does not
            # leave it behind for the next parameter set.
            if os.path.exists(diagram_path):
                os.remove(diagram_path)

    def mock_activity_diagram(self, has_activity=False, has_fragment=False, has_multiple_fragments=False):
        """Build an activity diagram: start -> decision -> merge [-> activity] -> end.

        When ``has_activity`` is true an activity node and a sequence diagram
        (see ``mock_sequence_diagram``) are added as well.
        """
        activity_diagram = ActivityDiagram(name='ActivityDiagram1')
        # Start Node
        start_node = ActivityDiagramElement(name='StartNode1', element_type=self.util.START_NODE)
        activity_diagram.set_elements(start_node)
        activity_diagram.set_start_node(start_node)
        # Decision Node
        decision_node = ActivityDiagramElement(name='DecisionNode1',
                                               element_type=self.util.DECISION_NODE)
        activity_diagram.set_elements(decision_node)
        activity_diagram.set_transitions(self.create_transition(name='FirstTr', source_node=start_node,
                                                                target_node=decision_node))
        # Merge Node
        merge_node = ActivityDiagramElement(name='MergeNode1', element_type=self.util.MERGE_NODE)
        activity_diagram.set_elements(merge_node)
        activity_diagram.set_transitions(self.create_transition(name='Tr2', source_node=decision_node,
                                                                target_node=merge_node))
        last_node = merge_node
        if has_activity:
            activity_node = ActivityDiagramElement(name='ActivityNode1', element_type=self.util.ACTIVITY_NODE)
            activity_diagram.set_elements(activity_node)
            activity_diagram.set_transitions(self.create_transition(name='Tr3',
                                                                    source_node=merge_node,
                                                                    target_node=activity_node))
            last_node = activity_node
            sequence_diagram = self.mock_sequence_diagram(has_fragment, has_multiple_fragments)
            activity_diagram.set_sequence_diagrams(sequence_diagram)

        # EndNode
        end_node = ActivityDiagramElement(name='EndNode1', element_type=self.util.END_NODE)
        activity_diagram.set_elements(end_node)
        activity_diagram.set_transitions(self.create_transition(name='LastTr', source_node=last_node,
                                                                target_node=end_node))
        return activity_diagram

    def mock_sequence_diagram(self, has_fragment, has_multiple_fragments):
        """Build a sequence diagram with one lifeline, one message and optional fragments."""
        sequence_diagram = SequenceDiagram(name='SequenceDiagram', guard_condition=True)

        # LifeLines
        lifelines = {0: Lifeline(id=0, name='LifeLineX')}
        sequence_diagram.set_life_lines(lifelines)

        # Message
        message = Message(name='MensagemX', source=lifelines[0],
                          target=lifelines[0], prob=0.5,
                          message_type='Synchronous')
        sequence_diagram.set_messages(message)

        if has_fragment:
            fragment = Fragment(name='Fragment1',
                                represented_by=sequence_diagram.name,
                                sequence_diagram=sequence_diagram)
            sequence_diagram.set_fragments(fragment)
        if has_multiple_fragments:
            # The second fragment wraps its own fragment-free sequence diagram.
            fragment = Fragment(name='Fragment2',
                                represented_by=sequence_diagram.name,
                                sequence_diagram=self.mock_sequence_diagram(False, False))
            sequence_diagram.set_fragments(fragment)

        if has_fragment:
            sequence_diagram.get_fragments()[0].sequence_diagram = sequence_diagram

        return sequence_diagram

    def create_transition(self, name, source_node, target_node):
        """Return a ``Transition`` between the two nodes with probability 0.5."""
        return Transition(name=name,
                          prob=0.5,
                          source=source_node,
                          target=target_node,
                          element_type=self.util.TRANSITION_NODE)
示例#24
0
 def setUp(self):
     """Store a new ``Util`` instance on ``self.util``."""
     self.util = Util()
示例#25
0
from utils.utils import Util
# Module-level Util instance, created at import time.
# NOTE(review): not referenced by the code visible in this chunk -- confirm it is still needed.
util = Util()


class SequenceDiagramElement():
    """Base element of a sequence diagram, identified by its name (``nome``).

    Two elements compare equal when their names are equal.  Because the
    class defines ``__eq__`` without ``__hash__``, instances are not
    hashable.
    """

    def __init__(self, nome=''):
        """Create an element called ``nome`` (empty by default)."""
        self.nome = nome

    def __eq__(self, sequence_diagram_element):  # pragma: no cover
        """Compare by name.

        Objects that have no ``nome`` attribute are not comparable:
        ``NotImplemented`` is returned so that ``==`` evaluates to ``False``
        instead of raising ``AttributeError``.
        """
        try:
            other_nome = sequence_diagram_element.nome
        except AttributeError:
            return NotImplemented
        return self.nome == other_nome

    def __str__(self):  # pragma: no cover
        """Return ``'nome: <name>'`` followed by a newline."""
        return 'nome: {}\n'.format(self.nome)

    def set_nome(self, nome):
        """Replace the element's name."""
        self.nome = nome

    def get_nome(self):
        """Return the element's name."""
        return self.nome

    def dispose(self):
        """Reset the element's name to the empty string."""
        self.nome = ""


class Fragment(SequenceDiagramElement):
    def __init__(self, nome='', represented_by=None):
        super().__init__(nome)
        self.represented_by = represented_by

    def __eq__(self, fragment):  # pragma: no cover
        return self.nome == fragment.nome and \
    def syntax_patterns(self, text):
        """Collect lexical chunks from ``text`` grouped by part of speech.

        The text is parsed with ``self.nlp``; each sentence is handed to
        ``self.dependency_all`` and every returned token dict contributes
        single-word and multi-word chunks built from the token itself, its
        children and its syntactic head. Stop words, punctuation and pronouns
        are ignored.

        :param text: raw text to analyse.
        :return: dict with the keys 'NOUN', 'VERB', 'ADV' and 'ADJ'. Each one
            maps a lower-cased chunk string to its description: ``[word, pos]``
            for a single word, or a list of ``[word, pos]`` pairs (in chunk
            order) for a multi-word chunk. ``None`` is returned when any
            exception is raised during the analysis.
        """
        result = None
        try:
            dict_noun, dict_verb, dict_adv, dict_adj = {}, {}, {}, {}

            # Head-based patterns: head POS -> child POS ->
            # (child is placed before the token?, dictionaries to update).
            head_rules = {
                'NOUN': {'ADP': (True, (dict_noun,))},             # NOUN + ADP + token
                'ADJ': {'ADJ': (True, (dict_noun, dict_adj))},     # ADJ + ADJ + token
                'VERB': {'ADJ': (False, (dict_verb,)),             # VERB + token + ADJ
                         'ADP': (True, (dict_verb,))},             # VERB + ADP + token
                'ADV': {'ADV': (True, (dict_adv,)),                # ADV + ADV + token
                        'ADJ': (False, (dict_adv,))},              # ADV + token + ADJ
            }

            def lowered(value):
                return str(value).lower()

            def child_part(child):
                return lowered(child['child']), child['pos_']

            def add_chunk(parts, *targets):
                # ``parts`` holds (word, pos) tuples in output order; the same
                # value list is stored in every target dictionary.
                key = ' '.join(word for word, _ in parts)
                value = [[word, pos] for word, pos in parts]
                for target in targets:
                    target[key] = value

            doc = self.nlp(text)
            for span in doc.sents:
                for item in self.dependency_all(str(span)):
                    # Exact comparison with 'PRON' (the former ``not in 'PRON'``
                    # was a substring test on the string 'PRON').
                    if item['is_stop'] is True or item['is_punct'] is True \
                            or item['pos_'] == 'PRON':
                        continue

                    pos = item['pos_']
                    word = lowered(item['chunk'])
                    token = (word, pos)
                    # A token without children may carry None instead of a
                    # list; treat that as "no children" rather than failing.
                    children = item['children'] or ()

                    if pos == 'NOUN':
                        # NOUN
                        dict_noun[word] = [word, pos]
                        # Chinking
                        for child in children:
                            if child['pos_'] == 'ADJ':
                                # ADJ + NOUN
                                add_chunk([child_part(child), token], dict_noun, dict_adj)
                            elif child['pos_'] == 'ADP':
                                # ADP + NOUN
                                add_chunk([child_part(child), token], dict_noun)

                    elif pos in ('PRON', 'PROPN'):
                        # PRON | PROPN + NOUN (PRON tokens are currently
                        # filtered out by the guard above).
                        for child in children:
                            if child['pos_'] == 'NOUN':
                                add_chunk([token, child_part(child)], dict_noun)

                    elif pos == 'ADJ':
                        # ADJ
                        dict_adj[word] = [word, pos]
                        for child in children:
                            if child['pos_'] == 'NOUN':
                                # ADJ + NOUN
                                add_chunk([token, child_part(child)], dict_adj)

                    # Head-based patterns are skipped for ROOT tokens. The
                    # former ``is not ['ROOT']`` identity test was always True,
                    # so ROOT tokens were never excluded.
                    if item['dep_'] == 'ROOT':
                        continue
                    rules = head_rules.get(str(item['head_pos']))
                    if not rules:
                        continue
                    head = (lowered(item['head_text']), item['head_pos'])
                    for child in children:
                        rule = rules.get(child['pos_'])
                        if rule is None:
                            continue
                        child_first, targets = rule
                        if child_first:
                            parts = [head, child_part(child), token]
                        else:
                            parts = [head, token, child_part(child)]
                        add_chunk(parts, *targets)

            result = {'NOUN': dict_noun, 'VERB': dict_verb, 'ADV': dict_adv, 'ADJ': dict_adj}
        except Exception as e:
            Util.standard_error(sys.exc_info())
            print('Error syntax_patterns: {0}'.format(e))
        return result
示例#27
0
    def get_lexical_features(self, text):
        """Build the lexical feature vector for ``text``.

        The lexical diversity is measured on the raw text. The text is then
        cleaned with ``self.ta.clean_text`` and tokenised with
        ``TweetTokenizer``; all remaining features are computed from those
        tokens or from the cleaned text:

        * weighted position features from ``self.weighted_position``;
        * counts of the placeholder labels (mention, url, hashtag, emoji, rt)
          and of the remaining tokens;
        * counts of tokens found in the ``lexical`` word lists (grammatical
          persons, adverbs, adjectives, "who" words);
        * mean, kurtosis and skewness of the token lengths, ignoring the
          placeholder tags (NaN is replaced by 0.0, values rounded to 4
          decimals);
        * weighted part-of-speech frequencies from ``self.pos_frequency``.

        :param text: raw text to describe.
        :return: ``np.array`` with one entry per feature, in the order the
            features are inserted below; ``None`` if any exception is raised.
        """
        try:
            setting = {'url': True, 'mention': True, 'emoji': True,
                       'hashtag': True, 'stopwords': False, 'relabel': True}
            text_tokenizer = TweetTokenizer()
            tags = ('mention', 'url', 'hashtag', 'emoji', 'rt', 'numero', 'nombre', 'apellido')
            # Insertion order of ``vector`` defines the layout of the result.
            vector = dict()
            vector['lexical_diversity'] = self.lexical_diversity(text)

            text = self.ta.clean_text(text, **setting)
            tokens_text = text_tokenizer.tokenize(text)

            def lexicon_hits(key):
                # Number of tokens contained in the word list lexical[key].
                return float(sum(1 for word in tokens_text if word in lexical[key]))

            def finite_rounded(value):
                # Empty input yields NaN statistics; report those as 0.0.
                value = value if not np.isnan(value) else 0.0
                return round(value, 4)

            vector['weighted_position'], vector['weighted_normalized'] = self.weighted_position(tokens_text)

            # Placeholder labels left in the cleaned text.
            label_total = 0.0
            for feature, label in (('label_mention', 'mention'),
                                   ('label_url', 'url'),
                                   ('label_hashtag', 'hashtag'),
                                   ('label_emoji', 'emoji'),
                                   ('label_retweets', 'rt')):
                vector[feature] = float(sum(1 for word in tokens_text if word == label))
                label_total += vector[feature]
            # Tokens that are not one of the counted labels.
            vector['label_word'] = float(len(tokens_text) - label_total)

            for feature in ('first_person_singular', 'second_person_singular',
                            'third_person_singular', 'first_person_plurar',
                            'second_person_plurar', 'third_person_plurar'):
                vector[feature] = lexicon_hits(feature)

            # Token lengths are computed once and shared by the three statistics.
            word_lengths = [len(word) for word in tokens_text if word not in tags]
            vector['avg_word'] = finite_rounded(np.nanmean(word_lengths))
            vector['kur_word'] = finite_rounded(kurtosis(word_lengths))
            vector['skew_word'] = finite_rounded(skew(np.array(word_lengths)))

            # Adverbs
            adverb_features = ('adverb_neg', 'adverb_time', 'adverb_place',
                               'adverb_mode', 'adverb_cant')
            for feature in adverb_features:
                vector[feature] = lexicon_hits(feature)
            vector['adverb_all'] = float(sum(vector[feature] for feature in adverb_features))

            for feature in ('adjetives_neg', 'adjetives_pos',
                            'who_general', 'who_male', 'who_female'):
                vector[feature] = lexicon_hits(feature)

            # The POS frequencies are computed once for the cleaned text and
            # reused for the four weighted features.
            pos_frequency = self.pos_frequency(text)
            vector['noun'] = pos_frequency['NOUN'] * 0.8
            vector['verb'] = pos_frequency['VERB'] * 0.5
            vector['adj'] = pos_frequency['ADJ'] * 0.4
            vector['pos_others'] = pos_frequency['ANOTHER'] * 0.1

            return np.array(list(vector.values()))
        except Exception as e:
            Util.standard_error(sys.exc_info())
            print('Error get_lexical_features: {0}'.format(e))
            return None