Exemplo n.º 1
0
    def _to_df(self, result):
        """Flatten an API response into a single pandas DataFrame.

        Rows are appended in this order: one per sentence (only when
        'extractDocumentSentiment' is in ``self.features``), at most one
        document-level row (document sentiment plus category confidences),
        one per entity mention, and one per token. Different row kinds
        carry different keys; pandas fills the missing cells with NaN.

        Args:
            result: Object whose ``_data`` attribute holds the response
                mapping. It must contain 'language', and must contain
                'documentSentiment' when 'extractDocumentSentiment' is in
                ``self.features``. 'sentences', 'categories', 'entities'
                and 'tokens' are optional.

        Returns:
            pandas.DataFrame with the rows described above and a
            'language' column set to ``response['language']``.
        """
        response = result._data
        data = []

        # One row/object for all document-level features
        document_data = {}

        if 'extractDocumentSentiment' in self.features:
            sentiment = response['documentSentiment']
            document_data.update(flatten_dict(sentiment, 'sentiment'))

            # Sentence level sentiment
            for sentence in response.get('sentences', []):
                sentence_data = self._get_span(sentence)
                sentiment = sentence['sentiment']
                sentence_data.update(flatten_dict(sentiment, 'sentiment'))
                data.append(sentence_data)

        # Default to an empty list so a response without 'categories' is
        # skipped instead of raising TypeError when iterating None; this
        # matches how 'sentences', 'entities' and 'tokens' are read.
        for category in response.get('categories', []):
            key = 'category_%s' % category['name']
            document_data[key] = category['confidence']

        # Include only if there are document-level features
        if document_data:
            data.append(document_data)

        # Entity-level features
        for entity in response.get('entities', []):
            # Work on a shallow copy so popping keys does not mutate the
            # entity stored in the response.
            entity_copy = entity.copy()
            mentions = entity_copy.pop('mentions', [])
            entity_copy.pop('name', None)
            entity_copy = flatten_dict(entity_copy)

            # One row per mention; entities without mentions add no rows.
            for m in mentions:
                entity_data = self._get_span(m)
                entity_data.update(entity_copy)
                # Overwrite top-level sentiment with mention-level
                sentiment = m.get('sentiment', {})
                entity_data.update(flatten_dict(sentiment, 'sentiment'))
                data.append(entity_data)

        # Token-level syntax features
        for token in response.get('tokens', []):
            token_data = self._get_span(token)
            token_data['lemma'] = token['lemma']
            # Part-of-speech attributes are merged in under their own keys
            token_data.update(token['partOfSpeech'])
            dependency = flatten_dict(token['dependencyEdge'], 'dependency')
            token_data.update(dependency)
            data.append(token_data)

        df = pd.DataFrame(data)
        # Scalar assignment puts the same language value on every row
        df['language'] = response['language']
        return df
Exemplo n.º 2
0
def test_flatten_dict():
    """Check flatten_dict with default arguments and with a prefix and separator."""
    nested = {'a': 5, 'b': {'c': 6, 'd': 1}}

    # Default call: nested keys are expected to be joined with '_' and
    # no prefix is added.
    expected_default = {'a': 5, 'b_c': 6, 'b_d': 1}
    flattened = flatten_dict(nested)
    assert flattened == expected_default

    # Explicit prefix and '.' separator: every key is expected to start
    # with the prefix, and nesting levels are joined with '.'.
    expected_prefixed = {
        'prefix.a': 5,
        'prefix.b.c': 6,
        'prefix.b.d': 1,
    }
    flattened = flatten_dict(nested, 'prefix', '.')
    assert flattened == expected_prefixed
Exemplo n.º 3
0
def test_flatten_dict():
    """Verify flatten_dict output for the default and the prefixed call forms."""
    sample = {'a': 5, 'b': {'c': 6, 'd': 1}}

    # No prefix, default separator: the expected keys use '_' between
    # nesting levels.
    want_plain = {'a': 5, 'b_c': 6, 'b_d': 1}
    got = flatten_dict(sample)
    assert got == want_plain

    # Prefix 'prefix' with '.' as separator: the expected keys are all
    # prefixed and use '.' between nesting levels.
    want_dotted = {
        'prefix.a': 5,
        'prefix.b.c': 6,
        'prefix.b.d': 1,
    }
    got = flatten_dict(sample, 'prefix', '.')
    assert got == want_dotted