Exemplo n.º 1
0
    def __init__(self, remake_db=False):
        """Set up the fact/feel corpus: paths, document list and documents.

        The corpus directory is built from ``FactFeel.corpora_path`` (with
        ``~`` expanded) and ``FactFeel.nlds_path``.  The document list is
        loaded with ``parse_doc_list`` when ``doc_list_exists()`` is true;
        otherwise ``generate_doc_list`` is called.  One ``Document`` is then
        created per listed file, and the documents are either rebuilt into
        the database or read back from it.  Finally ``self.docs`` is split
        into ``self.facts`` and ``self.feels`` according to whether 'fact'
        or 'feel' occurs in each document's leaf name.

        Args:
            remake_db: When true, build ``self.docs`` with ``create_db``
                instead of reading every document with ``read_db``.
                Defaults to False, which is what this constructor always
                did before the flag existed.

        Raises:
            IndexError: If an entry of the document list does not match the
                expected ``<split>/<category>/<category>_<n>.txt`` shape.
        """
        parameters = Parameters()
        outputFolder = parameters.paths['FACT_FEEL_db']
        print('creating fact feel\n')
        self.afinn = Afinn()
        data_path = os.path.expanduser(FactFeel.corpora_path)
        self.path = str(data_path + FactFeel.nlds_path)
        self.facts = []
        self.feels = []
        self.doclist = {FactFeel.FACT: [], FactFeel.FEEL: []}
        self.doc_list_path = str(self.path + 'doclist.json')
        if self.doc_list_exists():
            self.doclist = self.parse_doc_list()
        else:
            self.generate_doc_list()

        # Compiled once, outside the loops.  Raw string so that ``\d`` is a
        # regex escape rather than an (invalid) string escape.  The bare
        # ``.`` between groups matches any single character -- presumably
        # the path separator between the components; left unescaped so the
        # set of accepted names is unchanged.
        doc_name_pattern = re.compile(
            r'(train|dev|test).(fact|feel).(fact|feel)(_\d+)(.txt)')

        list_of_documents = []
        for label in self.doclist:
            for doc_name in self.doclist[label]:
                matches = doc_name_pattern.findall(doc_name)
                if not matches:
                    # Same exception type the old ``pop()`` on an empty list
                    # produced, but now naming the offending entry.
                    raise IndexError(
                        'doclist entry {!r} does not match the expected '
                        'corpus file pattern'.format(doc_name))
                # As before, the last match in the name is the one used.
                split, category, stem, number, _extension = matches[-1]
                parent = '{}/{}'.format(split, category)
                docleaf = '{}{}'.format(stem, number)
                list_of_documents.append(
                    Document(parent, docleaf, outputFolder))

        # Prepares a doclist for the flask server that feeds the annotation
        # viewer.  The attribute name (including its spelling) is kept
        # because code outside this method may read it.
        self.trasformed_doclist = self.transform_doclist()

        if remake_db:
            self.docs = self.create_db(self.path, list_of_documents)
        else:
            self.docs = [self.read_db(doc) for doc in list_of_documents]

        self.facts = [
            fact for fact in self.docs if 'fact' in fact.get_doc_leaf()
        ]
        self.feels = [
            feel for feel in self.docs if 'feel' in feel.get_doc_leaf()
        ]
Exemplo n.º 2
0
from sklearn.base import TransformerMixin, BaseEstimator
from utils.Parameters import Parameters
import numpy as np
from pymongo import MongoClient
# Module-level Parameters instance, created once when this module is imported.
parameters = Parameters()
from afinn import Afinn

# Module-level Afinn instance, created once when this module is imported.
afinn = Afinn()

"""
configs:
0- No feature
1- sentence polarity TODO:(needs parameterization )
2- num_polarity_words / num_neutral_words
3- mean_pos mean_neg
4- median_pos median_neg
"""


class AfinnTransformer(TransformerMixin, BaseEstimator):
    features = {'value': 0 }
    def __init__(self, featureSetConfiguration=1):
        """Remember which feature-set configuration this transformer uses.

        Args:
            featureSetConfiguration: Configuration number stored unchanged
                on the instance; ``transform`` treats 0 as "not active".
                Defaults to 1.
        """
        self.featureSetConfiguration = featureSetConfiguration

    def transform(self, X, **transform_params):

        if(self.featureSetConfiguration == 0): # not active
            features = np.array([
                AfinnTransformer.features.keys()
                for s in X
            ])