def __init__(self, remake_db=False):
    """Load the fact/feel document list and the per-document records.

    Steps, in order:

    1. Read the output folder from ``Parameters().paths['FACT_FEEL_db']``
       and create an ``Afinn`` instance.
    2. Build ``self.path`` from the user-expanded ``FactFeel.corpora_path``
       plus ``FactFeel.nlds_path``.
    3. Load the document list with ``parse_doc_list()`` when
       ``doc_list_exists()`` is true; otherwise call
       ``generate_doc_list()`` (presumably it fills ``self.doclist`` --
       confirm against that method).
    4. Create one ``Document`` per document-list entry.
    5. Populate ``self.docs`` and split it into ``self.facts`` and
       ``self.feels`` by whether the document leaf contains ``'fact'`` or
       ``'feel'``.

    Args:
        remake_db: When true, ``self.docs`` comes from
            ``create_db(self.path, documents)``; when false (the default,
            matching the previously hard-coded behaviour) every document
            is loaded individually through ``read_db``.

    Raises:
        IndexError: If a document-list entry does not match the expected
            ``<train|dev|test>?<fact|feel>?<fact|feel>_<digits>?txt``
            layout (``?`` being any single character).
    """
    parameters = Parameters()
    output_folder = parameters.paths['FACT_FEEL_db']
    print('creating fact feel\n')

    self.afinn = Afinn()
    data_path = os.path.expanduser(FactFeel.corpora_path)
    self.path = str(data_path + FactFeel.nlds_path)
    # Initialised before the helper methods below run, as in the original
    # ordering, in case any of them reads these attributes.
    self.facts = []
    self.feels = []
    self.doclist = {FactFeel.FACT: [], FactFeel.FEEL: []}
    self.doc_list_path = str(self.path + 'doclist.json')

    if self.doc_list_exists():
        self.doclist = self.parse_doc_list()
    else:
        self.generate_doc_list()

    # Compiled once, outside the loops.  A raw string keeps ``\d`` from
    # being read as an (invalid) string escape; the resulting pattern is
    # unchanged.  The unescaped dots deliberately still match any single
    # character, so both '/' and '\\' path separators are accepted.
    doc_pattern = re.compile(
        r'(train|dev|test).(fact|feel).(fact|feel)(_\d+)(.txt)')

    list_of_documents = []
    for label in self.doclist:
        for entry in self.doclist[label]:
            # Use the last match in the entry; ``[-1]`` raises IndexError
            # when nothing matched, exactly as the former ``.pop()`` did.
            split, kind_dir, kind, number, _extension = (
                doc_pattern.findall(entry)[-1])
            parent = split + '/' + kind_dir
            docleaf = kind + number
            list_of_documents.append(
                Document(parent, docleaf, output_folder))

    # Prepares a doclist to be used by the flask server that feeds the
    # annotation viewer.  The attribute name keeps its historical spelling
    # because code outside this method may read it.
    self.trasformed_doclist = self.transform_doclist()

    if remake_db:
        self.docs = self.create_db(self.path, list_of_documents)
    else:
        self.docs = [self.read_db(doc) for doc in list_of_documents]

    self.facts = [
        doc for doc in self.docs if 'fact' in doc.get_doc_leaf()
    ]
    self.feels = [
        doc for doc in self.docs if 'feel' in doc.get_doc_leaf()
    ]
from sklearn.base import TransformerMixin, BaseEstimator from utils.Parameters import Parameters import numpy as np from pymongo import MongoClient parameters = Parameters() from afinn import Afinn afinn = Afinn() """ configs: 0- No feature 1- sentence polarity TODO:(needs parameterization ) 2- num_polarity_words / num_neutral_words 3- mean_pos mean_neg 4- median_pos median_neg """ class AfinnTransformer(TransformerMixin, BaseEstimator): features = {'value': 0 } def __init__(self, featureSetConfiguration = 1 ): self.featureSetConfiguration = featureSetConfiguration def transform(self, X, **transform_params): if(self.featureSetConfiguration == 0): # not active features = np.array([ AfinnTransformer.features.keys() for s in X ])