def ngramizer(proctxt, hr):
    """Collect (chunk, npContext(chunk)) pairs for every NP chunk.

    Walks proctxt['chunksInClauses'] (sentences -> clauses -> chunks) and,
    for each clause, gathers its NP chunks paired with their npContext,
    skipping single-token NPs whose tag is 'O' or 'X'.

    Returns:
        Nested list mirroring the input: txt[sentence][clause] is a list of
        (chunk, npContext(chunk)) tuples.
    """
    # Cleanup: removed dead locals from the original (`nc`, `inClause`,
    # `toks`, and the per-chunk `pols = chunk.pols` rebind) — none were read.
    txt = []
    for sentence in proctxt['chunksInClauses']:
        sent = []
        for clause in sentence:
            claus = []
            # NOTE(review): the original discarded all four return values;
            # call kept in case clausePolarity mutates chunk state — confirm
            # it is pure and drop this line if so.
            clausePolarity(clause, hr)
            for chunk in clause:
                if chunk.chunkType != 'NP':
                    continue
                tags = chunk.tags
                # A lone 'O'/'X'-tagged token carries no usable NP content.
                if len(chunk.tokens) == 1 and tags[0] in ('O', 'X'):
                    continue
                claus.append((chunk, npContext(chunk)))
            sent.append(claus)
        txt.append(sent)
    return txt
def ngramizer(proctxt, hr):
    """Gather NP chunks (with their npContext) for each clause.

    The output mirrors the input nesting: text -> sentences -> clauses,
    where each clause entry is a list of (chunk, npContext(chunk)) tuples.
    Single-token NPs tagged 'O' or 'X' are filtered out.
    """
    result = []
    for sentence in proctxt['chunksInClauses']:
        sentenceOut = []
        for clause in sentence:
            clauseSize = len(clause)
            membership = [0] * clauseSize
            clauseOut = []
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            for chunk in clause:
                chunkToks = chunk.tokens
                chunkTags = chunk.tags
                pols = chunk.pols
                if chunk.chunkType == 'NP':
                    # Skip degenerate one-token NPs carrying an 'O'/'X' tag.
                    if len(chunkToks) == 1 and chunkTags[0] in ('O', 'X'):
                        continue
                    clauseOut.append((chunk, npContext(chunk)))
            sentenceOut.append(clauseOut)
        result.append(sentenceOut)
    return result
def negatedDomainNoun(procTxt, hr):
    """Collect NP chunks containing a domain noun under negative polarity.

    A qualifying chunk is an NP holding at least one token from the domain
    noun resource which is either negated (negn), negation-targeted (negtd),
    or carries negative polarity.
    """
    domainNouns = hr.resources[RESKEY_DOMAIN_NOUNS]
    found = []
    for sentence in procTxt[PTKEY_CHUNKEDCLAUSES]:
        for clause in sentence:
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            for idx, chunk in enumerate(clause):
                if chPat[idx] != 'NP':
                    continue
                if not any(tok in domainNouns for tok in chunk.tokens):
                    continue
                # Negated chunk, negation-targeted chunk, or plain negative
                # polarity — any of the three qualifies (one append only).
                if negn[idx] or negtd[idx] or pols[idx] < 0:
                    found.append(chunk)
    return found
def problemPhraseAnalysis(procTxt, hr):
    """Detect problem phrases in every clause of procTxt.

    For each clause, polarity/negation info is reduced to a per-chunk
    [sign, negated, negation-targeted] triple, then one of three detectors
    is dispatched on the clause's verb-phrase profile:
      - no VP at all          -> ppd_degenerateClause
      - fewer than two finite -> ppd_SVClause (single-verb clause)
      - otherwise             -> ppd_MVClause (multi-verb clause)

    Returns:
        problems[s][c] = list of per-clause problem structures, mirroring
        the sentence/clause nesting of procTxt[PTKEY_CHUNKEDCLAUSES].
    """
    procTxt = updateTokenAndChunkPropertiesPD(procTxt, hr)
    problems = []
    for sentence in procTxt[PTKEY_CHUNKEDCLAUSES]:
        sentence_problem = []
        for clause in sentence:
            # Cleanup: removed the dead `clause_problem = defaultdict(list)`
            # seed — every branch below overwrote it before use.
            n_vp, n_vpfinite, vpidx, lhs, vp, rhs = clauseVPAnalysis(clause)
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            # Reduce raw polarity scores to sign (-1/0/+1); `cmp` is the
            # Python 2 builtin used throughout this file.
            pols = [cmp(pol, 0) for pol in pols]
            clpol = [[p, int(n), int(t)] for p, n, t in zip(pols, negn, negtd)]
            if n_vp == 0:
                clause_problems = [ppd_degenerateClause(clause, clpol, vpidx, hr)]
            elif n_vpfinite < 2:
                clause_problems = [ppd_SVClause(clause, clpol, vpidx, hr)]
            else:
                clause_problems = ppd_MVClause(clause, clpol, vpidx, hr)
            sentence_problem.append(clause_problems)
        problems.append(sentence_problem)
    return problems
def problemPhraseAnalysis(procTxt, hr):
    """Run the problem-phrase detectors over every clause of procTxt.

    Builds a per-chunk [polarity-sign, negated, negation-targeted] triple
    per clause and routes the clause to the degenerate / single-verb /
    multi-verb detector based on its verb-phrase counts. The returned
    structure mirrors the sentence/clause nesting of the input.
    """
    procTxt = updateTokenAndChunkPropertiesPD(procTxt, hr)
    allProblems = []
    for sentence in procTxt[PTKEY_CHUNKEDCLAUSES]:
        perSentence = []
        for clause in sentence:
            n_vp, n_vpfinite, vpidx, lhs, vp, rhs = clauseVPAnalysis(clause)
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            # Sign of each chunk's polarity (Python 2 `cmp`).
            signs = [cmp(score, 0) for score in pols]
            clpol = [[sign, int(neg), int(twd)]
                     for sign, neg, twd in zip(signs, negn, negtd)]
            if n_vp == 0:
                detected = [ppd_degenerateClause(clause, clpol, vpidx, hr)]
            elif n_vpfinite < 2:
                detected = [ppd_SVClause(clause, clpol, vpidx, hr)]
            else:
                detected = ppd_MVClause(clause, clpol, vpidx, hr)
            perSentence.append(detected)
        allProblems.append(perSentence)
    return allProblems
def printErrorLogs(truLabels, mcPrd, procTxts, computedFeatures, truLbl=None, prdLbl=None, printer = sys.stdout.write):
    """Print a diagnostic dump for misclassified examples.

    Selects error indices by comparing true vs. predicted labels (filtered
    by truLbl and/or prdLbl), then for each error prints: id and labels,
    class scores, the token/tag stream, per-clause polarity analysis, and
    all non-empty computed feature values.

    Args:
        truLabels: gold label per example.
        mcPrd: per-example dicts holding MCKEY_LABEL / MCKEY_SCORES.
        procTxts: processed-text dict per example.
        computedFeatures: per-example {featureFuncName: {feature: value}}.
        truLbl, prdLbl: optional label filters (see selection logic below).
        printer: output sink; defaults to sys.stdout.write.
    """
    hr = pickle.load(open(DEFAULT_HR_FILE))
    # Guard for callers that explicitly pass printer=None.
    if not printer:
        printer = sys.stdout.write
    prdLabels = [mcp[MCKEY_LABEL] for mcp in mcPrd]
    prdScores = [mcp[MCKEY_SCORES] for mcp in mcPrd]
    if truLbl and (not prdLbl):
        # Gold label is truLbl but the prediction differs.
        errIdx = [k for k, tru in enumerate(truLabels) if tru == truLbl and prdLabels[k] != truLbl]
    elif (not truLbl) and (prdLbl):
        # Predicted as prdLbl but the gold label differs.
        errIdx = [k for k, tru in enumerate(truLabels) if prdLabels[k] == prdLbl and tru != prdLbl]
    else:
        # NOTE(review): when BOTH truLbl and prdLbl are supplied this branch
        # ignores prdLbl entirely (it repeats the truLbl-only filter), and
        # when NEITHER is supplied it compares against None and likely
        # selects nothing. Confirm whether `prd == prdLbl` was intended.
        errIdx = [k for k, prd in enumerate(prdLabels) if truLabels[k] == truLbl and prd != truLbl]
    errLog = [(k, truLabels[k], prdLabels[k]) for k in errIdx]
    # Group the report by (true label, predicted label).
    errLog.sort(key = operator.itemgetter(1, 2))
    for item in errLog:
        k = item[0]
        printer('ID:%d\tTru:%s\tPrd:%s\n' % (item[0], item[1], item[2]))
        # Per-class prediction scores (iteritems: Python 2 dict iteration).
        for key, val in prdScores[k].iteritems():
            printer('%s:%6.5f ' % (key, val))
        printer('\n')
        procTxt = procTxts[k]
        eb = computedFeatures[k]
        # Token/POS-tag stream of the misclassified example.
        for tok, tag in zip(procTxt[PTKEY_TOKENS], procTxt[PTKEY_TAGS]):
            printer('%s/%s ' % (tok, tag))
        printer('\n')
        isq = questionsInProcTxt(procTxt, hr)
        # print isq
        chunkedSentences = procTxt[PTKEY_CHUNKEDCLAUSES]
        for s, chunkedSentence in enumerate(chunkedSentences):
            for c, clause in enumerate(chunkedSentence):
                # clausePolarity also receives the printer for its own tracing.
                chPat, pols, negn, negtd = clausePolarity(clause, hr, printer)
                printer('CLAUSE: %s\n' % clause)
                printer('POLS: %s\n' % pols)
                printer('NEGN: %s\n' % negn)
                printer('NEGTD: %s\n' % negtd)
                printer('isQ:%s\n' % isq[s])
        printer('-\n')
        # Dump only truthy feature values.
        for featureFuncName, features in eb.iteritems():
            for feature, val in features.iteritems():
                if val:
                    printer('%s %s\n' % (feature, val))
        printer('\n')
def domainNounInSV(procTxt, hr):
    """Exploratory pass over single-verb clauses.

    Prints any single-verb clause whose left-hand side (chunks before the
    verb phrase) mixes positive and negative polarity chunks.

    NOTE(review): rlhs/rrhs/rvp are initialized but never filled — the
    function currently always returns three empty lists; the filling logic
    survives only in the commented-out block below.
    """
    rlhs = []
    rrhs = []
    rvp = []
    for s, sentence in enumerate(procTxt[PTKEY_CHUNKEDCLAUSES]):
        for c, clause in enumerate(sentence):
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            n_vp, n_vpfinite, vpidx, lhs, vp, rhs = clauseVPAnalysis(clause)
            # Only clauses with exactly one VP (or one finite VP) qualify.
            if n_vp == 1 or n_vpfinite == 1:
                # Pair each chunk with its polarity/negation annotations.
                cpnn = [(ch, pols[k], negn[k], negtd[k]) for k, ch in enumerate(clause)]
                lhs = [q for q in cpnn[:vpidx]]
                rhs = [q for q in cpnn[vpidx + 1:]]
                lhspols = [item[1] for item in lhs]
                cc = Counter(lhspols)
                # Mixed LHS polarity: at least one positive AND one negative chunk.
                if cc[1] and cc[-1]:
                    print clause
                    print lhs
                    print vp
                    print rhs
                    print '----'
#
#                lhschunks = [q[0] for q in lhs]
#                rhschunks = [q[0] for q in rhs]
#
#                lhsDNidx = hasDomainNoun(lhschunks, hr)
#                rhsDNidx = hasDomainNoun(rhschunks, hr)
#
#                rlhs = [lhs[k] for k in lhsDNidx]
#                rrhs = [rhs[k] for k in rhsDNidx]
#                rvp = [(vp, pols[vpidx], negn[vpidx], negtd[vpidx])]
#                if len(rlhs) > 1:
#                    print rlhs, rvp, rrhs
#
#
##                if pols[vpidx] < 0:
##                    print [lhs[k] for k in lhsDN], vp, [rhs[k] for k in rhsDN]
#
#                print rlhs, rvp, rrhs
#                print clause
#                print '-----'
    return (rlhs, rrhs, rvp)
def domainNounInSV(procTxt, hr): """ """ rlhs = [] rrhs = [] rvp = [] for s, sentence in enumerate(procTxt[PTKEY_CHUNKEDCLAUSES]): for c, clause in enumerate(sentence): chPat, pols, negn, negtd = clausePolarity(clause, hr) n_vp, n_vpfinite, vpidx, lhs, vp, rhs = clauseVPAnalysis(clause) if n_vp == 1 or n_vpfinite == 1: cpnn = [(ch, pols[k], negn[k], negtd[k]) for k, ch in enumerate(clause)] lhs = [q for q in cpnn[:vpidx]] rhs = [q for q in cpnn[vpidx+1:]] lhspols = [item[1] for item in lhs] cc = Counter(lhspols) if cc[1] and cc[-1]: print clause print lhs print vp print rhs print '----' # # lhschunks = [q[0] for q in lhs] # rhschunks = [q[0] for q in rhs] # # lhsDNidx = hasDomainNoun(lhschunks, hr) # rhsDNidx = hasDomainNoun(rhschunks, hr) # # rlhs = [lhs[k] for k in lhsDNidx] # rrhs = [rhs[k] for k in rhsDNidx] # rvp = [(vp, pols[vpidx], negn[vpidx], negtd[vpidx])] # if len(rlhs) > 1: # print rlhs, rvp, rrhs # # ## if pols[vpidx] < 0: ## print [lhs[k] for k in lhsDN], vp, [rhs[k] for k in rhsDN] # # print rlhs, rvp, rrhs # print clause # print '-----' return (rlhs, rrhs, rvp)
def countPolarNGrams(procTxt, hr, featureVals=None, FKEY='countPolarNGrams'):
    """Count polar n-gram tokens per polarity class.

    Tallies, for each polarity (positive/negative/neutral) and each n-gram
    length available in the polar-ngram resource, how many chunk tokens of
    that length carry that polarity, flipping polarity for
    negation-targeted tokens.

    Args:
        procTxt: processed-text dict; must contain PTKEY_CHUNKEDCLAUSES.
        hr: resource holder providing RESKEY_POLAR_NGRAMS.
        featureVals: optional feature cache; if it already holds FKEY it is
            returned untouched.
        FKEY: key under which the counts are stored in featureVals.

    Returns:
        featureVals with featureVals[FKEY] = {polarity: {n: count}}.
    """
    # BUG FIX: the original used the mutable default `featureVals={}` — one
    # shared dict cached the first document's counts and returned them for
    # every later call relying on the default. Use a None sentinel instead.
    if featureVals is None:
        featureVals = {}
    if haskey(featureVals, FKEY):
        return featureVals
    # Ensure token lexical properties exist; the bare `except:` is narrowed
    # so unrelated failures are no longer silently swallowed.
    try:
        procTxt[PTKEY_CHUNKEDCLAUSES][0][0][0].tprops
    except (AttributeError, IndexError, KeyError, TypeError):
        procTxt = updateTokenLexicalProperties(procTxt, hr)
    count = {
        KEY_POLARITY_POSITIVE: {},
        KEY_POLARITY_NEGATIVE: {},
        KEY_POLARITY_NEUTRAL: {}
    }
    for k in count:
        for n in hr.resources[RESKEY_POLAR_NGRAMS].availableNgrams:
            count[k][n] = 0
    # Cleanup: removed the unused `negation = hr.resources[...].getDicts(...)`
    # lookup (pure getter, result never read).
    for sentence in procTxt[PTKEY_CHUNKEDCLAUSES]:
        for clause in sentence:
            # Hoisted out of the chunk loop: clausePolarity depends only on
            # the clause, not on the chunk being scanned.
            chPat, pols, negn, negtd = clausePolarity(clause, hr)
            for chunk in clause:
                for k, tok in enumerate(chunk.tokens):
                    # NOTE(review): pols/negtd are indexed elsewhere by chunk
                    # position, but here by token index within the chunk —
                    # confirm this is intended.
                    pol = pols[k]
                    if negtd[k]:
                        pol = pol * -1
                    pkey = __NumToPol__[pol]
                    # Token length measured by its '_NG_' joined components.
                    n = len(tok.split('_NG_'))
                    count[pkey][n] += 1
    featureVals[FKEY] = count
    return featureVals
def entity_sentiment(ProcTxt,hr, sentiment_flag=1):
    """Extract NP entities with clause-level sentiment labels.

    For every clause: take chunk polarities (flipping ones that are
    negation-targeted but not themselves negators), adjust via
    single_verb() for single-verb clauses, collect NP-chunk phrases via
    extract_words(), and record each phrase with its mapped polarity label
    (pols_dict).

    Args:
        ProcTxt: processed text with PTKEY_CHUNKEDCLAUSES.
        hr: shared resource holder.
        sentiment_flag: 0 -> return only the entity strings; otherwise a
            list of OrderedDicts {'entity': ..., 'sentiment': [labels...]}.
    """
#    try:
#        ProcTxt[PTKEY_CHUNKEDCLAUSES][0][0][0].tprops
#    except:
#        ProcTxt = updateTokenLexicalProperties(ProcTxt, hr)
    retvaldict = defaultdict(list)
    Keyword=[]      # accumulated phrase+label strings; never read afterwards
    total_toks=[]   # NOTE(review): never used below
    fo=0            # NOTE(review): set but never read
    for sen in ProcTxt[PTKEY_CHUNKEDCLAUSES]:
        for bn,clause in enumerate(sen):
            cn=[]
            fo=1
            chPat, pols, negn, negtd = clausePolarity(clause, hr, None)
            # Flip polarity of chunks that are negation targets but not
            # negators themselves.
            pols_neg=[]
            for ind,act in enumerate(negtd):
                if(act==1 and negn[ind]==0):
                    pols_neg.append(pols[ind]*-1)
                else:
                    pols_neg.append(pols[ind])
            pols=pols_neg
            mn=[]
            n_vp, n_vpfinite, vpidx, lhs, vp, rhs=clauseVPAnalysis(clause)
            # Single-verb clause: let single_verb() redistribute polarity.
            if ((n_vpfinite == 0 and n_vp == 1) or (n_vpfinite == 1)):
                pols=single_verb(clause,pols,vpidx)
            ind_np=[]
            for c, chunk in enumerate(clause):
                #logger(' %s' % (chunk))
                tt=[]
                if(chunk.chunkType=="NP"):
                    tt=extract_words(chunk,c,chPat,hr)
                    if(len(tt)!=0):
                        ind_np.append(c)
                        ft=" ".join(tt)
                        mn.extend([ft])
                # Polarities may be updated chunk-by-chunk during the scan.
                pols=current_chunkpolarity(c,clause,pols,chPat)
            if(len(ind_np)!=0):
                # Pair each extracted NP phrase with its chunk's polarity label.
                for key_ind,ind in enumerate(mn):
                    ff=pols[ind_np[key_ind]]
                    gn=ind+pols_dict[ff]
                    cn.extend([gn])
                    retvaldict[ind].append(pols_dict[ff])
                Keyword.extend(cn)
    retval = []
    if(sentiment_flag==0):
        return(retvaldict.keys())
    else:
        #return(retvaldict)
        for k, v in retvaldict.iteritems():
            od = OrderedDict()
            od['entity'] = k
            od['sentiment'] = v
            retval.append(od) #{'aspect':k,'sentiment':v})
    return(retval)
def entity_sentiment(ProcTxt,hr, sentiment_flag=1):
    """Extract NP entities with clause-level sentiment, plus source phrase.

    Variant of entity_sentiment that keys results on
    "entity|token-tag-phrase" so the originating chunk text is reported
    alongside the entity.

    Args:
        ProcTxt: processed text with PTKEY_CHUNKEDCLAUSES.
        hr: shared resource holder.
        sentiment_flag: 0 -> return only the composite keys; otherwise a
            list of OrderedDicts {'entity', 'phrase', 'sentiment'}.

    NOTE(review): the '|' join/split round-trip breaks if the phrase itself
    contains '|' (the phrase would be truncated at the first bar), and only
    the FIRST recorded sentiment (v[0]) is emitted per key — confirm both.
    """
#    try:
#        ProcTxt[PTKEY_CHUNKEDCLAUSES][0][0][0].tprops
#    except:
#        ProcTxt = updateTokenLexicalProperties(ProcTxt, hr)
    retvaldict = defaultdict(list)
    #Keyword=[]
    #total_toks=[]
    fo=0  # NOTE(review): leftover flag, never read
    for sen in ProcTxt[PTKEY_CHUNKEDCLAUSES]:
        for bn,clause in enumerate(sen):
            #cn=[]
            #fo=1
            chPat, pols, negn, negtd = clausePolarity(clause, hr, None)
            # Flip polarity of chunks that are negation targets but not
            # negators themselves.
            pols_neg=[]
            for ind,act in enumerate(negtd):
                if(act==1 and negn[ind]==0):
                    pols_neg.append(pols[ind]*-1)
                else:
                    pols_neg.append(pols[ind])
            pols=pols_neg
            mn=[]
            n_vp, n_vpfinite, vpidx, lhs, vp, rhs=clauseVPAnalysis(clause)
            # Single-verb clause: let single_verb() redistribute polarity.
            if ((n_vpfinite == 0 and n_vp == 1) or (n_vpfinite == 1)):
                pols=single_verb(clause,pols,vpidx)
            ind_np=[]
            for c, chunk in enumerate(clause):
                #logger(' %s' % (chunk))
                tt=[]
                if(chunk.chunkType=="NP"):
                    tt=extract_words(chunk,c,chPat,hr)
                    if(len(tt)!=0):
                        ind_np.append(c)
                        ft=" ".join(tt)
                        mn.extend([ft])
                # Polarities may be updated chunk-by-chunk during the scan.
                pols=current_chunkpolarity(c,clause,pols,chPat)
            #print 'mn', mn
            if(len(ind_np)!=0):
                for key_ind,ind in enumerate(mn):
                    #print 'ind', ind
                    ff=pols[ind_np[key_ind]]
                    # Token/tag rendering of the source chunk for reporting.
                    phrase = clause[ind_np[key_ind]]
                    phrase = phrase.toktagstr()
                    #gn=ind+pols_dict[ff]
                    #cn.extend([gn])
                    key = '|'.join([ind, phrase])
                    #retvaldict[ind].append(pols_dict[ff]) #((pols_dict[ff], phrase))
                    retvaldict[key].append(pols_dict[ff])
            #Keyword.extend(cn)
    retval = []
    if(sentiment_flag==0):
        return(retvaldict.keys())
    else:
        #return(retvaldict)
        for k, v in retvaldict.iteritems():
            od = OrderedDict()
            k = k.split('|')
            od['entity'] = k[0]
            od['phrase'] = k[1]
            od['sentiment'] = v[0]
            retval.append(od) #{'aspect':k,'sentiment':v})
    return(retval)