Пример #1
0
def count(path, out):
    """Write the positive/negative opinion ratio of every feature to ``out``.

    Each line of ``path`` is split on a run of exactly three spaces; the
    first field is the feature and the second is the opinion word. An opinion
    with a truthy entry in the lookup loaded from ``data/pos.txt`` adds one
    to the feature's "pos" tally; otherwise, an opinion with a truthy entry
    in the lookup loaded from ``data/neg.txt`` adds one to its "neg" tally.
    Opinions present in neither lookup are ignored.

    For every feature that received at least one tally, one line is written:
    ``<feature>: the positive ratio: P , the negative ratio: N`` with both
    ratios formatted to four decimal places.

    Lines that do not contain the three-space separator (blank lines, for
    example) are skipped instead of raising ``IndexError``.

    :param path: path of the feature/opinion input file.
    :param out: path of the report file; it is opened in 'w' mode.
    """
    with open(path) as fo, open(out, 'w') as fw:
        # file2dic and pwd are defined elsewhere in this module; the returned
        # objects are only used through .get(), so presumably dict-like.
        posD = file2dic(os.path.join(pwd, 'data', 'pos.txt'))
        negD = file2dic(os.path.join(pwd, 'data', 'neg.txt'))
        distribution = {}
        for line in fo:
            fields = line.strip().split('   ')
            if len(fields) < 2:
                # No separator on this line: nothing to count.
                continue
            feature, opinion = fields[0], fields[1]
            if not opinion:
                continue
            # The positive lookup wins when a word is present in both.
            if posD.get(opinion):
                polarity = "pos"
            elif negD.get(opinion):
                polarity = "neg"
            else:
                continue
            tally = distribution.setdefault(feature, {"pos": 0, "neg": 0})
            tally[polarity] += 1

        for feature, tally in distribution.items():
            # An entry only exists once something was counted, so total >= 1.
            total = tally["pos"] + tally["neg"]
            # float() keeps the division exact under Python 2 as well.
            fw.write("%s: the positive ratio: %.4f , the negative ratio: %.4f\n"
                     % (feature,
                        float(tally["pos"]) / total,
                        float(tally["neg"]) / total))
        # The with-statement closes both files; no explicit close() needed.
Пример #2
0
def count(path, out):
    """Summarise, per feature, how often its opinions were positive/negative.

    Input lines from ``path`` are split on a run of exactly three spaces:
    field 0 is the feature, field 1 the opinion word. The opinion is looked
    up first in the table loaded from ``data/pos.txt`` and, failing that, in
    the table loaded from ``data/neg.txt``; a truthy hit increments the
    matching "pos" or "neg" counter of the feature. Opinions with no hit in
    either table do not contribute.

    The report written to ``out`` has one line per counted feature:
    ``<feature>: the positive ratio: P , the negative ratio: N`` (ratios with
    four decimal places).

    A line lacking the three-space separator, such as an empty line, is
    skipped; previously it raised ``IndexError``.

    :param path: path of the feature/opinion input file.
    :param out: path of the report file; it is opened in 'w' mode.
    """
    with open(path) as fo, open(out, 'w') as fw:
        # file2dic and pwd come from elsewhere in this module; only .get() is
        # used on the results, so they are presumably dict-like.
        posD = file2dic(os.path.join(pwd, 'data', 'pos.txt'))
        negD = file2dic(os.path.join(pwd, 'data', 'neg.txt'))
        distribution = {}
        for line in fo:
            parts = line.strip().split('   ')
            if len(parts) < 2:
                # Malformed or blank line: there is no opinion field.
                continue
            feature, opinion = parts[0], parts[1]
            if not opinion:
                continue
            # Positive takes precedence when a word is in both tables.
            if posD.get(opinion):
                key = "pos"
            elif negD.get(opinion):
                key = "neg"
            else:
                continue
            counts = distribution.setdefault(feature, {"pos": 0, "neg": 0})
            counts[key] += 1

        for feature, counts in distribution.items():
            # Features are only recorded together with a hit, so total >= 1.
            total = counts["pos"] + counts["neg"]
            # float() avoids integer division when run under Python 2.
            pos_ratio = "%.4f" % (float(counts["pos"]) / total)
            neg_ratio = "%.4f" % (float(counts["neg"]) / total)
            fw.write(feature + ": the positive ratio: " + pos_ratio +
                     " , the negative ratio: " + neg_ratio + "\n")
        # Both files are closed by the with-statement on exit.
Пример #3
0
def pos2basket(path, out):
    """Convert a POS-tagged file into comma-separated noun "baskets".

    Lines of ``path`` are read one by one. Every match of the module-level
    ``pat`` (defined outside this function) has its "#NN" / "#NR" tag text
    removed and is added to the current basket unless it has a truthy entry
    in the lookup loaded from ``./feature-exclude.txt``. A line equal to
    ``---------#NR -#PU`` ends the current basket: if it is non-empty it is
    written to ``out`` as one comma-joined line.

    A non-empty basket still pending at end of file is written as well, so
    the nouns after the final separator are no longer lost when the input
    does not end with a separator line.

    NOTE: this is Python 2 code -- lines are byte strings that are decoded
    to unicode for matching and encoded back to UTF-8 for output.

    :param path: path of the POS-tagged input file.
    :param out: path of the basket file; it is opened in 'w' mode.
    """
    separator = "---------#NR -#PU"
    # (An earlier separator, "--#PU --#PU --#PU --#PU --#PU", was left
    # commented out in the original code.)
    exclude = file2dic("./feature-exclude.txt")
    with open(path) as fo, open(out, 'w') as fw:
        basket = []
        for line in fo:
            line = line.strip()
            if line == separator:
                if basket:
                    fw.write(','.join(basket) + '\n')
                    basket = []
                continue
            for token in pat.findall(line.decode('utf8')):
                noun = token.encode('utf8')
                noun = noun.replace("#NN", '').replace("#NR", '')
                if not exclude.get(noun):
                    basket.append(noun)
        # Flush whatever was collected after the last separator.
        if basket:
            fw.write(','.join(basket) + '\n')
        # The with-statement closes both files; no explicit close() needed.
Пример #4
0
def pos2basket(path, out):
    """Group the nouns of a POS-tagged file into comma-separated baskets.

    For each line of ``path`` that is not the separator, all matches of the
    module-level ``pat`` (defined outside this function) are taken, the
    "#NN" / "#NR" tag text is stripped from them, and those without a truthy
    entry in the lookup loaded from ``./feature-exclude.txt`` are collected.
    When the separator line ``---------#NR -#PU`` is met, the collected
    nouns (if any) are written to ``out`` as a single comma-joined line and
    collection starts over.

    Nouns collected after the last separator are also written at end of
    file; previously they were dropped when the input did not end with a
    separator line.

    NOTE: this is Python 2 code -- lines are byte strings that are decoded
    to unicode for matching and encoded back to UTF-8 for output.

    :param path: path of the POS-tagged input file.
    :param out: path of the basket file; it is opened in 'w' mode.
    """
    exclude = file2dic("./feature-exclude.txt")

    with open(path) as fo, open(out, 'w') as fw:
        nouns = []

        def flush():
            # Emit the pending basket, if any, and start a new one.
            if nouns:
                fw.write(','.join(nouns) + '\n')
                del nouns[:]

        for line in fo:
            line = line.strip()
            # (An earlier separator, "--#PU --#PU --#PU --#PU --#PU", was
            # left commented out in the original code.)
            if line == "---------#NR -#PU":
                flush()
                continue
            for match in pat.findall(line.decode('utf8')):
                noun = match.encode('utf8').replace("#NN", '').replace("#NR", '')
                if exclude.get(noun):
                    continue
                nouns.append(noun)

        # Do not lose the basket that follows the final separator.
        flush()
        # Both files are closed by the with-statement on exit.
Пример #5
0
                        distribution[feature]["neg"]+=1


        for i in distribution.keys():
            posCNT = (0 if distribution[i].get("pos") is None else distribution[i].get("pos"))
            negCNT = (0 if distribution[i].get("neg") is None else distribution[i].get("neg"))
            total = posCNT + negCNT
            posRATIO = "%.4f" %(float(posCNT) /total)
            negRATIO = "%.4f" %(float(negCNT) /total)
            fw.write(i+": "+"the positive ratio: "+posRATIO+" , the negative ratio: "+negRATIO+"\n")    
                                   
        fw.close()

## Module-level state used together with countSTREAM (defined below).
## `distribution` is a global dict; countSTREAM takes a parameter of the same
## name and stores per-feature {"pos": n, "neg": n} tallies in it.
distribution = {}
## Lookups built once, when this module is executed, from data/pos.txt and
## data/neg.txt under `pwd`. countSTREAM consults them via .get(opinion).
## Presumably the positive / negative opinion word lists -- file2dic is
## defined elsewhere, so confirm the exact structure there.
posD = file2dic(os.path.join(pwd,'data','pos.txt'))
negD = file2dic(os.path.join(pwd,'data','neg.txt'))
def countSTREAM(npop,distribution):
    line = npop.strip()
    feature = line.split('   ')[0]
    opinion = line.split('   ')[1]
    if opinion:
        if posD.get(opinion):
            if distribution.get(feature) is None:
                distribution[feature] = {"pos":1,"neg":0}
            else:
                distribution[feature]["pos"]+=1
        elif negD.get(opinion):
            if distribution.get(feature) is None:
                distribution[feature] = {"pos":0,"neg":1}
            else:
Пример #6
0
            posCNT = (0 if distribution[i].get("pos") is None else
                      distribution[i].get("pos"))
            negCNT = (0 if distribution[i].get("neg") is None else
                      distribution[i].get("neg"))
            total = posCNT + negCNT
            posRATIO = "%.4f" % (float(posCNT) / total)
            negRATIO = "%.4f" % (float(negCNT) / total)
            fw.write(i + ": " + "the positive ratio: " + posRATIO +
                     " , the negative ratio: " + negRATIO + "\n")

        fw.close()


## Module-level state used together with countSTREAM (defined below).
## `distribution` is a global dict; countSTREAM takes a parameter of the same
## name and stores per-feature {"pos": n, "neg": n} tallies in it.
distribution = {}
## Lookups built once, when this module is executed, from data/pos.txt and
## data/neg.txt under `pwd`. countSTREAM consults them via .get(opinion).
## Presumably the positive / negative opinion word lists -- file2dic is
## defined elsewhere, so confirm the exact structure there.
posD = file2dic(os.path.join(pwd, 'data', 'pos.txt'))
negD = file2dic(os.path.join(pwd, 'data', 'neg.txt'))


def countSTREAM(npop, distribution):
    line = npop.strip()
    feature = line.split('   ')[0]
    opinion = line.split('   ')[1]
    if opinion:
        if posD.get(opinion):
            if distribution.get(feature) is None:
                distribution[feature] = {"pos": 1, "neg": 0}
            else:
                distribution[feature]["pos"] += 1
        elif negD.get(opinion):
            if distribution.get(feature) is None: