def extractRevsLabel(filetype, pattern):
    """Extract per-review keywords and star labels from the review dump.

    Reads ``../data/chres_review.json`` line by line, parsing every line as
    a JSON object and using its ``stars`` and ``text`` fields.  The star
    values are saved with ``np.save`` to ``../data/label<filetype>.npy`` and
    the per-review results of ``tp.removeStopPunc`` are dumped as JSON to
    ``../data/revsKW<filetype>.json``.

    Args:
        filetype: Suffix inserted into both output file names.
        pattern: ``'AllWords'`` sends the raw review text straight to
            ``tp.removeStopPunc``.  Any other value is passed to
            ``tp.posExtract`` as the tag pattern; the extracted words are
            joined with spaces before ``tp.removeStopPunc`` is applied.

    Returns:
        Always ``1``.
    """
    use_all_words = pattern == 'AllWords'
    revs = []
    lab = []

    # The context manager closes the input even if parsing or one of the
    # tp helpers raises part-way through the file.
    with open('../data/chres_review.json') as review:
        for i, line in enumerate(review, 1):
            jre = json.loads(line)
            jstar = jre['stars']
            text = jre['text']

            if not use_all_words:
                tagText = tp.posTag(text)
                text = ' '.join(tp.posExtract(tagText, pattern))

            lab.append(jstar)
            revs.append(tp.removeStopPunc(text))
            print(i)  # progress: 1-based number of the review just handled

    np.save('../data/label' + filetype + '.npy', np.array(lab))

    # Open the keyword file only once there is something to write, so a
    # failed run no longer leaves a truncated, empty output file behind.
    with open('../data/revsKW' + filetype + '.json', 'w+') as revsKW:
        json.dump(revs, revsKW)

    return 1
示例#2
0
def extractRevsLabel(filetype, pattern):
    """Extract per-review keywords and star labels from the review dump.

    Reads ``../data/chres_review.json`` line by line, parsing every line as
    a JSON object and using its ``stars`` and ``text`` fields.  The star
    values are saved with ``np.save`` to ``../data/label<filetype>.npy`` and
    the per-review results of ``tp.removeStopPunc`` are dumped as JSON to
    ``../data/revsKW<filetype>.json``.

    Args:
        filetype: Suffix inserted into both output file names.
        pattern: ``'AllWords'`` sends the raw review text straight to
            ``tp.removeStopPunc``.  Any other value is passed to
            ``tp.posExtract`` as the tag pattern; the extracted words are
            joined with spaces before ``tp.removeStopPunc`` is applied.

    Returns:
        Always ``1``.
    """
    use_all_words = pattern == 'AllWords'
    revs = []
    lab = []

    # The context manager closes the input even if parsing or one of the
    # tp helpers raises part-way through the file.
    with open('../data/chres_review.json') as review:
        for i, line in enumerate(review, 1):
            jre = json.loads(line)
            jstar = jre['stars']
            text = jre['text']

            if not use_all_words:
                tagText = tp.posTag(text)
                text = ' '.join(tp.posExtract(tagText, pattern))

            lab.append(jstar)
            revs.append(tp.removeStopPunc(text))
            print(i)  # progress: 1-based number of the review just handled

    np.save('../data/label' + filetype + '.npy', np.array(lab))

    # Open the keyword file only once there is something to write, so a
    # failed run no longer leaves a truncated, empty output file behind.
    with open('../data/revsKW' + filetype + '.json', 'w+') as revsKW:
        json.dump(revs, revsKW)

    return 1
示例#3
0
# Accumulators filled while scanning the reviews: every kept word, the
# per-review word lists, the star labels, and a (so far unused) data list.
voca, revs, lab, dat = [], [], [], []

# For each review keep the words tp.posExtract returns for the 'NN.*'
# pattern. Reviews that end up with an empty list are skipped and do not
# advance the progress counter.
i = 1
for raw in review:
    entry = json.loads(raw)
    stars = entry['stars']
    body = entry['text']

    tagged = tp.posTag(body)
    kept = tp.removeStopPunc(' '.join(tp.posExtract(tagged, 'NN.*')))

    if kept != []:
        lab.append(stars)
        revs.append(kept)
        voca.extend(kept)
        print(i)
        i += 1

# Echo the collected labels, then write them one per line.
print(lab)
for star in lab:
    label.write(str(star) + "\n")
print("successfully create label!")
# Output files; they are opened for writing here and left open for the
# remainder of the script.
vocabulary = open('../data/vocabulary.txt', 'w+')
label = open('../data/label.txt', 'w+')
data = open('../data/data.txt', 'w+')

# Accumulators: all words seen, per-review word lists, star labels, and a
# (so far unused) data list.
voca, revs, lab, dat = [], [], [], []

# Collect the star label and the tp.removeStopPunc result of every review.
i = 1
for raw in review:
    entry = json.loads(raw)
    stars, body = entry['stars'], entry['text']
    lab.append(stars)
    tokens = tp.removeStopPunc(body)
    revs.append(tokens)
    voca.extend(tokens)
    print(i)
    i += 1

# One label per line.
for star in lab:
    label.write(str(star) + "\n")
print("successfully create label!")

# Drop duplicate words, report how many remain, and write one per line.
voca = list(set(voca))
print(len(voca))
for word in voca:
    vocabulary.write(word.encode('utf8') + "\n")
# Output files; they are opened for writing here and left open for the
# remainder of the script.
vocabulary = open('../data/vocabulary.txt', 'w+')
label = open('../data/label.txt', 'w+')
data = open('../data/data.txt', 'w+')

# Accumulators: all words seen, per-review word lists, star labels, and a
# (so far unused) data list.
voca, revs, lab, dat = [], [], [], []

# Collect the star label and the tp.removeStopPunc result of every review.
i = 1
for raw in review:
    entry = json.loads(raw)
    stars, body = entry['stars'], entry['text']
    lab.append(stars)
    tokens = tp.removeStopPunc(body)
    revs.append(tokens)
    voca.extend(tokens)
    print(i)
    i += 1

# One label per line.
for star in lab:
    label.write(str(star) + "\n")
print("successfully create label!")

# Drop duplicate words, report how many remain, and write one per line.
voca = list(set(voca))
print(len(voca))
for word in voca:
    vocabulary.write(word.encode('utf8') + "\n")
print("successfully create vocabulary!")