def extractRevsLabel(filetype, pattern):
    """Extract star labels and keyword lists from the review file.

    Every line of ``../data/chres_review.json`` is parsed as a JSON object
    and its ``'stars'`` and ``'text'`` entries are read.  The text is turned
    into keywords with ``tp.removeStopPunc``; unless ``pattern`` is
    ``'AllWords'`` the text is first tagged with ``tp.posTag`` and reduced
    to the items ``tp.posExtract`` returns for ``pattern``, joined by
    single spaces.

    Outputs:
        ``../data/label<filetype>.npy``  - the star values, via ``np.save``.
        ``../data/revsKW<filetype>.json`` - the per-review keyword results,
        via ``json.dump``.

    Args:
        filetype: String inserted into both output file names.
        pattern: ``'AllWords'`` to use the full review text; any other
            value is handed to ``tp.posExtract`` as its second argument.

    Returns:
        Always ``1``.
    """
    labels = []
    keywords = []

    # Both files are opened up front, as before, so a bad output path still
    # fails before any review is processed; ``with`` guarantees that both
    # handles are closed even when parsing or tagging raises.
    with open('../data/chres_review.json') as review, \
            open('../data/revsKW' + filetype + '.json', 'w+') as revsKW:
        for count, line in enumerate(review, 1):
            record = json.loads(line)
            stars = record['stars']
            text = record['text']
            if pattern != 'AllWords':
                tagged = tp.posTag(text)
                text = ' '.join(tp.posExtract(tagged, pattern))
            words = tp.removeStopPunc(text)
            labels.append(stars)
            keywords.append(words)
            # Progress counter (1-based).  The parenthesised single-argument
            # form prints identically as a Python 2 statement and as a
            # Python 3 function call.
            print(count)

        np.save('../data/label' + filetype + '.npy', np.array(labels))
        json.dump(keywords, revsKW)
    return 1
def extractRevsLabel(filetype, pattern):
    """Write the star-label array and keyword lists for all reviews.

    Reads ``../data/chres_review.json`` line by line, decoding each line
    with ``json.loads`` and using its ``'stars'`` and ``'text'`` entries.

    Args:
        filetype: String spliced into the output names
            ``../data/label<filetype>.npy`` and
            ``../data/revsKW<filetype>.json``.
        pattern: When equal to ``'AllWords'`` the raw text goes straight to
            ``tp.removeStopPunc``.  Otherwise the text is tagged with
            ``tp.posTag``, filtered with ``tp.posExtract(tagged, pattern)``,
            re-joined with spaces and then passed to ``tp.removeStopPunc``.

    Returns:
        Always ``1``.
    """
    # Choose the text-to-keywords step once instead of duplicating the
    # whole read loop for each mode.
    if pattern == 'AllWords':
        def keywords_of(text):
            return tp.removeStopPunc(text)
    else:
        def keywords_of(text):
            tagged = tp.posTag(text)
            selected = tp.posExtract(tagged, pattern)
            return tp.removeStopPunc(' '.join(selected))

    source_path = '../data/chres_review.json'
    keywords_path = '../data/revsKW' + filetype + '.json'
    labels_path = '../data/label' + filetype + '.npy'

    star_values = []
    keyword_lists = []

    # Context managers close both handles on every exit path; the original
    # only closed them when the whole run succeeded.
    with open(source_path) as review, open(keywords_path, 'w+') as revsKW:
        for position, line in enumerate(review, 1):
            entry = json.loads(line)
            rating = entry['stars']
            extracted = keywords_of(entry['text'])
            star_values.append(rating)
            keyword_lists.append(extracted)
            # 1-based progress output; print(x) with one argument behaves
            # the same under Python 2 and Python 3.
            print(position)

        np.save(labels_path, np.array(star_values))
        json.dump(keyword_lists, revsKW)
    return 1
# Collect labels, keyword lists and a flat word list for every review whose
# 'NN.*' extraction leaves something after tp.removeStopPunc, then write the
# labels out one per line.  `review` and `label` are file objects opened
# outside this section.
voca, revs, lab, dat = [], [], [], []

for seen, raw in enumerate(review, 1):
    record = json.loads(raw)
    stars = record['stars']
    body = record['text']
    matched = tp.posExtract(tp.posTag(body), 'NN.*')
    ws = tp.removeStopPunc(' '.join(matched))
    # NOTE(review): the extent of this `if` was reconstructed from a
    # flattened source line; the progress print is assumed to run for every
    # input line - confirm against the original layout.
    if ws != []:
        lab.append(stars)
        revs.append(ws)
        voca.extend(ws)
    # 1-based count of input lines processed so far.
    print(seen)

print(lab)
label.writelines(str(stars) + "\n" for stars in lab)
print("successfully create label!")
# Output files.  They are not closed in this section, and `data` is not
# written here.
vocabulary = open('../data/vocabulary.txt', 'w+')
label = open('../data/label.txt', 'w+')
data = open('../data/data.txt', 'w+')

voca, revs, lab, dat = [], [], [], []

# `review` is an iterable of JSON lines opened outside this section.
for seen, raw in enumerate(review, 1):
    record = json.loads(raw)
    stars = record['stars']
    body = record['text']
    lab.append(stars)
    ws = tp.removeStopPunc(body)
    revs.append(ws)
    voca.extend(ws)
    # 1-based count of reviews processed so far.
    print(seen)

# One label per line, in input order.
label.writelines(str(stars) + "\n" for stars in lab)
print("successfully create label!")

# Collapse the word list to its distinct entries before writing it out.
voca = list(set(voca))
print(len(voca))
vocabulary.writelines(word.encode('utf8') + "\n" for word in voca)
# Output files.  They are not closed in this section, and `data` is not
# written here.
vocabulary = open('../data/vocabulary.txt', 'w+')
label = open('../data/label.txt', 'w+')
data = open('../data/data.txt', 'w+')

voca = []
revs = []
lab = []
dat = []

# `review` is an iterable of JSON lines opened outside this section.
for progress, json_line in enumerate(review, 1):
    parsed = json.loads(json_line)
    rating = parsed['stars']
    content = parsed['text']
    lab.append(rating)
    ws = tp.removeStopPunc(content)
    revs.append(ws)
    voca.extend(ws)
    # 1-based count of reviews processed so far.
    print(progress)

# Labels go out one per line, in the order the reviews were read.
label.writelines(str(rating) + "\n" for rating in lab)
print("successfully create label!")

# Reduce the collected words to the distinct set, then write one per line.
voca = list(set(voca))
print(len(voca))
vocabulary.writelines(term.encode('utf8') + "\n" for term in voca)
print("successfully create vocabulary!")