示例#1
0
	def save_labels(self):
		review_txt = ""
		content_list = self.fu.get_content_list()
		print 'get content list'
		grams_list = []
		for content in content_list:
			grams_list.append(get_2_grams(content))
		print 'get grams list'
		label_list = []
		content_len = len(content_list)
		for x in xrange(0,content_len):
			label_list.append(0)
		print 'start labeling'
		for i in xrange(0,content_len):
			grams_a = grams_list[i]
			for j in xrange(i+1,content_len):
				grams_b = grams_list[j]
				sim = jaccard_distance(grams_a, grams_b)
				if sim >= 0.9:
					print "sim is : " , sim
					label_list[i] = 1
					label_list[j] = 1
		with open(self.old_file + '36') as fp:
			lines = fp.readlines()
			for index, line in enumerate(lines):
				product_id = product_list[index]
				review_txt += lines[index].replace('\n', '') + '\t' + str(label_list[index]) +'\n'
				
		with open(self.new_file + '37', 'w') as fp:
			fp.write(review_txt)
示例#2
0
def convert_list_to_grams(threadName, q):
    """
    这个线程主要是负责把content_list的文本弄成2-grams的形式
    然后再添加到content_2_grams_list中
    """
    global content_2_grams_list
    while not exitFlag:
        if not workQueue.empty():
            # data = q.get()
            content_list = q.get()
            content_2_grams_list = content_2_grams_list + [summary_plot.get_2_grams(content) for content in content_list]
            print "%s processing %s" % (threadName, len(content_2_grams_list))
            queueLock.release()
        else:
            queueLock.release()
	def run(self):
		print 'start GramConverter'
		global content_list
		global grams_list
		global file_end_flag
		global gram_end_flag
		while not (file_end_flag and len(content_list) == 0):
			file_lock.acquire()
			if content_list:
				content = content_list.pop()
				grams = summary_plot.get_2_grams(content)
				gram_lock.acquire()
				grams_list.append(grams)
				print '\t\t append one grams into grams_list'
				gram_lock.release()
			file_lock.release()
			time.sleep(1)
		gram_end_flag = True
		print 'end GramConverter'
 def run(self):
     print 'start GramConverter'
     global content_list
     global grams_list
     global file_end_flag
     global gram_end_flag
     while not (file_end_flag and len(content_list) == 0):
         file_lock.acquire()
         if content_list:
             content = content_list.pop()
             grams = summary_plot.get_2_grams(content)
             gram_lock.acquire()
             grams_list.append(grams)
             print '\t\t append one grams into grams_list'
             gram_lock.release()
         file_lock.release()
         time.sleep(1)
     gram_end_flag = True
     print 'end GramConverter'