Exemplo n.º 1
0
	def singleprocess_datanum(self, test_dict):
		"""Time the single-process similarity run for every key of ``test_dict``.

		Each key is used as the end index of a slice over the content list
		read from the reviews file. The slice is passed through
		``summary_plot.get_2_grams_list``; the subsequent ``get_jd_list`` and
		``get_reviews_similarity_relation`` calls are what gets timed.

		Args:
			test_dict: Mapping whose keys are slice end indices. The value
				stored under each key is overwritten with the elapsed
				seconds measured for that key.

		Returns:
			The same ``test_dict`` object, updated in place.
		"""
		reader = file_util.FileUtil('../AmazonDataBackup/reviewsNew/reviewsNew.mP')
		reader.get_structure()
		for sample_size in test_dict.keys():
			print(sample_size)
			# Preparation of the input is deliberately kept outside the timed span.
			bigrams = summary_plot.get_2_grams_list(
				reader.get_content_list()[:sample_size])
			began = time.time()
			summary_plot.get_reviews_similarity_relation(
				summary_plot.get_jd_list(bigrams))
			# Only existing keys are reassigned, so the key set never changes.
			test_dict[sample_size] = time.time() - began
		return test_dict
Exemplo n.º 2
0
 def singleprocess_datanum(self, test_dict):
     """Measure, per key of ``test_dict``, how long the similarity pass takes.

     For every key the content list read from the reviews file is cut to
     ``[0:key]`` and handed to ``summary_plot.get_2_grams_list``. The clock
     then covers ``get_jd_list`` plus ``get_reviews_similarity_relation``.

     Args:
         test_dict: Mapping keyed by slice end index. Each value is
             replaced by the elapsed seconds measured for its key.

     Returns:
         ``test_dict`` itself, after its values have been overwritten.
     """
     source = file_util.FileUtil('../AmazonDataBackup/reviewsNew/reviewsNew.mP')
     source.get_structure()
     for limit in test_dict.keys():
         print(limit)
         # Building the input happens before the clock starts.
         subset = source.get_content_list()[:limit]
         subset_grams = summary_plot.get_2_grams_list(subset)
         t0 = time.time()
         distances = summary_plot.get_jd_list(subset_grams)
         summary_plot.get_reviews_similarity_relation(distances)
         elapsed = time.time() - t0
         # Reassigning an existing key does not alter the set of keys.
         test_dict[limit] = elapsed
     return test_dict
Exemplo n.º 3
0
def draw_review_distance_multiprocess(list_num=-1, put_num=10000):
    """Queue every pair of 2-gram entries for worker processes and time the run.

    The content list is read from the reviews file, cut to ``[0:list_num]``
    and passed through ``summary_plot.get_2_grams_list``. Every pair
    ``(i, j)`` with ``i < j`` of the result is queued, in batches of
    ``put_num`` pairs, for ``write_review_distance_to_file`` workers. One
    ``'STOP'`` item per worker is queued afterwards (presumably the shutdown
    marker the workers look for; verify against the worker).

    The queue is created without a size limit and no back-pressure is
    applied, so pending batches accumulate in memory when the workers are
    slower than the producer.

    Args:
        list_num: End index of the slice taken from the content list. It is
            a plain slice bound, so the default ``-1`` leaves out the last
            entry.
        put_num: Number of pairs per queued batch; must be at least 1.

    Returns:
        Seconds elapsed from just before the workers are started until all
        of them have been joined.

    Raises:
        ValueError: If ``put_num`` is smaller than 1.
    """
    if put_num < 1:
        # A batch size below 1 can never be filled. Reject it before any
        # worker is started so that none is left waiting on the queue.
        raise ValueError('put_num must be at least 1, got %r' % (put_num,))

    q = Queue()
    l = Lock()
    fu = file_util.FileUtil()
    fu.open_file('../AmazonDataBackup/reviewsNew/reviewsNew.mP')
    fu.get_structure()
    content_list = fu.get_content_list()[0:list_num]
    content_list_2_grams = summary_plot.get_2_grams_list(content_list)

    start = time.time()
    # Floor division keeps the count an int under true division, and max()
    # guarantees one worker on a single-core host, where halving yields 0
    # and the queued batches would never be consumed.
    cpu_num = max(1, cpu_count() // 2)
    process_list = []
    for i in range(cpu_num):
        p = Process(target=write_review_distance_to_file,
                    args=(q, l, i),
                    kwargs={'dirname': 'jaccard_distance_220000'})
        p.start()
        process_list.append(p)

    reviews_len = len(content_list_2_grams)
    print(reviews_len)
    grams_pair_list = []
    try:
        for i in range(reviews_len):
            for j in range(i + 1, reviews_len):
                # Hand over a batch as soon as it is full, then start a new
                # list so the queued one is never mutated afterwards.
                if len(grams_pair_list) == put_num:
                    q.put(grams_pair_list)
                    grams_pair_list = []
                grams_pair_list.append(
                    [content_list_2_grams[i], content_list_2_grams[j]])
        # Remainder batch; it is queued even when it is empty.
        q.put(grams_pair_list)
    finally:
        # Always release the workers, even when producing pairs failed,
        # so no child process is left blocked on the queue.
        for _ in process_list:
            q.put('STOP')
        for p in process_list:
            p.join()

    finish_time = time.time() - start
    print('exit main with %s s' % finish_time)
    return finish_time
Exemplo n.º 4
0
def draw_review_distance_multiprocess(list_num=-1, put_num=10000):
    """Queue every pair of 2-gram entries for worker processes and time the run.

    The content list is read from the reviews file, cut to ``[0:list_num]``
    and passed through ``summary_plot.get_2_grams_list``. Every pair
    ``(i, j)`` with ``i < j`` of the result is queued, in batches of
    ``put_num`` pairs, for ``write_review_distance_to_file`` workers. One
    ``"STOP"`` item per worker is queued afterwards (presumably the shutdown
    marker the workers look for; verify against the worker).

    The queue is created without a size limit and no back-pressure is
    applied, so pending batches accumulate in memory when the workers are
    slower than the producer.

    Args:
        list_num: End index of the slice taken from the content list. It is
            a plain slice bound, so the default ``-1`` leaves out the last
            entry.
        put_num: Number of pairs per queued batch; must be at least 1.

    Returns:
        Seconds elapsed from just before the workers are started until all
        of them have been joined.

    Raises:
        ValueError: If ``put_num`` is smaller than 1.
    """
    if put_num < 1:
        # A batch size below 1 can never be filled. Reject it before any
        # worker is started so that none is left waiting on the queue.
        raise ValueError("put_num must be at least 1, got %r" % (put_num,))

    q = Queue()
    l = Lock()
    fu = file_util.FileUtil()
    fu.open_file("../AmazonDataBackup/reviewsNew/reviewsNew.mP")
    fu.get_structure()
    content_list = fu.get_content_list()[0:list_num]
    content_list_2_grams = summary_plot.get_2_grams_list(content_list)

    start = time.time()
    # Floor division keeps the count an int under true division, and max()
    # guarantees one worker on a single-core host, where halving yields 0
    # and the queued batches would never be consumed.
    cpu_num = max(1, cpu_count() // 2)
    process_list = []
    for i in range(cpu_num):
        p = Process(target=write_review_distance_to_file, args=(q, l, i), kwargs={"dirname": "jaccard_distance_220000"})
        p.start()
        process_list.append(p)

    reviews_len = len(content_list_2_grams)
    print(reviews_len)
    grams_pair_list = []
    try:
        for i in range(reviews_len):
            for j in range(i + 1, reviews_len):
                # Hand over a batch as soon as it is full, then start a new
                # list so the queued one is never mutated afterwards.
                if len(grams_pair_list) == put_num:
                    q.put(grams_pair_list)
                    grams_pair_list = []
                grams_pair_list.append([content_list_2_grams[i], content_list_2_grams[j]])
        # Remainder batch; it is queued even when it is empty.
        q.put(grams_pair_list)
    finally:
        # Always release the workers, even when producing pairs failed,
        # so no child process is left blocked on the queue.
        for _ in process_list:
            q.put("STOP")
        for p in process_list:
            p.join()

    finish_time = time.time() - start
    print("exit main with %s s" % finish_time)
    return finish_time