Exemplo n.º 1
0
def sample():
	blogs = commdatica.load('output/umtc.txt')
	
	has_emo = []
	no_emo = []

	target = 1000
	i = 0
	pbar = progbar.start(target)

	for blog in blogs:
		if blogger.is_valid(blog.text):
			if not len(has_emo) >= 500:
				has_emo.append(blog)
				i += 1
	
		elif blogger.is_valid(blog.text, check_emo = False):
			if not len(no_emo) >= 500:
				no_emo.append(blog)
				i += 1

		pbar.update(i)

	pbar.finish()

	print 'writing to umtc_yes_emo.txt ....',
	open('output/umtc_yes_emo.txt', 'w').write('\n'.join([repr(blog) for blog in has_emo]))
	print 'OK'

	print 'writing to umtc_no_emo.txt ....',
	open('output/umtc_no_emo.txt', 'w').write('\n'.join([repr(blog) for blog in no_emo]))
	print 'OK'	

	bs = commdatica.load('output/umtc_yes_emo.txt')
	print len(bs)
Exemplo n.º 2
0
def sample():
    blogs = commdatica.load("output/umtc.txt")

    has_emo = []
    no_emo = []

    target = 1000
    i = 0
    pbar = progbar.start(target)

    for blog in blogs:
        if blogger.is_valid(blog.text):
            if not len(has_emo) >= 500:
                has_emo.append(blog)
                i += 1

        elif blogger.is_valid(blog.text, check_emo=False):
            if not len(no_emo) >= 500:
                no_emo.append(blog)
                i += 1

        pbar.update(i)

    pbar.finish()

    print "writing to umtc_yes_emo.txt ....",
    open("output/umtc_yes_emo.txt", "w").write("\n".join([repr(blog) for blog in has_emo]))
    print "OK"

    print "writing to umtc_no_emo.txt ....",
    open("output/umtc_no_emo.txt", "w").write("\n".join([repr(blog) for blog in no_emo]))
    print "OK"

    bs = commdatica.load("output/umtc_yes_emo.txt")
    print len(bs)
Exemplo n.º 3
0
def main():
	# get parameters from terminal
	optparser = OptionParser()
	optparser.add_option('-i', '--input', action = 'store', type = 'string', dest = 'infile')
	optparser.add_option('-o', '--output', action = 'store', type = 'string', dest = 'outfile')
	optparser.add_option('-a', '--account', action = 'store', type = 'string', dest = 'acc_range')
	optparser.add_option('-n', '--instance', action = 'store', type = 'int', dest = 'n_instance', default = 5)
	optparser.add_option('-r', '--restart', action = 'store_true', dest = 'restart', default = False)
	optparser.add_option('-t', '--interval', action = 'store', type = 'int', dest = 'interval', default = 3)

	opts, args = optparser.parse_args()

	if not opts.infile:
		print '-i infile not specified'
		return 

	if not opts.outfile:
		print '-o outfile not specified'
		return

	if not opts.acc_range:
		print '-a (start_idx,end_idx) not specified'
		return
	else:
		m = re.match('(\d+),(\d+)', opts.acc_range)
		if not m:
			print '-a start_idx,end_idx should contain no space'
			return
		else:
			opts.acc_range = (int(m.group(1)), int(m.group(2)))

	ftype = 'w' if opts.restart else 'a'

	# prepare the accounts
	all_accounts = weiboparser.load_accounts()
	accounts = all_accounts[opts.acc_range[0]:opts.acc_range[1] + 1]

	# prepare the 
	all_bloginfo = commdatica.load(opts.infile)

	# filter the blogs whose comments have been downloaded

	if opts.restart:
		mids = set()
	else:
		mids = set(downloaded_mids(opts.outfile))
		logger.info('%d downloaded in %s'%(len(mids), opts.outfile))

	bloginfos = [bloginfo for bloginfo in all_bloginfo if not bloginfo.mid in mids]

	# for test
	# bloginfos = bloginfos[:20]

	launch(opts.outfile, accounts, bloginfos, ftype, opts.n_instance, opts.interval)
Exemplo n.º 4
0
def test():
	"""Run ``launch`` on a small batch: the first 25 accounts and the first
	8 blog infos whose ``mid`` is not reported by ``downloaded_mids()``.
	"""
	accounts = weiboparser.load_accounts()[:25]

	every_bloginfo = commdatica.load()

	# skip blogs whose comments were already fetched
	seen_mids = set(downloaded_mids())
	pending = []
	for info in every_bloginfo:
		if info.mid in seen_mids:
			continue
		pending.append(info)

	launch(JSONS_COMMENT, accounts, pending[:8], 4)
Exemplo n.º 5
0
def main():
	blogs = commdatica.load('output/umtc.txt')
	print '%d in total'%(len(blogs))

	pbar = progbar.start(len(blogs))
	c = 0
	for i, blog in enumerate(blogs):
		if blogger.is_valid(blog.text, check_emo = False):
			c += 1

		pbar.update(i + 1)
	pbar.finish()

	print '%.2f%%'%(100. * c / len(blogs))