def _parse_acc_range(text):
    """Parse a ``start_idx,end_idx`` string into a tuple of two ints.

    Returns ``None`` when *text* is not two runs of digits separated by a
    single comma (a trailing newline is tolerated by ``$``).
    """
    # re.match only anchors the start of the string; the trailing `$` is
    # needed so that values such as "3,7xyz" or "1,2,3" are rejected instead
    # of being silently truncated to the leading "3,7" / "1,2".
    matched = re.match(r'(\d+),(\d+)$', text)
    if matched is None:
        return None
    return int(matched.group(1)), int(matched.group(2))


def main():
    """Command-line entry point: parse options and start the download.

    Options:
        -i/--input     input file, passed to ``commdatica.load`` (required)
        -o/--output    output file, passed to ``launch`` (required)
        -a/--account   ``start_idx,end_idx`` slice of the loaded accounts,
                       both ends inclusive (required)
        -n/--instance  int forwarded to ``launch`` (default 5)
        -r/--restart   open the output in 'w' mode and skip the
                       already-downloaded filter (default: 'a' mode)
        -t/--interval  int forwarded to ``launch`` (default 3)

    When a required option is missing or ``-a`` is malformed, a message is
    printed and the function returns ``None`` without doing any work.
    """
    # get parameters from terminal
    optparser = OptionParser()
    optparser.add_option('-i', '--input', action='store', type='string',
                         dest='infile')
    optparser.add_option('-o', '--output', action='store', type='string',
                         dest='outfile')
    optparser.add_option('-a', '--account', action='store', type='string',
                         dest='acc_range')
    optparser.add_option('-n', '--instance', action='store', type='int',
                         dest='n_instance', default=5)
    optparser.add_option('-r', '--restart', action='store_true',
                         dest='restart', default=False)
    optparser.add_option('-t', '--interval', action='store', type='int',
                         dest='interval', default=3)
    opts, args = optparser.parse_args()

    # Guard clauses: bail out on the first missing/invalid option.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if not opts.infile:
        print('-i infile not specified')
        return
    if not opts.outfile:
        print('-o outfile not specified')
        return
    if not opts.acc_range:
        print('-a (start_idx,end_idx) not specified')
        return
    acc_range = _parse_acc_range(opts.acc_range)
    if acc_range is None:
        print('-a start_idx,end_idx should contain no space')
        return
    first_idx, last_idx = acc_range

    # Restart truncates the output file; otherwise results are appended.
    ftype = 'w' if opts.restart else 'a'

    # prepare the accounts (end index is inclusive, hence the + 1)
    all_accounts = weiboparser.load_accounts()
    accounts = all_accounts[first_idx:last_idx + 1]

    # prepare the blog infos from the input file
    all_bloginfo = commdatica.load(opts.infile)

    # filter the blogs whose comments have been downloaded; on restart
    # nothing is considered downloaded, so every blog is processed again
    if opts.restart:
        mids = set()
    else:
        mids = set(downloaded_mids(opts.outfile))
        logger.info('%d downloaded in %s' % (len(mids), opts.outfile))
    bloginfos = [bloginfo for bloginfo in all_bloginfo
                 if bloginfo.mid not in mids]

    launch(opts.outfile, accounts, bloginfos, ftype,
           opts.n_instance, opts.interval)
def test():
    """Run the downloader on a small fixed sample.

    Takes the first 25 loaded accounts and the first 8 blogs whose ``mid``
    is not reported by ``downloaded_mids()``, then hands them to ``launch``
    with ``JSONS_COMMENT`` as its first argument.
    """
    # NOTE(review): main() calls launch() with six positional arguments
    # (outfile, accounts, bloginfos, ftype, n_instance, interval) and passes
    # explicit paths to commdatica.load / downloaded_mids. Here the literal 4
    # lands in the fourth positional slot and both loaders are called with no
    # arguments -- confirm against their current signatures/defaults.
    accounts = weiboparser.load_accounts()[:25]
    candidates = commdatica.load()

    # do not download comments for the same blog again
    seen_mids = set(downloaded_mids())
    pending = []
    for info in candidates:
        if info.mid in seen_mids:
            continue
        pending.append(info)

    launch(JSONS_COMMENT, accounts, pending[:8], 4)