示例#1
0
def main(argv):
    """Validate the options, prepare the output directories, start the stream.

    The steps run in this order: validate ``argv`` through
    ``Option.validate``, build the ``Listener`` and the ``OAuthHandler``,
    make sure the tweet and word-cloud directories exist, and finally
    start a ``Stream`` filtered on the configured keywords.

    Args:
        argv: Value handed unchanged to ``Option.validate``; presumably the
            command-line arguments without the program name (TODO confirm
            against the caller).

    Raises:
        OSError: If one of the required directories does not exist and
            cannot be created.
    """
    opt = Option.validate(argv)

    analyzer = Analyzer()
    listener = Listener(analyzer, opt.hdfs_path, opt.local_path, opt.roll_size)

    auth = OAuthHandler(opt.consumer_key, opt.consumer_secret)
    auth.set_access_token(opt.access_token_key, opt.access_token_secret)

    # The temporary working area is always required; the local output area
    # only when a local path was configured.
    base_dirs = [Util.TMP_DIR]
    if opt.local_path:
        base_dirs.append(opt.local_path)

    for base_dir in base_dirs:
        for sub_dir in (Util.TWEETS, Util.WORDCLOUD):
            target = base_dir + '/' + sub_dir
            # Create first and inspect the failure afterwards: checking
            # os.path.exists() up front leaves a window in which another
            # process can create the directory and make makedirs() fail.
            try:
                os.makedirs(target)
            except OSError:
                # An already-present path is fine; anything else (for
                # example missing permissions) is a genuine error.
                if not os.path.exists(target):
                    raise

    stream = Stream(auth, listener)
    stream.filter(track=opt.keywords)
示例#2
0
                      hdfs_path=None,
                      local_path=None):

    tweet_files = os.listdir(tmp_tweet_dir)
    for tf in tweet_files:
        if hdfs_path:
            hadoopy.put(tmp_tweet_dir + '/' + tf,
                        hdfs_path + Util.TWEETS + '/' + tf[-24:-4] + '.csv')
        if local_path:
            shutil.copy(tmp_tweet_dir + '/' + tf,
                        local_path + Util.TWEETS + '/' + tf[-24:-4] + '.csv')
        os.remove(tmp_tweet_dir + '/' + tf)

    wordcloud_files = os.listdir(tmp_wordcloud_dir)
    for wf in wordcloud_files:
        if hdfs_path:
            hadoopy.put(tmp_wordcloud_dir + '/' + wf,
                        hdfs_path + Util.WORDCLOUD + '/' + wf[-24:-4] + '.csv')
        if local_path:
            shutil.copy(
                tmp_wordcloud_dir + '/' + wf,
                local_path + Util.WORDCLOUD + '/' + wf[-24:-4] + '.csv')
        os.remove(tmp_wordcloud_dir + '/' + wf)


# Script entry point: runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    # Validate the command-line arguments (everything after the script name).
    opt = Option.validate(sys.argv[1:])
    # NOTE(review): main() is handed the already-validated options object
    # here - confirm that main() does not validate its argument again.
    main(opt)
    # Earlier invocation that passed the raw argument list instead:
    # main(sys.argv[1:])