"""
__author__ = 'kjoseph'

import glob
import os
import sys
from multiprocessing import Queue

from twitter_dm.multiprocess import multiprocess_setup
from twitter_dm.multiprocess.WorkerTweetData import TweetDataWorker
from twitter_dm.utility import general_utils

from twitter_dm.utility.general_utils import mkdir_no_err,collect_system_arguments, chunk_data

# Parse the command line. The four values are whatever
# collect_system_arguments returns for sys.argv; presumably API handles, an
# output directory, the tweet ids to fetch and an ids-vs-names flag
# -- TODO confirm against twitter_dm.utility.general_utils.
handles, output_dir, tweet_ids, is_ids = collect_system_arguments(sys.argv)


# Create the output directory
mkdir_no_err(output_dir)

# chunk tweets into 100s (the API takes them by 100)
# NOTE(review): `i` is not read anywhere in the visible part of this script.
i = 0
tweets_chunked = chunk_data(tweet_ids)


# Show the first chunk. NOTE(review): element 0 is indexed unconditionally, so
# this presumably fails when there are no chunks -- confirm what chunk_data
# returns for empty input.
print tweets_chunked[0]
# init a sync manager
multiprocess_setup.init_good_sync_manager()

# put data on the queue
"""

__author__ = 'kjoseph'

import glob
import os
import sys

from twitter_dm.multiprocess import multiprocess_setup
from twitter_dm.multiprocess.WorkerUserData import UserDataWorker
from datetime import datetime
from twitter_dm.utility.general_utils import mkdir_no_err, collect_system_arguments

(handles, out_dir, user_ids, is_ids, collect_friends, collect_followers,
 gen_tweet_counts_file) = collect_system_arguments(sys.argv, [
     'collect_friends (y/n)', 'collect_followers (y/n)',
     "gen_tweet_counts_file (y/n)"
 ])

handles = handles[:2]

print 'num users: ', len(user_ids)

mkdir_no_err(out_dir)
mkdir_no_err(os.path.join(out_dir, "obj"))
mkdir_no_err(os.path.join(out_dir, "json"))

multiprocess_setup.init_good_sync_manager()

##put data on the queue
request_queue = multiprocess_setup.load_request_queue(user_ids, len(handles))
# Example #3
# 0
import glob
import os
import sys
from time import sleep
import io

from twitter_dm.utility.general_utils import mkdir_no_err, collect_system_arguments

# Parse the standard arguments plus one extra, labelled 'friends or followers'.
handles, output_dir, data_to_collect, is_ids, friends_or_followers = collect_system_arguments(
    sys.argv, ['friends or followers'])

# In case the input is in a weird format, keep only the last
# whitespace-separated token of each line. Blank or whitespace-only lines have
# no tokens, so they are skipped instead of raising IndexError on the [-1].
accounts = [x.split()[-1] for x in data_to_collect if x.strip()]

mkdir_no_err(output_dir)

handle_iter = 0

# Name of the request parameter the account value is sent under.
param_arg = 'user_id' if is_ids else 'screen_name'

for i, handle in enumerate(accounts):
    print 'user: '******'cursor': -1, 'count': 5000}

    n_collected = 0
# Example #4
# 0
"""
Another simple, alternative way to get basic information about a file of user_ids/sns
"""

import io
import sys
from twitter_dm.TwitterUser import get_user_ids_and_sn_data_from_list
from twitter_dm.utility.general_utils import collect_system_arguments

# Parse the standard command-line arguments.
handles, output_file, data_to_collect, is_ids = collect_system_arguments(
    sys.argv)

# Open the output inside a `with` block so the file is closed even when the
# collection call raises; it was previously closed only on the success path.
with io.open(output_file, "w") as out_fil:
    # The third argument is the negation of is_ids.
    user_data = get_user_ids_and_sn_data_from_list(data_to_collect, handles,
                                                   not is_ids, out_fil)
"""
An example of how to use BotOMeter in parallel
"""
__author__ = 'kjoseph'

import glob
import sys

from twitter_dm.utility.general_utils import collect_system_arguments
from twitter_dm.multiprocess import multiprocess_setup
from twitter_dm.multiprocess.WorkerBotOMeter import BotOMeterWorker
from twitter_dm.utility import general_utils

# Standard arguments plus one extra, labelled 'mashape_key'.
extra_args = ['mashape_key']
handles, out_dir, data_to_collect, is_ids, mashape_key = collect_system_arguments(
    sys.argv, extra_args)

general_utils.mkdir_no_err(out_dir)

# Set up the sync manager before any queue is created.
multiprocess_setup.init_good_sync_manager()

# Strip surrounding whitespace from every requested entry, then load the
# entries onto the request queue (with add_nones=True).
stripped_entries = [x.strip() for x in data_to_collect]
request_queue = multiprocess_setup.load_request_queue(
    stripped_entries, len(handles), add_nones=True)

# Start one BotOMeterWorker per handle; each gets its own handle and index.
processes = []
for i, worker_handle in enumerate(handles):
    p = BotOMeterWorker(request_queue, worker_handle, i, out_dir, mashape_key)
    p.start()
    processes.append(p)
# Example #6
# 0
import botometer
from twitter_dm.utility.general_utils import Unbuffered, collect_system_arguments
import ujson as json
import sys


handles, out_file, user_ids, is_ids, mashape_key = collect_system_arguments(sys.argv, ['mashape_key'])

twitter_app_auth = {
    'consumer_key': handles[0].consumer_key,
    'consumer_secret': handles[0].consumer_secret,
    'access_token': handles[0].access_token,
    'access_token_secret':  handles[0].access_token_secret,
    "wait_on_ratelimit" : True
  }

bom = botometer.Botometer(mashape_key=mashape_key,**twitter_app_auth)

accounts = set([x.strip() for x in open(out_file)])

with open(out_file) as inf:
    for line in inf:
        uid = json.loads(line)['user']['id_str']
        accounts.remove(uid)

print 'n accounts: ', len(accounts)

of = Unbuffered(open("bot_out.json", "a"))

for i, a in enumerate(accounts):
    if i % 250 == 0: