def run():
    """Compile each fetched-tweets JSON file into a per-user CSV.

    For every ``*.json`` file in FETCHED_TWEETS_DIR, writes a CSV named
    after the user's screen name into COMPILED_TWEETS_DIR, with a
    ``user_screen_name`` column prepended to TWEET_FIELDS.
    """
    setup_space()
    for fn in glob(join(FETCHED_TWEETS_DIR, '*.json')):
        # fetched files are named "<screen_name>.json"
        screen_name = splitext(basename(fn))[0]
        oname = join(COMPILED_TWEETS_DIR, screen_name + '.csv')
        # newline='' is required when handing a file to the csv module;
        # without it the writer emits blank rows on Windows.
        with open(oname, 'w', newline='') as o:
            print("Writing:", oname)
            c = csv.DictWriter(o, fieldnames=['user_screen_name'] + TWEET_FIELDS)
            c.writeheader()
            for tweet in munge_tweets_file(fn):
                c.writerow(tweet)
def run():
    """Fetch Twitter profiles for the stored top-friend ids and save each as JSON.

    Reads one user id per line from TOP_FRIENDS_IDS_PATH, fetches each
    profile via the Twitter API, and writes it (pretty-printed) to
    FETCHED_FRIENDS_PROFILES_DIR/<id>.json.
    """
    setup_space()
    # Use a context manager so the ids file is closed, and strip each
    # line: readlines() would otherwise pass ids with trailing newlines
    # (and any blank lines) straight to the API.
    with open(TOP_FRIENDS_IDS_PATH) as id_file:
        f_ids = [line.strip() for line in id_file if line.strip()]
    api = get_twitter_api_from_creds()
    for profile in fetch_profiles(api, user_ids=f_ids):
        # note that we fetch profile id from each retrieved profile
        p_id = str(profile['id'])
        p_sn = profile['screen_name']
        fname = os.path.join(FETCHED_FRIENDS_PROFILES_DIR, p_id + '.json')
        with open(fname, 'w') as f:
            print('Writing (%s): %s' % (p_sn, fname))
            json.dump(profile, f, indent=2)
Performs a month-by-month query of the USGS site for earthquake data.
"""
from scripts.settings import setup_space
from scripts.settings import FETCHED_DIR
from scripts.settings import BASE_DATA_URL
import os.path
import requests
from dateutil import rrule
from datetime import datetime, timedelta

# Query window: the script iterates month-by-month from START_DATE up to
# (but not including the month beginning at) END_DATE.
START_DATE = datetime(1970, 1, 1)
END_DATE = datetime(2015, 8, 1)  # script will end at the month before

if __name__ == "__main__":
    setup_space()
    # One rrule entry per month boundary from START_DATE through END_DATE.
    timespan = rrule.rrule(rrule.MONTHLY, dtstart=START_DATE, until=END_DATE)
    u_params = {"orderby": "time-asc"}
    u_params["starttime"] = START_DATE
    # Skip the first entry (it equals START_DATE): each iteration covers
    # one calendar month, [starttime, endtime).
    for dt in timespan[1:]:
        u_params["endtime"] = dt
        # call the API
        resp = requests.get(BASE_DATA_URL, params=u_params)
        # Fail loudly on HTTP errors instead of silently saving an error
        # page to disk as if it were earthquake data.
        resp.raise_for_status()
        # Save the resulting text, named after the month just fetched,
        # e.g. "2015-05.csv"
        fname = os.path.join(FETCHED_DIR, u_params["starttime"].strftime("%Y-%m.csv"))
        with open(fname, "w") as f:
            print(fname)
            f.write(resp.text)
        # set the starttime to the next date for the next iteration
        u_params["starttime"] = u_params["endtime"]
# Example #4
def run():
    """Join LESO rows to PSC category names and write the compiled CSV.

    Looks up each LESO row's 4-digit NSN prefix in the PSC data to attach
    a 'PSC NAME' column; rows whose code cannot be resolved (even after
    truncating to the broader xxx0 category) are reported and skipped.
    """
    headers = get_leso_headers()
    headers.append('PSC NAME')
    print("Loading PSC data...")
    pscdict = gather_psc_dict()
    written = 0
    # Context manager ensures the output file is flushed and closed
    # (the original leaked the handle); newline='' is the csv module's
    # required open mode.
    with open(COMPILED_DATA_PATH, 'w', newline='') as outfile:
        cwriter = csv.DictWriter(outfile, fieldnames=headers)
        cwriter.writeheader()
        for row in iterate_leso_data():
            # get first four digits of NSN:
            ncode = row['NSN'].strip().split('-')[0]
            # some NSN-4-digits aren't in the PSC data for some WTF reason,
            # e.g. 7025 for computer stuff, e.g. 'KEYBOARD,DATA ENTRY'
            # So we truncate it to 7020 to get the broader category
            if not pscdict.get(ncode):
                ncode = ncode[0:3] + '0'
            try:
                # KeyError is the only expected failure here; catching
                # bare Exception hid unrelated bugs.
                row['PSC NAME'] = pscdict[ncode]['PRODUCT AND SERVICE CODE NAME']
            except KeyError:
                print("Bad NSN code:", ncode)
                print(row)
                print("--------------------\n")
            else:
                cwriter.writerow(row)
                written += 1
    # Count rows actually written: the old message printed the last
    # enumerate index (off by one, and NameError on empty input).
    print("%s rows written to %s" % (written, COMPILED_DATA_PATH))


if __name__ == '__main__':
    # Prepare output directories/workspace, then compile the data.
    setup_space()
    run()