def run():
    """Compile each fetched tweets JSON file into a per-user CSV.

    For every ``<screen_name>.json`` in FETCHED_TWEETS_DIR, writes
    ``<screen_name>.csv`` to COMPILED_TWEETS_DIR, with a
    ``user_screen_name`` column prepended to TWEET_FIELDS.
    """
    setup_space()
    for fn in glob(join(FETCHED_TWEETS_DIR, '*.json')):
        # The JSON filename (minus extension) is the user's screen name.
        screen_name = splitext(basename(fn))[0]
        oname = join(COMPILED_TWEETS_DIR, screen_name + '.csv')
        # newline='' is required by the csv module so the writer controls
        # line endings itself (otherwise blank rows appear on platforms
        # that translate '\n').
        with open(oname, 'w', newline='') as o:
            print("Writing:", oname)
            c = csv.DictWriter(o, fieldnames=(['user_screen_name'] + TWEET_FIELDS))
            c.writeheader()
            for tweet in munge_tweets_file(fn):
                c.writerow(tweet)
def run():
    """Fetch Twitter profiles for the stored top-friend IDs and save each as JSON.

    Reads user IDs (one per line) from TOP_FRIENDS_IDS_PATH, fetches the
    corresponding profiles via the Twitter API, and writes each profile to
    FETCHED_FRIENDS_PROFILES_DIR/<profile_id>.json.
    """
    setup_space()
    # Use a context manager so the ids file handle is closed deterministically
    # (the original open(...).readlines() leaked the handle).
    with open(TOP_FRIENDS_IDS_PATH) as ids_file:
        f_ids = ids_file.readlines()
    # NOTE(review): each entry still carries its trailing newline, as in the
    # original code; presumably fetch_profiles / the API tolerates that —
    # confirm, or strip() each line here.
    api = get_twitter_api_from_creds()
    for profile in fetch_profiles(api, user_ids=f_ids):
        # note that we fetch profile id from each retrieved profile
        p_id = str(profile['id'])
        p_sn = profile['screen_name']
        fname = os.path.join(FETCHED_FRIENDS_PROFILES_DIR, p_id + '.json')
        with open(fname, 'w') as f:
            print('Writing (%s): %s' % (p_sn, fname))
            json.dump(profile, f, indent=2)
"""
Does a month-by-month query of the USGS site for earthquake data
"""
from scripts.settings import setup_space
from scripts.settings import FETCHED_DIR
from scripts.settings import BASE_DATA_URL
import os.path
import requests
from dateutil import rrule
from datetime import datetime, timedelta

START_DATE = datetime(1970, 1, 1)
END_DATE = datetime(2015, 8, 1)  # script will end at the month before

if __name__ == "__main__":
    setup_space()
    # Materialize every month boundary in [START_DATE, END_DATE].
    months = list(rrule.rrule(rrule.MONTHLY, dtstart=START_DATE, until=END_DATE))
    # Walk consecutive boundary pairs: each request covers one month window.
    for month_start, month_end in zip(months, months[1:]):
        params = {
            "orderby": "time-asc",
            "starttime": month_start,
            "endtime": month_end,
        }
        # call the API
        resp = requests.get(BASE_DATA_URL, params=params)
        # Save the resulting text as e.g. "2015-05.csv",
        # named after the window's starting month.
        fname = os.path.join(FETCHED_DIR, month_start.strftime("%Y-%m.csv"))
        with open(fname, "w") as f:
            print(fname)
            f.write(resp.text)
def run():
    """Join LESO equipment rows with PSC category names and write one CSV.

    For each LESO row, the first 4 digits of its NSN are looked up in the
    PSC data to attach a 'PSC NAME' column; codes missing from the PSC data
    are first broadened (last digit -> '0'), and rows that still can't be
    resolved are reported and skipped.
    """
    headers = get_leso_headers()
    headers.append('PSC NAME')
    print("Loading PSC data...")
    pscdict = gather_psc_dict()
    # newline='' per the csv docs; the with-block also guarantees the output
    # file is flushed and closed (the original never closed its handle).
    with open(COMPILED_DATA_PATH, 'w', newline='') as outfile:
        cwriter = csv.DictWriter(outfile, fieldnames=headers)
        cwriter.writeheader()
        written = 0
        for row in iterate_leso_data():
            # get first four digits of NSN:
            ncode = row['NSN'].strip().split('-')[0]
            # some NSN-4-digits aren't in the PSC data for some WTF reason,
            # e.g. 7025 for computer stuff, e.g. 'KEYBOARD,DATA ENTRY'
            # So we truncate it to 7020 to get the broader category
            if not pscdict.get(ncode):
                ncode = ncode[0:3] + '0'
            try:
                row['PSC NAME'] = pscdict[ncode]['PRODUCT AND SERVICE CODE NAME']
            except KeyError:
                # Only the two dict lookups above can fail here; report & skip.
                print("Bad NSN code:", ncode)
                print(row)
                print("--------------------\n")
            else:
                cwriter.writerow(row)
                written += 1
    # Report the number of rows actually written (the original printed the
    # last enumerate index, which was off by one, counted skipped rows, and
    # raised NameError when the input was empty).
    print("%s rows written to %s" % (written, COMPILED_DATA_PATH))


if __name__ == '__main__':
    setup_space()
    run()