def run():
    options = utils.flags()
    debug = options.get('debug', False)

    filename = "legislators-current.yaml"
    args = utils.args()
    legislators = load_data(filename)

    if len(args) != 0:
        bioguides = args
        print("Fetching contact forms for %s..." % ', '.join(bioguides))
    else:
        bioguides = [member['id']['bioguide'] for member in legislators]
        print("Fetching contact forms for all current members...")

    for legislator in legislators:
        bioguide = legislator['id']['bioguide']
        if bioguide not in bioguides:
            continue
        if bioguide in SKIP_BIOGUIDES:
            continue

        if debug:
            print("Downloading form for %s" % bioguide, flush=True)
        try:
            steps = contact_steps_for(bioguide)
        except LegislatorNotFoundError as e:
            if debug:
                print("skipping, %s..." % e, flush=True)
            continue

        legislator['terms'][-1]['contact_form'] = steps['contact_form']['steps'][0]['visit']

    print("Saving data to %s..." % filename)
    save_data(legislators, filename)
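# NOTE (added sketch): the legislator scripts in this collection call
# load_data()/save_data() from a shared utils module without defining them.
# Below is a minimal sketch of what they appear to assume -- rtyaml-backed
# YAML files in a data directory. The real helpers may differ, and the
# "../data" location is an assumption.
import os
import rtyaml  # round-trip YAML: preserves key order and comments

DATA_DIR = "../data"  # assumed location of the YAML files

def load_data(filename):
    # Parse a YAML file into order-preserving Python lists/dicts.
    with open(os.path.join(DATA_DIR, filename)) as f:
        return rtyaml.load(f)

def save_data(data, filename):
    # Serialize back to YAML, keeping the round-trip formatting.
    with open(os.path.join(DATA_DIR, filename), "w") as f:
        rtyaml.dump(data, f)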
def main(args):
    '''Module main function'''
    global database
    global genetic_algorithm
    global joint_positions
    global goal_positions

    pygame.init()
    random.seed()
    database = utils.initialize_database(args, 'RobotTrainingData')
    database.set_objective_names(['Tiempo',
                                  r'Error en $\theta_1$',
                                  r'Error en $\theta_2$',
                                  r'Error en $\theta_3$',
                                  'Energía'])
    problem = EV3Problem()
    generation = database.properties['highest_population']
    population_size = database.properties['population_size']
    genetic_algorithm = evolution.NSGA(problem, population_size)

    x_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'x_train.txt'))
    y_path = os.path.abspath(pkg_resources.resource_filename('resources.ev3', 'y_train.txt'))
    batch_start = (generation % 10) * N_GOALS
    joint_positions = np.loadtxt(x_path)[batch_start:batch_start + N_GOALS, :]
    goal_positions = np.loadtxt(y_path)[batch_start:batch_start + N_GOALS, :]

    if generation > 0:
        parents, children = utils.load_data(database)
        genetic_algorithm.set_population(parents)
        genetic_algorithm.set_children(children)

    for _ in range(args.iterations):
        generation += 1
        print('Starting generation ' + str(generation))
        genetic_algorithm.iterate()
        database.create_population()
        utils.save_data(genetic_algorithm, database)
        print('=' * (SCREEN_WIDTH - 1))
def resolvefb():
    updated_media = []
    for m in media:
        social = m['social']

        if 'facebook' in social and social['facebook']:
            graph_url = "https://graph.facebook.com/%s" % social['facebook']

            if re.match(r'\d+', social['facebook']):
                social['facebook_id'] = social['facebook']
                fbobj = requests.get(graph_url).json()
                if 'username' in fbobj:
                    social['facebook'] = fbobj['username']
            else:
                try:
                    social['facebook_id'] = requests.get(graph_url).json()['id']
                except:
                    print("Unable to get graph ID for: %s" % social['facebook'])
                    social['facebook_id'] = None

        updated_media.append(m)

    print("Saving social media...")
    save_data(updated_media, "legislators-social-media.yaml")
def run():
    # load in members, orient by bioguide ID
    print("Loading current legislators...")
    current = load_data("legislators-current.yaml")
    current_bioguide = {}
    for m in current:
        if "bioguide" in m["id"]:
            current_bioguide[m["id"]["bioguide"]] = m

    # remove out-of-office people from current committee membership
    print("Sweeping committee membership...")
    membership_current = load_data("committee-membership-current.yaml")
    for committee_id in list(membership_current.keys()):
        # iterate over a copy so that remove() doesn't skip members
        for member in list(membership_current[committee_id]):
            if member["bioguide"] not in current_bioguide:
                print("\t[%s] Ding ding ding! (%s)" % (member["bioguide"], member["name"]))
                membership_current[committee_id].remove(member)
    save_data(membership_current, "committee-membership-current.yaml")

    # remove out-of-office people from social media info
    print("Sweeping social media accounts...")
    socialmedia_current = load_data("legislators-social-media.yaml")
    for member in list(socialmedia_current):
        if member["id"]["bioguide"] not in current_bioguide:
            print("\t[%s] Ding ding ding! (%s)" % (member["id"]["bioguide"], member["social"]))
            socialmedia_current.remove(member)
    save_data(socialmedia_current, "legislators-social-media.yaml")
def update_coordinates(matchings, filename, geocode_serv, all_=False):
    from datetime import datetime
    from utils.geocode import distance
    from utils import save_data

    schools_ = sorted(matchings.values(),
                      key=lambda elem: datetime.strptime(elem['last_modified_at'], '%Y-%m-%d %H:%M:%S.%f'))
    counter1, counter2, counter3, counter4 = 0, 0, 0, 0
    for school in schools_:
        counter1 += 1
        if ('address' in school and 'number' in school['address']
                and ('geocoded' not in school['address'] or school['address']['geocoded'] == False)
                and (all_ or 'coordinates' not in school['address'])):
            counter2 += 1
            # use a distinct variable name; the original shadowed the
            # full_address() helper, which crashed on the second iteration
            address_str = full_address(school['address'])
            coord = geocode_serv(address_str)
            if coord:
                counter3 += 1
                if 'coordinates' not in school['address'] or distance(coord, school['address']['coordinates']) > 0.1:
                    counter4 += 1
                    school['address']['geocoded'] = True
                    school['address']['coordinates'] = coord
                    school['last_modified_at'] = str(datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f'))
            if counter2 % 10 == 0:
                save_data(matchings, filename)
            str_counter = (str(counter4) + '/' + str(counter3) + '/' + str(counter2)
                           + '/' + str(counter1) + '/' + str(len(schools_)))
            print(str_counter + ': ' + address_str + ' >> ' + str(coord))
def refresh_categories(self):
    from utils import save_data
    print('Refreshing FS categories...')
    categories = self._api_venues.venues.categories()['categories']
    self.categories_by_id, self.categories_by_name = _prepare_categories(categories)
    save_data(categories, self._filename)
    print('Done.')
def main(state_num):
    matches_filename = 'matches_%d' % state_num
    print('Loading %s ...' % matches_filename)
    matches = utils.load_data(matches_filename)

    matches_reduced_filename = 'matches_reduced'
    try:
        print("Loading matches_reduced ...")
        matches_reduced = utils.load_data(matches_reduced_filename)
    except:
        print("matches_reduced doesn't exist, creating new.")
        matches_reduced = {}

    num_matches = len(matches.keys())
    for keyIdx, matchId in enumerate(matches.keys()):
        print("\rMatch %d out of %d [%0.1f%%]"
              % (keyIdx + 1, num_matches, (keyIdx + 1) / float(num_matches) * 100), end='')
        summoners = []
        num_summoners = len(matches[matchId]['participants'])
        for i in range(num_summoners):
            champLevel = matches[matchId]['participants'][i]['stats']['champLevel']
            summonerId = matches[matchId]['participantIdentities'][i]['player']['summonerId']
            winner = matches[matchId]['participants'][i]['stats']['winner']
            summoners += [{'champLevel': champLevel, 'summonerId': summonerId, 'winner': winner}]
        matches_reduced[matchId] = {'summoners': summoners}

    print("Saving %s ..." % matches_reduced_filename)
    utils.save_data(matches_reduced, matches_reduced_filename)
    print("Done!")
def update():
    for rec in csv.DictReader(open("cache/social_media/%s_candidates.csv" % service)):
        bioguide = rec["bioguide"]
        candidate = rec["candidate"]

        if bioguide in media_bioguide:
            media_bioguide[bioguide]['social'][service] = candidate
        else:
            new_media = {'id': {}, 'social': {}}
            new_media['id']['bioguide'] = bioguide
            thomas_id = current_bioguide[bioguide]['id'].get("thomas", None)
            govtrack_id = current_bioguide[bioguide]['id'].get("govtrack", None)
            if thomas_id:
                new_media['id']['thomas'] = thomas_id
            if govtrack_id:
                new_media['id']['govtrack'] = govtrack_id
            new_media['social'][service] = candidate
            media.append(new_media)

    print("Saving social media...")
    save_data(media, "legislators-social-media.yaml")

    # if it's a youtube update, always do the resolve
    if service == "youtube":
        resolveyt()
def resolvefb():
    updated_media = []
    for m in media:
        social = m['social']

        if ('facebook' in social and social['facebook']) and ('facebook_id' not in social):
            graph_url = "https://graph.facebook.com/%s" % social['facebook']

            if re.match(r'\d+', social['facebook']):
                social['facebook_id'] = social['facebook']
                print("Looking up graph username for %s" % social['facebook'])
                fbobj = requests.get(graph_url).json()
                if 'username' in fbobj:
                    print("\tGot graph username of %s" % fbobj['username'])
                    social['facebook'] = fbobj['username']
                else:
                    print("\tUnable to get graph username")
            else:
                try:
                    print("Looking up graph ID for %s" % social['facebook'])
                    fbobj = requests.get(graph_url).json()
                    if 'id' in fbobj:
                        print("\tGot graph ID of %s" % fbobj['id'])
                        social['facebook_id'] = fbobj['id']
                    else:
                        print("\tUnable to get graph ID")
                except:
                    print("\tUnable to get graph ID for: %s" % social['facebook'])
                    social['facebook_id'] = None

        updated_media.append(m)

    print("Saving social media...")
    save_data(updated_media, "legislators-social-media.yaml")
def resolveig():
    # in order to preserve the comment block at the top of the file,
    # copy it over into a new RtYamlList instance. We do this because
    # Python list instances can't hold other random attributes.
    import rtyaml
    updated_media = rtyaml.RtYamlList()
    if hasattr(media, '__initial_comment_block'):
        updated_media.__initial_comment_block = getattr(media, '__initial_comment_block')

    client_id_file = open('cache/instagram_client_id', 'r')
    client_id = client_id_file.read()

    bioguide = utils.flags().get('bioguide', None)

    for m in media:
        if bioguide and (m['id']['bioguide'] != bioguide):
            updated_media.append(m)
            continue

        social = m['social']
        if 'instagram' not in social and 'instagram_id' not in social:
            updated_media.append(m)
            continue

        instagram_handle = social['instagram']
        query_url = "https://api.instagram.com/v1/users/search?q={query}&client_id={client_id}".format(
            query=instagram_handle, client_id=client_id)
        instagram_user_search = requests.get(query_url).json()
        for user in instagram_user_search['data']:
            time.sleep(0.5)
            if user['username'] == instagram_handle:
                m['social']['instagram_id'] = int(user['id'])
                print("matched instagram_id {instagram_id} to {instagram_handle}".format(
                    instagram_id=social['instagram_id'], instagram_handle=instagram_handle))
        updated_media.append(m)

    save_data(updated_media, "legislators-social-media.yaml")
def put(self, name, content, type=None):
    if type is not None:
        name = '%s.%s' % (name, type)
    # shard files into nested directories keyed by the first three
    # characters of the (extension-qualified) name
    path = '%s/%s' % ('/'.join(name[:3]), name)
    save_data(self.path + '/' + path, content)
    return path
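# Hypothetical usage of the sharded put() above, assuming save_data(path,
# content) writes `content` to `path`. The first three characters of the
# extended name become nested directories, spreading files across the tree:
#
#   store.put("report", b"...", type="json")
#   # name -> "report.json"
#   # '/'.join(name[:3]) -> "r/e/p"
#   # returns "r/e/p/report.json" (written under store.path)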
def main():
    if not os.path.exists('../data/matched_points.pkl') or not os.path.exists('../data/links.pkl'):
        print("Saving Data")
        save_data()
    print("Loading Data")
    matched_points, links = load_data()
    print("Data Loaded")
    link_to_slopes = process(matched_points, links, 10)
    save_link_to_slopes(link_to_slopes, links)
def measure_tips(out_fname='results/experiment_run'):
    """ Compute spiral-tip density for all available data files
    """
    data_dir = 'data'
    files = [os.path.join(data_dir, fn) for fn in os.listdir(data_dir)]

    with Pool(len(files)) as p:
        data = p.map(handle_measure_tips, files)

    save_data(out_fname, data)
def run(legislator_ids=None):
    legislators = utils.load_data('legislators-district-offices.yaml')
    try:
        for l in legislators:
            if legislator_ids and l['id']['bioguide'] not in legislator_ids:
                continue
            geocode_offices(l)
    finally:
        # Save in-progress geocodes in case of keyboard interrupt
        print("Saving data...")
        utils.save_data(legislators, 'legislators-district-offices.yaml')
def update_enrichments():
    global enrichments
    enrichments = {}
    _add_instituicoes_basicas(enrichments)
    _add_instituicoes_superiores(enrichments)
    _add_attractions(enrichments)
    _add_museums(enrichments)
    _add_theaters(enrichments)
    _add_hospitals(enrichments)
    _add_hotels(enrichments)
    _add_offices(enrichments)
    save_data(enrichments, _filename)
def run():
    # load in current members
    y = load_data("legislators-current.yaml")
    for m in y:
        # retrieve C-SPAN id, if available, from ProPublica API
        # TODO: use utils.download here
        response = urllib.request.urlopen(
            "https://projects.propublica.org/represent/api/v1/members/%s.json" % m['id']['bioguide']).read()
        j = json.loads(response.decode("utf8"))
        cspan = j['results'][0]['cspan_id']
        if cspan != '':
            m['id']['cspan'] = int(cspan)
    save_data(y, "legislators-current.yaml")
def run():
    # load in current members
    y = load_data("legislators-current.yaml")
    for m in y:
        # retrieve C-SPAN id, if available, from NYT API
        # TODO: use utils.download here
        response = urllib.request.urlopen(
            "http://politics.nytimes.com/congress/svc/politics/v3/us/legislative/congress/members/%s.json" % m['id']['bioguide']).read()
        j = json.loads(response.decode("utf8"))
        cspan = j['results'][0]['cspan_id']
        if cspan != '':
            m['id']['cspan'] = int(cspan)
    save_data(y, "legislators-current.yaml")
def run_system(Generator):
    """ Apply `Generator` and integrate and cache system
    """
    system = Generator(config.grid_size).generate()
    print(system)

    cres = integrate_system(system)

    fname = gen_run_identifier()
    save_data("data/%s" % fname, np.array([cres, system.pacemakers, dict(config)]))

    return system, cres
def clean():
    print("Loading historical legislators...")
    historical = load_data("legislators-historical.yaml")

    count = 0
    for m in historical:
        if m["id"]["bioguide"] in media_bioguide:
            media.remove(media_bioguide[m["id"]["bioguide"]])
            count += 1
    print("Removed %i out of office legislators from social media file..." % count)

    print("Saving historical legislators...")
    save_data(media, "legislators-social-media.yaml")
def run():
    house_labels = "labels-113.csv"

    names = utils.flags().get('names', False)

    y = load_data("legislators-current.yaml")
    by_district = {}
    for m in y:
        last_term = m['terms'][-1]
        if last_term['type'] != 'sen':
            full_district = "%s%02d" % (last_term['state'], int(last_term['district']))
            by_district[full_district] = m

    for rec in csv.DictReader(open(house_labels)):
        full_district = rec['113 ST/DIS']

        # empty seat - IL-02
        if full_district not in by_district:
            if full_district == "IL02":
                continue
            else:
                # raising a bare string is invalid; raise a real exception
                raise ValueError("Unknown district: %s" % full_district)

        # the csv module already yields str in Python 3; no .decode() needed
        rec["MIDDLE"] = rec["MIDDLE"].strip()
        rec["NICK"] = None
        m = re.match('^(.*) \u201c(.*)\u201d$', rec["MIDDLE"])
        if m:
            rec["MIDDLE"] = m.group(1)
            rec["NICK"] = m.group(2)

        by_district[full_district]['terms'][-1]['office'] = rec["ADDRESS"].strip()

        # only set name fields if we've been asked to (as a stopgap)
        if names:
            by_district[full_district]["name"]["first"] = rec["FIRST"].strip()
            if rec["MIDDLE"]:
                by_district[full_district]["name"]["middle"] = rec["MIDDLE"]
            if rec["NICK"]:
                by_district[full_district]["name"]["nickname"] = rec["NICK"]
            by_district[full_district]["name"]["last"] = rec["LAST"].strip()

        if rec["BIOGUIDE ID"] == "G000574":
            # The Clerk has the wrong ID for Alan Grayson!
            rec["BIOGUIDE ID"] = "G000556"
        by_district[full_district]["id"]["bioguide"] = rec["BIOGUIDE ID"]

        print("[%s] Saved" % full_district)

    save_data(y, "legislators-current.yaml")
def _commit(confirmed_, unconfirmed_):
    from utils import load_data
    from utils import save_data

    auditing = load_data(_params['filenames'][5])
    confirmed = load_data(_params['filenames'][6])
    if not confirmed:
        confirmed = []
    unconfirmed = load_data(_params['filenames'][7])
    if not unconfirmed:
        unconfirmed = []

    i = 0
    while i < len(auditing):
        if auditing[i]['matching'][0]['venue_id'] in confirmed_:
            auditing[i]['status'] = 'confirmed'
            a = auditing.pop(i)
            confirmed.append(a)
            i -= 1
        elif auditing[i]['matching'][0]['venue_id'] in unconfirmed_:
            auditing[i]['status'] = 'unconfirmed'
            a = auditing.pop(i)
            unconfirmed.append(a)
            i -= 1
        i += 1

    # the original also saved [a[0] for a in auditing] to the same file first,
    # which would raise KeyError on these dicts and was immediately
    # overwritten by the save below, so it has been dropped
    save_data(auditing, _params['filenames'][5])
    save_data(confirmed, _params['filenames'][6])
    save_data(unconfirmed, _params['filenames'][7])
def main():
    args = parse_arguments()
    log = prepare_logging(args.log_level)
    try:
        if args.scrap_nasdaq:
            from utils import scrap_nasdaq
            scrap_nasdaq()
            log.info("Data written to nasdaq.txt")
            sys.exit(0)

        from random import randint
        if args.restart:
            from subprocess import call
            call(["rm", "-rf", args.dbname])

        import sqlite3
        conn = sqlite3.connect(args.dbname)

        from Scrappers.Google import Google
        from utils import save_data, diff_to_last_record
        google = Google("http://www.google.com/finance/getprices?i=||INTERVAL||&p=||OFFSET||d&f=d,o,h,l,c,v&df=cpct&q=||SYMBOL||", 60)

        symbols = [symb.strip() for symb in open(args.input, "r").readlines()]
        for symbol in symbols:
            log.info("Processing {}".format(symbol))
            try:
                if args.restart:
                    diff_days = args.ticks
                else:
                    diff_days = diff_to_last_record(conn, symbol)
                if diff_days > 10:
                    log.error("Interval is bigger than google supports! There will be a gap in the data!")
                    case = input("Should I proceed? Y/N")
                    if case != "Y":
                        raise RuntimeError("No data available that far back in history.")
                save_data(conn, symbol.lower(), google.scrap_symbol(symbol, diff_days))
            except RuntimeError:
                log.warning("No data available for {} from google".format(symbol))
                open(args.unresponsive_log, "a").write(symbol + "\n")
            finally:
                import time
                time.sleep(randint(1, args.sleep))
    except KeyboardInterrupt:
        log.warning("Terminated by user.")
    except SystemExit:
        log.info("Exiting")
def transfer_all_data(params):
    ph = load_data(os.path.join(params.save_data, "ph"))
    CIFAR10 = Load_CIFAR10(params)
    all_train_images, all_train_labels = CIFAR10.get_train_set(ratio=1)
    print(all_train_images.shape)

    batch_size = 5000
    for i in range(0, all_train_images.shape[0], batch_size):
        print(i, "-", i + batch_size)
        out_i = ph.transform(all_train_images[i:i + batch_size])
        for j in range(params.num_layers):
            save_data(out_i[j], os.path.join(params.save_data, 'out_{}_{}'.format(i, j)))

    del all_train_images
    save_data(all_train_labels, os.path.join(params.save_data, "all_train_labels"))
    print("All transferred data saved")
def run_predictor(art, predictor, foldername, filename_prefix, testing_mode=False, skip_save=False, prepend_data_folder=True):
    # prepend_data_folder - adds the ~/src/mindpocket/data prefix to the squaddir folder name
    from utils import save_data, load_data, exists_datafolder
    from utils import merge_artfiles

    verbose2_on = False

    # Loop through and add results field to data
    art2 = art.copy()
    for i, a in enumerate(art):
        filename = filename_prefix + '_art_' + str(i).zfill(3) + '.json'

        # Do a short test to see if the file exists
        file_exists = exists_datafolder(filename, foldername, prepend_data_folder)
        if file_exists:
            print("File: " + filename + " already exists. Skipping...")
            continue  # If file already exists, skip over to the next file

        # Otherwise, continue with the operation
        print("Article number:" + str(i).zfill(3) + ". Saving to: " + filename)
        for j, p in enumerate(a['paragraphs']):
            if verbose2_on:
                print("\tParagraph number: " + str(j))
            if not testing_mode:
                results = predictor.predict(sentence=p['context'])
                if verbose2_on:
                    for word, tag in zip(results["words"], results["tags"]):
                        print(f"{word}\t{tag}")
                # Merge words and tags together into 1 long sentence, for more efficient json storage
                results2 = {
                    'words': ' '.join(results['words']),
                    'tags': ' '.join(results['tags']),
                }
            else:
                results = 'asdf'
                results2 = 'asdf'
            art2[i]['paragraphs'][j]['allenNER'] = results2

        # Save individual articles
        if not skip_save:
            save_data(art2[i], filename, foldername, [], [], prepend_data_folder)

    # Once all individual files have been saved, merge into 1 large json file
    merge_artfiles(filename_prefix + '_art_*', foldername, filename_prefix + '.json',
                   verbose=True, do_overwrite=[], prepend_data_folder=prepend_data_folder)
def __init__(self, fold,
             detection_results_dir='../output/detection_results',
             classification_results_dir='../output/classification_results',
             train_on_all_dataset=True,
             load_only_video_ids=None,
             is_test=False):
    self.video_clips = dataset.video_clips(is_test=is_test)

    if load_only_video_ids is not None:
        all_video_ids = set(load_only_video_ids)
    else:
        all_video_ids = set(self.video_clips.keys())

    if train_on_all_dataset:
        self.test_video_ids = []
        self.train_video_ids = all_video_ids
    else:
        self.test_video_ids = set(dataset.fold_test_video_ids(fold))
        self.train_video_ids = all_video_ids.difference(self.test_video_ids)

    self.gt = pd.read_csv('../input/N1_fish_N2_fish_-_Training_set_annotations.csv')
    self.gt.dropna(axis=0, inplace=True)
    self.gt['have_frame'] = 1.0

    self.video_frames_count = {}
    self.video_data = {}
    self.video_data_gt = {}

    print('load video data...')
    cache_fn = '../output/sequence_rnn_test.pkl' if is_test else '../output/sequence_rnn_train.pkl'
    try:
        self.video_frames_count, self.video_data, self.video_data_gt, self.columns = utils.load_data(cache_fn)
    except FileNotFoundError:
        self.video_frames_count, self.video_data, self.video_data_gt, self.columns = self.load(
            all_video_ids, detection_results_dir, classification_results_dir)
        utils.save_data((self.video_frames_count, self.video_data, self.video_data_gt, self.columns), cache_fn)
    print('loaded')
def run():
    # pick either current or historical
    # order is important here, since current defaults to true
    if utils.flags().get('historical', False):
        filename = "legislators-historical.yaml"
    elif utils.flags().get('current', True):
        filename = "legislators-current.yaml"
    else:
        print("No legislators selected.")
        exit(0)

    print("Loading %s..." % filename)
    legislators = load_data(filename)

    # reoriented cache to access by bioguide ID
    by_bioguide = {}
    for m in legislators:
        if "bioguide" in m["id"]:
            by_bioguide[m["id"]["bioguide"]] = m

    count = 0
    for id in range(8245, 21131):
        print(id)
        url = "http://history.house.gov/People/Detail/%s" % id
        r = requests.get(url, allow_redirects=False)
        if r.status_code == 200:
            dom = lxml.html.parse(io.StringIO(r.text)).getroot()
            try:
                bioguide_link = dom.cssselect("a.view-in-bioguide")[0].get('href')
                bioguide_id = bioguide_link.split('=')[1]
                by_bioguide[bioguide_id]["id"]["house_history"] = id
                count = count + 1
            except:
                continue
        else:
            continue

    print("Saving data to %s..." % filename)
    save_data(legislators, filename)
    print("Saved %d legislators to %s" % (count, filename))
def test(self, sess):
    self.saver.restore(sess, os.path.join(self.checkpoint_dir, "./Colorization"))
    data = glob('./test/*.jpg')
    img = data[0:self.batch_size]
    sample = np.array([load_data(sample_file, n='gray') for sample_file in img]).astype(np.float32)
    sample = sess.run(self.sample_images, feed_dict={self.sample: sample})
    sample = save_data(sample)
    for i in range(self.batch_size):
        cv2.imwrite('./result/{}.jpg'.format(i), sample[i])
def save(self):
    self.save_history()
    self.save_plasmid_description()
    utils.save_data(self.data, self.CONFIG)  # TODO: save for each repetition!
    self.save_config()
async def login():
    user_id, login_token = utils.get_user_id(), utils.get_login_token()
    just_logged_in = False
    if not user_id or not login_token:
        #webbrowser.open(SERVER_HOST + '/local-products-login?port='+str(LOCAL_SERVER_PORT), new=0, autoraise=True)
        await utils.show_info(
            "Sincronizador de archivos",
            "No hay ningún usuario guardado. Inicia sesión...")
        user_mail, password = await authenticate_box.ask_login()
        if user_mail is None or password is None:
            exit()
        #user_mail = (await aioconsole.ainput("Correo electrónico: ")).strip()
        #password = (await aioconsole.ainput("Contraseña: ")).strip()
        try:
            user_id, login_token = await server.login(mail=user_mail, password=password)
        except RemoteException as e:
            await utils.show_warning("Linarand sincronizador de archivos",
                                     "Hubo un problema. " + str(e))
            return await login()
        utils.set_user_id(user_id)
        utils.set_login_token(login_token)
        utils.save_data()
        just_logged_in = True
    try:
        username = await server.authenticate(user_id=user_id, token=login_token)
    except RemoteException as e:
        await utils.show_warning(
            "Sincronizador de archivos",
            "Hubo un problema. " + str(e) + ". Eliminando usuario")
        utils.set_user_id(None)
        utils.set_login_token(None)
        utils.save_data()
        return await login()
    if just_logged_in:
        asyncio.ensure_future(
            utils.show_info(
                "Sincronizador de archivos",
                "Sesión iniciada como %s. Puedes ir a la página de Ingeniería Linarand y sincronizar los archivos que desees desde este equipo." % username))
def main():
    args = parse_args()
    train = True
    data_train = load_data(True, False)
    data = data_train
    if args.fin:
        train = False
        data = load_data(train, False)
    #data_train['common'] = data_train.apply(lambda row: list((Counter(row.question1_tk) & Counter(row.question2_tk)).elements()))
    #data_train['diff'] = data_train.apply(lambda row: list((Counter(row.question1_tk) | Counter(row.question2_tk)) - (Counter(row.question1_tk) & Counter(row.question2_tk))))
    # the loop header below was garbled into the comment above in the original;
    # vect_type is used in the loop body, so the loop must be live
    for vect_type in ["cv", "cv_t", "tf", "tf_t", "tfidf", "tfidf_t"]:
        for i in range(3):
            ngrams = i + 1
            cv = create_cv(data_train, ngrams, vect_type)
            add_features(data, cv, ngrams, vect_type)
    save_data(data, train)
def run_blending():
    print('data manipulation')
    train_comment = train_data['comment_text'].apply(clean_word)
    test_comment = test_data['comment_text'].apply(clean_word)

    print('split data')
    x_train, x_valid, y_train, y_valid = blending_data_split(
        train_comment, train_data[toxic_types],
        params.blending.data_split.test_size,
        params.blending.data_split.ramdom_state)

    print('create features')
    train_tfidf, valid_tfidf, test_tfidf = tfidf(
        x_train, test_comment, x_valid,
        params.blending.tfidf.max_word_ngram,
        params.blending.tfidf.max_char_ngram,
        params.blending.tfidf.stack)

    print('run blending')
    preds = blending(train_tfidf, valid_tfidf, y_train, y_valid, test_tfidf)

    print('save data')
    save_data(file_name='bleding_1', preds=preds, toxic_types=toxic_types)
def compute_spiral_tip_density(fname, plot=True):
    """ Compute spiral tip density of given example
    """
    cache_dir = 'cache'
    pure_fname = os.path.splitext(os.path.basename(fname))[0]

    if not os.path.isdir(os.path.join(cache_dir, pure_fname)):
        # preprocess input
        camp, pacemaker, used_config = np.load(fname)
        camp = np.rollaxis(camp, 0, 3)
        camp = preprocess_data(camp)

        # compute data
        rolled_camp = np.rollaxis(camp, 2, 0)
        lphase = compute_local_phase_field(camp)  # decreases time dim due to tau
        grads = compute_discrete_gradient(lphase)
        singularities = compute_singularity_measure(grads)

        # cache data (the original had a bare os.path.join() no-op here,
        # presumably a dropped directory-creation call)
        os.makedirs(os.path.join(cache_dir, pure_fname))
        save_data(os.path.join(cache_dir, pure_fname, 'rolled_camp'), rolled_camp)
        save_data(os.path.join(cache_dir, pure_fname, 'lphase'), lphase)
        save_data(os.path.join(cache_dir, pure_fname, 'grads'), grads)
        save_data(os.path.join(cache_dir, pure_fname, 'singularities'), singularities)
    else:
        print(' > Using cached data')
        rolled_camp = np.load(os.path.join(cache_dir, pure_fname, 'rolled_camp.npy'))
        lphase = np.load(os.path.join(cache_dir, pure_fname, 'lphase.npy'))
        grads = np.load(os.path.join(cache_dir, pure_fname, 'grads.npy'))
        singularities = np.load(os.path.join(cache_dir, pure_fname, 'singularities.npy'))

    # compute singularity measures
    avg_singularity = np.mean(singularities, axis=0)

    thres_singularity = avg_singularity.copy()
    thres_singularity[thres_singularity > np.pi] = 2 * np.pi
    thres_singularity[thres_singularity < -np.pi] = -2 * np.pi
    thres_singularity[(thres_singularity > -np.pi) & (thres_singularity < np.pi)] = 0

    # plot data if needed
    if plot:
        singularity_plot(
            pure_fname,
            rolled_camp, lphase, grads, singularities,
            avg_singularity, thres_singularity)

    # compute tip density
    tip_num = np.count_nonzero(thres_singularity)
    tip_density = float(tip_num) / thres_singularity.size
    return tip_density
def resolvefb():
    # in order to preserve the comment block at the top of the file,
    # copy it over into a new RtYamlList instance. We do this because
    # Python list instances can't hold other random attributes.
    import rtyaml
    updated_media = rtyaml.RtYamlList()
    if hasattr(media, '__initial_comment_block'):
        updated_media.__initial_comment_block = getattr(media, '__initial_comment_block')

    for m in media:
        social = m['social']

        if ('facebook' in social and social['facebook']) and ('facebook_id' not in social):
            graph_url = "https://graph.facebook.com/%s" % social['facebook']

            if re.match(r'\d+', social['facebook']):
                social['facebook_id'] = social['facebook']
                print("Looking up graph username for %s" % social['facebook'])
                fbobj = requests.get(graph_url).json()
                if 'username' in fbobj:
                    print("\tGot graph username of %s" % fbobj['username'])
                    social['facebook'] = fbobj['username']
                else:
                    print("\tUnable to get graph username")
            else:
                try:
                    print("Looking up graph ID for %s" % social['facebook'])
                    fbobj = requests.get(graph_url).json()
                    if 'id' in fbobj:
                        print("\tGot graph ID of %s" % fbobj['id'])
                        social['facebook_id'] = fbobj['id']
                    else:
                        print("\tUnable to get graph ID")
                except:
                    print("\tUnable to get graph ID for: %s" % social['facebook'])
                    social['facebook_id'] = None

        updated_media.append(m)

    print("Saving social media...")
    save_data(updated_media, "legislators-social-media.yaml")
def run():
    # load legislators YAML files
    yamlfiles = {}
    for fn in ('historical', 'current'):
        fn = 'legislators-%s.yaml' % fn
        print("Loading %s..." % fn)
        yamlfiles[fn] = load_data(fn)

    # reoriented cache to access by bioguide ID
    by_bioguide = {}
    known_house_history_ids = set()
    for legislators in yamlfiles.values():
        for m in legislators:
            if "bioguide" in m["id"]:
                by_bioguide[m["id"]["bioguide"]] = m
            if "house_history" in m["id"]:
                known_house_history_ids.add(m["id"]["house_history"])

    count = 0

    # scrape history.house.gov
    if len(sys.argv) == 1:
        id_range = range(22000, 25000)
    else:
        id_range = [int(arg) for arg in sys.argv[1:]]
    for id in id_range:
        # skip known IDs
        if id in known_house_history_ids:
            continue
        print(id)
        bioguide_id = get_bioguide_for_house_history_id(id)
        if bioguide_id and bioguide_id in by_bioguide:
            print(id, bioguide_id)
            by_bioguide[bioguide_id]["id"]["house_history"] = id
            count = count + 1

    # write YAML files to disk
    for filename, legislators in yamlfiles.items():
        print("Saving data to %s..." % filename)
        save_data(legislators, filename)

    # how many updates did we make?
    print("Saved %d legislators" % count)
def run_CNN_subwordlevel():
    print('data manipulation')
    train_comment = train_data['comment_text'].apply(clean_subword)
    test_comment = test_data['comment_text'].apply(clean_subword)
    # use a distinct variable name; the original shadowed the vocab_size() helper
    n_vocab = vocab_size(train_comment)

    print('pad sequence')
    x_train_pad, x_test_pad = pad_sequence(
        train_comment, test_comment, n_vocab,
        max_length=params.CNN_wordlevel.max_length)

    print('run CNN subword level')
    preds = CNN_subwordlevel(x_train_pad, train_data[toxic_types], x_test_pad, n_vocab)

    print('save data')
    save_data(file_name='CNN_subwordlevel_1', preds=preds, toxic_types=toxic_types)
def update():
    for rec in csv.DictReader(open("cache/social_media/%s_candidates.csv" % service)):
        bioguide = rec["bioguide"]
        candidate = rec["candidate"]

        if bioguide in media_bioguide:
            media_bioguide[bioguide]['social'][service] = candidate
        else:
            new_media = {'id': {}, 'social': {}}
            new_media['id']['bioguide'] = bioguide
            thomas_id = current_bioguide[bioguide]['id'].get("thomas", None)
            if thomas_id:
                new_media['id']['thomas'] = thomas_id
            new_media['social'][service] = candidate
            media.append(new_media)

    print("Saving social media...")
    save_data(media, "legislators-social-media.yaml")
def _select_for_auditing():
    from utils import load_data
    from utils import save_data
    from utils.geocode import distance

    matchings = load_data(_params['filenames'][0])
    auditing = []
    i = 0
    while i >= 0 and i < len(matchings):
        loc1 = matchings[i][0]['address']['coordinates'] if 'coordinates' in matchings[i][0]['address'] else None
        loc2 = matchings[i][1]['address']['coordinates'] if 'coordinates' in matchings[i][1]['address'] else None
        if distance(loc1, loc2) >= 0.6:
            auditing.append({'status': 'unconfirmed', 'matching': matchings[i]})
            matchings.pop(i)
            i -= 1
        i += 1

    save_data([a['matching'][0] for a in auditing], _params['filenames'][5])
    save_data(matchings, _params['filenames'][0])
def convert(self, to, amount, date=None):
    logging.debug(f"Convert {locals()}")
    to = to if isinstance(to, str) else to.name
    if to == self.name:
        return amount

    price_data, requested_price, date = self.get_cached(to, date, attr="close")
    if requested_price is None:
        requested_price_kraken = get_pair_from_kraken(self.name, to, client, date=date)
        if (date is None) or (date.normalize() == pd.to_datetime("now").normalize()):
            save_file = f"{self.name}_{to}_latest.pkl"
            requested_price = float(requested_price_kraken)
            save_data(requested_price, save_file)
        else:
            save_file = f"{self.name}_{to}.pkl"
            # Get the exchange rate for the closest date to the requested date
            nearest_idx = requested_price_kraken.index.get_loc(date, method='nearest')
            requested_price = requested_price_kraken["close"].iloc[nearest_idx]
            if not isinstance(requested_price, Number):
                requested_price = requested_price.iloc[0]
            requested_price_kraken = requested_price_kraken[["close"]]
            logging.debug(f"Price data queried {price_data}: {type(price_data)}")
            save_data(requested_price_kraken, save_file)

    logging.debug(f"Price data {price_data}: {type(price_data)}")
    logging.debug(f"Price data {requested_price}: {type(requested_price)}")
    return requested_price * amount
def convert(self, to, amount, date=None):
    logging.debug(f"Convert {locals()}")
    to = to if isinstance(to, str) else to.name
    if to == self.name:
        return amount

    price_data, requested_price, date = self.get_cached(to, date)
    if requested_price is None:
        if (date is None) or (date.normalize() == pd.to_datetime("now").normalize()):
            save_file = f"{self.name}_{to}_latest.pkl"
            price_url = url_join(URL_MARKET_PRICE_FIAT, "latest")
            requested_price = requests.get(price_url, params={"base": self.name}).json()["rates"][to]
            requested_price = float(requested_price)
            save_data(requested_price, save_file, add_path_prefix=True)
        else:
            save_file = f"{self.name}_{to}.pkl"  # `to` is already a plain string here
            date_query = date.strftime("%Y-%m-%d")
            price_url = url_join(URL_MARKET_PRICE_FIAT, date_query)
            # .json() was missing in the original; a Response isn't subscriptable
            requested_price = requests.get(price_url, params={"base": self.name}).json()["rates"][to]
            requested_price = float(requested_price)
            new_value = pd.Series(name=date, data={"price": requested_price}).to_frame()
            # the original None-check was inverted, appending to None
            if price_data is not None:
                price_data = price_data.append(new_value)
            else:
                price_data = new_value
            save_data(price_data, save_file, add_path_prefix=True)
    return requested_price * amount
def _remove_from_matchings(remove):
    from utils import load_data
    from utils import save_data

    matchings = load_data(_params['filenames'][0])
    i = 0
    while i < len(matchings):
        if matchings[i][0]['venue_id'] in remove:
            matchings.pop(i)
            i -= 1
        i += 1
    # the original assigned save_data's (None) return value back to matchings
    save_data(matchings, _params['filenames'][0])
def transform(sample_file, dict_file, projection_file, output_file):
    n_samples = utils.count_lines(sample_file)
    n_features = utils.count_lines(dict_file)
    batch_size = 100000

    print('loading projector...')
    with open(projection_file, 'rb') as f:
        p = utils.zloads(f.read())
        # p = pickle.load(f)

    with open(output_file, 'w') as f:
        for (v, idx, ptr) in utils._load_data_batch(sample_file, batch_size, np.intc, n_samples):
            X = csr_matrix((v, idx, ptr), shape=(len(ptr) - 1, n_features))
            print('transforming... \r', end='')
            sys.stdout.flush()
            T = p.transform(X)
            print('saving... \r', end='')
            sys.stdout.flush()
            utils.save_data(f, T)
    print('\nTransform completed.')
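# The (v, idx, ptr) triple fed to csr_matrix() above is the standard CSR
# layout. A tiny self-contained illustration (example values, not taken
# from the dataset):
import numpy as np
from scipy.sparse import csr_matrix

v = np.array([1, 1, 1], dtype=np.intc)    # non-zero values
idx = np.array([0, 2, 1], dtype=np.intc)  # column index of each value
ptr = np.array([0, 2, 3])                 # row i spans v[ptr[i]:ptr[i+1]]

X = csr_matrix((v, idx, ptr), shape=(len(ptr) - 1, 3))
print(X.toarray())
# [[1 0 1]
#  [0 1 0]]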
def review_save(unreviewed, approved, flagged):
    if save_locked():
        return
    else:
        # create lockfile
        f = open(LOCKFILE, 'w')
        f.close()

    # reorganize data
    unreviewed_s = review_prep_data_pre_save(unreviewed)
    approved_s = review_prep_data_pre_save(approved)
    flagged_s = review_prep_data_pre_save(flagged)

    save_data(unreviewed_s, 'legislators-district-offices-unreviewed.yaml')
    save_offices(unreviewed_s, unreviewed=True, flagged=False, approved=False, gpo=False)
    save_data(approved_s, 'legislators-district-offices-approved.yaml')
    save_offices(approved_s, unreviewed=False, flagged=False, approved=True, gpo=False)
    save_data(flagged_s, 'legislators-district-offices-flagged.yaml')
    save_offices(flagged_s, unreviewed=False, flagged=True, approved=False, gpo=False)

    # remove lock
    os.unlink(LOCKFILE)
def save_test_set(self):
    """ Saves a json file with useful information for the test phase:
        - training size
        - test images IDs
        - attributes
        - batch size
    """
    try:
        test_data = {
            'train_size': self.train_size,
            'test_img_ids': self.test_img_ids,
            'attributes': self.attributes,
            'batch_size': self.batch_size
        }
        file_path = "./test_data"
        save_data(file_path, test_data)
    except:
        raise
    print("Test img_ids successfully saved.")
def execute_experiment(name, variations, X, y, parameters, kfold):
    """Train neural network for a set of different parameters and save results

    Arguments:
    name -- Name of the parameter to be varied.
    variations -- Variations of the parameter.
    X -- Dataset input instances.
    y -- Dataset output classes.
    parameters -- Neural network model parameters.
    kfold -- Object used to create k-folds for crossvalidation.
    """
    parameters = deepcopy(parameters)
    accuracy = {}
    for variation in variations:
        parameters[name] = variation
        accuracy_train = []  # Cross-validation train accuracy
        accuracy_test = []   # Cross-validation test accuracy
        for train_index, test_index in kfold.split(X, y):
            if name == "extra_size":
                results = train_network(X, y, train_index, test_index, parameters, True)
            else:
                results = train_network(X, y, train_index, test_index, parameters)
            accuracy_train.append(results["acc"])
            accuracy_test.append(results["val_acc"])
        accuracy[variation] = {
            "train_mean": np.mean(accuracy_train, axis=0),
            "train_std": np.std(accuracy_train, axis=0),
            "test_mean": np.mean(accuracy_test, axis=0),
            "test_std": np.std(accuracy_test, axis=0)
        }
    utils.save_data(name, accuracy)
def remove_dc(offices):
    output = {}
    removal_count = 0
    for bioguide in offices:
        for office in offices[bioguide]:
            if not office.get('state', '').upper().replace('.', '').strip() in ('DC', 'DISTRICT OF COLUMBIA'):
                if bioguide not in output:
                    output[bioguide] = []
                output[bioguide].append(office)
            else:
                removal_count = removal_count + 1
    print("Removed %d D.C. offices." % removal_count)
    print("Saving...")
    save_data(output, "legislators-district-offices-unreviewed.yaml")
    save_offices(output, unreviewed=True, flagged=False, approved=False, gpo=False)
    return output
def info_main(account, info_url, track_id):
    """
    Main handler for the resume detail page.
    :return: True if the resume was stored normally, False on error
    """
    # info_url = "http://ehire.51job.com/Candidate/ResumeViewFolder.aspx?hidSeqID=9216079958&hidFolder=EMP"
    refer_url = "http://ehire.51job.com/Inbox/InboxRecentEngine.aspx?Style=1"
    info_html = conn_html(account, info_url, 4, refer_url=refer_url, track_id=track_id)
    if 'login' == info_html:
        logger.error("Hit the login page for %s" % account['userName'])
        return 'login'
    if info_html:
        logger.info('Fetched an inbox resume successfully!')
        raw = parse_info_html(account, info_html, track_id)
        sql = 'INSERT INTO resume_raw (' \
              'trackId,source,content,createTime,createBy,email,resumeUpdateTime,resumeSubmitTime' \
              ') values(%s, "FIVE_ONE",%s, now(), "python", %s, %s, %s)'
        value = (raw.trackId, raw.content, raw.email, raw.resumeUpdateTime, raw.resumeSubmitTime)
        kafka_data = {
            'trackId': raw.trackId,
            'source': raw.source,
            'channelType': 'WEB',
            'resourceType': 'RESUME_INBOX',
            'resourceDataType': 'RAW',
            'content': raw.to_dict(),
            'protocolType': 'HTTP'
        }
        logger.info('Saving an inbox resume')
        common_utils.save_data(sql, value, kafka_data)
        return True
    else:
        logger.info('Failed to fetch an inbox resume!')
        return False
def all_data_to_graphs():
    # generates a new structure mirroring the mnist_data directory,
    # with every image converted to a .dat file (graph) (TOO HEAVY)
    input_path = './mnist_data/'
    output_path = './mnist_graphs/'
    for dirpath, dirnames, filenames in os.walk(input_path):
        structure = os.path.join(output_path, dirpath[len(input_path):])
        if not os.path.isdir(structure):
            os.mkdir(structure)
        else:
            print("Folder already exists!")
    for root, dirnames, filenames in os.walk(input_path):
        for file_complete in filenames:
            filename = os.path.join(root, file_complete)
            with open(filename, "r") as f:
                digit = os.path.basename(os.path.dirname(filename))
                graph = mnist_on_plane.to_graph(filename, digit)
                parts = pathlib.Path(filename).parts[1:-1]
                file, file_extension = os.path.splitext(file_complete)
                new_filename = output_path + str(pathlib.Path(*parts)) + '/' + file + '.dat'
                save_data(graph, new_filename)
def change_proxy():
    # proxies must be formatted: 182.52.238.111:30098,103.105.77.22:8181,
    # bad proxies will be updated and skipped next time
    proxies: deque = get_global('PROXIES')
    bad_proxies: str = load_data('bad_proxies.txt')
    while proxies:
        proxy: str = proxies.popleft()
        if proxy in bad_proxies:
            continue
        #log(f'checking proxy {proxy}')
        try:
            old_proxy: str = get_global('PROXY')
            save_data('bad_proxies.txt', old_proxy, end=',')
            set_global('PROXY', proxy)
            set_global('PROXY_ERROR', False)
            # set_global('SLEEP_TIME', USER_SLEEP_TIME)
            if USE_BOT:
                log('Reloading bot')
                get_global('BOT').close()
                set_global(
                    'BOT',
                    Browser(headless=HEADLESS, proxy=proxy, driverpath=WEBDRIVERPATH))
            return
            #acceptable = check_connection()
            #if acceptable:
            #    log(f'using proxy {proxy}')
            #    set_global('PROXIES', proxies)
            #    return
            #else:
            #    save_data('bad_proxies.txt', proxy, end=',')
            #    set_global('PROXY', old_proxy)
            #    set_global('PROXY_ERROR', True)
        except Exception as e:
            log(e)
            change_proxy()
    raise ProxyError('All proxies have been used.')
def createInputCSV(start, end, filename):
    """Creates data for a range of numbers and inserts it into a file.

    Parameters
    ----------
    start : int
        Starting index for the data to be created
    end : int
        Last position for the data to be created
    filename : string
        Name of the file to store data into
    """
    # Why list in Python?
    inputData = []
    outputData = []

    # Why do we need training data?
    for i in range(start, end):
        inputData.append(i)
        outputData.append(FizzBuzz().func(i))

    # Why a DataFrame? DataFrames are pandas objects, themselves enhanced
    # versions of numpy structured arrays: inherently multidimensional arrays
    # with attached row and column labels, plus support for heterogeneous
    # types and missing data.
    dataset = {}
    dataset["input"] = inputData
    dataset["label"] = outputData

    save_data(dataset, filename)
    print(filename, "Created!")
def main():
    args = parse_args()
    initial = False
    if args.ini:
        initial = True
    data_train = load_data(True, initial)
    data_test = load_data(False, initial)

    cv = None
    tfv = None
    tfv_matrix = None
    all_questions = pd.concat([data_train.question1, data_train.question2,
                               data_test.question1, data_test.question2]).unique()
    svd = None

    if args.rec:
        if args.cnt:
            cv, _ = get_count_vectorizer(all_questions)
            pickle.dump(cv, open(PATH_COUNT_VECTORIZER, 'wb'))
            print("count vect dumped and updated")
        if args.tf:
            tfv, tfv_matrix = get_tfidf_vectorizer(all_questions)
            pickle.dump(tfv, open(PATH_TFIDF_VECTORIZER, 'wb'))
            pickle.dump(tfv_matrix, open(PATH_TFIDF_MATRIX, 'wb'))
            print("tfidf vectorizer dumped and updated")

    if args.enr:
        if args.cnt:
            if not args.rec:
                cv = pickle.load(open(PATH_COUNT_VECTORIZER, "rb"))
            analyzer = cv.build_analyzer()
            if not args.fin:
                data_train['question1_tk'] = data_train.question1.apply(lambda x: analyzer(x))
                data_train['question2_tk'] = data_train.question2.apply(lambda x: analyzer(x))
                save_data(data_train)
            else:
                data_test['question1_tk'] = data_test.question1.apply(lambda x: analyzer(x))
                data_test['question2_tk'] = data_test.question2.apply(lambda x: analyzer(x))
                save_data(data_test, False)
            print("data updated for count_vectorizer features")
def run():
    # load in members, orient by bioguide ID
    print("Loading current legislators...")
    current = load_data("legislators-current.yaml")
    current_bioguide = {}
    for m in current:
        if "bioguide" in m["id"]:
            current_bioguide[m["id"]["bioguide"]] = m

    # go over current members, remove out-of-office people
    membership_current = load_data("committee-membership-current.yaml")
    for committee_id in list(membership_current.keys()):
        print("[%s] Looking through members..." % committee_id)
        # iterate over a copy so that remove() doesn't skip members
        for member in list(membership_current[committee_id]):
            if member["bioguide"] not in current_bioguide:
                print("\t[%s] Ding ding ding! (%s)" % (member["bioguide"], member["name"]))
                membership_current[committee_id].remove(member)

    print("Saving current memberships...")
    save_data(membership_current, "committee-membership-current.yaml")
def main():
    main_df = load_data(CSV_DIR, CSV_FILE)
    main_df = split_dataset(main_df)

    train_df = extract_and_expand_subset(main_df, 'train')
    save_data(train_df, CSV_DIR, TRAIN_CSV_FILE)

    val_df = extract_and_expand_subset(main_df, 'val')
    save_data(val_df, CSV_DIR, VAL_CSV_FILE)

    test_df = extract_and_expand_subset(main_df, 'test')
    save_data(test_df, CSV_DIR, TEST_CSV_FILE)
def run():
    if len(sys.argv) != 3:
        print("Usage:")
        print("python retire.py bioguideID termEndDate")
        sys.exit()

    try:
        utils.parse_date(sys.argv[2])
    except:
        print("Invalid date: ", sys.argv[2])
        sys.exit()

    print("Loading current YAML...")
    y = utils.load_data("legislators-current.yaml")
    print("Loading historical YAML...")
    y1 = utils.load_data("legislators-historical.yaml")

    for moc in y:
        if moc["id"].get("bioguide", None) != sys.argv[1]:
            continue

        print("Updating:")
        rtyaml.pprint(moc["id"])
        print()
        rtyaml.pprint(moc["name"])
        print()
        rtyaml.pprint(moc["terms"][-1])

        moc["terms"][-1]["end"] = sys.argv[2]

        y.remove(moc)
        y1.append(moc)

        break

    print("Saving changes...")
    utils.save_data(y, "legislators-current.yaml")
    utils.save_data(y1, "legislators-historical.yaml")
def run():
    if len(sys.argv) != 2:
        print("Usage:")
        print("python untire.py bioguideID")
        sys.exit()

    print("Loading current YAML...")
    y = utils.load_data("legislators-current.yaml")
    print("Loading historical YAML...")
    y1 = utils.load_data("legislators-historical.yaml")

    for moc in y1:
        if moc["id"].get("bioguide", None) != sys.argv[1]:
            continue

        print("Updating:")
        rtyaml.pprint(moc["id"])
        print()
        rtyaml.pprint(moc["name"])

        moc["terms"].append(OrderedDict([
            ("type", moc["terms"][-1]["type"]),
            ("start", None),
            ("end", None),
            ("state", moc["terms"][-1]["state"]),
            ("party", moc["terms"][-1]["party"]),
        ]))

        y1.remove(moc)
        y.append(moc)

        break

    print("Saving changes...")
    utils.save_data(y, "legislators-current.yaml")
    utils.save_data(y1, "legislators-historical.yaml")
def upload():
    """Accept data to be uploaded: either JSON-format generic data,
    or thumbnails as jpegs from a camera.

    request.form:
        id (str): Identifier name of camera
        token (str): Secret token to allow uploads
        type (str): "data" for json upload, "small" or "crop" for JPGs

    Returns:
        None
    """
    if request.method == "POST":
        id = request.form["id"]
        token = request.form["token"]
        upload_type = request.form["type"]

        if token != app.config["UPLOAD_TOKEN"]:
            return "", 401

        if upload_type == "data":
            s_filename = safe_name("{}-{}-{}.json".format(id, upload_type, int(time.time())))
            filename = os.path.join(app.config["UPLOAD_PATH"], "data", s_filename)
            logging.info("Saving file {}".format(s_filename))
            save_data(request.form["data"], filename)
            set_rights(filename)

        if upload_type in ("small", "crop"):
            file = request.files["file"]
            if file:
                s_filename = safe_name("{}-{}.jpg".format(id, upload_type))
                filename = os.path.join(app.config["UPLOAD_PATH"], "view", s_filename)
                logging.info("Saving file {}".format(s_filename))
                file.save(filename)
                set_rights(filename)
        return "", 200
    return "", 200