import datetime
import os
import time
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import tweepy
from google.cloud import storage

# NOTE: project-local names used below (logger, hlp, parse_event, get_event_type,
# get_category, get_category_and_movement, write_csv_header, write_csv_line,
# plot_heatmap, is_inside, ranked_followers, send_message, bye,
# rate_limit_handler, follower_keys, db, User, Follower) are assumed to be
# provided by the surrounding project; minimal sketches of a few helpers
# appear below.


def get_movement_sizes(bucket_name, target_cat, l):
    total_image_count = 0
    # progress bar stuff -------------------
    print("Calculating movement sizes...")
    print_progress_bar(total_image_count, l, prefix='Progress:')
    movement_size_dict = {}  # empty movement:size dictionary
    movement_count = 0  # tentative count of images in the current movement
    prev_movement = ""  # name of the movement associated with the previous file
    blobs = storage.Client().list_blobs(bucket_name)  # get all filepaths in the GCP bucket
    for blob in blobs:  # and loop through every file path
        category, movement_name = get_category_and_movement(blob.name)  # get movement of current image
        if category == target_cat:  # if the image is in the target category:
            if movement_name != prev_movement:  # if we have started a new movement,
                if prev_movement != "":  # check that one was just completed
                    movement_size_dict[prev_movement] = movement_count  # and if so update the dictionary,
                prev_movement = movement_name  # update the previous movement to the current one,
                movement_count = 0  # and reset the movement image count
            movement_count += 1  # add 1 to the number of images in the current movement
            total_image_count += 1
            # progress bar stuff ------------------
            print_progress_bar(total_image_count, l, prefix='Progress:')
    if prev_movement:  # add the key:value pair of the last movement
        movement_size_dict[prev_movement] = movement_count  # (guard against an empty bucket)
    return movement_size_dict
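# Every routine in this file reports progress through print_progress_bar,
# which is not defined here. The following is a minimal sketch of a
# compatible implementation, with the signature inferred from the call sites
# in this file; the project's real helper may differ.
def print_progress_bar(iteration, total, prefix='', suffix='', length=50, fill='█'):
    """Render a single-line terminal progress bar (hypothetical sketch)."""
    percent = 100 * (iteration / float(total))
    filled_length = int(length * iteration // total)
    bar = fill * filled_length + '-' * (length - filled_length)
    print('\r{} |{}| {:.1f}% {}'.format(prefix, bar, percent, suffix), end='\r')
    if iteration == total:  # drop to a new line once the bar completes
        print()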
def vis_focused_grid(kernels):
    res = 1000
    lats_lngs = []
    lats_lngs.append(np.mgrid[0.555:0.612:res * 1j, -1.265:-1.248:res * 1j])
    lats_lngs.append(np.mgrid[-0.565:-0.46:res * 1j, 1.46:1.6:res * 1j])
    lats_lngs.append(np.mgrid[-0.5:0.2:res * 1j, -0.85:-0.7:res * 1j])
    lats_lngs.append(np.mgrid[0:0.17:res * 1j, -0.78:-0.72:res * 1j])
    lats_lngs.append(np.mgrid[-0.65:-0.4:res * 1j, -0.6:-0.15:res * 1j])
    lats_lngs.append(np.mgrid[0.445:0.695:res * 1j, -1.2845:-1.2305:res * 1j])
    for j, (lat, lng) in enumerate(lats_lngs[0:1]):  # note: the slice renders only the first region
        pos = np.dstack((lng, lat))
        logger.info("%sx%s Grid created.", res, res)
        heatmap = np.zeros((res, res))
        T = len(kernels)
        percent = max(T // 100, 1)  # avoid modulo-by-zero when there are fewer than 100 kernels
        for i, k in enumerate(kernels):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(i + 1, T, prefix='Progress:',
                                   suffix='Complete', length=50)
            heatmap += k.pdf(pos)  # accumulate each kernel's density in place
        logger.info("Probabilities for grid calculated.")
        hlp.save_array(heatmap,
                       "combined_gp_heat_focused/{}_{}x{}".format(j, res, res),
                       logger)
        plot_heatmap(heatmap,
                     identifier="_focused/{}_{}x{}".format(j, res, res),
                     show_title=False, with_alpha=True)
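# The kernels are evaluated as k.pdf(pos) on a (res, res, 2) grid of
# (lng, lat) pairs, which matches the interface of scipy's frozen
# multivariate_normal distributions. A hypothetical usage sketch under that
# assumption; the means and covariances below are placeholders, with the
# first mean placed inside the first focused region.
def demo_vis_focused_grid():
    from scipy.stats import multivariate_normal  # assumed kernel type
    kernels = [
        multivariate_normal(mean=[-1.256, 0.58], cov=1e-4),
        multivariate_normal(mean=[1.53, -0.51], cov=2e-4),
    ]
    vis_focused_grid(kernels)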
def run(path):
    logger.info("Starting execution of file_lengths.py!")
    event_counts = []
    file_sizes = []
    files = [f for f in os.listdir(path) if ".log" in f]
    T = len(files)
    percent = T // 100 if T >= 100 else 1
    for i, file_name in enumerate(files):
        if (i + 1) % percent == 0 or (i + 1) == T:
            hlp.print_progress_bar(i + 1, T, length=50)
        file_path = os.path.join(path, file_name)  # safer than string concatenation
        event_counts.append(hlp.file_len(file_path))
        file_sizes.append(os.path.getsize(file_path) / 1e9)
    logger.info("Total Size: %sGB", sum(file_sizes))
    fig = plt.figure()
    plt.subplot(121)
    plt.boxplot(event_counts, showmeans=True)
    plt.title("Number of Events")
    plt.subplot(122)
    plt.boxplot(file_sizes, showmeans=True)
    plt.title("Data Log Sizes (GB)")
    fig.suptitle("Measurements on {} Logs".format(len(event_counts)))
    fig.subplots_adjust(hspace=0.3)  # all other spacing parameters keep their defaults
    plt.savefig("log_sizes.png")
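# run() and the event readers below rely on a file_len helper to count the
# events in a log. A minimal sketch, assuming the helper simply counts lines:
def file_len(file_name):
    """Count the lines in a file (hypothetical sketch of the project helper)."""
    with open(file_name, 'r', encoding="latin-1") as f:
        return sum(1 for _ in f)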
def read_events_from_file_old(file_name, vehicle_id=None, max_events=None,
                              create_timeline=False, filter_vehicle="Bus",
                              group_by_id=True):
    """Take a file_name and return events in the form of key-value objects.

    Parameter "vehicle_id" is optional and is used to filter results to only
    contain events for the given vehicle_id.

    Returns the collected events (grouped by vehicle id and/or event type)
    together with the timeline event list.
    """
    if group_by_id:
        events = defaultdict(dict)
    else:
        events = defaultdict(list)
    timeline_events = []
    if max_events is None:
        logger.info("Calculating number of events in file...")
        T = file_len(file_name)
        logger.info("File has %i events", T)
    else:
        T = max_events
    percent = max(T // 100, 1)  # avoid modulo-by-zero for files with fewer than 100 events
    with open(file_name, 'r', encoding="latin-1") as f:
        for i in range(T):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(i + 1, T, prefix='Progress:',
                                   suffix='Complete', length=50)
            event = parse_event(f.readline(), filter_vehicle)
            if event is None:
                continue
            event_type = event["event.type"]
            event_v_id = event["vehicle.id"]
            if vehicle_id is None or vehicle_id == event_v_id:
                if group_by_id:
                    if event_type not in events[event_v_id]:
                        events[event_v_id][event_type] = []
                    events[event_v_id][event_type].append(event)
                else:
                    events[event_type].append(event)
                if create_timeline:
                    if not timeline_events or timeline_events[-1]["event.type"] != event_type:
                        timeline_events.append(event)
    return events, timeline_events
def mass_dm_followers(
    username: str,
    message: str,
    rank_by: str = "recent",
    value: str = "",
    dry_run: bool = True,
    api: tweepy.API = None,
):
    """
    Send a mass DM to all followers in order of the specified ranking and set
    the dm_sent flag for each of those users to True. Allows for a dry run
    where messages are not actually sent out and the dm_sent flag is not
    changed.

    params:
        username(str) - user of followers to DM
        message(str) - message to send out
        rank_by(str) - ranking method
        value(str) - value to search for; only used for the location and
                     description filters
        dry_run(bool) - set to True to only pretend to send messages
        api(tweepy.API) - tweepy api instance
    """
    user = User.query.filter_by(username=username).first()
    try:
        followers = ranked_followers(username, rank_by, value)
    except Exception as e:
        print(e)
        bye()
    total_followers = len(followers)
    if not total_followers:
        print("No followers matched your criteria :(")
        bye()
    print()
    if dry_run:
        print("Dry run is ON. Messages are not actually being sent. Phew. "
              "Add the --real flag to send DMs")
    print("Sending message to {} followers".format(total_followers), end="\n\n")
    for i, follower in enumerate(followers):
        print("\033[F\033[KSending DM to {}".format(follower.screen_name))
        print_progress_bar(i + 1, total_followers, suffix="Sent")
        if dry_run:
            time.sleep(0.01)
        else:
            send_message(follower.id_str, message, api)
            # Comment this out if testing
            db.session.query(Follower).filter_by(
                id_str=follower.id_str, user_id=user.id).update({"dm_sent": True})
            db.session.commit()
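# Hypothetical invocation sketch: the OAuth 1.0a flow below is standard
# tweepy usage, but every credential string is a placeholder, and dry_run
# stays on so nothing is actually sent.
def demo_mass_dm():
    auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")  # placeholder credentials
    auth.set_access_token("ACCESS_TOKEN", "ACCESS_SECRET")
    api = tweepy.API(auth, wait_on_rate_limit=True)
    mass_dm_followers("some_user", "Thanks for following!",
                      rank_by="recent", dry_run=True, api=api)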
def write_csv_file(filename, category, pct_train, pct_valid):
    print("Retrieving blobs from GCP...")
    blobs = storage.Client().list_blobs("art-translated-rvf")  # get all filepaths in the GCP bucket/folder
    if category == 'fakey':
        num_images_in_category = 26884  # hard-coded number of images in this category
    else:
        num_images_in_category = 34485
    num_train = int((pct_train / 100) * num_images_in_category)  # convert percentages
    num_valid = int((pct_valid / 100) * num_images_in_category)  # to integer counts
    img_count = 0  # tentative count of images in the current category
    print("\nWriting " + category + " images to .csv file...")
    # print_progress_bar(0, num_images_in_category, prefix='Progress:', suffix='lines written')
    for blob in blobs:  # loop through every file path
        if get_category(blob.name) == category:
            if img_count < num_train:  # .csv column 0 -- dataset:
                dataset = "TRAIN"  # assign images to
            elif img_count < num_train + num_valid:  # TRAIN, VALIDATE, or TEST
                dataset = "UNASSIGNED"  # datasets according to the
            else:  # percentages set above
                dataset = "TEST"
            directory = "gs://art-translated-rvf/" + blob.name  # .csv column 1 -- google cloud directory
            label = category  # .csv column 2 -- AutoML classification label
            csv_list = [dataset, directory, label]  # values for the next line of the .csv file
            write_csv_line(filename, csv_list)  # write the values to a new line of the file
            img_count += 1  # update image count and print the updated progress bar
            print_progress_bar(img_count, num_images_in_category,
                               prefix='Progress:', suffix='lines written')
    print("\nLook for", filename, "in the parent directory.")
def read_events_from_file(file_name, skip_n=0, max_events=None, geofence=None,
                          vehicle_id=None):
    """Opens a file containing events and parses them.

    Checks if a journey has begun and saves all the position updates from the
    bus on that journey. Bus stops stopped at or passed are also recorded.
    """
    if max_events is None:
        logger.info("Calculating number of events in file...")
        T = file_len(file_name)
        logger.info("File has %i events", T)
        T -= skip_n
    else:
        T = max_events
    percent = max(T // 100, 1)  # avoid modulo-by-zero for files with fewer than 100 events
    if vehicle_id is not None and not isinstance(vehicle_id, list):
        vehicle_id = [vehicle_id]  # accept a single id as well as a list of ids
    events = defaultdict(list)
    with open(file_name, 'r', encoding="latin-1") as f:
        logger.info("Skipping %s events", skip_n)
        for _ in range(skip_n):
            f.readline()
        for i in range(T):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(i + 1, T, prefix='Progress:',
                                   suffix='Complete', length=50)
            event = parse_event(f.readline())
            if event is None:
                continue
            if vehicle_id is not None and event["vehicle.id"] not in vehicle_id:
                continue
            if geofence is not None and not is_inside(event, *geofence):
                continue
            assert isinstance(event["date"], datetime.datetime)
            events[event["vehicle.id"]].append(event)
    for v in events.values():
        v.sort(key=lambda e: e["event.id"])  # keep each vehicle's events ordered by id
    return events
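# Hypothetical usage sketch: parse one log while keeping only two vehicles'
# events. The file name and vehicle ids are placeholders; the geofence
# argument is omitted because its exact shape depends on is_inside.
def demo_read_events():
    events = read_events_from_file("logs/example.log", vehicle_id=["1001", "1002"])
    for v_id, v_events in events.items():
        print(v_id, len(v_events))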
def write_csv_file(filename, l, pct_train, pct_valid):
    # Per-movement variant of the category-level writer above (shares its name).
    write_csv_header(filename)  # write the header of the .csv file
    movement_size_dict = get_movement_sizes("art-translated-rvf", 'fakey', l)  # images per movement
    movement_count = 0  # tentative count of images in the current movement
    prev_movement = ""  # name of the movement associated with the previous file
    img_count = 0
    blobs = storage.Client().list_blobs("art-translated-rvf")  # get all filepaths in the GCP bucket/folder
    print("\nWriting images to .csv file...")
    # print_progress_bar(0, l, prefix='Progress:', suffix='lines written')
    for blob in blobs:  # loop through every file path
        category, movement_name = get_category_and_movement(blob.name)  # retrieve category and movement
        if category == 'fakey':
            if movement_name != prev_movement:  # if we have started a new movement,
                prev_movement = movement_name  # update the previous movement to the current one
                movement_count = 0  # and reset the movement image count
            num_images = movement_size_dict[movement_name]  # number of images in current movement
            num_train = int((pct_train / 100) * num_images)  # convert percentages
            num_valid = int((pct_valid / 100) * num_images)  # to integer counts
            if movement_count < num_train:  # .csv column 0 -- dataset:
                dataset = "TRAIN"  # assign images to
            elif movement_count < num_train + num_valid:  # TRAIN, VALIDATE, or TEST
                dataset = "UNASSIGNED"  # datasets according to the
            else:  # percentages set above
                dataset = "TEST"
            directory = "gs://art-translated-rvf/" + blob.name  # .csv column 1 -- google cloud directory
            label = movement_name  # .csv column 2 -- AutoML classification label
            csv_list = [dataset, directory, label]  # values for the next line of the .csv file
            movement_count += 1  # increment the number of images in the current movement
            write_csv_line(filename, csv_list)  # write the values to a new line of the file
            img_count += 1  # update image count and print the updated progress bar
            print_progress_bar(img_count, l, prefix='Progress:', suffix='lines written')
    print("\nLook for", filename, "in the parent directory.")
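# Hypothetical usage sketch for the per-movement writer. The 26884 total is
# the 'fakey' image count hard-coded earlier in this file; the output
# filename and the 80/10/10 split are placeholders.
def demo_write_movement_csv():
    write_csv_file("fakey_by_movement.csv", 26884, pct_train=80, pct_valid=10)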
def vis_whole_grid(kernels):
    res = 7500
    lat, lng = np.mgrid[-1.7:2:res * 1j, -1.35:1.65:res * 1j]
    pos = np.dstack((lng, lat))
    logger.info("%sx%s Grid created.", res, res)
    heatmap = np.zeros((res, res))
    T = len(kernels)
    percent = max(T // 100, 1)  # avoid modulo-by-zero when there are fewer than 100 kernels
    for i, k in enumerate(kernels):
        if (i + 1) % percent == 0 or (i + 1) == T:
            print_progress_bar(i + 1, T, prefix='Progress:',
                               suffix='Complete', length=50)
        heatmap += k.pdf(pos)  # accumulate each kernel's density in place
    logger.info("Probabilities for grid calculated.")
    hlp.save_array(heatmap, "heatmap_{}x{}".format(res, res), logger)
    plot_heatmap(heatmap)
def get_event_types_list(file_name, max_events=None):
    if max_events is None:
        logger.info("Calculating number of events in file...")
        T = file_len(file_name)
        logger.info("File has %i events", T)
    else:
        T = max_events
    percent = max(T // 100, 1)  # avoid modulo-by-zero for files with fewer than 100 events
    types = []
    with open(file_name, 'r', encoding="latin-1") as f:
        for i in range(T):
            if (i + 1) % percent == 0 or (i + 1) == T:
                print_progress_bar(i + 1, T, prefix='Progress:',
                                   suffix='Complete', length=50)
            event_type = get_event_type(f.readline())
            types.append(event_type)
    return types
def fetch_followers(username: str, api: tweepy.API):
    """
    Use tweepy to fetch the user's followers' ids, then fetch their user
    objects and save them to the db.

    params:
        username(str) - username of user to fetch followers for
        api(tweepy.API) - tweepy api instance
    """
    total_followers = api.me().followers_count
    print("Fetching {} followers".format(total_followers))
    db.create_all()
    follower_ids = []
    print("Fetching follower ids!")
    for follower_id in rate_limit_handler(
            tweepy.Cursor(api.followers_ids, count=5000).items()):
        follower_ids.append(follower_id)
    print("Fetching user objects from ids!")
    fetched = 0  # running count across chunks; the per-chunk index alone would reset the bar every 100 users
    for list_of_100 in list(divide_into_chunks(follower_ids, 100)):
        for follower in api.lookup_users(user_ids=list_of_100):
            follower_dict = dict(
                (k, follower.__dict__[k]) for k in follower_keys)
            user = User.query.filter_by(username=username).first()
            if not user:
                user = User(username=username)
            follower = Follower(**follower_dict)
            user.followers.append(follower)
            db.session.add(user)
            db.session.commit()
            fetched += 1
            print_progress_bar(
                fetched,
                total_followers,
                prefix="Fetching {}/{} Followers".format(fetched, total_followers),
                suffix="Fetched",
            )
    print("Done!")
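# fetch_followers leans on the project helper divide_into_chunks to respect
# the 100-id limit of api.lookup_users. A minimal sketch, assuming the helper
# simply slices a list into fixed-size pieces (rate_limit_handler is not
# sketched, as its retry policy is project-specific):
def divide_into_chunks(items, chunk_size):
    """Yield successive chunk_size-sized slices of items (hypothetical sketch)."""
    for start in range(0, len(items), chunk_size):
        yield items[start:start + chunk_size]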