def __init__(self):
    input_file = open(ut.get_path('journey_counts.pkl'), 'rb')
    self.journeys = pickle.load(input_file)
    input_file.close()

    # Remove the trips that have no passengers
    journeys_to_pop = []
    for journey in self.journeys:
        if self.journeys[journey] == 0:
            journeys_to_pop.append(journey)
    for journey in journeys_to_pop:
        self.journeys.pop(journey)

    self.G = nx.DiGraph()

    # Add the stations as nodes
    for line in open(ut.get_path("nodes_with_latlng_updated.txt"), "r"):
        station_details = line.strip().split("\t")
        lat_lng = station_details[1].split(",")
        station = Station(station_details[0], lat_lng)
        self.G.add_node(station_details[0], station=station)

    # Link up the stations
    lines_files = [
        'blue.txt', 'brown.txt', 'green.txt', 'orange.txt', 'yellow.txt',
        'pink.txt', 'purple.txt', 'red.txt'
    ]
    for metro_line in lines_files:
        curr_file = open(ut.get_path(metro_line), "r")
        elist = []
        stations = []
        for txt_line in curr_file:
            stations.append(txt_line.strip())
        # first direction of edge
        elist.extend([(stations[i], stations[i + 1])
                      for i in range(len(stations) - 1)])
        # second direction of edge
        elist.extend([(stations[i + 1], stations[i])
                      for i in range(len(stations) - 1)])
        self.G.add_edges_from(elist)

    self._set_distances_as_edge_attributes()

    # Cache the routes followed by each journey since fill_flows_from_mapped_data()
    # has poor performance (~20 seconds per run)
    self.previous_path_for_journeys = {}
    self.previous_path_lengths_for_journeys = {}
    for journey in self.journeys:
        self.previous_path_for_journeys[journey] = []
        self.previous_path_lengths_for_journeys[journey] = []

    # Set the default values for the edges
    for edge in self.edges():
        self.add_attribute_to_edge(edge=edge, capacity=80000, flow=0)

    self.num_total_trips = None  # Will be overwritten later

    results = self.fill_flows_from_mapped_data(cache_result=True)
    assert results[0] == 0, "Expected 0 missed trips. Received {0}".format(
        results[0])
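# The snippets above and below build Station(name, coords) objects from
# metro_parts, but the class itself is not shown. The following is only a
# minimal sketch of what it plausibly looks like, assuming it wraps a station
# name and a (lat, lng) pair and exposes the coordinates by index so the
# kdtree package can treat it as a 2-d point.
class Station:
    """Hypothetical minimal stand-in for metro_parts.Station."""

    def __init__(self, name, coords):
        self.name = name
        # coords may arrive as strings straight from the file, so coerce here
        self.coords = tuple(float(c) for c in coords)

    def __len__(self):
        return len(self.coords)  # dimensionality, as expected by the k-d tree

    def __getitem__(self, i):
        return self.coords[i]  # lat at index 0, lng at index 1

    def __repr__(self):
        return "Station({0}, {1})".format(self.name, self.coords)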
def most_survived_pclass(s3, bucket_name, **context):
    titanic_df = context['task_instance'].xcom_pull(task_ids='read_csv')
    titanic_df = preprocessing(titanic_df, 'Pclass', -1)
    df = titanic_df[titanic_df['Survived'] == 1]
    # survived = survived[survived['Pclass'] != -1]
    counts = df['Pclass'].value_counts().rename_axis('Pclass').reset_index(
        name='counts')
    pclass_plot = counts.sort_values(by=['Pclass'])
    pclass_max = pclass_plot['counts'].max()
    most_survived_pclass = pclass_plot[pclass_plot['counts'] == pclass_max]

    filename = 'most_survivors_pclass'
    local_path = '/home/jennie/workspace/titanic/'
    dest_s3_csv, local_path_csv, dest_s3_img, local_path_img = get_path(
        most_survived_pclass, filename, local_path)

    plt.bar(pclass_plot['Pclass'], pclass_plot['counts'])
    # for i, v in enumerate(pclass_plot['counts']):
    #     plt.text(i + 10, v + 5, str(v), color='black', fontsize=20)
    plt.xlabel('Pclass')
    plt.ylabel('Number of survivors')
    plt.title('Number of survivors by Pclass')
    plt.savefig(local_path_img)

    logging.critical("PCLASS")
    logging.critical(pclass_plot.to_string())

    upload_result_to_s3(s3, bucket_name, dest_s3_csv, local_path_csv,
                        dest_s3_img, local_path_img)
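# The Titanic tasks above and below call a project-local get_path(df, filename,
# local_path) that returns four values and is paired with upload_result_to_s3.
# Its real body is not shown anywhere in this excerpt; the sketch below is only
# a guess, assuming it writes the DataFrame to a local CSV and derives matching
# S3 keys and image paths from the same filename.
import os


def get_path(df, filename, local_path):
    """Hypothetical sketch: build local/S3 paths and persist the CSV locally."""
    local_path_csv = os.path.join(local_path, filename + '.csv')
    local_path_img = os.path.join(local_path, filename + '.png')
    dest_s3_csv = 'results/' + filename + '.csv'  # assumed S3 key layout
    dest_s3_img = 'results/' + filename + '.png'
    df.to_csv(local_path_csv)
    return dest_s3_csv, local_path_csv, dest_s3_img, local_path_img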
def __init__(self):
    # Read in the dict with avg ridership for each station from the pickle file
    pkl_file_path = ut.get_path(avg_data_pkl_name)
    input_file = open(pkl_file_path, 'rb')
    self.avg_ridership = pickle.load(input_file)
    input_file.close()
def tally_up_journeys():
    input_file_name = 'sampled_kornhauser_metro_journeys_basic.csv'
    output_file_name = 'journey_counts.pkl'
    if testing:
        input_file_name = 'sampled_kornhauser_metro_journeys_test_basic.csv'
        output_file_name = "journey_counts_test.pkl"
    output_path = ut.get_path(output_file_name)

    input_file = open(input_file_name, 'r')
    output_file = open(output_path, 'wb')

    Journey_Dict = {}
    station_names = get_station_names()
    for orig_station in station_names:
        for dest_station in station_names:
            if orig_station != dest_station:
                # initialize the counter for this journey to 0
                key = (orig_station, dest_station)
                Journey_Dict[key] = 0

    status_every = 100000
    print("Tallying up journeys from: " + input_file_name)
    print("Writing journeys dict to: " + output_path)

    header = input_file.readline()  # Ignore header
    counter = 0
    for line in input_file:
        items = line.split(",")
        orig = ut.recomma(items[0].strip())
        dest = ut.recomma(items[1].strip())
        key = (orig, dest)
        if orig != dest:
            Journey_Dict[key] += 1
        counter += 1
        if counter % status_every == 0:
            print("Tallied up", str(counter), "journeys")

    pickle.dump(Journey_Dict, output_file)
    input_file.close()
    output_file.close()
    print("\nDone\n")
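# tally_up_journeys() relies on get_station_names(), which is not shown in this
# excerpt. A plausible minimal version, assuming it pulls the first
# tab-separated column from the same nodes_with_latlng_updated.txt file used
# elsewhere in the repo:
def get_station_names():
    names = []
    with open(ut.get_path("nodes_with_latlng_updated.txt"), "r") as f:
        for line in f:
            names.append(line.strip().split("\t")[0])
    return names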
def __init__(self):
    # Create an empty tree of 2 dimensions for lat,long
    self.tree = kdtree.create(dimensions=2)
    stations_file = open(ut.get_path("nodes_with_latlng_updated.txt"), "r")

    # Read each station into the kdtree
    for line in stations_file:
        station_details = line.strip().split("\t")
        lat_lng = station_details[1].split(",")
        lat = float(lat_lng[0])
        lng = float(lat_lng[1])
        coords = (lat, lng)
        name = station_details[0]
        self.tree.add(metro_parts.Station(
            name, coords))  # Station is a class in metro_parts

    self.tree = self.tree.rebalance()
    print("Created the", self.tree.dimensions, "-d tree")
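# Once the tree is built, the usual next step is a nearest-station query.
# A short usage sketch, assuming this __init__ belongs to a class named
# StationFinder (the name is a guess) and using the kdtree package's
# search_nn, which returns a (node, distance) pair:
#
#   finder = StationFinder()
#   node, dist = finder.tree.search_nn((41.8781, -87.6298))  # downtown Chicago
#   print("Closest station:", node.data.name)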
def write_pkl():
    Node_Dictionary = {}
    input_file = open(avg_data_csv_name, "r")
    input_file.readline()  # ignore headers
    for line in input_file:
        items = line.split(",")
        node_name, avg = items[0], float(items[1])
        Node_Dictionary[node_name] = avg

    print("Printing Node_Dictionary:\n")
    for x in Node_Dictionary:
        print(x + ": " + str(Node_Dictionary[x]))

    pkl_file_path = ut.get_path(avg_data_pkl_name)
    output_file = open(pkl_file_path, 'wb')
    pickle.dump(Node_Dictionary, output_file)
    output_file.close()
def avg_fare_by_pclass(s3, bucket_name, **context):
    titanic_df = context['task_instance'].xcom_pull(task_ids='read_csv')
    df = preprocessing(titanic_df, 'Fare', -1)
    df = df[['Pclass', 'Fare']]
    avg_fare_by_pclass = df.groupby('Pclass').mean()
    print(avg_fare_by_pclass)

    filename = 'avg_fare_by_pclass'
    local_path = '/home/jennie/workspace/titanic/'
    dest_s3_csv, local_path_csv, dest_s3_img, local_path_img = get_path(
        avg_fare_by_pclass, filename, local_path)

    avg_fare_by_pclass.plot.bar()
    plt.xlabel('Passenger class')
    plt.ylabel('Average fare')
    plt.title('Average fare by Pclass')
    plt.savefig(local_path_img)

    upload_result_to_s3(s3, bucket_name, dest_s3_csv, local_path_csv,
                        dest_s3_img, local_path_img)
def num_of_survivors(s3, bucket_name, **context):
    titanic_df = context['task_instance'].xcom_pull(task_ids='read_csv')
    df = preprocessing(titanic_df, 'Survived', -1)
    df = df[['Survived', 'PassengerId']]
    num_of_survivors = df.groupby('Survived').count()
    print(num_of_survivors)

    filename = 'num_of_survivors'
    local_path = '/home/jennie/workspace/titanic/'
    dest_s3_csv, local_path_csv, dest_s3_img, local_path_img = get_path(
        num_of_survivors, filename, local_path)

    num_of_survivors.plot.bar()
    plt.xlabel('Survivors and victims')
    plt.ylabel('Number of people')
    plt.title('Number of survivors (1) and victims (0)')
    plt.savefig(local_path_img)

    upload_result_to_s3(s3, bucket_name, dest_s3_csv, local_path_csv,
                        dest_s3_img, local_path_img)
import numpy as np

from logistic_regression import LogisticRegression
from utils import bin_feat_heart, con_feat_heart, name_features_heart
import utilities

TRAIN = 'heart_train.csv'
TEST = 'heart_test.csv'

if __name__ == '__main__':
    path = utilities.get_path()
    X_train, y_train = utilities.get_data(path / TRAIN, 10)
    X_test, y_test = utilities.get_data(path / TEST, 10)

    encoder = utilities.OneHotEncoder()
    scaler = utilities.StandardScaler()

    encoder.fit(X_train[:, bin_feat_heart])
    X_train_new = np.hstack(
        (encoder.transform(X_train[:, bin_feat_heart]),
         X_train[:, con_feat_heart]))
    X_test_new = np.hstack(
        (encoder.transform(X_test[:, bin_feat_heart]),
         X_test[:, con_feat_heart]))

    scaler.fit(X_train_new)
    X_train_scaled = scaler.transform(X_train_new)
    X_test_scaled = scaler.transform(X_test_new)
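    # The excerpt stops after scaling; a model would normally be trained next.
    # Sketch only - the constructor arguments and method names of the custom
    # LogisticRegression are assumptions (an sklearn-style fit/predict interface):
    model = LogisticRegression()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)
    accuracy = np.mean(y_pred == y_test)
    print("Test accuracy: {:.3f}".format(accuracy))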
from utilities import get_path

Config = {
    'game': {
        'height': 640,
        'width': 800,
        'tile_width': 32,
        'wall_char': "W",
        'ground_char': '.'
    },
    'resources': {
        'sprites': {
            'player': get_path("src/res/char.png")
        },
        'levels': {
            'level1': ""
        }
    }
}
import utilities as ut
import pickle
from create_metro_graph import metro_graph
import numpy as np

journey_dict_file_name = 'journey_counts.pkl'
input_path = ut.get_path(journey_dict_file_name)
input_file = open(input_path, 'rb')
journeys = pickle.load(input_file)
input_file.close()

test_graph = metro_graph()
print(len(journeys), "journeys")

origin_present = {}
origin_absent = {}
dest_present = {}
dest_absent = {}

for journey in journeys:
    a = journey[0]
    b = journey[1]
    c = journeys[journey]
    if test_graph.has_node(a):
        if a not in origin_present:
            origin_present[a] = []
        origin_present[a].append(c)
    else:
        if a not in origin_absent:
            origin_absent[a] = []
        origin_absent[a].append(c)
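# The excerpt above only fills the origin_* dicts; the dest_present/dest_absent
# dicts it initializes presumably get the symmetric treatment. A sketch of that
# continuation, written by analogy with the origin branch (not the original code):
for journey in journeys:
    b = journey[1]
    c = journeys[journey]
    if test_graph.has_node(b):
        dest_present.setdefault(b, []).append(c)
    else:
        dest_absent.setdefault(b, []).append(c)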
import os
import utilities
import pandas as pd
import numpy as np
import gdal
import matplotlib.pyplot as plt
import copy
import plotables as pltz
import math

# file paths PRIOR to reworking this script
fp_box_offline = 'C:\\Users\\uhlmann\\box_offline'
fp_copco_10 = os.path.join(utilities.get_path(22),
                           'bathymetry_project/Bathy_1ft_Copco_2010.tif')
fp_IG_10 = os.path.join(utilities.get_path(22),
                        'bathymetry_project/Bathy_1ft_IG_2010.tif')
fp_diff_copco = os.path.join(
    utilities.get_path(22),
    'bathymetry_project/2018_2010_diff_copco_exactly_1ft.tif')
fp_diff_IG = os.path.join(
    utilities.get_path(22),
    'bathymetry_project/2018_2010_diff_IG_exactly_1ft.tif')
fp_JCB_10 = os.path.join(fp_box_offline,
                         'bathymetry_project\\Bathy_1ft_JCBoyle_2010.tif')
fp_diff_JCB = os.path.join(fp_box_offline,
                           'bathymetry_project\\2018_2010_diff_JCB_1ft.tif')

# OpenTopo datasets
fp_copco_10_nad83 = os.path.join(
    utilities.get_path(22),
    'bathymetry_project\\Copco_1ft_2010_nad83.tif')
fp_diff_copco_openTopo = os.path.join(
def extract_contexts(sentenceList, all_pairs, CT_pairs):
    pos_contexts = []
    neg_contexts = []
    CT_contexts = []
    for sentence in sentenceList:
        sentence_str = " ".join(sentence.wordList)
        for i in range(1, len(sentence.wordList) - 2):
            if sentence.lemmaList[i] == "say":
                continue
            if detect_event_trigger(sentence, i) != True:
                continue
            index_result1, words_result1, tag1 = old_extract_event_with_arg(
                sentence, i)
            for j in range(i + 2, len(sentence.wordList)):
                if sentence.lemmaList[j] == "say":
                    continue
                if detect_event_trigger(sentence, j) != True:
                    continue
                index_result2, words_result2, tag2 = old_extract_event_with_arg(
                    sentence, j)
                if tag1 == "verb_alone" and tag2 == "verb_alone":
                    continue

                event1 = "< " + " ".join(words_result1).lower() + " >"
                event2 = "< " + " ".join(words_result2).lower() + " >"
                word_pair1 = event1 + " -> " + event2
                word_pair2 = event2 + " -> " + event1
                CT_word_pair1 = event1 + " => " + event2
                CT_word_pair2 = event2 + " => " + event1

                for index in index_result1:
                    if index[0] == "[":
                        mask1_idx = int(
                            index.replace("[", "").replace("]", ""))
                for index in index_result2:
                    if index[0] == "[":
                        mask2_idx = int(
                            index.replace("[", "").replace("]", ""))
                masked_sentence = " ".join([
                    sentence.wordList[k]
                    if k not in [mask1_idx, mask2_idx] else "[MASK]"
                    for k in range(1, len(sentence.wordList))
                ])

                if word_pair1 in all_pairs:
                    valid_index, word_path, path_pos = get_path(
                        sentence, str(i), str(j))
                    pos_contexts.append({
                        "sentence": " ".join(sentence.wordList[1:]),
                        "masked_sentence": masked_sentence,
                        "word_path": word_path,
                        "word_pair": word_pair1,
                        "trigger_index": str(i) + " " + str(j)
                    })
                elif word_pair2 in all_pairs:
                    valid_index, word_path, path_pos = get_path(
                        sentence, str(i), str(j))
                    pos_contexts.append({
                        "sentence": " ".join(sentence.wordList[1:]),
                        "masked_sentence": masked_sentence,
                        "word_path": word_path,
                        "word_pair": event1 + " <- " + event2,
                        "trigger_index": str(i) + " " + str(j)
                    })
                elif CT_word_pair1 in CT_pairs:
                    valid_index, word_path, path_pos = get_path(
                        sentence, str(i), str(j))
                    CT_contexts.append({
                        "sentence": " ".join(sentence.wordList[1:]),
                        "masked_sentence": masked_sentence,
                        "word_path": word_path,
                        "word_pair": CT_word_pair1,
                        "trigger_index": str(i) + " " + str(j)
                    })
                elif CT_word_pair2 in CT_pairs:
                    valid_index, word_path, path_pos = get_path(
                        sentence, str(i), str(j))
                    CT_contexts.append({
                        "sentence": " ".join(sentence.wordList[1:]),
                        "masked_sentence": masked_sentence,
                        "word_path": word_path,
                        "word_pair": event1 + " <= " + event2,
                        "trigger_index": str(i) + " " + str(j)
                    })
                elif random.uniform(0, 1) < 0.05:
                    valid_index, word_path, path_pos = get_path(
                        sentence, str(i), str(j))
                    # if valid_index != None:
                    neg_contexts.append({
                        "sentence": " ".join(sentence.wordList[1:]),
                        "masked_sentence": masked_sentence,
                        "word_path": word_path,
                        "word_pair": event1 + " <-> " + event2,
                        "trigger_index": str(i) + " " + str(j)
                    })
    return pos_contexts, neg_contexts, CT_contexts
""" create_station_kdtree.py Creates a K-d Tree using the stations lat-lng coordinates as keys. """ import kdtree import pickle import metro_parts import utilities as ut # Create an empty tree of 2 dimensions for lat,long tree = kdtree.create(dimensions=2) stations_file = open(ut.get_path("nodes_with_latlng_updated.txt"), "r") # Read each station into the kdtree for line in stations_file: station_details = line.strip().split("\t") lat_lng = station_details[1].split(",") lat = float(lat_lng[0]) lng = float(lat_lng[1]) coords = (lat, lng) name = station_details[0] tree.add(metro_parts.Station(name, coords)) # Station is a class in metro_parts tree = tree.rebalance() print("Created a", tree.dimensions, "- d tree")
import utilities
import os

fp_KlamathDataHD_Upload = utilities.get_path(21)
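# Several of these GIS scripts call utilities.get_path(n) with an integer key
# (4, 6, 18, 19, 21, 22, ...). The real helper is not shown anywhere in this
# excerpt; a plausible minimal version would just look the key up in a dict of
# project directories. The paths below are placeholders, not the actual ones:
def get_path(key):
    paths = {
        21: r'C:\path\to\KlamathDataHD_Upload',  # placeholder
        22: r'C:\path\to\box_offline',           # placeholder
    }
    return paths[key]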
import csv
import utilities as ut

# These files were manually compiled. Each holds the station names for one
# metro line, separated by commas.
lines_files = [
    'blue.txt', 'brown.txt', 'green.txt', 'orange.txt', 'yellow.txt',
    "pink.txt", "purple.txt", "red.txt"
]

for metro_line in lines_files:
    f = open(ut.get_path(metro_line), "r")
    in_paren = False
    output_file = open("./" + metro_line[:-4] + "_treated.txt", "w")
    while True:
        c = f.read(1)
        if not c:
            print("End of file")
            break
        if c == "(":
            in_paren = True
            output_file.write(c)
        elif c == ")":
            in_paren = False
            output_file.write(c)
        elif c == "," and not in_paren:
            output_file.write("\n")
        else:
            output_file.write(c)
""" WORK IN PROGRESS Created by Sam Bernstein on Nov. 26. This program will take in Kornhauser's trip data after it is pruned to only include trips that could plausibly be taken along the Chicago metro, and use our model to sum up the expected number of people taking the Chicago metro from station A to station B for every ordered pair of nodes (A, B) in the metro system. This program will not calculate the flow through the network; that task will be left to another program. """ import networkx as nx import matplotlib.pyplot as plt import pickle import utilities as ut from metro_parts import * # We need a way of taking in a latitutde and longitude and finding the # station closest to that latitutde and longitude pruned_file = open(ut.get_path("pruned_kornhauser_data.csv"), "r")
# # young of the year copy over feats
# creek_dset = ['horse_creek_relocation_sites.gdb', 'horse_creek_relocation_sites.gdb', 'seiad_creek_relocation_sites.gdb', 'beaver_creek_relocation_site.gdb']
# postpend = ['Points', 'Polylines', 'Points', 'Polylines']
# postpend = [os.path.join('Placemarks', item) for item in postpend]
# creek_dset = [os.path.join(creek, postP) for creek, postP in zip(creek_dset, postpend)]
# feat_lst = [os.path.join(fp_salmon, creek) for creek in creek_dset]
# fp_mp_salmon = os.path.join(fp_orders, 'MP_Juvenile_Salmon')
# feat_out_lst = ['horse_creek_relocation_site_pts', 'horse_creek_relocation_sites_line', 'seid_creek_relocation_sites_pts', 'beaver_creek_relocation_site_line']
# # for feat, feat_out in zip(feat_lst, feat_out_lst):
# #     arcpy.FeatureClassToFeatureClass_conversion(feat, fp_mp_salmon, feat_out)

# # 7) LABELSETS
# Field amalgamate 11/16/2020
# import copy
fp_cdm = utilities.get_path(6)
arcpy.env.workspace = fp_cdm
feats = arcpy.ListFeatureClasses(feature_dataset='Places_of_Interest')
# fields = []
# feat_names = []
# fp_full = []
# temp2 = set([])
# for feat in feats:
#     temp = [ft.name for ft in arcpy.ListFields(feat)]
#     for item in temp:
#         print('{}: FIELDS: {}'.format(feat, item))
#     if 'temp2' not in locals():
#         temp2 = copy.copy(temp)
#     temp2 = set(temp).intersection(temp2)
#     fields.extend(temp)
#     feat_names.extend(len(temp) * [feat])
import arcpy
import utilities
import copy
import os

fp_krrp_project = utilities.get_path(4)
fp_krrp_project_scratch = utilities.get_path(19)
fp_scratch = utilities.get_path(19)
fp_working = utilities.get_path(18)
fp_copy_to_scratch = os.path.join(fp_krrp_project, 'LoW\\LoW_Final_60_v2')

low_feat = 'LoW_60_v2_scratch'
cut_fill = 'Cut_Fill_Areas_60_Design_scratch'
access_route = 'Access_Routes_scratch'
access_route_buffer = 'Access_Routes_Buffer_noDissolve_15ft'
low_scratch = 'LoW_90Design_Draft_scratch'

# copy working feats to scratch gdb
# arcpy.FeatureClassToFeatureClass_conversion(fp_copy_to_scratch, fp_scratch, low_feat)
low_feat = low_scratch
merge_feat = access_route_buffer
fp_access_route = os.path.join(fp_scratch, access_route)
fp_access_route_buffer = os.path.join(fp_scratch, access_route_buffer)
# utilities.buffer_and_create_feat(fp_access_route, fp_access_route_buffer, "15 feet")
# arcpy.FeatureClassToFeatureClass_conversion(fp_copy_to_scratch, fp_scratch, merge_feat)

print('fp_scratch')
print(fp_scratch)
arcpy.env.workspace = copy.copy(fp_scratch)
fieldMappings = arcpy.FieldMappings()
class Schedule:
    template = pd.read_csv(get_path("schedule.csv"),
                           comment='#',
                           skipinitialspace=True,
                           parse_dates=["valid_from", "valid_until"],
                           converters={"day_of_week": lambda x: DAYS.get(x)})
    activities = pd.read_csv(get_path("activities.txt"),
                             comment='#',
                             header=None,
                             names=["title"]).title.to_list()
    # Ensure that activities has at least two entries - otherwise 'choice' throws a tantrum
    activities += [None, None]

    def __init__(self):
        self.total_hours = CONFIG["setup"]["total_hours"]
        self.year = CONFIG["setup"]["year"]
        self.start_sheet = CONFIG["general"]["start_sheet"]
        self.fill_missing = CONFIG["fill"]["fill_missing"]
        self.avoid_weekends = CONFIG["fill"]["avoid_weekends"]
        self.max_overtime = CONFIG["fill"]["max_overtime"]
        self.max_undertime = CONFIG["fill"]["max_undertime"]
        self.dates = None

    def fill(self, year, month):
        idx = get_work_days(year, month)
        df = pd.DataFrame(index=idx)
        df["day_of_week"] = df.index.dayofweek
        df = df.merge(self.template,
                      left_on="day_of_week",
                      right_on="day_of_week",
                      how="left").set_index(idx)
        df = df.loc[(df.valid_from <= df.index) & (df.index <= df.valid_until),
                    ["activity_start", "activity_end", "note"]]
        if len(df) == 0:
            df = pd.DataFrame(
                index=[idx.min()],
                columns=["activity_start", "activity_end", "note"])
        self.dates = df
        return df.copy()

    def autocomplete(self, df):
        error = random.randint(-int(self.max_undertime), int(self.max_overtime))
        target = int(self.total_hours) + error
        work_days = get_work_days(df.index.min().year, df.index.min().month)
        slot_generator = generate_slot()
        slot = next(slot_generator)
        while get_net_working_hours(df) < target:
            try:
                indexes = work_days[~work_days.isin(df.index)]
                activity = np.random.choice(self.activities)
                row = create_row(indexes, slot, activity)
                assert get_net_working_hours(df) + get_net_working_hours(
                    row) <= target
                df = pd.concat([df, row], axis=0)
            except AssertionError:
                slot = next(slot_generator)
                continue
            except StopIteration:
                break
        return df.sort_index().dropna(subset=["activity_start"], axis=0)
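# A short usage sketch for the class above. The month/year values are
# arbitrary; CONFIG, DAYS and the helper functions (get_work_days,
# generate_slot, create_row, get_net_working_hours) are assumed to be defined
# elsewhere in the module, as the class already requires.
if __name__ == "__main__":
    schedule = Schedule()
    df = schedule.fill(2021, 3)      # template entries for March 2021
    df = schedule.autocomplete(df)   # pad with random activities up to the target hours
    print(df)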
from utilities import get_path

Config = {
    'game': {
        'height': 640,
        'width': 800,
        'tile_width': 32,
        'wall_char': "W",
        'ground_char': '.'
    },
    'resources': {
        "ui": {
            'menu': get_path("src/res/ui/menu.png"),
            'inventory': get_path("src/res/ui/Inventory.png"),
            'hover': get_path("src/res/ui/hover.png"),
            'fonts': {
                'tooltip': get_path("src/res/ui/fonts/tooltip.ttf")
            },
            'icons': {
                'default': get_path("src/res/ui/icons/questionmark.png")
            }
        },
        'sprites': {
            'player': get_path("src/res/sprites/char.png"),
            'ground': get_path("src/res/sprites/ground.png"),
            'wall': get_path("src/res/sprites/rock.png")
        },
        'levels': {
            'level1': get_path("src/res/levels/example.lvl")
        }
    }
}
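# Both game configs above call a get_path from utilities to turn repo-relative
# resource paths into usable ones. The real helper is not shown in this
# excerpt; a common minimal implementation (an assumption, not the project's
# actual code) resolves the path against the package root:
import os


def get_path(relative_path):
    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    return os.path.join(root, relative_path)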
# Please check function get_path to use your datasets
"""
Available datasets
0 : Pneumonia children(PNEUMO-V3) - resized 500
1 : COV-PNEUMO: resized 500
2 : COV-NOR: resized 500
3 : Shenzhen(TB): resized 500
4 : Montgomery(TB-MC) : Prep 500
5 : BCDR-D01 : resized 500
6 : BCDR-D02 : resized 500
"""
n_dataset = 4

# Load dataset from 'mem', 'dir', 'df'
source = 'dir'
path, path_df, zoom, v_flip, rot = ut.get_path(n_dataset, source)

# Type of model: pretrained, baseline, custom
model_type = 'custom'
pretrained = False
base_line = 'DenseNet121'
weights = None  # 'imagenet' or None

BATCH_SIZE = cfg['BATCH_SIZE']
EPOCHS = cfg['epochs']
DEPTH = 3
WIDTH, HEIGHT, color_mode = ut.get_dimensions(model_type, base_line, DEPTH)
INPUT_SHAPE = (WIDTH, HEIGHT, DEPTH)
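# The variables above (path, zoom, v_flip, rot, WIDTH, HEIGHT, color_mode,
# BATCH_SIZE) look like inputs to an image generator. A sketch of how they
# might be wired up with Keras when source == 'dir' - an assumption about this
# project's workflow, not its actual training code:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   zoom_range=zoom,
                                   vertical_flip=v_flip,
                                   rotation_range=rot,
                                   validation_split=0.2)
train_gen = train_datagen.flow_from_directory(path,
                                              target_size=(HEIGHT, WIDTH),
                                              color_mode=color_mode,
                                              batch_size=BATCH_SIZE,
                                              subset='training')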