def question_c():
    """Sample Gaussian-mixture point clouds, plot them and save them.

    For each of the ``PLOTS`` plots, ``POINTS_PER_PLOT`` points are assigned
    at random to one of three components. Component ``i`` (1, 2, 3) draws its
    x and y coordinates independently from ``np.random.normal(i, 2 * i)``.
    Every cloud is plotted with ``util.plot_in_R2`` and written with
    ``util.write_list``.

    Raises:
        ValueError: if ``PLOTS`` is not 2 (the result is unpacked into
            exactly two data sets).
    """
    total_data = []
    for _ in range(PLOTS):
        # Component label (1, 2 or 3) for every point of this plot.
        indexes = np.random.randint(1, 4, POINTS_PER_PLOT)
        # bincount with minlength keeps a zero entry for a component that
        # received no points; np.unique(return_counts=True) would omit it,
        # which shifted the counts or overran the array.
        number_of_samples = np.bincount(indexes, minlength=4)

        x_data = np.array([])
        y_data = np.array([])
        for i in range(1, 4):
            shape = int(number_of_samples[i])
            # New draws are prepended (x first, then y), as before, so the
            # random stream is consumed in the same order.
            x_data = np.concatenate(
                (np.random.normal(i, 2 * i, shape), x_data), axis=0)
            y_data = np.concatenate(
                (np.random.normal(i, 2 * i, shape), y_data), axis=0)
        total_data.append(np.dstack((np.array([x_data]), np.array([y_data]))))

    data1, data2 = total_data

    # PLOT the outputs
    for data in (data1, data2):
        util.plot_in_R2(data.T[0], data.T[1], "Gaussian Distribution")

    # save the points
    util.write_list(data1, "problem3_outputs/question_c/data1.txt")
    util.write_list(data2, "problem3_outputs/question_c/data2.txt")
def question_a():
    """Draw uniformly distributed points, split them in two, plot and save.

    x is sampled between -1 and 1 and y between 0 and 5 for ``TOTAL_POINTS``
    points. The first ``POINTS_PER_PLOT`` rows form the first data set and
    the remaining rows the second.
    """
    # x is drawn before y so the random stream is consumed in the same order.
    xs = np.random.uniform(-1, 1, (TOTAL_POINTS, 1))
    ys = np.random.uniform(0, 5, (TOTAL_POINTS, 1))
    points = np.hstack((xs, ys))

    first_cloud = points[:POINTS_PER_PLOT]
    second_cloud = points[POINTS_PER_PLOT:]

    # Plot both clouds first, then save both.
    for cloud in (first_cloud, second_cloud):
        util.plot_in_R2(cloud.T[0], cloud.T[1], "Uniform Distribution")

    outputs = (
        (first_cloud, "problem3_outputs/question_a/data1.txt"),
        (second_cloud, "problem3_outputs/question_a/data2.txt"),
    )
    for cloud, path in outputs:
        util.write_list(cloud, path)
def main():
    """Aggregate accessibility for the selected scenarios and write dated files.

    Four aggregations are run in sequence (total, by income, by TAZ, by TAZ
    and income); each result is written to a text file whose name starts
    with today's date (YYYYMMDD).
    """

    def dated(suffix):
        # Evaluated at every use, exactly like the inline strftime calls.
        return time.strftime("%Y%m%d") + suffix

    # Scenario indices (between 0 and 2110) for which damaged bridge data is
    # saved; data within: 12-Dec-2013_12_3909_50_0.55556_25.mat.
    # NOTE: this list is immediately superseded by the assignment below.
    TARGETS = [
        20, 33, 36, 137, 142, 143, 144, 151, 152, 159, 166, 167, 171,
        173, 183, 184, 192, 193, 194, 196, 205, 1676, 1692, 2851, 2914,
    ]
    # we have retrofitted the top 25% ranked by accessibility impact
    TARGETS = ["173_high"]

    # the annual likelihood of occurrence of each of the scenarios ("targets")
    weights = get_scenario_weights(
        "12-Dec-2013_12_3909_50_0.55556_25_weights.csv")

    total_access = aggregate_accessibility(TARGETS, True)
    util.write_list(dated("_accessTotACC_fromMain.txt"), total_access)

    # TODO: implement the 3 functions below so they actually do something
    by_income = aggregate_accessibility_by_income(TARGETS, True)
    util.write_2dlist(dated("_accessByIncome_fromMain.txt"), by_income)

    by_taz = aggregate_accessibility_by_taz(TARGETS, weights, True)
    util.write_list(dated("_accessbyTAZ_fromMain.txt"), by_taz)

    by_taz_income = aggregate_accessibility_by_taz_by_income(
        TARGETS, weights, True)
    util.write_2dlist(
        dated("_accessByTAZByIncome_fromMain.txt"), by_taz_income)
def question_d():
    """Sample a disc surrounded by a ring, split into two plots, plot and save.

    Points are sampled in polar form (radius, angle) and converted to
    cartesian coordinates one by one with ``util.pol2cart``. Half of the
    points of each plot come from the inner disc (radius 0 to 0.5) and half
    from the outer ring (radius 1.5 to 1.75).
    """
    half_total = int(TOTAL_POINTS / 2)
    half_plot = int(POINTS_PER_PLOT / 2)

    # Draw order (disc radius, disc angle, ring radius, ring angle) fixes how
    # the random stream is consumed.
    disc_r = np.random.uniform(0, 0.5, (half_total, 1))
    disc_t = np.random.uniform(0, 2 * np.pi, (half_total, 1))
    ring_r = np.random.uniform(1.5, 1.75, (half_total, 1))
    ring_t = np.random.uniform(0, 2 * np.pi, (half_total, 1))

    clouds = []
    # The first plot takes the leading rows of each sample, the second the rest.
    for part in (slice(None, half_plot), slice(half_plot, None)):
        radii = np.concatenate((disc_r[part], ring_r[part]), axis=0)
        angles = np.concatenate((disc_t[part], ring_t[part]), axis=0)
        polar = np.concatenate((radii, angles), axis=1)
        # turn from polar to cartesian
        cartesian = [util.pol2cart(row[0], row[1]) for row in polar]
        clouds.append(np.array(cartesian))
    data1, data2 = clouds

    # PLOT the outputs
    for cloud in (data1, data2):
        util.plot_in_R2(cloud.T[0], cloud.T[1], "Circle inside a Ring")

    # save the points
    util.write_list(data1, "problem3_outputs/question_d/data1.txt")
    util.write_list(data2, "problem3_outputs/question_d/data2.txt")
def run_simple_iteration(G, ground_motion, demand, multi, j, targets, clean_up = True): #G is a graph (not a multigraph!), demand is a dictionary keyed by source and target of demand per weekday. multi is a boolean that is true if it is a multigraph (can have two parallel edges between nodes) #change edge properties newG, capacities = damage_network(G, ground_motion, multi) #also returns the number of bridges out num_out = sum(x < 100 for x in capacities) update_bridge_damage_dataset(capacities) if j in targets: affected_bridges = [] for i in range(len(capacities)): if capacities[i] < 100: if (i+1) not in SPECIALLY_RETROFITTED_BRIDGES: affected_bridges.append(str(i+1)) util.write_list('20130902_modifyingCapacity/' + time.strftime("%Y%m%d")+'_modifyingCapacitytab' + str(j) + '.txt', affected_bridges) #get max flow start = time.time() #node 5753 is in superdistrict 12, which is santa clara county, and node 3144 is in superdistrict 18, which is alameda county. roughly these are san jose and oakland #node 7619 is in superdistrict 1 (7493 is also), which is sf, and node node 3144 is in superdistrict 18, which is alameda county. roughly these are san francisco and oakland s = '3144' t = '7493' #2702 try: flow = nx.max_flow(newG, s, t, capacity='capacity') #not supported by multigraph except nx.exception.NetworkXError as e: print 'found an ERROR: ', e flow = -1 print s in newG print t in newG print len(newG.nodes()) print len(newG.edges()) # sp_dict = nx.single_source_dijkstra_path_length(newG,'7493',weight='distance') # sp = sum(sp_dict.values())/float(len(sp_dict.values())) # sp2 = 0 # for target in demand.keys(): # sp2 += sp_dict[target] # sp2 = sp2 / float(len(demand.keys())) sp = 0 sp2 = 0 if clean_up == True: damagedG= util.clean_up_graph(newG) return (num_out, flow, sp, sp2, newG)
def main():
    """Collect project links, crawl each project and write the records out.

    When the first command-line argument is ``list``, the links are scraped
    with a Firefox webdriver and saved with ``util.write_list``; otherwise
    the previously saved list is loaded with ``util.read_list``. Every link
    is then passed to ``get_data.get_data_from_url`` together with its
    1-based position, pausing ``MERCY_TIME`` seconds between requests. The
    collected records are written with ``util.write_dict``.
    """
    # Step 1: get links for projects
    if len(sys.argv) > 1 and sys.argv[1] == "list":
        driver = webdriver.Firefox()
        try:
            projects = list_projects.get_project_links(driver)
            util.write_list(projects)
        finally:
            # Close the browser even when scraping or saving fails, so a
            # Firefox process is not left behind.
            driver.close()
    else:
        projects = util.read_list()

    # Step 2: get data for every project
    records = []
    output = {"records": {"record": records}}
    total = len(projects)
    for position, project_link in enumerate(projects, start=1):
        records.append(get_data.get_data_from_url(project_link, position))
        print("Crawled:\t%d/%d" % (position, total))
        # Have mercy on KickStarter :)
        time.sleep(MERCY_TIME)

    # Write into JSON file
    util.write_dict(output)
def del_persistent_link(c1, c2):
    """Drop the lower-cased (c1, c2) pair from persistent_links and rewrite LINKS_FILE.

    A pair that is not present is ignored; the file is rewritten either way.
    """
    link = (c1.lower(), c2.lower())
    persistent_links.discard(link)
    remaining = list(persistent_links)
    util.write_list(LINKS_FILE, remaining)
def add_persistent_link(c1, c2):
    """Add the lower-cased (c1, c2) pair to persistent_links and rewrite LINKS_FILE."""
    link = (c1.lower(), c2.lower())
    persistent_links.add(link)
    current = list(persistent_links)
    util.write_list(LINKS_FILE, current)
def main():
    """Aggregate accessibility over the scenario list and write it to a dated file."""
    # Scenario indices (between 0 and 2110) for which damaged bridge data is
    # saved; data within: 12-Dec-2013_12_3909_50_0.55556_25.mat
    TARGETS = [
        20, 33, 36, 137, 142, 143, 144, 151, 152, 159, 166, 167, 171,
        173, 183, 184, 192, 193, 194, 196, 205, 1676, 1692, 2851, 2914,
    ]
    # For a quick single-scenario run use TARGETS = [20]
    y = aggregate_accessibility(TARGETS)
    # The file name is built after the aggregation, so the date stamp is
    # the time of writing.
    out_name = time.strftime("%Y%m%d")+'_accessTot_fromMain.txt'
    util.write_list(out_name, y)
def question_e():
    """Run plot_e_once twice and save each run's points as data1.txt / data2.txt."""
    for file_number in (1, 2):
        points = plot_e_once()
        target = "problem3_outputs/question_e/data" + str(file_number) + ".txt"
        util.write_list(points, target)
if (rowCount % breakpoint == 0): input = raw_input("Press enter to continue ") if (not emptyStringCheck(input)): break #Summary print "------------------------------------------------------------------------------------------------" cursor.close() print "cursor closed" database.close() print "finished uploading" print "------------------------------------------------------------------------------------------------" print 'Summary' print 'Number of total entries made: ' + str(rowCount) print 'Number of incorrect isbns given based on length: ' + str(invalidIsbns) write_list('List of rows with incorrect isbns lengths given: ', invalidIsbnList) write_list('List of rows with non numberic quantities: ', quantityTypeFieldErrorList) if (gBooksApiFlag == True): print 'GOOGLE API STATS' print '************************' print 'Number of valid API responses: ' + str( (rowCount - invalidApiResponses)) print 'Number of parseOtherIsbn function calls : ' + str(isbnCallCount) print 'Number of successful parseOtherIsbn function calls: ' + str( findsOtherIsbn) write_list( 'List of rows with valid Apis but unsuccesful parseOtherIsbn calls: ', unsuccesfulParseOtherIsbnList) print '************************'
if not dbh.checkJobIfExist(job['companyId'], job['jobName']): DataEtl.processJobData(job) else: jobList.remove(job) dbh.insertJob(jobList) if __name__ == '__main__': dbh = DBHelper.DBHelper() # companySt=set() nameSt = set() comList = [] with open('company/famous.txt', 'r', encoding='utf-8') as f: for line in f: line = line.strip('\r\n') comList.append(dbh.selectCompanyByName(line)) # ori_List=dbh.selectCompany(limitNum=6000); # print(len(ori_List)) # for com in ori_List: # if com[1] in nameSt: # continue # else: # comList.append(com) # nameSt.add(com[1]) # random.shuffle(comList) rtn = parseJob4CompanyList(comList) write_list(rtn, 'jobs/jobs_1_31.txt') # addJobOfCompanyName('建设银行')
def main():
    """Train an SVM on accessibility results and compute per-bridge support.

    Python 2 script entry point (uses print statements). Steps, in order:

    1. Take the hard-coded accessibility values ``y`` for the 25 scenarios in
       ``TARGETS`` (the call that would compute them is commented out).
    2. Build feature matrices for the training scenarios and for the full
       event set with ``freq_svm.build_x``, stack them, scale them together
       with ``preprocessing.scale`` and split them back.
    3. Label ``y`` against a threshold and train with ``freq_svm.train``.
    4. Predict labels for the full event set and write them to a dated file.
    5. Collect the 1-based indices predicted as "miss" and pass them, with
       the damaged-bridge baskets, to ``get_support``; write the result.

    The function ends in ``pdb.set_trace()``, so it drops into the debugger
    instead of returning normally.
    """
    # get and aggregate accessibility from cube using import_acc_results.py file
    # TARGETS = [12, 35, 55, 71, 75, 82, 86, 87, 88, 106, 108, 115, 121, 231, 241, 247, 256, 258, 260, 261, 676, 730, 733, 1231, 1548] #first set of Cube runs
    TARGETS = [
        20,
        33,
        36,
        137,
        142,
        143,
        144,
        151,
        152,
        159,
        166,
        167,
        171,
        173,
        183,
        184,
        192,
        193,
        194,
        196,
        205,
        1676,
        1692,
        2851,
        2914,
    ]  # data within: 12-Dec-2013_12_3909_50_0.55556_25.mat; indices between 0 and 2110. The scenarios for which you want to save the damaged bridge data
    # TARGETS = [20]
    # y = aggregate_accessibility(TARGETS)
    # util.write_list(time.strftime("%Y%m%d")+'_accessTot.txt', y)
    # Hard-coded values, one per entry of TARGETS (25 each).
    # NOTE(review): presumably the saved output of the commented-out
    # aggregate_accessibility call above -- confirm.
    y = [
        18.2339128119,
        18.2338120181,
        18.2338952366,
        18.2338109314,
        18.2270352566,
        18.2177845713,
        18.1998501612,
        18.2177377231,
        18.233770681,
        18.2261430987,
        18.1691203163,
        18.1849249099,
        18.2141010264,
        18.2139231104,
        18.23383158091398,
        18.2253745585,
        18.2155757901,
        18.2012935522,
        18.2138556128,
        18.1758345198,
        18.226103683,
        18.2338211763,
        18.2260523679,
        18.2339486092,
        18.2215360497,
    ]
    weights = get_scenario_weights("12-Dec-2013_12_3909_50_0.55556_25_weights.csv")
    # get general x values. These are the various welfare metrics.
    the_filename = "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges1eps_extensive2.txt"
    new_x = freq_svm.build_x(TARGETS, the_filename)
    the_filename_full = (
        "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges3eps_extensive.txt"
    )  # indices in the first column start at 0
    x_for_predicting = freq_svm.build_x(range(1, 11728), the_filename_full)
    # x_for_predicting = freq_svm.build_x(range(1, 3092), the_filename_full)
    # Training and prediction rows are stacked so both are scaled with the
    # same statistics, then split back at the original training row count.
    the_x = np.vstack((new_x, x_for_predicting))
    the_x = preprocessing.scale(the_x)
    new_x = the_x[0 : new_x.shape[0], :]
    x_for_predicting = the_x[new_x.shape[0] :, :]
    print "built baby x"
    # pick threshold. Above this y value, the data is called a "match" and below is a "miss". For frequent itemsets, we'll be doing frequent items ONLY among the items predicted as a match, so be careful!
    target_annual_rate = 0.002  # 1 in 475 years
    threshold = freq_svm.identify_threshold(target_annual_rate, y, weights)
    print "by my method I find the threshold to be: ", threshold
    # The computed threshold is only printed; the hard-coded value below is
    # the one actually used for labelling.
    threshold = (
        18.19933616
    )  # from the Matlab script called cubeAnalysiswDamagedTransit.m for 475 year return period; alternative: 18.2139 (75th quantile)
    print "I think the threshold is: ", threshold
    # label events above threshold as match and below as miss
    match_label = 1
    miss_label = 0  # for purposes of accessibility, low is bad so these are the true high loss cases
    new_y = freq_svm.label(
        y, threshold, match_label, miss_label
    )  # less than threshold is miss label. So, this puts high loss in accessibility as miss (lower value)
    print "new_ y: ", new_y  # should be mostly 1's
    # ############################
    # Disabled: comparison plot of four SVM kernels on the first two features.
    # h = .02 # step size in the mesh
    # # we create an instance of SVM and fit out data. We do not scale our
    # # data since we want to plot the support vectors
    # C = 1.0 # SVM regularization parameter
    # svc = svm.SVC(kernel='linear', C=C, class_weight='auto').fit(new_x, new_y)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C, class_weight='auto').fit(new_x, new_y)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C, class_weight='auto').fit(new_x, new_y)
    # lin_svc = svm.LinearSVC(C=C, class_weight='auto').fit(new_x, new_y)
    # X = new_x.copy()
    # # create a mesh to plot in
    # x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    # y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
    #                      np.arange(y_min, y_max, h))
    # # title for the plots
    # titles = ['SVC with linear kernel',
    #           'SVC with RBF kernel',
    #           'SVC with polynomial (degree 3) kernel',
    #           'LinearSVC (linear kernel)']
    # for i, clf in enumerate((svc, rbf_svc, poly_svc, lin_svc)):
    #     # Plot the decision boundary. For that, we will assign a color to each
    #     # point in the mesh [x_min, m_max]x[y_min, y_max].
    #     pl.subplot(2, 2, i + 1)
    #     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    #     # Put the result into a color plot
    #     Z = Z.reshape(xx.shape)
    #     pl.contourf(xx, yy, Z, cmap=pl.cm.Paired)
    #     pl.axis('off')
    #     pl.xlabel('Percentage increase of bridges damaged (normalized)')
    #     pl.ylabel('Percentage incrase of travel time (normlized')
    #     # Plot also the training points
    #     pl.scatter(X[:, 0], X[:, 1], c=new_y, cmap=pl.cm.Paired)
    #     #plot also the prediction
    #     y_pred = clf.predict(x_for_predicting)
    #     pl.scatter(x_for_predicting[:, 0], x_for_predicting[:, 1], c= y_pred, marker='^', cmap = pl.cm.Paired)
    #     pl.title(titles[i])
    # pl.savefig('/Users/mahalia/Dropbox/research/dailyWriting/bridges/classificationComp.png')
    # ####################
    # train SVM
    svm_object = freq_svm.train(new_x, new_y, "auto")  # alternative class weights: {0:1, 1:1}
    ###### Done using Cube results. Now just use ITA results. #####
    # use trained svm to predict values from large set
    # print 'built x'
    y_pred = freq_svm.predict(x_for_predicting, svm_object)
    # y_pred = []
    # for i in range(11727):
    #     y_pred.append(0)
    util.write_list(time.strftime("%Y%m%d") + "_predictedY.txt", y_pred)
    # count up annual rates for each bridge in the list when event predicted as match
    miss_indices = []
    for index, value in enumerate(y_pred):
        if value == miss_label:  # high loss means low accessibility, which means miss
            miss_indices.append(index + 1)  # matlab indices starting from 1
    print 'we have this many "misses"=="predicted high loss cases": ', len(miss_indices)
    item_indices = range(3152)  # 1743 highway bridges and 1409 bart structures
    with open("20131212_3eps_damagedBridges.pkl", "rb") as f:
        list_of_baskets = pkl.load(
            f
        )  # this has list of bridge indices (MATLAB INDICES that start from 1) that are damaged
    # for basket in list_of_baskets:
    #     if '609' in basket:
    #         print 'found one: ', basket
    # NOTE: `weights` is rebound here, so get_support below receives the
    # ground-motion weights, not the CSV weights loaded at the top.
    lnsas, weights = travel_main_simple_simplev3.ground_motions(
        3, 0.00001, "input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt"
    )
    support_list = get_support(weights, miss_indices, item_indices, list_of_baskets)
    # output the sum of weights of scenarios where each bridge was damaged to plot in matlab. First column is counter starting at 1. second column is support
    util.write_2dlist(time.strftime("%Y%m%d") + "_bridgeIndex_support.txt", support_list)
    # Debugging breakpoint left in: execution stops here interactively.
    pdb.set_trace()
def main_tt():
    """Train and evaluate an SVM on travel-time data, then compute bridge support.

    Python 2 script entry point (uses print statements). Steps, in order:

    1. Build ``x_raw`` with ``freq_svm.build_x``; column 0 is used as the
       single (scaled) feature and column 1 as the value to be labelled.
    2. Split at ``break_point`` rows into a training part and a held-out part.
    3. Label the training values against a threshold, train with
       ``freq_svm.train`` and predict the held-out part.
    4. Write predicted and actual labels to dated files and print a
       classification report, a confusion matrix and 3-fold cross-validation
       accuracy.
    5. Collect the 1-based indices whose *actual* label is "miss" and pass
       them, with the damaged-bridge baskets, to ``get_support``; write the
       result.

    The function ends in ``pdb.set_trace()``, so it drops into the debugger
    instead of returning normally.
    """
    print "chin up"
    # get and aggregate travel time
    # get general x values. These are the various welfare metrics.
    the_filename_full = (
        "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges3eps_extensive.txt"
    )  # indices in the first column start at 0
    # x_raw is sliced two-dimensionally below (x_raw[0:break_point, :]), so
    # build_x must return an array that supports that indexing.
    x_raw = freq_svm.build_x(range(1, 11728), the_filename_full)
    the_x = preprocessing.scale([[row[0]] for row in x_raw])
    the_y = np.array([row[1] for row in x_raw])
    break_point = 9383
    new_x = np.array(the_x[0:break_point])  # 80%
    x_for_predicting = the_x[break_point:]  # 20%
    y = np.array([row[1] for row in x_raw[0:break_point, :]])  # should be as big as the training dataset
    numeps = 3  # the number of epsilons
    tol = (
        0.00001
    )  # the minimum annual rate that you care about in the original event set (the weight now is the original annual rate / number of epsilons per event)
    lnsas, full_weights = travel_main_simple_simplev3.ground_motions(
        numeps,
        tol,
        "/Users/mahalia/Documents/matlab/Research/Herbst2011/output_data/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt",
    )
    # Only the weights of the training rows are used to identify the threshold.
    weights = full_weights[0:break_point]
    print "built baby x"
    # pick threshold. Above this y value, the data is called a "match" and below is a "miss". For frequent itemsets, we'll be doing frequent items ONLY among the items predicted as a match, so be careful!
    target_annual_rate = 0.002  # 1 in 475 years
    threshold = freq_svm.identify_threshold(target_annual_rate, y, weights)
    print "i thought: ", threshold
    # The computed threshold is only printed; the hard-coded value below is
    # the one actually used for labelling.
    threshold = (
        346420000
    )  # earlier values: 18.19933616 (from the Matlab script called cubeAnalysiswDamagedTransit.m for 475 year return period), 18.2139 (75th quantile)
    print "I think the threshold is: ", threshold
    # label events above threshold as match and below as miss
    match_label = 1
    miss_label = 0  # for purposes of accessibility, low is bad so these are the true high loss cases
    new_y = np.array(freq_svm.label(y, threshold, match_label, miss_label))
    print "new_ y: ", new_y
    # ############################
    # Disabled: comparison plot of four SVM kernels on the first two features.
    # h = .02 # step size in the mesh
    # # we create an instance of SVM and fit out data. We do not scale our
    # # data since we want to plot the support vectors
    # C = 1.0 # SVM regularization parameter
    # svc = svm.SVC(kernel='linear', C=C, class_weight='auto').fit(new_x, new_y)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C, class_weight='auto').fit(new_x, new_y)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C, class_weight='auto').fit(new_x, new_y)
    # lin_svc = svm.LinearSVC(C=C, class_weight='auto').fit(new_x, new_y)
    # X = new_x.copy()
    # # create a mesh to plot in
    # x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    # y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
    #                      np.arange(y_min, y_max, h))
    # # title for the plots
    # titles = ['SVC with linear kernel',
    #           'SVC with RBF kernel',
    #           'SVC with polynomial (degree 3) kernel',
    #           'LinearSVC (linear kernel)']
    # for i, clf in enumerate((svc, rbf_svc, poly_svc, lin_svc)):
    #     # Plot the decision boundary. For that, we will assign a color to each
    #     # point in the mesh [x_min, m_max]x[y_min, y_max].
    #     pl.subplot(2, 2, i + 1)
    #     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    #     # Put the result into a color plot
    #     Z = Z.reshape(xx.shape)
    #     pl.contourf(xx, yy, Z, cmap=pl.cm.Paired)
    #     pl.axis('off')
    #     pl.xlabel('Percentage increase of bridges damaged (normalized)')
    #     pl.ylabel('Percentage incrase of travel time (normlized')
    #     # Plot also the training points
    #     pl.scatter(X[:, 0], X[:, 1], c=new_y, cmap=pl.cm.Paired)
    #     #plot also the prediction
    #     y_pred = clf.predict(x_for_predicting)
    #     pl.scatter(x_for_predicting[:, 0], x_for_predicting[:, 1], c= y_pred, marker='^', cmap = pl.cm.Paired)
    #     pl.title(titles[i])
    # pl.savefig('/Users/mahalia/Dropbox/research/dailyWriting/bridges/classificationComp.png')
    # ####################
    # train SVM
    print new_x.shape
    print new_y.shape
    svm_object = freq_svm.train(new_x, new_y, "auto")  # alternative class weights: {0:1, 1:1}
    ###### Done using Cube results. Now just use ITA results. #####
    # use trained svm to predict values from large set
    # print 'built x'
    y_pred = freq_svm.predict(x_for_predicting, svm_object)
    # y_pred = []
    # for i in range(11727):
    #     y_pred.append(0)
    util.write_list(time.strftime("%Y%m%d") + "_predictedY_tt.txt", y_pred)
    # Actual labels for the held-out rows (y_test) and for all rows (y_tot).
    y_test_raw = [row[1] for row in x_raw[break_point:, :]]
    y_test = freq_svm.label(y_test_raw, threshold, match_label, miss_label)
    y_tot_raw = [row[1] for row in x_raw]
    y_tot = freq_svm.label(y_tot_raw, threshold, match_label, miss_label)
    util.write_list(time.strftime("%Y%m%d") + "_actualY_tt.txt", y_test)
    print (classification_report(y_test, y_pred))
    print (confusion_matrix(y_test, y_pred, labels=range(2)))
    # Cross-validation runs on the full scaled feature set with labels
    # derived from all values, not just the training part.
    scores = cross_validation.cross_val_score(
        svm_object, the_x, freq_svm.label(the_y, threshold, match_label, miss_label), cv=3
    )
    print ("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # count up annual rates for each bridge in the list when event predicted as match
    miss_indices = []
    for index, value in enumerate(y_tot):  # cheating and just using the actual data instead of predicted one
        if value == miss_label:
            miss_indices.append(index + 1)  # matlab indices starting from 1
    print 'we have this many "misses"=="predicted high loss cases": ', len(miss_indices)
    item_indices = range(3152)  # 1743 highway bridges and 1409 bart structures
    with open("20131212_3eps_damagedBridges.pkl", "rb") as f:
        list_of_baskets = pkl.load(
            f
        )  # this has list of bridge indices (MATLAB INDICES that start from 1) that are damaged
    # NOTE: `weights` is rebound here from a different input file than the
    # one used for full_weights above; get_support receives these weights.
    lnsas, weights = travel_main_simple_simplev3.ground_motions(
        3, 0.00001, "input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt"
    )
    support_list = get_support(weights, miss_indices, item_indices, list_of_baskets)
    # output the sum of weights of scenarios where each bridge was damaged to plot in matlab. First column is counter starting at 1. second column is support
    util.write_2dlist(time.strftime("%Y%m%d") + "_bridgeIndex_support_tt.txt", support_list)
    # Debugging breakpoint left in: execution stops here interactively.
    pdb.set_trace()
for line in tempf: sys.stdout.write(line) tempf.close() os.remove(tfp) tfp = None # if html file specified if options.html: if not os.path.isfile(options.html): raise JustError("%s is not a file" % options.html) adjusted_fl = [os.path.relpath(f, os.path.dirname(options.html)) for f in dep_list] if options.output_mode == 'list': util.write_list(options.html, adjusted_fl, options.section) elif options.output_mode == 'tags': write_tags(options.html, adjusted_fl, options.section) elif options.output_mode == 'minified' or options.output_mode == 'one-script': adjusted_of = os.path.relpath(options.output_file, options.html) util.write_tags(options.html, [adjusted_of], options.section) else: adjusted_fl = [os.path.relpath(f, os.getcwd()) for f in dep_list] if options.output_mode == 'list': if options.output_file: of = open(options.output_file, "w") for f in adjusted_fl: of.writeline(f) of.close()