def pick_scenarios(lnsas, weights, multi=True):
    scenarios = []
    wout = []
    index = 0
    easy = True  # whether to just take scenarios that are of engineering interest or do some more complicated selection
    print 'length of lnsas: ', len(lnsas)
    print 'length of weights: ', len(weights)
    numeps = int(round(len(lnsas)/4993.0))
    print 'numeps: ', numeps
    wsum = 0
    if easy == True:
        print 'easy'
        print 0.00001/float(numeps)
        for w in weights:
            wsum += weights[w]
            print weights[w]
            if weights[w] > 0.00001/float(numeps):  # 10^-5 divided by numeps, because the weights get renormalized when we take more than one epsilon realization per scenario
                scenarios.append(index)
                wout.append((index, weights[w]))
            index += 1
    else:
        (scenarios, wout) = get_praveen_results(lnsas)
    util.write_2dlist(time.strftime("%Y%m%d")+'_weights2.txt', wout)  # save the weights of the chosen scenarios
    print 'number of chosen scenarios: ', len(scenarios)
    print 'weights of all scenarios: ', wsum
    print 'the sum of the subset weights: ', sum([ww[1] for ww in wout])
    return scenarios
def pick_scenarios(lnsas, weights, multi, numeps):
    '''takes the scenarios with an annual rate of occurrence > 10^-5, OR picks them by some other criterion via get_praveen_results'''
    scenarios = []
    wout = []
    index = 0
    easy = True  # whether to just take scenarios that are of engineering interest or do some more complicated selection
    print 'length of lnsas: ', len(lnsas)
    print 'length of weights: ', len(weights)
    print 'numeps: ', numeps
    wsum = 0
    if easy == True:
        print 'easy'
        for w in weights:
            wsum += weights[w]
            if weights[w] > 0.00001/float(numeps):  # 10^-5 divided by numeps, because the weights get renormalized when we take more than one epsilon realization per scenario
                scenarios.append(index)
                wout.append((index, weights[w]))
            index += 1
    else:
        (scenarios, wout) = get_praveen_results(lnsas)
    util.write_2dlist(time.strftime("%Y%m%d")+'_weights_' + str(numeps) + 'eps.txt', wout)  # save the weights of the chosen scenarios
    print 'number of chosen scenarios: ', len(scenarios)
    print 'weights of all scenarios: ', wsum
    print 'the sum of the subset weights: ', sum([ww[1] for ww in wout])
    return scenarios
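# A worked check of the threshold arithmetic above (a sketch, not part of the
# original pipeline): with numeps epsilon realizations per rupture, each
# realization's weight is the original annual rate divided by numeps, so
# comparing the weight against 0.00001/numeps is the same as comparing the
# original annual rate against 10^-5.
def keeps_scenario(original_annual_rate, numeps):
    realization_weight = original_annual_rate / float(numeps)  # the renormalized weight
    return realization_weight > 0.00001 / float(numeps)  # equivalent to original_annual_rate > 10^-5

assert keeps_scenario(0.0002, 5)        # rate 2e-4 > 1e-5: kept, for any numeps
assert not keeps_scenario(0.000001, 5)  # rate 1e-6 < 1e-5: dropped, for any numeps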
def main():
    seed(0)  # set seed
    # get graph info
    G = nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle")  # the alternative noCentroidsLength15.gpickle does not have centroidal links
    print '|V| = ', len(G.nodes())
    print '|E| = ', len(G.edges())
    G = nx.freeze(G)  # prevents edges or nodes from being added or deleted
    # get od info. This is in the format of a dict keyed by od, like demand[sd1][sd2] = 200000.
    demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv')
    # get earthquake info (the older 20130107_mtc_*1.pkl file set can be substituted here)
    q = QuakeMaps('input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl')
    q.num_sites = len(q.lnsas[0])
    # determine which scenarios you want to run
    good_indices = pick_scenarios(q.lnsas, q.weights)
    travel_index_times = []
    index = 0
    # loop over scenarios
    for scenario in q.lnsas:  # each 'scenario' has 1557 values of lnsa, i.e. one per site
        if index in good_indices:
            print 'index: ', index
            (travel_time, vmt) = run_iteration(G, scenario, demand)
            travel_index_times.append((index, travel_time, vmt))
            # print 'new travel times: ', travel_index_times
        if index % 100 == 0:
            util.write_2dlist(time.strftime("%Y%m%d")+'_travel_time.txt', travel_index_times)
        index += 1  # IMPORTANT
    util.write_2dlist(time.strftime("%Y%m%d")+'_travel_time.txt', travel_index_times)
def main():
    seed(0)  # set seed
    # get graph info
    G = nx.read_gpickle("input/graphMTC_CentroidsLength6.gpickle")  # the alternative noCentroidsLength15.gpickle does not have centroidal links. There is also the choice of a proper multidigraph: nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle")
    G = nx.freeze(G)  # prevents edges or nodes from being added or deleted
    # get od info. This is in the format of a dict keyed by od, like demand[sd1][sd2] = 200000.
    demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv')
    # get earthquake info (the older 20130107_mtc_*1.pkl file set can be substituted here)
    q = QuakeMaps('input/20130210_mtc_total_lnsas3.pkl', 'input/20130210_mtc_magnitudes3.pkl', 'input/20130210_mtc_faults3.pkl', 'input/20130210_mtc_weights3.pkl', 'input/20130210_mtc_scenarios3.pkl')
    print 'weights: ', q.weights
    q.num_sites = len(q.lnsas[0])
    # determine which scenarios you want to run
    good_indices = pick_scenarios(q.lnsas, q.weights)
    travel_index_times = []
    index = 0
    # loop over scenarios
    print 'size of lnsas: ', len(q.lnsas)
    for scenario in q.lnsas:  # each 'scenario' has 1557 values of lnsa, i.e. one per site
        if index in good_indices:
            print 'index: ', index
            (bridges, flow, path, path2) = run_simple_iteration(G, scenario, demand, False)
            travel_index_times.append((index, bridges, flow, path, path2))
            # print 'new travel times: ', travel_index_times
        if index % 1000 == 0:
            util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths4.txt', travel_index_times)
        index += 1  # IMPORTANT
    util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths4.txt', travel_index_times)
    print 'the number of scenarios I considered doing: ', index
    print 'the number of scenarios I actually did: ', len(travel_index_times)
def main():
    # TARGETS: indices between 0 and 2110, the scenarios for which you want to save the damaged bridge data. Data within: 12-Dec-2013_12_3909_50_0.55556_25.mat
    TARGETS = [20, 33, 36, 137, 142, 143, 144, 151, 152, 159, 166, 167, 171, 173, 183, 184, 192, 193, 194, 196, 205, 1676, 1692, 2851, 2914]
    TARGETS = ["173_high"]  # we have retrofitted the top 25% ranked by accessibility impact
    weights = get_scenario_weights("12-Dec-2013_12_3909_50_0.55556_25_weights.csv")  # the annual likelihood of occurrence of each of the scenarios ("targets")
    y = aggregate_accessibility(TARGETS, True)
    util.write_list(time.strftime("%Y%m%d") + "_accessTotACC_fromMain.txt", y)
    # TODO: implement the 3 functions below so they actually do something
    y_array = aggregate_accessibility_by_income(TARGETS, True)
    util.write_2dlist(time.strftime("%Y%m%d") + "_accessByIncome_fromMain.txt", y_array)
    y = aggregate_accessibility_by_taz(TARGETS, weights, True)
    util.write_list(time.strftime("%Y%m%d") + "_accessbyTAZ_fromMain.txt", y)
    y_array = aggregate_accessibility_by_taz_by_income(TARGETS, weights, True)
    util.write_2dlist(time.strftime("%Y%m%d") + "_accessByTAZByIncome_fromMain.txt", y_array)
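# get_scenario_weights is called above but not defined in this file. A minimal
# sketch, assuming the .csv holds one annual rate per row, in the same order as
# the scenarios; the real reader may differ.
import csv

def get_scenario_weights(filename):
    weights = []
    with open(filename, 'rb') as f:  # 'rb' mode for the csv module under Python 2
        for row in csv.reader(f):
            if row:  # skip blank lines
                weights.append(float(row[0]))  # assumed: first column is the annual rate of occurrence
    return weights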
def save_results(bridge_array_internal, bridge_array_new, travel_index_times, numeps, seed):
    util.write_2dlist('output/' + time.strftime("%Y%m%d") + '_bridges_flow_path_tt_vmt_bridges_allBridges_roadonly_' + str(numeps) + 'eps_extensive_seed' + str(seed) + '.txt', travel_index_times)
    with open('output/' + time.strftime("%Y%m%d") + '_' + str(numeps) + 'sets_damagedBridgesInternal_roadonly_seed' + str(seed) + '.pkl', 'wb') as f:
        pickle.dump(bridge_array_internal, f)
    with open('output/' + time.strftime("%Y%m%d") + '_' + str(numeps) + 'sets_damagedBridgesNewID_roadonly_seed' + str(seed) + '.pkl', 'wb') as f:
        pickle.dump(bridge_array_new, f)
    print bridge_array_new
    print bridge_array_internal
    print travel_index_times
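# A read-back helper for the pickles written above (a usage sketch, not in the
# original): returns the list-of-lists of damaged-bridge IDs, one list per run.
def load_damaged_bridges(pkl_path):
    with open(pkl_path, 'rb') as f:
        return pickle.load(f)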
def main():
    # cd /Volumes/bakergroup$/
    ########################################
    # get results for the base case
    folder_name = 'base_no_road_damage_but_reduced_transit'
    folder_name = 'no_damage'
    base = aggregate_results(folder_name)
    print base
    base_results = [base]
    # print base_results
    # print ['scenario', 'bridge_per', 'vmt', 'vht', 'low_auto', 'med_auto', 'high_auto', 'veryhighauto', 'autoPeakTotal', 'autoOffPeakTotal'].append(base_results)
    util.write_2dlist(time.strftime("%Y%m%d")+'_scen_bridge_tt_vmt_6acc_vmt_vhtbtop20.txt', base_results)
    ########################################
    print 'now the next'
    # get results for all the other runs
    scenario_results = []
    folder_names = TARGETS  # [261] #TARGETS
    for folder_name in folder_names:
        scenario_results.append(aggregate_results(folder_name))
    print 'base: ', base_results
    print 'scenario: ', scenario_results
    util.write_2dlist(time.strftime("%Y%m%d")+'_scen_bridge_tt_vmt_6acc_vmt_vhttop20261.txt', scenario_results)
def main():
    '''can change the number of epsilons below'''
    seed(0)  # set seed
    simple = False  # simple is just %bridges out, which is computationally efficient
    # get graph info
    # G = nx.read_gpickle("input/graphMTC_CentroidsLength6.gpickle")  # the alternative noCentroidsLength15.gpickle does not have centroidal links. There is also the choice of a proper multidigraph: nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle")
    G = nx.read_gpickle("input/graphMTC_CentroidsLength6highways.gpickle")  # does not have centroidal links. Directed! Only one edge between nodes.
    # G1 = nx.read_gpickle("input/graphMTC_CentroidsLength5.gpickle")  # undirected, multiple edges. It is a little funky because it has two links between A and B and two between B and A, so is that double-counting?
    # '''a multigraph: An undirected graph class that can store multiedges.
    # Multiedges are multiple edges between two nodes. Each edge
    # can hold optional data or attributes.
    # A MultiGraph holds undirected edges. Self loops are allowed.'''
    print 'nodes: ', len(G.nodes())
    G = nx.freeze(G)  # prevents edges or nodes from being added or deleted
    # G1 = nx.freeze(G1)
    # get od info. This is in the format of a dict keyed by od, like demand[sd1][sd2] = 200000.
    demand = bd.build_demand('input/BATS2000_34SuperD_TripTableData.csv', 'input/superdistricts_centroids.csv')
    # we just take a percentage in ita.py: to get morning flows, take 5.3% of daily driver values, i.e. 11.5/(4.5*6+11.5*10+14*4+4.5*4) from Figure S10 of http://www.nature.com/srep/2012/121220/srep01001/extref/srep01001-s1.pdf
    # get earthquake info. UPDATED May 23, 2013 #TODO (the older 20130107_mtc_*1.pkl or 20130210_mtc_*3.pkl file sets can be substituted here)
    q = QuakeMaps('input/20130612_mtc_total_lnsas5.pkl', 'input/20130612_mtc_magnitudes5.pkl', 'input/20130612_mtc_faults5.pkl', 'input/20130612_mtc_weights5.pkl', 'input/20130612_mtc_scenarios5.pkl')
    q.num_sites = len(q.lnsas[0])
    numeps = 5  # CHANGE THIS as needed!
    # determine which scenarios you want to run
    good_indices = pick_scenarios(q.lnsas, q.weights, True, numeps)
    targets = good_indices  # e.g. [12, 35, 55, 71, 75, 82, 86, 87, 88, 106, 108, 115, 121, 231, 241, 247, 256, 258, 260, 261, 676, 730, 733, 1231, 1548]. Indices between 0 and 2110: the scenarios for which you want to save the damaged bridge data
    print 'the number of scenarios for which I want to save bridge info: ', len(targets)
    travel_index_times = []
    index = 0
    good_index = 0
    # pdb.set_trace()
    # figure out what the travel time and vmt are if there is no damage to any bridges
    no_damage_travel_time = -1
    no_damage_vmt = -1
    found_no_damage = False
    for scenario in q.lnsas:  # each 'scenario' has 1xxx values of lnsa, i.e. one per site
        while found_no_damage == False:
            (bridges, flow, path, path2, newG) = run_simple_iteration(G, scenario, demand, False, good_index, targets, True)  # since we are looking for the no-damage case, it is ok to clean up
            if bridges == 0:
                found_no_damage = True
                print 'found case with no damage so I will save those and save you work later on'
                (no_damage_travel_time, no_damage_vmt) = run_iteration(G, scenario, demand, newG)
    # loop over scenarios
    print 'size of lnsas: ', len(q.lnsas)
    for scenario in q.lnsas:  # each 'scenario' has 1xxx values of lnsa, i.e. one per site
        if index in good_indices:
            print 'index: ', index
            if simple == True:
                (bridges, flow, path, path2, newG) = run_simple_iteration(G, scenario, demand, False, good_index, targets)
                travel_index_times.append((index, bridges, flow, path, path2, -1, -1, bridges/float(q.num_sites), -1))
            else:
                (bridges, flow, path, path2, newG) = run_simple_iteration(G, scenario, demand, False, good_index, targets, False)  # doesn't clean up the damage
                print 'what i found for bridges: ', bridges
                if bridges == 0:
                    travel_time = no_damage_travel_time
                    vmt = no_damage_vmt
                else:
                    print 'attempting new'
                    (travel_time, vmt) = run_iteration(G, scenario, demand, newG, True)
                print 'what i have for (tt, vmt): ', (travel_time, vmt)
                travel_index_times.append((index, bridges, flow, path, path2, travel_time, vmt, bridges/float(q.num_sites), -1))
            good_index += 1
        # travel_index_times.append((index, travel_time, vmt))
        # print 'new travel times: ', travel_index_times
        if index % 1000 == 0:
            print 'index: ', index
            util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths_5eps_extensive.txt', travel_index_times)
        index += 1  # IMPORTANT
    util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_paths_5eps_extensive.txt', travel_index_times)
    print 'the number of scenarios I considered doing: ', index
    print 'the number of scenarios I actually did: ', len(travel_index_times)
    print 'i.e.: ', good_index
    print 'and now, I will save a dataset of damaged bridges in each scenario'
    util.write_2dlist(time.strftime("%Y%m%d")+'_damaged_bridges_5eps_extensive.txt', BRIDGE_DAMAGE_DATASET)
    with open(time.strftime("%Y%m%d")+'_damaged_bridges_5eps_extensive.pkl', 'wb') as f:
        pickle.dump(BRIDGE_DAMAGE_DATASET, f)
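# createC1 and scanD are called by the Apriori code that follows but are not
# defined there. Minimal sketches in the style of the classic Apriori
# implementation, with scanD adapted so each transaction contributes its
# scenario weight (annual rate) instead of a unit count; the real versions may
# differ.
def createC1(dataset):
    '''Build the candidate 1-item sets, one frozenset per distinct item.'''
    c1 = set()
    for transaction in dataset:
        for item in transaction:
            c1.add(item)
    return [frozenset([item]) for item in sorted(c1)]

def scanD(D, Ck, weights, minsupport_weighted):
    '''Weighted support scan: return the candidates whose summed scenario
    weight meets minsupport_weighted, plus the support of every candidate.
    Assumes weights is indexed in the same order as the transactions in D.'''
    ss_cnt = {}
    for tid, transaction in enumerate(D):
        for candidate in Ck:
            if candidate.issubset(transaction):
                ss_cnt[candidate] = ss_cnt.get(candidate, 0.0) + weights[tid]
    ret_list = []
    support_data = {}
    for key, weighted_support in ss_cnt.items():
        if weighted_support >= minsupport_weighted:
            ret_list.append(key)
        support_data[key] = weighted_support
    return ret_list, support_data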
def aprioriGen(freq_sets, k):
    '''Generate candidate k-item sets by joining frequent (k-1)-item sets that share their first k-2 items.'''
    retList = []
    lenLk = len(freq_sets)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            L1 = list(freq_sets[i])[:k - 2]
            L2 = list(freq_sets[j])[:k - 2]
            L1.sort()
            L2.sort()
            if L1 == L2:
                retList.append(freq_sets[i] | freq_sets[j])
    return retList

def apriori(dataset, weights, minsupport=0.5):
    '''Given a data set and a support level, generate the frequent itemsets and their weighted support.'''
    C1 = createC1(dataset)
    D = map(set, dataset)
    minsupport_weighted = sum(weights)*minsupport
    L1, support_data = scanD(D, C1, weights, minsupport_weighted)
    L = [L1]
    k = 2
    while (len(L[k - 2]) > 0):
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, weights, minsupport_weighted)  # use the weighted threshold consistently
        support_data.update(supK)
        L.append(Lk)
        k += 1  # TODO: check that we want to do this
    return L, support_data

if __name__ == '__main__':
    weights, bla = load_dataset()
    min_support = 0.4
    r, s = apriori(bla, weights, min_support)
    print 'r: ', r
    print 's: ', s
    result = sorted(s.iterkeys(), key=lambda k: s[k], reverse=True)
    util.write_2dlist(time.strftime("%Y%m%d")+'_damaged_bridges_5eps_extensive_frequentitemsets'+str(min_support)+'.txt', result)
    print result
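# load_dataset is called in the __main__ block above but not defined in this
# file. A minimal sketch, assuming it pairs the scenario weights from
# ground_motions with the pickled damaged-bridge baskets saved by the
# simulation scripts above (the filenames are the ones used elsewhere in this
# codebase); the real version may differ.
import pickle

def load_dataset():
    lnsas, weights = travel_main_simple_simplev3.ground_motions(3, 0.00001, 'input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt')
    with open('20131212_3eps_damagedBridges.pkl', 'rb') as f:
        baskets = pickle.load(f)  # one list of damaged-bridge IDs per scenario
    return weights, baskets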
def save_results(bridge_array, travel_index_times, numeps):
    util.write_2dlist(time.strftime("%Y%m%d")+'_bridges_flow_path_tt_vmt_bridges' + str(numeps) + 'eps_extensive.txt', travel_index_times)
    with open(time.strftime("%Y%m%d")+'_' + str(numeps) + 'eps_damagedBridges.pkl', 'wb') as f:
        pickle.dump(bridge_array, f)
def main():
    # get and aggregate accessibility from cube using the import_acc_results.py file
    # TARGETS = [12, 35, 55, 71, 75, 82, 86, 87, 88, 106, 108, 115, 121, 231, 241, 247, 256, 258, 260, 261, 676, 730, 733, 1231, 1548]  # first set of Cube runs
    # TARGETS: indices between 0 and 2110, the scenarios for which you want to save the damaged bridge data. Data within: 12-Dec-2013_12_3909_50_0.55556_25.mat
    TARGETS = [20, 33, 36, 137, 142, 143, 144, 151, 152, 159, 166, 167, 171, 173, 183, 184, 192, 193, 194, 196, 205, 1676, 1692, 2851, 2914]
    # TARGETS = [20]
    # y = aggregate_accessibility(TARGETS)
    # util.write_list(time.strftime("%Y%m%d")+'_accessTot.txt', y)
    y = [18.2339128119, 18.2338120181, 18.2338952366, 18.2338109314, 18.2270352566, 18.2177845713, 18.1998501612, 18.2177377231, 18.233770681, 18.2261430987, 18.1691203163, 18.1849249099, 18.2141010264, 18.2139231104, 18.23383158091398, 18.2253745585, 18.2155757901, 18.2012935522, 18.2138556128, 18.1758345198, 18.226103683, 18.2338211763, 18.2260523679, 18.2339486092, 18.2215360497]
    weights = get_scenario_weights("12-Dec-2013_12_3909_50_0.55556_25_weights.csv")
    # get general x values. These are the various welfare metrics.
    the_filename = "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges1eps_extensive2.txt"
    new_x = freq_svm.build_x(TARGETS, the_filename)
    the_filename_full = "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges3eps_extensive.txt"  # indices in the first column start at 0
    x_for_predicting = freq_svm.build_x(range(1, 11728), the_filename_full)
    # x_for_predicting = freq_svm.build_x(range(1, 3092), the_filename_full)
    the_x = np.vstack((new_x, x_for_predicting))
    the_x = preprocessing.scale(the_x)
    new_x = the_x[0:new_x.shape[0], :]
    x_for_predicting = the_x[new_x.shape[0]:, :]
    print "built baby x"
    # pick threshold. Above this y value, the data is called a "match" and below is a "miss". For frequent itemsets, we'll be finding frequent items ONLY among the items predicted as a match, so BEWARE!
    target_annual_rate = 0.002  # 1 in 475 years
    threshold = freq_svm.identify_threshold(target_annual_rate, y, weights)
    print "by my method I find the threshold to be: ", threshold
    threshold = 18.19933616  # from the Matlab script called cubeAnalysiswDamagedTransit.m for the 475-year return period  # 18.2139 is the 75th quantile
    print "I think the threshold is: ", threshold
    # label events above the threshold as match and below as miss
    match_label = 1
    miss_label = 0
    # for purposes of accessibility, low is bad, so these are the true high-loss cases
    new_y = freq_svm.label(y, threshold, match_label, miss_label)  # less than threshold gets the miss label, so this puts high loss in accessibility as miss (lower value)
    print "new_y: ", new_y  # should be mostly 1's
    # ############################
    # h = .02  # step size in the mesh
    # # we create an instance of SVM and fit our data. We do not scale our
    # # data since we want to plot the support vectors
    # C = 1.0  # SVM regularization parameter
    # svc = svm.SVC(kernel='linear', C=C, class_weight='auto').fit(new_x, new_y)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C, class_weight='auto').fit(new_x, new_y)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C, class_weight='auto').fit(new_x, new_y)
    # lin_svc = svm.LinearSVC(C=C, class_weight='auto').fit(new_x, new_y)
    # X = new_x.copy()
    # # create a mesh to plot in
    # x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    # y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
    #                      np.arange(y_min, y_max, h))
    # # titles for the plots
    # titles = ['SVC with linear kernel',
    #           'SVC with RBF kernel',
    #           'SVC with polynomial (degree 3) kernel',
    #           'LinearSVC (linear kernel)']
    # for i, clf in enumerate((svc, rbf_svc, poly_svc, lin_svc)):
    #     # Plot the decision boundary. For that, we will assign a color to each
    #     # point in the mesh [x_min, x_max]x[y_min, y_max].
    #     pl.subplot(2, 2, i + 1)
    #     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    #     # Put the result into a color plot
    #     Z = Z.reshape(xx.shape)
    #     pl.contourf(xx, yy, Z, cmap=pl.cm.Paired)
    #     pl.axis('off')
    #     pl.xlabel('Percentage increase of bridges damaged (normalized)')
    #     pl.ylabel('Percentage increase of travel time (normalized)')
    #     # Plot also the training points
    #     pl.scatter(X[:, 0], X[:, 1], c=new_y, cmap=pl.cm.Paired)
    #     # plot also the prediction
    #     y_pred = clf.predict(x_for_predicting)
    #     pl.scatter(x_for_predicting[:, 0], x_for_predicting[:, 1], c=y_pred, marker='^', cmap=pl.cm.Paired)
    #     pl.title(titles[i])
    # pl.savefig('/Users/mahalia/Dropbox/research/dailyWriting/bridges/classificationComp.png')
    # ####################
    # train SVM
    svm_object = freq_svm.train(new_x, new_y, "auto")  # {0:1, 1:1})
    ###### Done using Cube results. Now just use ITA results.... #####
    # use the trained svm to predict values from the large set
    # print 'built x'
    y_pred = freq_svm.predict(x_for_predicting, svm_object)
    # y_pred = [0 for i in range(11727)]
    util.write_list(time.strftime("%Y%m%d") + "_predictedY.txt", y_pred)
    # count up annual rates for each bridge in the list when an event is predicted as a match
    miss_indices = []
    for index, value in enumerate(y_pred):
        if value == miss_label:  # high loss means low accessibility, which means miss
            miss_indices.append(index + 1)  # matlab indices starting from 1
    print 'we have this many "misses"=="predicted high loss cases": ', len(miss_indices)
    item_indices = range(3152)  # 1743 highway bridges and 1409 BART structures
    with open("20131212_3eps_damagedBridges.pkl", "rb") as f:
        list_of_baskets = pkl.load(f)  # list of the bridge indices (MATLAB INDICES that start from 1) that are damaged
    # for basket in list_of_baskets:
    #     if '609' in basket:
    #         print 'found one: ', basket
    lnsas, weights = travel_main_simple_simplev3.ground_motions(3, 0.00001, "input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt")
    support_list = get_support(weights, miss_indices, item_indices, list_of_baskets)
    # output the sum of weights of scenarios where each bridge was damaged, to plot in matlab. First column is a counter starting at 1; second column is the support
    util.write_2dlist(time.strftime("%Y%m%d") + "_bridgeIndex_support.txt", support_list)
    pdb.set_trace()
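# get_support is called above but not defined in this file. A minimal sketch:
# for each bridge, sum the weights (annual rates) of the high-loss ("miss")
# scenarios whose damage basket contains that bridge. It assumes the baskets
# store MATLAB-style bridge IDs (starting at 1) as strings, as the commented
# '609' membership check above suggests; the real version may differ.
def get_support(weights, miss_indices, item_indices, list_of_baskets):
    miss_set = set(miss_indices)  # scenario indices, MATLAB-style (start at 1)
    support_list = []
    for bridge in item_indices:
        bridge_id = str(bridge + 1)  # 0-based item index -> MATLAB-style ID
        support = 0.0
        for scenario_index, basket in enumerate(list_of_baskets):
            if (scenario_index + 1) in miss_set and bridge_id in basket:
                support += weights[scenario_index]
        support_list.append([bridge + 1, support])  # first column: counter starting at 1
    return support_list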
def main_tt():
    print "chin up"
    # get and aggregate travel time
    # get general x values. These are the various welfare metrics.
    the_filename_full = "/Users/mahalia/ita/20131212_bridges_flow_path_tt_vmt_bridges3eps_extensive.txt"  # indices in the first column start at 0
    x_raw = freq_svm.build_x(range(1, 11728), the_filename_full)
    the_x = preprocessing.scale([[row[0]] for row in x_raw])
    the_y = np.array([row[1] for row in x_raw])
    break_point = 9383
    new_x = np.array(the_x[0:break_point])  # 80%
    x_for_predicting = the_x[break_point:]  # 20%
    y = np.array([row[1] for row in x_raw[0:break_point, :]])  # should be as big as the training dataset
    numeps = 3  # the number of epsilons
    tol = 0.00001  # the minimum annual rate that you care about in the original event set (the weight now is the original annual rate / number of epsilons per event)
    lnsas, full_weights = travel_main_simple_simplev3.ground_motions(numeps, tol, "/Users/mahalia/Documents/matlab/Research/Herbst2011/output_data/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt")
    weights = full_weights[0:break_point]
    print "built baby x"
    # pick threshold. Above this y value, the data is called a "match" and below is a "miss". For frequent itemsets, we'll be finding frequent items ONLY among the items predicted as a match, so BEWARE!
    target_annual_rate = 0.002  # 1 in 475 years
    threshold = freq_svm.identify_threshold(target_annual_rate, y, weights)
    print "i thought: ", threshold
    threshold = 346420000  # cf. 18.19933616 from the Matlab script called cubeAnalysiswDamagedTransit.m for the 475-year return period  # 18.2139 is the 75th quantile
    print "I think the threshold is: ", threshold
    # label events above the threshold as match and below as miss
    match_label = 1
    miss_label = 0
    # for purposes of accessibility, low is bad, so these are the true high-loss cases
    new_y = np.array(freq_svm.label(y, threshold, match_label, miss_label))
    print "new_y: ", new_y
    # ############################
    # h = .02  # step size in the mesh
    # # we create an instance of SVM and fit our data. We do not scale our
    # # data since we want to plot the support vectors
    # C = 1.0  # SVM regularization parameter
    # svc = svm.SVC(kernel='linear', C=C, class_weight='auto').fit(new_x, new_y)
    # rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C, class_weight='auto').fit(new_x, new_y)
    # poly_svc = svm.SVC(kernel='poly', degree=3, C=C, class_weight='auto').fit(new_x, new_y)
    # lin_svc = svm.LinearSVC(C=C, class_weight='auto').fit(new_x, new_y)
    # X = new_x.copy()
    # # create a mesh to plot in
    # x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    # y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
    #                      np.arange(y_min, y_max, h))
    # # titles for the plots
    # titles = ['SVC with linear kernel',
    #           'SVC with RBF kernel',
    #           'SVC with polynomial (degree 3) kernel',
    #           'LinearSVC (linear kernel)']
    # for i, clf in enumerate((svc, rbf_svc, poly_svc, lin_svc)):
    #     # Plot the decision boundary. For that, we will assign a color to each
    #     # point in the mesh [x_min, x_max]x[y_min, y_max].
    #     pl.subplot(2, 2, i + 1)
    #     Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    #     # Put the result into a color plot
    #     Z = Z.reshape(xx.shape)
    #     pl.contourf(xx, yy, Z, cmap=pl.cm.Paired)
    #     pl.axis('off')
    #     pl.xlabel('Percentage increase of bridges damaged (normalized)')
    #     pl.ylabel('Percentage increase of travel time (normalized)')
    #     # Plot also the training points
    #     pl.scatter(X[:, 0], X[:, 1], c=new_y, cmap=pl.cm.Paired)
    #     # plot also the prediction
    #     y_pred = clf.predict(x_for_predicting)
    #     pl.scatter(x_for_predicting[:, 0], x_for_predicting[:, 1], c=y_pred, marker='^', cmap=pl.cm.Paired)
    #     pl.title(titles[i])
    # pl.savefig('/Users/mahalia/Dropbox/research/dailyWriting/bridges/classificationComp.png')
    # ####################
    # train SVM
    print new_x.shape
    print new_y.shape
    svm_object = freq_svm.train(new_x, new_y, "auto")  # {0:1, 1:1})
    ###### Done using Cube results. Now just use ITA results.... #####
    # use the trained svm to predict values from the large set
    # print 'built x'
    y_pred = freq_svm.predict(x_for_predicting, svm_object)
    # y_pred = [0 for i in range(11727)]
    util.write_list(time.strftime("%Y%m%d") + "_predictedY_tt.txt", y_pred)
    y_test_raw = [row[1] for row in x_raw[break_point:, :]]
    y_test = freq_svm.label(y_test_raw, threshold, match_label, miss_label)
    y_tot_raw = [row[1] for row in x_raw]
    y_tot = freq_svm.label(y_tot_raw, threshold, match_label, miss_label)
    util.write_list(time.strftime("%Y%m%d") + "_actualY_tt.txt", y_test)
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred, labels=range(2)))
    scores = cross_validation.cross_val_score(svm_object, the_x, freq_svm.label(the_y, threshold, match_label, miss_label), cv=3)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    # count up annual rates for each bridge in the list when an event is predicted as a match
    miss_indices = []
    for index, value in enumerate(y_tot):  # cheating and just using the actual data instead of the predicted one
        if value == miss_label:
            miss_indices.append(index + 1)  # matlab indices starting from 1
    print 'we have this many "misses"=="predicted high loss cases": ', len(miss_indices)
    item_indices = range(3152)  # 1743 highway bridges and 1409 BART structures
    with open("20131212_3eps_damagedBridges.pkl", "rb") as f:
        list_of_baskets = pkl.load(f)  # list of the bridge indices (MATLAB INDICES that start from 1) that are damaged
    lnsas, weights = travel_main_simple_simplev3.ground_motions(3, 0.00001, "input/SF2_mtc_total_3909scenarios_1743bridgesPlusBART_3eps.txt")
    support_list = get_support(weights, miss_indices, item_indices, list_of_baskets)
    # output the sum of weights of scenarios where each bridge was damaged, to plot in matlab. First column is a counter starting at 1; second column is the support
    util.write_2dlist(time.strftime("%Y%m%d") + "_bridgeIndex_support_tt.txt", support_list)
    pdb.set_trace()
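# freq_svm.label is used throughout but not shown here. A minimal sketch
# matching the comments above ("less than threshold is miss label"); the real
# version may differ.
def label(y, threshold, match_label, miss_label):
    return [match_label if value >= threshold else miss_label for value in y]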