def make_cadence_map(csv_input, csv_output, index_of_interest, lol):
    """Write csv_output with two extra columns giving, for each row, the value of
    column `index_of_interest` in the previous and next entry of the same
    composition (ordered by start_measure).

    csv_input         -- unused here; kept for interface compatibility.
    csv_output        -- path passed through to gd2.write_data.
    index_of_interest -- column index (into gd2.get_headers()) to look up.
    lol               -- list of rows (list-of-lists) matching those headers.
    """
    headers = gd2.get_headers()
    start_measure_index = headers.index("start_measure")
    id_index = headers.index("id")
    header = headers[index_of_interest]

    # Build the output header row without mutating the shared header list.
    # (Previously the code mutated `headers` in place and then referenced an
    # undefined `new_headers` at write time -> NameError.)
    new_headers = headers + ["{}_before".format(header), "{}_after".format(header)]

    data = lol
    data_by_composition = {row[id_index]: [] for row in data}
    new_data = []

    # Group rows by composition id.
    for row in data:
        data_by_composition[row[id_index]].append(row)

    # Sort each composition's entries once, ascending by start_measure.
    # (Sorting inside the grouping loop, as before, re-sorted after every
    # append — same final order, quadratic work.)
    for entries in data_by_composition.values():
        entries.sort(key=lambda x: int(x[start_measure_index]))

    # For each row, find its neighbours within its (sorted) composition.
    # NOTE(review): list.index finds the first equal row — assumes rows within
    # a composition are distinct; TODO confirm against the data.
    for row in data:
        comp_entries = data_by_composition[row[id_index]]
        i = comp_entries.index(row)
        element_before = comp_entries[i - 1][index_of_interest] if i > 0 else "None"
        element_after = comp_entries[i + 1][index_of_interest] if i < len(comp_entries) - 1 else "None"
        new_data.append(row + [element_before, element_after])

    # Parenthesized single-argument print works identically under Py2 and Py3.
    print("File written successfully! Added 'before' and 'after' for {}".format(header))
    gd2.write_data(csv_output, new_headers, new_data)
import get_data as gd import get_data2 as gd2 import get_data3 as gd3 list_of_dicts = gd.get_data_list_of_dicts() full = gd2.get_data_list_of_dicts() full_headers = gd2.get_headers() headers = gd.get_headers() headers_income = gd3.get_headers() codes = {} full_clean = [] final_headers = [] for h in headers: h = h.split(" - ") code = h[0] try: name = h[1] codes[code] = name except: print h for h2 in headers_income: h2 = h2.split(" - ") code = h2[0] try: if not "Error" in h2[1]: name = h2[1] codes[code] = name except:
# Build a numeric feature matrix from the dataset and query a KD-tree
# for each row's 3 nearest neighbours (Euclidean distance).
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors.kd_tree import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np
import get_data2 as gd

headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts()

# Hoisted: the row count is loop-invariant (was recomputed via
# gd.get_data_slice on every iteration of both loops).
n_rows = len(gd.get_data_slice(headers[0], dicts))

# One list per data row; columns are appended in header order.
rows_lol = [[] for _ in range(n_rows)]
for h in headers:
    # The original branched on column index (i==1 or i==4) but both branches
    # made the identical call — the dead conditional is removed.
    column = gd.get_data_slice_numbers(h, dicts)
    for j in range(n_rows):
        rows_lol[j].append(column[j])

X = np.array(rows_lol)
#nbrs = NearestNeighbors(n_neighbors=5, algorithm ='kd_tree', metric ='jaccard').fit(X)
kdt = KDTree(X, leaf_size=30, metric='euclidean')
# Indices only (return_distance=False) of the 3 nearest neighbours per row,
# which include the query point itself at distance 0.
kdt.query(X, k=3, return_distance=False)
from sklearn.neighbors import KNeighborsClassifier from sklearn.neighbors import NearestNeighbors #from sklearn.neighbors.kd_tree import KDTree #from sklearn.neighbors import DistanceMetric import numpy as np import get_data2 as gd import json headers = gd.get_headers() dicts = gd.get_data_list_of_dicts() rows_lol = [] for i in range(len(gd.get_data_slice(headers[0], dicts))): rows_lol.append([]) print len(rows_lol) for i in range(len(headers)): column = gd.get_data_slice(headers[i], dicts) for j in range(len(gd.get_data_slice(headers[0], dicts))): rows_lol[j].append(column[j]) print rows_lol[0] #actually get similarities def compare_rows(row1, row2): counter = 0 for i in range(len(row1)):