Example #1
import get_data2 as gd2

def make_cadence_map(csv_input, csv_output, index_of_interest, lol):
  #Variable setup.
  headers = gd2.get_headers()
  start_measure_index = headers.index("start_measure")
  id_index = headers.index("id")
  header = headers[index_of_interest]
  headers += ["{}_before".format(header)]
  headers += ["{}_after".format(header)]
  data = lol
  data_by_composition = {row[id_index]: [] for row in data}
  new_data = []

  #Group the rows by composition, then sort each composition's entries
  #(ascending) by the numeric start_measure column.
  for row in data:
    data_by_composition[row[id_index]].append(row)
  for entries in data_by_composition.values():
    entries.sort(key=lambda x: int(x[start_measure_index]))

  #For each entry, look up the previous and next entry in the same composition.
  for row in data:
    comp_entries = data_by_composition[row[id_index]]
    i = comp_entries.index(row)
    element_before = comp_entries[i-1][index_of_interest] if i > 0 else "None"
    element_after = comp_entries[i+1][index_of_interest] if i < len(comp_entries)-1 else "None"
    new_data.append(row + [element_before, element_after])

  gd2.write_data(csv_output, headers, new_data)
  print "File written successfully! Added 'before' and 'after' for {}".format(header)
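
A hedged usage sketch of calling make_cadence_map, assuming get_data2 exposes the get_headers() and get_data_list_of_dicts() helpers used elsewhere in these examples; the column index and file names below are placeholders for illustration only:

import get_data2 as gd2

headers = gd2.get_headers()
dicts = gd2.get_data_list_of_dicts()
#Rebuild each record as a list of values in header order, matching the lol argument.
rows = [[d[h] for h in headers] for d in dicts]

#Placeholder file names and column index.
make_cadence_map("cadences.csv", "cadences_with_context.csv", 5, rows)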
Example #2
import get_data as gd
import get_data2 as gd2
import get_data3 as gd3

list_of_dicts = gd.get_data_list_of_dicts()
full = gd2.get_data_list_of_dicts()
full_headers = gd2.get_headers()
headers = gd.get_headers()
headers_income = gd3.get_headers()

codes = {}
full_clean = []
final_headers = []

#Map each column code (the part before " - ") to its descriptive name.
for h in headers:
    parts = h.split(" - ")
    code = parts[0]
    try:
        name = parts[1]
        codes[code] = name
    except IndexError:
        #Header had no " - name" part.
        print parts

for h2 in headers_income:
    parts = h2.split(" - ")
    code = parts[0]
    try:
        if "Error" not in parts[1]:
            name = parts[1]
            codes[code] = name
    except IndexError:
        #Header had no " - name" part.
        print parts
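
A toy run of the code/name split used above; the header strings are made up purely for illustration:

sample_headers = ["GEO_ID - Geography Identifier", "TOTAL"]   #made-up header strings
sample_codes = {}
for h in sample_headers:
    parts = h.split(" - ")
    if len(parts) > 1 and "Error" not in parts[1]:
        sample_codes[parts[0]] = parts[1]
print sample_codes   #{'GEO_ID': 'Geography Identifier'}; "TOTAL" has no name part and is skipped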
Example #4
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np
import get_data2 as gd

headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts() 

#Build one row (a list of numeric column values) per record.
num_rows = len(gd.get_data_slice(headers[0], dicts))
rows_lol = []
for i in range(num_rows):
	rows_lol.append([])

for i in range(len(headers)):
	#Every column is read as numbers.
	column = gd.get_data_slice_numbers(headers[i], dicts)
	for j in range(num_rows):
		rows_lol[j].append(column[j])

X = np.array(rows_lol)
#nbrs = NearestNeighbors(n_neighbors=5, algorithm ='kd_tree', metric ='jaccard').fit(X)
kdt = KDTree(X, leaf_size=30, metric='euclidean')
#Indices of the 3 nearest neighbours for every row (each row's own index normally comes back as its first, distance-0 neighbour).
neighbor_indices = kdt.query(X, k=3, return_distance=False)
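
If the distances are wanted as well, KDTree.query can return them alongside the neighbour indices; a short follow-on sketch using the same tree:

#dist[i] and ind[i] hold the distances and row indices of row i's 3 nearest neighbours.
dist, ind = kdt.query(X, k=3, return_distance=True)
print ind[0]    #neighbours of the first row; its own index 0 is normally first
print dist[0]   #corresponding euclidean distances (the first is 0.0)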
Example #5
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
#from sklearn.neighbors.kd_tree import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np
import get_data2 as gd
import json

headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts()

#Build one row (a list of column values) per record.
num_rows = len(gd.get_data_slice(headers[0], dicts))
rows_lol = []
for i in range(num_rows):
    rows_lol.append([])

print len(rows_lol)

for i in range(len(headers)):
    column = gd.get_data_slice(headers[i], dicts)

    for j in range(num_rows):
        rows_lol[j].append(column[j])

print rows_lol[0]

#actually get similarities


def compare_rows(row1, row2):
    counter = 0
    for i in range(len(row1)):