示例#1
0
def test():
    """Classify every row of the full data set and report accuracy.

    Returns a two-element list:
    [exact-match accuracy, "almost" accuracy], where "almost" counts
    predictions whose leading digit is within 2 of the true label's
    leading digit (i.e. within roughly two buckets).
    """
    classes = [
        "0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80",
        "80-90", "unkown"
    ]
    # classes = ["1800","1810","1820","1830","1840","1850","1860","1870","1880","1890","1900","1910","unkown"]
    full_data = gd2.get_data_list_of_dicts()
    test_data = gd3.get_data_list_of_dicts()
    values = getValues(classes, test_data)

    correct = 0
    almost = 0
    total = len(full_data)
    if total == 0:
        # Avoid ZeroDivisionError on an empty data set.
        return [0.0, 0.0]
    for entry in full_data:
        decade = entry["class"]
        entry.pop("class", None)  # the classifier must not see the label
        result = test_single(entry, values)
        if result == decade:
            correct += 1
        # Non-numeric labels (e.g. "unkown" — spelling kept to match the
        # data) raise here and simply don't count toward "almost".
        try:
            if abs(int(result[0]) - int(decade[0])) <= 2:
                almost += 1
        except (ValueError, TypeError, IndexError):
            continue

    return [float(correct) / float(total), float(almost) / float(total)]
示例#2
0
def make_cadence_map(csv_input, csv_output, index_of_interest, lol):
  #Variable Setup.
  headers = gd2.get_headers()
  #print headers
  start_measure_index = headers.index("start_measure")
  #print start_measure_index
  id_index = headers.index("id")
  header = headers[index_of_interest]
  headers += ["{}_before".format(header)]
  headers += ["{}_after".format(header)]
  new_dicts = gd2.get_data_list_of_dicts()
  data = lol
  data_by_composition = {row[id_index]:[] for row in data}
  new_data = []

  #Sort (ascending) the individual entries for each composition by the first element.
  for row in data:
    composition_id = row[id_index]
    data_by_composition[composition_id].append(row)
    #try:
    data_by_composition[composition_id].sort(key=lambda x: int(x[start_measure_index]))
    #data_by_composition[composition_id].sort(key = start_measure_index)
    #except ValueError:
	#print row
  #For each entry, find the next one consecutively.
  for row in data:
    comp_entries = data_by_composition[row[id_index]]
    i = comp_entries.index(row)

    element_before = comp_entries[i-1][index_of_interest] if i>0 else "None"
    element_after = comp_entries[i+1][index_of_interest] if i<len(comp_entries)-1 else "None"
    new_data.append(row + [element_before, element_after])
  
  print "File written successfully! Added 'before' and 'after' for {}".format(header)
  gd2.write_data(csv_output, new_headers, new_data)
示例#3
0
def test():
    """Classify every row of the full data set and report accuracy.

    Returns a two-element list:
    [exact-match accuracy, "almost" accuracy], where "almost" counts
    predictions whose leading digit is within 2 of the true label's.

    Note: originally tab-indented; normalized to 4-space indentation.
    """
    classes = ["0-10","10-20","20-30","30-40","40-50","50-60","60-70","70-80","80-90","unkown"]
    # classes = ["1800","1810","1820","1830","1840","1850","1860","1870","1880","1890","1900","1910","unkown"]
    full_data = gd2.get_data_list_of_dicts()
    test_data = gd3.get_data_list_of_dicts()
    values = getValues(classes, test_data)

    correct = 0
    almost = 0
    total = len(full_data)
    if total == 0:
        # Avoid ZeroDivisionError on an empty data set.
        return [0.0, 0.0]
    for entry in full_data:
        decade = entry["class"]
        entry.pop("class", None)  # the classifier must not see the label
        result = test_single(entry, values)
        if result == decade:
            correct += 1
        # Non-numeric labels (e.g. "unkown") raise here and simply don't
        # count toward the "almost" tally.
        try:
            if abs(int(result[0]) - int(decade[0])) <= 2:
                almost += 1
        except (ValueError, TypeError, IndexError):
            continue

    return [float(correct)/float(total), float(almost)/float(total)]
import get_data as gd
import get_data2 as gd2
import get_data3 as gd3

list_of_dicts = gd.get_data_list_of_dicts()
full = gd2.get_data_list_of_dicts()
full_headers = gd2.get_headers()
headers = gd.get_headers()
headers_income = gd3.get_headers()

codes = {}
full_clean = []
final_headers = []

for h in headers:
    h = h.split(" - ")
    code = h[0]
    try:
        name = h[1]
        codes[code] = name
    except:
        print h

for h2 in headers_income:
    h2 = h2.split(" - ")
    code = h2[0]
    try:
        if not "Error" in h2[1]:
            name = h2[1]
            codes[code] = name
    except:
示例#5
0
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors.kd_tree import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np
import get_data2 as gd

# Load the table and transpose it into a list of per-row vectors.
headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts()

# Row count, hoisted: the original re-fetched the first column's slice on
# every loop iteration just to take its length.
num_rows = len(gd.get_data_slice(headers[0], dicts))

rows_lol = [[] for _ in range(num_rows)]

# The original `if i == 1 or i == 4` had two identical branches, so the
# test was dead code: every column is converted to numbers.
for i in range(len(headers)):
    column = gd.get_data_slice_numbers(headers[i], dicts)
    for j in range(num_rows):
        rows_lol[j].append(column[j])

X = np.array(rows_lol)
#nbrs = NearestNeighbors(n_neighbors=5, algorithm ='kd_tree', metric ='jaccard').fit(X)
kdt = KDTree(X, leaf_size=30, metric='euclidean')
kdt.query(X, k=3, return_distance=False)  # NOTE(review): result is discarded — confirm intended
def find_point(name, points):
    """Return the first point in *points* whose getName() equals *name*.

    Terminates the program with an error message when no point matches.
    """
    for candidate in points:
        if candidate.getName() == name:
            return candidate
    sys.exit("Could not find point")


list_of_dicts = gd.get_data_list_of_dicts()

# Keep only the entries that record both an origin and a destination.
# (Direct != comparison replaces the original `not (x == "")` idiom.)
has_both = [
    entry for entry in list_of_dicts
    if entry["Place Of Origin"] != "" and entry["Destination"] != ""
]

# NOTE(review): `gd2` is not imported in this snippet — confirm it is
# defined upstream before this runs.
list_of_places = gd2.get_data_list_of_dicts()

# All known place names.
places = [entry["Name"] for entry in list_of_places]

has_full = []
for item in has_both:
    # Have to clean the name so it will match the one we have listed
    # for the places
    Poo = item["Place Of Origin"].replace(" ", "")
    Poo = Poo.replace("(", "COMMA")
    Poo = Poo.replace(")", "COMMA")
    Poo = Poo.split("COMMA")
    Poo2 = []
    for word in Poo:
import get_data as gd
import get_data2 as gd2
import get_data3 as gd3

list_of_dicts = gd.get_data_list_of_dicts()
full = gd2.get_data_list_of_dicts()
full_headers = gd2.get_headers()
headers = gd.get_headers()
headers_income = gd3.get_headers()

codes = {}
full_clean = []
final_headers = []

for h in headers:
    h = h.split(" - ")
    code = h[0]
    try:
	name = h[1]
	codes[code] = name
    except:
	print h

for h2 in headers_income:
    h2 = h2.split(" - ")
    code = h2[0]
    try:
	if not "Error" in h2[1]:
	    name = h2[1]
	    codes[code] = name
    except:
示例#8
0
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
#from sklearn.neighbors.kd_tree import KDTree
#from sklearn.neighbors import DistanceMetric
import numpy as np
import get_data2 as gd
import json

headers = gd.get_headers()
dicts = gd.get_data_list_of_dicts()

rows_lol = []
for i in range(len(gd.get_data_slice(headers[0], dicts))):
    rows_lol.append([])

print len(rows_lol)

for i in range(len(headers)):
    column = gd.get_data_slice(headers[i], dicts)

    for j in range(len(gd.get_data_slice(headers[0], dicts))):
        rows_lol[j].append(column[j])

print rows_lol[0]

#actually get similarities


def compare_rows(row1, row2):
    counter = 0
    for i in range(len(row1)):
    return pageRanks

def find_point(name, points):
    """Return the first point in *points* whose getName() equals *name*.

    Terminates the program with an error message when no point matches.
    """
    match = next((p for p in points if p.getName() == name), None)
    if match is None:
        sys.exit("Could not find point")
    return match

list_of_dicts = gd.get_data_list_of_dicts()

# Keep only the entries that record both an origin and a destination.
# (Direct != comparison replaces the original `not (x == "")` idiom.)
has_both = [
    entry for entry in list_of_dicts
    if entry["Place Of Origin"] != "" and entry["Destination"] != ""
]

# NOTE(review): `gd2` is not imported in this snippet — confirm it is
# defined upstream before this runs.
list_of_places = gd2.get_data_list_of_dicts()

# All known place names.
places = [entry["Name"] for entry in list_of_places]

has_full = []
for item in has_both:
    # Have to clean the name so it will match the one we have listed
    # for the places
    Poo = item["Place Of Origin"].replace(" ","")
    Poo = Poo.replace("(","COMMA")
    Poo = Poo.replace(")","COMMA")
    Poo = Poo.split("COMMA")
    Poo2 = []
    for word in Poo: