def _street_segment_refs(data, street_names):
    """Map each wanted street name to the node-id lists of its <way> elements.

    A street can be made of several ways, so the value for each name is a
    list of segments, each segment being the list of ``nd`` ``ref`` ids of
    one way, in document order.
    """
    wanted = set(street_names)  # O(1) membership instead of scanning a list
    street_nodes = {}
    for way in data.iter("way"):
        # Elements are falsy when they have no children, so test against None.
        name_tag = way.find("./tag[@k='name']")
        if name_tag is None:
            continue
        name = name_tag.attrib["v"]
        if name in wanted:
            refs = [nd.attrib["ref"] for nd in way.iter("nd")]
            street_nodes.setdefault(name, []).append(refs)
    return street_nodes


def _street_segment_coords(data, street_nodes):
    """Resolve the node ids of every segment to ``(lon, lat)`` float pairs."""
    # Index the top-level <node> elements once; looking each ref up with an
    # XPath query would rescan the whole document for every single node.
    # setdefault keeps the first element for a duplicated id, as find() would.
    node_by_id = {}
    if street_nodes:
        for node in data.findall("./node"):
            node_by_id.setdefault(node.get("id"), node)

    street_coords = {}
    for name, segments in street_nodes.items():
        street_coords[name] = []
        for refs in segments:
            coords = []
            for ref in refs:
                node = node_by_id.get(ref)
                if node is None:
                    # The way references a node that is not in the file;
                    # skip it rather than crash on a missing element.
                    continue
                coords.append((float(node.attrib["lon"]), float(node.attrib["lat"])))
            street_coords[name].append(coords)
    return street_coords


def get_street_lengths(street_names):
    """Compute the length (in miles) of the given streets inside New Haven.

    The street geometry is read from ``data/new_haven.xml``: every ``way``
    whose ``name`` tag is in *street_names* contributes one segment, and the
    length of a segment is the sum of the distances between its consecutive
    nodes.  ``get_neighborhood`` is called once per segment to tell, for each
    node, whether it is in New Haven and which neighborhood it belongs to.

    A stretch between two consecutive nodes counts fully when both nodes are
    in New Haven (or in the same neighborhood), by half when only one of them
    is, and not at all when neither is.

    Parameters:
        street_names: iterable of street names to measure.

    Returns:
        Three index-aligned lists ``(names, lengths, lengths_by_neighborhood)``:
        the street names found in the data, the length of each street within
        New Haven, and for each street a dict mapping neighborhood name to the
        length of the street within that neighborhood.  All three are empty
        when none of the requested streets is found.
    """
    # Passing the path lets the parser open and close the file itself.
    data = etree.parse("data/new_haven.xml")

    ## first, go through the data for each way,
    ## and extract the ids for each node involved.
    print("     getting street nodes...")
    street_nodes = _street_segment_refs(data, street_names)

    ## search the data for each node to get its coordinates
    print("     getting street coords...")
    street_coords = _street_segment_coords(data, street_nodes)

    ## determine the neighborhood, and whether or not it is in New Haven,
    ## for each node.
    print("     getting street hoods...")
    street_innh = {}
    street_hood = {}
    for name, segments in street_coords.items():
        street_innh[name] = []
        street_hood[name] = []
        for coords in segments:
            lngs = [lon for lon, _ in coords]
            lats = [lat for _, lat in coords]
            innhs, hoods = get_neighborhood(lngs, lats)
            street_innh[name].append(innhs)
            street_hood[name].append(hoods)

    ## finally, we calculate the street length by adding
    ## the length between each pair of consecutive nodes
    ## in each segment, and adding the length of each segment,
    ## only considering those nodes within new haven.
    print("     getting street lengths...")
    names = []
    lengths = []
    lengths_by_neighborhood = []
    for name, segments in street_coords.items():
        # todo: possible conditional here to eliminate street lengths of 0
        total = 0
        per_hood = {}
        for seg_index, coords in enumerate(segments):
            innhs = street_innh[name][seg_index]
            hoods = street_hood[name][seg_index]
            for j in range(1, len(coords)):
                last_lon, last_lat = coords[j - 1]
                current_lon, current_lat = coords[j]
                # geopy points are (latitude, longitude), not (lon, lat).
                distance_between_nodes = geopy.distance.vincenty(
                    (current_lat, current_lon), (last_lat, last_lon)
                ).miles
                half_distance = distance_between_nodes / 2.0

                ### length of the street strictly within new haven...
                current_node_innh = innhs[j]
                last_node_innh = innhs[j - 1]
                if current_node_innh and last_node_innh:
                    total += distance_between_nodes
                elif current_node_innh or last_node_innh:
                    total += half_distance

                ### length of the street within each neighborhood...
                current_node_hood = hoods[j]
                last_node_hood = hoods[j - 1]
                if current_node_hood == last_node_hood:
                    # "" means the node is in no neighborhood at all.
                    if current_node_hood != "":
                        per_hood[current_node_hood] = (
                            per_hood.get(current_node_hood, 0) + distance_between_nodes
                        )
                else:
                    # The stretch straddles a boundary: credit half of it to
                    # each end that lies in a neighborhood.
                    for hood in (current_node_hood, last_node_hood):
                        if hood != "":
                            per_hood[hood] = per_hood.get(hood, 0) + half_distance

        # Build the three results in the same pass so they stay index-aligned.
        names.append(name)
        lengths.append(total)
        lengths_by_neighborhood.append(per_hood)

    return names, lengths, lengths_by_neighborhood
def update_local_db():
    """Rebuild the local ``nh_data`` MongoDB database from freshly fetched data.

    Steps, in order:

    1. Delete every document in ``issues`` and fetch all issues again with
       ``get_data(None)``.
    2. Look up the neighborhood and street name of each issue and insert the
       issues that ``get_neighborhood`` reports as being in New Haven
       (skipping ids that are already stored).
    3. For every street name used by an issue that has no document in
       ``streets`` yet, compute its length with ``get_street_lengths`` and
       insert it into ``streets``, plus one document per neighborhood into
       ``streets_by_neighborhood``.

    Returns nothing; the function works purely through database writes and
    progress messages printed to stdout.
    """
    # connect to mongodb server and create database...
    client = MongoClient()
    db = client.nh_data

    print("downloading new scf data...")
    ## the commented out code below can be used
    ## to get date of latest update to save time.
    ## for now, remove all previous
    ## data and start over from beginning
    ## so as to account for changes in the data
    db.issues.delete_many({})
    latest_date = None

    #latest_entry = db.issues.find().sort("id",pymongo.DESCENDING).limit(1)
    #try:
    #    latest_date = latest_entry.next()["created_at"]
    #except:
    #    latest_date = None

    (ids, repids, rtids, rtts, times, acks, statuses, addrs, lngs, lats,
     acknowledged_ats, closed_ats) = get_data(latest_date)
    print("getting neighborhoods...")
    innhs, hoods = get_neighborhood(lngs, lats)
    print("getting street_names...")
    street_names = get_street_name(addrs)
    print("inserting issues into database...")
    for i, issue_id in enumerate(ids):
        if not innhs[i]:
            continue
        # Guard against the same id appearing more than once in the data.
        if db.issues.find_one({"id": issue_id}):
            continue
        db.issues.insert_one({
            "id": issue_id,
            "repid": repids[i],
            "rtid": rtids[i],
            "rtt": rtts[i],
            "created_at": times[i],
            "acknowledged": acks[i],
            "status": statuses[i],
            "address": addrs[i],
            "lng": lngs[i],
            "lat": lats[i],
            "neighborhood": hoods[i],
            "street_name": street_names[i],
            "acknowledged_at": acknowledged_ats[i],
            "closed_at": closed_ats[i],
        })

    print("getting street info...")
    ## get street data. To save time, only calculate street length for new streets
    all_street_names = [issue["street_name"] for issue in db.issues.find()]
    existing_street_names = set(street["name"] for street in db.streets.find())
    print("getting new street lengths...")
    unused_street_names = [
        street_name
        for street_name in all_street_names
        if street_name not in existing_street_names
    ]
    (new_street_names, new_street_lengths,
     new_street_lengths_by_neighborhood) = get_street_lengths(unused_street_names)
    # The three results are index-aligned: one entry per street.
    for name, length, lengths_by_hood in zip(
        new_street_names, new_street_lengths, new_street_lengths_by_neighborhood
    ):
        db.streets.insert_one({"name": name, "length": length})
        for hood, hood_length in lengths_by_hood.items():
            db.streets_by_neighborhood.insert_one({
                "name": name,
                "neighborhood": hood,
                "length": hood_length,
            })