Exemplo n.º 1
0
def queryBookings(typology):
    """Load the bookings for ``typology`` and flatten them into a DataFrame.

    Documents are obtained from ``queryToCollection(typology)``. Their nested
    fields are expanded into flat columns and the nested originals dropped:

    * ``driving`` -> ``duration_dr`` (``duration`` divided by 60) and
      ``distance_dr``;
    * ``origin_destination`` -> ``type``, ``start_lon``/``start_lat`` (first
      coordinate pair) and ``end_lon``/``end_lat`` (second coordinate pair);
    * ``public_transport`` -> ``duration_pt``, ``distance_pt``,
      ``arrival_date_pt`` and ``arrival_time_pt``.

    Two columns are derived: ``distance`` (``sf.haversine`` between the start
    and end points) and ``duration`` (``final_date - init_date`` expressed in
    minutes). Only rows whose ``start_lon`` is at most 7.8 are kept.

    Args:
        typology: Forwarded unchanged to ``queryToCollection``.

    Returns:
        The flattened ``pandas.DataFrame``, or the string ``"err"`` when
        ``queryToCollection`` reports ``"err from cursor"`` or yields no
        documents.
    """
    books_cursor = queryToCollection(typology)
    if books_cursor == "err from cursor":
        return "err"

    # Materialise the cursor once and test the result for emptiness instead
    # of calling Cursor.count(): count() was removed in PyMongo 4 and, by
    # default, ignores any skip/limit applied to the cursor.
    records = list(books_cursor)
    if not records:
        return "err"

    bookings_df = pd.DataFrame(records)

    # `axis` is passed by keyword everywhere below: the positional form
    # drop(label, 1) raises TypeError on pandas >= 2.0.
    bookings_df['duration_dr'] = bookings_df.driving.apply(
        lambda x: float(x['duration'] / 60))
    bookings_df['distance_dr'] = bookings_df.driving.apply(
        lambda x: x['distance'])
    bookings_df = bookings_df.drop('driving', axis=1)

    bookings_df['type'] = bookings_df.origin_destination.apply(
        lambda x: x['type'])
    bookings_df['coordinates'] = bookings_df.origin_destination.apply(
        lambda x: x['coordinates'])
    bookings_df = bookings_df.drop('origin_destination', axis=1)

    # coordinates[0] is the start point, coordinates[1] the end point; each
    # point is indexed as [lon, lat].
    bookings_df['start'] = bookings_df.coordinates.apply(lambda x: x[0])
    bookings_df['end'] = bookings_df.coordinates.apply(lambda x: x[1])
    bookings_df = bookings_df.drop('coordinates', axis=1)

    bookings_df['start_lon'] = bookings_df.start.apply(
        lambda x: float(x[0]))
    bookings_df['start_lat'] = bookings_df.start.apply(
        lambda x: float(x[1]))
    bookings_df = bookings_df.drop('start', axis=1)

    bookings_df['end_lon'] = bookings_df.end.apply(lambda x: float(x[0]))
    bookings_df['end_lat'] = bookings_df.end.apply(lambda x: float(x[1]))
    bookings_df = bookings_df.drop('end', axis=1)

    bookings_df['distance'] = bookings_df.apply(
        lambda x: sf.haversine(float(x['start_lon']), float(x['start_lat']),
                               float(x['end_lon']), float(x['end_lat'])),
        axis=1)

    # Rental length: the date difference converted to minutes.
    bookings_df['duration'] = bookings_df.final_date - bookings_df.init_date
    bookings_df['duration'] = bookings_df['duration'].apply(
        lambda x: x.days * 24 * 60 + x.seconds / 60)

    bookings_df['duration_pt'] = bookings_df.public_transport.apply(
        lambda x: x['duration'])
    bookings_df['distance_pt'] = bookings_df.public_transport.apply(
        lambda x: x['distance'])
    bookings_df['arrival_date_pt'] = bookings_df.public_transport.apply(
        lambda x: x['arrival_date'])
    bookings_df['arrival_time_pt'] = bookings_df.public_transport.apply(
        lambda x: x['arrival_time'])
    bookings_df = bookings_df.drop('public_transport', axis=1)

    # NOTE(review): 7.8 is a hard-coded longitude cut-off; presumably an
    # eastern bound for the served area - confirm before reusing elsewhere.
    bookings_df = bookings_df[bookings_df["start_lon"] <= 7.8]

    return bookings_df
Exemplo n.º 2
0
def EvalDistance(i, j):
    """Return the grid distance and haversine distance between two zones.

    A zone index is decoded into a grid position as
    ``column = index % gv.NColumns`` and ``row = int(index / gv.NColumns)``.
    The centre of a cell is then
    ``((column + 0.5) * gv.ShiftLon + gv.minLon,
    (row + 0.5) * gv.ShiftLat + gv.minLat)``.

    Args:
        i: Index of the first zone.
        j: Index of the second zone.

    Returns:
        A tuple ``(de, dh)``: ``de`` is the Euclidean distance between the
        two grid positions, measured in cells, and ``dh`` is
        ``sf.haversine`` evaluated on the two cell centres.
    """
    n_columns = gv.NColumns

    def cell_centre(column, row):
        # Longitude/latitude of the middle of the given grid cell.
        lon = (column + 0.5) * gv.ShiftLon + gv.minLon
        lat = (row + 0.5) * gv.ShiftLat + gv.minLat
        return lon, lat

    col_i, row_i = i % n_columns, int(i / n_columns)
    col_j, row_j = j % n_columns, int(j / n_columns)

    lon_i, lat_i = cell_centre(col_i, row_i)
    lon_j, lat_j = cell_centre(col_j, row_j)

    haversine_dist = sf.haversine(lon_i, lat_i, lon_j, lat_j)
    grid_dist = sqrt((col_i - col_j) ** 2 + (row_i - row_j) ** 2)

    return grid_dist, haversine_dist
Exemplo n.º 3
0
def formatBookings(d):
    """Return the filtered, flattened bookings of one city as a DataFrame.

    The raw bookings are cached in ``'../input/bookings_' + d["city"]``.
    When that file is missing they are fetched from MongoDB (collection
    ``"PermanentBookings"`` for provider ``"car2go"``, otherwise
    ``"enjoy_PermanentBookings"``), restricted to
    ``d["initdate"] < init_time < d["finaldate"]``, and written to the cache.
    When ``"Europe"`` does not occur in ``d["timezone"]`` the ``init_time``
    and ``final_time`` columns are first passed through
    ``AbroadStamptoLocal`` together with each row's ``timezone``.

    The frame is then flattened (``origin_destination`` becomes ``type``,
    ``start_lon``, ``start_lat``, ``end_lon``, ``end_lat``; ``driving`` is
    dropped), ``duration`` and ``distance`` are added, and only rows with
    ``distance >= 700`` and ``120 <= duration <= 3600`` are kept. For
    ``"Torino"`` rows with ``start_lon > 7.8`` are removed as well.

    Args:
        d: Mapping with the keys ``"city"``, ``"provider"``, ``"initdate"``,
            ``"finaldate"`` and ``"timezone"``.

    Returns:
        The filtered ``pandas.DataFrame``.
    """
    cache_path = '../input/bookings_' + d["city"]

    if not os.path.isfile(cache_path):
        collection = "enjoy_PermanentBookings"
        if d["provider"] == "car2go":
            collection = "PermanentBookings"
        enjoy_bookings = dc.setup_mongodb(collection)

        print("***********************")
        print("city", d["city"])
        print(
            "initdate ",
            datetime.fromtimestamp(int(
                d["initdate"])).strftime('%Y-%m-%d %H:%M:%S'))
        print(
            "fianldate",
            datetime.fromtimestamp(int(
                d["finaldate"])).strftime('%Y-%m-%d %H:%M:%S'))
        print("***********************")

        bookings = enjoy_bookings.find({
            "city": d["city"],
            "init_time": {
                "$gt": d["initdate"],
                "$lt": d["finaldate"]
            }
        })

        bookings_df = pd.DataFrame(list(bookings))

        if "Europe" not in d["timezone"]:
            # Convert from the city's time zone to ours.
            bookings_df['init_time'] = list(
                map(AbroadStamptoLocal, bookings_df["timezone"],
                    bookings_df['init_time']))
            bookings_df['final_time'] = list(
                map(AbroadStamptoLocal, bookings_df["timezone"],
                    bookings_df['final_time']))

        bookings_df.to_pickle(cache_path)

    else:
        print("read")
        bookings_df = pd.read_pickle(cache_path)

    bookings_df["duration"] = (
        bookings_df["final_time"] - bookings_df["init_time"])
    bookings_df["duration"] = bookings_df["duration"].astype(int)
    # `axis` is passed by keyword everywhere below: the positional form
    # drop(label, 1) raises TypeError on pandas >= 2.0.
    bookings_df = bookings_df.drop('driving', axis=1)

    bookings_df['type'] = bookings_df.origin_destination.apply(
        lambda x: x['type'])
    bookings_df['coordinates'] = bookings_df.origin_destination.apply(
        lambda x: x['coordinates'])
    bookings_df = bookings_df.drop('origin_destination', axis=1)

    # NOTE(review): coordinates[0] is labelled "end" and coordinates[1]
    # "start" here. Kept as-is because downstream filters depend on it, but
    # confirm this ordering is intended.
    bookings_df['end'] = bookings_df.coordinates.apply(lambda x: x[0])
    bookings_df['start'] = bookings_df.coordinates.apply(lambda x: x[1])
    bookings_df = bookings_df.drop('coordinates', axis=1)

    bookings_df['start_lon'] = bookings_df.start.apply(lambda x: float(x[0]))
    bookings_df['start_lat'] = bookings_df.start.apply(lambda x: float(x[1]))
    bookings_df = bookings_df.drop('start', axis=1)

    bookings_df['end_lon'] = bookings_df.end.apply(lambda x: float(x[0]))
    bookings_df['end_lat'] = bookings_df.end.apply(lambda x: float(x[1]))
    bookings_df = bookings_df.drop('end', axis=1)

    bookings_df['distance'] = bookings_df.apply(
        lambda x: sf.haversine(float(x['start_lon']), float(x['start_lat']),
                               float(x['end_lon']), float(x['end_lat'])),
        axis=1)

    # Keep only trips that are long enough and of a plausible duration.
    bookings_df = bookings_df[bookings_df["distance"] >= 700]
    bookings_df = bookings_df[bookings_df["duration"] >= 120]
    bookings_df = bookings_df[bookings_df["duration"] <= 3600]

    if d["city"] == "Torino":
        bookings_df = bookings_df[bookings_df["start_lon"] <= 7.8]

    return bookings_df
Exemplo n.º 4
0
def main():
    """Build the booking-event dictionaries for one city and pickle them.

    Steps:

    1. Read the city name from the first line of ``creating.txt`` (exit with
       status 0 when the file is missing) and initialise ``gv`` / ``dc``.
    2. Fetch the bookings of ``gv.city`` with
       ``gv.initDate < init_time < gv.finalDate``.
    3. Keep bookings with ``120 < duration < 3600`` and a haversine distance
       above 500 whose two end points both pass ``sf.checkPerimeter``. Each
       kept booking gets a progressive id and is recorded in the module-level
       ``id_events`` and ``dict_bookings``; the first 1000 are also recorded
       in ``dict_bookings_short``.
    4. Pickle the unfiltered ``dict_bookings`` and ``id_events``.
    5. Remove every booking that starts at a timestamp with more than 30
       simultaneous starts, replace the remaining ``[id, kind]`` entries by
       their ``EventBook`` objects, and pickle both dictionaries sorted by
       timestamp.

    All output files are written under ``../events/`` and are prefixed with
    ``gv.city + "_" + gv.provider``.
    """
    config_path = "creating.txt"
    if not os.path.isfile(config_path):
        print("missing creating file")
        # Same exit status as the previous exit(0), without relying on the
        # interactive helper injected by the `site` module.
        raise SystemExit(0)

    with open(config_path, "r") as city_config:
        city = city_config.readline().strip()

    gv.init()
    dc.assingVariables(city)

    collection = "enjoy_PermanentBookings"
    if gv.provider == "car2go":
        collection = "PermanentBookings"
    enjoy_bookings = dc.setup_mongodb(collection)

    bookings = enjoy_bookings.find({
        "city": gv.city,
        "init_time": {
            "$gt": gv.initDate,
            "$lt": gv.finalDate
        }
    })

    i = 0  # booking id, a progressive number

    NumEventsFiltered = 0
    Discarted = 0
    for booking in bookings:
        init_time = booking['init_time']
        final_time = booking['final_time']
        duration = final_time - init_time
        coords = booking['origin_destination']['coordinates']
        lon1, lat1 = coords[0][0], coords[0][1]
        lon2, lat2 = coords[1][0], coords[1][1]
        d2 = sf.haversine(lon1, lat1, lon2, lat2)

        if not (duration > 120 and duration < 3600 and d2 > 500):
            Discarted += 1
            continue

        # NOTE(review): bookings rejected by the perimeter check are skipped
        # without being counted in Discarted (unchanged behaviour).
        if not (sf.checkPerimeter(lat1, lon1)
                and sf.checkPerimeter(lat2, lon2)):
            continue

        id_events[i] = [
            init_time, final_time,
            EventBook(i, "s", coords[0]),
            EventBook(i, "e", coords[1])
        ]
        dict_bookings.setdefault(init_time, []).append([i, "s"])
        dict_bookings.setdefault(final_time, []).append([i, "e"])

        # The short dictionary must be filled BEFORE the id is advanced;
        # otherwise its events carry the id of the next booking.
        if i < 1000:
            dict_bookings_short.setdefault(init_time, []).append(
                EventBook(i, "s", coords[0]))
            dict_bookings_short.setdefault(final_time, []).append(
                EventBook(i, "e", coords[1]))

        i += 1

    events_prefix = "../events/" + gv.city + "_" + gv.provider

    with open(events_prefix + "_dict_bookings.pkl", 'wb') as handle:
        pickle.dump(dict_bookings, handle)

    with open(events_prefix + "_id_events.pkl", 'wb') as handle:
        pickle.dump(id_events, handle)

    print("End Pickles")

    print("Start")
    # Timestamps with more than 30 simultaneous booking starts are treated
    # as anomalous; every booking starting there is removed.
    to_delete = []
    EventDeleted = 0
    for stamp in dict_bookings:
        startbooking = sum(
            1 for event in dict_bookings[stamp] if event[1] == "s")

        if startbooking > 30:
            EventDeleted += startbooking
            to_delete.append(stamp)

    for stamp in to_delete:
        events_to_delete = [
            event[0] for event in dict_bookings[stamp] if event[1] == "s"
        ]

        # Remove both the start and the matching end entry of each booking.
        for event_id in events_to_delete:
            InitTime = id_events[event_id][0]
            FinalTime = id_events[event_id][1]
            dict_bookings[InitTime].remove([event_id, "s"])
            dict_bookings[FinalTime].remove([event_id, "e"])

        if len(dict_bookings[stamp]) == 0:
            del dict_bookings[stamp]

    # Swap the lightweight [id, kind] entries for the EventBook objects
    # stored in id_events (slot 2 = start event, slot 3 = end event).
    for stamp in dict_bookings:
        stamp_events = dict_bookings[stamp]
        for position in range(len(stamp_events)):
            NumEventsFiltered += 1
            EventT = stamp_events[position]
            if EventT[1] == "s":
                stamp_events[position] = id_events[EventT[0]][2]
            else:
                stamp_events[position] = id_events[EventT[0]][3]

    print("CPE, Num Events Filtered + Event deleted:",
          NumEventsFiltered + EventDeleted)
    print("CPE, Num Events Filtered:", NumEventsFiltered)
    print("CPE, Event Deleted:", EventDeleted)
    print("CPE, Dicarded:", Discarted)

    ordered_dict_booking = collections.OrderedDict(
        sorted(dict_bookings.items()))
    ordered_dict_booking_short = collections.OrderedDict(
        sorted(dict_bookings_short.items()))

    with open(events_prefix + "_sorted_dict_events_obj.pkl",
              'wb') as handle:
        pickle.dump(ordered_dict_booking, handle)

    with open(events_prefix + "_sorted_dict_events_obj_short.pkl",
              'wb') as handle:
        pickle.dump(ordered_dict_booking_short, handle)

    print("CPE, end\n")
Exemplo n.º 5
0
def formatBookings():
    """Fetch the bookings of ``gv.city`` and return them filtered and flat.

    Bookings are read from MongoDB (collection ``"PermanentBookings"`` for
    provider ``"car2go"``, otherwise ``"enjoy_PermanentBookings"``) with
    ``gv.initDate < init_time < gv.finalDate``. For ``"Vancouver"`` 25200 is
    subtracted from ``init_time`` and ``final_time``. The raw frame is then
    pickled to ``'../input/bookings_' + gv.city``.

    Afterwards the frame is flattened (``origin_destination`` becomes
    ``type``, ``start_lon``, ``start_lat``, ``end_lon``, ``end_lat``;
    ``driving`` is dropped), ``duration`` and ``distance`` are added, and only
    rows with ``distance >= 700`` and ``120 <= duration <= 3600`` are kept.
    For ``"Torino"`` rows with ``start_lon > 7.8`` are removed as well.

    Returns:
        The filtered ``pandas.DataFrame``.
    """
    collection = "enjoy_PermanentBookings"
    if gv.provider == "car2go":
        collection = "PermanentBookings"
    enjoy_bookings = sf.setup_mongodb(collection)

    print("***********************")
    print("city", gv.city)
    print(
        "initDate ",
        datetime.datetime.fromtimestamp(int(
            gv.initDate)).strftime('%Y-%m-%d %H:%M:%S'))
    print(
        "fianlDate",
        datetime.datetime.fromtimestamp(int(
            gv.finalDate)).strftime('%Y-%m-%d %H:%M:%S'))
    print("***********************")

    bookings = enjoy_bookings.find({
        "city": gv.city,
        "init_time": {
            "$gt": gv.initDate,
            "$lt": gv.finalDate
        }
    })

    bookings_df = pd.DataFrame(list(bookings))

    if gv.city == "Vancouver":
        # NOTE(review): 25200 s = 7 h, presumably a fixed UTC offset for
        # Vancouver; it does not follow daylight-saving changes - confirm.
        bookings_df["init_time"] = bookings_df["init_time"].sub(25200)
        bookings_df["final_time"] = bookings_df["final_time"].sub(25200)

    bookings_df.to_pickle('../input/bookings_' + gv.city)

    bookings_df["duration"] = (
        bookings_df["final_time"] - bookings_df["init_time"])
    bookings_df["duration"] = bookings_df["duration"].astype(int)
    # `axis` is passed by keyword everywhere below: the positional form
    # drop(label, 1) raises TypeError on pandas >= 2.0.
    bookings_df = bookings_df.drop('driving', axis=1)

    bookings_df['type'] = bookings_df.origin_destination.apply(
        lambda x: x['type'])
    bookings_df['coordinates'] = bookings_df.origin_destination.apply(
        lambda x: x['coordinates'])
    bookings_df = bookings_df.drop('origin_destination', axis=1)

    # NOTE(review): coordinates[0] is labelled "end" and coordinates[1]
    # "start" here. Kept as-is because downstream filters depend on it, but
    # confirm this ordering is intended.
    bookings_df['end'] = bookings_df.coordinates.apply(lambda x: x[0])
    bookings_df['start'] = bookings_df.coordinates.apply(lambda x: x[1])
    bookings_df = bookings_df.drop('coordinates', axis=1)

    bookings_df['start_lon'] = bookings_df.start.apply(lambda x: float(x[0]))
    bookings_df['start_lat'] = bookings_df.start.apply(lambda x: float(x[1]))
    bookings_df = bookings_df.drop('start', axis=1)

    bookings_df['end_lon'] = bookings_df.end.apply(lambda x: float(x[0]))
    bookings_df['end_lat'] = bookings_df.end.apply(lambda x: float(x[1]))
    bookings_df = bookings_df.drop('end', axis=1)

    bookings_df['distance'] = bookings_df.apply(
        lambda x: sf.haversine(float(x['start_lon']), float(x['start_lat']),
                               float(x['end_lon']), float(x['end_lat'])),
        axis=1)

    # Keep only trips that are long enough and of a plausible duration.
    bookings_df = bookings_df[bookings_df["distance"] >= 700]
    bookings_df = bookings_df[bookings_df["duration"] >= 120]
    bookings_df = bookings_df[bookings_df["duration"] <= 3600]

    if gv.city == "Torino":
        bookings_df = bookings_df[bookings_df["start_lon"] <= 7.8]

    return bookings_df