Exemplo n.º 1
0
def deal_trip(filepath, outpath="../../data/train_trip.csv"):
    """Write partial-trajectory training samples for the trips in ``filepath``.

    Each row's POLYLINE is decoded from JSON. Trips with fewer than 10
    points are skipped. For the remaining trips one line is written to
    ``outpath`` per sample:

    * fewer than 34 points: the first five and the last five points;
    * more than 34 points: the first five points plus the five points
      ending at 30% of the trip;
    * more than 70 points: additionally the same with the window ending
      at 60% of the trip;
    * more than 100 points: additionally the same with the window ending
      at 80% of the trip.

    Every output line is ``json.dumps`` of ``trip_unit(row)`` with the
    sampled points and the trip's label appended. Labels are read forward
    only from ``../../data/label_des.csv``: the cursor advances until its
    trip id equals the row's TRIP_ID (presumably the label file lists trips
    in the same order as ``filepath`` -- verify against the producer).

    NOTE(review): a trip with exactly 34 points matches neither ``< 34``
    nor ``> 34`` and therefore produces no sample; kept as in the original.

    Args:
        filepath: input CSV, read through ``read_csvfile``.
        outpath: destination file; overwritten.
    """
    rowlist = ["TRIP_ID", "CALL_TYPE", "TAXI_ID", "TIMESTAMP", "DAY_TYPE", "POLYLINE"]
    datas = read_csvfile(filepath, rowlist)

    # (exclusive minimum length, fraction of the trip where the window ends)
    cut_points = ((34, 0.3), (70, 0.6), (100, 0.8))

    # `with` guarantees both files are closed even if a row blows up.
    with open("../../data/label_des.csv", "r") as label_file, \
            open(outpath, "w") as out:
        trip_id, label = get_label(label_file.readline())
        trip_id = int(trip_id)
        handled = 0
        for _, row in datas.iterrows():
            line = json.loads(row['POLYLINE'])
            try:
                length = len(line)
                if length < 10:
                    continue

                samples = []
                if length < 34:
                    samples.append(line[0:5] + line[-5:])
                for min_length, fraction in cut_points:
                    if min_length < length:
                        end = int(length * fraction)
                        samples.append(line[0:5] + line[end - 5:end])

                for points in samples:
                    data = trip_unit(row)
                    data.append(points)
                    # Advance the label cursor until it reaches this trip.
                    while row['TRIP_ID'] != trip_id:
                        trip_id, label = get_label(label_file.readline())
                        trip_id = int(trip_id)
                    data.append(label)
                    out.write(json.dumps(data) + "\n")

                # Periodic flush so partial output survives a long run.
                handled += 1
                if handled % 1000 == 0:
                    out.flush()
            except Exception:
                # Best effort: report the offending row and keep going.
                # Single-argument print() emits the same text on Python 2 and 3.
                print('error line: %s %s' % (line, trip_id))
Exemplo n.º 2
0
def deal_time2we(filepath, outpath="../../data/time2we.csv"):
    """Derive calendar features from every trip's TIMESTAMP.

    For each row with a truthy TIMESTAMP the timestamp is converted with
    ``time.localtime`` (so the result depends on the machine's time zone)
    and one line ``"<TRIP_ID> <month> <day> <weekday> <slot>"`` is written
    to ``outpath``. ``month`` and ``day`` are the zero-padded ``%m`` / ``%d``
    fields, ``weekday`` is ``%w`` and ``slot`` is the ten-minute slot of the
    day (``hour * 6 + minute // 10``).

    Args:
        filepath: input CSV, read through ``read_csvfile``.
        outpath: destination file; overwritten.

    Returns:
        A list with one ``[[trip_id], [month, day, weekday, slot]]`` entry
        per successfully converted row (all four features are strings).
    """
    rowlist = ['TRIP_ID', 'TIMESTAMP']
    datas = read_csvfile(filepath, rowlist)

    we = []
    with open(outpath, "w") as out:
        for i, line in enumerate(datas['TIMESTAMP']):
            trip_id = datas['TRIP_ID'][i]
            try:
                if line:
                    month, day, hour, minute, week = time.strftime(
                        "%m:%d:%H:%M:%w", time.localtime(int(line))).split(":")
                    # Floor division keeps the slot an integer on Python 3 too.
                    hm = int(hour) * 6 + int(minute) // 10
                    # str() is required: a numeric TRIP_ID cannot be
                    # concatenated to a string directly.
                    out.write(str(trip_id) + " " + month + " " + day + " "
                              + week + " " + str(hm) + "\n")
                    we.append([[trip_id], [month, day, week, str(hm)]])
            except Exception:
                # Best effort: report the bad timestamp and continue.
                print('error line: %s' % (line,))
    print(we)
    return we
Exemplo n.º 3
0
def deal_des(filepath, outpath="../../data/des.csv"):
    """Extract the last point of every trip's POLYLINE.

    Each POLYLINE is decoded from JSON. For a non-empty polyline the line
    ``"<TRIP_ID> <x> <y>"`` is written to ``outpath``, where ``x`` and ``y``
    are the first and second component of the final point. Empty polylines
    are counted as blank; rows whose final point cannot be read are printed
    and counted as errors. A summary of the three counters is printed at
    the end.

    Args:
        filepath: input CSV, read through ``read_csvfile``.
        outpath: destination file; overwritten.

    Returns:
        A list with one ``[[trip_id], last_point]`` entry per valid trip.
    """
    rowlist = ['TRIP_ID', 'POLYLINE']
    datas = read_csvfile(filepath, rowlist)

    trip = []
    blank = 0
    errornum = 0
    with open(outpath, "w") as out:
        for i, line in enumerate(datas['POLYLINE']):
            trip_id = datas['TRIP_ID'][i]
            tmplist = json.loads(line)
            try:
                if tmplist:
                    last_point = tmplist[-1]
                    x, y = last_point[0], last_point[1]
                    out.write(str(trip_id) + " " + str(x) + " " + str(y) + "\n")
                    trip.append([[trip_id], last_point])
                else:
                    blank += 1
            except Exception:
                # Best effort: show the malformed polyline and keep counting.
                print(tmplist)
                errornum += 1

    # Single-argument print() emits the same text on Python 2 and 3.
    print("blank: %s valid: %s error: %s" % (blank, len(trip), errornum))
    return trip
Exemplo n.º 4
0
def latlon2grid(filepath, outpath="../../data/grid_trip.csv"):
    """Convert every trip's POLYLINE into a sequence of grid codes.

    Each point is encoded with ``gh.encode(point[1], point[0], precision=5)``
    (presumably a geohash of (lat, lon) with points stored as [lon, lat] --
    confirm against the ``gh`` module). Consecutive duplicate codes are
    collapsed, and trips that visit fewer than two cells are skipped. Every
    remaining trip is written to ``outpath`` as
    ``"<TRIP_ID> | <json codes>|<json time features>"``.

    The time features are ``[month, day, weekday, slot]`` as strings, taken
    from ``time.localtime`` of TIMESTAMP (time-zone dependent), with ``slot``
    the ten-minute slot of the day; the list is empty when TIMESTAMP is
    falsy. The number of distinct codes seen (including those of skipped
    trips) is printed at the end.

    Args:
        filepath: input CSV, read through ``read_csvfile``.
        outpath: destination file; overwritten.
    """
    rowlist = ["TRIP_ID", "POLYLINE", "TIMESTAMP"]
    datas = read_csvfile(filepath, rowlist)

    # Only membership matters, so a set replaces the dict + has_key() pair
    # (has_key() does not exist on Python 3).
    codes = set()
    with open(outpath, "w") as output:
        for _, row in datas.iterrows():
            line = json.loads(row["POLYLINE"])
            trip_time = row["TIMESTAMP"]

            trip = []
            pre = ""
            for point in line:
                code = gh.encode(point[1], point[0], precision=5)
                codes.add(code)
                # Record a cell only when the trip moves into a new one.
                if pre != code:
                    pre = code
                    trip.append(code)

            if len(trip) < 2:
                continue

            data = []
            if trip_time:
                month, day, hour, minute, week = time.strftime(
                    "%m:%d:%H:%M:%w", time.localtime(int(trip_time))).split(":")
                # Floor division keeps the slot an integer on Python 3 too.
                hm = int(hour) * 6 + int(minute) // 10
                data = [month, day, week, str(hm)]

            output.write(str(row["TRIP_ID"]) + " | " + json.dumps(trip)
                         + "|" + json.dumps(data) + "\n")

    # Two spaces on purpose: identical to the original
    # `print "code has ", n` output.
    print("code has  %s" % len(codes))