示例#1
0
def df_stop_frequency(direction, for_lines=['4','5','6'], fname="default", write_df_root="stopFreq", dt=120):
    """Count stop observations per fixed-width time bin and write them to CSV.

    The data frame is loaded with ``load_df(fname)`` and reduced with
    ``filter_stops`` to the stops returned by
    ``_get_stops(direction=direction, set_range="all")`` and the lines in
    ``for_lines``.  Every remaining row is assigned to the bin
    ``int((timestamp - t_min) / dt)`` and adds 1 to the cell for its stop.

    Args:
        direction: Direction label handed to ``_get_stops``.  Only its first
            letter is inspected here, and only to warn when it is neither
            "n" nor "s" (case-insensitive).
        for_lines: Line identifiers forwarded to ``filter_stops``.
        fname: Argument forwarded to ``load_df``.
        write_df_root: Output path without extension; ".csv" is appended.
        dt: Bin width, in the same units as the ``timestamp`` column.

    Returns:
        A DataFrame indexed by ``int(t_min + n * dt)`` for each bin ``n``,
        with one column per unique stop and float counts as values.  When
        no rows survive filtering, an empty frame is written and returned.

    Raises:
        ValueError: If ``dt`` is not positive.
    """
    if dt <= 0:
        raise ValueError("dt must be positive, got %r" % (dt,))

    D = load_df(fname)
    wanted_stops = _get_stops(direction=direction, set_range="all")
    # Hand the helper a copy so the shared default list can never be mutated.
    D = filter_stops(D, wanted_stops, list(for_lines))

    # The direction only selects the stop set above; this check exists purely
    # to warn the caller about an unexpected value.
    if not direction.lower().startswith(("n", "s")):
        print("Specified direction %s not recognized; forcing NORTH" % (direction,))

    timestamps = D['timestamp']
    stops = D['stop'].unique()

    if len(timestamps) == 0:
        # Nothing to bin: min()/max() are undefined, so emit an empty table.
        print("Filtered DF is empty; writing empty frequency table")
        df_freq = pd.DataFrame(index=[], columns=stops)
        df_freq.to_csv(write_df_root + ".csv")
        return df_freq

    t_min = timestamps.min()
    t_max = timestamps.max()
    print("%s %s" % (t_min, t_max))

    nbins = int((t_max - t_min) / dt) + 1
    bin_labels = [int(t_min + n * dt) for n in range(nbins)]
    print("Filtered DF contains stops %s" % (stops,))

    # Build the zero-filled table directly instead of NaN-filling then fillna.
    df_freq = pd.DataFrame(0., index=bin_labels, columns=stops)

    # Address cells by position: the bin number is always a valid row, whereas
    # recomputing the label could miss the index for non-integer dt or t_min.
    column_of = df_freq.columns.get_loc
    for ts, stop in zip(timestamps.values, D['stop'].values):
        row = int((ts - t_min) / dt)
        df_freq.iat[row, column_of(stop)] += 1

    df_freq.to_csv(write_df_root + ".csv")
    return df_freq
示例#2
0
def df_trips_by_column(D, direction="N", for_lines=['4','5','6'], fname="default", write_df_root="tripData"):
    """Compile per-trip stop times, trip durations and stop counts.

    A unique trip key ``long_id`` is built as ``id + "::" + str(tref)``,
    where ``tref`` is ``get_TOD_reference(timestamp)``.  The frame is then
    filtered with ``filter_stops`` and summarised one trip at a time.

    NOTE: the ``tref`` and ``long_id`` columns are added to the caller's
    ``D`` in place before filtering.

    Args:
        D: DataFrame with at least ``timestamp``, ``id`` and ``stop`` columns.
        direction: Direction label passed to ``_get_stops`` and
            ``get_stop_dict``.  A first letter of "s" (case-insensitive)
            makes the trip run from "N" to "S"; anything else is treated as
            "S" to "N", with a warning when the letter is not "n".
        for_lines: Line identifiers forwarded to ``filter_stops``.
        fname: Unused; kept for interface compatibility.
        write_df_root: Prefix for the three CSV files that are written.

    Returns:
        ``(tripCol, tripTimes, stopCounts)``:
          * tripCol - one column per trip; rows ``line``, ``trip_time``,
            ``tref`` and, per stop, the latest timestamp seen at that stop.
          * tripTimes - one row per trip with ``line``, ``trip_time`` and
            ``time_of_day`` (``TOD_value(origin_time, tref) / 3600.``;
            presumably hours if TOD_value returns seconds - TODO confirm).
          * stopCounts - number of rows per (stop, trip).

    Raises:
        KeyError: If the origin or endpoint station code for a trip's line
            is not one of the stops present in the filtered data.

    Side effects:
        Writes ``<root>_verbose.csv``, ``<root>_trip_times.csv`` and
        ``<root>_stop_counts.csv``.
    """
    D['tref'] = D['timestamp'].map(get_TOD_reference)
    # This is cumbersome; the unique trip ids would be better generated in a
    # pre-processing step.
    D['long_id'] = D['id'] + "::" + D['tref'].astype(str)

    wanted_stops = _get_stops(direction=direction, set_range="all")
    # Hand the helper a copy so the shared default list can never be mutated.
    D = filter_stops(D, wanted_stops, list(for_lines))

    heading = direction.lower()
    if heading.startswith("s"):
        origin, endpoint = "N", "S"
    else:
        origin, endpoint = "S", "N"
        if not heading.startswith("n"):
            print("Specified direction %s not recognized; forcing NORTH" % (direction,))
    station_codes = get_stop_dict(direction)

    trips = D['long_id'].unique()
    stops = list(D['stop'].unique())

    tripCol = pd.DataFrame(index=['line', 'trip_time', 'tref'] + stops, columns=trips)
    tripTimes = pd.DataFrame(index=trips, columns=['line', 'trip_time', 'time_of_day'])
    stopCounts = pd.DataFrame(index=stops, columns=trips)

    # groupby partitions the frame once instead of re-scanning it per trip.
    for trip, trip_rows in D.groupby('long_id', sort=False):
        line = get_line(trip)
        tref = trip_rows['tref'].values[0]
        tripCol.loc['line', trip] = line
        tripCol.loc['tref', trip] = tref
        tripTimes.loc[trip, 'line'] = line

        for stop, stop_rows in trip_rows.groupby('stop', sort=False):
            # Keep the last time the trip was observed at this stop.
            tripCol.loc[stop, trip] = stop_rows['timestamp'].max()
            stopCounts.loc[stop, trip] = len(stop_rows)

        endpoint_time = tripCol.loc[station_codes[stop_for(line, endpoint)], trip]
        origin_time = tripCol.loc[station_codes[stop_for(line, origin)], trip]
        trip_time = endpoint_time - origin_time

        tripCol.loc['trip_time', trip] = trip_time
        tripTimes.loc[trip, 'trip_time'] = trip_time
        tripTimes.loc[trip, 'time_of_day'] = TOD_value(origin_time, tref) / 3600.

    print("created DataFrames with data compiled by trip; writing to files with root %s" % (write_df_root,))
    tripCol.to_csv(write_df_root + "_verbose.csv")
    tripTimes.to_csv(write_df_root + "_trip_times.csv")
    stopCounts.to_csv(write_df_root + "_stop_counts.csv")
    return tripCol, tripTimes, stopCounts