示例#1
0
def get_stop_link(stopA, stopB, src='file', merge_weather=False):
    """
    Retrieve the data describing the link between two stops.

    Almost redundant, or possibly still used by the Big Route Model.
    Use stop_tools.stop_data() instead.

    Parameters
    ----------
    stopA, stopB
        Identifiers of the two stops. They are converted with ``str()`` to
        build the path ``/home/student/data/stops/<stopA>/<stopB>.csv`` and
        are also stored verbatim in the ``stopA`` / ``stopB`` columns.
    src : str
        ``'file'`` reads the link csv from disk. ``'db'`` is reserved for a
        database backend that has not been written yet.
    merge_weather : bool
        When true, the link rows are inner-joined with
        ``/home/student/data/cleanweather.csv`` on (date, hour) and only a
        fixed subset of columns is returned.

    Returns
    -------
    pandas.DataFrame or None
        The link data, or ``None`` (after printing an error) when the csv
        for this pair of stops is not on disk.

    Raises
    ------
    NotImplementedError
        If ``src == 'db'``.
    ValueError
        If ``src`` is neither ``'file'`` nor ``'db'``.
    """
    import os

    # Previously any src other than 'file' fell through and crashed later
    # with an UnboundLocalError on ``df``; fail early and say why instead.
    if src == 'db':
        raise NotImplementedError(
            "get_stop_link: src='db' is not implemented yet")
    if src != 'file':
        raise ValueError(
            "get_stop_link: unknown src %r (expected 'file' or 'db')" % (src,))

    # Build the path once so the existence check and the read can never
    # disagree (the check used to look under '/data/stops/' while the read
    # used '/home/student/data/stops/').
    link_path = ('/home/student/data/stops/' + str(stopA) + '/' +
                 str(stopB) + '.csv')
    if not os.path.exists(link_path):
        print('Error - stop link data not on disk')
        return None

    # Imported only once we know there is something to read.
    import pandas as pd
    from dbanalysis import headers as hds

    df = pd.read_csv(link_path, names=hds.get_stop_link_headers())
    df['stopA'] = stopA
    df['stopB'] = stopB

    if not merge_weather:
        return df

    # Split the weather timestamp into the (date, hour) join key.
    weather = pd.read_csv('/home/student/data/cleanweather.csv')
    weather['date'] = pd.to_datetime(weather['date'])
    weather['hour'] = weather['date'].dt.hour
    weather['date'] = weather['date'].dt.date

    # Build the same key on the link rows.
    # NOTE(review): '// 3600' presumes actualtime_arr_from is seconds since
    # midnight - confirm against the data dictionary.
    df['dt'] = pd.to_datetime(df['dayofservice'],
                              format="%d-%b-%y %H:%M:%S")
    df['date'] = df['dt'].dt.date
    df['hour'] = df['actualtime_arr_from'] // 3600

    cols = [
        'dayofservice', 'tripid', 'plannedtime_arr_from',
        'plannedtime_dep_from', 'actualtime_arr_from',
        'actualtime_dep_from', 'plannedtime_arr_to', 'actualtime_arr_to',
        'routeid', 'stopA', 'stopB', 'hour', 'dewpt', 'msl', 'rain',
        'rhum', 'temp', 'vappr', 'wetb'
    ]
    merged = pd.merge(df, weather, on=['date', 'hour'])[cols]
    # Row counts before/after the inner join show how many link rows had no
    # matching weather record.
    print(len(df), len(merged))
    return merged
示例#2
0
def prep_test_stop(filename, weather, fromstop, tostop):
    """
    Load one stop-link csv and join it with weather data.

    Parameters
    ----------
    filename
        Path of a header-less stop-link csv; column names come from
        ``hds.get_stop_link_headers()``.
    weather : pandas.DataFrame
        Weather table with ``date`` and ``hour`` columns to join on. Any
        ``dt`` column is left out of the join so it cannot clash with the
        ``dt`` column built here. The caller's frame is NOT modified.
    fromstop, tostop
        Identifiers of the two stops; stored in the ``fromstop`` /
        ``tostop`` columns and passed to ``get_stop_distance``.

    Returns
    -------
    pandas.DataFrame
        Link rows inner-joined with ``weather`` on (date, hour), with every
        row containing a NaN removed.
    """
    from dbanalysis import headers as hds

    df = pd.read_csv(filename, names=hds.get_stop_link_headers())
    df['fromstop'] = fromstop
    df['tostop'] = tostop
    df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_arr_from']
    df['distance'] = stop_getter().get_stop_distance(fromstop, tostop)
    # NOTE(review): presumes the time columns are in seconds, making this
    # "distance units per hour" - confirm. A zero traveltime yields inf,
    # which dropna() below does not remove.
    df['speed'] = df['distance'] / (df['traveltime'] / 3600)

    # Calendar features derived from the service date.
    df['dt'] = pd.to_datetime(df['dayofservice'], format="%d-%b-%y %H:%M:%S")
    df['date'] = df['dt'].dt.date
    df['day'] = df['dt'].dt.dayofweek
    df['month'] = df['dt'].dt.month
    df['hour'] = df['actualtime_arr_from'] // 3600
    df['year'] = df['dt'].dt.year

    # Drop on a copy: the previous in-place drop altered the caller's
    # DataFrame, so calling this twice with the same weather table raised
    # KeyError. errors='ignore' also accepts a table already lacking 'dt'.
    weather_no_dt = weather.drop('dt', axis=1, errors='ignore')
    df = pd.merge(df, weather_no_dt, on=['date', 'hour'])
    return df.dropna()
        d[str(day)+'_'+str(hour)+'speed']=[]



from dbanalysis import headers as hds

# Walk /home/student/data/stops/<fromstop>/<tostop>.csv and, for every link
# file, append one entry per (day, hour) slot to the lists held in ``d``.
stop_dirs = os.listdir('/home/student/data/stops')
for fromstop in stop_dirs:
    for tostop in os.listdir('/home/student/data/stops/' + fromstop):
        # 'orphans.csv' is not a link to another stop; skip it.
        if tostop == 'orphans.csv':
            continue

        # File name is '<tostop id>.csv', directory name is the fromstop id.
        ts = int(tostop.split('.')[0])
        fs = int(fromstop)

        df = prep_stop2(
            pd.read_csv('/home/student/data/stops/' + fromstop + '/' + tostop,
                        names=hds.get_stop_link_headers()),
            fs,
            ts,
        )
        d['fromstop'].append(fs)
        d['tostop'].append(ts)

        # prep_stop2's result is indexed by '<day>_<hour>dwell' and
        # '<day>_<hour>speed'; copy each slot into the matching list of d.
        for day in range(7):
            for hour in range(24):
                slot = str(day) + '_' + str(hour)
                d[slot + 'dwell'].append(df[slot + 'dwell'])
                d[slot + 'speed'].append(df[slot + 'speed'])


import pickle

# Persist the collected matrix for later use.
with open('/home/student/dbanalysis/dbanalysis/resources/dwellmatrix.pickle',
          'wb') as handle:
    pickle.dump(d, handle, protocol=pickle.HIGHEST_PROTOCOL)
    for sB in files:
        count += 1
        print(count)
        if count % 100 == 0:
            print(count)
        stopB = sB.split('.')[0]
        d[stopA][stopB] = {}
        distance = s_getter.get_stop_distance(stopA, stopB)

        if (not isinstance(distance, float)) and (not isinstance(
                distance, int)):

            continue
        d[stopA][stopB]['distance'] = distance
        df = pd.read_csv(base_dir + '/' + stopA + '/' + sB,
                         names=hds.get_stop_link_headers())
        df['dt'] = pd.to_datetime(df['dayofservice'], format=time_format)
        df['hour'] = df['actualtime_arr_from'] // 3600

        df['day'] = df['dt'].dt.dayofweek
        df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_arr_from']

        #big mistake here

        for day in range(0, 7):
            d[stopA][stopB][day] = {}

            for hour in range(0, 24):
                x = df[(df['day'] == day) & (df['hour'] == hour)]
                if x.shape[0] > 1:
def get_stop_link(stopA,stopB, src='file',merge_weather=False):
    
    """
    Retrieve the data describing the link between two stops

    stopA / stopB are converted with str() to locate
    /home/student/data/stops/<stopA>/<stopB>.csv and are stored as-is in the
    'stopA' / 'stopB' columns of the result.

    src:
        'file' - read the link csv from disk.
        'db'   - reserved for a database backend; raises NotImplementedError.
        Anything else raises ValueError.

    merge_weather:
        If true, inner-join the link rows with
        /home/student/data/cleanweather.csv on (date, hour) and return only a
        fixed subset of columns.

    Returns a pandas DataFrame, or None (after printing an error) when the
    csv for this pair of stops is not on disk.
    """
    import os

    # A src other than 'file' used to fall through and fail later with an
    # UnboundLocalError on ``df``; report the real problem up front.
    if src == 'db':
        raise NotImplementedError("get_stop_link: src='db' is not implemented yet")
    if src != 'file':
        raise ValueError("get_stop_link: unknown src %r (expected 'file' or 'db')" % (src,))

    # One path for both the existence check and the read.
    csv_path = '/home/student/data/stops/' + str(stopA) + '/' + str(stopB) + '.csv'
    if not os.path.exists(csv_path):
        print('Error - stop link data not on disk')
        return None

    # Imported only once there is a file to read.
    import pandas as pd
    from dbanalysis import headers as hds

    df = pd.read_csv(csv_path, names=hds.get_stop_link_headers())
    df['stopA'] = stopA
    df['stopB'] = stopB

    if not merge_weather:
        return df

    # Weather side of the join key: split the timestamp into date and hour.
    weather = pd.read_csv('/home/student/data/cleanweather.csv')
    weather['date'] = pd.to_datetime(weather['date'])
    weather['hour'] = weather['date'].dt.hour
    weather['date'] = weather['date'].dt.date

    # Link side of the join key.
    # NOTE(review): '//3600' presumes actualtime_arr_from is in seconds - confirm.
    df['dt'] = pd.to_datetime(df['dayofservice'], format="%d-%b-%y %H:%M:%S")
    df['date'] = df['dt'].dt.date
    df['hour'] = df['actualtime_arr_from'] // 3600

    cols = ['dayofservice', 'tripid', 'plannedtime_arr_from',
            'plannedtime_dep_from', 'actualtime_arr_from', 'actualtime_dep_from',
            'plannedtime_arr_to', 'actualtime_arr_to', 'routeid', 'stopA', 'stopB',
            'hour', 'dewpt', 'msl', 'rain', 'rhum', 'temp', 'vappr',
            'wetb']
    return pd.merge(df, weather, on=['date', 'hour'])[cols]