# Scrape the daily forecast of every city and gather the per-city frames.
# The frames are concatenated in one step: DataFrame.append was removed in
# pandas 2.0, and calling it inside the loop re-copied the accumulated frame
# on every iteration.
import pandas as pd  # harmless if pandas is already imported at the top of the file

city_frames = []
for i, city in enumerate(cities):
    url = urls[i]  # urls is indexed in parallel with cities
    cdf = create_weather_df(url, http, current_time)
    cdf['city'] = city
    city_frames.append(cdf)

# Same defaults as the former df.append(cdf): original index kept, columns
# not sorted.
df = pd.concat([df] + city_frames)

# These columns are not filled by this scraper; they are created as None,
# presumably so the frame matches the DailyPrediction table layout
# (TODO confirm against the schema).
for column in ('wind_speed', 'humidity', 'precipitation_per',
               'precipitation_l', 'wind_direction', 'snow', 'uvi'):
    df[column] = None

df['website'] = 'https://www.wetter.de'

# The acquisition time is formatted with hour resolution and then reduced to a
# plain date; the predicted timestamps ('%Y%m%d%H%M' strings) are likewise
# reduced to dates.
df['date_of_acquisition'] = current_time.strftime('%Y%m%d%H')
df['date_of_acquisition'] = df['date_of_acquisition'].apply(
    lambda stamp: datetime.datetime.strptime(stamp, '%Y%m%d%H').date())
df['date_for_which_weather_is_predicted'] = (
    df['date_for_which_weather_is_predicted'].apply(
        lambda stamp: datetime.datetime.strptime(stamp, '%Y%m%d%H%M').date()))

try:
    db_manager.insert_df("DailyPrediction", df)
finally:
    # The pickle backup is written whether or not the insert succeeded; an
    # insert error still propagates once the backup is on disk.
    # NOTE(review): there is no separator between 'daily' and the timestamp,
    # so the file is created as data_wetter_de/daily<YYYYmmddHH>.pkl. An
    # earlier version of this script used a 'daily/' directory; confirm which
    # location downstream readers expect before changing the path.
    filename = os.path.expanduser(
        '~/Documents/webscraping_2018/data_wetter_de/daily')
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H')
    filename += timestamp + ".pkl"
    df.to_pickle(filename)
Exemplo n.º 2
0
                wind_text = wind_mapping[wind_text_ger]
            else:
                wind_text = None
            all_features[ci][i][hi][7] = wind_text
            temp_condition = temp_info.find('span',class_="temperature-condition").text
            all_features[ci][i][hi][8] = temp_condition
# Flatten the feature array to one row per prediction and one column per
# feature.
all_features = all_features.reshape(number_of_predictions, number_of_features)

# The position of each name is the column of all_features that feeds it.
feature_columns = (
    'city',
    'date_for_which_weather_is_predicted',
    'temperature',
    'wind_speed',
    'humidity',
    'precipitation_per',
    'precipitation_l',
    'wind_direction',
    'condition',
)
for column_index, column_name in enumerate(feature_columns):
    hourly_dict[column_name] = list(all_features[:, column_index])

# snow and uvi are not taken from all_features; every prediction gets None.
for column_name in ('snow', 'uvi'):
    hourly_dict[column_name] = [None] * number_of_predictions

df = pd.DataFrame(data=hourly_dict)

# Turn the timestamp strings into date objects.
# NOTE(review): .date() discards the hour and minute that were just parsed,
# even though this frame goes into the HourlyPrediction table -- confirm that
# this is intended.
df['date_of_acquisition'] = df['date_of_acquisition'].apply(
    lambda stamp: datetime.datetime.strptime(stamp, '%Y%m%d%H').date())
df['date_for_which_weather_is_predicted'] = (
    df['date_for_which_weather_is_predicted'].apply(
        lambda stamp: datetime.datetime.strptime(stamp, '%Y%m%d%H%M').date()))

try:
    db_manager.insert_df("HourlyPrediction", df)
finally:
    # The pickle backup is written whether or not the insert succeeded.
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H')
    filename = os.path.expanduser(
        '~/Documents/webscraping_2018/data_wetter_de/hourly_period_')
    filename = filename + timestamp + ".pkl"
    df.to_pickle(filename)
Exemplo n.º 3
0
        else:
            daily_dict['precipitation_per'].append(None)

        daily_dict['precipitation_l'].append(None)
        daily_dict['condition'].append(None)
        daily_dict['snow'].append(None)
        daily_dict['uvi'].append(None)

# Build the daily frame, store it in the database and keep a pickle backup.
# Each list is wrapped in a Series, which lets pandas align lists of unequal
# length (shorter columns are padded with NaN instead of raising).
daily_columns = {
    key: pd.Series(values) for key, values in daily_dict.items()
}
df_daily = pd.DataFrame(daily_columns)
print(df_daily)

# Both timestamp columns are stringified, parsed as '%Y%m%d%H' and reduced to
# plain dates.
df_daily['date_of_acquisition'] = df_daily['date_of_acquisition'].apply(
    lambda stamp: datetime.datetime.strptime(str(stamp), '%Y%m%d%H').date())
print(df_daily['date_for_which_weather_is_predicted'])
df_daily['date_for_which_weather_is_predicted'] = (
    df_daily['date_for_which_weather_is_predicted'].apply(
        lambda stamp: datetime.datetime.strptime(str(stamp), '%Y%m%d%H').date()))

try:
    db_manager.insert_df("DailyPrediction", df_daily)
finally:
    # The pickle backup is written whether or not the insert succeeded.
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H')
    filename = os.path.expanduser(
        '~/Documents/webscraping_2018/data_bild/daily/daily_')
    filename = filename + timestamp + ".pkl"
    df_daily.to_pickle(filename)

#SECOND PART: FOUR-TIMES-A-DAY PREDICTIONS
#scrape the specified cities for the four daily forecast slots (morning, afternoon,
# evening, night; see PREDICTION_TIMES) and extract temperature, precipitation in
# percent and condition

# Time offsets of the four forecast slots, in chronological order; presumably
# they are added to midnight of the forecast day (confirm at the point of use).
PREDICTION_TIMES = [
    datetime.timedelta(hours=8),          # morning
    datetime.timedelta(hours=14),         # afternoon
    datetime.timedelta(hours=20),         # evening
    datetime.timedelta(days=1, hours=2),  # night (tomorrow)
]