def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean one raw pickle and store the result as a processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to
            ``FileFunction.get_processed_pkl_path``.
        pkl_file_name: File name handed to
            ``FileFunction.get_processed_pkl_path``.

    Returns:
        The processed pickle path produced by
        ``FileFunction.get_processed_pkl_path``.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # Date and Time arrive as separate fields, so build a combined one.
    DataFrameFunction.generate_data_time_field(data_frame)
    # No set_index('date_time') call here: without inplace=True or a
    # re-assignment it leaves the frame unchanged, so 'date_time' simply
    # stays an ordinary column.

    # Values such as "2,500" occur, so strip the thousands separator.
    for column in ('demand', 'total_supply_capacity'):
        data_frame[column] = (
            data_frame[column].astype(str).str.replace(',', '').astype(int))

    # Converted to float because later calculations fail otherwise.
    data_frame['thermal'] = (
        data_frame['thermal'].astype(str).str.replace(',', '').astype(float))

    # Align with the other utilities: convert from 10,000 kW to MWh.
    DataFrameFunction.to_mwh(data_frame)

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean one raw pickle and store the result as a processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to
            ``FileFunction.get_processed_pkl_path``.
        pkl_file_name: File name handed to
            ``FileFunction.get_processed_pkl_path``.

    Returns:
        The processed pickle path produced by
        ``FileFunction.get_processed_pkl_path``.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # Date and Time arrive as separate fields, so build a combined one.
    DataFrameFunction.generate_data_time_field(data_frame)
    # No set_index('date_time') call here: without inplace=True or a
    # re-assignment it leaves the frame unchanged, so 'date_time' simply
    # stays an ordinary column.

    # Compute the total supply capacity.
    data_frame['total_supply_capacity'] = (
        DataFrameFunction.get_total_supply_capacity(data_frame))

    # Converted to float because later calculations fail otherwise.
    for column in ('solar_output_control', 'wind_output_control'):
        cleaned = data_frame[column].astype(str).str.replace(',', '')
        # The source marks "no value" with a Unicode minus sign; use 0.
        cleaned = cleaned.astype(str).str.replace('−', '0')
        data_frame[column] = cleaned.astype(float)

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Tag one raw pickle with the company and store it as processed.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to
            ``FileFunction.get_processed_pkl_path``.
        pkl_file_name: File name handed to
            ``FileFunction.get_processed_pkl_path``.

    Returns:
        The processed pickle path produced by
        ``FileFunction.get_processed_pkl_path``.
    """
    frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    frame['company'] = cls.COMPANY_NAME

    # Date and Time arrive as separate fields, so build a combined one.
    DataFrameFunction.generate_data_time_field(frame)

    destination = FileFunction.get_processed_pkl_path(
        root_path,
        cls.COMPANY_NAME,
        pkl_file_name,
    )
    FileFunction.create_pkl_file(destination, frame)
    return destination
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Reshape one raw pickle and store the result as a processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to
            ``FileFunction.get_processed_pkl_path``.
        pkl_file_name: File name handed to
            ``FileFunction.get_processed_pkl_path``.

    Returns:
        The processed pickle path produced by
        ``FileFunction.get_processed_pkl_path``.
    """
    frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    frame['company'] = cls.COMPANY_NAME

    # tohokuepco stores a combined datetime, so split it into date and
    # time to match the Tepco layout.
    DataFrameFunction.create_date_and_time_from_datetime(frame)

    # Turn the 'date_time' strings into real datetime values.
    parsed_date_time = pandas.to_datetime(
        frame['date_time'], format='%Y/%m/%d %H:%M')
    frame['date_time'] = parsed_date_time

    # Compute the total supply capacity.
    total_capacity = DataFrameFunction.get_total_supply_capacity(frame)
    frame['total_supply_capacity'] = total_capacity

    destination = FileFunction.get_processed_pkl_path(
        root_path,
        cls.COMPANY_NAME,
        pkl_file_name,
    )
    FileFunction.create_pkl_file(destination, frame)
    return destination
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean one raw pickle and store the result as a processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to
            ``FileFunction.get_processed_pkl_path``.
        pkl_file_name: File name handed to
            ``FileFunction.get_processed_pkl_path``.

    Returns:
        The processed pickle path produced by
        ``FileFunction.get_processed_pkl_path``.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # Date and Time arrive as separate fields, so build a combined one.
    DataFrameFunction.generate_data_time_field(data_frame)
    # No set_index('date_time') call here: without inplace=True or a
    # re-assignment it leaves the frame unchanged, so 'date_time' simply
    # stays an ordinary column.

    # Power sources that Okinawa does not have are filled with 0.
    for column in ('nuclear', 'geothermal', 'pumping', 'interconnection'):
        data_frame[column] = 0

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean one raw pickle and store the result as a processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to
            ``FileFunction.get_processed_pkl_path``.
        pkl_file_name: File name handed to
            ``FileFunction.get_processed_pkl_path``.

    Returns:
        The processed pickle path produced by
        ``FileFunction.get_processed_pkl_path``.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # Date and Time arrive as separate fields, so build a combined one.
    DataFrameFunction.generate_data_time_field(data_frame)
    # No set_index('date_time') call here: without inplace=True or a
    # re-assignment it leaves the frame unchanged, so 'date_time' simply
    # stays an ordinary column.

    # Compute the total supply capacity.
    data_frame['total_supply_capacity'] = (
        DataFrameFunction.get_total_supply_capacity(data_frame))

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean one raw pickle and store the result as a processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to
            ``FileFunction.get_processed_pkl_path``.
        pkl_file_name: File name handed to
            ``FileFunction.get_processed_pkl_path``.

    Returns:
        The processed pickle path produced by
        ``FileFunction.get_processed_pkl_path``.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # Date and Time arrive as separate fields, so build a combined one.
    DataFrameFunction.generate_data_time_field(data_frame)
    # No set_index('date_time') call here: without inplace=True or a
    # re-assignment it leaves the frame unchanged, so 'date_time' simply
    # stays an ordinary column.

    # Converted to float because later calculations fail otherwise.
    # Geothermal is given as a hyphen, which is treated as 0.
    # NOTE(review): every '-' is replaced, so a negative value such as
    # "-5" would become "05"; confirm geothermal is never negative.
    data_frame['geothermal'] = (
        data_frame['geothermal'].astype(str).str.replace('-', '0')
        .astype(float))

    # Align with the other utilities: convert from 10,000 kW to MWh.
    DataFrameFunction.to_mwh(data_frame)

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def get_daily_data(cls, root_path, company_name, unit, year_value,
                   month_value, date_value):
    """Return the rows of one calendar day as a JSON-decoded object.

    Args:
        root_path: Root directory handed to
            ``DataFrameFunction.get_data_frame_from_merged_pkl``.
        company_name: Company whose merged pickle is loaded.
        unit: Resolution of the returned data; only ``'1H'`` is supported.
        year_value: Year of the requested day (anything ``int()`` accepts).
        month_value: Month of the requested day.
        date_value: Day of month of the requested day.

    Returns:
        The object obtained by decoding ``DataFrame.to_json`` output for
        the rows whose ``'date'`` equals the requested day.

    Raises:
        ValueError: If ``unit`` is not ``'1H'``, or the date parts do not
            form a valid date.
    """
    import re  # local import: only needed for the timestamp clean-up

    # Fail early with a clear message; otherwise an unknown unit would only
    # surface as a TypeError from json.loads(None).
    if unit != '1H':
        raise ValueError(f"unsupported unit {unit!r}; expected '1H'")

    data_frame = DataFrameFunction.get_data_frame_from_merged_pkl(
        root_path, company_name)
    target_date = datetime(int(year_value), int(month_value),
                           int(date_value))
    data_frame = data_frame[data_frame['date'] == target_date]

    result = data_frame.to_json(date_format='iso')
    # Turn "2020-01-01T12:00:00.000Z" into "2020-01-01 12:00". The "T" is
    # rewritten only inside ISO timestamps so that a capital "T" occurring
    # in any other string of the payload is left intact.
    result = re.sub(r'(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}:\d{2})', r'\1 \2',
                    result)
    result = result.replace(':00.000Z', '')
    return json.loads(result)
def sum_group_by_year(cls, data_frame):
    """Sum the power columns per year and return the totals as JSON.

    Args:
        data_frame: Frame whose index exposes ``.year`` (a datetime-like
            index) and which contains the power columns listed below.

    Returns:
        The JSON string produced by ``to_json()`` on the per-year totals
        after ``DataFrameFunction.to_float_and_round``.
    """
    columns = [
        'demand', 'nuclear', 'thermal', 'hydro', 'geothermal', 'biomass',
        'solar', 'solar_output_control', 'wind', 'wind_output_control',
        'pumping', 'interconnection', 'total_supply_capacity'
    ]

    df_y = data_frame.set_index([data_frame.index.year])
    df_y.index.names = ['year']
    df_y.sort_index(inplace=True)

    # groupby(level=...).sum() replaces DataFrame.sum(level=...), which is
    # deprecated and no longer exists in pandas 2.0. sort=False keeps the
    # (already sorted) order, and numeric_only=False keeps every selected
    # column in the result regardless of its dtype.
    result = (df_y[columns]
              .groupby(level=['year'], sort=False)
              .sum(numeric_only=False))
    return DataFrameFunction.to_float_and_round(result).to_json()
def correct_data(cls, urls, root_path, reflesh):
    """Fetch and process every URL, then merge the processed pickles.

    Args:
        urls: Iterable of source URLs, one per data file.
        root_path: Root directory handed to the file and merge helpers.
        reflesh: Flag forwarded unchanged to ``__correct_ex_data``.

    Returns:
        The value returned by ``DataFrameFunction.merge_ex_data`` for the
        processed pickles.

    Raises:
        Exception: Any error raised while handling a URL is reported on
            stdout and re-raised unchanged.
    """
    processed_pkl_paths = []
    for url in urls:
        # Start with the URL as label so the error report below never
        # touches an unbound name, nor a stale one from the previous
        # iteration, when deriving the file name itself fails.
        pkl_file_name = url
        try:
            pkl_file_name = FileFunction.get_pkl_file_name(url)
            original_pkl_path = cls.__correct_ex_data(
                root_path, pkl_file_name, url, reflesh)
            processed_pkl_path = cls.__process_ex_data(
                original_pkl_path, root_path, pkl_file_name)
            processed_pkl_paths.append(processed_pkl_path)
        except Exception as e:
            print(f'{pkl_file_name} => {e}')
            raise

    merged_pkl_path = DataFrameFunction.merge_ex_data(
        processed_pkl_paths, root_path, cls.COMPANY_NAME)
    return merged_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean one raw pickle and store the result as a processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to
            ``FileFunction.get_processed_pkl_path``.
        pkl_file_name: File name handed to
            ``FileFunction.get_processed_pkl_path``.

    Returns:
        The processed pickle path produced by
        ``FileFunction.get_processed_pkl_path``.
    """
    frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    frame['company'] = cls.COMPANY_NAME

    # Kyuden stores a combined datetime, so split it into date and time to
    # match the Tepco layout.
    DataFrameFunction.create_date_and_time_from_datetime(frame)

    # Values such as "2,500" occur, so strip the thousands separator; the
    # columns become float because later calculations fail otherwise.
    for name in ('demand', 'nuclear', 'thermal'):
        without_commas = frame[name].astype(str).str.replace(',', '')
        frame[name] = without_commas.astype(float)

    # These columns may also hold missing markers ("None" / "nan"), which
    # are mapped to "0" before the separators are stripped.
    for name in ('solar_output_control', 'pumping', 'interconnection'):
        text = frame[name].astype(str).str.replace('None', '0')
        text = text.str.replace('nan', '0')
        text = text.str.replace(',', '')
        frame[name] = text.astype(float)

    # Turn the 'date_time' strings into real datetime values.
    parsed_date_time = pandas.to_datetime(
        frame['date_time'], format='%Y/%m/%d %H:%M')
    frame['date_time'] = parsed_date_time

    # Compute the total supply capacity.
    total_capacity = DataFrameFunction.get_total_supply_capacity(frame)
    frame['total_supply_capacity'] = total_capacity

    destination = FileFunction.get_processed_pkl_path(
        root_path,
        cls.COMPANY_NAME,
        pkl_file_name,
    )
    FileFunction.create_pkl_file(destination, frame)
    return destination
def correct_data(cls, urls, root_path, reflesh):
    """Fetch and process every URL, then merge the processed pickles.

    Each entry of ``urls`` is appended to ``cls.ROOT_URL`` together with a
    running number that starts at ``cls.KEY_NUMBER``.

    Args:
        urls: Iterable of URL suffixes, one per data file.
        root_path: Root directory handed to the file and merge helpers.
        reflesh: Flag forwarded unchanged to ``__correct_ex_data``.

    Returns:
        The value returned by ``DataFrameFunction.merge_ex_data`` for the
        processed pickles.

    Raises:
        Exception: Any error raised while handling a URL is reported on
            stdout and re-raised unchanged.
    """
    processed_pkl_paths = []
    for number, url in enumerate(urls, start=cls.KEY_NUMBER):
        # Start with the URL suffix as label so the error report below
        # never touches an unbound name, nor a stale one from the previous
        # iteration, when deriving the file name itself fails.
        pkl_file_name = url
        try:
            target_url = f'{cls.ROOT_URL}{number}/{url}'
            pkl_file_name = FileFunction.get_pkl_file_name(target_url)
            original_pkl_path = cls.__correct_ex_data(
                root_path, pkl_file_name, target_url, reflesh)
            processed_pkl_path = cls.__process_ex_data(
                original_pkl_path, root_path, pkl_file_name)
            processed_pkl_paths.append(processed_pkl_path)
        except Exception as e:
            print(f'{pkl_file_name} => {e}')
            raise

    merged_pkl_path = DataFrameFunction.merge_ex_data(
        processed_pkl_paths, root_path, cls.COMPANY_NAME)
    return merged_pkl_path
def sum_group_by_year_and_month_and_date(cls, data_frame):
    """Sum the power columns per calendar day and return them as JSON.

    The input frame is left unmodified.

    Args:
        data_frame: Frame whose index exposes ``.year``, ``.month`` and
            ``.date`` (a datetime-like index) and which contains the power
            columns listed below.

    Returns:
        The JSON string of the per-day totals after
        ``DataFrameFunction.to_float_and_round``, with the
        ``T00:00:00.000Z`` suffix removed from the text.
    """
    columns = [
        'demand', 'nuclear', 'thermal', 'hydro', 'geothermal', 'biomass',
        'solar', 'solar_output_control', 'wind', 'wind_output_control',
        'pumping', 'interconnection', 'total_supply_capacity'
    ]

    # No helper column is written into ``data_frame``: the grouping key is
    # the 'date_string' index level built right here.
    df_ymd = data_frame.set_index([
        data_frame.index.year, data_frame.index.month,
        data_frame.index.date
    ])
    df_ymd.index.names = ['year', 'month', 'date_string']
    df_ymd.sort_index(inplace=True)

    # groupby(level=...).sum() replaces DataFrame.sum(level=...), which is
    # deprecated and no longer exists in pandas 2.0. sort=False keeps the
    # (already sorted) order, and numeric_only=False keeps every selected
    # column in the result regardless of its dtype.
    result = (df_ymd[columns]
              .groupby(level=['date_string'], sort=False)
              .sum(numeric_only=False))
    return DataFrameFunction.to_float_and_round(result).to_json(
        date_format='iso').replace('T00:00:00.000Z', '')
def get(cls, root_path, company_name, unit, from_value, to_value):
    """Return aggregated totals for a month range as a JSON-decoded object.

    Args:
        root_path: Root directory handed to
            ``DataFrameFunction.get_data_frame_from_merged_pkl``.
        company_name: Company whose merged pickle is loaded.
        unit: Aggregation level: ``'y'`` (per year), ``'ym'`` (per month)
            or ``'ymd'`` (per day).
        from_value: First month of the range as ``'YYYY/MM'``.
        to_value: Last month of the range as ``'YYYY/MM'``; the whole
            month is included.

    Returns:
        The decoded JSON produced by the matching ``sum_group_by_*``
        method.

    Raises:
        ValueError: If ``unit`` is not one of the supported values, or a
            date part is not a valid number.
    """
    # Fail early with a clear message; otherwise an unknown unit would only
    # surface as a TypeError from json.loads(None).
    if unit not in ('y', 'ym', 'ymd'):
        raise ValueError(
            f"unsupported unit {unit!r}; expected 'y', 'ym' or 'ymd'")

    data_frame = DataFrameFunction.get_data_frame_from_merged_pkl(
        root_path, company_name)

    from_values = from_value.split('/')
    to_values = to_value.split('/')
    from_date = datetime(int(from_values[0]), int(from_values[1]), 1)
    # First day of the end month plus one month minus one day gives the
    # last day of the end month.
    to_date = (datetime(int(to_values[0]), int(to_values[1]), 1)
               + relativedelta(months=1, days=-1))

    data_frame = data_frame[(from_date <= data_frame['date'])
                            & (data_frame['date'] <= to_date)]

    if unit == 'y':
        result = cls.sum_group_by_year(data_frame)
    elif unit == 'ym':
        result = cls.sum_group_by_year_and_month(data_frame)
    else:
        result = cls.sum_group_by_year_and_month_and_date(data_frame)
    return json.loads(result)
def count(cls, root_path, company_name):
    """Return the number of index entries in a company's merged data.

    Args:
        root_path: Root directory handed to
            ``DataFrameFunction.get_data_frame_from_merged_pkl``.
        company_name: Company whose merged pickle is loaded.

    Returns:
        ``len()`` of the loaded frame's index.
    """
    merged = DataFrameFunction.get_data_frame_from_merged_pkl(
        root_path,
        company_name,
    )
    row_count = len(merged.index)
    return row_count
def correct_data(cls, root_path, merged_pkl_path_list):
    """Merge the given merged pickles via ``merge_japan_data``.

    Args:
        root_path: Root directory handed to
            ``DataFrameFunction.merge_japan_data``.
        merged_pkl_path_list: Merged pickle paths to combine.

    Returns:
        None; the helper's return value is not propagated.
    """
    company = cls.COMPANY_NAME
    DataFrameFunction.merge_japan_data(
        root_path,
        company,
        merged_pkl_path_list,
    )