def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean the original pickle and store the result as the processed pickle.

    Loads the frame saved at ``original_pkl_path``, tags it with the company
    name, builds the combined date/time field, strips thousands separators
    from the numeric columns, converts units and writes the processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to ``FileFunction`` to build the
            processed-pickle path.
        pkl_file_name: File name used for the processed pickle.

    Returns:
        The path the processed pickle was written to.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # The source keeps Date and Time in separate columns, so build a
    # combined field from them.
    DataFrameFunction.generate_data_time_field(data_frame)
    # NOTE: 'date_time' stays a regular column. DataFrame.set_index returns a
    # new frame, so calling it without using the result only made a copy.

    # Values such as "2,500" carry thousands separators; strip them before
    # casting. 'thermal' is cast to float so later calculations work.
    for column, dtype in (
            ('demand', int),
            ('total_supply_capacity', int),
            ('thermal', float),
    ):
        data_frame[column] = (
            data_frame[column].astype(str).str.replace(',', '').astype(dtype))

    # Align the units (10,000 kW -> MWh) with the other power companies.
    DataFrameFunction.to_mwh(data_frame)

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean the original pickle and store the result as the processed pickle.

    Loads the frame saved at ``original_pkl_path``, tags it with the company
    name, builds the combined date/time field, computes the total supply
    capacity, converts the output-control columns to float and writes the
    processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to ``FileFunction`` to build the
            processed-pickle path.
        pkl_file_name: File name used for the processed pickle.

    Returns:
        The path the processed pickle was written to.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # The source keeps Date and Time in separate columns, so build a
    # combined field from them.
    DataFrameFunction.generate_data_time_field(data_frame)
    # NOTE: 'date_time' stays a regular column. DataFrame.set_index returns a
    # new frame, so calling it without using the result only made a copy.

    # Compute the total supply capacity.
    data_frame['total_supply_capacity'] = (
        DataFrameFunction.get_total_supply_capacity(data_frame))

    # Convert to float so later calculations work: drop thousands separators
    # and replace the '−' placeholder with '0' before casting.
    for column in ('solar_output_control', 'wind_output_control'):
        data_frame[column] = (
            data_frame[column]
            .astype(str)
            .str.replace(',', '')
            .str.replace('−', '0')
            .astype(float))

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the original-pickle path, (re)building the pickle when needed.

    The pickle is rebuilt from ``url`` when ``reflesh`` is truthy or when no
    file exists at the target path yet; otherwise the existing file is kept.

    Args:
        root_path: Root directory handed to ``FileFunction`` to build the path.
        pkl_file_name: File name used for the original pickle.
        url: Location the data is read from.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path of the original pickle.
    """
    target_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    # Nothing to do when a cached pickle exists and no refresh was requested.
    if not reflesh and os.path.exists(target_path):
        return target_path

    raw_text = FileFunction.get_decoded_data(url)
    parsed_frame = cls.__parse(raw_text)
    FileFunction.create_pkl_file(target_path, parsed_frame)
    return target_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the original-pickle path, (re)building the pickle when needed.

    The pickle is rebuilt from ``url`` when ``reflesh`` is truthy or when no
    file exists at the target path yet; otherwise the existing file is kept.

    Args:
        root_path: Root directory handed to ``FileFunction`` to build the path.
        pkl_file_name: File name used for the original pickle.
        url: Location the data is read from.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path of the original pickle.
    """
    target_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    # Nothing to do when a cached pickle exists and no refresh was requested.
    if not reflesh and os.path.exists(target_path):
        return target_path

    raw_text = FileFunction.get_decoded_data(url)
    # Some rows contain stray half-width characters (e.g. "2016/10/1 1:00"),
    # so the text is cleaned up before it is parsed.
    cleaned_csv = cls.__get_tohokuepco_csv(raw_text)
    parsed_frame = cls.__parse(cleaned_csv)
    FileFunction.create_pkl_file(target_path, parsed_frame)
    return target_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Tag the original frame and store it as the processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to ``FileFunction`` to build the
            processed-pickle path.
        pkl_file_name: File name used for the processed pickle.

    Returns:
        The path the processed pickle was written to.
    """
    frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    frame['company'] = cls.COMPANY_NAME

    # The source keeps Date and Time in separate columns, so build a
    # combined field from them.
    DataFrameFunction.generate_data_time_field(frame)

    destination = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(destination, frame)
    return destination
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the original-pickle path, (re)building the pickle when needed.

    The pickle is rebuilt from ``url`` when ``reflesh`` is truthy or when no
    file exists at the target path yet; otherwise the existing file is kept.

    Args:
        root_path: Root directory handed to ``FileFunction`` to build the path.
        pkl_file_name: File name used for the original pickle.
        url: Location the data is read from.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path of the original pickle.
    """
    original_pkl_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    if reflesh or not os.path.exists(original_pkl_path):
        decoded_data = FileFunction.get_decoded_data(url)
        # Okiden data can contain blank lines and similar noise, so it has to
        # be corrected before it is read as CSV.
        okiden_csv = cls.__get_okiden_csv(decoded_data)
        data_frame = cls.__parse(okiden_csv)
        # NOTE: the frame is pickled with the index produced by __parse.
        # DataFrame.reset_index returns a new frame, so calling it without
        # using the result had no effect on what gets stored.
        FileFunction.create_pkl_file(original_pkl_path, data_frame)

    return original_pkl_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the original-pickle path, (re)building the pickle when needed.

    The pickle is rebuilt from ``url`` when ``reflesh`` is truthy or when no
    file exists at the target path yet; otherwise the existing file is kept.

    Args:
        root_path: Root directory handed to ``FileFunction`` to build the path.
        pkl_file_name: File name used for the original pickle.
        url: Location the data is read from.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path of the original pickle.
    """
    original_pkl_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    if reflesh or not os.path.exists(original_pkl_path):
        decoded_data = FileFunction.get_decoded_data(url)
        # Lines sometimes end with a superfluous ",", which is removed; the
        # rows that must be skipped can also differ, which is handled too.
        rikuden_csv = cls.__get_rikuden_csv(decoded_data)
        data_frame = cls.__parse(rikuden_csv)
        # NOTE: the frame is pickled with the index produced by __parse.
        # DataFrame.reset_index returns a new frame, so calling it without
        # using the result had no effect on what gets stored.
        FileFunction.create_pkl_file(original_pkl_path, data_frame)

    return original_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean the original pickle and store the result as the processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to ``FileFunction`` to build the
            processed-pickle path.
        pkl_file_name: File name used for the processed pickle.

    Returns:
        The path the processed pickle was written to.
    """
    frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    frame['company'] = cls.COMPANY_NAME

    # tohokuepco stores a single date-time value, so split it into date and
    # time to match the Tepco layout.
    DataFrameFunction.create_date_and_time_from_datetime(frame)

    # Turn the textual timestamps into real datetime values.
    raw_timestamps = frame['date_time']
    frame['date_time'] = pandas.to_datetime(
        raw_timestamps, format='%Y/%m/%d %H:%M')

    # Compute the total supply capacity.
    total_capacity = DataFrameFunction.get_total_supply_capacity(frame)
    frame['total_supply_capacity'] = total_capacity

    destination = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(destination, frame)
    return destination
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean the original pickle and store the result as the processed pickle.

    Loads the frame saved at ``original_pkl_path``, tags it with the company
    name, builds the combined date/time field, adds zero-filled columns for
    the missing power sources and writes the processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to ``FileFunction`` to build the
            processed-pickle path.
        pkl_file_name: File name used for the processed pickle.

    Returns:
        The path the processed pickle was written to.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # The source keeps Date and Time in separate columns, so build a
    # combined field from them.
    DataFrameFunction.generate_data_time_field(data_frame)
    # NOTE: 'date_time' stays a regular column. DataFrame.set_index returns a
    # new frame, so calling it without using the result only made a copy.

    # Power sources that do not exist in Okinawa are filled with 0.
    for column in ('nuclear', 'geothermal', 'pumping', 'interconnection'):
        data_frame[column] = 0

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean the original pickle and store the result as the processed pickle.

    Loads the frame saved at ``original_pkl_path``, tags it with the company
    name, builds the combined date/time field, computes the total supply
    capacity and writes the processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to ``FileFunction`` to build the
            processed-pickle path.
        pkl_file_name: File name used for the processed pickle.

    Returns:
        The path the processed pickle was written to.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # The source keeps Date and Time in separate columns, so build a
    # combined field from them.
    DataFrameFunction.generate_data_time_field(data_frame)
    # NOTE: 'date_time' stays a regular column. DataFrame.set_index returns a
    # new frame, so calling it without using the result only made a copy.

    # Compute the total supply capacity.
    data_frame['total_supply_capacity'] = (
        DataFrameFunction.get_total_supply_capacity(data_frame))

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the original-pickle path, (re)building the pickle when needed.

    The pickle is rebuilt from the Excel workbook at ``url`` when ``reflesh``
    is truthy or when no file exists at the target path yet; otherwise the
    existing file is kept.

    Args:
        root_path: Root directory handed to ``FileFunction`` to build the path.
        pkl_file_name: File name used for the original pickle.
        url: Location of the Excel workbook, passed to ``pandas.read_excel``.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path of the original pickle.
    """
    original_pkl_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    if reflesh or not os.path.exists(original_pkl_path):
        # The first nine rows of the sheet are skipped.
        data_frame_from_xls = pandas.read_excel(
            url, header=None, index_col=None,
            skiprows=[0, 1, 2, 3, 4, 5, 6, 7, 8])
        # The last row is not needed, so drop it.
        data_frame_from_xls.drop(
            data_frame_from_xls.tail(1).index, inplace=True)
        # Reshape the Excel content into a format that is easy to parse.
        yonden_csv = cls.__create_yonden_csv_from_xls(
            data_frame_from_xls.to_csv(index=False))
        data_frame = cls.__parse(yonden_csv)
        # No debug dump of the frame here: this loader writes nothing to
        # stdout.
        FileFunction.create_pkl_file(original_pkl_path, data_frame)

    return original_pkl_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean the original pickle and store the result as the processed pickle.

    Loads the frame saved at ``original_pkl_path``, tags it with the company
    name, builds the combined date/time field, converts the geothermal column
    to float, converts units and writes the processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to ``FileFunction`` to build the
            processed-pickle path.
        pkl_file_name: File name used for the processed pickle.

    Returns:
        The path the processed pickle was written to.
    """
    data_frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    data_frame['company'] = cls.COMPANY_NAME

    # The source keeps Date and Time in separate columns, so build a
    # combined field from them.
    DataFrameFunction.generate_data_time_field(data_frame)
    # NOTE: 'date_time' stays a regular column. DataFrame.set_index returns a
    # new frame, so calling it without using the result only made a copy.

    # Convert to float so later calculations work. Geothermal is given as a
    # hyphen in the source, which is treated as 0.
    data_frame['geothermal'] = (
        data_frame['geothermal']
        .astype(str)
        .str.replace('-', '0')
        .astype(float))

    # Align the units (10,000 kW -> MWh) with the other power companies.
    DataFrameFunction.to_mwh(data_frame)

    processed_pkl_path = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(processed_pkl_path, data_frame)
    return processed_pkl_path
def __correct_ex_data(cls, root_path, pkl_file_name, url, reflesh):
    """Return the original-pickle path, (re)building the pickle when needed.

    The pickle is rebuilt from ``url`` when ``reflesh`` is truthy or when no
    file exists at the target path yet. URLs containing ``'.xls'`` are read
    as Excel workbooks; everything else is fetched as decoded text.

    Args:
        root_path: Root directory handed to ``FileFunction`` to build the path.
        pkl_file_name: File name used for the original pickle.
        url: Location the data is read from.
        reflesh: When truthy, rebuild the pickle even if it already exists.

    Returns:
        The path of the original pickle.
    """
    target_path = FileFunction.get_original_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)

    # Nothing to do when a cached pickle exists and no refresh was requested.
    if not reflesh and os.path.exists(target_path):
        return target_path

    if '.xls' in url:
        # Excel source: skip the first four rows, then go through CSV text.
        sheet = pandas.read_excel(
            url, header=None, index_col=None, skiprows=[0, 1, 2, 3])
        sheet_csv = cls.__create_hepco_csv_from_xls(sheet.to_csv(index=False))
        parsed_frame = cls.__parse_csv_from_xls(sheet_csv)
    else:
        raw_text = FileFunction.get_decoded_data(url)
        # hepco formats dates and times differently from the others, so the
        # data has to be corrected before it is read as CSV.
        fixed_csv = cls.__get_hepco_csv(raw_text)
        parsed_frame = cls.__parse(fixed_csv)

    FileFunction.create_pkl_file(target_path, parsed_frame)
    return target_path
def __process_ex_data(cls, original_pkl_path, root_path, pkl_file_name):
    """Clean the original pickle and store the result as the processed pickle.

    Args:
        original_pkl_path: Path of the pickle holding the raw data frame.
        root_path: Root directory handed to ``FileFunction`` to build the
            processed-pickle path.
        pkl_file_name: File name used for the processed pickle.

    Returns:
        The path the processed pickle was written to.
    """
    frame = DataFrameFunction.get_data_frame_from_pkl(original_pkl_path)
    frame['company'] = cls.COMPANY_NAME

    # Kyuden stores a single date-time value, so split it into date and time
    # to match the Tepco layout.
    DataFrameFunction.create_date_and_time_from_datetime(frame)

    # Values such as "2,500" carry thousands separators; strip them and cast
    # to float so later calculations work.
    for column in ('demand', 'nuclear', 'thermal'):
        as_text = frame[column].astype(str)
        frame[column] = as_text.str.replace(',', '').astype(float)

    # These columns can also hold missing values, which show up as the text
    # 'None' or 'nan' after the string cast; both are replaced with '0'.
    for column in ('solar_output_control', 'pumping', 'interconnection'):
        as_text = frame[column].astype(str)
        for placeholder in ('None', 'nan'):
            as_text = as_text.str.replace(placeholder, '0')
        frame[column] = as_text.str.replace(',', '').astype(float)

    # Turn the textual timestamps into real datetime values.
    raw_timestamps = frame['date_time']
    frame['date_time'] = pandas.to_datetime(
        raw_timestamps, format='%Y/%m/%d %H:%M')

    # Compute the total supply capacity.
    total_capacity = DataFrameFunction.get_total_supply_capacity(frame)
    frame['total_supply_capacity'] = total_capacity

    destination = FileFunction.get_processed_pkl_path(
        root_path, cls.COMPANY_NAME, pkl_file_name)
    FileFunction.create_pkl_file(destination, frame)
    return destination