def merge_csv_page_selected(ui):
    csv_file_name = ui.merge_csv_file_name
    ui.merge_timestamp_combobox.clear()
    ui.merge_location_combobox.clear()
    ui.merge_tableview.setModel(None)
    if csv_file_name == '':
        return 'failure'
    ui.merge_file = csv_file_name
    try:
        with open(csv_file_name, newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            columns_count = len(next(reader))
            csv_file.seek(0)
            sniffer = csv.Sniffer()
            has_header = sniffer.has_header(csv_file.read(2048))
        # Preview only the first 10 rows; generate column names if the file
        # has no header row.
        if not has_header:
            headers = [f"Column {number + 1}" for number in range(columns_count)]
            df = pd.read_csv(csv_file_name, names=headers, nrows=10)
        else:
            df = pd.read_csv(csv_file_name, nrows=10)
    except Exception as e:
        error(str(e))
        return 'failure'
    header_list = list(df.columns)
    model = PandasModel(df)
    ui.merge_tableview.setModel(model)
    ui.merge_timestamp_combobox.addItems(header_list)
    ui.merge_location_combobox.addItems(header_list)
    return 'success'
def list_all_forecast_datasets(forecast_client, dataset_group_arn):
    """Lists all datasets inside a dataset group."""
    try:
        response = forecast_client.describe_dataset_group(
            DatasetGroupArn=dataset_group_arn)
    except Exception as e:
        error(str(e))
        # Return empty mappings so callers can still unpack the tuple.
        return {}, {}, {}
    dataset_arn_dict = {}
    dataset_type_dict = {}
    dataset_status = {}
    for dataset_arn in response['DatasetArns']:
        dataset_response = forecast_client.describe_dataset(
            DatasetArn=dataset_arn)
        name = dataset_response['DatasetName']
        if dataset_response['Status'] == 'ACTIVE':
            dataset_arn_dict[name] = dataset_response['DatasetArn']
            dataset_type_dict[name] = dataset_response['DatasetType']
            dataset_status[name] = 'ACTIVE'
        elif dataset_response['Status'] == 'UPDATE_IN_PROGRESS':
            dataset_arn_dict[name] = dataset_response['DatasetArn']
            dataset_type_dict[name] = dataset_response['DatasetType']
            dataset_status[name] = 'CREATING'
    return dataset_arn_dict, dataset_type_dict, dataset_status
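# Example usage (a minimal sketch; the client construction and the dataset
# group ARN below are hypothetical placeholders, not part of this module):
#
#   import boto3
#   forecast = boto3.client('forecast', region_name='us-east-1')
#   group_arn = 'arn:aws:forecast:us-east-1:123456789012:dataset-group/demo'
#   arn_by_name, type_by_name, status_by_name = list_all_forecast_datasets(
#       forecast, group_arn)
#   for name in arn_by_name:
#       print(name, type_by_name[name], status_by_name[name])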
def create_crawler(glue_client, role, s3_path, name):
    crawler_config = dict(
        Name=name,
        Role=role,
        DatabaseName='retail_demand_forecasting_db',
        Targets={'S3Targets': [{'Path': s3_path}]},
        SchemaChangePolicy={
            'UpdateBehavior': 'UPDATE_IN_DATABASE',
            'DeleteBehavior': 'DELETE_FROM_DATABASE'
        })
    try:
        glue_client.create_crawler(**crawler_config)
        return 'success'
    except glue_client.exceptions.AlreadyExistsException:
        # A crawler with this name already exists: update it in place.
        try:
            glue_client.update_crawler(**crawler_config)
            return 'success'
        except Exception as e:
            error(str(e))
            return 'failure'
    except Exception as e:
        error(str(e))
        return 'failure'
def create_dataset_import_job(forecast_client, dataset_name, dataset_arn,
                              s3_path, role_arn):
    """Creates a new dataset import job."""
    dataset_import_job_number = 1
    date = str(datetime.date(datetime.now())).replace('-', '')
    # Increment the import job number until a free name is found.
    while True:
        dataset_import_name = (dataset_name + date + '_' +
                               str(dataset_import_job_number))
        try:
            forecast_client.create_dataset_import_job(
                DatasetImportJobName=dataset_import_name,
                DatasetArn=dataset_arn,
                DataSource={
                    'S3Config': {
                        'Path': s3_path,
                        'RoleArn': role_arn,
                    }
                },
                TimestampFormat='yyyy-MM-dd HH:mm:ss')
            return 'success'
        except forecast_client.exceptions.ResourceAlreadyExistsException:
            dataset_import_job_number += 1
        except Exception as e:
            error(str(e))
            return 'failure'
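# The naming scheme above produces names such as 'sales20240115_1'; on a
# ResourceAlreadyExistsException the suffix is bumped to '_2', '_3', and so
# on until the job is accepted. A hypothetical call (the ARNs and S3 path
# are placeholders):
#
#   create_dataset_import_job(
#       forecast, 'sales',
#       'arn:aws:forecast:us-east-1:123456789012:dataset/sales',
#       's3://my-bucket/demo/datasets/tmp_dataset.csv',
#       'arn:aws:iam::123456789012:role/ForecastS3Role')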
def lookup_item_clicked(ui):
    # Get the item id, and the location if locations are enabled
    item_id = ui.forecast_view_item_id_line_edit.text()
    location = None
    if ui.use_location:
        location = ui.forecast_view_location_line_edit.text()
    # Run a query on AWS Athena with the provided item_id and location
    data_file_name, status = run_forecast_query(ui.athena_client,
                                                ui.selected_forecast,
                                                ui.s3_bucket_path, item_id,
                                                location)
    if status == 'failure':
        return 'failure'
    object_name = 'queries/' + data_file_name
    downloading_path = os.path.join(ui.tmp_dir_name, data_file_name)
    try:
        ui.s3_client.download_file(ui.s3_bucket_name, object_name,
                                   downloading_path)
        df = pd.read_csv(downloading_path)
        ui.draw_df(df)
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
def create_dataset_group(forecast_client, dataset_group_name):
    """Creates a new dataset group."""
    try:
        forecast_client.create_dataset_group(
            DatasetGroupName=dataset_group_name, Domain='RETAIL')
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
def create_database(glue_client):
    try:
        glue_client.create_database(
            DatabaseInput={'Name': 'retail_demand_forecasting_db'})
        return 'success'
    except glue_client.exceptions.AlreadyExistsException:
        # The database already exists, which is fine.
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
def link_dataset_to_dataset_group(forecast_client, dataset_arn_list,
                                  dataset_group_arn):
    """Connects newly created datasets to a dataset group."""
    try:
        forecast_client.update_dataset_group(
            DatasetGroupArn=dataset_group_arn,
            DatasetArns=dataset_arn_list)
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
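# Taken together, the create/link helpers support a flow roughly like the
# sketch below. Note that create_dataset_group above does not return the new
# group's ARN, so a real caller would have to look it up (for example via
# list_all_forecast_dataset_groups); the names here are placeholders:
#
#   create_dataset_group(forecast, 'retail_demo')
#   dataset_arn, status = create_metadata_dataset(forecast, 'items', attrs)
#   if status == 'success':
#       link_dataset_to_dataset_group(forecast, [dataset_arn], group_arn)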
def create_forecast(forecast_client, forecast_name, predictor_arn):
    """Creates a new forecast."""
    try:
        response = forecast_client.create_forecast(
            ForecastName=forecast_name, PredictorArn=predictor_arn)
        return response['ForecastArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
def multi_part_upload_with_s3(s3, bucket_name, key_path, file_path,
                              progress_bar):
    """Uploads a file to S3 in parts, reporting progress to the GUI."""
    try:
        config = TransferConfig(multipart_threshold=1024 * 25,
                                max_concurrency=10,
                                multipart_chunksize=1024 * 25,
                                use_threads=True)
        s3.meta.client.upload_file(file_path, bucket_name, key_path,
                                   Config=config,
                                   Callback=ProgressPercentage(
                                       file_path, progress_bar))
        return 'success'
    except ClientError as e:
        worker_error_page('Access Denied!', str(e))
        return 'failure'
    except Exception as e:
        error(str(e))
        return 'failure'
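# Note on the TransferConfig above: multipart_threshold and
# multipart_chunksize are measured in bytes, so 1024 * 25 is only 25 KB and
# large files are split into many small parts. A sketch with more typical
# 25 MB settings (an assumption, not a project requirement):
#
#   config = TransferConfig(multipart_threshold=25 * 1024 * 1024,
#                           multipart_chunksize=25 * 1024 * 1024,
#                           max_concurrency=10,
#                           use_threads=True)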
def run_forecast_query(athena_client, table_name, s3_path, item_id,
                       location=None):
    try:
        # Note: identifiers and values are interpolated directly into the SQL
        # string, so item_id and location must come from trusted input.
        query = ("SELECT * FROM " + table_name +
                 " WHERE cast(item_id as varchar) = '" + item_id + "'")
        if location is not None:
            query += (" AND cast(location as varchar) = '" + location + "'")
        response = athena_client.start_query_execution(
            QueryString=query,
            ResultConfiguration={'OutputLocation': s3_path + '/queries/'},
            QueryExecutionContext={
                'Database': 'retail_demand_forecasting_db',
                'Catalog': 'AwsDataCatalog'
            },
        )
        execution_id = response['QueryExecutionId']
        # Poll until the query reaches a terminal state.
        state = 'QUEUED'
        while state not in ('SUCCEEDED', 'FAILED', 'CANCELLED'):
            time.sleep(0.5)
            response = athena_client.get_query_execution(
                QueryExecutionId=execution_id)
            state = response['QueryExecution']['Status']['State']
        if state == 'SUCCEEDED':
            # Athena writes the result set as <execution id>.csv.
            return f'{execution_id}.csv', 'success'
        raise Exception("Query Failed or Cancelled")
    except Exception as e:
        error(str(e))
        return None, 'failure'
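# Example usage (hypothetical values; see lookup_item_clicked for how the
# returned file name is combined with the 'queries/' prefix to download the
# result from S3):
#
#   file_name, status = run_forecast_query(
#       athena, 'demo_forecast', 's3://my-bucket', 'SKU123', location='NYC')
#   if status == 'success':
#       s3_client.download_file('my-bucket', 'queries/' + file_name,
#                               file_name)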
def run_crawler(name, glue_client):
    try:
        glue_client.start_crawler(Name=name)
        time.sleep(5)
        # Poll until the crawler returns to the READY state.
        response = glue_client.get_crawler(Name=name)
        while response['Crawler']['State'] != 'READY':
            time.sleep(5)
            response = glue_client.get_crawler(Name=name)
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
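# The three Glue helpers (create_database, create_crawler, run_crawler) are
# meant to run in sequence; a minimal sketch with placeholder values:
#
#   glue = boto3.client('glue', region_name='us-east-1')
#   if create_database(glue) == 'success':
#       if create_crawler(glue, role_arn,
#                         's3://my-bucket/demo/forecast/x/', 'x') == 'success':
#           run_crawler('x', glue)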
def list_all_forecast_dataset_groups(forecast_client):
    """Lists all RETAIL dataset groups inside AWS Forecast."""
    try:
        # Note: only the first page of results is inspected.
        response = forecast_client.list_dataset_groups()
        retail_dataset_groups = {}
        for dataset_group in response['DatasetGroups']:
            dataset_response = forecast_client.describe_dataset_group(
                DatasetGroupArn=dataset_group['DatasetGroupArn'])
            if dataset_response['Domain'] == 'RETAIL':
                retail_dataset_groups[dataset_group['DatasetGroupName']] = \
                    dataset_group['DatasetGroupArn']
        return retail_dataset_groups
    except Exception as e:
        error(str(e))
        return {}
def create_metadata_dataset(forecast_client, dataset_name, attributes):
    """Creates a new metadata dataset."""
    try:
        response = forecast_client.create_dataset(
            DatasetName=dataset_name,
            Domain='RETAIL',
            DatasetType='ITEM_METADATA',
            Schema={'Attributes': attributes})
        return response['DatasetArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
def is_location_in_table(athena_client, table_name):
    try:
        response = athena_client.get_table_metadata(
            CatalogName='AwsDataCatalog',
            DatabaseName='retail_demand_forecasting_db',
            TableName=table_name)
    except Exception as e:
        error(str(e))
        return None, 'failure'
    columns = response['TableMetadata']['Columns']
    location_in_table = any(
        column['Name'] == 'location' for column in columns)
    return location_in_table, 'success'
def list_objects_in_dataset_bucket(session, region, bucket_name,
                                   dataset_group_name):
    try:
        s3_client = session.client('s3', region_name=region)
        response = s3_client.list_objects_v2(
            Bucket=bucket_name,
            Prefix=dataset_group_name + '/datasets/')
        objects_list = []
        if 'Contents' in response:
            for dataset in response['Contents']:
                objects_list.append(dataset['Key'])
        return objects_list, 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
def create_related_dataset(forecast_client, dataset_name, data_frequency,
                           attributes):
    """Creates a new related time series dataset."""
    try:
        response = forecast_client.create_dataset(
            DatasetName=dataset_name,
            Domain='RETAIL',
            DatasetType='RELATED_TIME_SERIES',
            DataFrequency=data_frequency,
            Schema={'Attributes': attributes})
        return response['DatasetArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
def create_forecast_export(forecast_client, forecast_export_name,
                           forecast_arn, s3_path, s3_role_arn):
    """Creates a new forecast export job."""
    try:
        response = forecast_client.create_forecast_export_job(
            ForecastExportJobName=forecast_export_name,
            ForecastArn=forecast_arn,
            Destination={
                'S3Config': {
                    'Path': s3_path,
                    'RoleArn': s3_role_arn,
                }
            })
        return response['ForecastExportJobArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
def create_predictor(forecast_client, predictor_name, algorithm, auto_ml,
                     forecast_horizon, forecast_frequency, dataset_group_arn,
                     location_in_datasets=False, holidays=None):
    """Creates a new predictor."""
    predictor_attributes = {
        'PredictorName': predictor_name,
        'ForecastHorizon': forecast_horizon
    }
    if auto_ml:
        predictor_attributes['PerformAutoML'] = True
    else:
        predictor_attributes['AlgorithmArn'] = algorithm
    predictor_attributes['InputDataConfig'] = {
        'DatasetGroupArn': dataset_group_arn
    }
    if holidays is not None:
        predictor_attributes['InputDataConfig']['SupplementaryFeatures'] = [{
            'Name': 'holiday',
            'Value': holidays
        }]
    predictor_attributes['FeaturizationConfig'] = {
        'ForecastFrequency': forecast_frequency
    }
    if location_in_datasets:
        predictor_attributes['FeaturizationConfig']['ForecastDimensions'] = [
            'location'
        ]
    try:
        response = forecast_client.create_predictor(**predictor_attributes)
        return response['PredictorArn'], 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
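# Example (a sketch, not the application's exact call; the algorithm ARN is
# one of the standard AWS Forecast algorithm ARNs, and group_arn is a
# placeholder). With auto_ml=True, AlgorithmArn is omitted and AWS Forecast
# picks the algorithm itself:
#
#   predictor_arn, status = create_predictor(
#       forecast, 'demo_predictor',
#       'arn:aws:forecast:::algorithm/Deep_AR_Plus', auto_ml=False,
#       forecast_horizon=14, forecast_frequency='D',
#       dataset_group_arn=group_arn, location_in_datasets=True,
#       holidays='US')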
def upload_and_create_dataset(ui):
    if ui.current_dataset_type != 'METADATA':
        if ui.frequency == 'none':
            # combobox_to_freq returns 'failure' if it cannot map the
            # selected entry to a frequency.
            frequency = combobox_to_freq(ui, 'DATASET')
            if frequency == 'failure':
                error("Failed to identify frequency")
                return 'failure'
            ui.frequency = frequency
    change_current_page(ui, ui.upload_page)
    reset_progress_bar(ui)
    status = upload_data(ui)
    if status == 'failure':
        change_current_page(ui, ui.new_dataset_name_page)
        return status
    status = create_dataset(ui)
    if status == 'failure':
        change_current_page(ui, ui.new_dataset_name_page)
        return status
    ui.select_ds_group_pb.click()
    return 'success'
def is_location_in_datasets(forecast_client, dataset_group_arn):
    """Checks whether any dataset in the group has a 'location' attribute."""
    try:
        response = forecast_client.describe_dataset_group(
            DatasetGroupArn=dataset_group_arn)
        for dataset_arn in response['DatasetArns']:
            dataset_response = forecast_client.describe_dataset(
                DatasetArn=dataset_arn)
            for attribute in dataset_response['Schema']['Attributes']:
                if attribute['AttributeName'] == 'location':
                    return True, 'success'
        return False, 'success'
    except Exception as e:
        error(str(e))
        return False, 'failure'
def merge_button_clicked(ui):
    save_location = ui.tmp_dir_name
    csv_file_name = ui.merge_file
    related_location_name = ui.related_location_name
    related_date_name = ui.related_date_name
    original_df = ui.dataset['RELATED']
    additional_location_name = ui.merge_location_combobox.currentText()
    additional_date_name = ui.merge_timestamp_combobox.currentText()
    try:
        with open(csv_file_name, newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            columns_count = len(next(reader))
            csv_file.seek(0)
            sniffer = csv.Sniffer()
            has_header = sniffer.has_header(csv_file.read(2048))
        if not has_header:
            headers = [f"Column {number + 1}" for number in range(columns_count)]
            df = pd.read_csv(csv_file_name, names=headers)
        else:
            df = pd.read_csv(csv_file_name)
        # Normalize both date columns before joining on (date, location).
        convert_date(df, additional_date_name)
        convert_date(original_df, related_date_name)
        result = pd.merge(
            original_df,
            df,
            left_on=[related_date_name, related_location_name],
            right_on=[additional_date_name, additional_location_name],
            how='left')
        merged_path = os.path.join(save_location, "merged.csv")
        result.to_csv(merged_path,
                      date_format='%Y-%m-%d %H:%M:%S',
                      index=False)
        select_csv_file(ui, 'RELATED', merge_file=merged_path)
        return 'success'
    except Exception as e:
        error(str(e))
        return 'failure'
def populate_dataset_view(ui, dataset_type):
    try:
        csv_file_name = ui.selected_csv[dataset_type]
        with open(csv_file_name, newline='') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            columns_count = len(next(reader))
            csv_file.seek(0)
            sniffer = csv.Sniffer()
            has_header = sniffer.has_header(csv_file.read(2048))
        if not has_header:
            headers = [f"Column {number + 1}" for number in range(columns_count)]
            df = pd.read_csv(csv_file_name, parse_dates=True, names=headers)
        else:
            df = pd.read_csv(csv_file_name, parse_dates=True)
    except Exception as e:
        error(str(e))
        return
    if df.shape[0] == 0 or df.shape[1] == 0:
        error("Can't import an empty dataset")
        return
    header_list = list(df.columns)
    view_df = df.head(10)
    ui.dataset[dataset_type] = df
    model = PandasModel(view_df)
    if dataset_type == 'TARGET':
        ui.target_dataset_preview_tableview.setModel(None)
        ui.target_item_id_combobox.clear()
        ui.target_timestamp_combobox.clear()
        ui.target_demand_combobox.clear()
        ui.target_location_combobox.clear()
        ui.target_dataset_preview_tableview.setModel(model)
        ui.target_item_id_combobox.addItems(header_list)
        ui.target_timestamp_combobox.addItems(header_list)
        ui.target_demand_combobox.addItems(header_list)
        # Location is optional, so its combobox gets a blank first entry.
        ui.target_location_combobox.addItem('')
        ui.target_location_combobox.addItems(header_list)
    elif dataset_type == 'RELATED':
        ui.related_dataset_preview_tableview.setModel(None)
        ui.related_item_id_combobox.clear()
        ui.related_timestamp_combobox.clear()
        ui.related_price_combobox.clear()
        ui.related_weather_combobox.clear()
        ui.related_promotion_combobox.clear()
        ui.related_location_combobox.clear()
        ui.related_dataset_preview_tableview.setModel(model)
        ui.related_item_id_combobox.addItems(header_list)
        ui.related_timestamp_combobox.addItems(header_list)
        # Optional columns get a blank first entry.
        ui.related_price_combobox.addItem('')
        ui.related_weather_combobox.addItem('')
        ui.related_location_combobox.addItem('')
        ui.related_promotion_combobox.addItem('')
        ui.related_price_combobox.addItems(header_list)
        ui.related_weather_combobox.addItems(header_list)
        ui.related_location_combobox.addItems(header_list)
        ui.related_promotion_combobox.addItems(header_list)
    else:
        ui.metadata_dataset_preview_tableview.setModel(None)
        ui.metadata_item_id_combobox.clear()
        ui.metadata_color_combobox.clear()
        ui.metadata_category_combobox.clear()
        ui.metadata_brand_combobox.clear()
        ui.metadata_dataset_preview_tableview.setModel(model)
        ui.metadata_item_id_combobox.addItems(header_list)
        ui.metadata_color_combobox.addItem('')
        ui.metadata_brand_combobox.addItem('')
        ui.metadata_category_combobox.addItem('')
        ui.metadata_color_combobox.addItems(header_list)
        ui.metadata_brand_combobox.addItems(header_list)
        ui.metadata_category_combobox.addItems(header_list)
def prepare_data(ui, dataset_type):
    ui.remote_datasets = list_all_datasets_names(ui.forecast_client)
    tmp_dir_name = ui.tmp_dir_name
    df = ui.dataset[dataset_type]
    if dataset_type == 'TARGET':
        item_id_name = str(ui.target_item_id_combobox.currentText())
        timestamp_name = str(ui.target_timestamp_combobox.currentText())
        demand_name = str(ui.target_demand_combobox.currentText())
        location_name = str(ui.target_location_combobox.currentText())
        attributes = [{
            "AttributeName": "timestamp",
            "AttributeType": "timestamp"
        }, {
            "AttributeName": "item_id",
            "AttributeType": "string"
        }, {
            "AttributeName": "demand",
            "AttributeType": "float"
        }]
        if location_name != '':
            attributes.append({
                "AttributeName": "location",
                "AttributeType": "string"
            })
            df = df.reindex(columns=[
                timestamp_name, item_id_name, demand_name, location_name
            ])
        else:
            df = df.reindex(
                columns=[timestamp_name, item_id_name, demand_name])
        ui.dataset_attributes[dataset_type] = attributes
        if df[timestamp_name].dtype == 'object':
            try:
                df[timestamp_name] = pd.to_datetime(df[timestamp_name])
            except Exception as e:
                error("Can't convert timestamp to datetime, "
                      "please check your data. " + str(e))
                return 'failure'
        else:
            error("Can't convert timestamp to datetime, "
                  "please check your data.")
            return 'failure'
        if not df[timestamp_name].is_monotonic_increasing:
            df = df.sort_values(by=timestamp_name)
        # Detect the sampling frequency from the unique timestamp deltas.
        diff_values = np.unique(np.diff(df[timestamp_name].values)).tolist()
        frequency = detect_frequency(diff_values)
        if frequency == 'none':
            output = set_enable_freq_input(ui, True)
        else:
            output = set_enable_freq_input(ui, False, frequency)
        if output == 'failure':
            raise Exception("Failed to enable frequency inputs.")
        ui.frequency = frequency
        df.to_csv(os.path.join(tmp_dir_name, "tmp_dataset.csv"),
                  date_format='%Y-%m-%d %H:%M:%S',
                  header=False,
                  index=False)
        return 'success'
    elif dataset_type == 'RELATED':
        item_id_name = str(ui.related_item_id_combobox.currentText())
        timestamp_name = str(ui.related_timestamp_combobox.currentText())
        weather_name = str(ui.related_weather_combobox.currentText())
        price_name = str(ui.related_price_combobox.currentText())
        promotion_name = str(ui.related_promotion_combobox.currentText())
        location_name = str(ui.related_location_combobox.currentText())
        columns = [timestamp_name, item_id_name]
        attributes = [{
            "AttributeName": "timestamp",
            "AttributeType": "timestamp"
        }, {
            "AttributeName": "item_id",
            "AttributeType": "string"
        }]
        if location_name != '':
            attributes.append({
                "AttributeName": "location",
                "AttributeType": "string"
            })
            columns.append(location_name)
        if weather_name != '':
            attributes.append({
                "AttributeName": "weather",
                "AttributeType": "float"
            })
            columns.append(weather_name)
        if price_name != '':
            attributes.append({
                "AttributeName": "price",
                "AttributeType": "float"
            })
            columns.append(price_name)
        if promotion_name != '':
            attributes.append({
                "AttributeName": "promotion",
                "AttributeType": "float"
            })
            columns.append(promotion_name)
        df = df.reindex(columns=columns)
        if promotion_name != '':
            # Normalize the many spellings of a boolean flag to 1/0.
            df[promotion_name] = df[promotion_name].replace({
                True: 1, False: 0,
                'True': 1, 'False': 0,
                'true': 1, 'false': 0,
                1.0: 1, 0.0: 0,
                '1.0': 1, '0.0': 0,
                '1': 1, '0': 0
            })
        ui.dataset_attributes[dataset_type] = attributes
        if df[timestamp_name].dtype == 'object':
            try:
                df[timestamp_name] = pd.to_datetime(df[timestamp_name])
            except Exception as e:
                error("Can't convert timestamp to datetime, "
                      "please check your data. " + str(e))
                return 'failure'
        else:
            error("Can't convert timestamp to datetime, "
                  "please check your data.")
            return 'failure'
        if not df[timestamp_name].is_monotonic_increasing:
            df = df.sort_values(by=timestamp_name)
        diff_values = np.unique(np.diff(df[timestamp_name].values)).tolist()
        frequency = detect_frequency(diff_values)
        if frequency == 'none':
            output = set_enable_freq_input(ui, True)
        else:
            output = set_enable_freq_input(ui, False, frequency)
        if output == 'failure':
            raise Exception("Failed to enable frequency inputs.")
        ui.frequency = frequency
        ui.dataset[dataset_type] = df
        df.to_csv(os.path.join(tmp_dir_name, "tmp_dataset.csv"),
                  date_format='%Y-%m-%d %H:%M:%S',
                  header=False,
                  index=False)
        return 'success'
    else:
        item_id_name = str(ui.metadata_item_id_combobox.currentText())
        color_name = str(ui.metadata_color_combobox.currentText())
        category_name = str(ui.metadata_category_combobox.currentText())
        brand_name = str(ui.metadata_brand_combobox.currentText())
        columns = [item_id_name]
        attributes = [{"AttributeName": "item_id", "AttributeType": "string"}]
        if color_name != '':
            attributes.append({
                "AttributeName": "color",
                "AttributeType": "string"
            })
            columns.append(color_name)
        if category_name != '':
            attributes.append({
                "AttributeName": "category",
                "AttributeType": "string"
            })
            columns.append(category_name)
        if brand_name != '':
            attributes.append({
                "AttributeName": "brand",
                "AttributeType": "string"
            })
            columns.append(brand_name)
        set_enable_freq_input(ui, False)
        df = df.reindex(columns=columns)
        ui.dataset_attributes[dataset_type] = attributes
        df.to_csv(os.path.join(tmp_dir_name, "tmp_dataset.csv"),
                  header=False,
                  index=False)
        return 'success'
def create_predictor_and_forecast(ui):
    try:
        # Use caffeine to keep the machine awake during the long-running jobs.
        caffeine.on(display=False)
        forecast_export_name = forecast_name = predictor_name = \
            ui.new_forecast_line_edit.text()
        forecast_horizon = ui.predictor_forecast_horizon_spinbox.value()
        forecast_client = ui.forecast_client
        dataset_group_arn = ui.dataset_group_arn
        auto_ml = ui.auto_ml_checkbox.isChecked()

        # Check whether a country is selected to use its holidays.
        holidays = None
        if ui.country_checkbox.isChecked():
            holidays = selected_country(ui)

        # Check whether location is a dimension in the data.
        location_in_datasets, status = is_location_in_datasets(
            forecast_client, dataset_group_arn)
        if status == 'failure':
            caffeine.off()
            return status

        # If AutoML is not selected, use the chosen algorithm.
        algorithm = None
        if not auto_ml:
            algorithm = ui.algorithms[ui.algorithm_combobox.currentText()]

        # Get the forecast frequency from the combobox.
        forecast_frequency = combobox_to_freq(ui, 'FORECAST')

        # Create a predictor.
        predictor_arn, status = create_predictor(
            forecast_client, predictor_name, algorithm, auto_ml,
            forecast_horizon, forecast_frequency, dataset_group_arn,
            location_in_datasets, holidays)
        if status == 'failure':
            caffeine.off()
            return status

        # Wait until the predictor is created.
        response = forecast_client.describe_predictor(
            PredictorArn=predictor_arn)
        while response['Status'] not in ('ACTIVE', 'CREATE_FAILED'):
            time.sleep(10)
            response = forecast_client.describe_predictor(
                PredictorArn=predictor_arn)
        if response['Status'] == 'CREATE_FAILED':
            caffeine.off()
            return 'failure'

        # Create a forecast.
        forecast_arn, status = create_forecast(forecast_client, forecast_name,
                                               predictor_arn)
        if status == 'failure':
            caffeine.off()
            return status

        # Wait until the forecast is created.
        response = forecast_client.describe_forecast(ForecastArn=forecast_arn)
        while response['Status'] not in ('ACTIVE', 'CREATE_FAILED'):
            time.sleep(10)
            response = forecast_client.describe_forecast(
                ForecastArn=forecast_arn)
        if response['Status'] == 'CREATE_FAILED':
            caffeine.off()
            return 'failure'

        s3_path = (ui.s3_bucket_path + "/" + ui.dataset_group_name +
                   "/forecast/" + forecast_name + "/")
        s3_role_arn = ui.s3_role_arn

        # Create a forecast export.
        forecast_export_arn, status = create_forecast_export(
            ui.forecast_client, forecast_export_name, forecast_arn, s3_path,
            s3_role_arn)
        if status == 'failure':
            caffeine.off()
            return status

        # Wait until the forecast export is created. (Describe the export
        # job before checking its status; reusing the stale forecast
        # response here would skip the wait entirely.)
        response = forecast_client.describe_forecast_export_job(
            ForecastExportJobArn=forecast_export_arn)
        while response['Status'] not in ('ACTIVE', 'CREATE_FAILED'):
            time.sleep(10)
            response = forecast_client.describe_forecast_export_job(
                ForecastExportJobArn=forecast_export_arn)
        if response['Status'] == 'CREATE_FAILED':
            caffeine.off()
            return 'failure'

        # Wait 30 seconds to make sure the exported files show up on S3
        # (this should normally be immediate).
        time.sleep(30)

        # Create a database in AWS Glue.
        status = create_database(ui.glue_client)
        if status == 'failure':
            caffeine.off()
            return status

        # Create a crawler in AWS Glue.
        status = create_crawler(ui.glue_client, s3_role_arn, s3_path,
                                forecast_name)
        if status == 'failure':
            caffeine.off()
            return status

        # Run the created crawler.
        status = run_crawler(forecast_name, ui.glue_client)
        if status == 'failure':
            caffeine.off()
            return status

        caffeine.off()
        return 'success'
    except Exception as e:
        caffeine.off()
        error(str(e))
        return 'failure'
import sys
import tempfile

import darkdetect
from botocore.exceptions import ClientError
from PyQt5 import QtWidgets, QtCore

from gui.error_display_functions import access_error, error
from gui.gui import Ui_main_window
from src.gui_connector_utils import buttons_connector
from src.shared_utils import dark_mode, gui_initializer

with tempfile.TemporaryDirectory() as tmp_dir_name:
    try:
        app = QtWidgets.QApplication(sys.argv)
        main_window = QtWidgets.QWidget()
        ui = Ui_main_window()
        gui_initializer(ui, main_window, tmp_dir_name)
        # If the system theme is dark, use the dark logo.
        if darkdetect.isDark():
            dark_mode(ui)
        main_window.show()
        buttons_connector(ui)
    except ClientError as e:
        access_error(str(e))
    except Exception as e:
        error(str(e))
    sys.exit(app.exec_())
def list_all_forecast_forecasts(forecast_client, dataset_group_arn, s3_path):
    """Lists all exportable forecasts inside a dataset group."""
    try:
        forecast_set = set()
        predictor_set = set()
        forecast_exports_set = set()
        response = forecast_client.list_forecasts(Filters=[{
            'Key': 'DatasetGroupArn',
            'Value': dataset_group_arn,
            'Condition': 'IS'
        }, {
            'Key': 'Status',
            'Value': 'ACTIVE',
            'Condition': 'IS'
        }])
        forecasts_arn_dict = {}
        for forecast in response['Forecasts']:
            forecast_set.add(forecast['ForecastName'])
            forecasts_arn_dict[forecast['ForecastName']] = \
                forecast['ForecastArn']
        response = forecast_client.list_predictors(Filters=[{
            'Key': 'DatasetGroupArn',
            'Value': dataset_group_arn,
            'Condition': 'IS'
        }, {
            'Key': 'Status',
            'Value': 'ACTIVE',
            'Condition': 'IS'
        }])
        for predictor in response['Predictors']:
            predictor_set.add(predictor['PredictorName'])
        for forecast_name, forecast_arn in forecasts_arn_dict.items():
            response = forecast_client.list_forecast_export_jobs(Filters=[{
                'Key': 'ForecastArn',
                'Value': forecast_arn,
                'Condition': 'IS'
            }, {
                'Key': 'Status',
                'Value': 'ACTIVE',
                'Condition': 'IS'
            }])
            for forecast_export in response['ForecastExportJobs']:
                path = forecast_export['Destination']['S3Config']['Path']
                if s3_path in path:
                    forecast_exports_set.add(
                        forecast_export['ForecastExportJobName'])
        # Keep only the names that have an ACTIVE forecast, predictor,
        # and export job under this bucket.
        intersection_set = (forecast_exports_set & forecast_set &
                            predictor_set)
        return list(intersection_set), 'success'
    except Exception as e:
        error(str(e))
        return None, 'failure'
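# Example usage (placeholders throughout); only names that survive the set
# intersection above, meaning forecasts with an ACTIVE predictor, forecast,
# and export job under the given S3 path, are returned:
#
#   names, status = list_all_forecast_forecasts(
#       forecast, group_arn, 's3://my-bucket')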