Example No. 1
def _resume_execution(self) -> list:
    """ Reload the all_files_listing saved by _save_progress so execution can resume. """
    all_files_listing = []
    if self.event['local']:
        cache_file_name = os.path.join(self.directory, self.resumption_filename)
        if os.path.exists(cache_file_name):
            with io.open(cache_file_name, 'r', encoding='utf-8') as json_file:
                all_files_listing = json.load(json_file)
    else:
        s3_key = os.path.join(self.config['pipeline-control-folder'], self.resumption_filename)
        all_files_listing = read_s3_json(self.config['process-bucket'], s3_key)
    return all_files_listing
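
Every example on this page calls a read_s3_json helper that is not shown. A minimal sketch of what it might look like, assuming boto3 and UTF-8 encoded JSON objects (the project's real helper may differ):

import json

import boto3


def read_s3_json(bucket: str, key: str) -> dict:
    """ Fetch an S3 object and parse its body as JSON (sketch, not the project's actual helper). """
    response = boto3.client('s3').get_object(Bucket=bucket, Key=key)
    return json.loads(response['Body'].read().decode('utf-8'))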
Example No. 2

def load_pipeline_config(event):
    if event.get('local', False):
        config = load_config_local()
    else:
        test_required_fields(event)
        s3_bucket = event['process-bucket']
        s3_path = "pipeline_runs/" + event['config-file']
        config = read_s3_json(s3_bucket, s3_path)

    # merge the current event into the config so event values take precedence
    config.update(event)
    return config
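
A typical non-local invocation might look like this (the bucket name and config key below are hypothetical):

event = {
    'local': False,
    'process-bucket': 'my-pipeline-bucket',   # hypothetical bucket name
    'config-file': '2023-06-01/config.json',  # hypothetical key under pipeline_runs/
}
config = load_pipeline_config(event)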
Example No. 3

def _get_saved_curate_json(self, item_id: str) -> dict:
    """ If the original Curate metadata has already been stored, use that instead of retrieving it from the Curate API.
        If running locally, check the local save folder; otherwise, check S3. """
    curate_json = {}
    if self.config.get('local', True):
        filename = os.path.join(self.local_folder, "save",
                                item_id + "_curate.json")
        if os.path.exists(filename):
            with io.open(filename, 'r', encoding='utf-8') as json_file:
                curate_json = json.load(json_file)
    else:
        key = os.path.join('save', item_id + '_curate.json')
        curate_json = read_s3_json(self.config['process-bucket'], key)
    return curate_json
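
The save-side counterpart is not shown on this page. A minimal sketch of what it could look like, assuming the same local/S3 split and a write_s3_json helper that mirrors read_s3_json (the method name _save_curate_json and its layout are guesses):

def _save_curate_json(self, item_id: str, curate_json: dict):
    """ Persist Curate metadata so later runs can skip the Curate API (hypothetical method). """
    if self.config.get('local', True):
        # assumes the local save folder already exists
        filename = os.path.join(self.local_folder, "save", item_id + "_curate.json")
        with io.open(filename, 'w', encoding='utf-8') as json_file:
            json.dump(curate_json, json_file)
    else:
        key = os.path.join('save', item_id + '_curate.json')
        write_s3_json(self.config['process-bucket'], key, curate_json)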
Example No. 4
def _get_saved_standard_json(self, item_id: str) -> dict:
    """ If standard.json has already been stored, read that.
        If running locally, check the local save folder; otherwise, check S3. """
    standard_json = {}
    if self.config.get('local', True):
        filename = os.path.join(self.local_folder, "save",
                                item_id + "_standard.json")
        if os.path.exists(filename):
            with io.open(filename, 'r', encoding='utf-8') as json_file:
                standard_json = json.load(json_file)
    else:
        key = os.path.join('save', item_id + '_standard.json')
        standard_json = read_s3_json(self.config['process-bucket'], key)
    return standard_json
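
Both getters return an empty dict on a cache miss, which lends itself to a fall-through pattern at the call site (harvest_standard_json is a hypothetical stand-in for whatever rebuilds the record from the source system):

standard_json = self._get_saved_standard_json(item_id)
if not standard_json:
    # cache miss: rebuild from the source system (hypothetical call)
    standard_json = self.harvest_standard_json(item_id)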
Example No. 5
def run(event, _context):
    """ Retrieve and process web kiosk metadata, resuming across executions as needed. """
    _suplement_event(event)
    config = setup_pipeline_config(event)
    google_config = load_config_ssm(config['google_keys_ssm_base'])
    config.update(google_config)
    museum_config = load_config_ssm(config['museum_keys_ssm_base'])
    config.update(museum_config)
    time_to_break = datetime.now() + timedelta(
        seconds=config['seconds-to-allow-for-processing'])
    print("Will break after ", time_to_break)

    mode = event.get("mode", "full")
    if mode not in ["full", "incremental", "ids"]:
        mode = "full"
    json_web_kiosk_class = ProcessWebKioskJsonMetadata(config, event,
                                                       time_to_break)
    # First execution: harvest fresh metadata from the web kiosk and stash intermediate files in S3
    if event["museumExecutionCount"] == 1:
        if not event.get('local'):
            save_file_system_record(config.get('website-metadata-tablename'),
                                    'Google', 'Museum')
            save_source_system_record(config.get('website-metadata-tablename'),
                                      'EmbARK')
        composite_json = json_web_kiosk_class.get_composite_json_metadata(mode)
        museum_image_metadata = json_web_kiosk_class.find_images_for_composite_json_metadata(
            composite_json)
        composite_json = CleanUpCompositeJson(
            composite_json).cleaned_up_content
        event['countToProcess'] = len(composite_json.get('objects', []))
        write_s3_json(config['process-bucket'],
                      'museum_composite_metadata.json', composite_json)
        write_s3_json(config['process-bucket'], 'museum_image_metadata.json',
                      museum_image_metadata)
    else:
        # Subsequent executions: resume from the intermediate files saved on the first pass
        composite_json = read_s3_json(config['process-bucket'],
                                      'museum_composite_metadata.json')
        museum_image_metadata = read_s3_json(config['process-bucket'],
                                             'museum_image_metadata.json')

    if composite_json:
        objects_processed = json_web_kiosk_class.process_composite_json_metadata(
            composite_json, museum_image_metadata)
        event['museumHarvestComplete'] = _done_processing(composite_json)
    else:
        print('No JSON to process')

    # Stop looping once the execution cap is reached, even if processing is incomplete
    if event["museumExecutionCount"] >= event["maximumMuseumExecutions"]:
        event['museumHarvestComplete'] = True
    # Harvest finished: clean up the intermediate S3 files
    if event['museumHarvestComplete']:
        if s3_file_exists(config['process-bucket'],
                          'museum_composite_metadata.json'):
            delete_s3_key(config['process-bucket'],
                          'museum_composite_metadata.json')
        if s3_file_exists(config['process-bucket'],
                          'museum_image_metadata.json'):
            delete_s3_key(config['process-bucket'],
                          'museum_image_metadata.json')
    elif composite_json:
        # Not done yet: checkpoint progress so the next execution can resume
        write_s3_json(config['process-bucket'],
                      'museum_composite_metadata.json', composite_json)
        key = 'countHarvestedLoop' + str(event["museumExecutionCount"])
        event[key] = objects_processed
    event['countRemaining'] = len(composite_json.get('objects', []))
    return event
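
The run handler is written to be re-invoked until museumHarvestComplete is set, with museumExecutionCount tracking the pass number. A minimal local driver sketch (the loop itself is an assumption; in production this kind of handler is typically driven by an AWS Step Functions state machine, and the seed event fields below are hypothetical):

event = {
    'museumExecutionCount': 0,               # incremented once per pass below
    'maximumMuseumExecutions': 10,           # hypothetical cap
    'process-bucket': 'my-pipeline-bucket',  # hypothetical bucket name
    'config-file': 'config.json',            # hypothetical key under pipeline_runs/
}
while not event.get('museumHarvestComplete'):
    event['museumExecutionCount'] += 1
    event = run(event, None)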