def extract_uri(ctx, uri):
    """Extract information from a URI, optionally record a citation, and store the result.

    Steps, in order: resolve a source name and source for ``uri``, pick an
    extractor, check for duplicates by the source's url, validate, extract,
    optionally create a citation, and finally write the extracted info to the
    extracted table.

    Args:
        ctx: Context object whose ``obj`` mapping provides the ``'VERBOSE'``
            and ``'NO_PROMPTS'`` flags.
        uri: The URI to extract from.

    Exits the process with status 1 when no extractor matches the URI, when
    the source cannot be created, when the user settles for an existing
    duplicate, when validation fails and the user declines (or prompts are
    disabled), or when the extractor raises ``ExtractorError``.
    """
    # Local import: only needed for the polling loop below.
    import time

    verbose = ctx.obj['VERBOSE']
    no_prompts = ctx.obj['NO_PROMPTS']
    source_name = get_uri_source_name(uri)

    if source_name:
        cond_print(verbose, "Starting extraction of {} source".format(source_name))
    else:
        click.echo("Could not find extractor for given uri:{}. Goodbye!".format(uri))
        sys.exit(1)

    try:
        source = get_url_source(uri)
    except SourceError as e:
        # Not every exception class carries ``message``; fall back to str(e).
        click.echo(getattr(e, 'message', str(e)))
        sys.exit(1)

    extractor = get_extractor_for_uri(uri, source)
    cond_print(verbose, "Using {} for extraction".format(extractor.__class__.__name__))

    # Check for duplicate entries, by url from source, this is needed since there might be a redirect from the
    # input uri, like http -> https. Could check this in the extractor or tell the user that the url is changing.
    # Though if the redirect always happens it wouldn't matter anyway, since the database retains the redirected url
    cond_print(verbose, 'Checking for duplicates...')
    if not no_prompts and has_potential_duplicates(source.url, 'source_uri', dbm.EXTRACTED_TABLE):
        if settle_for_duplicate(source.url, 'source_uri', dbm.EXTRACTED_TABLE):
            sys.exit(1)

    cond_print(verbose, 'Validating URI...')
    # Check if this is a game url for extraction
    if not extractor.validate():
        if no_prompts or not click.confirm('This doesn\'t appear to be a game related uri. \nExtract anyway?'):
            sys.exit(1)

    cond_print(verbose, 'Extracting URI...')
    # These are separate since file downloads might rely on subprocesses
    try:
        extractor.extract()
    except ExtractorError as e:
        click.echo(getattr(e, 'message', str(e)))
        sys.exit(1)

    # Block until extraction complete, needed for anything requiring sub-processes.
    # Sleep between polls so the wait does not spin a CPU core at 100%.
    while not extractor.extracted_info:
        time.sleep(0.05)

    extracted_info = extractor.extracted_info

    # Create citation from extracted information
    if no_prompts or click.confirm('Create citation from extracted data?'):
        citation, extracted_options = extractor.create_citation()
        if not no_prompts:
            citation = get_citation_user_input(citation, extracted_options)

            # Default to None so an unrecognised ref_type cannot leave the
            # name unbound when it is tested below.
            alternate_citation = None
            if citation.ref_type == GAME_CITE_REF:
                alternate_citation = choose_game_citation(search_locally_with_citation(citation))
            elif citation.ref_type == PERF_CITE_REF:
                alternate_citation = choose_performance_citation(search_locally_with_citation(citation))

            # Only store the new citation when no alternate was chosen.
            if not alternate_citation:
                dbm.add_to_citation_table(citation, fts=True)
                click.echo('Citation added to database.')
        else:
            dbm.add_to_citation_table(citation, fts=True)

    if 'errors' not in extracted_info and dbm.add_to_extracted_table(extracted_info):
        cond_print(verbose, "Extraction Successful!")
        if not no_prompts:
            summary_prompt(extracted_info)
    else:
        cond_print(verbose, "Extraction Failed!")
        pprint.pprint(extracted_info)
def extract_file(ctx, path_to_file, partial_citation):
    """Extract information from a local file, optionally record a citation, and store the result.

    Steps, in order: resolve the file's source name and full path, confirm the
    file exists, pick an extractor, validate, check for duplicates by file
    hash, extract, optionally create a citation (merged with
    ``partial_citation`` when given), and finally write the extracted info to
    the extracted table.

    Args:
        ctx: Context object whose ``obj`` mapping provides the ``'VERBOSE'``
            and ``'NO_PROMPTS'`` flags.
        path_to_file: Absolute path, or path relative to the current working
            directory, of the file to extract.
        partial_citation: Optional JSON string; when truthy, its
            ``'description'`` entry is merged into the created citation's
            elements.

    Exits the process with status 1 when the path is not a file, when no
    extractor matches, when validation fails and the user declines (or prompts
    are disabled), when the user settles for an existing duplicate, or when
    the extractor raises ``ExtractorError``.
    """
    verbose = ctx.obj['VERBOSE']
    no_prompts = ctx.obj['NO_PROMPTS']
    source_name = get_file_source_name(path_to_file)

    # Convert to full path if needed
    if os.path.isabs(path_to_file):
        full_path = path_to_file
    else:
        full_path = os.path.join(os.getcwd(), path_to_file)

    # Check if there's actually a file there
    if not os.path.isfile(full_path):
        click.echo("There doesn\'t appear to be a readable file at:{}.\nExiting.".format(path_to_file))
        sys.exit(1)

    # Check if it's actually a potentially valid source
    if source_name:
        cond_print(verbose, "Starting extraction of {} source".format(source_name))
    else:
        click.echo("Could not find extractor for given file path:{}. Goodbye!".format(path_to_file))
        sys.exit(1)

    # Get the appropriate extractor
    extractor = get_extractor_for_file(full_path)
    cond_print(verbose, "Using {} for extraction".format(extractor.__class__.__name__))

    # Check if this is a valid file
    cond_print(verbose, 'Validating File...')
    if not extractor.validate():
        # Abort when prompts are disabled or the user declines to continue.
        # (Previously the confirm result was not negated, so answering
        # "yes" to "Extract anyway?" aborted the extraction.)
        if no_prompts or not click.confirm('This doesn\'t appear to be a game related file. \nExtract anyway?'):
            sys.exit(1)

    # Check for duplicate entries, by hash of source file
    file_hash = get_file_hash(full_path)
    cond_print(verbose, 'Checking for duplicates...')
    if not no_prompts and has_potential_duplicates(file_hash, 'source_file_hash', dbm.EXTRACTED_TABLE):
        if settle_for_duplicate(file_hash, 'source_file_hash', dbm.EXTRACTED_TABLE):
            sys.exit(1)

    cond_print(verbose, 'Extracting File...')
    try:
        extractor.extract()
    except ExtractorError as e:
        # Not every exception class carries ``message``; fall back to str(e).
        click.echo(getattr(e, 'message', str(e)))
        sys.exit(1)

    extracted_info = extractor.extracted_info

    if no_prompts or click.confirm('Create citation from extracted data?'):
        citation, extracted_options = extractor.create_citation()

        # Overlay caller-supplied citation fields, if any were passed in.
        if partial_citation:
            partial = json.loads(partial_citation)
            citation.elements = merge_with_ordered_dict(partial['description'], citation.elements)

        if not no_prompts:
            citation = get_citation_user_input(citation, extracted_options)

            # Default to None so an unrecognised ref_type cannot leave the
            # name unbound when it is tested below.
            alternate_citation = None
            if citation.ref_type == GAME_CITE_REF:
                alternate_citation = choose_game_citation(search_locally_with_citation(citation))
            elif citation.ref_type == PERF_CITE_REF:
                alternate_citation = choose_performance_citation(search_locally_with_citation(citation))

            # Only store the new citation when no alternate was chosen.
            if not alternate_citation:
                dbm.add_to_citation_table(citation, fts=True)
                cond_print(verbose, 'Citation added to database.')
        else:
            dbm.add_to_citation_table(citation, fts=True)

    if 'errors' not in extracted_info and dbm.add_to_extracted_table(extracted_info):
        cond_print(verbose, "Extraction Successful!")
        if not no_prompts:
            summary_prompt(extracted_info)
    else:
        cond_print(verbose, "Extraction Failed!")
        pprint.pprint(extracted_info)