def handle(self, *args, **kwargs): """Add dummy example data to database for demo.""" s3_storage = get_storage_class(settings.S3_FILE_STORAGE)() upsert_helper = UpsertDataHelpers() # Loop through directories in /media/ on S3 bucket directories, filenames = s3_storage.listdir(name=".") for id in directories: logger.info(id) if FileSubmission.objects.filter(id=id): logger.info("FileSubmission object already exists.") continue directories, filenames = s3_storage.listdir(name=id) for filename in filenames: original_file_path = os.path.join(id, filename) logger.info(f"Downloading {original_file_path}") filename_root = os.path.splitext(filename)[0] created = datetime.strptime( filename_root, "%Y%m%dT%H%M%SZ").replace(tzinfo=timezone.utc) supplied_data = FileSubmission( id=id, original_file=original_file_path, created=created) supplied_data.current_app = "bluetail" supplied_data.save() package_json = s3_storage._open(original_file_path).read() upsert_helper.upsert_ocds_data(package_json, supplied_data=supplied_data)
def setUp(self):
    insert_flags()
    insert_flag_attachments()
    bods_test_file_path = os.path.join(
        PROTOTYPE_DATA_PATH, "bods", "PROC-20-0001", "d_ownership.json")
    UpsertDataHelpers().upsert_bods_data(bods_test_file_path)
    bods_test_file_path = os.path.join(
        PROTOTYPE_DATA_PATH, "bods", "PROC-20-0001", "c_ownership.json")
    UpsertDataHelpers().upsert_bods_data(bods_test_file_path)
def create_package_from_json(contracts_finder_id, package):
    """
    Create FileSubmission and OCDSPackageDataJSON records in the database
    for a Contracts Finder JSON OCDS release package.

    :param contracts_finder_id: ID to use in constructing the FileSubmission ID
    :param package: JSON OCDS package
    """
    publisher = create_publisher_from_package_json(package)
    published_date = package["publishedDate"]
    logger.info("Creating FileSubmission %s uri %s date %s",
                publisher.publisher_name, contracts_finder_id, published_date)

    # Create FileSubmission entry
    supplied_data, created = FileSubmission.objects.update_or_create(
        id=contracts_finder_id,
        defaults={
            "current_app": "silvereye",
        })
    supplied_data.publisher = publisher
    supplied_data.created = published_date
    supplied_data.original_file.save(
        "release_package.json",
        ContentFile(json.dumps(package, indent=2)))
    supplied_data.save()

    json_string = json.dumps(package, indent=2, sort_keys=True, cls=DjangoJSONEncoder)
    UpsertDataHelpers().upsert_ocds_data(json_string, supplied_data)
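# Hedged usage sketch for create_package_from_json. The package below is
# illustrative only: the function reads "publishedDate" directly, and the
# fields create_publisher_from_package_json needs are an assumption here.
example_package = {
    "uri": "https://www.contractsfinder.service.gov.uk/Published/Notice/OCDS/sample.json",
    "publishedDate": "2020-06-01T00:00:00Z",
    "publisher": {"name": "Example Council", "scheme": "GB-LAC", "uid": "E0000"},
    "releases": [],
}
# create_package_from_json("sample", example_package)  # writes to the database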
def handle(self, *args, **kwargs): """Add simple prototype data to database for demo.""" upsert_helper = UpsertDataHelpers() # Insert PROTOTYPE OCDS JSON example_ocds_path = os.path.join(DATA_DIR, "prototype", "ocds", "ocds_tenderers_package.json") logger.info("Insert prototype OCDS") upsert_helper.upsert_ocds_data(example_ocds_path) # Insert BODS JSON logger.info("Insert prototype BODS") example_bods_path = os.path.join(DATA_DIR, "prototype", "bods", "PROC-20-0001") files = os.listdir(example_bods_path) for f in files: if not f.endswith(".json"): continue f_path = os.path.join(example_bods_path, f) try: upsert_helper.upsert_bods_data(f_path) except: logger.exception("Failed to insert example file %s", f_path) # Insert Flags logger.info("Insert prototype Flags") insert_flags() # Insert assigned Flags logger.info("Insert prototype FlagAttachments") insert_flag_attachments()
class TestBodsHelperFunctions(TestCase):
    upsert_helper = UpsertDataHelpers()
    bods_helper = BodsHelperFunctions()

    def setUp(self):
        ocds_test_file_path = os.path.join(
            TEST_DATA_PATH,
            "ocds-b5fd17suppliermatch-b3f725cb-5a11-4a33-9a37-e068bd48b3e0.json"
        )
        bods_test_file_path = os.path.join(TEST_DATA_PATH, "GB-COH_SC115530_bods.json")
        self.upsert_helper.upsert_bods_data(bods_test_file_path)
        self.upsert_helper.upsert_ocds_data(ocds_test_file_path)

    def test_get_tenderer_context(self):
        tenderer = OCDSTenderer.objects.get(
            ocid="ocds-b5fd17suppliermatch-b3f725cb-5a11-4a33-9a37-e068bd48b3e0",
            party_id="4")
        related_bods_data = self.bods_helper.get_related_bods_data_for_tenderer(tenderer)
        assert related_bods_data
def handle(self, *args, **kwargs): """Add dummy example data to database for demo.""" anonymise = kwargs['anonymise'] directory = kwargs['directory'][0] if anonymise: anonymise_ocds_function = anonymise_ocds_json_data anonymise_bods_function = anonymise_bods_json_data else: anonymise_ocds_function = None anonymise_bods_function = None upsert_helper = UpsertDataHelpers() # Insert CF OCDS JSON logger.info("Insert OCDS") ocds_path = os.path.join(directory, "ocds") for root, dirs, files in os.walk(ocds_path): for f in files: if not f.endswith(".json"): continue f_path = os.path.join(root, f) try: upsert_helper.upsert_ocds_data( f_path, process_json=anonymise_ocds_function) except KeyboardInterrupt: raise except: logger.exception("Failed to insert file %s", f_path) # Insert BODS JSON logger.info("Insert BODS") bods_path = os.path.join(directory, "bods") for root, dirs, files in os.walk(bods_path): for f in files: if not f.endswith(".json"): continue f_path = os.path.join(root, f) try: upsert_helper.upsert_bods_data( f_path, process_json=anonymise_bods_function) except KeyboardInterrupt: raise except: logger.exception("Failed to insert file %s", f_path)
def handle(self, *args, **kwargs): """Add dummy example data to database for demo.""" anonymise = kwargs['anonymise'] if anonymise: anonymise_ocds_function = anonymise_ocds_json_data anonymise_bods_function = anonymise_bods_json_data else: anonymise_ocds_function = None anonymise_bods_function = None upsert_helper = UpsertDataHelpers() # Insert CF OCDS JSON logger.info("Insert sample Contracts Finder OCDS") cf_ocds_path = os.path.join(DATA_DIR, "contracts_finder", "ocds") for root, dirs, files in os.walk(cf_ocds_path): for f in files: if not f.endswith(".json"): continue f_path = os.path.join(root, f) try: upsert_helper.upsert_ocds_data( f_path, process_json=anonymise_ocds_function) except: logger.exception("Failed to insert file %s", f_path) # Insert BODS JSON logger.info("Insert sample Contracts Finder BODS") cf_bods_path = os.path.join(DATA_DIR, "contracts_finder", "bods") for root, dirs, files in os.walk(cf_bods_path): for f in files: if not f.endswith(".json"): continue f_path = os.path.join(root, f) try: upsert_helper.upsert_bods_data( f_path, process_json=anonymise_bods_function) except: logger.exception("Failed to insert file %s", f_path)
def explore_ocds(request, pk):
    context, db_data, error = explore_data_context(request, pk)
    if error:
        return error

    lib_cove_ocds_config = LibCoveOCDSConfig()
    lib_cove_ocds_config.config["current_language"] = translation.get_language()
    lib_cove_ocds_config.config["schema_version_choices"] = settings.COVE_CONFIG[
        "schema_version_choices"]
    lib_cove_ocds_config.config["schema_codelists"] = settings.COVE_CONFIG[
        "schema_codelists"]

    upload_dir = db_data.upload_dir()
    upload_url = db_data.upload_url()
    file_name = db_data.original_file.file.name
    file_type = context["file_type"]

    post_version_choice = request.POST.get(
        "version", lib_cove_ocds_config.config["schema_version"])
    replace = False
    validation_errors_path = os.path.join(upload_dir, "validation_errors-3.json")

    if file_type == "json":
        # Open the data first so we can inspect for a record package
        with open(file_name, encoding="utf-8") as fp:
            try:
                json_data = json.load(fp, parse_float=Decimal,
                                      object_pairs_hook=OrderedDict)
            except ValueError as err:
                raise CoveInputDataError(
                    context={
                        "sub_title": _("Sorry, we can't process that data"),
                        "link": "index",
                        "link_text": _("Try Again"),
                        "msg": _(format_html(
                            "We think you tried to upload a JSON file, but it is not well formed JSON."
                            '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                            "</span> <strong>Error message:</strong> {}",
                            err,
                        )),
                        "error": format(err),
                    })
            if not isinstance(json_data, dict):
                raise CoveInputDataError(
                    context={
                        "sub_title": _("Sorry, we can't process that data"),
                        "link": "index",
                        "link_text": _("Try Again"),
                        "msg": _("OCDS JSON should have an object as the top level, "
                                 "the JSON you supplied does not."),
                    })

            version_in_data = json_data.get("version", "")
            db_data.data_schema_version = version_in_data
            select_version = post_version_choice or db_data.schema_version
            schema_ocds = SchemaOCDS(
                select_version=select_version,
                release_data=json_data,
                lib_cove_ocds_config=lib_cove_ocds_config,
            )

            if schema_ocds.missing_package:
                exceptions.raise_missing_package_error()
            if schema_ocds.invalid_version_argument:
                # This shouldn't happen unless the user sends random POST data.
                exceptions.raise_invalid_version_argument(post_version_choice)
            if schema_ocds.invalid_version_data:
                if isinstance(version_in_data, str) and re.compile(
                        r"^\d+\.\d+\.\d+$").match(version_in_data):
                    exceptions.raise_invalid_version_data_with_patch(version_in_data)
                else:
                    if not isinstance(version_in_data, str):
                        version_in_data = "{} (it must be a string)".format(
                            str(version_in_data))
                    context["unrecognized_version_data"] = version_in_data

            if schema_ocds.version != db_data.schema_version:
                replace = True
            if schema_ocds.extensions:
                schema_ocds.create_extended_release_schema_file(upload_dir, upload_url)
            schema_url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url

            if "records" in json_data:
                context["conversion"] = None
            else:
                # Replace the spreadsheet conversion only if it exists already.
                converted_path = os.path.join(upload_dir, "flattened")
                replace_converted = replace and os.path.exists(converted_path + ".xlsx")
                with warnings.catch_warnings():
                    # flattentool uses UserWarning, so we can't set a specific category
                    warnings.filterwarnings("ignore")
                    convert_json_context = convert_json(
                        upload_dir,
                        upload_url,
                        file_name,
                        lib_cove_ocds_config,
                        schema_url=schema_url,
                        replace=replace_converted,
                        request=request,
                        flatten=request.POST.get("flatten"),
                    )
                context.update(convert_json_context)
    else:
        # Use the lowest release pkg schema version accepting the 'version' field
        metatab_schema_url = SchemaOCDS(
            select_version="1.1",
            lib_cove_ocds_config=lib_cove_ocds_config).release_pkg_schema_url
        metatab_data = get_spreadsheet_meta_data(
            upload_dir, file_name, metatab_schema_url, file_type)
        if "version" not in metatab_data:
            metatab_data["version"] = "1.0"
        else:
            db_data.data_schema_version = metatab_data["version"]

        select_version = post_version_choice or db_data.schema_version
        schema_ocds = SchemaOCDS(
            select_version=select_version,
            release_data=metatab_data,
            lib_cove_ocds_config=lib_cove_ocds_config,
        )

        # Unlike the JSON data case above, do not check for a missing data package
        if schema_ocds.invalid_version_argument:
            # This shouldn't happen unless the user sends random POST data.
            exceptions.raise_invalid_version_argument(post_version_choice)
        if schema_ocds.invalid_version_data:
            version_in_data = metatab_data.get("version")
            if re.compile(r"^\d+\.\d+\.\d+$").match(version_in_data):
                exceptions.raise_invalid_version_data_with_patch(version_in_data)
            else:
                context["unrecognized_version_data"] = version_in_data

        # Replace the JSON conversion when the user chooses a different schema version.
        if db_data.schema_version and schema_ocds.version != db_data.schema_version:
            replace = True

        if schema_ocds.extensions:
            schema_ocds.create_extended_release_schema_file(upload_dir, upload_url)
        schema_url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url
        pkg_url = schema_ocds.release_pkg_schema_url

        if file_type != "csv":
            # ORIGINAL UNFLATTEN
            conversion_context = convert_spreadsheet(
                upload_dir,
                upload_url,
                file_name,
                file_type,
                lib_cove_ocds_config,
                schema_url=schema_url,
                pkg_schema_url=pkg_url,
                replace=replace,
            )
        else:
            # Convert simple CSV to flat OCDS and return context
            conversion_context = convert_simple_csv_submission(
                db_data,
                lib_cove_ocds_config,
                schema_url,
                replace=replace,
            )
        context.update(conversion_context)

        with open(context["converted_path"], encoding="utf-8") as fp:
            json_data = json.load(fp, parse_float=Decimal,
                                  object_pairs_hook=OrderedDict)

    if replace:
        if os.path.exists(validation_errors_path):
            os.remove(validation_errors_path)

    context = common_checks_ocds(context, upload_dir, json_data, schema_ocds,
                                 cache=settings.CACHE_VALIDATION_ERRORS)

    if schema_ocds.json_deref_error:
        exceptions.raise_json_deref_error(schema_ocds.json_deref_error)

    schema_version = getattr(schema_ocds, "version", None)
    if schema_version:
        db_data.schema_version = schema_version

    if not db_data.rendered:
        db_data.rendered = True
    db_data.save()

    context.update({
        "data_schema_version": db_data.schema_version,
        "first_render": not db_data.rendered,
        "validation_errors_grouped": group_validation_errors(context["validation_errors"]),
    })

    ocds_show_schema = SchemaOCDS()
    ocds_show_deref_schema = ocds_show_schema.get_release_schema_obj(deref=True)

    if "records" in json_data:
        template = "cove_ocds/explore_record.html"
        if hasattr(json_data, "get") and hasattr(json_data.get("records"), "__iter__"):
            context["records"] = json_data["records"]
        else:
context["records"] = [] if isinstance(json_data["records"], list) and len(json_data["records"]) < 100: context["ocds_show_data"] = ocds_show_data(json_data, ocds_show_deref_schema) else: template = "silvereye/explore_release.html" if hasattr(json_data, "get") and hasattr(json_data.get("releases"), "__iter__"): context["releases"] = json_data["releases"] if (isinstance(json_data["releases"], list) and len(json_data["releases"]) < 100): context["ocds_show_data"] = ocds_show_data( json_data, ocds_show_deref_schema) # Parse release dates into objects so the template can format them. for release in context["releases"]: if hasattr(release, "get") and release.get("date"): if validate_rfc3339(release["date"]): release["date"] = parser.parse(release["date"]) else: release["date"] = None try: trans_date = release["contracts"][0]["implementation"][ "transactions"][0]["date"] parsed_trans_date = parser.parse(trans_date) release["contracts"][0]["implementation"]["transactions"][ 0]["date"] = parsed_trans_date except KeyError: pass if context.get("releases_aggregates"): date_fields = [ "max_award_date", "max_contract_date", "max_release_date", "max_tender_date", "min_award_date", "min_contract_date", "min_release_date", "min_tender_date", ] for field in date_fields: if context["releases_aggregates"].get(field): if validate_rfc3339( context["releases_aggregates"][field]): context["releases_aggregates"][ field] = parser.parse( context["releases_aggregates"][field]) else: context["releases_aggregates"][field] = None else: context["releases"] = [] # Include field coverage report original_file_path = context["original_file"]["path"] mapper = CSVMapper(csv_path=original_file_path) db_data.notice_type = mapper.release_type db_data.save() coverage_context = mapper.get_coverage_context() context.update({ "field_coverage": coverage_context, }) ocds_validation_errors, simple_csv_errors = prepare_simple_csv_validation_errors( context["validation_errors"], mapper, coverage_context["required_fields_missing"]) context.update({ "ocds_validation_errors": ocds_validation_errors, "simple_csv_errors": simple_csv_errors, "csv_mapper": mapper, }) # Silvereye: Insert OCDS data releases = context.get("releases") if releases: # If we don't have validation errors validation_errors_grouped = context["validation_errors_grouped"] if not validation_errors_grouped: json_string = json.dumps(json_data, indent=2, sort_keys=True, cls=DjangoJSONEncoder) UpsertDataHelpers().upsert_ocds_data(json_string, supplied_data=db_data) average_field_completion = coverage_context.get( "average_field_completion") inst, created = FieldCoverage.objects.update_or_create( file_submission=db_data, defaults={ "tenders_field_coverage": average_field_completion if mapper.release_type == "tender" else None, "awards_field_coverage": average_field_completion if mapper.release_type == "award" else None, "spend_field_coverage": average_field_completion if mapper.release_type == "spend" else None, }) update_publisher_monthly_counts() return render(request, template, context)
def create_output_files(name, df, parent_directory, load_data,
                        unflatten_contracts_finder_data=False):
    """
    Create a set of JSON format release package files from the DataFrame
    supplied for releases of type tender, award, or spend (spend is
    synthesised from the award data). Load the data into the database if
    the load_data param is True.

    :param name: Name of the directory to create
    :param df: DataFrame containing the data
    :param parent_directory: Path to the parent directory to create the files
    :param load_data: Boolean indicating that the data should be loaded
    :param unflatten_contracts_finder_data: Run legacy unflattening of raw CF
        data (used for dev/debugging)
    """
    release_types = ["tender", "award", "spend"]
    for release_type in release_types:
        logger.debug("Creating output files for %s %s", name, release_type)
        release_name = name + "-" + release_type
        output_dir = join(parent_directory, release_name)
        os.makedirs(output_dir)
        json_file_path = join(output_dir, release_name + ".json")

        # Filter the DataFrame
        if release_type == "spend":
            # Use award data and add fake spend
            df_release_type = df[df["releases/0/tag"] == "award"]
            spend_df = pd.DataFrame()
            for i, row in df_release_type.iterrows():
                rowdf = df_release_type.loc[[i]]
                new_row_df = rowdf.apply(augment_award_row_with_spend, axis=1)
                # DataFrame.append was removed in pandas 2.0; concat is equivalent
                spend_df = pd.concat([spend_df, new_row_df])
            if not df_release_type.empty:
                df_release_type = spend_df.loc[
                    spend_df["publishedDate"] < str(datetime.now())]
        else:
            df_release_type = df[df["releases/0/tag"] == release_type]

        if df_release_type.shape[0] > 0:
            csv_file_name = release_name + ".csv"
            csv_file_path = join(output_dir, csv_file_name)
            last_published_date = df_release_type["publishedDate"].max()

            # Write the DataFrame to a CSV (pass the path so the file is closed)
            df_release_type.to_csv(csv_file_path, index=False, header=True)

            # Create fake simple submission CSV
            period_dir_name = os.path.basename(parent_directory)
            simple_csv_file_name = f"{release_name}_{period_dir_name}.csv"
            simple_csv_file_path = join(SAMPLE_SUBMISSIONS_DIR, simple_csv_file_name)
            ocds_1_1_release_df = cf_mapper.convert_cf_to_1_1(df_release_type)
            if release_type == "tender":
                ocds_mapper = tender_mapper
            elif release_type == "award":
                ocds_mapper = award_mapper
            elif release_type == "spend":
                ocds_mapper = spend_mapper
            # ocds_mapper = CSVMapper(release_type=release_type)
            simple_csv_df = ocds_mapper.output_simple_csv(ocds_1_1_release_df)
            simple_csv_df.to_csv(simple_csv_file_path, index=False, header=True)

            # Upload simple CSV to DB
            if load_data:
                try:
                    publisher_name = df_release_type.iloc[0]["publisher/name"]
                    publisher_scheme = df_release_type.iloc[0]["publisher/scheme"]
                    publisher_id = df_release_type.iloc[0]["publisher/uid"]
                    publisher_uri = df_release_type.iloc[0]["publisher/uri"]
                    ocid_prefix = get_ocid_prefix(
                        df_release_type.iloc[0]["releases/0/ocid"])
                    # helpers.SimpleSubmissionHelpers().load_simple_csv_into_database(simple_csv_df, publisher)

                    # Load data from Simple CSV
                    logger.debug("Creating or updating Publisher %s (id %s)",
                                 publisher_name, publisher_id)
                    contact_name = df_release_type.iloc[0][
                        "releases/0/buyer/contactPoint/name"]
                    contact_email = df_release_type.iloc[0][
                        "releases/0/buyer/contactPoint/email"]
                    contact_telephone = df_release_type.iloc[0][
                        "releases/0/buyer/contactPoint/telephone"]
                    publisher, created = Publisher.objects.update_or_create(
                        publisher_name=publisher_name,
                        defaults={
                            "publisher_name": publisher_name,
                            "publisher_id": publisher_id,
                            "publisher_scheme": publisher_scheme,
                            "uri": publisher_uri,
                            "ocid_prefix": ocid_prefix,
                            "contact_name": contact_name if contact_name else "",
"contact_email": contact_email if contact_email else "", "contact_telephone": contact_telephone if contact_telephone else "", }) published_date = df_release_type.iloc[0]["publishedDate"] uri = df_release_type.iloc[0]["uri"] contracts_finder_id = os.path.splitext( os.path.split(uri)[1])[0] logger.info("Creating FileSubmission %s uri %s date %s", publisher.publisher_name, contracts_finder_id, published_date) # Create FileSubmission entry supplied_data, created = FileSubmission.objects.update_or_create( id=contracts_finder_id, defaults={ "current_app": "silvereye", "notice_type": ocds_mapper.release_type, }) supplied_data.publisher = publisher supplied_data.created = published_date if supplied_data.original_file and os.path.exists( supplied_data.original_file.path): os.remove(supplied_data.original_file.path) supplied_data.original_file.save( simple_csv_file_name, File(open(simple_csv_file_path))) supplied_data.save() if settings.STORE_OCDS_IN_S3: sync_with_s3(supplied_data) # Store field coverage simple_csv_mapper = CSVMapper( csv_path=simple_csv_file_path) coverage_context = simple_csv_mapper.get_coverage_context() average_field_completion = coverage_context.get( "average_field_completion") FieldCoverage.objects.update_or_create( file_submission=supplied_data, defaults={ "tenders_field_coverage": average_field_completion if simple_csv_mapper.release_type == "tender" else None, "awards_field_coverage": average_field_completion if simple_csv_mapper.release_type == "award" else None, "spend_field_coverage": average_field_completion if simple_csv_mapper.release_type == "spend" else None, }) lib_cove_ocds_config = LibCoveOCDSConfig() conversion_context = convert_simple_csv_submission( supplied_data, lib_cove_ocds_config, OCDS_RELEASE_SCHEMA) converted_path = conversion_context.get("converted_path") UpsertDataHelpers().upsert_ocds_data( converted_path, supplied_data) except FileNotFoundError: logger.exception("Error loading data for %s in %s", name, parent_directory) if unflatten_contracts_finder_data: unflatten_cf_data(json_file_path, last_published_date, load_data, output_dir)
def setUp(self):
    json_path = os.path.join(TEST_DATA_PATH, "GB-COH_00088456_bods.json")
    UpsertDataHelpers().upsert_bods_data(json_path)
class TestFlagHelperFunctions(TestCase):
    flag_helper = FlagHelperFunctions()
    upsert_helper = UpsertDataHelpers()

    def setUp(self):
        insert_flags()
        insert_flag_attachments()
        bods_test_file_path = os.path.join(
            PROTOTYPE_DATA_PATH, "bods", "PROC-20-0001", "d_ownership.json")
        self.upsert_helper.upsert_bods_data(bods_test_file_path)

    def test_get_flags_for_ocds_party_identifier(self):
        identifier = {
            "scheme": "GB-LAC",
            "id": "1602647563",
            "legalName": "Synomus Technology Services Ltd."
        }
        flags = self.flag_helper.get_flags_for_ocds_party_identifier(identifier)
        assert any(flag.flag_name == "company_id_invalid" for flag in flags)

    def test_get_flags_for_ocid(self):
        ocid = "ocds-123abc-PROC-20-0001"
        flags = self.flag_helper.get_flags_for_ocid(ocid)
        assert any(flag.flag_name == "person_in_multiple_applications_to_tender"
                   for flag in flags)
        assert any(flag.flag_name == "company_in_multiple_applications_to_tender"
                   for flag in flags)

    def test_get_flags_for_bods_identifier(self):
        identifier = {"id": "HMCI17014140912423", "schemeName": "National ID"}
        flags = self.flag_helper.get_flags_for_bods_identifier(identifier)
        assert any(flag.flag_name == "person_id_matches_cabinet_minister"
                   for flag in flags)

    def test_get_flags_for_bods_identifier_with_ocid(self):
        identifier = {"id": "HMCI17014140912423", "schemeName": "National ID"}
        ocid = "ocds-123abc-PROC-20-0001"

        # Test we get back all flags for identifier with and without OCID
        flags = self.flag_helper.get_flags_for_bods_identifier(identifier, ocid)
        assert any(flag.flag_name == "person_in_multiple_applications_to_tender"
                   for flag in flags)
        assert any(flag.flag_name == "person_id_matches_cabinet_minister"
                   for flag in flags)

        # Test we do NOT get flags for identifier where the flag is only for a
        # particular OCID
        flags = self.flag_helper.get_flags_for_bods_identifier(identifier)
        assert not any(flag.flag_name == "person_in_multiple_applications_to_tender"
                       for flag in flags)
        assert any(flag.flag_name == "person_id_matches_cabinet_minister"
                   for flag in flags)

    def test_get_flags_for_bods_entity_or_person(self):
        identifier = {"id": "HMCI17014140912423", "schemeName": "National ID"}
        ocid = "ocds-123abc-PROC-20-0001"
        person = BODSPersonStatement.objects.get(
            statement_id="019a93f1-e470-42e9-957b-03554681b2e3")

        # Test we get back all flags for identifier with and without OCID
        flags = self.flag_helper.get_flags_for_bods_entity_or_person(person, ocid)
        assert any(flag.flag_name == "person_in_multiple_applications_to_tender"
                   for flag in flags)
        assert any(flag.flag_name == "person_id_matches_cabinet_minister"
                   for flag in flags)

        # Test we do NOT get flags for identifier where the flag is only for a
        # particular OCID
        flags = self.flag_helper.get_flags_for_bods_identifier(identifier)
        assert not any(flag.flag_name == "person_in_multiple_applications_to_tender"
                       for flag in flags)
        assert any(flag.flag_name == "person_id_matches_cabinet_minister"
                   for flag in flags)