Пример #1
0
def main():
    """
    Command-line entry point.

    Builds the parser with ``create_parser``, parses the command line and
    hands the resulting keyword arguments to ``create_template``, ``flatten``
    or ``unflatten`` depending on the selected sub-command. When no
    sub-command is given the help text is printed instead.
    """
    parser = create_parser()
    args = parser.parse_args()
    command = args.subparser_name

    # Nothing to run: show usage and stop.
    if command is None:
        parser.print_help()
        return

    # Unless ``args.verbose`` is set, install the terse error and warning
    # handlers.
    if not args.verbose:
        sys.excepthook = non_verbose_error_handler
        warnings.formatwarning = non_verbose_warning_formatter

    if command == "create-template":
        # OS-level errors (e.g. the schema file does not exist) are printed
        # rather than propagated.
        try:
            # kwargs_from_parsed_args ensures empty arguments are not passed on
            create_template(**kwargs_from_parsed_args(args))
        except (OSError, IOError) as e:
            print(str(e))
        return

    if command == "flatten":
        flatten(**kwargs_from_parsed_args(args))
        return

    if command == "unflatten":
        unflatten(**kwargs_from_parsed_args(args))
Пример #2
0
def test_360_fields_case_insensitive(tmpdir):
    """
    Unflatten two xlsx fixtures with identical options and assert that the
    resulting JSON documents are equal.
    """
    shared_options = dict(
        input_format='xlsx',
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        main_sheet_name='grants',
        root_list_path='grants',
        root_id='',
        convert_titles=True,
    )

    def unflatten_to_json(fixture_path, json_name):
        # Convert one fixture and load the JSON that was written for it.
        target = tmpdir.join(json_name)
        unflatten(
            input_name=fixture_path,
            output_name=target.strpath,
            **shared_options
        )
        return json.load(target)

    output_json_grants = unflatten_to_json(
        'flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants.xlsx',
        'output_grant.json',
    )
    output_json_space_case = unflatten_to_json(
        'flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants_title_space_case.xlsx',
        'output_space_case.json',
    )

    assert output_json_grants == output_json_space_case
Пример #3
0
def test_roundtrip_360(tmpdir, output_format, use_titles):
    """
    Flatten the grants fixture to ``output_format``, unflatten the result
    back to JSON and assert the round trip reproduces the original document.

    ``use_titles`` is passed to ``flatten`` as ``use_titles`` and to
    ``unflatten`` as ``convert_titles``.
    """
    input_name = (
        "flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json"
    )
    flattened_path = tmpdir.join("flattened").strpath + "." + output_format
    roundtrip_file = tmpdir.join("roundtrip.json")

    flatten(
        input_name=input_name,
        output_name=flattened_path,
        output_format=output_format,
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        root_list_path="grants",
        root_id="",
        use_titles=use_titles,
        main_sheet_name="grants",
    )
    unflatten(
        input_name=flattened_path,
        output_name=roundtrip_file.strpath,
        input_format=output_format,
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        root_list_path="grants",
        root_id="",
        convert_titles=use_titles,
    )

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    assert original_json == roundtripped_json
def convert_spreadsheet(file_path, file_type, tmp_dir):
    """
    Unflatten a spreadsheet into ``output.json`` inside ``tmp_dir``.

    For ``file_type == 'csv'`` the file is copied into ``tmp_dir`` as
    ``grants.csv``, its encoding is probed (utf-8, then cp1252, otherwise
    latin_1) and the directory is handed to flattentool. Any other
    ``file_type`` passes ``file_path`` to flattentool directly with the
    default utf-8 encoding.

    If unflattening fails, a message naming ``file_path`` is printed and the
    exception is re-raised.
    """
    output_json = os.path.join(tmp_dir, 'output.json')
    detected_encoding = 'utf-8'
    source = file_path

    if file_type == 'csv':
        csv_copy = os.path.join(tmp_dir, 'grants.csv')
        shutil.copy(file_path, csv_copy)
        # Decode the whole file with each strict codec in turn; the first one
        # that succeeds is used.
        for candidate in ('utf-8', 'cp1252'):
            try:
                with open(csv_copy, encoding=candidate) as probe:
                    probe.read()
            except UnicodeDecodeError:
                continue
            detected_encoding = candidate
            break
        else:
            # Neither codec could decode the file.
            detected_encoding = 'latin_1'
        source = tmp_dir

    unflatten_options = dict(
        output_name=output_json,
        input_format=file_type,
        main_sheet_name='grants',
        root_id='',
        schema='https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/schema/360-giving-schema.json',
        convert_titles=True,
        encoding=detected_encoding,
    )
    try:
        flattentool.unflatten(source, **unflatten_options)
    except Exception:
        print("Unflattening failed for file {}".format(file_path))
        raise
def test_roundtrip(tmpdir, output_format):
    """
    Flatten the releases fixture to ``output_format``, unflatten it again on
    top of the base JSON fixture and assert the result equals the original.
    """
    input_name = 'flattentool/tests/fixtures/tenders_releases_2_releases.json'
    base_name = 'flattentool/tests/fixtures/tenders_releases_base.json'
    flattened_path = tmpdir.join('flattened').strpath+'.'+output_format
    roundtrip_file = tmpdir.join('roundtrip.json')

    flatten(
        input_name=input_name,
        output_name=flattened_path,
        output_format=output_format,
        schema='flattentool/tests/fixtures/release-schema.json',
        root_list_path='releases',
        main_sheet_name='releases')
    unflatten(
        input_name=flattened_path,
        output_name=roundtrip_file.strpath,
        input_format=output_format,
        base_json=base_name,
        schema='flattentool/tests/fixtures/release-schema.json',
        root_list_path='releases')

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    # Not currently possible to roundtrip Nones
    # https://github.com/open-contracting/flattening-ocds/issues/35
    for release in roundtripped_json['releases']:
        release['tender']['awardCriteriaDetails'] = None

    assert original_json == roundtripped_json
Пример #6
0
def test_360_main_sheetname_insensitive(tmpdir):
    """
    Unflatten two xlsx fixtures, both with ``main_sheet_name="grants"``, and
    assert that the resulting JSON documents are equal.
    """
    shared_options = dict(
        input_format="xlsx",
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        main_sheet_name="grants",
        root_list_path="grants",
        root_id="",
        convert_titles=True,
    )

    def unflatten_to_json(fixture_path, json_name):
        # Convert one fixture and load the JSON that was written for it.
        target = tmpdir.join(json_name)
        unflatten(
            input_name=fixture_path, output_name=target.strpath, **shared_options
        )
        return json.load(target)

    output_json_grants = unflatten_to_json(
        "flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants.xlsx",
        "output_grant.json",
    )
    output_json_Grants = unflatten_to_json(
        "flattentool/tests/fixtures/xlsx/fundingproviders-grants_2_grants_sheet_title_case.xlsx",
        "output_grant_sheet_title_case.json",
    )

    assert output_json_grants == output_json_Grants
Пример #7
0
def main():
    """
    Command-line entry point.

    Parses the command line with the parser from ``create_parser`` and calls
    ``create_template``, ``flatten`` or ``unflatten`` according to the chosen
    sub-command. With no sub-command the help text is printed.
    """
    parser = create_parser()
    args = parser.parse_args()
    command = args.subparser_name

    # Nothing to run: show usage and stop.
    if command is None:
        parser.print_help()
        return

    if command == 'create-template':
        # OS-level errors (e.g. the schema file does not exist) are printed
        # rather than propagated.
        try:
            # kwargs_from_parsed_args ensures empty arguments are not passed on
            create_template(**kwargs_from_parsed_args(args))
        except (OSError, IOError) as e:
            print(text_type(e))
        return

    if command == 'flatten':
        flatten(**kwargs_from_parsed_args(args))
        return

    if command == 'unflatten':
        unflatten(**kwargs_from_parsed_args(args))
Пример #8
0
def test_roundtrip(tmpdir, output_format):
    """
    Flatten the releases fixture to ``output_format``, unflatten it again on
    top of the base JSON fixture and assert the result equals the original.
    """
    input_name = 'flattentool/tests/fixtures/tenders_releases_2_releases.json'
    base_name = 'flattentool/tests/fixtures/tenders_releases_base.json'
    flattened_path = tmpdir.join('flattened').strpath + '.' + output_format
    roundtrip_file = tmpdir.join('roundtrip.json')

    flatten(input_name=input_name,
            output_name=flattened_path,
            output_format=output_format,
            schema='flattentool/tests/fixtures/release-schema.json',
            root_list_path='releases',
            main_sheet_name='releases')
    unflatten(input_name=flattened_path,
              output_name=roundtrip_file.strpath,
              input_format=output_format,
              base_json=base_name,
              schema='flattentool/tests/fixtures/release-schema.json',
              root_list_path='releases')

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    # Not currently possible to roundtrip Nones
    # https://github.com/open-contracting/flattening-ocds/issues/35
    for release in roundtripped_json['releases']:
        release['tender']['awardCriteriaDetails'] = None

    assert original_json == roundtripped_json
def test_roundtrip_360_rollup(tmpdir, use_titles):
    """
    Flatten the grants fixture to CSV with ``rollup=True``, move only
    ``grants.csv`` into a fresh directory, unflatten that directory and assert
    the result equals the original JSON.
    """
    input_name = 'flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json'
    output_format = 'csv'
    output_name = tmpdir.join('flattened').strpath+'.'+output_format
    moved_name = tmpdir.mkdir('flattened_main_only').strpath

    flatten(
        input_name=input_name,
        output_name=output_name,
        output_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        root_list_path='grants',
        root_id='',
        use_titles=use_titles,
        rollup=True,
        main_sheet_name='grants')

    # Keep only the main sheet so the roundtrip relies on the rolled-up data.
    os.rename(
        os.path.join(output_name, 'grants.csv'),
        os.path.join(moved_name, 'grants.csv'))

    unflatten(
        input_name=moved_name,
        output_name=tmpdir.join('roundtrip.json').strpath,
        input_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        root_list_path='grants',
        root_id='',
        convert_titles=use_titles)

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))
    assert original_json == roundtripped_json
Пример #10
0
def test_commands_hashcomments_sourcemap(tmpdir, input_format):
    """
    Unflatten the ``commands_hashcomments_sourcemap`` fixture for
    ``input_format`` and check both the unflattened JSON and the column
    letters recorded in the cell source map.
    """
    fixture = "flattentool/tests/fixtures/{}/commands_hashcomments_sourcemap.{}".format(
        input_format, input_format
    )
    unflattened_file = tmpdir.join("commands_hashcomments_unflattened.json")
    cell_map_file = tmpdir.join("commands_hashcomments_source_map.json")
    heading_map_file = tmpdir.join("commands_hashcomments_heading_source_map.json")

    unflatten(
        fixture,
        input_format=input_format,
        output_name=unflattened_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    unflattened = json.load(unflattened_file)
    cell_source_map = json.load(cell_map_file)

    expected = {
        "publishedDate": "2019-06-20T00:00:00Z",
        "publisher": {"name": "Open Data Services Co-operative"},
        "uri": "http://www.example.com",
        "version": "1.1",
        "main": [{"date": "2010-03-15T09:30:00Z", "id": "Ocds-1"}],
    }
    assert unflattened == expected

    # check fields have correct column letters
    assert cell_source_map["main/0/date"][0][1] == "E"
    assert cell_source_map["main/0/id"][0][1] == "C"
Пример #11
0
def test_metatab_only(tmpdir):
    """
    Unflatten ``basic_meta.xlsx`` with ``metatab_only=True`` and check the
    unflattened JSON, the cell source map and the heading source map.
    """
    unflattened_file = tmpdir.join('meta_unflattened.json')
    cell_map_file = tmpdir.join('meta_cell_source_map.json')
    heading_map_file = tmpdir.join('meta_heading_source_map.json')

    unflatten(
        'flattentool/tests/fixtures/xlsx/basic_meta.xlsx',
        input_format='xlsx',
        output_name=unflattened_file.strpath,
        metatab_name='Meta',
        metatab_vertical_orientation=True,
        metatab_only=True,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
    )

    expected_json = {'a': 'a1', 'b': 'b1', 'c': 'c1'}
    metatab_json = json.load(unflattened_file)
    assert metatab_json == expected_json

    expected_cell_map = {
        '': [['Meta', 2]],
        'a': [['Meta', '1', 2, 'a']],
        'b': [['Meta', '2', 2, 'b']],
        'c': [['Meta', '3', 2, 'c']],
    }
    cell_source_map = json.load(cell_map_file)
    assert cell_source_map == expected_cell_map

    expected_heading_map = {
        'a': [['Meta', 'a']],
        'b': [['Meta', 'b']],
        'c': [['Meta', 'c']],
    }
    heading_source_map = json.load(heading_map_file)
    assert heading_source_map == expected_heading_map
Пример #12
0
def test_roundtrip_360(tmpdir, output_format, use_titles):
    """
    Flatten the WellcomeTrust grants fixture to ``output_format``, unflatten
    it back to JSON and assert the round trip reproduces the original.

    ``use_titles`` is passed to ``flatten`` as ``use_titles`` and to
    ``unflatten`` as ``convert_titles``.
    """
    input_name = 'flattentool/tests/fixtures/WellcomeTrust-grants_fixed_2_grants.json'
    flattened_path = tmpdir.join('flattened').strpath + '.' + output_format
    roundtrip_file = tmpdir.join('roundtrip.json')

    flatten(input_name=input_name,
            output_name=flattened_path,
            output_format=output_format,
            schema='flattentool/tests/fixtures/360-giving-schema.json',
            main_sheet_name='grants',
            root_list_path='grants',
            root_id='',
            use_titles=use_titles)
    unflatten(input_name=flattened_path,
              output_name=roundtrip_file.strpath,
              input_format=output_format,
              schema='flattentool/tests/fixtures/360-giving-schema.json',
              main_sheet_name='grants',
              root_list_path='grants',
              root_id='',
              convert_titles=use_titles)

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    # Currently not enough information to successfully roundtrip that values
    # are numbers, when this is not required by the schema
    # for CSV, and for openpyxl under Python 2
    if output_format == 'csv' or sys.version_info < (3, 0):
        for grant in original_json['grants']:
            grant['plannedDates'][0]['duration'] = str(
                grant['plannedDates'][0]['duration'])

    assert original_json == roundtripped_json
Пример #13
0
def test_roundtrip_360(tmpdir, output_format, use_titles):
    """
    Flatten the WellcomeTrust grants fixture to ``output_format``, unflatten
    it back to JSON and assert the round trip reproduces the original.

    ``use_titles`` is passed to ``flatten`` as ``use_titles`` and to
    ``unflatten`` as ``convert_titles``.
    """
    input_name = 'flattentool/tests/fixtures/WellcomeTrust-grants_fixed_2_grants.json'
    flattened_path = tmpdir.join('flattened').strpath+'.'+output_format
    roundtrip_file = tmpdir.join('roundtrip.json')

    flatten(
        input_name=input_name,
        output_name=flattened_path,
        output_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        main_sheet_name='grants',
        root_list_path='grants',
        root_id='',
        use_titles=use_titles)
    unflatten(
        input_name=flattened_path,
        output_name=roundtrip_file.strpath,
        input_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        main_sheet_name='grants',
        root_list_path='grants',
        root_id='',
        convert_titles=use_titles)

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    # Currently not enough information to successfully roundtrip that values
    # are numbers, when this is not required by the schema
    # for CSV, and for openpyxl under Python 2
    if output_format == 'csv' or sys.version_info < (3, 0):
        for grant in original_json['grants']:
            grant['plannedDates'][0]['duration'] = str(grant['plannedDates'][0]['duration'])

    assert original_json == roundtripped_json
Пример #14
0
def test_unflatten_cf_daily_csv_using_base_json():
    """
    Clean a CSV export fixture, write it to a fresh working directory,
    prepare a base JSON from it and unflatten the cleaned CSV directory on top
    of that base JSON. The test passes if the resulting JSON is truthy.
    """
    CF_DIR = join(TESTS_DIR, "fixtures", "CF_CSV")
    working_dir = join(CF_DIR, "working_files")
    csv_path_or_url = join(CF_DIR, "export-2020-08-05_single_buyer.csv")
    output_file = join(working_dir, "release_packages.json")
    clean_output_dir = join(working_dir, "cleaned")
    clean_output_file = join(clean_output_dir, "cleaned.csv")

    # Start from an empty working directory on every run.
    shutil.rmtree(working_dir, ignore_errors=True)
    os.makedirs(clean_output_dir)

    df = pd.read_csv(csv_path_or_url)

    cf_mapper = CSVMapper(mappings_file=CF_MAPPINGS_FILE)
    fixed_df = fix_contracts_finder_flat_csv(df)
    fixed_df = cf_mapper.convert_cf_to_1_1(fixed_df)

    # Close (and therefore flush) the CSV before unflatten reads it back,
    # rather than relying on garbage collection of an anonymous handle.
    # newline="" is what pandas recommends for handles passed to to_csv.
    with open(clean_output_file, "w", newline="") as cleaned_csv:
        fixed_df.to_csv(cleaned_csv, index=False, header=True)

    base_json_path = join(working_dir, "base.json")
    # Called for its effect on base_json_path; the return value is not needed.
    cf_mapper.prepare_base_json_from_release_df(fixed_df, base_json_path)
    unflatten(clean_output_dir,
              base_json=base_json_path,
              output_name=output_file,
              root_list_path="releases",
              input_format="csv",
              root_id="ocid",
              root_is_list=False,
              schema=OCDS_SCHEMA)

    with open(output_file) as output_json:
        js = json.load(output_json)
    assert js
Пример #15
0
def convert_spreadsheet(input_path, converted_path, file_type):
    """
    Unflatten the spreadsheet at ``input_path`` into ``converted_path``.

    For ``file_type == 'csv'`` the file is copied into a temporary directory
    as ``grants.csv``, its encoding is probed (utf-8-sig, then cp1252,
    otherwise latin_1) and the directory is handed to flattentool. Any other
    ``file_type`` passes ``input_path`` to flattentool directly with the
    default utf-8-sig encoding.

    The temporary directory is removed once unflattening has finished,
    whether it succeeded or raised.
    """
    encoding = 'utf-8-sig'
    tmp_dir = None
    try:
        if file_type == 'csv':
            tmp_dir = tempfile.mkdtemp()
            destination = os.path.join(tmp_dir, 'grants.csv')
            shutil.copy(input_path, destination)
            # Decode the whole file with each strict codec in turn; the first
            # one that succeeds is used.
            for candidate in ('utf-8-sig', 'cp1252'):
                try:
                    with open(destination, encoding=candidate) as main_sheet_file:
                        main_sheet_file.read()
                except UnicodeDecodeError:
                    continue
                encoding = candidate
                break
            else:
                # Neither codec could decode the file.
                encoding = 'latin_1'
            input_name = tmp_dir
        else:
            input_name = input_path

        flattentool.unflatten(
            input_name,
            output_name=converted_path,
            input_format=file_type,
            root_list_path='grants',
            root_id='',
            schema=
            'https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/schema/360-giving-schema.json',
            convert_titles=True,
            encoding=encoding)
    finally:
        # mkdtemp leaves deletion to the caller; without this every CSV
        # conversion would leave a directory behind.
        if tmp_dir is not None:
            shutil.rmtree(tmp_dir, ignore_errors=True)
Пример #16
0
def main():
    """
    Command-line entry point.

    Parses the command line with the parser from ``create_parser`` and calls
    ``create_template``, ``flatten`` or ``unflatten`` according to the chosen
    sub-command. With no sub-command the help text is printed.
    """
    parser = create_parser()
    args = parser.parse_args()
    command = args.subparser_name

    # Nothing to run: show usage and stop.
    if command is None:
        parser.print_help()
        return

    if command == 'create-template':
        # OS-level errors (e.g. the schema file does not exist) are printed
        # rather than propagated.
        try:
            # kwargs_from_parsed_args ensures empty arguments are not passed on
            create_template(**kwargs_from_parsed_args(args))
        except (OSError, IOError) as e:
            print(text_type(e))
        return

    if command == 'flatten':
        flatten(**kwargs_from_parsed_args(args))
        return

    if command == 'unflatten':
        unflatten(**kwargs_from_parsed_args(args))
Пример #17
0
def convert_spreadsheet(file_path, file_type, tmp_dir):
    """
    Unflatten a spreadsheet into ``output.json`` inside ``tmp_dir``.

    For ``file_type == 'csv'`` the file is copied into ``tmp_dir`` as
    ``grants.csv``, its encoding is probed (utf-8, then cp1252, otherwise
    latin_1) and the directory is handed to flattentool. Any other
    ``file_type`` passes ``file_path`` to flattentool directly with the
    default utf-8 encoding.

    If unflattening fails, a message naming ``file_path`` is printed and the
    exception is re-raised.
    """
    output_json = os.path.join(tmp_dir, 'output.json')
    detected_encoding = 'utf-8'
    source = file_path

    if file_type == 'csv':
        csv_copy = os.path.join(tmp_dir, 'grants.csv')
        shutil.copy(file_path, csv_copy)
        # Decode the whole file with each strict codec in turn; the first one
        # that succeeds is used.
        for candidate in ('utf-8', 'cp1252'):
            try:
                with open(csv_copy, encoding=candidate) as probe:
                    probe.read()
            except UnicodeDecodeError:
                continue
            detected_encoding = candidate
            break
        else:
            # Neither codec could decode the file.
            detected_encoding = 'latin_1'
        source = tmp_dir

    unflatten_options = dict(
        output_name=output_json,
        input_format=file_type,
        main_sheet_name='grants',
        root_id='',
        schema='https://raw.githubusercontent.com/ThreeSixtyGiving/standard/master/schema/360-giving-schema.json',
        convert_titles=True,
        encoding=detected_encoding,
    )
    try:
        flattentool.unflatten(source, **unflatten_options)
    except Exception:
        print("Unflattening failed for file {}".format(file_path))
        raise
Пример #18
0
def test_metatab_only(tmpdir, input_format):
    """
    Unflatten the ``basic_meta`` fixture for ``input_format`` with
    ``metatab_only=True`` and check the unflattened JSON, the cell source map
    and the heading source map.
    """
    fixture = "flattentool/tests/fixtures/{}/basic_meta.{}".format(
        input_format, input_format
    )
    unflattened_file = tmpdir.join("meta_unflattened.json")
    cell_map_file = tmpdir.join("meta_cell_source_map.json")
    heading_map_file = tmpdir.join("meta_heading_source_map.json")

    unflatten(
        fixture,
        input_format=input_format,
        output_name=unflattened_file.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
        metatab_only=True,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
    )

    expected_json = {"a": "a1", "b": "b1", "c": "c1"}
    metatab_json = json.load(unflattened_file)
    assert metatab_json == expected_json

    expected_cell_map = {
        "": [["Meta", 2]],
        "a": [["Meta", "1", 2, "a"]],
        "b": [["Meta", "2", 2, "b"]],
        "c": [["Meta", "3", 2, "c"]],
    }
    cell_source_map = json.load(cell_map_file)
    assert cell_source_map == expected_cell_map

    expected_heading_map = {
        "a": [["Meta", "a"]],
        "b": [["Meta", "b"]],
        "c": [["Meta", "c"]],
    }
    heading_source_map = json.load(heading_map_file)
    assert heading_source_map == expected_heading_map
Пример #19
0
def test_roundtrip_360_rollup(tmpdir, use_titles):
    """
    Flatten the grants fixture to CSV with ``rollup=True``, move only
    ``grants.csv`` into a fresh directory, unflatten that directory and assert
    the result equals the original JSON.
    """
    input_name = 'flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json'
    output_format = 'csv'
    output_name = tmpdir.join('flattened').strpath + '.' + output_format
    moved_name = tmpdir.mkdir('flattened_main_only').strpath

    flatten(input_name=input_name,
            output_name=output_name,
            output_format=output_format,
            schema='flattentool/tests/fixtures/360-giving-schema.json',
            root_list_path='grants',
            root_id='',
            use_titles=use_titles,
            rollup=True,
            main_sheet_name='grants')

    # Keep only the main sheet so the roundtrip relies on the rolled-up data.
    os.rename(os.path.join(output_name, 'grants.csv'),
              os.path.join(moved_name, 'grants.csv'))

    unflatten(input_name=moved_name,
              output_name=tmpdir.join('roundtrip.json').strpath,
              input_format=output_format,
              schema='flattentool/tests/fixtures/360-giving-schema.json',
              root_list_path='grants',
              root_id='',
              convert_titles=use_titles)

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join('roundtrip.json'))
    assert original_json == roundtripped_json
Пример #20
0
def test_roundtrip_xml(tmpdir, output_format):
    """
    Flatten the IATI example XML to ``output_format``, unflatten it back to
    XML and assert both documents parse to equal dictionaries.
    """
    input_name = "examples/iati/expected.xml"
    flattened_path = tmpdir.join("flattened").strpath + "." + output_format
    roundtrip_file = tmpdir.join("roundtrip.xml")

    flatten(
        input_name=input_name,
        output_name=flattened_path,
        output_format=output_format,
        root_list_path="iati-activity",
        id_name="iati-identifier",
        xml=True,
    )
    unflatten(
        input_name=flattened_path,
        output_name=roundtrip_file.strpath,
        input_format=output_format,
        root_list_path="iati-activity",
        id_name="iati-identifier",
        xml=True,
    )

    # Compare without ordering, by using dict_constructor=dict instead of
    # OrderedDict. Both files are closed as soon as they have been parsed.
    with open(input_name, "rb") as original_xml:
        original = xmltodict.parse(original_xml, dict_constructor=dict)
    with roundtrip_file.open("rb") as roundtripped_xml:
        roundtripped = xmltodict.parse(roundtripped_xml, dict_constructor=dict)

    assert original == roundtripped
Пример #21
0
def main():
    """
    Command-line entry point.

    Parses the command line with the parser from ``create_parser`` and calls
    ``create_template``, ``flatten`` or ``unflatten`` according to the chosen
    sub-command. With no sub-command the help text is printed.

    No defaults are set here; the parsed arguments are forwarded through
    ``kwargs_from_parsed_args`` to the function being called.
    """
    parser = create_parser()
    args = parser.parse_args()
    command = args.subparser_name

    # Nothing to run: show usage and stop.
    if command is None:
        parser.print_help()
        return

    if command == 'create-template':
        # OS-level errors (e.g. the schema file does not exist) are printed
        # rather than propagated.
        try:
            # kwargs_from_parsed_args ensures empty arguments are not passed on
            create_template(**kwargs_from_parsed_args(args))
        except (OSError, IOError) as e:
            print(text_type(e))
        return

    if command == 'flatten':
        flatten(**kwargs_from_parsed_args(args))
        return

    if command == 'unflatten':
        unflatten(**kwargs_from_parsed_args(args))
Пример #22
0
def test_commands_id_name(tmpdir, input_format):
    """
    Unflatten the ``commands_id_name`` fixture for ``input_format`` and check
    the unflattened JSON against the expected document.
    """
    fixture = "flattentool/tests/fixtures/{}/commands_id_name.{}".format(
        input_format, input_format
    )
    unflattened_file = tmpdir.join("commands_id_name_unflattened.json")
    cell_map_file = tmpdir.join("commands_id_name_source_map.json")
    heading_map_file = tmpdir.join("commands_id_name_heading_source_map.json")

    unflatten(
        fixture,
        input_format=input_format,
        output_name=unflattened_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    expected_array = [
        {"heading1": "more data", "heading2": "other data"},
        {"heading1": "more more data", "heading2": "more other data"},
    ]
    expected = {
        "someroot": [
            {
                "actual": "actual",
                "headings": "data",
                "someId": "some",
                "someArray": expected_array,
            }
        ],
        "some": "data",
    }

    unflattened = json.load(unflattened_file)
    assert unflattened == expected
Пример #23
0
def test_roundtrip(tmpdir, output_format):
    """
    Flatten the releases fixture to ``output_format``, unflatten it again on
    top of the base JSON fixture and assert the result equals the original.
    """
    input_name = "flattentool/tests/fixtures/tenders_releases_2_releases.json"
    base_name = "flattentool/tests/fixtures/tenders_releases_base.json"
    flattened_path = tmpdir.join("flattened").strpath + "." + output_format
    roundtrip_file = tmpdir.join("roundtrip.json")

    flatten(
        input_name=input_name,
        output_name=flattened_path,
        output_format=output_format,
        schema="flattentool/tests/fixtures/release-schema.json",
        root_list_path="releases",
        main_sheet_name="releases",
    )
    unflatten(
        input_name=flattened_path,
        output_name=roundtrip_file.strpath,
        input_format=output_format,
        base_json=base_name,
        schema="flattentool/tests/fixtures/release-schema.json",
        root_list_path="releases",
    )

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    # Not currently possible to roundtrip Nones
    # https://github.com/open-contracting/flattening-ocds/issues/35
    for release in roundtripped_json["releases"]:
        release["tender"]["awardCriteriaDetails"] = None

    assert original_json == roundtripped_json
Пример #24
0
def test_commands_single_sheet_default(tmpdir):
    """
    Unflatten ``commands_defaulted.xlsx`` twice with different
    ``default_configuration`` strings and check the JSON produced each time.
    """
    unflattened_file = tmpdir.join('command_single_unflattened.json')

    def unflatten_with(configuration):
        # Both runs write to the same output files; only the configuration
        # string differs.
        unflatten(
            'flattentool/tests/fixtures/xlsx/commands_defaulted.xlsx',
            input_format='xlsx',
            output_name=unflattened_file.strpath,
            cell_source_map=tmpdir.join('command_single_source_map.json').strpath,
            heading_source_map=tmpdir.join('command_single_heading_source_map.json').strpath,
            default_configuration=configuration,
        )
        return json.load(unflattened_file)

    data_row = {'actual': 'actual', 'headings': 'data', 'some': 'some'}
    heading_row = {'actual': 'other', 'headings': 'headings', 'some': 'some'}

    unflattened = unflatten_with("SkipRows 1, headerrows 2")
    assert unflattened == {'main': [data_row]}

    unflattened = unflatten_with("SkipRows 1")
    assert unflattened == {'main': [heading_row, data_row]}
Пример #25
0
def test_commands_hashcomments(tmpdir, input_format):
    """
    Unflatten the ``commands_hashcomments`` fixture for ``input_format`` and
    check the unflattened JSON against the expected document.
    """
    fixture = "flattentool/tests/fixtures/{}/commands_hashcomments.{}".format(
        input_format, input_format
    )
    unflattened_file = tmpdir.join("commands_hashcomments_unflattened.json")
    cell_map_file = tmpdir.join("commands_hashcomments_source_map.json")
    heading_map_file = tmpdir.join("commands_hashcomments_heading_source_map.json")

    unflatten(
        fixture,
        input_format=input_format,
        output_name=unflattened_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
    )

    expected = {
        "main": [
            {"actual": "actual", "headings": "data", "some": "some"},
            {"actual": "actual", "headings": "Other data", "some": "some"},
        ],
        "some": "data",
    }

    unflattened = json.load(unflattened_file)
    assert unflattened == expected
Пример #26
0
def test_commands_default_override(tmpdir, input_format):
    """
    Unflatten the ``commands_in_metatab_defaulted`` fixture for
    ``input_format`` with ``default_configuration="headerrows 2"`` and check
    the unflattened JSON against the expected document.
    """
    fixture = "flattentool/tests/fixtures/{}/commands_in_metatab_defaulted.{}".format(
        input_format, input_format
    )
    unflattened_file = tmpdir.join("command_metatab_unflattened.json")
    cell_map_file = tmpdir.join("command_metatab_source_map.json")
    heading_map_file = tmpdir.join("command_metatab_heading_source_map.json")

    unflatten(
        fixture,
        input_format=input_format,
        output_name=unflattened_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
        metatab_name="Meta",
        metatab_vertical_orientation=True,
        default_configuration="headerrows 2",
    )

    # In this case want both 'headerrows 2' and 'skiprows 1' (which is defined
    # in the metatab) to be used, as we only override individual commands, not
    # all of them. So the results will be the same as if using
    # commands_in_metatab.xlsx (where all commands are in the metatab).
    expected = {
        "main": [
            {"actual": "actual", "headings": "data", "some": "some"},
            {"actual": "actual", "headings": "Other data", "some": "some"},
        ],
        "some": "data",
    }

    unflattened = json.load(unflattened_file)
    assert unflattened == expected
Пример #27
0
def test_360_main_sheetname_insensitive(tmpdir):
    """
    Unflatten two xlsx fixtures, both with ``main_sheet_name='grants'``, and
    assert that the resulting JSON documents are equal.
    """
    shared_options = dict(
        input_format='xlsx',
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        main_sheet_name='grants',
        root_list_path='grants',
        root_id='',
        convert_titles=True,
    )

    def unflatten_to_json(fixture_path, json_name):
        # Convert one fixture and load the JSON that was written for it.
        target = tmpdir.join(json_name)
        unflatten(
            input_name=fixture_path,
            output_name=target.strpath,
            **shared_options
        )
        return json.load(target)

    output_json_grants = unflatten_to_json(
        'flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_grants.xlsx',
        'output_grant.json',
    )
    output_json_Grants = unflatten_to_json(
        'flattentool/tests/fixtures/xlsx/WellcomeTrust-grants_2_Grants.xlsx',
        'output_Grant.json',
    )

    assert output_json_grants == output_json_Grants
Пример #28
0
def test_roundtrip_360_rollup(tmpdir, use_titles):
    """
    Round-trip the 360Giving fixture through a rolled-up CSV main sheet.

    The fixture is flattened to CSV with ``rollup=True``, then only
    ``grants.csv`` is moved into a fresh directory and unflattened from
    there. The result must equal the original JSON.

    :param tmpdir: pytest temporary directory fixture
    :param use_titles: passed as ``use_titles`` to ``flatten`` and as
        ``convert_titles`` to ``unflatten``
    """
    input_name = (
        "flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json"
    )
    output_format = "csv"
    output_name = tmpdir.join("flattened").strpath + "." + output_format
    moved_name = tmpdir.mkdir("flattened_main_only").strpath

    flatten(
        input_name=input_name,
        output_name=output_name,
        output_format=output_format,
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        root_list_path="grants",
        root_id="",
        use_titles=use_titles,
        rollup=True,
        main_sheet_name="grants",
    )

    # Isolate the main sheet so that unflatten reads nothing but grants.csv.
    os.rename(
        os.path.join(output_name, "grants.csv"),
        os.path.join(moved_name, "grants.csv"),
    )

    unflatten(
        input_name=moved_name,
        output_name=tmpdir.join("roundtrip.json").strpath,
        input_format=output_format,
        schema="flattentool/tests/fixtures/360-giving-schema.json",
        root_list_path="grants",
        root_id="",
        convert_titles=use_titles,
    )

    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(tmpdir.join("roundtrip.json"))
    assert original_json == roundtripped_json
Пример #29
0
def test_unflatten_xslx_unicode(tmpdir):
    """
    Unflatten the unicode XLSX fixture and check the non-ASCII id survives.

    The expected ``ocid`` is the integer ``1`` on Python 3 and the string
    ``'1'`` on Python 2.
    """
    output_file = tmpdir.join('release.json')
    unflatten(
        'flattentool/tests/fixtures/xlsx/unicode.xlsx',
        input_format='xlsx',
        output_name=output_file.strpath,
        main_sheet_name='main')
    reloaded_json = json.load(output_file)
    # Compare the version tuple, not the version string: a lexicographic
    # string comparison would report '10.0' as older than '3'.
    expected_ocid = 1 if sys.version_info >= (3,) else '1'
    assert reloaded_json == {'main': [{'ocid': expected_ocid, 'id': 'éαГ😼𝒞人'}]}
Пример #30
0
def test_unflatten_xslx_unicode(tmpdir):
    """
    Unflatten the unicode XLSX fixture and check the non-ASCII id survives.

    The expected ``ocid`` is the integer ``1`` on Python 3 and the string
    ``'1'`` on Python 2.
    """
    release_file = tmpdir.join('release.json')
    unflatten(
        'flattentool/tests/fixtures/xlsx/unicode.xlsx',
        input_format='xlsx',
        output_name=release_file.strpath,
        main_sheet_name='main')
    reloaded_json = json.load(release_file)
    # sys.version is a free-form string; comparing it with '3' is
    # lexicographic and breaks for a major version of 10 or more. Use the
    # structured version tuple instead.
    expected_ocid = 1 if sys.version_info >= (3,) else '1'
    assert reloaded_json == {'main': [{'ocid': expected_ocid, 'id': 'éαГ😼𝒞人'}]}
Пример #31
0
def test_unflatten_unicode(tmpdir, input_format):
    """
    Unflatten the ``unicode`` fixture for ``input_format`` and compare the JSON.

    :param tmpdir: pytest temporary directory fixture
    :param input_format: used both as the fixture sub-directory and as the
        fixture file extension
    """
    fixture_path = "flattentool/tests/fixtures/{}/unicode.{}".format(input_format, input_format)
    release_file = tmpdir.join("release.json")

    unflatten(
        fixture_path,
        input_format=input_format,
        output_name=release_file.strpath,
        main_sheet_name="main",
    )

    expected = {"main": [{"ocid": 1, "id": "éαГ😼𝒞人"}]}
    assert json.load(release_file) == expected
Пример #32
0
def test_unflatten_org_xml(tmpdir):
    """
    Unflatten the IATI organisation workbook to XML and compare it,
    character for character, with the expected fixture.
    """
    output_file = tmpdir.join('output.xml')
    unflatten(input_name='flattentool/tests/fixtures/xlsx/iati-org.xlsx',
              output_name=output_file.strpath,
              input_format='xlsx',
              id_name='organisation-identifier',
              xml=True,
              metatab_name='Meta')
    # Read the expected document inside a context manager so the handle is
    # closed even when the assertion below fails.
    with open('flattentool/tests/fixtures/iati-org.xml') as expected_file:
        expected_xml = expected_file.read()
    assert expected_xml == output_file.read()
Пример #33
0
def test_unflatten_csv_latin1(tmpdir):
    """
    Unflatten a latin1-encoded CSV, passing ``encoding='latin1'``, and check
    that the accented character is present in the resulting JSON.
    """
    csv_dir = tmpdir.ensure('release_input', dir=True)
    csv_dir.join('main.csv').write_text('ocid,id\n1,é\n', encoding='latin1')
    release_file = tmpdir.join('release.json')

    unflatten(
        csv_dir.strpath,
        input_format='csv',
        encoding='latin1',
        output_name=release_file.strpath,
        main_sheet_name='main',
    )

    expected = {'main': [{'ocid': '1', 'id': 'é'}]}
    assert json.load(release_file) == expected
Пример #34
0
def test_commands_single_sheet_csv(tmpdir):
    """
    Unflatten the ``commands_in_file`` CSV fixture directory, requesting both
    source maps, and check the single resulting ``main`` row.
    """
    unflattened_file = tmpdir.join('command_single_unflattened.json')
    cell_map_path = tmpdir.join('command_single_source_map.json').strpath
    heading_map_path = tmpdir.join('command_single_heading_source_map.json').strpath

    unflatten(
        'flattentool/tests/fixtures/csv/commands_in_file',
        input_format='csv',
        output_name=unflattened_file.strpath,
        cell_source_map=cell_map_path,
        heading_source_map=heading_map_path,
    )

    expected = {'main': [{'actual': 'actual', 'headings': 'data', 'some': 'some'}]}
    assert json.load(unflattened_file) == expected
Пример #35
0
    def process_item(self, item, spider):
        """
        Convert a CSV or XLSX file item to JSON with flattentool's ``unflatten``.

        The item is returned untouched when ``spider.unflatten`` is falsy or
        when the item is neither a ``File`` nor a ``FileItem``. Otherwise
        ``item["file_name"]`` gets a ``.json`` extension and ``item["data"]``
        is replaced by the unflattened JSON text.

        :param item: the scraped item; its ``file_name``, ``data`` and ``url``
            keys are read
        :param spider: the running spider; its ``unflatten``, ``ocds_version``
            and ``unflatten_args`` attributes are read
        :returns: the (possibly modified) item
        :raises NotImplementedError: if the file name ends in neither ``.csv``
            nor ``.xlsx``, or if no tag from ``get_tags()`` starts with the
            spider's OCDS version
        """
        if not spider.unflatten or not isinstance(item, (File, FileItem)):
            return item

        input_name = item["file_name"]
        if input_name.endswith(".csv"):
            item["file_name"] = item["file_name"][:-4] + ".json"
            input_format = "csv"
        elif input_name.endswith(".xlsx"):
            item["file_name"] = item["file_name"][:-5] + ".json"
            input_format = "xlsx"
        else:
            raise NotImplementedError(
                f"the file '{input_name}' has no extension or is not CSV or XLSX, "
                f"obtained from: {item['url']}"
            )

        # Tags are scanned in reverse and the first one matching the spider's
        # OCDS version (dots replaced by double underscores) wins.
        spider_ocds_version = spider.ocds_version.replace(".", "__")
        for tag in reversed(get_tags()):
            if tag.startswith(spider_ocds_version):
                schema = get_release_schema_url(tag)
                break
        else:
            raise NotImplementedError(f"no schema found for '{spider_ocds_version}'")

        with tempfile.TemporaryDirectory() as directory:
            input_path = os.path.join(directory, input_name)
            output_name = os.path.join(directory, item["file_name"])
            # CSV input is handed over as the containing directory, XLSX input
            # as the path of the workbook itself.
            if input_format == "csv":
                input_name = directory
            elif input_format == "xlsx":
                input_name = input_path

            with open(input_path, "wb") as f:
                f.write(item["data"])

            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore"
                )  # flattentool uses UserWarning, so we can't set a specific category

                unflatten(
                    input_name,
                    root_list_path="releases",
                    root_id="ocid",
                    schema=schema,
                    input_format=input_format,
                    output_name=output_name,
                    **spider.unflatten_args,
                )

            # Read the JSON back as UTF-8 explicitly (flattentool is expected
            # to write UTF-8 output) rather than relying on the locale's
            # default encoding, which may not be UTF-8.
            with open(output_name, "r", encoding="utf-8") as f:
                item["data"] = f.read()

        return item
Пример #36
0
def convert_spreadsheet(request, data, file_type):
    """
    Convert an uploaded spreadsheet to JSON with flatten-tool.

    The result is written to ``unflattened.json`` inside the upload
    directory. If that file already exists the conversion is skipped and
    only its size is looked up.

    :param request: the current request; ``request.cove_config`` supplies
        ``main_sheet_name``, ``root_id`` and ``item_schema_url``
    :param data: the upload record; ``upload_dir()``, ``upload_url()`` and
        ``original_file.file.name`` are used
    :param file_type: the input format passed to flatten-tool; ``'csv'``
        triggers the single-file directory layout and encoding detection
    :returns: a context dict with the keys ``converted_file_size``,
        ``conversion``, ``converted_path`` and ``converted_url``
    :raises CoveInputDataError: if conversion (or reading the converted
        file's size) raises any exception
    """
    context = {}
    converted_path = os.path.join(data.upload_dir(), 'unflattened.json')
    encoding = 'utf-8'
    if file_type == 'csv':
        # flatten-tool expects a directory full of CSVs with file names
        # matching what xlsx titles would be.
        # If only one upload file is specified, we rename it and move into
        # a new directory, such that it fits this pattern.
        input_name = os.path.join(data.upload_dir(), 'csv_dir')
        os.makedirs(input_name, exist_ok=True)
        destination = os.path.join(input_name, request.cove_config['main_sheet_name'] + '.csv')
        shutil.copy(data.original_file.file.name, destination)
        # Use the first candidate encoding that decodes the whole file;
        # latin_1 is the last resort and is not checked.
        for candidate in ('utf-8', 'cp1252'):
            try:
                with open(destination, encoding=candidate) as main_sheet_file:
                    main_sheet_file.read()
            except UnicodeDecodeError:
                continue
            encoding = candidate
            break
        else:
            encoding = 'latin_1'
    else:
        input_name = data.original_file.file.name
    try:
        if not os.path.exists(converted_path):
            flattentool.unflatten(
                input_name,
                output_name=converted_path,
                input_format=file_type,
                main_sheet_name=request.cove_config['main_sheet_name'],
                root_id=request.cove_config['root_id'],
                schema=request.cove_config['item_schema_url'],
                convert_titles=True,
                encoding=encoding
            )
        context['converted_file_size'] = os.path.getsize(converted_path)
    except Exception as err:
        logger.exception(err, extra={
            'request': request,
            })
        raise CoveInputDataError({
            'sub_title': _("Sorry we can't process that data"),
            'link': 'cove:index',
            'link_text': _('Try Again'),
            # Translate the template first and interpolate afterwards:
            # formatting inside _() would look up the already-formatted text,
            # which can never match a catalogue entry.
            'msg': _('We think you tried to supply a spreadsheet, but we failed to convert it to JSON.\n\nError message: {}').format(repr(err))
        }) from err

    context.update({
        'conversion': 'unflatten',
        'converted_path': converted_path,
        'converted_url': '{}/unflattened.json'.format(data.upload_url())
    })
    return context
Пример #37
0
def test_metatab(tmpdir):
    """
    Unflatten ``basic_meta.xlsx`` with a vertically oriented ``Meta`` tab.

    Checks three outputs in turn: the unflattened JSON, the cell source map
    and the heading source map.
    """
    schema_file = tmpdir.join('metatab_schema.json')
    output_file = tmpdir.join('meta_unflattened.json')
    cell_map_file = tmpdir.join('meta_cell_source_map.json')
    heading_map_file = tmpdir.join('meta_heading_source_map.json')

    schema_file.write('{"properties": {}}')

    unflatten(
        'flattentool/tests/fixtures/xlsx/basic_meta.xlsx',
        input_format='xlsx',
        output_name=output_file.strpath,
        metatab_name='Meta',
        metatab_vertical_orientation=True,
        metatab_schema=schema_file.strpath,
        cell_source_map=cell_map_file.strpath,
        heading_source_map=heading_map_file.strpath,
    )

    # 1. The unflattened document: Meta values at the top level, with rows
    #    from both data sheets collected under 'main'.
    expected_output = {
        'a': 'a1',
        'b': 'b1',
        'c': 'c1',
        'main': [
            {'colA': 'cell1', 'colB': 'cell2'},
            {'colA': 'cell3', 'colB': 'cell4'},
            {'colC': 'cell5', 'colD': 'cell6'},
            {'colC': 'cell7', 'colD': 'cell8'},
        ],
    }
    assert json.load(output_file) == expected_output

    # 2. The cell source map.
    expected_cell_map = {
        '': [['Meta', 2]],
        'a': [['Meta', '1', 2, 'a']],
        'b': [['Meta', '2', 2, 'b']],
        'c': [['Meta', '3', 2, 'c']],
        'main/0': [['main', 2]],
        'main/0/colA': [['main', 'A', 2, 'colA']],
        'main/0/colB': [['main', 'B', 2, 'colB']],
        'main/1': [['main', 3]],
        'main/1/colA': [['main', 'A', 3, 'colA']],
        'main/1/colB': [['main', 'B', 3, 'colB']],
        'main/2': [['subsheet', 2]],
        'main/2/colC': [['subsheet', 'A', 2, 'colC']],
        'main/2/colD': [['subsheet', 'B', 2, 'colD']],
        'main/3': [['subsheet', 3]],
        'main/3/colC': [['subsheet', 'A', 3, 'colC']],
        'main/3/colD': [['subsheet', 'B', 3, 'colD']],
    }
    assert json.load(cell_map_file) == expected_cell_map

    # 3. The heading source map.
    expected_heading_map = {
        'a': [['Meta', 'a']],
        'b': [['Meta', 'b']],
        'c': [['Meta', 'c']],
        'main/colA': [['main', 'colA']],
        'main/colB': [['main', 'colB']],
        'main/colC': [['subsheet', 'colC']],
        'main/colD': [['subsheet', 'colD']],
    }
    assert json.load(heading_map_file) == expected_heading_map
Пример #38
0
def test_unflatten_empty(tmpdir):
    """
    Unflatten a CSV whose data rows contain only empty cells and check that
    the output holds an empty ``main`` list.
    """
    csv_dir = tmpdir.ensure('release_input', dir=True)
    csv_dir.join('main.csv').write_text('ocid,id\n,\n,\n,', encoding='utf8')
    release_file = tmpdir.join('release.json')

    unflatten(
        csv_dir.strpath,
        input_format='csv',
        output_name=release_file.strpath,
        main_sheet_name='main',
    )

    actual = lines_strip_whitespace(release_file.read())
    expected = lines_strip_whitespace('''{
        "main": []
    }''')
    assert actual == expected
Пример #39
0
def test_unflatten_csv_latin1(tmpdir):
    """
    Write a one-row CSV in latin1, unflatten it with ``encoding='latin1'``
    and check the decoded values in the JSON output.
    """
    source_dir = tmpdir.ensure('release_input', dir=True)
    main_sheet = source_dir.join('main.csv')
    main_sheet.write_text('ocid,id\n1,é\n', encoding='latin1')

    json_file = tmpdir.join('release.json')
    unflatten(
        source_dir.strpath,
        input_format='csv',
        encoding='latin1',
        output_name=json_file.strpath,
        main_sheet_name='main',
    )

    parsed = json.load(json_file)
    assert parsed == {'main': [{'ocid': '1', 'id': 'é'}]}
Пример #40
0
def test_unflatten_csv_utf8(tmpdir):
    """
    Unflatten a UTF-8 CSV without passing ``encoding`` and check that the
    non-ASCII text is preserved, both after parsing and in the raw output.
    """
    csv_dir = tmpdir.ensure('release_input', dir=True)
    csv_dir.join('main.csv').write_text('ocid,id\n1,éαГ😼𝒞人\n', encoding='utf8')
    release_file = tmpdir.join('release.json')

    # No ``encoding`` argument: it should default to utf8.
    unflatten(
        csv_dir.strpath,
        input_format='csv',
        output_name=release_file.strpath,
        main_sheet_name='main',
    )

    parsed = json.load(release_file)
    assert parsed == {'main': [{'ocid': '1', 'id': 'éαГ😼𝒞人'}]}

    # The JSON we output should be UTF-8, rather than escaped ASCII
    # https://github.com/OpenDataServices/flatten-tool/issues/71
    raw_text = release_file.read_text(encoding='utf-8')
    assert 'éαГ😼𝒞人' in raw_text
def test_roundtrip_360(tmpdir, output_format, use_titles):
    """
    Flatten the 360Giving fixture, unflatten the result, and check that the
    JSON obtained equals the original.

    :param tmpdir: pytest temporary directory fixture
    :param output_format: format written by ``flatten`` and read back by
        ``unflatten``; also used as the suffix of the flattened output name
    :param use_titles: passed as ``use_titles`` to ``flatten`` and as
        ``convert_titles`` to ``unflatten``
    """
    input_name = 'flattentool/tests/fixtures/fundingproviders-grants_fixed_2_grants.json'
    flattened_name = tmpdir.join('flattened').strpath+'.'+output_format
    roundtrip_file = tmpdir.join('roundtrip.json')
    flatten(
        input_name=input_name,
        output_name=flattened_name,
        output_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        root_list_path='grants',
        root_id='',
        use_titles=use_titles,
        main_sheet_name='grants')
    unflatten(
        input_name=flattened_name,
        output_name=roundtrip_file.strpath,
        input_format=output_format,
        schema='flattentool/tests/fixtures/360-giving-schema.json',
        root_list_path='grants',
        root_id='',
        convert_titles=use_titles)
    # Close the fixture deterministically instead of leaking the handle.
    with open(input_name) as original_file:
        original_json = json.load(original_file)
    roundtripped_json = json.load(roundtrip_file)

    assert original_json == roundtripped_json
def test_roundtrip_xml(tmpdir, output_format):
    """
    Flatten the IATI example XML, unflatten it back to XML, and check that
    both documents parse to equal dictionaries.

    :param tmpdir: pytest temporary directory fixture
    :param output_format: format written by ``flatten`` and read back by
        ``unflatten``; also used as the suffix of the flattened output name
    """
    input_name = 'examples/iati/expected.xml'
    flattened_name = tmpdir.join('flattened').strpath+'.'+output_format
    roundtrip_file = tmpdir.join('roundtrip.xml')
    flatten(
        input_name=input_name,
        output_name=flattened_name,
        output_format=output_format,
        root_list_path='iati-activity',
        id_name='iati-identifier',
        xml=True)
    unflatten(
        input_name=flattened_name,
        output_name=roundtrip_file.strpath,
        input_format=output_format,
        root_list_path='iati-activity',
        id_name='iati-identifier',
        xml=True)

    # Compare without ordering, by using dict_constructor=dict instead of
    # OrderedDict. Both files are parsed inside context managers so that the
    # binary handles are closed even if parsing or the assertion fails.
    with open(input_name, 'rb') as original_xml:
        original = xmltodict.parse(original_xml, dict_constructor=dict)
    with roundtrip_file.open('rb') as roundtripped_xml:
        roundtripped = xmltodict.parse(roundtripped_xml, dict_constructor=dict)
    assert original == roundtripped
Пример #43
0
def test_unflatten(tmpdir):
    """
    Perform a full CSV unflattening, and check the output is what we expect.

    Three CSV sheets (``main``, ``subsheet`` and ``subsubsheet``) are written
    to a temporary input directory, unflattened, and the resulting JSON text
    is compared (after ``lines_strip_whitespace``) with the expected document.

    Notable things we are checking for:
        Ordering is preserved - both the order of columns and rows
        On an id column header, the information following a colon is the key for the array.
        If this is not provided, the sheet name is used.
    """
    input_dir = tmpdir.ensure('release_input', dir=True)
    # Main sheet: four rows, each of which becomes one object in "main".
    input_dir.join('main.csv').write(
        'ocid,id,testA,test/id,test/C\n'
        '1,2,3,4,5\n'
        '1,2a,3a,4a,5a\n'
        '6,7,8,9,10\n'
        '6,7a,8a,9a,10a\n'
    )
    # Sub sheet: 'main/id:sub' names the array key ("sub") after the colon.
    # The last row also fills 'main/test/id' and, in the expected output, is
    # nested under "test" using the sheet name ("subsheet") as the key.
    input_dir.join('subsheet.csv').write(
        'ocid,main/id:sub,main/test/id,id,testD,test2/E,test2/F\n'
        '1,2,,S1,11,12,13\n'
        '1,2a,,S1,14,15,16\n'
        '1,2,,S2,17,18,19\n'
        '6,7,,S1,20,21,22\n'
        '1,2,4,S3,24,25,26\n'
    )
    # Sub-sub sheet: a single row; in the expected output it appears as
    # "subsub" inside the "sub" entry with id S1 of the first main object.
    input_dir.join('subsubsheet.csv').write(
        'ocid,main/id,main/sub[]/id:subsub,testG\n'
        '1,2,S1,23\n'
    )
    unflatten(
        input_dir.strpath,
        input_format='csv',
        output_name=tmpdir.join('release.json').strpath,
        main_sheet_name='main')
    # The raw text is compared rather than parsed JSON, so key and row order
    # are part of the assertion.
    assert lines_strip_whitespace(tmpdir.join('release.json').read()) == lines_strip_whitespace('''{
    "main": [
        {
            "ocid": "1",
            "id": "2",
            "testA": "3",
            "test": {
                "id": "4",
                "C": "5",
                "subsheet": [
                    {
                        "id": "S3",
                        "testD": "24",
                        "test2": {
                            "E": "25",
                            "F": "26"
                        }
                    }
                ]
            },
            "sub": [
                {
                    "id": "S1",
                    "testD": "11",
                    "test2": {
                        "E": "12",
                        "F": "13"
                    },
                    "subsub": [
                        {
                            "testG": "23"
                        }
                    ]
                },
                {
                    "id": "S2",
                    "testD": "17",
                    "test2": {
                        "E": "18",
                        "F": "19"
                    }
                }
            ]
        },
        {
            "ocid": "1",
            "id": "2a",
            "testA": "3a",
            "test": {
                "id": "4a",
                "C": "5a"
            },
            "sub": [
                {
                    "id": "S1",
                    "testD": "14",
                    "test2": {
                        "E": "15",
                        "F": "16"
                    }
                }
            ]
        },
        {
            "ocid": "6",
            "id": "7",
            "testA": "8",
            "test": {
                "id": "9",
                "C": "10"
            },
            "sub": [
                {
                    "id": "S1",
                    "testD": "20",
                    "test2": {
                        "E": "21",
                        "F": "22"
                    }
                }
            ]
        },
        {
            "ocid": "6",
            "id": "7a",
            "testA": "8a",
            "test": {
                "id": "9a",
                "C": "10a"
            }
        }
    ]
}''')