Exemplo n.º 1
0
    def __init__(
        self,
        config: Union[Dict[str, Any], Path],
        state: Union[None, Dict[str, Any], Path] = None,
        catalog: Union[None, Dict[str, Any], Catalog, Path] = None,
        discover: bool = False,
        **kwargs,
    ):
        """Store config, state and catalog, loading any that are given as paths.

        Args:
            config: Either an already-parsed mapping or a ``Path`` that is
                read with ``load_json``.
            state: ``None``, an already-parsed mapping, or a ``Path`` that is
                read with ``load_json``.
            catalog: ``None``, a ``Catalog`` instance, a dict converted with
                ``Catalog.from_dict``, or a ``Path`` read with
                ``Catalog.load``.
            discover: Stored unchanged on ``self.discover``.
            **kwargs: Every extra keyword is set as an attribute of the same
                name on the instance.
        """
        # Each *_path attribute stays None unless the matching argument is a Path.
        self.config_path = None
        self.state_path = None
        self.catalog_path = None

        if isinstance(catalog, Path):
            self.catalog_path = str(catalog)
            loaded_catalog = Catalog.load(catalog)
        elif isinstance(catalog, dict):
            loaded_catalog = Catalog.from_dict(catalog)
        else:
            loaded_catalog = catalog

        loaded_config = config
        if isinstance(config, Path):
            self.config_path = str(config)
            loaded_config = load_json(config)

        loaded_state = state
        if isinstance(state, Path):
            # NOTE(review): unlike the config/catalog paths, this one is kept
            # as a Path object rather than converted to str.
            self.state_path = state
            loaded_state = load_json(state)

        self.config = loaded_config
        self.state = loaded_state
        self.catalog = loaded_catalog
        self.discover = discover

        # Extra keyword arguments are exposed as plain instance attributes.
        for attr_name, attr_value in kwargs.items():
            setattr(self, attr_name, attr_value)
Exemplo n.º 2
0
def test_write_schema(mock_write_schema):
    """Check that write_schema emits the schema stored in the fixture catalog.

    The expected schema and key properties are read straight from the same
    ``processor_catalog.json`` fixture the processor is built from.
    """
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.processor import TapProcessor

    catalog_file = f"{FIXTURES_PATH}/processor_catalog.json"
    catalog = Catalog.load(catalog_file)
    client_mock = MagicMock()
    processor_kwargs = {
        "catalog": catalog,
        "stream_name": "loan_accounts",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'loan_accounts'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
    }
    processor = TapProcessor(**processor_kwargs)
    processor.write_schema()

    # Look up the loan_accounts entry in the raw fixture file.
    with open(catalog_file, "r") as fd:
        fixture_streams = json.load(fd)["streams"]
    loan_accounts_entry = next(
        (entry for entry in fixture_streams
         if entry["tap_stream_id"] == "loan_accounts"),
        None)

    schema = None
    stream_key_properties = None
    if loan_accounts_entry is not None:
        schema = loan_accounts_entry["schema"]
        stream_key_properties = loan_accounts_entry["key_properties"]

    assert schema is not None
    mock_write_schema.assert_called_with("loan_accounts", schema,
                                         stream_key_properties)
Exemplo n.º 3
0
def parse_args():
    ''' Replacement for singer's default singer_utils.parse_args()
    https://github.com/singer-io/singer-python/blob/master/singer/utils.py

    Parses the command line and returns the argparse namespace.

    Recognised options:
    -c,--config       Config file (required)
    -s,--state        State file
    -p,--properties   Property selections (deprecated in favour of --catalog)
    --catalog         Catalog file
    -d,--discover     Run in discover mode
    --start_datetime  Inclusive start date time (ISO8601 string)
    --end_datetime    Exclusive end date time (ISO8601 string)

    The config, state and properties options are replaced by the parsed JSON
    content of the files they name; the catalog option is replaced by the
    object returned from Catalog.load. A missing state becomes an empty dict.
    '''
    cli = argparse.ArgumentParser()

    # Standard singer options.
    cli.add_argument('-c', '--config', required=True, help='Config file')
    cli.add_argument('-s', '--state', help='State file')
    cli.add_argument(
        '-p', '--properties',
        help='Property selections: DEPRECATED, Please use --catalog instead')
    cli.add_argument('--catalog', help='Catalog file')
    cli.add_argument(
        '-d', '--discover', action='store_true', help='Do schema discovery')

    # Options specific to this tap.
    cli.add_argument(
        "--start_datetime",
        type=str,
        help="Inclusive start date time in ISO8601-Date-String format: 2019-04-11T00:00:00Z")
    cli.add_argument(
        "--end_datetime",
        type=str,
        help="Exclusive end date time in ISO8601-Date-String format: 2019-04-12T00:00:00Z")

    args = cli.parse_args()

    # Swap file names for their loaded content.
    if args.config:
        args.config = singer_utils.load_json(args.config)
    args.state = singer_utils.load_json(args.state) if args.state else {}
    if args.properties:
        args.properties = singer_utils.load_json(args.properties)
    if args.catalog:
        args.catalog = Catalog.load(args.catalog)

    return args
Exemplo n.º 4
0
def test_get_selected_streams():
    """get_selected_streams must return exactly the three expected stream names."""
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.helpers import get_selected_streams

    fixture_catalog = Catalog.load(f"{FIXTURES_PATH}/catalog.json")
    actual = get_selected_streams(fixture_catalog)
    wanted = ["loan_accounts", "loan_repayments", "audit_trail"]

    # Same length plus same set of names: order is not checked.
    assert len(actual) == len(wanted) and set(actual) == set(wanted)
Exemplo n.º 5
0
def test_write_exceptions(mock_write_schema, mock_write_record):
    """Errors raised while writing records or schemas must propagate.

    Both mocks succeed on their first call and raise ``OSError`` on the
    second, so the first processing run fails on a record write and the
    second run fails on a schema write.
    """
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.processor import TapProcessor
    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")

    mock_write_record.side_effect = [None, OSError("Mock Record Exception")]
    mock_write_schema.side_effect = [None, OSError("Mock Schema Exception")]

    records = [
        {"id": "1", "last_modified_date": "2022-01-01T00:00:00+03:00"},
        {"id": "2", "last_modified_date": "2022-01-01T01:00:00+03:00"},
        {"id": "3", "last_modified_date": "2022-01-01T02:00:00+03:00"},
    ]

    client_mock = MagicMock()
    processor = TapProcessor(
        catalog=catalog,
        stream_name="loan_accounts",
        client=client_mock,
        config=config_json,
        state={'currently_syncing': 'loan_accounts'},
        sub_type="self",
        generators=[GeneratorMock(records)],
    )
    processor.endpoint_deduplication_key = "id"

    # First run: the second record write raises.
    with pytest.raises(OSError) as err:
        processor.process_streams_from_generators()
    assert err.value.args[0] == "Mock Record Exception"

    expected_record_calls = [
        call("loan_accounts",
             IsInstanceMatcher(dict),
             time_extracted=IsInstanceMatcher(datetime.datetime))
        for _ in range(2)
    ]
    mock_write_record.assert_has_calls(expected_record_calls)

    # Second run: the second schema write raises.
    with pytest.raises(OSError) as err:
        processor.process_streams_from_generators()
    assert err.value.args[0] == "Mock Schema Exception"

    expected_schema_calls = [
        call("loan_accounts", IsInstanceMatcher(dict), IsInstanceMatcher(list))
        for _ in range(2)
    ]
    mock_write_schema.assert_has_calls(expected_schema_calls)
Exemplo n.º 6
0
def parse_args(required_config_keys):
    # Forked variant of the standard parser: the original file paths (notably
    # the state file path) are kept on the returned namespace.
    '''Parse the standard command-line arguments.

    Options:

    -c,--config     Config file (required)
    -s,--state      State file
    -d,--discover   Run in discover mode
    -p,--properties Properties file: DEPRECATED, please use --catalog instead
    --catalog       Catalog file

    For every file option that is supplied, the raw path is saved as
    ``<name>_path`` and the option itself is replaced by the loaded content
    (JSON for config, state and properties; ``Catalog.load`` for catalog).
    ``state_file`` always exists: the state path, or None when no state was
    given, in which case ``state`` is an empty dict.

    The loaded config is validated against ``required_config_keys`` with
    ``utils.check_config`` before the namespace is returned.
    '''
    cli = argparse.ArgumentParser()
    cli.add_argument('-c', '--config', required=True, help='Config file')
    cli.add_argument('-s', '--state', help='State file')
    cli.add_argument(
        '-p', '--properties',
        help='Property selections: DEPRECATED, Please use --catalog instead')
    cli.add_argument('--catalog', help='Catalog file')
    cli.add_argument(
        '-d', '--discover', action='store_true', help='Do schema discovery')

    args = cli.parse_args()

    if args.config:
        args.config_path = args.config
        args.config = utils.load_json(args.config_path)

    if args.state:
        args.state_path = args.state
        args.state_file = args.state
        args.state = utils.load_json(args.state_path)
    else:
        args.state_file = None
        args.state = {}

    if args.properties:
        args.properties_path = args.properties
        args.properties = utils.load_json(args.properties_path)

    if args.catalog:
        args.catalog_path = args.catalog
        args.catalog = Catalog.load(args.catalog_path)

    utils.check_config(args.config, required_config_keys)

    return args
Exemplo n.º 7
0
def _prep_config():
    """Build the config, catalog and stream map for the USGS example.

    The example lives in ``../examples/usgs`` relative to this file. The tap
    config is loaded from ``config/tap_config.json`` there, then extended
    with ``schema_dir``, ``catalog_dir`` and a ``start_datetime`` one hour
    before the current local time.

    Returns:
        A ``(config, catalog, streams)`` tuple where ``streams`` maps
        ``"earthquakes"`` to a ``Stream`` built from the config.
    """
    cwd, _ = os.path.split(__file__)
    usgs_dir = os.path.join(cwd, "../examples/usgs")
    config = utils.load_json(os.path.join(usgs_dir, "config/tap_config.json"))
    config["schema_dir"] = os.path.join(usgs_dir, "schema")
    config["catalog_dir"] = os.path.join(usgs_dir, "catalog")
    # catalog_dir already starts with usgs_dir. Joining usgs_dir in front of
    # it again only worked when __file__ was absolute (os.path.join then
    # discards the earlier component); with a relative __file__ it produced
    # a duplicated, non-existent path.
    catalog = Catalog.load(
        os.path.join(config["catalog_dir"], "earthquakes.json"))
    config["start_datetime"] = (datetime.datetime.now() -
                                datetime.timedelta(hours=1)).isoformat()
    streams = {"earthquakes": Stream("earthquakes", config)}
    return config, catalog, streams
Exemplo n.º 8
0
def test_loan_accounts_processor():
    """LoanAccountsProcessor must expose its dedup key and child streams."""
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.loan_accounts_processor import LoanAccountsProcessor

    fixture_catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()

    init_kwargs = {
        "catalog": fixture_catalog,
        "stream_name": "loan_accounts",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'loan_accounts'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
    }
    processor = LoanAccountsProcessor(**init_kwargs)

    assert processor.endpoint_deduplication_key == "id"
    assert processor.endpoint_child_streams == ["loan_repayments"]
Exemplo n.º 9
0
def test_loan_repayments_processor_endpoint_config_init():
    """LoanRepaymentsProcessor must record its parent stream and parent id."""
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.loan_repayments_processor import LoanRepaymentsProcessor

    fixture_catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()

    init_kwargs = {
        "catalog": fixture_catalog,
        "stream_name": "loan_repayments",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'loan_repayments'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
        "parent_id": 'TEST',
    }
    processor = LoanRepaymentsProcessor(**init_kwargs)

    assert processor.endpoint_parent == 'loan_accounts'
    assert processor.endpoint_parent_id == 'TEST'
Exemplo n.º 10
0
def test_deposit_cards_processor_endpoint_config_init():
    """DepositCardsProcessor must expose its key fields, parent and parent id."""
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.deposit_cards_processor import DepositCardsProcessor

    fixture_catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()

    init_kwargs = {
        "catalog": fixture_catalog,
        "stream_name": "cards",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'cards'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
        "parent_id": 'TEST',
    }
    processor = DepositCardsProcessor(**init_kwargs)

    assert processor.endpoint_deduplication_key == 'reference_token'
    assert processor.endpoint_id_field == 'reference_token'
    assert processor.endpoint_parent == 'deposit'
    assert processor.endpoint_parent_id == 'TEST'
Exemplo n.º 11
0
def parse_args(required_config_keys):
    """Parse the standard command-line arguments.

    -c,--config     Config file (required)
    -s,--state      State file
    -d,--discover   Run in discover mode
    -a,--select_all Select all streams and fields for discover mode
    -p,--properties Properties file: DEPRECATED, please use --catalog instead
    --catalog       Catalog file

    Config, state and catalog are replaced by their loaded content, and the
    original path is kept as ``config_path`` / ``state_path`` /
    ``catalog_path``. A missing state becomes an empty dict. The properties
    file is loaded only when no catalog was given. Passing ``--select_all``
    without ``--discover`` is reported through ``parser.error``.

    The loaded config is checked against ``required_config_keys`` with
    ``check_config`` before the namespace is returned.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--config', '-c', required=True, help='Config file')
    parser.add_argument('--state', '-s', help='State file')
    parser.add_argument('--catalog', help='Catalog file')
    parser.add_argument(
        '--discover', '-d', action='store_true', help='Do schema discovery')
    parser.add_argument(
        '--select_all', '-a', action='store_true',
        help='Select all streams and fields in discover mode')
    parser.add_argument(
        '-p', '--properties',
        help='Property selections: DEPRECATED, Please use --catalog instead')

    args = parser.parse_args()

    if args.config:
        args.config_path = args.config
        args.config = load_json(args.config_path)

    if args.state:
        args.state_path = args.state
        args.state = load_json(args.state_path)
    else:
        args.state = {}

    # The catalog takes precedence over the deprecated properties file.
    if args.catalog:
        args.catalog_path = args.catalog
        args.catalog = Catalog.load(args.catalog_path)
    elif args.properties:
        args.properties = load_json(args.properties)

    if args.select_all and not args.discover:
        parser.error('Select all only available for discovery mode')

    check_config(args.config, required_config_keys)
    return args
Exemplo n.º 12
0
def test_bookmarks(mock_write_state):
    """write_bookmark must emit the state extended with the stream's bookmark."""
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.processor import TapProcessor

    fixture_catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()
    init_kwargs = {
        "catalog": fixture_catalog,
        "stream_name": "loan_accounts",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'loan_accounts'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
    }
    processor = TapProcessor(**init_kwargs)

    processor.write_bookmark()

    mock_write_state.assert_called_once_with({
        'currently_syncing': 'loan_accounts',
        'bookmarks': {'loan_accounts': '2021-06-01T00:00:00Z'},
    })
Exemplo n.º 13
0
def catalog(shared_datadir):
    """Load ``test.catalog.json`` from ``shared_datadir`` via ``Catalog.load``."""
    catalog_file = shared_datadir / "test.catalog.json"
    return Catalog.load(catalog_file)
Exemplo n.º 14
0
def parse_args(spec_file, required_config_keys):
    ''' Replacement for singer's default utils.parse_args()
    https://github.com/singer-io/singer-python/blob/master/singer/utils.py

    Builds the command-line parser from a spec file and parses the arguments.

    The module-level SPEC is updated with the content of ``spec_file``; any
    argument defined in ``default_spec.json`` that SPEC does not define is
    copied in. Every entry of ``SPEC["args"]`` becomes a ``--<name>`` option.

    Fixed options added on top of the spec:
    spec_file          Specification file (positional)
    -c,--config        Config file (required)
    -s,--state         State file
    --catalog          Catalog file
    -d,--discover      Run in discover mode
    -i,--infer_schema  Run in infer-schema mode
    --url              REST API endpoint with {params}

    Config and state are replaced by their parsed JSON content (state
    defaults to an empty dict). The catalog is loaded only when the given
    path is an existing file. The config is then validated against
    ``required_config_keys`` with ``utils.check_config``.
    '''
    # Load the defaults shipped next to this module.
    with open(get_abs_path("default_spec.json"), "r") as default_handle:
        default_spec = dict(json.load(default_handle))

    # Merge the user's spec file into the shared SPEC.
    with open(spec_file, "r") as spec_handle:
        SPEC.update(json.load(spec_handle))

    # TODO: What about the fields other than arg
    for arg_name, fallback in default_spec["args"].items():
        if SPEC["args"].get(arg_name) is None:
            SPEC["args"][arg_name] = fallback

    parser = argparse.ArgumentParser(SPEC["application"])
    parser.add_argument("spec_file", type=str, help="Specification file")

    # One option per argument declared in the spec.
    for arg_name, arg_spec in SPEC["args"].items():
        parser.add_argument(
            "--" + arg_name,
            type=TYPES[arg_spec["type"]],
            default=arg_spec.get("default"),
            help=arg_spec.get("help"),
            required=arg_spec.get("required", False))

    # Default arguments
    parser.add_argument('-c', '--config', help='Config file', required=True)
    # NOTE: a required --schema_dir option was declared here but is disabled.
    parser.add_argument('-s', '--state', help='State file')
    parser.add_argument('--catalog', help='Catalog file')
    parser.add_argument(
        '-d', '--discover', action='store_true', help='Do schema discovery')
    parser.add_argument(
        '-i', '--infer_schema', action='store_true', help='Do infer schema')
    parser.add_argument(
        "--url",
        type=str,
        help="REST API endpoint with {params}. Required in config.")

    args = parser.parse_args()

    if args.config:
        args.config = utils.load_json(args.config)
    args.state = utils.load_json(args.state) if args.state else {}
    # A catalog path that is not an existing file is left as the raw string.
    if args.catalog and os.path.isfile(args.catalog):
        args.catalog = Catalog.load(args.catalog)

    utils.check_config(args.config, required_config_keys)

    return args
Exemplo n.º 15
0
def test_tap_processor_process_child_records(
        mock_sync_endpoint_refactor,
        mock_write_bookmark,
        mock_write_schema,  # Mock write_schema so we don't pollute the output
        mock_get_selected_streams,
        capsys):
    """Parent records must trigger a child sync per record and child stream.

    The mocked child sync reports a fixed record count, so the total returned
    by the parent processor is predictable, and the captured stdout must
    contain exactly the parent records.
    """
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.parent_processor import ParentProcessor

    child_streams = ["child_1", "child_2"]
    fake_children_record_count = 4
    mock_get_selected_streams.return_value = ["child_1", "child_2"]
    mock_sync_endpoint_refactor.return_value = fake_children_record_count
    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")

    # Four parent records with ids "2".."5"; the encoded key ends in the id.
    generator_data = [
        {
            "encoded_key": f"1234567890123456789012345678901{record_id}",
            "last_modified_date": "2022-01-01T00:00:00.000000Z",
            "id": str(record_id),
        }
        for record_id in range(2, 6)
    ]
    generator = GeneratorMock(list(generator_data))
    generator.time_extracted = 0

    client_mock = MagicMock()
    processor = ParentProcessor(
        catalog=catalog,
        stream_name="loan_accounts",
        client=client_mock,
        config=config_json,
        state={'currently_syncing': 'loan_accounts'},
        sub_type="self",
        generators=[generator],
    )
    processor.endpoint_child_streams = child_streams
    actual_records_count = processor.process_streams_from_generators()

    # sync_endpoint_refactor called for every record (len(generator_data)) for every child_stream + once for parent
    per_parent_count = (
        fake_children_record_count * len(processor.endpoint_child_streams) + 1)
    expected_records_count = len(generator_data) * per_parent_count
    assert actual_records_count == expected_records_count, \
        "Record count mismatch when adding child records"

    # The most recent child sync is the last child stream of the last parent.
    mock_sync_endpoint_refactor.assert_called_with(
        client=client_mock,
        catalog=processor.catalog,
        state={'currently_syncing': 'loan_accounts'},
        stream_name=processor.endpoint_child_streams[-1],
        sub_type="self",
        config=config_json,
        parent_id="5")

    captured = capsys.readouterr()
    stdout_list = []
    for line in captured.out.split("\n"):
        if line:
            stdout_list.append(json.loads(line))

    expected_messages = [
        {"type": "RECORD", "stream": "loan_accounts", "record": record}
        for record in generator_data
    ]
    # noinspection PyTypeChecker
    assert stdout_list == expected_messages, "Output should contain mocked records"
Exemplo n.º 16
0
def parse_args(spec_file, required_config_keys):
    ''' Replacement for singer's default utils.parse_args()
    https://github.com/singer-io/singer-python/blob/master/singer/utils.py

    Builds the command-line parser from a spec file and parses the arguments.

    The module-level SPEC is first updated with ``default_spec.json``. The
    custom ``spec_file`` may then override ``application`` and add to or
    override entries of ``SPEC["args"]``. Every entry of ``SPEC["args"]``
    becomes a ``--<name>`` option.

    Fixed options added on top of the spec:
    spec_file          Specification file (positional)
    -c,--config        Config file (required)
    -s,--state         State file
    --catalog          Catalog file
    -d,--discover      Run in discover mode
    -i,--infer_schema  Run in infer-schema mode
    -o,--offline       Offline test mode

    Config and state are replaced by their parsed JSON content (state
    defaults to an empty dict). The catalog is loaded only when the given
    path is an existing file. The config is then validated against
    ``required_config_keys`` with ``utils.check_config``.
    '''
    # Start from the defaults shipped next to this module.
    with open(get_abs_path("default_spec.json"), "r") as default_handle:
        default_spec = dict(json.load(default_handle))
    SPEC.update(default_spec)

    # Overwrite with the custom spec file
    with open(spec_file, "r") as custom_handle:
        custom_spec = dict(json.load(custom_handle))

    SPEC["application"] = custom_spec.get("application", SPEC["application"])
    custom_args = custom_spec.get("args")
    if custom_args:
        SPEC["args"].update(custom_args)

    parser = argparse.ArgumentParser(SPEC["application"])
    parser.add_argument("spec_file", type=str, help="Specification file")

    # One option per argument declared in the merged spec.
    for arg_name, arg_spec in SPEC["args"].items():
        parser.add_argument(
            "--" + arg_name,
            type=TYPES[arg_spec["type"]],
            default=arg_spec.get("default"),
            help=arg_spec.get("help"),
            required=arg_spec.get("required", False))

    # Default singer arguments, commands, and required args
    parser.add_argument('-c', '--config', help='Config file', required=True)
    parser.add_argument('-s', '--state', help='State file')
    parser.add_argument('--catalog', help='Catalog file')

    # commands
    parser.add_argument(
        '-d', '--discover', action='store_true', help='Do schema discovery')
    parser.add_argument(
        '-i', '--infer_schema', action='store_true', help='Do infer schema')
    parser.add_argument(
        "--offline", "-o", action="store_true", help="Offline test mode")

    args = parser.parse_args()

    if args.config:
        args.config = utils.load_json(args.config)
    args.state = utils.load_json(args.state) if args.state else {}
    # A catalog path that is not an existing file is left as the raw string.
    if args.catalog and os.path.isfile(args.catalog):
        args.catalog = Catalog.load(args.catalog)

    utils.check_config(args.config, required_config_keys)

    return args