def schema(self) -> Dict[str, Any]:
    """Load and return the schema referenced by the target connection.

    ``target_config`` is checked for a ``connection`` entry and that entry
    is checked for a ``schema`` key (both via ``validate.is_in_dict_keys``,
    whose failure behaviour is defined outside this block). The schema URI
    is then loaded through ``JSONSource``.

    Returns:
        The ``data`` attribute of the loaded ``JSONSource``.
    """
    validate.is_in_dict_keys('connection', self.target_config)
    connection = self.target_config.get('connection')
    validate.is_in_dict_keys('schema', connection)

    schema_source = JSONSource(uri=connection.get('schema'))
    return schema_source.load().data
def __init__(self, task: Task, order: Optional[int]) -> None:
    """Validate the filter operator and initialise a FILTER transformation.

    The task's operator must contain an ``expression`` key whose value is
    one of ``lt``, ``le``, ``eq``, ``ne``, ``ge``, ``gt`` and an ``other``
    key whose value is a ``numbers.Number``. The checks are delegated to
    the ``validate`` helpers (their failure behaviour is defined elsewhere).

    Args:
        task: Task whose ``operator`` mapping describes the filter.
        order: Forwarded unchanged to the parent initialiser.
    """
    import numbers

    operator = task.operator
    for required_key in ('expression', 'other'):
        validate.is_in_dict_keys(required_key, operator)

    validate.is_instance_of(operator['other'], numbers.Number)
    validate.is_in_list(
        operator['expression'],
        ['lt', 'le', 'eq', 'ne', 'ge', 'gt'],
    )

    super().__init__(TransformationType.FILTER, task, order)
def _load(self, uri: str = None, *args, df: bool = False, **kwargs) -> Source:
    """Download ``self.uri`` to a local file and load it via the resolver.

    The response body is written to ``options['filename']`` in chunks. A
    second source is then built through ``SourceFactory`` using
    ``options['resolver']`` as its type and the downloaded file as its URI,
    and that source's ``load()`` result is returned.

    Args:
        uri: Not used by this implementation; the download always reads
            ``self.uri``.
        *args: Ignored.
        df: Ignored.
        **kwargs: Ignored.

    Returns:
        Whatever ``SourceFactory.load(...).load()`` returns for the
        downloaded file.
    """
    validate.is_in_dict_keys('filename', self.options)
    validate.is_in_dict_keys('resolver', self.options)

    import requests

    filename = self.options.get('filename')
    self.logger.info(f"HTTPSource: Downloading file {filename}")

    # NOTE(review): the HTTP status code is not checked, so an error
    # response body is written to disk like any other payload.
    req = requests.get(self.uri)

    # The context manager guarantees the handle is closed (and buffered data
    # flushed) even if reading a chunk or writing to disk raises.
    with open(filename, 'wb') as file:
        for chunk in req.iter_content(100000):
            file.write(chunk)

    options = dict(uri=filename, type=self.options.get('resolver'))
    return SourceFactory.load(options).load()
def __init__(self, catalog: Dict[str, Any]) -> None:
    """Store the namespace, version and sources of a catalog mapping.

    Each of the three keys is first passed to ``validate.is_in_dict_keys``
    (failure behaviour defined outside this block).

    Args:
        catalog: Mapping expected to hold ``namespace``, ``version`` and
            ``sources`` entries.
    """
    for required_key in ('namespace', 'version', 'sources'):
        validate.is_in_dict_keys(required_key, catalog)

    self.namespace: str = catalog.get('namespace')
    self.version: str = catalog.get('version')
    self.sources: dict = catalog.get('sources')
def __retrieve_fields(source: Dict) -> Dict[str, Field]:
    """Build a name -> ``Field`` mapping from a source's ``fields`` list.

    For every entry, ``name`` and ``data_type`` are fetched through
    ``__get_key_or_die``; ``default`` and ``alias`` are optional and fall
    back to ``None``. A falsy ``data_type`` is stored as ``None``. When two
    entries share a name, the later one replaces the earlier one.

    Args:
        source: Source definition; must contain a ``fields`` key.

    Returns:
        Dictionary keyed by field name.
    """
    validate.is_in_dict_keys('fields', source)

    catalog_fields = {}
    for entry in source.get('fields'):
        field_name = JsonCatalogProvider.__get_key_or_die(entry, 'name')
        raw_type = JsonCatalogProvider.__get_key_or_die(entry, 'data_type')
        catalog_fields[field_name] = Field(
            name=field_name,
            data_type=raw_type or None,
            default=entry.get('default'),
            alias=entry.get('alias'),
            transformations=(
                JsonCatalogProvider.__retrieve_transformations(entry)
            ),
        )
    return catalog_fields
def parser(self) -> str:
    """Return the ``name`` entry of ``parser_config``.

    The key is first passed to ``validate.is_in_dict_keys`` (failure
    behaviour defined outside this block).
    """
    config = self.parser_config
    validate.is_in_dict_keys('name', config)
    return config.get('name')
def field(self, name: str) -> Union[Field, None]:
    """Look up a field by name in ``self.fields``.

    Args:
        name: Key to look up; it is first passed to
            ``validate.is_in_dict_keys`` against ``self.fields``.

    Returns:
        The value stored under ``name``, or ``None`` if the lookup finds
        nothing.
    """
    known_fields = self.fields
    validate.is_in_dict_keys(name, known_fields)
    return known_fields.get(name)
def load(cls, config: Dict[str, Any]) -> Source:
    """Instantiate the source class registered for ``config['type']``.

    Args:
        config: Mapping with required ``type`` and ``uri`` keys and an
            optional ``options`` key (an empty dict is used when absent).

    Returns:
        An instance of the class resolved by ``SourceFactory.__find``,
        constructed with the configured ``uri`` and ``options``.
    """
    for required_key in ('type', 'uri'):
        validate.is_in_dict_keys(required_key, config)

    source_cls = SourceFactory.__find(config.get('type'))
    return source_cls(
        uri=config.get('uri'),
        options=config.get('options', {}),
    )
def __find(name: str) -> Type[Source]:
    """Return the source class registered under ``name``.

    Args:
        name: Key into ``SourceFactory.__MAPPING``; it is first passed to
            ``validate.is_in_dict_keys`` against that mapping.
    """
    registry = SourceFactory.__MAPPING
    validate.is_in_dict_keys(name, registry)
    return registry[name]
def __init__(self, task: Task, order: Optional[int]) -> None:
    """Validate the rule operator and initialise a RULE transformation.

    The task's operator is checked for ``old`` and ``new`` keys through
    ``validate.is_in_dict_keys`` (failure behaviour defined elsewhere).

    Args:
        task: Task whose ``operator`` mapping describes the rule.
        order: Forwarded unchanged to the parent initialiser.
    """
    operator = task.operator
    for required_key in ('old', 'new'):
        validate.is_in_dict_keys(required_key, operator)

    super().__init__(TransformationType.RULE, task, order)
def load(name: str) -> Type[Transformation]:
    """Return the transformation class registered under ``name``.

    Args:
        name: Key into ``TransformationFactory.__MAPPING``; it is first
            passed to ``validate.is_in_dict_keys`` against that mapping.
    """
    registry = TransformationFactory.__MAPPING
    validate.is_in_dict_keys(name, registry)
    return registry[name]
def load(name: str) -> Type[Parser]:
    """Return the parser class registered under ``name``.

    Args:
        name: Key into ``ParserFactory.__MAPPING``; it is first passed to
            ``validate.is_in_dict_keys`` against that mapping.
    """
    registry = ParserFactory.__MAPPING
    validate.is_in_dict_keys(name, registry)
    return registry[name]