def check_generated_scopes(context: Context, name: str, action: str, *, data: dict = None, spinta_action: str = None):
    """Verify the auth token carries a scope allowing *action* on *name*.

    Either the broad per-action scope or the model-specific generated
    scope is accepted.  An insert that supplies its own ``id`` meta field
    additionally requires the ``set_meta_fields`` scope.
    """
    config = context.get('config')
    token = context.get('auth.token')
    scope_prefix = config.scope_prefix
    # Accept either the global action scope or the per-model scope.
    global_scope = f'{scope_prefix}{action}'
    scoped_name = name_to_scope(
        '{prefix}{name}_{action}',
        name,
        maxlen=config.scope_max_length,
        params={'prefix': scope_prefix, 'action': action},
    )
    token.check_scope({global_scope, scoped_name}, operator='OR')
    # Setting meta fields (like `id`) on insert needs an extra scope.
    effective_action = spinta_action if spinta_action else action
    if effective_action == 'insert' and data and 'id' in data:
        token.check_scope(f'{scope_prefix}set_meta_fields')
def read_xlsx(
    context: Context,
    model: Model,
    *,
    source: str,  # BUG FIX: was `source=str` — defaulted to the `str` type instead of annotating.
    dependency: dict,
    skip: str = None,  # int (rows to drop) or dict {'column': int, 'value': ...} — TODO confirm annotation
    limit: int = None,
):
    """Yield data rows of the XLSX file at *source* as dicts keyed by header.

    ``skip`` either drops a fixed number of leading rows (int) or scans
    for the header row whose ``column`` cell matches one of ``value``
    (dict).  ``limit`` caps the number of data rows yielded.
    """
    path = fetch(context, source)
    rows = _read_xlsx(str(path))
    if skip:
        if isinstance(skip, dict):
            # Normalize accepted header markers into a set so a plain string
            # marker is matched exactly, not by substring.
            value = set(skip['value']) if isinstance(skip['value'], list) else {skip['value']}
            for row in rows:
                # BUG FIX: compare against the normalized set `value`; the
                # original tested `in skip['value']`, which did substring
                # matching when the marker was a single string.
                if len(row) > skip['column'] and row[skip['column']] in value:
                    break
            else:
                context.error(f"Can't find header line: {skip!r}")
            # Put the found header row back in front of the remaining rows.
            rows = chain([row], rows)
        else:
            rows = islice(rows, skip, None)
    # The first remaining row is the header; map column index -> name.
    cols = {i: x.strip() for i, x in enumerate(next(rows, []))}
    if limit:
        rows = islice(rows, 0, limit)
    for row in rows:
        data = {}
        for i, cell in enumerate(row):
            if i in cols:
                data[cols[i]] = cell
        yield data
def _check_extends(context: Context, dataset: Dataset, model: Model):
    """Check that the model named in ``model.extends`` actually exists.

    The referenced model may live either among the dataset's own objects
    or among the manifest's models.
    """
    if not model.extends:
        return
    if model.extends in dataset.objects:
        return
    if model.extends in dataset.manifest.objects['model']:
        return
    # BUG FIX: report the name that could not be found (`model.extends`),
    # not the name of the model that declared it.
    context.error(f"Can't find model {model.extends!r} specified in 'extends'.")
def read_xml(context: Context, prop: Property, *, source: str, value: etree.ElementBase):
    """Evaluate XPath *source* against element *value*; return the match.

    Returns ``None`` when nothing matches and reports an error when the
    expression yields more than one node.

    BUG FIX: the parameter was declared ``source=str`` — a default value
    of the ``str`` type instead of a type annotation.
    """
    result = value.xpath(source)
    if len(result) == 1:
        return result[0]
    elif len(result) == 0:
        return None
    else:
        context.error(f"More than one value returned for {source}: {value}")
def fetch(context: Context, url: str, *, text: bool = False) -> Path:
    """Download *url* into the context cache and return the cached path.

    A previously cached download is returned without touching the
    network.  With ``text=True`` the response is decoded while streaming.
    """
    cache = context.get('cache')
    cached = cache.get(url)
    if cached is not None:
        return cached
    session = context.get('requests')
    with session.get(url, stream=True) as response, cache.set(url, text=text) as out:
        for chunk in response.iter_content(chunk_size=8192, decode_unicode=text):
            # iter_content may emit empty keep-alive chunks; skip them.
            if chunk:
                out.write(chunk)
    return cache.get(url)
def changes(context: Context, model: Model, backend: PostgreSQL, *, id=None, offset=None, limit=None):
    """Yield change-log entries for *model*, ordered by change id.

    Optional ``id``, ``offset`` and ``limit`` narrow the selection via
    the ``_changes_*`` helpers.
    """
    authorize(context, 'changes', model)
    connection = context.get('transaction').connection
    table = _get_table(backend, model).changes
    stmt = sa.select([table]).order_by(table.c.change_id)
    stmt = _changes_id(table, stmt, id)
    stmt = _changes_offset(table, stmt, offset)
    stmt = _changes_limit(stmt, limit)
    fields = ('change_id', 'transaction_id', 'id', 'datetime', 'action', 'change')
    for row in connection.execute(stmt):
        yield {field: row[table.c[field]] for field in fields}
def get_response_type(context: Context, request: Request, params: dict = None):
    """Decide the response format name for *request*.

    Precedence: explicit ``format`` URL parameter, then the HTTP
    ``Accept`` header matched against known exporter media types, then
    ``'json'`` as the fallback.
    """
    if params is None and 'path' in request.path_params:
        path = request.path_params['path'].strip('/')
        params = parse_url_path(path)
    elif params is None:
        params = {}

    if 'format' in params:
        return params['format']

    if 'accept' in request.headers and request.headers['accept']:
        formats = {
            'text/html': 'html',
            'application/xhtml+xml': 'html',
        }
        config = context.get('config')
        for name, exporter in config.exporters.items():
            for media_type in exporter.accept_types:
                formats[media_type] = name
        media_types, _ = cgi.parse_header(request.headers['accept'])
        for media_type in media_types.lower().split(','):
            # BUG FIX: strip surrounding whitespace — an Accept header like
            # "text/html, application/json" yields " application/json",
            # which never matched the formats table before.
            media_type = media_type.strip()
            if media_type in formats:
                return formats[media_type]

    return 'json'
def wipe(context: Context, model: Model, backend: Mongo):
    """Remove every document stored for *model* in the Mongo backend."""
    authorize(context, 'wipe', model)
    # NOTE(review): value is unused — presumably fetched to ensure an
    # active transaction exists; confirm.
    transaction = context.get('transaction')
    collection = backend.db[model.get_type_value()]
    # Drop all documents of this model.
    return collection.delete_many({})
def getall(context: Context, model: Model, backend: PostgreSQL, **kwargs):
    """Yield every row of *model*'s main table, prepared for output."""
    authorize(context, 'getall', model)
    connection = context.get('transaction').connection
    main_table = backend.tables[model.manifest.name][model.name].main
    rows = connection.execute(sa.select([main_table]))
    yield from (prepare(context, 'getall', model, backend, row) for row in rows)
def getall(
    context: Context,
    model: Model,
    backend: PostgreSQL,
    *,
    show: typing.List[str] = None,
    sort: typing.List[typing.Dict[str, str]] = None,
    offset=None,
    limit=None,
    count: bool = False,
):
    """Yield rows of *model*, or a single ``{'count': n}`` when *count*.

    ``show`` limits returned columns, ``sort``/``offset``/``limit``
    shape the query via the ``_getall_*`` helpers.
    """
    authorize(context, 'getall', model)
    connection = context.get('transaction').connection
    main = _get_table(backend, model).main
    joins = JoinManager(backend, model, main)
    if count:
        # Counting short-circuits the row query: one aggregate result.
        total = connection.execute(
            sa.select([sa.func.count()]).select_from(main)
        ).scalar()
        yield {'count': total}
        return
    stmt = sa.select(_getall_show(main, joins, show))
    stmt = _getall_order_by(stmt, main, joins, sort)
    stmt = _getall_offset(stmt, offset)
    stmt = _getall_limit(stmt, limit)
    for row in connection.execute(stmt):
        yield _get_data_from_row(model, main, row, show=show)
def wipe(context: Context, model: Model, backend: PostgreSQL):
    """Delete all rows of *model*: change log first, then the data table."""
    authorize(context, 'wipe', model)
    connection = context.get('transaction').connection
    tables = _get_table(backend, model)
    # Original deletion order preserved: changes before main.
    for tbl in (tables.changes, tables.main):
        connection.execute(tbl.delete())
def get(context: Context, model: Model, backend: Mongo, id: str):
    """Fetch one document of *model* by Mongo object id and prepare it."""
    authorize(context, 'getone', model)
    # NOTE(review): unused — presumably ensures an active transaction; confirm.
    transaction = context.get('transaction')
    collection = backend.db[model.get_type_value()]
    document = collection.find_one({"_id": ObjectId(id)})
    return prepare(context, 'getone', model, backend, document)
def getall(context: Context, model: Model, backend: Mongo, **kwargs):
    """Yield every document of *model* from the Mongo backend, prepared."""
    authorize(context, 'getall', model)
    # NOTE(review): unused — presumably ensures an active transaction; confirm.
    transaction = context.get('transaction')
    collection = backend.db[model.get_type_value()]
    for document in collection.find({}):
        yield prepare(context, 'getall', model, backend, document)
def test_set_overwrite():
    """Re-setting a context variable must raise and keep the old value."""
    context = Context()
    context.set('a', 1)
    with pytest.raises(Exception) as excinfo:
        context.set('a', 2)
    assert str(excinfo.value) == "Context variable 'a' has been already set."
    # The original value stays untouched.
    assert context.get('a') == 1
def _dependencies(context: Context, model, deps):
    """Yield dependency value mappings for *model*.

    *deps* maps dependency names either to a command-call dict or to an
    ``'object/name.property'`` string.  All string dependencies must
    point at a single model; exactly one of (one command call) or
    (one model's properties) is allowed.  With no deps, a single empty
    dict is yielded so callers still iterate once.

    NOTE(review): the code after each ``context.error(...)`` call assumes
    the error raises (e.g. ``dep.split('.', 1)`` after the format error,
    ``list(model_names)[0]`` in the else branch) — confirm that
    ``Context.error`` does not return normally.
    """
    if deps:
        command_calls = {}
        model_names = set()
        prop_names = []
        # Maps source property name -> dependency name used by the caller.
        prop_name_mapping = {}
        for name, dep in deps.items():
            if isinstance(dep, dict):
                # A dict value is a command call, handled separately below.
                command_calls[name] = dep
                continue
            if '.' not in dep:
                context.error(
                    f"Dependency must be in 'object/name.property' form, got: {dep}."
                )
            model_name, prop_name = dep.split('.', 1)
            model_names.add(model_name)
            prop_names.append(prop_name)
            prop_name_mapping[prop_name] = name
        if len(model_names) > 1:
            names = ', '.join(sorted(model_names))
            context.error(
                f"Dependencies are allowed only from single model, but more than one model found: {names}."
            )
        if len(command_calls) > 1:
            context.error(f"Only one command call is allowed.")
        if len(command_calls) > 0:
            if len(model_names) > 0:
                context.error(
                    f"Only one command call or one model is allowed in dependencies."
                )
            # Run the single command and yield each produced value under
            # its dependency name.
            for name, cmd in command_calls.items():
                cmd = load(context, Command(), cmd, parent=model, scope='service')
                for value in cmd(context):
                    yield {name: value}
        else:
            # All deps point at one model: pull the referenced properties
            # and rename keys back to the caller's dependency names.
            model_name = list(model_names)[0]
            params = parse_url_path(model_name)
            depmodel = get_model_from_params(model.manifest, params['path'], params)
            for row in getall(context, depmodel, depmodel.backend, show=prop_names):
                yield {prop_name_mapping[k]: v for k, v in row.items()}
    else:
        # No dependencies: yield one empty mapping so callers iterate once.
        yield {}
def wipe(context: Context, model: Model, backend: PostgreSQL):
    """Delete every change-log entry and every data row of *model*."""
    authorize(context, 'wipe', model)
    connection = context.get('transaction').connection
    tables = backend.tables[model.manifest.name][model.name]
    # Original order preserved: change log first, then main data.
    connection.execute(tables.changes.delete())
    connection.execute(tables.main.delete())
def push(context: Context, model: Model, backend: PostgreSQL, data: dict, *, action: str):
    """Insert or update one row of *model* and record the change.

    ``action`` selects insert/update/delete (delete is not implemented).
    Returns the prepared object containing the affected row's ``id``.
    """
    authorize(context, action, model, data=data)
    # Load and check if data is valid for its model.
    data = load(context, model, data)
    check(context, model, data)
    data = prepare(context, model, data)
    transaction = context.get('transaction')
    connection = transaction.connection
    table = backend.tables[model.manifest.name][model.name]
    # Keep only keys that are actual columns of the main table.
    data = {k: v for k, v in data.items() if k in table.main.columns}
    if action == INSERT_ACTION:
        result = connection.execute(
            table.main.insert().values(data),
        )
        row_id = result.inserted_primary_key[0]
    elif action == UPDATE_ACTION:
        data['id'] = int(data['id'])
        result = connection.execute(
            table.main.update().
            where(table.main.c.id == data['id']).
            values(data)
        )
        if result.rowcount == 1:
            row_id = data['id']
        elif result.rowcount == 0:
            # BUG FIX: messages were plain strings referencing the
            # nonexistent `self.obj` — made them real f-strings on `model`.
            raise Exception(f"Update failed, {model} with id {data['id']} not found.")
        else:
            raise Exception(f"Update failed, {model} with id {data['id']} has found and updated {result.rowcount} rows.")
    elif action == DELETE_ACTION:
        raise NotImplementedError
    else:
        raise Exception(f"Unknown action {action!r}.")
    # Track changes (the `id` itself is not part of the recorded diff).
    connection.execute(
        table.changes.insert().values(
            transaction_id=transaction.id,
            id=row_id,
            datetime=utcnow(),
            action=action,
            change={k: v for k, v in data.items() if k not in {'id'}},
        ),
    )
    return prepare(context, action, model, backend, {'id': str(row_id)})
def get(context: Context, model: Model, backend: PostgreSQL, id: str):
    """Return a single row of *model* by primary key, or None if absent.

    Reports an error when more than one row matches the id.
    """
    authorize(context, 'getone', model)
    connection = context.get('transaction').connection
    table = _get_table(backend, model).main
    rows = connection.execute(sa.select([table]).where(table.c.id == id))
    # Pull at most two rows — enough to detect the multi-match case
    # without materializing the whole result set.
    matches = list(itertools.islice(rows, 2))
    if len(matches) == 1:
        return _get_data_from_row(model, table, matches[0])
    if not matches:
        return None
    context.error(f"Multiple rows were found, id={id}.")
def get_auth_token(context: Context) -> Token:
    """Validate the request's bearer token and return it.

    When no Authorization header is present and a default client is
    configured, a fresh token is minted for that client and injected
    into the request before validation.  Validation failures become
    HTTP 400 responses.
    """
    request = context.get('auth.request')
    config = context.get('config')
    # Scopes are validated later via Token.check_scope, so none here.
    required_scope = None
    if config.default_auth_client and 'authorization' not in request.headers:
        # Mint a token for the configured default client (valid 10 days).
        key = load_key(context, 'private.json')
        client = query_client(context, config.default_auth_client)
        lifetime = int(datetime.timedelta(days=10).total_seconds())
        issued = create_access_token(
            context, key, client, 'client_credentials', lifetime, client.scopes)
        request.headers = request.headers.mutablecopy()
        request.headers['authorization'] = f'Bearer {issued}'
    protector = context.get('auth.resource_protector')
    try:
        return protector.validate_request(required_scope, request)
    except JoseError as e:
        raise HTTPException(status_code=400, detail=e.error)
def query_client(context: Context, client_id: str):
    """Load an auth client definition from ``clients/<client_id>.yml``."""
    config = context.get('config')
    client_file = config.config_path / 'clients' / f'{client_id}.yml'
    # NOTE(review): `yaml.load` without an explicit safe loader is unsafe
    # on untrusted input; these files appear operator-controlled, but
    # consider `safe_load` — confirm which yaml library is in use here.
    data = yaml.load(client_file)
    scopes = data['scopes']
    if not isinstance(scopes, list):
        raise Exception(f'Client {client_file} scopes must be list of scopes.')
    return Client(
        id=client_id,
        secret_hash=data['client_secret_hash'],
        scopes=scopes,
    )
def load(context: Context, manifest: Manifest, c: Config):
    """Load all YAML node definitions under ``manifest.path`` into *manifest*.

    Every ``**/*.yml`` file (except those matched by the ``ignore``
    config list) must be a dict with a known ``type``; each becomes a
    node instance registered in ``manifest.objects[type][name]``.

    NOTE(review): code after ``context.error(...)`` assumes the error
    raises — otherwise ``data`` may be unbound after a parse failure;
    confirm ``Context.error`` semantics.
    """
    config = context.get('config')
    ignore = c.get('ignore', default=[], cast=list)

    # Add all supported node types.
    for name in config.components['nodes'].keys():
        manifest.objects[name] = {}

    for file in manifest.path.glob('**/*.yml'):
        if is_ignored(ignore, manifest.path, file):
            continue
        try:
            data = yaml.load(file.read_text())
        except (ParserError, ScannerError) as e:
            context.error(f"{file}: {e}.")
        if not isinstance(data, dict):
            context.error(f"{file}: expected dict got {data.__class__.__name__}.")
        if 'type' not in data:
            raise Exception(f"'type' is not defined in {file}.")
        if data['type'] not in manifest.objects:
            raise Exception(f"Unknown type {data['type']!r} in {file}.")
        # Instantiate the node class registered for this type and load it
        # with the file's data plus manifest-derived defaults.
        node = config.components['nodes'][data['type']]()
        data = {
            'path': file,
            'parent': manifest,
            'backend': manifest.backend,
            **data,
        }
        load(context, node, data, manifest)
        # Duplicate names within a type are a hard error.
        if node.name in manifest.objects[node.type]:
            raise Exception(f"Object {node.type} with name {node.name} already exist.")
        manifest.objects[node.type][node.name] = node
def push(context: Context, model: Model, backend: Mongo, data: dict, *, action: str):
    """Insert or update one document of *model* in the Mongo backend.

    Presence of ``id`` in *data* selects an update, otherwise an insert.
    Deletes and changelog tracking are not implemented yet.  Returns the
    prepared object with a string ``id``.
    """
    authorize(context, action, model, data=data)
    # Validate and normalize the payload for this model.
    data = load(context, model, data)
    check(context, model, data)
    data = prepare(context, model, data)
    # NOTE(review): unused — presumably ensures an active transaction; confirm.
    transaction = context.get('transaction')
    collection = backend.db[model.get_type_value()]
    # Work on a deep copy: pymongo mutates the dict passed to
    # insert_one() (adding Mongo metadata such as `_id`), and we want
    # the caller's `data` left intact.
    document = copy.deepcopy(data)
    # FIXME: before creating a revision, check there is no collision clash.
    document['revision'] = get_new_id('revision id')
    if 'id' in data:
        result = collection.update_one(
            {'_id': ObjectId(document['id'])},
            {'$set': document},
        )
        assert result.matched_count == 1 and result.modified_count == 1
        data_id = data['id']
    else:
        data_id = collection.insert_one(document).inserted_id
    # Expose the ObjectId back to callers as a plain string.
    document['id'] = str(data_id)
    return prepare(context, action, model, backend, document)
def load(
    context: Context,
    command: Command,
    data: dict,
    *,
    parent: Node,
    scope: str,
    argname: str = None,
) -> Command:
    """Populate *command* from a one-entry ``{name: args}`` mapping.

    Resolves the command implementation from config by *scope* and name.
    A bare-string ``args`` is shorthand for the command's single
    argument (named by the schema's ``argname`` or the *argname* param).
    """
    config = context.get('config')
    name, args = next(iter(data.items()))
    command.name = name
    command.parent = parent
    command.command = config.commands[scope][name]
    if isinstance(args, str):
        key = command.command.schema.get('argname', argname)
        args = {key: args}
    command.args = args
    return command
def check(context: Context, model: Model, backend: PostgreSQL, data: dict):
    """Validate *data* against required and unique property constraints."""
    connection = context.get('transaction').connection
    table = backend.tables[model.manifest.name][model.name].main
    # An `id` in the payload means we are updating an existing row.
    action = 'update' if 'id' in data else 'insert'
    for name, prop in model.properties.items():
        if prop.required and name not in data:
            raise Exception(f"{name!r} is required for {model}.")
        if prop.unique and prop.name in data:
            if action == 'update':
                # On update a row may keep its own value — exclude itself.
                condition = sa.and_(
                    table.c[prop.name] == data[prop.name],
                    table.c['id'] != data['id'],
                )
            else:
                condition = table.c[prop.name] == data[prop.name]
            missing = object()
            found = backend.get(connection, table.c[prop.name], condition, default=missing)
            if found is not missing:
                raise Exception(f"{name!r} is unique for {model} and a duplicate value is found in database.")
def pull(context: Context, dataset: Dataset, *, models: list = None):
    """Pull data for every sourced model of *dataset*, yielding rows.

    Sets up a temporary download cache and HTTP session for the scope of
    the pull.  A failure in one model/source pair is reported via
    ``context.error`` without aborting the rest.
    """
    with context.enter():
        tmpdir = context.attach(
            tempfile.TemporaryDirectory(prefix='spinta-pull-cache-'))
        context.bind('cache', Cache, path=pathlib.Path(tmpdir))
        context.bind('requests', requests.Session)
        for model in dataset.objects.values():
            # Skip models without a source, or ones not explicitly asked for.
            if model.source is None or (models and model.name not in models):
                continue
            for dep in _dependencies(context, model, model.dependencies):
                for src in model.source.commands:
                    try:
                        yield from _pull(context, model, src, dep)
                    except Exception as e:
                        context.error(
                            f"Error while pulling model {model.name!r}, with dependency: {dep!r} and source: {src!r}. Error: {e}"
                        )
def load_type(context: Context, prop: Node, data: dict, manifest: Manifest):
    """Instantiate and load the Type object for property *prop*.

    Resolves the type class from config by ``prop.type``, fills its
    schema-declared options from *data* (applying defaults and raising
    on missing required ones), then delegates to ``load``.
    """
    na = object()  # sentinel: distinguishes "missing" from explicit None
    config = context.get('config')
    if prop.type not in config.components['types']:
        raise Exception(f"Unknown property type {prop.type!r}.")
    type = config.components['types'][prop.type]()
    type_schema = resolve_schema(type, Type)
    for name in type_schema:
        schema = type_schema[name]
        value = data.get(name, na)
        if schema.get('required', False) and value is na:
            # BUG FIX: typo "requied" -> "required" in the error message.
            raise Exception(f"Missing required option {name!r}.")
        if value is na:
            value = schema.get('default')
        setattr(type, name, value)
    type.prop = prop
    type.name = data['type']
    return load(context, type, data, manifest)
def load_node(context: Context, node: Node, data: dict, manifest: Manifest, *, check_unknowns=True) -> Node:
    """Fill *node*'s schema-declared attributes from *data*.

    Handles inherited options (falling back to the parent node),
    backend-name resolution via the store, defaults, and unknown-option
    errors.  Returns the same *node* instance.
    """
    na = object()  # sentinel: distinguishes "missing" from explicit None
    store = context.get('store')
    node.manifest = manifest
    node.path = data['path']
    node.name = data['name']
    node.parent = data['parent']
    node_schema = resolve_schema(node, Node)
    for name in set(node_schema) | set(data):
        if name not in node_schema:
            # NOTE(review): assumes _load_node_error raises; otherwise the
            # node_schema[name] lookup below would fail — confirm.
            if check_unknowns:
                _load_node_error(context, node, f"Unknown option {name!r}.")
            else:
                continue
        schema = node_schema[name]
        value = data.get(name, na)
        if schema.get('inherit', False) and value is na:
            # Inheritable options fall back to the parent node's value.
            if node.parent and hasattr(node.parent, name):
                value = getattr(node.parent, name)
            else:
                value = None
        if schema.get('required', False) and value is na:
            # BUG FIX: typo "requied" -> "required" in the error message.
            _load_node_error(context, node, f"Missing required option {name!r}.")
        if schema.get('type') == 'backend' and isinstance(value, str):
            # Backend options given by name are resolved via the store.
            value = store.backends[value]
        if value is na:
            value = schema.get('default')
        setattr(node, name, value)
    return node
def check(context: Context, model: Model, backend: Mongo, data: dict):
    """Validate *data* before insert/update into the Mongo backend.

    Currently a stub: no actual checks are implemented yet.
    """
    # NOTE(review): unused — presumably ensures an active transaction; confirm.
    transaction = context.get('transaction')
def check(context: Context, project: Project):
    """Ensure the project's declared owner exists in the manifest."""
    owner = project.owner
    if owner and owner not in project.manifest.objects['owner']:
        context.error(f"Unknown owner {owner}.")
def push(context: Context, model: Model, backend: PostgreSQL, data: dict, *, action):
    """Upsert one external-dataset row of *model* and record the change.

    The row key is derived from the payload's ``id`` via ``get_ref_id``.
    A missing row is inserted; an existing row is updated only when the
    serialized data actually differs (otherwise ``None`` is returned and
    nothing is written).  Every write appends a change-log entry.

    NOTE(review): the incoming ``action`` parameter is immediately
    overwritten below — the effective action is derived from whether the
    row exists; confirm the parameter is intentionally ignored.
    """
    authorize(context, action, model, data=data)
    transaction = context.get('transaction')
    connection = transaction.connection
    table = _get_table(backend, model)
    data = _serialize(data)
    # The payload's `id` becomes the stable reference key of the row.
    key = get_ref_id(data.pop('id'))
    values = {
        'data': data,
        'transaction_id': transaction.id,
    }
    # Fetch the current row (data + transaction id) for change detection
    # and optimistic-concurrency checking on update.
    row = backend.get(
        connection,
        [table.main.c.data, table.main.c.transaction_id],
        table.main.c.id == key,
        default=None,
    )
    action = None

    # Insert.
    if row is None:
        action = INSERT_ACTION
        result = connection.execute(table.main.insert().values({
            'id': key,
            'created': utcnow(),
            **values,
        }))
        changes = data

    # Update.
    else:
        changes = _get_patch_changes(row[table.main.c.data], data)

        if changes:
            action = UPDATE_ACTION
            # The extra transaction_id predicate is an optimistic check:
            # the update only applies if nobody changed the row since we
            # read it above.
            result = connection.execute(
                table.main.update().where(table.main.c.id == key).where(
                    table.main.c.transaction_id == row[
                        table.main.c.transaction_id]).values({
                    **values,
                    'updated': utcnow(),
                }))
            # TODO: Retries are needed if result.rowcount is 0, if such
            #       situation happens, that means a concurrent transaction
            #       changed the data and we need to reread it.
            #
            #       And assumption is made here, than in the same
            #       transaction there are no concurrent updates, if this
            #       assumption is false, then we need to check against
            #       change_id instead of transaction_id.
        else:
            # Nothing to update.
            return None

    # Track changes.
    connection.execute(
        table.changes.insert().values(
            transaction_id=transaction.id,
            id=key,
            datetime=utcnow(),
            action=action,
            change=changes,
        ),
    )

    # Sanity check: was the primary key really what we told it to be?
    assert action != INSERT_ACTION or result.inserted_primary_key[0] == key, \
        f'{result.inserted_primary_key[0]} == {key}'

    # Sanity check: did we really update just one row?
    assert action != UPDATE_ACTION or result.rowcount == 1, result.rowcount

    return {'id': key}