def get_contents(coll_scope=None, coll_name=None, request_id=None, workload_id=None, transform_id=None,
                 relation_type=None, status=None, to_json=False, session=None):
    """
    Fetch the contents of every collection selected by the given filters.

    The collections are looked up first with get_collections; their ids are
    then handed to orm_contents.get_contents in a single call.

    :param coll_scope: scope of the collection.
    :param coll_name: name of the collection.
    :param request_id: the request id.
    :param workload_id: the workload_id of the request.
    :param transform_id: the transform id related to the collection.
    :param relation_type: the relation type between the collection and the transform
                          (input, output, log, ...).
    :param status: content status filter, passed through to orm_contents.get_contents.
    :param to_json: return json format.
    :param session: the database session in use.

    :returns: the result of orm_contents.get_contents for the selected collection ids,
              or an empty list when no collection was selected.
    """
    matched_collections = get_collections(scope=coll_scope, name=coll_name, request_id=request_id,
                                          workload_id=workload_id, transform_id=transform_id,
                                          relation_type=relation_type, to_json=to_json, session=session)
    matched_ids = [collection['coll_id'] for collection in matched_collections]

    # Nothing matched: skip the contents query entirely.
    if not matched_ids:
        return []

    return orm_contents.get_contents(coll_id=matched_ids, status=status, to_json=to_json, session=session)
def get_output_contents_by_request_id_status(request_id, name, content_status, limit, transform_id=None,
                                             to_json=False, session=None):
    """
    Get the contents of a request's output collection, filtered by status and name.

    :param request_id: the request id.
    :param name: the content name. When truthy, only contents whose name equals it
                 (both sides compared as strings) are kept.
    :param content_status: the content status, passed to orm_contents.get_contents.
    :param limit: maximum number of contents to return; a falsy value means no limit.
    :param transform_id: transform id used to pick the transform when the request has
                         more than one. Ignored when the request has exactly one transform.
    :param to_json: return json format.
    :param session: the database session in use.

    :raises ValueError: if the request has more than one transform and transform_id is None.

    :returns: list of contents of the output collection; empty when no transform,
              collection or content matched.
    """
    transform_ids = orm_transforms.get_transform_ids(request_id, session=session)

    found_transform_id = None
    if transform_ids:
        if len(transform_ids) == 1:
            found_transform_id = transform_ids[0]
        elif transform_id is None:
            # Raising a plain string is not valid on Python 3 (it ends up as a
            # TypeError and the message is lost), so raise a real exception.
            raise ValueError("Number of the transforms(%s) is bigger than 1 and transform id is not provided"
                             % len(transform_ids))
        else:
            # Keep the id only when it really belongs to this request.
            found_transform_id = next((tf_id for tf_id in transform_ids if tf_id == transform_id), None)

    coll_id = None
    if found_transform_id:
        coll_id = orm_collections.get_collection_id(transform_id=found_transform_id,
                                                    relation_type=CollectionRelationType.Output,
                                                    session=session)

    contents = []
    if coll_id:
        contents = orm_contents.get_contents(coll_id=coll_id, status=content_status,
                                             to_json=to_json, session=session)

    if name:
        wanted_name = str(name)
        contents = [content for content in contents if str(content['name']) == wanted_name]

    if contents and limit and len(contents) > limit:
        contents = contents[:limit]
    return contents
def get_contents(coll_scope=None, coll_name=None, request_id=None, workload_id=None, relation_type=None, session=None):
    """
    Get contents grouped by request id, transform id and collection.

    :param coll_scope: scope of the collection.
    :param coll_name: name of the collection.
    :param request_id: the request id.
    :param workload_id: the workload_id of the request.
    :param relation_type: the relation type between the collection and the transform
                          (input, output, log, ...). May be a CollectionRelationType
                          member or its raw value; None disables the filter.
    :param session: the database session in use.

    :raises WrongParameterException: if both request_id and workload_id are None.

    :returns: nested dict of the form
              {request_id: {transform_id: {'scope:name': {'collection': ...,
                                                          'relation_type': ...,
                                                          'contents': ...}}}}
    """
    if request_id is None and workload_id is None:
        raise exceptions.WrongParameterException("Either request_id or workload_id should not be None")

    collections_by_request = get_collections(scope=coll_scope, name=coll_name, request_id=request_id,
                                             workload_id=workload_id, session=session)

    # Normalise the filter once: an enum member is compared through its value.
    wanted_type = relation_type
    if isinstance(wanted_type, CollectionRelationType):
        wanted_type = wanted_type.value

    rets = {}
    for req_id in collections_by_request:
        collections_by_transform = collections_by_request[req_id]
        per_request = rets[req_id] = {}
        for tf_id in collections_by_transform:
            per_transform = per_request[tf_id] = {}
            for collection in collections_by_transform[tf_id]:
                # Skip collections whose relation type does not match the requested one.
                if wanted_type is not None and collection['relation_type'].value != wanted_type:
                    continue

                scope_name = '%s:%s' % (collection['scope'], collection['name'])
                per_transform[scope_name] = {
                    'collection': collection,
                    'relation_type': collection['relation_type'],
                    'contents': orm_contents.get_contents(coll_id=collection['coll_id'], session=session),
                }
    return rets
def get_contents(coll_scope=None, coll_name=None, request_id=None, workload_id=None, transform_id=None,
                 relation_type=None, to_json=False, session=None):
    """
    Get contents grouped by request id, transform id and collection.

    All filtering is delegated to get_collections; every collection it returns
    has its contents loaded with orm_contents.get_contents.

    :param coll_scope: scope of the collection.
    :param coll_name: name of the collection.
    :param request_id: the request id.
    :param workload_id: the workload_id of the request.
    :param transform_id: the transform id related to the collection.
    :param relation_type: the relation type between the collection and the transform
                          (input, output, log, ...).
    :param to_json: return json format.
    :param session: the database session in use.

    :returns: nested dict of the form
              {request_id: {transform_id: {'scope:name': {'collection': ...,
                                                          'relation_type': ...,
                                                          'contents': ...}}}}
    """
    collections_by_request = get_collections(scope=coll_scope, name=coll_name, request_id=request_id,
                                             workload_id=workload_id, transform_id=transform_id,
                                             relation_type=relation_type, to_json=to_json, session=session)

    rets = {}
    for req_id in collections_by_request:
        collections_by_transform = collections_by_request[req_id]
        per_request = rets[req_id] = {}
        for tf_id in collections_by_transform:
            per_transform = per_request[tf_id] = {}
            for collection in collections_by_transform[tf_id]:
                # Collections are keyed by "scope:name" inside their transform.
                scope_name = '%s:%s' % (collection['scope'], collection['name'])
                per_transform[scope_name] = {
                    'collection': collection,
                    'relation_type': collection['relation_type'],
                    'contents': orm_contents.get_contents(coll_id=collection['coll_id'],
                                                          to_json=to_json, session=session),
                }
    return rets
def get_contents_by_coll_id_status(coll_id, status=None, to_json=False, session=None):
    """
    Get the contents of one collection, optionally filtered by status.

    This is a thin wrapper that forwards all arguments to orm_contents.get_contents.

    :param coll_id: collection id.
    :param status: content status or list of content status.
    :param to_json: return json format.
    :param session: the database session in use.

    :raises NoObject: presumably when no content is found; that depends on
                      orm_contents.get_contents, nothing is raised here directly.

    :returns: list of contents.
    """
    contents = orm_contents.get_contents(coll_id=coll_id, status=status, to_json=to_json, session=session)
    return contents
def update_input_collection_with_contents(coll, parameters, contents, returning_id=False, bulk_size=100, session=None):
    """
    Add the contents an input collection does not have yet and update the collection.

    Contents are identified by "scope:name:min_id:max_id". Contents already stored
    for the collection are left untouched; only unknown ones are inserted. The
    file counters written to the collection are computed from the contents that
    were stored before this call.

    :param coll: the collection (dict-like with at least 'coll_id' and 'total_files').
    :param parameters: a dictionary of collection parameters to update. It is modified
                       in place: 'processed_files', 'new_files' and possibly 'status' are set.
    :param contents: list of content dicts to register.
    :param returning_id: whether to return id.
    :param bulk_size: bulk per insert to db.
    :param session: the database session in use.

    :raises NoObject: if no request is found.
    :raises DatabaseException: if there is a database error.

    :returns: the list of newly added contents.
    """
    def content_key(item):
        # Identity of a content inside a collection.
        return '%s:%s:%s:%s' % (item['scope'], item['name'], item['min_id'], item['max_id'])

    stored_contents = orm_contents.get_contents(coll_id=coll['coll_id'], session=session)

    # The status may be held either as the enum member or as its raw value.
    mapped_states = [ContentStatus.Mapped, ContentStatus.Mapped.value]
    new_states = [ContentStatus.New, ContentStatus.New.value]

    stored_by_key = {}
    processed_files = 0
    new_files = 0
    for stored in stored_contents:
        stored_by_key[content_key(stored)] = stored
        stored_status = stored['status']
        if stored_status in mapped_states:
            processed_files += 1
        if stored_status in new_states:
            new_files += 1

    # Already known contents need no update; only unknown ones are inserted.
    to_addes = [item for item in contents if content_key(item) not in stored_by_key]
    if to_addes:
        add_contents(to_addes, returning_id=returning_id, bulk_size=bulk_size, session=session)

    parameters['processed_files'] = processed_files
    parameters['new_files'] = new_files
    # Every expected file has been mapped: the collection can be closed.
    if processed_files == coll['total_files']:
        parameters['status'] = CollectionStatus.Closed
    update_collection(coll['coll_id'], parameters, session=session)
    return to_addes
def test_contents_orm(self):
    """ Contents (ORM): Test contents

    Creates a request, a transform, a collection and four contents, then checks
    lookups by collection, by scope/name, by id-range match, and bulk updates.
    """
    request_props = get_request_properties()
    transform_props = get_transform_properties()
    collection_props = get_collection_properties()
    base_props = get_content_properties()

    transform_props['request_id'] = add_request(**request_props)
    collection_props['transform_id'] = add_transform(**transform_props)
    coll_id = add_collection(**collection_props)

    base_props['coll_id'] = coll_id
    base_id = add_content(**base_props)

    def add_variant(**overrides):
        # Insert a copy of the base content with some fields overridden.
        variant_props = copy.deepcopy(base_props)
        for field, value in overrides.items():
            variant_props[field] = value
        return variant_props, add_content(**variant_props)

    _, upper_range_id = add_variant(min_id=101, max_id=200)
    _, full_range_id = add_variant(min_id=0, max_id=200)
    renamed_props, renamed_id = add_variant(name=base_props['name'] + '_1')

    all_ids = [base_id, upper_range_id, full_range_id, renamed_id]

    # Every content of the collection.
    found = get_contents(coll_id=coll_id)
    assert_equal(len(found), 4)
    for item in found:
        assert_in(item['content_id'], all_ids)

    # The three contents sharing the base scope and name.
    found = get_contents(scope=base_props['scope'], name=base_props['name'], coll_id=coll_id)
    assert_equal(len(found), 3)
    for item in found:
        assert_in(item['content_id'], all_ids)

    # The single renamed content.
    found = get_contents(scope=renamed_props['scope'], name=renamed_props['name'], coll_id=coll_id)
    assert_equal(len(found), 1)
    assert_equal(found[0]['content_id'], renamed_id)

    # Match by id range: the base content and the one spanning the full range.
    found = get_match_contents(coll_id=base_props['coll_id'],
                               scope=base_props['scope'],
                               name=base_props['name'],
                               min_id=base_props['min_id'],
                               max_id=base_props['max_id'])
    assert_equal(len(found), 2)
    for item in found:
        assert_in(item['content_id'], [base_id, full_range_id])

    # Bulk update of path and status, then read back each updated content.
    expected_paths = [(base_id, 'test_path1'), (upper_range_id, 'test_path2')]
    to_updates = [{'path': path, 'status': ContentStatus.Processing, 'content_id': content_id}
                  for content_id, path in expected_paths]
    update_contents(to_updates)

    for content_id, path in expected_paths:
        content = get_content(content_id=content_id)
        assert_equal(content['status'], ContentStatus.Processing)
        assert_equal(content['path'], path)