def _route(self, table_name, range_array, metadata=None, **kwargs):
    """Map the files of ``table_name`` onto servers with a hash ring.

    Fetches the first ``Tables`` row whose ``table_id`` equals
    ``table_name`` and whose state is not ``Tables.TO_DELETE``, asks that
    row for the files to search for ``range_array`` and assigns every file
    id to a node of a ``HashRing`` built from ``self.conn_mgr.conn_names``.

    Args:
        table_name: Table id to look up; it is also copied into every
            routing entry.
        range_array: Passed unchanged to ``table.files_to_search``.
        metadata: Forwarded to the exceptions raised here.
        **kwargs: Accepted but not used by this method.

    Returns:
        A dict mapping each target host to
        ``{'table_id': table_name, 'file_ids': [<file id as str>, ...]}``.

    Raises:
        exceptions.DBError: The metadata query raised a SQLAlchemy error.
        exceptions.TableNotFoundError: No matching table row was found.
    """
    # PXU TODO: Implement Thread-local Context
    # PXU TODO: Session life mgt
    try:
        try:
            table = db.Session.query(Tables).filter(
                and_(Tables.table_id == table_name,
                     Tables.state != Tables.TO_DELETE)).first()
        except sqlalchemy_exc.SQLAlchemyError as e:
            raise exceptions.DBError(message=str(e), metadata=metadata) from e

        if not table:
            raise exceptions.TableNotFoundError(table_name, metadata=metadata)

        files = table.files_to_search(range_array)
    finally:
        # Release the session on every path, including the error paths
        # above; previously it was only released after a successful lookup.
        db.remove_session()

    servers = self.conn_mgr.conn_names
    logger.info('Available servers: {}'.format(servers))

    ring = HashRing(servers)

    routing = {}
    for f in files:
        file_id = str(f.id)
        target_host = ring.get_node(file_id)
        entry = routing.setdefault(
            target_host, {'table_id': table_name, 'file_ids': []})
        entry['file_ids'].append(file_id)

    return routing
def _route(self, table_name, range_array, partition_tags=None,
           metadata=None, **kwargs):
    """Map the files of a table and its partitions onto servers.

    Without ``partition_tags`` the query selects every ``Tables`` row whose
    ``table_id`` or ``owner_table`` equals ``table_name``. With
    ``partition_tags`` it selects only rows whose ``owner_table`` equals
    ``table_name`` and whose ``partition_tag`` is one of the given tags.
    Rows in state ``Tables.TO_DELETE`` are always excluded. The files
    returned by ``files_to_search`` for each row are then distributed over
    a ``HashRing`` built from ``self.readonly_topo.group_names``.

    Args:
        table_name: Table id / owner table to look up.
        range_array: Passed unchanged to ``files_to_search`` of every row.
        partition_tags: Optional collection of partition tags to restrict
            the lookup to.
        metadata: Forwarded to the exceptions raised here.
        **kwargs: Accepted but not used by this method.

    Returns:
        A dict ``{host: {table_id: [<file id as str>, ...]}}``.

    Raises:
        exceptions.DBError: The metadata query raised a SQLAlchemy error.
        exceptions.TableNotFoundError: The query returned no rows.
    """
    # PXU TODO: Implement Thread-local Context
    # PXU TODO: Session life mgt
    if not partition_tags:
        cond = and_(
            or_(Tables.table_id == table_name,
                Tables.owner_table == table_name),
            Tables.state != Tables.TO_DELETE)
    else:
        cond = and_(Tables.state != Tables.TO_DELETE,
                    Tables.owner_table == table_name,
                    Tables.partition_tag.in_(partition_tags))

    try:
        try:
            tables = db.Session.query(Tables).filter(cond).all()
        except sqlalchemy_exc.SQLAlchemyError as e:
            raise exceptions.DBError(message=str(e), metadata=metadata) from e

        if not tables:
            raise exceptions.TableNotFoundError(
                '{}:{}'.format(table_name, partition_tags), metadata=metadata)

        total_files = [table.files_to_search(range_array) for table in tables]
    finally:
        # Release the session on every path, including the error paths
        # above; previously it was only released after a successful lookup.
        db.remove_session()

    servers = self.readonly_topo.group_names
    logger.info('Available servers: {}'.format(list(servers)))

    ring = HashRing(servers)

    routing = {}
    for files in total_files:
        for f in files:
            file_id = str(f.id)
            target_host = ring.get_node(file_id)
            # Group by host first, then by the table the file belongs to.
            per_host = routing.setdefault(target_host, {})
            per_host.setdefault(f.table_id, []).append(file_id)

    return routing
def _route(self, collection_name, range_array, partition_tags=None,
           metadata=None, **kwargs):
    """Map the segment files of a collection onto servers.

    Selects the ``Tables`` rows for ``collection_name`` (the collection row
    itself and/or its partition rows, never rows in state
    ``Tables.TO_DELETE``), narrows them by ``partition_tags``, loads the
    ``TableFiles`` of type RAW, TO_INDEX or INDEX that belong to the
    selected rows and distributes their ids over a ``HashRing`` built from
    ``self.readonly_topo.group_names``.

    Args:
        collection_name: Collection (table id / owner table) to route.
        range_array: Accepted but not used by this method.
        partition_tags: Optional list of partition tags. Each tag is used
            as a regular expression against ``partition_tag``; the literal
            tag ``'_default'`` additionally selects the collection row
            itself.
        metadata: Forwarded to the exceptions raised here.
        **kwargs: Accepted but not used by this method.

    Returns:
        A dict ``{host: [<file id as str>, ...]}``. It is empty when no
        file matches; that case is only logged as a warning.

    Raises:
        exceptions.DBError: A metadata query raised a SQLAlchemy error.
        exceptions.CollectionNotFoundError: The collection query returned
            no rows.
    """
    # PXU TODO: Implement Thread-local Context
    # PXU TODO: Session life mgt
    if not partition_tags:
        cond = and_(
            or_(Tables.table_id == collection_name,
                Tables.owner_table == collection_name),
            Tables.state != Tables.TO_DELETE)
    else:
        # TODO: collection default partition is '_default'
        # Tags are not filtered in SQL; they are matched as regular
        # expressions against the fetched rows further down.
        cond = and_(Tables.state != Tables.TO_DELETE,
                    Tables.owner_table == collection_name)
        if '_default' in partition_tags:
            default_par_cond = and_(Tables.table_id == collection_name,
                                    Tables.state != Tables.TO_DELETE)
            cond = or_(cond, default_par_cond)

    try:
        try:
            collections = db.Session.query(Tables).filter(cond).all()
        except sqlalchemy_exc.SQLAlchemyError as e:
            raise exceptions.DBError(message=str(e), metadata=metadata) from e

        if not collections:
            logger.error("Cannot find collection {} / {} in metadata".format(
                collection_name, partition_tags))
            raise exceptions.CollectionNotFoundError(
                '{}:{}'.format(collection_name, partition_tags),
                metadata=metadata)

        if not partition_tags:
            collection_list = [
                str(collection.table_id) for collection in collections
            ]
        else:
            collection_list = []
            for collection in collections:
                if collection.table_id == collection_name:
                    # The collection row itself is only fetched when
                    # '_default' was requested, so it is always kept.
                    collection_list.append(collection_name)
                    continue
                # NOTE(review): re.match only anchors at the start of the
                # string, so tag 'p1' also selects partition 'p10'.
                # Confirm that prefix matching is intended.
                if any(re.match(tag, collection.partition_tag)
                       for tag in partition_tags):
                    collection_list.append(collection.table_id)

        file_type_cond = or_(
            TableFiles.file_type == TableFiles.FILE_TYPE_RAW,
            TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX,
            TableFiles.file_type == TableFiles.FILE_TYPE_INDEX,
        )
        file_cond = and_(file_type_cond,
                         TableFiles.table_id.in_(collection_list))
        try:
            files = db.Session.query(TableFiles).filter(file_cond).all()
        except sqlalchemy_exc.SQLAlchemyError as e:
            raise exceptions.DBError(message=str(e), metadata=metadata) from e

        if not files:
            # Deliberately not an error: an empty routing table is returned.
            logger.warning(
                "Collection file is empty. {}".format(collection_list))
    finally:
        # Release the session on every path, including the error paths
        # above; previously it was only released after both queries
        # succeeded.
        db.remove_session()

    servers = self.readonly_topo.group_names
    logger.info('Available servers: {}'.format(list(servers)))

    ring = HashRing(servers)

    routing = {}
    for f in files:
        file_id = str(f.id)
        target_host = ring.get_node(file_id)
        routing.setdefault(target_host, []).append(file_id)

    return routing
def _route(self, collection_name, range_array, partition_tags=None,
           metadata=None, **kwargs):
    """Route the searchable segment files of a collection to readonly nodes.

    The router filters out segment files whose status is not suitable for
    searching, so the readonly nodes do not filter files themselves. The
    writable node may update segment file status; in a mishards cluster
    the metadata is used to keep that status consistent.

    Steps: select the ``Tables`` rows for ``collection_name`` (never rows
    in state ``Tables.TO_DELETE``), narrow them by ``partition_tags``,
    load their ``TableFiles`` of type RAW, TO_INDEX or INDEX, distribute
    the files over a ``HashRing`` built from
    ``self.readonly_topo.group_names`` and finally ask
    ``filter_file_to_update`` which files of each host need updating.

    Args:
        collection_name: Collection (table id / owner table) to route.
        range_array: Accepted but not used by this method.
        partition_tags: Optional list of partition tags. Each tag is used
            as a regular expression against ``partition_tag``; the literal
            tag ``'_default'`` additionally selects the collection row
            itself.
        metadata: Forwarded to the exceptions raised here.
        **kwargs: Accepted but not used by this method.

    Returns:
        A dict ``{host: (search_files, ud_files)}`` where ``search_files``
        is the list of file ids (as str) routed to ``host`` and
        ``ud_files`` is whatever ``filter_file_to_update`` returned for the
        ``(file id, updated_time)`` pairs of that host. The dict is empty
        when no file matches; that case is only logged as a warning.

    Raises:
        exceptions.DBError: A metadata query raised a SQLAlchemy error.
        exceptions.CollectionNotFoundError: The collection query returned
            no rows.
    """
    # PXU TODO: Implement Thread-local Context
    # PXU TODO: Session life mgt

    # Select all available partitions from metadata.
    if not partition_tags:
        cond = and_(
            or_(Tables.table_id == collection_name,
                Tables.owner_table == collection_name),
            Tables.state != Tables.TO_DELETE)
    else:
        # TODO: collection default partition is '_default'
        # Tags are not filtered in SQL; they are matched as regular
        # expressions against the fetched rows further down.
        cond = and_(Tables.state != Tables.TO_DELETE,
                    Tables.owner_table == collection_name)
        if '_default' in partition_tags:
            # There is no partition entity for the default partition in
            # the meta table; it corresponds to the collection entity.
            default_par_cond = and_(Tables.table_id == collection_name,
                                    Tables.state != Tables.TO_DELETE)
            cond = or_(cond, default_par_cond)

    try:
        try:
            collections = db.Session.query(Tables).filter(cond).all()
        except sqlalchemy_exc.SQLAlchemyError as e:
            raise exceptions.DBError(message=str(e), metadata=metadata) from e

        if not collections:
            logger.error(
                "Cannot find collection {} / {} in metadata during routing. Meta url: {}"
                .format(collection_name, partition_tags, db.url))
            raise exceptions.CollectionNotFoundError(
                "{}:{} not found in metadata".format(collection_name,
                                                     partition_tags),
                metadata=metadata)

        if not partition_tags:
            collection_list = [
                str(collection.table_id) for collection in collections
            ]
        else:
            collection_list = []
            for collection in collections:
                if collection.table_id == collection_name:
                    # The collection row itself is only fetched when
                    # '_default' was requested, so it is always kept.
                    collection_list.append(collection_name)
                    continue
                # Tags are regular expressions.
                # NOTE(review): re.match only anchors at the start of the
                # string, so tag 'p1' also selects partition 'p10'.
                # Confirm that prefix matching is intended.
                if any(re.match(tag, collection.partition_tag)
                       for tag in partition_tags):
                    collection_list.append(collection.table_id)

        file_type_cond = or_(
            TableFiles.file_type == TableFiles.FILE_TYPE_RAW,
            TableFiles.file_type == TableFiles.FILE_TYPE_TO_INDEX,
            TableFiles.file_type == TableFiles.FILE_TYPE_INDEX,
        )
        file_cond = and_(file_type_cond,
                         TableFiles.table_id.in_(collection_list))
        try:
            files = db.Session.query(TableFiles).filter(file_cond).all()
        except sqlalchemy_exc.SQLAlchemyError as e:
            raise exceptions.DBError(message=str(e), metadata=metadata) from e

        if not files:
            # Deliberately not an error: an empty routing table is returned.
            logger.warning(
                "Collection file is empty. {}".format(collection_list))
    finally:
        # Release the session on every path, including the error paths
        # above; previously it was only released after both queries
        # succeeded.
        db.remove_session()

    # Use consistent hashing to route segment files: the ring nodes are the
    # readonly nodes and the hashed items are the segment file ids.
    servers = self.readonly_topo.group_names
    logger.info('Available servers: {}'.format(list(servers)))

    ring = HashRing(servers)

    routing = {}
    for f in files:
        file_id = str(f.id)
        target_host = ring.get_node(file_id)
        routing.setdefault(target_host, []).append(
            (file_id, int(f.updated_time)))

    # Check which of the routed files need to be updated on each host.
    filter_routing = {}
    for host, host_files in routing.items():
        ud_files = filter_file_to_update(host, host_files)
        search_files = [file_id for file_id, _ in host_files]
        filter_routing[host] = (search_files, ud_files)

    return filter_routing