def _delete_chunk(session: Session, a_chunk: ObjectIDListT) -> Tuple[int, int, List[str]]:
    """
        Delete a chunk from self's object list.
        Technical Note: We use SQLA Core as we don't want to fetch the rows.
    """
    # Start with images, which are not deleted via a CASCADE on the DB side.
    # This is maybe due to the relationship cycle b/w ObjectHeader and Images. @See the comment in the Image class.
    img_del_qry: Delete = Image.__table__.delete()
    img_del_qry = img_del_qry.where(Image.objid == any_(a_chunk))
    img_del_qry = img_del_qry.returning(Image.file_name, Image.thumb_file_name)
    with CodeTimer("DELETE for %d images: " % len(a_chunk), logger):
        files_res = session.execute(img_del_qry)
        img_files = []
        nb_img_rows = 0
        for a_file_tuple in files_res:
            # We have the main file and, optionally, the thumbnail one
            for a_file in a_file_tuple:
                if a_file:
                    img_files.append(a_file)
            nb_img_rows += 1
        logger.info("Removed: %d rows, to remove: %d files", nb_img_rows, len(img_files))
    obj_del_qry: Delete = ObjectHeader.__table__.delete()
    obj_del_qry = obj_del_qry.where(ObjectHeader.objid == any_(a_chunk))
    with CodeTimer("DELETE for %d objs: " % len(a_chunk), logger):
        nb_objs = session.execute(obj_del_qry).rowcount
    session.commit()
    # TODO: Cache delete
    return nb_objs, nb_img_rows, img_files

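# A hedged usage sketch, not from the source: driving _delete_chunk over a large
# object set. 'session' and the caller's id list are assumed to exist; CHUNK_SIZE
# and _delete_in_chunks are hypothetical names.
CHUNK_SIZE = 400

def _delete_in_chunks(session: Session, object_ids: ObjectIDListT) -> Tuple[int, int, List[str]]:
    nb_objs = nb_img_rows = 0
    img_files: List[str] = []
    for start in range(0, len(object_ids), CHUNK_SIZE):
        objs, imgs, files = _delete_chunk(session, object_ids[start:start + CHUNK_SIZE])
        nb_objs += objs
        nb_img_rows += imgs
        img_files.extend(files)
    return nb_objs, nb_img_rows, img_files
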
def historize_classification(self, only_qual=None, manual=True):
    """
        Copy current classification information into the history table, for all rows in self.
        :param only_qual: If set, only historize current rows having one of these classification qualifications.
        :param manual: If set, historize manual entries; otherwise pick automatic ones.
    """
    # Lighten the SQLA expressions a bit
    oh = ObjectHeader
    och = ObjectsClassifHisto
    if manual:
        # What we want to historize, as a subquery
        sel_subqry = select([oh.objid, oh.classif_when, text("'M'"),
                             oh.classif_id, oh.classif_qual, oh.classif_who])
        if only_qual is not None:
            qual_cond = oh.classif_qual.in_(only_qual)
        else:
            qual_cond = true()
        sel_subqry = sel_subqry.where(and_(oh.objid == any_(self.object_ids),
                                           oh.classif_when.isnot(None),
                                           qual_cond))
        ins_columns = [och.objid, och.classif_date, och.classif_type,
                       och.classif_id, och.classif_qual, och.classif_who]
    else:
        # What we want to historize, as a subquery
        sel_subqry = select([oh.objid, oh.classif_auto_when, text("'A'"),
                             oh.classif_auto_id, oh.classif_qual, oh.classif_auto_score])
        sel_subqry = sel_subqry.where(and_(oh.objid == any_(self.object_ids),
                                           oh.classif_auto_id.isnot(None),
                                           oh.classif_auto_when.isnot(None)))
        ins_columns = [och.objid, och.classif_date, och.classif_type,
                       och.classif_id, och.classif_qual, och.classif_score]
    # Insert into the log table
    ins_qry: Insert = pg_insert(och.__table__)
    ins_qry = ins_qry.from_select(ins_columns, sel_subqry)
    ins_qry = ins_qry.on_conflict_do_nothing(constraint='objectsclassifhisto_pkey')
    # TODO: mypy crashes due to pg_dialect below
    # logger.info("Histo query: %s", ins_qry.compile(dialect=pg_dialect()))
    nb_objs = self.session.execute(ins_qry).rowcount
    logger.info(" %d out of %d rows copied to log", nb_objs, len(self.object_ids))
    return oh

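# A hedged usage sketch, not from the source, showing the two historization
# flavours above on a hypothetical 'obj_set' exposing this method:
obj_set.historize_classification(only_qual=['V', 'D'], manual=True)  # snapshot manual V/D states
obj_set.historize_classification(manual=False)  # snapshot automatic predictions
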
def _get_last_classif_history(self, from_user_id: Optional[int],
                              but_not_from_user_id: Optional[int]) \
        -> List[HistoricalLastClassif]:
    """
        Query the last classification history on all objects of self, mixed with the present state,
        in order to have restore-able lines.
    """
    # Get the histo entries
    subqry: Query = self.session.query(ObjectsClassifHisto,
                                       func.rank().over(
                                           partition_by=ObjectsClassifHisto.objid,
                                           order_by=ObjectsClassifHisto.classif_date.desc()).label("rnk"))
    if from_user_id:
        subqry = subqry.filter(ObjectsClassifHisto.classif_who == from_user_id)
    if but_not_from_user_id:
        subqry = subqry.filter(ObjectsClassifHisto.classif_who != but_not_from_user_id)
    subqry = subqry.filter(ObjectsClassifHisto.classif_type == "M")
    subq_alias: Alias = subqry.filter(ObjectsClassifHisto.objid == any_(self.object_ids)).subquery()
    # Also get some fields from ObjectHeader for referencing, info, and fallback
    qry = self.session.query(ObjectHeader.objid, ObjectHeader.classif_id,
                             func.coalesce(subq_alias.c.classif_date, ObjectHeader.classif_auto_when),
                             subq_alias.c.classif_type,
                             func.coalesce(subq_alias.c.classif_id,
                                           ObjectHeader.classif_auto_id).label("h_classif_id"),
                             func.coalesce(subq_alias.c.classif_qual,
                                           case([(ObjectHeader.classif_auto_id.isnot(None), 'P')])),
                             subq_alias.c.classif_who)
    qry = qry.join(subq_alias, ObjectHeader.objid == subq_alias.c.objid, isouter=(from_user_id is None))
    if from_user_id is not None:
        # If taking history from a user, don't apply to the objects he/she classified last already.
        qry = qry.filter(ObjectHeader.classif_who != from_user_id)
        qry = qry.filter(subq_alias.c.rnk == 1)
    else:
        # Taking any history, including nothing, so emit blank history (see isouter above)
        qry = qry.filter(ObjectHeader.objid == any_(self.object_ids))
        qry = qry.filter(or_(subq_alias.c.rnk == 1, subq_alias.c.rnk.is_(None)))
    logger.info("_get_last_classif_history qry:%s", str(qry))
    with CodeTimer("HISTORY for %d objs: " % len(self.object_ids), logger):
        ret = [HistoricalLastClassif(rec) for rec in qry.all()]
    logger.info("_get_last_classif_history qry: %d rows", len(ret))
    return ret

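# A hedged reading of the COALESCE fallback above, using only columns from the
# query itself: when an object has no matching 'M' history row (the outer-join
# case), the emitted line falls back to ObjectHeader's automatic fields:
# classif_date <- classif_auto_when, h_classif_id <- classif_auto_id, and
# classif_qual <- 'P' via the CASE, so the line stays restore-able.
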
def __init__(self, session: Session, taxon_ids: ClassifIDListT):
    tf = Taxonomy.__table__.alias('tf')
    # bind = None  # For portable SQL, no 'ilike'
    bind = session.get_bind()
    select_list = [tf.c.id, tf.c.display_name, tf.c.name,
                   tf.c.nbrobj, tf.c.nbrobjcum]
    select_list.extend([text("t%d.name" % level)  # type:ignore
                        for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS)])
    qry = select(select_list, bind=bind)
    # Inject the recursive query, for getting parents
    _dumm, qry = TaxonomyBO._add_recursive_query(qry, tf, do_concat=False)
    qry = qry.where(tf.c.id == any_(taxon_ids))
    # Add another join for getting children
    logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
    res: ResultProxy = session.execute(qry)
    self.taxa: List[TaxonBO] = []
    for a_rec in res.fetchall():
        lst_rec = list(a_rec)
        an_id, display_name, db_name, nbobj1, nbobj2 = \
            lst_rec.pop(0), lst_rec.pop(0), lst_rec.pop(0), lst_rec.pop(0), lst_rec.pop(0)
        lineage = [db_name] + [name for name in lst_rec if name]
        self.taxa.append(TaxonBO(an_id, display_name, db_name, nbobj1, nbobj2, lineage))  # type:ignore
    self.get_children(session, self.taxa)

def _db_fetch(self, objids: List[int]) -> List[DBObjectTupleT]:
    """
        Do a DB read of given objects, with auxiliary objects.
        Thanks to 'contains_eager' calls, the objects are loaded into the SQLAlchemy session.
        :param objids:
        :return:
    """
    ret: Query = self.session.query(Project, Sample, Acquisition, Process,
                                    ObjectHeader, ObjectFields, Image)
    ret = ret.join(Sample, Project.all_samples).options(contains_eager(Project.all_samples))
    ret = ret.join(Acquisition, Sample.all_acquisitions)
    ret = ret.join(Process, Acquisition.process)
    ret = ret.join(ObjectHeader, Acquisition.all_objects)
    # Natural joins
    ret = ret.join(ObjectFields)
    ret = ret.join(Image, ObjectHeader.all_images).options(contains_eager(ObjectHeader.all_images))
    ret = ret.filter(ObjectHeader.objid == any_(objids))
    ret = ret.order_by(ObjectHeader.objid)
    ret = ret.order_by(Image.imgrank)
    if self.first_query:
        logger.info("Query: %s", str(ret))
        self.first_query = False
    with CodeTimer("Get Objects:", logger):
        objs = [an_obj for an_obj in ret.all()]
    # We get as many lines as images
    logger.info("NB ROWS JOIN=%d", len(objs))
    return objs

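# A hedged consumption sketch, not from the source: the JOIN returns one row per
# image, in (Project, Sample, Acquisition, Process, ObjectHeader, ObjectFields,
# Image) order, so a caller de-duplicates on objid. 'fetcher' and 'objids' are
# assumed names.
seen_objids = set()
for _prj, _smp, _acq, _prc, obj, _fields, _img in fetcher._db_fetch(objids):
    if obj.objid not in seen_objids:
        seen_objids.add(obj.objid)  # first image row for this object
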
def __init__(self, session: Session, prj_ids: ProjectIDListT, public: bool = False):
    # Query the projects and load neighbours as well
    qry: Query = session.query(Project, ProjectPrivilege)
    qry = qry.outerjoin(ProjectPrivilege, Project.privs_for_members).options(
        contains_eager(Project.privs_for_members))
    qry = qry.outerjoin(User, ProjectPrivilege.user).options(
        contains_eager(ProjectPrivilege.user))
    qry = qry.filter(Project.projid == any_(prj_ids))
    self.projects = []
    done = set()
    with CodeTimer("%s BO projects query & init:" % len(prj_ids), logger):
        for a_proj, a_pp in qry.all():
            # The query yields duplicates, so we need to filter
            if a_proj.projid not in done:
                if public:
                    self.projects.append(ProjectBO(a_proj))
                else:
                    self.projects.append(ProjectBO(a_proj).enrich())
                done.add(a_proj.projid)
    # Add instruments
    with CodeTimer("%s set instruments:" % len(prj_ids), logger):
        instruments = DescribedInstrumentSet(session, prj_ids)
        for a_project in self.projects:
            instrums = instruments.by_project.get(a_project.projid)
            if instrums is not None:
                a_project.instrument = ",".join(instrums)

def get_children(self, session: Session, taxa_list: List[TaxonBO]):
    # Enrich TaxonBOs with children
    bos_per_id = {a_bo.id: a_bo for a_bo in taxa_list}
    tch = Taxonomy.__table__.alias('tch')
    qry: Query = session.query(Taxonomy.id, tch.c.id)
    qry = qry.join(tch, tch.c.parent_id == Taxonomy.id)
    qry = qry.filter(Taxonomy.id == any_(list(bos_per_id.keys())))
    for an_id, a_child_id in qry.all():
        bos_per_id[an_id].children.append(a_child_id)

def get_cardinalities(self, session: Session):
    # Enrich TaxonBOs with number of objects. Due to ecotaxa/ecotaxa_dev#648,
    # pick data from the per-project stats.
    bos_per_id = {a_bo.id: a_bo for a_bo in self.taxa}
    qry: Query = session.query(ProjectTaxoStat.id, func.sum(ProjectTaxoStat.nbr_v))
    qry = qry.filter(ProjectTaxoStat.id == any_(list(bos_per_id.keys())))
    qry = qry.group_by(ProjectTaxoStat.id)
    for an_id, a_sum in qry.all():
        bos_per_id[an_id].nb_objects = a_sum

def get_projects_ids(self) -> ProjectIDListT:
    """
        Return the project IDs for the held sample IDs.
    """
    qry: Query = self.session.query(Project.projid).distinct(Project.projid)
    qry = qry.join(Sample, Project.all_samples)
    qry = qry.filter(Sample.sampleid == any_(self.ids))
    with CodeTimer("Prjs for %d samples: " % len(self.ids), logger):
        return [an_id[0] for an_id in qry.all()]

def update(self, params: Dict) -> int:
    """
        Update self's objects using given parameters, a dict of column names and values.
    """
    # Update the objects table
    obj_upd_qry: Update = ObjectHeader.__table__.update()
    obj_upd_qry = obj_upd_qry.where(ObjectHeader.objid == any_(self.object_ids))
    obj_upd_qry = obj_upd_qry.values(params)
    updated_objs = self.session.execute(obj_upd_qry).rowcount
    return updated_objs

def get_projects_ids(self) -> ProjectIDListT:
    """
        Return the project IDs for the owned object IDs.
    """
    qry: Query = self.session.query(Project.projid).distinct(Project.projid)
    qry = qry.join(Sample)
    qry = qry.join(Acquisition)
    qry = qry.join(ObjectHeader)
    qry = qry.filter(ObjectHeader.objid == any_(self.object_ids))
    with CodeTimer("Prjs for %d objs: " % len(self.object_ids), logger):
        return [an_id for an_id, in qry.all()]

def query(cls, session: Session, restrict_to: ClassifIDListT, priority_set: ClassifIDListT,
          display_name_filter: str, name_filters: List[str]):
    """
        :param session:
        :param restrict_to: If not None, limit the query to given IDs.
        :param priority_set: Regardless of MAX_MATCHES, these IDs must appear in the result if they match.
        :param display_name_filter:
        :param name_filters:
        :return:
    """
    tf = Taxonomy.__table__.alias('tf')
    # bind = None  # For portable SQL, no 'ilike'
    bind = session.get_bind()
    # noinspection PyTypeChecker
    priority = case([(tf.c.id == any_(priority_set), text('0'))],
                    else_=text('1')).label('prio')
    qry = select([tf.c.taxotype, tf.c.id, tf.c.display_name, priority], bind=bind)
    if len(name_filters) > 0:
        # Add to the query enough to get the full hierarchy for filtering
        concat_all, qry = cls._add_recursive_query(qry, tf, do_concat=True)
        # Below is quite expensive
        taxo_lineage = func.concat(*concat_all)
        name_filter = "%<" + "".join(name_filters)  # i.e. anywhere consecutively in the lineage
        qry = qry.where(taxo_lineage.ilike(name_filter))
    if restrict_to is not None:
        qry = qry.where(tf.c.id == any_(restrict_to))
    # We have index IS_TaxonomyDispNameLow, so this lower() is for free
    qry = qry.where(func.lower(tf.c.display_name).like(display_name_filter))
    qry = qry.order_by(priority, func.lower(tf.c.display_name))
    qry = qry.limit(cls.MAX_MATCHES)
    logger.info("Taxo query: %s with params %s and %s ", qry, display_name_filter, name_filters)
    res: Result = session.execute(qry)
    return res.fetchall()

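# A hedged illustration, assuming the recursive query concatenates lineage names
# with '<' separators and that name fragments look like the hypothetical ones
# below: the pattern requires all fragments to match consecutively, in order,
# anywhere in the lineage string.
print("%<" + "".join(["Copepoda%", "<Calanoida%"]))  # -> %<Copepoda%<Calanoida%
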
def __init__(self, session: Session, taxon_ids: ClassifIDListT):
    tf = WoRMS.__table__.alias('tf')
    # bind = None  # Uncomment for portable SQL, no 'ilike'
    bind = session.get_bind()
    select_list = [tf.c.aphia_id, tf.c.scientificname]
    select_list.extend([text("t%d.aphia_id, t%d.scientificname" % (level, level))  # type:ignore
                        for level in range(1, TaxonBOSetFromWoRMS.MAX_TAXONOMY_LEVELS)])
    qry = select(select_list, bind=bind)
    # Inject a query on names and hierarchy
    # The produced SQL looks like:
    #       left join worms t1 on tf.parent_name_usage_id=t1.aphia_id
    #       left join worms t2 on t1.parent_name_usage_id=t2.aphia_id
    #       ...
    #       left join worms t14 on t13.parent_name_usage_id=t14.aphia_id
    lev_alias = WoRMS.__table__.alias('t1')
    # Chain outer joins on WoRMS, for parents
    # Hook the first OJ to the main select
    chained_joins = tf.join(lev_alias,
                            lev_alias.c.aphia_id == tf.c.parent_name_usage_id,
                            isouter=True)
    prev_alias = lev_alias
    for level in range(2, self.MAX_TAXONOMY_LEVELS):
        lev_alias = WoRMS.__table__.alias('t%d' % level)
        # Hook each following OJ to the previous one
        chained_joins = chained_joins.join(lev_alias,
                                           lev_alias.c.aphia_id == prev_alias.c.parent_name_usage_id,
                                           isouter=True)
        # Collect expressions
        prev_alias = lev_alias
    qry = qry.select_from(chained_joins)
    qry = qry.where(tf.c.aphia_id == any_(taxon_ids))
    logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
    res: Result = session.execute(qry)
    self.taxa = []
    for a_rec in res.fetchall():
        lst_rec = list(a_rec)
        lineage_id = [an_id for an_id in lst_rec[0::2] if an_id]
        lineage = [name for name in lst_rec[1::2] if name]
        biota_pos = lineage.index('Biota') + 1
        lineage = lineage[:biota_pos]
        lineage_id = lineage_id[:biota_pos]
        self.taxa.append(TaxonBO('P', lineage[0], 0, 0, lineage, lineage_id))  # type:ignore
    self.get_children(session, self.taxa)

def update_all(self, params: Dict) -> int:
    """
        Update all self's objects using given parameters, a dict of column names and values.
    """
    # Update the objects table
    obj_upd_qry: Update = ObjectHeader.__table__.update()
    obj_upd_qry = obj_upd_qry.where(ObjectHeader.objid == any_(self.object_ids))
    obj_upd_qry = obj_upd_qry.values(params)
    updated_objs = self.session.execute(obj_upd_qry).rowcount
    # TODO: Cache upd
    # prj_id = self.get_projects_ids()[0]
    # ObjectCacheUpdater(prj_id).update_objects(self.object_ids, params)
    return updated_objs

def __init__(self, session: Session, taxon_ids: ClassifIDListT):
    tf = WoRMS.__table__.alias('tf')
    # bind = None  # For portable SQL, no 'ilike'
    bind = session.get_bind()
    select_list = [tf.c.aphia_id, tf.c.scientificname]
    select_list.extend([text("t%d.scientificname" % level)  # type:ignore
                        for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS)])
    qry = select(select_list, bind=bind)
    # Inject a query on names and hierarchy
    # The produced SQL looks like:
    #       left join worms t1 on tf.parent_name_usage_id=t1.aphia_id
    #       left join worms t2 on t1.parent_name_usage_id=t2.aphia_id
    #       ...
    #       left join worms t14 on t13.parent_name_usage_id=t14.aphia_id
    lev_alias = WoRMS.__table__.alias('t1')
    # Chain outer joins on WoRMS, for parents
    # Hook the first OJ to the main select
    chained_joins = tf.join(lev_alias,
                            lev_alias.c.aphia_id == tf.c.parent_name_usage_id,
                            isouter=True)
    prev_alias = lev_alias
    for level in range(2, self.MAX_TAXONOMY_LEVELS):
        lev_alias = WoRMS.__table__.alias('t%d' % level)
        # Hook each following OJ to the previous one
        chained_joins = chained_joins.join(lev_alias,
                                           lev_alias.c.aphia_id == prev_alias.c.parent_name_usage_id,
                                           isouter=True)
        # Collect expressions
        prev_alias = lev_alias
    qry = qry.select_from(chained_joins)
    qry = qry.where(tf.c.aphia_id == any_(taxon_ids))
    logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
    res: ResultProxy = session.execute(qry)
    self.taxa = []
    for a_rec in res.fetchall():
        lst_rec = list(a_rec)
        an_id, display_name = lst_rec.pop(0), lst_rec.pop(0)
        lineage = [name for name in lst_rec if name]
        # In WoRMS, the root is signaled by having itself as parent
        while len(lineage) > 1 and lineage[-1] == lineage[-2]:
            lineage.pop(-1)
        self.taxa.append(TaxonBO(an_id, display_name, display_name, 0, 0, lineage))  # type:ignore

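# A hedged worked example (made-up names) of the root-trimming loop above: in
# WoRMS the root has itself as parent, so the last lineage entry can repeat.
lineage = ['Calanus', 'Copepoda', 'Biota', 'Biota']
while len(lineage) > 1 and lineage[-1] == lineage[-2]:
    lineage.pop(-1)
assert lineage == ['Calanus', 'Copepoda', 'Biota']
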
def _fetch_classifs_and_lock(self) -> Dict[int, Dict]:
    """
        Fetch, and DB lock, self's objects.
        :return:
    """
    qry = select([ObjectHeader.objid,
                  ObjectHeader.classif_auto_id, ObjectHeader.classif_auto_when,
                  ObjectHeader.classif_auto_score,
                  ObjectHeader.classif_id, ObjectHeader.classif_qual,
                  ObjectHeader.classif_who, ObjectHeader.classif_when]).with_for_update(key_share=True)
    qry = qry.where(ObjectHeader.objid == any_(self.object_ids))
    logger.info("Fetch with lock: %s", qry)
    res: Result = self.session.execute(qry)
    prev = {rec['objid']: rec for rec in res.fetchall()}
    return prev

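# An assumption from SQLAlchemy/PostgreSQL semantics, not stated in the source:
# with_for_update(key_share=True) emits FOR NO KEY UPDATE, which blocks concurrent
# updates of the locked rows while still allowing inserts of rows referencing
# their keys; a lighter lock than plain FOR UPDATE.
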
def full_match_aggregated(self, used_taxo_ids):
    subqry = self.session.query(Taxonomy.id, WoRMS.aphia_id,
                                func.array_agg(WoRMS.status).over(
                                    partition_by=(Taxonomy.id, WoRMS.aphia_id)).label("acc"),
                                func.count(Taxonomy.name).over(
                                    partition_by=(WoRMS.aphia_id)).label("cnt"))
    subqry = subqry.join(WoRMS, TaxonomyChangeService.match_with_extension())
    subqry = subqry.filter(Taxonomy.id == any_(used_taxo_ids))
    subqry = subqry.filter(Taxonomy.taxotype == 'P')
    subqry = subqry.subquery().alias("ids")
    return subqry

def reset_to_predicted(self):
    """
        Reset to the Predicted state, keeping a log, i.e. history, of the previous state.
    """
    oh = ObjectHeader
    self.historize_classification(['V', 'D'])
    # Update the objects table
    obj_upd_qry: Update = oh.__table__.update()
    obj_upd_qry = obj_upd_qry.where(and_(oh.objid == any_(self.object_ids),
                                         oh.classif_qual.in_(['V', 'D'])))
    obj_upd_qry = obj_upd_qry.values(classif_qual='P')
    nb_objs = self.session.execute(obj_upd_qry).rowcount
    logger.info(" %d out of %d rows reset to predicted", nb_objs, len(self.object_ids))
    self.session.commit()

def __init__(self, session: Session, taxon_ids: ClassifIDListT):
    tf = Taxonomy.__table__.alias('tf')
    # bind = None  # For portable SQL, no 'ilike'
    bind = session.get_bind()
    select_list = [tf.c.taxotype, tf.c.nbrobj, tf.c.nbrobjcum,
                   tf.c.display_name, tf.c.id, tf.c.name]
    select_list.extend([text("t%d.id, t%d.name" % (level, level))  # type:ignore
                        for level in range(1, TaxonomyBO.MAX_TAXONOMY_LEVELS)])
    qry = select(select_list, bind=bind)
    # Inject the recursive query, for getting parents
    _dumm, qry = TaxonomyBO._add_recursive_query(qry, tf, do_concat=False)
    qry = qry.where(tf.c.id == any_(taxon_ids))
    # Add another join for getting children
    logger.info("Taxo query: %s with IDs %s", qry, taxon_ids)
    res: Result = session.execute(qry)
    self.taxa: List[TaxonBO] = []
    for a_rec in res.fetchall():
        lst_rec = list(a_rec)
        cat_type, nbobj1, nbobj2, display_name = \
            lst_rec.pop(0), lst_rec.pop(0), lst_rec.pop(0), lst_rec.pop(0)
        lineage_id = [an_id for an_id in lst_rec[0::2] if an_id]
        lineage = [name for name in lst_rec[1::2] if name]
        # assert lineage_id[-1] in (1, 84960, 84959), "Unexpected root %s" % str(lineage_id[-1])
        self.taxa.append(TaxonBO(cat_type, display_name, nbobj1, nbobj2,  # type:ignore
                                 lineage, lineage_id))  # type:ignore
    self.get_children(session)
    self.get_cardinalities(session)

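# A hedged worked example (made-up row) of the slicing above: after the first four
# columns are popped, the remaining columns alternate (id, name) per hierarchy
# level, with NULLs padding unused levels.
rest = [10, 'Copepoda', 4, 'Arthropoda', None, None]
assert [an_id for an_id in rest[0::2] if an_id] == [10, 4]
assert [name for name in rest[1::2] if name] == ['Copepoda', 'Arthropoda']
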
def strict_match_subquery(session, used_taxo_ids, phylo_or_morpho: Optional[str]):
    subqry = session.query(Taxonomy.name, func.max(Taxonomy.id).label("id"), WoRMS.aphia_id)
    subqry = subqry.join(WoRMS, TaxonomyChangeService.match_with_extension())
    subqry = subqry.filter(Taxonomy.id == any_(used_taxo_ids))
    if phylo_or_morpho is not None:
        subqry = subqry.filter(Taxonomy.taxotype == text("'%s'" % phylo_or_morpho))
        subqry = subqry.filter(WoRMS.status == text("'accepted'"))
    # Group to exclude multiple matches
    subqry = subqry.group_by(Taxonomy.name, WoRMS.aphia_id)
    subqry = subqry.having(and_(func.count(Taxonomy.name) == 1,
                                func.count(WoRMS.aphia_id) == 1))
    subqry = subqry.subquery().alias("ids")
    return subqry

def _db_fetch(self, object_ids: ObjectIDListT) -> List[DBObjectTupleT]:
    """
        Do a DB read of given objects, with auxiliary objects.
        :param object_ids: The list of IDs
        :return:
    """
    # TODO: Depending on the filter, the joins could be plain (not outer)
    #  e.g. if asked for a set of samples
    ret: Query = self.session.query(ObjectHeader)
    ret = ret.join(Acquisition).join(Process).join(Sample)
    ret = ret.outerjoin(Image, ObjectHeader.all_images).outerjoin(ObjectCNNFeature).join(ObjectFields)
    ret = ret.filter(ObjectHeader.objid == any_(object_ids))
    ret = ret.order_by(ObjectHeader.objid, Image.imgid)
    ret = ret.with_entities(ObjectHeader, ObjectFields, ObjectCNNFeature, Image,
                            Sample, Acquisition, Process)
    if self.first_query:
        logger.info("Query: %s", str(ret))
        self.first_query = False
    return ret.all()

def read_user_stats(session: Session, prj_ids: ProjectIDListT) -> List[ProjectUserStats]:
    """
        Read the users (annotators) involved in each project.
        Also compute a summary of their activity. This can only be an estimate since,
        e.g., imported data contains exactly the same data as the one obtained from live actions.
    """
    # Activity count: Count 1 for the present classification, per user and per object.
    # Of course, the classification date is the latest for the user.
    pqry: Query = session.query(Project.projid, User.id, User.name,
                                func.count(ObjectHeader.objid),
                                func.max(ObjectHeader.classif_when))
    pqry = pqry.join(Sample).join(Acquisition).join(ObjectHeader)
    pqry = pqry.join(User, User.id == ObjectHeader.classif_who)
    pqry = pqry.filter(Project.projid == any_(prj_ids))
    pqry = pqry.filter(ObjectHeader.classif_who == User.id)
    pqry = pqry.group_by(Project.projid, User.id)
    pqry = pqry.order_by(Project.projid, User.name)
    ret = []
    user_activities: Dict[UserIDT, UserActivity] = {}
    user_activities_per_project = {}
    stats_per_project = {}
    with CodeTimer("user present stats for %d projects, qry: %s:" % (len(prj_ids), str(pqry)), logger):
        last_prj = None
        for projid, user_id, user_name, cnt, last_date in pqry.all():
            last_date_str = last_date.replace(microsecond=0).isoformat()
            if projid != last_prj:
                last_prj = projid
                prj_stat = ProjectUserStats((projid, [], []))
                ret.append(prj_stat)
                user_activities = {}
                # Store for the second pass, with history
                stats_per_project[projid] = prj_stat
                user_activities_per_project[projid] = user_activities
            prj_stat.annotators.append(MinimalUserBO((user_id, user_name)))
            user_activity = UserActivity((user_id, cnt, last_date_str))
            prj_stat.activities.append(user_activity)
            # Store for the second pass
            user_activities[user_id] = user_activity
    # Activity count update: Add 1 for each entry in history, for each user.
    # The dates in history are ignored, except for users who do not appear in the first result set.
    hqry: Query = session.query(Project.projid, User.id, User.name,
                                func.count(ObjectsClassifHisto.objid),
                                func.max(ObjectsClassifHisto.classif_date))
    hqry = hqry.join(Sample).join(Acquisition).join(ObjectHeader).join(ObjectsClassifHisto)
    hqry = hqry.join(User, User.id == ObjectsClassifHisto.classif_who)
    hqry = hqry.filter(Project.projid == any_(prj_ids))
    hqry = hqry.group_by(Project.projid, User.id)
    hqry = hqry.order_by(Project.projid, User.name)
    with CodeTimer("user history stats for %d projects, qry: %s:" % (len(prj_ids), str(hqry)), logger):
        last_prj = None
        for projid, user_id, user_name, cnt, last_date in hqry.all():
            last_date_str = last_date.replace(microsecond=0).isoformat()
            if projid != last_prj:
                last_prj = projid
                # Just in case
                if projid not in user_activities_per_project:
                    continue
                # Get the stored data for the project
                user_activities = user_activities_per_project[projid]
                prj_stat = stats_per_project[projid]
            already_there = user_activities.get(user_id)
            if already_there is not None:
                # A user in both history and present classification
                already_there.nb_actions += cnt
            else:
                # A user _only_ in history
                prj_stat.annotators.append(MinimalUserBO((user_id, user_name)))
                user_activity = UserActivity((user_id, cnt, last_date_str))
                prj_stat.activities.append(user_activity)
                user_activities[user_id] = user_activity
    return ret

def _do_merge(self, dest_prj: Project):
    """
        The real merge operation.
    """
    # Loop over the involved tables and remap free columns
    for a_mapped_tbl in MAPPED_TABLES:
        remaps = self.remap_operations.get(a_mapped_tbl)
        # Do the remappings, if any
        if remaps is not None:
            logger.info("Doing re-mapping in %s: %s", a_mapped_tbl.__tablename__, remaps)
            ProjectBO.remap(self.session, self.src_prj_id, a_mapped_tbl, remaps)
    # Collect orig_id
    dest_parents = InBundle.fetch_existing_parents(self.ro_session, prj_id=self.prj_id)
    src_parents = InBundle.fetch_existing_parents(self.ro_session, prj_id=self.src_prj_id)
    # Compute the needed projections in order to keep orig_id unicity
    common_samples = self.get_ids_for_common_orig_id(Sample, dest_parents, src_parents)
    common_acquisitions = self.get_ids_for_common_orig_id(Acquisition, dest_parents, src_parents)
    # Align the foreign keys, to Project, Sample and Acquisition
    for a_fk_to_proj_tbl in [Sample, Acquisition, ObjectHeader, ParticleProject]:
        upd: Query = self.session.query(a_fk_to_proj_tbl)
        if a_fk_to_proj_tbl == Sample:
            # Move (i.e. change project of) the samples which are 'new' from the merged project,
            # so take all of them from the src project...
            upd = upd.filter(a_fk_to_proj_tbl.projid == self.src_prj_id)  # type: ignore
            # ...but not the ones with the same orig_id, which are presumably equal.
            upd = upd.filter(Sample.sampleid != all_(list(common_samples.keys())))
            # And update the column
            upd_values = {'projid': self.prj_id}
        elif a_fk_to_proj_tbl == Acquisition:
            # Acquisitions which were created, in source, under new samples, will 'follow'
            # them during the above move, thanks to the FK on acq_sample_id.
            # BUT some acquisitions were potentially created in the source project, inside
            # forked samples. They need to be attached to the corresponding dest (self) sample.
            if len(common_samples) > 0:
                # Build a CTE with values for the update
                smp_cte = values_cte("upd_smp", ("src_id", "dst_id"),
                                     [(k, v) for k, v in common_samples.items()])
                smp_subqry = self.session.query(smp_cte.c.column2).filter(
                    smp_cte.c.column1 == Acquisition.acq_sample_id)
                upd_values = {'acq_sample_id': func.coalesce(smp_subqry.scalar_subquery(),  # type: ignore
                                                             Acquisition.acq_sample_id)}
                upd = upd.filter(Acquisition.acq_sample_id == any_(list(common_samples.keys())))
                # upd = upd.filter(Acquisition.acquisid != all_(list(common_acquisitions.keys())))
            if len(common_samples) == 0:
                # Nothing to do. There were only new samples, all of them moved to self.
                continue
        elif a_fk_to_proj_tbl == ObjectHeader:
            # The generated SQL looks like:
            #       with upd_acq (src_id, dst_id) as (values (5,6), (7,8))
            #       update obj_head
            #          set acquisid = coalesce((select dst_id from upd_acq where acquisid=src_id), acquisid)
            #        where acquisid in (select src_id from upd_acq)
            if len(common_acquisitions) > 0:
                # An object must follow its acquisition
                acq_cte = values_cte("upd_acq", ("src_id", "dst_id"),
                                     [(k, v) for k, v in common_acquisitions.items()])
                acq_subqry = self.session.query(acq_cte.c.column2).filter(
                    acq_cte.c.column1 == ObjectHeader.acquisid)
                upd_values = {'acquisid': func.coalesce(acq_subqry.scalar_subquery(),  # type:ignore
                                                        ObjectHeader.acquisid)}
                upd = upd.filter(ObjectHeader.acquisid == any_(list(common_acquisitions.keys())))
            if len(common_acquisitions) == 0:
                # Nothing to do. There were only new acquisitions, all of them moved to self.
                continue
        else:
            # For the Particle project
            upd = upd.filter(ParticleProject.projid == self.src_prj_id)  # type: ignore
            upd_values = {'projid': self.prj_id}
        rowcount = upd.update(values=upd_values, synchronize_session=False)
        table_name = a_fk_to_proj_tbl.__tablename__  # type: ignore
        logger.info("Update in %s: %s rows", table_name, rowcount)
    # Acquisition & twin Process have followed their enclosing Sample.
    # Remove the parents which are duplicates from the orig_id point of view.
    for a_fk_to_proj_tbl in [Acquisition, Sample]:
        to_del: Query = self.session.query(a_fk_to_proj_tbl)
        if a_fk_to_proj_tbl == Acquisition:
            # Remove conflicting acquisitions, they should be empty?
            to_del = to_del.filter(Acquisition.acquisid == any_(
                list(common_acquisitions.keys())))  # type: ignore
        elif a_fk_to_proj_tbl == Sample:
            # Remove conflicting samples
            to_del = to_del.filter(Sample.sampleid == any_(
                list(common_samples.keys())))  # type: ignore
        rowcount = to_del.delete(synchronize_session=False)
        table_name = a_fk_to_proj_tbl.__tablename__  # type: ignore
        logger.info("Delete in %s: %s rows", table_name, rowcount)
    self.dest_augmented_mappings.write_to_project(dest_prj)
    ProjectPrivilegeBO.generous_merge_into(self.session, self.prj_id, self.src_prj_id)
    # Completely erase the source project
    ProjectBO.delete(self.session, self.src_prj_id)

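# A hedged worked example (made-up ids; table name assumed from the model) of the
# CTE-based remap above: with common_samples == {5: 6, 7: 8}, the Acquisition
# branch generates SQL equivalent to:
#       with upd_smp (src_id, dst_id) as (values (5,6), (7,8))
#       update acquisitions
#          set acq_sample_id = coalesce((select dst_id from upd_smp
#                                         where src_id = acq_sample_id), acq_sample_id)
#        where acq_sample_id = any(array[5,7])
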
def add_filter(self, upd):
    return upd.filter(Acquisition.acquisid == any_(self.ids))

def matching(self, _current_user_id: int,
             params: Dict[str, Any]) -> List[Tuple[Taxonomy, Optional[WoRMS]]]:
    """
        Return the list of matching entries b/w Taxonomy and WoRMS.
    """
    ret: List[Tuple[Taxonomy, Optional[WoRMS]]] = []
    taxo_ids_qry: Query = self.session.query(ProjectTaxoStat.id).distinct()
    taxo_ids_qry = taxo_ids_qry.filter(ProjectTaxoStat.nbr > 0)
    used_taxo_ids = [an_id for (an_id,) in taxo_ids_qry.all()]
    # No security check. TODO?
    case1 = "case1" in params
    """ Taxa with the same name on both sides, Phylo in EcoTaxa and accepted in WoRMS """
    case2 = "case2" in params
    """ Taxa with the same name on both sides, Morpho in EcoTaxa and accepted in WoRMS """
    case3 = "case3" in params
    """ Taxa with the same name on both sides, Phylo in EcoTaxa and NOT accepted in WoRMS,
        and there is no equivalent accepted match """
    case31 = "case31" in params
    case4 = "case4" in params
    case5 = "case5" in params
    """ No match, phylo """
    case6 = "case6" in params
    if case1:
        res = self.strict_match(self.session, used_taxo_ids)
        # Format the result
        for taxo, worms in res:
            ret.append((taxo, worms))
    elif case2:
        subqry = TaxonomyChangeService.strict_match_subquery(self.session, used_taxo_ids,
                                                             phylo_or_morpho="M")
        qry: Query = self.session.query(Taxonomy, WoRMS)
        qry = qry.join(subqry, subqry.c.id == Taxonomy.id)
        qry = qry.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
        logger.info("matching qry:%s", str(qry))
        res = qry.all()
        # Format the result
        for taxo, worms in res:
            ret.append((taxo, worms))
    elif case3:
        # statuses = ["temporary name", "nomen nudum", "interim unpublished",
        #             "nomen dubium", "unaccepted", "taxon inquirendum",
        #             "accepted", "uncertain", "alternate representation"]
        # A match exists, but the match / all matches are not accepted
        subqry = self.full_match_aggregated(used_taxo_ids)
        qry3: Query = self.session.query(Taxonomy, WoRMS)
        qry3 = qry3.join(subqry, subqry.c.id == Taxonomy.id)
        qry3 = qry3.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
        qry3 = qry3.filter(not_(subqry.c.acc.op('@>')(text("ARRAY['accepted'::varchar]"))))
        qry3 = qry3.filter(WoRMS.valid_name != None)
        # Status filter for clarity
        # flt = statuses[4]
        # status_filt = text("ARRAY['%s'::varchar]" % flt)
        # qry3 = qry3.filter(subqry.c.acc.op('@>')(status_filt))
        logger.info("matching qry:%s", str(qry3))
        res3 = qry3.all()
        # Format the result
        for taxo, worms in res3:
            ret.append((taxo, worms))
    elif case31:
        # A match exists, but the match / all matches are not accepted
        subqry = self.full_match_aggregated(used_taxo_ids)
        qry31: Query = self.session.query(Taxonomy, WoRMS)
        qry31 = qry31.join(subqry, subqry.c.id == Taxonomy.id)
        qry31 = qry31.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
        qry31 = qry31.filter(not_(subqry.c.acc.op('@>')(text("ARRAY['accepted'::varchar]"))))
        qry31 = qry31.filter(WoRMS.valid_name == None)
        logger.info("matching qry:%s", str(qry31))
        res31 = qry31.all()
        # Format the result
        for taxo, worms in res31:
            ret.append((taxo, worms))
    elif case4:
        subqry = self.full_match_aggregated(used_taxo_ids)
        qry4: Query = self.session.query(Taxonomy, WoRMS)
        qry4 = qry4.join(subqry, subqry.c.id == Taxonomy.id)
        qry4 = qry4.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
        qry4 = qry4.filter(subqry.c.cnt > 1)  # Several accepted matches
        # subqry = self.session.query(Taxonomy.name, func.max(Taxonomy.id).label("id"), WoRMS.aphia_id)
        # subqry = subqry.join(WoRMS, TaxonomyChangeService.match_with_extension())
        # subqry = subqry.filter(Taxonomy.id == any_(used_taxo_ids))
        # subqry = subqry.filter(Taxonomy.taxotype == 'P')
        # subqry = subqry.filter(WoRMS.status == 'accepted')
        # # Group to compute multiple matches
        # subqry = subqry.group_by(Taxonomy.name, WoRMS.aphia_id)
        # subqry = subqry.having(or_(func.count(Taxonomy.name) > 1,
        #                            func.count(WoRMS.aphia_id) > 1))
        # subqry = subqry.subquery().alias("ids")
        #
        # qry4: Query = self.session.query(Taxonomy, WoRMS)
        # qry4 = qry4.join(subqry, subqry.c.id == Taxonomy.id)
        # qry4 = qry4.join(WoRMS, subqry.c.aphia_id == WoRMS.aphia_id)
        logger.info("matching qry:%s", str(qry4))
        res = qry4.all()
        # Format the result
        for taxo, worms in res:
            ret.append((taxo, worms))
    elif case5:
        # No match, phylo
        subqry = TaxonomyChangeService.strict_match_subquery(self.session, used_taxo_ids,
                                                             phylo_or_morpho=None)
        subqry2 = self.full_match_aggregated(used_taxo_ids)
        qry5: Query = self.session.query(Taxonomy)
        qry5 = qry5.filter(Taxonomy.id == any_(used_taxo_ids))
        qry5 = qry5.filter(Taxonomy.taxotype == 'P')
        qry5 = qry5.filter(not_(Taxonomy.id.in_(self.session.query(subqry.c.id))))
        qry5 = qry5.filter(not_(Taxonomy.id.in_(self.session.query(subqry2.c.id))))
        logger.info("matching qry:%s", str(qry5))
        res5 = qry5.all()
        # Format the result
        for taxo in res5:
            ret.append((taxo, None))
    elif case6:
        # No match, morpho
        subqry = TaxonomyChangeService.strict_match_subquery(self.session, used_taxo_ids,
                                                             phylo_or_morpho=None)
        qry6: Query = self.session.query(Taxonomy)
        qry6 = qry6.filter(Taxonomy.id == any_(used_taxo_ids))
        qry6 = qry6.filter(Taxonomy.taxotype == 'M')
        qry6 = qry6.filter(not_(Taxonomy.id.in_(self.session.query(subqry.c.id))))
        logger.info("matching qry:%s", str(qry6))
        res6 = qry6.all()
        # Format the result
        for taxo in res6:
            ret.append((taxo, None))
    return ret

def add_filter(self, upd):
    if "obj_head." in str(upd):
        ret = upd.filter(ObjectHeader.objid == any_(self.object_ids))
    else:
        ret = upd.filter(ObjectFields.objfid == any_(self.object_ids))
    return ret

def classify_validate(self, user_id: UserIDT, classif_ids: ClassifIDListT, wanted_qualif: str) \
        -> Tuple[int, Dict[Tuple, ObjectIDListT]]:
    """
        Set current classifications in self and/or validate the current classification.
        :param user_id: The User who did these changes.
        :param classif_ids: One category id for each of the object ids in self. -1 means "keep current".
        :param wanted_qualif: Validated or Dubious.
        :returns: The number of updated rows and a summary of the changes, for MRU and logging.
    """
    # Gather the state of classification, for the impacted objects, before the change. Keep a lock on the rows.
    qry = select([ObjectHeader.objid,
                  ObjectHeader.classif_auto_id, ObjectHeader.classif_auto_when,
                  ObjectHeader.classif_auto_score,
                  ObjectHeader.classif_id, ObjectHeader.classif_qual,
                  ObjectHeader.classif_who, ObjectHeader.classif_when]).with_for_update(key_share=True)
    qry = qry.where(ObjectHeader.objid == any_(self.object_ids))
    logger.info("Fetch with lock: %s", qry)
    res: ResultProxy = self.session.execute(qry)
    prev = {rec['objid']: rec for rec in res.fetchall()}
    # Cook a diff b/w present and wanted values, both for the update of obj_head
    # and for preparing the ones on _stat.
    # Group the updates, as lots of them are identical.
    updates: Dict[Tuple, EnumeratedObjectSet] = {}
    all_changes: OrderedDict[Tuple, List[int]] = OrderedDict()
    # A bit of obsessive optimization
    classif_id_col = ObjectHeader.classif_id.name
    classif_qual_col = ObjectHeader.classif_qual.name
    classif_who_col = ObjectHeader.classif_who.name
    classif_when_col = ObjectHeader.classif_when.name
    for obj_id, v in zip(self.object_ids, classif_ids):
        prev_obj = prev[obj_id]
        prev_classif_id: Optional[int] = prev_obj['classif_id']
        new_classif_id: Optional[int]
        if v == -1:  # Special value from "validate all"
            # Arrange that no change can happen for this field
            # Note: prev_classif_id can be None
            new_classif_id = prev_classif_id
        else:
            new_classif_id = v
        prev_classif_qual = prev_obj['classif_qual']
        if (prev_classif_id == new_classif_id
                and prev_classif_qual == wanted_qualif
                and prev_obj['classif_who'] == user_id):
            continue
        # There was at least 1 field change for this object
        an_update = updates.setdefault((new_classif_id, wanted_qualif),
                                       EnumeratedObjectSet(self.session, []))
        an_update.add(obj_id)
        # Compact the changes, grouped by operation
        change_key = (prev_classif_id, prev_classif_qual, new_classif_id, wanted_qualif)
        for_this_change = all_changes.setdefault(change_key, [])
        for_this_change.append(obj_id)
        # Keep the most recently used first
        all_changes.move_to_end(change_key, last=False)
    if len(updates) == 0:
        # Nothing to do
        return 0, all_changes
    # Update obj_head, grouped by similar operations.
    nb_updated = 0
    sql_now = text("now()")
    for (new_classif_id, wanted_qualif), an_obj_set in updates.items():
        # Historize the updated rows (can be a lot!)
        an_obj_set.historize_classification()
        row_upd = {classif_id_col: new_classif_id,
                   classif_qual_col: wanted_qualif,
                   classif_who_col: user_id,
                   classif_when_col: sql_now}
        # Do the update itself
        nb_updated += an_obj_set.update(row_upd)
    logger.info("%d rows updated in %d queries", nb_updated, len(updates))
    # Return the statuses
    return nb_updated, all_changes

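# A hedged mini-simulation (made-up ids) of the change grouping above: objects
# sharing the same previous state and target state collapse into one change key,
# hence one UPDATE statement for the whole group.
from collections import OrderedDict

grouped: "OrderedDict[tuple, list]" = OrderedDict()
for obj_id, (prev_id, prev_q) in [(101, (123, 'P')), (102, (123, 'P'))]:
    change_key = (prev_id, prev_q, 123, 'V')
    grouped.setdefault(change_key, []).append(obj_id)
    grouped.move_to_end(change_key, last=False)
assert grouped == OrderedDict({(123, 'P', 123, 'V'): [101, 102]})
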
def add_filter(self, upd):
    return upd.filter(Sample.sampleid == any_(self.ids))

def add_filter(self, upd):
    return upd.filter(Process.processid == any_(self.ids))
