def find_matches(dataset, text, filter=None, exclude=None):
    entities = Entity.__table__
    match_text = normalize(text, dataset)[:254]

    # select text column and apply necessary transformations
    text_field = entities.c.name
    if dataset.normalize_text:
        text_field = entities.c.normalized
    if dataset.ignore_case:
        text_field = func.lower(text_field)
    text_field = func.left(text_field, 254)

    # calculate the difference percentage
    l = func.greatest(1.0,
                      func.least(len(match_text), func.length(text_field)))
    score = func.greatest(0.0,
                          ((l - func.levenshtein(text_field, match_text)) / l) * 100.0)
    score = func.max(score).label('score')

    # coalesce the canonical identifier
    id_ = func.coalesce(entities.c.canonical_id, entities.c.id).label('id')

    # apply filters
    filters = [entities.c.dataset_id == dataset.id,
               entities.c.invalid == False]
    if not dataset.match_aliases:
        filters.append(entities.c.canonical_id == None)
    if exclude is not None:
        filters.append(entities.c.id != exclude)
    if filter is not None:
        filters.append(text_field.ilike('%%%s%%' % filter))

    q = select([id_, score], and_(*filters), [entities],
               group_by=[id_], order_by=[score.desc()])
    return Matches(q)
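# A plain-Python sketch of the scoring formula above, for intuition only;
# `levenshtein` and `similarity_pct` are illustrative names, not part of
# the project this snippet comes from.
def levenshtein(a, b):
    # classic dynamic-programming edit distance
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[-1]


def similarity_pct(a, b):
    # mirrors the SQL: greatest(1, least(...)) guards against empty
    # strings, then the edit distance is scaled to a 0-100 score
    length = max(1.0, min(len(a), len(b)))
    return max(0.0, ((length - levenshtein(a, b)) / length) * 100.0)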
def get_time_priority_query(self):
    t0 = get_config('priorities.t0')
    t1 = get_config('priorities.t1')
    a = get_config('priorities.build_threshold') / (math.log10(t1) - math.log10(t0))
    b = -a * math.log10(t0)
    log_arg = func.greatest(0.000001, hours_since(func.max(Build.started)))
    time_expr = func.greatest(a * func.log(log_arg) + b, -30)
    return self.db.query(Build.package_id.label('pkg_id'),
                         time_expr.label('priority'))\
        .group_by(Build.package_id)
def get_time_priority_query(self):
    t0 = self.priority_conf['t0']
    t1 = self.priority_conf['t1']
    a = self.priority_threshold / (math.log10(t1) - math.log10(t0))
    b = -a * math.log10(t0)
    log_arg = func.greatest(0.000001, hours_since(func.max(Build.started)))
    time_expr = func.greatest(a * func.log(log_arg) + b, -30)
    return self.db.query(Build.package_id.label('pkg_id'),
                         time_expr.label('priority'))\
        .group_by(Build.package_id)
def intersection(geom_a, geom_b):
    """Computes the area of the intersection of two bounding boxes."""
    intersection_score = \
        func.greatest(0, (func.least(geom_a.x2, geom_b.x2) -
                          func.greatest(geom_a.x1, geom_b.x1))) * \
        func.greatest(0, (func.least(geom_a.y2, geom_b.y2) -
                          func.greatest(geom_a.y1, geom_b.y1)))
    return intersection_score
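# The same clamped-overlap arithmetic in plain Python (an illustrative
# sketch; the dict-based boxes are an assumption, not the project's types).
def box_intersection_area(a, b):
    # the overlap along each axis is clipped at zero, so disjoint boxes
    # contribute an area of 0 rather than a negative value
    width = max(0, min(a['x2'], b['x2']) - max(a['x1'], b['x1']))
    height = max(0, min(a['y2'], b['y2']) - max(a['y1'], b['y1']))
    return width * height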
def query(self):
    tables = self.left.from_clause + self.right.from_clause
    left_lt = self.config.linktab.alias('__left_linktab')
    right_lt = self.config.linktab.alias('__right_linktab')
    tables += [left_lt, right_lt]
    columns = []
    score_length = func.greatest(func.length(self.left.key),
                                 func.length(self.right.key))
    score_leven = func.levenshtein(self.left.key, self.right.key)
    score_leven = cast(score_leven, Float)
    score = 1 - (score_leven / score_length)
    columns.append(score.label("score"))
    for field in self.left.fields:
        columns.append(field.column.label(field.column_ref))
    for field in self.right.fields:
        columns.append(field.column.label(field.column_ref))
    q = select(columns=columns, from_obj=tables)
    q = self.left.apply_filters(q)
    q = self.right.apply_filters(q)
    q = q.where(left_lt.c.key == self.left.key)
    q = q.where(left_lt.c.view == self.left.name)
    q = q.where(right_lt.c.key == self.right.key)
    q = q.where(right_lt.c.view == self.right.name)
    # TODO: make this levenshteinable
    q = q.where(right_lt.c.fingerprint == left_lt.c.fingerprint)
    q = q.limit(self.config.cutoff + 1)
    q = q.order_by(score.desc())
    q = q.distinct()
    # print q
    return q
def _insert_or_update(self, timestamp, values, lastseen=None):
    stmt = insert(self.tables.passive)\
        .values(dict(values, addr=utils.force_int2ip(values['addr'])))
    try:
        self.db.execute(stmt)
    except IntegrityError:
        whereclause = and_(
            self.tables.passive.addr == values['addr'],
            self.tables.passive.sensor == values['sensor'],
            self.tables.passive.recontype == values['recontype'],
            self.tables.passive.source == values['source'],
            self.tables.passive.value == values['value'],
            self.tables.passive.targetval == values['targetval'],
            self.tables.passive.info == values['info'],
            self.tables.passive.port == values['port'])
        upsert = {
            'firstseen': func.least(
                self.tables.passive.firstseen,
                timestamp,
            ),
            'lastseen': func.greatest(
                self.tables.passive.lastseen,
                lastseen or timestamp,
            ),
            'count': self.tables.passive.count + values['count'],
        }
        updt = update(
            self.tables.passive).where(whereclause).values(upsert)
        self.db.execute(updt)
def _insert_or_update(self, timestamp, values, lastseen=None):
    stmt = insert(self.tables.passive)\
        .values(dict(values, addr=utils.force_int2ip(values['addr'])))
    try:
        self.db.execute(stmt)
    except IntegrityError:
        whereclause = and_(
            self.tables.passive.addr == values['addr'],
            self.tables.passive.sensor == values['sensor'],
            self.tables.passive.recontype == values['recontype'],
            self.tables.passive.source == values['source'],
            self.tables.passive.value == values['value'],
            self.tables.passive.targetval == values['targetval'],
            self.tables.passive.info == values['info'],
            self.tables.passive.port == values['port']
        )
        upsert = {
            'firstseen': func.least(
                self.tables.passive.firstseen,
                timestamp,
            ),
            'lastseen': func.greatest(
                self.tables.passive.lastseen,
                lastseen or timestamp,
            ),
            'count': self.tables.passive.count + values['count'],
        }
        updt = update(
            self.tables.passive
        ).where(whereclause).values(upsert)
        self.db.execute(updt)
def _insert_or_update(self, timestamp, vals, lastseen=None):
    stmt = postgresql.insert(self.tables.passive).values(vals)
    index = ['addr', 'sensor', 'recontype', 'port', 'source', 'value',
             'targetval', 'info']
    upsert = {
        'firstseen': func.least(
            self.tables.passive.firstseen,
            timestamp,
        ),
        'lastseen': func.greatest(
            self.tables.passive.lastseen,
            lastseen or timestamp,
        ),
        'count': self.tables.passive.count + stmt.excluded.count,
    }
    self.db.execute(
        stmt.on_conflict_do_update(
            index_elements=index,
            set_=upsert,
        ))
def top_10_fields_by_prob():
    a = LocalizationTile
    b = FieldTile
    a_lo = a.nested_lo.label('a_lo')
    a_hi = a.nested_hi.label('a_hi')
    b_lo = b.nested_lo.label('b_lo')
    b_hi = b.nested_hi.label('b_hi')

    query1 = db.session.query(
        a_lo, a_hi, b_lo, b_hi,
        FieldTile.field_id.label('field_id'),
        LocalizationTile.localization_id.label('localization_id'),
        LocalizationTile.probdensity.label('probdensity'))

    query2 = union(
        query1.join(b, a_lo.between(b_lo, b_hi)),
        query1.join(b, b_lo.between(a_lo, a_hi)),
    ).cte()

    lo = func.greatest(query2.c.a_lo, query2.c.b_lo)
    hi = func.least(query2.c.a_hi, query2.c.b_hi)
    area = (hi - lo + 1) * healpix.PIXEL_AREA
    prob = func.sum(query2.c.probdensity * area).label('probability')

    query = db.session.query(
        query2.c.localization_id,
        query2.c.field_id,
        prob
    ).group_by(
        query2.c.localization_id,
        query2.c.field_id
    ).order_by(prob.desc()).limit(10)

    return query.all()
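# Plain-Python sketch of the tile-overlap arithmetic above (illustrative
# only): for two inclusive index ranges, greatest() of the lows and least()
# of the highs bound the shared run, whose length is hi - lo + 1 when the
# ranges actually intersect.
def range_overlap_length(a_lo, a_hi, b_lo, b_hi):
    lo = max(a_lo, b_lo)
    hi = min(a_hi, b_hi)
    return max(0, hi - lo + 1)


assert range_overlap_length(0, 9, 5, 20) == 5   # tiles 5..9 are shared
assert range_overlap_length(0, 4, 10, 20) == 0  # disjoint ranges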
def fetch_sms_free_allowance_remainder(start_date):
    # ASSUMPTION: AnnualBilling has been populated for year.
    billing_year = get_financial_year_for_datetime(start_date)
    start_of_year = date(billing_year, 4, 1)

    billable_units = func.coalesce(
        func.sum(FactBilling.billable_units * FactBilling.rate_multiplier), 0)

    query = db.session.query(
        AnnualBilling.service_id.label("service_id"),
        AnnualBilling.free_sms_fragment_limit,
        billable_units.label('billable_units'),
        func.greatest((AnnualBilling.free_sms_fragment_limit - billable_units).cast(Integer),
                      0).label('sms_remainder')
    ).outerjoin(
        # if there are no ft_billing rows for a service we still want to
        # return the annual billing so we can use the free_sms_fragment_limit
        FactBilling,
        and_(
            AnnualBilling.service_id == FactBilling.service_id,
            FactBilling.bst_date >= start_of_year,
            FactBilling.bst_date < start_date,
            FactBilling.notification_type == SMS_TYPE,
        )
    ).filter(
        AnnualBilling.financial_year_start == billing_year,
    ).group_by(
        AnnualBilling.service_id,
        AnnualBilling.free_sms_fragment_limit,
    )
    return query
def reddit_score(self):
    s = self.upvotes - self.downvotes
    order = func.log(10, func.greatest(func.abs(s), 1))
    sign = func.sign(s)
    seconds = func.date_part('epoch', self.timestamp) - 1134028003
    return func.round(func.cast(sign * order + seconds / 45000, Numeric), 7)
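# A plain-Python rendering of the ranking expression above, for intuition
# only; the 1134028003 epoch and the 45000-second divisor are taken
# verbatim from the snippet, and `reddit_score_py` is an illustrative name.
import math


def reddit_score_py(upvotes, downvotes, epoch_seconds):
    s = upvotes - downvotes
    order = math.log10(max(abs(s), 1))      # greatest(abs(s), 1) guards log(0)
    sign = (s > 0) - (s < 0)                # -1, 0 or 1
    seconds = epoch_seconds - 1134028003
    return round(sign * order + seconds / 45000, 7)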
def _select_records(self) -> list[tuple[str, datetime]]:
    """Select records to be evaluated for publication to, or
    retraction from, a catalog.

    A record is selected if:

    * there is no corresponding catalog_record entry; or
    * the record has any embargo tags; or
    * catalog_record.timestamp is less than any of the following:

      * catalog.schema.timestamp
      * collection.timestamp
      * record.timestamp

    :return: a list of (record_id, timestamp) tuples, where timestamp
        is that of the latest contributing change
    """
    catalog = Session.get(Catalog, self.catalog_id)

    records_subq = (
        select(
            Record.id.label('record_id'),
            func.greatest(
                catalog.schema.timestamp,
                Collection.timestamp,
                Record.timestamp,
            ).label('max_timestamp')
        ).
        join(Collection).
        subquery()
    )

    catalog_records_subq = (
        select(
            CatalogRecord.record_id,
            CatalogRecord.timestamp
        ).
        where(CatalogRecord.catalog_id == self.catalog_id).
        subquery()
    )

    stmt = (
        select(
            records_subq.c.record_id,
            records_subq.c.max_timestamp
        ).
        outerjoin_from(records_subq, catalog_records_subq).
        where(or_(
            catalog_records_subq.c.record_id == None,
            catalog_records_subq.c.timestamp < records_subq.c.max_timestamp,
            catalog_records_subq.c.record_id.in_(
                select(RecordTag.record_id).
                where(RecordTag.tag_id == ODPRecordTag.EMBARGO)
            )
        ))
    )

    return Session.execute(stmt).all()
def get_observation_nearest_query(args):
    '''Get an observation of the specified feature from the node nearest
    to the provided long, lat coordinates.

    :param args: (ValidatorResult) validated query arguments
    '''
    # TODO(heyzoos)
    # [ ] Test me! Specifically test property filtering.

    lng = args.data['lng']
    lat = args.data['lat']
    feature = args.data['feature']
    network = args.data['network']
    point_dt = args.data['datetime'] if args.data.get('datetime') else datetime.now()
    conditions = args.data.get('filter')

    nearest_nodes_rp = NodeMeta.nearest_neighbor_to(
        lng, lat, network=network.name, features=[feature.name]
    )

    if not nearest_nodes_rp:
        return 'No nodes could be found nearby with your target feature.'

    feature_str = '{}__{}'.format(network.name, feature.name)
    feature = redshift_base.metadata.tables[feature_str]

    result = None
    for row in nearest_nodes_rp:
        query = redshift_session.query(feature).filter(and_(
            feature.c.node_id == row.node,
            feature.c.datetime <= point_dt + timedelta(hours=12),
            feature.c.datetime >= point_dt - timedelta(hours=12)
        ))

        if conditions is not None:
            query = query.filter(conditions)

        query = query.order_by(
            asc(
                # Ensures that the interval value is always positive,
                # since the abs() function doesn't work for intervals
                sqla_fn.greatest(point_dt, feature.c.datetime) -
                sqla_fn.least(point_dt, feature.c.datetime)
            )
        )

        # Magic number 3 because IFTTT tests require at least three results
        result = query.limit(3).all()

        if result is not None:
            break

    if result is None:
        return 'Your feature has not been reported on by the nearest 10 ' \
               'nodes at the time provided.'

    return [format_observation(obs, feature) for obs in result]
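# Why greatest/least instead of abs(): for any ordered type,
# greatest(a, b) - least(a, b) equals |a - b|, which matters for SQL
# intervals where abs() is not defined. Illustrative plain-Python sketch:
def absolute_difference(a, b):
    return max(a, b) - min(a, b)


assert absolute_difference(3, 10) == absolute_difference(10, 3) == 7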
def average_unit_profit(self):
    from .enum_values import EnumValues
    return ((select([-func.sum(InventoryTransactionLine.quantity *
                               InventoryTransactionLine.price) /
                     func.greatest(func.sum(InventoryTransactionLine.quantity), 1)])
             .where(self.id == InventoryTransactionLine.product_id)
             .where(InventoryTransactionLine.inventory_transaction_id ==
                    InventoryTransaction.id)
             .where(InventoryTransaction.type_id == EnumValues.id)
             .where(or_(EnumValues.code == const.SALES_OUT_INV_TRANS_TYPE_KEY,
                        EnumValues.code == const.PURCHASE_IN_INV_TRANS_KEY)))
            .label('average_unit_profit'))
def average_retail_price(self):
    from psi.app.models import EnumValues
    # Note: the original chained these conditions with Python's "and"
    # inside a single .where(), which silently keeps only the first
    # condition; separate .where() calls express the intended SQL AND.
    return (select([func.sum(InventoryTransactionLine.quantity *
                             InventoryTransactionLine.price) /
                    func.greatest(func.sum(InventoryTransactionLine.quantity), 1)])
            .where(self.id == InventoryTransactionLine.product_id)
            .where(InventoryTransactionLine.inventory_transaction_id ==
                   InventoryTransaction.id)
            .where(InventoryTransaction.type_id == EnumValues.id)
            .where(EnumValues.code == const.SALES_OUT_INV_TRANS_TYPE_KEY)
            .label('average_retail_price'))
def order_by(cls):
    """Order the search results by last update time."""
    from core.model import MaterializedWorkWithGenre as work_model
    # TODO: first_appearance is only necessary here if this is for a
    # custom list.
    updated = func.greatest(work_model.availability_time,
                            work_model.first_appearance,
                            work_model.last_update_time)
    collection_id = work_model.collection_id
    work_id = work_model.works_id
    return ([updated.desc(), collection_id, work_id],
            [updated, collection_id, work_id])
def get_time_priority_query(self):
    t0 = self.priority_conf['t0']
    t1 = self.priority_conf['t1']
    a = self.priority_threshold / (math.log10(t1) - math.log10(t0))
    b = -a * math.log10(t0)
    time_expr = func.greatest(
        a * func.log(hours_since(func.max(Build.started)) + 0.00001) + b,
        -30)
    return self.db.query(Build.package_id.label('pkg_id'),
                         time_expr.label('priority'))\
        .group_by(Build.package_id)
def daily_amount_select():
    return select([
        func.cast(
            func.sum(SalesOrderLine.unit_price * SalesOrderLine.quantity) /
            func.greatest(
                func.cast(
                    func.date_part('DAY',
                                   func.current_date() - Product.create_date),
                    Integer), 1),
            Numeric)
    ]).as_scalar()
def average_unit_profit(self):
    from .enum_values import EnumValues
    return ((select([
        -func.sum(InventoryTransactionLine.quantity *
                  InventoryTransactionLine.price) /
        func.greatest(func.sum(InventoryTransactionLine.quantity), 1)
    ]).where(self.id == InventoryTransactionLine.product_id).where(
        InventoryTransactionLine.inventory_transaction_id ==
        InventoryTransaction.id).where(
            InventoryTransaction.type_id == EnumValues.id).where(
                or_(EnumValues.code == const.SALES_OUT_INV_TRANS_TYPE_KEY,
                    EnumValues.code == const.PURCHASE_IN_INV_TRANS_KEY))
    ).label('average_unit_profit'))
def average_retail_price(self):
    from psi.app.models import EnumValues
    # Note: as in the variant above, the original used Python's "and"
    # inside a single .where(), which keeps only the first condition;
    # chained .where() calls restore the intended SQL AND.
    return (select([
        func.sum(InventoryTransactionLine.quantity *
                 InventoryTransactionLine.price) /
        func.greatest(func.sum(InventoryTransactionLine.quantity), 1)
    ]).where(self.id == InventoryTransactionLine.product_id)
      .where(InventoryTransactionLine.inventory_transaction_id ==
             InventoryTransaction.id)
      .where(InventoryTransaction.type_id == EnumValues.id)
      .where(EnumValues.code == const.SALES_OUT_INV_TRANS_TYPE_KEY)
      .label('average_retail_price'))
def find_matches(dataset, text, filter=None, exclude=None):
    entities = Entity.__table__
    match_text = (normalize(text) or '')[:254]

    # select text column and apply necessary transformations
    text_field = entities.c.name
    if dataset.normalize_text:
        text_field = entities.c.normalized
    if dataset.ignore_case:
        text_field = func.lower(text_field)
    text_field = func.left(text_field, 254)

    # calculate the difference percentage
    min_l = func.greatest(1.0,
                          func.least(len(match_text), func.length(text_field)))
    score = func.greatest(
        0.0,
        ((min_l - func.levenshtein(text_field, match_text)) / min_l) * 100.0)
    score = func.max(score).label('score')

    # coalesce the canonical identifier
    id_ = func.coalesce(entities.c.canonical_id, entities.c.id).label('id')

    # apply filters
    filters = [
        entities.c.dataset_id == dataset.id,
        entities.c.invalid == False  # noqa
    ]
    if not dataset.match_aliases:
        filters.append(entities.c.canonical_id == None)  # noqa
    if exclude is not None:
        filters.append(entities.c.id != exclude)
    if filter is not None:
        filters.append(text_field.ilike('%%%s%%' % filter))

    q = select([id_, score], and_(*filters), [entities],
               group_by=[id_], order_by=[score.desc()])
    return Matches(q)
def fetch_sms_billing_for_all_services(start_date, end_date):
    # ASSUMPTION: AnnualBilling has been populated for year.
    free_allowance_remainder = fetch_sms_free_allowance_remainder(start_date).subquery()

    sms_billable_units = func.sum(FactBilling.billable_units * FactBilling.rate_multiplier)
    sms_remainder = func.coalesce(
        free_allowance_remainder.c.sms_remainder,
        free_allowance_remainder.c.free_sms_fragment_limit
    )
    chargeable_sms = func.greatest(sms_billable_units - sms_remainder, 0)
    sms_cost = chargeable_sms * FactBilling.rate

    query = db.session.query(
        Organisation.name.label('organisation_name'),
        Organisation.id.label('organisation_id'),
        Service.name.label("service_name"),
        Service.id.label("service_id"),
        free_allowance_remainder.c.free_sms_fragment_limit,
        FactBilling.rate.label('sms_rate'),
        sms_remainder.label("sms_remainder"),
        sms_billable_units.label('sms_billable_units'),
        chargeable_sms.label("chargeable_billable_sms"),
        sms_cost.label('sms_cost'),
    ).select_from(
        Service
    ).outerjoin(
        free_allowance_remainder,
        Service.id == free_allowance_remainder.c.service_id
    ).outerjoin(
        Service.organisation
    ).join(
        FactBilling,
        FactBilling.service_id == Service.id,
    ).filter(
        FactBilling.bst_date >= start_date,
        FactBilling.bst_date <= end_date,
        FactBilling.notification_type == SMS_TYPE,
    ).group_by(
        Organisation.name,
        Organisation.id,
        Service.id,
        Service.name,
        free_allowance_remainder.c.free_sms_fragment_limit,
        free_allowance_remainder.c.sms_remainder,
        FactBilling.rate,
    ).order_by(
        Organisation.name,
        Service.name
    )

    return query.all()
def _flush_img_stats_bucket(self, db_session, dt_period_start, dt_now, stats):
    inserts = []
    model = ImageStats
    for image_id, istats in stats.items():
        rcount = istats['requests']
        vcount = istats['views']
        cvcount = istats['cached_views']
        dcount = istats['downloads']
        bytesum = istats['bytes']
        rsecs = istats['request_seconds']
        maxsecs = istats['max_request_seconds']
        update_num = db_session.query(model).filter(
            model.image_id == image_id
        ).filter(
            model.from_time > dt_period_start
        ).update({
            model.requests: model.requests + rcount,
            model.views: model.views + vcount,
            model.cached_views: model.cached_views + cvcount,
            model.downloads: model.downloads + dcount,
            model.total_bytes: model.total_bytes + bytesum,
            model.request_seconds: model.request_seconds + rsecs,
            model.max_request_seconds: func.greatest(model.max_request_seconds, maxsecs),
            model.to_time: dt_now
        }, synchronize_session=False)
        if not update_num:
            inserts.append({
                'image_id': image_id,
                'requests': rcount,
                'views': vcount,
                'cached_views': cvcount,
                'downloads': dcount,
                'total_bytes': bytesum,
                'request_seconds': rsecs,
                'max_request_seconds': maxsecs,
                'from_time': self.caches_started,
                'to_time': dt_now
            })
    db_session.commit()
    if inserts:
        try:
            db_session.execute(ImageStats.__table__.insert(), inserts)
        except IntegrityError:
            db_session.rollback()
            inserts = self._fix_insert_list(inserts, db_session)
            if inserts:
                db_session.execute(ImageStats.__table__.insert(), inserts)
def find_matches(project, account, text, schemata=[], properties=[]):
    main = aliased(Property)
    ent = aliased(Entity)
    q = db.session.query(main.entity_id)
    q = q.filter(main.name == "name")
    q = q.filter(main.entity_id == ent.id)
    q = q.join(ent)
    q = q.filter(ent.project_id == project.id)

    for schema in schemata:
        obj = aliased(Schema)
        q = q.join(obj, ent.schema_id == obj.id)
        q = q.filter(obj.name == schema)

    for name, value in properties:
        p = aliased(Property)
        q = q.join(p, p.entity_id == ent.id)
        q = q.filter(p.active == True)  # noqa
        q = q.filter(p.name == name)
        attr = project.get_attribute("entity", name)
        column = getattr(p, attr.value_column)
        q = q.filter(column == value)

    # prepare text fields (todo: further normalization!)
    text_field = func.left(func.lower(main.value_string), 254)
    match_text = text.lower().strip()[:254]
    match_text_db = cast(match_text, types.Unicode)

    # calculate the difference percentage
    l = func.greatest(1.0, func.least(len(match_text),
                                      func.length(text_field)))
    score = func.greatest(0.0,
                          ((l - func.levenshtein(text_field, match_text_db)) / l) * 100.0)
    score = score.label("score")

    q = q.add_columns(score)
    q = q.order_by(score.desc())
    q = q.filter(score > 50)
    return Matches(q, account)
def find_matches(project, account, text, schemata=[], properties=[]):
    main = aliased(Property)
    ent = aliased(Entity)
    q = db.session.query(main.entity_id)
    q = q.filter(main.name == 'name')
    q = q.filter(main.entity_id == ent.id)
    q = q.join(ent)
    q = q.filter(ent.project_id == project.id)

    if len(schemata):
        obj = aliased(Schema)
        q = q.join(obj, ent.schema_id == obj.id)
        q = q.filter(obj.name.in_(schemata))

    for name, value in properties:
        p = aliased(Property)
        q = q.join(p, p.entity_id == ent.id)
        q = q.filter(p.active == True)  # noqa
        q = q.filter(p.name == name)
        column = getattr(p, p.type_column(value))
        q = q.filter(column == value)

    # prepare text fields (todo: further normalization!)
    text_field = func.left(func.lower(main.value_string), 254)
    match_text = text.lower().strip()[:254]
    match_text_db = cast(match_text, types.Unicode)

    # calculate the difference percentage
    l = func.greatest(1.0, func.least(len(match_text),
                                      func.length(text_field)))
    score = func.greatest(0.0,
                          ((l - func.levenshtein(text_field, match_text_db)) / l) * 100.0)
    score = score.label('score')

    q = q.group_by(main.entity_id)
    q = q.add_columns(func.max(score))
    q = q.order_by(func.max(score).desc())
    q = q.filter(score > 50)
    return Matches(q, project, account)
def fetch_sms_free_allowances():
    """
    We use one rate for the entire financial year.
    Take the most recently touched entry.
    """
    modified_at = func.greatest(AnnualBilling.created_at, AnnualBilling.updated_at)
    return db.session.query(
        AnnualBilling.service_id,
        AnnualBilling.financial_year_start,
        AnnualBilling.free_sms_fragment_limit,
        modified_at.label('modified_at'),
    ).distinct(
        AnnualBilling.service_id,
        AnnualBilling.financial_year_start,
    ).order_by(
        AnnualBilling.service_id,
        AnnualBilling.financial_year_start,
        modified_at.desc(),
    )
def get_band(session, dataset, freq_eff, freq_bw, freq_bw_max=.0):
    """
    Returns the frequency band for the given frequency parameters. Will
    create a new frequency band entry in the database if no match is found.
    You can limit the bandwidth of the band association with freq_bw_max.

    args:
        session (sqlalchemy.orm.session.Session): a SQLAlchemy session object
        dataset (tkp.db.model.Dataset): the TraP dataset
        freq_eff (float): The central frequency of image to get band for
        freq_bw (float): The bandwidth of image to get band for
        freq_bw_max (float): The maximum bandwidth used for band association.
            Not used if 0.0 (default).

    returns:
        tkp.db.model.Frequencyband: a frequency band object
    """
    if freq_bw_max == .0:
        bw_half = freq_bw / 2
        low = freq_eff - bw_half
        high = freq_eff + bw_half
    else:
        bw_half = freq_bw_max / 2
        low = freq_eff - bw_half
        high = freq_eff + bw_half

    w1 = high - low
    w2 = Frequencyband.freq_high - Frequencyband.freq_low
    max_ = func.greatest(high, Frequencyband.freq_high)
    min_ = func.least(low, Frequencyband.freq_low)

    band = session.query(Frequencyband).filter(
        (Frequencyband.dataset == dataset) & (max_ - min_ < w1 + w2)
    ).first()

    if not band:
        # no match so we create a new band
        band = Frequencyband(freq_central=freq_eff, freq_low=low,
                             freq_high=high, dataset=dataset)
        session.add(band)

    return band
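# Sketch of the overlap test used above (illustrative only): two intervals
# overlap exactly when the span of their union, greatest(highs) minus
# least(lows), is smaller than the sum of their individual widths.
def bands_overlap(low, high, f_low, f_high):
    w1 = high - low
    w2 = f_high - f_low
    return max(high, f_high) - min(low, f_low) < w1 + w2


assert bands_overlap(0.0, 2.0, 1.0, 3.0)      # overlapping bands
assert not bands_overlap(0.0, 1.0, 2.0, 3.0)  # disjoint bands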
def decayed_score(score, created_at, peak=5, nominal_timestamp=14 * 24 * 60 * 60):
    """
    Creates a decaying (over time) version of the provided `score`.

    The returned value is score * a multiplier determined by `peak` and
    `nominal_timestamp`.

    Args:
        score: (number) The base score to modify
        created_at: (timestamp) The timestamp the score is attributed to
        peak?: (number) The peak multiplier possible
        nominal_timestamp?: (number) The age, in seconds, beyond which
            the multiplier stops decaying

    Returns:
        A SQLAlchemy expression representing decayed score
        (score * multiplier) where multiplier is represented by:
        max(0.2, peak ^ (1 - min(time_ago / nominal_timestamp, 1)))
    """
    # Note: the original hardcoded 5 as the base of the pow() despite
    # documenting `peak` as the peak multiplier; `peak` is used here so
    # the code matches its own docstring.
    return score * func.greatest(
        func.pow(
            peak,
            1 - func.least(seconds_ago(created_at) / nominal_timestamp, 1)),
        0.2)
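# A quick numeric check of the decay curve (illustrative only): the
# multiplier starts at `peak` for a brand-new item and decays toward 1 as
# the age approaches nominal_timestamp, where min(..., 1) pins it.
def decay_multiplier(age_seconds, peak=5, nominal=14 * 24 * 60 * 60):
    return max(0.2, peak ** (1 - min(age_seconds / nominal, 1)))


assert decay_multiplier(0) == 5                     # brand new: full peak
assert decay_multiplier(7 * 24 * 3600) == 5 ** 0.5  # halfway through window
assert decay_multiplier(14 * 24 * 3600) == 1        # at/after the window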
def _insert_or_update(self, timestamp, values, lastseen=None,
                      replacecount=False):
    stmt = insert(self.tables.passive).values(
        dict(values, addr=utils.force_int2ip(values["addr"])))
    try:
        self.db.execute(stmt)
    except IntegrityError:
        whereclause = and_(
            self.tables.passive.addr == values["addr"],
            self.tables.passive.sensor == values["sensor"],
            self.tables.passive.recontype == values["recontype"],
            self.tables.passive.source == values["source"],
            self.tables.passive.value == values["value"],
            self.tables.passive.targetval == values["targetval"],
            self.tables.passive.info == values["info"],
            self.tables.passive.port == values["port"],
        )
        upsert = {
            "firstseen": func.least(
                self.tables.passive.firstseen,
                timestamp,
            ),
            "lastseen": func.greatest(
                self.tables.passive.lastseen,
                lastseen or timestamp,
            ),
            "count": (
                values["count"]
                if replacecount
                else self.tables.passive.count + values["count"]
            ),
        }
        updt = update(
            self.tables.passive).where(whereclause).values(upsert)
        self.db.execute(updt)
    Column('created', DateTime(timezone=False), default=datetime.utcnow,
           nullable=False),
    UniqueConstraint('question_parent_id', 'question_child_id',
                     name='question_parent_child_uix')
)

# We want to prevent redundant, bidirectional links in tbl
# e.g.: (question_parent_id, question_child_id) ==
#       (question_child_id, question_parent_id)
# i.e.:
# create unique index on question_to_questions (least(A,B), greatest(A,B));
Index(
    'question_to_questions_uix',
    func.least(question_questions.c.question_parent_id,
               question_questions.c.question_child_id),
    func.greatest(question_questions.c.question_parent_id,
                  question_questions.c.question_child_id))


class User(core.Base):
    __tablename__ = "users"
    id = Column(BigInteger, primary_key=True)
    username = Column(Unicode, unique=True)
    email = Column(Unicode, unique=True)
    tags_subscriptions = relationship('Tag', 'user_to_tags',
                                      backref="subscribed_users")

    def dict(self, verbose=False, minimal=False):
        u = super(User, self).dict()
        del u['email']
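# Why least/greatest makes the index direction-agnostic (illustrative
# sketch): both orderings of a pair normalize to the same key, so the
# unique index rejects the redundant reverse link.
def undirected_key(a, b):
    return (min(a, b), max(a, b))


assert undirected_key(1, 2) == undirected_key(2, 1) == (1, 2)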
def safe_divide(sql_value):
    return func.greatest(sql_value, 1)
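# Usage sketch (illustrative, plain-Python analogue): clamping a
# denominator to at least 1 turns a possible division by zero into a
# harmless division by 1, at the cost of treating an empty total as 1.
assert 10 / max(0, 1) == 10.0  # zero denominator clamped to 1
assert 10 / max(5, 1) == 2.0   # positive denominators pass through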
def insert_or_update_bulk(self, specs, getinfos=None,
                          separated_timestamps=True):
    """Like `.insert_or_update()`, but `specs` parameter has to be an
    iterable of `(timestamp, spec)` (if `separated_timestamps` is True) or
    `spec` (if it is False) values. This will perform PostgreSQL COPY FROM
    inserts with the major drawback that the `getinfos` parameter will be
    called (if it is not `None`) for each spec, even when the spec already
    exists in the database and the call was hence unnecessary.

    It's up to you to decide whether having bulk insert is worth it or if
    you want to go with the regular `.insert_or_update()` method.

    """
    more_to_read = True
    tmp = self.create_tmp_table(self.tables.passive)
    if config.DEBUG_DB:
        total_upserted = 0
        total_start_time = time.time()
    while more_to_read:
        if config.DEBUG_DB:
            start_time = time.time()
        with PassiveCSVFile(specs, self.convert_ip, tmp,
                            getinfos=getinfos,
                            separated_timestamps=separated_timestamps,
                            limit=config.POSTGRES_BATCH_SIZE) as fdesc:
            self.copy_from(fdesc, tmp.name)
            more_to_read = fdesc.more_to_read
            if config.DEBUG_DB:
                count_upserted = fdesc.count
        insrt = postgresql.insert(self.tables.passive)
        self.db.execute(
            insrt.from_select(
                [column(col) for col in [
                    'addr',
                    # sum / min / max
                    'count', 'firstseen', 'lastseen',
                    # grouped
                    'sensor', 'port', 'recontype', 'source', 'targetval',
                    'value', 'fullvalue', 'info', 'moreinfo'
                ]],
                select([tmp.columns['addr'],
                        func.sum_(tmp.columns['count']),
                        func.min_(tmp.columns['firstseen']),
                        func.max_(tmp.columns['lastseen'])] + [
                            tmp.columns[col] for col in [
                                'sensor', 'port', 'recontype', 'source',
                                'targetval', 'value', 'fullvalue', 'info',
                                'moreinfo']])\
                .group_by(*(tmp.columns[col] for col in [
                    'addr', 'sensor', 'port', 'recontype', 'source',
                    'targetval', 'value', 'fullvalue', 'info', 'moreinfo'
                ]))
            )\
            .on_conflict_do_update(
                index_elements=['addr', 'sensor', 'recontype', 'port',
                                'source', 'value', 'targetval', 'info'],
                set_={
                    'firstseen': func.least(
                        self.tables.passive.firstseen,
                        insrt.excluded.firstseen,
                    ),
                    'lastseen': func.greatest(
                        self.tables.passive.lastseen,
                        insrt.excluded.lastseen,
                    ),
                    'count': self.tables.passive.count +
                    insrt.excluded.count,
                },
            )
        )
        self.db.execute(delete(tmp))
        if config.DEBUG_DB:
            stop_time = time.time()
            time_spent = stop_time - start_time
            total_upserted += count_upserted
            total_time_spent = stop_time - total_start_time
            utils.LOGGER.debug(
                "DB:PERFORMANCE STATS %s upserts, %f s, %s/s\n"
                "\ttotal: %s upserts, %f s, %s/s",
                utils.num2readable(count_upserted), time_spent,
                utils.num2readable(count_upserted / time_spent),
                utils.num2readable(total_upserted), total_time_spent,
                utils.num2readable(total_upserted / total_time_spent),
            )
def rating(self):
    """ Return the Queryable rating of the mastery """
    return func.greatest(0, self.answerRating - self.stalenessRating)
def daily_amount_select():
    return select([func.cast(
        func.sum(SalesOrderLine.unit_price * SalesOrderLine.quantity) /
        func.greatest(
            func.cast(
                func.date_part('DAY',
                               func.current_date() - Supplier.create_date),
                Integer), 1),
        Integer)]).as_scalar()
def execute(self, message, user, params):
    alliance = Alliance()
    race = None
    size_mod = None
    size = None
    value_mod = None
    value = None
    bash = False
    attacker = user.planet
    cluster = None

    params = params.group(1).split()

    for p in params:
        m = self.bashre.match(p)
        if m and not bash:
            bash = True
            continue
        m = self.clusterre.match(p)
        if m and not cluster:
            cluster = int(m.group(1))
        m = self.racere.match(p)
        if m and not race:
            race = m.group(1)
            continue
        m = self.rangere.match(p)
        if m and not size and int(m.group(2)) < 32768:
            size_mod = m.group(1) or '>'
            size = m.group(2)
            continue
        m = self.rangere.match(p)
        if m and not value:
            value_mod = m.group(1) or '<'
            value = m.group(2)
            continue
        m = self.alliancere.match(p)
        if m and not alliance.name and not self.clusterre.match(p):
            alliance = Alliance(name="Unknown") if m.group(1).lower() == "unknown" else Alliance.load(m.group(1))
            if alliance is None:
                message.reply("No alliance matching '%s' found" % (m.group(1),))
                return
            continue

    maxcap = PA.getfloat("roids", "maxcap")
    mincap = PA.getfloat("roids", "mincap")
    modifier = (cast(Planet.value, Float).op("/")(float(attacker.value))).op("^")(0.5)
    caprate = func.greatest(mincap, func.least(modifier.op("*")(maxcap), maxcap))
    maxcap = cast(func.floor(cast(Planet.size, Float).op("*")(caprate)), Integer)

    bravery = func.greatest(0.2, func.least(2.2, cast(Planet.score, Float).op("/")(float(attacker.score))) - 0.2) \
        * func.greatest(0.2, func.least(1.8, cast(Planet.value, Float).op("/")(float(attacker.value))) - 0.1) \
        / ((6 + max(4.0, float(attacker.score) / float(attacker.value))) / 10.0)
    xp_gain = cast(func.floor(maxcap.op("*")(bravery.op("*")(10.0))), Integer)

    Q = session.query(Planet, Intel, xp_gain.label("xp_gain"))
    if alliance.id:
        Q = Q.join(Planet.intel)
        Q = Q.filter(Intel.alliance == alliance)
    else:
        Q = Q.outerjoin(Planet.intel)
        if alliance.name:
            Q = Q.filter(Intel.alliance == None)
    Q = Q.filter(Planet.active == True)
    if race:
        Q = Q.filter(Planet.race.ilike(race))
    if size:
        Q = Q.filter(Planet.size.op(size_mod)(size))
    if value:
        Q = Q.filter(Planet.value.op(value_mod)(value))
    if bash:
        Q = Q.filter(or_(Planet.value.op(">")(attacker.value * PA.getfloat("bash", "value")),
                         Planet.score.op(">")(attacker.score * PA.getfloat("bash", "score"))))
    if cluster:
        Q = Q.filter(Planet.x == cluster)
    Q = Q.order_by(desc("xp_gain"))
    Q = Q.order_by(desc(Planet.idle))
    Q = Q.order_by(desc(Planet.value))
    result = Q[:6]

    if len(result) < 1:
        reply = "No"
        if race:
            reply += " %s" % (race,)
        reply += " planets"
        if alliance.name:
            reply += " in intel matching Alliance: %s" % (alliance.name,)
        else:
            reply += " matching"
        if size:
            reply += " Size %s %s" % (size_mod, size)
        if value:
            reply += " Value %s %s" % (value_mod, value)
        message.reply(reply)
        return

    replies = []
    for planet, intel, xp_gain in result[:5]:
        reply = "%s:%s:%s (%s)" % (planet.x, planet.y, planet.z, planet.race)
        reply += " Value: %s Size: %s Scoregain: %d" % (
            planet.value, planet.size,
            xp_gain * PA.getint("numbers", "xp_value"))
        if intel:
            if intel.nick:
                reply += " Nick: %s" % (intel.nick,)
            if not alliance.name and intel.alliance:
                reply += " Alliance: %s" % (intel.alliance.name,)
        replies.append(reply)
    if len(result) > 5:
        replies[-1] += " (Too many results to list, please refine your search)"
    message.reply("\n".join(replies))
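# Plain-Python rendering of the cap-rate expression above (illustrative
# only): greatest(mincap, least(modifier * maxcap, maxcap)) simply clamps
# modifier * maxcap into the [mincap, maxcap] band.
def caprate_py(modifier, mincap, maxcap):
    return max(mincap, min(modifier * maxcap, maxcap))


assert caprate_py(0.5, 0.1, 0.25) == 0.125  # within the band
assert caprate_py(5.0, 0.1, 0.25) == 0.25   # clamped to maxcap
assert caprate_py(0.1, 0.1, 0.25) == 0.1    # clamped to mincap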
def daily_profit_select():
    return select([func.cast(
        func.sum((SalesOrderLine.unit_price - Product.purchase_price) *
                 SalesOrderLine.quantity) /
        func.greatest(
            func.cast(
                func.date_part('DAY',
                               func.current_date() - Supplier.create_date),
                Integer), 1),
        Integer)]).as_scalar()
def daily_profit_select():
    return select([func.cast(
        func.sum((SalesOrderLine.unit_price - Product.purchase_price) *
                 SalesOrderLine.quantity) /
        func.greatest(
            func.cast(
                func.date_part('DAY',
                               func.current_date() - Product.create_date),
                Integer), 1),
        Numeric)]).as_scalar()
def _store_host(self, host):
    addr = self.convert_ip(host['addr'])
    info = host.get('infos')
    if 'coordinates' in (info or {}).get('loc', {}):
        info['coordinates'] = info.pop('loc')['coordinates'][::-1]
    source = host.get('source', [])
    host_tstart = utils.all2datetime(host['starttime'])
    host_tstop = utils.all2datetime(host['endtime'])
    insrt = postgresql.insert(self.tables.scan)
    scanid, scan_tstop = self.db.execute(
        insrt.values(
            addr=addr,
            source=source,
            info=info,
            time_start=host_tstart,
            time_stop=host_tstop,
            **dict(
                (key, host.get(key))
                for key in ['state', 'state_reason', 'state_reason_ttl']
                if key in host
            )
        ).on_conflict_do_update(
            index_elements=['addr'],
            set_={
                'source': self.tables.scan.source + insrt.excluded.source,
                'time_start': func.least(
                    self.tables.scan.time_start,
                    insrt.excluded.time_start,
                ),
                'time_stop': func.greatest(
                    self.tables.scan.time_stop,
                    insrt.excluded.time_stop,
                ),
            },
        ).returning(self.tables.scan.id,
                    self.tables.scan.time_stop)).fetchone()
    newest = scan_tstop <= host_tstop
    for category in host.get("categories", []):
        insrt = postgresql.insert(self.tables.category)
        catid = self.db.execute(
            insrt.values(name=category).on_conflict_do_update(
                index_elements=['name'],
                set_={'name': insrt.excluded.name}
            ).returning(self.tables.category.id)).fetchone()[0]
        self.db.execute(
            postgresql.insert(self.tables.association_scan_category).values(
                scan=scanid, category=catid).on_conflict_do_nothing())
    for port in host.get('ports', []):
        scripts = port.pop('scripts', [])
        # FIXME: handle screenshots
        for fld in ['screendata', 'screenshot', 'screenwords',
                    'service_method']:
            try:
                del port[fld]
            except KeyError:
                pass
        if 'service_servicefp' in port:
            port['service_fp'] = port.pop('service_servicefp')
        if 'state_state' in port:
            port['state'] = port.pop('state_state')
        if 'state_reason_ip' in port:
            port['state_reason_ip'] = self.convert_ip(
                port['state_reason_ip'])
        insrt = postgresql.insert(self.tables.port)
        portid = self.db.execute(
            insrt.values(scan=scanid, **port).on_conflict_do_update(
                index_elements=['scan', 'port', 'protocol'],
                set_=dict(scan=scanid, **(port if newest else {}))
            ).returning(self.tables.port.id)).fetchone()[0]
        for script in scripts:
            name, output = script.pop('id'), script.pop('output')
            if newest:
                insrt = postgresql.insert(self.tables.script)
                self.bulk.append(
                    insrt.values(
                        port=portid,
                        name=name,
                        output=output,
                        data=script
                    ).on_conflict_do_update(
                        index_elements=['port', 'name'],
                        set_={
                            "output": insrt.excluded.output,
                            "data": insrt.excluded.data,
                        },
                    ))
            else:
                insrt = postgresql.insert(self.tables.script)
                self.bulk.append(
                    insrt.values(
                        port=portid,
                        name=name,
                        output=output,
                        data=script
                    ).on_conflict_do_nothing())
    for trace in host.get('traces', []):
        traceid = self.db.execute(
            postgresql.insert(self.tables.trace).values(
                scan=scanid,
                port=trace.get('port'),
                protocol=trace['protocol']
            ).on_conflict_do_nothing().returning(
                self.tables.trace.id)).fetchone()[0]
        for hop in trace.get('hops'):
            hop['ipaddr'] = self.convert_ip(hop['ipaddr'])
            self.bulk.append(
                postgresql.insert(self.tables.hop).values(
                    trace=traceid,
                    ipaddr=self.convert_ip(hop['ipaddr']),
                    ttl=hop["ttl"],
                    rtt=None if hop["rtt"] == '--' else hop["rtt"],
                    host=hop.get("host"),
                    domains=hop.get("domains"),
                ))
    for hostname in host.get('hostnames', []):
        self.bulk.append(
            postgresql.insert(self.tables.hostname).values(
                scan=scanid,
                domains=hostname.get('domains'),
                name=hostname.get('name'),
                type=hostname.get('type'),
            ).on_conflict_do_nothing())
    utils.LOGGER.debug("VIEW STORED: %r", scanid)
    return scanid
def last_updated(cls):
    return func.greatest(cls.last_mod_update, cls.last_user_update)
def last_replicated_at(cls):
    webhook = cls.last_replicated_via_webhook_at
    api = cls.last_replicated_via_api_at
    return func.greatest(webhook, api, datetime.min)
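# Portability note, sketched under the assumption of PostgreSQL semantics:
# there GREATEST skips NULL arguments, so datetime.min only acts as a floor
# when every timestamp is NULL (MySQL's GREATEST instead returns NULL if
# any argument is NULL, which would need COALESCE per column).
from datetime import datetime


def greatest_pg(*values):
    non_null = [v for v in values if v is not None]
    return max(non_null) if non_null else None


assert greatest_pg(None, None, datetime.min) == datetime.min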
METHOD(
    name='height7',
    score=Detection.prob * Detection.height7_score,
    inputs=[
        Detection.prob,
        Detection.height7_score,
    ],
    output=Detection.height7_nms,
    display=False,
),
METHOD(
    name='angle_height',
    score=Detection.prob * func.greatest(
        math.sqrt(sys.float_info.min), Detection.angle_score
    ) * func.greatest(
        math.sqrt(sys.float_info.min), Detection.height_score
    ),
    inputs=[
        Detection.prob,
        Detection.height_score,
        Detection.angle_score,
    ],
    output=Detection.angle_height_nms,
    display=False,
),
METHOD(
def observation_period_query(wrapper: Wrapper) -> Insert:
    person = wrapper.cdm.Person.__table__
    condition = wrapper.cdm.ConditionOccurrence.__table__
    visit = wrapper.cdm.VisitOccurrence.__table__
    drug = wrapper.cdm.DrugExposure.__table__
    procedure = wrapper.cdm.ProcedureOccurrence.__table__
    observation = wrapper.cdm.Observation.__table__
    measurement = wrapper.cdm.Measurement.__table__
    obs_period = wrapper.cdm.ObservationPeriod.__table__

    sel_condition = select([
        condition.c.person_id,
        func.coalesce(condition.c.condition_start_date,
                      condition.c.condition_start_datetime).label('start_date'),
        func.coalesce(condition.c.condition_end_date,
                      condition.c.condition_end_datetime).label('end_date')
    ])

    sel_drug = select([
        drug.c.person_id,
        func.coalesce(drug.c.drug_exposure_start_date,
                      drug.c.drug_exposure_start_datetime).label('start_date'),
        func.coalesce(drug.c.drug_exposure_end_date,
                      drug.c.drug_exposure_end_datetime).label('end_date')
    ])

    sel_measurement = select([
        measurement.c.person_id,
        func.coalesce(measurement.c.measurement_date,
                      measurement.c.measurement_datetime).label('start_date'),
        literal(None).label('end_date')
    ])

    sel_observation = select([
        observation.c.person_id,
        func.coalesce(observation.c.observation_date,
                      observation.c.observation_datetime).label('start_date'),
        literal(None).label('end_date')
    ])

    sel_procedure = select([
        procedure.c.person_id,
        func.coalesce(procedure.c.procedure_date,
                      procedure.c.procedure_datetime).label('start_date'),
        literal(None).label('end_date')
    ])

    sel_visit = select([
        visit.c.person_id,
        func.coalesce(visit.c.visit_start_date,
                      visit.c.visit_start_datetime).label('start_date'),
        func.coalesce(visit.c.visit_end_date,
                      visit.c.visit_end_datetime).label('end_date')
    ])

    # CDM 5.3.1
    try:
        death = wrapper.cdm.Death.__table__
        sel_death = select([
            death.c.person_id,
            func.coalesce(death.c.death_date,
                          death.c.death_datetime).label('start_date'),
            literal(None).label('end_date')
        ])
    except AttributeError:
        sel_death = None

    sels = [sel_condition, sel_drug, sel_measurement, sel_observation,
            sel_procedure, sel_visit, sel_death]
    include_sels = [sel for sel in sels if sel is not None]

    all_periods = union(*include_sels).alias('all_periods')

    sel = select([
        person.c.person_id.label('person_id'),
        func.min(all_periods.c.start_date)
            .label('observation_period_start_date'),
        func.greatest(func.max(all_periods.c.start_date),
                      func.max(all_periods.c.end_date))
            .label('observation_period_end_date'),
        # Period covering healthcare encounters
        literal(44814724).label('period_type_concept_id')
    ]).select_from(
        join(person, all_periods,
             person.c.person_id == all_periods.c.person_id)
    )\
        .where(all_periods.c.start_date > '1970-01-01')\
        .group_by(person.c.person_id)

    ins = obs_period.insert().from_select(sel.columns, sel)
    return ins
def centerline_query(session, detection):
    """Finds the centerline orientation that most closely agrees with
    detection-intersected roadbeds."""
    # pylint: disable-msg=E1101
    car_polygon = Detection.geom
    car_polygon102718 = func.ST_Transform(car_polygon, 102718)
    car_filter = func.ST_Intersects(
        Roadbed.geom,
        car_polygon102718
    )
    query = session.query(
        Roadbed.gid) \
        .filter(Detection.id == detection.id) \
        .filter(car_filter)
    road_gids = query.all()
    if len(road_gids) == 0:
        return

    lat, lon, alt = session.query(
        func.ST_Y(Detection.lla),
        func.ST_X(Detection.lla),
        func.ST_Z(Detection.lla)) \
        .filter(Detection.id == detection.id) \
        .one()
    lla = numpy.array([[lat, lon, alt]])
    enu = pygeo.LLAToENU(lla).reshape((3, 3))

    roadbeds4326 = func.ST_Transform(Roadbed.geom, 4326)
    centerlines4326 = PlanetOsmLine.way
    centerline_filter = func.ST_Intersects(roadbeds4326, centerlines4326)
    centerline_frac = func.ST_Line_Locate_Point(
        centerlines4326, Detection.lla)
    centerline_start_frac = func.least(1, centerline_frac + 0.01)
    centerline_end_frac = func.greatest(0, centerline_frac - 0.01)
    centerline_start = func.ST_Line_Interpolate_Point(centerlines4326,
                                                      centerline_start_frac)
    centerline_end = func.ST_Line_Interpolate_Point(centerlines4326,
                                                    centerline_end_frac)

    segments = session.query(
        func.ST_Y(centerline_start).label('lats'),
        func.ST_X(centerline_start).label('lons'),
        func.ST_Y(centerline_end).label('late'),
        func.ST_X(centerline_end).label('lone'),
        PlanetOsmLine.oneway) \
        .filter(Detection.id == detection.id) \
        .filter(centerline_filter) \
        .filter(Roadbed.gid.in_(road_gids)) \
        .filter(PlanetOsmLine.osm_id >= 0) \
        .filter(PlanetOsmLine.railway.__eq__(None))
    # pylint: enable-msg=E1101

    for segment in segments:
        segment_start = pygeo.LLAToECEF(numpy.array(
            [[segment.lats, segment.lons, alt]],
            dtype=numpy.float64
        ))
        segment_end = pygeo.LLAToECEF(numpy.array(
            [[segment.late, segment.lone, alt]],
            dtype=numpy.float64
        ))
        segment_dir = (segment_end - segment_start)
        segment_dir /= numpy.linalg.norm(segment_dir)
        segment_rot = enu.T.dot(segment_dir.T)
        segment_angle = math.atan2(segment_rot[1], segment_rot[0])
        yield segment_angle, segment.oneway
def originality(cls):
    return func.greatest(cls.originality_bmi, cls.originality_bmj)
def execute(self, message, user, params):
    alliance = Alliance()
    race = None
    size_mod = None
    size = None
    value_mod = None
    value = None
    bash = False
    attacker = user.planet
    cluster = None

    params = params.group(1).split()

    for p in params:
        m = self.bashre.match(p)
        if m and not bash:
            bash = True
            continue
        m = self.clusterre.match(p)
        if m and not cluster:
            cluster = int(m.group(1))
        m = self.racere.match(p)
        if m and not race:
            race = m.group(1)
            continue
        m = self.rangere.match(p)
        if m and not size and int(m.group(2)) < 32768:
            size_mod = m.group(1) or '>'
            size = m.group(2)
            continue
        m = self.rangere.match(p)
        if m and not value:
            value_mod = m.group(1) or '<'
            value = m.group(2)
            continue
        m = self.alliancere.match(p)
        if m and not alliance.name and not self.clusterre.match(p):
            alliance = Alliance(name="Unknown") if m.group(1).lower() == "unknown" else Alliance.load(m.group(1))
            if alliance is None:
                message.reply("No alliance matching '%s' found" % (m.group(1),))
                return
            continue

    maxcap = PA.getfloat("roids", "maxcap")
    mincap = PA.getfloat("roids", "mincap")
    modifier = (cast(Planet.value, Float).op("/")(float(attacker.value))).op("^")(0.5)
    caprate = func.greatest(mincap, func.least(modifier.op("*")(maxcap), maxcap))
    maxcap = cast(func.floor(cast(Planet.size, Float).op("*")(caprate)), Integer)

    bravery = (func.greatest(
        0.0,
        (func.least(2.0, cast(Planet.value, Float).op("/")(float(attacker.value))) - 0.1) *
        (func.least(2.0, cast(Planet.score, Float).op("/")(float(attacker.score))) - 0.2)
    )).op("*")(10.0)
    xp_gain = cast(func.floor(maxcap.op("*")(bravery)), Integer)

    Q = session.query(Planet, Intel, xp_gain.label("xp_gain"))
    if alliance.id:
        Q = Q.join(Planet.intel)
        Q = Q.filter(Intel.alliance == alliance)
    else:
        Q = Q.outerjoin(Planet.intel)
        if alliance.name:
            Q = Q.filter(Intel.alliance == None)
    Q = Q.filter(Planet.active == True)
    if race:
        Q = Q.filter(Planet.race.ilike(race))
    if size:
        Q = Q.filter(Planet.size.op(size_mod)(size))
    if value:
        Q = Q.filter(Planet.value.op(value_mod)(value))
    if bash:
        Q = Q.filter(or_(Planet.value.op(">")(attacker.value * PA.getfloat("bash", "value")),
                         Planet.score.op(">")(attacker.score * PA.getfloat("bash", "score"))))
    if cluster:
        Q = Q.filter(Planet.x == cluster)
    Q = Q.order_by(desc("xp_gain"))
    Q = Q.order_by(desc(Planet.idle))
    Q = Q.order_by(desc(Planet.value))
    result = Q[:6]

    if len(result) < 1:
        reply = "No"
        if race:
            reply += " %s" % (race,)
        reply += " planets"
        if alliance.name:
            reply += " in intel matching Alliance: %s" % (alliance.name,)
        else:
            reply += " matching"
        if size:
            reply += " Size %s %s" % (size_mod, size)
        if value:
            reply += " Value %s %s" % (value_mod, value)
        message.reply(reply)
        return

    replies = []
    for planet, intel, xp_gain in result[:5]:
        reply = "%s:%s:%s (%s)" % (planet.x, planet.y, planet.z, planet.race)
        reply += " Value: %s Size: %s Scoregain: %d" % (
            planet.value, planet.size,
            xp_gain * PA.getint("numbers", "xp_value"))
        if intel:
            if intel.nick:
                reply += " Nick: %s" % (intel.nick,)
            if not alliance.name and intel.alliance:
                reply += " Alliance: %s" % (intel.alliance.name,)
        replies.append(reply)
    if len(result) > 5:
        replies[-1] += " (Too many results to list, please refine your search)"
    message.reply("\n".join(replies))