Example #1
  def aggregate(self, table, groupby, filters={}, aggregate='count(*)', page=0, page_size=100, orderby=None):
    self.log.info((
        "table=%s, groupby=%s, filters=%s, aggregate=%s, page=%s,"
        " page_size=%s, orderby=%s"
      ), table, groupby, filters, aggregate, page, page_size, orderby)
    table_  = self._table(table)
    columnd = { col.name:col for col in table_.columns }

    if isinstance(groupby, basestring):
      groupby = [ groupby ]
    if isinstance(aggregate, basestring):
      aggregate = [ aggregate ]

    session = self.sessionmaker()
    try:
      groupby_    = [ label(c, str2col(c, table_)) for c in groupby ]
      aggregate_  = [ label(a, str2col(a, table_)) for a in aggregate ]

      query = session.query(*(aggregate_ + groupby_))
      query = with_filters(query, table_, filters)
      query = query.group_by(*groupby_)
      query = with_orderby(query, table_, orderby)
      query = with_pagination(query, table_, page, page_size)
      result = result2dict(query.all())

      self.log.info("retrieved %d rows", len(result))
      return result

    finally:
      session.close()
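
At bottom, the helper above relies on one SQLAlchemy pattern: wrap an expression in label() and reuse the labeled element in group_by(). A minimal self-contained sketch of that pattern (model and names are illustrative, assuming SQLAlchemy 1.4+):

from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.orm import Session, declarative_base
from sqlalchemy.sql.expression import label

Base = declarative_base()

class Event(Base):  # hypothetical model, for illustration only
    __tablename__ = 'events'
    id = Column(Integer, primary_key=True)
    kind = Column(String)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([Event(kind='a'), Event(kind='a'), Event(kind='b')])
    kind_ = label('kind', Event.kind)
    count_ = label('n', func.count())
    # grouping by the Label groups by the underlying column
    rows = session.query(kind_, count_).group_by(kind_).all()
    assert dict(rows) == {'a': 2, 'b': 1}
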
Example #2
    def __call__(self, user_ids, session):
        """
        Parameters:
            user_ids    : list of mediawiki user ids to restrict computation to
            session     : sqlalchemy session open on a mediawiki database

        Returns:
            {
                user id: 1 if they're a rolling new active editor, 0 otherwise,
                for all cohort users, or for all users with edits in the time period
            }
        """
        number_of_edits = int(self.number_of_edits.data)
        rolling_days = int(self.rolling_days.data)
        end_date = self.end_date.data
        start_date = end_date - timedelta(days=rolling_days)

        newly_registered = session.query(Logging.log_user) \
            .filter(Logging.log_type == 'newusers') \
            .filter(Logging.log_action == 'create') \
            .filter(between(Logging.log_timestamp, start_date, end_date))

        filtered_new = self.filter(
            newly_registered, user_ids, column=Logging.log_user
        ).subquery()

        rev_user = label('user_id', Revision.rev_user)
        ar_user = label('user_id', Archive.ar_user)
        count = label('count', func.count())

        revisions = session.query(rev_user, count)\
            .filter(between(Revision.rev_timestamp, start_date, end_date))\
            .filter(Revision.rev_user.in_(filtered_new))\
            .group_by(Revision.rev_user)

        archived = session.query(ar_user, count)\
            .filter(between(Archive.ar_timestamp, start_date, end_date))\
            .filter(Archive.ar_user.in_(filtered_new))\
            .group_by(Archive.ar_user)

        bot_user_ids = session.query(MediawikiUserGroups.ug_user)\
            .filter(MediawikiUserGroups.ug_group == 'bot')\
            .subquery()

        new_edits = revisions.union_all(archived).subquery()
        new_edits_by_user = session.query(new_edits.c.user_id)\
            .filter(new_edits.c.user_id.notin_(bot_user_ids))\
            .group_by(new_edits.c.user_id)\
            .having(func.SUM(new_edits.c.count) >= number_of_edits)

        metric_results = {r[0]: {self.id : 1} for r in new_edits_by_user.all()}

        if user_ids is None:
            return metric_results
        else:
            return {
                uid: metric_results.get(uid, self.default_result)
                for uid in user_ids
            }
Example #3
 def postgres_aggregates(self, resolution):
     if isinstance(resolution, basestring):
         try:
             resolution = float(resolution)
         except ValueError:
             resolution = self.resolution
     return [
         label('cell_x', func.floor(ST_X(Column('cell')) / resolution) * resolution),
         label('cell_y', func.floor(ST_Y(Column('cell')) / resolution) * resolution)]
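
The floor(x / resolution) * resolution idiom behind cell_x and cell_y snaps a coordinate down to the nearest grid line. The same arithmetic in plain Python, as a quick sanity check:

import math

def snap(x, resolution):
    # mirrors func.floor(ST_X(cell) / resolution) * resolution
    return math.floor(x / resolution) * resolution

assert snap(3.7, 0.5) == 3.5
assert snap(-3.7, 0.5) == -4.0  # floor rounds toward -inf, unlike int()
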
Example #4
    def build_query_to_report(self, query, aggregate_table, params):
        fk = Column(self.key, Integer)
        geom = Column(self.geometry_column, Geometry())
        join_table = Table(self.table, aggregate_table.metadata, fk, geom)
        if params == "key":
            query = query.column(label(self.key, aggregate_table.c.join_key))
        else:
            query = query.column(label("geometry", func.ST_AsGeoJSON(func.ST_Collect(geom))))

        return (
            query.select_from(join_table).where(aggregate_table.c.join_key == fk).group_by(aggregate_table.c.join_key)
        )
Example #5
    def __call__(self, user_ids, session):
        """
        Parameters:
            user_ids    : list of mediawiki user ids to find edits for
            session     : sqlalchemy session open on a mediawiki database

        Returns:
            dictionary from user ids to the number of edits found.
        """
        start_date = self.start_date.data
        end_date = self.end_date.data

        revisions = session\
            .query(
                label('user_id', Revision.rev_user),
                label('timestamp', Revision.rev_timestamp)
            )\
            .filter(Revision.rev_timestamp > start_date)\
            .filter(Revision.rev_timestamp <= end_date)

        archives = session\
            .query(
                label('user_id', Archive.ar_user),
                label('timestamp', Archive.ar_timestamp)
            )\
            .filter(Archive.ar_timestamp > start_date)\
            .filter(Archive.ar_timestamp <= end_date)

        if self.namespaces.data and len(self.namespaces.data) > 0:
            revisions = revisions.join(Page)\
                .filter(Page.page_namespace.in_(self.namespaces.data))
            archives = archives\
                .filter(Archive.ar_namespace.in_(self.namespaces.data))

        revisions = self.filter(revisions, user_ids, column=Revision.rev_user)
        archives = self.filter(archives, user_ids, column=Archive.ar_user)

        both = revisions
        if self.include_deleted.data:
            both = both.union_all(archives)
        both = both.subquery()

        query = session.query(both.c.user_id, func.count())\
            .group_by(both.c.user_id)

        query = self.apply_timeseries(query, column=both.c.timestamp)

        return self.results_by_user(
            user_ids,
            query,
            [(self.id, 1, 0)],
            date_index=2,
        )
Example #6
    def __call__(self, user_ids, session):
        """
        Parameters:
            user_ids    : list of mediawiki user ids to restrict computation to
            session     : sqlalchemy session open on a mediawiki database

        Returns:
            dictionary from user ids to: 1 if they're a rolling active editor, 0 if not
        """
        number_of_edits = int(self.number_of_edits.data)
        rolling_days = int(self.rolling_days.data)
        end_date = self.end_date.data
        start_date = end_date - timedelta(days=rolling_days)

        rev_user = label("user_id", Revision.rev_user)
        ar_user = label("user_id", Archive.ar_user)
        count = label("count", func.count())

        revisions = (
            session.query(rev_user, count)
            .filter(between(Revision.rev_timestamp, start_date, end_date))
            .group_by(Revision.rev_user)
        )
        revisions = self.filter(revisions, user_ids, column=Revision.rev_user)

        archived = (
            session.query(ar_user, count)
            .filter(between(Archive.ar_timestamp, start_date, end_date))
            .group_by(Archive.ar_user)
        )
        archived = self.filter(archived, user_ids, column=Archive.ar_user)

        bot_user_ids = (
            session.query(MediawikiUserGroups.ug_user).filter(MediawikiUserGroups.ug_group == "bot").subquery()
        )

        edits = revisions.union_all(archived).subquery()
        edits_by_user = (
            session.query(edits.c.user_id)
            .filter(edits.c.user_id.notin_(bot_user_ids))
            .group_by(edits.c.user_id)
            .having(func.SUM(edits.c.count) >= number_of_edits)
        )

        metric_results = {r[0]: {self.id: 1} for r in edits_by_user.all()}

        if user_ids is None:
            return metric_results
        else:
            return {uid: metric_results.get(uid, self.default_result) for uid in user_ids}
Example #7
    def get_messages_by_domain(self, session, limit=10):
        '''SELECT m.mailing_list_url, lower(p.domain_name) as domain,
                  count(m.message_id) as num_messages
             FROM messages m,messages_people mp, people p
            WHERE m.message_ID = mp.message_ID
              AND lower(mp.email_address) = lower(p.email_address)
              AND mp.type_of_recipient = 'From'
            GROUP BY m.mailing_list_url, domain
            ORDER BY num_messages DESC, domain
            LIMIT %s;'''
        mailing_lists = int(self.get_num_of_mailing_lists(session)[0])
        limit = limit * mailing_lists

        m = aliased(db.Messages)
        mp = aliased(db.MessagesPeople)
        p = aliased(db.People)
        ret = session.query(m.mailing_list_url,
                            label('domain', func.lower(p.domain_name)),
                            func.count(m.message_id))\
                     .filter(m.message_id == mp.message_id)\
                     .filter(func.lower(mp.email_address) ==
                             func.lower(p.email_address))\
                     .filter(mp.type_of_recipient == 'From')\
                     .group_by(m.mailing_list_url,
                               func.lower(p.domain_name))\
                     .order_by(func.count(m.message_id).desc(),
                               func.lower(p.domain_name))\
                     .limit(limit)
        return ret.all()
Example #8
File: browser.py Project: code6/cubes
    def column(self, field, dimension = None):
        """Return a table column for `field` which can be either :class:`cubes.Attribute` or a string.
        
        Possible column names:
        * ``field`` for fact field or flat dimension
        * ``field.locale`` for localized fact field or flat dimension
        * ``dimension.field`` for multi-level dimension field
        * ``dimension.field.locale`` for localized multi-level dimension field
        """

        # FIXME: should use: field.full_name(dimension, self.locale)
        # if there is no localization for field, use default name/first locale
        locale_suffix = ""

        if isinstance(field, cubes.model.Attribute) and field.locales:
            locale = self.locale if self.locale in field.locales else field.locales[0]
            locale_suffix = "." + locale

        if dimension:
            # FIXME: temporary flat dimension hack, not sure about impact of this to other parts of the
            # framework
            if not dimension.is_flat or dimension.has_details:
                logical_name = dimension.name + '.' + str(field)
            else:
                logical_name = str(field)
        else:
            logical_name = field

        self.logger.debug("getting column %s(%s) loc: %s - %s" % (field, type(field), self.locale, locale_suffix))

        localized_name = logical_name + locale_suffix
        column = self.view.c[localized_name]
        return expression.label(logical_name, column)
Example #9
def get_player_graph_data(server, granularity=15, start_date=None, end_date=None):
    end_date = end_date or datetime.utcnow()
    start_date = start_date or end_date - timedelta(days=7)

    result = db.session.query(
        label(
            'timestamp_group',
            func.round(
                (func.unix_timestamp(ServerStatus.timestamp) - time.timezone) / (granularity * 60)
            ),
        ),
        func.avg(ServerStatus.player_count)
    ).filter(
        ServerStatus.server == server,
        ServerStatus.timestamp >= start_date,
        ServerStatus.timestamp <= end_date
    ).group_by('timestamp_group').order_by(
        ServerStatus.timestamp
    ).all()

    points = []
    for chunk, count in result:
        points.append({
            'time': int(chunk * granularity * 60 * 1000),
            'player_count': int(count)
        })

    return {
        'start_time': int(calendar.timegm(start_date.timetuple()) * 1000),
        'end_time': int(calendar.timegm(end_date.timetuple()) * 1000),
        'points': points
    }
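
The bucket arithmetic round-trips: dividing a Unix timestamp by granularity * 60 yields a bucket index, and multiplying the index by granularity * 60 * 1000 recovers the bucket's epoch time in milliseconds. A quick check in plain Python (the time.timezone shift is omitted for brevity):

granularity = 15                    # minutes per bucket
bucket_seconds = granularity * 60   # 900 s

ts = 1_700_000_123                  # any Unix timestamp
chunk = round(ts / bucket_seconds)  # bucket index, like the SQL round()
epoch_ms = int(chunk * bucket_seconds * 1000)

assert abs(epoch_ms / 1000 - ts) <= bucket_seconds / 2
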
Example #10
def SelectSingeMenuPrivilege(strUserID, MidList):
    """
    @note Query the privileges for the menu IDs in the given list
    :param strUserID:
    :param MidList:
    :return: list of menu privileges
    """
    project_dic = CommonSession.SelectProject('ProjectDic')
    menu_list = []
    with GetSession() as db_ses:
        privilege = db_ses.query(
                tables.MenuPrivilege.mid, tables.Menu.name, tables.Menu.url, tables.Menu.preid,
                expression.label('privileges', func.group_concat(
                    tables.MenuPrivilege.pid, ";",
                    tables.MenuPrivilege.r_priv, ";",
                    tables.MenuPrivilege.w_priv))) \
            .join(tables.Menu, tables.MenuPrivilege.mid == tables.Menu.mid) \
            .filter(tables.MenuPrivilege.uid == strUserID,
                    tables.MenuPrivilege.mid.in_(MidList)) \
            .group_by(tables.MenuPrivilege.mid).all()
        for menu in privilege:
            priv_list = []
            for prjs in str(menu[4]).split(','):

                priv = prjs.split(';')
                prj_dic = {}
                if priv[0] in project_dic.keys():
                    prj_dic[project_dic[priv[0]]] = {'pid': priv[0], 'r_priv': priv[1], 'w_priv': priv[2]}
                    priv_list.append(prj_dic)

            menu_dic = {'menu_id': menu[0], 'menu_name': menu[1], 'menu_url': menu[2], 'menu_preid': menu[3],
                        'menu_pri': priv_list}
            menu_list.append(menu_dic)
    return menu_list
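
group_concat emits one 'pid;r_priv;w_priv' triple per privilege row, comma-separated, which is why the loop splits on ',' first and then on ';'. A quick illustration with made-up values:

privileges = "p1;1;0,p2;1;1"  # as group_concat would return it
parsed = [triple.split(";") for triple in privileges.split(",")]
assert parsed == [["p1", "1", "0"], ["p2", "1", "1"]]
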
Example #11
    def get_people_by_domain(self, session, limit=10):
        '''SELECT mailing_list_url, lower(domain_name) as domain,
                  count(lower(p.email_address)) as t
             FROM mailing_lists_people as ml, people as p
            WHERE lower(ml.email_address) = lower(p.email_address)
            GROUP BY mailing_list_url, domain
            ORDER BY t DESC, domain
            LIMIT %s;'''

        mailing_lists = int(self.get_num_of_mailing_lists(session)[0])
        limit = limit * mailing_lists

        mlp = aliased(db.MailingListsPeople)
        p = aliased(db.People)
        ret = session.query(mlp.mailing_list_url,
                            label('domain', func.lower(p.domain_name)),
                            func.count(func.lower(p.email_address)))\
                     .filter(func.lower(mlp.email_address) ==
                             func.lower(p.email_address))\
                     .group_by(mlp.mailing_list_url,
                               func.lower(p.domain_name))\
                     .order_by(func.count(func.lower(p.email_address)).desc(),
                               func.lower(p.domain_name))\
                     .limit(limit)
        return ret.all()
Example #12
  def select(self, table, columns=None, filters={}, page=0, page_size=100, orderby=None):
    self.log.info((
        "table=%s, columns=%s, filters=%s, page=%s, page_size=%s, orderby=%s"
      ), table, columns, filters, page, page_size, orderby)
    table_  = self._table(table)
    columnd = { c.name:c for c in table_.columns }

    # get column objects corresponding to names
    if isinstance(columns, basestring):
      columns = [ columns ]

    if columns is None:
      columns_ = list(table_.columns)
    else:
      columns_ = [label(c, str2col(c, table_)) for c in columns]

    session = self.sessionmaker()
    try:
      query = session.query(*columns_)
      query = with_filters(query, table_, filters)
      query = with_orderby(query, table_, orderby)
      query = with_pagination(query, table_, page, page_size)
      result = result2dict(query.all())
      self.log.info("retrieved %d rows", len(result))
      return result
    finally:
      session.close()
Example #13
    def build_query_to_report(self, query, aggregate_table, params):
        assert params in self._known_units
        res = params

        truncated_time = func.date_trunc(res, aggregate_table.c.time_step)
        return (query
                .column(label("time_slice", func.extract("epoch", truncated_time)))
                .group_by(truncated_time))
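
func.date_trunc floors the timestamp to the named unit ('hour', 'day', ...) and func.extract('epoch', ...) converts the truncated value to seconds, so each report row carries a numeric time_slice. A standalone sketch of the expression being built (hypothetical table, SQLAlchemy 1.4+, PostgreSQL semantics for date_trunc):

from sqlalchemy import Column, DateTime, MetaData, Table, func, select
from sqlalchemy.sql.expression import label

metadata = MetaData()
agg = Table("aggregates", metadata, Column("time_step", DateTime))  # hypothetical

truncated = func.date_trunc("day", agg.c.time_step)
time_slice = label("time_slice", func.extract("epoch", truncated))
# prints the SELECT this builds; on PostgreSQL it yields one
# epoch-seconds value per truncated time_step
print(select(time_slice).group_by(truncated))
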
Example #14
    def build_query_to_report(self, query, aggregate_table, params):
        assert params in self._known_units
        res = params

        truncated_time = func.date_trunc(res, aggregate_table.c.time_step)
        return (query.column(
            label("time_slice",
                  func.extract("epoch",
                               truncated_time))).group_by(truncated_time))
Example #15
 def build_query_to_report(self, query, aggregate_table, res):
     if isinstance(res, basestring):
         try:
             res = float(res)
         except ValueError:
             res = self.resolution
     snapped_geom = func.ST_SnapToGrid(aggregate_table.c.cell, res)
     grid_cell = func.ST_MakeBox2D(snapped_geom, func.ST_Translate(snapped_geom, res, res))
     return query.column(label('cell', func.ST_AsGeoJSON(grid_cell))).group_by(snapped_geom)
Example #16
def synsets(session, pos=None):
    """Query for synsets, concatenated ids and lemmas of their LUs.

    Parameters
    ----------
    session : orm.session.Session
    pos : list
        Parts of speech to select (default [2])
    """
    if not pos:
        pos = [2]
    return (session.query(
        Synset.id_, Synset.definition,
        label('lex_ids', func.group_concat(UnitSynset.lex_id)),
        label('unitindexes', func.group_concat(
            UnitSynset.unitindex))).join(UnitSynset).join(LexicalUnit).filter(
                LexicalUnit.pos.in_(pos)).order_by(Synset.id_).group_by(
                    Synset.id_))
Example #17
    def get_grouped_over_period(
            self, period: PeriodType,
            filter_request: FilterRequest) -> List[QKeyValue]:
        """Gets the transaction amount grouped by period matching the provided filters.

        :param period: the period
        :param filter_request: the filter request
        :return: the list of (key, value) results
        """
        period_expr = self.period_expression(period, TransactionDbo.date_value)
        query = self.__entity_manager.query(
            label('key', period_expr),
            label('value', func.sum(TransactionDbo.amount)))
        query = self.__query_builder.build(query, filters=filter_request)
        query = query.group_by(period_expr)
        query = query.order_by(period_expr)
        logging.debug(query)
        return query.all()
Example #18
    def get_grouped_by_category_over_period(
            self, period: PeriodType,
            filter_request: FilterRequest) -> List[QCompositeKeyValue]:
        """Gets the transaction amount grouped by category and period matching the provided filters.

        :param period: the period
        :param filter_request: the filter request
        :return: the list of (key_one, key_two, value) results
        """
        period_expr = self.period_expression(period, TransactionDbo.date_value)
        query = self.__entity_manager.query(
            label('value', func.sum(TransactionDbo.amount)),
            label('key_one', period_expr), label('key_two', CategoryDbo.id))
        query = self.__query_builder.build(query,
                                           filters=filter_request,
                                           groups=['label.category.id'])
        query = query.group_by(period_expr)
        logging.debug(query)
        return query.all()
Example #19
def SelectMenuProjectPrivilege(strUserId, strMenuID='None'):
    """
    @note Query the user's menu privileges
    :param strUserId:
    :param strMenuID: 'None' returns all of this user's menu privileges;
                      anything else returns the privileges for that single menu ID
    :return:
    """

    project_dic = CommonSession.SelectProject('ProjectDic')

    menu_list = []
    with GetSession() as db_ses:
        if strMenuID == 'None':
            privilege = db_ses.query(
                    tables.MenuPrivilege.mid, tables.Menu.name, tables.Menu.url, tables.Menu.preid,
                    expression.label('privileges', func.group_concat(
                        tables.MenuPrivilege.pid, ";",
                        tables.MenuPrivilege.r_priv, ";",
                        tables.MenuPrivilege.w_priv))) \
                .join(tables.Menu, tables.MenuPrivilege.mid == tables.Menu.mid) \
                .filter(tables.MenuPrivilege.uid == strUserId) \
                .group_by(tables.MenuPrivilege.mid).all()

        else:
            privilege = db_ses.query(
                    tables.MenuPrivilege.mid, tables.Menu.name, tables.Menu.url, tables.Menu.preid,
                    expression.label('privileges', func.group_concat(
                        tables.MenuPrivilege.pid, ";",
                        tables.MenuPrivilege.r_priv, ";",
                        tables.MenuPrivilege.w_priv))) \
                .join(tables.Menu, tables.MenuPrivilege.mid == tables.Menu.mid) \
                .filter(tables.MenuPrivilege.uid == strUserId,
                        tables.MenuPrivilege.mid == strMenuID) \
                .group_by(tables.MenuPrivilege.mid).all()
        for menu in privilege:
            priv_list = []
            for prjs in str(menu[4]).split(','):

                priv = prjs.split(';')
                prj_dic = {}
                if priv[0] in project_dic.keys():
                    prj_dic[project_dic[priv[0]]] = {'pid': priv[0], 'r_priv': priv[1], 'w_priv': priv[2]}
                    priv_list.append(prj_dic)

            menu_dic = {'menu_id': menu[0], 'menu_name': menu[1], 'menu_url': menu[2], 'menu_preid': menu[3],
                        'menu_pri': priv_list}
            menu_list.append(menu_dic)
    return menu_list
Example #20
def get(run_id, query, with_stats=True):
    """Return a list of genotypes in a vcf conforming to the given query, as
    well as a dict of stats calculated on them.

    If a truth_vcf is associated with this VCF, stats include true/false,
    positive/negative stats, as well as precision, recall, and f1score. Stats
    also include the number of records, and the number of records once filters
    are applied.

    A query is a dictionary which specifies the range, filters, limit, offset
    and ordering which should be applied against genotypes before genotypes and
    stats are returned.

    It has structure:

    {range: {contig: "X", start: 0, end: 250000000},
     filters: [{columnName: 'info:DP', filterValue: '50', type: '<'}, ...],
     sortBy: [{columnName: 'contig', order: 'asc'},
              {columnName: 'position', order: 'asc'}, ...],
     page: 10,
     limit: 250
    }
    """
    query = _annotate_query_with_types(query, spec(run_id))
    compare_to_run_id = query.get('compareToVcfId')
    with tables(db.engine, 'genotypes') as (con, g):
        if compare_to_run_id:
            # We consider a genotype validated if a truth genotype exists at its
            # location (contig/position) with the same ref/alts.  This isn't
            # entirely accurate: for example, it handles SVs very poorly.
            gt = g.alias()
            joined_q = outerjoin(g, gt, and_(
                gt.c.vcf_id == compare_to_run_id,
                g.c.contig == gt.c.contig,
                g.c.position == gt.c.position,
                g.c.reference == gt.c.reference,
                g.c.alternates == gt.c.alternates,
                g.c.sample_name == gt.c.sample_name))
            valid_column = label('tag:true-positive', gt.c.contig != None)
            q = (select(g.c + [valid_column])
                 .select_from(joined_q)
                 .where(g.c.vcf_id == run_id))
        else:
            q = select(g.c).where(g.c.vcf_id == run_id)

        q = _add_range(q, g, query.get('range'))
        q = _add_filters(q, g, query.get('filters'))
        q = _add_orderings(q, g, query.get('sortBy'))
        q = _add_paging(q, g, query.get('limit'), query.get('page'))

        q = _add_ordering(q, g, 'String', 'contig', 'asc')
        q = _add_ordering(q, g, 'Integer', 'position', 'asc')
        genotypes = [dict(g) for g in con.execute(q).fetchall()]
    stats = calculate_stats(run_id, compare_to_run_id, query) if with_stats else {}
    return {'records': genotypes, 'stats': stats}
Example #21
def pwn_mappings(session, pos=None, pos_en=None):
    """Query plWN for already mapped synsets between plWN and PWN.

    Selects: Polish synset id, English synset unitsstr, POS
    Source: Polish - Target (child): English
    RelationType: selects only plWN-PWN mappings
        does not take 'po_pa, po_ap' relation types.
    POS: Only selects nouns

    Parameters
    ----------
    session : orm.session.Session
    pos : list of int
    pos_en : list of int
    """
    if not pos:
        pos = [2]
    if not pos_en:
        pos_en = [6]
    rel_types = reltypes_pwn_plwn(session)

    syns_en = orm.aliased(Synset)
    uas_pl = orm.aliased(UnitSynset)
    lunit_pl = orm.aliased(LexicalUnit)
    return (session.query(
        label('pl_uid', Synset.id_), label('en_uid', syns_en.id_),
        syns_en.unitsstr, LexicalUnit.pos).join(
            SynsetRelation, Synset.id_ == SynsetRelation.parent_id).join(
                syns_en, SynsetRelation.child_id == syns_en.id_).join(
                    UnitSynset, syns_en.id_ == UnitSynset.syn_id).join(
                        LexicalUnit,
                        UnitSynset.lex_id == LexicalUnit.id_).join(
                            uas_pl, Synset.id_ == uas_pl.syn_id).
            join(lunit_pl,
                 uas_pl.lex_id == lunit_pl.id_).join(
                     RelationType,
                     SynsetRelation.rel_id == RelationType.id_).filter(
                         RelationType.id_.in_(rel_types)).filter(
                             LexicalUnit.pos.in_(pos_en)).filter(
                                 lunit_pl.pos.in_(pos)).group_by(
                                     Synset.id_, syns_en.id_, syns_en.unitsstr,
                                     LexicalUnit.pos).order_by(Synset.id_))
Example #22
 def build_query_to_report(self, query, aggregate_table, res):
     if isinstance(res, basestring):
         try:
             res = float(res)
         except ValueError:
             res = self.resolution
     snapped_geom = func.ST_SnapToGrid(aggregate_table.c.cell, res)
     grid_cell = func.ST_MakeBox2D(
         snapped_geom, func.ST_Translate(snapped_geom, res, res))
     return query.column(label(
         'cell', func.ST_AsGeoJSON(grid_cell))).group_by(snapped_geom)
Example #23
def get_years(model):
    # 'model' must be "distinct" or "min-max"; any other value leaves q unbound
    if model == "distinct":
        q = DB.session.query(
            label("year", distinct(func.date_part("year", VSynthese.date_min)))
        ).order_by("year")
    if model == "min-max":
        q = DB.session.query(
            func.min(func.date_part("year", VSynthese.date_min)),
            func.max(func.date_part("year", VSynthese.date_min)),
        )
    return q.all()
Example #24
    def build_query_to_populate(self, query, full_table, aggregate_table):
        insert_columns = [aggregate_table.c.isp]
        ip_range = Column("ip_range", INT8RANGE)
        isp_name = Column("label", String)
        join_table = Table(self.maxmind_table, full_table.metadata, ip_range, isp_name, keep_existing = True)
        isp_label = label('maxmind_isp', self._sql_rewrite(isp_name))
        select_query = (query.select_from(join_table)
                .where(ip_range.contains(full_table.c.client_ip))
                .column(isp_label)
                .group_by('maxmind_isp'))

        return insert_columns, select_query
Example #25
    def get_total(self, filter_request: FilterRequest) -> float:
        """Gets the total of all transactions matching the provided filters.

        :param filter_request: the filter request
        :return: the total
        """
        query = self.__entity_manager.query(
            label('total', func.sum(TransactionDbo.amount)))
        query = self.__query_builder.build(query, filters=filter_request)
        logging.debug(query)
        total = query.scalar()
        return 0 if total is None else total
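
SUM over zero rows yields SQL NULL, which query.scalar() surfaces as None; that is what the final guard turns into 0. Pushing the guard into SQL with coalesce is an equivalent option, sketched here against an in-memory database (hypothetical table, SQLAlchemy 1.4+):

from sqlalchemy import Column, Float, Integer, MetaData, Table, create_engine, func, select

metadata = MetaData()
tx = Table(  # hypothetical table, for illustration only
    "transactions", metadata,
    Column("id", Integer, primary_key=True),
    Column("amount", Float),
)
engine = create_engine("sqlite://")
metadata.create_all(engine)

with engine.connect() as conn:
    total = conn.execute(select(func.sum(tx.c.amount))).scalar()
    assert total is None  # SUM over no rows is NULL
    total = conn.execute(
        select(func.coalesce(func.sum(tx.c.amount), 0.0))).scalar()
    assert total == 0.0   # coalesce does the guard in SQL instead
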
Example #26
    def _select_column(self, attribute, locale=None):
        """get select column"""

        if locale:
            localized_alias = attribute.alias + "." + locale
        else:
            localized_alias = attribute.alias

        if self.dimension_table_prefix:
            prefix = self.dimension_table_prefix
        else:
            prefix = ""
        self.logger.debug("looking for mapping %s (%s)" %
                          (localized_alias, attribute.alias))

        if self.cube.mappings and localized_alias in self.cube.mappings:
            mapping = self.cube.mappings[localized_alias]
            original_mapping = mapping
            self.logger.debug("  is in mappings: %s" % mapping)
        elif self.cube.mappings and attribute.alias in self.cube.mappings:
            mapping = self.cube.mappings[attribute.alias]
            original_mapping = mapping
            self.logger.debug("  not in mappings, using default trans: %s" %
                              mapping)
        else:
            original_mapping = None
            if attribute.dimension:
                mapping = prefix + attribute.alias
            else:
                mapping = attribute.alias

            # FIXME: make this work
            if locale:
                mapping = mapping + "_" + locale

            self.logger.debug("  defaulting to: %s" % mapping)

        (table_name, field_name) = self.split_field(mapping)
        if not table_name:
            table_name = self.fact_name

        table = self.table(table_name)

        try:
            column = table.c[field_name]
        except KeyError:
            raise model.ModelError("Mapped column '%s' does not exist (as %s.%s)" \
                                        % (localized_alias, table_name, field_name) )

        self.logger.debug("adding column %s as %s" % (column, localized_alias))
        # self.mappings[localized_alias] = column
        return expression.label(localized_alias, column)
Example #27
def stations2_filtered_pl(start, end):
    last_10_minutes = datetime.utcnow() - timedelta(minutes=10)

    query = (db.session.query(
        Receiver.name.label("s"),
        label("lt",
              func.round(func.ST_Y(Receiver.location_wkt) * 10000) / 10000),
        label("lg",
              func.round(func.ST_X(Receiver.location_wkt) * 10000) / 10000),
        case([(Receiver.lastseen > last_10_minutes, "U")],
             else_="D").label("u"),
        Receiver.lastseen.label("ut"),
        label("v", Receiver.version + "." + Receiver.platform),
    ).order_by(Receiver.lastseen).filter(
        db.or_(db.and_(start < Receiver.firstseen, end > Receiver.firstseen),
               db.and_(start < Receiver.lastseen, end > Receiver.lastseen))))

    res = db.session.execute(query)
    stations = json.dumps({"stations": [dict(r) for r in res]},
                          default=alchemyencoder)

    return stations
Example #28
    def _attach_ignored_data(self) -> TopicQuery:
        """Join the data related to whether the user has ignored the topic."""
        query = self.join(
            TopicIgnore,
            and_(
                TopicIgnore.topic_id == Topic.topic_id,
                TopicIgnore.user == self.request.user,
            ),
            isouter=(not self._only_ignored),
        )
        query = query.add_columns(label("ignored_time", TopicIgnore.created_time))

        return query
Example #29
    def get_messages_by_people(self, session, limit=10):
        '''SELECT m.mailing_list_url, lower(mp.email_address) as email,
                  count(m.message_ID) as t
             FROM messages m, messages_people mp
            WHERE m.message_ID = mp.message_ID
              AND mp.type_of_recipient = 'From'
            GROUP BY m.mailing_list_url, email
            ORDER BY t desc, email limit %s;'''

        m = aliased(db.Messages)
        mp = aliased(db.MessagesPeople)
        ret = session.query(m.mailing_list_url,
                            label('email', func.lower(mp.email_address)),
                            label('t', func.count(m.message_id))) \
            .filter(m.message_id == mp.message_id) \
            .filter(mp.type_of_recipient == 'From') \
            .group_by(m.mailing_list_url,
                      func.lower(mp.email_address)) \
            .order_by(func.count(m.message_id).desc(),
                      func.lower(mp.email_address)) \
            .limit(limit)
        return ret.all()
Example #30
    def _attach_bookmark_data(self) -> "TopicQuery":
        """Join the data related to whether the user has bookmarked the topic."""
        query = self.join(
            TopicBookmark,
            and_(
                TopicBookmark.topic_id == Topic.topic_id,
                TopicBookmark.user == self.request.user,
            ),
            isouter=(not self._only_bookmarked),
        )
        query = query.add_columns(label("bookmarked_time", TopicBookmark.created_time))

        return query
Example #31
    def get_messages_by_people(self, session, limit=10):
        '''SELECT m.mailing_list_url, lower(mp.email_address) as email,
                  count(m.message_ID) as t
             FROM messages m, messages_people mp
            WHERE m.message_ID = mp.message_ID
              AND mp.type_of_recipient = 'From'
            GROUP BY m.mailing_list_url, email
            ORDER BY t desc, email limit %s;'''

        m = aliased(db.Messages)
        mp = aliased(db.MessagesPeople)
        ret = session.query(m.mailing_list_url,
                            label('email', func.lower(mp.email_address)),
                            label('t', func.count(m.message_id)))\
                     .filter(m.message_id == mp.message_id)\
                     .filter(mp.type_of_recipient == 'From')\
                     .group_by(m.mailing_list_url,
                               func.lower(mp.email_address))\
                     .order_by(func.count(m.message_id).desc(),
                               func.lower(mp.email_address))\
                     .limit(limit)
        return ret.all()
Example #32
 def average(cls, session, *, guild_xid, channel_xid, scope, window_min):
     filters = [
         WaitTime.guild_xid == guild_xid,
         WaitTime.created_at >
         datetime.utcnow() - timedelta(minutes=window_min),
     ]
     if scope == "channel":
         filters.append(WaitTime.channel_xid == channel_xid)
     row = (session.query(
         label("average",
               func.sum(WaitTime.seconds) / func.count())).filter(
                   and_(*filters)).one_or_none())
     return row.average if row else None
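
Because the expression is labeled 'average', the returned row exposes it as a .average attribute, which the last line relies on. A minimal end-to-end demonstration (hypothetical model, in-memory SQLite, SQLAlchemy 1.4+):

from sqlalchemy import Column, Integer, create_engine, func
from sqlalchemy.orm import Session, declarative_base
from sqlalchemy.sql.expression import label

Base = declarative_base()

class WaitTime(Base):  # hypothetical minimal model
    __tablename__ = "wait_times"
    id = Column(Integer, primary_key=True)
    seconds = Column(Integer)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([WaitTime(seconds=30), WaitTime(seconds=90)])
    row = session.query(
        label("average", func.sum(WaitTime.seconds) / func.count())
    ).one_or_none()
    assert row.average == 60  # the label becomes the row attribute name
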
Example #33
    def _attach_vote_data(self) -> "TopicQuery":
        """Join the data related to whether the user has voted on the topic."""
        query = self.join(
            TopicVote,
            and_(
                TopicVote.topic_id == Topic.topic_id,
                TopicVote.user == self.request.user,
            ),
            isouter=(not self._only_user_voted),
        )
        query = query.add_columns(label("voted_time", TopicVote.created_time))

        return query
Example #34
    def _attach_vote_data(self) -> CommentQuery:
        """Join the data related to whether the user has voted on the comment."""
        query = self.join(
            CommentVote,
            and_(
                CommentVote.comment_id == Comment.comment_id,
                CommentVote.user == self.request.user,
            ),
            isouter=(not self._only_user_voted),
        )
        query = query.add_columns(label("voted_time", CommentVote.created_time))

        return query
Example #35
    def top(self, page=0, per_page=25):
        # group by post_id; grouping by the upvote's own primary key would make every count 1
        upvote_counts = s.query(PostUpvote.post_id, func.count(PostUpvote.id).label('count')) \
                         .group_by(PostUpvote.post_id) \
                         .subquery()

        total_upvotes = coalesce(upvote_counts.c.count, 0)

        query = s.query(Post, label('total_upvotes', total_upvotes))
        query = query.outerjoin(upvote_counts, upvote_counts.c.post_id == Post.id) \
                     .order_by(desc('total_upvotes')) \
                     .offset(page * per_page) \
                     .limit(per_page)

        return [result[0] for result in list(query)]
Example #36
    def _attach_bookmark_data(self) -> "CommentQuery":
        """Join the data related to whether the user has bookmarked the comment."""
        query = self.join(
            CommentBookmark,
            and_(
                CommentBookmark.comment_id == Comment.comment_id,
                CommentBookmark.user == self.request.user,
            ),
            isouter=(not self._only_bookmarked),
        )
        query = query.add_columns(
            label("bookmarked_time", CommentBookmark.created_time))

        return query
Example #37
    def _select_column(self, attribute, locale = None):
        """get select column"""
        
        if locale:
            localized_alias = attribute.alias + "." + locale
        else:
            localized_alias = attribute.alias

        if self.dimension_table_prefix:
            prefix = self.dimension_table_prefix
        else:
            prefix = ""
        self.logger.debug("looking for mapping %s (%s)" % (localized_alias, attribute.alias))

        if self.cube.mappings and localized_alias in self.cube.mappings:
            mapping = self.cube.mappings[localized_alias]
            original_mapping = mapping
            self.logger.debug("  is in mappings: %s" % mapping)
        elif self.cube.mappings and attribute.alias in self.cube.mappings:
            mapping = self.cube.mappings[attribute.alias]
            original_mapping = mapping
            self.logger.debug("  not in mappings, using default trans: %s" % mapping)
        else:
            original_mapping = None
            if attribute.dimension:
                mapping = prefix + attribute.alias
            else:
                mapping = attribute.alias

            # FIXME: make this work
            if locale:
                mapping = mapping + "_" + locale
                
            self.logger.debug("  defaulting to: %s" % mapping)

        (table_name, field_name) = self.split_field(mapping)
        if not table_name:
            table_name = self.fact_name
            
        table = self.table(table_name)

        try:
            column = table.c[field_name]
        except KeyError:
            raise model.ModelError("Mapped column '%s' does not exist (as %s.%s)" \
                                        % (localized_alias, table_name, field_name) )
        
        self.logger.debug("adding column %s as %s" % (column, localized_alias))
        # self.mappings[localized_alias] = column
        return expression.label(localized_alias, column)
Example #38
    def aggregate(self,
                  table,
                  groupby,
                  filters={},
                  aggregate='count(*)',
                  page=0,
                  page_size=100,
                  orderby=None):
        self.log.info(
            ("table=%s, groupby=%s, filters=%s, aggregate=%s, page=%s,"
             " page_size=%s, orderby=%s"), table, groupby, filters, aggregate,
            page, page_size, orderby)
        table_ = self._table(table)
        columnd = {col.name: col for col in table_.columns}

        if isinstance(groupby, basestring):
            groupby = [groupby]
        if isinstance(aggregate, basestring):
            aggregate = [aggregate]

        session = self.sessionmaker()
        try:
            groupby_ = [label(c, str2col(c, table_)) for c in groupby]
            aggregate_ = [label(a, str2col(a, table_)) for a in aggregate]

            query = session.query(*(aggregate_ + groupby_))
            query = with_filters(query, table_, filters)
            query = query.group_by(*groupby_)
            query = with_orderby(query, table_, orderby)
            query = with_pagination(query, table_, page, page_size)
            result = result2dict(query.all())

            self.log.info("retrieved %d rows", len(result))
            return result

        finally:
            session.close()
Example #39
    def build_query_to_populate(self, query, full_table, aggregate_table):
        insert_columns = [aggregate_table.c.isp]
        ip_range = Column("ip_range", INT8RANGE)
        isp_name = Column("label", String)
        join_table = Table(self.maxmind_table,
                           full_table.metadata,
                           ip_range,
                           isp_name,
                           keep_existing=True)
        isp_label = label('maxmind_isp', self._sql_rewrite(isp_name))
        select_query = (query.select_from(join_table).where(
            ip_range.contains(full_table.c.client_ip)).column(
                isp_label).group_by('maxmind_isp'))

        return insert_columns, select_query
Example #40
def get_summary_distribution(state_code,
                             district_id=None,
                             school_id=None,
                             asmt_type=AssessmentType.SUMMATIVE):
    '''
    Get a bucketed distribution of scores
    '''
    with EdCoreDBConnection(state_code=state_code) as connection:
        fact_asmt_outcome_vw = connection.get_table('fact_asmt_outcome')
        # should it always be for summative?
        query = select(
            [label(Constants.SCORE_BUCKET,
                   (fact_asmt_outcome_vw.c.asmt_score / get_bucket_size()) * get_bucket_size()),
             count(case([(fact_asmt_outcome_vw.c.asmt_subject == Constants.MATH, 1)],
                        else_=0)).label(Constants.TOTAL_MATH),
             count(case([(fact_asmt_outcome_vw.c.asmt_subject == Constants.ELA, 1)],
                        else_=0)).label(Constants.TOTAL_ELA)],
            from_obj=[fact_asmt_outcome_vw])
        query = query.where(fact_asmt_outcome_vw.c.state_code == state_code)
        query = query.where(fact_asmt_outcome_vw.c.asmt_type == asmt_type)
        query = query.where(
            fact_asmt_outcome_vw.c.rec_status == Constants.CURRENT)
        if (district_id is not None):
            query = query.where(
                fact_asmt_outcome_vw.c.district_id == district_id)
        if (school_id is not None):
            query = query.where(fact_asmt_outcome_vw.c.school_id == school_id)
        query = query.group_by(Constants.SCORE_BUCKET).order_by(
            Constants.SCORE_BUCKET)
        return connection.get_result(query)
Example #41
def generate_show():
    subquery = (db.session.query(LikeModel.media,
                                 func.count(1).label('count'))
                .group_by(LikeModel.media).subquery())
    now = int(time.time() / 7200)
    order = expression.label(
        'hacker',
        (subquery.c.count + 1.0)
        / (now - ShowModel.hour_tagged + 2.0)
        / (now - ShowModel.hour_tagged + 2.0))
    medias =\
        (db.session.query(ShowModel)
         .filter(ShowModel.showable == 0)
         .outerjoin(subquery, ShowModel.mid == subquery.c.media)
         .filter(ShowModel.mid != None)     # NOQA
         .order_by(order.desc())
         .order_by(ShowModel.date_tagged.desc())
         .order_by(ShowModel.date_created.desc())
         .all())
    return [x.mid for x in medias]
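
The 'hacker' ordering is a Hacker-News-style decay: (likes + 1) / (age + 2)^2, with age measured in two-hour units (time.time() / 7200; hour_tagged appears to be stored in the same units). A quick check that freshness can outweigh raw like counts:

def hacker_score(likes, age_units):
    # (count + 1) / (age + 2)^2, age in two-hour units as in the query above
    return (likes + 1.0) / (age_units + 2.0) ** 2

assert hacker_score(likes=0, age_units=0) > hacker_score(likes=2, age_units=2)
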
Example #42
def get_summary_distribution(state_code, district_id=None, school_id=None, asmt_type=AssessmentType.SUMMATIVE):
    '''
    Get a bucketed distribution of scores
    '''
    with EdCoreDBConnection(state_code=state_code) as connection:
        fact_asmt_outcome_vw = connection.get_table('fact_asmt_outcome')
        # should it always be for summative?
        query = select([label(Constants.SCORE_BUCKET, (fact_asmt_outcome_vw.c.asmt_score / get_bucket_size()) * get_bucket_size()),
                        count(case([(fact_asmt_outcome_vw.c.asmt_subject == Constants.MATH, 1)], else_=0)).label(Constants.TOTAL_MATH),
                        count(case([(fact_asmt_outcome_vw.c.asmt_subject == Constants.ELA, 1)], else_=0)).label(Constants.TOTAL_ELA)],
                       from_obj=[fact_asmt_outcome_vw])
        query = query.where(fact_asmt_outcome_vw.c.state_code == state_code)
        query = query.where(fact_asmt_outcome_vw.c.asmt_type == asmt_type)
        query = query.where(fact_asmt_outcome_vw.c.rec_status == Constants.CURRENT)
        if (district_id is not None):
            query = query.where(fact_asmt_outcome_vw.c.district_id == district_id)
        if (school_id is not None):
            query = query.where(fact_asmt_outcome_vw.c.school_id == school_id)
        query = query.group_by(Constants.SCORE_BUCKET).order_by(Constants.SCORE_BUCKET)
        return connection.get_result(query)
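
With integer scores, (asmt_score / bucket_size) * bucket_size floors each score to its bucket's lower bound (integer division in SQL, // in Python 3). For example, with a bucket size of 20, a score of 57 falls into bucket 40:

bucket_size = 20  # illustrative; the real value comes from get_bucket_size()
score = 57
assert (score // bucket_size) * bucket_size == 40
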
Example #43
def get_synthese_stat():
    params = request.args
    q = DB.session.query(
        label("year", func.date_part("year", VSynthese.date_min)),
        func.count(VSynthese.id_synthese),
        func.count(distinct(VSynthese.cd_ref)),
    ).group_by("year")
    if ("selectedRegne" in params) and (params["selectedRegne"] != ""):
        q = q.filter(VSynthese.regne == params["selectedRegne"])
    if ("selectedPhylum" in params) and (params["selectedPhylum"] != ""):
        q = q.filter(VSynthese.phylum == params["selectedPhylum"])
    if "selectedClasse" in params and (params["selectedClasse"] != ""):
        q = q.filter(VSynthese.classe == params["selectedClasse"])
    if "selectedOrdre" in params and (params["selectedOrdre"] != ""):
        q = q.filter(VSynthese.ordre == params["selectedOrdre"])
    if "selectedFamille" in params and (params["selectedFamille"] != ""):
        q = q.filter(VSynthese.famille == params["selectedFamille"])
    if ("selectedGroup2INPN" in params) and (params["selectedGroup2INPN"] != ""):
        q = q.filter(VSynthese.group2_inpn == params["selectedGroup2INPN"])
    if ("selectedGroup1INPN" in params) and (params["selectedGroup1INPN"] != ""):
        q = q.filter(VSynthese.group1_inpn == params["selectedGroup1INPN"])
    if ("taxon" in params) and (params["taxon"] != ""):
        q = q.filter(VSynthese.cd_ref == params["taxon"])
    return q.all()
Example #44
File: browser.py Project: mrcrabby/cubes
    def column(self, field, dimension = None):
        """Return a table column for `field` which can be either :class:`cubes.Attribute` or a string.
        
        Possible column names:
        * ``field`` for fact field or flat dimension
        * ``field.locale`` for localized fact field or flat dimension
        * ``dimension.field`` for multi-level dimension field
        * ``dimension.field.locale`` for localized multi-level dimension field
        """

        # FIXME: should use: field.full_name(dimension, self.locale)
        # if there is no localization for field, use default name/first locale
        locale_suffix = ""

        if isinstance(field, cubes.model.Attribute) and field.locales:
            locale = self.locale if self.locale in field.locales else field.locales[0]
            locale_suffix = "." + locale

        if dimension:
            # FIXME: temporary flat dimension hack, not sure about impact of this to other parts of the
            # framework
            # FIXME: the third condition is a temporary quick fix for https://github.com/Stiivi/cubes/issues/14
            field_name = str(field)
            if not dimension.is_flat or dimension.has_details or dimension.name != field_name:
                logical_name = dimension.name + '.' + field_name
            else:
                logical_name = field_name
        else:
            logical_name = field

        self.logger.debug("getting column %s(%s) loc: %s - %s" % (field, type(field), self.locale, locale_suffix))

        localized_name = logical_name + locale_suffix

        column = self.view.c[localized_name]
        return expression.label(logical_name, column)
Example #45
def get_slow_pages(path=None):
    l = label("average", func.avg(ViewLog.load_time))
    c = label("count", func.count(ViewLog.id))

    return (
        DBSession.query(
            ViewLog.path,
            l,
            c,
            label("cumulative_time", func.sum(ViewLog.load_time)),
            label("stddev", func.stddev_pop(ViewLog.load_time)),
            label("maximum", func.max(ViewLog.load_time)),
            label("minimum", func.min(ViewLog.load_time)),
        )
        .filter(ViewLog.path == path if path != None else True)
        .having(c > 2)
        .group_by(ViewLog.path)
        .order_by(l.desc())
    )
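
Binding the label() results to variables, as this example does with l and c, lets the same labeled expressions be reused in HAVING and ORDER BY without retyping the aggregates. A self-contained Core sketch of the idiom (hypothetical table, SQLAlchemy 1.4+):

from sqlalchemy import Column, Float, Integer, MetaData, String, Table, func, select
from sqlalchemy.sql.expression import label

metadata = MetaData()
view_log = Table(  # hypothetical table, for illustration only
    "view_log", metadata,
    Column("id", Integer, primary_key=True),
    Column("path", String),
    Column("load_time", Float),
)

avg_time = label("average", func.avg(view_log.c.load_time))
hits = label("count", func.count(view_log.c.id))
stmt = (
    select(view_log.c.path, avg_time, hits)
    .group_by(view_log.c.path)
    .having(hits > 2)           # reuse the labeled aggregate in HAVING
    .order_by(avg_time.desc())  # and in ORDER BY
)
print(stmt)
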
Example #46
 def build_query_to_report(self, query, aggregate_table):
     a = aggregate_table
     return query.column(label("upload_max", func.max(a.c.upload_max)))
Example #47
def compute_logbook_entries(session=None):
    logger.info("Compute logbook.")

    if session is None:
        session = app.session

    # debug-only date filter, left disabled: or_args is reset to [] below
    or_args = [
        between(TakeoffLanding.timestamp, '2016-06-28 00:00:00',
                '2016-06-28 23:59:59')
    ]
    or_args = []

    # 'wo' is the window order for the sql window function
    wo = and_(func.date(TakeoffLanding.timestamp), TakeoffLanding.device_id,
              TakeoffLanding.timestamp, TakeoffLanding.airport_id)

    # make a query with current, previous and next "takeoff_landing" event, so we can find complete flights
    sq = session.query(
            TakeoffLanding.device_id,
            func.lag(TakeoffLanding.device_id).over(order_by=wo).label('device_id_prev'),
            func.lead(TakeoffLanding.device_id).over(order_by=wo).label('device_id_next'),
            TakeoffLanding.timestamp,
            func.lag(TakeoffLanding.timestamp).over(order_by=wo).label('timestamp_prev'),
            func.lead(TakeoffLanding.timestamp).over(order_by=wo).label('timestamp_next'),
            TakeoffLanding.track,
            func.lag(TakeoffLanding.track).over(order_by=wo).label('track_prev'),
            func.lead(TakeoffLanding.track).over(order_by=wo).label('track_next'),
            TakeoffLanding.is_takeoff,
            func.lag(TakeoffLanding.is_takeoff).over(order_by=wo).label('is_takeoff_prev'),
            func.lead(TakeoffLanding.is_takeoff).over(order_by=wo).label('is_takeoff_next'),
            TakeoffLanding.airport_id,
            func.lag(TakeoffLanding.airport_id).over(order_by=wo).label('airport_id_prev'),
            func.lead(TakeoffLanding.airport_id).over(order_by=wo).label('airport_id_next')) \
        .filter(*or_args) \
        .subquery()

    # find complete flights (with takeoff and landing on the same day)
    complete_flight_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'), sq.c.track.label('takeoff_track'), sq.c.airport_id.label('takeoff_airport_id'),
            sq.c.timestamp_next.label('landing_timestamp'), sq.c.track_next.label('landing_track'), sq.c.airport_id_next.label('landing_airport_id'),
            label('duration', sq.c.timestamp_next - sq.c.timestamp)) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) == func.date(sq.c.timestamp))

    # split complete flights (with takeoff and landing on different days) into one takeoff and one landing
    split_start_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'), sq.c.track.label('takeoff_track'), sq.c.airport_id.label('takeoff_airport_id'),
            null().label('landing_timestamp'), null().label('landing_track'), null().label('landing_airport_id'),
            null().label('duration')) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) != func.date(sq.c.timestamp))

    split_landing_query = session.query(
            sq.c.timestamp_next.label('reftime'),
            sq.c.device_id.label('device_id'),
            null().label('takeoff_timestamp'), null().label('takeoff_track'), null().label('takeoff_airport_id'),
            sq.c.timestamp_next.label('landing_timestamp'), sq.c.track_next.label('landing_track'), sq.c.airport_id_next.label('landing_airport_id'),
            null().label('duration')) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) != func.date(sq.c.timestamp))

    # find landings without start
    only_landings_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            null().label('takeoff_timestamp'), null().label('takeoff_track'), null().label('takeoff_airport_id'),
            sq.c.timestamp.label('landing_timestamp'), sq.c.track.label('landing_track'), sq.c.airport_id.label('landing_airport_id'),
            null().label('duration')) \
        .filter(sq.c.is_takeoff == false()) \
        .filter(or_(sq.c.device_id != sq.c.device_id_prev,
                    sq.c.is_takeoff_prev == false(),
                    sq.c.is_takeoff_prev == null()))

    # find starts without landing
    only_starts_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'), sq.c.track.label('takeoff_track'), sq.c.airport_id.label('takeoff_airport_id'),
            null().label('landing_timestamp'), null().label('landing_track'), null().label('landing_airport_id'),
            null().label('duration')) \
        .filter(sq.c.is_takeoff == true()) \
        .filter(or_(sq.c.device_id != sq.c.device_id_next,
                    sq.c.is_takeoff_next == true(),
                    sq.c.is_takeoff_next == null()))

    # unite all computed flights
    union_query = complete_flight_query.union(
            split_start_query,
            split_landing_query,
            only_landings_query,
            only_starts_query) \
        .subquery()

    # if a logbook entry exists --> update it
    upd = update(Logbook) \
        .where(and_(Logbook.device_id == union_query.c.device_id,
                    union_query.c.takeoff_airport_id != null(),
                    union_query.c.landing_airport_id != null(),
                    or_(and_(Logbook.takeoff_airport_id == union_query.c.takeoff_airport_id,
                             Logbook.takeoff_timestamp == union_query.c.takeoff_timestamp,
                             Logbook.landing_airport_id == null()),
                        and_(Logbook.takeoff_airport_id == null(),
                             Logbook.landing_airport_id == union_query.c.landing_airport_id,
                             Logbook.landing_timestamp == union_query.c.landing_timestamp)))) \
        .values({"takeoff_timestamp": union_query.c.takeoff_timestamp,
                 "takeoff_track": union_query.c.takeoff_track,
                 "takeoff_airport_id": union_query.c.takeoff_airport_id,
                 "landing_timestamp": union_query.c.landing_timestamp,
                 "landing_track": union_query.c.landing_track,
                 "landing_airport_id": union_query.c.landing_airport_id,
                 "duration": union_query.c.duration})

    result = session.execute(upd)
    update_counter = result.rowcount
    session.commit()
    logger.debug("Updated logbook entries: {}".format(update_counter))

    # if a logbook entry doesn't exist --> insert it
    new_logbook_entries = session.query(union_query) \
        .filter(~exists().where(
            and_(Logbook.device_id == union_query.c.device_id,
                 or_(and_(Logbook.takeoff_airport_id == union_query.c.takeoff_airport_id,
                          Logbook.takeoff_timestamp == union_query.c.takeoff_timestamp),
                     and_(Logbook.takeoff_airport_id == null(),
                          union_query.c.takeoff_airport_id == null())),
                 or_(and_(Logbook.landing_airport_id == union_query.c.landing_airport_id,
                          Logbook.landing_timestamp == union_query.c.landing_timestamp),
                     and_(Logbook.landing_airport_id == null(),
                          union_query.c.landing_airport_id == null())))))

    ins = insert(Logbook).from_select(
        (Logbook.reftime, Logbook.device_id, Logbook.takeoff_timestamp,
         Logbook.takeoff_track, Logbook.takeoff_airport_id,
         Logbook.landing_timestamp, Logbook.landing_track,
         Logbook.landing_airport_id, Logbook.duration), new_logbook_entries)

    result = session.execute(ins)
    insert_counter = result.rowcount
    session.commit()
    logger.debug("New logbook entries: {}".format(insert_counter))

    return "{}/{}".format(update_counter, insert_counter)
示例#48
0
 def build_query_to_report(self, query, aggregate_table):
     median = func.median(aggregate_table.c.upload_samples)
     return query.column(label("upload_median", median))
示例#49
0
 def build_query_to_report(self, query, aggregate_table):
     a = aggregate_table
     return query.column(label("download_min", func.min(a.c.download_min)))
示例#50
0
 def build_query_to_report(self, query, aggregate_table):
     a = aggregate_table
     return query.column(label("upload_count", func.sum(a.c.upload_count)))
示例#51
0
 def build_query_to_report(self, query, aggregate_table):
     a = aggregate_table
     mean = func.sum(a.c.upload_octets) / func.sum(a.c.upload_time)
     is_safe = func.sum(a.c.upload_time) > 0
     safe_mean = case([(is_safe, mean)], else_=None)
     return query.column(label("upload_avg", safe_mean))
示例#52
0
def compute_logbook_entries(session=None):
    logger.info("Compute logbook.")

    if session is None:
        session = app.session

    # optional filters; e.g. restrict the computation to a single day for debugging:
    # or_args = [between(TakeoffLanding.timestamp, '2016-06-28 00:00:00', '2016-06-28 23:59:59')]
    or_args = []

    # 'wo' is the window ordering for the SQL window functions below
    wo = [func.date(TakeoffLanding.timestamp),
          TakeoffLanding.device_id,
          TakeoffLanding.timestamp,
          TakeoffLanding.airport_id]

    # make a query with current, previous and next "takeoff_landing" event, so we can find complete flights
    sq = session.query(
            TakeoffLanding.device_id,
            func.lag(TakeoffLanding.device_id).over(order_by=wo).label('device_id_prev'),
            func.lead(TakeoffLanding.device_id).over(order_by=wo).label('device_id_next'),
            TakeoffLanding.timestamp,
            func.lag(TakeoffLanding.timestamp).over(order_by=wo).label('timestamp_prev'),
            func.lead(TakeoffLanding.timestamp).over(order_by=wo).label('timestamp_next'),
            TakeoffLanding.track,
            func.lag(TakeoffLanding.track).over(order_by=wo).label('track_prev'),
            func.lead(TakeoffLanding.track).over(order_by=wo).label('track_next'),
            TakeoffLanding.is_takeoff,
            func.lag(TakeoffLanding.is_takeoff).over(order_by=wo).label('is_takeoff_prev'),
            func.lead(TakeoffLanding.is_takeoff).over(order_by=wo).label('is_takeoff_next'),
            TakeoffLanding.airport_id,
            func.lag(TakeoffLanding.airport_id).over(order_by=wo).label('airport_id_prev'),
            func.lead(TakeoffLanding.airport_id).over(order_by=wo).label('airport_id_next')) \
        .filter(*or_args) \
        .subquery()

    # find complete flights (with takeoff and landing on the same day)
    complete_flight_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'), sq.c.track.label('takeoff_track'), sq.c.airport_id.label('takeoff_airport_id'),
            sq.c.timestamp_next.label('landing_timestamp'), sq.c.track_next.label('landing_track'), sq.c.airport_id_next.label('landing_airport_id'),
            label('duration', sq.c.timestamp_next - sq.c.timestamp)) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) == func.date(sq.c.timestamp))

    # split complete flights (with takeoff and landing on different days) into one takeoff and one landing
    split_start_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'), sq.c.track.label('takeoff_track'), sq.c.airport_id.label('takeoff_airport_id'),
            null().label('landing_timestamp'), null().label('landing_track'), null().label('landing_airport_id'),
            null().label('duration')) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) != func.date(sq.c.timestamp))

    split_landing_query = session.query(
            sq.c.timestamp_next.label('reftime'),
            sq.c.device_id.label('device_id'),
            null().label('takeoff_timestamp'), null().label('takeoff_track'), null().label('takeoff_airport_id'),
            sq.c.timestamp_next.label('landing_timestamp'), sq.c.track_next.label('landing_track'), sq.c.airport_id_next.label('landing_airport_id'),
            null().label('duration')) \
        .filter(and_(sq.c.is_takeoff == true(), sq.c.is_takeoff_next == false())) \
        .filter(sq.c.device_id == sq.c.device_id_next) \
        .filter(func.date(sq.c.timestamp_next) != func.date(sq.c.timestamp))

    # find landings without start
    only_landings_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            null().label('takeoff_timestamp'), null().label('takeoff_track'), null().label('takeoff_airport_id'),
            sq.c.timestamp.label('landing_timestamp'), sq.c.track.label('landing_track'), sq.c.airport_id.label('landing_airport_id'),
            null().label('duration')) \
        .filter(sq.c.is_takeoff == false()) \
        .filter(or_(sq.c.device_id != sq.c.device_id_prev,
                    sq.c.is_takeoff_prev == false(),
                    sq.c.is_takeoff_prev == null()))

    # find starts without landing
    only_starts_query = session.query(
            sq.c.timestamp.label('reftime'),
            sq.c.device_id.label('device_id'),
            sq.c.timestamp.label('takeoff_timestamp'), sq.c.track.label('takeoff_track'), sq.c.airport_id.label('takeoff_airport_id'),
            null().label('landing_timestamp'), null().label('landing_track'), null().label('landing_airport_id'),
            null().label('duration')) \
        .filter(sq.c.is_takeoff == true()) \
        .filter(or_(sq.c.device_id != sq.c.device_id_next,
                    sq.c.is_takeoff_next == true(),
                    sq.c.is_takeoff_next == null()))

    # unite all computed flights
    union_query = complete_flight_query.union(
            split_start_query,
            split_landing_query,
            only_landings_query,
            only_starts_query) \
        .subquery()

    # if a logbook entry exists --> update it
    upd = update(Logbook) \
        .where(and_(Logbook.device_id == union_query.c.device_id,
                    union_query.c.takeoff_airport_id != null(),
                    union_query.c.landing_airport_id != null(),
                    or_(and_(Logbook.takeoff_airport_id == union_query.c.takeoff_airport_id,
                             Logbook.takeoff_timestamp == union_query.c.takeoff_timestamp,
                             Logbook.landing_airport_id == null()),
                        and_(Logbook.takeoff_airport_id == null(),
                             Logbook.landing_airport_id == union_query.c.landing_airport_id,
                             Logbook.landing_timestamp == union_query.c.landing_timestamp)))) \
        .values({"takeoff_timestamp": union_query.c.takeoff_timestamp,
                 "takeoff_track": union_query.c.takeoff_track,
                 "takeoff_airport_id": union_query.c.takeoff_airport_id,
                 "landing_timestamp": union_query.c.landing_timestamp,
                 "landing_track": union_query.c.landing_track,
                 "landing_airport_id": union_query.c.landing_airport_id,
                 "duration": union_query.c.duration})

    result = session.execute(upd)
    update_counter = result.rowcount
    session.commit()
    logger.debug("Updated logbook entries: {}".format(update_counter))

    # if a logbook entry doesn't exist --> insert it
    new_logbook_entries = session.query(union_query) \
        .filter(~exists().where(
            and_(Logbook.device_id == union_query.c.device_id,
                 or_(and_(Logbook.takeoff_airport_id == union_query.c.takeoff_airport_id,
                          Logbook.takeoff_timestamp == union_query.c.takeoff_timestamp),
                     and_(Logbook.takeoff_airport_id == null(),
                          union_query.c.takeoff_airport_id == null())),
                 or_(and_(Logbook.landing_airport_id == union_query.c.landing_airport_id,
                          Logbook.landing_timestamp == union_query.c.landing_timestamp),
                     and_(Logbook.landing_airport_id == null(),
                          union_query.c.landing_airport_id == null())))))

    ins = insert(Logbook).from_select((Logbook.reftime,
                                       Logbook.device_id,
                                       Logbook.takeoff_timestamp,
                                       Logbook.takeoff_track,
                                       Logbook.takeoff_airport_id,
                                       Logbook.landing_timestamp,
                                       Logbook.landing_track,
                                       Logbook.landing_airport_id,
                                       Logbook.duration),
                                      new_logbook_entries)

    result = session.execute(ins)
    insert_counter = result.rowcount
    session.commit()
    logger.debug("New logbook entries: {}".format(insert_counter))

    return "{}/{}".format(update_counter, insert_counter)
示例#53
0
 def build_query_to_report(self, query, aggregate_table):
     median = func.median(aggregate_table.c.rtt_samples)
     return query.column(label("MedianRTT", median))
示例#54
0
 def build_query_to_report(self, query, aggregate_table):
     a = aggregate_table
     return query.column(label("download_min", func.min(a.c.download_min)))
示例#55
0
 def build_query_to_report(self, query, aggregate_table):
     a = aggregate_table
     return query.column(label("upload_count", func.sum(a.c.upload_count)))
示例#56
0
    def __call__(self, user_ids, session):
        """
        Parameters:
            user_ids    : list of mediawiki user ids to find pages for
            session     : sqlalchemy session open on a mediawiki database

        Returns:
            dictionary from user ids to the number of pages edited found
        """
        start_date = self.start_date.data
        end_date = self.end_date.data
        deduplicate = self.deduplicate_across_users.data

        revisions = (
            session.query(
                label('user_id', Revision.rev_user),
                label('page_id', Revision.rev_page),
                label('timestamp', Revision.rev_timestamp)
            )
            .filter(Revision.rev_timestamp > start_date)
            .filter(Revision.rev_timestamp <= end_date))

        archives = (
            session.query(
                label('user_id', Archive.ar_user),
                label('page_id', Archive.ar_page_id),
                label('timestamp', Archive.ar_timestamp)
            )
            .filter(Archive.ar_timestamp > start_date)
            .filter(Archive.ar_timestamp <= end_date))

        if self.namespaces.data and len(self.namespaces.data) > 0:
            revisions = (
                revisions
                .join(Page)
                .filter(Page.page_namespace.in_(self.namespaces.data))
            )
            archives = (
                archives
                .filter(Archive.ar_namespace.in_(self.namespaces.data))
            )

        revisions = self.filter(revisions, user_ids, column=Revision.rev_user)
        archives = self.filter(archives, user_ids, column=Archive.ar_user)

        both = revisions
        if self.include_deleted.data:
            both = both.union_all(archives)
        both = both.subquery()

        if deduplicate:
            # Use a constant user id here to deduplicate only by page
            # A single result will be returned and assigned to user_id = ROLLUP_USER_ID
            both_grouped = (
                session.query(
                    label('user_id', literal_column(str(ROLLUP_USER_ID))), both.c.page_id
                )
                .distinct().subquery()
            )
        else:
            # Select distinct user_id-page_id pairs
            # to count edits by the same user on the same page as one
            both_grouped = (
                session.query(both.c.user_id, both.c.page_id)
                .distinct().subquery()
            )

        query = (
            session.query(both_grouped.c.user_id, func.count())
            .group_by(both_grouped.c.user_id)
        )

        # Format the output
        metric_results = {r[0]: {PagesEdited.id : r[1]} for r in query.all()}
        if user_ids is None:
            return metric_results
        elif deduplicate:
            ret = {}
            ret[ROLLUP_USER_ID] = metric_results.get(
                ROLLUP_USER_ID, self.default_result
            )
            return ret
        else:
            return {
                uid: metric_results.get(uid, self.default_result)
                for uid in user_ids
            }
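Selecting distinct (user_id, page_id) pairs before grouping is what makes this count pages rather than edits: repeated edits by one user on one page collapse to a single row. In the non-deduplicated branch the result is equivalent to COUNT(DISTINCT page_id) per user; a sketch under that assumption:

    from sqlalchemy import distinct, func

    # equivalent to the distinct-pairs subquery plus count in the code above
    query = (
        session.query(both.c.user_id,
                      func.count(distinct(both.c.page_id)))
        .group_by(both.c.user_id)
    )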
示例#57
0
 def build_query_to_report(self, query, aggregate_table):
     a = aggregate_table
     mean = func.sum(a.c.upload_octets) / func.sum(a.c.upload_time)
     is_safe = func.sum(a.c.upload_time) > 0
     safe_mean = case([(is_safe, mean)], else_=None)
     return query.column(label("upload_avg", safe_mean))
示例#58
0
 def build_query_to_report(self, query, aggregate_table):
     median = func.median(aggregate_table.c.upload_samples)
     return query.column(label("upload_median", median))
示例#59
0
 def build_query_to_report(self, query, aggregate_table):
     median = func.median(aggregate_table.c.rtt_samples)
     return query.column(label("MedianRTT", median))
示例#60
0
 def build_query_to_report(self, query, aggregate_table):
     a = aggregate_table
     return query.column(label("upload_max", func.max(a.c.upload_max)))