示例#1
0
文件: FTS3DB.py 项目: DIRACGrid/DIRAC
    def kickStuckOperations(self, limit=20, kickDelay=2):
        """Reset the assignment of operations that are still assigned but whose
        lastUpdate is older than a given number of hours.

        :param int limit: maximum number of operations to treat
        :param int kickDelay: age of the lastUpdate in hours
        :returns: S_OK(number of kicked operations) or S_ERROR
        """

        session = self.dbSession(expire_on_commit=False)

        try:
            # An operation is considered stuck once lastUpdate falls before
            # this cutoff (now - kickDelay hours, computed server-side).
            stuckCutoff = func.date_sub(
                utc_timestamp(), text("INTERVAL %d HOUR" % kickDelay))

            stuckOps = (
                session.query(FTS3Operation.operationID)
                .filter(FTS3Operation.lastUpdate < stuckCutoff)
                .filter(~FTS3Operation.assignment.is_(None))
                .limit(limit)
            )

            opIDs = [row[0] for row in stuckOps]

            rowCount = 0
            if opIDs:
                # The lastUpdate condition is repeated in the UPDATE itself so
                # an operation refreshed since the SELECT is left alone.
                result = session.execute(
                    update(FTS3Operation)
                    .where(FTS3Operation.operationID.in_(opIDs))
                    .where(FTS3Operation.lastUpdate < stuckCutoff)
                    .values({"assignment": None})
                    .execution_options(
                        synchronize_session=False
                    )  # see comment about synchronize_session
                )
                rowCount = result.rowcount

            session.commit()
            session.expunge_all()

            return S_OK(rowCount)

        except SQLAlchemyError as e:
            session.rollback()
            return S_ERROR("kickStuckOperations: unexpected exception : %s" %
                           e)
        finally:
            session.close()
示例#2
0
文件: FTS3DB.py 项目: pmusset/DIRAC
  def kickStuckJobs(self, limit=20, kickDelay=2):
    """Reset the assignment of jobs that are still assigned but whose
    lastUpdate is older than a given number of hours.

    :param int limit: maximum number of jobs to treat
    :param int kickDelay: age of the lastUpdate in hours
    :returns: S_OK(number of kicked jobs) or S_ERROR
    """

    session = self.dbSession(expire_on_commit=False)

    try:
      # Jobs are considered stuck once lastUpdate is older than this
      # server-side cutoff (now - kickDelay hours).
      cutoff = func.date_sub(func.utc_timestamp(),
                             text('INTERVAL %d HOUR' % kickDelay))

      stuckJobs = session.query(FTS3Job.jobID)\
          .filter(FTS3Job.lastUpdate < cutoff)\
          .filter(~FTS3Job.assignment.is_(None))\
          .limit(limit)

      jobIDs = [row[0] for row in stuckJobs]

      rowCount = 0
      if jobIDs:
        # Repeat the lastUpdate check in the UPDATE so that a job refreshed
        # between the SELECT and the UPDATE is not kicked.
        kickStatement = update(FTS3Job) \
            .where(FTS3Job.jobID.in_(jobIDs)) \
            .where(FTS3Job.lastUpdate < cutoff) \
            .values({'assignment': None})
        rowCount = session.execute(kickStatement).rowcount

      session.commit()
      session.expunge_all()

      return S_OK(rowCount)

    except SQLAlchemyError as e:
      session.rollback()
      return S_ERROR("kickStuckJobs: unexpected exception : %s" % e)
    finally:
      session.close()
示例#3
0
文件: FTS3DB.py 项目: DIRACGrid/DIRAC
  def kickStuckJobs(self, limit=20, kickDelay=2):
    """Find jobs that are still assigned but have not been updated for more
    than kickDelay hours, and clear their assignment.

    :param int limit: maximum number of jobs to treat
    :param int kickDelay: age of the lastUpdate in hours
    :returns: S_OK(number of kicked jobs) or S_ERROR
    """

    session = self.dbSession(expire_on_commit=False)

    try:
      # lastUpdate older than (utc now - kickDelay hours) marks a stuck job.
      staleBefore = func.date_sub(func.utc_timestamp(),
                                  text('INTERVAL %d HOUR' % kickDelay))

      candidates = (session.query(FTS3Job.jobID)
                    .filter(FTS3Job.lastUpdate < staleBefore)
                    .filter(~FTS3Job.assignment.is_(None))
                    .limit(limit))

      jobIDs = [jobID for (jobID,) in candidates]
      rowCount = 0

      if jobIDs:
        # The stale check is applied again inside the UPDATE so a job that
        # was updated in the meantime keeps its assignment.
        result = session.execute(
            update(FTS3Job)
            .where(FTS3Job.jobID.in_(jobIDs))
            .where(FTS3Job.lastUpdate < staleBefore)
            .values({'assignment': None}))
        rowCount = result.rowcount

      session.commit()
      session.expunge_all()

      return S_OK(rowCount)

    except SQLAlchemyError as e:
      session.rollback()
      return S_ERROR("kickStuckJobs: unexpected exception : %s" % e)
    finally:
      session.close()
示例#4
0
def add_outcomes(
    feature_query,
    start_time: datetime,
    positive_event_lookahead: int = 1,
):
    """Left-join outcome labels onto *feature_query*.

    Relevant events are those whose type appears in LABELS and whose time
    falls within (start_time, start_time + positive_event_lookahead] days.
    Returns a subquery named 'feature_query_w_outcome' with added 'outcome'
    and 'outcome_date' columns.
    """
    window_end = start_time + timedelta(days=positive_event_lookahead)

    # The events table holds all the events, not just conversion ones
    relevant_events = bq_session.query(
        events.c['time'].cast(DATE).label('date'),
        events.c['type'].label('outcome'),
        events.c['user_id'].label('user_id'),
    ).filter(
        events.c['type'].in_(list(LABELS.keys())),
        cast(events.c['time'], DATE) > cast(start_time, DATE),
        cast(events.c['time'], DATE) <= cast(window_end, DATE),
    ).subquery()

    # A user can have several outcomes in the same period — e.g. user_id
    # 195379 in 4/2020 renews, then cancels and gets a refund, so the
    # pipeline produces both labels.  A single churn event makes the whole
    # period count as churned.
    churn_present = literal(negative_label()).in_(
        func.unnest(func.array_agg(relevant_events.c['outcome'])))
    outcome_column = case(
        [(churn_present, negative_label())],
        # Any number of positive-only events counts as a renewal.
        else_=positive_labels(),
    ).label('outcome')

    # TODO: Remove deduplication, once the event table doesn't contain any
    relevant_events_deduplicated = bq_session.query(
        relevant_events.c['date'],
        relevant_events.c['user_id'],
        outcome_column,
    ).group_by(
        relevant_events.c['date'].label('date'),
        relevant_events.c['user_id'].label('user_id'),
    ).subquery()

    # Attach the outcome to feature rows whose date lies within the
    # lookahead window ending on the outcome date.
    join_condition = and_(
        feature_query.c['user_id'] ==
        relevant_events_deduplicated.c['user_id'],
        feature_query.c['date'] >= func.date_sub(
            relevant_events_deduplicated.c['date'],
            text(f'interval {positive_event_lookahead} day')),
        feature_query.c['date'] <= relevant_events_deduplicated.c['date'],
    )

    feature_query_w_outcome = bq_session.query(
        feature_query,
        relevant_events_deduplicated.c['outcome'].label('outcome'),
        relevant_events_deduplicated.c['date'].label('outcome_date'),
    ).outerjoin(
        relevant_events_deduplicated,
        join_condition,
    ).subquery('feature_query_w_outcome')

    return feature_query_w_outcome
示例#5
0
    def add_outcomes(self, feature_query):
        """Left-join outcome labels and a shared-account-login count onto
        *feature_query*, returning the combined subquery."""
        # The events table holds all the events, not just conversion ones
        relevant_events = self.bq_session.query(
            self.events.c['time'].cast(DATE).label('date'),
            self.events.c['type'].label('outcome'),
            self.events.c['browser_id'].label('browser_id'),
        ).filter(
            self.events.c['type'].in_(list(LABELS.keys())),
        ).subquery()

        # This assumes we're always aggregating for a given day; doing
        # multiple days at once would need a rolling window function.
        login_window_start = cast(self.aggregation_time - timedelta(days=30),
                                  DATE)
        login_window_end = cast(self.aggregation_time, DATE)
        num_shared_account_logins_past_30_days = self.bq_session.query(
            func.count(self.events.c['type']).label(
                'shared_account_logins_past_30_days'),
            self.events.c['browser_id'],
        ).filter(
            and_(
                self.events.c['type'] == 'shared_account_login',
                self.events.c['time'].cast(DATE) <= login_window_end,
                self.events.c['time'].cast(DATE) >= login_window_start,
            )
        ).group_by(self.events.c['browser_id']).subquery()

        # Rows without a positive outcome fall back to a sentinel date one
        # day past the aggregation time, and to the negative label.
        outcome_date_column = case(
            [(relevant_events.c['outcome'].in_(self.positive_labels()),
              relevant_events.c['date'])],
            else_=(self.aggregation_time + timedelta(days=1)).date(),
        ).label('outcome_date')
        outcome_column = case(
            [(relevant_events.c['outcome'].in_(self.positive_labels()),
              relevant_events.c['outcome'])],
            else_=self.negative_label(),
        ).label('outcome')

        feature_query_w_outcome = self.bq_session.query(
            feature_query,
            outcome_date_column,
            outcome_column,
            num_shared_account_logins_past_30_days.
            c['shared_account_logins_past_30_days'],
        ).outerjoin(
            relevant_events,
            and_(
                feature_query.c['browser_id'] ==
                relevant_events.c['browser_id'],
                # Feature date must fall in the one-day window before the
                # event date (exclusive of the event date itself).
                feature_query.c['date'] >= func.date_sub(
                    relevant_events.c['date'], text(f'interval {1} day')),
                feature_query.c['date'] < relevant_events.c['date'],
            ),
        ).outerjoin(
            num_shared_account_logins_past_30_days,
            num_shared_account_logins_past_30_days.c['browser_id'] ==
            feature_query.c['browser_id'],
        ).subquery()

        return feature_query_w_outcome
示例#6
0
文件: FTS3DB.py 项目: pmusset/DIRAC
  def deleteFinalOperations(self, limit=20, deleteDelay=180):
    """Delete operations in a final state whose lastUpdate is older than a
    given number of days.

    :param int limit: maximum number of operations to treat
    :param int deleteDelay: age of the lastUpdate in days
    :returns: S_OK(number of deleted operations) or S_ERROR
    """

    session = self.dbSession(expire_on_commit=False)

    try:
      # Operations are eligible for deletion once lastUpdate falls before
      # this cutoff (utc now - deleteDelay days).
      expiredBefore = func.date_sub(func.utc_timestamp(),
                                    text('INTERVAL %d DAY' % deleteDelay))

      oldFinalOps = (session.query(FTS3Operation.operationID)
                     .filter(FTS3Operation.lastUpdate < expiredBefore)
                     .filter(FTS3Operation.status.in_(FTS3Operation.FINAL_STATES))
                     .limit(limit))

      opIDs = [row[0] for row in oldFinalOps]

      rowCount = 0
      if opIDs:
        deletion = delete(FTS3Operation).where(
            FTS3Operation.operationID.in_(opIDs))
        rowCount = session.execute(deletion).rowcount

      session.commit()
      session.expunge_all()

      return S_OK(rowCount)

    except SQLAlchemyError as e:
      session.rollback()
      return S_ERROR("deleteFinalOperations: unexpected exception : %s" % e)
    finally:
      session.close()
示例#7
0
文件: FTS3DB.py 项目: DIRACGrid/DIRAC
  def deleteFinalOperations(self, limit=20, deleteDelay=180):
    """Remove up to *limit* operations that reached a final state and were
    last updated more than *deleteDelay* days ago.

    :param int limit: maximum number of operations to treat
    :param int deleteDelay: age of the lastUpdate in days
    :returns: S_OK(number of deleted operations) or S_ERROR
    """

    session = self.dbSession(expire_on_commit=False)

    try:
      # Server-side cutoff: utc now minus deleteDelay days.
      cutoff = func.date_sub(func.utc_timestamp(),
                             text('INTERVAL %d DAY' % deleteDelay))

      expiredOps = session.query(FTS3Operation.operationID)\
          .filter(FTS3Operation.lastUpdate < cutoff)\
          .filter(FTS3Operation.status.in_(FTS3Operation.FINAL_STATES))\
          .limit(limit)

      opIDs = [operationID for (operationID,) in expiredOps]

      rowCount = 0
      if opIDs:
        result = session.execute(
            delete(FTS3Operation)
            .where(FTS3Operation.operationID.in_(opIDs)))
        rowCount = result.rowcount

      session.commit()
      session.expunge_all()

      return S_OK(rowCount)

    except SQLAlchemyError as e:
      session.rollback()
      return S_ERROR("deleteFinalOperations: unexpected exception : %s" % e)
    finally:
      session.close()
示例#8
0
        )
        """
# Materialise the raw-SQL result (query text defined above, outside this
# view): one tuple per row with the employee's identity, title, current
# salary span and last year's salary.
sql_data = [
    (d.emp_no, d.birth_date, d.first_name, d.last_name, d.gender, d.hire_date,
     d.title, d.from_date, d.to_date, d.salary, d.last_salary)
    for d in session.execute(sql)
]
'''使用 sqlalchemy 方式进行查询'''
'''方法一:使用 if else 三目运算符'''
# (The bare strings above read: "query using the sqlalchemy approach" and
# "method one: use an if/else ternary expression".)
# Two aliases of Salary: s1 joins the salary row current on 1997-12-01,
# s2 is used in a correlated subquery for the salary one year earlier.
s1 = aliased(Salary)
s2 = aliased(Salary)
# NOTE(review): `0 if not session.query(...)` tests the truthiness of the
# Query *object*, which is always truthy, so `not query` is always False and
# the `else` branch (the correlated scalar subquery) is always chosen; the
# literal 0 is dead code, and when no prior-year salary row exists the
# column comes back NULL rather than 0 — TODO confirm the intended
# behaviour (an SQL-side IFNULL?).
alchemy_data = session.query(Employee.emp_no, Employee.birth_date, Employee.first_name,
                                 Employee.last_name, Employee.gender, Employee.hire_date, Title.title,
                                 s1.from_date, s1.to_date, s1.salary, (0 if not
                                        session.query(s2.salary).filter(s2.emp_no==Employee.emp_no,
                                        func.date_sub(text("date('1997-12-01'), interval 1 year")).
                                        between(s2.from_date, s2.to_date))
                             else session.query(s2.salary).
        filter(s2.emp_no==Employee.emp_no, func.date_sub(text("date('1997-12-01'), interval 1 year")).
               between(s2.from_date, s2.to_date))).label("last_salary")).\
    filter(Employee.emp_no==s1.emp_no , Title.emp_no==s1.emp_no,
           # Restrict to four sample employees whose title and salary rows
           # are valid on 1997-12-01.
           or_(Employee.emp_no==10004,
               Employee.emp_no==10001,
               Employee.emp_no==10006,
               Employee.emp_no==10003),
           func.date('1997-12-01').between(s1.from_date, s1.to_date),
           func.date('1997-12-01').between(Title.from_date, Title.to_date)).all()
# (The string below reads: "the following are two erroneous methods".)
'''===============================以下是两种错误方法================================================'''
'''方法二:使用 IFNULL 函数,这是一种错误的方法,由于使用 aliased ,在 from 之后将会出现另一条 IFNULL 语句, 
数据本身会提示错误——“Every derived table must have its own alias”
s1 = aliased(Salary)
s2 = aliased(Salary)