Example #1
import math

from sqlalchemy.orm import aliased
from memorious.model.event import Event

# `session` is the project's shared SQLAlchemy session (import path assumed).


def crawler_events(crawler,
                   run_id=None,
                   level=None,
                   stage=None,
                   page=1,
                   per_page=15):
    evt = aliased(Event)
    q = session.query(evt)
    q = q.filter(evt.crawler == crawler.name)
    if level is not None:
        q = q.filter(evt.level == level)
    if run_id is not None:
        q = q.filter(evt.run_id == run_id)
    if stage is not None:
        q = q.filter(evt.stage == stage)

    total = q.count()
    q = q.order_by(evt.timestamp.desc())
    q = q.limit(per_page)
    q = q.offset((max(1, page) - 1) * per_page)

    return {
        'page': page,
        'per_page': per_page,
        'pages': int(math.ceil((float(total) / per_page))),
        'total': total,
        'results': list(q)
    }
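A minimal usage sketch, assuming `crawler` is a memorious crawler object (its `.name` matches `Event.crawler`) and that 'warning' is one of the stored level strings:

data = crawler_events(crawler, level='warning', page=2, per_page=20)
print('%s events across %s pages' % (data['total'], data['pages']))
for event in data['results']:
    print(event.timestamp, event.level, event.stage)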
Example #2
File: tag.py Project: danohu/memorious
@classmethod
def exists(cls, crawler, key, since=None):
    q = session.query(cls)
    q = q.filter(cls.crawler == crawler.name)
    q = q.filter(cls.key == key)
    if since is not None:
        q = q.filter(cls.timestamp >= since)
    return q.count() > 0
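A sketch of the typical incremental-crawl check, where `url` and the fetch step are hypothetical; `since` bounds the lookup to recent tags only:

from datetime import datetime, timedelta

week_ago = datetime.utcnow() - timedelta(days=7)
if not Tag.exists(crawler, 'seen:%s' % url, since=week_ago):
    fetch(url)  # hypothetical re-fetch of a page not tagged in the last week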
Example #3
def crawler_events(crawler,
                   run_id=None,
                   level=None,
                   stage=None,
                   page=1,
                   per_page=15):
    evt = aliased(Event)
    op = aliased(Operation)
    q = session.query(evt, op)
    q = q.join(op, op.id == evt.operation_id)
    q = q.filter(evt.crawler == crawler.name)
    if level is not None:
        q = q.filter(evt.level == level)
    if run_id is not None:
        q = q.filter(op.run_id == run_id)
    if stage is not None:
        q = q.filter(op.name == stage)

    total = q.count()
    q = q.order_by(evt.timestamp.desc())
    q = q.limit(per_page)
    q = q.offset((max(1, page) - 1) * per_page)

    results = []
    for (event, operation) in q:
        results.append({'event': event, 'operation': operation})

    return {
        'page': page,
        'per_page': per_page,
        'pages': int(math.ceil((float(total) / per_page))),
        'total': total,
        'results': results
    }
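Unlike Example #1, `run_id` and the stage name live on Operation here, so the query joins Event to Operation and returns paired rows; note that `total` is counted before limit/offset is applied, so it covers the full result set. Consuming the pairs might look like this (stage name hypothetical):

for row in crawler_events(crawler, stage='fetch')['results']:
    print(row['operation'].run_id, row['event'].level)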
Example #4
def crawlers_index():
    """Generate a list of all crawlers, sorted alphabetically, with op
    counts."""
    # query for error and warning events:
    counts = {}
    event = aliased(Event)
    q = session.query(
        event.crawler,
        event.level,
        func.count(event.id),
    )
    q = q.group_by(event.crawler, event.level)
    for (name, level, count) in q:
        if name not in counts:
            counts[name] = {}
        counts[name][level] = count

    # make sure we're including crawlers that have never been run:
    crawlers = []
    for crawler in manager:
        data = counts.get(crawler.name, {})
        data['last_active'] = get_last_run(crawler)
        data['total_ops'] = get_crawler_op_count(crawler)
        data['running'] = is_running(crawler)
        data['crawler'] = crawler
        crawlers.append(data)
    return crawlers
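A sketch of rendering the index; the level buckets ('error', 'warning', ...) are whatever strings appear in `Event.level`, so they are read with `.get()`:

for data in crawlers_index():
    print(data['crawler'].name,
          data['last_active'],
          data['running'],
          data.get('error', 0))  # level key assumed; missing if never logged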
Example #5
File: tag.py Project: danohu/memorious
@classmethod
def find(cls, crawler, key, since=None):
    q = session.query(cls)
    q = q.filter(cls.crawler == crawler.name)
    q = q.filter(cls.key == key)
    if since is not None:
        q = q.filter(cls.timestamp >= since)
    q = q.order_by(cls.timestamp.desc())
    return q.first()
Example #6
@classmethod
def last_status(cls, crawler):
    q = session.query(cls)
    q = q.filter(cls.crawler == crawler)
    q = q.order_by(cls.started_at.desc())
    op = q.first()
    if op is None:
        return None
    return op.status
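Ordering by `started_at` descending and taking `first()` yields the newest operation, so this is "status of the most recent run, else None". A usage sketch, assuming the classmethod is defined on the Operation model and the crawler is passed as a name string (it compares `cls.crawler == crawler`, not `crawler.name`):

status = Operation.last_status('my_crawler')
if status is None:
    print('crawler has never run')
else:
    print('last operation ended with status: %s' % status)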
Example #7
@classmethod
def check_rate(cls, crawler, stage, sample=1):
    q = session.query(func.count(cls.id))
    q = q.filter(cls.crawler == crawler)
    q = q.filter(cls.name == stage)
    period = timedelta(seconds=sample * 60)
    start = datetime.utcnow() - period
    q = q.filter(cls.started_at >= start)
    count = q.scalar()
    return (float(count) / sample) / 60.0
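The arithmetic: `sample` is a window in minutes, so the filter keeps operations started in the last `sample * 60` seconds, and `(count / sample) / 60.0` turns the per-window count into operations per second. A worked sketch, assuming the classmethod sits on the Operation model:

# With sample=5 the window is the last 5 minutes; if 600 operations
# started in it, the rate is (600 / 5) / 60.0 == 2.0 ops per second.
rate = Operation.check_rate('my_crawler', 'fetch', sample=5)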
Example #8
@classmethod
def delete(cls, crawler):
    from memorious.model.event import Event
    from memorious.model.result import Result
    Event.delete(crawler)
    Result.delete(crawler)
    pq = session.query(cls)
    pq = pq.filter(cls.crawler == crawler)
    pq.delete(synchronize_session=False)
    session.flush()
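Dependent Event and Result rows are removed before the class's own rows, and `synchronize_session=False` skips reconciling the in-memory session with the bulk delete; only `flush()` is called, so the caller still has to commit. A sketch, assuming this classmethod lives on the Operation model:

Operation.delete('my_crawler')
session.commit()  # the method only flushes; persist the deletes explicitly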
Example #9
def global_stats():
    """Stats visible on each page of the UI."""
    stats = {'version': settings.VERSION, 'num_crawlers': len(manager)}

    steps = (('ops_last_hour', timedelta(hours=1)),
             ('ops_last_day', timedelta(days=1)))
    for (field, delta) in steps:
        q = session.query(func.count(Operation.id))
        q = q.filter(Operation.started_at >= datetime.utcnow() - delta)
        stats[field] = q.scalar()
    return stats
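The `steps` tuple makes extra windows a one-line change; a sketch adding a weekly counter (the field name is hypothetical):

steps = (('ops_last_hour', timedelta(hours=1)),
         ('ops_last_day', timedelta(days=1)),
         ('ops_last_week', timedelta(days=7)))  # hypothetical extra window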
Example #10
def crawler_stages(crawler):
    """See the number of executions of each stage."""
    counts = {}

    # operation runs per stage name, status
    op = aliased(Operation)
    q = session.query(
        op.name,
        op.status,
        func.count(op.id),
    )
    q = q.filter(op.crawler == crawler.name)
    q = q.group_by(op.name, op.status)
    for (name, status, count) in q:
        if name not in counts:
            counts[name] = {}
        counts[name][status] = count

    # events by level
    op = aliased(Operation)
    evt = aliased(Event)
    q = session.query(
        op.name,
        evt.level,
        func.count(evt.id),
    )
    q = q.filter(evt.operation_id == op.id)
    q = q.filter(op.crawler == crawler.name)
    q = q.group_by(op.name, evt.level)
    for (name, level, count) in q:
        if name not in counts:
            counts[name] = {}
        counts[name][level] = count

    stages = []
    for stage in crawler:
        data = counts.get(stage.name, {})
        data['stage'] = stage
        stages.append(data)
    return stages
Example #11
def crawlers_index():
    """Generate a list of all crawlers, sorted alphabetically, with op
    counts."""
    # query for overall run and operations count:
    op = aliased(Operation)
    q = session.query(
        op.crawler,
        # func.count(distinct(op.run_id)),
        # func.count(op.id),
        func.max(op.started_at),
    )
    q = q.group_by(op.crawler)
    counts = {}
    # for (name, runs, operations, last_active) in q:
    for (name, last_active) in q:
        counts[name] = {
            # 'runs': runs,
            # 'operations': operations,
            'last_active': last_active,
        }

    # query for error and warning events:
    event = aliased(Event)
    q = session.query(
        event.crawler,
        event.level,
        func.count(event.id),
    )
    q = q.group_by(event.crawler, event.level)
    for (name, level, count) in q:
        if name not in counts:
            counts[name] = {}
        counts[name][level] = count

    # make sure we're including crawlers that have never been run:
    crawlers = []
    for crawler in manager:
        data = counts.get(crawler.name, {})
        data['crawler'] = crawler
        crawlers.append(data)
    return crawlers
Example #12
def crawler_runs(crawler):
    runs = get_crawler_runs(crawler)

    # events by level
    evt = aliased(Event)
    q = session.query(
        evt.run_id,
        evt.level,
        func.count(evt.id),
    )
    q = q.filter(evt.crawler == crawler.name)
    q = q.group_by(evt.run_id, evt.level)
    for (run_id, level, count) in q:
        for run in runs:
            if run['run_id'] == run_id:
                run[level] = count
    return runs
Example #13
def crawler_stages(crawler):
    """See the number of executions of each stage."""
    counts = {}
    # events by level
    evt = aliased(Event)
    q = session.query(
        evt.stage,
        evt.level,
        func.count(evt.id),
    )
    q = q.filter(evt.crawler == crawler.name)
    q = q.group_by(evt.stage, evt.level)
    for (stage_name, level, count) in q:
        if stage_name not in counts:
            counts[stage_name] = {}
        counts[stage_name][level] = count

    stages = []
    for stage in crawler:
        data = counts.get(stage.name, {})
        data['total_ops'] = get_stage_op_count(stage)
        data['stage'] = stage
        stages.append(data)
    return stages
Example #14
File: tag.py Project: danohu/memorious
@classmethod
def delete(cls, crawler):
    pq = session.query(cls)
    pq = pq.filter(cls.crawler == crawler)
    pq.delete(synchronize_session=False)
Example #15
@classmethod
def by_crawler_next_stage(cls, crawler, next_stage):
    q = session.query(cls)
    q = q.filter(cls.crawler == crawler)
    q = q.filter(cls.next_stage == next_stage)
    return q
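The query object itself is returned, so callers can count, iterate, or chain further filters. A sketch, assuming the classmethod belongs to the Result model (the model in this project with a `next_stage` column):

pending = Result.by_crawler_next_stage('my_crawler', 'parse')
print(pending.count())  # results queued for the hypothetical 'parse' stage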
Example #16
@classmethod
def get(cls, **kwargs):
    q = session.query(cls)
    q = q.filter_by(**kwargs)
    return q.all()
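Despite its name, this returns every matching row (`q.all()`), not a single object; `filter_by()` matches the keyword arguments against column names by equality. A sketch with a hypothetical model and values:

events = Event.get(crawler='my_crawler', level='error')
print('%d matching events' % len(events))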