示例#1
0
def update(slug):
    """Apply the submitted request data to a source and persist the change.

    The caller must pass the ``source_write`` check for ``slug``. The source
    lookup goes through ``obj_or_404``. After the commit, the response is
    whatever ``view(slug)`` returns.
    """
    authz.require(authz.source_write(slug))
    target = obj_or_404(Source.by_slug(slug))
    target.update(request_data(), current_user)
    # Stage the modified source and flush it to the database in one go.
    session = db.session
    session.add(target)
    session.commit()
    return view(slug)
示例#2
0
def crawl_source(slug, ignore_tags=False):
    """Run the crawler attached to the source identified by ``slug``.

    ``Source.sync()`` is invoked before the lookup. The ``ignore_tags`` flag
    is copied onto the source's crawler instance before ``crawl()`` is called.

    Raises:
        ValueError: if ``Source.by_slug`` returns ``None`` for ``slug``.
    """
    Source.sync()
    found = Source.by_slug(slug)
    if found is not None:
        found.crawler_instance.ignore_tags = ignore_tags
        found.crawler_instance.crawl()
    else:
        raise ValueError("Invalid source: %r" % slug)
示例#3
0
def update(slug):
    """Update the source named by ``slug`` from the current request.

    Write access is enforced first. The source (or a 404 via ``obj_or_404``)
    is then updated with ``request_data()`` on behalf of ``current_user``,
    committed, and finally rendered through ``view``.
    """
    may_write = authz.source_write(slug)
    authz.require(may_write)

    record = obj_or_404(Source.by_slug(slug))
    record.update(request_data(), current_user)

    db.session.add(record)
    db.session.commit()

    return view(slug)
示例#4
0
def crawl(slug):
    """Trigger a crawl of the source identified by ``slug``.

    The caller must pass the ``source_write`` check. The source lookup goes
    through ``obj_or_404``, and the source's slug is handed to
    ``crawl_source.delay``.

    Returns:
        The ``jsonify`` response for ``{'status': 'ok'}``.
    """
    # Pass the slug as a logging argument so the message is only formatted
    # when the debug level is actually enabled.
    logging.debug('starting a crawl of %s', slug)
    authz.require(authz.source_write(slug))
    source = obj_or_404(Source.by_slug(slug))
    crawl_source.delay(source.slug)
    logging.debug('started crawl')
    return jsonify({'status': 'ok'})
示例#5
0
def crawl_source(slug, ignore_tags=False):
    """Crawl the source identified by ``slug``.

    Logs a debug line, calls ``Source.sync()``, looks the source up, and
    then runs its crawler with ``ignore_tags`` set on the crawler instance.

    Raises:
        ValueError: if no source is found for ``slug``.
    """
    logging.debug('crawl source -- celery task going')
    Source.sync()

    match = Source.by_slug(slug)
    if match is not None:
        match.crawler_instance.ignore_tags = ignore_tags
        match.crawler_instance.crawl()
        return
    raise ValueError("Invalid source: %r" % slug)
示例#6
0
def crawl_source(slug, ignore_tags=False):
    """Look up a source by slug and run its crawler.

    Args:
        slug: identifier passed to ``Source.by_slug``.
        ignore_tags: value assigned to the crawler instance's
            ``ignore_tags`` attribute before crawling.

    Raises:
        ValueError: if the lookup returns ``None``.
    """
    logging.debug('crawl source -- celery task going')

    Source.sync()
    src = Source.by_slug(slug)
    if src is not None:
        src.crawler_instance.ignore_tags = ignore_tags
        src.crawler_instance.crawl()
    else:
        raise ValueError("Invalid source: %r" % slug)
示例#7
0
def view(slug):
    """Return the JSON representation of the source identified by ``slug``.

    The caller must pass the ``source_read`` check. The payload starts from
    ``source.to_dict()`` and gains a ``can_write`` entry. When that entry is
    truthy, the ids of the source's users and the source config are added.
    """
    authz.require(authz.source_read(slug))
    record = obj_or_404(Source.by_slug(slug))
    etag_cache_keygen(record)

    payload = record.to_dict()
    payload['can_write'] = authz.source_write(slug)
    if not payload['can_write']:
        return jsonify(payload)

    # Writers additionally see who has access and how the source is set up.
    payload['users'] = [member.id for member in record.users]
    payload['config'] = record.config
    return jsonify(payload)
示例#8
0
def view(slug):
    """Return the JSON representation of a source.

    Access requires both the ``source_read`` check for ``slug`` and
    ``authz.is_admin()``. The response is ``source.to_dict()`` plus a
    ``can_write`` entry and, when that entry is truthy, ``users`` (user ids)
    and ``config``.
    """
    # ``and`` short-circuits: is_admin() is only consulted for readers.
    allowed = authz.source_read(slug) and authz.is_admin()
    authz.require(allowed)

    found = obj_or_404(Source.by_slug(slug))
    etag_cache_keygen(found)

    body = found.to_dict()
    body['can_write'] = authz.source_write(slug)
    if body['can_write']:
        body['users'] = [user.id for user in found.users]
        body['config'] = found.config
    return jsonify(body)
示例#9
0
def dc_projects():
    """Return the projects reported by a source's DocumentCloud crawler.

    Reads ``source``, ``username`` and ``password`` from the request's query
    arguments. The response is ``{'credentials': False}`` when the source's
    crawler is not a ``DocumentCloudCrawler`` or when ``get_projects``
    returns ``False``. Otherwise it is ``{'credentials': True, 'projects':
    ...}``.
    """
    slug = request.args.get('source')
    authz.require(authz.source_read(slug))
    source = obj_or_404(Source.by_slug(slug))

    if isinstance(source.crawler_instance, DocumentCloudCrawler):
        user = request.args.get('username')
        secret = request.args.get('password')
        found = source.crawler_instance.get_projects(user, secret)
        # Only the literal ``False`` signals failure; an empty result is
        # still reported as a success.
        if found is not False:
            return jsonify({'credentials': True, 'projects': found})

    return jsonify({'credentials': False})
示例#10
0
def dc_projects():
    """List projects via the DocumentCloud crawler of the requested source.

    Query arguments used: ``source`` (slug), ``username``, ``password``.
    The caller must pass the ``source_read`` check for the slug.

    Returns:
        A ``jsonify`` response. ``credentials`` is ``False`` if the source's
        crawler is not a ``DocumentCloudCrawler`` or ``get_projects``
        returned ``False``. Otherwise ``credentials`` is ``True`` and
        ``projects`` carries the result.
    """
    params = request.args
    slug = params.get('source')
    authz.require(authz.source_read(slug))
    source = obj_or_404(Source.by_slug(slug))

    denied = {'credentials': False}
    if not isinstance(source.crawler_instance, DocumentCloudCrawler):
        return jsonify(denied)

    login = params.get('username')
    secret = params.get('password')
    listing = source.crawler_instance.get_projects(login, secret)
    if listing is False:
        return jsonify(denied)
    return jsonify({'credentials': True, 'projects': listing})
示例#11
0
def process_row(row, attributes):
    """Flatten one result row into a dict keyed by the requested names.

    For each name in ``attributes`` the value is read from the row's
    ``_source`` mapping. An entry in ``_source['attributes']`` whose ``name``
    matches overrides that value (the last matching entry wins).

    Two names are post-processed:

    * ``entities``: the value is treated as a list of mappings with an
      ``id`` key. The ids are resolved through ``Entity.by_id_set`` and
      replaced by the entity labels joined with ``', '``. A missing or
      empty list yields ``''`` without performing a lookup.
    * ``collection``: the value is passed to ``Source.by_slug`` and replaced
      by the unicode form of the result, or of the raw value when the
      lookup result is falsy.

    Args:
        row: mapping with a ``_source`` mapping.
        attributes: iterable of names to extract.

    Returns:
        dict mapping each requested name to its (possibly processed) value.
    """
    src = row.get('_source')
    # Loop-invariant, so fetch once; ``or []`` also covers an explicit null.
    extra = src.get('attributes') or []
    data = {}
    for name in attributes:
        value = src.get(name)
        for attr in extra:
            if attr.get('name') == name:
                value = attr.get('value')
        if name == 'entities':
            if value:
                objs = Entity.by_id_set([e.get('id') for e in value])
                value = ', '.join([o.label for o in objs.values()])
            else:
                # No entities on this row: nothing to resolve or join.
                value = ''
        if name == 'collection':
            # WARNING: this does one query per row and should be avoided.
            value = unicode(Source.by_slug(value) or value)
        data[name] = value
    return data
示例#12
0
def process_row(row, attributes):
    """Build a flat dict of the requested attributes from a result row.

    Each name in ``attributes`` is looked up in the row's ``_source``
    mapping. If ``_source['attributes']`` contains entries whose ``name``
    equals the requested name, the ``value`` of the last such entry takes
    precedence.

    Special cases:

    * ``entities``: ids are collected from the list of mappings, resolved
      with ``Entity.by_id_set``, and the labels of the returned objects are
      joined with ``', '``. When the value is missing or empty the result
      is ``''`` and no lookup is made.
    * ``collection``: replaced by ``unicode(Source.by_slug(value) or value)``.

    Args:
        row: mapping with a ``_source`` mapping.
        attributes: iterable of names to extract.

    Returns:
        dict mapping each requested name to its (possibly processed) value.
    """
    src = row.get('_source')
    # Fetched once instead of per name; tolerates an explicit null value.
    overrides = src.get('attributes') or []
    data = {}
    for name in attributes:
        value = src.get(name)
        for attr in overrides:
            if attr.get('name') == name:
                value = attr.get('value')
        if name == 'entities':
            if value:
                objs = Entity.by_id_set([e.get('id') for e in value])
                value = ', '.join([o.label for o in objs.values()])
            else:
                # Missing or empty entity list: skip the lookup entirely.
                value = ''
        if name == 'collection':
            # WARNING: this does one query per row and should be avoided.
            value = unicode(Source.by_slug(value) or value)
        data[name] = value
    return data
示例#13
0
def process(slug):
    """Hand the source identified by ``slug`` to ``process_collection``.

    The caller must pass the ``source_write`` check. The source lookup goes
    through ``obj_or_404``. The source's slug is passed to
    ``process_collection.delay`` and a ``{'status': 'ok'}`` JSON response is
    returned.
    """
    may_write = authz.source_write(slug)
    authz.require(may_write)
    target = obj_or_404(Source.by_slug(slug))
    process_collection.delay(target.slug)
    reply = {'status': 'ok'}
    return jsonify(reply)
示例#14
0
def crawl(slug):
    """Hand the source identified by ``slug`` to ``crawl_source``.

    The caller must pass the ``source_write`` check. The source lookup goes
    through ``obj_or_404``. The source's slug is passed to
    ``crawl_source.delay`` and a ``{'status': 'ok'}`` JSON response is
    returned.
    """
    may_write = authz.source_write(slug)
    authz.require(may_write)
    target = obj_or_404(Source.by_slug(slug))
    crawl_source.delay(target.slug)
    reply = {'status': 'ok'}
    return jsonify(reply)
示例#15
0
def process(slug):
    """Start processing of a source's collection.

    Enforces write access for ``slug``, resolves the source (404 when
    absent, via ``obj_or_404``), and dispatches its slug through
    ``process_collection.delay``.

    Returns:
        The ``jsonify`` response for ``{'status': 'ok'}``.
    """
    authz.require(authz.source_write(slug))

    found = obj_or_404(Source.by_slug(slug))
    process_collection.delay(found.slug)

    return jsonify({'status': 'ok'})