示例#1
0
    def __init__(self,
                 name,
                 url,
                 test_username_pos,
                 status_code=None,
                 match_type=None,
                 match_expr=None,
                 test_username_neg=None,
                 headers=None,
                 censor_images=False,
                 wait_time=1,
                 use_proxy=False):
        '''
        Constructor.

        Every argument is stored on the instance under the same name, with
        the following defaults applied:

        :param match_type: stored as ``'text'`` when a falsy value is given.
        :param test_username_neg: when ``None``, the value returned by
            ``random_string(16)`` is stored instead.
        :param headers: mapping stored as ``self.headers``. When omitted
            (``None``) each instance receives its own new empty dict.
        '''

        self.name = name
        self.url = url
        self.status_code = status_code
        self.match_type = match_type or 'text'
        self.match_expr = match_expr
        self.test_username_pos = test_username_pos
        # A ``{}`` default in the signature would be one dict shared by all
        # instances; build a fresh one per instance instead.
        self.headers = {} if headers is None else headers
        self.censor_images = censor_images
        self.use_proxy = use_proxy
        self.wait_time = wait_time

        if test_username_neg is None:
            self.test_username_neg = random_string(16)
        else:
            self.test_username_neg = test_username_neg
示例#2
0
def delete_expired_results():
    """
    Delete results created more than ``_days_to_keep_result`` days ago.

    Each site that owned at least one expired result is queued for a
    retest, once per site, via ``test_site.enqueue``.
    """
    worker.start_job()
    db_session = worker.get_session()
    tested_sites = set()
    expiry = datetime.utcnow() - timedelta(days=_days_to_keep_result)
    expired_results = db_session.query(Result).filter(
        Result.created_at < expiry).all()

    for result in expired_results:
        # Don't delete permanent image files: detach them from the result
        # before the result itself is deleted. A result may have no image
        # at all (this function itself sets the relation to None), so
        # guard against dereferencing None.
        image_file = result.image_file
        if image_file is not None and image_file.name in _permanent_images:
            result.image_file = None
            result.image_file_id = None
            db_session.flush()

        db_session.delete(result)

        # Queue at most one retest per site, each with its own tracker ID.
        if result.site_id not in tested_sites:
            tracker_id = 'tracker.{}'.format(random_string(10))
            test_site.enqueue(result.site_id, tracker_id)
            tested_sites.add(result.site_id)

    db_session.commit()
    worker.finish_job()
示例#3
0
    def __init__(self,
                 name,
                 mime,
                 user_id,
                 content=None,
                 zip_archive=False,
                 zip_files=None,
                 zip_str_files=None,
                 access_type='private'):
        '''
        Constructor.

        Stores the metadata on the instance, computes the SHA-256 digest of
        ``content`` into ``self.hash`` and, unless a file already exists at
        the derived path, writes the content (or a zip archive) to disk.

        The on-disk location is ``<data>/<h0>/<h1>/<rest>``, where ``<data>``
        is ``get_path('data')`` and ``h0``, ``h1`` and ``rest`` are the first
        hex digit, the second hex digit and the remainder of the digest.

        :param content: bytes to hash and write. When ``None``, random dummy
            bytes are generated so that a hash can still be computed.
        :param zip_archive: when true, ``self.zip_files`` is called to create
            the file instead of writing ``content``.
        :param zip_files: passed through to ``self.zip_files``; defaults to
            a new empty list.
        :param zip_str_files: passed through to ``self.zip_files``; defaults
            to a new empty list.
        '''

        self.name = name
        self.mime = mime
        self.user_id = user_id
        self.access_type = access_type

        # Avoid mutable defaults in the signature: give each call its own
        # lists.
        if zip_files is None:
            zip_files = []

        if zip_str_files is None:
            zip_str_files = []

        # Create dummy content to use in hash if there is
        # no content (zip archives)
        if content is None:
            content = ('DUMMY DATA - {}' + random_string(1000)).encode('utf8')

        self.hash = hashlib.sha256(content).digest()

        # Write content to file.
        data_dir = get_path('data')
        hash_hex = binascii.hexlify(self.hash).decode('ascii')
        dir2 = os.path.join(data_dir, hash_hex[0], hash_hex[1])
        path = os.path.join(dir2, hash_hex[2:])

        # Creates both directory levels and does not fail if another
        # process created them between a check and the mkdir call.
        os.makedirs(dir2, exist_ok=True)

        if not os.path.isfile(path):
            if zip_archive:
                self.zip_files(path, zip_files, zip_str_files)
            else:
                # The context manager closes the handle even if the write
                # raises.
                with open(path, 'wb') as file_:
                    file_.write(content)
示例#4
0
    def __init__(self, name, url, test_username_pos,
                 status_code=None, match_type=None, match_expr=None,
                 test_username_neg=None, headers=None):
        '''
        Constructor.

        Every argument is stored on the instance under the same name, with
        the following defaults applied:

        :param match_type: stored as ``'text'`` when a falsy value is given.
        :param test_username_neg: when ``None``, the value returned by
            ``random_string(16)`` is stored instead.
        :param headers: mapping stored as ``self.headers``. When omitted
            (``None``) each instance receives its own new empty dict.
        '''

        self.name = name
        self.url = url
        self.status_code = status_code
        self.match_type = match_type or 'text'
        self.match_expr = match_expr
        self.test_username_pos = test_username_pos
        # A ``{}`` default in the signature would be one dict shared by all
        # instances; build a fresh one per instance instead.
        self.headers = {} if headers is None else headers

        if test_username_neg is None:
            self.test_username_neg = random_string(16)
        else:
            self.test_username_neg = test_username_neg
示例#5
0
    def post(self):
        '''
        Request search of usernames.

        One tracker ID is created per username and one background job is
        queued per (username, valid site) pair.

        **Example Request**

        .. sourcecode:: json

            {
                "usernames": [
                    "johndoe",
                    "janedoe",
                    ...
                ],
                "category": 3,
                "test": false
            }

        **Example Response**

        .. sourcecode:: json

            {
                "tracker_ids": {
                        "johndoe": "tracker.12344565",
                }
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :<json list usernames: a list of usernames to search for
        :<json int category: ID of site category to use (optional)
        :<json int site: ID of site to search (optional)
        :<json bool test: test results (optional, default: false)

        :>header Content-Type: application/json
        :>json object tracker_ids: maps each requested username to the
            tracker ID created for it

        :status 202: accepted for background processing
        :status 400: invalid request body
        :status 401: authentication required
        :status 404: unknown category or site, or no valid sites to check
        '''
        tracker_ids = dict()
        redis = g.redis

        # get_json() can return None (e.g. when the body is not sent as
        # JSON); treat that as an empty body so the client gets a 400
        # instead of an unhandled TypeError.
        request_json = request.get_json() or {}

        if 'usernames' not in request_json:
            raise BadRequest('`usernames` is required')

        validate_request_json(request_json, _username_attrs)

        usernames = request_json['usernames']

        if not usernames:
            raise BadRequest('At least one username is required')

        if 'category' in request_json and 'site' in request_json:
            raise BadRequest('Supply either `category` or `site`.')

        category = None
        category_id = None

        if 'category' in request_json:
            category_id = request_json['category']
            category = g.db.query(Category) \
                .filter(Category.id == category_id).first()

            if category is None:
                raise NotFound("Category '%s' does not exist." % category_id)

            category_id = category.id

        site = None

        if 'site' in request_json:
            site_id = request_json['site']
            site = g.db.query(Site).filter(Site.id == site_id).first()

            if site is None:
                raise NotFound("Site '%s' does not exist." % site_id)

        test = request_json.get('test', False)

        # Candidate sites: the category's sites, the single requested site,
        # or every site when neither was supplied.
        if category:
            sites = category.sites
        elif site:
            sites = g.db.query(Site).filter(Site.id == site.id).all()
        else:
            sites = g.db.query(Site).all()

        # Only check valid sites.
        valid_sites = [candidate for candidate in sites if candidate.valid]

        if not valid_sites:
            raise NotFound('No valid sites to check')

        # Same for every username, so compute it once.
        total = len(valid_sites)

        for username in usernames:
            # Create an object in redis to track the number of sites completed
            # in this search.
            tracker_id = 'tracker.{}'.format(random_string(10))
            tracker_ids[username] = tracker_id
            redis.set(tracker_id, 0)
            redis.expire(tracker_id, 600)

            # Queue a job for each site.
            for valid_site in valid_sites:
                description = 'Checking {} for user "{}"'.format(
                    valid_site.name, username)
                worker.scrape.check_username.enqueue(
                    username=username,
                    site_id=valid_site.id,
                    category_id=category_id,
                    total=total,
                    tracker_id=tracker_id,
                    test=test,
                    jobdesc=description,
                    timeout=_redis_worker['username_timeout'],
                    user_id=g.user.id
                )

        response = jsonify(tracker_ids=tracker_ids)
        response.status_code = 202

        return response
示例#6
0
    def post_jobs_for_site(self, site_id):
        """
        Request background jobs for site identified by `id`.

        **Example Request**

        ..sourcode:: json

            {
                "jobs": [
                    {
                        "name": "test",
                    },
                    ...
                ]
            }

        **Example Response**

        .. sourcecode:: json

            {
                "tracker_ids": {
                        "1": "tracker.12344565",
                }
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :>json list jobs: a list of jobs to schedule
        :>json string jobs[n].name: name of job

        :>header Content-Type: application/json
        :>json array tracker_ids: array of worker tracking ids
            {site ID: tracker ID}

        :status 202: scheduled
        :status 400: invalid request body
        :status 401: authentication required
        """
        request_attrs = {
            'jobs': {
                'type': list,
                'required': True
            },
        }
        job_attrs = {
            'name': {
                'type': str,
                'required': True
            },
        }
        available_jobs = ['test']
        tracker_ids = dict()

        # Get site.
        site_id = get_int_arg('site_id', site_id)
        site = g.db.query(Site).filter(Site.id == site_id).first()

        # Validate
        if site is None:
            raise NotFound("Site '%s' does not exist." % site_id)

        request_json = request.get_json()
        validate_request_json(request_json, request_attrs)

        for job in request_json['jobs']:
            validate_json_attr('name', job_attrs, job)

            if job['name'] not in available_jobs:
                raise BadRequest('`{}` does not exist in available'
                                 ' jobs: {}'.format(job['name'],
                                                    ','.join(available_jobs)))

        # Schedule jobs
        for job in request_json['jobs']:
            tracker_id = 'tracker.{}'.format(random_string(10))
            tracker_ids[site.id] = tracker_id

            if job['name'] == 'test':
                description = 'Testing site "{}"'.format(site.name)
                worker.scrape.test_site.enqueue(
                    site_id=site.id,
                    tracker_id=tracker_id,
                    jobdesc=description,
                    user_id=g.user.id,
                )

        response = jsonify(tracker_ids=tracker_ids)
        response.status_code = 202

        return response
示例#7
0
    def post(self):
        '''
        Request search of usernames.

        One tracker ID is created per username and one background job is
        scheduled per (username, valid site) pair.

        **Example Request**

        .. sourcecode:: json

            {
                "usernames": [
                    "johndoe",
                    "janedoe",
                    ...
                ],
                "group": 3,
                "test": false
            }

        **Example Response**

        .. sourcecode:: json

            {
                "tracker_ids": {
                        "johndoe": "tracker.12344565",
                }
            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :<json list usernames: a list of usernames to search for
        :<json int group: ID of site group to use (optional)
        :<json int site: ID of site to search (optional)
        :<json bool test: test results (optional, default: false)

        :>header Content-Type: application/json
        :>json object tracker_ids: maps each requested username to the
            tracker ID created for it

        :status 202: accepted for background processing
        :status 400: invalid request body
        :status 401: authentication required
        :status 404: unknown group or site, or no valid sites to check
        '''
        tracker_ids = dict()
        redis = g.redis

        # get_json() can return None (e.g. when the body is not sent as
        # JSON); treat that as an empty body so the client gets a 400
        # instead of an unhandled TypeError.
        request_json = request.get_json() or {}

        if 'usernames' not in request_json:
            raise BadRequest('`usernames` is required')

        validate_request_json(request_json, USERNAME_ATTRS)

        usernames = request_json['usernames']

        if not usernames:
            raise BadRequest('At least one username is required')

        if 'group' in request_json and 'site' in request_json:
            raise BadRequest('Supply either `group` or `site`.')

        group = None
        group_id = None

        if 'group' in request_json:
            group_id = request_json['group']
            group = g.db.query(Group).filter(Group.id == group_id).first()

            if group is None:
                raise NotFound("Group '%s' does not exist." % group_id)

            group_id = group.id

        site = None

        if 'site' in request_json:
            site_id = request_json['site']
            site = g.db.query(Site).filter(Site.id == site_id).first()

            if site is None:
                raise NotFound("Site '%s' does not exist." % site_id)

        test = request_json.get('test', False)

        # Candidate sites: the group's sites, the single requested site,
        # or every site when neither was supplied.
        if group:
            sites = group.sites
        elif site:
            sites = g.db.query(Site).filter(Site.id == site.id)
        else:
            sites = g.db.query(Site)

        # Only check valid sites.
        sites = sites.filter(Site.valid == True).all() # noqa

        if not sites:
            raise NotFound('No valid sites to check')

        # Same for every username, so compute it once.
        total = len(sites)

        for username in usernames:
            # Create an object in redis to track the number of sites completed
            # in this search.
            tracker_id = 'tracker.{}'.format(random_string(10))
            tracker_ids[username] = tracker_id
            redis.set(tracker_id, 0)
            redis.expire(tracker_id, 600)

            # Queue a job for each site.
            for valid_site in sites:
                app.queue.schedule_username(
                    username=username,
                    site=valid_site,
                    group_id=group_id,
                    total=total,
                    tracker_id=tracker_id,
                    test=test
                )

        response = jsonify(tracker_ids=tracker_ids)
        response.status_code = 202

        return response
示例#8
0
    def post_jobs_for_sites(self):
        """
        Schedule background jobs for every site.

        All requested jobs are validated before any of them is scheduled.
        A tracker ID is generated for each (job, site) pair, but the
        response maps each site ID to the most recently generated one only.

        **Example Request**

        .. sourcecode:: json

            {
                "jobs": [
                    {
                        "name": "test",
                    },
                    ...
                ]
            }

        **Example Response**

        .. sourcecode:: json

            {
                "tracker_ids": {
                        "1": "tracker.12344565",
                }

            }

        :<header Content-Type: application/json
        :<header X-Auth: the client's auth token
        :>json list jobs: a list of jobs to schedule
        :>json string jobs[n].name: name of job

        :>header Content-Type: application/json
        :>json array tracker_ids: array of worker tracking ids

        :status 202: scheduled
        :status 400: invalid request body
        :status 401: authentication required
        """
        known_jobs = ['test']
        body_schema = {'jobs': {'type': list, 'required': True}}
        job_schema = {'name': {'type': str, 'required': True}}

        payload = request.get_json()
        validate_request_json(payload, body_schema)

        # First pass: reject the whole request if any job is malformed or
        # unknown, so nothing is scheduled for a partially valid request.
        for entry in payload['jobs']:
            validate_json_attr('name', job_schema, entry)

            if not (entry['name'] in known_jobs):
                message = '`{}` does not exist in available jobs: {}'
                raise BadRequest(message.format(entry['name'],
                                                ','.join(known_jobs)))

        # Load the sites only after the request has been accepted.
        all_sites = g.db.query(Site).all()

        # Second pass: schedule every job against every site.
        tracker_ids = {}

        for entry in payload['jobs']:
            for current_site in all_sites:
                tracker_id = 'tracker.{}'.format(random_string(10))
                tracker_ids[current_site.id] = tracker_id

                if entry['name'] != 'test':
                    continue

                app.queue.schedule_site_test(
                    site=current_site,
                    tracker_id=tracker_id,
                )

        response = jsonify(tracker_ids=tracker_ids)
        response.status_code = 202

        return response