示例#1
0
def get_touched_pushevent_branch_files(payload, github_auth, app_logger):
    ''' Return a set of files modified between master and payload head.

        Uses the Github compare API twice: first to find the merge base
        between master and the pushed head, then to diff the merge base
        against the head for the touched file list.

        https://developer.github.com/v3/activity/events/types/#pushevent
    '''
    branch_sha = payload['head_commit']['id']

    # First compare: master vs. pushed head, to locate the merge base.
    compare1_url = payload['repository']['compare_url']
    compare1_url = expand_uri(compare1_url, dict(base='master', head=branch_sha))
    app_logger.debug('Compare URL 1 {}'.format(compare1_url))

    compare1 = get(compare1_url, auth=github_auth).json()
    merge_base_sha = compare1['merge_base_commit']['sha']

    # That's no branch: the head commit is already on master.
    if merge_base_sha == branch_sha:
        return set()

    # Second compare: merge base vs. head, for the actual file diff.
    compare2_url = payload['repository']['compare_url']
    compare2_url = expand_uri(compare2_url, dict(base=merge_base_sha, head=branch_sha))
    app_logger.debug('Compare URL 2 {}'.format(compare2_url))

    compare2 = get(compare2_url, auth=github_auth).json()
    # Set comprehension replaces set([...]) and avoids reusing the name "file".
    touched = {item['filename'] for item in compare2['files']}
    app_logger.debug(u'Touched files {}'.format(', '.join(touched)))

    return touched
示例#2
0
def is_merged_to_master(db, set_id, job_id, commit_sha, github_auth):
    ''' Return True if commit_sha is merged to master, False if not, None if unknown.
    '''
    # use objects.read_set and read_job so they can be mocked in testing.
    # "set_" avoids shadowing the builtin set().
    set_, job = objects.read_set(db, set_id), objects.read_job(db, job_id)

    if set_ is not None:
        # Sets come from master by definition.
        return True

    elif job is None:
        # Missing set and job means unknown merge status.
        return None

    try:
        # Resolve the repository URL template from the Github API root.
        template1 = get('https://api.github.com/', auth=github_auth).json().get('repository_url')
        repo_url = expand_uri(template1, dict(owner=job.github_owner, repo=job.github_repository))

        # Resolve the compare URL template from the repository document.
        template2 = get(repo_url, auth=github_auth).json().get('compare_url')
        compare_url = expand_uri(template2, dict(base=commit_sha, head='master'))

        # Merged when the commit is its own merge base with master.
        compare = get(compare_url, auth=github_auth).json()
        return compare['base_commit']['sha'] == compare['merge_base_commit']['sha']

    except Exception as e:
        _L.error('Failed to check merged status of {}/{} {}: {}'\
            .format(job.github_owner, job.github_repository, commit_sha, e))
        return None
示例#3
0
def get_touched_pushevent_branch_files(payload, github_auth, app_logger):
    ''' Return a set of files modified between master and payload head.

        Uses the Github compare API twice: first to find the merge base
        between master and the pushed head, then to diff the merge base
        against the head for the touched file list.

        https://developer.github.com/v3/activity/events/types/#pushevent
    '''
    branch_sha = payload['head_commit']['id']

    # First compare: master vs. pushed head, to locate the merge base.
    compare1_url = payload['repository']['compare_url']
    compare1_url = expand_uri(compare1_url, dict(base='master', head=branch_sha))
    app_logger.debug('Compare URL 1 {}'.format(compare1_url))

    compare1 = get(compare1_url, auth=github_auth).json()
    merge_base_sha = compare1['merge_base_commit']['sha']

    # That's no branch: the head commit is already on master.
    if merge_base_sha == branch_sha:
        return set()

    # Second compare: merge base vs. head, for the actual file diff.
    compare2_url = payload['repository']['compare_url']
    compare2_url = expand_uri(compare2_url, dict(base=merge_base_sha, head=branch_sha))
    app_logger.debug('Compare URL 2 {}'.format(compare2_url))

    compare2 = get(compare2_url, auth=github_auth).json()
    # Set comprehension replaces set([...]) and avoids reusing the name "file".
    touched = {item['filename'] for item in compare2['files']}
    app_logger.debug(u'Touched files {}'.format(', '.join(touched)))

    return touched
示例#4
0
def is_merged_to_master(db, set_id, job_id, commit_sha, github_auth):
    ''' Return True if commit_sha is merged to master, False if not, None if unknown.
    '''
    # use objects.read_set and read_job so they can be mocked in testing.
    # "set_" avoids shadowing the builtin set().
    set_, job = objects.read_set(db, set_id), objects.read_job(db, job_id)

    if set_ is not None:
        # Sets come from master by definition.
        return True

    elif job is None:
        # Missing set and job means unknown merge status.
        return None

    try:
        # Resolve the repository URL template from the Github API root.
        template1 = get('https://api.github.com/', auth=github_auth).json().get('repository_url')
        repo_url = expand_uri(template1, dict(owner=job.github_owner, repo=job.github_repository))

        # Resolve the compare URL template from the repository document.
        template2 = get(repo_url, auth=github_auth).json().get('compare_url')
        compare_url = expand_uri(template2, dict(base=commit_sha, head='master'))

        # Merged when the commit is its own merge base with master.
        compare = get(compare_url, auth=github_auth).json()
        return compare['base_commit']['sha'] == compare['merge_base_commit']['sha']

    except Exception as e:
        _L.error('Failed to check merged status of {}/{} {}: {}'\
            .format(job.github_owner, job.github_repository, commit_sha, e))
        return None
示例#5
0
def _find_batch_source_urls(owner, repository, github_auth):
    ''' Starting with a Github repo API URL, return a list of sources.

        Sources are dictionaries, with keys commit_sha, url to content on Github,
        blob_sha for git blob, and path like 'sources/xx/yy.json'.
    '''
    api_resp = get('https://api.github.com/', auth=github_auth)
    if api_resp.status_code >= 400:
        raise Exception('Got status {} from Github API'.format(api_resp.status_code))

    repo_url = expand_uri(api_resp.json()['repository_url'],
                          dict(owner=owner, repo=repository))

    _L.info('Starting batch sources at {start_url}'.format(start_url=repo_url))
    repo_info = get(repo_url, auth=github_auth).json()
    contents_url = repo_info['contents_url']
    commits_url = repo_info['commits_url']
    default_branch = repo_info['default_branch']

    master_url = expand_uri(commits_url, dict(sha=default_branch))

    _L.debug('Getting {ref} branch {master_url}'.format(
        ref=default_branch, master_url=master_url))
    head = get(master_url, auth=github_auth).json()
    commit_sha, commit_date = head['sha'], head['commit']['committer']['date']

    # Pin all contents requests to the same head commit for consistency.
    contents_url += '{?ref}'
    pending_urls = [expand_uri(contents_url, dict(path='sources', ref=commit_sha))]
    found_sources = []

    # Work-queue traversal: directory entries push their own contents URL
    # onto the list being iterated, so nested directories are visited too.
    for pending_url in pending_urls:
        _L.debug('Getting sources {sources_url}'.format(sources_url=pending_url))
        listing = get(pending_url, auth=github_auth).json()

        for entry in listing:
            if entry['type'] == 'dir':
                pending_urls.append(
                    expand_uri(contents_url, dict(path=entry['path'], ref=commit_sha)))
            elif entry['type'] == 'file' and splitext(entry['path'])[1] == '.json':
                found_sources.append(dict(commit_sha=commit_sha,
                                          url=entry['url'],
                                          blob_sha=entry['sha'],
                                          path=entry['path']))

    return found_sources
示例#6
0
File: git.py  Project: migurski/precog
def get_webhook_commit_info(app, payload):
    ''' Get owner, repository, commit SHA and Github status API URL from webhook payload.

        Accepts pull_request and push (head_commit) payloads. Raises
        ValueError for any other payload shape or when repository details
        are missing.
    '''
    # Validate repository presence up front: the head_commit branch below
    # reads payload['repository'], so checking afterwards (as before) let a
    # malformed push payload raise KeyError instead of ValueError.
    if 'repository' not in payload:
        raise ValueError('Unintelligible payload')

    if 'pull_request' in payload:
        commit_sha = payload['pull_request']['head']['sha']
        status_url = payload['pull_request']['statuses_url']

    elif 'head_commit' in payload:
        commit_sha = payload['head_commit']['id']
        status_url = payload['repository']['statuses_url']
        status_url = expand_uri(status_url, dict(sha=commit_sha))

    else:
        raise ValueError('Unintelligible payload')

    repo = payload['repository']
    # Organization payloads use 'name'; user payloads may only have 'login'.
    owner = repo['owner'].get('name') or repo['owner'].get('login')
    repository = repo['name']

    app.logger.debug('Status URL {}'.format(status_url))

    return owner, repository, commit_sha, status_url
示例#7
0
    def test_convert_run_cached(self):
        ''' convert_run() returns the memcached dictionary on a cache hit.

            The pickled value below was captured from a prior conversion
            (dated 2015-08-16); on a hit, no HTTP request and no
            memcache.set() should occur, and the cached values must win
            over the fresh state built here.
        '''
        memcache = mock.Mock()
        # Pickle of a previously-converted run dictionary; must be returned as-is.
        memcache.get.return_value = b'\x80\x02}q\x00(X\x07\x00\x00\x00conformq\x01\x89X\n\x00\x00\x00cache_dateq\x02X\n\x00\x00\x002015-08-16q\x03X\x11\x00\x00\x00coverage completeq\x04\x89U\x06run_idq\x05M\xc8\x01X\x04\x00\x00\x00hrefq\x06X(\x00\x00\x00http://blob/def/sources/pl/fo%C3%B6.jsonq\x07X\x04\x00\x00\x00skipq\x08\x89X\x05\x00\x00\x00cacheq\tX\x04\x00\x00\x00zip1q\nX\x0c\x00\x00\x00conform typeq\x0bNX\x06\x00\x00\x00sampleq\x0cX\x1e\x00\x00\x00http://example.com/sample.jsonq\rX\x06\x00\x00\x00sourceq\x0eX\x0c\x00\x00\x00pl/fo\xc3\xb6.jsonq\x0fX\x07\x00\x00\x00versionq\x10X\x04\x00\x00\x002015q\x11X\t\x00\x00\x00processedq\x12X\x04\x00\x00\x00zip3q\x13X\x0b\x00\x00\x00fingerprintq\x14X\x03\x00\x00\x00xyzq\x15X\r\x00\x00\x00address countq\x16KcX\x06\x00\x00\x00outputq\x17X\x04\x00\x00\x00zip2q\x18X\t\x00\x00\x00shortnameq\x19X\x07\x00\x00\x00pl/fo\xc3\xb6q\x1aX\n\x00\x00\x00cache timeq\x1bX\x04\x00\x00\x001:00q\x1cX\x04\x00\x00\x00typeq\x1dX\x04\x00\x00\x00httpq\x1eX\x0c\x00\x00\x00process timeq\x1fX\x04\x00\x00\x002:00q X\r\x00\x00\x00geometry typeq!X\x05\x00\x00\x00Pointq"u.'

        # Base64-encoded JSON source, as stored on a Run.
        source = {'conform': {}, 'skip': False, 'type': 'http'}
        source_b64 = b64encode(json.dumps(source).encode('utf8'))
        url_template = 'http://blob/{commit_sha}/{+source_path}'

        # Fresh run state; on a cache hit these values are largely ignored.
        state = {
            'address count': 99,
            'cache': 'zip1',
            'cache time': '1:00',
            'fingerprint': 'xyz',
            'geometry type': 'Point',
            'output': 'zip2',
            'process time': '2:00',
            'processed': 'zip3',
            'version': '2015',
            'sample': 'http://example.com/sample.json'
        }

        run = Run(456, u'sources/pl/foö.json', 'abc', source_b64,
                  datetime.utcnow(), RunState(state), True, None, '', '', None,
                  None, 'def', False)

        # A cache hit must not reach for the network.
        with mock.patch('requests.get') as get:
            conv = convert_run(memcache, run, url_template)
            get.assert_not_called()

        # Nothing re-stored, and exactly one versioned-key lookup.
        memcache.set.assert_not_called()
        memcache.get.assert_called_once_with(
            'converted-run-456-{}'.format(__version__))

        self.assertEqual(conv['address count'], state['address count'])
        self.assertEqual(conv['cache'], state['cache'])
        self.assertEqual(conv['cache time'], state['cache time'])
        self.assertEqual(conv['cache_date'], '2015-08-16',
                         'Should use a timestamp from the cached version')
        self.assertEqual(conv['conform'], bool(source['conform']))
        self.assertEqual(conv['conform type'],
                         state_conform_type(RunState(state)))
        self.assertEqual(conv['coverage complete'],
                         is_coverage_complete(source))
        self.assertEqual(conv['fingerprint'], state['fingerprint'])
        self.assertEqual(conv['geometry type'], state['geometry type'])
        self.assertEqual(conv['href'], expand_uri(url_template, run.__dict__))
        self.assertEqual(conv['output'], state['output'])
        self.assertEqual(conv['process time'], state['process time'])
        self.assertEqual(conv['processed'], state['processed'])
        self.assertEqual(conv['sample'], state['sample'])
        self.assertEqual(conv['run_id'], 456)
        self.assertEqual(conv['shortname'], u'pl/foö')
        self.assertEqual(conv['skip'], source['skip'])
        self.assertEqual(conv['source'], u'pl/foö.json')
        self.assertEqual(conv['type'], source['type'])
        self.assertEqual(conv['version'], state['version'])
示例#8
0
def _find_batch_source_urls(owner, repository, github_auth):
    ''' Starting with a Github repo API URL, return a list of sources.

        Sources are dictionaries, with keys commit_sha, url to content on Github,
        blob_sha for git blob, and path like 'sources/xx/yy.json'.
    '''
    api_resp = get('https://api.github.com/', auth=github_auth)
    if api_resp.status_code >= 400:
        raise Exception('Got status {} from Github API'.format(api_resp.status_code))

    repo_url = expand_uri(api_resp.json()['repository_url'],
                          dict(owner=owner, repo=repository))

    _L.info('Starting batch sources at {start_url}'.format(start_url=repo_url))
    repo_info = get(repo_url, auth=github_auth).json()
    contents_url = repo_info['contents_url']
    commits_url = repo_info['commits_url']
    default_branch = repo_info['default_branch']

    master_url = expand_uri(commits_url, dict(sha=default_branch))

    _L.debug('Getting {ref} branch {master_url}'.format(
        ref=default_branch, master_url=master_url))
    head = get(master_url, auth=github_auth).json()
    commit_sha, commit_date = head['sha'], head['commit']['committer']['date']

    # Pin all contents requests to the same head commit for consistency.
    contents_url += '{?ref}'
    pending_urls = [expand_uri(contents_url, dict(path='sources', ref=commit_sha))]
    found_sources = []

    # Work-queue traversal: directory entries push their own contents URL
    # onto the list being iterated, so nested directories are visited too.
    for pending_url in pending_urls:
        _L.debug('Getting sources {sources_url}'.format(sources_url=pending_url))
        listing = get(pending_url, auth=github_auth).json()

        for entry in listing:
            if entry['type'] == 'dir':
                pending_urls.append(
                    expand_uri(contents_url, dict(path=entry['path'], ref=commit_sha)))
            elif entry['type'] == 'file' and splitext(entry['path'])[1] == '.json':
                found_sources.append(dict(commit_sha=commit_sha,
                                          url=entry['url'],
                                          blob_sha=entry['sha'],
                                          path=entry['path']))

    return found_sources
示例#9
0
    def test_convert_run_uncached(self):
        ''' convert_run() builds a fresh dictionary on a memcache miss.

            With memcache.get returning None, the conversion must derive
            every field from the Run's state and source data; the cache
            date comes from the run's own timestamp.
        '''
        memcache = mock.Mock()
        # Simulate a cache miss so convert_run does the full conversion.
        memcache.get.return_value = None

        # Base64-encoded JSON source, as stored on a Run.
        source = {'conform': {}, 'skip': False, 'type': 'http'}
        source_b64 = b64encode(json.dumps(source).encode('utf8'))
        url_template = u'http://blob/{commit_sha}/{+source_path}'

        state = {
            'address count': 99,
            'cache': 'zip1',
            'cache time': '1:00',
            'fingerprint': 'xyz',
            'geometry type': 'Point',
            'output': 'zip2',
            'process time': '2:00',
            'processed': 'zip3',
            'version': '2015',
            'sample': 'http://example.com/sample.json'
        }

        run = Run(456, u'sources/pl/foö.json', 'abc', source_b64,
                  datetime.utcnow(), RunState(state), True, None, '', '', None,
                  None, 'def', False)

        # HTTMock intercepts any HTTP the conversion attempts.
        with HTTMock(self.response_content):
            conv = convert_run(memcache, run, url_template)

        self.assertEqual(conv['address count'], state['address count'])
        self.assertEqual(conv['cache'], state['cache'])
        self.assertEqual(conv['cache time'], state['cache time'])
        # Cache date derives from the run timestamp on a miss.
        self.assertEqual(conv['cache_date'],
                         run.datetime_tz.strftime('%Y-%m-%d'))
        self.assertEqual(conv['conform'], bool(source['conform']))
        self.assertEqual(conv['conform type'],
                         state_conform_type(RunState(state)))
        self.assertEqual(conv['coverage complete'],
                         is_coverage_complete(source))
        self.assertEqual(conv['fingerprint'], state['fingerprint'])
        self.assertEqual(conv['geometry type'], state['geometry type'])
        self.assertEqual(conv['href'], expand_uri(url_template, run.__dict__))
        self.assertEqual(conv['output'], state['output'])
        self.assertEqual(conv['process time'], state['process time'])
        self.assertEqual(conv['processed'], state['processed'])
        self.assertEqual(conv['sample'], state['sample'])
        self.assertEqual(conv['run_id'], 456)
        self.assertEqual(conv['shortname'], u'pl/foö')
        self.assertEqual(conv['skip'], source['skip'])
        self.assertEqual(conv['source'], u'pl/foö.json')
        self.assertEqual(conv['type'], source['type'])
        self.assertEqual(conv['version'], state['version'])
示例#10
0
def summarize_runs(memcache, runs, datetime, owner, repository, sort_order):
    ''' Return summary data for set.html template.
    '''
    repo_base = expand_uri(u'https://github.com/{owner}/{repository}/',
                           dict(owner=owner, repository=repository))
    blob_template = urljoin(repo_base, u'blob/{commit_sha}/{+source_path}')

    # Convert each run for display, count outcomes, then apply the sort.
    converted = [convert_run(memcache, each, blob_template) for each in runs]
    counts = run_counts(runs)
    sort_run_dicts(converted, sort_order)

    return dict(states=converted, last_modified=datetime, counts=counts)
示例#11
0
def summarize_runs(memcache, runs, datetime, owner, repository, sort_order):
    ''' Return summary data for set.html template.
    '''
    repo_base = expand_uri(u'https://github.com/{owner}/{repository}/',
                           dict(owner=owner, repository=repository))
    blob_template = urljoin(repo_base, u'blob/{commit_sha}/{+source_path}')

    # Convert each run for display, count outcomes, then apply the sort.
    converted = [convert_run(memcache, each, blob_template) for each in runs]
    counts = run_counts(runs)
    sort_run_dicts(converted, sort_order)

    return dict(states=converted, last_modified=datetime, counts=counts)
示例#12
0
def process_issuecomment_payload_files(issuecomment_payload, github_auth,
                                       app_logger):
    ''' Return a dictionary of files paths from a comment payload.

        Maps each touched JSON source filename to a (content, sha) tuple.

        https://developer.github.com/v3/activity/events/types/#issuecommentevent
    '''
    files = dict()

    if issuecomment_payload['action'] == 'deleted':
        return files

    pull_request_url = issuecomment_payload['issue']['pull_request']['url']
    pull_request = get(pull_request_url, auth=github_auth).json()

    touched = get_touched_pullrequest_files(pull_request, github_auth,
                                            app_logger)

    commit_sha = pull_request['head']['sha']

    for filename in touched:
        if relpath(filename, 'sources').startswith('..'):
            # Skip things outside of sources directory.
            continue

        if splitext(filename)[1] != '.json':
            # Skip non-JSON files.
            continue

        # Pin the contents request to the PR head commit.
        contents_url = pull_request['head']['repo']['contents_url'] + '{?ref}'
        contents_url = expand_uri(contents_url,
                                  dict(path=filename, ref=commit_sha))
        app_logger.debug('Contents URL {}'.format(contents_url))

        got = get(contents_url, auth=github_auth)
        contents = got.json()

        # Accept the full 2xx range; range(200, 299) wrongly excluded 299.
        if got.status_code not in range(200, 300):
            app_logger.warning('Skipping {} - {}'.format(
                filename, got.status_code))
            continue

        if contents['encoding'] != 'base64':
            raise ValueError(
                'Unrecognized encoding "{encoding}"'.format(**contents))

        app_logger.debug('Contents SHA {sha}'.format(**contents))
        files[filename] = contents['content'], contents['sha']

    return files
示例#13
0
def get_touched_pullrequest_files(pull_request, github_auth, app_logger):
    ''' Return a set of files modified between master and payload head.

        Diffs the pull request base against its head via the compare API.
    '''
    base_sha = pull_request['base']['sha']
    head_sha = pull_request['head']['sha']

    compare_url = pull_request['head']['repo']['compare_url']
    compare_url = expand_uri(compare_url, dict(head=head_sha, base=base_sha))
    app_logger.debug('Compare URL {}'.format(compare_url))

    compare = get(compare_url, auth=github_auth).json()
    # Set comprehension replaces set([...]) and avoids reusing the name "file".
    touched = {item['filename'] for item in compare['files']}
    app_logger.debug(u'Touched files {}'.format(', '.join(touched)))

    return touched
示例#14
0
def get_touched_pullrequest_files(pull_request, github_auth, app_logger):
    ''' Return a set of files modified between master and payload head.

        Diffs the pull request base against its head via the compare API.
    '''
    base_sha = pull_request['base']['sha']
    head_sha = pull_request['head']['sha']

    compare_url = pull_request['head']['repo']['compare_url']
    compare_url = expand_uri(compare_url, dict(head=head_sha, base=base_sha))
    app_logger.debug('Compare URL {}'.format(compare_url))

    compare = get(compare_url, auth=github_auth).json()
    # Set comprehension replaces set([...]) and avoids reusing the name "file".
    touched = {item['filename'] for item in compare['files']}
    app_logger.debug(u'Touched files {}'.format(', '.join(touched)))

    return touched
示例#15
0
def create_queued_job(queue, files, job_url_template, commit_sha, rerun, owner, repo, status_url, comments_url):
    ''' Create a new job, and add its files to the queue.

        Returns the new job id.
    '''
    filenames = list(files.keys())
    # Per-file states and results start empty; workers fill them in later.
    file_states = {name: None for name in filenames}
    file_results = {name: None for name in filenames}

    job_id = calculate_job_id(files)
    # job_url is None when no template was supplied.
    job_url = job_url_template and expand_uri(job_url_template, dict(id=job_id))
    # Removed unused local "job_status = None".

    with queue as db:
        task_files = add_files_to_queue(queue, job_id, job_url, files, commit_sha, rerun)
        add_job(db, job_id, None, task_files, file_states, file_results, owner, repo, status_url, comments_url)

    return job_id
示例#16
0
def create_queued_job(queue, files, job_url_template, commit_sha, rerun, owner, repo, status_url, comments_url):
    ''' Create a new job, and add its files to the queue.

        Returns the new job id.
    '''
    filenames = list(files.keys())
    # Per-file states and results start empty; workers fill them in later.
    file_states = {name: None for name in filenames}
    file_results = {name: None for name in filenames}

    job_id = calculate_job_id(files)
    # job_url is None when no template was supplied.
    job_url = job_url_template and expand_uri(job_url_template, dict(id=job_id))
    # Removed unused local "job_status = None".

    with queue as db:
        task_files = add_files_to_queue(queue, job_id, job_url, files, commit_sha, rerun)
        add_job(db, job_id, None, task_files, file_states, file_results, owner, repo, status_url, comments_url)

    return job_id
示例#17
0
def process_github_payload(queue, request_url, app_logger, github_auth,
                           webhook_payload, gag_status):
    ''' Process a Github webhook payload into a queued job.

        Returns a (success, response dict) tuple. Success is False only when
        job creation itself fails; skipped, unreadable, and empty payloads
        still return True. Side effects: posts Github commit statuses
        (error/empty/pending) unless gag_status suppresses them.
    '''
    if skip_payload(webhook_payload):
        return True, {'url': None, 'files': [], 'skip': True}

    owner, repo, commit_sha, status_url, comments_url = get_commit_info(
        app_logger, webhook_payload, github_auth)
    # A None status_url suppresses all Github status updates below.
    if gag_status:
        status_url = None

    try:
        files = process_payload_files(webhook_payload, github_auth, app_logger)
    except Exception as e:
        # Tell Github the sources could not be read, but report success
        # upstream so the webhook is not retried.
        message = 'Could not read source files: {}'.format(e)
        update_error_status(status_url, message, [], github_auth)
        _L.error(message, exc_info=True)
        return True, {'url': None, 'files': [], 'status_url': status_url}

    if not files:
        # Nothing relevant changed; mark the commit as empty and stop.
        update_empty_status(status_url, github_auth)
        _L.warning('No files')
        return True, {'url': None, 'files': [], 'status_url': status_url}

    filenames = list(files.keys())
    job_url_template = urljoin(request_url, u'/jobs/{id}')
    is_rerun = is_rerun_payload(webhook_payload)

    try:
        job_id = create_queued_job(queue, files, job_url_template, commit_sha,
                                   is_rerun, owner, repo, status_url,
                                   comments_url)
        job_url = expand_uri(job_url_template, dict(id=job_id))
    except Exception as e:
        # Oops, tell Github something went wrong.
        update_error_status(status_url, str(e), filenames, github_auth)
        _L.error('Oops', exc_info=True)
        return False, dict(error=str(e), files=files, status_url=status_url)
    else:
        # That worked, tell Github we're working on it.
        update_pending_status(status_url, job_url, filenames, github_auth)
        return True, dict(id=job_id,
                          url=job_url,
                          files=files,
                          status_url=status_url)
示例#18
0
    def test_convert_run_cached(self):
        ''' convert_run() returns the memcached dictionary on a cache hit.

            The pickled value below was captured from a prior conversion
            (dated 2015-08-16); on a hit, no HTTP request and no
            memcache.set() should occur, and the cached values must win
            over the fresh state built here.
        '''
        memcache = mock.Mock()
        # Pickle of a previously-converted run dictionary; must be returned as-is.
        memcache.get.return_value = b'\x80\x02}q\x00(X\x07\x00\x00\x00conformq\x01\x89X\n\x00\x00\x00cache_dateq\x02X\n\x00\x00\x002015-08-16q\x03X\x11\x00\x00\x00coverage completeq\x04\x89U\x06run_idq\x05M\xc8\x01X\x04\x00\x00\x00hrefq\x06X(\x00\x00\x00http://blob/def/sources/pl/fo%C3%B6.jsonq\x07X\x04\x00\x00\x00skipq\x08\x89X\x05\x00\x00\x00cacheq\tX\x04\x00\x00\x00zip1q\nX\x0c\x00\x00\x00conform typeq\x0bNX\x06\x00\x00\x00sampleq\x0cX\x1e\x00\x00\x00http://example.com/sample.jsonq\rX\x06\x00\x00\x00sourceq\x0eX\x0c\x00\x00\x00pl/fo\xc3\xb6.jsonq\x0fX\x07\x00\x00\x00versionq\x10X\x04\x00\x00\x002015q\x11X\t\x00\x00\x00processedq\x12X\x04\x00\x00\x00zip3q\x13X\x0b\x00\x00\x00fingerprintq\x14X\x03\x00\x00\x00xyzq\x15X\r\x00\x00\x00address countq\x16KcX\x06\x00\x00\x00outputq\x17X\x04\x00\x00\x00zip2q\x18X\t\x00\x00\x00shortnameq\x19X\x07\x00\x00\x00pl/fo\xc3\xb6q\x1aX\n\x00\x00\x00cache timeq\x1bX\x04\x00\x00\x001:00q\x1cX\x04\x00\x00\x00typeq\x1dX\x04\x00\x00\x00httpq\x1eX\x0c\x00\x00\x00process timeq\x1fX\x04\x00\x00\x002:00q X\r\x00\x00\x00geometry typeq!X\x05\x00\x00\x00Pointq"u.'

        # Base64-encoded JSON source, as stored on a Run.
        source = {'conform': {}, 'skip': False, 'type': 'http'}
        source_b64 = b64encode(json.dumps(source).encode('utf8'))
        url_template = 'http://blob/{commit_sha}/{+source_path}'

        # Fresh run state; on a cache hit these values are largely ignored.
        state = {'address count': 99, 'cache': 'zip1', 'cache time': '1:00',
                 'fingerprint': 'xyz', 'geometry type': 'Point', 'output': 'zip2',
                 'process time': '2:00', 'processed': 'zip3', 'version': '2015',
                 'sample': 'http://example.com/sample.json'}

        run = Run(456, u'sources/pl/foö.json', 'abc', source_b64, datetime.utcnow(),
                  RunState(state), True, None, '', '', None, None, 'def', False)

        # A cache hit must not reach for the network.
        with mock.patch('requests.get') as get:
            conv = convert_run(memcache, run, url_template)
            get.assert_not_called()

        # Nothing re-stored, and exactly one versioned-key lookup.
        memcache.set.assert_not_called()
        memcache.get.assert_called_once_with('converted-run-456-{}'.format(__version__))

        self.assertEqual(conv['address count'], state['address count'])
        self.assertEqual(conv['cache'], state['cache'])
        self.assertEqual(conv['cache time'], state['cache time'])
        self.assertEqual(conv['cache_date'], '2015-08-16', 'Should use a timestamp from the cached version')
        self.assertEqual(conv['conform'], bool(source['conform']))
        self.assertEqual(conv['conform type'], state_conform_type(RunState(state)))
        self.assertEqual(conv['coverage complete'], is_coverage_complete(source))
        self.assertEqual(conv['fingerprint'], state['fingerprint'])
        self.assertEqual(conv['geometry type'], state['geometry type'])
        self.assertEqual(conv['href'], expand_uri(url_template, run.__dict__))
        self.assertEqual(conv['output'], state['output'])
        self.assertEqual(conv['process time'], state['process time'])
        self.assertEqual(conv['processed'], state['processed'])
        self.assertEqual(conv['sample'], state['sample'])
        self.assertEqual(conv['run_id'], 456)
        self.assertEqual(conv['shortname'], u'pl/foö')
        self.assertEqual(conv['skip'], source['skip'])
        self.assertEqual(conv['source'], u'pl/foö.json')
        self.assertEqual(conv['type'], source['type'])
        self.assertEqual(conv['version'], state['version'])
示例#19
0
def process_issuecomment_payload_files(issuecomment_payload, github_auth, app_logger):
    ''' Return a dictionary of files paths from a comment payload.

        Maps each touched JSON source filename to a (content, sha) tuple.

        https://developer.github.com/v3/activity/events/types/#issuecommentevent
    '''
    files = dict()

    if issuecomment_payload['action'] == 'deleted':
        return files

    pull_request_url = issuecomment_payload['issue']['pull_request']['url']
    pull_request = get(pull_request_url, auth=github_auth).json()

    touched = get_touched_pullrequest_files(pull_request, github_auth, app_logger)

    commit_sha = pull_request['head']['sha']

    for filename in touched:
        if relpath(filename, 'sources').startswith('..'):
            # Skip things outside of sources directory.
            continue

        if splitext(filename)[1] != '.json':
            # Skip non-JSON files.
            continue

        # Pin the contents request to the PR head commit.
        contents_url = pull_request['head']['repo']['contents_url'] + '{?ref}'
        contents_url = expand_uri(contents_url, dict(path=filename, ref=commit_sha))
        app_logger.debug('Contents URL {}'.format(contents_url))

        got = get(contents_url, auth=github_auth)
        contents = got.json()

        # Accept the full 2xx range; range(200, 299) wrongly excluded 299.
        if got.status_code not in range(200, 300):
            app_logger.warning('Skipping {} - {}'.format(filename, got.status_code))
            continue

        if contents['encoding'] != 'base64':
            raise ValueError('Unrecognized encoding "{encoding}"'.format(**contents))

        app_logger.debug('Contents SHA {sha}'.format(**contents))
        files[filename] = contents['content'], contents['sha']

    return files
示例#20
0
def convert_run(memcache, run, url_template):
    ''' Return a template-ready dictionary version of a Run, with caching.

        Checks memcache under a version-scoped key first; on a miss, builds
        the dictionary from the run state and source data, then stores it
        back for next time.
    '''
    cache_key = 'converted-run-{}-{}'.format(run.id, __version__)
    cached_run = _get_cached(memcache, cache_key)
    if cached_run is not None:
        return cached_run

    try:
        source = json.loads(b64decode(run.source_data).decode('utf8'))
    except Exception:
        # Narrowed from a bare except: so KeyboardInterrupt/SystemExit
        # still propagate; any decode/parse failure yields an empty source.
        source = {}

    # NOTE(review): a falsy run.state produces {}, which lacks the
    # attributes read below and would raise AttributeError — presumably
    # states are always RunState instances; confirm.
    run_state = run.state or {}

    converted_run = {
        'address count': run_state.address_count,
        'cache': run_state.cache,
        'cache time': run_state.cache_time,
        'cache_date': run.datetime_tz.strftime('%Y-%m-%d'),
        'conform': bool(source.get('conform', False)),
        'conform type': state_conform_type(run_state),
        'coverage complete': is_coverage_complete(source),
        'fingerprint': run_state.fingerprint,
        'geometry type': run_state.geometry_type,
        'href': expand_uri(url_template, run.__dict__),
        'output': run_state.output,
        'process time': run_state.process_time,
        'processed': run_state.processed,
        'sample': run_state.sample,
        'run_id': run.id,
        'shortname': splitext(relpath(run.source_path, 'sources'))[0],
        'skip': bool(source.get('skip', False)),
        'source': relpath(run.source_path, 'sources'),
        'type': source.get('type', '').lower(),
        'version': run_state.version,
        'source problem': run_state.source_problem
    }

    _set_cached(memcache, cache_key, converted_run)
    return converted_run
示例#21
0
    def test_convert_run_uncached(self):
        ''' convert_run() builds a fresh dictionary on a memcache miss.

            With memcache.get returning None, the conversion must derive
            every field from the Run's state and source data; the cache
            date comes from the run's own timestamp.
        '''
        memcache = mock.Mock()
        # Simulate a cache miss so convert_run does the full conversion.
        memcache.get.return_value = None

        # Base64-encoded JSON source, as stored on a Run.
        source = {'conform': {}, 'skip': False, 'type': 'http'}
        source_b64 = b64encode(json.dumps(source).encode('utf8'))
        url_template = u'http://blob/{commit_sha}/{+source_path}'

        state = {'address count': 99, 'cache': 'zip1', 'cache time': '1:00',
                 'fingerprint': 'xyz', 'geometry type': 'Point', 'output': 'zip2',
                 'process time': '2:00', 'processed': 'zip3', 'version': '2015',
                 'sample': 'http://example.com/sample.json'}

        run = Run(456, u'sources/pl/foö.json', 'abc', source_b64, datetime.utcnow(),
                  RunState(state), True, None, '', '', None, None, 'def', False)

        # HTTMock intercepts any HTTP the conversion attempts.
        with HTTMock(self.response_content):
            conv = convert_run(memcache, run, url_template)

        self.assertEqual(conv['address count'], state['address count'])
        self.assertEqual(conv['cache'], state['cache'])
        self.assertEqual(conv['cache time'], state['cache time'])
        # Cache date derives from the run timestamp on a miss.
        self.assertEqual(conv['cache_date'], run.datetime_tz.strftime('%Y-%m-%d'))
        self.assertEqual(conv['conform'], bool(source['conform']))
        self.assertEqual(conv['conform type'], state_conform_type(RunState(state)))
        self.assertEqual(conv['coverage complete'], is_coverage_complete(source))
        self.assertEqual(conv['fingerprint'], state['fingerprint'])
        self.assertEqual(conv['geometry type'], state['geometry type'])
        self.assertEqual(conv['href'], expand_uri(url_template, run.__dict__))
        self.assertEqual(conv['output'], state['output'])
        self.assertEqual(conv['process time'], state['process time'])
        self.assertEqual(conv['processed'], state['processed'])
        self.assertEqual(conv['sample'], state['sample'])
        self.assertEqual(conv['run_id'], 456)
        self.assertEqual(conv['shortname'], u'pl/foö')
        self.assertEqual(conv['skip'], source['skip'])
        self.assertEqual(conv['source'], u'pl/foö.json')
        self.assertEqual(conv['type'], source['type'])
        self.assertEqual(conv['version'], state['version'])
示例#22
0
def convert_run(memcache, run, url_template):
    ''' Convert a Run object into a plain dict of display-ready fields.

        Results are cached in memcache under a key derived from the run id
        and the module version, so cached entries go stale when the code
        version changes.
    '''
    cache_key = 'converted-run-{}-{}'.format(run.id, __version__)
    cached_run = _get_cached(memcache, cache_key)
    if cached_run is not None:
        return cached_run

    try:
        source = json.loads(b64decode(run.source_data).decode('utf8'))
    except (TypeError, ValueError):
        # Narrowed from a bare except: b64decode raises binascii.Error (a
        # ValueError subclass) or TypeError, .decode raises UnicodeDecodeError
        # (a ValueError subclass), and json.loads raises ValueError. A bare
        # except also swallowed SystemExit and KeyboardInterrupt.
        source = {}

    # NOTE(review): if run.state is falsy this becomes a plain dict, and the
    # attribute accesses below (.address_count etc.) would raise
    # AttributeError — presumably run.state is always a RunState-like object
    # here; confirm with callers.
    run_state = run.state or {}

    converted_run = {
        'address count': run_state.address_count,
        'cache': run_state.cache,
        'cache time': run_state.cache_time,
        'cache_date': run.datetime_tz.strftime('%Y-%m-%d'),
        'conform': bool(source.get('conform', False)),
        'conform type': state_conform_type(run_state),
        'coverage complete': is_coverage_complete(source),
        'fingerprint': run_state.fingerprint,
        'geometry type': run_state.geometry_type,
        'href': expand_uri(url_template, run.__dict__),
        'output': run_state.output,
        'process time': run_state.process_time,
        'processed': run_state.processed,
        'sample': run_state.sample,
        'run_id': run.id,
        'shortname': splitext(relpath(run.source_path, 'sources'))[0],
        'skip': bool(source.get('skip', False)),
        'source': relpath(run.source_path, 'sources'),
        'type': source.get('type', '').lower(),
        'version': run_state.version,
        'source problem': run_state.source_problem
        }

    _set_cached(memcache, cache_key, converted_run)
    return converted_run
示例#23
0
def process_github_payload(queue, request_url, app_logger, github_auth, webhook_payload, gag_status):
    ''' Queue a job for the source files touched by a Github webhook payload.

        Returns a (handled, response-dict) tuple. A status is posted back to
        Github for every outcome unless gag_status suppresses it.
    '''
    if skip_payload(webhook_payload):
        return True, {'url': None, 'files': [], 'skip': True}

    owner, repo, commit_sha, status_url, comments_url = get_commit_info(app_logger, webhook_payload, github_auth)

    # A gagged run never posts statuses back to Github.
    status_url = None if gag_status else status_url

    try:
        files = process_payload_files(webhook_payload, github_auth, app_logger)
    except Exception as e:
        message = 'Could not read source files: {}'.format(e)
        update_error_status(status_url, message, [], github_auth)
        _L.error(message, exc_info=True)
        return True, {'url': None, 'files': [], 'status_url': status_url}

    if not files:
        update_empty_status(status_url, github_auth)
        _L.warning('No files')
        return True, {'url': None, 'files': [], 'status_url': status_url}

    touched = list(files)
    job_template = urljoin(request_url, u'/jobs/{id}')
    rerun = is_rerun_payload(webhook_payload)

    try:
        job_id = create_queued_job(queue, files, job_template, commit_sha,
                                   rerun, owner, repo, status_url, comments_url)
        job_url = expand_uri(job_template, dict(id=job_id))
    except Exception as e:
        # Oops, tell Github something went wrong.
        update_error_status(status_url, str(e), touched, github_auth)
        _L.error('Oops', exc_info=True)
        return False, {'error': str(e), 'files': files, 'status_url': status_url}

    # That worked, tell Github we're working on it.
    update_pending_status(status_url, job_url, touched, github_auth)
    return True, {'id': job_id, 'url': job_url, 'files': files, 'status_url': status_url}
示例#24
0
def get_commit_info(app_logger, payload, github_auth):
    ''' Get owner, repository, commit SHA and Github status API URL from webhook payload.

        If payload links to a pull request instead of including it, get that.

        Returns (owner, repository, commit_sha, status_url, comments_url);
        raises ValueError for payloads with no recognized event shape or no
        repository entry.
    '''
    if 'pull_request' in payload:
        # https://developer.github.com/v3/activity/events/types/#pullrequestevent
        commit_sha = payload['pull_request']['head']['sha']
        status_url = payload['pull_request']['statuses_url']
        comments_url = payload['pull_request']['comments_url']

    elif 'head_commit' in payload:
        # https://developer.github.com/v3/activity/events/types/#pushevent
        commit_sha = payload['head_commit']['id']
        status_url = payload['repository']['statuses_url']
        status_url = expand_uri(status_url, dict(sha=commit_sha))
        comments_url = None

    elif 'issue' in payload and 'pull_request' in payload['issue']:
        # nested PR is probably linked, so retrieve it.
        # https://developer.github.com/v3/activity/events/types/#issuecommentevent
        resp = get(payload['issue']['pull_request']['url'], auth=github_auth)
        # Parse the response body once; resp.json() re-decodes it each call.
        pull_request = resp.json()
        commit_sha = pull_request['head']['sha']
        status_url = pull_request['statuses_url']
        comments_url = pull_request['comments_url']

    else:
        raise ValueError('Unintelligible payload')

    if 'repository' not in payload:
        raise ValueError('Unintelligible payload')

    repo = payload['repository']
    # Push events carry owner "name"; API-shaped payloads carry "login".
    owner = repo['owner'].get('name') or repo['owner'].get('login')
    repository = repo['name']

    app_logger.debug('Status URL {}'.format(status_url))

    return owner, repository, commit_sha, status_url, comments_url
示例#25
0
def get_commit_info(app_logger, payload, github_auth):
    ''' Get owner, repository, commit SHA and Github status API URL from webhook payload.

        If payload links to a pull request instead of including it, get that.
    '''
    if 'pull_request' in payload:
        # https://developer.github.com/v3/activity/events/types/#pullrequestevent
        pull_request = payload['pull_request']
        commit_sha = pull_request['head']['sha']
        status_url = pull_request['statuses_url']
        comments_url = pull_request['comments_url']

    elif 'head_commit' in payload:
        # https://developer.github.com/v3/activity/events/types/#pushevent
        commit_sha = payload['head_commit']['id']
        status_url = expand_uri(payload['repository']['statuses_url'],
                                dict(sha=commit_sha))
        comments_url = None

    elif 'issue' in payload and 'pull_request' in payload['issue']:
        # nested PR is probably linked, so retrieve it.
        # https://developer.github.com/v3/activity/events/types/#issuecommentevent
        resp = get(payload['issue']['pull_request']['url'], auth=github_auth)
        commit_sha = resp.json()['head']['sha']
        status_url = resp.json()['statuses_url']
        comments_url = resp.json()['comments_url']

    else:
        raise ValueError('Unintelligible payload')

    if 'repository' not in payload:
        raise ValueError('Unintelligible payload')

    owner_info = payload['repository']['owner']
    owner = owner_info.get('name') or owner_info.get('login')
    repository = payload['repository']['name']

    app_logger.debug('Status URL {}'.format(status_url))

    return owner, repository, commit_sha, status_url, comments_url