Example #1
from universalclient import Client
from server import settings  # settings source assumed from the sibling snippets
class GitEvents(object):
    client = Client(settings.GITHUB_HOST + '/api/v3/events')
    # the etag for the last call to github
    etag = ''
    # the id of the last event pulled from github (0 = nothing synced yet;
    # initializing to 0 keeps the comparisons below valid on the first run)
    last_event = 0

    def get_page_of_events(self, page=1, etag=True):
        """
        return a page (1-10) of events. if etag is True, will check
        etag version
        """
        headers = {'If-None-Match': self.etag} if etag else {}
        resp = self.client.get(headers=headers, params={"page": page})
        if etag:
            self.etag = resp.headers.get('ETag', self.etag)
        if resp.status_code == 200:
            return resp.json()
        elif resp.status_code == 304:
            return []

    def get_changed_page_urls(self):
        """
        return the urls for all pages changed since the last
        time get_changed_page_urls was called. Uses a combination
        of etag and the last synced event id to minimize (hopefully
        eliminate) duplication.
        """
        data = self.get_page_of_events()
        if not data:
            return data
        newest_last_event = int(data[0]['id'])
        intermediate_last_event = int(data[-1]['id'])
        for page in range(2, 11):
            if intermediate_last_event <= self.last_event:
                break
            data += self.get_page_of_events(page=page, etag=False)
            intermediate_last_event = int(data[-1]['id'])

        # get the pages changed by GollumEvents (wiki edits) newer than the last sync
        page_lists = [
            event['pages'] for event in data if event['type'] == 'GollumEvent'
            and int(event['id']) > self.last_event
        ]
        # each event can touch multiple pages, so flatten the lists of pages
        pages = [page for page_list in page_lists for page in page_list]

        urls = [page['html_url'] for page in pages]
        urls = list(set(urls))  # dedup
        # update the last_event counter
        self.last_event = newest_last_event
        return urls
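
A minimal usage sketch (hypothetical driver code; the settings the class reads are assumed to be configured):

# the first call returns every recently changed wiki page;
# later calls only return pages touched since the previous call
events = GitEvents()
for url in events.get_changed_page_urls():
    print(url)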
Example #2
from universalclient import Client

# schema_file and index_file are module-level paths defined elsewhere in the source
def update_index():
    es = Client('http://localhost:9200')
    es.dc.DELETE()  # drop the existing "dc" index
    with open(schema_file) as f:
        es.dc.POST(data=f.read())  # recreate the index with its schema
    with open(index_file) as f:
        while True:
            # stream roughly 10MB of newline-delimited bulk actions at a time
            lines = f.readlines(10000000)
            if not lines:
                break
            resp = es._bulk.POST(data=''.join(lines))
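
Elasticsearch's `_bulk` endpoint expects newline-delimited JSON, one action line followed by one source line per document, so `index_file` would hold pairs like the following (index name from the snippet; type, ids, and fields illustrative):

{"index": {"_index": "dc", "_type": "doc", "_id": "1"}}
{"title": "first document"}
{"index": {"_index": "dc", "_type": "doc", "_id": "2"}}
{"title": "second document"}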
Example #3
import rauth
from universalclient import Client, jsonFilter

# gh_client_key, gh_client_secret and gh_host are module-level settings defined elsewhere
def GitHub(user_model=None, access_token=None):
    """
    return a UniversalClient client for GitHub, authenticated with the given access_token.
    If access_token is not passed, will look for the access_token associated with 
    the user_model.
    """
    if not access_token:
        try:
            access_token = user_model.social_auth.get(provider='github').tokens
        except Exception:  # no user_model, or no linked GitHub account
            return None
    session = rauth.OAuth2Session(gh_client_key, gh_client_secret,
                                  access_token)
    return Client(gh_host, oauth=session, dataFilter=jsonFilter)
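
A usage sketch (the token is a placeholder; `gh_host` is assumed to point at the GitHub API, and the lowercase verb style follows Example #1):

gh = GitHub(access_token='<token>')
if gh is not None:
    profile = gh.user.get().json()  # GET <gh_host>/user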
Example #4
def GitHubEnterprise(user_model=None, access_token=None):
    """
    return a UniversalClient client for GitHub Enterprise, authenticated with the given access_token.
    If access_token is not passed, will look for the access_token associated with 
    the user_model.
    """
    if not access_token:
        if not user_model.is_authenticated():
            return None
        access_token = user_model.social_auth.get(
            provider='github-enterprise').tokens
    session = rauth.OAuth2Session(ghe_client_key, ghe_client_secret,
                                  access_token)
    return Client(ghe_host + '/api/v3', oauth=session, dataFilter=jsonFilter)
Example #5
from universalclient import Client
import urllib3
from server import utils
import itertools

from gh_wiki import index as gh_wiki
from gh_readme import index as gh_readme
from gh_pages import index as gh_pages
from gh_issues import index as gh_issues

headers = {
    'keep_alive': True,
    'user_agent': 'cfpb-tiresias',
}

# gh_settings is a module-level dict defined elsewhere in the source
gh_api_client = Client(gh_settings.get('API'))

if 'AUTH' in gh_settings:
    gh_api_client = gh_api_client.auth(gh_settings['AUTH'])
    headers['basic_auth'] = '%s:%s' % gh_settings['AUTH']

gh_pool = urllib3.connection_from_url(gh_settings.get('WEB'),
                                      maxsize=50,
                                      block=True)
gh_api_pool = urllib3.connection_from_url(
    gh_settings.get('API'),
    maxsize=50,
    block=True,
    headers=urllib3.util.make_headers(**headers))
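
Requests can then be issued straight off the pools; a quick sketch (path illustrative):

# one pooled call; urllib3 reuses up to 50 keep-alive connections
resp = gh_api_pool.urlopen('GET', '/repositories')
print(resp.status)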

Example #6
# encoding: utf-8
from __future__ import absolute_import

from functools import wraps
import datetime

from dateutil.tz import tzutc
from django.conf import settings
from universalclient import Client

# Meetup API
MEETUP = Client("http://api.meetup.com").setArgs(params={"key": settings.MEETUP_API_KEY})

# Upcoming events
UPCOMING_EVENTS = MEETUP._('2').events.setArgs(params={"group_urlname": "dcpython"})

# Past events
PAST_EVENTS = MEETUP._('2').events.setArgs(params={"group_urlname": "dcpython", "status": "past"})
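
A call then executes by invoking an HTTP verb on the prepared client; a sketch (lowercase verb style assumed, as in Example #1):

upcoming = UPCOMING_EVENTS.get().json()  # GET /2/events?group_urlname=dcpython&key=...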


# Via https://github.com/pythonkc/pythonkc-meetups/blob/master/pythonkc_meetups/parsers.py#L102
def parse_datetime_ms(utc_timestamp_ms, utc_offset_ms=None):
    """
    Create a timezone-aware ``datetime.datetime`` from the given UTC timestamp
    (in milliseconds). If an offset is given, it is applied to the datetime
    returned.

    Parameters
    ----------
    utc_timestamp_ms
        UTC timestamp in milliseconds.
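    """
    # (assumed completion: the snippet is truncated mid-docstring above; the
    # body below just converts milliseconds to seconds and applies the offset)
    dt = datetime.datetime.fromtimestamp(utc_timestamp_ms / 1000.0, tz=tzutc())
    if utc_offset_ms:
        dt += datetime.timedelta(milliseconds=utc_offset_ms)
    return dt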
Example #7
from django.conf import settings
from universalclient import Client, jsonFilter
import requests
import json

kratos = Client(settings.KRATOS_URL,
                dataFilter=jsonFilter,
                auth=('admin', settings.KRATOS_ADMIN_PWD),
                headers={'Content-Type': 'application/json'})


def register_kratos(request, response, user, **kwargs):
    if not user or not user.gh_id:
        return
    social_auth = user.social_auth.get(provider='github').extra_data
    kratos_data = kratos.users.get(params={'gh': user.gh_id}).json()

    if kratos_data.get('error') == 'not_found':
        kratos_user = {
            "data": {
                "username": user.username,
                "contractor": user.contractor,
            },
            "roles": ["gh|user", "kratos|enabled"],
            "rsrcs": {
                "gh": {
                    "username": social_auth['username'],
                    "id": social_auth['id'],
                },
            },
        }
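        # (assumed continuation: the snippet is truncated here; registering the
        # new user would presumably POST the payload back through the client)
        kratos.users.POST(data=kratos_user)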
Example #8
from universalclient import Client, jsonFilter
import json, click, re, os

DIR = os.path.abspath(os.path.dirname(__file__))
lims_data_path = os.path.join(DIR, 'lims_data.json')

lims = Client('http://lims.dccouncil.us/api/v1/',
              headers={'Content-Type': 'application/json'},
              dataFilter=jsonFilter)

leg_details = lims.Legislation.Details._('{}')

leg_search = lims.Legislation.AdvancedSearch._('100/{}').data(
    {"LegislationStatus": "130"})

out = json.load(open(lims_data_path))
# offset = int(len(out)/100)
# while True:
# 	leg_data = leg_search.POST(offset).json()
# 	print('getting:', offset*100, '-', offset*100+len(leg_data))
# 	with click.progressbar(leg_data) as leg_data_bar:
# 		for leg_datum in leg_data_bar:
# 			leg_number = leg_datum['LegislationNumber']
# 			leg = leg_details.GET(leg_number).json()
# 			out[leg_number] = leg
# 	offset += 1
# 	json.dump(out, open(lims_data_path, 'w'), sort_keys=True, indent=2)
# 	if not leg_data:
# 		print('completed')
# 		break
Example #9
from universalclient import Client, jsonFilter
import urllib3
from server import settings
from gevent import subprocess
from server import schemas
import json
import bs4
import re

es_client = Client(settings.ES_HOST, dataFilter=jsonFilter)
es_pool = urllib3.connection_from_url(
    settings.ES_HOST,
    maxsize=50,
    block=True,
    headers=urllib3.util.make_headers(keep_alive=True))

history_index = 'history'
search_index = 'search'

search_client = es_client.search
history_client = es_client.history


def save_indexed_version(gh_type, repo_name, typ, version):
    # slashes are escaped so the full repo path works as a single ES doc id
    doc_id = (gh_type + '/' + repo_name).replace('/', '%2F')
    body = json.dumps({'version': version})

    # try a partial update of the existing history document first
    url = '/%s/%s/%s/_update' % (history_index, typ, doc_id)
    resp = es_pool.urlopen('POST', url, body=body)
    if resp.status == 500:
        # the document doesn't exist yet; fall back to writing it directly
        url = '/%s/%s/%s' % (history_index, typ, doc_id)
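        # (assumed continuation: the snippet is truncated here; a plain POST to
        # the document URL would then create it)
        resp = es_pool.urlopen('POST', url, body=body)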
Example #10
from universalclient import Client, jsonFilter

# gh_host and gh_admin_auth are module-level settings defined elsewhere
def GitHubAdmin(credentials=None):
    credentials = credentials or gh_admin_auth
    return Client(gh_host, auth=credentials, dataFilter=jsonFilter)
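
A quick usage sketch (credentials and resource path are illustrative):

admin = GitHubAdmin(('admin-user', 'admin-password'))  # hypothetical basic-auth pair
repos = admin.repositories.get().json()  # lowercase verb style, as in Example #1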
Example #11
from server import settings
import helpers
from universalclient import Client
import time
from datetime import datetime

jira_api_client = Client(settings.JIRA_HOST).rest.api._(2)
jira_fields = 'assignee,creator,updated,project,status,summary,labels,description,comment'
max_results = 500


def index():
    """
    sync all jira issues
    """
    offset = 0
    issues = []

    start = time.mktime(datetime.now().timetuple())

    # Grab all data via API calls, 500 issues at a time
    # TODO gevent solution
    while True:
        resp = jira_api_client.search.params(fields=jira_fields,
                                             startAt=offset,
                                             maxResults=max_results,
                                             ).get().json()
        issues += resp['issues']
        if resp['total'] > len(issues):
            offset += max_results
        else:
            break  # all issues fetched
Example #12
from server import settings

ghe_settings = settings.GITHUB.get('GHE', {})
from universalclient import Client
import urllib3
from server import utils

from wiki import index as wiki
from readme import index as readme
from gh_pages import index as gh_pages
from gh_issues import index as gh_issues

ghe_api_client = Client(ghe_settings.get('API')).api.v3

ghe_api_pool = urllib3.connection_from_url(
    ghe_settings.get('API'),
    maxsize=50,
    block=True,
    headers=urllib3.util.make_headers(keep_alive=True))
ghe_pool = urllib3.connection_from_url(ghe_settings.get('WEB'),
                                       maxsize=50,
                                       block=True)


def get_repos():
    if not ghe_settings:
        return []
    return [
        repo['full_name'] for repo in utils.iter_get_url(
            ghe_settings['API_PATH'] + '/repositories', ghe_api_pool)
        if not repo['fork']
    ]
Example #13
from gevent import monkey
from gevent.pool import Pool

pool = Pool(50)

# patches stdlib (including socket and ssl modules) to cooperate with other greenlets
monkey.patch_all()

import re
import urllib2
from os import path
from os.path import join as path_join

# imports needed below; module paths assumed from the sibling snippets
from universalclient import Client
from server import settings

DIR = path.dirname(path.realpath(__file__))
LOG = path_join(DIR, '..', 'client', 'dist', 'log')

es_client = Client(settings.ES_HOST)
gh_client = Client(settings.GITHUB_HOST)
gh_api_client = gh_client.api.v3

whitespace_re = re.compile(r'(\W|\n)+')
def extract_text_from_html(soup):
    text_nodes = soup.findAll(text=True)
    text_with_newlines = ' '.join(text_nodes)
    text = whitespace_re.sub(' ', text_with_newlines)
    return text
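
For reference, a quick check of the helper above (bs4 import and markup are illustrative):

import bs4
soup = bs4.BeautifulSoup('<p>Hello <b>wiki</b> world</p>')
print(extract_text_from_html(soup))  # 'Hello wiki world'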

def _get_soup(url, id):
    """
    return a generator that, given a url, fetches the content, parses it, and
    returns a tuple of the url, the repo name, and the soup of the tag
    with the given id