示例#1
0
        # Start a fresh checkpoint at index 0. The condition guarding this
        # line lies outside this excerpt.
        progress = Progress(id="progress", value=0)

    # Load the list of government organizations.
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input; consider yaml.safe_load if this file only holds plain
    # data -- confirm before changing.
    with open('government.github.com/_data/governments.yml') as infile:
        _data = yaml.load(infile)
    # reshape_data is defined elsewhere; the code below relies on it returning
    # an indexable sequence of mappings with 'entity' and 'grouping' keys.
    data = reshape_data(_data)
    # Lower-cased entity names of all government organizations. Not read again
    # within this excerpt.
    organizations_government = set(
        [organization['entity'].lower() for organization in data])

    # Same load/reshape for the civic-hacker organizations.
    with open('government.github.com/_data/civic_hackers.yml') as infile:
        _data_civic = yaml.load(infile)
    data_civic = reshape_data(_data_civic)
    # Lower-cased entity names of all civic organizations. Not read again
    # within this excerpt.
    organizations_civic = set(
        [organization['entity'].lower() for organization in data_civic])

    # Process government entries starting at the checkpoint index
    # (progress.value) rather than always from 0.
    # NOTE(review): xrange exists only on Python 2.
    for i in xrange(progress.value, len(data)):
        logging.info("{} {} {}".format(i, data[i]['entity'],
                                       data[i]['grouping']))
        try:
            o_data = upsert_organization(data[i]['entity'],
                                         data[i]['grouping'], "government")
            r_data = upsert_repositories(o_data)
            upsert_contributors(o_data, r_data)
            upsert_members(o_data)
            # Advance the checkpoint past this entry and commit it together
            # with whatever the upsert_* calls left pending on the session.
            progress.value = i + 1
            session.add(progress)
            session.commit()
        except UnknownObjectException:
            # The entry is skipped silently and the checkpoint is not advanced
            # for it. NOTE(review): presumably raised when the organization
            # cannot be found on GitHub -- confirm; consider logging it.
            pass

    # Reset the checkpoint after the loop finishes. The reset is not added to
    # the session or committed within the visible lines.
    progress.value = 0
示例#2
0
    # Write INFO-and-above log records to scraper.log.
    logging.basicConfig(filename="scraper.log", level=logging.INFO)
    # G is not referenced again within this excerpt.
    # NOTE(review): presumably used by the upsert_* helpers -- confirm.
    G = Github(ACCESS_TOKEN)
    # Reuse the stored Progress row if one exists; otherwise start a fresh
    # checkpoint at index 0.
    progress = session.query(Progress).first()
    if not progress:
        progress = Progress(id="progress", value=0)

    # Load the list of government organizations.
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input; consider yaml.safe_load if this file only holds plain
    # data -- confirm before changing.
    with open('government.github.com/_data/governments.yml') as infile:
        _data = yaml.load(infile)
    # reshape_data is defined elsewhere; the code below relies on it returning
    # an indexable sequence of mappings with 'entity' and 'grouping' keys.
    data = reshape_data(_data)
    # Lower-cased entity names of all government organizations. Not read again
    # within this excerpt.
    organizations_government = set([organization['entity'].lower() for organization in data]) 

    # Same load/reshape for the civic-hacker organizations.
    with open('government.github.com/_data/civic_hackers.yml') as infile:
        _data_civic = yaml.load(infile)
    data_civic = reshape_data(_data_civic)
    # Lower-cased entity names of all civic organizations. Not read again
    # within this excerpt.
    organizations_civic = set([organization['entity'].lower() for organization in data_civic])

    # Process government entries starting at the checkpoint index, so entries
    # committed before the stored checkpoint are not processed again.
    # NOTE(review): xrange exists only on Python 2.
    for i in xrange(progress.value, len(data)):
        logging.info("{} {} {}".format(i, data[i]['entity'], data[i]['grouping']))
        try:
            o_data = upsert_organization(data[i]['entity'], data[i]['grouping'], "government")
            r_data = upsert_repositories(o_data)
            upsert_contributors(o_data, r_data)
            upsert_members(o_data)
            # Advance the checkpoint past this entry and commit it together
            # with whatever the upsert_* calls left pending on the session.
            progress.value = i+1
            session.add(progress)
            session.commit()
        except UnknownObjectException:
            # The entry is skipped silently and the checkpoint is not advanced
            # for it. NOTE(review): presumably raised when the organization
            # cannot be found on GitHub -- confirm; consider logging it.
            pass

    # Reset the checkpoint after the loop finishes. The reset is not added to
    # the session or committed within the visible lines.
    progress.value = 0