def attach_all_skills(upto=100):
    """Run ``update`` against the skills store for the top-priority usernames.

    Both ``repos.json`` and ``skills.json`` are opened partitioned by
    ``username``.  Usernames are ordered by ``sort_by_priotity`` and the
    leading ones are passed to ``update`` one at a time under a tqdm
    progress bar.

    Args:
        upto: Minimum number of usernames to process.  The effective limit
            is the larger of ``upto`` and ``len(repos) - len(skills)``.
    """
    repos_db = MicroDB(jsons_dir + 'repos.json', partition_keys=['username'])
    skills_db = MicroDB(jsons_dir + 'skills.json',
                        partition_keys=['username'])

    # Size gap between the two stores; it can raise the limit above ``upto``.
    backlog = len(repos_db) - len(skills_db)
    limit = max(upto, backlog)

    ranked_usernames = sort_by_priotity(repos_db, skills_db)
    for username in tqdm(ranked_usernames[:limit]):
        update(username, skills_db)
def scrap_repos():
    """Update the GIF store for every repo that needs it, using 10 threads.

    Repos are selected with ``exact_update_required`` and each one is handed,
    together with the GIF store, to ``update_mutlitherading_wrapper`` through
    ``ThreadPool.imap_unordered``.  Every result is a
    ``(success, error_place)`` pair.

    Raises:
        Exception: once 10 results have been seen and at least 8 of the 10
            most recent ones reported ``'chromeless'`` as the error place.
    """
    repos_db = MicroDB(jsons_dir + 'repos.json', partition_keys=['full_name'])
    gifs_db = MicroDB(jsons_dir + 'gifs.json', partition_keys=['full_name'])
    pending_repos = exact_update_required(repos_db, gifs_db)
    jobs = [(gifs_db, repo) for repo in pending_repos]

    window = 10    # number of most recent results that are inspected
    tolerated = 8  # chromeless failures within the window that abort the run
    recent_failures = []  # newest result first, never longer than ``window``

    with ThreadPool(processes=10) as pool:
        results = pool.imap_unordered(update_mutlitherading_wrapper, jobs)
        for _success, error_place in tqdm(results, total=len(pending_repos)):
            failed = 1 if error_place == 'chromeless' else 0
            recent_failures = [failed] + recent_failures[:window - 1]
            if len(recent_failures) < window:
                # Not enough history yet to judge the failure rate.
                continue
            if sum(recent_failures) >= tolerated:
                raise Exception('chromeless failed too many times')
def save_all_repos():
    """Collect all repos, filter them and upsert them into ``repos.json``.

    The repo list comes from ``get_all_repos`` and is passed through
    ``trim_repos`` and then ``exclude_no_thanks`` before every remaining
    record is upserted into a store partitioned by ``full_name``.  The store
    is saved once, after the last upsert.
    """
    repos = exclude_no_thanks(trim_repos(get_all_repos()))
    store = MicroDB(jsons_dir + 'repos.json', partition_keys=['full_name'])
    for record in repos:
        store.upsert(dictionary=record)
    store.save()
def attach_all_geotag(count=100):
    """Run ``update`` against the geotag store, then list repos without one.

    Args:
        count: Maximum number of usernames, taken from the front of the
            ``sort_by_priotity`` ordering, to pass to ``update``.

    After the updates, every record of the repos store for which the geotag
    store returns ``None`` is printed together with a running counter.
    """
    repos_db = MicroDB(jsons_dir + 'repos.json', partition_keys=['username'])
    geotag_db = MicroDB(jsons_dir + 'geotag.json',
                        partition_keys=['username'])

    ranked_usernames = sort_by_priotity(repos_db, geotag_db)
    for username in tqdm(ranked_usernames[:count]):
        update(username, geotag_db)

    # Report what is still missing after the update pass.
    missing = 0
    for record in repos_db.all():
        if geotag_db.get(record) is not None:
            continue
        missing += 1
        print(missing, record)
Пример #5
0
def exact_yet_stared_succeed_repos():
    """Return the repos whose GIF record succeeded and that have zero stars.

    Both ``repos.json`` and ``gifs.json`` are opened partitioned by
    ``full_name``.  For every repo record the matching GIF record is looked
    up; the repo is kept when that record's ``'success'`` value is truthy
    and the repo's ``'stargazers_count'`` equals 0.

    Returns:
        list: the matching repo records, in the order ``all()`` yields them.
    """
    mdb_repos = MicroDB(jsons_dir + 'repos.json',
                        partition_keys=['full_name'])
    mdb_gifs = MicroDB(jsons_dir + 'gifs.json', partition_keys=['full_name'])

    yet_stared_succeed_repos = []
    for d in mdb_repos.all():
        gifjson = mdb_gifs.get(d)
        # Other code in this file checks the result of ``get`` against None,
        # so a repo may have no GIF record at all.  Such a repo has not
        # succeeded; skip it instead of failing on ``None['success']``.
        if gifjson is None:
            continue
        if gifjson['success'] and d['stargazers_count'] == 0:
            yet_stared_succeed_repos.append(d)
    return yet_stared_succeed_repos
def del_wrong_data():
    """Delete every GIF record whose ``'success'`` value is a string.

    Offending records are collected (and printed) first, then the count and
    the full list are printed, and finally each record is removed by the key
    ``gen_key`` produces for it.  The store is saved once at the end.
    """
    gifs_db = MicroDB(jsons_dir + 'gifs.json', partition_keys=['full_name'])

    # Collect before deleting so the store is not modified while iterating.
    bad_records = []
    for record in gifs_db.all():
        if not isinstance(record['success'], str):
            continue
        bad_records.append(record)
        print(record)

    print(len(bad_records))
    print(bad_records)

    for record in bad_records:
        record_key = gifs_db.gen_key(record)
        print(record_key)
        del gifs_db[record_key]
    gifs_db.save()
def chunks(list_, chunk_len):
    """Split a sliceable sequence into consecutive pieces of ``chunk_len``.

    The last piece is shorter when the length is not a multiple of
    ``chunk_len``.  Each piece is a slice of ``list_`` and therefore has the
    same type as ``list_``.

    >>> chunks([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 3)
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    """
    pieces = []
    for start in range(0, len(list_), chunk_len):
        pieces.append(list_[start:start + chunk_len])
    return pieces


def numberize(string):
    """Return the integer formed by the digits of ``string``, in order.

    Every non-digit character is dropped before conversion, so separators
    such as commas are ignored (and so is a minus sign).

    Raises:
        ValueError: if ``string`` contains no digits at all.
    """
    digits_only = re.sub(r'\D', '', string)
    return int(digits_only)


# Open each JSON-backed store at module level and print its length.
# Repos and GIFs are partitioned by 'full_name'; geotags and skills by
# 'username'.
mdb_repos = MicroDB(jsons_dir + 'repos.json', partition_keys=['full_name'])
print('mdb_repos', len(mdb_repos))

mdb_geotags = MicroDB(jsons_dir + 'geotag.json', partition_keys=['username'])
print('mdb_geotags', len(mdb_geotags))

mdb_gifs = MicroDB(jsons_dir + 'gifs.json', partition_keys=['full_name'])
print('mdb_gifs', len(mdb_gifs))

mdb_skills = MicroDB(jsons_dir + 'skills.json', partition_keys=['username'])
print('mdb_skills', len(mdb_skills))

# Starts out empty; nothing in this span adds to it.
merged_db = []
Пример #8
0
def test():
    """Exercise MicroDB end to end against the file at ``filename``.

    The steps run in order: remove any existing file, erase and reopen the
    store, upsert every record of ``test_data``, save and pretty-print,
    reopen and print the records, save as a grid, reopen and print again,
    then upsert one extra record and try ``save_as_grid`` once more before
    a final plain ``save``.  Nothing is asserted; results are only printed.
    """
    # Start from a clean slate on disk.
    if os.path.exists(filename):
        os.remove(filename)

    mdb = MicroDB(filename, testpartition_keys)
    mdb.erase_all()
    # Reopen after erasing, then load the fixture records.
    mdb = MicroDB(filename, testpartition_keys)
    for d in test_data:
        mdb.upsert(d)
    mdb.save()
    mdb.pprint_all()

    # A fresh instance prints whatever the save above left behind.
    mdb2 = MicroDB(filename, testpartition_keys)
    for d in mdb2.all():
        print(d)
    mdb2.save_as_grid()
    # Reopen once more to print the records after the grid save.
    mdb3 = MicroDB(filename, testpartition_keys)
    for d in mdb3.all():
        print(d)
    mdb4 = MicroDB(filename, testpartition_keys)
    # This record carries an 'extra-info' key.
    # NOTE(review): presumably the fixture records lack that key and this is
    # what makes the grid save below fail - confirm against test_data.
    mdb4.upsert({
        'job': 'study',
        'name': 'Bob',
        'status': 'undone',
        'extra-info': 'hogehoge'
    })
    # Any exception from the grid save is printed rather than propagated.
    try:
        mdb4.save_as_grid()
    except Exception as e:
        print(e)
    mdb4.save()
    # NOTE(review): bare expression with no effect - possibly a truncated
    # call or a leftover from an interactive session.
    mdb4