示例#1
0
def get_all_set_named_deps(limit=10, use_rq="rq"):
    """Hand repos without named_deps to the "set_named_deps" job.

    Builds a query of (login, repo_name) pairs for GithubRepo rows whose
    named_deps compares equal to None, ordered by login and capped at
    ``limit`` rows, then passes that query to enqueue_jobs together with
    ``use_rq``.
    """
    repos_missing_named_deps = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparison: must stay "== None", not "is None".
        .filter(GithubRepo.named_deps == None)  # noqa: E711
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_named_deps", repos_missing_named_deps, 5, use_rq)
示例#2
0
def get_all_set_named_deps(limit=10, use_rq="rq"):
    """Enqueue "set_named_deps" for GithubRepo rows that have no named_deps.

    The selection is (login, repo_name), filtered on named_deps == None,
    sorted by login and limited to ``limit`` rows. The query itself (not its
    results) is handed to enqueue_jobs along with ``use_rq``.
    """
    unset_named_deps = GithubRepo.named_deps == None  # noqa: E711 -- SQL expression
    candidates = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(unset_named_deps)
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_named_deps", candidates, 5, use_rq)
示例#3
0
def set_all_requirements_pypi(q_limit=9500, use_rq="rq"):
    """Hand repos lacking requirements_pypi to the "set_requirements_pypi" job.

    Selects (login, repo_name) for GithubRepo rows where requirements_pypi
    compares equal to None and requirements compares unequal to ``[]``,
    ordered by login and capped at ``q_limit`` rows, then passes the query to
    enqueue_jobs together with ``use_rq``.
    """
    # q_limit is deliberately low: there are about 10 api keys @ 5000 each.
    pending_repos = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparison: must stay "== None", not "is None".
        .filter(GithubRepo.requirements_pypi == None)  # noqa: E711
        .filter(GithubRepo.requirements != [])
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    enqueue_jobs(GithubRepo, "set_requirements_pypi", pending_repos, 7, use_rq)
示例#4
0
def set_all_pypi_dependencies(q_limit=100, use_rq='rq'):
    """Hand python repos without pypi_dependencies to "set_pypi_dependencies".

    Selects (login, repo_name) for GithubRepo rows that have dependency_lines,
    have no pypi_dependencies yet, and whose language equals "python". Rows
    are ordered by login and capped at ``q_limit``; the query is then passed
    to enqueue_jobs together with ``use_rq``.
    """
    python_repos_to_process = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparisons: keep "!= None" / "== None" as written.
        .filter(GithubRepo.dependency_lines != None)  # noqa: E711
        .filter(GithubRepo.pypi_dependencies == None)  # noqa: E711
        .filter(GithubRepo.language == "python")
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    enqueue_jobs(GithubRepo, "set_pypi_dependencies", python_repos_to_process, 6, use_rq)
示例#5
0
def set_all_requirements_pypi(q_limit=9500, use_rq="rq"):
    """Enqueue "set_requirements_pypi" for repos whose requirements_pypi is unset.

    The query yields (login, repo_name) for GithubRepo rows with
    requirements_pypi == None and requirements != [], sorted by login and
    limited to ``q_limit`` rows. The query object and ``use_rq`` are forwarded
    to enqueue_jobs.

    Note on the default: q_limit is intentionally low because there are
    roughly 10 api keys @ 5000 each.
    """
    no_pypi_requirements_yet = GithubRepo.requirements_pypi == None  # noqa: E711
    has_some_requirements = GithubRepo.requirements != []
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(no_pypi_requirements_yet)
        .filter(has_some_requirements)
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    enqueue_jobs(GithubRepo, "set_requirements_pypi", work, 7, use_rq)
示例#6
0
def set_all_pypi_dependencies(q_limit=100, use_rq='rq'):
    """Enqueue "set_pypi_dependencies" for python repos that still need it.

    A repo qualifies when dependency_lines != None, pypi_dependencies == None
    and language == "python". The (login, repo_name) query is sorted by login,
    limited to ``q_limit`` rows and forwarded to enqueue_jobs with ``use_rq``.
    """
    has_dependency_lines = GithubRepo.dependency_lines != None  # noqa: E711
    lacks_pypi_dependencies = GithubRepo.pypi_dependencies == None  # noqa: E711
    is_python = GithubRepo.language == "python"
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(has_dependency_lines)
        .filter(lacks_pypi_dependencies)
        .filter(is_python)
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    enqueue_jobs(GithubRepo, "set_pypi_dependencies", work, 6, use_rq)
示例#7
0
def set_all_cran_descr_file_names(limit=10, use_rq="rq"):
    """Hand repos that have a cran_descr_file to "set_cran_descr_file_name".

    Selects (login, repo_name) for GithubRepo rows whose cran_descr_file is
    neither None nor the string "not_found", ordered by login and capped at
    ``limit`` rows, then passes the query to enqueue_jobs with ``use_rq``.
    """
    # No {"fork": False} filter on api_raw here: per the original author's
    # note, that filtering was already done when cran_descr_file was made.
    repos_with_descr_file = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparison: must stay "!= None", not "is not None".
        .filter(GithubRepo.cran_descr_file != None)  # noqa: E711
        .filter(GithubRepo.cran_descr_file != "not_found")
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_cran_descr_file_name", repos_with_descr_file, 9, use_rq)
示例#8
0
def get_all_setup_py_no_forks(limit=10, use_rq="rq"):
    """Hand non-fork repos without setup_py_no_forks to "set_setup_py_no_forks".

    Selects (login, repo_name) for GithubRepo rows that have a reqs_file, have
    no setup_py_no_forks value yet, and whose api_raw contains
    ``{"fork": False}``. Rows are ordered by login and capped at ``limit``;
    the query is then passed to enqueue_jobs with ``use_rq``.
    """
    non_fork_repos = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparisons: keep "!= None" / "== None" as written.
        .filter(GithubRepo.reqs_file != None)  # noqa: E711
        .filter(GithubRepo.setup_py_no_forks == None)  # noqa: E711
        .filter(GithubRepo.api_raw.contains({"fork": False}))
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_setup_py_no_forks", non_fork_repos, 8, use_rq)
示例#9
0
def set_all_cran_descr_file(limit=10, use_rq="rq"):
    """Hand non-fork R repos without a cran_descr_file to "set_cran_descr_file".

    Selects (login, repo_name) for GithubRepo rows whose api_raw contains
    ``{"fork": False}``, whose language equals 'r', and whose cran_descr_file
    compares equal to None. Rows are ordered by login and capped at ``limit``;
    the query is then passed to enqueue_jobs with ``use_rq``.
    """
    r_repos_missing_descr = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(GithubRepo.api_raw.contains({"fork": False}))
        .filter(GithubRepo.language == 'r')
        # SQLAlchemy column comparison: must stay "== None", not "is None".
        .filter(GithubRepo.cran_descr_file == None)  # noqa: E711
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_cran_descr_file", r_repos_missing_descr, 3, use_rq)
示例#10
0
def get_all_setup_py_no_forks(limit=10, use_rq="rq"):
    """Enqueue "set_setup_py_no_forks" for non-fork repos that still need it.

    A repo qualifies when reqs_file != None, setup_py_no_forks == None and
    api_raw contains ``{"fork": False}``. The (login, repo_name) query is
    sorted by login, limited to ``limit`` rows and forwarded to enqueue_jobs
    with ``use_rq``.
    """
    has_reqs_file = GithubRepo.reqs_file != None  # noqa: E711 -- SQL expression
    not_yet_set = GithubRepo.setup_py_no_forks == None  # noqa: E711
    is_not_fork = GithubRepo.api_raw.contains({"fork": False})
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(has_reqs_file)
        .filter(not_yet_set)
        .filter(is_not_fork)
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_setup_py_no_forks", work, 8, use_rq)
示例#11
0
def set_all_cran_descr_file(limit=10, use_rq="rq"):
    """Enqueue "set_cran_descr_file" for non-fork R repos lacking that field.

    A repo qualifies when api_raw contains ``{"fork": False}``, language == 'r'
    and cran_descr_file == None. The (login, repo_name) query is sorted by
    login, limited to ``limit`` rows and forwarded to enqueue_jobs with
    ``use_rq``.
    """
    is_not_fork = GithubRepo.api_raw.contains({"fork": False})
    is_r = GithubRepo.language == 'r'
    lacks_descr_file = GithubRepo.cran_descr_file == None  # noqa: E711 -- SQL expression
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(is_not_fork)
        .filter(is_r)
        .filter(lacks_descr_file)
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_cran_descr_file", work, 3, use_rq)
示例#12
0
def set_all_cran_descr_file_names(limit=10, use_rq="rq"):
    """Enqueue "set_cran_descr_file_name" for repos with a usable cran_descr_file.

    A repo qualifies when cran_descr_file != None and
    cran_descr_file != "not_found". The (login, repo_name) query is sorted by
    login, limited to ``limit`` rows and forwarded to enqueue_jobs with
    ``use_rq``.

    There is intentionally no api_raw ``{"fork": False}`` filter: the original
    author notes it was already applied when cran_descr_file was made.
    """
    descr_file_present = GithubRepo.cran_descr_file != None  # noqa: E711 -- SQL expression
    descr_file_found = GithubRepo.cran_descr_file != "not_found"
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(descr_file_present)
        .filter(descr_file_found)
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_cran_descr_file_name", work, 9, use_rq)
示例#13
0
def set_all_setup_py_names(limit=10, use_rq="rq"):
    """Hand non-fork repos that have setup_py_no_forks to "set_setup_py_name".

    Selects (login, repo_name) for GithubRepo rows whose setup_py_no_forks is
    not None, whose bucket compares equal to None, and whose api_raw contains
    ``{"fork": False}``. Rows are ordered by login and capped at ``limit``;
    the query is then passed to enqueue_jobs with ``use_rq``.
    """
    repos_with_setup_py = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparisons: keep "!= None" / "== None" as written.
        .filter(GithubRepo.setup_py_no_forks != None)  # noqa: E711
        # The bucket filter is only a speed optimization (original author's note).
        .filter(GithubRepo.bucket == None)  # noqa: E711
        .filter(GithubRepo.api_raw.contains({"fork": False}))
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_setup_py_name", repos_with_setup_py, 1, use_rq)
示例#14
0
def set_all_setup_py_names(limit=10, use_rq="rq"):
    """Enqueue "set_setup_py_name" for non-fork repos with setup_py_no_forks set.

    A repo qualifies when setup_py_no_forks != None, bucket == None and
    api_raw contains ``{"fork": False}``. The (login, repo_name) query is
    sorted by login, limited to ``limit`` rows and forwarded to enqueue_jobs
    with ``use_rq``.
    """
    has_setup_py = GithubRepo.setup_py_no_forks != None  # noqa: E711 -- SQL expression
    # Just a speed optimization, per the original author's note.
    bucket_unset = GithubRepo.bucket == None  # noqa: E711
    is_not_fork = GithubRepo.api_raw.contains({"fork": False})
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(has_setup_py)
        .filter(bucket_unset)
        .filter(is_not_fork)
        .order_by(GithubRepo.login)
        .limit(limit)
    )

    enqueue_jobs(GithubRepo, "set_setup_py_name", work, 1, use_rq)
示例#15
0
def set_all_requirements(q_limit=9500):
    """Enqueue set_requirements for python repos whose reqs_file was never tried.

    Selects (login, repo_name) for GithubRepo rows where reqs_file_tried
    compares equal to None and language equals "python", ordered by login and
    capped at ``q_limit`` rows.

    Returns whatever enqueue_jobs returns for that query.
    """
    # q_limit is deliberately low: there are about 10 api keys @ 5000 each.
    untried_python_repos = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparison: must stay "== None", not "is None".
        .filter(GithubRepo.reqs_file_tried == None)  # noqa: E711
        .filter(GithubRepo.language == "python")
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(untried_python_repos, set_requirements, 0)
示例#16
0
def set_all_cran_dependencies(q_limit=100):
    """Enqueue set_cran_dependencies for R repos that have none recorded yet.

    Selects (login, repo_name) for GithubRepo rows that have dependency_lines,
    have no cran_dependencies, and whose language equals "r". Rows are ordered
    by login and capped at ``q_limit``.

    Returns whatever enqueue_jobs returns for that query.
    """
    r_repos_to_process = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparisons: keep "!= None" / "== None" as written.
        .filter(GithubRepo.dependency_lines != None)  # noqa: E711
        .filter(GithubRepo.cran_dependencies == None)  # noqa: E711
        .filter(GithubRepo.language == "r")
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(r_repos_to_process, set_cran_dependencies, 0)
示例#17
0
def set_all_zip_filenames(q_limit=100):
    """Enqueue set_zip_filenames for repos whose zip filenames were never tried.

    Selects (login, repo_name) for GithubRepo rows whose api_raw has no
    'error_code' key, whose zip_download_error compares equal to None, and
    whose zip_filenames_tried compares equal to None. Rows are ordered by
    login and capped at ``q_limit``.

    Returns whatever enqueue_jobs returns for that query.
    """
    untried_repos = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(~GithubRepo.api_raw.has_key('error_code'))
        # SQLAlchemy column comparisons: must stay "== None", not "is None".
        .filter(GithubRepo.zip_download_error == None)  # noqa: E711
        .filter(GithubRepo.zip_filenames_tried == None)  # noqa: E711
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(untried_repos, set_zip_filenames, 0)
示例#18
0
def set_all_zip_filenames(q_limit=100):
    """Queue set_zip_filenames for error-free repos not yet tried.

    A repo qualifies when api_raw lacks the 'error_code' key,
    zip_download_error == None and zip_filenames_tried == None. The
    (login, repo_name) query is sorted by login and limited to ``q_limit``.

    Returns the result of enqueue_jobs.
    """
    api_call_ok = ~GithubRepo.api_raw.has_key('error_code')
    no_download_error = GithubRepo.zip_download_error == None  # noqa: E711 -- SQL expression
    filenames_untried = GithubRepo.zip_filenames_tried == None  # noqa: E711
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(api_call_ok)
        .filter(no_download_error)
        .filter(filenames_untried)
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(work, set_zip_filenames, 0)
示例#19
0
def set_all_cran_dependencies(q_limit=100):
    """Queue set_cran_dependencies for R repos still missing cran_dependencies.

    A repo qualifies when dependency_lines != None, cran_dependencies == None
    and language == "r". The (login, repo_name) query is sorted by login and
    limited to ``q_limit`` rows.

    Returns the result of enqueue_jobs.
    """
    has_dependency_lines = GithubRepo.dependency_lines != None  # noqa: E711 -- SQL expression
    lacks_cran_dependencies = GithubRepo.cran_dependencies == None  # noqa: E711
    is_r = GithubRepo.language == "r"
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(has_dependency_lines)
        .filter(lacks_cran_dependencies)
        .filter(is_r)
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(work, set_cran_dependencies, 0)
示例#20
0
def set_all_requirements(q_limit=9500):
    """Queue set_requirements for python repos with reqs_file_tried unset.

    A repo qualifies when reqs_file_tried == None and language == "python".
    The (login, repo_name) query is sorted by login and limited to
    ``q_limit`` rows.

    Note on the default: q_limit is intentionally low because there are
    roughly 10 api keys @ 5000 each.

    Returns the result of enqueue_jobs.
    """
    never_tried = GithubRepo.reqs_file_tried == None  # noqa: E711 -- SQL expression
    is_python = GithubRepo.language == "python"
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(never_tried)
        .filter(is_python)
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(work, set_requirements, 0)
示例#21
0
def add_all_github_dependency_lines(q_limit=100):
    """Enqueue add_github_dependency_lines for repos with no zip download yet.

    Selects (login, repo_name) for GithubRepo rows whose api_raw has no
    'error_code' key, whose zip_download_error compares equal to None, and
    whose zip_download_elapsed compares equal to None. Rows are ordered by
    login and capped at ``q_limit``.

    Returns whatever enqueue_jobs returns for that query.
    """
    repos_not_downloaded = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(~GithubRepo.api_raw.has_key('error_code'))
        # SQLAlchemy column comparisons: must stay "== None", not "is None".
        .filter(GithubRepo.zip_download_error == None)  # noqa: E711
        .filter(GithubRepo.zip_download_elapsed == None)  # noqa: E711
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(repos_not_downloaded, add_github_dependency_lines, 0)
示例#22
0
def add_all_github_dependency_lines(q_limit=100):
    """Queue add_github_dependency_lines for error-free repos not yet downloaded.

    A repo qualifies when api_raw lacks the 'error_code' key,
    zip_download_error == None and zip_download_elapsed == None. The
    (login, repo_name) query is sorted by login and limited to ``q_limit``.

    Returns the result of enqueue_jobs.
    """
    api_call_ok = ~GithubRepo.api_raw.has_key('error_code')
    no_download_error = GithubRepo.zip_download_error == None  # noqa: E711 -- SQL expression
    no_download_timing = GithubRepo.zip_download_elapsed == None  # noqa: E711
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(api_call_ok)
        .filter(no_download_error)
        .filter(no_download_timing)
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(work, add_github_dependency_lines, 0)
示例#23
0
def add_all_r_github_dependency_lines(q_limit=100):
    """Enqueue add_github_dependency_lines for R repos lacking dependency_lines.

    Selects (login, repo_name) for GithubRepo rows where dependency_lines,
    zip_download_error and zip_download_elapsed each compare equal to None and
    language equals 'r'. Rows are ordered by login and capped at ``q_limit``.

    Returns whatever enqueue_jobs returns for that query.
    """
    r_repos_not_downloaded = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        # SQLAlchemy column comparisons: must stay "== None", not "is None".
        .filter(GithubRepo.dependency_lines == None)  # noqa: E711
        .filter(GithubRepo.zip_download_error == None)  # noqa: E711
        .filter(GithubRepo.zip_download_elapsed == None)  # noqa: E711
        .filter(GithubRepo.language == 'r')
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(r_repos_not_downloaded, add_github_dependency_lines, 0)
示例#24
0
def add_all_r_github_dependency_lines(q_limit=100):
    """Queue add_github_dependency_lines for R repos with nothing downloaded.

    A repo qualifies when dependency_lines == None, zip_download_error == None,
    zip_download_elapsed == None and language == 'r'. The (login, repo_name)
    query is sorted by login and limited to ``q_limit`` rows.

    Returns the result of enqueue_jobs.
    """
    lacks_dependency_lines = GithubRepo.dependency_lines == None  # noqa: E711 -- SQL expression
    no_download_error = GithubRepo.zip_download_error == None  # noqa: E711
    no_download_timing = GithubRepo.zip_download_elapsed == None  # noqa: E711
    is_r = GithubRepo.language == 'r'
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(lacks_dependency_lines)
        .filter(no_download_error)
        .filter(no_download_timing)
        .filter(is_r)
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    return enqueue_jobs(work, add_github_dependency_lines, 0)
示例#25
0
def set_all_pypi_in_formal_only(q_limit=9500, run_mode='with_rq'):
    """Run set_pypi_in_formal_only over repos, via enqueue_jobs or inline.

    Selects (login, repo_name) for GithubRepo rows where requirements_pypi and
    pypi_dependencies each compare unequal to ``[]``, ordered by login and
    capped at ``q_limit`` rows.

    When ``run_mode`` is 'with_rq' the query is handed to enqueue_jobs and its
    result is returned. For any other ``run_mode`` the query is executed here
    and set_pypi_in_formal_only is called once per row; nothing is returned.
    """
    # q_limit is deliberately low: there are about 10 api keys @ 5000 each.
    repos_with_both_lists = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(GithubRepo.requirements_pypi != [])
        .filter(GithubRepo.pypi_dependencies != [])
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    if run_mode == 'with_rq':
        return enqueue_jobs(repos_with_both_lists, set_pypi_in_formal_only, 0)

    # Inline mode: each row is a (login, repo_name) pair.
    for login, repo_name in repos_with_both_lists.all():
        set_pypi_in_formal_only(login, repo_name)
示例#26
0
def set_all_pypi_in_formal_only(q_limit=9500, run_mode='with_rq'):
    """Apply set_pypi_in_formal_only to repos, either queued or in-process.

    A repo qualifies when requirements_pypi != [] and pypi_dependencies != [].
    The (login, repo_name) query is sorted by login and limited to
    ``q_limit`` rows.

    With ``run_mode == 'with_rq'`` the query goes to enqueue_jobs and that
    call's result is returned. Otherwise every row is fetched and
    set_pypi_in_formal_only is invoked for it directly, and the function
    returns None.

    Note on the default: q_limit is intentionally low because there are
    roughly 10 api keys @ 5000 each.
    """
    has_pypi_requirements = GithubRepo.requirements_pypi != []
    has_pypi_dependencies = GithubRepo.pypi_dependencies != []
    work = (
        db.session.query(GithubRepo.login, GithubRepo.repo_name)
        .filter(has_pypi_requirements)
        .filter(has_pypi_dependencies)
        .order_by(GithubRepo.login)
        .limit(q_limit)
    )

    use_queue = run_mode == 'with_rq'
    if not use_queue:
        for login, repo_name in work.all():
            set_pypi_in_formal_only(login, repo_name)
        return None

    return enqueue_jobs(work, set_pypi_in_formal_only, 0)