示例#1
0
文件: train.py 项目: ChrisCummins/phd
def train(db_path, out_path, **kwargs):
    """Create a corpus from the SQLite database at ``db_path``.

    Opens the database, registers the one-argument SQL function ``LC``
    (implemented by ``linecount``) on the connection, and passes the
    connection to ``create_corpus`` along with ``out_path`` and the keyword
    arguments.

    Args:
        db_path: Path of the SQLite database to open.
        out_path: Forwarded unchanged to ``create_corpus``.
        **kwargs: Extra options forwarded to ``create_corpus``. The ``gh``
            entry is always overwritten with ``dbutil.is_github(db)``.

    Raises:
        SystemExit: If ``create_corpus`` returns a truthy value; that value
            is used as the exit status.
    """
    db = sqlite3.connect(db_path)
    try:
        db.create_function("LC", 1, linecount)

        # Auto-detect whether it's a GitHub repo; a caller-supplied 'gh'
        # value is deliberately replaced.
        kwargs['gh'] = dbutil.is_github(db)

        ret = create_corpus(db, out_path, **kwargs)
    finally:
        # Release the connection on every path, including when
        # create_corpus raises.
        db.close()

    if ret:
        sys.exit(ret)
示例#2
0
def explore(db_path, graph=False):
    """Print statistics for the database at ``db_path``.

    If ``dbutil.is_github`` reports a GitHub database, the work is delegated
    to ``explore_gh(db_path)`` and nothing else is done here. Otherwise
    ``stats_worker`` is run in a worker process and its result is printed as
    an aligned ``key: value`` listing.

    Args:
        db_path: Path of the SQLite database to inspect.
        graph: When true, make sure ``img_dir`` exists and additionally run
            ``graph_ocl_lc`` in the worker pool.

    Notes:
        ``stats_worker`` is expected to return a sequence of ``(key, value)``
        pairs whose keys support ``len()``; entries with a falsy key produce
        a blank line.
    """
    locale.setlocale(locale.LC_ALL, 'en_GB.utf-8')

    # The connection is only needed for the GitHub check, so close it on
    # every path before any further work starts.
    db = sqlite3.connect(db_path)
    try:
        is_gh = dbutil.is_github(db)
    finally:
        db.close()

    if is_gh:
        explore_gh(db_path)
        return

    if graph:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(img_dir, exist_ok=True)

    # Worker process pool; the context manager guarantees the worker
    # processes are torn down even if a job fails.
    with Pool(processes=4) as pool:
        jobs = []
        if graph:
            jobs.append(pool.apply_async(graph_ocl_lc, (db_path,)))
            # TODO: If GH dataset:
            # jobs.append(pool.apply_async(graph_ocl_stars, (db_path,)))
        future_stats = pool.apply_async(stats_worker, (db_path,))

        # Wait for jobs to finish. wait() (rather than get()) is kept so a
        # failed graph job does not abort the stats output.
        for job in jobs:
            job.wait()

        # Print stats
        print()
        stats = future_stats.get()

    # default=0 keeps an empty stats result from raising ValueError.
    maxlen = max((len(stat[0]) for stat in stats), default=0)
    for stat in stats:
        k, v = stat
        if k:
            padding = ' ' * (maxlen - len(k) + 2)
            print(k, ':', padding, v, sep='')
        elif v == '':
            # Falsy key with an empty value: print the key itself (a blank
            # line when the key is an empty string).
            print(k)
        else:
            print()
示例#3
0
 def test_is_github(self):
     """dbutil.is_github() is false for the 'empty' fixture, true for 'empty-gh'."""
     plain_db = tests.db('empty')
     self.assertFalse(dbutil.is_github(plain_db))

     github_db = tests.db('empty-gh')
     self.assertTrue(dbutil.is_github(github_db))