def get_samples():
    """Select the highest- and lowest-scoring repos as labelled samples.

    Loads every repo via ``Repo.load()``, prints a running count after each
    filtering stage, then ranks the survivors by ``classes.score``.

    Returns:
        A dict ``{'high': [names], 'low': [names]}`` with the names of (up to)
        the 1000 best-scoring and the 1000 worst-scoring repos. Both lists
        are in ascending score order.

    Raises:
        ValueError: if no repos are loaded (``max``/``min`` of an empty list).
    """
    # Prints use the single-argument parenthesised form so the output is
    # unchanged under Python 2 while the block also parses under Python 3.
    repos = Repo.load()
    fetch_dates = [datetime.datetime(*r.fetch_ymd) for r in repos]

    print('number of repos: %s' % len(repos))

    earliest_fetch = min(fetch_dates)
    latest_fetch = max(fetch_dates)
    print('fetched between %s and %s' % (earliest_fetch, latest_fetch))
    print('')

    # Stage 1: drop noise -- empty or oversized repos, barely starred repos,
    # forks, and editor/dotfile configuration repos.
    # NOTE(review): the unit of `size` is not visible here (presumably KB as
    # reported by the hosting API) -- confirm.
    filtered = []
    for r in repos:
        lowered = r.name.lower()
        if (30720 > r.size > 0 and  # not foolproof to avoid big repos
                r.stars > 1 and
                not r.fork and
                'dotfile' not in lowered and
                'sublime' not in lowered):  # avoid SublimeText config
            filtered.append(r)
    print('after noise filter: %s' % len(filtered))

    # Stage 2: drop repos created within 30 days of the most recent fetch.
    min_age = datetime.timedelta(30)
    filtered = [r for r in filtered
                if (latest_fetch - r.creation_date) > min_age]
    print('exluding very new: %s' % len(filtered))

    # Stage 3: drop unpopular repos.
    # The threshold must be a float: under Python 2 without true division,
    # `1 / 30` evaluates to 0 and the score filter would only reject
    # non-positive scores.
    min_score = 1.0 / 30
    # Score each candidate once and reuse the value for filtering and ranking
    # (assumes classes.score is deterministic and side-effect free).
    score_pairs = [(classes.score(r), r) for r in filtered if r.stars > 5]
    score_pairs = [(score, r) for (score, r) in score_pairs
                   if score > min_score]
    print('exluding very unpopular: %s' % len(score_pairs))

    # Sort on the score only, so ties keep their existing relative order and
    # repo objects themselves are never compared.
    score_pairs.sort(key=lambda pair: pair[0])

    # top 1k, bottom 1k.
    # NOTE(review): with fewer than 2000 survivors the two slices overlap and
    # the same repo name appears in both classes -- confirm this is acceptable.
    return {'high': [r.name for (score, r) in score_pairs[-1000:]],
            'low': [r.name for (score, r) in score_pairs[:1000]],
            }