Exemplo n.º 1
0
def metadata_refresh(num_days):
    """Refresh the stored metadata of repositories that are due a check.

    A repository is selected when ``checked_at`` is NULL or is at least
    ``num_days`` days in the past. At most 300 repositories are selected per
    call.

    For every selected repository ``api.repo_info`` is queried. When it
    yields no details and the returned code is in the 400-499 range, the
    repository's ``worth`` is decreased by 1; in any no-details case the
    repository is otherwise left untouched. When details are available,
    ``checked_at`` and the tracked metadata fields are updated.

    :param num_days: minimum age, in days, of the previous check
    :return: number of repositories selected (and therefore handled) by this call
    """
    threshold = datetime.now() - timedelta(days=num_days)
    # The batch is loaded up front on purpose:
    #  * every iteration commits, and committing while a ``yield_per`` stream
    #    is still being consumed can invalidate the underlying cursor;
    #  * the loop changes ``checked_at``, so re-running the query afterwards
    #    (as ``count()`` would) no longer matches the rows that were handled.
    repos = Repo.query\
        .filter(Repo.checked_at.is_(None) | (Repo.checked_at <= threshold))\
        .limit(300)\
        .all()  # GitHub allows only 3000 calls per day within a token
    for repo in repos:
        details, code = api.repo_info(repo.full_name)
        if not details:
            if 400 <= code < 500:
                repo.worth -= 1
                db.session.commit()
                log.info(
                    '{0} is not found, the "worth" has been decreased by 1'.
                    format(repo.full_name))
            continue

        repo.checked_at = datetime.now()

        # Only assign the attributes whose value actually differs.
        for key in [
                'description', 'forks_count', 'homepage', 'language',
                'open_issues_count', 'size', 'stargazers_count',
                'subscribers_count'
        ]:
            if getattr(repo, key) != details[key]:
                setattr(repo, key, details[key])

        db.session.commit()
    return len(repos)
def metadata_refresh(num_days):
    """Refresh the stored metadata of repositories that are due a check.

    Repositories are selected when they have never been checked
    (``checked_at`` is NULL) or when the last check is at least ``num_days``
    days old; no more than 300 are selected per call.

    Each selected repository is looked up through ``api.repo_info``. Without
    details the repository is skipped, and its ``worth`` is decreased by 1
    when the returned code is in the 400-499 range. With details,
    ``checked_at`` and the tracked metadata fields are updated and committed.

    :param num_days: minimum age, in days, of the previous check
    :return: number of repositories selected (and therefore handled) by this call
    """
    threshold = datetime.now() - timedelta(days=num_days)
    # Fetch the batch before the loop starts. The loop commits on every
    # iteration (unsafe while a ``yield_per`` stream is open) and modifies
    # ``checked_at``, which the filter depends on, so calling ``count()`` on
    # the query afterwards would not report the rows handled here.
    repos = Repo.query\
        .filter(Repo.checked_at.is_(None) | (Repo.checked_at <= threshold))\
        .limit(300)\
        .all()  # GitHub allows only 3000 calls per day within a token
    for repo in repos:
        details, code = api.repo_info(repo.full_name)
        if not details:
            if 400 <= code < 500:
                repo.worth -= 1
                db.session.commit()
                log.info(
                    '{0} is not found, the "worth" has been decreased by 1'
                    .format(repo.full_name)
                )
            continue

        repo.checked_at = datetime.now()

        # Only assign the attributes whose value actually differs.
        for key in [
            'description', 'forks_count', 'homepage', 'language', 'open_issues_count', 'size',
            'stargazers_count', 'subscribers_count'
        ]:
            if getattr(repo, key) != details[key]:
                setattr(repo, key, details[key])

        db.session.commit()
    return len(repos)
def metadata_maturity(num_months):
    """Mark repositories as mature once they have enough ``RepoMean`` rows.

    Selects repositories that are not yet mature and have at least
    ``num_months`` ``RepoMean`` records, then sets ``Repo.mature`` to True
    for each of them, committing after every update.

    :param num_months: minimum number of ``RepoMean`` rows required
    :return: number of repositories marked as mature by this call
    """
    cnt = func.count(RepoMean.repo_id).label("cnt")
    # Materialise the rows first: the loop flips the very flag the query
    # filters on, so re-running the query afterwards (which is what
    # ``count()`` does) would no longer see the repositories just updated.
    rows = db.session.query(RepoMean.repo_id, cnt).join(Repo)\
        .filter(Repo.mature.is_(False))\
        .group_by(RepoMean.repo_id)\
        .having(cnt >= num_months)\
        .all()
    for repo_id, means_count in rows:
        db.session.query(Repo).filter(Repo.id == repo_id).update({Repo.mature: True})
        db.session.commit()
        log.info('{0} marked as mature ({1})'.format(repo_id, means_count))
    return len(rows)
Exemplo n.º 4
0
def metadata_maturity(num_months):
    """Flag repositories as mature when they have enough ``RepoMean`` rows.

    A repository qualifies when ``Repo.mature`` is False and it has at least
    ``num_months`` ``RepoMean`` records. Each qualifying repository gets
    ``Repo.mature`` set to True in its own commit.

    :param num_months: minimum number of ``RepoMean`` rows required
    :return: number of repositories marked as mature by this call
    """
    cnt = func.count(RepoMean.repo_id).label("cnt")
    # The result is fetched before any update happens. Counting via the
    # query after the loop would re-evaluate the ``mature`` filter against
    # the rows this function has just changed and miss all of them.
    rows = db.session.query(RepoMean.repo_id, cnt).join(Repo)\
        .filter(Repo.mature.is_(False))\
        .group_by(RepoMean.repo_id)\
        .having(cnt >= num_months)\
        .all()
    for repo_id, means_count in rows:
        db.session.query(Repo).filter(Repo.id == repo_id).update(
            {Repo.mature: True})
        db.session.commit()
        log.info('{0} marked as mature ({1})'.format(repo_id, means_count))
    return len(rows)
def status_refresh(num_days):
    """Reset repositories whose 'promising'/'hopeless' status is old enough.

    For every repository in one of those two statuses whose
    ``status_updated_at`` is at least ``num_days`` days in the past:
    its ``RepoStars`` rows are deleted, ``worth`` is raised by 1 for a
    'promising' repository and lowered by 1 otherwise, and the status becomes
    'new' when the resulting worth is above -1, else 'deleted'.

    :param num_days: minimum age, in days, of the last status update
    """
    deadline = datetime.now() - timedelta(days=num_days)
    outdated = Repo.query.filter(
        Repo.status.in_(('promising', 'hopeless')),
        Repo.status_updated_at <= deadline
    )
    for repo in outdated:
        # The collected star history is dropped before the status is reset.
        RepoStars.query.filter(RepoStars.repo_id == repo.id).delete()

        if repo.status == 'promising':
            repo.worth += 1
            log.info('The "worth" value for {0} has been increased by 1'.format(repo.full_name))
        else:
            repo.worth += -1

        if repo.worth > -1:
            repo.status = 'new'
        else:
            repo.status = 'deleted'

        db.session.commit()
Exemplo n.º 6
0
def metadata_trend(num_days):
    """Lower the worth of repositories whose latest mean value got worse.

    For each repository, the ``RepoMean`` values created within the last
    ``num_days`` days are concatenated newest first and cut down to the first
    two entries. Repositories with a single value (no comma in the result)
    are skipped. When ``is_worth_decreased(curr, prev)`` holds for the pair,
    ``Repo.worth`` is decremented by 1 and committed.

    :param num_days: size, in days, of the window of means to consider
    """
    newest_first = RepoMean.value.op('ORDER BY')(expression.desc(RepoMean.created_at))
    latest_two = func.substring_index(func.group_concat(newest_first), ',', 2)
    since = datetime.now() - timedelta(days=num_days)
    rows = db.session.query(RepoMean.repo_id, latest_two)\
        .filter(RepoMean.created_at >= since)\
        .group_by(RepoMean.repo_id)\
        .all()
    for row in rows:
        if ',' not in row[1]:
            continue  # only one mean in the window, nothing to compare with
        curr, prev = (float(value) for value in row[1].split(','))
        if not is_worth_decreased(curr, prev):
            continue
        log.info(
            'Mean value of {0} is {1}, previous was {2}. The "worth" has been decreased by 1'
            .format(row[0], curr, prev))
        db.session.query(Repo)\
            .filter(Repo.id == row[0])\
            .update({Repo.worth: Repo.worth - 1})
        db.session.commit()
def metadata_trend(num_days):
    """Decrease the worth of repositories whose most recent mean dropped.

    Per repository, the ``RepoMean`` values created during the last
    ``num_days`` days are joined newest first and limited to the first two.
    Results without a comma (a single value) are ignored. If
    ``is_worth_decreased(curr, prev)`` is true for the two values, the
    repository's ``worth`` is reduced by 1 and the change is committed.

    :param num_days: size, in days, of the window of means to consider
    """
    window_start = datetime.now() - timedelta(days=num_days)
    ordered_values = RepoMean.value.op('ORDER BY')(expression.desc(RepoMean.created_at))
    query = db.session.query(
        RepoMean.repo_id,
        func.substring_index(func.group_concat(ordered_values), ',', 2)
    )
    results = query\
        .filter(RepoMean.created_at >= window_start)\
        .group_by(RepoMean.repo_id)\
        .all()
    # Lazily keep only the rows that actually carry two values.
    comparable = (row for row in results if ',' in row[1])
    for row in comparable:
        curr, prev = [float(part) for part in row[1].split(',')]
        if is_worth_decreased(curr, prev):
            message = (
                'Mean value of {0} is {1}, previous was {2}. The "worth" has been decreased by 1'
                .format(row[0], curr, prev)
            )
            log.info(message)
            db.session.query(Repo)\
                .filter(Repo.id == row[0])\
                .update({Repo.worth: Repo.worth - 1})
            db.session.commit()
Exemplo n.º 8
0
def metadata_refresh(num_days):
    """Refresh the stored metadata of repositories that are due a check.

    Selects repositories whose status is not 'deleted' and whose
    ``checked_at`` is either NULL or at least ``num_days`` days in the past;
    at most 300 are selected per call.

    Each selected repository is looked up through ``api.repo_info``. Without
    details the repository is skipped, and its ``worth`` is decreased by 1
    when the returned code is in the 400-499 range. With details,
    ``checked_at``, the tracked metadata fields and (when present) the
    license key are updated and committed.

    :param num_days: minimum age, in days, of the previous check
    :return: number of repositories selected (and therefore handled) by this call
    """
    threshold = datetime.now() - timedelta(days=num_days)
    # Load the batch before the loop: the loop updates ``checked_at``, which
    # the filter depends on, so calling ``count()`` on the query afterwards
    # would re-run it and no longer match the rows handled here.
    repos = Repo.query\
        .filter(
            (Repo.status != 'deleted') & (
                Repo.checked_at.is_(None) | (Repo.checked_at <= threshold)
            )
        )\
        .limit(300)\
        .all()  # GitHub allows only 3000 calls per day within a token
    for repo in repos:
        details, code = api.repo_info(repo.full_name)
        if not details:
            if 400 <= code < 500:
                repo.worth -= 1
                db.session.commit()
                log.info(
                    '{0} is not found (code={1}), the "worth" has been decreased by 1'
                    .format(repo.full_name, code))
            continue

        repo.checked_at = datetime.now()

        # Only assign the attributes whose value actually differs.
        for key in [
                'description', 'forks_count', 'homepage', 'language',
                'open_issues_count', 'size', 'stargazers_count',
                'subscribers_count'
        ]:
            if getattr(repo, key) != details[key]:
                setattr(repo, key, details[key])

        if 'license' in details and details['license'] is not None:
            # Named ``license_key`` to avoid shadowing the ``license`` builtin.
            license_key = details['license'].get('key', 'unlicense')
            if license_key != repo.license:
                repo.license = license_key

        db.session.commit()

        log.info('Repository {0}({1}) has been updated'.format(
            repo.id, repo.full_name))
    return len(repos)
Exemplo n.º 9
0
def status_detect(num_days, num_segments):
    """Classify repositories in the 'unknown' status as promising or hopeless.

    For each such repository the first ``num_days`` ``RepoStars`` rows
    (ordered by day, ascending) are loaded. With no rows the value is 0;
    otherwise it is whatever ``repo_mean`` returns for the rows, the two
    parameters and ``last_known_mean(repo.id)``. A value below 1 yields the
    status 'hopeless', anything else 'promising'. The value is also stored
    as a ``RepoMean`` dated today (merged into the session), then committed.

    :param num_days: maximum number of ``RepoStars`` rows to load per repository
    :param num_segments: passed through to ``repo_mean``
    """
    candidates = Repo.query.filter(Repo.status == 'unknown')
    for repo in candidates:
        stars_query = db.session.query(RepoStars.day, RepoStars.stars)\
            .filter(RepoStars.repo_id == repo.id)\
            .order_by(expression.asc(RepoStars.day))\
            .limit(num_days)
        result = stars_query.all()

        if result:
            val = repo_mean(result, num_days, num_segments, last_known_mean(repo.id))
        else:
            val = 0

        status_old = repo.status
        if val < 1:
            repo.status = 'hopeless'
        else:
            repo.status = 'promising'

        log.info(
            'Repository status of {0}({1}) has been changed to {2} (was: {3})'
            .format(repo.id, repo.full_name, repo.status, status_old)
        )

        today = datetime.today().strftime('%Y-%m-%d')
        db.session.merge(RepoMean(repo=repo, value=val, created_at=today))
        db.session.commit()
from gitmostwanted.app import log, db
from gitmostwanted.models.repo import Repo, RepoMean
from gitmostwanted.tasks.repo_metadata import is_worth_decreased


# Recompute the "worth" of every mature repository by replaying its stored
# means in chronological order. Each repository starts at 3 on its first
# mean; every following mean moves the value by -1 when
# ``is_worth_decreased(curr, prev)`` holds and by +1 otherwise.
cache = {}
results = db.session.query(Repo, RepoMean)\
    .filter(Repo.id == RepoMean.repo_id, Repo.mature.is_(True))\
    .order_by(RepoMean.created_at.asc())\
    .yield_per(100)\
    .all()
for repo, mean in results:
    entry = cache.get(repo.id)
    if entry is None:
        # First mean of this repository: nothing to compare against yet.
        cache[repo.id] = {'prev': mean.value, 'worth': 3}
        continue

    prev, curr = entry['prev'], mean.value
    entry['prev'] = curr
    if is_worth_decreased(curr, prev):
        entry['worth'] += -1
    else:
        entry['worth'] += 1

    log.info(
        '#{0}: prev value is {1}, next value is {2} > worth is: {3} (now {4})'
        .format(repo.id, prev, curr, entry['worth'], repo.worth)
    )

    repo.worth = entry['worth']
    db.session.commit()
from gitmostwanted.app import log, db
from gitmostwanted.models.repo import Repo, RepoMean
from gitmostwanted.tasks.repo_metadata import is_worth_decreased
import sys

# Per-repository state collected while the means are replayed below.
cache = {}
# Every (Repo, RepoMean) pair, oldest mean first.
query = db.session.query(Repo, RepoMean)\
    .filter(Repo.id == RepoMean.repo_id)\
    .order_by(RepoMean.created_at.asc())\
    .yield_per(100)

# An optional numeric first command-line argument restricts the run to a
# single repository. ``sys.argv[0]`` is the script path itself, so the
# argument has to be read from ``sys.argv[1]`` (when one was given at all).
if len(sys.argv) > 1 and sys.argv[1].isdigit():
    log.info('#{0} is used as the repository id'.format(sys.argv[1]))
    query = query.filter(Repo.id == int(sys.argv[1]))

results = query.all()
for result in results:
    repo, mean = result

    if repo.id not in cache:
        cache[repo.id] = {'prev': mean.value, 'worth': 3, 'mature': 0}
        continue

    prev, curr = cache[repo.id]['prev'], mean.value
    cache[repo.id]['mature'] += 1
    cache[repo.id]['prev'] = curr
    cache[repo.id]['worth'] += -1 if is_worth_decreased(curr, prev) else 1

    log.info(
        '#{0}: prev value is {1}, next value is {2} > worth is: {3} (now {4}); mature: {5}'
        .format(repo.id, prev, curr, cache[repo.id]['worth'], repo.worth,
from gitmostwanted.app import log, db
from gitmostwanted.models.repo import Repo, RepoMean
from gitmostwanted.tasks.repo_metadata import is_worth_decreased
import sys


# Per-repository state collected while the means are replayed below.
cache = {}
# Every (Repo, RepoMean) pair, oldest mean first.
query = db.session.query(Repo, RepoMean)\
    .filter(Repo.id == RepoMean.repo_id)\
    .order_by(RepoMean.created_at.asc())\
    .yield_per(100)

# The repository id may be passed as the first command-line argument.
# ``sys.argv[0]`` holds the script path, not an argument, so the id is taken
# from ``sys.argv[1]`` and only when that argument is present.
if len(sys.argv) > 1 and sys.argv[1].isdigit():
    log.info('#{0} is used as the repository id'.format(sys.argv[1]))
    query = query.filter(Repo.id == int(sys.argv[1]))

results = query.all()
for result in results:
    repo, mean = result

    if repo.id not in cache:
        cache[repo.id] = {'prev': mean.value, 'worth': 3, 'mature': 0}
        continue

    prev, curr = cache[repo.id]['prev'], mean.value
    cache[repo.id]['mature'] += 1
    cache[repo.id]['prev'] = curr
    cache[repo.id]['worth'] += -1 if is_worth_decreased(curr, prev) else 1

    log.info(
        '#{0}: prev value is {1}, next value is {2} > worth is: {3} (now {4}); mature: {5}'
Exemplo n.º 13
0
def metadata_erase():
    """Delete repositories that are considered worthless.

    A repository is removed when its ``worth`` is below -5 and its
    ``worth_max`` does not exceed three times the configured
    ``REPOSITORY_WORTH_DEFAULT``. The number of removed rows is logged
    before the deletion is committed.
    """
    worth_max_limit = app.config['REPOSITORY_WORTH_DEFAULT'] * 3
    worthless = Repo.query.filter(
        (Repo.worth < -5) & (Repo.worth_max <= worth_max_limit)
    )
    removed = worthless.delete()
    log.info('{0} repositories has been removed (worthless)'.format(removed))
    db.session.commit()