Example No. 1
def set_unknown_domains_not_in_hyps(hyps):
    # Find domains in status Started, Paused or Unknown
    # that are not running on any of the given hypervisors.
    r_conn = new_rethink_connection()
    rtable = r.table('domains')

    status_to_unknown = ['Started', 'Paused', 'Unknown']

    result = list(
        rtable.filter(lambda d: r.expr(status_to_unknown).contains(d['status']))
              .filter(lambda d: r.not_(r.expr(hyps).contains(d['hyp_started'])))
              .update({'status': 'Unknown'})
              .run(r_conn))

    status_to_stopped = ['Starting', 'CreatingTemplate']

    result = list(
        rtable.filter(lambda d: r.expr(status_to_stopped).contains(d['status']))
              .filter(lambda d: r.not_(r.expr(hyps).contains(d['hyp_started'])))
              .update({'status': 'Stopped'})
              .run(r_conn))

    close_rethink_connection(r_conn)
    return result
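
The pattern this example leans on is server-side set membership and its negation: r.expr(some_list).contains(row_field) builds a ReQL membership test, and r.not_(...) inverts it, so both the filtering and the bulk update happen in the database. Below is a minimal standalone sketch of the same "status in list, hypervisor not known" update, assuming a reachable RethinkDB instance and the current rethinkdb Python driver; the connection details and hypervisor ids are hypothetical, while the table and field names mirror the example above.

from rethinkdb import RethinkDB

r = RethinkDB()
conn = r.connect(host="localhost", port=28015, db="isard")  # hypothetical connection details

known_hyps = ["hyper1", "hyper2"]  # hypothetical hypervisor ids
status_to_unknown = ["Started", "Paused", "Unknown"]

# Domains whose status is in the list but whose hypervisor is not known
# are bulk-updated to 'Unknown' in a single server-side query.
result = (
    r.table("domains")
     .filter(lambda d: r.expr(status_to_unknown).contains(d["status"]))
     .filter(lambda d: r.not_(r.expr(known_hyps).contains(d["hyp_started"])))
     .update({"status": "Unknown"})
     .run(conn)
)
print(result)  # e.g. {'replaced': 3, 'unchanged': 0, ...}
conn.close()
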
Example No. 2
    def get_issues_for_group(self, group_id, query_params):
        if query_params is None:
            issues = self.uow.run_list(self.table.get_all(group_id, index="group_id"))
            return issues

        # get the group name to prevent slow queries
        group_name = self.uow.run(self.uow.tables.groups.get(group_id))

        if 'name' not in group_name:
            return QueryResult([], 0)

        group_name = group_name['name']

        # manually map all columns to prevent name conflicts and overwriting
        return self.uow.apply_query_parameters(
            self.table.get_all(group_id, index='group_id')
            .filter(r.row.has_fields('equipment_id'))
            .eq_join('equipment_id', self.uow.tables.equipment)
            .map(lambda x: {'id': x['left']['id'],
                            'group': group_name,
                            'equipment': x['right']['name'],
                            'name': x['left']['name'],
                            'description': x['left']['description']})
            .union(self.table.get_all(group_id, index='group_id')
                   .filter(r.not_(r.row.has_fields('equipment_id')))
                   .map(lambda x: {'id': x['id'],
                                   'group': group_name,
                                   'equipment': 'No Equip.',
                                   'name': x['name'],
                                   'description': x['description']})),
            query_params)
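
This example uses a common ReQL idiom for optional foreign keys: rows that have equipment_id are resolved through eq_join, rows that lack it are selected with r.not_(r.row.has_fields(...)) and given a placeholder value, and union merges the two branches into one result set. A stripped-down sketch of the same split follows, assuming hypothetical issues and equipment tables with the fields shown.

from rethinkdb import RethinkDB

r = RethinkDB()
conn = r.connect(host="localhost", port=28015)  # hypothetical connection details

issues = r.db("test").table("issues")       # hypothetical table names
equipment = r.db("test").table("equipment")

with_equipment = (
    issues.filter(r.row.has_fields("equipment_id"))
          .eq_join("equipment_id", equipment)
          .map(lambda row: {"id": row["left"]["id"],
                            "equipment": row["right"]["name"]})
)
without_equipment = (
    issues.filter(r.not_(r.row.has_fields("equipment_id")))
          .map(lambda row: {"id": row["id"], "equipment": "No Equip."})
)

# One result set: every issue, whether or not it references equipment.
for doc in with_equipment.union(without_equipment).run(conn):
    print(doc)
conn.close()
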
Example No. 3
def get_hyps_with_status(list_status, not_=False, empty=False):
    r_conn = new_rethink_connection()
    rtable = r.table('hypervisors')
    if not_:
        hyps_list = list(
            rtable.filter({'enabled': True})
                  .filter(lambda d: r.not_(r.expr(list_status).contains(d['status'])))
                  .run(r_conn))
    else:
        hyps_list = list(
            rtable.filter({'enabled': True})
                  .filter(lambda d: r.expr(list_status).contains(d['status']))
                  .run(r_conn))

    if empty:
        nostatus = list(
            rtable.filter({'enabled': True})
                  .filter(lambda n: ~n.has_fields('status'))
                  .run(r_conn))
        hyps_list = hyps_list + nostatus

    close_rethink_connection(r_conn)
    return hyps_list
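
Note that the last filter spells the negation as ~n.has_fields('status'): the Python driver overloads ~ on ReQL expressions to build the same not term that r.not_(...) produces, so the two forms are interchangeable. A tiny sketch of that equivalence, assuming a hypothetical hypervisors table where some enabled rows lack a status field:

from rethinkdb import RethinkDB

r = RethinkDB()
conn = r.connect(host="localhost", port=28015)  # hypothetical connection details
hypers = r.db("test").table("hypervisors")      # hypothetical database/table

# Both queries select enabled rows that have no 'status' field at all;
# ~expr and r.not_(expr) compile to the same ReQL 'not' term.
missing_a = hypers.filter({"enabled": True}).filter(lambda h: ~h.has_fields("status")).run(conn)
missing_b = hypers.filter({"enabled": True}).filter(lambda h: r.not_(h.has_fields("status"))).run(conn)

print(len(list(missing_a)) == len(list(missing_b)))  # expected: True
conn.close()
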
Example No. 4
async def create_subscription(user_id, serial, voice):
    serial_sub = {
        "id": serial["id"],
        "excluded_voices": [],
        "title": serial["title"]
    }
    await User.manager.execute(
        User.manager.table.filter(
            r.and_(
                r.row["id"] == user_id,
                r.not_(r.row["serials"].default([]).contains(serial_sub))
            )
        ).update({
            "serials": r.row["serials"].default([]).append(serial_sub)
        }))
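
This example shows the usual trick for appending to an array field that may not exist yet: .default([]) substitutes an empty array when serials is missing, the contains check inside r.and_/r.not_ makes the update idempotent, and .append(...) pushes the new element server-side. A synchronous sketch of the same idempotent append, assuming a hypothetical users table and ids:

from rethinkdb import RethinkDB

r = RethinkDB()
conn = r.connect(host="localhost", port=28015)  # hypothetical connection details
users = r.db("test").table("users")             # hypothetical table

serial_sub = {"id": "serial-42", "excluded_voices": [], "title": "Example serial"}

# Only rows matching the user id AND not already containing serial_sub are
# updated, so running this twice appends the subscription exactly once.
result = users.filter(
    r.and_(
        r.row["id"] == "user-1",
        r.not_(r.row["serials"].default([]).contains(serial_sub)),
    )
).update(
    {"serials": r.row["serials"].default([]).append(serial_sub)}
).run(conn)

print(result)  # e.g. {'replaced': 1, ...} on the first run, {'replaced': 0, ...} after
conn.close()
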
Example No. 5
def scrape_plugin_repos(num):
    """Scrapes the num plugin repos that have been least recently scraped."""
    MIN_FORK_USERS = 3

    query = r.table('plugin_github_repos').filter({'is_blacklisted': False})

    # We don't want to scrape forks that not many people use.
    query = query.filter(
        r.not_((r.row['is_fork'] == True) & (  # NOQA
            r.row['plugin_manager_users'] < MIN_FORK_USERS)),
        default=True)

    # Only scrape repos that don't redirect to other ones (probably renamed).
    query = query.filter(r.row['redirects_to'] == '')

    # We scrape vim-scripts separately using the batch /users/:user/repos call
    query = query.filter(r.row['owner'] != 'vim-scripts')

    query = query.order_by('last_scraped_at').limit(num)

    repos = query.run(r_conn())

    # TODO(david): Print stats at the end: # successfully scraped, # not found,
    #     # redirects, etc.
    for repo in repos:
        repo_name = repo['repo_name']
        repo_owner = repo['owner']

        # Print w/o newline.
        print "    scraping %s/%s ..." % (repo_owner, repo_name),
        sys.stdout.flush()

        # Attempt to fetch data about the plugin.
        res, repo_data = get_api_page('repos/%s/%s' % (repo_owner, repo_name))

        # If the API call 404s, then see if the repo has been renamed by
        # checking for a redirect in a non-API call.
        if res.status_code == 404:

            res = requests.head('https://github.com/%s/%s' %
                                (repo_owner, repo_name))

            if res.status_code == 301:
                location = res.headers.get('location')
                _, redirect_owner, redirect_repo_name = location.rsplit('/', 2)

                repo['redirects_to'] = '%s/%s' % (redirect_owner,
                                                  redirect_repo_name)

                # Make sure we insert the new location of the repo, which will
                # be scraped in a future run.
                PluginGithubRepos.upsert_with_owner_repo({
                    'owner': redirect_owner,
                    'repo_name': redirect_repo_name,
                    # TODO(david): Should append to a list
                    'redirects_from': ('%s/%s' % (repo_owner, repo_name)),
                })

                # And now change the GitHub repo location of the plugin that
                # the old repo location pointed to
                query = r.table('plugins').get_all([repo_owner, repo_name],
                                                   index='github_owner_repo')
                db_plugin = db.util.get_first(query)
                if db_plugin:
                    db_plugin['github_owner'] = redirect_owner
                    db_plugin['github_repo_name'] = redirect_repo_name
                    db.plugins.insert(db_plugin, conflict='replace')

                print 'redirects to %s/%s.' % (redirect_owner,
                                               redirect_repo_name)
            else:
                # TODO(david): Insert some metadata in the github repo that
                #     this is not found
                print 'not found.'

            plugin_data = None

        else:
            plugin_data = get_plugin_data(repo_owner, repo_name, repo_data)

        repo['repo_data'] = repo_data
        repo['repo_id'] = str(repo_data.get('id', repo['repo_id']))
        PluginGithubRepos.log_scrape(repo)

        # If this is a fork, note it and ensure we know about original repo.
        if repo_data.get('fork'):
            repo['is_fork'] = True
            PluginGithubRepos.upsert_with_owner_repo({
                'owner': repo_data['parent']['owner']['login'],
                'repo_name': repo_data['parent']['name'],
            })

        PluginGithubRepos.upsert_with_owner_repo(repo)

        # For most cases we don't care about forked repos, unless the forked
        # repo is used by others.
        if repo_data.get('fork') and (repo.get('plugin_manager_users', 0) <
                                      MIN_FORK_USERS):
            print 'skipping fork of %s' % repo_data['parent']['full_name']
            continue

        if plugin_data:

            # Insert the number of plugin manager users across all names/owners
            # of this repo.
            # TODO(david): Try to also use repo_id for this (but not all repos
            #     have it), or look at multiple levels of redirects.
            plugin_manager_users = repo.get('plugin_manager_users', 0)
            other_repos = r.table('plugin_github_repos').get_all(
                '%s/%s' % (repo_owner, repo_name),
                index='redirects_to').run(r_conn())
            for other_repo in other_repos:
                if other_repo['id'] == repo['id']:
                    continue
                plugin_manager_users += other_repo.get('plugin_manager_users',
                                                       0)

            plugin_data['github_bundles'] = plugin_manager_users

            if repo.get('from_submission'):
                _add_submission_data(plugin_data, repo['from_submission'])

            db.plugins.add_scraped_data(plugin_data,
                                        repo,
                                        submission=repo.get('from_submission'))

            print 'done.'
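
The first filter above passes default=True, which controls what happens when a document is missing is_fork or plugin_manager_users: without it such rows are silently skipped, with it they count as matching, which is what the scraper wants. A minimal sketch of the difference, assuming a hypothetical repos table where some rows lack those fields:

from rethinkdb import RethinkDB

r = RethinkDB()
conn = r.connect(host="localhost", port=28015)  # hypothetical connection details
repos = r.db("test").table("repos")             # hypothetical table

MIN_FORK_USERS = 3
predicate = r.not_((r.row["is_fork"] == True) & (r.row["plugin_manager_users"] < MIN_FORK_USERS))  # NOQA

# Without default=True, rows missing either field are skipped entirely;
# with default=True they are kept.
skipped_missing = list(repos.filter(predicate).run(conn))
kept_missing = list(repos.filter(predicate, default=True).run(conn))

print(len(kept_missing) >= len(skipped_missing))  # expected: True
conn.close()
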
Example No. 6
def scrape_plugin_repos(num):
    """Scrapes the num plugin repos that have been least recently scraped."""
    MIN_FORK_USERS = 3

    query = r.table('plugin_github_repos').filter({'is_blacklisted': False})

    # We don't want to scrape forks that not many people use.
    query = query.filter(r.not_((r.row['is_fork'] == True) & (
            r.row['plugin_manager_users'] < MIN_FORK_USERS)),
            default=True)

    # Only scrape repos that don't redirect to other ones (probably renamed).
    query = query.filter(r.row['redirects_to'] == '')

    # We scrape vim-scripts separately using the batch /users/:user/repos call
    query = query.filter(r.row['owner'] != 'vim-scripts')

    query = query.order_by('last_scraped_at').limit(num)

    repos = query.run(r_conn())

    # TODO(david): Print stats at the end: # successfully scraped, # not found,
    #     # redirects, etc.
    for repo in repos:
        repo_name = repo['repo_name']
        repo_owner = repo['owner']

        # Print w/o newline.
        print "    scraping %s/%s ..." % (repo_owner, repo_name),
        sys.stdout.flush()

        # Attempt to fetch data about the plugin.
        res, repo_data = get_api_page('repos/%s/%s' % (repo_owner, repo_name))

        # If the API call 404s, then see if the repo has been renamed by
        # checking for a redirect in a non-API call.
        if res.status_code == 404:

            res = requests.head('https://github.com/%s/%s' % (
                    repo_owner, repo_name))

            if res.status_code == 301:
                location = res.headers.get('location')
                _, redirect_owner, redirect_repo_name = location.rsplit('/', 2)

                repo['redirects_to'] = '%s/%s' % (redirect_owner,
                        redirect_repo_name)

                # Make sure we insert the new location of the repo, which will
                # be scraped in a future run.
                PluginGithubRepos.upsert_with_owner_repo({
                    'owner': redirect_owner,
                    'repo_name': redirect_repo_name,
                    # TODO(david): Should append to a list
                    'redirects_from': ('%s/%s' % (repo_owner, repo_name)),
                })

                # And now change the GitHub repo location of the plugin that
                # the old repo location pointed to
                query = r.table('plugins').get_all(
                        [repo_owner, repo_name], index='github_owner_repo')
                db_plugin = db.util.get_first(query)
                if db_plugin:
                    db_plugin['github_owner'] = redirect_owner
                    db_plugin['github_repo_name'] = redirect_repo_name
                    db.plugins.insert(db_plugin, conflict='replace')

                print 'redirects to %s/%s.' % (redirect_owner,
                        redirect_repo_name)
            else:
                # TODO(david): Insert some metadata in the github repo that
                #     this is not found
                print 'not found.'

            plugin_data = None

        else:
            plugin_data = get_plugin_data(repo_owner, repo_name, repo_data)

        repo['repo_data'] = repo_data
        repo['repo_id'] = str(repo_data.get('id', repo['repo_id']))
        PluginGithubRepos.log_scrape(repo)

        # If this is a fork, note it and ensure we know about original repo.
        if repo_data.get('fork'):
            repo['is_fork'] = True
            PluginGithubRepos.upsert_with_owner_repo({
                'owner': repo_data['parent']['owner']['login'],
                'repo_name': repo_data['parent']['name'],
            })

        r.table('plugin_github_repos').insert(repo,
                conflict='replace').run(r_conn())

        # For most cases we don't care about forked repos, unless the forked
        # repo is used by others.
        if repo_data.get('fork') and (
                repo.get('plugin_manager_users', 0) < MIN_FORK_USERS):
            print 'skipping fork of %s' % repo_data['parent']['full_name']
            continue

        if plugin_data:

            # Insert the number of plugin manager users across all names/owners
            # of this repo.
            # TODO(david): Try to also use repo_id for this (but not all repos
            #     have it), or look at multiple levels of redirects.
            plugin_manager_users = repo.get('plugin_manager_users', 0)
            other_repos = r.table('plugin_github_repos').get_all(
                    '%s/%s' % (repo_owner, repo_name),
                    index='redirects_to').run(r_conn())
            for other_repo in other_repos:
                if other_repo['id'] == repo['id']:
                    continue
                plugin_manager_users += other_repo.get(
                        'plugin_manager_users', 0)

            plugin_data['github_bundles'] = plugin_manager_users

            if repo.get('from_submission'):
                _add_submission_data(plugin_data, repo['from_submission'])

            db.plugins.add_scraped_data(plugin_data, repo,
                    submission=repo.get('from_submission'))

            print 'done.'
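
This variant writes the scraped repo back with a direct insert(repo, conflict='replace') instead of the upsert helper; conflict='replace' is ReQL's built-in upsert, replacing any existing document with the same primary key rather than raising a duplicate-key error. A small sketch of that behaviour, assuming the hypothetical repos table from the previous sketch:

from rethinkdb import RethinkDB

r = RethinkDB()
conn = r.connect(host="localhost", port=28015)  # hypothetical connection details
repos = r.db("test").table("repos")             # hypothetical table

doc = {"id": "owner/repo", "stars": 10}
repos.insert(doc, conflict="replace").run(conn)           # first run: inserted
doc["stars"] = 11
result = repos.insert(doc, conflict="replace").run(conn)  # second run: replaced
print(result)  # e.g. {'inserted': 0, 'replaced': 1, ...}
conn.close()
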
Example No. 7
def scrape_plugin_repos(num):
    """Scrapes the num plugin repos that have been least recently scraped."""
    MIN_FORK_USERS = 3

    query = r.table("plugin_github_repos").filter({"is_blacklisted": False})

    # We don't want to scrape forks that not many people use.
    query = query.filter(
        r.not_((r.row["is_fork"] == True) & (r.row["plugin_manager_users"] < MIN_FORK_USERS)), default=True  # NOQA
    )

    # Only scrape repos that don't redirect to other ones (probably renamed).
    query = query.filter(r.row["redirects_to"] == "")

    # We scrape vim-scripts separately using the batch /users/:user/repos call
    query = query.filter(r.row["owner"] != "vim-scripts")

    query = query.order_by("last_scraped_at").limit(num)

    repos = query.run(r_conn())

    # TODO(david): Print stats at the end: # successfully scraped, # not found,
    #     # redirects, etc.
    for repo in repos:
        repo_name = repo["repo_name"]
        repo_owner = repo["owner"]

        # Print w/o newline.
        print "    scraping %s/%s ..." % (repo_owner, repo_name),
        sys.stdout.flush()

        # Attempt to fetch data about the plugin.
        res, repo_data = get_api_page("repos/%s/%s" % (repo_owner, repo_name))

        # If the API call 404s, then see if the repo has been renamed by
        # checking for a redirect in a non-API call.
        if res.status_code == 404:

            res = requests.head("https://github.com/%s/%s" % (repo_owner, repo_name))

            if res.status_code == 301:
                location = res.headers.get("location")

                valid_repo_url = re.compile("^https://github.com/[^/]+/[^/]+")
                if not valid_repo_url.match(location):
                    print "redirects to invalid GitHub repo URL: %s" % location
                    continue

                _, redirect_owner, redirect_repo_name = location.rsplit("/", 2)

                repo["redirects_to"] = "%s/%s" % (redirect_owner, redirect_repo_name)

                # Make sure we insert the new location of the repo, which will
                # be scraped in a future run.
                PluginGithubRepos.upsert_with_owner_repo(
                    {
                        "owner": redirect_owner,
                        "repo_name": redirect_repo_name,
                        # TODO(david): Should append to a list
                        "redirects_from": ("%s/%s" % (repo_owner, repo_name)),
                    }
                )

                # And now change the GitHub repo location of the plugin that
                # the old repo location pointed to
                query = r.table("plugins").get_all([repo_owner, repo_name], index="github_owner_repo")
                db_plugin = db.util.get_first(query)
                if db_plugin:
                    db_plugin["github_owner"] = redirect_owner
                    db_plugin["github_repo_name"] = redirect_repo_name
                    db.plugins.insert(db_plugin, conflict="replace")

                print "redirects to %s/%s." % (redirect_owner, redirect_repo_name)
            else:
                # TODO(david): Insert some metadata in the github repo that
                #     this is not found
                print "not found."

            plugin_data = None

        else:
            plugin_data = get_plugin_data(repo_owner, repo_name, repo_data)

        repo["repo_data"] = repo_data
        repo["repo_id"] = str(repo_data.get("id", repo["repo_id"]))
        PluginGithubRepos.log_scrape(repo)

        # If this is a fork, note it and ensure we know about original repo.
        if repo_data.get("fork"):
            repo["is_fork"] = True
            PluginGithubRepos.upsert_with_owner_repo(
                {"owner": repo_data["parent"]["owner"]["login"], "repo_name": repo_data["parent"]["name"]}
            )

        PluginGithubRepos.upsert_with_owner_repo(repo)

        # For most cases we don't care about forked repos, unless the forked
        # repo is used by others.
        if repo_data.get("fork") and (repo.get("plugin_manager_users", 0) < MIN_FORK_USERS):
            print "skipping fork of %s" % repo_data["parent"]["full_name"]
            continue

        if plugin_data:

            # Insert the number of plugin manager users across all names/owners
            # of this repo.
            # TODO(david): Try to also use repo_id for this (but not all repos
            #     have it), or look at multiple levels of redirects.
            plugin_manager_users = repo.get("plugin_manager_users", 0)
            other_repos = (
                r.table("plugin_github_repos")
                .get_all("%s/%s" % (repo_owner, repo_name), index="redirects_to")
                .run(r_conn())
            )
            for other_repo in other_repos:
                if other_repo["id"] == repo["id"]:
                    continue
                plugin_manager_users += other_repo.get("plugin_manager_users", 0)

            plugin_data["github_bundles"] = plugin_manager_users

            if repo.get("from_submission"):
                _add_submission_data(plugin_data, repo["from_submission"])

            db.plugins.add_scraped_data(plugin_data, repo, submission=repo.get("from_submission"))

            print "done."