Exemplo n.º 1
0
    def _query_join_relation(self, query: BaseQuery,
                             root_relation: str) -> BaseQuery:
        """
        Outer-join onto ``query`` every relation required by a dotted column.

        The (model, join condition) pairs are obtained from
        ``get_related_model_and_join``. When ``is_model_already_joined``
        reports that a model is already part of the query, an alias of that
        model is joined instead, so the same table can be joined several times.

        :param query: SQLAlchemy query object
        :param root_relation: The root part of a dotted column, so the root relation
        :return: Transformed SQLAlchemy Query
        """
        joined = query
        for related_model, join_on in self.get_related_model_and_join(
                root_relation):
            if not self.is_model_already_joined(joined, related_model):
                # First occurrence of this model: join it as-is.
                joined = joined.join(related_model, join_on, isouter=True)
                continue

            # The model is already joined, so join a new alias of it and
            # rebuild the condition with the alias' primary key on the
            # right-hand side (left operand and operator are kept).
            alias = aliased(related_model)
            alias_pk = self.get_pk(alias)
            aliased_join = BinaryExpression(join_on.left,
                                            alias_pk,
                                            join_on.operator)
            joined = joined.join(alias, aliased_join, isouter=True)
        return joined
Exemplo n.º 2
0
 def query_articles_by_tag(self, tag: str, query_obj: BaseQuery = None):
     """Return a query restricted to articles carrying the tag named ``tag``.

     :param tag: value compared against ``Tag.name``
     :param query_obj: query to narrow down; when ``None``, ``Article.query``
         is used as the starting point
     :return: the starting query joined on ``Article.tags`` and filtered by
         the tag name
     """
     # Only touch Article.query when the caller did not supply a query.
     base_query = Article.query if query_obj is None else query_obj
     with_tags = base_query.join(Article.tags)
     return with_tags.filter(Tag.name == tag)
Exemplo n.º 3
0
    def apply_filter(query: BaseQuery) -> BaseQuery:
        """Narrow ``query`` using the filter parameters in ``request.args``.

        The parameters ``fields``, ``sort``, ``page`` and ``limit`` are
        skipped. The recognised filters are:

        * ``username`` -- substring (``LIKE %value%``) match on
          ``GithubUserInfo.username``.
        * ``language`` -- space-separated list; for every entry the query is
          joined on ``GithubUserInfo.repositories`` and filtered on
          ``GithubRepositories.languages.contains(entry)``.
        * ``number_of_repositories...`` -- compares the number of
          repositories of each user with ``int(value)``.
        * ``stars...`` -- compares the sum of ``stars`` over each user's
          repositories with ``int(value)``.

        For the two numeric filters the parameter name may end in ``[gt]``,
        ``[gte]``, ``[lt]`` or ``[lte]`` to select the comparison; without
        such a suffix equality is used.

        :param query: query to filter; the filters reference
            ``GithubUserInfo`` columns and relationships
        :return: the filtered query. NOTE(review): any parameter that is not
            listed above (this includes ``date...`` parameters, whose
            filtering is not implemented) makes the function return
            ``query.all()`` -- a list -- immediately, as the original code
            did; confirm whether callers rely on that.
        :raises ValueError: if a numeric filter value is not a valid integer
            (raised while comparing, i.e. only when at least one user row is
            fetched)
        """
        import operator

        reserved_params = {"fields", "sort", "page", "limit"}

        # Suffix of the parameter name -> comparison applied to the metric.
        suffix_comparators = (
            ("[gt]", operator.gt),
            ("[gte]", operator.ge),
            ("[lt]", operator.lt),
            ("[lte]", operator.le),
        )

        def comparator_for(param_name):
            """Return the comparison selected by the suffix of ``param_name``."""
            for suffix, compare in suffix_comparators:
                if param_name.endswith(suffix):
                    return compare
            return operator.eq

        def repository_count(user):
            """Number of repositories attached to ``user``."""
            return len(user.repositories)

        def total_stars(user):
            """Sum of ``stars`` over the repositories of ``user``."""
            return sum(repo.stars for repo in user.repositories)

        def filter_by_metric(current, param_name, raw_value, metric):
            """Keep the users whose ``metric`` satisfies the requested comparison."""
            compare = comparator_for(param_name)
            users = (current.join(GithubUserInfo.repositories).distinct(
                GithubUserInfo.id).all())
            # int(raw_value) is deliberately evaluated per user: with an empty
            # result set a non-numeric value does not raise, exactly as before.
            matching_ids = [
                user.id for user in users
                if compare(metric(user), int(raw_value))
            ]
            return current.filter(GithubUserInfo.id.in_(matching_ids))

        for param, value in request.args.items():
            if param in reserved_params:
                continue

            if param == "username":
                query = query.filter(
                    GithubUserInfo.username.like(f"%{value}%"))
                continue

            if param == "language":
                # One join + filter per requested language, so every listed
                # language has to match.
                for lang in value.split(" "):
                    query = query.join(GithubUserInfo.repositories).filter(
                        GithubRepositories.languages.contains(lang))
                continue

            if param.startswith("number_of_repositories"):
                query = filter_by_metric(query, param, value,
                                         repository_count)
                continue

            if param.startswith("stars"):
                query = filter_by_metric(query, param, value, total_stars)
                continue

            # TODO: "date..." filtering (last commit date) is not implemented
            # yet; such parameters are treated like any unknown parameter.
            # NOTE(review): returning a list here contradicts the BaseQuery
            # annotation; kept for backward compatibility.
            return query.all()

        return query
Exemplo n.º 4
0
    def gather_muts_and_sites(self,
                              mutations: BaseQuery,
                              sites: BaseQuery,
                              show_progress=True,
                              occurrences_in: List[MutationSource] = None,
                              intersection=None) -> MotifsData:
        """Collect mutations around sites with motifs and those breaking them.

        If occurrences_in is provided, the count of mutations will
        represent number of occurrences of mutations in provided
        sources, instead of number of distinct substitutions.

        :param mutations: query of mutations; it is narrowed here to the
            mutations affecting at least one site whose ``types`` contain
            ``self.site_type``
        :param sites: query of the sites to consider
        :param show_progress: when true, iteration is wrapped in ``tqdm``
            (this issues an additional ``count()`` on the mutations query)
        :param occurrences_in: sources whose per-mutation values (taken from
            ``mut.sources_map``) are summed to obtain the count of a
            mutation; when empty or ``None`` every mutation counts as 1
        :param intersection: optional iterable of sources; when given, the
            sites query is joined on ``Mutation.affected_sites`` and
            restricted by ``Mutation.in_sources(source)`` for every source
        :return: ``MotifsData`` holding, per motif name, the sites with the
            motif, the sites with a broken motif, and the counted mutations
            around / breaking those sites
        """
        if intersection:
            in_every_source = and_(
                *[Mutation.in_sources(source) for source in intersection])
            accepted_sites = sites.join(
                Mutation.affected_sites).filter(in_every_source).all()
        else:
            accepted_sites = sites.all()

        mutations_affecting_sites = mutations.filter(
            Mutation.affected_sites.any(Site.types.contains(self.site_type)))

        muts_around_sites_with_motif = defaultdict(dict)
        muts_breaking_sites_motif = defaultdict(dict)

        sites_with_broken_motif = defaultdict(set)

        sites_with_motif = select_sites_with_motifs(accepted_sites,
                                                    self.site_specific_motifs)

        # Membership is tested for every (mutation, site) pair below; a set
        # avoids a linear scan of the list each time. Sites are already used
        # as set members further down, so they are hashable.
        accepted_site_set = set(accepted_sites)

        if occurrences_in:

            def mutation_count(mut: Mutation):
                # Sources absent from the mutation's map contribute 0.
                return sum(
                    mut.sources_map[source.name].get_value()
                    if source.name in mut.sources_map else 0
                    for source in occurrences_in)
        else:

            def mutation_count(mut):
                return 1

        is_affected = self.breaking_modes[self.mode]

        if show_progress:
            ptm_muts = mutations_affecting_sites.count()
            mutations_affecting_sites = tqdm(mutations_affecting_sites,
                                             total=ptm_muts)

        for mutation in mutations_affecting_sites:
            for site in mutation.affected_sites:
                if site not in accepted_site_set:
                    continue

                for motif_name, motif in self.site_specific_motifs.items():
                    if site not in sites_with_motif[motif_name]:
                        continue

                    count = mutation_count(mutation)
                    muts_around_sites_with_motif[motif_name][
                        mutation] = count

                    mutated_sequence = mutate_sequence(site,
                                                       mutation,
                                                       offset=7)

                    # The active breaking mode decides whether the mutated
                    # sequence still satisfies the motif.
                    if is_affected(mutated_sequence, motif):
                        sites_with_broken_motif[motif_name].add(site)
                        muts_breaking_sites_motif[motif_name][
                            mutation] = count

        return MotifsData(
            sites_with_motif=sites_with_motif,
            sites_with_broken_motif=sites_with_broken_motif,
            muts_around_sites_with_motif=muts_around_sites_with_motif,
            muts_breaking_sites_motif=muts_breaking_sites_motif)