    def test_foreignkey_reverse(self):
        books = list(Book.objects.all())
        with self.assertNumQueries(1):
            prefetch_related_objects(books, 'first_time_authors')

        with self.assertNumQueries(0):
            [list(book.first_time_authors.all()) for book in books]

    def test_prefetch_object_to_attr(self):
        book1 = Book.objects.get(id=self.book1.id)
        with self.assertNumQueries(1):
            prefetch_related_objects([book1], Prefetch('authors', to_attr='the_authors'))

        with self.assertNumQueries(0):
            self.assertCountEqual(book1.the_authors, [self.author1, self.author2, self.author3])

    def test_m2m_forward(self):
        book1 = Book.objects.get(id=self.book1.id)
        with self.assertNumQueries(1):
            prefetch_related_objects([book1], 'authors')

        with self.assertNumQueries(0):
            self.assertCountEqual(book1.authors.all(), [self.author1, self.author2, self.author3])

    def test_foreignkey_forward(self):
        authors = list(Author.objects.all())
        with self.assertNumQueries(1):
            prefetch_related_objects(authors, 'first_book')

        with self.assertNumQueries(0):
            [author.first_book for author in authors]

    def test_m2m_reverse(self):
        author1 = Author.objects.get(id=self.author1.id)
        with self.assertNumQueries(1):
            prefetch_related_objects([author1], 'books')

        with self.assertNumQueries(0):
            self.assertCountEqual(author1.books.all(), [self.book1, self.book2])

    def test_prefetch_object(self):
        book1 = Book.objects.get(id=self.book1.id)
        with self.assertNumQueries(1):
            prefetch_related_objects([book1], Prefetch('authors'))

        with self.assertNumQueries(0):
            self.assertEqual(set(book1.authors.all()), {self.author1, self.author2, self.author3})
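
All of these tests follow the same pattern: materialize the instances first, then hand them to prefetch_related_objects, after which the related managers answer from the prefetch cache. A minimal standalone sketch of that pattern, assuming the Book/Author models from Django's prefetch_related test app:

from django.db.models import prefetch_related_objects

books = list(Book.objects.all())            # 1 query: load the books
prefetch_related_objects(books, 'authors')  # 1 query: cache each book's authors
for book in books:
    list(book.authors.all())                # served from the cache, 0 queries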
Example #7
def site(request):
    # type: (django.http.request.HttpRequest) -> dict
    """Add site settings to the context under the 'site' key."""
    site = get_current_site(request)
    prefetch_related_objects(
        [site], 'settings__translations')
    return {'site': site}
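
A context processor only runs if it is registered in settings. A sketch of the registration, assuming the function above lives in a hypothetical myapp/context_processors.py:

# settings.py ('myapp.context_processors.site' is a hypothetical dotted path)
TEMPLATES = [{
    'BACKEND': 'django.template.backends.django.DjangoTemplates',
    'DIRS': [],
    'APP_DIRS': True,
    'OPTIONS': {
        'context_processors': [
            'django.template.context_processors.request',
            'myapp.context_processors.site',
        ],
    },
}]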
Example #8
def _get_json_for_individuals(individuals, user=None, project_guid=None, family_guid=None, add_sample_guids_field=False, family_fields=None):
    """Returns a JSON representation for the given list of Individuals.

    Args:
        individuals (array): array of django models for the individual.
        user (object): Django User object for determining whether to include restricted/internal-only fields
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
        family_guid (string): An optional field to use as the familyGuid instead of querying the DB
        add_sample_guids_field (boolean): A flag to indicate whether sample ids should be added
    Returns:
        array: array of json objects
    """

    def _get_case_review_status_modified_by(modified_by):
        return (modified_by.email or modified_by.username) if hasattr(modified_by, 'email') else modified_by

    def _load_phenotips_data(phenotips_data):
        phenotips_json = None
        if phenotips_data:
            try:
                phenotips_json = json.loads(phenotips_data)
            except Exception as e:
                logger.error("Couldn't parse phenotips: {}".format(e))
        return phenotips_json

    def _process_result(result, individual):
        mother = result.pop('mother', None)
        father = result.pop('father', None)

        result.update({
            'caseReviewStatusLastModifiedBy': _get_case_review_status_modified_by(result.get('caseReviewStatusLastModifiedBy')),
            'phenotipsData': _load_phenotips_data(result['phenotipsData']),
            'maternalGuid': mother.guid if mother else None,
            'paternalGuid': father.guid if father else None,
            'maternalId': mother.individual_id if mother else None,
            'paternalId': father.individual_id if father else None,
            'displayName': result['displayName'] or result['individualId'],
        })

        if add_sample_guids_field:
            result['sampleGuids'] = [s.guid for s in individual.sample_set.all()]

    nested_fields = [
        {'fields': ('family', 'guid'), 'value': family_guid},
        {'fields': ('family', 'project', 'guid'), 'key': 'projectGuid', 'value': project_guid},
    ]
    if family_fields:
        for field in family_fields:
            nested_fields.append({'fields': ('family', field), 'key': _to_camel_case(field)})

    prefetch_related_objects(individuals, 'family')
    prefetch_related_objects(individuals, 'mother')
    prefetch_related_objects(individuals, 'father')
    prefetch_related_objects(individuals, 'case_review_status_last_modified_by')
    if add_sample_guids_field:
        prefetch_related_objects(individuals, 'sample_set')

    return _get_json_for_models(individuals, nested_fields=nested_fields, user=user, process_result=_process_result)
Example #9
def prefetch_export_runs(queryset_list_or_model):
    prefetch_args = ['job__provider_tasks__provider', 'job__provider_tasks__formats',
                     'provider_tasks__tasks__result', 'provider_tasks__tasks__exceptions']
    if isinstance(queryset_list_or_model, models.query.QuerySet):
        return queryset_list_or_model.select_related('user').prefetch_related(*prefetch_args)
    elif isinstance(queryset_list_or_model, list):
        models.prefetch_related_objects(queryset_list_or_model, *prefetch_args)
    elif isinstance(queryset_list_or_model, ExportRun):
        models.prefetch_related_objects([queryset_list_or_model], *prefetch_args)
    return queryset_list_or_model
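
The helper accepts a QuerySet, a plain list, or a single ExportRun, applying the same prefetch configuration in each case. A usage sketch (some_user is a hypothetical User instance):

runs_qs = prefetch_export_runs(ExportRun.objects.filter(user=some_user))  # QuerySet in, optimized QuerySet out
runs = prefetch_export_runs(list(ExportRun.objects.all()))                # list is prefetched in place
run = prefetch_export_runs(ExportRun.objects.first())                     # single instance is wrapped in a list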
Example #10
    def test_prefetch_queryset(self):
        book1 = Book.objects.get(id=self.book1.id)
        with self.assertNumQueries(1):
            prefetch_related_objects(
                [book1],
                Prefetch('authors', queryset=Author.objects.filter(id__in=[self.author1.id, self.author2.id]))
            )

        with self.assertNumQueries(0):
            self.assertCountEqual(book1.authors.all(), [self.author1, self.author2])
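
Note that a filtered Prefetch without to_attr, as above, replaces the cached result of book1.authors.all() with the filtered subset. When the full relation is still needed elsewhere, to_attr keeps the filtered rows on a separate attribute; a sketch against the same fixtures (the filter is hypothetical):

prefetch_related_objects(
    [book1],
    Prefetch('authors',
             queryset=Author.objects.filter(name__startswith='A'),
             to_attr='a_authors'),
)
book1.a_authors       # the filtered list, stored on the instance
book1.authors.all()   # unaffected; hits the database as usual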
Example #11
    def practice_overview(self, request, pk):
        del request, pk  # not needed
        student = self.get_object()
        prefetch_related_objects(
            [student],
            Prefetch(
                'task_sessions',
                queryset=TaskSession.objects.select_related('task')))
        # -> Same as student = Student.objects.prefetch_related(...).get(pk=pk)
        domain = get_domain()
        overview = get_practice_overview(domain, student)
        serializer = PracticeOverviewSerializer(overview)
        return Response(serializer.data)
Example #12
def _process_variants(variants, families):
    prefetch_related_objects(families, 'project')
    genes = _saved_variant_genes(variants)
    # TODO add locus lists on the client side (?)
    projects = {family.project for family in families}
    _add_locus_lists(projects, variants, genes)
    saved_variants_by_guid = _get_saved_variants(variants)

    return {
        'searchedVariants': variants,
        'savedVariantsByGuid': saved_variants_by_guid,
        'genesById': genes,
    }
Example #13
    def search(self, page_number: int = 1, page_size: int = 25):
        """
        Runs the search and builds the result page.
        :param page_number: The result page
        :param page_size: The number of items per page
        """
        queryset = Card.objects.filter(self.root_parameter.query()).distinct()
        self.add_sort_param(CardNameSortParam())
        self.add_sort_param(CardColourSortParam())
        self.add_sort_param(CardPowerSortParam())
        queryset = queryset.order_by(
            *[order for sort_param in self.sort_params for order in sort_param.get_sort_list()])

        self.paginator = Paginator(queryset, page_size)
        try:
            self.page = self.paginator.page(page_number)
        except EmptyPage:
            return
        cards = list(self.page)
        prefetch_related_objects(cards, 'printings__printed_languages__physical_cards__ownerships')
        prefetch_related_objects(cards, 'printings__printed_languages__language')
        prefetch_related_objects(cards, 'printings__set')
        prefetch_related_objects(cards, 'printings__rarity')

        preferred_set = self.get_preferred_set()
        self.results = [SearchResult(card, selected_set=preferred_set) for card in cards]
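
Each prefetch_related_objects call above issues its own queries, though intermediate levels that are already cached (such as 'printings' after the first call) are generally reused rather than refetched. The four calls can equally be collapsed into one, which makes the shared 'printings' prefix easier to see; a sketch:

prefetch_related_objects(
    cards,
    'printings__printed_languages__physical_cards__ownerships',
    'printings__printed_languages__language',
    'printings__set',
    'printings__rarity',
)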
Example #14
    def test_m2m_then_m2m(self):
        """A m2m can be followed through another m2m."""
        authors = list(Author.objects.all())
        with self.assertNumQueries(2):
            prefetch_related_objects(authors, 'books__read_by')

        with self.assertNumQueries(0):
            self.assertEqual(
                [
                    [[str(r) for r in b.read_by.all()] for b in a.books.all()]
                    for a in authors
                ],
                [
                    [['Amy'], ['Belinda']],  # Charlotte - Poems, Jane Eyre
                    [['Amy']],               # Anne - Poems
                    [['Amy'], []],           # Emily - Poems, Wuthering Heights
                    [['Amy', 'Belinda']],    # Jane - Sense and Sensibility
                ]
            )
Example #15
def get_json_for_analysis_groups(analysis_groups, project_guid=None):
    """Returns a JSON representation of the given list of AnalysisGroups.

    Args:
        analysis_groups (array): array of django models for the AnalysisGroups.
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
    Returns:
        array: array of json objects
    """

    def _process_result(result, group):
        result.update({
            'familyGuids': [f.guid for f in group.families.only('guid').all()]
        })

    prefetch_related_objects(analysis_groups, 'families')

    nested_fields = [{'fields': ('project', 'guid'), 'value': project_guid}]

    return _get_json_for_models(analysis_groups, nested_fields=nested_fields, process_result=_process_result)
Example #16
def _get_json_for_families(families, user=None, add_individual_guids_field=False, project_guid=None):
    """Returns a JSON representation of the given Family.

    Args:
        families (array): array of django models representing the family.
        user (object): Django User object for determining whether to include restricted/internal-only fields
        add_individual_guids_field (bool): whether to add an 'individualGuids' field. NOTE: this will require a database query.
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
    Returns:
        array: json objects
    """

    def _get_pedigree_image_url(pedigree_image):
        if isinstance(pedigree_image, ImageFieldFile):
            try:
                pedigree_image = pedigree_image.url
            except Exception:
                pedigree_image = None
        return os.path.join("/media/", pedigree_image) if pedigree_image else None

    def _process_result(result, family):
        result['analysedBy'] = [{
            'createdBy': {'fullName': ab.created_by.get_full_name(), 'email': ab.created_by.email, 'isStaff': ab.created_by.is_staff},
            'lastModifiedDate': ab.last_modified_date,
        } for ab in family.familyanalysedby_set.all()]
        pedigree_image = _get_pedigree_image_url(result.pop('pedigreeImage'))
        if pedigree_image:
            result['pedigreeImage'] = pedigree_image
        if add_individual_guids_field:
            result['individualGuids'] = [i.guid for i in family.individual_set.all()]
        if not result['displayName']:
            result['displayName'] = result['familyId']

    prefetch_related_objects(families, 'familyanalysedby_set__created_by')
    if add_individual_guids_field:
        prefetch_related_objects(families, 'individual_set')

    nested_fields = [{'fields': ('project', 'guid'), 'value': project_guid}]

    return _get_json_for_models(families, nested_fields=nested_fields, user=user, process_result=_process_result)
Example #17
def get_json_for_saved_variants(saved_variants, add_tags=False, add_details=False, project=None, user=None, **kwargs):
    """Returns a JSON representation of the given variant.

    Args:
        saved_variant (object): Django model for the SavedVariant.
    Returns:
        dict: json object
    """
    from seqr.views.utils.variant_utils import variant_details

    def _process_result(variant_json, saved_variant):
        if add_tags:
            variant_json.update({
                'tags': [get_json_for_variant_tag(tag) for tag in saved_variant.varianttag_set.all()],
                'functionalData': [get_json_for_variant_functional_data(tag) for tag in
                                   saved_variant.variantfunctionaldata_set.all()],
                'notes': [get_json_for_variant_note(tag) for tag in saved_variant.variantnote_set.all()],
            })
        if add_details:
            saved_variant_json = json.loads(saved_variant.saved_variant_json or '{}')
            variant_json.update(variant_details(saved_variant_json, project or saved_variant.project, user, **kwargs))
        variant_json.update({
            'variantId': saved_variant.guid,  # TODO get from json
            'familyGuids': [saved_variant.family.guid],
        })
        return variant_json

    prefetch_related_objects(saved_variants, 'family')
    if not project:
        prefetch_related_objects(saved_variants, 'project')
    if add_tags:
        prefetch_related_objects(saved_variants, 'varianttag_set__variant_tag_type', 'varianttag_set__created_by',
                                 'variantnote_set__created_by', 'variantfunctionaldata_set__created_by')

    return _get_json_for_models(saved_variants, guid_key='variantGuid', process_result=_process_result)
Example #18
def get_json_for_locus_lists(locus_lists, user, include_genes=False):
    """Returns a JSON representation of the given LocusLists.

    Args:
        locus_lists (array): array of LocusList django models.
    Returns:
        array: json objects
    """

    def _process_result(result, locus_list):
        gene_set = locus_list.locuslistgene_set
        interval_set = locus_list.locuslistinterval_set
        if include_genes:
            intervals = _get_json_for_models(interval_set.all())
            genome_versions = {interval['genomeVersion'] for interval in intervals}
            result.update({
                'items': [{'geneId': gene.gene_id} for gene in gene_set.all()] + intervals,
                'intervalGenomeVersion': genome_versions.pop() if len(genome_versions) == 1 else None,
            })
        result.update({
            'numEntries': gene_set.count() + interval_set.count(),
            'canEdit': user == locus_list.created_by,
        })

    prefetch_related_objects(locus_lists, 'created_by')
    prefetch_related_objects(locus_lists, 'locuslistgene_set')
    prefetch_related_objects(locus_lists, 'locuslistinterval_set')

    return _get_json_for_models(locus_lists, user=user, process_result=_process_result)
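
One subtlety: gene_set.count() and interval_set.count() may still hit the database despite the prefetches (only newer Django versions let related-manager count() consult the prefetch cache). len() over .all() is cache-safe on any version; a sketch of the equivalent line:

result['numEntries'] = len(gene_set.all()) + len(interval_set.all())  # answered from the prefetched rows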
Example #19
def project_versions(request, project_slug):
    """
    Project version list view.

    Shows the available versions and lets the user choose which ones to build.
    """
    project = get_object_or_404(
        Project.objects.protected(request.user),
        slug=project_slug,
    )

    versions = Version.objects.public(
        user=request.user,
        project=project,
        only_active=False,
    )
    active_versions = versions.filter(active=True)
    inactive_versions = versions.filter(active=False)

    # If there's a wiped query string, check the string against the versions
    # list and display a success message. Deleting directories doesn't know how
    # to fail.  :)
    wiped = request.GET.get('wipe', '')
    wiped_version = versions.filter(slug=wiped)
    if wiped and wiped_version.count():
        messages.success(request, 'Version wiped: ' + wiped)

    # Optimize project permission checks
    prefetch_related_objects([project], 'users')

    return render(
        request,
        'projects/project_version_list.html',
        {
            'inactive_versions': inactive_versions,
            'active_versions': active_versions,
            'project': project,
        },
    )
Example #20
def get_access_by_project(
        projects: Sequence[Project],
        user: User) -> MutableMapping[Project, MutableMapping[str, Any]]:
    request = env.request

    project_teams = list(
        ProjectTeam.objects.filter(
            project__in=projects).select_related("team"))
    project_team_map = defaultdict(list)

    for pt in project_teams:
        project_team_map[pt.project_id].append(pt.team)

    team_memberships = get_team_memberships([pt.team for pt in project_teams],
                                            user)
    org_roles = get_org_roles({i.organization_id for i in projects}, user)
    prefetch_related_objects(projects, "organization")

    is_superuser = request and is_active_superuser(
        request) and request.user == user
    result = {}
    for project in projects:
        is_member = any(t.id in team_memberships
                        for t in project_team_map.get(project.id, []))
        org_role = org_roles.get(project.organization_id)
        if is_member:
            has_access = True
        elif is_superuser:
            has_access = True
        elif project.organization.flags.allow_joinleave:
            has_access = True
        elif org_role and roles.get(org_role).is_global:
            has_access = True
        else:
            has_access = False
        result[project] = {"is_member": is_member, "has_access": has_access}
    return result
Example #21
    def my_threads(self, request):
        queryset = self.get_queryset() \
            .only_threads_with_user(request.user) \
            .select_related('latest_message') \
            .prefetch_related('participants')
        queryset = self.filter_queryset(queryset)
        paginator = ThreadPagination()

        threads = list(
            paginator.paginate_queryset(queryset, request, view=self))
        messages = [
            t.latest_message for t in threads if t.latest_message is not None
        ]

        prefetch_related_objects(threads + messages, 'reactions')

        serializer = self.get_serializer(threads, many=True)
        message_serializer = self.get_serializer(messages, many=True)
        return paginator.get_paginated_response({
            'threads':
            serializer.data,
            'messages':
            message_serializer.data
        })
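
Prefetching threads + messages in a single call relies on both models exposing a relation with the same name; for reactions that is typically an identically configured GenericRelation on each model. A sketch of the assumed layout (model names hypothetical):

from django.contrib.contenttypes.fields import GenericRelation
from django.db import models

class Thread(models.Model):
    reactions = GenericRelation('Reaction')  # hypothetical Reaction model

class Message(models.Model):
    reactions = GenericRelation('Reaction')

If the two relations were configured differently, the conservative alternative is one call per list: prefetch_related_objects(threads, 'reactions') followed by prefetch_related_objects(messages, 'reactions').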
Example #22
def get_json_for_saved_variants(saved_variants,
                                add_tags=False,
                                add_details=False,
                                project=None,
                                user=None,
                                **kwargs):
    """Returns a JSON representation of the given variant.

    Args:
        saved_variant (object): Django model for the SavedVariant.
    Returns:
        dict: json object
    """
    from seqr.views.utils.variant_utils import variant_details

    def _process_result(variant_json, saved_variant):
        if add_tags:
            variant_json.update({
                'tags': [
                    get_json_for_variant_tag(tag)
                    for tag in saved_variant.varianttag_set.all()
                ],
                'functionalData': [
                    get_json_for_variant_functional_data(tag)
                    for tag in saved_variant.variantfunctionaldata_set.all()
                ],
                'notes': [
                    get_json_for_variant_note(tag)
                    for tag in saved_variant.variantnote_set.all()
                ],
            })
        if add_details:
            saved_variant_json = json.loads(saved_variant.saved_variant_json
                                            or '{}')
            variant_json.update(
                variant_details(saved_variant_json, project
                                or saved_variant.project, user, **kwargs))
        variant_json.update({
            'variantId': saved_variant.guid,  # TODO get from json
            'familyGuids': [saved_variant.family.guid],
        })
        return variant_json

    prefetch_related_objects(saved_variants, 'family')
    if not project:
        prefetch_related_objects(saved_variants, 'project')
    if add_tags:
        prefetch_related_objects(saved_variants,
                                 'varianttag_set__variant_tag_type',
                                 'varianttag_set__created_by',
                                 'variantnote_set__created_by',
                                 'variantfunctionaldata_set__created_by')

    return _get_json_for_models(saved_variants,
                                guid_key='variantGuid',
                                process_result=_process_result)
Example #23
    @classmethod
    def _check_value(cls, value, multi_model=True):
        """
        If the value is a queryset, apply the prefetches and select_related;
        if the value is not a queryset, return the value itself.
        :param value:
        :return:
        """

        # For a single model instance (not a queryset), the prefetches are applied
        # directly to the instance. _prepare_prefetch_list is called without a
        # queryset here, so it behaves as if the queryset carried no prefetches.
        if (not multi_model) and isinstance(value, models.Model):
            models.prefetch_related_objects([value],
                                            *cls._prepare_prefetch_list())

        if not isinstance(value, (models.QuerySet, models.Manager)):
            return value

        queryset = value.all() if isinstance(value, models.Manager) else value

        prefetch_list = cls._prepare_prefetch_list(queryset)
        select = cls.database_relations['select'][::]

        return queryset.select_related(*select).prefetch_related(
            *prefetch_list)
Example #24
def get_courses(user, org=None, filter_=None):
    """
    Return a LazySequence of courses available, optionally filtered by org code (case-insensitive).
    """
    courses = branding.get_visible_courses(
        org=org,
        filter_=filter_,
    ).prefetch_related(
        Prefetch(
            'modes',
            queryset=CourseMode.objects.exclude(
                mode_slug__in=CourseMode.CREDIT_MODES),
            to_attr='selectable_modes',
        ),
    ).select_related('image_set')

    permission_name = configuration_helpers.get_value(
        'COURSE_CATALOG_VISIBILITY_PERMISSION',
        settings.COURSE_CATALOG_VISIBILITY_PERMISSION)
    if user.is_authenticated:
        prefetch_related_objects([user], 'roles', 'courseenrollment_set',
                                 'experimentdata_set')
    return LazySequence(
        (c for c in courses if has_access(user, permission_name, c)),
        est_len=courses.count())
Example #25
    def run_program(self, request, pk=None):
        task_session_id = request.data['task-session-id']
        program = request.data['program']
        correct = request.data['correct']
        task_session = (
            TaskSession.objects
            .select_related('task', 'student')
            .get(pk=task_session_id))
        student = task_session.student
        assert student.pk == int(pk)
        domain = get_domain()
        progress = actions.run_program(domain, task_session, program, correct)
        response = {'correct': correct}
        if correct:
            prefetch_related_objects(
                [student],
                Prefetch(
                    'task_sessions',
                    queryset=TaskSession.objects.select_related('task')))
            response['recommendation'] = get_recommendation(domain, student)
            response['progress'] = progress or []
            print('progress', progress)
        serializer = RunProgramResponseSerializer(response)
        return Response(serializer.data)
Example #26
def get_json_for_genes(genes,
                       user=None,
                       add_dbnsfp=False,
                       add_omim=False,
                       add_constraints=False,
                       add_notes=False,
                       add_expression=False):
    """Returns a JSON representation of the given list of GeneInfo.

    Args:
        genes (array): array of django models for the GeneInfo.
    Returns:
        array: array of json objects
    """
    total_gene_constraints = GeneConstraint.objects.count()
    if add_notes:
        gene_notes_json = get_json_for_gene_notes_by_gene_id(
            [gene.gene_id for gene in genes], user)

    def _add_total_constraint_count(result, *args):
        result['totalGenes'] = total_gene_constraints

    def _process_result(result, gene):
        if add_dbnsfp:
            dbnsfp = gene.dbnsfpgene_set.first()
            if dbnsfp:
                result.update(_get_json_for_model(dbnsfp))
            else:
                result.update(_get_empty_json_for_model(dbNSFPGene))
        if add_omim:
            result['omimPhenotypes'] = _get_json_for_models(
                gene.omim_set.all())
        if add_constraints:
            constraint = gene.geneconstraint_set.order_by('-mis_z',
                                                          '-pLI').first()
            result['constraints'] = _get_json_for_model(
                constraint, process_result=_add_total_constraint_count
            ) if constraint else {}
        if add_notes:
            result['notes'] = gene_notes_json.get(result['geneId'], [])
        if add_expression:
            result['expression'] = (gene.geneexpression.expression_values
                                    if hasattr(gene, 'geneexpression') else None)

    if add_dbnsfp:
        prefetch_related_objects(genes, 'dbnsfpgene_set')
    if add_omim:
        prefetch_related_objects(genes, 'omim_set')
    if add_constraints:
        prefetch_related_objects(genes, 'geneconstraint_set')

    return _get_json_for_models(genes, process_result=_process_result)
Example #27
def get_json_for_locus_lists(locus_lists,
                             user,
                             include_genes=False,
                             include_project_count=False,
                             is_analyst=None):
    """Returns a JSON representation of the given LocusLists.

    Args:
        locus_lists (array): array of LocusList django models.
    Returns:
        array: json objects
    """
    def _process_result(result, locus_list):
        gene_set = locus_list.locuslistgene_set
        interval_set = locus_list.locuslistinterval_set
        if include_genes:
            intervals = _get_json_for_models(interval_set.all())
            genome_versions = {
                interval['genomeVersion']
                for interval in intervals
            }
            result.update({
                'items': [{'geneId': gene.gene_id} for gene in gene_set.all()] + intervals,
                'intervalGenomeVersion': genome_versions.pop() if len(genome_versions) == 1 else None,
            })
        if include_project_count:
            result['numProjects'] = locus_list.num_projects
        result.update({
            'numEntries': gene_set.count() + interval_set.count(),
            'canEdit': user == locus_list.created_by,
        })

    prefetch_related_objects(locus_lists, 'created_by')
    prefetch_related_objects(locus_lists, 'locuslistgene_set')
    prefetch_related_objects(locus_lists, 'locuslistinterval_set')

    return _get_json_for_models(locus_lists,
                                user=user,
                                is_analyst=is_analyst,
                                process_result=_process_result)
Example #28
    def get(self, request):
        user = request.user
        if not user.is_authenticated:
            administrated_coteries = []
            joined_coteries = []
        else:
            administrated_coteries = list(user.administrated_coterie_set.all())
            joined_coteries = list(user.joined_coterie_set.all())
            combined = administrated_coteries + joined_coteries
            prefetch_related_objects(combined, 'administrators')
            prefetch_related_objects(combined, 'members')
            prefetch_related_objects(combined, 'coteriedocument_set__unique_file')
        return JsonResponse(
            {
                'administratedCoteries': administrated_coteries,
                'joinedCoteries': joined_coteries,
            },
            encoder=CoterieEncoder,
            safe=False)
Example #29
    def test_unknown(self):
        book1 = Book.objects.get(id=self.book1.id)
        with self.assertRaises(AttributeError):
            prefetch_related_objects([book1], "unknown_attribute")
Example #30
def _get_json_for_individuals(individuals,
                              user=None,
                              project_guid=None,
                              family_guid=None,
                              add_sample_guids_field=False,
                              family_fields=None,
                              skip_nested=False,
                              add_hpo_details=False,
                              is_analyst=None,
                              has_case_review_perm=None):
    """Returns a JSON representation for the given list of Individuals.

    Args:
        individuals (array): array of django models for the individual.
        user (object): Django User object for determining whether to include restricted/internal-only fields
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
        family_guid (string): An optional field to use as the familyGuid instead of querying the DB
        add_sample_guids_field (boolean): A flag to indicate whether sample ids should be added
    Returns:
        array: array of json objects
    """

    if not individuals:
        return []

    def _get_case_review_status_modified_by(modified_by):
        return (modified_by.email or modified_by.username) if hasattr(modified_by, 'email') else modified_by

    def _process_result(result, individual):
        mother = result.pop('mother', None)
        father = result.pop('father', None)

        result.update({
            'caseReviewStatusLastModifiedBy': _get_case_review_status_modified_by(
                result.get('caseReviewStatusLastModifiedBy')),
            'maternalGuid': mother.guid if mother else None,
            'paternalGuid': father.guid if father else None,
            'maternalId': mother.individual_id if mother else None,
            'paternalId': father.individual_id if father else None,
            'displayName': result['displayName'] or result['individualId'],
        })

        if add_sample_guids_field:
            result['sampleGuids'] = [
                s.guid for s in individual.sample_set.all()
            ]
            result['igvSampleGuids'] = [
                s.guid for s in individual.igvsample_set.all()
            ]

    kwargs = {
        'additional_model_fields':
        _get_case_review_fields(individuals[0], has_case_review_perm, user,
                                lambda indiv: indiv.family.project)
    }
    if project_guid or not skip_nested:
        nested_fields = [
            {
                'fields': ('family', 'guid'),
                'value': family_guid
            },
            {
                'fields': ('family', 'project', 'guid'),
                'key': 'projectGuid',
                'value': project_guid
            },
        ]
        if family_fields:
            for field in family_fields:
                nested_fields.append({
                    'fields': ('family', field),
                    'key': _to_camel_case(field)
                })
        kwargs.update({'nested_fields': nested_fields})
    else:
        kwargs['additional_model_fields'].append('family_id')

    if add_hpo_details:
        kwargs['additional_model_fields'] += [
            'features', 'absent_features', 'nonstandard_features',
            'absent_nonstandard_features'
        ]

    prefetch_related_objects(individuals, 'mother')
    prefetch_related_objects(individuals, 'father')
    if 'case_review_status_last_modified_by' in kwargs['additional_model_fields']:
        prefetch_related_objects(individuals, 'case_review_status_last_modified_by')
    if add_sample_guids_field:
        prefetch_related_objects(individuals, 'sample_set')
        prefetch_related_objects(individuals, 'igvsample_set')

    parsed_individuals = _get_json_for_models(individuals,
                                              user=user,
                                              is_analyst=is_analyst,
                                              process_result=_process_result,
                                              **kwargs)
    if add_hpo_details:
        all_hpo_ids = set()
        for i in parsed_individuals:
            all_hpo_ids.update(
                [feature['id'] for feature in i.get('features') or []])
            all_hpo_ids.update(
                [feature['id'] for feature in i.get('absentFeatures') or []])
        hpo_terms_by_id = {
            hpo.hpo_id: hpo
            for hpo in HumanPhenotypeOntology.objects.filter(
                hpo_id__in=all_hpo_ids)
        }
        for i in parsed_individuals:
            for feature in i.get('features') or []:
                hpo = hpo_terms_by_id.get(feature['id'])
                if hpo:
                    feature.update({
                        'category': hpo.category_id,
                        'label': hpo.name
                    })
            for feature in i.get('absentFeatures') or []:
                hpo = hpo_terms_by_id.get(feature['id'])
                if hpo:
                    feature.update({
                        'category': hpo.category_id,
                        'label': hpo.name
                    })

    return parsed_individuals
Example #31
def _get_json_for_models(models,
                         nested_fields=None,
                         user=None,
                         is_analyst=None,
                         process_result=None,
                         guid_key=None,
                         additional_model_fields=None):
    """Returns an array JSON representations of the given models.

    Args:
        models (array): Array of django models
        user (object): Django User object for determining whether to include restricted/internal-only fields
        nested_fields (array): Optional array of fields to get from the model that are nested on related objects
        process_result (lambda): Optional function to post-process a given model json
        guid_key (string): Optional key to use for the model's guid
    Returns:
        array: json objects
    """

    if not models:
        return []

    model_class = type(models[0])
    fields = copy(model_class._meta.json_fields)
    if is_analyst is None:
        is_analyst = user and user_is_analyst(user)
    if is_analyst:
        fields += getattr(model_class._meta, 'internal_json_fields', [])
    if additional_model_fields:
        fields += additional_model_fields

    if 'created_by' in fields:
        prefetch_related_objects(models, 'created_by')
    for nested_field in nested_fields or []:
        if not nested_field.get('value'):
            prefetch_related_objects(models,
                                     '__'.join(nested_field['fields'][:-1]))

    results = []
    for model in models:
        result = {
            _to_camel_case(field): getattr(model, field)
            for field in fields
        }
        for nested_field in (nested_fields or []):
            field_value = nested_field.get('value')
            if not field_value:
                field_value = model
                for field in nested_field['fields']:
                    field_value = getattr(field_value, field) if field_value else None

            key = nested_field.get('key', _to_camel_case('_'.join(nested_field['fields'])))
            result[key] = field_value

        if result.get('guid'):
            guid_key = guid_key or '{}{}Guid'.format(
                model_class.__name__[0].lower(), model_class.__name__[1:])
            result[guid_key] = result.pop('guid')
        if result.get('createdBy'):
            created_by = result['createdBy']
            result['createdBy'] = created_by.get_full_name() or created_by.email
        if process_result:
            process_result(result, model)
        results.append(result)

    return results
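
A usage sketch of the helper, assuming a hypothetical Family model whose _meta.json_fields is ['guid', 'display_name']. Because the nested_fields entry supplies no precomputed 'value', the helper prefetches 'project' before reading family.project.guid:

families = list(Family.objects.all())
results = _get_json_for_models(
    families,
    nested_fields=[{'fields': ('project', 'guid'), 'key': 'projectGuid'}],
)
# each result looks like {'familyGuid': ..., 'displayName': ..., 'projectGuid': ...}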
Example #32
def add_variants_dataset_handler(request, project_guid):
    """Create or update samples for the given variant dataset

    Args:
        request: Django request object
        project_guid (string): GUID of the project that should be updated

    HTTP POST
        Request body - should contain the following json structure:
        {
            'elasticsearchIndex': <String> (required)
            'ignoreExtraSamplesInCallset': <Boolean>
            'mappingFilePath':  <String>
        }

        Response body - will contain the following structure:

    """

    project = get_project_and_check_permissions(project_guid,
                                                request.user,
                                                permission_level=CAN_EDIT)
    request_json = json.loads(request.body)

    try:
        required_fields = ['elasticsearchIndex', 'datasetType']
        if any(field not in request_json for field in required_fields):
            raise ValueError('request must contain fields: {}'.format(
                ', '.join(required_fields)))
        elasticsearch_index = request_json['elasticsearchIndex'].strip()
        dataset_type = request_json['datasetType']
        if dataset_type not in Sample.DATASET_TYPE_LOOKUP:
            raise ValueError('Invalid dataset type "{}"'.format(dataset_type))

        sample_ids, index_metadata = get_elasticsearch_index_samples(
            elasticsearch_index, dataset_type=dataset_type)
        if not sample_ids:
            raise ValueError(
                'No samples found in the index. Make sure the specified caller type is correct'
            )
        validate_index_metadata(index_metadata,
                                project,
                                elasticsearch_index,
                                dataset_type=dataset_type)
        sample_type = index_metadata['sampleType']

        sample_id_to_individual_id_mapping = load_mapping_file(
            request_json['mappingFilePath']) if request_json.get(
                'mappingFilePath') else {}

        loaded_date = timezone.now()
        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=dataset_type,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping=sample_id_to_individual_id_mapping,
            loaded_date=loaded_date,
        )

        unmatched_samples = set(sample_ids) - set(
            matched_sample_id_to_sample_record.keys())

        if request_json.get('ignoreExtraSamplesInCallset'):
            if len(matched_sample_id_to_sample_record) == 0:
                raise Exception(
                    "None of the individuals or samples in the project matched the {} expected sample id(s)"
                    .format(len(sample_ids)))
        elif len(unmatched_samples) > 0:
            raise Exception(
                'Matches not found for ES sample ids: {}. Upload a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore them.'
                .format(", ".join(unmatched_samples)))

        prefetch_related_objects(matched_sample_id_to_sample_record.values(),
                                 'individual__family')
        included_families = {
            sample.individual.family
            for sample in matched_sample_id_to_sample_record.values()
        }

        missing_individuals = Individual.objects.filter(
            family__in=included_families,
            sample__is_active=True,
            sample__dataset_type=dataset_type,
        ).exclude(sample__in=matched_sample_id_to_sample_record.values()
                  ).select_related('family')
        missing_family_individuals = defaultdict(list)
        for individual in missing_individuals:
            missing_family_individuals[individual.family].append(individual)

        if missing_family_individuals:
            raise Exception(
                'The following families are included in the callset but are missing some family members: {}.'
                .format(', '.join(
                    sorted([
                        '{} ({})'.format(
                            family.family_id, ', '.join(
                                sorted(
                                    [i.individual_id
                                     for i in missing_indivs]))) for family,
                        missing_indivs in missing_family_individuals.items()
                    ]))))

        inactivate_sample_guids = _update_variant_samples(
            matched_sample_id_to_sample_record, elasticsearch_index,
            loaded_date, dataset_type)

    except Exception as e:
        traceback.print_exc()
        return create_json_response({'errors': [e.message or str(e)]},
                                    status=400)

    if not matched_sample_id_to_sample_record:
        return create_json_response({'samplesByGuid': {}})

    family_guids_to_update = [
        family.guid for family in included_families
        if family.analysis_status == Family.ANALYSIS_STATUS_WAITING_FOR_DATA
    ]
    Family.objects.filter(guid__in=family_guids_to_update).update(
        analysis_status=Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS)

    response_json = _get_samples_json(matched_sample_id_to_sample_record,
                                      inactivate_sample_guids, project_guid)
    response_json['familiesByGuid'] = {
        family_guid: {
            'analysisStatus': Family.ANALYSIS_STATUS_ANALYSIS_IN_PROGRESS
        }
        for family_guid in family_guids_to_update
    }

    return create_json_response(response_json)
Example #33
def anvil_export(request, project_guid):
    if project_guid == 'all':
        project_guid = None

    if project_guid:
        projects_by_guid = {
            project_guid: Project.objects.get(guid=project_guid)
        }
    else:
        projects_by_guid = {
            p.guid: p
            for p in Project.objects.filter(
                projectcategory__name__iexact='anvil')
        }

    families = _get_over_year_loaded_project_families(
        projects_by_guid.values())
    prefetch_related_objects(families, 'individual_set')

    saved_variants_by_family = _get_saved_variants_by_family(
        projects_by_guid.values(), request.user)

    # Handle compound het genes
    compound_het_gene_id_by_family = {}
    for family_guid, saved_variants in saved_variants_by_family.items():
        if len(saved_variants) > 1:
            potential_compound_het_variants = [
                variant for variant in saved_variants if all(
                    gen['numAlt'] < 2 for gen in variant['genotypes'].values())
            ]
            main_gene_ids = {
                variant['mainTranscript']['geneId']
                for variant in potential_compound_het_variants
            }
            if len(main_gene_ids) > 1:
                # This occurs in compound hets where some hits have primary transcripts in different genes
                for gene_id in main_gene_ids:
                    if all(gene_id in variant['transcripts']
                           for variant in potential_compound_het_variants):
                        compound_het_gene_id_by_family[family_guid] = gene_id

    individuals = set()
    for family in families:
        individuals.update(family.individual_set.all())
    rows = _get_json_for_individuals(
        list(individuals),
        project_guid=project_guid,
        family_fields=['family_id', 'coded_phenotype'])

    gene_ids = set()
    for row in rows:
        row['Project ID'] = projects_by_guid[row['projectGuid']].name

        saved_variants = saved_variants_by_family[row['familyGuid']]
        row['numSavedVariants'] = len(saved_variants)
        for i, variant in enumerate(saved_variants):
            genotype = variant['genotypes'].get(row['individualGuid'], {})
            if genotype.get('numAlt', -1) > 0:
                gene_id = compound_het_gene_id_by_family.get(
                    row['familyGuid']) or variant['mainTranscript']['geneId']
                gene_ids.add(gene_id)
                variant_fields = {
                    'Zygosity': 'heterozygous'
                    if genotype['numAlt'] == 1 else 'homozygous',
                    'Chrom': variant['chrom'],
                    'Pos': variant['pos'],
                    'Ref': variant['ref'],
                    'Alt': variant['alt'],
                    'hgvsc': variant['mainTranscript']['hgvsc'],
                    'hgvsp': variant['mainTranscript']['hgvsp'],
                    'Transcript': variant['mainTranscript']['transcriptId'],
                    'geneId': gene_id,
                }
                row.update({
                    '{} - {}'.format(k, i + 1): v
                    for k, v in variant_fields.items()
                })

    genes_by_id = get_genes(gene_ids)
    for row in rows:
        for key, gene_id in row.items():
            if key.startswith('geneId') and genes_by_id.get(gene_id):
                row[key.replace('geneId',
                                'Gene')] = genes_by_id[gene_id]['geneSymbol']

    return create_json_response({'anvilRows': rows})
Example #34
def anvil_export(request, project_guid):
    if project_guid == 'all':
        project_guid = None

    if project_guid:
        projects_by_guid = {project_guid: Project.objects.get(guid=project_guid)}
    else:
        projects_by_guid = {p.guid: p for p in Project.objects.filter(projectcategory__name__iexact='anvil')}

    families = _get_over_year_loaded_project_families(projects_by_guid.values())
    prefetch_related_objects(families, 'individual_set')

    saved_variants_by_family = _get_saved_variants_by_family(projects_by_guid.values(), request.user)

    # Handle compound het genes
    compound_het_gene_id_by_family = {}
    for family_guid, saved_variants in saved_variants_by_family.items():
        if len(saved_variants) > 1:
            potential_compound_het_variants = [
                variant for variant in saved_variants if all(gen['numAlt'] < 2 for gen in variant['genotypes'].values())
            ]
            main_gene_ids = {variant['mainTranscript']['geneId'] for variant in potential_compound_het_variants}
            if len(main_gene_ids) > 1:
            # This occurs in compound hets where some hits have primary transcripts in different genes
                for gene_id in main_gene_ids:
                    if all(gene_id in variant['transcripts'] for variant in potential_compound_het_variants):
                        compound_het_gene_id_by_family[family_guid] = gene_id

    individuals = set()
    for family in families:
        individuals.update(family.individual_set.all())
    rows = _get_json_for_individuals(list(individuals), project_guid=project_guid, family_fields=['family_id', 'coded_phenotype'])

    gene_ids = set()
    for row in rows:
        row['Project_ID'] = projects_by_guid[row['projectGuid']].name

        saved_variants = saved_variants_by_family[row['familyGuid']]
        row['numSavedVariants'] = len(saved_variants)
        for i, variant in enumerate(saved_variants):
            genotype = variant['genotypes'].get(row['individualGuid'], {})
            if genotype.get('numAlt', -1) > 0:
                gene_id = compound_het_gene_id_by_family.get(row['familyGuid']) or variant['mainTranscript']['geneId']
                gene_ids.add(gene_id)
                variant_fields = {
                    'Zygosity': 'heterozygous' if genotype['numAlt'] == 1 else 'homozygous',
                    'Chrom': variant['chrom'],
                    'Pos': variant['pos'],
                    'Ref': variant['ref'],
                    'Alt': variant['alt'],
                    'hgvsc': variant['mainTranscript']['hgvsc'],
                    'hgvsp': variant['mainTranscript']['hgvsp'],
                    'Transcript': variant['mainTranscript']['transcriptId'],
                    'geneId': gene_id,
                }
                row.update({'{}-{}'.format(k, i + 1): v for k, v in variant_fields.items()})

    genes_by_id = get_genes(gene_ids)
    for row in rows:
        for key, gene_id in row.items():
            if key.startswith('geneId') and genes_by_id.get(gene_id):
                row[key.replace('geneId', 'Gene')] = genes_by_id[gene_id]['geneSymbol']

    return create_json_response({'anvilRows': rows})
Example #35
def elasticsearch_status(request):
    client = get_es_client()

    disk_fields = ['node', 'disk.avail', 'disk.used', 'disk.percent']
    disk_status = [{
        _to_camel_case(field.replace('.', '_')): disk[field] for field in disk_fields
    } for disk in client.cat.allocation(format="json", h=','.join(disk_fields))]

    index_fields = ['index', 'docs.count', 'store.size', 'creation.date.string']
    indices = [{
        _to_camel_case(field.replace('.', '_')): index[field] for field in index_fields
    } for index in client.cat.indices(format="json", h=','.join(index_fields))
        if index['index'] not in ['.kibana', 'index_operations_log']]

    aliases = defaultdict(list)
    for alias in client.cat.aliases(format="json", h='alias,index'):
        aliases[alias['alias']].append(alias['index'])

    mappings = Index('_all', using=client).get_mapping(doc_type='variant')

    latest_loaded_samples = get_latest_loaded_samples()
    prefetch_related_objects(latest_loaded_samples, 'individual__family__project')
    seqr_index_projects = defaultdict(lambda: defaultdict(set))
    es_projects = set()
    for sample in latest_loaded_samples:
        for index_name in sample.elasticsearch_index.split(','):
            project = sample.individual.family.project
            es_projects.add(project)
            if index_name in aliases:
                for aliased_index_name in aliases[index_name]:
                    seqr_index_projects[aliased_index_name][project].add(sample.individual.guid)
            else:
                seqr_index_projects[index_name.rstrip('*')][project].add(sample.individual.guid)

    for index in indices:
        index_name = index['index']
        index_mapping = mappings[index_name]['mappings']['variant']
        index.update(index_mapping.get('_meta', {}))
        index['hasNestedGenotypes'] = 'samples_num_alt_1' in index_mapping['properties']

        projects_for_index = []
        for index_prefix in seqr_index_projects.keys():
            if index_name.startswith(index_prefix):
                projects_for_index += seqr_index_projects.pop(index_prefix).keys()
        index['projects'] = [{'projectGuid': project.guid, 'projectName': project.name} for project in projects_for_index]

    errors = ['{} does not exist and is used by project(s) {}'.format(
        index, ', '.join(['{} ({} samples)'.format(p.name, len(indivs)) for p, indivs in project_individuals.items()])
    ) for index, project_individuals in seqr_index_projects.items() if project_individuals]

    # TODO remove once all projects are switched off of mongo
    all_mongo_samples = Sample.objects.filter(
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_status=Sample.SAMPLE_STATUS_LOADED,
        elasticsearch_index__isnull=True,
    ).exclude(individual__family__project__in=es_projects).prefetch_related('individual', 'individual__family__project')
    mongo_sample_individual_max_loaded_date = {
        agg['individual__guid']: agg['max_loaded_date'] for agg in
        all_mongo_samples.values('individual__guid').annotate(max_loaded_date=Max('loaded_date'))
    }
    mongo_project_samples = defaultdict(set)
    for s in all_mongo_samples:
        if s.loaded_date == mongo_sample_individual_max_loaded_date[s.individual.guid]:
            mongo_project_samples[s.individual.family.project].add(s.dataset_file_path)
    mongo_projects = [{'projectGuid': project.guid, 'projectName': project.name, 'sourceFilePaths': sample_file_paths}
                      for project, sample_file_paths in mongo_project_samples.items()]

    return create_json_response({
        'indices': indices,
        'diskStats': disk_status,
        'elasticsearchHost': ELASTICSEARCH_SERVER,
        'mongoProjects': mongo_projects,
        'errors': errors,
    })
Example #36
def get_json_for_genes(genes, user=None, add_dbnsfp=False, add_omim=False, add_constraints=False, add_notes=False,
                       add_expression=False, add_primate_ai=False, add_mgi=False):
    """Returns a JSON representation of the given list of GeneInfo.

    Args:
        genes (array): array of django models for the GeneInfo.
    Returns:
        array: array of json objects
    """
    total_gene_constraints = GeneConstraint.objects.count()
    if add_notes:
        gene_notes_json = get_json_for_gene_notes_by_gene_id([gene.gene_id for gene in genes], user)

    def _add_total_constraint_count(result, *args):
        result['totalGenes'] = total_gene_constraints

    def _process_result(result, gene):
        if add_dbnsfp:
            # prefetching only works with all()
            dbnsfp = next((dbnsfp for dbnsfp in gene.dbnsfpgene_set.all()), None)
            if dbnsfp:
                result.update(_get_json_for_model(dbnsfp))
            else:
                result.update(_get_empty_json_for_model(dbNSFPGene))
        if add_primate_ai:
            # prefetching only works with all()
            primate_ai = next((primate_ai for primate_ai in gene.primateai_set.all()), None)
            if primate_ai:
                result['primateAi'] = _get_json_for_model(primate_ai)
        if add_mgi:
            # prefetching only works with all()
            mgi = next((mgi for mgi in gene.mgi_set.all()), None)
            result['mgiMarkerId'] = mgi.marker_id if mgi else None
        if add_omim:
            omim_phenotypes = _get_json_for_models(gene.omim_set.all())
            result['omimPhenotypes'] = [phenotype for phenotype in omim_phenotypes if phenotype['phenotypeMimNumber']]
            result['mimNumber'] = omim_phenotypes[0]['mimNumber'] if omim_phenotypes else None
        if add_constraints:
            constraint = next((constraint for constraint in gene.geneconstraint_set.all()), None)
            result['constraints'] = _get_json_for_model(constraint, process_result=_add_total_constraint_count) if constraint else {}
        if add_notes:
            result['notes'] = gene_notes_json.get(result['geneId'], [])
        if add_expression:
            result['expression'] = {ge.tissue_type: ge.expression_values for ge in gene.geneexpression_set.all()}

    if add_dbnsfp:
        prefetch_related_objects(genes, 'dbnsfpgene_set')
    if add_omim:
        prefetch_related_objects(genes, 'omim_set')
    if add_constraints:
        prefetch_related_objects(genes, Prefetch('geneconstraint_set', queryset=GeneConstraint.objects.order_by('-mis_z', '-pLI')))
    if add_primate_ai:
        prefetch_related_objects(genes, 'primateai_set')
    if add_mgi:
        prefetch_related_objects(genes, 'mgi_set')
    if add_expression:
        prefetch_related_objects(genes, 'geneexpression_set')

    return _get_json_for_models(genes, process_result=_process_result)
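
The ordered Prefetch on geneconstraint_set is what allows _process_result to take the top-ranked constraint by iterating the prefetched rows instead of querying again; .first() after an order_by() builds a fresh queryset and bypasses the cache:

# cache-friendly: reads the prefetched, pre-ordered rows
constraint = next(iter(gene.geneconstraint_set.all()), None)
# cache-busting: clones the queryset and hits the database per gene
constraint = gene.geneconstraint_set.order_by('-mis_z', '-pLI').first()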
Example #37
    def test_unknown(self):
        book1 = Book.objects.get(id=self.book1.id)
        with self.assertRaises(AttributeError):
            prefetch_related_objects([book1], 'unknown_attribute')
Example #38
    def get_object(self, queryset=None):
        obj = super(PrefetchedSingleObjectMixin, self).get_object(queryset)
        prefetch_related_objects([obj], *self.prefetch_related_lookups)
        return obj
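
A usage sketch of the mixin with a hypothetical detail view; prefetch_related_lookups is assumed to be declared as a class attribute the mixin reads:

from django.views.generic import DetailView

class BookDetailView(PrefetchedSingleObjectMixin, DetailView):
    model = Book
    prefetch_related_lookups = ('authors', 'first_time_authors')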
Example #39
def collectQuizTasksForTopic(articles=None, topic=None, project=None):
    taskList = []

    # getTopicTree returns the topic with all levels of its subtopic tree
    topictree = topic.getTopicTree()

    # Prefetching uses one query per related table to populate caches.
    # This helps us avoid per row queries when looping over rows.
    prefetch_related_objects(topictree, "questions__answers")

    # Set up the prefetch to retrieve all available hints for each article
    allHints = NLPHints.objects.all()
    fetchHints = Prefetch("hints", queryset=allHints, to_attr="allHints")
    logger.info("Found %d hints" % (len(allHints), ))

    # Set up Prefetch that will cache just the highlights matching
    # this topic to article.highlight_taskruns[n].highlightsForTopic
    topicHighlights = (HighlightGroup.objects.filter(
        topic=topic).prefetch_related("submitted_answers"))
    fetchHighlights = Prefetch("highlight_taskruns__highlights",
                               queryset=topicHighlights,
                               to_attr="highlightsForTopic")
    # Find articles highlighted with the topic within the provided queryset
    # distinct is essential after prefetch_related chained method
    articles = (articles.filter(highlight_taskruns__highlights__topic=topic).
                prefetch_related(fetchHighlights).prefetch_related(
                    fetchHints).order_by("id").distinct())

    project_data = ProjectSerializer(project, many=False).data
    topictree_data = TopicSerializer(topictree, many=True).data

    # With the prefetching config above, the loops below will
    # be hitting caches. Only 8 queries should be issued against 8 tables,
    # i.e. The query count will not be a function of number of rows returned.
    for article in articles:
        # Our prefetched highlightsForTopic is nested under the
        # ArticleHighlight record, in HighlightGroup. Not expecting more
        # than one ArticleHighlight record, but safest to code as if
        # there could be more than one.

        highlights = [
            hg for ah in article.highlight_taskruns.all()
            for hg in ah.highlightsForTopic
        ]
        # At this point, we are processing one topic for one article.
        # All the highlights for a given topic/case need to be in one task.
        # We sort here instead of in the above prefetch because we want to
        # ignore the potential grouping effect if there was more than one
        # ArticleHighlight in the above list comprehension.
        # See data.pybossa_api.save_highlight_taskrun for import code.
        sortkey = lambda x: x.case_number
        hg_by_case = sorted(highlights, key=sortkey)

        for case_number, hg_case_group in groupby(hg_by_case, key=sortkey):
            taskList.append({
                "project": project_data,
                "topTopicId": topic.id,
                "topictree": topictree_data,
                "article": ArticleSerializer(article, many=False).data,
                "highlights": HighlightGroupSerializer(hg_case_group, many=True).data,
                "hints": NLPHintSerializer(article.allHints, many=True).data,
            })

    return taskList
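The Prefetch(to_attr=...) pattern used above is worth isolating: a filtered or ordered queryset is cached on each parent row as a plain list attribute. A minimal sketch reusing the names from this function; Article is assumed to be the model behind the articles queryset, and the order_by is illustrative:

# Minimal sketch of the Prefetch(to_attr=...) pattern from the function above.
from django.db.models import Prefetch

articles = Article.objects.prefetch_related(
    Prefetch(
        "hints",                                   # related name on Article
        queryset=NLPHints.objects.order_by("id"),  # any narrowed queryset works
        to_attr="allHints",                        # cached as a plain list
    )
)
for article in articles:
    print(len(article.allHints))  # no per-row query; the list was prefetched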
Example #40
    def get_attrs(self, item_list, user, **kwargs):
        alert_rules = {item.id: item for item in item_list}
        prefetch_related_objects(item_list, "snuba_query__environment")

        result = defaultdict(dict)
        triggers = AlertRuleTrigger.objects.filter(alert_rule__in=item_list).order_by("label")
        serialized_triggers = serialize(list(triggers))
        for trigger, serialized in zip(triggers, serialized_triggers):
            alert_rule_triggers = result[alert_rules[trigger.alert_rule_id]].setdefault(
                "triggers", []
            )
            alert_rule_triggers.append(serialized)

        alert_rule_projects = AlertRule.objects.filter(
            id__in=[item.id for item in item_list]
        ).values_list("id", "snuba_query__subscriptions__project__slug")
        for alert_rule_id, project_slug in alert_rule_projects:
            rule_result = result[alert_rules[alert_rule_id]].setdefault("projects", [])
            rule_result.append(project_slug)

        for rule_activity in AlertRuleActivity.objects.filter(
            alert_rule__in=item_list, type=AlertRuleActivityType.CREATED.value
        ).select_related("alert_rule", "user"):
            if rule_activity.user:
                created_by = {
                    "id": rule_activity.user.id,
                    "name": rule_activity.user.get_display_name(),
                    "email": rule_activity.user.email,
                }
            else:
                created_by = None

            result[alert_rules[rule_activity.alert_rule.id]].update({"created_by": created_by})

        resolved_actors = {}
        owners_by_type = defaultdict(list)
        for item in item_list:
            if item.owner_id is not None:
                owners_by_type[actor_type_to_string(item.owner.type)].append(item.owner_id)

        for k, v in ACTOR_TYPES.items():
            resolved_actors[k] = {
                a.actor_id: a.id
                for a in actor_type_to_class(v).objects.filter(actor_id__in=owners_by_type[k])
            }

        for alert_rule in alert_rules.values():
            if alert_rule.owner_id:
                actor_type = actor_type_to_string(alert_rule.owner.type)
                if alert_rule.owner_id in resolved_actors[actor_type]:
                    result[alert_rule][
                        "owner"
                    ] = f"{actor_type}:{resolved_actors[actor_type][alert_rule.owner_id]}"

        if "original_alert_rule" in self.expand:
            snapshot_activities = AlertRuleActivity.objects.filter(
                alert_rule__in=item_list,
                type=AlertRuleActivityType.SNAPSHOT.value,
            )
            for activity in snapshot_activities:
                result[alert_rules[activity.alert_rule_id]][
                    "originalAlertRuleId"
                ] = activity.previous_alert_rule_id

        return result
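The result-building pattern above, a defaultdict keyed by parent instance with child rows appended under a key, stands on its own. A standalone sketch with hypothetical rule and trigger data; only the defaultdict/setdefault shape is the point:

# Standalone sketch of the grouping pattern in get_attrs; data is hypothetical.
from collections import defaultdict

rules = {1: "rule-a", 2: "rule-b"}                   # id -> parent object
triggers = [(1, "warn"), (1, "crit"), (2, "warn")]   # (rule_id, label)

result = defaultdict(dict)
for rule_id, label in triggers:
    result[rules[rule_id]].setdefault("triggers", []).append(label)

assert result["rule-a"]["triggers"] == ["warn", "crit"]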
Example #41
    def get_user_group_values(self, user) -> List[str]:
        if user.is_anonymous:
            return []
        prefetch_related_objects([user], "groups")
        return [g.name for g in user.groups.all()]
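Prefetching a single instance like this pays off when the same object is passed through several helpers: after one query, every later call to user.groups.all() is served from the cache. A minimal sketch assuming Django's stock auth User and at least one existing user:

# Sketch: one prefetch, then repeated .all() calls hit the cache.
from django.contrib.auth.models import User
from django.db.models import prefetch_related_objects

user = User.objects.first()
prefetch_related_objects([user], "groups")    # one query
names = [g.name for g in user.groups.all()]   # cached, no query
has_staff = any(g.name == "staff" for g in user.groups.all())  # still cached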
Example #42
def _get_json_for_individuals(individuals,
                              user=None,
                              project_guid=None,
                              family_guid=None,
                              add_sample_guids_field=False,
                              family_fields=None,
                              skip_nested=False):
    """Returns a JSON representation for the given list of Individuals.

    Args:
        individuals (array): array of django models for the individual.
        user (object): Django User object for determining whether to include restricted/internal-only fields
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
        family_guid (string): An optional field to use as the familyGuid instead of querying the DB
        add_sample_guids_field (boolean): A flag to indicate whether sample ids should be added
        family_fields (array): Optional list of family model fields to include on each individual
        skip_nested (boolean): If True, serialize family_id directly rather than nested family fields
    Returns:
        array: array of json objects
    """
    def _get_case_review_status_modified_by(modified_by):
        return modified_by.email or modified_by.username if hasattr(
            modified_by, 'email') else modified_by

    def _load_phenotips_data(phenotips_data):
        phenotips_json = None
        if phenotips_data:
            try:
                phenotips_json = json.loads(phenotips_data)
            except Exception as e:
                logger.error("Couldn't parse phenotips: {}".format(e))
        return phenotips_json

    def _process_result(result, individual):
        mother = result.pop('mother', None)
        father = result.pop('father', None)

        result.update({
            'caseReviewStatusLastModifiedBy':
            _get_case_review_status_modified_by(
                result.get('caseReviewStatusLastModifiedBy')),
            'phenotipsData':
            _load_phenotips_data(result['phenotipsData']),
            'maternalGuid':
            mother.guid if mother else None,
            'paternalGuid':
            father.guid if father else None,
            'maternalId':
            mother.individual_id if mother else None,
            'paternalId':
            father.individual_id if father else None,
            'displayName':
            result['displayName'] or result['individualId'],
        })

        if add_sample_guids_field:
            result['sampleGuids'] = [
                s.guid for s in individual.sample_set.all()
            ]

    if project_guid or not skip_nested:
        nested_fields = [
            {
                'fields': ('family', 'guid'),
                'value': family_guid
            },
            {
                'fields': ('family', 'project', 'guid'),
                'key': 'projectGuid',
                'value': project_guid
            },
        ]
        if family_fields:
            for field in family_fields:
                nested_fields.append({
                    'fields': ('family', field),
                    'key': _to_camel_case(field)
                })
        kwargs = {'nested_fields': nested_fields}
    else:
        kwargs = {'additional_model_fields': ['family_id']}

    prefetch_related_objects(individuals, 'mother')
    prefetch_related_objects(individuals, 'father')
    prefetch_related_objects(individuals,
                             'case_review_status_last_modified_by')
    if add_sample_guids_field:
        prefetch_related_objects(individuals, 'sample_set')

    return _get_json_for_models(individuals,
                                user=user,
                                process_result=_process_result,
                                **kwargs)
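Note that prefetch_related_objects accepts any number of lookups in one call (its signature is prefetch_related_objects(model_instances, *related_lookups)), so the three single-lookup calls above can be collapsed; each lookup still issues its own query. An equivalent sketch:

# Equivalent to the three separate calls above.
prefetch_related_objects(
    individuals,
    'mother',
    'father',
    'case_review_status_last_modified_by',
)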
Example #43
    def handle(self, *args, **options):
        """transfer project"""
        project_arg = options['project']
        elasticsearch_index = options['es_index']

        project = Project.objects.get(
            Q(name=project_arg) | Q(guid=project_arg))
        logger.info('Updating project genome version for {}'.format(
            project.name))

        # Validate the provided index
        logger.info('Validating es index {}'.format(elasticsearch_index))
        sample_ids, index_metadata = get_elasticsearch_index_samples(
            elasticsearch_index)
        validate_index_metadata(index_metadata,
                                project,
                                elasticsearch_index,
                                genome_version=GENOME_VERSION_GRCh38)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping={},
        )

        unmatched_samples = set(sample_ids) - set(
            matched_sample_id_to_sample_record.keys())
        if len(unmatched_samples) > 0:
            raise CommandError(
                'Matches not found for ES sample ids: {}.'.format(
                    ', '.join(unmatched_samples)))

        prefetch_related_objects(matched_sample_id_to_sample_record.values(),
                                 'individual__family')
        included_families = {
            sample.individual.family
            for sample in matched_sample_id_to_sample_record.values()
        }
        missing_individuals = Individual.objects.filter(
            family__in=included_families,
            sample__is_active=True,
            sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        ).exclude(sample__in=matched_sample_id_to_sample_record.values()
                  ).select_related('family')
        missing_family_individuals = defaultdict(list)
        for individual in missing_individuals:
            missing_family_individuals[individual.family].append(individual)

        if missing_family_individuals:
            raise CommandError(
                'The following families are included in the callset but are missing some family members: {}.'
                .format(', '.join([
                    '{} ({})'.format(
                        family.family_id,
                        ', '.join([i.individual_id for i in missing_indivs]))
                    for family, missing_indivs in
                    missing_family_individuals.items()
                ])))

        # Get and clean up expected saved variants
        saved_variant_models_by_guid = {
            v.guid: v
            for v in SavedVariant.objects.filter(family__project=project)
        }
        deleted_no_tags = set()
        for guid, variant in saved_variant_models_by_guid.items():
            if not (variant.varianttag_set.count()
                    or variant.variantnote_set.count()):
                deleted_no_tags.add(guid)

        if deleted_no_tags:
            if raw_input(
                    'Do you want to delete the following {} saved variants with no tags (y/n)?: {} '
                    .format(len(deleted_no_tags),
                            ', '.join(deleted_no_tags))) == 'y':
                for guid in deleted_no_tags:
                    saved_variant_models_by_guid.pop(guid).delete()
                logger.info('Deleted {} variants'.format(len(deleted_no_tags)))

        expected_families = {
            sv.family
            for sv in saved_variant_models_by_guid.values()
        }
        missing_families = expected_families - included_families
        if missing_families:
            raise CommandError(
                'The following families have saved variants but are missing from the callset: {}.'
                .format(', '.join([f.family_id for f in missing_families])))

        # Lift-over saved variants
        _update_variant_samples(matched_sample_id_to_sample_record,
                                elasticsearch_index, dataset_path)
        saved_variants = get_json_for_saved_variants(
            saved_variant_models_by_guid.values(), add_details=True)
        saved_variants_to_lift = [
            v for v in saved_variants
            if v['genomeVersion'] != GENOME_VERSION_GRCh38
        ]

        num_already_lifted = len(saved_variants) - len(saved_variants_to_lift)
        if num_already_lifted:
            if raw_input(
                    'Found {} saved variants already on Hg38. Continue with liftover (y/n)? '
                    .format(num_already_lifted)) != 'y':
                raise CommandError(
                    'Error: found {} saved variants already on Hg38'.format(
                        num_already_lifted))
        logger.info(
            'Lifting over {} variants (skipping {} that are already lifted)'.
            format(len(saved_variants_to_lift), num_already_lifted))

        liftover_to_38 = LiftOver('hg19', 'hg38')
        hg37_to_hg38_xpos = {}
        lift_failed = {}
        for v in saved_variants_to_lift:
            if not (hg37_to_hg38_xpos.get(v['xpos'])
                    or v['xpos'] in lift_failed):
                hg38_coord = liftover_to_38.convert_coordinate(
                    'chr{}'.format(v['chrom'].lstrip('chr')), int(v['pos']))
                if hg38_coord and hg38_coord[0]:
                    hg37_to_hg38_xpos[v['xpos']] = get_xpos(
                        hg38_coord[0][0], hg38_coord[0][1])
                else:
                    lift_failed[v['xpos']] = v

        if lift_failed:
            if raw_input(
                    'Unable to lift over the following {} coordinates. Continue with update (y/n)?: {} '
                    .format(
                        len(lift_failed), ', '.join([
                            '{}:{}-{}-{} ({})'.format(
                                v['chrom'], v['pos'], v['ref'], v['alt'],
                                ', '.join(v['familyGuids']))
                            for v in lift_failed.values()
                        ]))) != 'y':
                raise CommandError(
                    'Error: unable to lift over {} variants'.format(
                        len(lift_failed)))

        saved_variants_map = defaultdict(list)
        for v in saved_variants_to_lift:
            if hg37_to_hg38_xpos.get(v['xpos']):
                variant_model = saved_variant_models_by_guid[v['variantGuid']]
                saved_variants_map[(hg37_to_hg38_xpos[v['xpos']], v['ref'],
                                    v['alt'])].append(variant_model)

        es_variants = get_es_variants_for_variant_tuples(
            expected_families, saved_variants_map.keys())

        missing_variants = set(
            saved_variants_map.keys()) - {(v['xpos'], v['ref'], v['alt'])
                                          for v in es_variants}
        if missing_variants:
            missing_variant_strings = []
            for xpos, ref, alt in missing_variants:
                var_id = '{}-{}-{}'.format(xpos, ref, alt)
                for v in saved_variants_map[(xpos, ref, alt)]:
                    tags = v.varianttag_set.all()
                    notes = v.variantnote_set.all()
                    missing_variant_strings.append(
                        '{var_id} {family_id}: {tags} ({guid})'.format(
                            var_id=var_id,
                            family_id=v.family.family_id,
                            guid=v.guid,
                            tags=', '.join([
                                tag.variant_tag_type.name for tag in tags
                            ]) if tags else 'No Tags; {}'.format('; '.join(
                                [note.note for note in notes]))))
            if raw_input(
                    'Unable to find the following {} variants in the index. Continue with update (y/n)?:\n{}\n'
                    .format(len(missing_variants),
                            '\n'.join(missing_variant_strings))) != 'y':
                raise CommandError(
                    'Error: unable to find {} lifted-over variants'.format(
                        len(missing_variants)))

        logger.info('Successfully lifted over {} variants'.format(
            len(es_variants)))

        #  Update saved variants
        missing_family_count = 0
        for var in es_variants:
            saved_variant_models = saved_variants_map[(var['xpos'], var['ref'],
                                                       var['alt'])]
            missing_saved_variants = [
                v for v in saved_variant_models
                if v.family.guid not in var['familyGuids']
            ]
            if missing_saved_variants:
                variant_id = '{}-{}-{}-{}'.format(var['chrom'], var['pos'],
                                                  var['ref'], var['alt'])
                if raw_input(
                    ('Variant {} (hg37: {}) not found for expected families {}. Continue with update (y/n)? '
                     .format(
                         variant_id, missing_saved_variants[0].xpos,
                         ', '.join([
                             '{} ({})'.format(v.family.guid, v.guid)
                             for v in missing_saved_variants
                         ])))) == 'y':
                    var = get_single_es_variant(
                        [v.family for v in saved_variant_models],
                        variant_id,
                        return_all_queried_families=True)
                    missing_family_count += len(missing_saved_variants)
                else:
                    raise CommandError(
                        'Error: unable to find family data for lifted over variant'
                    )
            for saved_variant in saved_variant_models:
                saved_variant.xpos_start = var['xpos']
                saved_variant.saved_variant_json = var
                saved_variant.save()

        logger.info('Successfully updated {} variants'.format(
            len(es_variants)))

        # Update project and sample data
        update_model_from_json(project,
                               {'genome_version': GENOME_VERSION_GRCh38})

        reset_cached_search_results(project)

        logger.info('---Done---')
        logger.info(
            'Successfully lifted over {} variants. Skipped {} failed variants. Family data not updated for {} variants'
            .format(len(es_variants),
                    len(missing_variants) + len(lift_failed),
                    missing_family_count))
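The LiftOver('hg19', 'hg38') object used above comes from the pyliftover package; convert_coordinate returns a list of (chrom, pos, strand, score) tuples, an empty list when the position has no mapping, and None for an unknown chromosome. A minimal sketch with an illustrative coordinate:

# Minimal pyliftover sketch matching the usage above; the coordinate is
# illustrative. convert_coordinate returns [] when liftover fails.
from pyliftover import LiftOver

liftover_to_38 = LiftOver('hg19', 'hg38')
coords = liftover_to_38.convert_coordinate('chr1', 1000000)
if coords and coords[0]:
    new_chrom, new_pos = coords[0][0], coords[0][1]
    print('lifted to {}:{}'.format(new_chrom, new_pos))
else:
    print('liftover failed')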
Example #44
    def get_elasticsearch_variants(
            self,
            project_id,
            family_id=None,
            variant_filter=None,
            genotype_filter=None,
            variant_id_filter=None,
            quality_filter=None,
            indivs_to_consider=None,
            include_all_consequences=False,
            user=None,
            max_results_limit=settings.VARIANT_QUERY_RESULTS_LIMIT,
        ):
        from xbrowse_server.base.models import Project, Family, Individual
        from seqr.models import Sample
        from seqr.utils.es_utils import _liftover_grch38_to_grch37
        from xbrowse_server.mall import get_reference

        redis_client = None
        if settings.REDIS_SERVICE_HOSTNAME:
            try:
                redis_client = redis.StrictRedis(host=settings.REDIS_SERVICE_HOSTNAME, socket_connect_timeout=3)
                redis_client.ping()
            except redis.exceptions.TimeoutError as e:
                logger.warn("Unable to connect to redis host: {}".format(settings.REDIS_SERVICE_HOSTNAME) + str(e))
                redis_client = None

        cache_key = "Variants___%s___%s___%s" % (
            project_id,
            family_id,
            json.dumps([
                variant_filter.toJSON() if variant_filter else None,
                genotype_filter,
                quality_filter,
                variant_id_filter,
                indivs_to_consider,
                include_all_consequences,
            ])
        )

        cached_results = redis_client and redis_client.get(cache_key)
        if cached_results is not None:
            variant_results = json.loads(cached_results)
            return [Variant.fromJSON(variant_json) for variant_json in variant_results]

        if family_id is None:
            project = Project.objects.get(project_id=project_id)
            elasticsearch_index = project.get_elasticsearch_index()
            logger.info("Searching in project elasticsearch index: " + str(elasticsearch_index))
        else:
            family = Family.objects.get(project__project_id=project_id, family_id=family_id)
            elasticsearch_index = family.get_elasticsearch_index()
            project = family.project
            logger.info("Searching in family elasticsearch index: " + str(elasticsearch_index))

        if indivs_to_consider is None and genotype_filter and not family_id:
            indivs_to_consider = genotype_filter.keys()

        individuals = Individual.objects.filter(family__project__project_id=project_id).only("indiv_id", "seqr_individual")
        if indivs_to_consider:
            individuals = individuals.filter(indiv_id__in=indivs_to_consider)
        if family_id is not None:
            individuals = individuals.filter(family__family_id=family_id)
            if not indivs_to_consider:
                indivs_to_consider = [i.indiv_id for i in individuals]
        prefetch_related_objects(individuals, "seqr_individual")

        es_indices = [index.rstrip('*') for index in elasticsearch_index.split(',')]

        samples = Sample.objects.filter(
            individual__in=[i.seqr_individual for i in individuals if i.seqr_individual],
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            sample_status=Sample.SAMPLE_STATUS_LOADED,
            elasticsearch_index__startswith=es_indices[0],
            loaded_date__isnull=False,
        ).order_by('-loaded_date')
        prefetch_related_objects(samples, "individual")

        family_individual_ids_to_sample_ids = {}
        for i in individuals:
            indiv_id = i.indiv_id
            sample_id = None
            if i.seqr_individual:
                sample_id = next((
                    sample.sample_id for sample in samples
                    if sample.individual == i.seqr_individual and sample.elasticsearch_index.startswith(tuple(es_indices))
                ), None)
            family_individual_ids_to_sample_ids[indiv_id] = sample_id or indiv_id

        query_json = self._make_db_query(genotype_filter, variant_filter)

        es_client = elasticsearch.Elasticsearch(host=settings.ELASTICSEARCH_SERVICE_HOSTNAME, timeout=30)
        mapping = es_client.indices.get_mapping(str(elasticsearch_index) + "*")
        index_fields = {}
        is_parent_child = False
        is_nested = False
        if elasticsearch_index in mapping and 'join_field' in mapping[elasticsearch_index]["mappings"]["variant"]["properties"]:
            # Nested indices are not sharded so all samples are in the single index
            logger.info("matching indices: " + str(elasticsearch_index))
            is_parent_child = True
        elif elasticsearch_index in mapping and 'genotypes' in mapping[elasticsearch_index]["mappings"]["variant"]["properties"]:
            # Nested indices are not sharded so all samples are in the single index
            logger.info("matching indices: " + str(elasticsearch_index))
            is_nested = True
        elif family_id is not None and len(family_individual_ids_to_sample_ids) > 0:
            # figure out which index to use
            # TODO add caching

            matching_indices = []

            for raw_sample_id in family_individual_ids_to_sample_ids.values():
                sample_id = _encode_name(raw_sample_id)
                for index_name, index_mapping in mapping.items():
                    if sample_id+"_num_alt" in index_mapping["mappings"]["variant"]["properties"]:
                        matching_indices.append(index_name)
                        index_fields.update(index_mapping["mappings"]["variant"]["properties"])
                if len(matching_indices) > 0:
                    break

            if not matching_indices:
                if family_id is not None and not family_individual_ids_to_sample_ids:
                    logger.error("no individuals found for family %s" % (family_id))
                elif not mapping:
                    logger.error("no es mapping found for found with prefix %s" % (elasticsearch_index))
                else:
                    logger.error("%s not found in %s:\n%s" % (indiv_id, elasticsearch_index, pformat(index_mapping["mappings"]["variant"]["properties"])))
            else:
                elasticsearch_index = ",".join(matching_indices)
                logger.info("matching indices: " + str(elasticsearch_index))
        else:
            elasticsearch_index = str(elasticsearch_index)+"*"

        if not index_fields:
            for index_mapping in mapping.values():
                index_fields.update(index_mapping["mappings"]["variant"]["properties"])

        s = elasticsearch_dsl.Search(using=es_client, index=elasticsearch_index) #",".join(indices))

        if variant_id_filter is not None:
            variant_id_filter_term = None
            for variant_id in variant_id_filter:
                q_obj = Q('term', **{"variantId": variant_id})
                if variant_id_filter_term is None:
                    variant_id_filter_term = q_obj
                else:
                    variant_id_filter_term |= q_obj
            s = s.filter(variant_id_filter_term)

        genotype_filters = {}
        for key, value in query_json.items():
            if key.startswith("genotypes"):
                indiv_id = ".".join(key.split(".")[1:-1])
                sample_id = family_individual_ids_to_sample_ids.get(indiv_id) or indiv_id
                genotype_filter = value
                # isinstance correctly handles int and py2 str/unicode values
                if isinstance(genotype_filter, (int, basestring)):
                    genotype_filters[sample_id] = [('term', genotype_filter)]
                elif '$gte' in genotype_filter:
                    genotype_filter = {k.replace("$", ""): v for k, v in genotype_filter.items()}
                    genotype_filters[sample_id] = [('range', genotype_filter)]
                elif "$in" in genotype_filter:
                    num_alt_values = genotype_filter['$in']
                    genotype_filters[sample_id] = [('term', num_alt_value) for num_alt_value in num_alt_values]

        sample_ids = [family_individual_ids_to_sample_ids.get(indiv_id) or indiv_id for indiv_id in (indivs_to_consider or [])]

        min_ab = None
        min_gq = None
        if quality_filter is not None and indivs_to_consider:
            min_ab = quality_filter.get('min_ab')
            if min_ab is not None and not is_nested:
                min_ab /= 100.0  # convert to fraction
            min_gq = quality_filter.get('min_gq')
            vcf_filter = quality_filter.get('vcf_filter')
            if vcf_filter is not None:
                s = s.filter(~Q('exists', field='filters'))

        if is_parent_child:
            quality_q = Q()
            if min_ab or min_gq:
                if min_ab is not None:
                    #  AB only relevant for hets
                    quality_q &= Q(~Q('term', num_alt=1) | Q('range', ab={'gte': min_ab}))
                if min_gq is not None:
                    quality_q &= Q('range', gq={'gte': min_gq})

            if genotype_filters:
                # Return inner hits for all requested samples, even those without a specified genotype
                genotype_sample_ids = sample_ids or genotype_filters.keys()
                genotype_q = None
                for sample_id in genotype_sample_ids:
                    sample_q = Q(Q('term', sample_id=sample_id) & quality_q)
                    if genotype_filters.get(sample_id):
                        q = None
                        for (op, val) in genotype_filters[sample_id]:
                            if q:
                                q |= Q(op, num_alt=val)
                            else:
                                q = Q(op, num_alt=val)
                        sample_q &= q
                    if not genotype_q:
                        genotype_q = sample_q
                    else:
                        genotype_q |= sample_q
                genotype_kwargs = {'query': genotype_q, 'min_children': len(genotype_sample_ids)}
            elif sample_ids:
                # Subquery for child docs with the requested sample IDs and quality metrics
                sample_id_q = Q('terms', sample_id=sample_ids) & quality_q
                # Only return variants where at least one of the requested samples has an alt allele
                s = s.filter(Q('has_child', type='genotype', query=(Q(Q('range', num_alt={'gte': 1}) & sample_id_q))))
                # Return inner hits for all the requested samples regardless of genotype
                genotype_kwargs = {'query': sample_id_q, 'min_children': len(sample_ids)}
            else:
                # Return all inner hits for the variant
                # This case is only used by gene search, which also does not use quality filters
                genotype_kwargs = {'query': Q()}

            s = s.filter(Q('has_child', type='genotype',
                           inner_hits={'size': genotype_kwargs.get('min_children', MAX_INNER_HITS)}, **genotype_kwargs))

        if is_nested:
            if sample_ids and min_ab is not None:
                min_ab_filter_val = int(min_ab) - int(min_ab % 5)
                for sample_id in sample_ids:
                    q = Q('term', samples_ab_0_to_5=sample_id)
                    for i in range(5, min_ab_filter_val, 5):
                        q = q | Q('term', **{'samples_ab_{}_to_{}'.format(i, i+5): sample_id})
                    #  AB only relevant for hets
                    s = s.filter(~Q(q) | ~Q('term', samples_num_alt_1=sample_id))
            if sample_ids and min_gq is not None:
                min_gq_filter_val = int(min_gq) - int(min_gq % 5)
                for sample_id in sample_ids:
                    q = Q('term', samples_gq_0_to_5=sample_id)
                    for i in range(5, min_gq_filter_val, 5):
                        q = q | Q('term', **{'samples_gq_{}_to_{}'.format(i, i+5): sample_id})
                    s = s.filter(~Q(q))

            if genotype_filters:
                for sample_id, queries in genotype_filters.items():
                    if queries[0][0] == 'range':
                        allowed_num_alt = range(queries[0][1]['gte'], 3)
                    else:
                        allowed_num_alt = [query[1] for query in queries]

                    if 0 in allowed_num_alt:
                        q = Q('term', samples_no_call=sample_id)
                        if 1 not in allowed_num_alt:
                            q = q | Q('term', samples_num_alt_1=sample_id)
                        if 2 not in allowed_num_alt:
                            q = q | Q('term', samples_num_alt_2=sample_id)
                        s = s.filter(~q)
                    else:
                        q = Q('term', **{'samples_num_alt_{}'.format(allowed_num_alt[0]): sample_id})
                        for num_alt in allowed_num_alt[1:]:
                            q = q | Q('term', **{'samples_num_alt_{}'.format(num_alt): sample_id})
                        s = s.filter(q)

            elif sample_ids:
                s = s.filter(Q('terms', samples_num_alt_1=sample_ids) | Q('terms', samples_num_alt_2=sample_ids))

        else:
            for sample_id, queries in genotype_filters.items():
                encoded_sample_id = _encode_name(sample_id)
                q = Q(queries[0][0], **{encoded_sample_id + "_num_alt": queries[0][1]})
                for (op, val) in queries[1:]:
                    q = q | Q(op, **{encoded_sample_id + "_num_alt": val})
                s = s.filter(q)

            if sample_ids:
                atleast_one_nonref_genotype_filter = None
                for sample_id in sample_ids:
                    encoded_sample_id = _encode_name(sample_id)
                    q = Q('range', **{encoded_sample_id+"_num_alt": {'gte': 1}})
                    if atleast_one_nonref_genotype_filter is None:
                        atleast_one_nonref_genotype_filter = q
                    else:
                        atleast_one_nonref_genotype_filter |= q

                s = s.filter(atleast_one_nonref_genotype_filter)

            if min_ab or min_gq:
                for sample_id in sample_ids:
                    encoded_sample_id = _encode_name(sample_id)

                    if min_ab:
                        s = s.filter(
                            ~Q('term', **{encoded_sample_id+"_num_alt": 1}) |
                            Q('range', **{encoded_sample_id+"_ab": {'gte': min_ab}}))
                        #logger.info("### ADDED FILTER: " + str({encoded_sample_id+"_ab": {'gte': min_ab}}))
                    if min_gq:
                        s = s.filter('range', **{encoded_sample_id+"_gq": {'gte': min_gq}})
                        #logger.info("### ADDED FILTER: " + str({encoded_sample_id+"_gq": {'gte': min_gq}}))

        # parse variant query
        annotation_groups_map = ANNOTATION_GROUPS_MAP_INTERNAL if user and user.is_staff else ANNOTATION_GROUPS_MAP

        for key, value in query_json.items():
            if key == 'db_tags':
                so_annotations = query_json.get('db_tags', {}).get('$in', [])

                # handle clinvar filters
                selected_so_annotations_set = set(so_annotations)

                all_clinvar_filters_set = set(annotation_groups_map.get("clinvar", {}).get("children", []))
                selected_clinvar_filters_set = all_clinvar_filters_set & selected_so_annotations_set

                all_hgmd_filters_set = set(annotation_groups_map.get("hgmd", {}).get("children", []))
                selected_hgmd_filters_set = all_hgmd_filters_set & selected_so_annotations_set

                vep_consequences = list(selected_so_annotations_set - selected_clinvar_filters_set - selected_hgmd_filters_set)
                consequences_filter = Q("terms", transcriptConsequenceTerms=vep_consequences)

                if selected_clinvar_filters_set:
                    clinvar_clinical_significance_terms = set()
                    for clinvar_filter in selected_clinvar_filters_set:
                        # translate selected filters to the corresponding clinvar clinical consequence terms
                        if clinvar_filter == "pathogenic":
                            clinvar_clinical_significance_terms.update(["Pathogenic", "Pathogenic/Likely_pathogenic"])
                        elif clinvar_filter == "likely_pathogenic":
                            clinvar_clinical_significance_terms.update(["Likely_pathogenic", "Pathogenic/Likely_pathogenic"])
                        elif clinvar_filter == "benign":
                            clinvar_clinical_significance_terms.update(["Benign", "Benign/Likely_benign"])
                        elif clinvar_filter == "likely_benign":
                            clinvar_clinical_significance_terms.update(["Likely_benign", "Benign/Likely_benign"])
                        elif clinvar_filter == "vus_or_conflicting":
                            clinvar_clinical_significance_terms.update([
                                "Conflicting_interpretations_of_pathogenicity",
                                "Uncertain_significance",
                                "not_provided",
                                "other"])
                        else:
                            raise ValueError("Unexpected clinvar filter: " + str(clinvar_filter))

                    consequences_filter = consequences_filter | Q("terms", clinvar_clinical_significance=list(clinvar_clinical_significance_terms))

                if selected_hgmd_filters_set:
                    hgmd_class = set()
                    for hgmd_filter in selected_hgmd_filters_set:
                        # translate selected filters to the corresponding hgmd clinical consequence terms
                        if hgmd_filter == "disease_causing":
                            hgmd_class.update(["DM"])
                        elif hgmd_filter == "likely_disease_causing":
                            hgmd_class.update(["DM?"])
                        elif hgmd_filter == "hgmd_other":
                            hgmd_class.update(["DP", "DFP", "FP", "FTV"])
                        else:
                            raise ValueError("Unexpected hgmd filter: " + str(hgmd_filter))

                    consequences_filter = consequences_filter | Q("terms", hgmd_class=list(hgmd_class))

                if 'intergenic_variant' in vep_consequences:
                    # for many intergenic variants VEP doesn't add any annotations, so if the user selected 'intergenic_variant', also match variants where transcriptConsequenceTerms is empty
                    consequences_filter = consequences_filter | ~Q('exists', field='transcriptConsequenceTerms')

                s = s.filter(consequences_filter)
                #logger.info("==> transcriptConsequenceTerms: %s" % str(vep_consequences))

            if key.startswith("genotypes"):
                continue

            if key == "db_gene_ids":
                db_gene_ids = query_json.get('db_gene_ids', {})

                exclude_genes = db_gene_ids.get('$nin', [])
                gene_ids = exclude_genes or db_gene_ids.get('$in', [])

                if exclude_genes:
                    s = s.exclude("terms", geneIds=gene_ids)
                else:
                    s = s.filter("terms",  geneIds=gene_ids)
                #logger.info("==> %s %s" % ("exclude" if exclude_genes else "include", "geneIds: " + str(gene_ids)))

            if key == "$or" and type(value) == list:
                q_terms = None
                for region_filter in value:
                    xpos_filters = region_filter.get("$and", {})

                    # for example: $or : [{'$and': [{'xpos': {'$gte': 12345}}, {'xpos': {'$lte': 54321}}]}]
                    xpos_filters_dict = {}
                    for xpos_filter in xpos_filters:
                        xpos_filter_setting = xpos_filter["xpos"]  # for example {'$gte': 12345} or {'$lte': 54321}
                        xpos_filters_dict.update(xpos_filter_setting)

                    xpos_filter_setting = {k.replace("$", ""): v for k, v in xpos_filters_dict.items()}
                    q = Q('range', **{"xpos": xpos_filter_setting})
                    if q_terms is None:
                        q_terms = q
                    else:
                        q_terms |= q
                if q_terms is not None:
                    s = s.filter(q_terms)

                #logger.info("==> xpos range: " + str({"xpos": xpos_filter_setting}))

            af_key_map = {
                "db_freqs.AF": ["AF"],
                "db_freqs.1kg_wgs_phase3": ["g1k_POPMAX_AF"],
                "db_freqs.exac_v3": ["exac_AF_POPMAX"],
                "db_freqs.topmed": ["topmed_AF"],
                "db_freqs.gnomad_exomes": ["gnomad_exomes_AF_POPMAX", "gnomad_exomes_AF_POPMAX_OR_GLOBAL"],
                "db_freqs.gnomad_genomes": ["gnomad_genomes_AF_POPMAX", "gnomad_genomes_AF_POPMAX_OR_GLOBAL"],
                "db_freqs.gnomad-exomes2": ["gnomad_exomes_AF_POPMAX", "gnomad_exomes_AF_POPMAX_OR_GLOBAL"],
                "db_freqs.gnomad-genomes2": ["gnomad_genomes_AF_POPMAX", "gnomad_genomes_AF_POPMAX_OR_GLOBAL"],
            }

            if key in af_key_map:
                for filter_key in af_key_map[key]:
                    af_filter_setting = {k.replace("$", ""): v for k, v in value.items()}
                    s = s.filter(Q('range', **{filter_key: af_filter_setting}) | ~Q('exists', field=filter_key))
                #logger.info("==> %s: %s" % (filter_key, af_filter_setting))

            ac_key_map = {
                "db_acs.AF": "AC",
                "db_acs.1kg_wgs_phase3": "g1k_AC",
                "db_acs.exac_v3": "exac_AC",
                "db_acs.topmed": "topmed_AC",
                "db_acs.gnomad_exomes": "gnomad_exomes_AC",
                "db_acs.gnomad_genomes": "gnomad_genomes_AC",
                "db_acs.gnomad-exomes2": "gnomad_exomes_AC",
                "db_acs.gnomad-genomes2": "gnomad_genomes_AC",
            }

            if key in ac_key_map:
                filter_key = ac_key_map[key]
                ac_filter_setting = {k.replace("$", ""): v for k, v in value.items()}
                s = s.filter(Q('range', **{filter_key: ac_filter_setting}) | ~Q('exists', field=filter_key))

            hemi_key_map = {
                "db_hemi.exac_v3": "exac_AC_Hemi",
                "db_hemi.gnomad_exomes": "gnomad_exomes_Hemi",
                "db_hemi.gnomad_genomes": "gnomad_genomes_Hemi",
                "db_hemi.gnomad-exomes2": "gnomad_exomes_Hemi",
                "db_hemi.gnomad-genomes2": "gnomad_genomes_Hemi",
            }

            if key in hemi_key_map:
                filter_key = hemi_key_map[key]
                hemi_filter_setting = {k.replace("$", ""): v for k, v in value.items()}
                s = s.filter(Q('range', **{filter_key: hemi_filter_setting}) | ~Q('exists', field=filter_key))

            hom_key_map = {
                "db_hom.exac_v3": "exac_AC_Hom",
                "db_hom.gnomad_exomes": "gnomad_exomes_Hom",
                "db_hom.gnomad_genomes": "gnomad_genomes_Hom",
                "db_hom.gnomad-exomes2": "gnomad_exomes_Hom",
                "db_hom.gnomad-genomes2": "gnomad_genomes_Hom",
            }

            if key in hom_key_map:
                filter_key = hom_key_map[key]
                hom_filter_setting = {k.replace("$", ""): v for k, v in value.items()}
                s = s.filter(Q('range', **{filter_key: hom_filter_setting}) | ~Q('exists', field=filter_key))

            #s = s.sort("xpos")

        #logger.info("=====")
        #logger.info("FULL QUERY OBJ: " + pformat(s.__dict__))
        #logger.info("FILTERS: " + pformat(s.to_dict()))

        # https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan
        start = time.time()

        s = s.params(size=max_results_limit + 1)
        #if not include_all_consequences:
        #    s = s.source(exclude=["sortedTranscriptConsequences"])
        response = s.execute()
        logger.info("=====")

        logger.info("TOTAL: %s. Query took %s seconds" % (response.hits.total, time.time() - start))

        if response.hits.total > max_results_limit + 1:
            raise Exception("This search matched too many variants. Please set additional filters and try again.")

        #print(pformat(response.to_dict()))

        project = Project.objects.get(project_id=project_id)

        #gene_list_map = project.get_gene_list_map()

        reference = get_reference()

        #for i, hit in enumerate(response.hits):
        variant_results = []
        for i, hit in enumerate(response):  # preserve_order=True
            #logger.info("HIT %s: %s %s %s" % (i, hit["variantId"], hit["geneIds"], pformat(hit.__dict__)))
            #print("HIT %s: %s" % (i, pformat(hit.to_dict())))
            filters = ",".join(hit["filters"] or []) if "filters" in hit else ""
            genotypes = {}
            all_num_alt = []

            if is_parent_child:
                genotypes_by_sample_id = {gen_hit['sample_id']: gen_hit for gen_hit in hit.meta.inner_hits.genotype}
            elif is_nested:
                genotypes_by_sample_id = {gen_hit['sample_id']: gen_hit for gen_hit in hit['genotypes']}

            for individual_id, sample_id in family_individual_ids_to_sample_ids.items():
                def _get_hit_field(field):
                    if is_parent_child or is_nested:
                        gen_hit = genotypes_by_sample_id.get(sample_id, {})
                        key = field
                    else:
                        gen_hit = hit
                        key = '{}_{}'.format(_encode_name(sample_id), field)
                    return gen_hit[key] if key in gen_hit else None

                num_alt = _get_hit_field('num_alt')
                if num_alt is None:
                    num_alt = -1
                all_num_alt.append(num_alt)

                alleles = []
                if num_alt == 0:
                    alleles = [hit["ref"], hit["ref"]]
                elif num_alt == 1:
                    alleles = [hit["ref"], hit["alt"]]
                elif num_alt == 2:
                    alleles = [hit["alt"], hit["alt"]]
                elif num_alt == -1 or num_alt is None:
                    alleles = []
                else:
                    raise ValueError("Invalid num_alt: " + str(num_alt))

                genotypes[individual_id] = {
                    'ab': _get_hit_field('ab'),
                    'alleles': map(str, alleles),
                    'extras': {
                        'ad': _get_hit_field('ad'),
                        'dp': _get_hit_field('dp'),
                        #'pl': '',
                    },
                    'filter': filters or "pass",
                    'gq': _get_hit_field('gq') or '',
                    'num_alt': num_alt,
                }

            vep_annotation = hit['sortedTranscriptConsequences'] if 'sortedTranscriptConsequences' in hit else None
            if vep_annotation is not None:
                if is_parent_child or is_nested:
                    vep_annotation = [annot.to_dict() for annot in vep_annotation]
                else:
                    vep_annotation = json.loads(str(vep_annotation))

            gene_ids = list(hit['geneIds'] or [])
            worst_vep_index_per_gene = {
                gene_id: next((i for i, annot in enumerate(vep_annotation) if annot['gene_id'] == gene_id), None)
                for gene_id in gene_ids
            }

            if project.genome_version == GENOME_VERSION_GRCh37:
                grch38_coord = None
                if self.liftover_grch37_to_grch38:
                    grch38_coord = self.liftover_grch37_to_grch38.convert_coordinate("chr%s" % hit["contig"].replace("chr", ""), int(hit["start"]))
                    if grch38_coord and grch38_coord[0]:
                        grch38_coord = "%s-%s-%s-%s "% (grch38_coord[0][0], grch38_coord[0][1], hit["ref"], hit["alt"])
                    else:
                        grch38_coord = None
            else:
                grch38_coord = hit["variantId"]

            if project.genome_version == GENOME_VERSION_GRCh38:
                grch37_coord = None
                liftover_grch38_to_grch37 = _liftover_grch38_to_grch37()
                if liftover_grch38_to_grch37:
                    grch37_coord = liftover_grch38_to_grch37.convert_coordinate("chr%s" % hit["contig"].replace("chr", ""), int(hit["start"]))
                    if grch37_coord and grch37_coord[0]:
                        grch37_coord = "%s-%s-%s-%s "% (grch37_coord[0][0], grch37_coord[0][1], hit["ref"], hit["alt"])
                    else:
                        grch37_coord = None
            else:
                grch37_coord = hit["variantId"]

            freq_fields = {
                'AF': "AF" if "AF" in index_fields else None,
                '1kg_wgs_AF': "g1k_AF" if "g1k_AF" in index_fields else None,
                '1kg_wgs_popmax_AF': "g1k_POPMAX_AF" if "g1k_POPMAX_AF" in index_fields else None,
                'exac_v3_AF': "exac_AF" if "exac_AF" in index_fields else None,
                'exac_v3_popmax_AF': "exac_AF_POPMAX" if "exac_AF_POPMAX" in index_fields else None,
                'gnomad_exomes_AF': "gnomad_exomes_AF" if "gnomad_exomes_AF" in index_fields else None,
                'gnomad_exomes_popmax_AF': "gnomad_exomes_AF_POPMAX_OR_GLOBAL" if "gnomad_exomes_AF_POPMAX_OR_GLOBAL" in index_fields else (
                     "gnomad_exomes_AF_POPMAX" if "gnomad_exomes_AF_POPMAX" in index_fields else None),
                'gnomad_genomes_AF': "gnomad_genomes_AF" if "gnomad_genomes_AF" in index_fields else None,
                'gnomad_genomes_popmax_AF': "gnomad_genomes_AF_POPMAX_OR_GLOBAL" if "gnomad_genomes_AF_POPMAX_OR_GLOBAL" in index_fields else (
                    "gnomad_genomes_AF_POPMAX" if "gnomad_genomes_AF_POPMAX" in index_fields else None),
                'topmed_AF': "topmed_AF" if "topmed_AF" in index_fields else None,
            }

            result = {
                #u'_id': ObjectId('596d2207ff66f729285ca588'),
                'alt': str(hit["alt"]) if "alt" in hit else None,
                'annotation': {
                    'fathmm': fathmm_map.get(hit["dbnsfp_FATHMM_pred"].split(';')[0]) if "dbnsfp_FATHMM_pred" in hit and hit["dbnsfp_FATHMM_pred"] else None,
                    'muttaster': muttaster_map.get(hit["dbnsfp_MutationTaster_pred"].split(';')[0]) if "dbnsfp_MutationTaster_pred" in hit and hit["dbnsfp_MutationTaster_pred"] else None,
                    'polyphen': polyphen_map.get(hit["dbnsfp_Polyphen2_HVAR_pred"].split(';')[0]) if "dbnsfp_Polyphen2_HVAR_pred" in hit and hit["dbnsfp_Polyphen2_HVAR_pred"] else None,
                    'sift': sift_map.get(hit["dbnsfp_SIFT_pred"].split(';')[0]) if "dbnsfp_SIFT_pred" in hit and hit["dbnsfp_SIFT_pred"] else None,
                    'metasvm': metasvm_map.get(hit["dbnsfp_MetaSVM_pred"].split(';')[0]) if "dbnsfp_MetaSVM_pred" in hit and hit["dbnsfp_MetaSVM_pred"] else None,

                    'GERP_RS': float(hit["dbnsfp_GERP_RS"]) if "dbnsfp_GERP_RS" in hit and hit["dbnsfp_GERP_RS"] else None,
                    'phastCons100way_vertebrate': float(hit["dbnsfp_phastCons100way_vertebrate"]) if "dbnsfp_phastCons100way_vertebrate" in hit and hit["dbnsfp_phastCons100way_vertebrate"] else None,

                    'cadd_phred': hit["cadd_PHRED"] if "cadd_PHRED" in hit else None,
                    'dann_score': hit["dbnsfp_DANN_score"] if "dbnsfp_DANN_score" in hit else None,
                    'revel_score': hit["dbnsfp_REVEL_score"] if "dbnsfp_REVEL_score" in hit else None,
                    'eigen_phred': hit["eigen_Eigen_phred"] if "eigen_Eigen_phred" in hit else (hit["dbnsfp_Eigen_phred"] if "dbnsfp_Eigen_phred" in hit else None),
                    'mpc_score': hit["mpc_MPC"] if "mpc_MPC" in hit else None,
                    'primate_ai_score': hit["primate_ai_score"] if "primate_ai_score" in hit else None,
                    'splice_ai_delta_score': hit["splice_ai_delta_score"] if "splice_ai_delta_score" in hit else None,
                    'rsid': hit["rsid"] if "rsid" in hit else None,
                    'annotation_tags': list(hit["transcriptConsequenceTerms"] or []) if "transcriptConsequenceTerms" in hit else None,
                    'coding_gene_ids': list(hit['codingGeneIds'] or []),
                    'gene_ids': list(hit['geneIds'] or []),
                    'vep_annotation': vep_annotation,
                    'vep_group': str(hit['mainTranscript_major_consequence'] or "") if "mainTranscript_major_consequence" in hit else "",
                    'vep_consequence': str(hit['mainTranscript_major_consequence'] or "") if "mainTranscript_major_consequence" in hit else "",
                    'main_transcript': {k.replace('mainTranscript_', ''): hit[k] for k in dir(hit) if k.startswith('mainTranscript_')},
                    'worst_vep_annotation_index': 0,
                    'worst_vep_index_per_gene': worst_vep_index_per_gene,
                },
                'chr': hit["contig"],
                'coding_gene_ids': list(hit['codingGeneIds'] or []),
                'gene_ids': gene_ids,
                'coverage': {
                    'gnomad_exome_coverage': float(hit["gnomad_exome_coverage"] or -1) if "gnomad_exome_coverage" in hit else -1,
                    'gnomad_genome_coverage': float(hit["gnomad_genome_coverage"] or -1) if "gnomad_genome_coverage" in hit else -1,
                },
                'pop_counts': {
                    'AC': int(hit['AC'] or 0) if 'AC' in hit else None,
                    'AN': int(hit['AN'] or 0) if 'AN' in hit else None,

                    'g1kAC': int(hit['g1k_AC'] or 0) if 'g1k_AC' in hit else None,
                    'g1kAN': int(hit['g1k_AN'] or 0) if 'g1k_AN' in hit else None,

                    'exac_v3_AC': int(hit["exac_AC_Adj"] or 0) if "exac_AC_Adj" in hit else None,
                    'exac_v3_Het': int(hit["exac_AC_Het"] or 0) if "exac_AC_Het" in hit else None,
                    'exac_v3_Hom': int(hit["exac_AC_Hom"] or 0) if "exac_AC_Hom" in hit else None,
                    'exac_v3_Hemi': int(hit["exac_AC_Hemi"] or 0) if "exac_AC_Hemi" in hit else None,
                    'exac_v3_AN': int(hit["exac_AN_Adj"] or 0) if "exac_AN_Adj" in hit else None,

                    'gnomad_exomes_AC': int(hit["gnomad_exomes_AC"] or 0) if "gnomad_exomes_AC" in hit else None,
                    'gnomad_exomes_Hom': int(hit["gnomad_exomes_Hom"] or 0) if "gnomad_exomes_Hom" in hit else None,
                    'gnomad_exomes_Hemi': int(hit["gnomad_exomes_Hemi"] or 0) if "gnomad_exomes_Hemi" in hit else None,
                    'gnomad_exomes_AN': int(hit["gnomad_exomes_AN"] or 0) if "gnomad_exomes_AN" in hit else None,

                    'gnomad_genomes_AC': int(hit["gnomad_genomes_AC"] or 0) if "gnomad_genomes_AC" in hit else None,
                    'gnomad_genomes_Hom': int(hit["gnomad_genomes_Hom"] or 0) if "gnomad_genomes_Hom" in hit else None,
                    'gnomad_genomes_Hemi': int(hit["gnomad_genomes_Hemi"] or 0) if "gnomad_genomes_Hemi" in hit else None,
                    'gnomad_genomes_AN': int(hit["gnomad_genomes_AN"] or 0) if "gnomad_genomes_AN" in hit else None,

                    'topmed_AC': float(hit["topmed_AC"] or 0) if "topmed_AC" in hit else None,
                    'topmed_Het': float(hit["topmed_Het"] or 0) if "topmed_Het" in hit else None,
                    'topmed_Hom': float(hit["topmed_Hom"] or 0) if "topmed_Hom" in hit else None,
                    'topmed_AN': float(hit["topmed_AN"] or 0) if "topmed_AN" in hit else None,
                },
                'db_freqs': {k: float(hit[v] or 0.0) if v in hit else (0.0 if v else None) for k, v in freq_fields.items()},
                #'popmax_populations': {
                #    'exac_popmax': hit["exac_POPMAX"] or None,
                #    'gnomad_exomes_popmax': hit["gnomad_exomes_POPMAX"] or None,
                #    'gnomad_genomes_popmax': hit["gnomad_genomes_POPMAX"] or None,
                #},
                'db_gene_ids': list((hit["geneIds"] or []) if "geneIds" in hit else []),
                'db_tags': str(hit["transcriptConsequenceTerms"] or "") if "transcriptConsequenceTerms" in hit else None,
                'extras': {
                    'clinvar_variant_id': hit['clinvar_variation_id'] if 'clinvar_variation_id' in hit and hit['clinvar_variation_id'] else None,
                    'clinvar_allele_id': hit['clinvar_allele_id'] if 'clinvar_allele_id' in hit and hit['clinvar_allele_id'] else None,
                    'clinvar_clinsig': hit['clinvar_clinical_significance'].lower() if ('clinvar_clinical_significance' in hit) and hit['clinvar_clinical_significance'] else None,
                    'clinvar_gold_stars': hit['clinvar_gold_stars'] if 'clinvar_gold_stars' in hit and hit['clinvar_gold_stars'] else None,
                    'hgmd_class': hit['hgmd_class'] if 'hgmd_class' in hit and user and user.is_staff else None,
                    'hgmd_accession': hit['hgmd_accession'] if 'hgmd_accession' in hit else None,
                    'genome_version': project.genome_version,
                    'grch37_coords': grch37_coord,
                    'grch38_coords': grch38_coord,
                    'alt_allele_pos': 0,
                    'orig_alt_alleles': map(str, [a.split("-")[-1] for a in hit["originalAltAlleles"]]) if "originalAltAlleles" in hit else None
                },
                'genotypes': genotypes,
                'pos': long(hit['start']),
                'pos_end': str(hit['end']),
                'ref': str(hit['ref']),
                'vartype': 'snp' if len(hit['ref']) == len(hit['alt']) else "indel",
                'vcf_id': None,
                'xpos': long(hit["xpos"]),
                'xposx': long(hit["xpos"]),
            }

            result["annotation"]["freqs"] = result["db_freqs"]
            result["annotation"]["pop_counts"] = result["pop_counts"]
            result["annotation"]["db"] = "elasticsearch"

            result["extras"]["svlen"] = hit["SVLEN"] if "SVLEN" in hit else None
            result["extras"]["svtype"] = hit["SVTYPE"] if "SVTYPE" in hit else None


            logger.info("Result %s: GRCh37: %s GRCh38: %s - gene ids: %s, coding gene_ids: %s" % (
                i, grch37_coord, grch38_coord,
                result["gene_ids"],
                result["coding_gene_ids"]))

            result["extras"]["project_id"] = project_id
            result["extras"]["family_id"] = family_id

            # add gene info
            gene_names = {}
            if vep_annotation is not None:
                gene_names = {vep_anno["gene_id"]: vep_anno.get("gene_symbol") for vep_anno in vep_annotation if vep_anno.get("gene_symbol")}
            result["extras"]["gene_names"] = gene_names

            try:
                genes = {}
                for gene_id in result["gene_ids"]:
                    if gene_id:
                        genes[gene_id] = reference.get_gene_summary(gene_id) or {}

                #if not genes:
                #    genes =  {vep_anno["gene_id"]: {"symbol": vep_anno["gene_symbol"]} for vep_anno in vep_annotation}

                result["extras"]["genes"] = genes
            except Exception as e:
                exc_type, exc_obj, exc_tb = sys.exc_info()
                logger.warning("Got unexpected error in add_gene_names_to_variants: %s : line %s" % (e, exc_tb.tb_lineno))

            variant_results.append(result)

        logger.info("Finished returning the %s variants: %s seconds" % (response.hits.total, time.time() - start))

        if redis_client:
            redis_client.set(cache_key, json.dumps(variant_results))

        return [Variant.fromJSON(variant_json) for variant_json in variant_results]
Example #45
def cache_skills(self, update=True):
    if not update and hasattr(self, 'cached_skills'):
        return
    prefetch_related_objects([self],
                             Prefetch('skills', to_attr='cached_skills'))
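
A brief usage sketch for the pattern above; Person and skill.name are illustrative stand-ins, not names from the original source:

from django.db.models import Prefetch, prefetch_related_objects

person = Person.objects.get(pk=1)
# One query loads every related skill into person.cached_skills.
prefetch_related_objects([person], Prefetch('skills', to_attr='cached_skills'))
# to_attr stores a plain Python list, so this loop issues no further queries.
names = [skill.name for skill in person.cached_skills]
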
Example #46
def get_json_for_genes(genes, user=None, add_dbnsfp=False, add_omim=False, add_constraints=False, add_notes=False,
                       add_expression=False, add_primate_ai=False, add_mgi=False):
    """Returns a JSON representation of the given list of GeneInfo.

    Args:
        genes (array): array of django models for the GeneInfo.
    Returns:
        array: array of json objects
    """
    total_gene_constraints = GeneConstraint.objects.count()
    if add_notes:
        gene_notes_json = get_json_for_gene_notes_by_gene_id([gene.gene_id for gene in genes], user)

    def _add_total_constraint_count(result, *args):
        result['totalGenes'] = total_gene_constraints

    def _process_result(result, gene):
        if add_dbnsfp:
            # prefetching only works with all()
            dbnsfp = next((dbnsfp for dbnsfp in gene.dbnsfpgene_set.all()), None)
            if dbnsfp:
                result.update(_get_json_for_model(dbnsfp))
            else:
                result.update(_get_empty_json_for_model(dbNSFPGene))
        if add_primate_ai:
            # prefetching only works with all()
            primate_ai = next((primate_ai for primate_ai in gene.primateai_set.all()), None)
            if primate_ai:
                result['primateAi'] = _get_json_for_model(primate_ai)
        if add_mgi:
            # prefetching only works with all()
            mgi = next((mgi for mgi in gene.mgi_set.all()), None)
            result['mgiMarkerId'] = mgi.marker_id if mgi else None
        if add_omim:
            omim_phenotypes = _get_json_for_models(gene.omim_set.all())
            result['omimPhenotypes'] = [phenotype for phenotype in omim_phenotypes if phenotype['phenotypeMimNumber']]
            result['mimNumber'] = omim_phenotypes[0]['mimNumber'] if omim_phenotypes else None
        if add_constraints:
            constraint = next((constraint for constraint in gene.geneconstraint_set.all()), None)
            result['constraints'] = _get_json_for_model(constraint, process_result=_add_total_constraint_count) if constraint else {}
        if add_notes:
            result['notes'] = gene_notes_json.get(result['geneId'], [])
        if add_expression:
            result['expression'] = {ge.tissue_type: ge.expression_values for ge in gene.geneexpression_set.all()}

    if add_dbnsfp:
        prefetch_related_objects(genes, 'dbnsfpgene_set')
    if add_omim:
        prefetch_related_objects(genes, 'omim_set')
    if add_constraints:
        prefetch_related_objects(genes, Prefetch('geneconstraint_set', queryset=GeneConstraint.objects.order_by('-mis_z', '-pLI')))
    if add_primate_ai:
        prefetch_related_objects(genes, 'primateai_set')
    if add_mgi:
        prefetch_related_objects(genes, 'mgi_set')
    if add_expression:
        prefetch_related_objects(genes, 'geneexpression_set')

    return _get_json_for_models(genes, process_result=_process_result)
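
A minimal sketch of the gotcha called out in the comments above: only .all() reads the prefetch cache, while .filter() always issues a fresh query (model and relation names follow the example):

from django.db.models import prefetch_related_objects

genes = list(GeneInfo.objects.all()[:10])
prefetch_related_objects(genes, 'dbnsfpgene_set')
gene = genes[0]
cached = gene.dbnsfpgene_set.all()               # served from the prefetch cache
uncached = gene.dbnsfpgene_set.filter(pk__gt=0)  # bypasses the cache: one extra query
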
Example #48
def _copy_facts_to_subscribers(facts, subscribers):
    '''
    The meat-and-potatoes of the copy operation.
    '''
    from manabi.apps.flashcards.models import Card, Fact, Deck

    shared_deck = facts[0].deck
    subscriber_decks = shared_deck.subscriber_decks.filter(
        owner__in=subscribers,
        active=True,
    )
    subscriber_deck_values = subscriber_decks.values_list('id', 'owner_id')
    subscriber_decks_already_with_facts = (
        _subscriber_decks_already_with_facts(subscriber_decks, facts)
    )

    fact_cards_prefetch = Prefetch(
        'card_set',
        queryset=Card.objects.filter(active=True, suspended=False),
        to_attr='available_cards',
    )
    try:
        facts = (
            facts.filter(active=True)
            .prefetch_related(fact_cards_prefetch)
        )
    except AttributeError:
        facts = [fact for fact in facts if fact.active]
        prefetch_related_objects(facts, fact_cards_prefetch)

    copied_facts = []
    copied_cards = []
    updated_subscriber_deck_ids = set()
    for shared_fact in facts:
        copy_attrs = [
            'active', 'suspended', 'new_fact_ordinal',
            'expression', 'reading', 'meaning', 'example_sentence',
            'jmdict_id',
        ]
        fact_kwargs = {attr: getattr(shared_fact, attr) for attr in copy_attrs}

        for subscriber_deck_id, subscriber_id in subscriber_deck_values:
            if _subscriber_deck_already_has_fact(
                subscriber_deck_id,
                shared_fact,
                subscriber_decks_already_with_facts,
            ):
                continue

            fact = Fact(
                deck_id=subscriber_deck_id,
                synchronized_with=shared_fact,
                **fact_kwargs
            )
            copied_facts.append(fact)

            # Copy the cards.
            copied_cards_for_fact = []
            for shared_card in shared_fact.available_cards:
                card = shared_card.copy(fact, owner_id=subscriber_id)
                copied_cards_for_fact.append(card)
            copied_cards.append(copied_cards_for_fact)

            updated_subscriber_deck_ids.add(subscriber_deck_id)

    # Persist everything.
    created_facts = Fact.objects.bulk_create(
        copied_facts, batch_size=BULK_BATCH_SIZE)
    for fact, fact_cards in zip(created_facts, copied_cards):
        for fact_card in fact_cards:
            fact_card.fact_id = fact.id
    Card.objects.bulk_create(
        itertools.chain.from_iterable(copied_cards),
        batch_size=BULK_BATCH_SIZE)

    # Refresh denormalized card count.
    for subscriber_deck_id in updated_subscriber_deck_ids:
        Deck.objects.filter(id=subscriber_deck_id).update(
            card_count=Card.objects.filter(
                deck_id=subscriber_deck_id,
            ).available().count(),
        )
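
The try/except above duck-types its input: a QuerySet supports .prefetch_related(), while an already-materialized list needs prefetch_related_objects. A condensed sketch of the same fallback using an explicit isinstance check instead of the EAFP style (the function name is illustrative; fact_cards_prefetch is as defined above):

from django.db.models import QuerySet, prefetch_related_objects

def with_available_cards(facts, fact_cards_prefetch):
    # QuerySets can defer the prefetch until evaluation; plain lists cannot.
    if isinstance(facts, QuerySet):
        return facts.filter(active=True).prefetch_related(fact_cards_prefetch)
    facts = [fact for fact in facts if fact.active]
    prefetch_related_objects(facts, fact_cards_prefetch)
    return facts
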
Example #49
def collectQuizTasksForTopic(articles=None, topic=None, project=None):
    taskList = []

    # getTopicTree returns the topic with all levels of its subtopic tree
    topictree = topic.getTopicTree()

    # Prefetching uses one query per related table to populate caches.
    # This helps us avoid per row queries when looping over rows.
    prefetch_related_objects(topictree, "questions__answers")

    # Set up the prefetch to retrieve all available hints for each article
    allHints = NLPHints.objects.all()
    fetchHints = Prefetch("hints",
                          queryset=allHints,
                          to_attr="allHints")
    logger.info("Found %d hints" % (len(allHints),))

    # Set up Prefetch that will cache just the highlights matching
    # this topic to article.highlight_taskruns[n].highlightsForTopic
    exclude_ids = []

    # Pick the contributor based on what's selected in the GUI.
    contributor_id = project.task_config['contributor_id']
    topicHighlights = HighlightGroup.objects.filter(
        topic=topic, article_highlight__contributor=contributor_id)

    # Filter the highlights based on the min tokens provided on project creation
    min_tokens_per_highlight = project.task_config['min_tokens']
    max_tokens_per_highlight = project.task_config['max_tokens']
    for topic_hlght in topicHighlights:
        total_count = topic_hlght.token_count()
        if (total_count < min_tokens_per_highlight
            or total_count > max_tokens_per_highlight):
            exclude_ids.append(topic_hlght.id)
            logger.info("Excluded HighlightGroup: {} {} {} tokens".
                        format(topic_hlght.id, topic_hlght.topic.name, total_count))

    topicHighlights = topicHighlights.exclude(id__in=exclude_ids)

    fetchHighlights = Prefetch("highlight_taskruns__highlights",
                               queryset=topicHighlights,
                               to_attr="highlightsForTopic")
    # Find articles highlighted with the topic within the provided queryset
    # distinct is essential after prefetch_related chained method
    articles = (articles
                .filter(highlight_taskruns__highlights__topic=topic)
                .prefetch_related(fetchHighlights)
                .prefetch_related(fetchHints)
                .order_by("article_number")
                .distinct())
    logger.info("collectQuizTasks sorting by article_number for topic {}: {}"
                .format(topic.name, [article.article_number for article in articles])
               )

    project_data = ProjectSerializer(project, many=False).data
    topictree_data = TopicSerializer2(topictree, many=True).data

    # With the prefetching config above, the loops below will
    # be hitting caches. Only 8 queries should be issued against 8 tables,
    # i.e. The query count will not be a function of number of rows returned.
    for article in articles:
        # Our prefetched highlightsForTopic is nested under
        # the ArticleHighlight record, in HighlightGroup.
        # Not expecting more than one ArticleHighlight record,
        # but safest to code as if there could be more than one.

        highlights = [hg
                      for ah in article.highlight_taskruns.all()
                      for hg in ah.highlightsForTopic]
        # At this point, we are processing one topic for one article.
        # All the highlights for a given topic/case need to be in one task.
        # Need to sort here instead of in the above prefetch because we want
        # to ignore the potential grouping effect if there was more than one
        # ArticleHighlight in the above list comprehension.
        # See data.pybossa_api.save_quiz_taskrun for import code.
        sortkey = lambda x: x.case_number
        hg_by_case = sorted(highlights, key=sortkey)

        # Although this code can send multiple HighlightGroups, the
        # Quiz task presenter will only use the first one.
        # So when there are multiple highlight taskruns in the database,
        # (for a given article and topic),
        # the taskrun to be processed by a Quiz will essentially be selected
        # at random. There will need to be a way to flag the
        # official "Gold Standard" HighlightGroup that was distilled from
        # multiple Highlighter taskruns, that will be the one sent to the Quiz.
        for case_number, hg_case_group in groupby(hg_by_case, key=sortkey):
            taskList.append({
               "project": project_data,
               "topTopicId": topic.id,
               "topictree": topictree_data,
               "article": ArticleSerializer(article, many=False).data,
               "highlights": HighlightGroupSerializer(
                                 hg_case_group, many=True).data,
               "hints": NLPHintSerializer(article.allHints, many=True).data,
            })

    return taskList
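
As the sorting comments above note, itertools.groupby only groups consecutive items, which is why the highlights are sorted by case_number before grouping. A standalone illustration:

from itertools import groupby

values = [1, 2, 1, 2]
print([key for key, _ in groupby(values)])          # [1, 2, 1, 2] - unsorted input splits the groups
print([key for key, _ in groupby(sorted(values))])  # [1, 2] - one group per distinct key
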
Example #50
def render_rules(rules=None, version=None):
    '''
    Render rules in a format that Prometheus understands.

    :param rules: List of rules
    :type rules: list(Rule)
    :param int version: Prometheus rule format (1 or 2)
    :return: Returns rules in the v1 (custom) or v2 (YAML) Prometheus format
    :rtype: bytes

    This function can render in either v1 or v2 format.
    We call prefetch_related_objects within this function to populate the
    other related objects that are mostly used for the sub-lookups.
    '''
    if rules is None:
        rules = models.Rule.objects.filter(enabled=True)
    if version is None:
        version = settings.PROMGEN['prometheus'].get('version', 1)

    prefetch_related_objects(
        rules,
        'content_object',
        'content_type',
        'overrides__content_object',
        'overrides__content_type',
        'ruleannotation_set',
        'rulelabel_set',
    )

    # V1 format is a custom format which we render through django templates
    # See promgen/tests/examples/import.rule
    if version == 1:
        return render_to_string('promgen/prometheus.rule', {
            'rules': rules
        }).encode('utf-8')

    # V2 format is a yaml dictionary which we build and then render
    # See promgen/tests/examples/import.rule.yml
    rule_list = collections.defaultdict(list)
    for r in rules:
        rule_list[str(r.content_object)].append({
            'alert': r.name,
            'expr': macro.rulemacro(r.clause, r),
            'for': r.duration,
            'labels': r.labels,
            'annotations': r.annotations,
        })

    return yaml.safe_dump(
        {
            'groups': [{
                'name': name,
                'rules': rule_list[name]
            } for name in rule_list]
        },
        default_flow_style=False,
        allow_unicode=True,
        encoding='utf-8')
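
Note that the prefetch above traverses content_object, a generic foreign key: prefetch_related_objects batches generic relations with one query per content type instead of one query per rule. A minimal sketch of that piece in isolation (Rule as in the example):

from django.db.models import prefetch_related_objects

rules = list(models.Rule.objects.filter(enabled=True))
# Resolves every generic relation up front; accessing r.content_object
# below then hits the cache rather than the database.
prefetch_related_objects(rules, 'content_object')
groups = {str(r.content_object) for r in rules}
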
Example #51
def prefetch_related(self, *args):
    prefetch_related_objects(self.results, *args)
    return self
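
Because the wrapper forwards *args and returns self, lookups can be batched into one call or chained; a hypothetical usage sketch (the search object and lookups mirror Example #58 below):

results = (search
           .prefetch_related('printings__set', 'printings__rarity')
           .prefetch_related('faces')
           .results)
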
Example #52
def _get_projects_details(projects, user, project_category_guid=None):
    for project in projects:
        check_permissions(project, user)

    prefetch_related_objects(projects, 'can_view_group')
    project_models_by_guid = {project.guid: project for project in projects}
    projects_json = get_json_for_projects(projects, user)

    locus_lists = set()

    functional_data_tag_types = get_json_for_variant_functional_data_tag_types()
    variant_tag_types_by_guid = {
        vtt.guid: vtt
        for vtt in VariantTagType.objects.filter(
            Q(project__in=projects)
            | Q(project__isnull=True)).prefetch_related('project')
    }
    variant_tag_types = _get_json_for_models(
        variant_tag_types_by_guid.values())
    for project_json in projects_json:
        project = project_models_by_guid[project_json['projectGuid']]
        project_locus_lists = get_project_locus_list_models(project)
        locus_lists.update(project_locus_lists)

        project_json.update({
            'locusListGuids': [locus_list.guid for locus_list in project_locus_lists],
            'variantTagTypes': [
                vtt for vtt in variant_tag_types
                if variant_tag_types_by_guid[vtt['variantTagTypeGuid']].project is None
                or variant_tag_types_by_guid[vtt['variantTagTypeGuid']].project.guid
                == project_json['projectGuid']
            ],
            'variantFunctionalTagTypes': functional_data_tag_types,
        })

    families = _get_json_for_families(
        Family.objects.filter(project__in=projects), user)
    individuals = _get_json_for_individuals(
        Individual.objects.filter(family__project__in=projects), user=user)
    samples = get_json_for_samples(
        Sample.objects.filter(individual__family__project__in=projects))
    analysis_groups = get_json_for_analysis_groups(
        AnalysisGroup.objects.filter(project__in=projects))

    individual_guids_by_family = defaultdict(list)
    for individual in individuals:
        individual_guids_by_family[individual['familyGuid']].append(
            individual['individualGuid'])
    for family in families:
        family['individualGuids'] = individual_guids_by_family[
            family['familyGuid']]

    sample_guids_by_individual = defaultdict(list)
    for sample in samples:
        sample_guids_by_individual[sample['individualGuid']].append(
            sample['sampleGuid'])
    for individual in individuals:
        individual['sampleGuids'] = sample_guids_by_individual[
            individual['individualGuid']]

    response = {
        'projectsByGuid': {p['projectGuid']: p for p in projects_json},
        'familiesByGuid': {f['familyGuid']: f for f in families},
        'individualsByGuid': {i['individualGuid']: i for i in individuals},
        'samplesByGuid': {s['sampleGuid']: s for s in samples},
        'locusListsByGuid': {
            ll['locusListGuid']: ll
            for ll in get_json_for_locus_lists(list(locus_lists), user)
        },
        'analysisGroupsByGuid': {ag['analysisGroupGuid']: ag for ag in analysis_groups},
    }
    if project_category_guid:
        response['projectCategoriesByGuid'] = {
            project_category_guid:
            ProjectCategory.objects.get(guid=project_category_guid).json()
        }
    return response
Example #53
def saved_variants_page(request, tag):
    gene = request.GET.get('gene')
    tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)
    saved_variant_models = SavedVariant.objects.filter(
        varianttag__variant_tag_type=tag_type)
    if gene:
        saved_variant_models = saved_variant_models.filter(
            saved_variant_json__transcripts__has_key=gene)

    if saved_variant_models.count() > 10000 and not gene:
        return create_json_response(
            {'message': 'Select a gene to filter variants'}, status=400)

    prefetch_related_objects(saved_variant_models, 'family__project')
    response_json = get_json_for_saved_variants_with_tags(
        saved_variant_models, add_details=True, include_missing_variants=True)

    project_models_by_guid = {
        variant.family.project.guid: variant.family.project
        for variant in saved_variant_models
    }
    families = {variant.family for variant in saved_variant_models}
    individuals = Individual.objects.filter(family__in=families)

    saved_variants = response_json['savedVariantsByGuid'].values()
    genes = _saved_variant_genes(saved_variants)
    locus_list_guids = _add_locus_lists(project_models_by_guid.values(),
                                        saved_variants, genes)

    projects_json = get_json_for_projects(
        project_models_by_guid.values(),
        user=request.user,
        add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    variant_tag_types = VariantTagType.objects.filter(
        Q(project__in=project_models_by_guid.values())
        | Q(project__isnull=True))
    prefetch_related_objects(variant_tag_types, 'project')
    variant_tags_json = _get_json_for_models(variant_tag_types)
    tag_projects = {
        vt.guid: vt.project.guid
        for vt in variant_tag_types if vt.project
    }

    for project_json in projects_json:
        project_guid = project_json['projectGuid']
        project_variant_tags = [
            vt for vt in variant_tags_json if tag_projects.get(
                vt['variantTagTypeGuid'], project_guid) == project_guid
        ]
        project_json.update({
            'locusListGuids': locus_list_guids,
            'variantTagTypes': sorted(
                project_variant_tags,
                key=lambda variant_tag_type: variant_tag_type['order']),
            'variantFunctionalTagTypes': functional_tag_types,
        })

    families_json = _get_json_for_families(list(families),
                                           user=request.user,
                                           add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(individuals,
                                                 user=request.user)
    locus_lists_by_guid = {
        locus_list['locusListGuid']: locus_list
        for locus_list in get_json_for_locus_lists(
            LocusList.objects.filter(guid__in=locus_list_guids), request.user)
    }

    response_json.update({
        'genesById': genes,
        'projectsByGuid': {project['projectGuid']: project for project in projects_json},
        'familiesByGuid': {family['familyGuid']: family for family in families_json},
        'individualsByGuid': {indiv['individualGuid']: indiv for indiv in individuals_json},
        'locusListsByGuid': locus_lists_by_guid,
    })
    return create_json_response(response_json)
Example #54
def _get_json_for_families(families,
                           user=None,
                           add_individual_guids_field=False,
                           project_guid=None,
                           skip_nested=False,
                           is_analyst=None,
                           has_case_review_perm=None):
    """Returns a JSON representation of the given Family.

    Args:
        families (array): array of django models representing the family.
        user (object): Django User object for determining whether to include restricted/internal-only fields
        add_individual_guids_field (bool): whether to add an 'individualGuids' field. NOTE: this will require a database query.
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
    Returns:
        array: json objects
    """
    if not families:
        return []

    def _get_pedigree_image_url(pedigree_image):
        if isinstance(pedigree_image, ImageFieldFile):
            try:
                pedigree_image = pedigree_image.url
            except Exception:
                pedigree_image = None
        return os.path.join("/media/",
                            pedigree_image) if pedigree_image else None

    analyst_users = set(
        User.objects.filter(
            groups__name=ANALYST_USER_GROUP) if ANALYST_USER_GROUP else [])

    def _process_result(result, family):
        result['analysedBy'] = [{
            'createdBy': {
                'fullName': ab.created_by.get_full_name(),
                'email': ab.created_by.email,
                'isAnalyst': ab.created_by in analyst_users
            },
            'lastModifiedDate': ab.last_modified_date,
        } for ab in family.familyanalysedby_set.all()]
        pedigree_image = _get_pedigree_image_url(result.pop('pedigreeImage'))
        result['pedigreeImage'] = pedigree_image
        if add_individual_guids_field:
            result['individualGuids'] = [
                i.guid for i in family.individual_set.all()
            ]
        if not result['displayName']:
            result['displayName'] = result['familyId']
        if result['assignedAnalyst']:
            result['assignedAnalyst'] = {
                'fullName': result['assignedAnalyst'].get_full_name(),
                'email': result['assignedAnalyst'].email,
            }
        else:
            result['assignedAnalyst'] = None

    prefetch_related_objects(families, 'assigned_analyst')
    prefetch_related_objects(families, 'familyanalysedby_set__created_by')
    if add_individual_guids_field:
        prefetch_related_objects(families, 'individual_set')

    kwargs = {
        'additional_model_fields': _get_case_review_fields(
            families[0], has_case_review_perm, user, lambda family: family.project)
    }
    if project_guid or not skip_nested:
        kwargs.update({
            'nested_fields': [{
                'fields': ('project', 'guid'),
                'value': project_guid
            }]
        })
    else:
        kwargs['additional_model_fields'].append('project_id')

    return _get_json_for_models(families,
                                user=user,
                                is_analyst=is_analyst,
                                process_result=_process_result,
                                **kwargs)
Example #55
def elasticsearch_status(request):
    client = get_es_client()

    disk_fields = ['node', 'disk.avail', 'disk.used', 'disk.percent']
    disk_status = [
        {_to_camel_case(field.replace('.', '_')): disk[field] for field in disk_fields}
        for disk in client.cat.allocation(format="json", h=','.join(disk_fields))
    ]

    index_fields = [
        'index', 'docs.count', 'store.size', 'creation.date.string'
    ]
    indices = [
        {_to_camel_case(field.replace('.', '_')): index[field] for field in index_fields}
        for index in client.cat.indices(format="json", h=','.join(index_fields))
        if all(not index['index'].startswith(omit_prefix)
               for omit_prefix in ['.', 'index_operations_log'])
    ]

    aliases = defaultdict(list)
    for alias in client.cat.aliases(format="json", h='alias,index'):
        aliases[alias['alias']].append(alias['index'])

    mappings = Index('_all', using=client).get_mapping(doc_type='variant')

    active_samples = Sample.objects.filter(
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        is_active=True,
        elasticsearch_index__isnull=False,
    ).prefetch_related('individual', 'individual__family')
    prefetch_related_objects(active_samples, 'individual__family__project')
    seqr_index_projects = defaultdict(lambda: defaultdict(set))
    es_projects = set()
    for sample in active_samples:
        for index_name in sample.elasticsearch_index.split(','):
            project = sample.individual.family.project
            es_projects.add(project)
            if index_name in aliases:
                for aliased_index_name in aliases[index_name]:
                    seqr_index_projects[aliased_index_name][project].add(
                        sample.individual.guid)
            else:
                seqr_index_projects[index_name.rstrip('*')][project].add(
                    sample.individual.guid)

    for index in indices:
        index_name = index['index']
        index_mapping = mappings[index_name]['mappings']['variant']
        index.update(index_mapping.get('_meta', {}))

        projects_for_index = []
        for index_prefix in seqr_index_projects.keys():
            if index_name.startswith(index_prefix):
                projects_for_index += seqr_index_projects.pop(
                    index_prefix).keys()
        index['projects'] = [{
            'projectGuid': project.guid,
            'projectName': project.name
        } for project in projects_for_index]

    errors = [
        '{} does not exist and is used by project(s) {}'.format(
            index, ', '.join([
                '{} ({} samples)'.format(p.name, len(indivs))
                for p, indivs in project_individuals.items()
            ])) for index, project_individuals in seqr_index_projects.items()
        if project_individuals
    ]

    return create_json_response({
        'indices': indices,
        'diskStats': disk_status,
        'elasticsearchHost': ELASTICSEARCH_SERVER,
        'errors': errors,
    })
Example #56
def elasticsearch_status(request):
    client = get_es_client()

    disk_fields = ['node', 'disk.avail', 'disk.used', 'disk.percent']
    disk_status = [
        {_to_camel_case(field.replace('.', '_')): disk[field] for field in disk_fields}
        for disk in client.cat.allocation(format="json", h=','.join(disk_fields))
    ]

    index_fields = [
        'index', 'docs.count', 'store.size', 'creation.date.string'
    ]
    indices = [
        {_to_camel_case(field.replace('.', '_')): index[field] for field in index_fields}
        for index in client.cat.indices(format="json", h=','.join(index_fields))
        if index['index'] not in ['.kibana', 'index_operations_log']
    ]

    aliases = defaultdict(list)
    for alias in client.cat.aliases(format="json", h='alias,index'):
        aliases[alias['alias']].append(alias['index'])

    mappings = Index('_all', using=client).get_mapping(doc_type='variant')

    latest_loaded_samples = get_latest_loaded_samples()
    prefetch_related_objects(latest_loaded_samples,
                             'individual__family__project')
    seqr_index_projects = defaultdict(lambda: defaultdict(set))
    es_projects = set()
    for sample in latest_loaded_samples:
        for index_name in sample.elasticsearch_index.split(','):
            project = sample.individual.family.project
            es_projects.add(project)
            if index_name in aliases:
                for aliased_index_name in aliases[index_name]:
                    seqr_index_projects[aliased_index_name][project].add(
                        sample.individual.guid)
            else:
                seqr_index_projects[index_name.rstrip('*')][project].add(
                    sample.individual.guid)

    for index in indices:
        index_name = index['index']
        index_mapping = mappings[index_name]['mappings']['variant']
        index.update(index_mapping.get('_meta', {}))
        index['hasNestedGenotypes'] = 'samples_num_alt_1' in index_mapping[
            'properties']

        projects_for_index = []
        for index_prefix in seqr_index_projects.keys():
            if index_name.startswith(index_prefix):
                projects_for_index += seqr_index_projects.pop(
                    index_prefix).keys()
        index['projects'] = [{
            'projectGuid': project.guid,
            'projectName': project.name
        } for project in projects_for_index]

    errors = [
        '{} does not exist and is used by project(s) {}'.format(
            index, ', '.join([
                '{} ({} samples)'.format(p.name, len(indivs))
                for p, indivs in project_individuals.items()
            ])) for index, project_individuals in seqr_index_projects.items()
        if project_individuals
    ]

    # TODO remove once all projects are switched off of mongo
    all_mongo_samples = Sample.objects.filter(
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_status=Sample.SAMPLE_STATUS_LOADED,
        elasticsearch_index__isnull=True,
    ).exclude(individual__family__project__in=es_projects).prefetch_related(
        'individual', 'individual__family__project')
    mongo_sample_individual_max_loaded_date = {
        agg['individual__guid']: agg['max_loaded_date']
        for agg in all_mongo_samples.values('individual__guid').annotate(
            max_loaded_date=Max('loaded_date'))
    }
    mongo_project_samples = defaultdict(set)
    for s in all_mongo_samples:
        if s.loaded_date == mongo_sample_individual_max_loaded_date[
                s.individual.guid]:
            mongo_project_samples[s.individual.family.project].add(
                s.dataset_file_path)
    mongo_projects = [{
        'projectGuid': project.guid,
        'projectName': project.name,
        'sourceFilePaths': sample_file_paths
    } for project, sample_file_paths in mongo_project_samples.items()]

    return create_json_response({
        'indices': indices,
        'diskStats': disk_status,
        'elasticsearchHost': ELASTICSEARCH_SERVER,
        'mongoProjects': mongo_projects,
        'errors': errors,
    })
Example #57
    def _form_search_response_data_from_vouchers(self, vouchers, user_email,
                                                 user):
        """
        Build a list of dictionaries that contains the relevant information
        for each voucher_application (redemption) or offer_assignment (assignment).

        Returns a list of dictionaries to be handed to the serializer for
        construction of pagination.
        """
        def _prepare_redemption_data(coupon_data, offer_assignment=None):
            """
            Prepares redemption data for the received voucher in coupon_data
            """
            redemption_data = dict(coupon_data)
            redemption_data['course_title'] = None
            redemption_data['course_key'] = None
            redemption_data['redeemed_date'] = None
            redemption_data['user_email'] = (
                offer_assignment.user_email if offer_assignment else None)
            redemptions_and_assignments.append(redemption_data)

        redemptions_and_assignments = []
        prefetch_related_objects(vouchers, 'applications', 'coupon_vouchers',
                                 'coupon_vouchers__coupon', 'offers',
                                 'offers__condition',
                                 'offers__offerassignment_set')
        for voucher in vouchers:
            coupon_vouchers = voucher.coupon_vouchers.all()
            coupon_voucher = coupon_vouchers[0]
            coupon_data = {
                'coupon_id': coupon_voucher.coupon.id,
                'coupon_name': coupon_voucher.coupon.title,
                'code': voucher.code,
                'voucher_id': voucher.id,
            }
            if user is not None:
                for application in voucher.applications.all():
                    if application.user.id == user.id:
                        line = application.order.lines.first()
                        redemption_data = dict(coupon_data)
                        redemption_data['course_title'] = line.product.course.name
                        redemption_data['course_key'] = line.product.course.id
                        redemption_data['redeemed_date'] = application.date_created
                        redemptions_and_assignments.append(redemption_data)

            offer = voucher and voucher.enterprise_offer
            all_offer_assignments = offer.offerassignment_set.all()
            offer_assignments = []
            for assignment in all_offer_assignments:
                if (assignment.voucher_application is None
                        and assignment.status
                        in [OFFER_ASSIGNED, OFFER_ASSIGNMENT_EMAIL_PENDING]
                        and assignment.code == voucher.code
                        and (assignment.user_email == user_email
                             if user_email else True)):
                    offer_assignments.append(assignment)
            coupon_data['is_assigned'] = len(offer_assignments)
            # For the case when an unassigned voucher code is searched
            if len(offer_assignments) == 0:
                if not user_email:
                    _prepare_redemption_data(coupon_data)
            else:
                for offer_assignment in offer_assignments:
                    _prepare_redemption_data(coupon_data, offer_assignment)
        return redemptions_and_assignments
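
Note how the loop above filters assignments in Python rather than calling .filter() on the related manager: a queryset filter would bypass the prefetched offers__offerassignment_set cache and issue one query per voucher. The same trade-off in miniature (the function name is illustrative):

def assigned_offer_assignments(offer):
    # Iterating .all() reuses the prefetched cache: no query per voucher.
    # offer.offerassignment_set.filter(status=OFFER_ASSIGNED) would be
    # equivalent but would hit the database on every call.
    return [a for a in offer.offerassignment_set.all()
            if a.status == OFFER_ASSIGNED]
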
Example #58
    def search(self, page_number: int = 1, page_size: int = 25) -> None:
        """
        Runs the search and constructs the paginated search results.
        :param page_number: The result page
        :param page_size: The number of items per page
        """
        queryset = self.get_queryset()
        self.paginator = Paginator(queryset, page_size)
        try:
            self.page = self.paginator.page(page_number)
        except EmptyPage:
            return
        cards = list(self.page)
        prefetch_related_objects(cards, "printings__face_printings")
        prefetch_related_objects(cards, "printings__localisations__ownerships")
        prefetch_related_objects(cards, "printings__localisations__language")
        prefetch_related_objects(cards, "printings__localisations__localised_faces")
        prefetch_related_objects(cards, "faces")
        prefetch_related_objects(cards, "printings__set")
        prefetch_related_objects(cards, "printings__rarity")

        preferred_set = self.get_preferred_set()
        self.results = [
            SearchResult(card, selected_set=preferred_set) for card in cards
        ]
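
The seven prefetch calls above could equally be collapsed into one, since prefetch_related_objects accepts any number of lookups; each lookup still runs its own batched queries, so the change is purely cosmetic (a sketch, not the original code):

prefetch_related_objects(
    cards,
    "printings__face_printings",
    "printings__localisations__ownerships",
    "printings__localisations__language",
    "printings__localisations__localised_faces",
    "printings__set",
    "printings__rarity",
    "faces",
)
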
Example #59
def site(request: "HttpRequest") -> dict:
    """Add site settings to the context under the 'site' key."""
    site = get_current_site(request)
    if isinstance(site, Site):
        prefetch_related_objects([site], "settings__translations")
    return {"site": site}