Exemplo n.º 1
0
def _get_saved_variants_by_family(projects, user):
    """Group the 'Known gene for phenotype' saved variants of the given projects by family.

    Args:
        projects: projects whose saved variants and individuals are queried.
        user: forwarded unchanged to get_json_for_saved_variants.

    Returns:
        A defaultdict(list) keyed by family guid. Each value is the list of
        variant JSON objects whose 'familyGuids' contain that guid.
    """
    known_gene_tag_type = VariantTagType.objects.get(name='Known gene for phenotype')

    tagged_variants = SavedVariant.objects.select_related('family', 'project').filter(
        project__in=projects,
        varianttag__variant_tag_type=known_gene_tag_type,
    )

    # Only the two columns needed for the id -> guid lookup are loaded.
    individual_guids_by_id = {}
    for individual in Individual.objects.filter(family__project__in=projects).only('guid', 'individual_id'):
        individual_guids_by_id[individual.individual_id] = individual.guid

    variants_json = get_json_for_saved_variants(
        tagged_variants,
        add_tags=True,
        add_details=True,
        user=user,
        individual_guids_by_id=individual_guids_by_id,
    )

    variants_by_family = defaultdict(list)
    for variant_json in variants_json:
        for family_guid in variant_json['familyGuids']:
            variants_by_family[family_guid].append(variant_json)
    return variants_by_family
Exemplo n.º 2
0
def _get_saved_variants(variants):
    if not variants:
        return {}

    variant_q = Q()
    for variant in variants:
        variant_q |= Q(xpos_start=variant['xpos'],
                       ref=variant['ref'],
                       alt=variant['alt'],
                       family__guid__in=variant['familyGuids'])
    saved_variants = SavedVariant.objects.filter(variant_q)

    variants_by_id = {
        '{}-{}-{}'.format(var['xpos'], var['ref'], var['alt']): var
        for var in variants
    }
    saved_variants_json = get_json_for_saved_variants(saved_variants,
                                                      add_tags=True)
    saved_variants_by_guid = {}
    for saved_variant in saved_variants_json:
        family_guids = saved_variant['familyGuids']
        saved_variant.update(variants_by_id['{}-{}-{}'.format(
            saved_variant['xpos'], saved_variant['ref'],
            saved_variant['alt'])])
        #  For saved variants only use family it was saved for, not all families in search
        saved_variant['familyGuids'] = family_guids
        saved_variants_by_guid[saved_variant['variantGuid']] = saved_variant

    return saved_variants_by_guid
Exemplo n.º 3
0
def _get_json_for_variant_tag_types(project, user, individuals_by_guid):
    """Build the variant tag type metadata and discovery-tagged variants for a project.

    Args:
        project: project whose tags, notes and saved variants are queried.
        user: forwarded to get_json_for_saved_variants.
        individuals_by_guid: dict mapping individual guid to an individual
            JSON dict that contains an 'individualId' entry.

    Returns:
        A dict with:
            'variantTagTypes': the project's tag types sorted by 'order',
            'variantFunctionalTagTypes': the functional data tag types,
            'discoveryTags': JSON for saved variants carrying a tag whose type
                is in the 'CMG Discovery Tags' category and has at least one tag.
    """
    individual_guids_by_id = {
        individual['individualId']: individual_guid for individual_guid, individual in individuals_by_guid.items()
    }

    tag_counts_by_type_and_family = VariantTag.objects.filter(saved_variant__project=project).values(
        'saved_variant__family__guid', 'variant_tag_type__name').annotate(count=Count('*'))
    note_counts_by_family = VariantNote.objects.filter(saved_variant__project=project).values(
        'saved_variant__family__guid').annotate(count=Count('*'))
    project_variant_tags = get_project_variant_tag_types(
        project, tag_counts_by_type_and_family=tag_counts_by_type_and_family,
        note_counts_by_family=note_counts_by_family)

    discovery_tags = []
    for tag_type in project_variant_tags:
        # Skip the query entirely for discovery tag types that were never applied.
        if tag_type['category'] == 'CMG Discovery Tags' and tag_type['numTags'] > 0:
            tags = VariantTag.objects.filter(
                saved_variant__project=project, variant_tag_type__guid=tag_type['variantTagTypeGuid'],
            ).select_related('saved_variant')
            saved_variants = [tag.saved_variant for tag in tags]
            discovery_tags += get_json_for_saved_variants(
                saved_variants, add_tags=True, add_details=True, project=project, user=user,
                individual_guids_by_id=individual_guids_by_id)

    # The functional tag types come from get_json_for_variant_functional_data_tag_types();
    # the previous hand-built list of them was never used and has been dropped.
    return {
        'variantTagTypes': sorted(project_variant_tags, key=lambda variant_tag_type: variant_tag_type['order']),
        'variantFunctionalTagTypes': get_json_for_variant_functional_data_tag_types(),
        'discoveryTags': discovery_tags,
    }
Exemplo n.º 4
0
def get_individual_mme_matches(request, submission_guid):
    """
    Returns the stored matchmaker results for a submission, together with the
    saved variants of the submitting individual's family.
    Args:
        request: the incoming request; request.user is permission-checked
            against the submission
        submission_guid: guid of the MatchmakerSubmission to look up
    Returns:
        The value returned by _parse_mme_results
    """
    submission = MatchmakerSubmission.objects.get(guid=submission_guid)
    check_mme_permissions(submission, request.user)

    results = MatchmakerResult.objects.filter(submission=submission)

    family_variants = get_json_for_saved_variants(
        SavedVariant.objects.filter(family=submission.individual.family),
        add_tags=True, add_details=True,
    )

    # The keys of each variant's 'transcripts' are passed on as additional genes.
    gene_ids = set()
    for family_variant in family_variants:
        gene_ids.update(family_variant['transcripts'].keys())

    saved_variants_by_guid = {}
    for family_variant in family_variants:
        saved_variants_by_guid[family_variant['variantGuid']] = family_variant

    return _parse_mme_results(
        submission, results, request.user,
        additional_genes=gene_ids,
        response_json={'savedVariantsByGuid': saved_variants_by_guid},
    )
Exemplo n.º 5
0
def saved_variant_data(request, project_guid, variant_guid=None):
    """Return the saved variants (and their genes) for a project as a JSON response.

    Args:
        request: the incoming request. An optional comma-separated 'families'
            GET parameter restricts the result to those family guids.
        project_guid: guid of the project; access is checked for request.user.
        variant_guid: optional guid restricting the result to a single variant.

    Returns:
        A JSON response with 'savedVariantsByGuid' (only variants that have
        notes or tags) and 'genesById', or a 404 JSON response when
        variant_guid is given but matches no variant.
    """
    project = get_project_and_check_permissions(project_guid, request.user)
    family_guids = request.GET['families'].split(',') if request.GET.get(
        'families') else None

    # Always scope the query to the permission-checked project. Filtering on the
    # requested family guids alone would expose variants saved in families that
    # belong to other projects the user may not be allowed to see.
    variant_query = SavedVariant.objects.filter(family__project=project)
    if family_guids:
        variant_query = variant_query.filter(family__guid__in=family_guids)
    if variant_guid:
        variant_query = variant_query.filter(guid=variant_guid)
        if variant_query.count() < 1:
            return create_json_response(
                {},
                status=404,
                reason='Variant {} not found'.format(variant_guid))

    saved_variants = get_json_for_saved_variants(variant_query,
                                                 add_tags=True,
                                                 add_details=True)
    variants = {
        variant['variantGuid']: variant
        for variant in saved_variants if variant['notes'] or variant['tags']
    }
    genes = _saved_variant_genes(variants.values())
    _add_locus_lists([project], variants.values(), genes)

    return create_json_response({
        'savedVariantsByGuid': variants,
        'genesById': genes,
    })
Exemplo n.º 6
0
def saved_variants(request, tag):
    """Return every saved variant carrying the given global tag, with related metadata.

    Args:
        request: the incoming request; request.user is forwarded to the JSON helpers.
        tag: name of a VariantTagType that is not attached to any project.

    Returns:
        A JSON response with the variants, genes, projects, families,
        individuals and locus lists, each keyed by guid/id.
    """
    tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)
    saved_variant_models = SavedVariant.objects.filter(
        varianttag__variant_tag_type=tag_type, family__isnull=False)
    variants_json = get_json_for_saved_variants(
        saved_variant_models, add_tags=True, add_details=True, user=request.user)

    project_models_by_guid = {}
    families = set()
    for variant_model in saved_variant_models:
        project_models_by_guid[variant_model.project.guid] = variant_model.project
        families.add(variant_model.family)
    individuals = Individual.objects.filter(family__in=families)

    genes = _saved_variant_genes(variants_json)
    locus_list_guids = _add_locus_lists(project_models_by_guid.values(), variants_json, genes)

    projects_json = get_json_for_projects(
        project_models_by_guid.values(), user=request.user, add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    for project_json in projects_json:
        project_model = project_models_by_guid[project_json['projectGuid']]
        project_json['locusListGuids'] = locus_list_guids
        project_json['variantTagTypes'] = get_project_variant_tag_types(project_model)
        project_json['variantFunctionalTagTypes'] = functional_tag_types

    families_json = _get_json_for_families(list(families), user=request.user, add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(individuals, user=request.user)
    locus_lists_json = get_json_for_locus_lists(
        LocusList.objects.filter(guid__in=locus_list_guids), request.user)

    response = {
        'savedVariantsByGuid': {variant['variantGuid']: variant for variant in variants_json},
        'genesById': genes,
        'projectsByGuid': {project['projectGuid']: project for project in projects_json},
        'familiesByGuid': {family['familyGuid']: family for family in families_json},
        'individualsByGuid': {indiv['individualGuid']: indiv for indiv in individuals_json},
        'locusListsByGuid': {locus_list['locusListGuid']: locus_list for locus_list in locus_lists_json},
    }
    return create_json_response(response)
Exemplo n.º 7
0
def _get_saved_discovery_variants_by_family(variant_filter, parse_json=False):
    """Group saved variants that carry a global 'CMG Discovery Tags' tag by family guid.

    Args:
        variant_filter: dict of extra keyword filters applied to the SavedVariant query.
        parse_json: when true, the grouped values are variant JSON dicts with an
            added 'discovery_tag_guids_by_name' entry; otherwise they are the
            SavedVariant models themselves.

    Returns:
        A defaultdict(list) mapping family guid to its variants, in
        'created_date' order.
    """
    discovery_tag_types = VariantTagType.objects.filter(project__isnull=True, category='CMG Discovery Tags')

    # Prefetch only the discovery tags of each variant into `discovery_tags`.
    discovery_tag_prefetch = Prefetch(
        'varianttag_set',
        to_attr='discovery_tags',
        queryset=VariantTag.objects.filter(variant_tag_type__in=discovery_tag_types).select_related('variant_tag_type'),
    )
    discovery_variants = SavedVariant.objects.select_related('family')
    discovery_variants = discovery_variants.prefetch_related(discovery_tag_prefetch)
    discovery_variants = discovery_variants.prefetch_related('variantfunctionaldata_set')
    discovery_variants = discovery_variants.filter(
        varianttag__variant_tag_type__in=discovery_tag_types,
        **variant_filter
    )
    discovery_variants = discovery_variants.order_by('created_date').distinct()

    json_by_guid = {}
    if parse_json:
        for variant_json in get_json_for_saved_variants(discovery_variants, add_details=True):
            json_by_guid[variant_json['variantGuid']] = variant_json

    grouped = defaultdict(list)
    for variant_model in discovery_variants:
        if parse_json:
            entry = json_by_guid[variant_model.guid]
            entry['discovery_tag_guids_by_name'] = {
                tag.variant_tag_type.name: tag.guid for tag in variant_model.discovery_tags
            }
        else:
            entry = variant_model
        grouped[variant_model.family.guid].append(entry)

    return grouped
Exemplo n.º 8
0
def _get_saved_variants(variants, families):
    if not variants:
        return {}, {}

    prefetch_related_objects(families, 'project')
    hg37_family_guids = {
        family.guid
        for family in families
        if family.project.genome_version == GENOME_VERSION_GRCh37
    }

    variant_q = Q()
    variants_by_id = {}
    for variant in variants:
        variants_by_id[_get_variant_key(**variant)] = variant
        variant_q |= Q(xpos_start=variant['xpos'],
                       ref=variant['ref'],
                       alt=variant['alt'],
                       family__guid__in=variant['familyGuids'])
        if variant[
                'liftedOverGenomeVersion'] == GENOME_VERSION_GRCh37 and hg37_family_guids:
            variant_hg37_families = [
                family_guid for family_guid in variant['familyGuids']
                if family_guid in hg37_family_guids
            ]
            if variant_hg37_families:
                lifted_xpos = get_xpos(variant['liftedOverChrom'],
                                       variant['liftedOverPos'])
                variant_q |= Q(xpos_start=lifted_xpos,
                               ref=variant['ref'],
                               alt=variant['alt'],
                               family__guid__in=variant_hg37_families)
                variants_by_id[_get_variant_key(
                    xpos=lifted_xpos,
                    ref=variant['ref'],
                    alt=variant['alt'],
                    genomeVersion=variant['liftedOverGenomeVersion']
                )] = variant
    saved_variants = SavedVariant.objects.filter(variant_q)

    saved_variants_json = get_json_for_saved_variants(saved_variants,
                                                      add_tags=True,
                                                      add_details=True)
    saved_variants_by_guid = {}
    variants_to_saved_variants = {}
    for saved_variant in saved_variants_json:
        family_guids = saved_variant['familyGuids']
        searched_variant = variants_by_id[_get_variant_key(**saved_variant)]
        saved_variant.update(searched_variant)
        #  For saved variants only use family it was saved for, not all families in search
        saved_variant['familyGuids'] = family_guids
        saved_variants_by_guid[saved_variant['variantGuid']] = saved_variant
        if searched_variant['variantId'] not in variants_to_saved_variants:
            variants_to_saved_variants[searched_variant['variantId']] = {}
        for family_guid in family_guids:
            variants_to_saved_variants[searched_variant['variantId']][
                family_guid] = saved_variant['variantGuid']

    return saved_variants_by_guid, variants_to_saved_variants
Exemplo n.º 9
0
def _get_json_for_variant_tag_types(project):
    """Build the variant tag type metadata and discovery-tagged variants for a project.

    Args:
        project: project whose notes, tags and saved variants are queried.

    Returns:
        A dict with:
            'variantTagTypes': JSON for the project's own and the global tag
                types plus a synthetic 'Has Notes' entry, each with 'numTags'
                and 'numTagsPerFamily', sorted by 'order',
            'variantFunctionalTagTypes': the functional data tag types,
            'discoveryTags': JSON for the project's saved variants tagged with
                a 'CMG Discovery Tags' type that has at least one tag.
    """
    note_counts_by_family = VariantNote.objects.filter(saved_variants__family__project=project)\
        .values('saved_variants__family__guid').annotate(count=Count('*'))
    note_tag_type = {
        'variantTagTypeGuid': 'notes',
        'name': 'Has Notes',
        'category': 'Notes',
        'description': '',
        'color': 'grey',
        'order': 100,
        'numTags': sum(count['count'] for count in note_counts_by_family),
        'numTagsPerFamily': {count['saved_variants__family__guid']: count['count'] for count in note_counts_by_family},
    }

    tag_counts_by_type_and_family = VariantTag.objects.filter(saved_variants__family__project=project)\
        .values('saved_variants__family__guid', 'variant_tag_type__name').annotate(count=Count('*'))
    # Group the count rows by tag type name once instead of rescanning every row per tag type.
    counts_by_tag_name = {}
    for counts in tag_counts_by_type_and_family:
        counts_by_tag_name.setdefault(counts['variant_tag_type__name'], []).append(counts)

    project_variant_tags = _get_json_for_models(VariantTagType.objects.filter(Q(project=project) | Q(project__isnull=True)))
    for tag_type in project_variant_tags:
        current_tag_type_counts = counts_by_tag_name.get(tag_type['name'], [])
        tag_type.update({
            'numTags': sum(count['count'] for count in current_tag_type_counts),
            'numTagsPerFamily': {count['saved_variants__family__guid']: count['count'] for count in
                                 current_tag_type_counts},
        })

    project_variant_tags.append(note_tag_type)
    # Sorted once here; the result is reused both below and in the return value.
    project_variant_tags = sorted(project_variant_tags, key=lambda variant_tag_type: variant_tag_type['order'])

    discovery_tag_type_guids = [tag_type['variantTagTypeGuid'] for tag_type in project_variant_tags
                                if tag_type['category'] == 'CMG Discovery Tags' and tag_type['numTags'] > 0]
    discovery_tags = get_json_for_saved_variants(SavedVariant.objects.filter(
        family__project=project, varianttag__variant_tag_type__guid__in=discovery_tag_type_guids,
    ), add_details=True)

    # The functional tag types come from get_json_for_variant_functional_data_tag_types();
    # the previous hand-built list of them was never used and has been dropped.
    return {
        'variantTagTypes': project_variant_tags,
        'variantFunctionalTagTypes': get_json_for_variant_functional_data_tag_types(),
        'discoveryTags': discovery_tags,
    }
Exemplo n.º 10
0
def _get_saved_known_gene_variants_by_family(projects):
    """Group the 'Known gene for phenotype' saved variants of the given projects by family.

    Args:
        projects: projects whose families' saved variants are queried.

    Returns:
        A defaultdict(list) keyed by family guid. Each value is the list of
        variant JSON objects whose 'familyGuids' contain that guid.
    """
    known_gene_tag_type = VariantTagType.objects.get(name='Known gene for phenotype')

    known_gene_variants = SavedVariant.objects.select_related('family').filter(
        family__project__in=projects,
        varianttag__variant_tag_type=known_gene_tag_type,
    )
    known_gene_variants_json = get_json_for_saved_variants(known_gene_variants, add_details=True)

    variants_by_family = defaultdict(list)
    for variant_json in known_gene_variants_json:
        for family_guid in variant_json['familyGuids']:
            variants_by_family[family_guid].append(variant_json)
    return variants_by_family
Exemplo n.º 11
0
def _get_saved_variants_by_family(projects, user):
    """Collect saved variants tagged 'Known gene for phenotype', keyed by family guid.

    Args:
        projects: projects whose saved variants and individuals are queried.
        user: forwarded unchanged to get_json_for_saved_variants.

    Returns:
        A defaultdict(list) mapping family guid to the variant JSON objects
        that list that guid in 'familyGuids'.
    """
    tag_type = VariantTagType.objects.get(name='Known gene for phenotype')

    variant_models = SavedVariant.objects.select_related('family', 'project')
    variant_models = variant_models.filter(project__in=projects, varianttag__variant_tag_type=tag_type)

    project_individuals = Individual.objects.filter(family__project__in=projects)
    individual_guids_by_id = {}
    for indiv in project_individuals.only('guid', 'individual_id'):
        individual_guids_by_id[indiv.individual_id] = indiv.guid

    json_options = {
        'add_tags': True,
        'add_details': True,
        'user': user,
        'individual_guids_by_id': individual_guids_by_id,
    }
    variants_json = get_json_for_saved_variants(variant_models, **json_options)

    grouped = defaultdict(list)
    for variant_json in variants_json:
        for family_guid in variant_json['familyGuids']:
            grouped[family_guid].append(variant_json)

    return grouped
Exemplo n.º 12
0
def _get_saved_variants(variants):
    if not variants:
        return {}

    variant_q = Q()
    for variant in variants:
        variant_q |= Q(xpos_start=variant['xpos'], ref=variant['ref'], alt=variant['alt'], family__guid__in=variant['familyGuids'])
    saved_variants = SavedVariant.objects.filter(variant_q)

    variants_by_id = {'{}-{}-{}'.format(var['xpos'], var['ref'], var['alt']): var for var in variants}
    saved_variants_json = get_json_for_saved_variants(saved_variants, add_tags=True)
    saved_variants_by_guid = {}
    for saved_variant in saved_variants_json:
        family_guids = saved_variant['familyGuids']
        saved_variant.update(
            variants_by_id['{}-{}-{}'.format(saved_variant['xpos'], saved_variant['ref'], saved_variant['alt'])]
        )
        #  For saved variants only use family it was saved for, not all families in search
        saved_variant['familyGuids'] = family_guids
        saved_variants_by_guid[saved_variant['variantGuid']] = saved_variant

    return saved_variants_by_guid
Exemplo n.º 13
0
def saved_variant_data(request, project_guid, variant_guid=None):
    """Return a project's saved variants and their genes as a JSON response.

    Args:
        request: the incoming request. An optional comma-separated 'families'
            GET parameter restricts the result to those family guids.
        project_guid: guid of the project; access is checked for request.user.
        variant_guid: optional guid restricting the result to a single variant.

    Returns:
        A JSON response with 'savedVariantsByGuid' (only variants that have
        notes or tags) and 'genesById', or a 404 JSON response when
        variant_guid is given but matches no variant in the project.
    """
    project = get_project_and_check_permissions(project_guid, request.user)

    families_param = request.GET.get('families')
    family_guids = families_param.split(',') if families_param else None

    variant_query = SavedVariant.objects.filter(project=project)
    if family_guids:
        variant_query = variant_query.filter(family__guid__in=family_guids)
    if variant_guid:
        variant_query = variant_query.filter(guid=variant_guid)
        if variant_query.count() < 1:
            not_found_reason = 'Variant {} not found'.format(variant_guid)
            return create_json_response({}, status=404, reason=not_found_reason)

    individual_guids_by_id = {}
    for individual in Individual.objects.filter(family__project=project):
        individual_guids_by_id[individual.individual_id] = individual.guid

    saved_variants_json = get_json_for_saved_variants(
        variant_query,
        add_tags=True,
        add_details=True,
        project=project,
        user=request.user,
        individual_guids_by_id=individual_guids_by_id,
    )

    # Variants without any note or tag are left out of the response.
    annotated_variants = {}
    for variant_json in saved_variants_json:
        if variant_json['notes'] or variant_json['tags']:
            annotated_variants[variant_json['variantGuid']] = variant_json

    genes = _saved_variant_genes(annotated_variants.values())
    _add_locus_lists([project], annotated_variants.values(), genes)

    return create_json_response({
        'savedVariantsByGuid': annotated_variants,
        'genesById': genes,
    })
Exemplo n.º 14
0
    def handle(self, *args, **options):
        """Lift a project's saved variants over to GRCh38 using a new Elasticsearch index.

        Reads options['project'] (matched against the project name or guid) and
        options['es_index']. The steps are, in order:

        1. Validate the index metadata against the project for GRCh38 and match
           the index sample ids to sample records.
        2. Check that every family in the callset is complete and that every
           family with saved variants is in the callset.
        3. Optionally (after a prompt) delete saved variants with no tags or notes.
        4. Lift the remaining non-GRCh38 saved variants from hg19 to hg38, look
           them up in the index and overwrite each model's xpos_start and
           saved_variant_json.
        5. Set the project's genome_version to GRCh38 and reset its cached
           search results.

        The command is interactive: it prompts through raw_input at several
        points and aborts unless the operator answers 'y'.

        Raises:
            CommandError: when samples are unmatched, families are incomplete or
                missing from the callset, or the operator declines to continue
                at a prompt.
        """
        project_arg = options['project']
        elasticsearch_index = options['es_index']

        # The project argument may be either the project name or its guid.
        project = Project.objects.get(
            Q(name=project_arg) | Q(guid=project_arg))
        logger.info('Updating project genome version for {}'.format(
            project.name))

        # Validate the provided index
        logger.info('Validating es index {}'.format(elasticsearch_index))
        sample_ids, index_metadata = get_elasticsearch_index_samples(
            elasticsearch_index)
        validate_index_metadata(index_metadata,
                                project,
                                elasticsearch_index,
                                genome_version=GENOME_VERSION_GRCh38)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping={},
        )

        # Every sample id in the index must have been matched to a sample record.
        unmatched_samples = set(sample_ids) - set(
            matched_sample_id_to_sample_record.keys())
        if len(unmatched_samples) > 0:
            raise CommandError(
                'Matches not found for ES sample ids: {}.'.format(
                    ', '.join(unmatched_samples)))

        prefetch_related_objects(matched_sample_id_to_sample_record.values(),
                                 'individual__family')
        included_families = {
            sample.individual.family
            for sample in matched_sample_id_to_sample_record.values()
        }
        # Individuals of the included families that have an active variant-calls
        # sample but whose sample is not among the matched records.
        missing_individuals = Individual.objects.filter(
            family__in=included_families,
            sample__is_active=True,
            sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        ).exclude(sample__in=matched_sample_id_to_sample_record.values()
                  ).select_related('family')
        missing_family_individuals = defaultdict(list)
        for individual in missing_individuals:
            missing_family_individuals[individual.family].append(individual)

        if missing_family_individuals:
            raise CommandError(
                'The following families are included in the callset but are missing some family members: {}.'
                .format(', '.join([
                    '{} ({})'.format(
                        family.family_id,
                        ', '.join([i.individual_id for i in missing_indivs]))
                    for family, missing_indivs in
                    missing_family_individuals.items()
                ])))

        # Get and clean up expected saved variants
        saved_variant_models_by_guid = {
            v.guid: v
            for v in SavedVariant.objects.filter(family__project=project)
        }
        # Collect saved variants that have neither tags nor notes.
        deleted_no_tags = set()
        for guid, variant in saved_variant_models_by_guid.items():
            if not (variant.varianttag_set.count()
                    or variant.variantnote_set.count()):
                deleted_no_tags.add(guid)

        # Deletion only happens on an explicit 'y'; any other answer keeps the
        # variants and continues with them included.
        if deleted_no_tags:
            if raw_input(
                    'Do you want to delete the following {} saved variants with no tags (y/n)?: {} '
                    .format(len(deleted_no_tags),
                            ', '.join(deleted_no_tags))) == 'y':
                for guid in deleted_no_tags:
                    saved_variant_models_by_guid.pop(guid).delete()
                logger.info('Deleted {} variants'.format(len(deleted_no_tags)))

        # Every family that still has saved variants must be part of the callset.
        expected_families = {
            sv.family
            for sv in saved_variant_models_by_guid.values()
        }
        missing_families = expected_families - included_families
        if missing_families:
            raise CommandError(
                'The following families have saved variants but are missing from the callset: {}.'
                .format(', '.join([f.family_id for f in missing_families])))

        # Lift-over saved variants
        # NOTE(review): _update_variant_samples is defined elsewhere; presumably it
        # points the matched samples at the new index and dataset path - confirm.
        _update_variant_samples(matched_sample_id_to_sample_record,
                                elasticsearch_index, dataset_path)
        saved_variants = get_json_for_saved_variants(
            saved_variant_models_by_guid.values(), add_details=True)
        saved_variants_to_lift = [
            v for v in saved_variants
            if v['genomeVersion'] != GENOME_VERSION_GRCh38
        ]

        # Variants already on GRCh38 are skipped, but only after confirmation.
        num_already_lifted = len(saved_variants) - len(saved_variants_to_lift)
        if num_already_lifted:
            if raw_input(
                    'Found {} saved variants already on Hg38. Continue with liftover (y/n)? '
                    .format(num_already_lifted)) != 'y':
                raise CommandError(
                    'Error: found {} saved variants already on Hg38'.format(
                        num_already_lifted))
        logger.info(
            'Lifting over {} variants (skipping {} that are already lifted)'.
            format(len(saved_variants_to_lift), num_already_lifted))

        liftover_to_38 = LiftOver('hg19', 'hg38')
        # hg37 xpos -> hg38 xpos for positions that lifted successfully
        hg37_to_hg38_xpos = {}
        # hg37 xpos -> variant JSON for positions that could not be lifted
        lift_failed = {}
        for v in saved_variants_to_lift:
            # Convert each distinct xpos only once, whether it succeeded or failed.
            if not (hg37_to_hg38_xpos.get(v['xpos'])
                    or v['xpos'] in lift_failed):
                hg38_coord = liftover_to_38.convert_coordinate(
                    'chr{}'.format(v['chrom'].lstrip('chr')), int(v['pos']))
                if hg38_coord and hg38_coord[0]:
                    hg37_to_hg38_xpos[v['xpos']] = get_xpos(
                        hg38_coord[0][0], hg38_coord[0][1])
                else:
                    lift_failed[v['xpos']] = v

        if lift_failed:
            if raw_input(
                    'Unable to lift over the following {} coordinates. Continue with update (y/n)?: {} '
                    .format(
                        len(lift_failed), ', '.join([
                            '{}:{}-{}-{} ({})'.format(
                                v['chrom'], v['pos'], v['ref'], v['alt'],
                                ', '.join(v['familyGuids']))
                            for v in lift_failed.values()
                        ]))) != 'y':
                raise CommandError(
                    'Error: unable to lift over {} variants'.format(
                        len(lift_failed)))

        # (hg38 xpos, ref, alt) -> saved variant models at that lifted position.
        # Variants whose position failed to lift are left out.
        saved_variants_map = defaultdict(list)
        for v in saved_variants_to_lift:
            if hg37_to_hg38_xpos.get(v['xpos']):
                variant_model = saved_variant_models_by_guid[v['variantGuid']]
                saved_variants_map[(hg37_to_hg38_xpos[v['xpos']], v['ref'],
                                    v['alt'])].append(variant_model)

        es_variants = get_es_variants_for_variant_tuples(
            expected_families, saved_variants_map.keys())

        # Lifted variants that the index did not return.
        missing_variants = set(
            saved_variants_map.keys()) - {(v['xpos'], v['ref'], v['alt'])
                                          for v in es_variants}
        if missing_variants:
            missing_variant_strings = []
            for xpos, ref, alt in missing_variants:
                var_id = '{}-{}-{}'.format(xpos, ref, alt)
                for v in saved_variants_map[(xpos, ref, alt)]:
                    tags = v.varianttag_set.all()
                    notes = v.variantnote_set.all()
                    # Show the tag names, or the notes when the variant has no tags.
                    missing_variant_strings.append(
                        '{var_id} {family_id}: {tags} ({guid})'.format(
                            var_id=var_id,
                            family_id=v.family.family_id,
                            guid=v.guid,
                            tags=', '.join([
                                tag.variant_tag_type.name for tag in tags
                            ]) if tags else 'No Tags; {}'.format('; '.join(
                                [note.note for note in notes]))))
            if raw_input(
                    'Unable to find the following {} variants in the index. Continue with update (y/n)?:\n{}\n'
                    .format(len(missing_variants),
                            '\n'.join(missing_variant_strings))) != 'y':
                raise CommandError(
                    'Error: unable to find {} lifted-over variants'.format(
                        len(missing_variants)))

        logger.info('Successfully lifted over {} variants'.format(
            len(es_variants)))

        #  Update saved variants
        missing_family_count = 0
        for var in es_variants:
            saved_variant_models = saved_variants_map[(var['xpos'], var['ref'],
                                                       var['alt'])]
            # Saved variants whose family is not in the families of the ES variant.
            missing_saved_variants = [
                v for v in saved_variant_models
                if v.family.guid not in var['familyGuids']
            ]
            if missing_saved_variants:
                variant_id = '{}-{}-{}-{}'.format(var['chrom'], var['pos'],
                                                  var['ref'], var['alt'])
                if raw_input(
                    ('Variant {} (hg37: {}) not find for expected families {}. Continue with update (y/n)? '
                     .format(
                         variant_id, missing_saved_variants[0].xpos,
                         ', '.join([
                             '{} ({})'.format(v.family.guid, v.guid)
                             for v in missing_saved_variants
                         ])))) == 'y':
                    # Re-fetch the variant for all families of the saved variants;
                    # the result replaces `var` for the update below.
                    var = get_single_es_variant(
                        [v.family for v in saved_variant_models],
                        variant_id,
                        return_all_queried_families=True)
                    missing_family_count += len(missing_saved_variants)
                else:
                    raise CommandError(
                        'Error: unable to find family data for lifted over variant'
                    )
            # Overwrite the stored position and JSON of every model at this position.
            for saved_variant in saved_variant_models:
                saved_variant.xpos_start = var['xpos']
                saved_variant.saved_variant_json = var
                saved_variant.save()

        logger.info('Successfully updated {} variants'.format(
            len(es_variants)))

        # Update project and sample data
        update_model_from_json(project,
                               {'genome_version': GENOME_VERSION_GRCh38})

        reset_cached_search_results(project)

        logger.info('---Done---')
        logger.info(
            'Succesfully lifted over {} variants. Skipped {} failed variants. Family data not updated for {} variants'
            .format(len(es_variants),
                    len(missing_variants) + len(lift_failed),
                    missing_family_count))
Exemplo n.º 15
0
def saved_variants_page(request, tag):
    """Build the JSON response for all saved variants carrying a global tag.

    `tag` is the name of a VariantTagType that has no project. An optional
    `gene` query parameter restricts the variants to those whose
    saved_variant_json transcripts contain that key. When more than 10000
    variants match and no gene was supplied, a 400 response asking for a gene
    filter is returned instead.

    The successful response contains the saved variants, genes, projects,
    families, individuals and locus lists, each keyed by guid/id.
    """
    gene = request.GET.get('gene')
    tag_type = VariantTagType.objects.get(name=tag, project__isnull=True)

    saved_variant_models = SavedVariant.objects.filter(varianttag__variant_tag_type=tag_type)
    if gene:
        saved_variant_models = saved_variant_models.filter(saved_variant_json__transcripts__has_key=gene)

    exceeds_limit = saved_variant_models.count() > 10000
    if exceeds_limit and not gene:
        return create_json_response({'message': 'Select a gene to filter variants'}, status=400)

    prefetch_related_objects(saved_variant_models, 'family__project')
    saved_variants = get_json_for_saved_variants(saved_variant_models, add_tags=True, add_details=True)

    # Collect the distinct projects and families the variants belong to
    project_models_by_guid = {}
    families = set()
    for saved_variant_model in saved_variant_models:
        family = saved_variant_model.family
        project_models_by_guid[family.project.guid] = family.project
        families.add(family)
    individuals = Individual.objects.filter(family__in=families)

    genes = _saved_variant_genes(saved_variants)
    locus_list_guids = _add_locus_lists(project_models_by_guid.values(), saved_variants, genes)

    projects_json = get_json_for_projects(
        project_models_by_guid.values(), user=request.user, add_project_category_guids_field=False)
    functional_tag_types = get_json_for_variant_functional_data_tag_types()

    in_these_projects = Q(project__in=project_models_by_guid.values())
    without_project = Q(project__isnull=True)
    variant_tag_types = VariantTagType.objects.filter(in_these_projects | without_project)
    prefetch_related_objects(variant_tag_types, 'project')
    variant_tags_json = _get_json_for_models(variant_tag_types)

    # Only project-specific tag types get an entry; tag types without a project apply everywhere
    tag_projects = {}
    for tag_type_model in variant_tag_types:
        if tag_type_model.project:
            tag_projects[tag_type_model.guid] = tag_type_model.project.guid

    for project_json in projects_json:
        project_guid = project_json['projectGuid']
        project_variant_tags = []
        for tag_json in variant_tags_json:
            # Missing entries default to the current project, so they always match
            owner_guid = tag_projects.get(tag_json['variantTagTypeGuid'], project_guid)
            if owner_guid == project_guid:
                project_variant_tags.append(tag_json)
        ordered_tag_types = sorted(project_variant_tags, key=lambda variant_tag_type: variant_tag_type['order'])
        project_json.update({
            'locusListGuids': locus_list_guids,
            'variantTagTypes': ordered_tag_types,
            'variantFunctionalTagTypes': functional_tag_types,
        })

    families_json = _get_json_for_families(list(families), user=request.user, add_individual_guids_field=True)
    individuals_json = _get_json_for_individuals(individuals, user=request.user)
    locus_list_models = LocusList.objects.filter(guid__in=locus_list_guids)
    locus_lists_by_guid = {
        locus_list['locusListGuid']: locus_list
        for locus_list in get_json_for_locus_lists(locus_list_models, request.user)
    }

    saved_variants_by_guid = {variant['variantGuid']: variant for variant in saved_variants}
    projects_by_guid = {project['projectGuid']: project for project in projects_json}
    families_by_guid = {family['familyGuid']: family for family in families_json}
    individuals_by_guid = {indiv['individualGuid']: indiv for indiv in individuals_json}

    return create_json_response({
        'savedVariantsByGuid': saved_variants_by_guid,
        'genesById': genes,
        'projectsByGuid': projects_by_guid,
        'familiesByGuid': families_by_guid,
        'individualsByGuid': individuals_by_guid,
        'locusListsByGuid': locus_lists_by_guid,
    })
Exemplo n.º 16
0
    def handle(self, *args, **options):
        """Lift a project's saved variants over to GRCh38 and attach a new ES index.

        Reads ``options['project']`` (a project name or guid) and
        ``options['es_index']`` (the elasticsearch index to switch to).

        Steps, in order:
          1. Validate the index metadata against the project for GRCh38.
          2. Match the index's sample ids to sample records and check that no
             included family is missing members with loaded variant calls.
          3. Interactively offer to delete saved variants that have no family,
             or that have neither tags nor notes.
          4. Lift the remaining non-GRCh38 saved variants from hg19 to hg38.
          5. Look the lifted variants up in the index and store the new xpos
             and variant json on each saved variant.
          6. Update the project, its samples and vcf files, and reset the
             project's cached search results.

        Raises:
            Exception: if sample ids cannot be matched, family members or
                families are missing from the callset, a coordinate cannot be
                lifted over, an indexed variant lacks an expected family, or
                the user answers anything other than 'y' to a "continue"
                prompt.
        """
        project_arg = options['project']
        elasticsearch_index = options['es_index']

        project = Project.objects.get(Q(name=project_arg) | Q(guid=project_arg))
        logger.info('Updating project genome version for {}'.format(project.name))

        # Validate the provided index
        logger.info('Validating es index {}'.format(elasticsearch_index))
        sample_ids, index_metadata = get_elasticsearch_index_samples(elasticsearch_index)
        validate_index_metadata(index_metadata, project, elasticsearch_index, genome_version=GENOME_VERSION_GRCh38)
        sample_type = index_metadata['sampleType']
        dataset_path = index_metadata['sourceFilePath']

        matched_sample_id_to_sample_record = match_sample_ids_to_sample_records(
            project=project,
            sample_ids=sample_ids,
            sample_type=sample_type,
            dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
            elasticsearch_index=elasticsearch_index,
            sample_id_to_individual_id_mapping={},
        )

        unmatched_samples = set(sample_ids) - set(matched_sample_id_to_sample_record.keys())
        if unmatched_samples:
            raise Exception('Matches not found for ES sample ids: {}.'.format(', '.join(unmatched_samples)))

        # Group the matched individuals by family so that partially-included families can be detected
        included_family_individuals = defaultdict(set)
        individual_guids_by_id = {}
        for sample in matched_sample_id_to_sample_record.values():
            included_family_individuals[sample.individual.family].add(sample.individual.individual_id)
            individual_guids_by_id[sample.individual.individual_id] = sample.individual.guid
        missing_family_individuals = []
        for family, individual_ids in included_family_individuals.items():
            # Family members that already have loaded variant calls but are absent from this index
            missing_indivs = family.individual_set.filter(
                sample__sample_status=Sample.SAMPLE_STATUS_LOADED,
                sample__dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS
            ).exclude(individual_id__in=individual_ids)
            if missing_indivs:
                missing_family_individuals.append(
                    '{} ({})'.format(family.family_id, ', '.join([i.individual_id for i in missing_indivs]))
                )
        if missing_family_individuals:
            raise Exception(
                'The following families are included in the callset but are missing some family members: {}.'.format(
                    ', '.join(missing_family_individuals)
                ))

        # Get and clean up expected saved variants
        saved_variant_models_by_guid = {v.guid: v for v in SavedVariant.objects.filter(project=project)}
        deleted_no_family = set()
        deleted_no_tags = set()
        for guid, variant in saved_variant_models_by_guid.items():
            if not variant.family:
                deleted_no_family.add(guid)
            elif not (variant.varianttag_set.count() or variant.variantnote_set.count()):
                deleted_no_tags.add(guid)

        if deleted_no_family:
            if raw_input('Do you want to delete the following {} saved variants with no family (y/n)?: {} '.format(
                    len(deleted_no_family), ', '.join(deleted_no_family))) == 'y':
                for guid in deleted_no_family:
                    saved_variant_models_by_guid.pop(guid).delete()
                logger.info('Deleted {} variants'.format(len(deleted_no_family)))

        if deleted_no_tags:
            if raw_input('Do you want to delete the following {} saved variants with no tags (y/n)?: {} '.format(
                    len(deleted_no_tags), ', '.join(deleted_no_tags))) == 'y':
                for guid in deleted_no_tags:
                    saved_variant_models_by_guid.pop(guid).delete()
                logger.info('Deleted {} variants'.format(len(deleted_no_tags)))

        # NOTE(review): if the no-family deletion above is declined, None stays in expected_families and the
        # error formatting below would presumably fail on `f.family_id` - confirm the intended behavior
        expected_families = {sv.family for sv in saved_variant_models_by_guid.values()}
        missing_families = expected_families - set(included_family_individuals.keys())
        if missing_families:
            raise Exception(
                'The following families have saved variants but are missing from the callset: {}.'.format(
                    ', '.join([f.family_id for f in missing_families])
                ))

        # Lift-over saved variants
        saved_variants = get_json_for_saved_variants(
            saved_variant_models_by_guid.values(), add_details=True, project=project,
            individual_guids_by_id=individual_guids_by_id)
        saved_variants_to_lift = [v for v in saved_variants if v['genomeVersion'] != GENOME_VERSION_GRCh38]

        num_already_lifted = len(saved_variants) - len(saved_variants_to_lift)
        if num_already_lifted:
            if raw_input('Found {} saved variants already on Hg38. Continue with liftover (y/n)?'.format(num_already_lifted)) != 'y':
                raise Exception('Error: found {} saved variants already on Hg38'.format(num_already_lifted))
        logger.info('Lifting over {} variants (skipping {} that are already lifted)'.format(
            len(saved_variants_to_lift), num_already_lifted))

        liftover_to_38 = LiftOver('hg19', 'hg38')
        hg37_to_hg38_xpos = {}
        lift_failed = set()
        for v in saved_variants_to_lift:
            # Several saved variants can share a position, so only convert each xpos once
            if not (hg37_to_hg38_xpos.get(v['xpos']) or v['xpos'] in lift_failed):
                hg38_coord = liftover_to_38.convert_coordinate('chr{}'.format(v['chrom'].lstrip('chr')), int(v['pos']))
                if hg38_coord and hg38_coord[0]:
                    hg37_to_hg38_xpos[v['xpos']] = get_xpos(hg38_coord[0][0], hg38_coord[0][1])
                else:
                    lift_failed.add(v['xpos'])

        if lift_failed:
            # Stringify each coordinate before joining: str.join raises TypeError on non-string items,
            # which would replace this error with an unrelated one
            raise Exception(
                'Unable to lift over the following {} coordinates: {}'.format(
                    len(lift_failed), ', '.join([str(xpos) for xpos in lift_failed])))

        # Map each lifted (xpos, ref, alt) tuple to every saved variant model at that position
        saved_variants_map = defaultdict(list)
        for v in saved_variants_to_lift:
            variant_model = saved_variant_models_by_guid[v['variantGuid']]
            saved_variants_map[(hg37_to_hg38_xpos[v['xpos']], v['ref'], v['alt'])].append(variant_model)

        es_variants = get_es_variants_for_variant_tuples(expected_families, saved_variants_map.keys())

        missing_variants = set(saved_variants_map.keys()) - {(v['xpos'], v['ref'], v['alt']) for v in es_variants}
        if missing_variants:
            missing_variant_strings = ['{}-{}-{} ({})'.format(
                xpos, ref, alt,
                ', '.join(['{}: {}'.format(v.family.family_id, v.guid) for v in saved_variants_map[(xpos, ref, alt)]]))
                for xpos, ref, alt in missing_variants]
            if raw_input('Unable to find the following {} variants in the index. Continue with update (y/n)?: {} '.format(
                    len(missing_variants), ', '.join(missing_variant_strings))) != 'y':
                raise Exception('Error: unable to find {} lifted-over variants'.format(len(missing_variants)))

        logger.info('Successfully lifted over {} variants'.format(len(es_variants)))

        #  Update saved variants
        for var in es_variants:
            saved_variant_models = saved_variants_map[(var['xpos'], var['ref'], var['alt'])]
            missing_families = [v.family.guid for v in saved_variant_models if v.family.guid not in var['familyGuids']]
            if missing_families:
                raise Exception('Error with variant {}:{}-{}-{} not find for expected families {}; found in families {}'.format(
                    var['chrom'], var['pos'], var['ref'], var['alt'], ', '.join(missing_families), ', '.join(var['familyGuids'])
                ))
            for saved_variant in saved_variant_models:
                saved_variant.xpos_start = var['xpos']
                saved_variant.saved_variant_json = json.dumps(var)
                saved_variant.save()

        logger.info('Successfully updated {} variants'.format(len(es_variants)))

        # Update project and sample data
        update_model_from_json(project, {'genome_version': GENOME_VERSION_GRCh38, 'has_new_search': True})
        _update_samples(
            matched_sample_id_to_sample_record, elasticsearch_index=elasticsearch_index, dataset_path=dataset_path
        )
        update_xbrowse_vcfffiles(
            project, sample_type, elasticsearch_index, dataset_path, matched_sample_id_to_sample_record
        )

        reset_cached_search_results(project)

        logger.info('---Done---')
        logger.info('Succesfully lifted over {} variants. Skipped {} failed variants.'.format(
            len(es_variants), len(missing_variants)))