示例#1
0
def _get_json_for_user(user, is_anvil=None, fields=None):
    """Builds a camelCase-keyed JSON dict for the given User object

    Args:
        user (object): Django user model, or a lazy wrapper exposing the real user as `_wrapped`
        is_anvil: Optional value forwarded as `is_anvil` to every computed-field callable
        fields (array): Optional field names to include. Names found in neither MODEL_USER_FIELDS
            nor COMPUTED_USER_FIELDS are ignored; when empty or omitted, all fields are included

    Returns:
        dict: json object
    """

    # Django request.user actually stores the Django User object in a ._wrapped attribute
    if hasattr(user, '_wrapped'):
        user = user._wrapped

    if fields:
        model_names = [name for name in fields if name in MODEL_USER_FIELDS]
        computed_names = [
            name for name in fields if name in COMPUTED_USER_FIELDS
        ]
    else:
        model_names = MODEL_USER_FIELDS
        computed_names = COMPUTED_USER_FIELDS

    user_json = {}
    # Plain attributes are read straight off the user object
    for name in model_names:
        user_json[_to_camel_case(name)] = getattr(user, name)
    # Computed fields are produced by the callable registered under the field name
    for name in computed_names:
        compute = COMPUTED_USER_FIELDS[name]
        user_json[_to_camel_case(name)] = compute(user, is_anvil=is_anvil)
    return user_json
示例#2
0
def _get_json_for_models(models,
                         nested_fields=None,
                         user=None,
                         process_result=None,
                         guid_key=None):
    """Returns an array JSON representations of the given models.

    Args:
        models (array): Array of django models
        user (object): Django User object for determining whether to include restricted/internal-only fields
        nested_fields (array): Optional array of fields to get from the model that are nested on related objects
        process_result (lambda): Optional function to post-process a given model json
        guid_key (string): Optional key to use for the model's guid
    Returns:
        array: json objects
    """

    if not models:
        return []

    model_class = type(models[0])
    fields = copy(model_class._meta.json_fields)
    if user and user.is_staff:
        fields += getattr(model_class._meta, 'internal_json_fields', [])

    for nested_field in nested_fields or []:
        if not nested_field.get('value'):
            prefetch_related_objects(models,
                                     '__'.join(nested_field['fields'][:-1]))

    results = []
    for model in models:
        result = {
            _to_camel_case(field): getattr(model, field)
            for field in fields
        }
        for nested_field in (nested_fields or []):
            field_value = nested_field.get('value')
            if not field_value:
                field_value = model
                for field in nested_field['fields']:
                    field_value = getattr(field_value,
                                          field) if field_value else None
            result[nested_field.get(
                'key', _to_camel_case('_'.join(
                    nested_field['fields'])))] = field_value

        if result.get('guid'):
            guid_key = guid_key or '{}{}Guid'.format(
                model_class.__name__[0].lower(), model_class.__name__[1:])
            result[guid_key] = result.pop('guid')
        if result.get('createdBy'):
            result['createdBy'] = result['createdBy'].get_full_name(
            ) or result['createdBy'].email
        if process_result:
            process_result(result, model)
        results.append(result)

    return results
示例#3
0
def elasticsearch_status(request):
    """Reports Elasticsearch disk usage, index details and the seqr projects using each index.

    Args:
        request (object): the incoming request; it is not read by this function

    Returns:
        The value of create_json_response for a dict with keys 'indices', 'diskStats',
        'elasticsearchHost' and 'errors'. 'errors' lists every index that active samples
        reference but that was not matched to any index returned by Elasticsearch.
    """
    client = get_es_client()

    disk_fields = ['node', 'shards', 'disk.avail', 'disk.used', 'disk.percent']
    disk_status = [{
        _to_camel_case(field.replace('.', '_')): disk[field] for field in disk_fields
    } for disk in client.cat.allocation(format="json", h=','.join(disk_fields))]

    index_fields = ['index', 'docs.count', 'store.size', 'creation.date.string']
    # Indices whose name starts with any of these prefixes are left out of the report
    omitted_index_prefixes = ('.', 'index_operations_log')
    indices = [{
        _to_camel_case(field.replace('.', '_')): index[field] for field in index_fields
    } for index in client.cat.indices(format="json", h=','.join(index_fields))
        if not index['index'].startswith(omitted_index_prefixes)]

    aliases = defaultdict(list)
    for alias in client.cat.aliases(format="json", h='alias,index'):
        aliases[alias['alias']].append(alias['index'])

    index_metadata = get_index_metadata('_all', client, use_cache=False)

    active_samples = Sample.objects.filter(is_active=True).select_related('individual__family__project')

    # index name (or wildcard prefix) -> project -> guids of individuals with an active sample in it
    seqr_index_projects = defaultdict(lambda: defaultdict(set))
    for sample in active_samples:
        # Both values are the same for every index name of a sample, so look them up once
        project = sample.individual.family.project
        individual_guid = sample.individual.guid
        for index_name in sample.elasticsearch_index.split(','):
            if index_name in aliases:
                # An alias is expanded to the concrete indices it points at
                for aliased_index_name in aliases[index_name]:
                    seqr_index_projects[aliased_index_name][project].add(individual_guid)
            else:
                # Trailing '*' is dropped so wildcard names can be prefix-matched below
                seqr_index_projects[index_name.rstrip('*')][project].add(individual_guid)

    for index in indices:
        index_name = index['index']
        index.update(index_metadata[index_name])

        projects_for_index = []
        # Iterate over a snapshot of the keys because matched entries are popped during the loop
        for index_prefix in list(seqr_index_projects.keys()):
            if index_name.startswith(index_prefix):
                projects_for_index += list(seqr_index_projects.pop(index_prefix).keys())
        index['projects'] = [{'projectGuid': project.guid, 'projectName': project.name} for project in projects_for_index]

    # Whatever is still left in seqr_index_projects matched none of the reported indices
    errors = ['{} does not exist and is used by project(s) {}'.format(
        index, ', '.join(['{} ({} samples)'.format(p.name, len(indivs)) for p, indivs in project_individuals.items()])
    ) for index, project_individuals in seqr_index_projects.items() if project_individuals]

    return create_json_response({
        'indices': indices,
        'diskStats': disk_status,
        'elasticsearchHost': ELASTICSEARCH_SERVER,
        'errors': errors,
    })
示例#4
0
def _get_es_meta(client, meta_type, fields, filter_rows=None):
    return [{
        _to_camel_case(field.replace('.', '_')): o[field]
        for field in fields
    } for o in getattr(client.cat, meta_type)(format="json",
                                              h=','.join(fields))
            if filter_rows is None or filter_rows(o)]
示例#5
0
def _get_json_for_individuals(individuals, user=None, project_guid=None, family_guid=None, add_sample_guids_field=False, family_fields=None):
    """Serializes the given list of Individuals to JSON.

    Args:
        individuals (array): array of django models for the individual.
        user (object): Django User object for determining whether to include restricted/internal-only fields
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
        family_guid (string): An optional field to use as the familyGuid instead of querying the DB
        add_sample_guids_field (boolean): A flag to indicate whether sample guids should be added
        family_fields (array): Optional names of fields on the individual's family to include in each result
    Returns:
        array: array of json objects
    """

    def _modified_by_label(modified_by):
        # Objects with an email attribute are reduced to email (or username as a fallback);
        # anything else is passed through untouched
        if not hasattr(modified_by, 'email'):
            return modified_by
        return modified_by.email or modified_by.username

    def _parse_phenotips(raw_phenotips):
        # Unparseable data is logged and reported as None rather than failing the whole request
        if not raw_phenotips:
            return None
        try:
            return json.loads(raw_phenotips)
        except Exception as e:
            logger.error("Couldn't parse phenotips: {}".format(e))
            return None

    def _process_result(result, individual):
        # The parent model objects are replaced by their guid / id fields
        mother = result.pop('mother', None)
        father = result.pop('father', None)
        modified_by = result.get('caseReviewStatusLastModifiedBy')

        result.update({
            'caseReviewStatusLastModifiedBy': _modified_by_label(modified_by),
            'phenotipsData': _parse_phenotips(result['phenotipsData']),
            'maternalGuid': mother.guid if mother else None,
            'paternalGuid': father.guid if father else None,
            'maternalId': mother.individual_id if mother else None,
            'paternalId': father.individual_id if father else None,
            'displayName': result['displayName'] or result['individualId'],
        })

        if add_sample_guids_field:
            result['sampleGuids'] = [sample.guid for sample in individual.sample_set.all()]

    nested_fields = [
        {'fields': ('family', 'guid'), 'value': family_guid},
        {'fields': ('family', 'project', 'guid'), 'key': 'projectGuid', 'value': project_guid},
    ]
    nested_fields.extend(
        {'fields': ('family', name), 'key': _to_camel_case(name)} for name in family_fields or []
    )

    related_lookups = ['family', 'mother', 'father', 'case_review_status_last_modified_by']
    if add_sample_guids_field:
        related_lookups.append('sample_set')
    for lookup in related_lookups:
        prefetch_related_objects(individuals, lookup)

    return _get_json_for_models(individuals, nested_fields=nested_fields, user=user, process_result=_process_result)
示例#6
0
def _get_json_for_individuals(individuals, user=None, project_guid=None, family_guid=None, add_sample_guids_field=False, family_fields=None):
    """Serializes the given list of Individuals to JSON.

    Args:
        individuals (array): array of django models for the individual.
        user (object): Django User object for determining whether to include restricted/internal-only fields
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
        family_guid (string): An optional field to use as the familyGuid instead of querying the DB
        add_sample_guids_field (boolean): A flag to indicate whether sample guids should be added
        family_fields (array): Optional names of fields on the individual's family to include in each result
    Returns:
        array: array of json objects
    """

    def _modified_by_label(modified_by):
        # Objects with an email attribute are reduced to email (or username as a fallback);
        # anything else is passed through untouched
        if not hasattr(modified_by, 'email'):
            return modified_by
        return modified_by.email or modified_by.username

    def _parse_phenotips(raw_phenotips):
        # Unparseable data is logged and reported as None rather than failing the whole request
        if not raw_phenotips:
            return None
        try:
            return json.loads(raw_phenotips)
        except Exception as e:
            logger.error("Couldn't parse phenotips: {}".format(e))
            return None

    def _process_result(result, individual):
        # The parent model objects are replaced by their guid / id fields
        mother = result.pop('mother', None)
        father = result.pop('father', None)
        modified_by = result.get('caseReviewStatusLastModifiedBy')

        result.update({
            'caseReviewStatusLastModifiedBy': _modified_by_label(modified_by),
            'phenotipsData': _parse_phenotips(result['phenotipsData']),
            'maternalGuid': mother.guid if mother else None,
            'paternalGuid': father.guid if father else None,
            'maternalId': mother.individual_id if mother else None,
            'paternalId': father.individual_id if father else None,
            'displayName': result['displayName'] or result['individualId'],
        })

        if add_sample_guids_field:
            result['sampleGuids'] = [sample.guid for sample in individual.sample_set.all()]

    nested_fields = [
        {'fields': ('family', 'guid'), 'value': family_guid},
        {'fields': ('family', 'project', 'guid'), 'key': 'projectGuid', 'value': project_guid},
    ]
    nested_fields.extend(
        {'fields': ('family', name), 'key': _to_camel_case(name)} for name in family_fields or []
    )

    related_lookups = ['mother', 'father', 'case_review_status_last_modified_by']
    if add_sample_guids_field:
        related_lookups.append('sample_set')
    for lookup in related_lookups:
        prefetch_related_objects(individuals, lookup)

    return _get_json_for_models(individuals, nested_fields=nested_fields, user=user, process_result=_process_result)
示例#7
0
def _get_json_for_models(models, nested_fields=None, user=None, process_result=None, guid_key=None):
    """Returns an array JSON representations of the given models.

    Args:
        models (array): Array of django models
        user (object): Django User object for determining whether to include restricted/internal-only fields
        nested_fields (array): Optional array of fields to get from the model that are nested on related objects
        process_result (lambda): Optional function to post-process a given model json
        guid_key (string): Optional key to use for the model's guid
    Returns:
        array: json objects
    """

    if not models:
        return []

    model_class = type(models[0])
    fields = copy(model_class._meta.json_fields)
    if user and user.is_staff:
        fields += getattr(model_class._meta, 'internal_json_fields', [])

    results = []
    for model in models:
        result = {_to_camel_case(field): getattr(model, field) for field in fields}
        for nested_field in (nested_fields or []):
            field_value = nested_field.get('value')
            if not field_value:
                field_value = model
                for field in nested_field['fields']:
                    field_value = getattr(field_value, field) if field_value else None
            result[nested_field.get('key', _to_camel_case('_'.join(nested_field['fields'])))] = field_value

        if result.get('guid'):
            guid_key = guid_key or '{}{}Guid'.format(model_class.__name__[0].lower(), model_class.__name__[1:])
            result[guid_key] = result.pop('guid')
        if result.get('createdBy'):
            result['createdBy'] = result['createdBy'].get_full_name() or result['createdBy'].email
        if process_result:
            process_result(result, model)
        results.append(result)

    return results
示例#8
0
def _add_sample_type_counts(cursor, projects_by_guid):
    """Retrieves per-family analysis status counts from the database and adds these to each project
    in the 'projects_by_guid' dictionary.

    Args:
        cursor: connected database cursor that can be used to execute SQL queries.
        projects_by_guid (dict): projects for which to add analysis counts
    """

    if len(projects_by_guid) == 0:
        return {}

    sample_type_counts_query = """
        SELECT
          p.guid AS project_guid,
          s.sample_type AS sample_type,
          COUNT(distinct s.individual_id) AS num_samples
        FROM seqr_sample AS s
          JOIN seqr_individual AS i ON s.individual_id=i.id
          JOIN seqr_family AS f ON i.family_id=f.id
          JOIN seqr_project AS p ON f.project_id=p.id
        {projects_WHERE_clause}
        AND dataset_type='{variant_dataset_type}'
        AND sample_status='{loaded_sample_status}'
        GROUP BY p.guid, s.sample_type
    """.strip().format(
        projects_WHERE_clause=_to_WHERE_clause(
            [guid for guid in projects_by_guid]),
        variant_dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        loaded_sample_status=Sample.SAMPLE_STATUS_LOADED,
    )

    cursor.execute(sample_type_counts_query)

    columns = [_to_camel_case(col[0]) for col in cursor.description]
    for row in cursor.fetchall():
        record = dict(zip(columns, row))
        project_guid = record['projectGuid']
        sample_type = record['sampleType']
        num_samples = record['numSamples']

        if project_guid not in projects_by_guid:
            continue  # defensive programming

        if 'sampleTypeCounts' not in projects_by_guid[project_guid]:
            projects_by_guid[project_guid]['sampleTypeCounts'] = {}

        projects_by_guid[project_guid]['sampleTypeCounts'][
            sample_type] = num_samples
示例#9
0
def get_json_for_sample_dict(sample_dict):
    """Builds the JSON representation of a Sample from its dictionary form.

    Each name in Sample._meta.json_fields is read from the 'sample_<name>' key of the input
    (None when missing) and stored under its camelCase name.

    Args:
        sample_dict (dict): dictionary representation for the Sample. Must contain
            'project_guid' and 'individual_guid'.
    Returns:
        dict: json object
    """
    result = {}
    for name in Sample._meta.json_fields:
        result[_to_camel_case(name)] = sample_dict.get('sample_{}'.format(name))

    result['projectGuid'] = sample_dict['project_guid']
    result['individualGuid'] = sample_dict['individual_guid']
    # The generic 'guid' entry is renamed to the model-specific key
    result['sampleGuid'] = result.pop('guid')
    return result
示例#10
0
def get_json_for_sample_dict(sample_dict):
    """Builds the JSON representation of a Sample from its dictionary form.

    Each name in Sample._meta.json_fields is read from the 'sample_<name>' key of the input
    (None when missing) and stored under its camelCase name.

    Args:
        sample_dict (dict): dictionary representation for the Sample. Must contain
            'project_guid' and 'individual_guid'.
    Returns:
        dict: json object
    """
    result = {}
    for name in Sample._meta.json_fields:
        result[_to_camel_case(name)] = sample_dict.get('sample_{}'.format(name))

    result['projectGuid'] = sample_dict['project_guid']
    result['individualGuid'] = sample_dict['individual_guid']
    # The generic 'guid' entry is renamed to the model-specific key
    result['sampleGuid'] = result.pop('guid')
    return result
示例#11
0
def _add_sample_type_counts(cursor, projects_by_guid, user_is_staff=False):
    """Retrieves per-family analysis status counts from the database and adds these to each project
    in the 'projects_by_guid' dictionary.

    Args:
        cursor: connected database cursor that can be used to execute SQL queries.
        projects_by_guid (dict): projects for which to add analysis counts
    """

    if len(projects_by_guid) == 0:
        return {}

    projects_WHERE_clause = ""
    if not user_is_staff:
        projects_WHERE_clause = _to_WHERE_clause(
            [guid for guid in projects_by_guid])

    sample_type_counts_query = """
        SELECT
          p.guid AS project_guid,
          s.sample_type AS sample_type,
          COUNT(*) AS num_samples
        FROM seqr_sample AS s
          JOIN seqr_individual AS i ON s.individual_id=i.id
          JOIN seqr_family AS f ON i.family_id=f.id
          JOIN seqr_project AS p ON f.project_id=p.id
        %(projects_WHERE_clause)s
        GROUP BY p.guid, s.sample_type
    """.strip() % locals()

    cursor.execute(sample_type_counts_query)

    columns = [_to_camel_case(col[0]) for col in cursor.description]
    for row in cursor.fetchall():
        record = dict(zip(columns, row))
        project_guid = record['projectGuid']
        sample_type = record['sampleType']
        num_samples = record['numSamples']

        if project_guid not in projects_by_guid:
            continue  # defensive programming

        if 'sampleTypeCounts' not in projects_by_guid[project_guid]:
            projects_by_guid[project_guid]['sampleTypeCounts'] = {}

        projects_by_guid[project_guid]['sampleTypeCounts'][
            sample_type] = num_samples
示例#12
0
def _add_sample_type_counts(cursor, projects_by_guid):
    """Retrieves per-family analysis status counts from the database and adds these to each project
    in the 'projects_by_guid' dictionary.

    Args:
        cursor: connected database cursor that can be used to execute SQL queries.
        projects_by_guid (dict): projects for which to add analysis counts
    """

    if len(projects_by_guid) == 0:
        return {}

    sample_type_counts_query = """
        SELECT
          p.guid AS project_guid,
          s.sample_type AS sample_type,
          COUNT(distinct s.individual_id) AS num_samples
        FROM seqr_sample AS s
          JOIN seqr_individual AS i ON s.individual_id=i.id
          JOIN seqr_family AS f ON i.family_id=f.id
          JOIN seqr_project AS p ON f.project_id=p.id
        {projects_WHERE_clause}
        AND dataset_type='{variant_dataset_type}'
        AND sample_status='{loaded_sample_status}'
        GROUP BY p.guid, s.sample_type
    """.strip().format(
        projects_WHERE_clause=_to_WHERE_clause([guid for guid in projects_by_guid]),
        variant_dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        loaded_sample_status=Sample.SAMPLE_STATUS_LOADED,
    )

    cursor.execute(sample_type_counts_query)

    columns = [_to_camel_case(col[0]) for col in cursor.description]
    for row in cursor.fetchall():
        record = dict(zip(columns, row))
        project_guid = record['projectGuid']
        sample_type = record['sampleType']
        num_samples = record['numSamples']

        if project_guid not in projects_by_guid:
            continue  # defensive programming

        if 'sampleTypeCounts' not in projects_by_guid[project_guid]:
            projects_by_guid[project_guid]['sampleTypeCounts'] = {}

        projects_by_guid[project_guid]['sampleTypeCounts'][sample_type] = num_samples
示例#13
0
def _get_json_for_user(user):
    """Builds a camelCase-keyed JSON dict for the given User object

    Args:
        user (object): Django user model, or a lazy wrapper exposing the real user as `_wrapped`

    Returns:
        dict: json object with the copied user attributes plus a 'displayName' entry
    """

    # Django request.user actually stores the Django User object in a ._wrapped attribute
    if hasattr(user, '_wrapped'):
        user = user._wrapped

    copied_attributes = (
        'username', 'email', 'first_name', 'last_name', 'last_login', 'is_staff', 'date_joined', 'id',
    )
    user_json = {}
    for attribute in copied_attributes:
        user_json[_to_camel_case(attribute)] = getattr(user, attribute)
    user_json['displayName'] = user.get_full_name()
    return user_json
示例#14
0
def _get_json_for_user(user):
    """Builds a camelCase-keyed JSON dict for the given User object

    Args:
        user (object): Django user model, or a lazy wrapper exposing the real user as `_wrapped`

    Returns:
        dict: json object with the copied user attributes plus a 'displayName' entry
    """

    # Django request.user actually stores the Django User object in a ._wrapped attribute
    if hasattr(user, '_wrapped'):
        user = user._wrapped

    copied_attributes = (
        'username', 'email', 'first_name', 'last_name', 'last_login', 'is_staff', 'date_joined',
    )
    user_json = {}
    for attribute in copied_attributes:
        user_json[_to_camel_case(attribute)] = getattr(user, attribute)
    user_json['displayName'] = user.get_full_name()
    return user_json
示例#15
0
def _get_json_for_individuals(individuals,
                              user=None,
                              project_guid=None,
                              family_guid=None,
                              add_sample_guids_field=False,
                              family_fields=None,
                              skip_nested=False,
                              add_hpo_details=False,
                              is_analyst=None,
                              has_case_review_perm=None):
    """Returns a JSON representation for the given list of Individuals.

    Args:
        individuals (array): array of django models for the individual.
        user (object): Django User object for determining whether to include restricted/internal-only fields
        project_guid (string): An optional field to use as the projectGuid instead of querying the DB
        family_guid (boolean): An optional field to use as the familyGuid instead of querying the DB
        add_sample_guids_field (boolean): A flag to indicate weather sample ids should be added
    Returns:
        array: array of json objects
    """

    if not individuals:
        return []

    def _get_case_review_status_modified_by(modified_by):
        return modified_by.email or modified_by.username if hasattr(
            modified_by, 'email') else modified_by

    def _process_result(result, individual):
        mother = result.pop('mother', None)
        father = result.pop('father', None)

        result.update({
            'caseReviewStatusLastModifiedBy':
            _get_case_review_status_modified_by(
                result.get('caseReviewStatusLastModifiedBy')),
            'maternalGuid':
            mother.guid if mother else None,
            'paternalGuid':
            father.guid if father else None,
            'maternalId':
            mother.individual_id if mother else None,
            'paternalId':
            father.individual_id if father else None,
            'displayName':
            result['displayName'] or result['individualId'],
        })

        if add_sample_guids_field:
            result['sampleGuids'] = [
                s.guid for s in individual.sample_set.all()
            ]
            result['igvSampleGuids'] = [
                s.guid for s in individual.igvsample_set.all()
            ]

    kwargs = {
        'additional_model_fields':
        _get_case_review_fields(individuals[0], has_case_review_perm, user,
                                lambda indiv: indiv.family.project)
    }
    if project_guid or not skip_nested:
        nested_fields = [
            {
                'fields': ('family', 'guid'),
                'value': family_guid
            },
            {
                'fields': ('family', 'project', 'guid'),
                'key': 'projectGuid',
                'value': project_guid
            },
        ]
        if family_fields:
            for field in family_fields:
                nested_fields.append({
                    'fields': ('family', field),
                    'key': _to_camel_case(field)
                })
        kwargs.update({'nested_fields': nested_fields})
    else:
        kwargs['additional_model_fields'].append('family_id')

    if add_hpo_details:
        kwargs['additional_model_fields'] += [
            'features', 'absent_features', 'nonstandard_features',
            'absent_nonstandard_features'
        ]

    prefetch_related_objects(individuals, 'mother')
    prefetch_related_objects(individuals, 'father')
    if 'case_review_status_last_modified_by' in kwargs[
            'additional_model_fields']:
        prefetch_related_objects(individuals,
                                 'case_review_status_last_modified_by')
    if add_sample_guids_field:
        prefetch_related_objects(individuals, 'sample_set')
        prefetch_related_objects(individuals, 'igvsample_set')

    parsed_individuals = _get_json_for_models(individuals,
                                              user=user,
                                              is_analyst=is_analyst,
                                              process_result=_process_result,
                                              **kwargs)
    if add_hpo_details:
        all_hpo_ids = set()
        for i in parsed_individuals:
            all_hpo_ids.update(
                [feature['id'] for feature in i.get('features') or []])
            all_hpo_ids.update(
                [feature['id'] for feature in i.get('absentFeatures') or []])
        hpo_terms_by_id = {
            hpo.hpo_id: hpo
            for hpo in HumanPhenotypeOntology.objects.filter(
                hpo_id__in=all_hpo_ids)
        }
        for i in parsed_individuals:
            for feature in i.get('features') or []:
                hpo = hpo_terms_by_id.get(feature['id'])
                if hpo:
                    feature.update({
                        'category': hpo.category_id,
                        'label': hpo.name
                    })
            for feature in i.get('absentFeatures') or []:
                hpo = hpo_terms_by_id.get(feature['id'])
                if hpo:
                    feature.update({
                        'category': hpo.category_id,
                        'label': hpo.name
                    })

    return parsed_individuals
示例#16
0
def _get_empty_json_for_model(model_class):
    return {
        _to_camel_case(field): None
        for field in model_class._meta.json_fields
    }
示例#17
0
    'is_anvil':
    lambda user, is_anvil=None, **kwargs: is_anvil_authenticated(user)
    if is_anvil is None else is_anvil,
    'display_name':
    lambda user, **kwargs: user.get_full_name(),
    'is_analyst':
    lambda user, analyst_users=None, **kwargs: user in analyst_users
    if analyst_users is not None else user_is_analyst(user),
    'is_data_manager':
    lambda user, **kwargs: user_is_data_manager(user),
    'is_pm':
    lambda user, pm_users=None, **kwargs: user in pm_users
    if pm_users is not None else user_is_pm(user),
}

# Placeholder user json, keyed by camelCase field name. Built in three layers (later layers
# overwrite earlier ones on a key clash): every MAIN_USER_FIELDS entry maps to an empty string,
# every BOOL_USER_FIELDS entry maps to the value stored for it in that dict, and every
# COMPUTED_USER_FIELDS entry maps to False.
DEFAULT_USER = {_to_camel_case(field): '' for field in MAIN_USER_FIELDS}
DEFAULT_USER.update(
    {_to_camel_case(field): val
     for field, val in BOOL_USER_FIELDS.items()})
DEFAULT_USER.update(
    {_to_camel_case(field): False
     for field in COMPUTED_USER_FIELDS.keys()})


def _get_json_for_user(user,
                       is_anvil=None,
                       fields=None,
                       analyst_users=None,
                       pm_users=None):
    """Returns JSON representation of the given User object
示例#18
0
def _retrieve_projects_by_guid(cursor, projects_user_can_view, projects_user_can_edit):
    """Retrieves all relevant metadata for each project from the database, and returns a 'projects_by_guid' dictionary.

    Args:
        cursor: connected database cursor that can be used to execute SQL queries.
        projects_user_can_view (list): list of Django Project objects for which the user has CAN_VIEW permissions.
        projects_user_can_edit (list): list of Django Project objects for which the user has CAN_EDIT permissions.
    Returns:
        Dictionary that maps each project's GUID to a dictionary of key-value pairs representing
        attributes of that project.
    """

    if len(projects_user_can_view) == 0:
        return {}

    # get all projects this user has permissions to view
    projects_WHERE_clause = _to_WHERE_clause([p.guid for p in projects_user_can_view])

    # use raw SQL to avoid making N+1 queries.
    num_families_subquery = """
      SELECT count(*) FROM seqr_family
        WHERE project_id=p.id
    """.strip()

    num_variant_tags_subquery = """
      SELECT count(*) FROM seqr_varianttag AS v
        JOIN seqr_savedvariant AS s ON v.saved_variant_id=s.id
        WHERE project_id=p.id
    """.strip()

    num_individuals_subquery = """
      SELECT count(*) FROM seqr_individual AS i
        JOIN seqr_family AS f ON i.family_id=f.id
        WHERE f.project_id=p.id
    """.strip()

    project_fields = ', '.join(Project._meta.json_fields)

    projects_query = """
      SELECT
        guid AS project_guid,
        {project_fields},
        ({num_variant_tags_subquery}) AS num_variant_tags,
        ({num_families_subquery}) AS num_families,
        ({num_individuals_subquery}) AS num_individuals
      FROM seqr_project AS p
      {projects_WHERE_clause}
    """.strip().format(
        project_fields=project_fields, num_variant_tags_subquery=num_variant_tags_subquery,
        num_families_subquery=num_families_subquery, num_individuals_subquery=num_individuals_subquery,
        projects_WHERE_clause=projects_WHERE_clause
    )

    cursor.execute(projects_query)

    columns = [_to_camel_case(col[0]) for col in cursor.description]

    projects_by_guid = {
        r['projectGuid']: r for r in (dict(zip(columns, row)) for row in cursor.fetchall())
    }

    # mark all projects where this user has edit permissions
    for project in projects_user_can_edit:
        projects_by_guid[project.guid]['canEdit'] = True

    return projects_by_guid
示例#19
0
def _gene_json(gene):
    """Returns a copy of the gene dict with camelCase keys, including those of its phenotype info.

    Note that the input dict itself is modified: a 'constraints' entry computed by
    _parse_gene_constraints is written onto it before the keys are converted.

    Args:
        gene (dict): gene record
    Returns:
        dict: json object; 'phenotypeInfo' is always present (an empty dict when the gene has none)
    """
    gene['constraints'] = _parse_gene_constraints(gene)

    gene_json = {}
    for key, value in gene.items():
        gene_json[_to_camel_case(key)] = value

    phenotype_info = gene_json.get('phenotypeInfo', {})
    gene_json['phenotypeInfo'] = {
        _to_camel_case(key): value for key, value in phenotype_info.items()
    }
    return gene_json
示例#20
0
文件: staff_api.py 项目: lecb/seqr
def elasticsearch_status(request):
    """Report elasticsearch disk usage, index metadata and the projects that use each index.

    Args:
        request: the incoming request; it is not read by this function
    Returns:
        The result of create_json_response for a dict with 'indices', 'diskStats',
        'elasticsearchHost', 'mongoProjects' and 'errors' entries.
    """
    client = get_es_client()

    # One dict per row of the cat allocation API, keyed by the camelCased column name
    disk_fields = ['node', 'disk.avail', 'disk.used', 'disk.percent']
    disk_status = [{
        _to_camel_case(field.replace('.', '_')): disk[field]
        for field in disk_fields
    } for disk in client.cat.allocation(format="json", h=','.join(disk_fields))
                   ]

    # Same for the cat indices API, leaving out the '.kibana' and 'index_operations_log' indices
    index_fields = [
        'index', 'docs.count', 'store.size', 'creation.date.string'
    ]
    indices = [{
        _to_camel_case(field.replace('.', '_')): index[field]
        for field in index_fields
    } for index in client.cat.indices(format="json", h=','.join(index_fields))
               if index['index'] not in ['.kibana', 'index_operations_log']]

    # alias name -> names of the indices it points to
    aliases = defaultdict(list)
    for alias in client.cat.aliases(format="json", h='alias,index'):
        aliases[alias['alias']].append(alias['index'])

    mappings = Index('_all', using=client).get_mapping(doc_type='variant')

    # index name (aliases expanded, trailing '*' removed) -> project -> guids of its individuals
    latest_loaded_samples = get_latest_loaded_samples()
    prefetch_related_objects(latest_loaded_samples,
                             'individual__family__project')
    seqr_index_projects = defaultdict(lambda: defaultdict(set))
    es_projects = set()
    for sample in latest_loaded_samples:
        for index_name in sample.elasticsearch_index.split(','):
            project = sample.individual.family.project
            es_projects.add(project)
            if index_name in aliases:
                for aliased_index_name in aliases[index_name]:
                    seqr_index_projects[aliased_index_name][project].add(
                        sample.individual.guid)
            else:
                seqr_index_projects[index_name.rstrip('*')][project].add(
                    sample.individual.guid)

    for index in indices:
        index_name = index['index']
        index_mapping = mappings[index_name]['mappings']['variant']
        index.update(index_mapping.get('_meta', {}))
        index['hasNestedGenotypes'] = 'samples_num_alt_1' in index_mapping[
            'properties']

        projects_for_index = []
        # Iterate over a snapshot of the keys: matched entries are popped inside the loop, and
        # changing a dict's size while iterating over it directly raises RuntimeError on Python 3.
        for index_prefix in list(seqr_index_projects):
            if index_name.startswith(index_prefix):
                projects_for_index += seqr_index_projects.pop(
                    index_prefix).keys()
        index['projects'] = [{
            'projectGuid': project.guid,
            'projectName': project.name
        } for project in projects_for_index]

    # Entries still left in seqr_index_projects did not match the name of any listed index
    errors = [
        '{} does not exist and is used by project(s) {}'.format(
            index, ', '.join([
                '{} ({} samples)'.format(p.name, len(indivs))
                for p, indivs in project_individuals.items()
            ])) for index, project_individuals in seqr_index_projects.items()
        if project_individuals
    ]

    # TODO remove once all projects are switched off of mongo
    all_mongo_samples = Sample.objects.filter(
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_status=Sample.SAMPLE_STATUS_LOADED,
        elasticsearch_index__isnull=True,
    ).exclude(individual__family__project__in=es_projects).prefetch_related(
        'individual', 'individual__family__project')
    # individual guid -> most recent loaded_date among that individual's samples
    mongo_sample_individual_max_loaded_date = {
        agg['individual__guid']: agg['max_loaded_date']
        for agg in all_mongo_samples.values('individual__guid').annotate(
            max_loaded_date=Max('loaded_date'))
    }
    # Only samples carrying their individual's most recent loaded_date contribute a file path
    mongo_project_samples = defaultdict(set)
    for s in all_mongo_samples:
        if s.loaded_date == mongo_sample_individual_max_loaded_date[
                s.individual.guid]:
            mongo_project_samples[s.individual.family.project].add(
                s.dataset_file_path)
    mongo_projects = [{
        'projectGuid': project.guid,
        'projectName': project.name,
        'sourceFilePaths': sample_file_paths
    } for project, sample_file_paths in mongo_project_samples.items()]

    return create_json_response({
        'indices': indices,
        'diskStats': disk_status,
        'elasticsearchHost': ELASTICSEARCH_SERVER,
        'mongoProjects': mongo_projects,
        'errors': errors,
    })
示例#21
0
def _retrieve_projects_by_guid(cursor, projects_user_can_view, projects_user_can_edit):
    """Retrieves all relevant metadata for each project from the database, and returns a 'projects_by_guid' dictionary.

    Args:
        cursor: connected database cursor that can be used to execute SQL queries.
        projects_user_can_view (list): list of Django Project objects for which the user has CAN_VIEW permissions.
        projects_user_can_edit (list): list of Django Project objects for which the user has CAN_EDIT permissions.
    Returns:
        Dictionary that maps each project's GUID to a dictionary of key-value pairs representing
        attributes of that project.
    """

    if len(projects_user_can_view) == 0:
        return {}

    # get all projects this user has permissions to view
    projects_WHERE_clause = _to_WHERE_clause([p.guid for p in projects_user_can_view])

    # use raw SQL to avoid making N+1 queries.
    num_families_subquery = """
      SELECT count(*) FROM seqr_family
        WHERE project_id=p.id
    """.strip()

    num_variant_tags_subquery = """
      SELECT count(*) FROM seqr_varianttag AS v
        JOIN seqr_varianttagtype AS t ON v.variant_tag_type_id=t.id
        WHERE project_id=p.id
    """.strip()

    num_individuals_subquery = """
      SELECT count(*) FROM seqr_individual AS i
        JOIN seqr_family AS f ON i.family_id=f.id
        WHERE f.project_id=p.id
    """.strip()

    project_fields = ', '.join(Project._meta.json_fields)

    projects_query = """
      SELECT
        guid AS project_guid,
        {project_fields},
        ({num_variant_tags_subquery}) AS num_variant_tags,
        ({num_families_subquery}) AS num_families,
        ({num_individuals_subquery}) AS num_individuals
      FROM seqr_project AS p
      {projects_WHERE_clause}
    """.strip().format(
        project_fields=project_fields, num_variant_tags_subquery=num_variant_tags_subquery,
        num_families_subquery=num_families_subquery, num_individuals_subquery=num_individuals_subquery,
        projects_WHERE_clause=projects_WHERE_clause
    )

    cursor.execute(projects_query)

    columns = [_to_camel_case(col[0]) for col in cursor.description]

    projects_by_guid = {
        r['projectGuid']: r for r in (dict(zip(columns, row)) for row in cursor.fetchall())
    }

    # mark all projects where this user has edit permissions
    for project in projects_user_can_edit:
        projects_by_guid[project.guid]['canEdit'] = True

    return projects_by_guid
示例#22
0
    def _parse_hit(self, raw_hit):
        """Convert a raw elasticsearch hit into the variant JSON dict for the response.

        Args:
            raw_hit: search hit supporting key access and membership tests, with a `.meta`
                attribute providing `index` and, optionally, `matched_queries` and `sort`
        Returns:
            dict: the core and nested variant fields plus 'familyGuids', 'genotypes',
            'genomeVersion', the lifted-over coordinates, 'mainTranscript', 'populations',
            'predictions' and 'transcripts'
        """
        hit = {field: raw_hit[field] for field in QUERY_FIELD_NAMES if field in raw_hit}
        index_name = raw_hit.meta.index
        index_family_samples = self.samples_by_family_index[index_name]

        if not hasattr(raw_hit.meta, 'matched_queries'):
            # Searches for all inheritance and all families do not filter on inheritance so there are no
            # matched_queries; use every family with a sample listed in one of the has-alt fields instead
            alt_allele_samples = set()
            for alt_samples_field in HAS_ALT_FIELD_KEYS:
                alt_allele_samples.update(hit[alt_samples_field])
            family_guids = []
            for family_guid, family_samples in index_family_samples.items():
                if any(sample_id in alt_allele_samples for sample_id in family_samples.keys()):
                    family_guids.append(family_guid)
        else:
            family_guids = list(raw_hit.meta.matched_queries)

        # individual guid -> genotype fields, for the samples belonging to the selected families
        genotypes = {}
        for family_guid in family_guids:
            samples_by_id = index_family_samples[family_guid]
            for genotype_hit in hit[GENOTYPES_FIELD_KEY]:
                if genotype_hit['sample_id'] in samples_by_id:
                    individual_guid = samples_by_id[genotype_hit['sample_id']].individual.guid
                    genotypes[individual_guid] = _get_field_values(genotype_hit, GENOTYPE_FIELDS_CONFIG)

        genome_version = self.index_metadata[index_name].get('genomeVersion')
        lifted_over_genome_version = lifted_over_chrom = lifted_over_pos = None
        liftover_grch38_to_grch37 = _liftover_grch38_to_grch37()
        if liftover_grch38_to_grch37 and genome_version == GENOME_VERSION_GRCh38:
            grch37_coord = liftover_grch38_to_grch37.convert_coordinate(
                'chr{}'.format(hit['contig'].lstrip('chr')), int(hit['start'])
            )
            # the lifted-over fields stay None when the conversion returns no coordinates
            if grch37_coord and grch37_coord[0]:
                lifted_over_genome_version = GENOME_VERSION_GRCh37
                lifted_over_chrom = grch37_coord[0][0].lstrip('chr')
                lifted_over_pos = grch37_coord[0][1]

        populations = {
            population: _get_field_values(
                hit,
                POPULATION_RESPONSE_FIELD_CONFIGS,
                format_response_key=lambda key: key.lower(),
                lookup_field_prefix=population,
                existing_fields=self.index_metadata[index_name]['fields'],
                get_addl_fields=lambda field, field_config: [pop_config.get(field)] + [
                    '{}_{}'.format(population, custom_field) for custom_field in field_config.get('fields', [])
                ],
            )
            for population, pop_config in POPULATIONS.items()
        }

        sorted_transcripts = []
        for raw_transcript in hit[SORTED_TRANSCRIPTS_FIELD_KEY] or []:
            sorted_transcripts.append({_to_camel_case(k): v for k, v in raw_transcript.to_dict().items()})

        # group the transcripts by gene, keeping their sorted order within each gene
        transcripts = defaultdict(list)
        for transcript in sorted_transcripts:
            transcripts[transcript['geneId']].append(transcript)

        result = _get_field_values(hit, CORE_FIELDS_CONFIG, format_response_key=str)
        for field_name, fields in NESTED_FIELDS.items():
            result[field_name] = _get_field_values(hit, fields, lookup_field_prefix=field_name)

        if hasattr(raw_hit.meta, 'sort'):
            result['_sort'] = [
                _parse_es_sort(sort_value, self._sort[i]) for i, sort_value in enumerate(raw_hit.meta.sort)
            ]

        main_transcript = sorted_transcripts[0] if sorted_transcripts else {}
        predictions = _get_field_values(
            hit, PREDICTION_FIELDS_CONFIG, format_response_key=lambda key: key.split('_')[1].lower()
        )
        result.update({
            'familyGuids': sorted(family_guids),
            'genotypes': genotypes,
            'genomeVersion': genome_version,
            'liftedOverGenomeVersion': lifted_over_genome_version,
            'liftedOverChrom': lifted_over_chrom,
            'liftedOverPos': lifted_over_pos,
            'mainTranscript': main_transcript,
            'populations': populations,
            'predictions': predictions,
            'transcripts': transcripts,
        })
        return result
示例#23
0
def _get_empty_json_for_model(model_class):
    return {_to_camel_case(field): None for field in model_class._meta.json_fields}
示例#24
0
def elasticsearch_status(request):
    """Report elasticsearch disk usage, index metadata and the projects that use each index.

    Args:
        request: the incoming request; it is not read by this function
    Returns:
        The result of create_json_response for a dict with 'indices', 'diskStats',
        'elasticsearchHost', 'mongoProjects' and 'errors' entries.
    """
    client = get_es_client()

    # One dict per row of the cat allocation API, keyed by the camelCased column name
    disk_fields = ['node', 'disk.avail', 'disk.used', 'disk.percent']
    disk_status = [{
        _to_camel_case(field.replace('.', '_')): disk[field] for field in disk_fields
    } for disk in client.cat.allocation(format="json", h=','.join(disk_fields))]

    # Same for the cat indices API, leaving out the '.kibana' and 'index_operations_log' indices
    index_fields = ['index', 'docs.count', 'store.size', 'creation.date.string']
    indices = [{
        _to_camel_case(field.replace('.', '_')): index[field] for field in index_fields
    } for index in client.cat.indices(format="json", h=','.join(index_fields))
        if index['index'] not in ['.kibana', 'index_operations_log']]

    # alias name -> names of the indices it points to
    aliases = defaultdict(list)
    for alias in client.cat.aliases(format="json", h='alias,index'):
        aliases[alias['alias']].append(alias['index'])

    mappings = Index('_all', using=client).get_mapping(doc_type='variant')

    # index name (aliases expanded, trailing '*' removed) -> project -> guids of its individuals
    latest_loaded_samples = get_latest_loaded_samples()
    prefetch_related_objects(latest_loaded_samples, 'individual__family__project')
    seqr_index_projects = defaultdict(lambda: defaultdict(set))
    es_projects = set()
    for sample in latest_loaded_samples:
        for index_name in sample.elasticsearch_index.split(','):
            project = sample.individual.family.project
            es_projects.add(project)
            if index_name in aliases:
                for aliased_index_name in aliases[index_name]:
                    seqr_index_projects[aliased_index_name][project].add(sample.individual.guid)
            else:
                seqr_index_projects[index_name.rstrip('*')][project].add(sample.individual.guid)

    for index in indices:
        index_name = index['index']
        index_mapping = mappings[index_name]['mappings']['variant']
        index.update(index_mapping.get('_meta', {}))
        index['hasNestedGenotypes'] = 'samples_num_alt_1' in index_mapping['properties']

        projects_for_index = []
        # Iterate over a snapshot of the keys: matched entries are popped inside the loop, and
        # changing a dict's size while iterating over it directly raises RuntimeError on Python 3.
        for index_prefix in list(seqr_index_projects):
            if index_name.startswith(index_prefix):
                projects_for_index += seqr_index_projects.pop(index_prefix).keys()
        index['projects'] = [{'projectGuid': project.guid, 'projectName': project.name} for project in projects_for_index]

    # Entries still left in seqr_index_projects did not match the name of any listed index
    errors = ['{} does not exist and is used by project(s) {}'.format(
        index, ', '.join(['{} ({} samples)'.format(p.name, len(indivs)) for p, indivs in project_individuals.items()])
    ) for index, project_individuals in seqr_index_projects.items() if project_individuals]

    # TODO remove once all projects are switched off of mongo
    all_mongo_samples = Sample.objects.filter(
        dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS,
        sample_status=Sample.SAMPLE_STATUS_LOADED,
        elasticsearch_index__isnull=True,
    ).exclude(individual__family__project__in=es_projects).prefetch_related('individual', 'individual__family__project')
    # individual guid -> most recent loaded_date among that individual's samples
    mongo_sample_individual_max_loaded_date = {
        agg['individual__guid']: agg['max_loaded_date'] for agg in
        all_mongo_samples.values('individual__guid').annotate(max_loaded_date=Max('loaded_date'))
    }
    # Only samples carrying their individual's most recent loaded_date contribute a file path
    mongo_project_samples = defaultdict(set)
    for s in all_mongo_samples:
        if s.loaded_date == mongo_sample_individual_max_loaded_date[s.individual.guid]:
            mongo_project_samples[s.individual.family.project].add(s.dataset_file_path)
    mongo_projects = [{'projectGuid': project.guid, 'projectName': project.name, 'sourceFilePaths': sample_file_paths}
                      for project, sample_file_paths in mongo_project_samples.items()]

    return create_json_response({
        'indices': indices,
        'diskStats': disk_status,
        'elasticsearchHost': ELASTICSEARCH_SERVER,
        'mongoProjects': mongo_projects,
        'errors': errors,
    })
示例#25
0
def _retrieve_sample_batches_by_guid(cursor, projects_by_guid, user_is_staff=False):
    """Retrieves sample batches from the database, and returns a 'sample_batches_by_guid' dictionary,
    while also adding a 'sampleBatchGuids' attribute to each project dict in 'projects_by_guid'

    Args:
        cursor: connected database cursor that can be used to execute SQL queries.
        projects_by_guid: Dictionary that maps each project's GUID to a dictionary of key-value pairs
            representing attributes of that project.

    Returns:
        Dictionary that maps each sample batch's GUID to a dictionary of key-value pairs representing
        attributes of this sample batch.
    """
    if len(projects_by_guid) == 0:
        return {}

    projects_WHERE_clause = ""
    if not user_is_staff:
        projects_WHERE_clause = _to_WHERE_clause([guid for guid in projects_by_guid])

    num_samples_subquery = """
      SELECT COUNT(*) FROM seqr_sample AS subquery_s
        WHERE subquery_s.sample_batch_id=sb.id
    """
    sample_batch_query = """
        SELECT
          p.guid AS project_guid,
          sb.guid AS sample_batch_guid,
          sb.id AS sample_batch_id,
          sb.sample_type AS sample_type,
          (%(num_samples_subquery)s) AS num_samples
        FROM seqr_samplebatch AS sb
          JOIN seqr_sample AS s ON sb.id=s.sample_batch_id
          JOIN seqr_individual_samples AS iss ON iss.sample_id=s.id
          JOIN seqr_individual AS i ON iss.individual_id=i.id
          JOIN seqr_family AS f ON i.family_id=f.id
          JOIN seqr_project AS p ON f.project_id=p.id
        %(projects_WHERE_clause)s
        GROUP BY p.guid, sb.guid, sb.id, sb.sample_type
    """.strip() % locals()

    # TODO retrieve sample batches based on sample batch permissions instead of going by project permissions

    cursor.execute(sample_batch_query)

    columns = [_to_camel_case(col[0]) for col in cursor.description]
    sample_batches_by_guid = {}
    for row in cursor.fetchall():
        sample_batch_project_record = dict(zip(columns, row))
        sample_batch_guid = sample_batch_project_record['sampleBatchGuid']

        project_guid = sample_batch_project_record['projectGuid']

        del sample_batch_project_record['projectGuid']
        #del sample_batch_project_record['sampleBatchGuid']

        sample_batches_by_guid[sample_batch_guid] = sample_batch_project_record

        project_record = projects_by_guid[project_guid]
        if 'sampleBatchGuids' not in project_record:
            project_record['sampleBatchGuids'] = []
        project_record['sampleBatchGuids'].append(sample_batch_guid)

    return sample_batches_by_guid
示例#26
0
def _get_json_for_record(record, fields):
    return {_to_camel_case(field[0]): record.get(field[1]) for field in fields}
示例#27
0
def _parse_es_hit(raw_hit, family_samples_by_id):
    """Convert a raw elasticsearch hit into the variant JSON dict for the response.

    Args:
        raw_hit: search hit supporting key access and membership tests, with a `.meta`
            attribute providing `matched_queries` and, optionally, `sort`. The entries of
            `matched_queries` are used as keys into `family_samples_by_id`.
        family_samples_by_id (dict): maps family guid to a dict of sample id -> sample object;
            samples are read for `.individual.guid` and `.individual.family.project`
    Returns:
        dict: the core and nested variant fields plus 'projectGuid', 'familyGuids', 'genotypes',
        'genomeVersion', the lifted-over coordinates, 'mainTranscript', 'populations',
        'predictions' and 'transcripts'
    """
    hit = {k: raw_hit[k] for k in QUERY_FIELD_NAMES if k in raw_hit}

    # individual guid -> genotype fields, for the samples belonging to the matched families
    genotypes = {}
    family_guids = list(raw_hit.meta.matched_queries)
    for family_guid in family_guids:
        samples_by_id = family_samples_by_id[family_guid]
        genotypes.update({
            samples_by_id[genotype_hit['sample_id']].individual.guid:
            _get_field_values(genotype_hit, GENOTYPE_FIELDS_CONFIG)
            for genotype_hit in hit[GENOTYPES_FIELD_KEY]
            if genotype_hit['sample_id'] in samples_by_id
        })

    # TODO better handling for multi-project searches
    # The project is taken from one sample of the first matched family. list() keeps this working
    # on Python 3, where dict.values() returns a view that does not support indexing.
    first_family_samples = list(family_samples_by_id[family_guids[0]].values())
    project = first_family_samples[0].individual.family.project

    genome_version = project.genome_version
    # NOTE(review): lifted_over_genome_version is never assigned below, so
    # 'liftedOverGenomeVersion' is always None even when the liftover succeeds - confirm intent
    lifted_over_genome_version = None
    lifted_over_chrom = None
    lifted_over_pos = None
    liftover_grch38_to_grch37 = _liftover_grch38_to_grch37()
    if liftover_grch38_to_grch37 and genome_version == GENOME_VERSION_GRCh38:
        grch37_coord = liftover_grch38_to_grch37.convert_coordinate(
            'chr{}'.format(hit['contig'].lstrip('chr')), int(hit['start']))
        # the lifted-over fields stay None when the conversion returns no coordinates
        if grch37_coord and grch37_coord[0]:
            lifted_over_chrom = grch37_coord[0][0].lstrip('chr')
            lifted_over_pos = grch37_coord[0][1]

    populations = {
        population: _get_field_values(
            hit,
            POPULATION_RESPONSE_FIELD_CONFIGS,
            format_response_key=lambda key: key.lower(),
            lookup_field_prefix=population,
            get_addl_fields=lambda field, field_config:
            [pop_config.get(field)] + [
                '{}_{}'.format(population, custom_field)
                for custom_field in field_config.get('fields', [])
            ],
        )
        for population, pop_config in POPULATIONS.items()
    }

    sorted_transcripts = [{
        _to_camel_case(k): v
        for k, v in transcript.to_dict().items()
    } for transcript in hit[SORTED_TRANSCRIPTS_FIELD_KEY] or []]
    # group the transcripts by gene, keeping their sorted order within each gene
    transcripts = defaultdict(list)
    for transcript in sorted_transcripts:
        transcripts[transcript['geneId']].append(transcript)

    result = _get_field_values(hit,
                               CORE_FIELDS_CONFIG,
                               format_response_key=str)
    result.update({
        field_name: _get_field_values(hit,
                                      fields,
                                      lookup_field_prefix=field_name)
        for field_name, fields in NESTED_FIELDS.items()
    })
    if hasattr(raw_hit.meta, 'sort'):
        result['_sort'] = [_parse_es_sort(sort) for sort in raw_hit.meta.sort]
    result.update({
        'projectGuid': project.guid,
        'familyGuids': family_guids,
        'genotypes': genotypes,
        'genomeVersion': genome_version,
        'liftedOverGenomeVersion': lifted_over_genome_version,
        'liftedOverChrom': lifted_over_chrom,
        'liftedOverPos': lifted_over_pos,
        'mainTranscript': sorted_transcripts[0] if len(sorted_transcripts) else {},
        'populations': populations,
        'predictions': _get_field_values(
            hit,
            PREDICTION_FIELDS_CONFIG,
            format_response_key=lambda key: key.split('_')[1].lower()),
        'transcripts': transcripts,
    })
    return result