# ~ setup ~
# For local development, load credentials from a .env file and start a
# Molgenis session against the ACC (acceptance) server. Swap the commented
# host line to target PROD instead.
from dotenv import load_dotenv
from os import environ
load_dotenv()

# host = environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']

rd3 = Molgenis(url=host)
rd3.login(
    username=environ['MOLGENIS_ACC_USR'],
    password=environ['MOLGENIS_ACC_PWD'])

# pull RD3 data: phenopacket file metadata from the portal staging table,
# and freeze1 subject metadata for later comparison
files = rd3.get(
    entity='rd3_portal_cluster',
    q='type=="phenopacket"',
    attributes='release,name,type',
    batch_size=10000)

subjects = rd3.get(
    entity='rd3_freeze1_subject',
    attributes='id,subjectID,patch',
    batch_size=10000)

statusMsg('File metadata entries pulled: {}'.format(len(files)))
statusMsg('Subject metadata entries pulled: {}'.format(len(subjects)))

# extract the subject ID from each file name by stripping an optional
# date stamp plus the '.json' extension (e.g. 'P0001.2021-06-01.json'
# and 'P0001.json' both reduce to 'P0001').
# BUGFIX: dots are escaped (\.) — the original pattern used bare '.',
# which matches ANY character, so e.g. 'P0001_json' would also be stripped.
for file in files:
    file['subject'] = re.sub(
        pattern=r'((\.[0-9]{4}-[0-9]{2}-[0-9]{2})?(\.json))$',
        repl='',
        string=file['name'])
# /////////////////////////////////////////////////////////////////////////////

# ~ 1 ~
# Start Molgenis Session and Pull Required Data
#
# New phenopacket files are compared against the metadata already in RD3 so
# that only values that actually changed are imported, rather than everything.
# Processed values are imported into the `subject` and `subjectinfo` tables;
# the GET requests below list exactly the attributes managed by this script.

# subject metadata for the current freeze
freeze = rd3.get(
    entity=paths['rd3_subjects'],
    attributes='id,subjectID,clinical_status,disease,phenotype,hasNotPhenotype,phenopacketsID,patch',
    batch_size=10000)

# subjectinfo records for the same freeze
freeze_info = rd3.get(
    entity=paths['rd3_subjectinfo'],
    attributes='id,dateofBirth,ageOfOnset,patch',
    batch_size=10000)

# keep the subject row identifiers for quick membership checks later on
freeze_ids = [subject['id'] for subject in freeze]

# reference tables: HPO terms and disease codes (flattened further downstream)
hpo_codes_raw = rd3.get(entity='rd3_phenotype', batch_size=10000)
disease_codes_raw = rd3.get(entity='rd3_disease', batch_size=10000)
# migrate data from one server to the other: # pull data then switch tokens and restart connection # portalData = rd3.get(releaseName,batch_size=10000) # rd3.importData(entity='rd3_portal_release_freeze3', data=portalData) #////////////////////////////////////////////////////////////////////////////// # ~ 0 ~ # Create Reference Datasets # Pull reference tables to create mapping tables for recoding raw values into # RD3 terminology. Add additional mappings as needed. # ~ 0a ~ # Create ERN Mapping erns = dt.Frame(rd3.get('rd3_ERN')) del erns['_href'] # as key pair dictionary ernMappings = toKeyPairs(data=erns[:, { 'from': f.identifier, 'to': f.identifier }].to_pandas().to_dict('records'), keyAttr='from', valueAttr='to') # define additional ERN mappings based on past/present values the variation # must be mapped to an existing ERN identifier. The format you should use is: # `'variation' : 'RD3 ERN identifier'` ernMappings.update({ 'ERN-CRANIO': 'ERNCRANIO',
# Pull Data
# Novelomics releases arrive in rd3_portal_novelomics. Data is sent from EGA
# and Tubingen, and is sometimes supplied by CNAG. Running this script
# requires both novelomics portal tables, the reference entities, and a list
# of existing subject and sample IDs.
#
# Pull mapping tables or define them below.

# ~ 0a ~
# Pull portal tables
# After the initial run, make sure the query param is uncommented.
statusMsg('Pulling data from the portal....')

shipment = dt.Frame(
    rd3.get(
        entity='rd3_portal_novelomics_shipment',
        q='processed==False',
        batch_size=10000))

experiment = dt.Frame(
    rd3.get(
        entity='rd3_portal_novelomics_experiment',
        q='processed==False',
        batch_size=10000))

# drop Molgenis response metadata
del shipment['_href']
del experiment['_href']

# ~ 0b ~
# Build Patch Information
# Determine if there are any new releases based on type of analysis. If there
# are, stop this script and complete the following
# 1. Determine if this is an actual new study or if this data should be
    # (continuation) tail of a source-to-RD3 column rename mapping;
    # the opening of this dict literal sits above this chunk
    'sample_id': 'sampleID',
    'participant_subject': 'subjectID',
    'pathological state': 'pathologicalState',
    'tumor cell fraction': 'percentageTumorCells'
}

# force the join key to string before keying the frame
newData[:, dt.update(sampleID=as_type(f.sampleID, str))]
newData.key = 'sampleID'

# ~ 1b ~
# Pull the deepwes data from RD3
# Unnest reference attributes and set key
samples = rd3.get(
    entity='rd3_noveldeepwes_sample',
    attributes='id,sampleID,subject',
    batch_size=10000)

# flatten the nested subject reference down to its subjectID
# NOTE(review): assumes every sample row has a subject object; a row with a
# missing subject would raise here — confirm against the source table
for row in samples:
    row['subject'] = row['subject']['subjectID']

samples = dt.Frame(samples)
del samples['_href']  # Molgenis response metadata
samples.key = 'sampleID'

# ~ 1c ~
# Join datasets
# join incoming data onto the existing RD3 samples on the 'sampleID' key
newSamplesData = samples[:, :, dt.join(newData)]

# recode attribute
# RD3 `rd3_freeze[x]_subject` where `[x]` is the freeze that the new PED files
# are tied to (e.g., `rd3_freeze2_subject`). The attributes pulled are:
#
# - `id`: the molgenis row ID; a concatenation of subject ID and release
# - `subjectID`: RD3 P number
# - `sex`: patient's sex
# - `fid`: family ID
#
# Extensive comparison checks between PED file data and the values already in
# RD3 are unnecessary: PED files should be considered the most up to date.

# subject metadata for the current freeze
freeze_subject_metadata = rd3.get(
    entity=paths['rd3_subjects'],
    # q = 'patch=freeze1_patch1',
    attributes='id,subjectID,sex1,fid',
    batch_size=10000)

# flatten subjectIDs so later membership tests are cheap
subject_ids = [subject['subjectID'] for subject in freeze_subject_metadata]

# In addition to subject metadata, it is import to pull file metadata to identify
# which have changed and should be processed. We will pull the following
# attributes:
#
# - `EGA`: the EGA file ID
# - `name`: the full name of the file
# - `md5`: checksum
#
# combine regular and novelomics release identifiers into one list to iterate
availableReleases = regularReleases + novelomicsReleases

statusMsg('Pulling metadata....')

# fetch subject metadata
# loop over each release table (rd3_<release>_subject) and pull the managed
# attributes; cleaned rows are collected into `subjects` (continuation of the
# loop is beyond this chunk)
subjects=[]
for release in availableReleases:
    statusMsg('Fetching subject metadata for',release)
    data=rd3.get(
        entity=f"rd3_{release}_subject",
        batch_size=10000,
        attributes=','.join([
            'id',
            'subjectID',
            'sex1',
            'fid',
            'mid',
            'pid',
            'clinical_status',
            'disease',
            'phenotype',
            'hasNotPhenotype',
            'organisation',
            'ERN',
            'solved',
            'patch'
        ])
    )
    # clean data: unnest reference objects to flat values
    # NOTE(review): the {} default guards a missing key; presumably the API
    # omits empty references entirely — a key present with a None value would
    # raise on the chained .get, confirm API behavior
    for row in data:
        row['sex1']=row.get('sex1',{}).get('identifier')
        row['mid']=row.get('mid',{}).get('id')
        row['pid']=row.get('pid',{}).get('id')
        # collapse the list of disease references into a comma-separated string
        if row.get('disease'):
            row['disease']=','.join([record['id'] for record in row['disease']])
        else: