示例#1
0
# Pull the id, subjectID and patch attributes from the freeze 1 subject table.
subjects = rd3.get(
    entity='rd3_freeze1_subject',
    attributes='id,subjectID,patch',
    batch_size=10000,
)

# Report how many records are available on each side; `files` must already be
# defined at this point.
file_count = len(files)
subject_count = len(subjects)
statusMsg('File metadata entries pulled: {}'.format(file_count))
statusMsg('Subject metadata entries pulled: {}'.format(subject_count))

# extract subject ID
# Each file's subject ID is its name with the trailing '.json' extension and,
# when present, the '<sep>YYYY-MM-DD' date stamp in front of it removed.
# The dot before 'json' is escaped so only a literal '.json' suffix is
# stripped; a bare '.' also matched names ending in e.g. 'xjson'. The
# separator before the date is left as "any single character" because the
# file naming convention is not visible here.
subject_suffix = re.compile(r'((.[0-9]{4}-[0-9]{2}-[0-9]{2})?(\.json))$')
for file in files:
    file['subject'] = subject_suffix.sub('', file['name'])
ids = [file['subject'] for file in files]

# update patch
# Every subject whose subjectID appears in `ids` gets 'freeze1_patch1' added
# to its patch references, which are then collapsed into one comma separated
# string.
known_ids = set(ids)  # hashed lookup instead of scanning the list per subject
for s in subjects:
    if s['subjectID'] not in known_ids:
        continue
    patches = ['freeze1_patch1']
    # 'patch' may be missing or None, and an entry may lack an 'id'; both
    # cases are skipped because iterating None and joining None raise
    # TypeError.
    for patch in s.get('patch') or []:
        patch_id = patch.get('id')
        if patch_id is not None:
            patches.append(patch_id)
    # dict.fromkeys() removes duplicates while keeping a stable order
    s['patch'] = ','.join(dict.fromkeys(patches))

# Keep only the 'id' and 'patch' keys of every subject. Subjects that were
# not matched above keep their 'patch' value exactly as it was pulled.
data = [{k: v for k, v in s.items() if k in ('id', 'patch')} for s in subjects]

# import into RD3
# The same id/patch records are written to both freeze 1 tables, subject
# first and subjectinfo second.
for target_entity in ('rd3_freeze1_subject', 'rd3_freeze1_subjectinfo'):
    rd3.updateColumn(entity=target_entity, attr='patch', data=data)
#     statusMsg(
#         'Not all records were processed. There are',
#         shipment.nrows - shipmentUpdates.nrows,
#         'records remaining.'
#     )
# else:
#     statusMsg('All records were processed! :-)')

# import
# rd3_shipment_updates = dtFrameToRecords(shipmentUpdates)
# Take all rows of `shipment` (':' row selector) with the molgenis_id column
# and a `processed` entry set to True.
# NOTE(review): presumably this yields a constant processed=True column for
# every row - confirm against the datatable version in use.
shipment_flags = shipment[:, {
    'molgenis_id': f.molgenis_id,
    'processed': True,
}]
rd3_shipment_updates = dtFrameToRecords(shipment_flags)

# Push the `processed` values back to the portal shipment table.
rd3.updateColumn(
    entity='rd3_portal_novelomics_shipment',
    attr='processed',
    data=rd3_shipment_updates,
)

# ~ 6e.ii ~
# update experiment table

# Values of the experimentID column in labinfo, as a plain list.
labinfo_experiment_ids = labinfo[:, f.experimentID].to_list()[0]

# OR together one equality test per ID to get a single row filter.
# NOTE: reduce() is called without an initial value, so an empty ID list
# raises TypeError.
matches_labinfo = functools.reduce(
    operator.or_,
    (f.project_experiment_dataset_id == experiment_id
     for experiment_id in labinfo_experiment_ids),
)

# Rows of `experiment` passing the filter, with molgenis_id and processed=True.
experimentUpdates = experiment[matches_labinfo, {
    'molgenis_id': f.molgenis_id,
    'processed': True,
}]

# check processed rows
if experimentUpdates.nrows != experiment.nrows:
    statusMsg('Not all records were processed. There are still',
# Update Patch table with new release info (DO THIS FIRST!)
# Only these four fields are copied from patchinfo. Each is read with .get()
# and no default, so a key missing from a dict is sent as None.
patch_fields = ('id', 'type', 'date', 'description')
rd3.add(
    entity='rd3_patch',
    data={field: patchinfo.get(field) for field in patch_fields},
)

# import new orgs; import ERNs if needed, but highly unlikely
organisation_records = dtFrameToRecords(newOrgs)
rd3.importData(entity='rd3_organisation', data=organisation_records)

# prep data for import into RD3
rd3_subjects = dtFrameToRecords(data=subjects)
rd3_subjectInfo = dtFrameToRecords(data=subjectInfo)
rd3_samples = dtFrameToRecords(data=samples)
rd3_labinfo = dtFrameToRecords(data=labinfo)

# import data
# Each record set goes into the table whose name embeds patchinfo["name"];
# the imports run in the order listed: subject, subjectinfo, sample, labinfo.
release_tables = (
    (f'rd3_{patchinfo["name"]}_subject', rd3_subjects),
    (f'rd3_{patchinfo["name"]}_subjectinfo', rd3_subjectInfo),
    (f'rd3_{patchinfo["name"]}_sample', rd3_samples),
    (f'rd3_{patchinfo["name"]}_labinfo', rd3_labinfo),
)
for table_name, table_records in release_tables:
    rd3.importData(entity=table_name, data=table_records)

# update portal
# Convert the portal updates to records, then write their `processed` values
# to the entity named by `releaseName`.
portal_records = dtFrameToRecords(portalUpdates)
rd3.updateColumn(entity=releaseName, attr='processed', data=portal_records)
# recode attribute
# Entries equal to 'UK' in percentageTumorCells are replaced by None; every
# other value is kept unchanged.
tumor_cell_values = newSamplesData['percentageTumorCells'].to_list()[0]
recoded_tumor_cells = [
    None if value == 'UK' else value
    for value in tumor_cell_values
]
newSamplesData['percentageTumorCells'] = dt.Frame(recoded_tumor_cells)

# newSamplesData[:, dt.update(
#     percentageTumorCells = as_type(f.percentageTumorCells, dt.Type.int8)
# )]

#///////////////////////////////////////

# ~ 2 ~
# Import data

# prep data for import
# Each update set pairs the sample id with the single column being updated.
pathologicalState = dtFrameToRecords(
    newSamplesData[:, ['id', 'pathologicalState']]
)
percentageTumorCells = dtFrameToRecords(
    newSamplesData[:, ['id', 'percentageTumorCells']]
)

# import data
# Both columns are written to the same sample table, pathologicalState first.
column_updates = (
    ('pathologicalState', pathologicalState),
    ('percentageTumorCells', percentageTumorCells),
)
for column_name, column_records in column_updates:
    rd3.updateColumn(
        entity='rd3_noveldeepwes_sample',
        attr=column_name,
        data=column_records,
    )