Exemplo n.º 1
0
                    'submitter_id': case_submitter_id
                },
                'submitter_id': submitter_id,
                'project_id': DEFAULT_PROJECT_ID,
                'data_type': row['c_file.mime'],
                'md5sum': row['c_file.ETag'],
                'file_size': row['c_file.size'],
                'file_name': row['c_file.path'],
            }
            file_emitter.write(file)
    file_emitter.close()


if __name__ == "__main__":
    # Script entry point: run the transform over the default input file and
    # then check that the expected output file exists.
    item_paths = [DEFAULT_INPUT_FILE]
    # The module-level DEFAULT_* constants are handed to default_parser();
    # presumably they become the option defaults -- confirm in default_parser.
    args = default_parser(DEFAULT_OUTPUT_DIR, DEFAULT_EXPERIMENT_CODE,
                          DEFAULT_PROJECT_ID).parse_args()

    # Output directory and experiment code come from the parsed arguments.
    transform(item_paths,
              output_dir=args.output_dir,
              experiment_code=args.experiment_code)

    # Sanity check: 'submitted_file.json' must exist under the output dir
    # after transform() returns; its path is printed on success.
    # NOTE(review): assert statements are skipped when Python runs with -O,
    # so this check disappears in optimized mode.
    p = os.path.join(args.output_dir, 'submitted_file.json')
    assert os.path.isfile(p), 'should have an output file {}'.format(p)
    print(p)

    if args.schema:

        def my_callback(schema):
            # adds the source property
            schema['category'] = 'hop extention'
            schema['properties']['cases'] = {
Exemplo n.º 2
0
            # diagnosis['*last_known_disease_status'] = disease_status.get(line['Cancer Status'], 'Unknown tumor status')
            # diagnosis['*morphology'] = 'tumor_size={}'.format(line['Tumor Size']) # "None is not of type 'string'")
            # diagnosis['*primary_diagnosis'] = line['Case_ICD::Transformation']
            # diagnosis['*progression_or_recurrence'] = 'unknown' # ['yes', 'no', 'unknown', 'not reported', 'Not Allowed To Collect']
            # diagnosis['*site_of_resection_or_biopsy'] = 'unknown'
            # diagnosis['*tissue_or_organ_of_origin'] = 'pancrease'
            # diagnosis['*tumor_grade'] = 'unknown' #  "None is not of type 'string'")
            # diagnosis['*tumor_stage'] = 'unknown' #  "None is not of type 'string'")
            # diagnosis['*vital_status'] = 'unknown'
            #
            # diagnosis_emitter.write(diagnosis)

    cases_emitter.close()
    demographics_emitter.close()


if __name__ == "__main__":
    # Input files handed to transform() and, when requested, to generate().
    item_paths = [
        'source/ccle/Individual.Vertex.json.gz',
        'source/ccle/maf.Individual.Vertex.json.gz',
    ]

    parser = default_parser()
    args = parser.parse_args()

    transform(item_paths=item_paths, output_dir=args.output_dir)

    # Schema generation only runs when the schema option is set.
    if args.schema:
        # NOTE(review): transform() writes to args.output_dir, but generate()
        # is given DEFAULT_OUTPUT_DIR -- confirm this difference is intended.
        # NOTE(review): an earlier comment here read
        # `glob.glob("output/bcc/*.json")`; verify whether generate() should
        # receive the transform's output files instead of the source inputs.
        schema_path = generate(
            item_paths, 'case', output_dir=DEFAULT_OUTPUT_DIR)
        print(schema_path)