Пример #1
0
def process_clinical_data(input_dir: str, output_dir: str) -> List[str]:
    """
    Transform the subject registry found in input_dir into the patient and
    sample clinical data files for cBioPortal.

    Both files are handed to write_clinical together with output_dir and
    the module-level STUDY_ID.

    :param input_dir: directory the subject registry is read from
    :param output_dir: directory passed to write_clinical for the output files
    :return: the unique values of the SAMPLE_ID column of the sample
        clinical data, as a list
    """
    registry: CentralSubjectRegistry = SubjectRegistryReader(
        input_dir).read_subject_registry()

    # Patient-level clinical file
    patient_frame, patient_header = transform_patient_clinical_data(registry)
    write_clinical(patient_frame, patient_header, 'patient', output_dir,
                   STUDY_ID)

    # Sample-level clinical file
    sample_frame, sample_header = transform_sample_clinical_data(registry)
    write_clinical(sample_frame, sample_header, 'sample', output_dir,
                   STUDY_ID)

    unique_sample_ids = sample_frame['SAMPLE_ID'].unique()
    return unique_sample_ids.tolist()
Пример #2
0
def csr2transmart(input_dir: str, output_dir: str, config_dir: str,
                  study_id: str, top_tree_node: str):
    """
    Run the full CSR-to-TranSMART conversion.

    Steps, in order:
      1. read the ontology configuration from config_dir;
      2. read the subject registry and the study registry from input_dir;
      3. map both registries to a DataCollection with CsrMapper, using
         study_id, top_tree_node and the configured ontology nodes;
      4. write the collection with TransmartCopyWriter to output_dir.

    Any exception raised along the way is logged as an error and the
    process exits with status code 1.

    :param input_dir: directory the registries are read from
    :param output_dir: target passed (as a string) to TransmartCopyWriter
    :param config_dir: directory passed to read_configuration
    :param study_id: study identifier handed to CsrMapper
    :param top_tree_node: top tree node handed to CsrMapper
    """
    logger.info('csr2transmart')
    try:
        logger.info('Reading configuration data...')
        config = read_configuration(config_dir)

        logger.info('Reading CSR data...')
        subjects: CentralSubjectRegistry = SubjectRegistryReader(
            input_dir).read_subject_registry()
        studies: StudyRegistry = StudyRegistryReader(
            input_dir).read_study_registry()

        logger.info('Mapping CSR to Data Collection...')
        data_collection: DataCollection = CsrMapper(
            study_id, top_tree_node).map(subjects, studies, config.nodes)

        logger.info('Writing files to {}'.format(output_dir))
        TransmartCopyWriter(str(output_dir)).write_collection(data_collection)

        logger.info('Done.')

    except Exception as error:
        # Top-level boundary: report the failure and signal it via exit code.
        logger.error(error)
        sys.exit(1)
Пример #3
0
def mapped_data_collection() -> DataCollection:
    """
    Build a DataCollection from the CSR2TRANSMART test data.

    Reads the ontology configuration, the subject registry and the study
    registry from the test data directories and maps them with a CsrMapper
    created for study 'CSR' under the '\\Central Subject Registry\\' node.

    :return: the result of CsrMapper.map for the test registries
    """
    data_dir = './test_data/input_data/CSR2TRANSMART_TEST_DATA'

    config: OntologyConfig = read_configuration(
        './test_data/input_data/config')
    subjects: CentralSubjectRegistry = SubjectRegistryReader(
        data_dir).read_subject_registry()
    studies: StudyRegistry = StudyRegistryReader(
        data_dir).read_study_registry()

    csr_mapper = CsrMapper('CSR', '\\Central Subject Registry\\')
    return csr_mapper.map(subjects, studies, config.nodes)
Пример #4
0
def test_serialising_and_deserialising_biomaterials(
        registry_with_biosources_and_biomaterials, tmp_path):
    """
    Round-trip a registry with NGS data through SubjectRegistryWriter and
    SubjectRegistryReader and check the biomaterials that are read back.
    """
    target_dir = tmp_path.as_posix()
    enriched_registry = add_ngs_data(
        registry_with_biosources_and_biomaterials,
        './test_data/input_data/CLINICAL')

    # Serialise to the temporary directory, then read it back.
    SubjectRegistryWriter(target_dir).write(enriched_registry)
    restored_registry = SubjectRegistryReader(
        tmp_path.as_posix()).read_subject_registry()

    biomaterials = restored_registry.entity_data['Biomaterial']
    assert len(biomaterials) == 2

    # Compare as multisets so the order of the strategies is irrelevant.
    expected_strategies = Counter({'CNV': 2, 'SNV': 1})
    actual_strategies = Counter(biomaterials[0].library_strategy)
    assert actual_strategies == expected_strategies
def sample_clinical_data() -> pd.DataFrame:
    """
    Read the subject registry from the CSR2CBIOPORTAL test data and return
    the first element of the transform_sample_clinical_data result.
    """
    reader = SubjectRegistryReader(
        './test_data/input_data/CSR2CBIOPORTAL_TEST_DATA')
    registry: CentralSubjectRegistry = reader.read_subject_registry()
    transformed = transform_sample_clinical_data(registry)
    return transformed[0]