Exemplo n.º 1
0
    def test_flatten__raises_error__given_multiple_schema_versions_of_same_concrete_entity(
            self):
        """Flattening is expected to raise ``ValueError`` when the same
        concrete entity (``donor_organism``) is described by two schema
        versions (14.2.0 and 14.3.0) in one entity list.
        """
        # given
        def biomaterial_json(described_by, uuid):
            # Both fixtures share everything except the schema URL and uuid.
            return {
                'content': {
                    "describedBy": described_by,
                    "schema_type": "biomaterial",
                    "field": "value"
                },
                'uuid': {
                    'uuid': uuid
                }
            }

        entities = Entity.from_json_list([
            biomaterial_json(
                "https://schema.humancellatlas.org/type/project/14.2.0/donor_organism",
                'uuid1'),
            biomaterial_json(
                "https://schema.humancellatlas.org/type/project/14.3.0/donor_organism",
                'uuid2'),
        ])

        # when/then
        expected_message = "Multiple versions of same concrete entity schema"
        flattener = Flattener()
        with self.assertRaisesRegex(ValueError, expected_message):
            flattener.flatten(entities)
    def test_flatten__has_ontology_property_with_single_element(self):
        """A list holding a single ontology object is expected to flatten into
        ``ontology``, ``ontology_label`` and ``text`` columns on the Project
        sheet.
        """
        # given
        organ_part = {
            "ontology": "UBERON:0000376",
            "ontology_label": "hindlimb stylopod",
            "text": "hindlimb stylopod"
        }
        self.content["organ_parts"] = [organ_part]
        entity_json = {'content': self.content, 'uuid': self.uuid}

        # when
        actual = Flattener().flatten(Entity.from_json_list([entity_json]))

        # then
        # The shared baseline expectation is extended with the new columns.
        expected = self.flattened_metadata_entity
        project_sheet = expected['Project']
        first_row = project_sheet['values'][0]
        first_row['project.organ_parts.ontology'] = 'UBERON:0000376'
        first_row['project.organ_parts.ontology_label'] = 'hindlimb stylopod'
        first_row['project.organ_parts.text'] = 'hindlimb stylopod'
        project_sheet['headers'].extend([
            'project.organ_parts.ontology',
            'project.organ_parts.ontology_label',
            'project.organ_parts.text',
        ])

        self.assertEqual(actual, expected)
Exemplo n.º 3
0
    def test_flatten__has_string_arrays(self):
        """String arrays are expected to end up in a single cell, with
        multiple items joined by ``||``.
        """
        # given
        self.content["insdc_project_accessions"] = ["SRP180337"]
        self.content["geo_series_accessions"] = ["GSE124298", "GSE124299"]
        self.metadata_entity = {'content': self.content, 'uuid': self.uuid}
        entities = [Entity.from_json(self.metadata_entity)]

        # when
        actual = Flattener().flatten(entities)

        # then
        expected = self.flattened_metadata_entity
        project_sheet = expected['Project']
        first_row = project_sheet['values'][0]
        first_row['project.insdc_project_accessions'] = 'SRP180337'
        first_row['project.geo_series_accessions'] = "GSE124298||GSE124299"
        project_sheet['headers'].extend([
            'project.insdc_project_accessions',
            'project.geo_series_accessions',
        ])

        self.assertEqual(actual, expected)
Exemplo n.º 4
0
    def test_flatten__entities_has_input(self):
        """Flatten a specimen linked to its donor input, process and
        protocols, and compare the result with the stored JSON fixture.
        """
        def load_fixture(relative_path):
            # Fixture files live next to this test module (self.script_dir).
            with open(self.script_dir + relative_path) as file:
                return json.load(file)

        # given
        specimen = Entity.from_json(
            load_fixture('/entities-with-inputs-specimen.json'))
        donor = Entity.from_json(
            load_fixture('/entities-with-inputs-donor.json'))
        process = Entity.from_json(
            load_fixture('/entities-with-inputs-process.json'))
        protocols = Entity.from_json_list(
            load_fixture('/entities-with-inputs-protocols.json'))

        specimen.set_input([donor], [], process, protocols)

        # when
        flattener = Flattener()
        actual = flattener.flatten([specimen, process] + protocols)

        # then
        expected = load_fixture('/entities-with-inputs-flattened.json')
        self.assertEqual(actual, expected)
    def test_flatten__has_project_modules(self):
        """A ``contributors`` module on a project is expected to be flattened
        into its own ``Project - Contributors`` sheet, with nested
        ``project_role`` fields as dotted columns and the boolean rendered as
        the string ``'True'``.
        """
        # given
        contributor = {
            "name": "Alex A,,Pollen",
            "email": "*****@*****.**",
            "institution": "University of California, San Francisco (UCSF)",
            "laboratory": "Department of Neurology",
            "country": "USA",
            "corresponding_contributor": True,
            "project_role": {
                "text": "experimental scientist",
                "ontology": "EFO:0009741",
                "ontology_label": "experimental scientist"
            }
        }
        self.content["contributors"] = [contributor]
        entity_json = {'content': self.content, 'uuid': self.uuid}

        # when
        actual = Flattener().flatten(Entity.from_json_list([entity_json]))

        # then
        contributor_headers = [
            'project.contributors.name',
            'project.contributors.email',
            'project.contributors.institution',
            'project.contributors.laboratory',
            'project.contributors.country',
            'project.contributors.corresponding_contributor',
            'project.contributors.project_role.text',
            'project.contributors.project_role.ontology',
            'project.contributors.project_role.ontology_label'
        ]
        contributor_row = {
            'project.contributors.corresponding_contributor': 'True',
            'project.contributors.country': 'USA',
            'project.contributors.email': '*****@*****.**',
            'project.contributors.institution': 'University of California, San Francisco (UCSF)',
            'project.contributors.laboratory': 'Department of Neurology',
            'project.contributors.name': 'Alex A,,Pollen',
            'project.contributors.project_role.ontology': 'EFO:0009741',
            'project.contributors.project_role.ontology_label': 'experimental scientist',
            'project.contributors.project_role.text': 'experimental scientist'
        }
        expected = self.flattened_metadata_entity
        expected['Project - Contributors'] = {
            'headers': contributor_headers,
            'values': [contributor_row],
        }

        self.assertEqual(actual, expected)
Exemplo n.º 6
0
    def test_flatten__has_no_modules(self):
        """The unmodified fixture content is expected to flatten to the
        baseline ``self.flattened_metadata_entity``.
        """
        # given
        entity = Entity.from_json({'content': self.content, 'uuid': self.uuid})

        # when
        actual = Flattener().flatten([entity])

        # then
        self.assertEqual(actual, self.flattened_metadata_entity)
Exemplo n.º 7
0
class XlsDownloader:
    """Turn flattened metadata into a spreadsheet ``Workbook``.

    The flattened input maps worksheet titles to dicts of the form
    ``{'headers': [...], 'values': [{header: cell_value, ...}, ...]}`` and
    carries the list of schema URLs under the ``SCHEMAS_WORKSHEET`` key.
    """

    def __init__(self):
        self.flattener = Flattener()

    def convert_json(self, metadata_list: List[dict]):
        """Return ``metadata_list`` flattened by ``Flattener.flatten``."""
        return self.flattener.flatten(metadata_list)

    def create_workbook(self, input_json: dict) -> Workbook:
        """Build a workbook with one sheet per entry of ``input_json``.

        The ``'Project'`` sheet is inserted at index 0; every other sheet is
        appended in iteration order. The ``SCHEMAS_WORKSHEET`` entry is
        skipped in the loop and written last by
        ``generate_schemas_worksheet``.

        Raises:
            ValueError: if ``input_json`` has no (or empty) schema list, or a
                row holds a value for a header missing from its sheet.
        """
        workbook = Workbook()
        # Remove the workbook's initial active sheet so that only the sheets
        # created below remain.
        workbook.remove(workbook.active)

        worksheet: Worksheet
        for ws_title, ws_elements in input_json.items():
            if ws_title == 'Project':
                worksheet = workbook.create_sheet(title=ws_title, index=0)
            elif ws_title == SCHEMAS_WORKSHEET:
                continue
            else:
                worksheet = workbook.create_sheet(title=ws_title)

            self.add_worksheet_content(worksheet, ws_elements)

        self.generate_schemas_worksheet(input_json, workbook)

        return workbook

    def generate_schemas_worksheet(self, input_json, workbook):
        """Append the schemas sheet: a title cell followed by one URL per row.

        Raises:
            ValueError: if ``input_json`` has no or an empty
                ``SCHEMAS_WORKSHEET`` entry.
        """
        schemas = input_json.get(SCHEMAS_WORKSHEET)
        if not schemas:
            raise ValueError('The schema urls are missing')
        schemas_worksheet = workbook.create_sheet(SCHEMAS_WORKSHEET)
        schemas_worksheet.cell(row=1, column=1, value=SCHEMAS_WORKSHEET)
        # Row 1 holds the title, so the URLs start on row 2.
        for row_num, schema in enumerate(schemas, start=2):
            schemas_worksheet.cell(row=row_num, column=1, value=schema)

    def add_worksheet_content(self, worksheet, ws_elements: dict):
        """Write the header row and all data rows of one sheet.

        A missing (or ``None``) ``'headers'`` / ``'values'`` entry is treated
        as empty instead of failing inside ``enumerate``.
        """
        headers = ws_elements.get('headers') or []
        all_values = ws_elements.get('values') or []
        self.__add_header_row(worksheet, headers)

        # Resolve header -> column once per sheet rather than once per cell.
        column_by_header = self.__column_index(headers)
        for row_number, row_values in enumerate(all_values,
                                                start=START_DATA_ROW):
            self.__add_row_content(worksheet, headers, row_number, row_values,
                                   column_by_header)

    @staticmethod
    def __column_index(headers: list) -> dict:
        """Map each header to its 1-based column (first occurrence wins)."""
        column_by_header = {}
        for col, header in enumerate(headers, start=1):
            # setdefault keeps the first position, matching list.index().
            column_by_header.setdefault(header, col)
        return column_by_header

    @staticmethod
    def __add_header_row(worksheet, headers: list):
        """Write ``headers`` left to right on row ``HEADER_ROW_NO``."""
        for col, header in enumerate(headers, start=1):
            worksheet.cell(row=HEADER_ROW_NO, column=col, value=header)

    @staticmethod
    def __add_row_content(worksheet, headers: list, row_number: int,
                          values: dict, column_by_header: dict = None):
        """Write one data row, placing each value under its header's column.

        ``column_by_header`` is an optional precomputed header -> column map;
        when omitted it is derived from ``headers``.

        Raises:
            ValueError: if ``values`` contains a header that is not listed in
                ``headers``.
        """
        if column_by_header is None:
            column_by_header = XlsDownloader.__column_index(headers)

        for header, value in values.items():
            try:
                column = column_by_header[header]
            except KeyError:
                # Keep the exception type list.index() used to raise.
                raise ValueError(
                    f'Row {row_number} has a value for unknown header '
                    f'{header!r}') from None
            worksheet.cell(row=row_number, column=column, value=value)
    def test_flatten__project_metadata(self):
        """Flatten the entities in ``project-list.json`` and compare the
        result with ``project-list-flattened.json``.
        """
        # given
        input_path = self.script_dir + '/project-list.json'
        with open(input_path) as file:
            entity_json_list = json.load(file)

        # when
        actual = Flattener().flatten(Entity.from_json_list(entity_json_list))

        # then
        expected_path = self.script_dir + '/project-list-flattened.json'
        with open(expected_path) as file:
            expected = json.load(file)

        self.assertEqual(actual, expected)
    def test_flatten__has_different_entities(self):
        """Flatten the entities in ``entities.json`` and compare the result
        with ``entities-flattened.json``.
        """
        def read_json(relative_path):
            # Fixture files are resolved against self.script_dir.
            with open(self.script_dir + relative_path) as file:
                return json.load(file)

        # given
        entity_json_list = read_json('/entities.json')

        # when
        actual = Flattener().flatten(Entity.from_json_list(entity_json_list))

        # then
        expected = read_json('/entities-flattened.json')
        self.assertEqual(actual, expected)
Exemplo n.º 10
0
    def test_flatten__has_list_property_with_elements(self):
        """A list of plain objects on a ``collection_protocol`` is expected to
        flatten into one dotted column per object field on the
        ``Collection protocol`` sheet.
        """
        # given
        organ_part = {
            "field_1": "UBERON:0000376",
            "field_2": "hindlimb stylopod",
            "field_3": "hindlimb stylopod"
        }
        content = {
            "describedBy":
            "https://schema.humancellatlas.org/type/project/14.2.0/collection_protocol",
            "schema_type":
            "protocol",
            "organ_parts": [organ_part]
        }
        entities = [Entity.from_json({'content': content, 'uuid': self.uuid})]

        # when
        actual = Flattener().flatten(entities)

        # then
        expected_row = {
            'collection_protocol.organ_parts.field_1': 'UBERON:0000376',
            'collection_protocol.organ_parts.field_2': 'hindlimb stylopod',
            'collection_protocol.organ_parts.field_3': 'hindlimb stylopod',
            'collection_protocol.uuid': 'uuid1'
        }
        expected_headers = [
            'collection_protocol.uuid',
            'collection_protocol.organ_parts.field_1',
            'collection_protocol.organ_parts.field_2',
            'collection_protocol.organ_parts.field_3'
        ]
        expected = {
            'Collection protocol': {
                'values': [expected_row],
                'headers': expected_headers
            },
            'Schemas': [
                'https://schema.humancellatlas.org/type/project/14.2.0/collection_protocol'
            ]
        }

        self.assertEqual(actual, expected)
Exemplo n.º 11
0
    def test_flatten__has_boolean(self):
        """A boolean field is expected to be flattened to its string form
        (``True`` -> ``'True'``).
        """
        # given
        self.content['boolean_field'] = True
        self.metadata_entity = {'content': self.content, 'uuid': self.uuid}
        entities = [Entity.from_json(self.metadata_entity)]

        # when
        actual = Flattener().flatten(entities)

        # then
        expected = self.flattened_metadata_entity
        project_sheet = expected['Project']
        project_sheet['values'][0]['project.boolean_field'] = 'True'
        project_sheet['headers'].append('project.boolean_field')

        self.assertEqual(actual, expected)
    def test_flatten__has_ontology_property_with_single_element_but_only_with_text_attr(
            self):
        """An ontology object carrying only ``text`` is expected to produce
        just the ``project.diseases.text`` column.
        """
        # given
        self.content['diseases'] = [{'text': 'dummytext2'}]
        entity_json = {'content': self.content, 'uuid': self.uuid}

        # when
        actual = Flattener().flatten(Entity.from_json_list([entity_json]))

        # then
        expected = self.flattened_metadata_entity
        project_sheet = expected['Project']
        project_sheet['values'][0]['project.diseases.text'] = 'dummytext2'
        project_sheet['headers'].append('project.diseases.text')

        self.assertEqual(actual, expected)
    def test_flatten__has_ontology_property_with_multiple_elements_but_with_empty_ontology_values(
            self):
        """Empty ``ontology`` / ``ontology_label`` strings of a list element
        are expected to be left out of the joined cell, while the ``text``
        values of all elements are joined with ``||``.
        """
        # given
        self.content.update({
            'diseases': [{
                'ontology': 'UBERON:0000376',
                'ontology_label': 'dummylabel1',
                'text': 'dummytext1'
            }, {
                'ontology': '',
                'ontology_label': '',
                'text': 'dummytext2'
            }]
        })

        metadata_entity = {'content': self.content, 'uuid': self.uuid}

        entity_list = [metadata_entity]

        # when
        flattener = Flattener()
        actual = flattener.flatten(Entity.from_json_list(entity_list))

        # Extend the shared baseline expectation with the disease columns.
        self.flattened_metadata_entity['Project']['values'][0].update({
            'project.diseases.ontology':
            'UBERON:0000376',
            'project.diseases.ontology_label':
            'dummylabel1',
            'project.diseases.text':
            'dummytext1||dummytext2',
        })
        self.flattened_metadata_entity['Project']['headers'].extend([
            'project.diseases.ontology', 'project.diseases.ontology_label',
            'project.diseases.text'
        ])

        # then
        # Argument order (actual, expected) matches the sibling tests so that
        # failure diffs read the same way everywhere.
        self.assertEqual(actual, self.flattened_metadata_entity)
Exemplo n.º 14
0
    def test_flatten__rows_have_different_columns(self):
        """Two projects with different sets of ``project_core`` fields are
        expected to share one header list covering all columns, while each
        row only holds the columns it actually has.
        """
        # given
        project_schema_url = "https://schema.humancellatlas.org/type/project/14.2.0/project"

        def project_json(uuid, project_core):
            # Both fixtures differ only in uuid and project_core fields.
            return {
                'content': {
                    "describedBy": project_schema_url,
                    "schema_type": "project",
                    "project_core": project_core
                },
                'uuid': {
                    'uuid': uuid
                }
            }

        entity_json_list = [
            project_json('uuid1', {"project_short_name": "label1"}),
            project_json('uuid2', {
                "project_short_name": "label2",
                "project_title": "title",
                "project_description": "desc"
            }),
        ]

        # when
        flattener = Flattener()
        entities = []
        for entity_json in entity_json_list:
            entities.append(Entity.from_json(entity_json))
        actual = flattener.flatten(entities)

        # then
        sparse_row = {
            'project.uuid': 'uuid1',
            'project.project_core.project_short_name': 'label1'
        }
        full_row = {
            'project.uuid': 'uuid2',
            'project.project_core.project_short_name': 'label2',
            'project.project_core.project_title': 'title',
            'project.project_core.project_description': 'desc'
        }
        expected = {
            'Project': {
                'headers': [
                    'project.uuid',
                    'project.project_core.project_short_name',
                    'project.project_core.project_title',
                    'project.project_core.project_description'
                ],
                'values': [sparse_row, full_row]
            },
            'Schemas':
            ['https://schema.humancellatlas.org/type/project/14.2.0/project']
        }

        self.assertEqual(actual, expected)