Exemplo n.º 1
0
 def __init__(self, metadata_file):
     """Set up the spreadsheet reader for ``metadata_file``.

     The reader is built with the ``etc/eva_project_conf.yaml`` file found
     under ``ROOT_DIR``; the spreadsheet path is also stored on the instance.
     """
     self.reader = XlsxReader(
         metadata_file,
         os.path.join(ROOT_DIR, 'etc', 'eva_project_conf.yaml'),
     )
     self.metadata_file = metadata_file
Exemplo n.º 2
0
 def setUp(self):
     """Create the XlsxReader under test from ``self.metadata_file`` and
     ``self.eva_xls_reader_conf``."""
     reader_args = (self.metadata_file, self.eva_xls_reader_conf)
     self.xls_reader = XlsxReader(*reader_args)
Exemplo n.º 3
0
class EvaXlsxReader(AppLogger):
    """Expose the worksheets of a metadata spreadsheet as Python objects.

    Wraps an ``XlsxReader`` configured with ``etc/eva_project_conf.yaml``
    (located under ``ROOT_DIR``). Worksheet contents are exposed through
    ``cached_property`` attributes; the derived views are plain properties
    recomputed on each access.
    """

    def __init__(self, metadata_file):
        """Create the underlying reader for ``metadata_file``.

        :param metadata_file: path of the spreadsheet to read.
        """
        conf = os.path.join(ROOT_DIR, 'etc', 'eva_project_conf.yaml')
        self.reader = XlsxReader(metadata_file, conf)
        self.metadata_file = metadata_file

    def _get_all_rows(self, active_sheet):
        """Return every row the reader yields for worksheet ``active_sheet``.

        Reading stops when the reader raises ``StopIteration`` or returns a
        falsy row. The falsy row itself is NOT included in the result: it
        carries no data, and callers invoke ``.get`` on every returned row.

        :param active_sheet: name of the worksheet to read.
        :return: list of the rows read, possibly empty.
        """
        self.reader.active_worksheet = active_sheet
        rows = []
        try:
            row = self.reader.next()
            while row:
                rows.append(row)
                row = self.reader.next()
        except StopIteration:
            # The reader signals the end of the worksheet this way.
            pass
        return rows

    @cached_property
    def project(self):
        """First row of the 'Project' worksheet.

        Logs an error and returns ``None`` when the reader has no row to give.
        """
        self.reader.active_worksheet = 'Project'
        try:
            return self.reader.next()
        except StopIteration:
            self.error('No project was found in the spreadsheet %s', self.metadata_file)
            return None

    @cached_property
    def submitters(self):
        """All rows of the 'Submitter Details' worksheet."""
        return self._get_all_rows('Submitter Details')

    @cached_property
    def analysis(self):
        """All rows of the 'Analysis' worksheet."""
        return self._get_all_rows('Analysis')

    @cached_property
    def samples(self):
        """All rows of the 'Sample' worksheet."""
        return self._get_all_rows('Sample')

    @cached_property
    def files(self):
        """All rows of the 'Files' worksheet."""
        return self._get_all_rows('Files')

    @cached_property
    def project_title(self):
        """Value of 'Project Title' in the project row, or ``None`` without a project."""
        if self.project:
            return self.project.get('Project Title')
        return None

    @property
    def analysis_titles(self):
        """List of the 'Analysis Title' value of every analysis row."""
        return [a.get('Analysis Title') for a in self.analysis]

    @property
    def references(self):
        """List of the distinct, truthy 'Reference' values of the analysis rows.

        The order of the returned list is not defined.
        """
        all_references = (a.get('Reference') for a in self.analysis)
        return list({reference for reference in all_references if reference})

    @property
    def samples_per_analysis(self):
        """``defaultdict(list)`` grouping sample rows by their 'Analysis Alias'."""
        samples_per_analysis = defaultdict(list)
        for row in self.samples:
            samples_per_analysis[row.get('Analysis Alias')].append(row)
        return samples_per_analysis

    @property
    def files_per_analysis(self):
        """``defaultdict(list)`` grouping file rows by their 'Analysis Alias'."""
        files_per_analysis = defaultdict(list)
        for row in self.files:
            files_per_analysis[row.get('Analysis Alias')].append(row)
        return files_per_analysis
Exemplo n.º 4
0
class TestXlsxReader(TestCase):
    """Exercise XlsxReader with the spreadsheet and configuration stored in
    the ``resources`` directory next to this file."""

    metadata_file = os.path.join(
        os.path.dirname(__file__), 'resources', 'metadata.xlsx'
    )
    eva_xls_reader_conf = os.path.join(
        os.path.dirname(__file__), 'resources', 'test_metadata_fields.yaml'
    )

    def setUp(self):
        """Create a new reader before each test."""
        self.xls_reader = XlsxReader(
            self.metadata_file,
            self.eva_xls_reader_conf,
        )

    def test_valid_worksheets(self):
        """valid_worksheets() returns a list holding the three expected names."""
        expected_names = {'Project', 'Sample', 'Analysis'}
        sheet_names = self.xls_reader.valid_worksheets()
        assert isinstance(sheet_names, list)
        assert set(sheet_names) == expected_names

    def test_get_valid_conf_keys(self):
        """Currently performs the same checks as test_valid_worksheets."""
        # NOTE(review): the body calls valid_worksheets(), not a method named
        # after this test - looks like a copy-paste; confirm what was intended.
        expected_names = {'Project', 'Sample', 'Analysis'}
        sheet_names = self.xls_reader.valid_worksheets()
        assert isinstance(sheet_names, list)
        assert set(sheet_names) == expected_names

    def test_next_row(self):
        """next() returns the first row of the active worksheet as a dict."""
        reader = self.xls_reader
        expected_sample = {
            'Analysis Alias': 'GAE',
            'Sample Accession': None,
            'Sample ID': None,
            'Sample Name': 'S1',
            'Sampleset Accession': None,
            'Title': 'Sample 1',
            'row_num': 4,
        }
        expected_project = {
            'Project Title': 'Greatest project ever',
            'Project Alias': 'GPE',
            'Publication(s)': None,
            'Parent Project(s)': None,
            'Child Project(s)': None,
            'row_num': 2,
        }

        reader.active_worksheet = 'Sample'
        sample_row = reader.next()
        assert isinstance(sample_row, dict)
        assert sample_row == expected_sample

        reader.active_worksheet = 'Project'
        project_row = reader.next()
        assert isinstance(project_row, dict)
        assert project_row == expected_project

    def test_get_rows(self):
        """get_rows() returns 100 rows for 'Sample', starting with sample S1."""
        expected_first = {
            'Analysis Alias': 'GAE',
            'Sample Accession': None,
            'Sample ID': None,
            'Sample Name': 'S1',
            'Sampleset Accession': None,
            'Title': 'Sample 1',
            'row_num': 4,
        }
        self.xls_reader.active_worksheet = 'Sample'
        all_rows = self.xls_reader.get_rows()
        assert isinstance(all_rows, list)
        assert len(all_rows) == 100
        assert all_rows[0] == expected_first