Example #1
class Winlab32(InstrumentResultsFileParser):
    ar = None

    def __init__(self, infile, worksheet=None, encoding=None, delimiter=None):
        self.delimiter = delimiter if delimiter else ','
        self.encoding = encoding
        self.infile = infile
        self.csv_data = None
        self.worksheet = worksheet if worksheet else 0
        self.sample_id = None
        mimetype, encoding = guess_type(self.infile.filename)
        InstrumentResultsFileParser.__init__(self, infile, mimetype)

    def parse(self):
        order = []
        ext = splitext(self.infile.filename.lower())[-1]
        if ext == '.xlsx':
            order = (xlsx_to_csv, xls_to_csv)
        elif ext == '.xls':
            order = (xls_to_csv, xlsx_to_csv)
        elif ext == '.csv':
            self.csv_data = self.infile
        if order:
            for importer in order:
                try:
                    self.csv_data = importer(
                        infile=self.infile,
                        worksheet=self.worksheet,
                        delimiter=self.delimiter)
                    break
                except SheetNotFound:
                    self.err("Sheet not found in workbook: %s" % self.worksheet)
                    return -1
                except Exception as e:  # noqa
                    pass
            else:
                self.warn("Can't parse input file as XLS, XLSX, or CSV.")
                return -1
        stub = FileStub(file=self.csv_data, name=str(self.infile.filename))
        self.csv_data = FileUpload(stub)

        lines = self.csv_data.readlines()
        reader = csv.DictReader(lines)
        for row in reader:
            self.parse_row(reader.line_num, row)
        return 0

    def parse_row(self, row_nr, row):
        # convert row to use interim field names
        try:
            value = float(row['Reported Conc (Calib)'])
        except (TypeError, ValueError):
            value = row['Reported Conc (Calib)']
        parsed = {'reading': value, 'DefaultResult': 'reading'}

        sample_id = subn(r'[^\w\d\-_]*', '', row.get('Sample ID', ""))[0]
        kw = subn(r"[^\w\d]*", "", row.get('Analyte Name', ""))[0]
        if not sample_id or not kw:
            return 0

        try:
            ar = self.get_ar(sample_id)
            brain = self.get_analysis(ar, kw)
            new_kw = brain.getKeyword
        except Exception as e:
            self.warn(msg="Error getting analysis for '${s}/${kw}': ${e}",
                      mapping={'s': sample_id, 'kw': kw, 'e': repr(e)},
                      numline=row_nr, line=str(row))
            return

        self._addRawResult(sample_id, {new_kw: parsed})
        return 0

    @staticmethod
    def get_ar(sample_id):
        query = dict(portal_type="AnalysisRequest", getId=sample_id)
        brains = api.search(query, CATALOG_ANALYSIS_REQUEST_LISTING)
        try:
            return api.get_object(brains[0])
        except IndexError:
            pass

    @staticmethod
    def get_analyses(ar):
        brains = ar.getAnalyses()
        return dict((a.getKeyword, a) for a in brains)

    def get_analysis(self, ar, kw):
        brains = self.get_analyses(ar)
        brains = [v for k, v in brains.items() if k.startswith(kw)]
        if len(brains) < 1:
            msg = "No analysis found matching Keyword '${kw}'",
            raise AnalysisNotFound(msg, kw=kw)
        if len(brains) > 1:
            msg = "Multiple brains found matching Keyword '${kw}'",
            raise MultipleAnalysesFound(msg, kw=kw)
        return brains[0]
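A minimal usage sketch for the class above. The `upload` object (any file-like value with a `filename` attribute, such as the Zope FileUpload posted by an import form) and the `getRawResults()` call are assumptions based on the usual InstrumentResultsFileParser pattern, not part of the example itself.

# Hedged sketch: `upload` is assumed to be a file-like object carrying a
# `filename` attribute; getRawResults() is assumed to return the results
# accumulated via _addRawResult(), as in other bika.lims parsers.
parser = Winlab32(upload, worksheet=0, delimiter=',')
if parser.parse() == 0:
    # Raw results are keyed by sample ID, with one interim dict per
    # analysis keyword, e.g. {'H2O-0001': [{'Ca': {'reading': 1.23, ...}}]}
    for sample_id, results in parser.getRawResults().items():
        print(sample_id, results)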
Example #2
class Nexion350xParser(InstrumentResultsFileParser):
    ar = None

    def __init__(self, infile, worksheet=0, encoding=None, delimiter=None):
        self.delimiter = delimiter if delimiter else ','
        self.encoding = encoding
        self.infile = infile
        self.worksheet = worksheet
        self.csv_data = None
        self.sample_id = None
        mimetype, encoding = guess_type(self.infile.filename)
        InstrumentResultsFileParser.__init__(self, infile, mimetype)

    def parse(self):
        order = []
        ext = splitext(self.infile.filename.lower())[-1]
        if ext == '.xlsx':
            order = (xlsx_to_csv, xls_to_csv)
        elif ext == '.xls':
            order = (xls_to_csv, xlsx_to_csv)
        elif ext == '.csv':
            self.csv_data = self.infile
        if order:
            for importer in order:
                try:
                    self.csv_data = importer(infile=self.infile,
                                             worksheet=self.worksheet,
                                             delimiter=self.delimiter)
                    break
                except SheetNotFound:
                    self.err("Sheet not found in workbook: %s" %
                             self.worksheet)
                    return -1
                except Exception as e:  # noqa
                    pass
            else:
                self.warn("Can't parse input file as XLS, XLSX, or CSV.")
                return -1
        stub = FileStub(file=self.csv_data, name=str(self.infile.filename))
        self.csv_data = FileUpload(stub)

        lines = self.csv_data.readlines()
        reader = csv.DictReader(lines)
        for row in reader:
            self.parse_row(reader.line_num, row)
        return 0

    def parse_row(self, row_nr, row):
        if row['Sample Id'].lower().strip() in ('', 'sample id', 'blk', 'rblk',
                                                'calibration curves'):
            return 0

        # Get sample for this row
        sample_id = subn(r'[^\w\d\-_]*', '', row.get('Sample Id', ''))[0]
        ar = self.get_ar(sample_id)
        if not ar:
            msg = 'Sample not found for {}'.format(sample_id)
            self.warn(msg, numline=row_nr, line=str(row))
            return 0
        # Search for columns whose headers are analyte keywords
        for key in row.keys():
            if key in non_analyte_row_headers:
                continue
            kw = subn(r'[^\w\d]*', '', key)[0]
            if not kw:
                continue
            try:
                brain = self.get_analysis(ar, kw, row_nr=row_nr, row=row)
                if not brain:
                    continue
                new_kw = brain.getKeyword
                parsed = dict(reading=float(row[key]), DefaultResult='reading')
                self._addRawResult(sample_id, {new_kw: parsed})
            except (TypeError, ValueError):
                self.warn('Value for keyword ${kw} is not numeric',
                          mapping=dict(kw=kw),
                          numline=row_nr,
                          line=str(row))

        return 0

    @staticmethod
    def get_ar(sample_id):
        query = dict(portal_type='AnalysisRequest', getId=sample_id)
        brains = api.search(query, CATALOG_ANALYSIS_REQUEST_LISTING)
        try:
            return api.get_object(brains[0])
        except IndexError:
            pass

    @staticmethod
    def get_analyses(ar):
        analyses = ar.getAnalyses()
        return dict((a.getKeyword, a) for a in analyses)

    def get_analysis(self, ar, kw, row_nr="", row=""):
        items = self.get_analyses(ar)
        brains = [v for k, v in items.items() if k.startswith(kw)]
        if len(brains) < 1:
            return None
        if len(brains) > 1:
            msg = "Multiple brains found matching Keyword '${kw}'",
            raise MultipleAnalysesFound(msg, kw=kw)
        return brains[0]
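The same kind of hedged driver sketch for Nexion350xParser; this parser reads one analyte per column, so a single row can yield several raw results for one sample. The `upload` value and the worksheet/delimiter arguments below are illustrative assumptions, not values taken from the listing.

# Illustrative only: `upload` stands in for the uploaded workbook, and the
# worksheet index/delimiter are placeholders for the real import settings.
parser = Nexion350xParser(upload, worksheet=0, delimiter=',')
status = parser.parse()           # 0 on success, -1 if the file is unreadable
if status == 0:
    raw = parser.getRawResults()  # {sample_id: [{keyword: interim_dict}, ...]}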
Example #3
class S8TigerParser(InstrumentResultsFileParser):
    ar = None

    def __init__(self,
                 infile,
                 worksheet=None,
                 encoding=None,
                 default_unit=None,
                 delimiter=None):
        self.delimiter = delimiter if delimiter else ','
        self.unit = default_unit if default_unit else "pct"
        self.encoding = encoding
        self.ar = None
        self.analyses = None
        self.worksheet = worksheet if worksheet else 0
        self.infile = infile
        self.csv_data = None
        self.sample_id = None
        mimetype, encoding = guess_type(self.infile.filename)
        InstrumentResultsFileParser.__init__(self, infile, mimetype)

    def parse(self):
        order = []
        ext = splitext(self.infile.filename.lower())[-1]
        if ext == '.xlsx':
            order = (xlsx_to_csv, xls_to_csv)
        elif ext == '.xls':
            order = (xls_to_csv, xlsx_to_csv)
        elif ext == '.csv':
            self.csv_data = self.infile
        if order:
            for importer in order:
                try:
                    self.csv_data = importer(infile=self.infile,
                                             worksheet=self.worksheet,
                                             delimiter=self.delimiter)
                    break
                except SheetNotFound:
                    self.err("Sheet not found in workbook: %s" %
                             self.worksheet)
                    return -1
                except Exception as e:  # noqa
                    pass
            else:
                self.warn("Can't parse input file as XLS, XLSX, or CSV.")
                return -1
        stub = FileStub(file=self.csv_data, name=str(self.infile.filename))
        self.csv_data = FileUpload(stub)

        try:
            sample_id, ext = splitext(basename(self.infile.filename))
            # maybe the filename is a sample ID, just the way it is
            ar = self.get_ar(sample_id)
            if not ar:
                # maybe we need to chop off its -9 digit suffix
                sample_id = '-'.join(sample_id.split('-')[:-1])
                ar = self.get_ar(sample_id)
                if not ar:
                    # or we are out of luck
                    msg = "Can't find sample for " + self.infile.filename
                    self.warn(msg)
                    return -1
            self.ar = ar
            self.sample_id = sample_id
            self.analyses = self.get_analyses(ar)
        except Exception as e:
            self.err(repr(e))
            return False
        lines = self.csv_data.readlines()
        reader = csv.DictReader(lines)
        for row in reader:
            self.parse_row(ar, reader.line_num, row)
        return 0

    def parse_row(self, ar, row_nr, row):
        # convert row to use interim field names
        if 'reading' not in field_interim_map.values():
            self.err("Missing 'reading' interim field.")
            return -1
        parsed = {field_interim_map.get(k, ''): v for k, v in row.items()}

        formula = parsed.get('formula')
        kw = subn(r'[^\w\d\-_]*', '', formula)[0]
        kw = kw.lower()
        try:
            analysis = self.get_analysis(ar, kw)
            if not analysis:
                return 0
            keyword = analysis.getKeyword
        except Exception as e:
            self.warn(msg="Error getting analysis for '${kw}': ${e}",
                      mapping={
                          'kw': kw,
                          'e': repr(e)
                      },
                      numline=row_nr,
                      line=str(row))
            return

        # Concentration may be reported as PPM or PCT; store both.
        concentration = parsed['concentration']
        try:
            val = float(subn(r'[^.\d]', '', str(concentration))[0])
        except (TypeError, ValueError, IndexError):
            self.warn(msg="Can't extract numerical value from `concentration`",
                      numline=row_nr,
                      line=str(row))
            parsed['reading_pct'] = ''
            parsed['reading_ppm'] = ''
            return 0
        else:
            if 'ppm' in concentration.lower():
                parsed['reading_pct'] = val * 0.0001
                parsed['reading_ppm'] = val
            elif '%' in concentration:
                parsed['reading_pct'] = val
                parsed['reading_ppm'] = val * 10000
            else:
                self.warn("Can't decide if reading units are PPM or %",
                          numline=row_nr,
                          line=str(row))
                return 0

        if self.unit == 'ppm':
            reading = parsed['reading_ppm']
        else:
            reading = parsed['reading_pct']
        parsed['reading'] = reading
        parsed.update({'DefaultResult': 'reading'})

        self._addRawResult(self.sample_id, {keyword: parsed})
        return 0

    @staticmethod
    def get_ar(sample_id):
        query = dict(portal_type="AnalysisRequest", getId=sample_id)
        brains = api.search(query, CATALOG_ANALYSIS_REQUEST_LISTING)
        try:
            return api.get_object(brains[0])
        except IndexError:
            pass

    @staticmethod
    def get_analyses(ar):
        analyses = ar.getAnalyses()
        return dict((a.getKeyword, a) for a in analyses)

    def get_analysis(self, ar, kw):
        analyses = self.get_analyses(ar)
        analyses = [v for k, v in analyses.items() if k.startswith(kw)]
        if len(analyses) < 1:
            self.log('No analysis found matching keyword "${kw}"',
                     mapping=dict(kw=kw))
            return None
        if len(analyses) > 1:
            self.warn('Multiple analyses found matching Keyword "${kw}"',
                      mapping=dict(kw=kw))
            return None
        return analyses[0]
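A hedged sketch for S8TigerParser, which derives the sample ID from the file name and records both percent and ppm readings; `default_unit` selects which of the two lands in the 'reading' interim field. The `upload` object is an assumption as before.

# Sketch under the same assumptions as the earlier examples; 'ppm' asks the
# parser to put the ppm value into 'reading' (the percent value remains
# available as 'reading_pct' in the same interim dict).
parser = S8TigerParser(upload, worksheet=0, default_unit='ppm')
if parser.parse() == 0:
    raw = parser.getRawResults()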