class Winlab32(InstrumentResultsFileParser):
    """Import parser for WinLab32 result files.

    Accepts XLS, XLSX or CSV exports, converts them to CSV rows and adds
    one raw result per row, matching the row's "Sample ID" to an
    AnalysisRequest and the "Analyte Name" to one of its analyses.
    """

    ar = None

    def __init__(self, infile, worksheet=None, encoding=None, delimiter=None):
        """
        :param infile: uploaded file object (must expose ``filename``).
        :param worksheet: workbook sheet to read (defaults to 0).
        :param encoding: optional file encoding (stored, not used here).
        :param delimiter: CSV delimiter (defaults to ``,``).
        """
        self.delimiter = delimiter if delimiter else ','
        self.encoding = encoding
        self.infile = infile
        self.csv_data = None
        self.worksheet = worksheet if worksheet else 0
        self.sample_id = None
        # guess_type returns a (mimetype, encoding) tuple; only the
        # mimetype is needed here.
        mimetype, _encoding = guess_type(self.infile.filename)
        InstrumentResultsFileParser.__init__(self, infile, mimetype)

    def parse(self):
        """Convert the input file to CSV and feed each row to parse_row.

        :returns: 0 on success, -1 on failure.
        """
        order = []
        ext = splitext(self.infile.filename.lower())[-1]
        # Try the importer matching the extension first, then the other.
        if ext == '.xlsx':
            order = (xlsx_to_csv, xls_to_csv)
        elif ext == '.xls':
            order = (xls_to_csv, xlsx_to_csv)
        elif ext == '.csv':
            self.csv_data = self.infile
        if order:
            for importer in order:
                try:
                    self.csv_data = importer(
                        infile=self.infile,
                        worksheet=self.worksheet,
                        delimiter=self.delimiter)
                    break
                except SheetNotFound:
                    self.err("Sheet not found in workbook: %s"
                             % self.worksheet)
                    return -1
                except Exception:  # noqa: best effort, try next importer
                    pass
            else:
                # for/else: no importer succeeded.
                self.warn("Can't parse input file as XLS, XLSX, or CSV.")
                return -1
        stub = FileStub(file=self.csv_data, name=str(self.infile.filename))
        self.csv_data = FileUpload(stub)
        lines = self.csv_data.readlines()
        reader = csv.DictReader(lines)
        for row in reader:
            self.parse_row(reader.line_num, row)
        return 0

    def parse_row(self, row_nr, row):
        """Add a raw result for one CSV row.

        :param row_nr: line number in the CSV (used in warnings).
        :param row: dict mapping column name -> cell value.
        :returns: 0 always (failures are logged as warnings).
        """
        # Keep the raw string when the value is not numeric.
        try:
            value = float(row['Reported Conc (Calib)'])
        except (TypeError, ValueError):
            value = row['Reported Conc (Calib)']
        parsed = {'reading': value, 'DefaultResult': 'reading'}
        # Strip characters that cannot appear in sample IDs / keywords.
        sample_id = subn(r'[^\w\d\-_]*', '', row.get('Sample ID', ""))[0]
        kw = subn(r"[^\w\d]*", "", row.get('Analyte Name', ""))[0]
        if not sample_id or not kw:
            return 0
        try:
            ar = self.get_ar(sample_id)
            brain = self.get_analysis(ar, kw)
            new_kw = brain.getKeyword
        except Exception as e:
            self.warn(msg="Error getting analysis for '${s}/${kw}': ${e}",
                      mapping={'s': sample_id, 'kw': kw, 'e': repr(e)},
                      numline=row_nr, line=str(row))
            return 0
        self._addRawResult(sample_id, {new_kw: parsed})
        return 0

    @staticmethod
    def get_ar(sample_id):
        """Return the AnalysisRequest object with the given ID, or None."""
        query = dict(portal_type="AnalysisRequest", getId=sample_id)
        brains = api.search(query, CATALOG_ANALYSIS_REQUEST_LISTING)
        try:
            return api.get_object(brains[0])
        except IndexError:
            pass

    @staticmethod
    def get_analyses(ar):
        """Return the AR's analyses as a keyword -> brain mapping."""
        brains = ar.getAnalyses()
        return dict((a.getKeyword, a) for a in brains)

    def get_analysis(self, ar, kw):
        """Return the single analysis whose keyword starts with ``kw``.

        :raises AnalysisNotFound: when no analysis matches.
        :raises MultipleAnalysesFound: when the prefix is ambiguous.
        """
        brains = self.get_analyses(ar)
        brains = [v for k, v in brains.items() if k.startswith(kw)]
        if len(brains) < 1:
            # BUGFIX: a trailing comma previously turned `msg` into a tuple.
            msg = "No analysis found matching Keyword '${kw}'"
            raise AnalysisNotFound(msg, kw=kw)
        if len(brains) > 1:
            msg = "Multiple brains found matching Keyword '${kw}'"
            raise MultipleAnalysesFound(msg, kw=kw)
        return brains[0]
class Nexion350xParser(InstrumentResultsFileParser):
    """Import parser for NexION 350X result files.

    Accepts XLS, XLSX or CSV exports. Each row holds one sample; every
    column whose header is not in ``non_analyte_row_headers`` is treated
    as an analyte keyword with a numeric reading.
    """

    ar = None

    def __init__(self, infile, worksheet=0, encoding=None, delimiter=None):
        """
        :param infile: uploaded file object (must expose ``filename``).
        :param worksheet: workbook sheet to read (defaults to 0).
        :param encoding: optional file encoding (stored, not used here).
        :param delimiter: CSV delimiter (defaults to ``,``).
        """
        self.delimiter = delimiter if delimiter else ','
        self.encoding = encoding
        self.infile = infile
        self.worksheet = worksheet
        self.csv_data = None
        self.sample_id = None
        # guess_type returns a (mimetype, encoding) tuple; only the
        # mimetype is needed here.
        mimetype, _encoding = guess_type(self.infile.filename)
        InstrumentResultsFileParser.__init__(self, infile, mimetype)

    def parse(self):
        """Convert the input file to CSV and feed each row to parse_row.

        :returns: 0 on success, -1 on failure.
        """
        order = []
        ext = splitext(self.infile.filename.lower())[-1]
        # Try the importer matching the extension first, then the other.
        if ext == '.xlsx':
            order = (xlsx_to_csv, xls_to_csv)
        elif ext == '.xls':
            order = (xls_to_csv, xlsx_to_csv)
        elif ext == '.csv':
            self.csv_data = self.infile
        if order:
            for importer in order:
                try:
                    self.csv_data = importer(
                        infile=self.infile,
                        worksheet=self.worksheet,
                        delimiter=self.delimiter)
                    break
                except SheetNotFound:
                    self.err("Sheet not found in workbook: %s"
                             % self.worksheet)
                    return -1
                except Exception:  # noqa: best effort, try next importer
                    pass
            else:
                # for/else: no importer succeeded.
                self.warn("Can't parse input file as XLS, XLSX, or CSV.")
                return -1
        stub = FileStub(file=self.csv_data, name=str(self.infile.filename))
        self.csv_data = FileUpload(stub)
        lines = self.csv_data.readlines()
        reader = csv.DictReader(lines)
        for row in reader:
            self.parse_row(reader.line_num, row)
        return 0

    def parse_row(self, row_nr, row):
        """Add raw results for the analyte columns of one CSV row.

        :param row_nr: line number in the CSV (used in warnings).
        :param row: dict mapping column name -> cell value.
        :returns: 0 always (failures are logged as warnings).
        """
        # Skip header repeats, blanks and calibration rows.
        if row['Sample Id'].lower().strip() in (
                '', 'sample id', 'blk', 'rblk', 'calibration curves'):
            return 0
        # Get sample for this row
        sample_id = subn(r'[^\w\d\-_]*', '', row.get('Sample Id', ''))[0]
        ar = self.get_ar(sample_id)
        if not ar:
            msg = 'Sample not found for {}'.format(sample_id)
            self.warn(msg, numline=row_nr, line=str(row))
            return 0
        # Columns whose headers are not metadata are analyte keywords.
        for key in row.keys():
            if key in non_analyte_row_headers:
                continue
            kw = subn(r'[^\w\d]*', '', key)[0]
            if not kw:
                continue
            brain = self.get_analysis(ar, kw, row_nr=row_nr, row=row)
            if not brain:
                continue
            new_kw = brain.getKeyword
            # Narrowed try block: only the float() conversion should be
            # reported as "not numeric" (previously the try also wrapped
            # the analysis lookup).
            try:
                value = float(row[key])
            except (TypeError, ValueError):
                self.warn('Value for keyword ${kw} is not numeric',
                          mapping=dict(kw=kw),
                          numline=row_nr, line=str(row))
                continue
            parsed = dict(reading=value, DefaultResult='reading')
            self._addRawResult(sample_id, {new_kw: parsed})
        return 0

    @staticmethod
    def get_ar(sample_id):
        """Return the AnalysisRequest object with the given ID, or None."""
        query = dict(portal_type='AnalysisRequest', getId=sample_id)
        brains = api.search(query, CATALOG_ANALYSIS_REQUEST_LISTING)
        try:
            return api.get_object(brains[0])
        except IndexError:
            pass

    @staticmethod
    def get_analyses(ar):
        """Return the AR's analyses as a keyword -> brain mapping."""
        analyses = ar.getAnalyses()
        return dict((a.getKeyword, a) for a in analyses)

    def get_analysis(self, ar, kw, row_nr="", row=""):
        """Return the single analysis whose keyword starts with ``kw``.

        Returns None when no analysis matches.

        :raises MultipleAnalysesFound: when the prefix is ambiguous.
        """
        items = self.get_analyses(ar)
        brains = [v for k, v in items.items() if k.startswith(kw)]
        if len(brains) < 1:
            return None
        if len(brains) > 1:
            # BUGFIX: a trailing comma previously turned `msg` into a tuple.
            msg = "Multiple brains found matching Keyword '${kw}'"
            raise MultipleAnalysesFound(msg, kw=kw)
        return brains[0]
class S8TigerParser(InstrumentResultsFileParser):
    """Import parser for S8 Tiger XRF result files.

    The sample ID is derived from the file name (optionally chopping a
    trailing ``-suffix``). Each row holds one analyte formula with a
    concentration expressed in PPM or percent; both representations are
    stored and ``self.unit`` selects which one becomes the reading.
    """

    ar = None

    def __init__(self, infile, worksheet=None, encoding=None,
                 default_unit=None, delimiter=None):
        """
        :param infile: uploaded file object (must expose ``filename``).
        :param worksheet: workbook sheet to read (defaults to 0).
        :param encoding: optional file encoding (stored, not used here).
        :param default_unit: 'ppm' or 'pct' (defaults to 'pct').
        :param delimiter: CSV delimiter (defaults to ``,``).
        """
        self.delimiter = delimiter if delimiter else ','
        self.unit = default_unit if default_unit else "pct"
        self.encoding = encoding
        self.ar = None
        self.analyses = None
        self.worksheet = worksheet if worksheet else 0
        self.infile = infile
        self.csv_data = None
        self.sample_id = None
        # BUGFIX: guess_type returns a (mimetype, encoding) tuple; the
        # original passed the whole tuple where the base class expects a
        # mimetype string (the sibling parsers already unpack it).
        mimetype, _encoding = guess_type(self.infile.filename)
        InstrumentResultsFileParser.__init__(self, infile, mimetype)

    def parse(self):
        """Resolve the sample from the file name, then parse all rows.

        :returns: 0 on success, -1 on failure.
        """
        order = []
        ext = splitext(self.infile.filename.lower())[-1]
        # Try the importer matching the extension first, then the other.
        if ext == '.xlsx':
            order = (xlsx_to_csv, xls_to_csv)
        elif ext == '.xls':
            order = (xls_to_csv, xlsx_to_csv)
        elif ext == '.csv':
            self.csv_data = self.infile
        if order:
            for importer in order:
                try:
                    self.csv_data = importer(
                        infile=self.infile,
                        worksheet=self.worksheet,
                        delimiter=self.delimiter)
                    break
                except SheetNotFound:
                    self.err("Sheet not found in workbook: %s"
                             % self.worksheet)
                    return -1
                except Exception:  # noqa: best effort, try next importer
                    pass
            else:
                # for/else: no importer succeeded.
                self.warn("Can't parse input file as XLS, XLSX, or CSV.")
                return -1
        stub = FileStub(file=self.csv_data, name=str(self.infile.filename))
        self.csv_data = FileUpload(stub)
        try:
            sample_id, ext = splitext(basename(self.infile.filename))
            # maybe the filename is a sample ID, just the way it is
            ar = self.get_ar(sample_id)
            if not ar:
                # maybe we need to chop off its trailing suffix
                sample_id = '-'.join(sample_id.split('-')[:-1])
                ar = self.get_ar(sample_id)
                if not ar:
                    # or we are out of luck
                    msg = "Can't find sample for " + self.infile.filename
                    self.warn(msg)
                    return -1
            self.ar = ar
            self.sample_id = sample_id
            self.analyses = self.get_analyses(ar)
        except Exception as e:
            self.err(repr(e))
            # Consistency fix: failures return -1 everywhere else in this
            # parser (was `return False`).
            return -1
        lines = self.csv_data.readlines()
        reader = csv.DictReader(lines)
        for row in reader:
            self.parse_row(ar, reader.line_num, row)
        return 0

    def parse_row(self, ar, row_nr, row):
        """Add a raw result for one CSV row.

        :param ar: the AnalysisRequest resolved in parse().
        :param row_nr: line number in the CSV (used in warnings).
        :param row: dict mapping column name -> cell value.
        :returns: 0 normally, -1 on interim-field misconfiguration.
        """
        # convert row to use interim field names
        if 'reading' not in field_interim_map.values():
            self.err("Missing 'reading' interim field.")
            return -1
        parsed = {field_interim_map.get(k, ''): v for k, v in row.items()}
        # Robustness: `formula` is None when the column is not mapped;
        # fall back to '' so subn() below does not raise TypeError.
        formula = parsed.get('formula') or ''
        kw = subn(r'[^\w\d\-_]*', '', formula)[0]
        kw = kw.lower()
        try:
            analysis = self.get_analysis(ar, kw)
            if not analysis:
                return 0
            keyword = analysis.getKeyword
        except Exception as e:
            self.warn(msg="Error getting analysis for '${kw}': ${e}",
                      mapping={'kw': kw, 'e': repr(e)},
                      numline=row_nr, line=str(row))
            return 0
        # Concentration can be PPM or PCT as it likes; save both.
        concentration = parsed['concentration']
        try:
            val = float(subn(r'[^.\d]', '', str(concentration))[0])
        except (TypeError, ValueError, IndexError):
            self.warn(msg="Can't extract numerical value from `concentration`",
                      numline=row_nr, line=str(row))
            parsed['reading_pct'] = ''
            parsed['reading_ppm'] = ''
            return 0
        else:
            # BUGFIX: the unit marker embedded in the cell is "ppm"; the
            # original tested for the literal string 'reading_ppm', which
            # can never occur, so every PPM row fell through to the
            # "Can't decide" warning below.
            if 'ppm' in concentration.lower():
                parsed['reading_pct'] = val * 0.0001
                parsed['reading_ppm'] = val
            elif '%' in concentration:
                parsed['reading_pct'] = val
                parsed['reading_ppm'] = 1 / 0.0001 * val
            else:
                self.warn("Can't decide if reading units are PPM or %",
                          numline=row_nr, line=str(row))
                return 0
        if self.unit == 'ppm':
            reading = parsed['reading_ppm']
        else:
            reading = parsed['reading_pct']
        parsed['reading'] = reading
        parsed.update({'DefaultResult': 'reading'})
        self._addRawResult(self.sample_id, {keyword: parsed})
        return 0

    @staticmethod
    def get_ar(sample_id):
        """Return the AnalysisRequest object with the given ID, or None."""
        query = dict(portal_type="AnalysisRequest", getId=sample_id)
        brains = api.search(query, CATALOG_ANALYSIS_REQUEST_LISTING)
        try:
            return api.get_object(brains[0])
        except IndexError:
            pass

    @staticmethod
    def get_analyses(ar):
        """Return the AR's analyses as a keyword -> brain mapping."""
        analyses = ar.getAnalyses()
        return dict((a.getKeyword, a) for a in analyses)

    def get_analysis(self, ar, kw):
        """Return the single analysis whose keyword starts with ``kw``.

        Logs and returns None when zero or multiple analyses match.
        """
        analyses = self.get_analyses(ar)
        analyses = [v for k, v in analyses.items() if k.startswith(kw)]
        if len(analyses) < 1:
            self.log('No analysis found matching keyword "${kw}"',
                     mapping=dict(kw=kw))
            return None
        if len(analyses) > 1:
            self.warn('Multiple analyses found matching Keyword "${kw}"',
                      mapping=dict(kw=kw))
            return None
        return analyses[0]