def load(expression_tsv, experiment_md=None):
    """Read an expression matrix TSV into an ExpressionMatrix and validate it.

    The header and data checks are always run.  If experiment_md is
    provided, the replicates are validated against it as well.

    Any LrgaspException, pandas parser/empty-data error or ValueError raised
    along the way is re-raised as an LrgaspException naming the file, chained
    to the original error.
    """
    try:
        with gopen(expression_tsv) as tsv_fh:
            # Explicit na_values together with keep_default_na=False is what
            # lets empty cells and short rows be detected, rather than being
            # implicitly converted to NaN.
            frame = pd.read_csv(
                tsv_fh,
                dialect=csv.excel_tab,
                header=0,
                converters={"ID": str},
                na_values=na_values,
                keep_default_na=False,
            )
        expression_mat = ExpressionMatrix(frame)
        validate_header(expression_mat)
        # verify_integrity makes duplicate IDs fail here with a ValueError
        expression_mat.df.set_index(
            "ID", drop=False, inplace=True, verify_integrity=True
        )
        validate_data(expression_mat)
        if experiment_md is not None:
            validate_replicates(experiment_md, expression_mat)
    except (
        LrgaspException,
        pd.errors.ParserError,
        pd.errors.EmptyDataError,
        ValueError,
    ) as err:
        raise LrgaspException(
            f"Parse of expression matrix TSV failed: {expression_tsv}") from err
    return expression_mat
def experiment_load(experiment_json):
    """load and validate experiment metadata

    The file is parsed with json.load using ObjDict as the object_pairs_hook,
    then checked with experiment_validate.  The parsed metadata is returned.

    Raises LrgaspException, chained to the underlying error, if the file
    cannot be decoded or parsed as JSON, or if validation fails.
    """
    try:
        with gopen(experiment_json) as fh:
            experiment = json.load(fh, object_pairs_hook=ObjDict)
    except (json.decoder.JSONDecodeError, UnicodeDecodeError) as ex:
        # UnicodeDecodeError is caught as well as JSONDecodeError: a file that
        # is not valid text fails while being decoded, and should be reported
        # the same way as malformed JSON rather than escaping unwrapped.
        raise LrgaspException(
            f"parse of experiment metadata (JSON) failed: {experiment_json}") from ex
    try:
        experiment_validate(experiment)
    except LrgaspException as ex:
        raise LrgaspException(
            f"validation of experiment metadata failed: {experiment_json}") from ex
    return experiment
def load(model_map_tsv):
    """Load a reads-to-models TSV into a ReadModelMap.

    Each item produced by _tsv_reader is added to the map.  A file that
    yields no entries is treated as an error.

    Raises LrgaspException, chained to the underlying error, on an
    LrgaspException, FileNotFoundError or csv.Error during loading.
    """
    read_model_map = ReadModelMap()
    try:
        with gopen(model_map_tsv) as tsv_fh:
            for entry in _tsv_reader(tsv_fh):
                read_model_map.add(entry)
        if len(read_model_map) > 0:
            return read_model_map
        # raised inside the try so that it is wrapped with the file name below
        raise LrgaspException("TSV contains no data")
    except (LrgaspException, FileNotFoundError, csv.Error) as err:
        failure = "Parse of reads-to-models TSV failed: {}".format(model_map_tsv)
        raise LrgaspException(failure) from err
def load(entry_json):
    """Parse the entry metadata JSON file, validate it and return it.

    The metadata is read with json.load using ObjDict as the
    object_pairs_hook and checked with entry_validate.  On success the
    experiments attribute is initialized to None before returning.

    Raises LrgaspException, chained to the underlying error, when parsing
    or validation fails.
    """
    try:
        with gopen(entry_json) as json_fh:
            entry_md = json.load(json_fh, object_pairs_hook=ObjDict)
    except (json.decoder.JSONDecodeError, UnicodeDecodeError) as err:
        raise LrgaspException(
            f"parse of entry metadata (JSON) failed: {entry_json}") from err

    try:
        entry_validate(entry_md)
    except LrgaspException as err:
        raise LrgaspException(
            f"validation of entry metadata failed: {entry_json}") from err
    else:
        # set only once validation has passed
        entry_md.experiments = None
        return entry_md
def expr_mat_load(expr_mat_tsv):
    """Load an expression matrix TSV with pandas and validate it.

    Row consistency is checked in a separate pass before pandas reads the
    file; the ID column is always read as a string.

    Raises LrgaspException, chained to the underlying error, on an
    LrgaspException or a pandas parser/empty-data error.
    """
    try:
        check_row_consistency(expr_mat_tsv)
        with gopen(expr_mat_tsv) as tsv_fh:
            matrix = pd.read_csv(
                tsv_fh,
                dialect=csv.excel_tab,
                header=0,
                converters={"ID": str},
            )
        validate(matrix)
    except (
        LrgaspException,
        pd.errors.ParserError,
        pd.errors.EmptyDataError,
    ) as err:
        failure = "Parse of expression matrix TSV failed: {}".format(expr_mat_tsv)
        raise LrgaspException(failure) from err
    return matrix
def check_row_consistency(expression_tsv):
    """Verify that every row of the TSV has the same number of columns.

    Pandas just pads or ignores rows with an inconsistent number of columns,
    so a separate pass over the file is needed to validate that the matrix
    is consistent.

    Raises LrgaspException if the file has no rows or if the rows differ in
    their number of columns.
    """
    with gopen(expression_tsv) as fh:
        # only the distinct row widths matter, so collect them in a set
        widths = {len(row) for row in csv.reader(fh, dialect=csv.excel_tab)}
    if not widths:
        raise LrgaspException("TSV is empty")
    minLen, maxLen = min(widths), max(widths)
    if minLen != maxLen:
        raise LrgaspException(
            f"TSV has an inconsistent number of columns: min={minLen}, max={maxLen}"
        )
def load(expression_tsv):
    """Load an expression matrix TSV into a DataFrame indexed by ID.

    Row consistency is checked in a separate pass, then the file is read
    with pandas, the header validated, the ID column made the index (while
    also being kept as a column), and the data validated.

    Raises LrgaspException, chained to the underlying error, on an
    LrgaspException, a pandas parser/empty-data error or a ValueError.
    """
    try:
        check_row_consistency(expression_tsv)
        with gopen(expression_tsv) as tsv_fh:
            expression = pd.read_csv(
                tsv_fh,
                dialect=csv.excel_tab,
                header=0,
                converters={"ID": str},
            )
        validate_header(expression)
        # verify_integrity makes duplicate IDs fail here with a ValueError
        expression.set_index("ID", drop=False, inplace=True, verify_integrity=True)
        validate_data(expression)
    except (
        LrgaspException,
        pd.errors.ParserError,
        pd.errors.EmptyDataError,
        ValueError,
    ) as err:
        failure = "Parse of expression matrix TSV failed: {}".format(expression_tsv)
        raise LrgaspException(failure) from err
    return expression
def load(entry_json):
    """Parse the entry metadata JSON file, validate it and return it.

    The metadata is read with json.load using ObjDict as the
    object_pairs_hook, checked against entry_fields with check_from_defs,
    given its non-serialized attributes, and then passed to entry_validate.

    Raises LrgaspException, chained to the underlying error, when parsing
    or validation fails.
    """
    try:
        with gopen(entry_json) as json_fh:
            entry_md = json.load(json_fh, object_pairs_hook=ObjDict)
    except (json.decoder.JSONDecodeError, UnicodeDecodeError) as err:
        raise LrgaspException(
            f"parse of entry metadata (JSON) failed: {entry_json}") from err

    # non-serialized fields, added only after the field check has run
    extra_attrs = (
        ("experiments", None),
        ("entry_dir", None),
        ("entry_json", entry_json),
    )
    try:
        check_from_defs("entry", entry_fields, entry_md)
        for attr_name, attr_value in extra_attrs:
            setattr(entry_md, attr_name, attr_value)
        entry_validate(entry_md)
    except LrgaspException as err:
        raise LrgaspException(
            f"validation of entry metadata failed: {entry_json}") from err
    return entry_md