def iter_raw(self, validators=None, error_handler=None, **kwargs):
    """Iterate over every row in the file without modification.

    This is primarily for altering or re-saving the original file
    (e.g. converting short tags to long).  A fresh default ``ColumnMapper``
    is used instead of this object's own mapper, and string ops are run on
    columns.

    Parameters
    ----------
    validators : [func or validator like] or func or validator like
        A validator or list of validators to apply to the hed strings before returning
    error_handler : ErrorHandler or None
        Used to report errors.  Uses a default one if none passed in.
    kwargs:
        See util.translate_ops or the specific validators for additional options

    Returns
    -------
    iterator
        Whatever ``self.iter_dataframe`` returns for the default mapper.
    """
    handler = ErrorHandler() if error_handler is None else error_handler
    return self.iter_dataframe(
        ColumnMapper(),
        validators=validators,
        run_string_ops_on_columns=True,
        error_handler=handler,
        **kwargs
    )
def validate(self, validators=None, error_handler=None, **kwargs):
    """Run the given validators on this object and return the issues found.

    Parameters
    ----------
    validators : [func or validator like] or func or validator like
        A validator or list of validators to apply.
    error_handler : ErrorHandler or None
        Used to report errors.  Uses a default one if none passed in.
    kwargs:
        See util.translate_ops or the specific validators for additional options

    Returns
    -------
    issues : list
        The result of ``self.apply_ops`` after the error handler has added
        the current context to it.
    """
    handler = error_handler if error_handler is not None else ErrorHandler()
    ops = translate_ops(validators, **kwargs)

    # This object itself is the HED_STRING context for every issue reported below.
    handler.push_error_context(ErrorContext.HED_STRING, self,
                               increment_depth_after=False)
    found_issues = self.apply_ops(ops)
    handler.add_context_to_issues(found_issues)
    handler.pop_error_context()

    return found_issues
def validate_column_data(self, validators, error_handler=None, **kwargs):
    """Validate every entry in ``self.column_data`` and collect the issues.

    Parameters
    ----------
    validators : [func or validator like] or func or validator like
        A validator or list of validators, forwarded to each column's ``validate_column``.
    error_handler : ErrorHandler or None
        Used to report errors.  Uses a default one if none passed in.
    kwargs:
        See util.translate_ops or the specific validators for additional options

    Returns
    -------
    validation_issues : [{}]
        The issues returned by each column, concatenated in the iteration
        order of ``self.column_data``.
    """
    handler = ErrorHandler() if error_handler is None else error_handler
    return [
        issue
        for column in self.column_data.values()
        for issue in column.validate_column(validators, error_handler=handler, **kwargs)
    ]
def extract_definitions(self, error_handler=None):
    """Gather all definitions found in this object's hed strings.

    Every hed string produced by ``self.hed_string_iter`` is written back
    through ``self.set_hed_string`` with ``set_def_removed=True``.  Issues
    yielded by the iterator are not returned by this method.

    Parameters
    ----------
    error_handler : ErrorHandler
        The error handler to use for context, uses a default one if none.

    Returns
    -------
    def_dict: DefDict
        The DefDict that was passed as a validator while iterating.
    """
    if error_handler is None:
        error_handler = ErrorHandler()

    new_def_dict = DefDict()
    # The DefDict is itself used as a validator, followed by definition removal.
    # NOTE(review): presumably the DefDict records each definition it is applied to - confirm.
    validators = [new_def_dict, HedString.remove_definitions]

    # The loop must run to completion so that every hed string is processed and stored back.
    for hed_string, key_name, _issues in self.hed_string_iter(
            validators=validators, allow_placeholders=True,
            error_handler=error_handler):
        self.set_hed_string(hed_string, key_name, set_def_removed=True)

    return new_def_dict
def hed_string_iter(self, validators=None, error_handler=None, **kwargs):
    """Iterate over all hed strings in this column definition.

    Nothing is yielded when ``self._hed_dict`` is not a dict.

    Parameters
    ----------
    validators : [func or validator like] or func or validator like
        A validator or list of validators to apply to the hed strings before returning
    error_handler : ErrorHandler
        The error handler to use for context, uses a default one if none.
    kwargs:
        See util.translate_ops or the specific validators for additional options

    Yields
    -------
    hed_string : HedString
        hed_string at a given column and key position
    position: str
        Indicates where hed_string was loaded from so it can be later set by the user
    issues: [{}]
        List of issues found applying validators (a blank-string error for falsy hed strings)
    """
    if error_handler is None:
        error_handler = ErrorHandler()

    if not isinstance(self._hed_dict, dict):
        return

    # Validators are only translated when some were supplied.
    tag_ops = translate_ops(validators, error_handler=error_handler, **kwargs) if validators else []

    for hed_string_obj, key_name in self._hed_iter():
        error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name)

        if not hed_string_obj:
            # Blank entry: report it and skip validation; only the key-name context is active.
            issues = list(ErrorHandler.format_error(SidecarErrors.BLANK_HED_STRING))
            error_handler.add_context_to_issues(issues)
            yield hed_string_obj, key_name, issues
            error_handler.pop_error_context()
            continue

        error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj,
                                         increment_depth_after=False)
        issues = list(hed_string_obj.apply_ops(tag_ops)) if tag_ops else []
        error_handler.add_context_to_issues(issues)
        yield hed_string_obj, key_name, issues
        # Contexts are released only after the consumer resumes the generator.
        error_handler.pop_error_context()  # HED_STRING
        error_handler.pop_error_context()  # SIDECAR_KEY_NAME
def validate_file(self, validators, name=None, error_handler=None, check_for_warnings=True, **kwargs):
    """Run the given validators on all columns and rows of the spreadsheet.

    Parameters
    ----------
    validators : [func or validator like] or func or validator like
        A validator or list of validators to apply.  A non-list value is wrapped in a list.
    name: str
        If present, will use this as the filename for context, rather than ``self.name``.
        Useful for temp filenames.
    error_handler : ErrorHandler or None
        Used to report errors.  Uses a default one if none passed in.
    check_for_warnings: bool
        If True this will check for and return warnings as well
    kwargs:
        See util.translate_ops or the specific validators for additional options

    Returns
    -------
    validation_issues: [{}]
        Issues from ``get_def_and_mapper_issues`` followed by those from ``_run_validators``.
    """
    context_name = name if name else self.name
    validator_list = validators if isinstance(validators, list) else [validators]
    handler = ErrorHandler() if error_handler is None else error_handler

    # Everything reported below is tagged with the file name context.
    handler.push_error_context(ErrorContext.FILE_NAME, context_name)
    issues = self.get_def_and_mapper_issues(handler, check_for_warnings)
    issues += self._run_validators(validator_list,
                                   error_handler=handler,
                                   check_for_warnings=check_for_warnings,
                                   **kwargs)
    handler.pop_error_context()

    return issues
def extract_definitions(self, error_handler=None):
    """Gather all definitions found in this spreadsheet.

    Parameters
    ----------
    error_handler : ErrorHandler
        The error handler to use for context, uses a default one if none.

    Returns
    -------
    def_dict: DefDict
        The DefDict that was run as the only validator over the raw data.
    """
    handler = ErrorHandler() if error_handler is None else error_handler
    definitions = DefDict()
    # Only the DefDict is kept; the value returned by _run_validators is discarded.
    self._run_validators([definitions], run_on_raw=True, error_handler=handler)
    return definitions
def _convert_to_form(self, hed_schema, tag_form, error_handler): """ Converts all tags in a given spreadsheet to a given form Parameters ---------- hed_schema : HedSchema The schema to use to convert tags. tag_form: str The form to convert the tags to. (short_tag, long_tag, base_tag, etc) error_handler : ErrorHandler The error handler to use for context, uses a default one if none. Returns ------- issues_list: [{}] A list of issues found during conversion """ if error_handler is None: error_handler = ErrorHandler() error_list = [] for row_number, column_to_hed_tags_dictionary in self: error_handler.push_error_context(ErrorContext.ROW, row_number) for column_number in column_to_hed_tags_dictionary: error_handler.push_error_context(ErrorContext.COLUMN, column_number) column_hed_string = column_to_hed_tags_dictionary[ column_number] error_list += column_hed_string.convert_to_canonical_forms( hed_schema) self.set_cell(row_number, column_number, column_hed_string, include_column_prefix_if_exist=False, tag_form=tag_form) error_handler.pop_error_context() error_handler.pop_error_context() return error_list
def iter_dataframe(self, mapper=None, return_row_dict=False, validators=None,
                   run_string_ops_on_columns=False, error_handler=None,
                   expand_defs=False, remove_definitions=True, **kwargs):
    """Yield parsed rows of ``self._dataframe`` based on the given column mapper.

    Rows whose cells are all null are skipped.

    Parameters
    ----------
    mapper : ColumnMapper
        The column name to column number mapper.  Defaults to ``self._mapper``.
    return_row_dict: bool
        If True, this returns the full row_dict including issues.
        If False, returns just the HedStrings for each column
    error_handler : ErrorHandler
        The error handler to use for context, uses a default one if none.
    validators : [func or validator like] or func or validator like
        A validator or list of validators to apply to the hed strings before returning
    run_string_ops_on_columns: bool
        If true, run all tag and string ops on columns, rather than columns then rows.
    expand_defs: bool
        If True, this will fully remove all definitions found and expand all def tags to def-expand tags
    remove_definitions: bool
        If true, this will remove all definition tags found.
    kwargs:
        See util.translate_ops or the specific validators for additional options

    Yields
    -------
    row_number: int
        The dataframe row index plus 1, plus 1 more when ``self._has_column_names`` is set
    row_dict: dict
        When ``return_row_dict`` is True, the dict from ``mapper.expand_row_tags`` with the
        row issues and the combined row hed string added.  Otherwise just the
        column-to-hed-tags dict of that row.
    """
    if error_handler is None:
        error_handler = ErrorHandler()
    if mapper is None:
        mapper = self._mapper

    tag_ops, string_ops = self._translate_ops(
        validators,
        run_string_ops_on_columns=run_string_ops_on_columns,
        expand_defs=expand_defs,
        remove_definitions=remove_definitions,
        error_handler=error_handler,
        **kwargs)

    # Reported row numbers start at one, with one more added when the file has column names.
    row_offset = 2 if self._has_column_names else 1

    for row_index, raw_row in self._dataframe.iterrows():
        if all(raw_row.isnull()):
            # Skip any blank lines.
            continue

        expanded_row = mapper.expand_row_tags(raw_row)
        column_to_hed_tags = expanded_row[model_constants.COLUMN_TO_HED_TAGS]
        expansion_column_issues = expanded_row.get(model_constants.COLUMN_ISSUES, {})

        # The context uses the raw dataframe index, not the offset row number.
        error_handler.push_error_context(ErrorContext.ROW, row_index)

        row_issues = []
        if tag_ops:
            row_issues += self._run_column_ops(column_to_hed_tags, tag_ops,
                                               expansion_column_issues, error_handler)

        if not return_row_dict:
            yield row_index + row_offset, column_to_hed_tags
        else:
            combined_hed_string = HedString.create_from_other(column_to_hed_tags.values())
            if string_ops:
                row_issues += self._run_row_ops(combined_hed_string, string_ops, error_handler)
            expanded_row[model_constants.ROW_ISSUES] = row_issues
            expanded_row[model_constants.ROW_HED_STRING] = combined_hed_string
            yield row_index + row_offset, expanded_row

        # The row context stays active until the consumer asks for the next row.
        error_handler.pop_error_context()