def get_hed_schema_from_pull_down(request):
    """Create a HedSchema object from the schema pull-down section of a form.

    Parameters
    ----------
    request: Request object
        A Request object containing user data from a form.

    Returns
    -------
    HedSchema
        The schema loaded from the selected version, or parsed from the
        uploaded schema file when the "other" option was selected.

    Raises
    ------
    HedFileError
        If the form has no schema version entry, or if the "other" option was
        chosen but no schema file was uploaded.
    """
    form = request.form
    version_key = base_constants.SCHEMA_VERSION
    if version_key not in form:
        raise HedFileError("NoSchemaError",
                           "Must provide a valid schema or schema version", "")

    if form[version_key] != base_constants.OTHER_VERSION_OPTION:
        # A specific version was picked: resolve it to a path and load it.
        schema_path = hedschema.get_path_from_hed_version(form[version_key])
        return hedschema.load_schema(schema_path)

    # From here on the "other" option is selected, so an upload is required.
    if base_constants.SCHEMA_PATH not in request.files:
        raise HedFileError(
            "NoSchemaFile",
            "Must provide a valid schema for upload if other chosen", "")

    uploaded = request.files[base_constants.SCHEMA_PATH]
    # Read at most BYTE_LIMIT bytes of the upload.
    schema_text = uploaded.read(file_constants.BYTE_LIMIT).decode('ascii')
    # The sanitized file name is passed as the file type; from_string only
    # looks at its extension.
    return hedschema.from_string(schema_text,
                                 file_type=secure_filename(uploaded.filename))
def process(arguments):
    """Dispatch the requested schema command.

    Parameters
    ----------
    arguments: dict
        A dictionary with the input arguments from the schema form.

    Returns
    -------
    dict
        A dictionary with results in standard format.

    Raises
    ------
    HedFileError
        If the command is missing, empty, or not a recognized schema command.
    """
    # A missing command is treated exactly like an empty one.
    command = arguments.get(base_constants.COMMAND, '')
    hed_schema = arguments.get('schema', None)
    display_name = arguments.get('schema_display_name', 'unknown_source')

    if command == '':
        raise HedFileError('MissingCommand', 'Command is missing', '')
    if command == base_constants.COMMAND_VALIDATE:
        return schema_validate(hed_schema, display_name)
    if command == base_constants.COMMAND_CONVERT:
        return schema_convert(hed_schema, display_name)
    raise HedFileError('UnknownProcessingMethod',
                       "Select a schema processing method", "")
def from_string(schema_string, file_type=".xml", library_prefix=None):
    """ Build a schema from a string, parsed as if it came from a file of the given type.

    Parameters
    ----------
    schema_string : str
        An XML or mediawiki file as a single long string.
    file_type : str
        The extension (including the .) that decides which parser is used.
        Only the ending of this string is inspected.
    library_prefix : str or None
        The name_prefix all tags in this schema will accept.

    Returns
    -------
    schema: HedSchema
        The loaded schema

    Raises
    ------
    HedFileError
        If schema_string is empty or file_type does not end in .xml or .mediawiki.
    """
    if not schema_string:
        raise HedFileError(HedExceptions.BAD_PARAMETERS,
                           "Empty string passed to HedSchema.from_string",
                           filename=schema_string)

    # Pick the parser entry point first, then call it once.
    if file_type.endswith(".xml"):
        parse = HedSchemaXMLParser.load_xml
    elif file_type.endswith(".mediawiki"):
        parse = HedSchemaWikiParser.load_wiki
    else:
        raise HedFileError(HedExceptions.INVALID_EXTENSION,
                           "Unknown schema extension", filename=file_type)

    loaded = parse(schema_as_string=schema_string)
    if library_prefix:
        loaded.set_library_prefix(library_prefix=library_prefix)
    return loaded
def __init__(self, key_cols, target_cols=None, name=''):
    """ Set up the base data used for event remapping.

    Args:
        key_cols (list):    List of columns to be replaced (assumed in the DataFrame)
        target_cols (list): List of replacement columns (assumed to not be in the DataFrame)
        name (str):         Name associated with this remap (usually a pathname of the events file).

    Raises:
        HedFileError: If key_cols is empty or if key_cols and target_cols share a column.
    """
    if not key_cols:
        raise HedFileError("KeyColumnsEmpty", "KeyMap key columns must exist", "")
    # Copies keep later changes to the caller's lists from leaking in.
    self.key_cols = key_cols.copy()

    overlap = set(key_cols).intersection(target_cols) if target_cols else None
    if overlap:
        raise HedFileError(
            "KeyTargetNotDisjoint",
            f"Key cols {str(key_cols)} and target cols {str(target_cols)} must be disjoint",
            "")
    self.target_cols = target_cols.copy() if target_cols else []

    self.name = name
    self.columns = self.key_cols + self.target_cols
    # The map starts with the right columns and no rows.
    self.col_map = pd.DataFrame(columns=self.columns)
    self.map_dict = {}
def validate_attributes(attrib_dict, filename):
    """Validate schema header attributes.

    Every attribute that has an entry in attribute_validators is checked with
    its validator; the version attribute must also be present.

    Parameters
    ----------
    attrib_dict: dict
        Maps attribute names to attribute values.
    filename: str
        File name reported in any raised error.

    Raises
    ------
    HedFileError
        If a validator returns anything other than True (its return value is
        used as the error message), or if the version attribute is absent.
    """
    for name, value in attrib_dict.items():
        if name not in attribute_validators:
            continue
        check, error_code = attribute_validators[name]
        outcome = check(value)
        # Anything except the literal True is treated as the failure message.
        if outcome is not True:
            raise HedFileError(error_code, outcome, filename)

    if constants.VERSION_ATTRIBUTE not in attrib_dict:
        raise HedFileError(HedExceptions.BAD_HED_SEMANTIC_VERSION,
                           "No version attribute found in header",
                           filename=filename)
def get_worksheet(excel_file, sheet_name):
    """Open an Excel workbook read-only and return one of its worksheets.

    Parameters
    ----------
    excel_file: str or file-like
        The workbook to open; passed straight to openpyxl.load_workbook.
    sheet_name: str or None
        Name of the worksheet to return. If empty, the first worksheet is used.

    Returns
    -------
    tuple
        (worksheet, sheet_names): the selected worksheet and the list of all
        worksheet names in the workbook.

    Raises
    ------
    HedFileError
        If the workbook has no worksheets or sheet_name is not one of them.
    """
    wb = openpyxl.load_workbook(excel_file, read_only=True)
    sheet_names = wb.sheetnames
    if not sheet_names:
        # A read-only workbook must be closed explicitly; do so before
        # bailing out so the underlying file is not left open.
        wb.close()
        raise HedFileError('BadExcelFile', 'Excel files must have worksheets', None)
    if sheet_name and sheet_name not in sheet_names:
        wb.close()
        raise HedFileError('BadWorksheetName',
                           f'Worksheet {sheet_name} not in Excel file', '')
    # The workbook stays open on success: the returned worksheet reads from it.
    worksheet = wb[sheet_name] if sheet_name else wb.worksheets[0]
    return worksheet, sheet_names
def test_get_file_list_files(self):
    """Check get_file_list against a plain listing of the prologue_tests directory.

    Every path returned must have a base name that is a file in the directory,
    and every file in the directory must appear in the returned list.
    """
    dir_pairs = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             '../data/hed_pairs/prologue_tests')
    test_files = [name for name in os.listdir(dir_pairs)
                  if os.path.isfile(os.path.join(dir_pairs, name))]
    file_list1 = get_file_list(dir_pairs)

    # Anything returned that is not a file of the directory is an extra result.
    for file in file_list1:
        if os.path.basename(file) not in test_files:
            raise HedFileError("FileShouldNotBeFound",
                               f"get_file_list should not have found file {file}", "")

    # Any file of the directory that was not returned is a missing result.
    for file in test_files:
        if os.path.join(dir_pairs, file) not in file_list1:
            raise HedFileError("FileNotFound",
                               f"get_file_list should have found file {file}", "")
def test_get_get_file_list_suffix(self):
    """Check that get_file_list with an extensions filter returns only those extensions."""
    dir_data = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../data')
    allowed = (".json", ".tsv")
    file_list = get_file_list(dir_data, extensions=list(allowed))
    for item in file_list:
        # str.endswith accepts a tuple of suffixes.
        if not item.endswith(allowed):
            raise HedFileError("BadFileType",
                               "get_file_list expected only .json or .tsv files", "")
def _get_element_tag_value(self, element, tag_name=xml_constants.NAME_ELEMENT):
    """Return the text of a child tag of an element.

    Parameters
    ----------
    element: Element
        An element in the HED XML file.
    tag_name: str
        The name of the child tag to look up. Defaults to
        xml_constants.NAME_ELEMENT.

    Returns
    -------
    str
        The text of the child tag, or an empty string if the element has no
        such child. For a "units" child with no text the result is None.

    Raises
    ------
    HedFileError
        If the child exists but has no text and tag_name is not "units".
    """
    child = element.find(tag_name)
    if child is None:
        return ""

    text = child.text
    # Only a "units" child is allowed to be empty.
    if text is None and tag_name != "units":
        raise HedFileError(
            HedExceptions.HED_SCHEMA_NODE_NAME_INVALID,
            f"A Schema node is empty for tag of element name: '{tag_name}'.",
            self._schema.filename)
    return text
def get_columns_request(request):
    """Build the columns information for the file uploaded with a columns form.

    Parameters
    ----------
    request: Request object
        A Request object containing the uploaded file and form options.

    Returns
    -------
    The value returned by create_columns_info for the uploaded file.

    Raises
    ------
    HedFileError
        If the request does not contain the columns file.
    """
    if form_has_file(request, base_constants.COLUMNS_FILE):
        uploaded = request.files.get(base_constants.COLUMNS_FILE, '')
        # The option is considered set when its form value is 'on'.
        names_in_header = form_has_option(request, 'has_column_names', 'on')
        selected_sheet = request.form.get(base_constants.WORKSHEET_SELECTED, None)
        return create_columns_info(uploaded, names_in_header, selected_sheet)

    raise HedFileError('MissingFile', 'An uploadable file was not provided', None)
def update(self, data):
    """ Merge the information in data into the existing map.

    Args:
        data (DataFrame or str):     DataFrame or filename of an events file or event map

    Returns:
        list                         Indices of duplicates (as returned by self._update)

    Raises:
        HedFileError: If data lacks any of the key columns.
    """
    incoming = get_new_dataframe(data)
    remove_quotes(incoming)
    incoming_cols = incoming.columns.values.tolist()

    _, absent_keys = separate_columns(incoming_cols, self.key_cols)
    if absent_keys:
        raise HedFileError(
            "MissingKeyColumn",
            f"make_template data does not have key columns {str(absent_keys)}",
            "")

    # Re-shape the incoming data to exactly the key + target columns of the map.
    staged = pd.DataFrame(columns=self.columns)
    staged[self.key_cols] = incoming[self.key_cols].values

    found_targets, absent_targets = separate_columns(incoming_cols,
                                                     self.target_cols)
    if found_targets:
        staged[found_targets] = incoming[found_targets].values
    if absent_targets:
        # Target columns that the data does not supply are filled with 'n/a'.
        staged[absent_targets] = 'n/a'
    return self._update(staged)
def test_handle_http_error(self):
    """Check the response headers and body produced by handle_http_error.

    Covers both a HedFileError and a plain Exception.
    """
    from hed.errors.exceptions import HedFileError, HedExceptions
    from hedweb.web_util import handle_http_error

    def check_error_response(error, expected_prefix):
        # Both error kinds must yield the same response shape.
        response = handle_http_error(error)
        headers = dict(response.headers)
        self.assertEqual('error', headers["Category"],
                         "handle_http_error should have category error")
        self.assertTrue(
            headers['Message'].startswith(expected_prefix),
            "handle_http_error error message starts with the error_type")
        self.assertFalse(response.data,
                         "handle_http_error should have empty data")

    with self.app.test_request_context():
        check_error_response(
            HedFileError(HedExceptions.BAD_PARAMETERS,
                         "This had bad parameters", 'my.file'),
            HedExceptions.BAD_PARAMETERS)
        check_error_response(Exception(), 'Exception')
def unflatten_hed(self, dataframe):
    """ Rebuild a sidecar-style dictionary from a flattened two-column DataFrame.

    Rows are read from the 'column' and 'HED' columns. A row whose key is
    marked (get_unmarked_key returns something different from the key) starts
    an entry in the result:
      - with a HED value other than 'n/a' the entry is {"HED": value};
      - with 'n/a' the entry is {"HED": {...}} and the following unmarked rows
        fill that inner dictionary.

    Args:
        dataframe (DataFrame): A Pandas DataFrame containing flattened sidecar.

    Returns:
        dict compatible with BIDS JSON events.

    Raises:
        HedFileError: If get_unmarked_key returns an empty value for a key.
    """
    result = {}
    # Unmarked rows go here. Until a marked 'n/a' row attaches a fresh
    # dictionary to the result, this dictionary is not part of the output.
    open_levels = {}
    for _, row in dataframe.iterrows():
        key = row['column']
        value = row["HED"]
        base_key = self.get_unmarked_key(key)
        if not base_key:
            raise HedFileError(
                "unflatten",
                f"Empty or invalid flattened sidecar key {str(key)}", "")

        if base_key == key:
            open_levels[key] = value
            continue

        # Every marked row starts over with an empty dictionary; it is only
        # attached to the result when the row itself carries no HED value.
        open_levels = {}
        result[base_key] = {"HED": open_levels if value == 'n/a' else value}
    return result
def unflatten(self, dataframe):
    """ Rebuild a nested dictionary from a flattened two-column DataFrame.

    Rows are read from the 'column' and 'HED' columns. An unmarked key (one
    that get_unmarked_key returns unchanged) is stored with its value in the
    dictionary currently being built. A marked key opens a nested dictionary;
    the same marked key appearing again closes it and stores it in the parent
    dictionary under the unmarked key.

    Args:
        dataframe (DataFrame): A Pandas DataFrame containing flattened sidecar.

    Returns:
        dict compatible with BIDS JSON events.

    Raises:
        HedFileError: If get_unmarked_key returns an empty value for a key.
    """
    # Two parallel stacks: the dictionaries under construction (the bottom one
    # is the result) and the marked keys that opened the nested ones.
    dict_stack = [{}]
    open_keys = []
    for _, row in dataframe.iterrows():
        key = row['column']
        value = row["HED"]
        base_key = self.get_unmarked_key(key)
        if not base_key:
            raise HedFileError(
                "unflatten",
                f"Empty or invalid flattened sidecar key {str(key)}", "")

        if base_key == key:
            dict_stack[-1][key] = value
        elif open_keys and open_keys[-1] == key:
            # Closing marker: attach the finished dictionary to its parent.
            open_keys.pop()
            finished = dict_stack.pop()
            dict_stack[-1][base_key] = finished
        else:
            # Opening marker: start a new nested dictionary.
            open_keys.append(key)
            dict_stack.append({})
    return dict_stack[0]
def _get_header_attributes_old(self, version_line):
    """Extract the key:value attributes from the HED line of a .mediawiki file.

    The line is split on commas and each piece is split at its first colon;
    keys and values are stripped of surrounding whitespace.

    Parameters
    ----------
    version_line: string
        The line in the wiki file that contains the version or other attributes.

    Returns
    -------
    {}: The key is the name of the attribute, value being the value.  eg {'version':'v1.0.1'}

    Raises
    ------
    HedFileError
        If a comma-separated piece contains no colon.
    """
    final_attributes = {}
    for pair in version_line.split(','):
        # partition splits at the first ':'; an empty separator means none was found.
        key, divider, value = pair.partition(':')
        if not divider:
            msg = f"Found poorly matched key:value pair in header: {pair}"
            raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg,
                               filename=self.filename, issues=[msg])
        final_attributes[key.strip()] = value.strip()
    return final_attributes
def __init__(self, value_cols=None, skip_cols=None, name='', header_char='*'):
    """ Set up the column bookkeeping for this dictionary.

    Args:
        value_cols (list):   List of columns to be treated as value columns
        skip_cols (list):    List of columns to be skipped
        name (str):          Name associated with the dictionary
        header_char (str):   Stored as self.header_char

    Raises:
        HedFileError: If value_cols and skip_cols have a column in common.
    """
    self.name = name
    self.header_char = header_char
    self.categorical_info = {}
    self.value_info = {}

    both_given = bool(value_cols) and bool(skip_cols)
    if both_given and set(value_cols).intersection(skip_cols):
        raise HedFileError(
            "ValueSkipOverlap",
            f"Value columns {str(value_cols)} and skip columns {str(skip_cols)} cannot overlap",
            "")

    if value_cols:
        # Each value column starts with a count of zero.
        self.value_info.update(dict.fromkeys(value_cols, 0))
    self.skip_cols = skip_cols.copy() if skip_cols else []
def get_input_from_form(request):
    """Collect the input arguments from a request submitted by the string form.

    Parameters
    ----------
    request: Request object
        A Request object containing user data from the string form.

    Returns
    -------
    dict
        A dictionary containing input arguments for calling the underlying string processing functions.

    Raises
    ------
    HedFileError
        If the schema cannot be obtained from the form or no HED string was entered.
    """
    hed_schema = get_hed_schema_from_pull_down(request)
    hed_string = request.form.get(base_constants.STRING_INPUT, None)
    if not hed_string:
        raise HedFileError('EmptyHedString', 'Must enter a HED string', '')

    # The form supplies a single string; downstream processing expects a list.
    string_list = [HedString(hed_string)]
    return {
        base_constants.COMMAND:
            request.form.get(base_constants.COMMAND_OPTION, ''),
        base_constants.SCHEMA: hed_schema,
        base_constants.STRING_LIST: string_list,
        base_constants.CHECK_FOR_WARNINGS:
            form_has_option(request, base_constants.CHECK_FOR_WARNINGS, 'on'),
    }
def __init__(self, schema_list):
    """ Combine multiple HedSchema objects so they can be used together with the validator.

    The schemas are stored in self._schemas, keyed by each schema's
    _library_prefix.

    Parameters
    ----------
    schema_list : list
        The HedSchema objects to combine.

    Raises
    ------
    HedFileError
        If two schemas share the same name_prefix.
    """
    prefixes = [schema._library_prefix for schema in schema_list]
    # A set collapses duplicates, so a size mismatch means a repeated prefix.
    if len(prefixes) != len(set(prefixes)):
        raise HedFileError(
            HedExceptions.SCHEMA_DUPLICATE_PREFIX,
            "Multiple schemas share the same tag name_prefix.  This is not allowed.",
            filename="Combined Schema")
    self._schemas = dict(zip(prefixes, schema_list))
def get_schema(schema_path=None, schema_url=None, schema_string=None, file_type=".xml"):
    """Return a schema object from the first source that is provided.

    The sources are tried in the order schema_path, schema_url, schema_string.

    Parameters
    ----------
    schema_path: str
        A string representing a path to a schema
    schema_url: str
        A string representing a URL of a schema
    schema_string: str
        A string containing the contents of a schema
    file_type: str
        A string representing the file extension including the .
        Only used together with schema_string.

    Returns
    -------
    HedSchema
        The HedSchema object that was loaded.

    Raises
    ------
    HedFileError
        If none of the three sources is given.
    """
    # A path and a URL are both handled by load_schema; the path wins if both are set.
    location = schema_path or schema_url
    if location:
        return hedschema.load_schema(location)
    if schema_string:
        return hedschema.from_string(schema_string, file_type=file_type)
    raise HedFileError("HedSchemaNotFound", "A HED schema could not be located", "")
def flatten_hed(self, sidecar, col_names=None):
    """ Flatten the HED portions of a sidecar dictionary into a two-column DataFrame.

    Only sidecar entries that are in col_names and contain a 'HED' key
    contribute rows; each such entry is flattened with flatten_hed_value.

    Args:
        sidecar (dict):     A dictionary conforming to BIDS JSON events sidecar format.
        col_names (list):   A list of the cols to include in the flattened side car.
                            If empty or None, all sidecar keys are used.

    Returns:
        dataframe containing two columns ("column" and "HED") corresponding to a flattened tsv.

    Raises:
        HedFileError: If sidecar is not a dictionary.
    """
    if not isinstance(sidecar, dict):
        raise HedFileError(
            "BadSidecar",
            f"flatten sidecar must have a sidecar dictionary not [{str(sidecar)}]",
            "")

    wanted = col_names if col_names else sidecar.keys()
    flat_keys = []
    flat_values = []
    for col_key, col_dict in sidecar.items():
        if col_key in wanted and 'HED' in col_dict:
            more_keys, more_values = self.flatten_hed_value(col_key,
                                                            col_dict['HED'])
            flat_keys.extend(more_keys)
            flat_values.extend(more_values)
    return DataFrame({"column": flat_keys, "HED": flat_values})
def process(arguments):
    """Dispatch the requested spreadsheet command.

    Parameters
    ----------
    arguments: dict
        A dictionary with the input arguments from the spreadsheet form.

    Returns
    -------
    dict
        A dictionary of results from spreadsheet processing in standard form.

    Raises
    ------
    HedFileError
        If the schema or spreadsheet is missing or of the wrong type, or the
        command is missing or not recognized.
    """
    hed_schema = arguments.get('schema', None)
    schema_ok = hed_schema and isinstance(hed_schema, hedschema.hed_schema.HedSchema)
    if not schema_ok:
        raise HedFileError('BadHedSchema', "Please provide a valid HedSchema", "")

    spreadsheet = arguments.get(base_constants.SPREADSHEET, 'None')
    spreadsheet_ok = spreadsheet and isinstance(spreadsheet, models.HedInput)
    if not spreadsheet_ok:
        raise HedFileError(
            'InvalidSpreadsheet',
            "An spreadsheet was given but could not be processed", "")

    command = arguments.get(base_constants.COMMAND, None)
    check_for_warnings = arguments.get(base_constants.CHECK_FOR_WARNINGS, False)

    if command == base_constants.COMMAND_VALIDATE:
        return spreadsheet_validate(hed_schema, spreadsheet,
                                    check_for_warnings=check_for_warnings)
    # Both conversion directions share one implementation; the command says which.
    if command in (base_constants.COMMAND_TO_SHORT, base_constants.COMMAND_TO_LONG):
        return spreadsheet_convert(hed_schema, spreadsheet, command,
                                   check_for_warnings=check_for_warnings)
    raise HedFileError('UnknownSpreadsheetProcessingMethod',
                       f"Command {command} is missing or invalid", "")
def process(arguments):
    """Dispatch the requested string processing command.

    Parameters
    ----------
    arguments: dict
        A dictionary with the input arguments from the string form or string service request.

    Returns
    -------
    dict
        A dictionary with the results in standard format.

    Raises
    ------
    HedFileError
        If the schema is missing or of the wrong type, the string list is
        empty, or the command is missing or not recognized.
    """
    hed_schema = arguments.get('schema', None)
    schema_ok = hed_schema and isinstance(hed_schema, hedschema.hed_schema.HedSchema)
    if not schema_ok:
        raise HedFileError('BadHedSchema', "Please provide a valid HedSchema", "")

    string_list = arguments.get(base_constants.STRING_LIST, None)
    command = arguments.get(base_constants.COMMAND, None)
    check_for_warnings = arguments.get(base_constants.CHECK_FOR_WARNINGS, False)
    if not string_list:
        raise HedFileError(
            'EmptyHedStringList',
            "Please provide a list of HED strings to be processed", "")

    if command == base_constants.COMMAND_VALIDATE:
        return validate(hed_schema, string_list,
                        check_for_warnings=check_for_warnings)
    # Both conversion directions share one implementation; the command says which.
    if command in (base_constants.COMMAND_TO_SHORT, base_constants.COMMAND_TO_LONG):
        return convert(hed_schema, string_list, command,
                       check_for_warnings=check_for_warnings)
    raise HedFileError('UnknownProcessingMethod',
                       f'Command {command} is missing or invalid', '')
def make_template(self, additional_cols=None):
    """ Return a copy of the column map extended with extra 'n/a' columns.

    Args:
        additional_cols (list or None): Names of extra columns to append after
            self.columns. Each is filled with 'n/a'. None or an empty list
            adds nothing.

    Returns:
        DataFrame with columns self.columns followed by additional_cols; the
        values of self.columns are taken from self.col_map.

    Raises:
        HedFileError: If additional_cols shares a column name with self.columns.
    """
    # None replaces the former mutable [] default; a list copy also lets
    # callers pass a tuple without breaking the concatenation below.
    extra_cols = list(additional_cols) if additional_cols else []
    if extra_cols and set(self.columns).intersection(extra_cols):
        raise HedFileError(
            "AdditionalColumnsNotDisjoint",
            f"Additional columns {str(extra_cols)} must be disjoint from {str(self.columns)}",
            "")
    df = pd.DataFrame(columns=self.columns + extra_cols)
    df[self.columns] = self.col_map[self.columns].values
    if extra_cols:
        df[extra_cols] = 'n/a'
    return df
def process(arguments):
    """Dispatch the requested command for an events file and its sidecar.

    Parameters
    ----------
    arguments: dict
        A dictionary with the input arguments from the event form

    Returns
    -------
    dict
        A dictionary with the results.

    Raises
    ------
    HedFileError
        If the schema or events input is missing or of the wrong type, or the
        command is missing or not recognized.
    """
    hed_schema = arguments.get('schema', None)
    command = arguments.get(base_constants.COMMAND, None)
    schema_ok = hed_schema and isinstance(hed_schema, hedschema.hed_schema.HedSchema)
    if not schema_ok:
        raise HedFileError(
            'BadHedSchema',
            "Please provide a valid HedSchema for event processing", "")

    events = arguments.get(base_constants.EVENTS, None)
    sidecar = arguments.get(base_constants.JSON_SIDECAR, None)
    events_ok = events and isinstance(events, models.EventsInput)
    if not events_ok:
        raise HedFileError(
            'InvalidEventsFile',
            "An events file was given but could not be processed", "")

    # Each command reads only the extra options it needs.
    if command == base_constants.COMMAND_VALIDATE:
        check_for_warnings = arguments.get(base_constants.CHECK_FOR_WARNINGS, False)
        return validate(hed_schema, events, sidecar, check_for_warnings)
    if command == base_constants.COMMAND_ASSEMBLE:
        expand_defs = arguments.get(base_constants.EXPAND_DEFS, False)
        return assemble(hed_schema, events, expand_defs)
    if command == base_constants.COMMAND_EXTRACT:
        columns_selected = arguments.get(base_constants.COLUMNS_SELECTED, None)
        return extract(events, columns_selected)
    raise HedFileError('UnknownEventsProcessingMethod',
                       f'Command {command} is missing or invalid', '')
def process(arguments):
    """Dispatch the requested sidecar command.

    Parameters
    ----------
    arguments: dict
        A dictionary with the input arguments from the sidecar form

    Returns
    -------
    dict
        A dictionary of results.

    Raises
    ------
    HedFileError
        If the schema or sidecar is missing or of the wrong type, or the
        command is missing or not recognized.
    """
    hed_schema = arguments.get(base_constants.SCHEMA, None)
    schema_ok = hed_schema and isinstance(hed_schema, hedschema.hed_schema.HedSchema)
    if not schema_ok:
        raise HedFileError('BadHedSchema', "Please provide a valid HedSchema", "")

    json_sidecar = arguments.get(base_constants.JSON_SIDECAR, 'None')
    sidecar_ok = json_sidecar and isinstance(json_sidecar, models.Sidecar)
    if not sidecar_ok:
        raise HedFileError('InvalidJSONFile',
                           "Please give a valid JSON file to process", "")

    command = arguments.get(base_constants.COMMAND, None)
    check_for_warnings = arguments.get(base_constants.CHECK_FOR_WARNINGS, False)
    expand_defs = arguments.get(base_constants.EXPAND_DEFS, False)

    if command == base_constants.COMMAND_VALIDATE:
        return sidecar_validate(hed_schema, json_sidecar,
                                check_for_warnings=check_for_warnings)
    # Both conversion directions share one implementation; the command says which.
    if command in (base_constants.COMMAND_TO_SHORT, base_constants.COMMAND_TO_LONG):
        return sidecar_convert(hed_schema, json_sidecar, command=command,
                               expand_defs=expand_defs)
    raise HedFileError('UnknownProcessingMethod',
                       f'Command {command} is missing or invalid', '')
def _update_dict_skip(self, col_dict):
    """ Add the skip columns of col_dict to this object's skip columns.

    Columns already in self.skip_cols are not added again.

    Args:
        col_dict: Object with a skip_cols attribute listing columns to skip.

    Raises:
        HedFileError: If a skip column of col_dict is already a categorical or
            value column of this object. Columns processed before the
            offending one have already been added.
    """
    incoming = col_dict.skip_cols
    if not incoming:
        return

    for col in incoming:
        already_used = col in self.categorical_info or col in self.value_info
        if already_used:
            raise HedFileError(
                "SkipColInvalid",
                f"Skip column [{str(col)}] is already a categorical or value column",
                "")
        if col not in self.skip_cols:
            self.skip_cols.append(col)
def load_schema(hed_path=None, library_prefix=None):
    """ Load a schema from the given file or URL path.

    Raises HedFileError if there are any fatal issues.

    Parameters
    ----------
    hed_path : str or None
        A filepath or url to open a schema from
    library_prefix : str or None
        The name_prefix all tags in this schema will accept. Applied to the
        loaded schema whether it came from a file or from a URL.

    Returns
    -------
    schema: HedSchema
        The loaded schema

    Raises
    ------
    HedFileError
        If hed_path is empty, or a local path does not end in .xml or
        .mediawiki (case-insensitive).
    """
    if not hed_path:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND,
                           "Empty file path passed to HedSchema.load_file",
                           filename=hed_path)

    lowered_path = hed_path.lower()
    if hed_cache._check_if_url(hed_path):
        file_as_string = file_util.url_to_string(hed_path)
        # Do not return here: fall through so the library prefix is applied
        # to URL-loaded schemas as well.
        hed_schema = from_string(file_as_string,
                                 file_type=os.path.splitext(lowered_path)[1])
    elif lowered_path.endswith(".xml"):
        hed_schema = HedSchemaXMLParser.load_xml(hed_path)
    elif lowered_path.endswith(".mediawiki"):
        hed_schema = HedSchemaWikiParser.load_wiki(hed_path)
    else:
        raise HedFileError(HedExceptions.INVALID_EXTENSION,
                           "Unknown schema extension", filename=hed_path)

    if library_prefix:
        hed_schema.set_library_prefix(library_prefix=library_prefix)
    return hed_schema
def _read_header_line(self, line):
    """ Parse and validate the header line of a wiki schema file.

    The attributes found after the header prefix are validated and stored on
    both this parser and its schema.

    Args:
        line (str): The first line of the file.

    Raises:
        HedFileError: If line does not start with
            wiki_constants.HEADER_LINE_STRING, or if attribute validation fails.
    """
    prefix = wiki_constants.HEADER_LINE_STRING
    if not line.startswith(prefix):
        msg = f"First line of file should be HED, instead found: {line}"
        raise HedFileError(HedExceptions.SCHEMA_HEADER_MISSING, msg,
                           filename=self.filename, issues=[msg])

    # Everything after the prefix holds the attributes.
    hed_attributes = self._get_header_attributes(line[len(prefix):])
    schema_validation_util.validate_attributes(hed_attributes,
                                               filename=self.filename)
    self.header_attributes = hed_attributes
    self._schema.header_attributes = hed_attributes
def add_sidecar_file(self, file):
    """ Load column definitions from a given json file.

    You can load multiple files into one Sidecar, but it is discouraged.

    Parameters
    ----------
    file: str or FileLike
        If a string, this is a filename. Otherwise, it will be parsed as a file-like.

    Raises
    ------
    HedFileError
        If file is a string and opening or loading it raises
        FileNotFoundError or TypeError.
    """
    if not isinstance(file, str):
        self._add_json_file_defs(file)
        return

    try:
        with open(file, "r") as fp:
            # The first named file loaded gives the sidecar its name.
            if not self.name:
                self.name = file
            self._add_json_file_defs(fp)
    except FileNotFoundError as e:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, e.strerror, file)
    except TypeError as e:
        raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(e), file)
def flatten_col_dict(self, column_dict, marker_level=2):
    """ Flatten a nested dictionary of strings into parallel key and value lists.

    String values are emitted as-is. A nested dictionary is flattened
    recursively and bracketed, before and after, by a marked header key
    (from get_marked_key) whose value is 'n/a'.

    Args:
        column_dict (dict):  Dictionary whose values are strings or dictionaries.
        marker_level (int):  Level passed to get_marked_key for headers at this
                             depth; nested dictionaries use marker_level + 1.

    Returns:
        tuple (keys, values) of two lists of equal length.

    Raises:
        HedFileError: If column_dict is not a dictionary or contains a value
            that is neither a string nor a dictionary.
    """
    if not isinstance(column_dict, dict):
        raise HedFileError(
            "UnsupportedJSONValue",
            f"[{str(column_dict)}] format not in supported by flatten", "")

    flat_keys = []
    flat_values = []
    for key, value in column_dict.items():
        if isinstance(value, str):
            flat_keys.append(key)
            flat_values.append(value)
            continue
        if not isinstance(value, dict):
            raise HedFileError(
                "UnsupportedJSONValue",
                f"[{str(value)}] should be a string or dictionary", "")

        header = self.get_marked_key(key, marker_level)
        inner_keys, inner_values = self.flatten_col_dict(
            value, marker_level=marker_level + 1)
        # The same header opens and closes the nested section.
        flat_keys.extend([header, *inner_keys, header])
        flat_values.extend(['n/a', *inner_values, 'n/a'])
    return flat_keys, flat_values