def test_child_insert_index():
    """Insert children at rule-computed indexes and validate the parents."""
    # Part 1: eml already holds access and additionalMetadata; the dataset
    # child is inserted at the index the EML rule reports.
    root = Node(names.EML)
    for attr_name, attr_value in (
        ("packageId", "edi.23.1"),
        ("system", "metapype"),
    ):
        root.add_attribute(attr_name, attr_value)
    root.add_child(Node(names.ACCESS, parent=root))
    root.add_child(Node(names.ADDITIONALMETADATA, parent=root))
    eml_rule = rule.get_rule(names.EML)
    dataset = Node(names.DATASET, parent=root)
    root.add_child(dataset, index=eml_rule.child_insert_index(root, dataset))
    validate.node(root)

    # Part 2: mix plain appends with rule-indexed inserts on associatedParty.
    party_rule = rule.get_rule(names.ASSOCIATEDPARTY)
    party = Node(names.ASSOCIATEDPARTY)
    org_name = Node(names.ORGANIZATIONNAME)
    party.add_child(org_name, party_rule.child_insert_index(party, org_name))
    party.add_child(Node(names.ADDRESS))
    party.add_child(Node(names.ONLINEURL))
    for late_name in (names.POSITIONNAME, names.ROLE):
        late_child = Node(late_name)
        party.add_child(
            late_child, party_rule.child_insert_index(party, late_child)
        )
    validate.node(party)
def test_taxonid():
    """taxonId fails validation until a provider attribute is added."""
    taxon_id = Node(names.TAXONID, parent=None)
    taxon_id.content = "42"

    # No provider attribute yet: validation is expected to raise.
    with pytest.raises(MetapypeRuleError):
        validate.node(taxon_id)

    # With the provider attribute present the same node must validate.
    taxon_id.add_attribute("provider", "https://www.itis.gov")
    validate.node(taxon_id)
def _normalize_whitespace(text):
    """Return the cleaned form of an element's text or tail string."""
    # A run made up solely of spaces and/or non-breaking spaces is kept
    # verbatim. fullmatch is used because "$" would also accept such a run
    # followed by a trailing newline.
    if re.fullmatch(r"[ \xA0]+", text):
        return text
    if text.strip() == "":
        return None
    # Collapse every internal whitespace run to a single space.
    return " ".join(text.split())


def _process_element(e, clean, literals) -> Node:
    """
    Process an lxml etree element into a Metapype node. If the clean
    attribute is true, then remove leading and trailing whitespace from
    the element content.

    Args:
        e: lxml etree element
        clean: boolean to clean leading and trailing whitespace from node
            content
        literals: tuple of XML elements whose content should not be altered

    Returns:
        Node
    """
    tag = e.tag[e.tag.find("}") + 1:]  # Remove any prepended namespace
    node = Node(tag)
    node.nsmap = e.nsmap
    node.prefix = e.prefix

    if clean:
        if e.text is not None:
            # Content of "literal" elements is passed through untouched.
            if tag in literals:
                node.content = e.text
            else:
                node.content = _normalize_whitespace(e.text)
        if e.tail is not None:
            node.tail = _normalize_whitespace(e.tail)
    else:
        node.content = e.text
        node.tail = e.tail

    for name, value in e.attrib.items():
        if "{" not in name:
            node.add_attribute(name, value)
        else:
            # Namespace-qualified attributes are stored as extras.
            node.add_extras(_format_extras(name, node.nsmap), value)

    for child_elem in e:
        # Comment elements are skipped.
        if child_elem.tag is not etree.Comment:
            node.add_child(_process_element(child_elem, clean, literals))

    for child in node.children:
        child.parent = node
        if child.nsmap == node.nsmap:
            child.nsmap = node.nsmap  # Map to single instance of nsmap
    return node
def test_validate_annotation():
    """An annotation with labelled propertyURI and valueURI children validates."""
    annotation = Node(names.ANNOTATION)
    labelled_children = (
        (names.PROPERTYURI, "some property label"),
        (names.VALUEURI, "some value label"),
    )
    for child_name, label in labelled_children:
        uri_node = Node(child_name)
        uri_node.content = "http://purl.obolibrary.org/obo/IAO_0000136"
        uri_node.add_attribute("label", label)
        annotation.add_child(uri_node)
    validate.tree(annotation)
def test_responsible_party_with_role():
    """personnel must carry a role: invalid without one, valid with one."""
    personnel = Node(names.PERSONNEL)
    personnel.add_attribute("id", "personnel")
    personnel.add_namespace("eml", "https://eml.ecoinformatics.org/eml-2.2.0")

    individual_name = Node(names.INDIVIDUALNAME)
    personnel.add_child(individual_name)
    given_name = Node(names.GIVENNAME, content="Chase")
    given_name.add_attribute("lang", "Spanish")
    individual_name.add_child(given_name)
    sur_name = Node(names.SURNAME, content="Gaucho")
    sur_name.add_attribute("lang", "Spanish")
    individual_name.add_child(sur_name)

    individual_name = Node(names.INDIVIDUALNAME)
    personnel.add_child(individual_name)
    given_name = Node(names.GIVENNAME, content="Cactus")
    individual_name.add_child(given_name)
    sur_name = Node(names.SURNAME, content="Jack")
    individual_name.add_child(sur_name)

    phone = Node(names.PHONE, content="999-999-9999")
    personnel.add_child(phone)

    # Without role, should get an error
    with pytest.raises(MetapypeRuleError):
        validate.tree(personnel)

    # Error should name 'role' as the cause. Passing a list makes
    # validate.tree collect the errors instead of raising.
    errs = []
    validate.tree(personnel, errs)
    # Guard against the loop below passing vacuously on an empty list.
    assert errs
    for _err_code, _msg, _node, *args in errs:
        # Second extra value was named "min" originally; unused here.
        err_cause, _minimum = args
        assert err_cause == 'role'

    # With role, it should be ok. Validate WITHOUT an error list so that any
    # remaining problem raises rather than being silently collected.
    role = Node(names.ROLE, content="drummer")
    personnel.add_child(role)
    validate.tree(personnel)
def from_xml_element(xml_elem, metapype_node, metapype_parent):
    """
    Build the metapype node for an xml element and recurse into its children.

    Args:
        xml_elem: the xml element to convert.
        metapype_node: the metapype node corresponding to xml_elem; callers
            pass None except at the root of the tree, in which case a new
            Node is created here.
        metapype_parent: the parent metapype node for this node, or None.
    """
    node = metapype_node
    if node is None:  # Will be None except at the root
        node = Node(name=xml_elem.tag, parent=metapype_parent)

    # Attribute names containing '}' are skipped.
    plain_attributes = (
        (key, val) for key, val in xml_elem.attrib.items() if '}' not in key
    )
    for key, val in plain_attributes:
        node.add_attribute(key, val)

    text = xml_elem.text
    if text:
        node.content = text

    if metapype_parent is not None:
        metapype_parent.add_child(node)

    for sub_elem in xml_elem:
        from_xml_element(sub_elem, None, node)
def from_json(json_node: dict, parent: Node = None) -> Node:
    """
    Recursively traverse Python JSON and build a metapype model instance.

    The input structure is only read; it is not modified.

    Args:
        json_node: JSON converted to Python structure
        parent: parent node reference to child

    Returns:
        Node: Child node of decomposed and parsed JSON

    Raises:
        KeyError: if json_node is empty.
    """
    if not json_node:
        # Same exception type dict.popitem() raised here previously.
        raise KeyError("from_json(): JSON node is empty")

    # Take the last entry (what popitem() would have returned) without
    # removing it, so the caller's dict survives the call.
    name, body = list(json_node.items())[-1]

    node = Node(name, id=body[0]['id'])
    if parent is not None:
        node.parent = parent

    attributes = body[1]['attributes']
    if attributes is not None:
        for attr_name, attr_value in attributes.items():
            node.add_attribute(attr_name, attr_value)

    content = body[2]['content']
    if content is not None:
        node.content = content

    for child in body[3]['children']:
        node.add_child(from_json(child, node))

    return node
def load_other_entity(dataset_node: Node = None, uploads_path: str = None, data_file: str = ''):
    """
    Build an otherEntity subtree describing data_file under dataset_node.

    The physical section records the object name and, when available, the
    file size and MD5 hash. After the subtree is built, delete_data_files
    is called on uploads_path.

    Returns:
        The new otherEntity node.
    """
    def attach(node_name, parent_node):
        # Create a node under parent_node and register it as a child.
        created = Node(node_name, parent=parent_node)
        add_child(parent_node, created)
        return created

    full_path = f'{uploads_path}/{data_file}'

    other_entity_node = attach(names.OTHERENTITY, dataset_node)

    physical_node = attach(names.PHYSICAL, other_entity_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = attach(names.ENTITYNAME, other_entity_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    object_name_node = attach(names.OBJECTNAME, physical_node)
    object_name_node.content = data_file

    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = attach(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = attach(names.AUTHENTICATION, physical_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    # dataFormat / externallyDefinedFormat / formatName
    data_format_node = attach(names.DATAFORMAT, physical_node)
    externally_defined_format_node = attach(names.EXTERNALLYDEFINEDFORMAT,
                                            data_format_node)
    format_name_node = attach(names.FORMATNAME, externally_defined_format_node)
    format_name_node.content = format_name_from_data_file(data_file)

    entity_type_node = new_child_node(names.ENTITYTYPE, parent=other_entity_node)
    entity_type_node.content = format_name_from_data_file(data_file)

    delete_data_files(uploads_path)

    return other_entity_node
def test_to_json(self):
    """mp_io.to_json on a small eml/access/allow tree returns a str."""
    root = Node(names.EML)
    for key, val in (('packageId', 'edi.23.1'), ('system', 'metapype')):
        root.add_attribute(key, val)

    access = Node(names.ACCESS, parent=root)
    for key, val in (('authSystem', 'pasta'), ('order', 'allowFirst')):
        access.add_attribute(key, val)
    root.add_child(access)

    allow = Node(names.ALLOW, parent=access)
    access.add_child(allow)

    leaves = (
        (names.PRINCIPAL, 'uid=gaucho,o=EDI,dc=edirepository,dc=org'),
        (names.PERMISSION, 'all'),
    )
    for leaf_name, text in leaves:
        leaf = Node(leaf_name, parent=allow)
        leaf.content = text
        allow.add_child(leaf)

    serialized = mp_io.to_json(root)
    self.assertIsInstance(serialized, str)
def test_delete_node_no_children():
    """Deleting the root with children=False keeps a descendant in Node.store."""
    root = Node(names.EML)
    for key, val in (("packageId", "edi.23.1"), ("system", "metapype")):
        root.add_attribute(key, val)

    access_node = Node(names.ACCESS, parent=root)
    for key, val in (("authSystem", "pasta"), ("order", "allowFirst")):
        access_node.add_attribute(key, val)
    root.add_child(access_node)

    allow_node = Node(names.ALLOW, parent=access_node)
    access_node.add_child(allow_node)

    principal = Node(names.PRINCIPAL, parent=allow_node)
    principal.content = "uid=gaucho,o=EDI,dc=edirepository,dc=org"
    allow_node.add_child(principal)

    permission = Node(names.PERMISSION, parent=allow_node)
    permission.content = "all"
    allow_node.add_child(permission)

    # Looking the principal up by id must return the very same object.
    looked_up = Node.get_node_instance(principal.id)
    assert principal is looked_up

    # Only the root is deleted; the principal must still be registered.
    Node.delete_node_instance(root.id, children=False)
    assert principal.id in Node.store
def test_delete_node_no_children(self):
    """Deleting the root with children=False keeps a descendant in Node.store."""
    root = Node(names.EML)
    root.add_attribute('packageId', 'edi.23.1')
    root.add_attribute('system', 'metapype')

    access_node = Node(names.ACCESS, parent=root)
    access_node.add_attribute('authSystem', 'pasta')
    access_node.add_attribute('order', 'allowFirst')
    root.add_child(access_node)

    allow_node = Node(names.ALLOW, parent=access_node)
    access_node.add_child(allow_node)

    # principal is kept by name because the assertions below refer to it.
    principal = Node(names.PRINCIPAL, parent=allow_node)
    principal.content = 'uid=gaucho,o=EDI,dc=edirepository,dc=org'
    allow_node.add_child(principal)

    permission = Node(names.PERMISSION, parent=allow_node)
    permission.content = 'all'
    allow_node.add_child(permission)

    fetched = Node.get_node_instance(principal.id)
    self.assertIs(principal, fetched)

    # Delete only the root node, then confirm the principal is still stored.
    Node.delete_node_instance(root.id, children=False)
    self.assertIn(principal.id, Node.store)
def test_responsible_party():
    """A creator with two individualName children and a phone validates."""
    creator = Node(names.CREATOR)
    creator.add_attribute("id", "creator")
    creator.add_namespace("eml", "https://eml.ecoinformatics.org/eml-2.2.0")

    # First individual: both name parts carry a lang attribute.
    first_person = Node(names.INDIVIDUALNAME)
    creator.add_child(first_person)
    for part_name, text in ((names.GIVENNAME, "Chase"), (names.SURNAME, "Gaucho")):
        name_part = Node(part_name, content=text)
        name_part.add_attribute("lang", "Spanish")
        first_person.add_child(name_part)

    # Second individual: plain name parts without attributes.
    second_person = Node(names.INDIVIDUALNAME)
    creator.add_child(second_person)
    for part_name, text in ((names.GIVENNAME, "Cactus"), (names.SURNAME, "Jack")):
        second_person.add_child(Node(part_name, content=text))

    creator.add_child(Node(names.PHONE, content="999-999-9999"))

    validate.tree(creator)
def test_copy():
    """Node.copy() yields a tree that validates and is a deep copy."""
    original = Node(names.CREATOR)
    original.add_attribute("id", "creator")
    original.add_namespace("eml", "https://eml.ecoinformatics.org/eml-2.2.0")

    # First individual: name parts tagged with a lang attribute.
    person = Node(names.INDIVIDUALNAME)
    original.add_child(person)
    for part_name, text in ((names.GIVENNAME, "Chase"), (names.SURNAME, "Gaucho")):
        name_part = Node(part_name, content=text)
        name_part.add_attribute("lang", "Spanish")
        person.add_child(name_part)

    # Second individual: name parts without attributes.
    person = Node(names.INDIVIDUALNAME)
    original.add_child(person)
    for part_name, text in ((names.GIVENNAME, "Cactus"), (names.SURNAME, "Jack")):
        person.add_child(Node(part_name, content=text))

    validate.tree(original)

    duplicate = original.copy()
    validate.tree(duplicate)
    assert is_deep_copy(original, duplicate)
def load_data_table(uploads_path: str = None, data_file: str = '',
                    num_header_rows: int = 1, delimiter: str = ',',
                    quote_char: str = '"'):
    """
    Build a dataTable subtree describing a delimited text file.

    The file at uploads_path/data_file is inspected for size, MD5 hash and
    line terminator, then read with pandas to create one attribute node per
    column with a measurement scale chosen from infer_col_type().

    Args:
        uploads_path: directory containing the data file.
        data_file: file name of the data file within uploads_path.
        num_header_rows: value recorded in the numHeaderLines node.
        delimiter: field delimiter, recorded and passed to pandas as sep.
        quote_char: quote character, recorded and passed to pandas.

    Returns:
        Tuple (datatable_node, column_vartypes, column_names,
        column_categorical_codes); the three lists have one entry per column.
    """
    if Config.LOG_DEBUG:
        app = Flask(__name__)
        with app.app_context():
            current_app.logger.info('Entering load_data_table')

    full_path = f'{uploads_path}/{data_file}'

    datatable_node = new_child_node(names.DATATABLE, parent=None)

    physical_node = new_child_node(names.PHYSICAL, parent=datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = new_child_node(names.ENTITYNAME, parent=datatable_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    object_name_node = new_child_node(names.OBJECTNAME, parent=physical_node)
    object_name_node.content = data_file

    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = new_child_node(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = Node(names.AUTHENTICATION, parent=physical_node)
        add_child(physical_node, hash_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    data_format_node = Node(names.DATAFORMAT, parent=physical_node)
    add_child(physical_node, data_format_node)

    text_format_node = Node(names.TEXTFORMAT, parent=data_format_node)
    add_child(data_format_node, text_format_node)

    num_header_lines_node = Node(names.NUMHEADERLINES, parent=text_format_node)
    add_child(text_format_node, num_header_lines_node)
    # Stored as text, like every other node content set here; the parameter
    # is annotated (and defaults to) an int.
    num_header_lines_node.content = str(num_header_rows)

    num_footer_lines_node = Node(names.NUMFOOTERLINES, parent=text_format_node)
    add_child(text_format_node, num_footer_lines_node)
    num_footer_lines_node.content = '0'

    simple_delimited_node = Node(names.SIMPLEDELIMITED, parent=text_format_node)
    add_child(text_format_node, simple_delimited_node)

    field_delimiter_node = Node(names.FIELDDELIMITER, parent=simple_delimited_node)
    add_child(simple_delimited_node, field_delimiter_node)
    field_delimiter_node.content = delimiter

    quote_character_node = Node(names.QUOTECHARACTER, parent=simple_delimited_node)
    add_child(simple_delimited_node, quote_character_node)
    quote_character_node.content = quote_char

    # Read one line so the file object records the newline style it saw,
    # then store its repr without the quote characters (e.g. \n or \r\n).
    with open(full_path) as file:
        next(file)
        line_terminator = repr(file.newlines).replace("'", "")
    record_delimiter_node = Node(names.RECORDDELIMITER, parent=text_format_node)
    add_child(text_format_node, record_delimiter_node)
    record_delimiter_node.content = line_terminator

    data_frame = pd.read_csv(full_path, comment='#', encoding='utf8',
                             sep=delimiter, quotechar=quote_char)

    column_vartypes = []
    column_names = []
    column_categorical_codes = []

    if data_frame is not None:
        number_of_records = Node(names.NUMBEROFRECORDS, parent=datatable_node)
        add_child(datatable_node, number_of_records)
        number_of_records.content = f'{data_frame.shape[0]}'

        attribute_list_node = Node(names.ATTRIBUTELIST, parent=datatable_node)
        add_child(datatable_node, attribute_list_node)

        for col in data_frame.columns:
            dtype = data_frame[col][1:].infer_objects().dtype

            var_type, codes = infer_col_type(data_frame, col)

            column_vartypes.append(var_type)
            column_names.append(col)
            column_categorical_codes.append(codes)

            attribute_node = new_child_node(names.ATTRIBUTE, attribute_list_node)
            attribute_name_node = new_child_node(names.ATTRIBUTENAME, attribute_node)
            attribute_name_node.content = col

            att_label_node = Node(names.ATTRIBUTELABEL, parent=attribute_node)
            add_child(attribute_node, att_label_node)
            att_label_node.content = col

            # Created exactly once; a second construction would leave an
            # extra Node that is never attached to the tree.
            att_def_node = Node(names.ATTRIBUTEDEFINITION, parent=attribute_node)
            add_child(attribute_node, att_def_node)

            ms_node = Node(names.MEASUREMENTSCALE, parent=attribute_node)
            add_child(attribute_node, ms_node)

            if var_type == VariableType.CATEGORICAL:
                # nominal / nonNumericDomain / enumeratedDomain / ...codes...
                nominal_node = new_child_node(names.NOMINAL, ms_node)
                non_numeric_domain_node = new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                enumerated_domain_node = new_child_node(
                    names.ENUMERATEDDOMAIN, non_numeric_domain_node)

                for code in codes:
                    code_definition_node = new_child_node(
                        names.CODEDEFINITION, enumerated_domain_node)
                    code_node = new_child_node(names.CODE, code_definition_node)
                    code_node.content = code
                    # Empty definition node, no content is set here.
                    new_child_node(names.DEFINITION, code_definition_node)

            elif var_type == VariableType.NUMERICAL:
                # ratio / numericDomain
                ratio_node = new_child_node(names.RATIO, ms_node)
                numeric_domain_node = new_child_node(names.NUMERICDOMAIN, ratio_node)
                number_type = 'real'
                if str(dtype).startswith('int'):  # FIXME - we can do better than this
                    number_type = 'integer'
                number_type_node = new_child_node(names.NUMBERTYPE, numeric_domain_node)
                number_type_node.content = number_type
                # Empty unit node under ratio, no content is set here.
                new_child_node(names.UNIT, ratio_node)

            elif var_type == VariableType.TEXT:
                # nominal / nonNumericDomain / textDomain
                nominal_node = new_child_node(names.NOMINAL, ms_node)
                non_numeric_domain_node = new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                text_domain_node = new_child_node(
                    names.TEXTDOMAIN, non_numeric_domain_node)
                new_child_node(names.DEFINITION, text_domain_node)

            elif var_type == VariableType.DATETIME:
                # dateTime / formatString
                datetime_node = Node(names.DATETIME, parent=ms_node)
                add_child(ms_node, datetime_node)

                format_string_node = Node(names.FORMATSTRING, parent=datetime_node)
                add_child(datetime_node, format_string_node)
                # NOTE(review): presumably infer_col_type returns the format
                # string as `codes` for DATETIME columns - confirm.
                format_string_node.content = codes

    if Config.LOG_DEBUG:
        app = Flask(__name__)
        with app.app_context():
            current_app.logger.info('Leaving load_data_table')

    return datatable_node, column_vartypes, column_names, column_categorical_codes
class TestEml_2_1_1(unittest.TestCase):
    """Rule and validation tests run against a hand-built EML tree."""

    @staticmethod
    def _child(name, parent, content=None, attributes=None):
        """Create a node under parent, set its attributes/content, attach it."""
        node = Node(name, parent=parent)
        for attr_name, attr_value in (attributes or {}).items():
            node.add_attribute(attr_name, attr_value)
        if content is not None:
            node.content = content
        parent.add_child(node)
        return node

    def setUp(self):
        """Build the eml tree: access, dataset and additionalMetadata."""
        child = self._child

        self.eml = Node(names.EML)
        self.eml.add_attribute('packageId', 'edi.23.1')
        self.eml.add_attribute('system', 'metapype')

        # access / allow + deny
        self.access = child(
            names.ACCESS, self.eml,
            attributes={'authSystem': 'pasta', 'order': 'allowFirst'})
        self.allow = child(names.ALLOW, self.access)
        self.principal_allow = child(
            names.PRINCIPAL, self.allow,
            'uid=gaucho,o=EDI,dc=edirepository,dc=org')
        self.permission_allow = child(names.PERMISSION, self.allow, 'all')
        self.deny = child(names.DENY, self.access)
        self.principal_deny = child(names.PRINCIPAL, self.deny, 'public')
        self.permission_deny = child(names.PERMISSION, self.deny, 'write')

        # dataset / title
        self.dataset = child(names.DATASET, self.eml)
        self.title = child(
            names.TITLE, self.dataset,
            'Green sea turtle counts: Tortuga Island 20017')

        # dataset / creator
        self.creator = child(names.CREATOR, self.dataset)
        self.individualName_creator = child(names.INDIVIDUALNAME, self.creator)
        self.salutation_creator = child(
            names.SALUTATION, self.individualName_creator, 'Mr.')
        self.given_name_creator = child(
            names.GIVENNAME, self.individualName_creator, 'Chase')
        self.surName_creator = child(
            names.SURNAME, self.individualName_creator, 'Gaucho')
        self.value = child(
            names.VALUE, self.surName_creator, 'Gaucho',
            attributes={'lang': 'en'})

        self.address = child(names.ADDRESS, self.creator)
        self.delivery_point_1 = child(
            names.DELIVERYPOINT, self.address, '100 Maple St')
        self.delivery_point_2 = child(
            names.DELIVERYPOINT, self.address, 'Apt. 10-B')
        self.city = child(names.CITY, self.address, "Gotham City")
        self.administrative_area = child(
            names.ADMINISTRATIVEAREA, self.address, "New York")
        self.postal_code = child(names.POSTALCODE, self.address, '11111')
        self.country = child(names.COUNTRY, self.address, 'USA')

        self.phone = child(
            names.PHONE, self.creator, '555-555-5555',
            attributes={'phonetype': 'voice'})
        self.electronic_mail_address = child(
            names.ELECTRONICMAILADDRESS, self.creator, '*****@*****.**')
        self.online_url = child(
            names.ONLINEURL, self.creator,
            'https://www.somecollege.edu/people/cgaucho')
        self.user_id = child(
            names.USERID, self.creator,
            'uid=jgaucho,o=EDI,dc=edirepository,dc=org',
            attributes={
                'directory':
                    'ldap:///ldap.edirepository.org/dc=edirepository,dc=org'
            })

        # dataset / pubDate, abstract, keywordSet
        self.pubdate = child(names.PUBDATE, self.dataset, '2018')

        self.abstract = child(
            names.ABSTRACT, self.dataset, attributes={'lang': 'en'})
        self.section = child(names.SECTION, self.abstract, "abstract section")
        self.para = child(names.PARA, self.abstract, "para section")

        self.keyword_set = child(names.KEYWORDSET, self.dataset)
        self.keyword_1 = child(
            names.KEYWORD, self.keyword_set, 'phytoplankton ecology')
        self.keyword_2 = child(
            names.KEYWORD, self.keyword_set, 'lake',
            attributes={'keywordType': 'place'})
        self.keyword_thesaurus = child(
            names.KEYWORDTHESAURUS, self.keyword_set, 'IRIS keyword thesaurus')

        # dataset / coverage / taxonomicCoverage
        self.coverage = child(names.COVERAGE, self.dataset)
        self.taxonomic_coverage = child(names.TAXONOMICCOVERAGE, self.coverage)
        # Adjacent literals are joined so the content has single spaces;
        # backslash continuations inside one literal would embed the source
        # indentation in the text.
        self.general_taxonomic_coverage = child(
            names.GENERALTAXONOMICCOVERAGE, self.taxonomic_coverage,
            "All vascular plants were "
            "identified to family or species, mosses and lichens were "
            "identified as moss or lichen.")

        self.taxonomic_classification_genus = child(
            names.TAXONOMICCLASSIFICATION, self.taxonomic_coverage)
        self.taxon_rank_name_genus = child(
            names.TAXONRANKNAME, self.taxonomic_classification_genus, "Genus")
        self.taxon_rank_value_genus = child(
            names.TAXONRANKVALUE, self.taxonomic_classification_genus,
            "Escherichia")

        # The species classification is nested inside the genus one.
        self.taxonomic_classification_species = child(
            names.TAXONOMICCLASSIFICATION, self.taxonomic_classification_genus)
        self.taxon_rank_name_species = child(
            names.TAXONRANKNAME, self.taxonomic_classification_species,
            "Species")
        self.taxon_rank_value_species = child(
            names.TAXONRANKVALUE, self.taxonomic_classification_species,
            "coli")

        # dataset / contact
        self.contact = child(names.CONTACT, self.dataset)
        self.individualName_contact = child(names.INDIVIDUALNAME, self.contact)
        self.surName_contact = child(
            names.SURNAME, self.individualName_contact, 'Gaucho')

        # additionalMetadata / metadata
        self.additional_metadata = child(names.ADDITIONALMETADATA, self.eml)
        self.metadata = child(
            names.METADATA, self.additional_metadata, '<tag>TAG</tag>')

        self.node = self.eml

    def tearDown(self):
        self.node = None

    def test_validate_node(self):
        self.assertIsNone(validate.node(self.node))

    def test_validate_tree(self):
        self.assertIsNone(validate.tree(self.node))

    def test_get_rule(self):
        r = rule.get_rule(names.ACCESS)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(r.name, rule.RULE_ACCESS)
        self.assertIsInstance(r.attributes, dict)
        self.assertIsInstance(r.children, list)
        self.assertIsInstance(r.content_rules, list)
        self.assertIsInstance(r.content_enum, list)

    def test_rule_validation(self):
        r = rule.get_rule(names.ACCESS)
        self.assertIsNone(r.validate_rule(self.access))

    def test_empty_content(self):
        # Setting content on access is expected to make validation fail.
        self.access.content = 'some content'
        r = rule.get_rule(names.ACCESS)
        self.assertRaises(MetapypeRuleError, r.validate_rule, self.access)

    def test_non_empty_content(self):
        # Clearing the principal's content is expected to make it fail.
        self.principal_allow.content = None
        r = rule.get_rule(names.PRINCIPAL)
        self.assertRaises(MetapypeRuleError, r.validate_rule,
                          self.principal_allow)

    def test_permissions_content(self):
        self.permission_allow.content = 'some permission'
        r = rule.get_rule(names.PERMISSION)
        self.assertRaises(MetapypeRuleError, r.validate_rule,
                          self.permission_allow)

    def test_str_content(self):
        # Non-string content is expected to be rejected.
        self.permission_allow.content = 1
        r = rule.get_rule(names.PERMISSION)
        self.assertRaises(MetapypeRuleError, r.validate_rule,
                          self.permission_allow)

    def test_is_allowed_child(self):
        r = rule.get_rule(names.EML)
        self.assertTrue(r.is_allowed_child(names.ACCESS))
        self.assertFalse(r.is_allowed_child(names.INDIVIDUALNAME))

    def test_child_insert_index(self):
        eml = Node(names.EML)
        access = Node(names.ACCESS, parent=eml)
        eml.add_child(access)
        additional_metadata = Node(names.ADDITIONALMETADATA, parent=eml)
        eml.add_child(additional_metadata)
        r = rule.get_rule(names.EML)
        dataset = Node(names.DATASET, parent=eml)
        index = r.child_insert_index(eml, dataset)
        eml.add_child(dataset, index=index)
        self.assertIsInstance(index, int)

    def test_is_yeardate(self):
        good_vals = ['1980', '2020', '1980-01-01', '2020-12-31']
        bad_vals = ['nineteen-eighty', 2020, '01-01-1980', '2020-31-12']
        for good_val in good_vals:
            self.assertTrue(rule.Rule.is_yeardate(good_val))
        for bad_val in bad_vals:
            self.assertFalse(rule.Rule.is_yeardate(bad_val))

    # Test whether a value is, or can be converted to, a float
    def test_is_float(self):
        good_vals = ['34.555', '-120.0000', '34', -120]
        bad_vals = ['nineteen-eighty', 'foo', '01-01-1980', '-0000-']
        for good_val in good_vals:
            self.assertTrue(rule.Rule.is_float(good_val))
        for bad_val in bad_vals:
            self.assertFalse(rule.Rule.is_float(bad_val))
def load_data_table(dataset_node: Node = None, uploads_path: str = None, data_file: str = ''):
    """
    Build a dataTable subtree for a CSV file and attach it to dataset_node.

    The file at uploads_path/data_file is read with pandas; one attribute
    node is created per column and its measurementScale is chosen from the
    column's pandas dtype. After the subtree is built, delete_data_files is
    called on uploads_path.

    Returns:
        The new dataTable node.
    """
    def attach(node_name, parent_node):
        # Create a node under parent_node and register it as a child.
        created = Node(node_name, parent=parent_node)
        add_child(parent_node, created)
        return created

    full_path = f'{uploads_path}/{data_file}'

    datatable_node = attach(names.DATATABLE, dataset_node)

    physical_node = attach(names.PHYSICAL, datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = attach(names.ENTITYNAME, datatable_node)
    entity_name_node.content = entity_name_from_data_file(data_file)

    object_name_node = attach(names.OBJECTNAME, physical_node)
    object_name_node.content = data_file

    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = attach(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    # dataFormat / textFormat with fixed header and footer line counts
    data_format_node = attach(names.DATAFORMAT, physical_node)
    text_format_node = attach(names.TEXTFORMAT, data_format_node)

    num_header_lines_node = attach(names.NUMHEADERLINES, text_format_node)
    num_header_lines_node.content = '1'

    num_footer_lines_node = attach(names.NUMFOOTERLINES, text_format_node)
    num_footer_lines_node.content = '0'

    data_frame = pd.read_csv(full_path, comment='#')

    if data_frame is not None:
        number_of_records = attach(names.NUMBEROFRECORDS, datatable_node)
        row_count = data_frame.shape[0]
        number_of_records.content = f'{row_count}'

        attribute_list_node = attach(names.ATTRIBUTELIST, datatable_node)

        for col in data_frame.columns:
            dtype = str(data_frame[col].dtype)
            print(f'{col}: {dtype}')

            attribute_node = attach(names.ATTRIBUTE, attribute_list_node)

            attribute_name_node = attach(names.ATTRIBUTENAME, attribute_node)
            attribute_name_node.content = col

            att_label_node = attach(names.ATTRIBUTELABEL, attribute_node)
            att_label_node.content = col

            att_def_node = attach(names.ATTRIBUTEDEFINITION, attribute_node)
            att_def_node.content = f'Attribute definition for {col}'

            ms_node = attach(names.MEASUREMENTSCALE, attribute_node)

            if dtype == 'bool' or (dtype == 'object'
                                   and not is_datetime_column(col)):
                # nominal / nonNumericDomain
                nominal_node = attach(names.NOMINAL, ms_node)
                attach(names.NONNUMERICDOMAIN, nominal_node)
            elif dtype == 'object':
                # dateTime / formatString (left empty)
                datetime_node = attach(names.DATETIME, ms_node)
                format_string_node = attach(names.FORMATSTRING, datetime_node)
                format_string_node.content = ''
            elif dtype.startswith(('float', 'int')):
                # ratio / numericDomain / numberType
                number_type = 'integer' if dtype.startswith('int') else 'real'
                ratio_node = attach(names.RATIO, ms_node)
                numeric_domain_ratio_node = attach(names.NUMERICDOMAIN,
                                                   ratio_node)
                number_type_ratio_node = attach(names.NUMBERTYPE,
                                                numeric_domain_ratio_node)
                number_type_ratio_node.content = number_type

    delete_data_files(uploads_path)

    return datatable_node
def load_data_table(uploads_path: str = None,
                    data_file: str = '',
                    num_header_rows: str = '1',
                    delimiter: str = ',',
                    quote_char: str = '"'):
    """
    Build a standalone dataTable subtree describing an uploaded CSV file.

    The file at ``{uploads_path}/{data_file}`` is described by a physical
    node (object name, size, MD5 authentication, text format) and read
    with pandas. One attribute node is emitted per column, with a
    measurement scale chosen from the variable type reported by
    ``infer_col_type``.

    Args:
        uploads_path: directory holding the uploaded file
        data_file: file name of the uploaded CSV
        num_header_rows: value stored in the numHeaderLines node
        delimiter: field delimiter, recorded in the metadata and passed to
            pandas as ``sep``
        quote_char: quote character, recorded in the metadata and passed
            to pandas as ``quotechar``

    Returns:
        A tuple ``(datatable_node, column_vartypes, column_names,
        column_categorical_codes, data_frame, missing_value_code)``.
        ``missing_value_code`` is the value guessed for the LAST column
        processed, or None when no column was processed.

    Raises:
        DataTableError: if the reported file size is 0 or pandas raises a
            ParserError while reading the file.
    """
    # if Config.LOG_DEBUG:
    log_info(f'Entering load_data_table: {data_file}')

    full_path = f'{uploads_path}/{data_file}'

    datatable_node = metapype_client.new_child_node(names.DATATABLE, parent=None)

    physical_node = metapype_client.new_child_node(names.PHYSICAL, parent=datatable_node)
    physical_node.add_attribute('system', 'EDI')

    entity_name_node = metapype_client.new_child_node(names.ENTITYNAME, parent=datatable_node)
    entity_name = entity_name_from_data_file(data_file)
    entity_name_node.content = entity_name

    object_name_node = metapype_client.new_child_node(names.OBJECTNAME, parent=physical_node)
    object_name_node.content = data_file

    file_size = get_file_size(full_path)
    if file_size is not None:
        size_node = metapype_client.new_child_node(names.SIZE, physical_node)
        size_node.add_attribute('unit', 'byte')
        size_node.content = str(file_size)

    md5_hash = get_md5_hash(full_path)
    if md5_hash is not None:
        hash_node = Node(names.AUTHENTICATION, parent=physical_node)
        metapype_client.add_child(physical_node, hash_node)
        hash_node.add_attribute('method', 'MD5')
        hash_node.content = str(md5_hash)

    data_format_node = Node(names.DATAFORMAT, parent=physical_node)
    metapype_client.add_child(physical_node, data_format_node)

    text_format_node = Node(names.TEXTFORMAT, parent=data_format_node)
    metapype_client.add_child(data_format_node, text_format_node)

    num_header_lines_node = Node(names.NUMHEADERLINES, parent=text_format_node)
    metapype_client.add_child(text_format_node, num_header_lines_node)
    num_header_lines_node.content = num_header_rows

    num_footer_lines_node = Node(names.NUMFOOTERLINES, parent=text_format_node)
    metapype_client.add_child(text_format_node, num_footer_lines_node)
    num_footer_lines_node.content = '0'

    simple_delimited_node = Node(names.SIMPLEDELIMITED, parent=text_format_node)
    metapype_client.add_child(text_format_node, simple_delimited_node)

    field_delimiter_node = Node(names.FIELDDELIMITER, parent=simple_delimited_node)
    metapype_client.add_child(simple_delimited_node, field_delimiter_node)
    field_delimiter_node.content = delimiter

    quote_character_node = Node(names.QUOTECHARACTER, parent=simple_delimited_node)
    metapype_client.add_child(simple_delimited_node, quote_character_node)
    quote_character_node.content = quote_char

    if file_size == 0:
        raise DataTableError("The CSV file is empty.")

    check_column_name_uniqueness(full_path, delimiter)

    # Read one line so the file object records which newline convention
    # the file uses; repr() turns it into a printable escape such as \n
    # or \r\n, and the surrounding quotes are stripped.
    with open(full_path) as file:
        next(file)
        line_terminator = repr(file.newlines).replace("'", "")
    record_delimiter_node = Node(names.RECORDDELIMITER, parent=text_format_node)
    metapype_client.add_child(text_format_node, record_delimiter_node)
    record_delimiter_node.content = line_terminator

    # log_info('pd.read_csv')
    try:
        data_frame = pd.read_csv(full_path, encoding='utf8', sep=delimiter, quotechar=quote_char)
    except pd.errors.ParserError as e:
        # Keep the pandas error as the cause so the traceback is not lost.
        raise DataTableError(e.args[0]) from e

    column_vartypes = []
    column_names = []
    column_categorical_codes = []
    # Bound up front: it is part of the return value even when the column
    # loop below never runs.
    missing_value_code = None

    if data_frame is not None:
        number_of_records = Node(names.NUMBEROFRECORDS, parent=datatable_node)
        metapype_client.add_child(datatable_node, number_of_records)
        row_count = data_frame.shape[0]
        number_of_records.content = f'{row_count}'

        attribute_list_node = Node(names.ATTRIBUTELIST, parent=datatable_node)
        metapype_client.add_child(datatable_node, attribute_list_node)

        # data_frame = data_frame.convert_dtypes()

        columns = data_frame.columns

        for col in columns:
            # dtype is inferred from every row except the first one.
            dtype = data_frame[col][1:].infer_objects().dtype
            # dtype = data_frame.dtypes[col]

            var_type, codes = infer_col_type(data_frame, col)
            log_info(f'col: {col}  var_type: {var_type}')

            column_vartypes.append(var_type)
            column_names.append(col)
            column_categorical_codes.append(codes)

            attribute_node = metapype_client.new_child_node(
                names.ATTRIBUTE, attribute_list_node)
            attribute_name_node = metapype_client.new_child_node(
                names.ATTRIBUTENAME, attribute_node)
            attribute_name_node.content = col

            att_label_node = Node(names.ATTRIBUTELABEL, parent=attribute_node)
            metapype_client.add_child(attribute_node, att_label_node)
            att_label_node.content = col

            # The definition node is created empty.
            att_def_node = Node(names.ATTRIBUTEDEFINITION, parent=attribute_node)
            metapype_client.add_child(attribute_node, att_def_node)

            ms_node = Node(names.MEASUREMENTSCALE, parent=attribute_node)
            metapype_client.add_child(attribute_node, ms_node)

            missing_value_code = guess_missing_value_code(
                full_path, delimiter, quote_char, col)
            if missing_value_code:
                mv_node = Node(names.MISSINGVALUECODE, parent=attribute_node)
                metapype_client.add_child(attribute_node, mv_node)
                code_node = Node(names.CODE, parent=mv_node)
                metapype_client.add_child(mv_node, code_node)
                code_node.content = missing_value_code

            if var_type == metapype_client.VariableType.CATEGORICAL:
                codes = force_categorical_codes(attribute_node, dtype, codes)
                codes = force_missing_value_code(missing_value_code, dtype, codes)

                # nominal / nonNumericDomain / enumeratedDomain / ...codes...
                nominal_node = metapype_client.new_child_node(
                    names.NOMINAL, ms_node)
                non_numeric_domain_node = metapype_client.new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                enumerated_domain_node = metapype_client.new_child_node(
                    names.ENUMERATEDDOMAIN, non_numeric_domain_node)

                for code in codes:
                    code_definition_node = metapype_client.new_child_node(
                        names.CODEDEFINITION, enumerated_domain_node)
                    code_node = metapype_client.new_child_node(
                        names.CODE, code_definition_node)
                    code_node.content = str(code)
                    # Empty definition placeholder for this code.
                    metapype_client.new_child_node(
                        names.DEFINITION, code_definition_node)

            elif var_type == metapype_client.VariableType.NUMERICAL:
                # ratio / numericDomain
                ratio_node = metapype_client.new_child_node(
                    names.RATIO, ms_node)
                numeric_domain_node = metapype_client.new_child_node(
                    names.NUMERICDOMAIN, ratio_node)
                number_type = 'real'
                if str(dtype).startswith('int'):  # FIXME - we can do better than this
                    number_type = 'integer'
                number_type_node = metapype_client.new_child_node(
                    names.NUMBERTYPE, numeric_domain_node)
                number_type_node.content = number_type
                # Empty unit placeholder under ratio.
                metapype_client.new_child_node(names.UNIT, ratio_node)

            elif var_type == metapype_client.VariableType.TEXT:
                # nominal / nonNumericDomain / textDomain
                nominal_node = metapype_client.new_child_node(
                    names.NOMINAL, ms_node)
                non_numeric_domain_node = metapype_client.new_child_node(
                    names.NONNUMERICDOMAIN, nominal_node)
                text_domain_node = metapype_client.new_child_node(
                    names.TEXTDOMAIN, non_numeric_domain_node)
                # Empty definition placeholder for the text domain.
                metapype_client.new_child_node(
                    names.DEFINITION, text_domain_node)

            elif var_type == metapype_client.VariableType.DATETIME:
                # dateTime / formatString
                datetime_node = Node(names.DATETIME, parent=ms_node)
                metapype_client.add_child(ms_node, datetime_node)

                format_string_node = Node(names.FORMATSTRING, parent=datetime_node)
                metapype_client.add_child(datetime_node, format_string_node)
                # For datetime columns the second value returned by
                # infer_col_type is stored as the format string.
                format_string_node.content = codes

    # if Config.LOG_DEBUG:
    #     log_info(f'Leaving load_data_table')

    return datatable_node, column_vartypes, column_names, column_categorical_codes, data_frame, missing_value_code
def load_other_entity(dataset_node: Node = None, uploads_path: str = None, data_file: str = '', node_id: str = None):
    """
    Create, or refresh on re-upload, the otherEntity subtree describing
    an uploaded file.

    When ``node_id`` is None or '1' a new otherEntity node is built under
    ``dataset_node``. Otherwise the existing node registered under
    ``node_id`` is looked up and its objectName, size, authentication,
    formatName and entityType descendants are overwritten in place.

    Args:
        dataset_node: parent for a newly created otherEntity node
        uploads_path: directory holding the uploaded file
        data_file: file name of the uploaded file
        node_id: id of an existing otherEntity node to update, if any

    Returns:
        The otherEntity node that was created or updated.
    """
    full_path = f'{uploads_path}/{data_file}'

    creating = node_id is None or node_id == '1'

    if creating:
        other_entity_node = Node(names.OTHERENTITY, parent=dataset_node)
        metapype_client.add_child(dataset_node, other_entity_node)

        physical_node = Node(names.PHYSICAL, parent=other_entity_node)
        metapype_client.add_child(other_entity_node, physical_node)
        physical_node.add_attribute('system', 'EDI')

        name_node = Node(names.ENTITYNAME, parent=other_entity_node)
        metapype_client.add_child(other_entity_node, name_node)
        name_node.content = entity_name_from_data_file(data_file)

        object_name_node = Node(names.OBJECTNAME, parent=physical_node)
        metapype_client.add_child(physical_node, object_name_node)
    else:
        other_entity_node = Node.get_node_instance(node_id)
        object_name_node = other_entity_node.find_descendant(names.OBJECTNAME)

    object_name_node.content = data_file

    size_in_bytes = get_file_size(full_path)
    if size_in_bytes is not None:
        if creating:
            size_node = Node(names.SIZE, parent=physical_node)
            metapype_client.add_child(physical_node, size_node)
            size_node.add_attribute('unit', 'byte')
        else:
            size_node = other_entity_node.find_descendant(names.SIZE)
        size_node.content = str(size_in_bytes)

    digest = get_md5_hash(full_path)
    if digest is not None:
        if creating:
            auth_node = Node(names.AUTHENTICATION, parent=physical_node)
            metapype_client.add_child(physical_node, auth_node)
            auth_node.add_attribute('method', 'MD5')
        else:
            auth_node = other_entity_node.find_descendant(names.AUTHENTICATION)
        auth_node.content = str(digest)

    if creating:
        # dataFormat / externallyDefinedFormat / formatName
        data_format_node = Node(names.DATAFORMAT, parent=physical_node)
        metapype_client.add_child(physical_node, data_format_node)

        ext_format_node = Node(names.EXTERNALLYDEFINEDFORMAT, parent=data_format_node)
        metapype_client.add_child(data_format_node, ext_format_node)

        format_name_node = Node(names.FORMATNAME, parent=ext_format_node)
        metapype_client.add_child(ext_format_node, format_name_node)
    else:
        format_name_node = other_entity_node.find_descendant(names.FORMATNAME)
    format_name_node.content = format_name_from_data_file(data_file)

    entity_type_node = (
        metapype_client.new_child_node(names.ENTITYTYPE, parent=other_entity_node)
        if creating
        else other_entity_node.find_descendant(names.ENTITYTYPE)
    )
    # entityType is given the same value as formatName.
    entity_type_node.content = format_name_from_data_file(data_file)

    user_data.add_data_table_upload_filename(data_file)

    delete_data_files(uploads_path)

    return other_entity_node
def node():
    """
    Build and return a sample EML tree for use in tests.

    The tree has an ``eml`` root with three children, in this order:
    ``access`` (one allow and one deny rule), ``dataset`` (title, creator,
    pubDate, abstract, keywordSet, coverage, contact) and
    ``additionalMetadata`` (a metadata node wrapping an arbitrary
    "fictitious" element).

    Every node's ``parent`` refers to the node whose child list it is
    added to.

    Returns:
        The root ``eml`` node.
    """

    def attach(name, parent, content=None, attributes=None):
        # Create a node under `parent`, fill it in, then append it to the
        # parent's children.
        created = Node(name, parent=parent)
        for key, value in (attributes or {}).items():
            created.add_attribute(key, value)
        if content is not None:
            created.content = content
        parent.add_child(created)
        return created

    eml = Node(names.EML)
    eml.add_attribute("packageId", "edi.23.1")
    eml.add_attribute("system", "metapype")

    # access
    access = attach(
        names.ACCESS, eml,
        attributes={"authSystem": "pasta", "order": "allowFirst"},
    )

    allow = attach(names.ALLOW, access)
    attach(names.PRINCIPAL, allow, "uid=gaucho,o=EDI,dc=edirepository,dc=org")
    attach(names.PERMISSION, allow, "all")

    deny = attach(names.DENY, access)
    attach(names.PRINCIPAL, deny, "public")
    attach(names.PERMISSION, deny, "write")

    # dataset
    dataset = attach(names.DATASET, eml)

    attach(names.TITLE, dataset, "Green sea turtle counts: Tortuga Island 20017")

    creator = attach(names.CREATOR, dataset)

    individual_name_creator = attach(names.INDIVIDUALNAME, creator)
    attach(names.SALUTATION, individual_name_creator, "Mr.")
    attach(names.GIVENNAME, individual_name_creator, "Chase")
    sur_name_creator = attach(names.SURNAME, individual_name_creator, "Gaucho")
    attach(names.VALUE, sur_name_creator, "Gaucho", {"lang": "en"})

    address = attach(names.ADDRESS, creator)
    attach(names.DELIVERYPOINT, address, "100 Maple St")
    attach(names.DELIVERYPOINT, address, "Apt. 10-B")
    attach(names.CITY, address, "Gotham City")
    attach(names.ADMINISTRATIVEAREA, address, "New York")
    attach(names.POSTALCODE, address, "11111")
    attach(names.COUNTRY, address, "USA")

    attach(names.PHONE, creator, "555-555-5555", {"phonetype": "voice"})
    attach(names.ELECTRONICMAILADDRESS, creator, "*****@*****.**")
    attach(names.ONLINEURL, creator, "https://www.somecollege.edu/people/cgaucho")
    attach(
        names.USERID, creator,
        "uid=jgaucho,o=EDI,dc=edirepository,dc=org",
        {"directory": "ldap:///ldap.edirepository.org/dc=edirepository," "dc=org"},
    )

    attach(names.PUBDATE, dataset, "2018")

    abstract = attach(names.ABSTRACT, dataset, attributes={"lang": "en"})
    section = attach(names.SECTION, abstract)
    # para lives in section's child list, so section is its parent.
    attach(names.PARA, section, "para section")

    keyword_set = attach(names.KEYWORDSET, dataset)
    attach(names.KEYWORD, keyword_set, "phytoplankton ecology")
    attach(names.KEYWORD, keyword_set, "lake", {"keywordType": "place"})
    attach(names.KEYWORDTHESAURUS, keyword_set, "IRIS keyword thesaurus")

    coverage = attach(names.COVERAGE, dataset)
    taxonomic_coverage = attach(names.TAXONOMICCOVERAGE, coverage)

    general_taxonomic_coverage = attach(
        names.GENERALTAXONOMICCOVERAGE, taxonomic_coverage
    )
    general_taxonomic_coverage.content = "All vascular plants were \
        identified to family or species, mosses and lichens were \
        identified as moss or lichen."

    taxonomic_classification_genus = attach(
        names.TAXONOMICCLASSIFICATION, taxonomic_coverage
    )
    attach(names.TAXONRANKNAME, taxonomic_classification_genus, "Genus")
    attach(names.TAXONRANKVALUE, taxonomic_classification_genus, "Escherichia")

    # The species classification is nested inside the genus one.
    taxonomic_classification_species = attach(
        names.TAXONOMICCLASSIFICATION, taxonomic_classification_genus
    )
    attach(names.TAXONRANKNAME, taxonomic_classification_species, "Species")
    attach(names.TAXONRANKVALUE, taxonomic_classification_species, "coli")

    contact = attach(names.CONTACT, dataset)
    individual_name_contact = attach(names.INDIVIDUALNAME, contact)
    attach(names.SURNAME, individual_name_contact, "Gaucho")

    # additionalMetadata
    additional_metadata = attach(names.ADDITIONALMETADATA, eml)
    metadata = attach(names.METADATA, additional_metadata)
    # Arbitrary element name: metadata content is not EML vocabulary.
    attach("fictitious", metadata, "<tag>more fictitious content</tag>")

    return eml
class TestNode(unittest.TestCase):
    """Unit tests for ``Node``: attributes, children, copy, store, shift."""

    def setUp(self):
        self.node = Node(names.EML)

    def tearDown(self):
        self.node = None

    @staticmethod
    def _contact_with(*children):
        # Return a fresh contact node holding `children` in the given order.
        contact = Node(names.CONTACT)
        for child in children:
            contact.add_child(child=child)
        return contact

    @staticmethod
    def _build_access_tree():
        # Build eml / access / allow / (principal, permission) and return
        # the root together with the principal leaf.
        eml = Node(names.EML)
        eml.add_attribute('packageId', 'edi.23.1')
        eml.add_attribute('system', 'metapype')
        access = Node(names.ACCESS, parent=eml)
        access.add_attribute('authSystem', 'pasta')
        access.add_attribute('order', 'allowFirst')
        eml.add_child(access)
        allow = Node(names.ALLOW, parent=access)
        access.add_child(allow)
        principal = Node(names.PRINCIPAL, parent=allow)
        principal.content = 'uid=gaucho,o=EDI,dc=edirepository,dc=org'
        allow.add_child(principal)
        permission = Node(names.PERMISSION, parent=allow)
        permission.content = 'all'
        allow.add_child(permission)
        return eml, principal

    def test_add_attribute(self):
        expected = {'packageId': 'test.1.1', 'system': 'metapype'}
        for name, value in expected.items():
            self.node.add_attribute(name, value)
        attributes = self.node.attributes
        # Every stored attribute must be one that was added ...
        for attribute in attributes:
            self.assertIn(attribute, expected)
        # ... and every added attribute must be stored with its own value,
        # so the test cannot pass on an empty attribute mapping.
        for name, value in expected.items():
            self.assertIn(name, attributes)
            self.assertEqual(attributes[name], value)

    def test_add_child(self):
        child_1 = Node(names.ACCESS)
        self.node.add_child(child_1)
        children = self.node.children
        self.assertIs(child_1, children[0])
        # Inserting at index 0 must place the new child first.
        child_2 = Node(names.DATASET)
        self.node.add_child(child_2, 0)
        self.assertIs(child_2, children[0])

    def test_copy(self):
        node = Node(names.GIVENNAME)
        node.content = 'Chase'
        validate.node(node)
        node_copy = node.copy()
        validate.node(node_copy)

    def test_create_node(self):
        self.assertIsNotNone(self.node)

    def test_find_child(self):
        access = Node(names.ACCESS)
        self.node.add_child(access)
        child = self.node.find_child(names.ACCESS)
        self.assertIs(access, child)

        allow = Node(names.ALLOW)
        access.add_child(allow)
        grandchild = self.node.find_child(names.ALLOW)
        self.assertIs(grandchild, allow)

        permission = Node(names.PERMISSION)
        allow.add_child(permission)
        great_grandchild = self.node.find_child(names.PERMISSION)
        self.assertIs(great_grandchild, permission)

        child = self.node.find_child('nonesuch')
        self.assertIs(child, None)

    def test_remove_child(self):
        access = Node(names.ACCESS)
        self.node.add_child(access)
        child = self.node.children[0]
        self.assertIs(access, child)
        self.node.remove_child(child)
        self.assertNotIn(access, self.node.children)

    def test_replace_child(self):
        individual_name = Node(names.INDIVIDUALNAME)
        sur_name_1 = Node(names.SURNAME, parent=individual_name)
        sur_name_1.content = 'Gaucho'
        individual_name.add_child(sur_name_1)
        sur_name_2 = Node(names.SURNAME, parent=individual_name)
        sur_name_2.content = 'Carroll'
        self.assertIn(sur_name_1, individual_name.children)
        self.assertNotIn(sur_name_2, individual_name.children)
        individual_name.replace_child(old_child=sur_name_1, new_child=sur_name_2)
        self.assertIn(sur_name_2, individual_name.children)
        self.assertNotIn(sur_name_1, individual_name.children)

        # Test for old child removal from node store
        self.assertNotIn(sur_name_1.id, Node.store)

        # Test for child node type mismatch. assertRaises makes the test
        # fail when no ValueError is raised at all.
        given_name = Node(names.GIVENNAME)
        given_name.content = 'Chase'
        with self.assertRaises(ValueError):
            individual_name.replace_child(old_child=sur_name_2, new_child=given_name)

    def test_shift(self):
        individual_name_1 = Node(names.INDIVIDUALNAME)
        individual_name_2 = Node(names.INDIVIDUALNAME)
        individual_name_3 = Node(names.INDIVIDUALNAME)
        individual_name_4 = Node(names.INDIVIDUALNAME)
        organization_name = Node(names.ORGANIZATIONNAME)
        position_name = Node(names.POSITIONNAME)

        # Layout shared by the first four cases.
        bracketed = (
            organization_name,
            individual_name_1,
            individual_name_2,
            individual_name_3,
            position_name,
        )
        # Same layout without the trailing positionName.
        unbracketed = bracketed[:-1]

        # Test shift right
        contact = self._contact_with(*bracketed)
        shift_index = contact.shift(child=individual_name_2, direction=Shift.RIGHT)
        self.assertEqual(shift_index, 3)
        self.assertIs(contact.children[3], individual_name_2)

        # Test shift left
        contact = self._contact_with(*bracketed)
        shift_index = contact.shift(child=individual_name_2, direction=Shift.LEFT)
        self.assertEqual(shift_index, 1)
        self.assertIs(contact.children[1], individual_name_2)

        # Test shift on edge right
        contact = self._contact_with(*bracketed)
        index = contact.children.index(individual_name_3)
        shift_index = contact.shift(child=individual_name_3, direction=Shift.RIGHT)
        self.assertEqual(index, shift_index)

        # Test shift on edge left
        contact = self._contact_with(*bracketed)
        index = contact.children.index(individual_name_1)
        shift_index = contact.shift(child=individual_name_1, direction=Shift.LEFT)
        self.assertEqual(index, shift_index)

        # Test hard shift on edge right
        contact = self._contact_with(*unbracketed)
        index = contact.children.index(individual_name_3)
        shift_index = contact.shift(child=individual_name_3, direction=Shift.RIGHT)
        self.assertEqual(index, shift_index)

        # Test hard shift on edge left
        contact = self._contact_with(*unbracketed)
        index = contact.children.index(individual_name_1)
        shift_index = contact.shift(child=individual_name_1, direction=Shift.LEFT)
        self.assertEqual(index, shift_index)

        # Test distant sibling shift right
        contact = self._contact_with(
            organization_name,
            individual_name_1,
            individual_name_2,
            individual_name_3,
            position_name,
            individual_name_4,
        )
        shift_index = contact.shift(child=individual_name_3, direction=Shift.RIGHT)
        # Index is read AFTER the shift here: the returned index must match
        # the child's actual position.
        index = contact.children.index(individual_name_3)
        self.assertEqual(index, shift_index)

        # Test distant sibling shift left
        contact = self._contact_with(
            individual_name_1,
            organization_name,
            individual_name_2,
            individual_name_3,
            individual_name_4,
            position_name,
        )
        shift_index = contact.shift(child=individual_name_2, direction=Shift.LEFT)
        index = contact.children.index(individual_name_2)
        self.assertEqual(index, shift_index)

    def test_get_node(self):
        access = Node(names.ACCESS)
        node = Node.get_node_instance(access.id)
        self.assertIs(access, node)

    def test_delete_node(self):
        eml, principal = self._build_access_tree()
        node = Node.get_node_instance(principal.id)
        self.assertIs(principal, node)
        # Deleting the root must also drop its descendants from the store.
        Node.delete_node_instance(eml.id)
        self.assertNotIn(principal.id, Node.store)

    def test_delete_node_no_children(self):
        eml, principal = self._build_access_tree()
        node = Node.get_node_instance(principal.id)
        self.assertIs(principal, node)
        # With children=False the descendants must stay in the store.
        Node.delete_node_instance(eml.id, children=False)
        self.assertIn(principal.id, Node.store)