def test_individual_xml_validates(self):
    """Validate one individual's directory XML against the expected DTD.

    The first cnetid returned by get_all_library_cnetids_from_directory()
    is used to build the individual's API URL; the XML fetched from that
    URL must validate against the DTD declared below.
    """
    # Adjacent string literals are concatenated into one DTD document.
    individual_dtd = (
        " <!ELEMENT responseData (response, totalResults, individuals)>"
        " <!ELEMENT response (#PCDATA)>"
        " <!ELEMENT totalResults (#PCDATA)>"
        " <!ELEMENT individuals (individual+)>"
        " <!ELEMENT individual (name, displayName, cnetid, chicagoid,"
        " contacts, resources)>"
        " <!ELEMENT name (#PCDATA)>"
        " <!ELEMENT displayName (#PCDATA)>"
        " <!ELEMENT cnetid (#PCDATA)>"
        " <!ELEMENT chicagoid (#PCDATA)>"
        " <!ELEMENT contacts (contact)>"
        " <!ELEMENT contact (title, division, department, subDepartment,"
        " email, phone, facultyExchange)>"
        " <!ELEMENT title (#PCDATA)>"
        " <!ELEMENT division (name, resources)>"
        " <!ELEMENT resources (directoryURL, xmlURL)>"
        " <!ELEMENT directoryURL (#PCDATA)>"
        " <!ELEMENT xmlURL (#PCDATA)>"
        " <!ELEMENT department (name, resources)>"
        " <!ELEMENT subDepartment (name, resources)>"
        " <!ELEMENT email (#PCDATA)>"
        " <!ELEMENT phone (#PCDATA)>"
        " <!ELEMENT facultyExchange (#PCDATA)>"
        " "
    )
    dtd = etree.DTD(StringIO(individual_dtd))

    first_cnetid = get_all_library_cnetids_from_directory()[0]
    individual_url = ('https://directory.uchicago.edu/api/v2/individuals/'
                      + first_cnetid + '.xml')
    root = etree.XML(get_xml_from_directory_api(individual_url))

    is_valid = dtd.validate(root)
    self.assertEqual(is_valid, True)
def test_directory_xml_validates(self):
    """Validate the division 16 directory XML against the expected DTD.

    The XML fetched from the divisions/16 API URL must validate against
    the DTD declared below.
    """
    # Adjacent string literals are concatenated into one DTD document.
    directory_dtd = (
        " <!ELEMENT responseData (response, totalResults, organizations)>"
        " <!ELEMENT response (#PCDATA)>"
        " <!ELEMENT totalResults (#PCDATA)>"
        " <!ELEMENT organizations (organization+)>"
        " <!ELEMENT organization (name, type, departments, members,"
        " resources)>"
        " <!ELEMENT name (#PCDATA)>"
        " <!ELEMENT type (#PCDATA)>"
        " <!ELEMENT departments (department+)>"
        " <!ELEMENT department (name, resources)>"
        " <!-- name (see above) -->"
        " <!ELEMENT resources (directoryURL, xmlURL)>"
        " <!ELEMENT directoryURL (#PCDATA)>"
        " <!ELEMENT xmlURL (#PCDATA)>"
        " <!ELEMENT members (member+)>"
        " <!ELEMENT member (name, displayName, cnetid, chicagoid, title,"
        " email, phone, facultyExchange, resources)>"
        " <!-- name (see above) -->"
        " <!ELEMENT displayName (#PCDATA)>"
        " <!ELEMENT cnetid (#PCDATA)>"
        " <!ELEMENT chicagoid (#PCDATA)>"
        " <!ELEMENT title (#PCDATA)>"
        " <!ELEMENT email (#PCDATA)>"
        " <!ELEMENT phone (#PCDATA)>"
        " <!ELEMENT facultyExchange (#PCDATA)>"
        " <!-- resources (see above) -->"
        " "
    )
    dtd = etree.DTD(StringIO(directory_dtd))

    division_url = 'https://directory.uchicago.edu/api/v2/divisions/16.xml'
    root = etree.XML(get_xml_from_directory_api(division_url))

    is_valid = dtd.validate(root)
    self.assertEqual(is_valid, True)
def get_data(tree, node=None):
    """Recursively fill in a directory tree from the campus directory API.

    Every node is a dict with the keys 'name', 'xml' and 'children'. The
    XML at node['xml'] is fetched, the node's 'name' is set from it, and a
    new child node is created and populated (by a recursive call) for each
    department or sub-department URL listed in that XML.

    Args:
        tree: the root node. It is also handed to find_node_for_xml() to
            look up the node that new sub-departments are attached to.
        node: the node to populate. Defaults to the root node (tree).

    Returns:
        None. The tree is modified in place.
    """
    if node is None:
        node = tree

    x = ElementTree.fromstring(get_xml_from_directory_api(node['xml']))

    # no matter what, name is in the same place- but sometimes it gets
    # split into chunks.
    node['name'] = x.find(".//organizations/organization/name").text

    if "/divisions/" in node['xml']:
        # division level: departments hang directly off this node.
        parent_node = node
        child_urls = x.findall(".//departments/department/resources/xmlURL")
    elif "/departments/" in node['xml']:
        # department level: attach sub-departments to the node that
        # find_node_for_xml() reports for this URL, or to this node when
        # it reports nothing.
        parent_node = find_node_for_xml(tree, node['xml']) or node
        child_urls = x.findall(
            ".//subDepartments/subDepartment/resources/xmlURL")
    else:
        # Neither a division nor a department URL: nothing to descend into.
        return

    for d in child_urls:
        child_node = {'name': '', 'xml': d.text, 'children': []}
        parent_node['children'].append(child_node)
        get_data(tree, child_node)
def get_all_library_cnetids_from_directory(xml_string=None):
    """Return the sorted, de-duplicated cnetids found in directory XML.

    Args:
        xml_string: directory XML to read. When omitted or empty, the XML
            for division 16 is fetched from the campus directory API.

    Returns:
        A sorted list of cnetid strings, one per distinct
        ``member/cnetid`` element. Elements with no text (or only
        whitespace) are skipped.
    """
    if not xml_string:
        xml_string = get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/divisions/16.xml')

    # get xml element tree.
    x = ElementTree.fromstring(xml_string)

    # get cnetids.
    cnetids = set()
    for element in x.findall(".//member/cnetid"):
        # An empty <cnetid/> has text None; None cannot be sorted together
        # with strings, so such elements are left out.
        cnetid = (element.text or '').strip()
        if cnetid:
            cnetids.add(cnetid)
    return sorted(cnetids)
def get_all_library_cnetids_from_directory(xml_string=None):
    """Return the sorted, de-duplicated cnetids found in directory XML.

    Args:
        xml_string: directory XML to read. When omitted or empty, the XML
            for division 16 is fetched from the campus directory API.

    Returns:
        A sorted list of cnetid strings, one per distinct
        ``member/cnetid`` element. Elements with no text (or only
        whitespace) are skipped.
    """
    if not xml_string:
        xml_string = get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/divisions/16.xml')

    # get xml element tree.
    x = ElementTree.fromstring(xml_string)

    # get cnetids.
    cnetids = set()
    for element in x.findall(".//member/cnetid"):
        # An empty <cnetid/> has text None; None cannot be sorted together
        # with strings, so such elements are left out.
        cnetid = (element.text or '').strip()
        if cnetid:
            cnetids.add(cnetid)
    return sorted(cnetids)
def _get_campus_directory_unit_names(self):
    """
    Report unit names in the campus directory.

    Each department listed for division 16 contributes its own name. Each
    of its sub-departments (read from the department's own XML) contributes
    "<department name> - <sub-department name>". Runs of whitespace inside
    a name are collapsed to a single space.

    Returns:
        a set() of campus directory full names, as strings.
    """
    def _normalize(text):
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        return re.sub(r'\s+', ' ', text).strip()

    unit_names = set()
    x = ElementTree.fromstring(
        get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/divisions/16'))
    for d in x.findall(".//departments/department"):
        department_name = _normalize(d.find('name').text)
        unit_names.add(department_name)

        # Sub-departments are only listed in the department's own document.
        department_xml = d.find('resources/xmlURL').text
        x2 = ElementTree.fromstring(
            get_xml_from_directory_api(department_xml))
        for d2 in x2.findall(".//subDepartments/subDepartment"):
            subdepartment_name = _normalize(d2.find('name').text)
            unit_names.add(department_name + ' - ' + subdepartment_name)
    return unit_names
def get_data(tree, node=None):
    """Recursively fill in a directory tree from the campus directory API.

    Every node is a dict with the keys 'name', 'xml' and 'children'. The
    XML at node['xml'] is fetched, the node's 'name' is set from it, and a
    new child node is created and populated (by a recursive call) for each
    department or sub-department URL listed in that XML.

    Args:
        tree: the root node. It is also handed to find_node_for_xml() to
            look up the node that new sub-departments are attached to.
        node: the node to populate. Defaults to the root node (tree).

    Returns:
        None. The tree is modified in place.
    """
    if node is None:
        node = tree

    x = ElementTree.fromstring(get_xml_from_directory_api(node['xml']))

    # no matter what, name is in the same place- but sometimes it gets
    # split into chunks.
    node['name'] = x.find(".//organizations/organization/name").text

    if "/divisions/" in node['xml']:
        # division level: departments hang directly off this node.
        parent_node = node
        child_urls = x.findall(".//departments/department/resources/xmlURL")
    elif "/departments/" in node['xml']:
        # department level: attach sub-departments to the node that
        # find_node_for_xml() reports for this URL, or to this node when
        # it reports nothing.
        parent_node = find_node_for_xml(tree, node['xml']) or node
        child_urls = x.findall(
            ".//subDepartments/subDepartment/resources/xmlURL")
    else:
        # Neither a division nor a department URL: nothing to descend into.
        return

    for d in child_urls:
        child_node = {'name': '', 'xml': d.text, 'children': []}
        parent_node['children'].append(child_node)
        get_data(tree, child_node)
def _staff_out_of_sync(self):
    """
    Compare staff details in Wagtail with those in the campus directory.

    Each source is reduced to a set of strings: the bare cnetid of every
    staff member, plus one "cnetid -value- (field)" entry for the official
    name, display name, position title, and each email, phone/faculty
    exchange pair and department.

    Returns:
        two sorted lists of strings: first the entries found in Wagtail
        but not in the campus directory, then the entries found in the
        campus directory but not in Wagtail.
    """
    # Don't sync up some staff accounts. Former library directors may
    # appear in the campus directory, but they shouldn't appear in
    # staff listings on the library website. In other cases, staff
    # might have phone numbers connected with non-library jobs. If they
    # want those numbers to appear on the library website, they can add
    # them manually but we won't keep their information in sync.
    skip_cnetids = ['judi']

    def _entry(cnetid, value, field):
        # None is rendered as an empty value rather than the text "None".
        text = str(value) if value is not None else ''
        return '{} -{}- ({})'.format(cnetid, self._clean(text), field)

    # Entries as reported by the campus directory API.
    directory_entries = set()
    for cnetid in get_all_library_cnetids_from_directory():
        if cnetid in skip_cnetids:
            continue
        directory_entries.add(cnetid)
        individual_url = (
            'https://directory.uchicago.edu/api/v2/individuals/{}.xml'
        ).format(cnetid)
        info = get_individual_info_from_directory(
            get_xml_from_directory_api(individual_url))
        for field in ('officialName', 'displayName', 'positionTitle'):
            directory_entries.add(_entry(cnetid, info[field], field))
        directory_entries.update(
            _entry(cnetid, address, 'email') for address in info['email'])
        directory_entries.update(
            _entry(cnetid, re.sub(r"\n", " ", pair), 'phoneFacultyExchange')
            for pair in info['phoneFacultyExchanges'])
        directory_entries.update(
            _entry(cnetid, unit, 'department')
            for unit in info['departments'])

    # Entries as stored on live Wagtail staff pages.
    wagtail_entries = set()
    for page in StaffPage.objects.live():
        if page.cnetid in skip_cnetids:
            continue
        wagtail_entries.add(page.cnetid)
        wagtail_entries.add(
            _entry(page.cnetid, page.official_name, 'officialName'))
        wagtail_entries.add(
            _entry(page.cnetid, page.display_name, 'displayName'))
        wagtail_entries.add(
            _entry(page.cnetid, page.position_title, 'positionTitle'))
        wagtail_entries.update(
            _entry(page.cnetid, record.email, 'email')
            for record in page.staff_page_email.all())
        wagtail_entries.update(
            _entry(page.cnetid,
                   '{} {}'.format(record.faculty_exchange,
                                  record.phone_number),
                   'phoneFacultyExchange')
            for record in page.staff_page_phone_faculty_exchange.all())
        wagtail_entries.update(
            _entry(page.cnetid,
                   record.library_unit.get_campus_directory_full_name(),
                   'department')
            for record in page.staff_page_units.all())

    missing_in_campus_directory = sorted(wagtail_entries - directory_entries)
    missing_in_wagtail = sorted(directory_entries - wagtail_entries)
    return missing_in_campus_directory, missing_in_wagtail
def handle(self, *args, **options):
    """
    Create or update one person's StaffPage from the campus directory.

    The person's XML is fetched from the directory API and parsed with
    get_individual_info_from_directory(). An existing StaffPage with the
    same cnetid is updated; otherwise a new one is created under the
    first StaffIndexPage. The page's VCards are then synchronised with the
    directory's title/department entries, and the page is made its own
    page maintainer.

    Args:
        options: must contain a non-empty 'cnetid'.

    Raises:
        SystemExit: with exit status 1 when no cnetid was supplied.
    """
    # A missing key and an empty/None value are both "no cnetid given".
    cnetid = options.get('cnetid')
    if not cnetid:
        sys.exit(1)

    staff_index = StaffIndexPage.objects.first()

    xml_string = get_xml_from_directory_api(
        'https://directory.uchicago.edu/api/v2/individuals/' + cnetid + '.xml')
    info = get_individual_info_from_directory(xml_string)
    next_available_path = get_available_path_under(staff_index.path)

    slug = make_slug(info['displayName'])

    if StaffPage.objects.filter(cnetid=info['cnetid']).exists():
        # Update a StaffPage.
        # Do not add 'path' to these defaults: setting path destroyed the
        # staff index.
        # NOTE(review): this branch uses '/loop/staff/' while the create
        # branch uses '/staff/' -- confirm which prefix is intended.
        StaffPage.objects.update_or_create(
            cnetid=info['cnetid'],
            defaults={
                'title': info['displayName'],
                'display_name': info['displayName'],
                'official_name': info['officialName'],
                'slug': slug,
                'url_path': '/loop/staff/' + slug + '/',
                'depth': staff_index.depth + 1,
            })
        staff_index.fix_tree(destructive=False)
    else:
        StaffPage.objects.create(
            title=info['displayName'],
            slug=slug,
            path=next_available_path,
            depth=len(next_available_path) // 4,
            numchild=0,
            url_path='/staff/' + slug + '/',
            cnetid=info['cnetid'],
            display_name=info['displayName'],
            official_name=info['officialName'],
        )

    # Looked up by info['cnetid'], the key the page was just saved under.
    staff_page = StaffPage.objects.get(cnetid=info['cnetid'])
    directory_vcards = info['title_department_subdepartments_dicts']

    # Add new VCards.
    for vcard in directory_vcards:
        # The entries are dicts, so optional values are read with .get();
        # hasattr() never sees dict keys and always left these blank.
        phone_number = vcard.get('phone') or ''
        StaffPagePageVCards.objects.get_or_create(
            title=vcard['title'],
            unit=DirectoryUnit.objects.get(pk=vcard['department']),
            faculty_exchange=vcard.get('facultyexchange') or '',
            email=vcard.get('email') or '',
            phone_label='work' if phone_number else '',
            phone_number=phone_number,
            page=staff_page)

    # Delete unnecessary VCards: any whose fields no longer match an entry
    # reported by the directory.
    for vcard in staff_page.vcards.all():
        d = {
            'title': vcard.title,
            'department': vcard.unit.id,
            'facultyexchange': vcard.faculty_exchange,
            'email': vcard.email,
            'phone': vcard.phone_number,
        }
        if d not in directory_vcards:
            vcard.delete()

    staff_page.page_maintainer = staff_page
    staff_page.save()
def handle(self, *args, **options):
    """
    Report differences between Wagtail staff and the campus directory.

    The report lists cnetids present on only one side and, for every
    cnetid present on both, mismatched official/display names and VCard
    entries present on only one side. Listed cnetids and VCard entries
    are sorted so that repeated runs produce the same report.

    Returns:
        The report lines joined with newlines; an empty string when
        nothing differs.
    """
    # api staff, wagtail staff
    api_staff = set(get_all_library_cnetids_from_directory())
    wag_staff = set(get_all_library_cnetids_from_wagtail())

    output = []

    missing_in_api = wag_staff.difference(api_staff)
    if missing_in_api:
        output.append(
            "THE FOLLOWING STAFF APPEAR IN WAGTAIL, BUT NOT IN THE UNIVERSITY'S API:")
        output.extend(sorted(missing_in_api))
        # if this happens, go into the user object and mark is_active False.

    missing_in_wagtail = api_staff.difference(wag_staff)
    if missing_in_wagtail:
        output.append(
            "THE FOLLOWING STAFF APPEAR IN THE UNIVERSITY'S API, BUT NOT IN WAGTAIL:")
        output.extend(sorted(missing_in_wagtail))
        # if this happens, report that a new user needs to be created.

    for s in sorted(api_staff.intersection(wag_staff)):
        xml_string = get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/individuals/' + s + '.xml')
        api = get_individual_info_from_directory(xml_string)
        wag = get_individual_info_from_wagtail(s)

        # format() rather than '+' so a missing (None) name is reported
        # instead of raising TypeError.
        for field in ('officialName', 'displayName'):
            if api[field] != wag[field]:
                output.append("{}'s {} is {}, not {}".format(
                    s, field, api[field], wag[field]))

        # In the user management command, change the following things in
        # the User object:
        # (note- in the User object, username = cnetid)
        # prompt a human for first_name, last_name.
        # In the StaffPage object,
        # check displayName and officialName.

        diffs = api['title_department_subdepartments'].difference(
            wag['title_department_subdepartments'])
        if diffs:
            output.append("THE FOLLOWING VCARDS APPEAR FOR " + s +
                          " IN THE UNIVERSITY'S API, BUT NOT IN WAGTAIL:")
            output.extend(sorted(diffs))

        diffs = wag['title_department_subdepartments'].difference(
            api['title_department_subdepartments'])
        if diffs:
            output.append("THE FOLLOWING VCARDS APPEAR FOR " + s +
                          " IN WAGTAIL, BUT NOT IN THE UNIVERSITY'S API:")
            output.extend(sorted(diffs))

    return "\n".join(output)
def handle(self, *args, **options):
    """
    Report differences between Wagtail staff and the campus directory.

    The report lists cnetids present on only one side and, for every
    cnetid present on both, mismatched official/display names and VCard
    entries present on only one side. Listed cnetids and VCard entries
    are sorted so that repeated runs produce the same report.

    Returns:
        The report lines joined with newlines; an empty string when
        nothing differs.
    """
    # api staff, wagtail staff
    api_staff = set(get_all_library_cnetids_from_directory())
    wag_staff = set(get_all_library_cnetids_from_wagtail())

    output = []

    missing_in_api = wag_staff.difference(api_staff)
    if missing_in_api:
        output.append(
            "THE FOLLOWING STAFF APPEAR IN WAGTAIL, BUT NOT IN THE UNIVERSITY'S API:")
        output.extend(sorted(missing_in_api))
        # if this happens, go into the user object and mark is_active False.

    missing_in_wagtail = api_staff.difference(wag_staff)
    if missing_in_wagtail:
        output.append(
            "THE FOLLOWING STAFF APPEAR IN THE UNIVERSITY'S API, BUT NOT IN WAGTAIL:")
        output.extend(sorted(missing_in_wagtail))
        # if this happens, report that a new user needs to be created.

    for s in sorted(api_staff.intersection(wag_staff)):
        xml_string = get_xml_from_directory_api(
            'https://directory.uchicago.edu/api/v2/individuals/' + s + '.xml')
        api = get_individual_info_from_directory(xml_string)
        wag = get_individual_info_from_wagtail(s)

        # format() rather than '+' so a missing (None) name is reported
        # instead of raising TypeError.
        for field in ('officialName', 'displayName'):
            if api[field] != wag[field]:
                output.append("{}'s {} is {}, not {}".format(
                    s, field, api[field], wag[field]))

        # In the user management command, change the following things in
        # the User object:
        # (note- in the User object, username = cnetid)
        # prompt a human for first_name, last_name.
        # In the StaffPage object,
        # check displayName and officialName.

        diffs = api['title_department_subdepartments'].difference(
            wag['title_department_subdepartments'])
        if diffs:
            output.append("THE FOLLOWING VCARDS APPEAR FOR " + s +
                          " IN THE UNIVERSITY'S API, BUT NOT IN WAGTAIL:")
            output.extend(sorted(diffs))

        diffs = wag['title_department_subdepartments'].difference(
            api['title_department_subdepartments'])
        if diffs:
            output.append("THE FOLLOWING VCARDS APPEAR FOR " + s +
                          " IN WAGTAIL, BUT NOT IN THE UNIVERSITY'S API:")
            output.extend(sorted(diffs))

    return "\n".join(output)
def handle(self, *args, **options):
    """
    Create or update one person's StaffPage from the campus directory.

    The person's XML is fetched from the directory API and parsed with
    get_individual_info_from_directory(). An existing StaffPage with the
    same cnetid is updated; otherwise a new one is created under the
    first StaffIndexPage. The page is then made its own page maintainer.

    Args:
        options: must contain a non-empty 'cnetid'.

    Raises:
        SystemExit: with exit status 1 when no cnetid was supplied.
    """
    # A missing key and an empty/None value are both "no cnetid given".
    cnetid = options.get('cnetid')
    if not cnetid:
        sys.exit(1)

    staff_index = StaffIndexPage.objects.first()

    xml_string = get_xml_from_directory_api(
        'https://directory.uchicago.edu/api/v2/individuals/' + cnetid + '.xml')
    info = get_individual_info_from_directory(xml_string)
    next_available_path = get_available_path_under(staff_index.path)

    slug = make_slug(info['displayName'])

    if StaffPage.objects.filter(cnetid=info['cnetid']).exists():
        # Update a StaffPage.
        # Do not add 'path' to these defaults: setting path destroyed the
        # staff index.
        # NOTE(review): this branch uses '/loop/staff/' while the create
        # branch uses '/staff/' -- confirm which prefix is intended.
        StaffPage.objects.update_or_create(
            cnetid=info['cnetid'],
            defaults={
                'title': info['displayName'],
                'display_name': info['displayName'],
                'official_name': info['officialName'],
                'slug': slug,
                'url_path': '/loop/staff/' + slug + '/',
                'depth': staff_index.depth + 1,
            })
        staff_index.fix_tree(destructive=False)
    else:
        StaffPage.objects.create(
            title=info['displayName'],
            slug=slug,
            path=next_available_path,
            depth=len(next_available_path) // 4,
            numchild=0,
            url_path='/staff/' + slug + '/',
            cnetid=info['cnetid'],
            display_name=info['displayName'],
            official_name=info['officialName'],
        )

    # TODO: add new contact information.
    # for contact_info in info['title_department_subdepartments_dicts']:
    #     add new email addresses, phone faculty exchange pairs,
    #     telephone numbers.

    # TODO: delete unnecessary contact information.

    staff_page = StaffPage.objects.get(cnetid=info['cnetid'])
    staff_page.page_maintainer = staff_page
    staff_page.save()