def get_naa_record(self, cleaned_data):
    """Fill ``cleaned_data`` with details of the NAA item named by its URL.

    The barcode is extracted from ``cleaned_data['url']``, the item and its
    series are looked up through ``RSItemClient`` / ``RSSeriesClient``, and
    the 'National Archives of Australia' repository and the series source
    are fetched or created before the item fields are copied across.

    Args:
        cleaned_data: mapping holding at least a ``'url'`` entry; it is
            updated in place.

    Returns:
        The same ``cleaned_data`` object. When the URL holds no barcode, or
        a remote lookup fails, it is returned unchanged and an error is
        recorded in ``self._errors['url']``.
    """
    url = cleaned_data['url']

    # Validate the URL before anything else so that a malformed value is
    # rejected without a database query or a remote request.
    if 'dhistory' in url:
        barcode_match = re.search(r'naa\/items\/(\d+)', url)
    else:
        barcode_match = re.search(r'Barcode=(\d+)', url)
    if barcode_match is None:
        self._errors['url'] = self.error_class(['Not a valid NAA url'])
        return cleaned_data
    barcode = barcode_match.group(1)

    rs = RSItemClient()
    rsseries = RSSeriesClient()
    try:
        item_details = rs.get_summary(barcode)
        series_details = rsseries.get_summary(item_details['series'])
    except Exception as e:
        # Deliberately broad: any failure while fetching the record is shown
        # to the user as a field error rather than crashing validation.
        # URLError-style exceptions carry the useful text in ``reason``.
        self._errors['url'] = self.error_class(
            ['Error accessing the url. Error: "{}"'.format(getattr(e, 'reason', e))]
        )
        return cleaned_data

    dates = item_details['contents_dates']
    citation = '{}, {}'.format(
        item_details['series'],
        item_details['control_symbol']
    )
    if item_details['digitised_status'] is True:
        item_url = 'http://dhistory.org/archives/naa/items/{}/'.format(barcode)
    else:
        item_url = 'http://www.naa.gov.au/cgi-bin/Search?O=I&Number={}'.format(barcode)

    # Records created here are attributed to the system account.
    system_user = User.objects.get(username='******')
    repository, created = Repository.objects.get_or_create(
        name='National Archives of Australia',
        defaults={'added_by': system_user}
    )
    series_type = SourceType.objects.get(label='series')
    series, created = Source.objects.get_or_create(
        repository_item_id=item_details['series'],
        source_type=series_type,
        repository=repository,
        defaults={
            'added_by': system_user,
            'title': series_details['title'],
            'url': 'http://www.naa.gov.au/cgi-bin/Search?Number={}'.format(item_details['series'])
        }
    )
    item_type = SourceType.objects.get(label='item')

    cleaned_data['collection'] = series
    cleaned_data['source_type'] = item_type
    cleaned_data['collection_item_id'] = item_details['control_symbol']
    cleaned_data['repository_item_id'] = item_details['identifier']
    cleaned_data['title'] = item_details['title']
    cleaned_data['publication_date'] = dates['start_date']['date']
    cleaned_data['publication_date_month'] = dates['start_date']['month']
    cleaned_data['publication_date_day'] = dates['start_date']['day']
    cleaned_data['publication_date_end'] = dates['end_date']['date']
    cleaned_data['publication_date_end_month'] = dates['end_date']['month']
    cleaned_data['publication_date_end_day'] = dates['end_date']['day']
    cleaned_data['pages'] = item_details['digitised_pages']
    cleaned_data['citation'] = citation
    cleaned_data['repository'] = repository
    cleaned_data['url'] = item_url
    cleaned_data['rdf_url'] = 'http://dhistory.org/archives/naa/items/{}/#file'.format(barcode)

    # 'title' and 'source_type' have just been filled in above, so any
    # errors previously recorded against them are discarded.
    for field in ('title', 'source_type'):
        if field in self._errors:
            del self._errors[field]
    return cleaned_data
def get_naa_record(self, cleaned_data):
    """Fill ``cleaned_data`` with details of the NAA item named by its URL.

    The barcode is extracted from ``cleaned_data['url']``, the item and its
    series are looked up through ``RSItemClient`` / ``RSSeriesClient``, and
    the 'National Archives of Australia' repository and the series source
    are fetched or created before the item fields are copied across.

    Args:
        cleaned_data: mapping holding at least a ``'url'`` entry; it is
            updated in place.

    Returns:
        The same ``cleaned_data`` object. When the URL holds no barcode, or
        a remote lookup fails, it is returned unchanged and an error is
        recorded in ``self._errors['url']``.
    """
    url = cleaned_data['url']

    # Validate the URL before anything else so that a malformed value is
    # rejected without a database query or a remote request.
    if 'dhistory' in url:
        barcode_match = re.search(r'naa/items/(\d+)', url)
    else:
        barcode_match = re.search(r'Barcode=(\d+)', url)
    if barcode_match is None:
        self._errors['url'] = self.error_class(['Not a valid NAA url'])
        return cleaned_data
    barcode = barcode_match.group(1)

    rs = RSItemClient()
    rsseries = RSSeriesClient()
    try:
        item_details = rs.get_summary(barcode)
        # The series lookup is a second remote call, so it is guarded the
        # same way as the item lookup.
        series_details = rsseries.get_summary(item_details['series'])
    except URLError as e:
        self._errors['url'] = self.error_class(
            ['Error accessing the url. Error: "{}"'.format(e.reason)]
        )
        return cleaned_data
    except Exception as e:
        # Deliberately broad: any other failure while fetching the record is
        # shown to the user as a field error rather than crashing validation.
        self._errors['url'] = self.error_class(
            ['Error accessing the url. Error: "{}"'.format(e)]
        )
        return cleaned_data

    dates = item_details['contents_dates']
    citation = '{}, {}'.format(
        item_details['series'],
        item_details['control_symbol']
    )
    if item_details['digitised_status'] is True:
        item_url = 'http://dhistory.org/archives/naa/items/{}/'.format(barcode)
    else:
        item_url = 'http://www.naa.gov.au/cgi-bin/Search?O=I&Number={}'.format(barcode)

    # Records created here are attributed to the system account.
    system_user = User.objects.get(username='******')
    repository, created = Repository.objects.get_or_create(
        name='National Archives of Australia',
        defaults={'added_by': system_user}
    )
    series_type = SourceType.objects.get(label='series')
    series, created = Source.objects.get_or_create(
        repository_item_id=item_details['series'],
        source_type=series_type,
        repository=repository,
        defaults={
            'added_by': system_user,
            'title': series_details['title'],
            'url': 'http://www.naa.gov.au/cgi-bin/Search?Number={}'.format(item_details['series'])
        }
    )
    item_type = SourceType.objects.get(label='item')

    cleaned_data['collection'] = series
    cleaned_data['source_type'] = item_type
    cleaned_data['collection_item_id'] = item_details['control_symbol']
    cleaned_data['repository_item_id'] = item_details['identifier']
    cleaned_data['title'] = item_details['title']
    cleaned_data['publication_date'] = dates['start_date']['date']
    cleaned_data['publication_date_month'] = dates['start_date']['month']
    cleaned_data['publication_date_day'] = dates['start_date']['day']
    cleaned_data['publication_date_end'] = dates['end_date']['date']
    cleaned_data['publication_date_end_month'] = dates['end_date']['month']
    cleaned_data['publication_date_end_day'] = dates['end_date']['day']
    cleaned_data['pages'] = item_details['digitised_pages']
    cleaned_data['citation'] = citation
    cleaned_data['repository'] = repository
    cleaned_data['url'] = item_url
    cleaned_data['rdf_url'] = 'http://dhistory.org/archives/naa/items/{}/#file'.format(barcode)

    # 'title' and 'source_type' have just been filled in above, so any
    # errors previously recorded against them are discarded.
    for field in ('title', 'source_type'):
        if field in self._errors:
            del self._errors[field]
    return cleaned_data
def get_naa_item(barcode):
    """Return the summary of the item identified by ``barcode`` via ``jsonify``.

    The summary is requested from ``RSItemClient`` with ``date_format='iso'``
    and placed under the ``'result'`` key of the payload handed to
    ``jsonify``.
    """
    summary = RSItemClient().get_summary(barcode, date_format='iso')
    payload = {'result': summary}
    return jsonify(payload)