def doimport(**kwargs): path = kwargs.get('path', PATH) save = kwargs.get('save', False) image_path = kwargs.get('image_path', IMAGE_PATH) ruthless = kwargs.get('ruthless', False) newsindex = NEWS_INDEX tree = ET.parse(path) root = tree.getroot() errors = [] images_errors = [] for item in root.findall('news_item'): itemerrors = {} # sort out what instance this is news_contentid = item.attrib['contentid'] title, itemerrors['title'] = text_from_elem(item, 'title', length=255) date = parse_date(item.find('goinglivedate').text.strip().replace('.','-')) or datetime.date.today() try: newsitem = NewsItem.objects.get(rca_content_id=news_contentid) except NewsItem.DoesNotExist: newsitem = NewsItem(rca_content_id=news_contentid) newsitem.title = title newsitem.date = date newsitem.intro = richtext_from_elem(item.find('intro')) newsitem.slug = make_slug(newsitem) # possibly delete any images that are embedded in the existing body if ruthless: soup = BeautifulSoup(newsitem.body, 'html.parser') to_delete_ids = [] for x in soup.find_all('embed'): try: to_delete_ids.append(int(x.attrs['id'])) except ValueError: pass if to_delete_ids: RcaImage.objects.filter(id__in=to_delete_ids).delete() # build the body strings = [] if item.find('texts'): for elem in item.find('texts').findall('text'): html = richtext_from_elem(elem.find('content')) strings.append(html) newsitem.body = '\n'.join(strings) # save newsitem if save: if newsitem.id: newsitem.save() else: newsindex.add_child(newsitem) tobesaved = False if item.find('images') is not None: # first delete images that haven't got a contentid if ruthless: for c in NewsItemCarouselItem.objects.filter(page=newsitem): c.image.delete() c.delete() for image in item.find('images').findall('image'): imageerrors = {} metadata = image.find('imagemetadata') im_contentid = image.attrib['contentid'] filename = urllib2.unquote(image.find('filename').text.strip()) try: theimage = RcaImage.objects.get(rca_content_id=im_contentid) except RcaImage.DoesNotExist: theimage = RcaImage(rca_content_id=im_contentid) theimage.title, imageerrors['title'] = text_from_elem(metadata, 'title', length=255, textify=True) theimage.creator, imageerrors['creator'] = text_from_elem(metadata, 'creator', length=255, textify=True) theimage.medium, imageerrors['medium'] = text_from_elem(metadata, 'media', length=255, textify=True) theimage.photographer, imageerrors['photog'] = text_from_elem(metadata, 'photographer', length=255, textify=True) theimage.permission, imageerrors['perms'] = text_from_elem(metadata, 'rights', length=255, textify=True) caption, imageerrors['caption'] = text_from_elem(metadata, 'caption', length=255, textify=True) theimage.alt = caption #theimage.width, imageerrors['width'] = text_from_elem(metadata, 'width', length=255) #theimage.height, imageerrors['height'] = text_from_elem(metadata, 'height', length=255) try: with File(open(image_path + filename.encode('utf-8'), 'r')) as f: if theimage.id: if save: theimage.delete() theimage.file = f if save: theimage.save() except IOError as e: print "I/O error({0}): {1}".format(e.errno, e.strerror) print repr(filename) except ValueError: print "Could not convert data to an integer." except: import sys print "Unexpected error:", sys.exc_info()[0] raise if save and theimage.is_landscape(): try: carousel = NewsItemCarouselItem.objects.get( page = newsitem, image = theimage, ) except NewsItemCarouselItem.DoesNotExist: carousel = NewsItemCarouselItem( page = newsitem, image = theimage, ) if save: carousel.save() elif save and theimage.id: imagestring = '<embed alt="%(alt)s" embedtype="image" format="right" id="%(id)s"/>' % { 'alt': theimage.alt, 'id': theimage.id, } newsitem.body = imagestring + newsitem.body tobesaved = True imageerrordict = dict((k, v) for k, v in imageerrors.iteritems() if v) if imageerrordict: images_errors.append({image: imageerrordict}) if tobesaved and save: newsitem.save() errordict = dict((k, v) for k, v in itemerrors.iteritems() if v) if errordict: errors.append({item: errordict}) return errors, images_errors
def import_image(element): errors = {} # Get image info image_contentid = element.attrib['contentid'] image_filename, errors['filename'] = text_from_elem(element, 'filename', length=255, textify=True) image_caption, errors['caption'] = text_from_elem(element, 'caption', length=255) image_metadata = element.find('imagemetadata') image_title, errors['title'] = text_from_elem(image_metadata, 'title', length=255, textify=True) image_creator, errors['creator'] = text_from_elem(image_metadata, 'creator', length=255, textify=True) image_media, errors['media'] = text_from_elem(image_metadata, 'media', length=255, textify=True) image_photographer, errors['photographer'] = text_from_elem(image_metadata, 'photographer', length=255, textify=True) image_rights, errors['rights'] = text_from_elem(image_metadata, 'rights', length=255, textify=True) # Create image try: image = RcaImage.objects.get(rca_content_id=image_contentid) except RcaImage.DoesNotExist: image = RcaImage() image.rca_content_id = image_contentid image.title = image_title image.alt = image_caption image.creator = image_creator image.medium = image_media image.photographer = image_photographer image.permission = image_rights # Load image file if not image.id: try: with File(open(IMAGE_PATH + image_filename.encode('utf-8'), 'r')) as f: image.file = f image.save() except IOError as e: print "I/O error({0}): {1}".format(e.errno, e.strerror) print repr(image_filename) return None, None except ValueError: print "Could not convert data to an integer." return None, None except: import sys print "Unexpected error:", sys.exc_info()[0] raise else: image.save() return image, errors
def doimport(**kwargs): path = kwargs.get('path', PATH) save = kwargs.get('save', False) image_path = kwargs.get('image_path', IMAGE_PATH) ruthless = kwargs.get('ruthless', False) newsindex = NEWS_INDEX tree = ET.parse(path) root = tree.getroot() errors = [] images_errors = [] for item in root.findall('news_item'): itemerrors = {} # sort out what instance this is news_contentid = item.attrib['contentid'] title, itemerrors['title'] = text_from_elem(item, 'title', length=255) date = parse_date( item.find('goinglivedate').text.strip().replace( '.', '-')) or datetime.date.today() try: newsitem = NewsItem.objects.get(rca_content_id=news_contentid) except NewsItem.DoesNotExist: newsitem = NewsItem(rca_content_id=news_contentid) newsitem.title = title newsitem.date = date newsitem.intro = richtext_from_elem(item.find('intro')) newsitem.slug = make_slug(newsitem) # possibly delete any images that are embedded in the existing body if ruthless: soup = BeautifulSoup(newsitem.body, 'html.parser') to_delete_ids = [] for x in soup.find_all('embed'): try: to_delete_ids.append(int(x.attrs['id'])) except ValueError: pass if to_delete_ids: RcaImage.objects.filter(id__in=to_delete_ids).delete() # build the body strings = [] if item.find('texts'): for elem in item.find('texts').findall('text'): html = richtext_from_elem(elem.find('content')) strings.append(html) newsitem.body = '\n'.join(strings) # save newsitem if save: if newsitem.id: newsitem.save() else: newsindex.add_child(newsitem) tobesaved = False if item.find('images') is not None: # first delete images that haven't got a contentid if ruthless: for c in NewsItemCarouselItem.objects.filter(page=newsitem): c.image.delete() c.delete() for image in item.find('images').findall('image'): imageerrors = {} metadata = image.find('imagemetadata') im_contentid = image.attrib['contentid'] filename = urllib2.unquote(image.find('filename').text.strip()) try: theimage = RcaImage.objects.get( rca_content_id=im_contentid) except RcaImage.DoesNotExist: theimage = RcaImage(rca_content_id=im_contentid) theimage.title, imageerrors['title'] = text_from_elem( metadata, 'title', length=255, textify=True) theimage.creator, imageerrors['creator'] = text_from_elem( metadata, 'creator', length=255, textify=True) theimage.medium, imageerrors['medium'] = text_from_elem( metadata, 'media', length=255, textify=True) theimage.photographer, imageerrors['photog'] = text_from_elem( metadata, 'photographer', length=255, textify=True) theimage.permission, imageerrors['perms'] = text_from_elem( metadata, 'rights', length=255, textify=True) caption, imageerrors['caption'] = text_from_elem(metadata, 'caption', length=255, textify=True) theimage.alt = caption #theimage.width, imageerrors['width'] = text_from_elem(metadata, 'width', length=255) #theimage.height, imageerrors['height'] = text_from_elem(metadata, 'height', length=255) try: with File(open(image_path + filename.encode('utf-8'), 'r')) as f: if theimage.id: if save: theimage.delete() theimage.file = f if save: theimage.save() except IOError as e: print "I/O error({0}): {1}".format(e.errno, e.strerror) print repr(filename) except ValueError: print "Could not convert data to an integer." except: import sys print "Unexpected error:", sys.exc_info()[0] raise if save and theimage.is_landscape(): try: carousel = NewsItemCarouselItem.objects.get( page=newsitem, image=theimage, ) except NewsItemCarouselItem.DoesNotExist: carousel = NewsItemCarouselItem( page=newsitem, image=theimage, ) if save: carousel.save() elif save and theimage.id: imagestring = '<embed alt="%(alt)s" embedtype="image" format="right" id="%(id)s"/>' % { 'alt': theimage.alt, 'id': theimage.id, } newsitem.body = imagestring + newsitem.body tobesaved = True imageerrordict = dict( (k, v) for k, v in imageerrors.iteritems() if v) if imageerrordict: images_errors.append({image: imageerrordict}) if tobesaved and save: newsitem.save() errordict = dict((k, v) for k, v in itemerrors.iteritems() if v) if errordict: errors.append({item: errordict}) return errors, images_errors
def doimport(**kwargs): save = kwargs.get('save', False) path = kwargs.get('path', PATH) image_path = kwargs.get('image_path', IMAGE_PATH) show_index = SHOW_INDEX tree = ET.parse(path) root = tree.getroot() errors = {} images_errors = [] dept_count = 0 total_students = 0 new_count = 0 student_save_count = 0 for d in root.findall('department'): dept_count += 1 page = d.find('page') depterrors = {} dept_title, depterrors['title'] = text_from_elem(page, 'title') specialism = '' print '\nNow importing: ' + repr(dept_title) if dept_title in PROGRAMME_SPECIALISMS.keys(): dept_title, specialism = PROGRAMME_SPECIALISMS[dept_title] print 'dept: ' + repr(dept_title) theprogramme = PROGRAMMES[dept_title] print 'prog: ' + repr(theprogramme) theschool = SCHOOLS[dept_title] print 'scho: ' + repr(theschool) h = html2text.HTML2Text() h.body_width = 0 try: blurb = page.find('texts').findall('text')[0].find('content') except AttributeError: blurb = page.find('synopsis') blurb = h.handle(blurb.text).strip() print "Blurb: " + repr(blurb) print "******* note that the above text will not be imported *******" student_count = 0 for s in d.findall('student'): student_count += 1 s = s.find('studentpage') sp_contentid = s.attrib['contentid'] try: sp = StudentPage.objects.get(rca_content_id=sp_contentid) except StudentPage.DoesNotExist: sp = StudentPage(rca_content_id=sp_contentid) sp_errs = {} sp.title, sp_errs['title'] = text_from_elem(s, 'title', length=255) # there is no intro text in any of the data at time of writing # intro, sp_errs['intro'] = text_from_elem(s, 'intro') sp.slug = make_slug(sp) statement = richtext_from_elem(s.find('statement')) statement_text, sponsors, collaborators = statement_extract(statement) sp.statement = statement_text sp.work_description = statement_text # handle the metadata fields metadata = s.find('metadata') # format the current degree sp.degree_year, sp_errs['deg_year'] = text_from_elem(metadata, 'year', length=255) degree_subject, sp_errs['deg_subj'] = text_from_elem(metadata, 'degrees', length=255) if degree_subject[-1] == '?': degree_subject = degree_subject[:-1] sp.degree_subject = DEGREE_SUBJECTS[degree_subject] degree_qualification, sp_errs['deg_qual'] = text_from_elem(metadata, 'degree', length=255) sp.degree_qualification = degree_qualification.lower() # metadata contains first and last names in separate fields sp.first_name, sp_errs['first_name'] = text_from_elem(metadata, 'firstname', length=255) sp.last_name, sp_errs['last_name'] = text_from_elem(metadata, 'surname', length=255) # we worked out the programme and school earlier from the dept_page sp.programme = theprogramme sp.school = theschool if not specialism and metadata.find('specialism') is not None: sp.specialism, sp_errs['specialism'] = text_from_elem(metadata, 'specialism') else: sp.specialism = specialism # handle profile image try: profile_image = RcaImage.objects.get(rca_content_id=sp_contentid + 'profile_image') except RcaImage.DoesNotExist: profile_image = RcaImage(rca_content_id=sp_contentid + 'profile_image') profile_filename = slugify(unicode(sp.title)).replace('-','_') profile_image_path = image_path + "show_2013_profiles/2400_" + sp.programme + "/" profile_image.title = sp.title + ' profile image' if not profile_image.id: try: with File(open(normalize("NFKD", profile_image_path + profile_filename + '.jpg'), 'rb')) as f: profile_image.file = f if save: profile_image.save() except IOError as e: try: with File(open(normalize("NFKD", profile_image_path + profile_filename + '.png'), 'rb')) as f: profile_image.file = f if save: profile_image.save() except IOError as e: print "I/O error({0}): {1}".format(e.errno, e.strerror) + " " + profile_image_path + profile_filename sp_errs['image_not_found'] = profile_image_path + profile_filename except ValueError: print "Could not convert data to an integer." except: import sys print "Unexpected error:", sys.exc_info()[0] raise else: if save: profile_image.save() sp.profile_image = profile_image # save the studentpage for foreignkey purposes if save: student_save_count += 1 if sp.id: sp.save() else: new_count += 1 show_index.add_child(sp) elif not sp.id: new_count += 1 # handle the sponsors and collaborators from earlier for spon in sponsors: name, sp_errs['sponsors'] = check_length(spon, 255) if save: sponpage = StudentPageWorkSponsor(page=sp, name=name) sponpage.save() for col in collaborators: name, sp_errs['collaborators'] = check_length(col, 255) if save: colpage = StudentPageWorkCollaborator(page=sp, name=name) colpage.save() # handle the cv fields cv = s.find('cv') sp_errs['degree'] = cv_handle( cv, 'degrees', StudentPageDegree, sp, length=255, fieldname='degree', save=save) sp_errs['exhibition'] = cv_handle( cv, 'exhibition', StudentPageExhibition, sp, length=255, save=save) sp_errs['experience'] = cv_handle( cv, 'experience', StudentPageExperience, sp, length=255, save=save) sp_errs['awards'] = cv_handle( cv, 'awards', StudentPageAwards, sp, length=255, fieldname='award', save=save) if cv.find('sponsors') is not None: sp_errs['sponsors'] = cv_handle( cv, 'sponsors', StudentPageWorkSponsor, sp, length=255, fieldname='name', save=save) # currently the model doesn't have publications or conferences #sp_errs['publications'] = cv_handle( # cv, 'publications', StudentPagePublications, sp, length=255) #sp_errs['conferences'] = cv_handle( # cv, 'conferences', StudentPageConferences, sp, length=255) if s.find('emails') is not None: for emailaddress in s.find('emails').getchildren(): emailtext = emailaddress.text.strip() if save: StudentPageContactsEmail.objects.get_or_create(page=sp, email=emailtext) if s.find('phonenumbers') is not None: for num in s.find('phonenumbers').getchildren(): if num.text: phonenumber = num.text.strip() if save: StudentPageContactsPhone.objects.get_or_create(page=sp, phone=phonenumber) if s.find('urls') is not None: for url in s.find('urls').getchildren(): if url.text: urltext = url.text.strip() if save: StudentPageContactsWebsite.objects.get_or_create(page=sp, website=urltext) # handle images tag images = s.find('images') forloop_counter = 0 if images is not None: for image in images.findall('image'): forloop_counter += 1 imageerrors = {} metadata = image.find('imagemetadata') im_contentid = image.attrib['contentid'] if not im_contentid: im_contentid = sp_contentid + '_image_' + str(forloop_counter) try: theimage = RcaImage.objects.get(rca_content_id=im_contentid) except RcaImage.DoesNotExist: theimage = RcaImage(rca_content_id=im_contentid) theimage.title, imageerrors['title'] = text_from_elem(metadata, 'title', length=255, textify=True) theimage.creator, imageerrors['creator'] = text_from_elem(metadata, 'creator', length=255, textify=True) theimage.medium, imageerrors['medium'] = text_from_elem(metadata, 'media', length=255, textify=True) photographer, imageerrors['photographer'] = text_from_elem(metadata, 'photographer', length=255) if photographer.strip().startswith('©'): photographer = photographer.replace('©', '').strip() theimage.photographer = photographer theimage.permissions, imageerrors['permissions'] = text_from_elem(metadata, 'rights', length=255) caption, imageerrors['caption'] = text_from_elem(metadata, 'caption', length=255, textify=True) theimage.alt = caption #theimage.width, imageerrors['width'] = text_from_elem(metadata, 'width', length=255) #theimage.height, imageerrors['height'] = text_from_elem(metadata, 'height', length=255) filename = unicode(urllib2.unquote(image.find('filename').text.strip())) image_success = False full_image_path = image_path + '2400_' + sp.programme + "/" if not theimage.id: try: with File(open(normalize("NFKD", full_image_path + filename), 'rb')) as f: theimage.file = f if save: theimage.save() image_success = True except IOError as e: try: with File(open(normalize("NFKD", full_image_path + filename[:-4] + '.png'), 'rb')) as f: theimage.file = f if save: theimage.save() image_success = True except IOError as e: print "I/O error({0}): {1}".format(e.errno, e.strerror) print full_image_path + filename imageerrors['image_not_found'] = full_image_path + filename except ValueError: print "Could not convert data to an integer." except: import sys print "Unexpected error:", sys.exc_info()[0] raise else: if save: theimage.save() image_success = True if save and image_success: StudentPageCarouselItem.objects.get_or_create(page=sp, image=theimage) newimageerrordict = dict((k, v) for k, v in imageerrors.iteritems() if v) if newimageerrordict: images_errors.append({image: newimageerrordict}) errordict = dict((k, v) for k, v in sp_errs.iteritems() if v) if errordict: depterrors[sp.title] = errordict errordict = dict((k, v) for k, v in depterrors.iteritems() if v) if errordict: errors[theprogramme] = errordict print "%(student_count)s students" % { 'student_count': student_count } total_students += student_count print "%(d)s departments imported, total %(s)s students, %(sv)s saved (%(n)s new)" % { 'd': dept_count, 's': total_students, 'sv': student_save_count, 'n': new_count, } profile_not_found_count = 0 image_not_found_count = 0 for dept, depterrors in errors.iteritems(): print '\n' + dept + '\n' + '='*len(dept) for name, sp_errs in depterrors.iteritems(): if isinstance(sp_errs, dict): print name print sp_errs['image_not_found'] profile_not_found_count += 1 print '\nImage errors\n============' for image_dict in images_errors: for image, error_dict in image_dict.iteritems(): if isinstance(error_dict, dict): print error_dict['image_not_found'] image_not_found_count += 1 print str(profile_not_found_count) + " profile images not found" print str(image_not_found_count) + " artwork images not found" print '\n\n' return images_errors, errors
def doimport(**kwargs): save = kwargs.get('save', False) path = kwargs.get('path', PATH) image_path = kwargs.get('image_path', IMAGE_PATH) show_index = SHOW_INDEX tree = ET.parse(path) root = tree.getroot() errors = {} images_errors = [] dept_count = 0 total_students = 0 new_count = 0 student_save_count = 0 for d in root.findall('department'): dept_count += 1 page = d.find('page') depterrors = {} dept_title, depterrors['title'] = text_from_elem(page, 'title') specialism = '' print '\nNow importing: ' + repr(dept_title) if dept_title in PROGRAMME_SPECIALISMS.keys(): dept_title, specialism = PROGRAMME_SPECIALISMS[dept_title] print 'dept: ' + repr(dept_title) theprogramme = PROGRAMMES[dept_title] print 'prog: ' + repr(theprogramme) theschool = SCHOOLS[dept_title] print 'scho: ' + repr(theschool) h = html2text.HTML2Text() h.body_width = 0 try: blurb = page.find('texts').findall('text')[0].find('content') except AttributeError: blurb = page.find('synopsis') blurb = h.handle(blurb.text).strip() print "Blurb: " + repr(blurb) print "******* note that the above text will not be imported *******" student_count = 0 for s in d.findall('student'): student_count += 1 s = s.find('studentpage') sp_contentid = s.attrib['contentid'] try: sp = StudentPage.objects.get(rca_content_id=sp_contentid) except StudentPage.DoesNotExist: sp = StudentPage(rca_content_id=sp_contentid) sp_errs = {} sp.title, sp_errs['title'] = text_from_elem(s, 'title', length=255) # there is no intro text in any of the data at time of writing # intro, sp_errs['intro'] = text_from_elem(s, 'intro') sp.slug = make_slug(sp) statement = richtext_from_elem(s.find('statement')) statement_text, sponsors, collaborators = statement_extract( statement) sp.statement = statement_text sp.work_description = statement_text # handle the metadata fields metadata = s.find('metadata') # format the current degree sp.degree_year, sp_errs['deg_year'] = text_from_elem(metadata, 'year', length=255) degree_subject, sp_errs['deg_subj'] = text_from_elem(metadata, 'degrees', length=255) if degree_subject[-1] == '?': degree_subject = degree_subject[:-1] sp.degree_subject = DEGREE_SUBJECTS[degree_subject] degree_qualification, sp_errs['deg_qual'] = text_from_elem( metadata, 'degree', length=255) sp.degree_qualification = degree_qualification.lower() # metadata contains first and last names in separate fields sp.first_name, sp_errs['first_name'] = text_from_elem(metadata, 'firstname', length=255) sp.last_name, sp_errs['last_name'] = text_from_elem(metadata, 'surname', length=255) # we worked out the programme and school earlier from the dept_page sp.programme = theprogramme sp.school = theschool if not specialism and metadata.find('specialism') is not None: sp.specialism, sp_errs['specialism'] = text_from_elem( metadata, 'specialism') else: sp.specialism = specialism # handle profile image try: profile_image = RcaImage.objects.get( rca_content_id=sp_contentid + 'profile_image') except RcaImage.DoesNotExist: profile_image = RcaImage(rca_content_id=sp_contentid + 'profile_image') profile_filename = slugify(unicode(sp.title)).replace('-', '_') profile_image_path = image_path + "show_2013_profiles/2400_" + sp.programme + "/" profile_image.title = sp.title + ' profile image' if not profile_image.id: try: with File( open( normalize( "NFKD", profile_image_path + profile_filename + '.jpg'), 'rb')) as f: profile_image.file = f if save: profile_image.save() except IOError as e: try: with File( open( normalize( "NFKD", profile_image_path + profile_filename + '.png'), 'rb')) as f: profile_image.file = f if save: profile_image.save() except IOError as e: print "I/O error({0}): {1}".format( e.errno, e.strerror ) + " " + profile_image_path + profile_filename sp_errs[ 'image_not_found'] = profile_image_path + profile_filename except ValueError: print "Could not convert data to an integer." except: import sys print "Unexpected error:", sys.exc_info()[0] raise else: if save: profile_image.save() sp.profile_image = profile_image # save the studentpage for foreignkey purposes if save: student_save_count += 1 if sp.id: sp.save() else: new_count += 1 show_index.add_child(sp) elif not sp.id: new_count += 1 # handle the sponsors and collaborators from earlier for spon in sponsors: name, sp_errs['sponsors'] = check_length(spon, 255) if save: sponpage = StudentPageWorkSponsor(page=sp, name=name) sponpage.save() for col in collaborators: name, sp_errs['collaborators'] = check_length(col, 255) if save: colpage = StudentPageWorkCollaborator(page=sp, name=name) colpage.save() # handle the cv fields cv = s.find('cv') sp_errs['degree'] = cv_handle(cv, 'degrees', StudentPageDegree, sp, length=255, fieldname='degree', save=save) sp_errs['exhibition'] = cv_handle(cv, 'exhibition', StudentPageExhibition, sp, length=255, save=save) sp_errs['experience'] = cv_handle(cv, 'experience', StudentPageExperience, sp, length=255, save=save) sp_errs['awards'] = cv_handle(cv, 'awards', StudentPageAwards, sp, length=255, fieldname='award', save=save) if cv.find('sponsors') is not None: sp_errs['sponsors'] = cv_handle(cv, 'sponsors', StudentPageWorkSponsor, sp, length=255, fieldname='name', save=save) # currently the model doesn't have publications or conferences #sp_errs['publications'] = cv_handle( # cv, 'publications', StudentPagePublications, sp, length=255) #sp_errs['conferences'] = cv_handle( # cv, 'conferences', StudentPageConferences, sp, length=255) if s.find('emails') is not None: for emailaddress in s.find('emails').getchildren(): emailtext = emailaddress.text.strip() if save: StudentPageContactsEmail.objects.get_or_create( page=sp, email=emailtext) if s.find('phonenumbers') is not None: for num in s.find('phonenumbers').getchildren(): if num.text: phonenumber = num.text.strip() if save: StudentPageContactsPhone.objects.get_or_create( page=sp, phone=phonenumber) if s.find('urls') is not None: for url in s.find('urls').getchildren(): if url.text: urltext = url.text.strip() if save: StudentPageContactsWebsite.objects.get_or_create( page=sp, website=urltext) # handle images tag images = s.find('images') forloop_counter = 0 if images is not None: for image in images.findall('image'): forloop_counter += 1 imageerrors = {} metadata = image.find('imagemetadata') im_contentid = image.attrib['contentid'] if not im_contentid: im_contentid = sp_contentid + '_image_' + str( forloop_counter) try: theimage = RcaImage.objects.get( rca_content_id=im_contentid) except RcaImage.DoesNotExist: theimage = RcaImage(rca_content_id=im_contentid) theimage.title, imageerrors['title'] = text_from_elem( metadata, 'title', length=255, textify=True) theimage.creator, imageerrors['creator'] = text_from_elem( metadata, 'creator', length=255, textify=True) theimage.medium, imageerrors['medium'] = text_from_elem( metadata, 'media', length=255, textify=True) photographer, imageerrors['photographer'] = text_from_elem( metadata, 'photographer', length=255) if photographer.strip().startswith('©'): photographer = photographer.replace('©', '').strip() theimage.photographer = photographer theimage.permissions, imageerrors[ 'permissions'] = text_from_elem(metadata, 'rights', length=255) caption, imageerrors['caption'] = text_from_elem( metadata, 'caption', length=255, textify=True) theimage.alt = caption #theimage.width, imageerrors['width'] = text_from_elem(metadata, 'width', length=255) #theimage.height, imageerrors['height'] = text_from_elem(metadata, 'height', length=255) filename = unicode( urllib2.unquote(image.find('filename').text.strip())) image_success = False full_image_path = image_path + '2400_' + sp.programme + "/" if not theimage.id: try: with File( open( normalize("NFKD", full_image_path + filename), 'rb')) as f: theimage.file = f if save: theimage.save() image_success = True except IOError as e: try: with File( open( normalize( "NFKD", full_image_path + filename[:-4] + '.png'), 'rb')) as f: theimage.file = f if save: theimage.save() image_success = True except IOError as e: print "I/O error({0}): {1}".format( e.errno, e.strerror) print full_image_path + filename imageerrors[ 'image_not_found'] = full_image_path + filename except ValueError: print "Could not convert data to an integer." except: import sys print "Unexpected error:", sys.exc_info()[0] raise else: if save: theimage.save() image_success = True if save and image_success: StudentPageCarouselItem.objects.get_or_create( page=sp, image=theimage) newimageerrordict = dict( (k, v) for k, v in imageerrors.iteritems() if v) if newimageerrordict: images_errors.append({image: newimageerrordict}) errordict = dict((k, v) for k, v in sp_errs.iteritems() if v) if errordict: depterrors[sp.title] = errordict errordict = dict((k, v) for k, v in depterrors.iteritems() if v) if errordict: errors[theprogramme] = errordict print "%(student_count)s students" % {'student_count': student_count} total_students += student_count print "%(d)s departments imported, total %(s)s students, %(sv)s saved (%(n)s new)" % { 'd': dept_count, 's': total_students, 'sv': student_save_count, 'n': new_count, } profile_not_found_count = 0 image_not_found_count = 0 for dept, depterrors in errors.iteritems(): print '\n' + dept + '\n' + '=' * len(dept) for name, sp_errs in depterrors.iteritems(): if isinstance(sp_errs, dict): print name print sp_errs['image_not_found'] profile_not_found_count += 1 print '\nImage errors\n============' for image_dict in images_errors: for image, error_dict in image_dict.iteritems(): if isinstance(error_dict, dict): print error_dict['image_not_found'] image_not_found_count += 1 print str(profile_not_found_count) + " profile images not found" print str(image_not_found_count) + " artwork images not found" print '\n\n' return images_errors, errors