def create_record(self, id, media, storage=None):
    """Create a record in ``self.collection`` identified and titled by *id*.

    The new record is appended to ``self.records`` and returned.  When
    *media* is truthy, a media entry with url ``'<media>.txt'`` is attached,
    using *storage* or, if that is falsy, ``self.storage``.

    NOTE(review): the record name is the literal string ``'id'``, not the
    *id* argument -- confirm this is intended.
    """
    new_record = Record.objects.create(name='id')
    CollectionItem.objects.create(
        collection=self.collection, record=new_record)
    # Identifier and title both carry the same value.
    for field_name in ('identifier', 'title'):
        FieldValue.objects.create(
            record=new_record, field=standardfield(field_name), value=id)
    self.records.append(new_record)
    if not media:
        return new_record
    new_record.media_set.create(
        storage=storage or self.storage, url='%s.txt' % media)
    return new_record
def _annotation_filter(self):
    """Return the lookup values that identify this item's annotation.

    The result combines the presentation owner, this item's id and content
    type (``PresentationItem``), the standard ``description`` field and the
    item's record.
    """
    criteria = {}
    criteria['owner'] = self.presentation.owner
    criteria['context_id'] = self.id
    criteria['context_type'] = ContentType.objects.get_for_model(
        PresentationItem)
    criteria['field'] = standardfield('description')
    criteria['record'] = self.record
    return criteria
def _preload_work_to_images(self, record_ids):
    """Map records in *record_ids* to records that reference them.

    Collects the identifier values and dc.relation/IsPartOf values of the
    given records, then looks up every dc.relation/IsPartOf field value
    matching one of them.  Returns a dict whose keys are ids from
    *record_ids* and whose values are lists of ids of the other records
    pointing at them; a record is never listed under itself.
    """
    criteria = Q(
        field__in=standardfield('identifier', equiv=True),
    ) | Q(
        field__standard__prefix='dc',
        field__name='relation',
        refinement='IsPartOf',
    )
    identifiers = FieldValue.objects.filter(
        criteria,
        record__in=record_ids,
    ).values_list('value', 'record__id')

    # index_value is matched on the first 32 characters of the value.
    images = FieldValue.objects.filter(
        field__standard__prefix='dc',
        field__name='relation',
        refinement='IsPartOf',
        value__in=(pair[0] for pair in identifiers),
        index_value__in=(pair[0][:32] for pair in identifiers),
    ).values_list('record__id', 'value')

    owners_by_value = {}
    for value, owner_id in identifiers:
        owners_by_value.setdefault(value, []).append(owner_id)

    work_to_images = {}
    for image_record_id, target in images:
        for work_id in owners_by_value.get(target, []):
            if work_id == image_record_id:
                continue
            work_to_images.setdefault(work_id, []).append(image_record_id)
    return work_to_images
def find_records_without_media(request):
    """Render the form and results for finding records lacking media.

    Only storage and collections the user may manage are offered (404 if
    either list is empty).  On a valid POST, ``analyze_records`` is run for
    the chosen collection and storage, and the identifier values of the
    returned records are listed in value order.
    """
    user = request.user
    available_storage = get_list_or_404(
        filter_by_access(user, Storage, manage=True)
        .order_by('title')
        .values_list('id', 'title'))
    available_collections = get_list_or_404(
        filter_by_access(user, Collection, manage=True))

    class SelectionForm(forms.Form):
        collection = forms.ChoiceField(choices=(
            (entry.id, entry.title)
            for entry in sorted(available_collections,
                                key=lambda entry: entry.title)))
        storage = forms.ChoiceField(choices=available_storage)

    identifiers = records = []
    analyzed = False

    if request.method != 'POST':
        form = SelectionForm(request.GET)
    else:
        form = SelectionForm(request.POST)
        if form.is_valid():
            # Re-check access on the submitted ids before analysing.
            collection = get_object_or_404(filter_by_access(
                user,
                Collection.objects.filter(id=form.cleaned_data['collection']),
                manage=True))
            storage = get_object_or_404(filter_by_access(
                user,
                Storage.objects.filter(id=form.cleaned_data['storage']),
                manage=True))
            records = analyze_records(collection, storage)
            analyzed = True
            identifier_values = FieldValue.objects.filter(
                field__in=standardfield('identifier', equiv=True),
                record__in=records)
            identifiers = identifier_values.order_by('value').values_list(
                'value', flat=True)

    context = {
        'form': form,
        'identifiers': identifiers,
        'records': records,
        'analyzed': analyzed,
    }
    return render_to_response('storage_find_records_without_media.html',
                              context,
                              context_instance=RequestContext(request))
def create_record(self, id):
    """Create a record in ``self.collection`` with identifier value *id*.

    The record is appended to ``self.records`` and returned.

    NOTE(review): the record name is the literal string ``'id'``, not the
    *id* argument -- confirm this is intended.
    """
    new_record = Record.objects.create(name='id')
    CollectionItem.objects.create(
        record=new_record,
        collection=self.collection,
    )
    identifier_field = standardfield('identifier')
    FieldValue.objects.create(
        record=new_record,
        field=identifier_field,
        value=id,
    )
    self.records.append(new_record)
    return new_record
def _annotation_filter(self):
    """Return the lookup values that identify this item's annotation.

    Keys: ``owner`` (the presentation owner), ``context_id`` and
    ``context_type`` (this item's id and the ``PresentationItem`` content
    type), ``field`` (the standard ``description`` field) and ``record``.
    """
    return {
        'owner': self.presentation.owner,
        'context_id': self.id,
        'context_type': ContentType.objects.get_for_model(PresentationItem),
        'field': standardfield('description'),
        'record': self.record,
    }
def create_record(self, remote_id):
    """Create a local record mirroring record *remote_id* of the shared source.

    Fetches the remote ``api-record`` JSON, creates the record with one
    field value per metadata entry, adds it to the collection from
    ``self.get_collection()`` and runs a ``shared_download_media`` job for
    the image.  Returns the new record.
    """
    collection = self.get_collection()
    parsed = urlparse(self.shared.url)
    server = parsed.scheme + '://' + parsed.netloc
    record_path = reverse('api-record', kwargs={
        'id': remote_id,
        'name': '_',
    })
    url = server + record_path

    response = _fetch_url(url, self.shared.username, self.shared.password)
    remote = json.loads(response.read())['record']
    title = remote['title']
    image_url = remote['image']
    # Relative image paths are resolved against the remote server.
    if '://' not in image_url:
        image_url = server + image_url

    record = Record.objects.create(
        name=title, source=url, manager=self.get_source_id())
    fallback_field, _created = Field.objects.get_or_create(
        name='shared-data',
        defaults={'label': 'Metadata'},
    )

    for position, entry in enumerate(remote['metadata']):
        # Entries without a usable Dublin Core mapping go to the fallback.
        target_field = fallback_field
        if entry.get('dc'):
            try:
                target_field = standardfield(entry['dc'])
            except Field.DoesNotExist:
                target_field = fallback_field
        FieldValue.objects.create(
            record=record,
            field=target_field,
            order=entry.get('order', position),
            value=entry['value'],
            label=entry['label'],
        )

    CollectionItem.objects.create(collection=collection, record=record)

    # The media file itself is downloaded by a job.
    job_args = simplejson.dumps(
        dict(shared_id=self.shared.id, record=record.id, url=image_url))
    job = JobInfo.objects.create(func='shared_download_media', arg=job_args)
    job.run()
    return record
def create_record(self, remote_id):
    """Create a local record mirroring record *remote_id* of the shared source.

    Fetches the remote ``api-record`` JSON, creates the record with one
    field value per metadata entry, adds it to the collection from
    ``self.get_collection()`` and runs a ``shared_download_media`` job for
    the image.  Returns the new record.
    """
    collection = self.get_collection()
    shared_location = urlparse(self.shared.url)
    server = shared_location.scheme + '://' + shared_location.netloc
    url = server + reverse(
        'api-record', kwargs={'id': remote_id, 'name': '_'})

    payload = _fetch_url(
        url, self.shared.username, self.shared.password).read()
    remote = json.loads(payload)['record']
    title = remote['title']
    image_url = remote['image']
    # Relative image paths are resolved against the remote server.
    if '://' not in image_url:
        image_url = server + image_url

    record = Record.objects.create(
        name=title, source=url, manager=self.get_source_id())
    unmapped_field, _created = Field.objects.get_or_create(
        name='shared-data', defaults={'label': 'Metadata'})

    def resolve_field(entry):
        # Use the mapped standard field when one is named and exists.
        if not entry.get('dc'):
            return unmapped_field
        try:
            return standardfield(entry['dc'])
        except Field.DoesNotExist:
            return unmapped_field

    for position, entry in enumerate(remote['metadata']):
        FieldValue.objects.create(
            record=record,
            field=resolve_field(entry),
            order=entry.get('order', position),
            value=entry['value'],
            label=entry['label'],
        )

    CollectionItem.objects.create(collection=collection, record=record)

    # The media file itself is downloaded by a job.
    job = JobInfo.objects.create(
        func='shared_download_media',
        arg=json.dumps(dict(
            shared_id=self.shared.id, record=record.id, url=image_url)),
    )
    job.run()
    return record
def add_field(f, v, o):
    """Store value(s) *v* for standard field *f* on the enclosing ``record``.

    *v* may be a single value or an arbitrarily nested list of values; one
    field value with order *o* is created per truthy leaf.  Falsy values
    (``None``, ``''``, empty lists) are skipped.  Storing is best-effort:
    a failure to create one value is ignored so the rest still get stored.
    """
    if isinstance(v, list):
        # isinstance (not type() ==) so list subclasses are expanded too
        for w in v:
            add_field(f, w, o)
    elif v:
        # TODO: neaten?
        try:
            FieldValue.objects.create(record=record,
                                      field=standardfield(f),
                                      order=o,
                                      value=v)
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` so
            # KeyboardInterrupt / SystemExit are not swallowed.
            pass
def match_up_media(storage, collection):
    """Pair unattached files in *storage* with records of *collection*.

    For every file reported by ``analyze_media``, looks for records in
    *collection* without an owner whose identifier equals either the file
    name or the file name without its extension.  Returns a list of
    ``(record, file)`` tuples for files matching exactly one record.
    """
    _broken, candidate_files = analyze_media(storage)
    identifier_fields = standardfield('identifier', equiv=True)
    matches = []
    for path in candidate_files:
        # Identifiers may be the full file name or just its base name.
        file_name = os.path.basename(path)
        base_name = os.path.splitext(file_name)[0]
        found = Record.by_fieldvalue(
            identifier_fields, (base_name, file_name)
        ).filter(collection=collection, owner=None)
        if len(found) != 1:
            continue
        matches.append((found[0], path))
    return matches
def create_record(self, url):
    """Create a record from the NASA image page at *url*.

    Scrapes date, title, description, id and credit from the page, stores
    them as field values, adds the record to the collection from
    ``self.get_collection()`` and runs a ``nasa_download_media`` job for
    the media link with the largest pixel dimensions.  Returns the record.
    """
    collection = self.get_collection()
    page = BeautifulSoup(urllib2.urlopen(url))

    def value_cell(label):
        # The value sits in the <td> following the cell holding the label.
        return page.find(text=label).parent.findNextSibling('td')

    def pixel_area(entry):
        found = re.search(r'(?P<width>\d+) x (?P<height>\d+)', entry[1])
        if not found:
            return 0
        return int(found.group('width')) * int(found.group('height'))

    # get metadata
    date = value_cell('Date: ').next
    title = value_cell('Title: ').next
    description = value_cell('Description: ').next
    id = value_cell('ID: ').next
    credit_url = value_cell('Credit: ').findNext('a')['href']
    credit_title = value_cell('Credit: ').findNext('a').next

    record = Record.objects.create(
        name=title, source=url, manager='nasaimageexchange')
    field_values = [
        ('title', title),
        ('description', description),
        ('date', date),
        ('identifier', id),
        ('contributor', credit_title),
        ('contributor', credit_url),
    ]
    for order, (field_name, value) in enumerate(field_values):
        FieldValue.objects.create(
            record=record,
            field=standardfield(field_name),
            order=order,
            value=value,
        )
    CollectionItem.objects.create(collection=collection, record=record)

    # media links and dimensions, largest first
    links = value_cell('Format: ').findAll('a')
    media = [(link['href'], link.next) for link in links]
    media = sorted(media, key=pixel_area, reverse=True)

    # create job to download actual media file
    job_args = simplejson.dumps(dict(record=record.id, url=media[0][0]))
    job = JobInfo.objects.create(func='nasa_download_media', arg=job_args)
    job.run()
    return record
def add_field(f, v, o):
    """Store value(s) *v* for standard field *f* on the enclosing ``record``.

    *v* may be a single value or an arbitrarily nested list of values; one
    field value with order *o* is created per truthy leaf.  Falsy values
    (``None``, ``''``, empty lists) are skipped.  Storing is best-effort:
    a failure to create one value is ignored so the rest still get stored.
    """
    if isinstance(v, list):
        # isinstance (not type() ==) so list subclasses are expanded too
        for w in v:
            add_field(f, w, o)
    elif v:
        # TODO: neaten?
        try:
            FieldValue.objects.create(
                record=record, field=standardfield(f), order=o, value=v)
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` so
            # KeyboardInterrupt / SystemExit are not swallowed.
            pass
def redirect_to_video(request, id):
    """Show the media player for the online-video record identified by *id*.

    Looks up records in the ``online-video-collection`` whose identifier
    (or an equivalent field) equals *id*, raising ``Http404`` when there is
    none.  An ``ovc-redirect`` activity is logged for the first match
    before the viewer shell is rendered for it.
    """
    identifier = standardfield('identifier')
    id_fields = [identifier]
    id_fields.extend(identifier.get_equivalent_fields())

    records = Record.by_fieldvalue(id_fields, id).filter(
        collection__name='online-video-collection')
    if not records:
        raise Http404()

    Activity.objects.create(
        event='ovc-redirect',
        request=request,
        content_object=records[0],
        data=dict(id=id),
    )
    request.master_template = 'ovc_master.html'
    return viewer_shell(
        request, 'mediaplayer', records[0].id, template='ovc_player.html')
def main(request, year='1994'):
    """Render the Furious Flower page for *year*.

    Records of the ``furious-flower-<year>`` collection are listed in the
    numeric order given by their ``relation`` field values; records without
    such a value follow at the end.

    Raises ``Collection.DoesNotExist`` if there is no collection for
    *year* and ``ValueError`` if a relation value is not an integer.
    """
    collection = Collection.objects.get(name='furious-flower-%s' % year)
    relation_field = standardfield('relation')
    order = FieldValue.objects.filter(
        field=relation_field,
        record__collection=collection,
    ).values_list('record__id', 'value')
    # Each entry is (record id, position); tuple-parameter lambdas are not
    # valid on Python 3, so index into the pair instead.
    order = sorted(order, key=lambda pair: int(pair[1]))
    records = dict((r.id, r) for r in collection.records.all())
    sorted_records = []
    for r, o in order:
        # pop() so a record with several relation values is listed once
        if r in records:
            sorted_records.append(records.pop(r))
    # Whatever is left has no position and goes last.
    sorted_records.extend(records.values())
    return render_to_response('furiousflower-main.html',
                              {'records': sorted_records,
                               'year': year,
                               },
                              context_instance=RequestContext(request))
def _dummy_record(title, url):
    """Create a 'dummy'-managed record titled *title* with source *url*.

    The record gets a title field value, is added to the collection from
    ``_get_collection()`` and a ``dummy_download_media`` job is created and
    run for it.  Progress markers are printed along the way.  Returns the
    new record.
    """
    # print() with one pre-formatted argument behaves the same on
    # Python 2 and Python 3.
    print('** _dummy_record (%s, %s)' % (title, url))
    record = Record.objects.create(name=title, source=url, manager='dummy')
    print('** _dummy_record.foo')
    FieldValue.objects.create(record=record,
                              field=standardfield('title'),
                              order=0,
                              value=title)
    collection = _get_collection()
    print('** _dummy_record.bar')
    CollectionItem.objects.create(collection=collection, record=record)
    print('** _dummy_record.baz')
    job = JobInfo.objects.create(func='dummy_download_media',
                                 arg=simplejson.dumps({
                                     'record': record.id,
                                     'url': url,
                                 }))
    print('** _dummy_record.qux')
    job.run()
    return record
def handle(self, *args, **kwargs):
    """Rename records to the slug of their identifier field values.

    For every identifier (or equivalent) field value, the owning record's
    name is set to the slugified value when it differs, and the record is
    saved with ``force_update_name=True``.  Prints the number of updates.
    """
    updated = 0
    id_fields = standardfield('identifier', equiv=True)
    identifier_values = FieldValue.objects.select_related('record').filter(
        field__in=id_fields)
    pb = ProgressBar(identifier_values.count())
    for count, identifier_value in enumerate(identifier_values):
        name = slugify(identifier_value.value)
        if name != identifier_value.record.name:
            identifier_value.record.name = name
            identifier_value.record.save(force_update_name=True)
            updated += 1
        pb.update(count)
    pb.done()
    # Function-call form works on both Python 2 and Python 3.
    print("Updated %d record objects" % updated)
def handle(self, *args, **kwargs):
    """Rename records to the slug of their identifier field values.

    For every identifier (or equivalent) field value, the owning record's
    name is set to the slugified value when it differs, and the record is
    saved with ``force_update_name=True``.  Prints the number of updates.
    """
    identifier_values = FieldValue.objects.select_related('record').filter(
        field__in=standardfield('identifier', equiv=True))
    progress = ProgressBar(identifier_values.count())
    updated = 0
    for position, identifier_value in enumerate(identifier_values):
        slug = slugify(identifier_value.value)
        owner = identifier_value.record
        if slug != owner.name:
            owner.name = slug
            owner.save(force_update_name=True)
            updated += 1
        progress.update(position)
    progress.done()
    print("Updated %d record objects" % updated)
def _preload_image_to_works(self, record_ids):
    """Map records in *record_ids* to the records they are part of.

    Reads the dc.relation/IsPartOf values of the given records and resolves
    each value to a record carrying it as identifier (or equivalent).
    Returns a dict of record id -> list of resolved record ids; relation
    values that resolve to nothing are left out.  When several records
    share an identifier value only one of them is kept by the lookup dict.
    """
    work_relation = FieldValue.objects.filter(
        record__in=record_ids,
        field__standard__prefix='dc',
        field__name='relation',
        refinement='IsPartOf',
    ).values_list('record__id', 'value')

    # index_value is matched on the first 32 characters of the value.
    work_by_identifier = dict(FieldValue.objects.filter(
        field__in=standardfield('identifier', equiv=True),
        value__in=(relation[1] for relation in work_relation),
        index_value__in=(relation[1][:32] for relation in work_relation),
    ).values_list('value', 'record__id'))

    image_to_works = {}
    for image_id, work_identifier in work_relation:
        resolved = work_by_identifier.get(work_identifier)
        if not resolved:
            continue
        image_to_works.setdefault(image_id, []).append(resolved)
    return image_to_works
def _preload_work_to_images(self, record_ids):
    """Map records in *record_ids* to records that reference them.

    Collects the identifier values and dc.relation/IsPartOf values of the
    given records, then looks up every dc.relation/IsPartOf field value
    matching one of them.  Returns a dict whose keys are ids from
    *record_ids* and whose values are lists of ids of the other records
    pointing at them; a record is never listed under itself.
    """
    by_identifier = Q(field__in=standardfield('identifier', equiv=True))
    by_relation = Q(
        field__standard__prefix='dc',
        field__name='relation',
        refinement='IsPartOf',
    )
    identifiers = FieldValue.objects.filter(
        by_identifier | by_relation,
        record__in=record_ids,
    ).values_list('value', 'record__id')

    # index_value is matched on the first 32 characters of the value.
    referencing = FieldValue.objects.filter(
        field__standard__prefix='dc',
        field__name='relation',
        refinement='IsPartOf',
        value__in=(entry[0] for entry in identifiers),
        index_value__in=(entry[0][:32] for entry in identifiers),
    ).values_list('record__id', 'value')

    records_by_value = {}
    for value, holder_id in identifiers:
        records_by_value.setdefault(value, []).append(holder_id)

    work_to_images = {}
    for source_id, referenced_value in referencing:
        holders = records_by_value.get(referenced_value, [])
        for holder_id in (h for h in holders if h != source_id):
            work_to_images.setdefault(holder_id, []).append(source_id)
    return work_to_images
def create_record(self, url):
    """Create a record from the NASA image page at *url*.

    Scrapes date, title, description, id and credit from the page, stores
    them as field values, adds the record to the collection from
    ``self.get_collection()`` and runs a ``nasa_download_media`` job for
    the media link with the largest pixel dimensions.  Returns the record.
    """
    collection = self.get_collection()
    soup = BeautifulSoup(urllib2.urlopen(url))

    def cell_for(label):
        # The value sits in the <td> following the cell holding the label.
        return soup.find(text=label).parent.findNextSibling("td")

    def sort_by_dimension(entry):
        dimensions = re.search(
            r"(?P<width>\d+) x (?P<height>\d+)", entry[1])
        if dimensions is None:
            return 0
        width = int(dimensions.group("width"))
        height = int(dimensions.group("height"))
        return width * height

    # get metadata
    date = cell_for("Date: ").next
    title = cell_for("Title: ").next
    description = cell_for("Description: ").next
    id = cell_for("ID: ").next
    credit_url = cell_for("Credit: ").findNext("a")["href"]
    credit_title = cell_for("Credit: ").findNext("a").next

    record = Record.objects.create(
        name=title, source=url, manager="nasaimageexchange")

    def store(field_name, order, value):
        FieldValue.objects.create(
            record=record,
            field=standardfield(field_name),
            order=order,
            value=value,
        )

    store("title", 0, title)
    store("description", 1, description)
    store("date", 2, date)
    store("identifier", 3, id)
    store("contributor", 4, credit_title)
    store("contributor", 5, credit_url)
    CollectionItem.objects.create(collection=collection, record=record)

    # media links and dimensions, largest first
    media = sorted(
        [(a["href"], a.next) for a in cell_for("Format: ").findAll("a")],
        key=sort_by_dimension,
        reverse=True,
    )

    # create job to download actual media file
    job = JobInfo.objects.create(
        func="nasa_download_media",
        arg=json.dumps(dict(record=record.id, url=media[0][0])),
    )
    job.run()
    return record
def setUp(self):
    """Create the 'facet-fields' field set with the title and creator fields."""
    self.fieldset = FieldSet.objects.create(title='facet-fields')
    for field_name in ('title', 'creator'):
        FieldSetField.objects.create(
            fieldset=self.fieldset,
            field=standardfield(field_name),
        )
def create_record(self, remote_id):
    """Create a record for the Flickr photo *remote_id*.

    Reads the photo info and sizes through ``self.flickr``, stores the
    available metadata as field values, adds the record to the collection
    from ``self.get_collection()`` and runs a ``flickr_download_media`` job
    for the last entry of the size list.  Returns the new record.
    """
    collection = self.get_collection()
    results = self.flickr.flickr_call(method='flickr.photos.getInfo',
                                      api_key=settings.FLICKR_KEY,
                                      photo_id=remote_id,
                                      format='xmlnode')

    def photo_value(getter):
        # Missing nodes or attributes simply yield None.
        try:
            return getter(results.photo[0])
        except (KeyError, AttributeError):
            return None

    username = photo_value(lambda r: r.owner[0]['username'])
    realname = photo_value(lambda r: r.owner[0]['realname'])
    title = photo_value(lambda r: r.title[0].text) or 'Untitled'
    description = photo_value(lambda r: r.description[0].text)
    date = photo_value(lambda r: r.dates[0]['taken'])
    url = photo_value(lambda r: r.urls[0].url[0].text)
    tag_nodes = photo_value(lambda r: r.tags[0].tag)
    tags = [node.text for node in tag_nodes] if tag_nodes else []

    info = self.flickr.flickr_call(method='flickr.photos.getSizes',
                                   api_key=settings.FLICKR_KEY,
                                   photo_id=remote_id,
                                   format='xmlnode')
    image_url = info.sizes[0].size[-1]['source']

    record = Record.objects.create(name=title, source=url, manager='flickr')

    def store(field_name, order, value):
        FieldValue.objects.create(record=record,
                                  field=standardfield(field_name),
                                  order=order,
                                  value=value)

    store('title', 0, title)
    if description:
        store('description', 1, description)
    if date:
        store('date', 2, date)
    store('identifier', 3, remote_id)
    if username:
        store('contributor', 4, username)
    if realname:
        store('contributor', 5, realname)
    for tag in tags:
        store('subject', 6, tag)
    if url:
        store('source', 7, url)

    CollectionItem.objects.create(collection=collection, record=record)

    # create job to download actual media file
    job_args = simplejson.dumps(dict(record=record.id, url=image_url))
    job = JobInfo.objects.create(func='flickr_download_media', arg=job_args)
    job.run()
    return record
def import_files(request):
    """Upload a single file and attach it to a matching (or new) record.

    GET renders the upload form.  A valid POST matches the uploaded file to
    a record of the chosen collection by identifier (file name with or
    without extension) and, depending on the form options, adds the file,
    replaces existing media, creates a new record, or skips the file.  The
    outcome is reported as JSON when the ``swfupload`` POST value is
    ``'true'``, otherwise as a user message followed by a redirect.
    """
    # 404 unless the user has writable storage and at least one collection.
    available_storage = get_list_or_404(filter_by_access(request.user, Storage, write=True).order_by('title'))
    available_collections = get_list_or_404(filter_by_access(request.user, Collection))
    writable_collection_ids = list(filter_by_access(request.user, Collection, write=True).values_list('id', flat=True))
    # NOTE(review): ``choices`` is not used anywhere else in this function.
    storage_choices = choices = [make_storage_select_choice(s, request.user) for s in available_storage]

    class UploadFileForm(forms.Form):
        # Collections the user can write to are prefixed with '*'.
        collection = forms.ChoiceField(choices=((c.id, '%s%s' % ('*' if c.id in writable_collection_ids else '', c.title)) for c in sorted(available_collections, key=lambda c: c.title)))
        storage = forms.ChoiceField(choices=storage_choices)
        file = forms.FileField()
        create_records = forms.BooleanField(required=False)
        replace_files = forms.BooleanField(required=False, label='Replace files of same type')
        multiple_files = forms.BooleanField(required=False, label='Allow multiple files of same type')
        personal_records = forms.BooleanField(required=False)

        def clean(self):
            # Non-personal uploads are only allowed into writable collections.
            cleaned_data = self.cleaned_data
            if any(self.errors):
                return cleaned_data
            personal = cleaned_data['personal_records']
            if not personal:
                if not int(cleaned_data['collection']) in writable_collection_ids:
                    self._errors['collection'] = ErrorList(["Can only add personal records to selected collection"])
                    del cleaned_data['collection']
            return cleaned_data

    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            create_records = form.cleaned_data['create_records']
            replace_files = form.cleaned_data['replace_files']
            multiple_files = form.cleaned_data['multiple_files']
            personal_records = form.cleaned_data['personal_records']
            # Write access to the collection is only required for non-personal records.
            collection = get_object_or_404(filter_by_access(request.user, Collection.objects.filter(id=form.cleaned_data['collection']), write=True if not personal_records else None))
            # Only the part of the storage choice before the first comma is used as id.
            storage = get_object_or_404(filter_by_access(request.user, Storage.objects.filter(id=form.cleaned_data['storage'].split(',')[0]), write=True))
            file = request.FILES['file']
            record = None
            # The limit is multiplied by 1024 before comparing with the size
            # in bytes (presumably a limit in KB -- confirm).
            limit = storage.get_upload_limit(request.user)
            if limit > 0 and file.size > limit * 1024:
                result = "The uploaded file is too large (%d>%d)." % (file.size, limit * 1024)
            else:
                # Fall back to the browser-supplied type if the name gives none.
                mimetype = mimetypes.guess_type(file.name)[0] or file.content_type
                owner = request.user if personal_records else None
                id = os.path.splitext(file.name)[0]
                # find record by identifier
                titlefield = standardfield('title')
                idfield = standardfield('identifier')
                # Match identifiers that are either full file name (with extension) or just base name match
                records = find_record_by_identifier((id, file.name,), collection, owner=owner, ignore_suffix=multiple_files)
                result = "File skipped."
                if len(records) == 1:
                    # Matching record found
                    record = records[0]
                    media = record.media_set.filter(storage=storage, mimetype=mimetype)
                    media_same_id = media.filter(name=id)
                    if len(media) == 0 or (len(media_same_id) == 0 and multiple_files):
                        # No media yet
                        media = Media.objects.create(record=record, name=id, storage=storage, mimetype=mimetype)
                        media.save_file(file.name, file)
                        result = "File added (Identifier '%s')." % id
                    elif len(media_same_id) > 0 and multiple_files:
                        # Replace existing media with same name and mimetype
                        media = media_same_id[0]
                        media.delete_file()
                        media.save_file(file.name, file)
                        result = "File replaced (Identifier '%s')." % id
                    elif replace_files:
                        # Replace existing media with same mimetype
                        media = media[0]
                        media.delete_file()
                        media.save_file(file.name, file)
                        result = "File replaced (Identifier '%s')." % id
                    else:
                        result = "File skipped, media files already attached."
                elif len(records) == 0:
                    # No matching record found
                    if create_records:
                        # Create a record
                        record = Record.objects.create(name=id, owner=owner)
                        CollectionItem.objects.create(collection=collection, record=record)
                        FieldValue.objects.create(record=record, field=idfield, value=id, order=0)
                        FieldValue.objects.create(record=record, field=titlefield, value=id, order=1)
                        media = Media.objects.create(record=record, name=id, storage=storage, mimetype=mimetype)
                        media.save_file(file.name, file)
                        result = "File added to new record (Identifier '%s')." % id
                    else:
                        result = "File skipped, no matching record found (Identifier '%s')." % id
                else:
                    result = "File skipped, multiple matching records found (Identifier '%s')." % id
                    # Multiple matching records found
                    pass
            # swfupload clients get the rendered result wrapped in JSON.
            if request.POST.get('swfupload') == 'true':
                html = render_to_string('storage_import_file_response.html', {'result': result, 'record': record,}, context_instance=RequestContext(request) )
                return HttpResponse(content=simplejson.dumps(dict(status='ok', html=html)), mimetype='application/json')
            request.user.message_set.create(message=result)
            # NOTE(review): 'next' is taken from the query string and used as
            # the redirect target without validation -- confirm this is safe.
            next = request.GET.get('next', request.get_full_path())
            return HttpResponseRedirect(next)
        else:
            # invalid form submission
            # Form errors are also reported with status 'ok'; other clients
            # fall through and get the bound form re-rendered below.
            if request.POST.get('swfupload') == 'true':
                html = render_to_string('storage_import_file_response.html', {'result': form.errors}, context_instance=RequestContext(request) )
                return HttpResponse(content=simplejson.dumps(dict(status='ok', html=html)), mimetype='application/json')
    else:
        form = UploadFileForm()
    return render_to_response('storage_import_files.html', {'upload_form': form, }, context_instance=RequestContext(request))
def create_record(self, remote_id):
    """Create a record for the Flickr photo *remote_id*.

    Reads the photo info and sizes through ``self.flickr``, stores the
    available metadata as field values, adds the record to the collection
    from ``self.get_collection()`` and queues the ``flickr_download_media``
    task for the last entry of the size list.  Returns the new record.
    """
    collection = self.get_collection()
    results = self.flickr.flickr_call(method='flickr.photos.getInfo',
                                      api_key=settings.FLICKR_KEY,
                                      photo_id=remote_id,
                                      format='xmlnode')

    def photo_value(getter):
        # Missing nodes or attributes simply yield None.
        try:
            return getter(results.photo[0])
        except (KeyError, AttributeError):
            return None

    username = photo_value(lambda r: r.owner[0]['username'])
    realname = photo_value(lambda r: r.owner[0]['realname'])
    title = photo_value(lambda r: r.title[0].text) or 'Untitled'
    description = photo_value(lambda r: r.description[0].text)
    date = photo_value(lambda r: r.dates[0]['taken'])
    url = photo_value(lambda r: r.urls[0].url[0].text)
    tag_nodes = photo_value(lambda r: r.tags[0].tag)
    tags = [node.text for node in tag_nodes] if tag_nodes else []

    info = self.flickr.flickr_call(method='flickr.photos.getSizes',
                                   api_key=settings.FLICKR_KEY,
                                   photo_id=remote_id,
                                   format='xmlnode')
    image_url = info.sizes[0].size[-1]['source']

    record = Record.objects.create(name=title, source=url, manager='flickr')

    def store(field_name, order, value):
        FieldValue.objects.create(record=record,
                                  field=standardfield(field_name),
                                  order=order,
                                  value=value)

    store('title', 0, title)
    if description:
        store('description', 1, description)
    if date:
        store('date', 2, date)
    store('identifier', 3, remote_id)
    if username:
        store('contributor', 4, username)
    if realname:
        store('contributor', 5, realname)
    for tag in tags:
        store('subject', 6, tag)
    if url:
        store('source', 7, url)

    CollectionItem.objects.create(collection=collection, record=record)

    # create job to download actual media file
    from .tasks import flickr_download_media
    flickr_download_media.delay(record.id, image_url)
    return record
def dido_import():
    """Import and convert a set of JPEG or TIFF images into DIDO.

    Every file in INCOMING_DIR with a valid mimetype is converted, its
    original is moved to the archive, and a database record is created for
    it (or, if a record with that image number exists, its thumbnails are
    deleted so the file is replaced).  A report is always sent at the end.

    This function does not return: it ends the process via ``sys.exit``
    with status 0 on success (or when there is nothing to import) and 1
    when any error occurred.

    Raises ``AssertionError`` if the configured directories do not exist.
    """
    thumb_dir = os.path.join(SCRATCH_DIR, MDID_COLLECTION_ID)
    assert all([
        os.path.exists(ARCHIVE_DIR),
        os.path.exists(COLLECTION_DIR),
        os.path.exists(INCOMING_DIR),
    ]), """Must specify location of directories in the INCOMING_DIR, ARCHIVE_DIR and COLLECTION_DIR config variables"""
    assert all([
        os.path.exists(SCRATCH_DIR),
        os.path.exists(thumb_dir),
    ]), """Must specify the location of the thumb directory in the SCRATCH_DIR mdid3 config variable, and the collection id in the MDID_COLLECTION_ID config variable"""

    if not os.listdir(INCOMING_DIR):
        # No files to import - nothing to do!
        print('No files to import, exiting')
        sys.exit(0)

    try:
        # Set up the database access objects
        storage = Storage.objects.get(id=2)
        identifier_field = standardfield('identifier')
        image_number_field = Field.objects.get(label='Image Number')
    except Exception:
        # Error accessing the database
        exception = traceback.format_exc()
        send_report(exception=exception)
        sys.exit(1)

    try:
        # Set up metadata variables
        meta = {}
        exception = None
        with lock_file('dido.lock'):
            for file_name in os.listdir(INCOMING_DIR):
                print("Attempting to import file %s..." % file_name)
                file_id, _ = os.path.splitext(file_name)
                file_type = extract_mimetype(
                    os.path.join(INCOMING_DIR, file_name),
                )
                if file_type not in VALID_MIMETYPES:
                    print('Invalid filetype, skipping %s' % file_name)
                    continue

                # Generate absolute file paths for the original and new files
                try:
                    print("Generating paths for %s..." % file_name)
                    paths = generate_paths(file_name, file_id)
                except OSError:
                    exception = traceback.format_exc()
                    raise

                # Create the new images and save them.  A fixed quality is
                # used for every file type.
                quality = 70
                try:
                    print("Attempting to convert %s. File type: %s" % (file_name, file_type))
                    convert_image(
                        paths['orig_file'],
                        paths['converted_file'],
                        quality,
                    )
                except IOError:
                    exception = traceback.format_exc()
                    raise

                # Move the original file to the archive directory
                try:
                    if not os.path.exists(paths['archive_dir']):
                        os.makedirs(paths['archive_dir'])
                    print("Moving the original file %s to the archive..." % file_name)
                    shutil.move(paths['orig_file'], paths['archive_file'])
                except (IOError, OSError):
                    exception = traceback.format_exc()
                    raise

                assert os.path.isfile(paths['archive_file']), \
                    '%s does not exist' % paths['archive_file']
                assert os.path.isfile(paths['converted_file']), \
                    '%s does not exist' % paths['converted_file']

                # Create the database records for this file
                try:
                    print("Attempting to see if this record already exists in db...")
                    field_value = FieldValue.objects.get(
                        field=image_number_field,
                        value=file_id,
                        index_value=file_id[:32],
                    )
                except FieldValue.DoesNotExist:
                    # Record doesn't exist, which means we need to create
                    # the initial record in the database.
                    print("No record found. Adding %s to database..." % file_name)
                    duplicate = False
                    add_record_to_database(
                        file_id=file_id,
                        field=identifier_field,
                        storage=storage,
                        image_number=image_number_field,
                    )
                else:
                    duplicate = True
                    record = field_value.record
                    print("Record already exists in database - replacing file")
                    delete_thumbs(record=record, thumb_dir=thumb_dir)

                # Add metadata to the meta dict
                meta[file_id + '.jpg'] = {
                    'path': paths['converted_file'],
                    'duplicate': duplicate,
                }
                # Two blank lines separate the output of consecutive files.
                print('')
                print('')
    except EnvironmentError:
        exception = traceback.format_exc()
        raise
    except Exception as e:
        # Record the traceback here as well; otherwise the report below
        # would omit the error and the process would exit with status 0.
        exception = traceback.format_exc()
        print(e)
        raise
    finally:
        # sys.exit() inside ``finally`` replaces any exception re-raised
        # above, so the failure is signalled through the exit status.
        send_report(meta, exception)
        sys.exit(1 if exception else 0)
def import_files(request):
    """Upload a single file and attach it to a matching (or new) record.

    GET renders the upload form.  A valid POST matches the uploaded file to
    a record of the chosen collection by identifier (file name with or
    without extension) and, depending on the form options, adds the file,
    replaces existing media, creates a new record, or skips the file.  The
    outcome is returned as JSON when the form's ``response_type`` is
    ``'json'``, otherwise as a message followed by a redirect.  An invalid
    POST re-renders the bound form.
    """
    # 404 unless the user has writable storage and at least one collection.
    available_storage = get_list_or_404(
        filter_by_access(request.user, Storage, write=True).order_by('title'))
    available_collections = get_list_or_404(
        filter_by_access(request.user, Collection))
    writable_collection_ids = list(
        filter_by_access(request.user, Collection,
                         write=True).values_list('id', flat=True))
    storage_choices = [
        make_storage_select_choice(s, request.user) for s in available_storage
    ]

    class UploadFileForm(forms.Form):
        # Collections the user can write to are prefixed with '*'.
        collection = forms.ChoiceField(choices=(
            (c.id,
             '%s%s' % ('*' if c.id in writable_collection_ids else '',
                       c.title))
            for c in available_collections))
        storage = forms.ChoiceField(choices=storage_choices)
        file = forms.FileField()
        create_records = forms.BooleanField(required=False)
        replace_files = forms.BooleanField(required=False,
                                           label='Replace files of same type')
        multiple_files = forms.BooleanField(
            required=False, label='Allow multiple files of same type')
        personal_records = forms.BooleanField(required=False)
        response_type = forms.CharField(required=False,
                                        widget=forms.HiddenInput)

        def clean(self):
            # Non-personal uploads are only allowed into writable collections.
            cleaned_data = self.cleaned_data
            if any(self.errors):
                return cleaned_data
            personal = cleaned_data['personal_records']
            if not personal:
                if int(cleaned_data['collection']) not in \
                        writable_collection_ids:
                    self._errors['collection'] = \
                        ErrorList([
                            "Can only add personal records "
                            "to selected collection"
                        ])
                    del cleaned_data['collection']
            return cleaned_data

    if request.method == 'POST':
        form = UploadFileForm(request.POST, request.FILES)
        if form.is_valid():
            create_records = form.cleaned_data['create_records']
            replace_files = form.cleaned_data['replace_files']
            multiple_files = form.cleaned_data['multiple_files']
            personal_records = form.cleaned_data['personal_records']
            # Write access to the collection is only required for
            # non-personal records.
            collection = get_object_or_404(
                filter_by_access(request.user,
                                 Collection.objects.filter(
                                     id=form.cleaned_data['collection']),
                                 write=True if not personal_records else None))
            # Only the part of the storage choice before the first comma is
            # used as id.
            storage = get_object_or_404(
                filter_by_access(
                    request.user,
                    Storage.objects.filter(
                        id=form.cleaned_data['storage'].split(',')[0]),
                    write=True))
            file = request.FILES['file']
            record = None
            # The limit is multiplied by 1024 before comparing with the size
            # in bytes (presumably a limit in KB -- confirm).
            limit = storage.get_upload_limit(request.user)
            if limit > 0 and file.size > limit * 1024:
                result = "The uploaded file is too large (%d>%d)." % (
                    file.size, limit * 1024)
            else:
                # Fall back to the browser-supplied type if the name gives none.
                mimetype = mimetypes.guess_type(file.name)[0] or \
                    file.content_type
                owner = request.user if personal_records else None
                id = os.path.splitext(file.name)[0]
                # find record by identifier
                titlefield = standardfield('title')
                idfield = standardfield('identifier')
                # Match identifiers that are either full file name
                # (with extension) or just base name match
                records = find_record_by_identifier(
                    (
                        id,
                        file.name,
                    ),
                    collection,
                    owner=owner,
                    ignore_suffix=multiple_files)
                result = "File skipped."
                if len(records) == 1:
                    # Matching record found
                    record = records[0]
                    media = record.media_set.filter(storage=storage,
                                                    mimetype=mimetype)
                    media_same_id = media.filter(name=id)
                    if len(media) == 0 or \
                            (len(media_same_id) == 0 and multiple_files):
                        # No media yet
                        media = Media.objects.create(record=record,
                                                     name=id,
                                                     storage=storage,
                                                     mimetype=mimetype)
                        media.save_file(file.name, file)
                        result = "File added (Identifier '%s')." % id
                    elif len(media_same_id) > 0 and multiple_files:
                        # Replace existing media with same name and mimetype
                        media = media_same_id[0]
                        media.delete_file()
                        media.save_file(file.name, file)
                        result = "File replaced (Identifier '%s')." % id
                    elif replace_files:
                        # Replace existing media with same mimetype
                        media = media[0]
                        media.delete_file()
                        media.save_file(file.name, file)
                        result = "File replaced (Identifier '%s')." % id
                    else:
                        result = "File skipped, media files already attached."
                elif len(records) == 0:
                    # No matching record found
                    if create_records:
                        # Create a record
                        record = Record.objects.create(name=id, owner=owner)
                        CollectionItem.objects.create(collection=collection,
                                                      record=record)
                        FieldValue.objects.create(record=record,
                                                  field=idfield,
                                                  value=id,
                                                  order=0)
                        FieldValue.objects.create(record=record,
                                                  field=titlefield,
                                                  value=id,
                                                  order=1)
                        media = Media.objects.create(record=record,
                                                     name=id,
                                                     storage=storage,
                                                     mimetype=mimetype)
                        media.save_file(file.name, file)
                        result = \
                            "File added to new record (Identifier '%s')." % id
                    else:
                        result = \
                            "File skipped, no matching record found " \
                            "(Identifier '%s')." % id
                else:
                    result = \
                        "File skipped, multiple matching records found " \
                        "(Identifier '%s')." % id
                    # Multiple matching records found
                    pass
            # Clients that asked for JSON get the result message directly.
            if form.cleaned_data['response_type'] == 'json':
                return HttpResponse(content=simplejson.dumps(
                    dict(status='ok', message=result)),
                    content_type='application/json')
            messages.add_message(request, messages.INFO, message=result)
            # NOTE(review): 'next' is taken from the query string and used as
            # the redirect target without validation -- confirm this is safe.
            next = request.GET.get('next', request.get_full_path())
            return HttpResponseRedirect(next)
        else:
            # Invalid form: fall through and re-render it with its errors.
            pass
    else:
        form = UploadFileForm()
    return render_to_response('storage_import_files.html', {
        'upload_form': form,
    },
        context_instance=RequestContext(request))
def main(request):
    """Render the landing page of the 'the-breeze' collection.

    Query parameters read from ``request.GET``:
        v: volume number to display; any missing, malformed or unknown
           value falls back to the lowest available volume.
        r: record id of the issue to open; any missing, malformed or
           unknown value falls back to the first issue of the volume.

    Returns the rendered ``thebreeze.html`` template.

    Raises Http404 when no volumes exist or the selected volume has no
    issues (previously these cases surfaced as an IndexError).
    """
    from django.http import Http404

    collection = Collection.objects.get(name='the-breeze')
    coverage = standardfield('coverage')
    date = standardfield('date')

    volume_values = FieldValue.objects.filter(
        record__collection=collection,
        field=coverage,
        label='Volume',
    ).values_list('value', flat=True).distinct()
    # A set removes values that differ as text but not as numbers.
    volumes = sorted(set(int(value) for value in volume_values))
    if not volumes:
        raise Http404('No volumes available')

    try:
        volume = int(request.GET.get('v'))
    except (ValueError, TypeError):
        volume = None
    if volume not in volumes:
        volume = volumes[0]

    record_ids = FieldValue.objects.filter(
        record__collection=collection,
        field=coverage,
        label='Volume',
        value=str(volume),
    ).values_list('record', flat=True)

    issues = FieldValue.objects.filter(
        record__in=record_ids,
        field=coverage,
        label='Issue',
    ).values_list('record', 'value')
    dates = FieldValue.objects.filter(
        record__in=record_ids,
        field=date,
    ).values_list('record', 'value')

    # Join dates to issues by record id.  Pairing the two sorted lists
    # positionally goes wrong as soon as one record lacks a date or has
    # more than one.  The smallest date value wins for a record with
    # several; a record without a date gets an empty string.
    date_by_record = {}
    for rec, value in sorted(dates):
        date_by_record.setdefault(rec, value)

    # Tuples of (issue number, date, record id), ordered by issue number.
    combined = sorted(
        (int(number), date_by_record.get(rec, ''), rec)
        for rec, number in issues
    )
    if not combined:
        raise Http404('No issues available for volume %s' % volume)

    try:
        record_id = int(request.GET.get('r'))
    except (ValueError, TypeError):
        record_id = None

    # First entry matching the requested record, else the first issue.
    selected = next(
        (entry for entry in combined if entry[2] == record_id),
        combined[0],
    )
    issue = selected[0]
    record_id = selected[2]

    viewer = pdfviewer(None, request, record_id)

    return render_to_response(
        'thebreeze.html',
        {
            'collection': collection,
            'breezelogo': 'breeze_logo_%s.png' %
                          random.choice('00 01 02 03 04'.split()),
            'volume': volume,
            'volumes': volumes,
            'issue': issue,
            'issues': combined,
            'record': record_id,
            'viewer': viewer,
            # pdfviewer may yield nothing usable; then there is no embed code
            'embedcode': viewer.embed_code(request, None) if viewer else None,
        },
        context_instance=RequestContext(request))
def _copy_slide_metadata(record, slide):
    """Store every metadata entry of ``slide`` as a field value on ``record``."""
    for metadata in slide['metadata']:
        # Best effort by design: a failure on one entry is printed and the
        # remaining entries are still processed.
        try:
            target = Field.objects.get(label=metadata['label'],
                                       standard__prefix='aae')
            record.fieldvalue_set.create(
                field=target,
                value=metadata['value'],
                label=metadata['label'],
            )
        except Exception as e:
            print(e)
            try:
                # Fall back to the first field with this label, whatever
                # standard it belongs to.
                target = Field.objects.filter(label=metadata['label'])
                record.fieldvalue_set.create(
                    field=target[0],
                    value=metadata['value'],
                    label=metadata['label'],
                )
                print("Ok, went with %s the first field I could find to go with!" % target[0])
            except Exception as e_two:
                print(e_two)
                print("ok, giving up!")


def _download_slide_media(record, slide, rc):
    """Fetch the image of ``slide`` and save it as a media file of ``record``."""
    media_url = mdid_base_url + slide['image']
    print('media_url: %s' % media_url)
    media_req = requests.get(media_url, cookies=rc)
    mimetype = media_req.headers['content-type']
    content = StringIO(media_req.content)

    # The header may carry parameters ("image/jpeg; charset=..."), and
    # guess_extension() returns None for unknown types and an arbitrary one
    # of several suffixes for JPEG, so normalise before choosing a suffix.
    base_type = mimetype.split(';')[0].strip().lower()
    suffix = guess_extension(base_type)
    if base_type == 'image/jpeg' or suffix == '.jpeg':
        filename = record.name + '.jpg'
        extension = 'JPEG'
    else:
        filename = record.name + (suffix or '')
        extension = os.path.splitext(mimetype)[0]
    print('extension %s' % extension)

    file_path = os.path.join(store.base, filename)
    print('saving media file for %s to %s' % (record.name, file_path))
    media = Media.objects.create(record=record,
                                 name=record.name,
                                 storage=store,
                                 mimetype=mimetype)
    media.save_file(filename, content)


def presentation_import(pres_ids, rc):
    """Recreate remote presentations locally, one per id in ``pres_ids``.

    For each id the presentation JSON is fetched from
    ``http://mdid3.temple.edu/api/presentation/<id>/`` using ``rc`` as the
    request cookies, and a Presentation owned by ``target_user`` is created.
    Each slide is then matched to a record through its metadata entry
    labelled 'ID': an existing record is reused, otherwise a new record is
    created with the slide's metadata, title and downloaded image.

    Uses names that are not defined in this function: ``target_user``,
    ``store``, ``fid``, ``collection`` and ``mdid_base_url``.
    """
    print(pres_ids)
    for pres_id in pres_ids:
        pres_url = 'http://mdid3.temple.edu/api/presentation/' + str(pres_id) + '/'
        print('fetching %s' % pres_url)
        response = requests.get(pres_url, cookies=rc)
        jp = simplejson.loads(response.content)
        presentation = Presentation.objects.create(
            title=jp['title'],
            owner=target_user,
            description=jp['description'])

        # jp['content'] contains every slide
        for order, slide in enumerate(jp['content']):
            rec_id = None
            existing = None
            print('using storage %s' % store.base)
            for metadata in slide['metadata']:
                if metadata['label'] != 'ID':
                    continue
                print('metadata for slide %s, %s' % (slide['name'], str(metadata)))
                rec_id = metadata['value']
                print('%s is an ID field' % rec_id)
                # Look the identifier up once and reuse the result below.
                matches = Record.by_fieldvalue(fid, rec_id)
                if matches:
                    existing = matches
                    print('%s already exists' % rec_id)
                    break

            if existing is not None:
                # Only the first match is used; the full list is printed so
                # that duplicate identifiers can be spotted in the output.
                print('Check the following list list of records for multiple values:')
                print(existing)
                record = existing[0]
                presentation.items.create(order=order, record=record)
                presentation.save()
                print('adding %s to presentation at position %s' % (rec_id, order))
                continue

            print('creating record for %s' % rec_id)
            print('metadata:')
            print(slide['metadata'])
            record = Record.objects.create(owner=target_user)
            record.save()
            _copy_slide_metadata(record, slide)

            # Slides without a title key get a placeholder title.
            title = slide.get('title', 'Untitled')
            FieldValue.objects.create(record=record,
                                      field=standardfield('title'),
                                      order=0,
                                      value=title)
            col_i = CollectionItem.objects.create(collection=collection,
                                                  record=record)
            print('collection item created: %s' % col_i)

            _download_slide_media(record, slide, rc)

            presentation.items.create(order=order, record=record)
            presentation.save()
def upload(request):
    """Handle the upload form for a new PDF issue of 'the-breeze'.

    Write access to both the 'the-breeze' collection and the 'the-breeze'
    storage is enforced first.  A valid POST creates a record with title,
    volume, issue, date, pages and a hidden identifier, stores the uploaded
    PDF as its media file and redirects to 'thebreeze-main'.  Any other
    request, or an invalid POST, renders 'thebreeze-upload.html'.
    """
    import re

    collection = Collection.objects.get(name='the-breeze')
    storage = Storage.objects.get(name='the-breeze')
    check_access(request.user, collection, write=True, fail_if_denied=True)
    check_access(request.user, storage, write=True, fail_if_denied=True)

    fcoverage = standardfield('coverage')
    fdate = standardfield('date')
    ftitle = standardfield('title')
    fdescription = standardfield('description')
    fidentifier = standardfield('identifier')

    if request.method != 'POST':
        form = UploadForm()
    else:
        form = UploadForm(request.POST, request.FILES)
        if form.is_valid():
            cleaned = form.cleaned_data
            volume = str(cleaned['volume'])
            issue = str(cleaned['issue'])
            date = str(cleaned['date'])
            pages = str(cleaned['pages'])
            title = '%s %s Volume %s Issue %s' % (
                cleaned['publication'], date, volume, issue)

            record = Record.objects.create()
            CollectionItem.objects.create(record=record,
                                          collection=collection)

            # (field, label, value, extra keyword arguments); the position
            # in this table, counted from 1, becomes the value's order.
            field_values = (
                (ftitle, None, title, {}),
                (fcoverage, 'Volume', volume, {}),
                (fcoverage, 'Issue', issue, {}),
                (fdate, None, date, {}),
                (fdescription, 'Pages', pages, {}),
                (fidentifier, None, title, {'hidden': True}),
            )
            for position, spec in enumerate(field_values, 1):
                field, label, value, extra = spec
                record.fieldvalue_set.create(
                    field=field,
                    label=label,
                    order=position,
                    value=value,
                    **extra)

            # File name is the lower-cased title with every run of other
            # characters collapsed to a single dash.
            slug = re.sub(r'[^a-z0-9]+', '-', title.lower())
            media = Media.objects.create(
                record=record,
                storage=storage,
                mimetype='application/pdf',
            )
            media.save_file(slug + '.pdf', request.FILES['pdf'])
            return HttpResponseRedirect(reverse('thebreeze-main'))

    logo_number = random.choice('00 01 02 03 04'.split())
    context = {
        'breezelogo': 'breeze_logo_%s.png' % logo_number,
        'form': form,
    }
    return render_to_response('thebreeze-upload.html',
                              context,
                              context_instance=RequestContext(request))
def create_record(self, url):
    """Create a record from the detail page at ``url``.

    The page is parsed for the table cells following the labels 'Date: ',
    'Title: ', 'Description: ', 'ID: ' and 'Credit: '.  A record managed by
    'nasaimageexchange' is created with those values and added to the
    collection returned by ``self.get_collection()``.  If the 'Format: '
    cell lists any links, a 'nasa_download_media' job is created and run
    for the link whose "<width> x <height>" text describes the largest
    area.

    Returns the new record.  A page that lists no format links yields a
    record without a download job instead of an IndexError raised after
    the record has already been stored.
    """
    from contextlib import closing

    collection = self.get_collection()
    # urllib2 responses are not context managers, so close explicitly as
    # soon as the page has been parsed.
    with closing(urllib2.urlopen(url)) as response:
        s = BeautifulSoup(response)

    def cell_after(label):
        # Table cell that follows the cell holding the given label text.
        return s.find(text=label).parent.findNextSibling('td')

    def sort_by_dimension(entry):
        # Pixel area parsed from the link text; 0 when no size is present.
        m = re.search(r'(?P<width>\d+) x (?P<height>\d+)', entry[1])
        return int(m.group('width')) * int(m.group('height')) if m else 0

    # get metadata
    date = cell_after('Date: ').next
    title = cell_after('Title: ').next
    description = cell_after('Description: ').next
    identifier = cell_after('ID: ').next
    credit_link = cell_after('Credit: ').findNext('a')
    credit_url = credit_link['href']
    credit_title = credit_link.next

    record = Record.objects.create(name=title,
                                   source=url,
                                   manager='nasaimageexchange')

    # Position in this table is the order of the stored field value.
    field_values = (
        ('title', title),
        ('description', description),
        ('date', date),
        ('identifier', identifier),
        ('contributor', credit_title),
        ('contributor', credit_url),
    )
    for order, (field_name, value) in enumerate(field_values):
        FieldValue.objects.create(record=record,
                                  field=standardfield(field_name),
                                  order=order,
                                  value=value)

    CollectionItem.objects.create(collection=collection, record=record)

    # media links and dimensions, largest first
    media = [(a['href'], a.next)
             for a in cell_after('Format: ').findAll('a')]
    media = sorted(media, key=sort_by_dimension, reverse=True)

    if media:
        # create job to download actual media file
        job = JobInfo.objects.create(
            func='nasa_download_media',
            arg=simplejson.dumps(dict(record=record.id, url=media[0][0])))
        job.run()

    return record