def cleanErrItem(item_id, count):
    """Best-effort removal of everything stored for a failed item.

    Three independent cleanup steps are attempted in order. A failure in one
    step is swallowed so that the remaining steps still run, and nothing is
    ever propagated to the caller:

    1. delete the item's ``.jp2`` keys from the S3 bucket,
    2. delete the item's document from the CloudSearch item domain
       (skipped when ``CLOUDSEARCH_ITEM_DOMAIN`` is ``None``),
    3. delete the item record itself.

    Args:
        item_id: identifier of the item to clean up.
        count: number of image keys to delete for the item.

    Returns:
        None.
    """
    # Step 1: S3 keys.
    try:
        bucket = getBucket()

        # The first image lives at '<item_id>.jp2', every further one at
        # '<item_id>/<index>.jp2'.
        for index in range(count):
            if index == 0:
                filename = '%s.jp2' % item_id
            else:
                filename = '%s/%s.jp2' % (item_id, index)

            bucket.delete_key(S3_DEFAULT_FOLDER + filename)

        # With more than one image there is also a '<item_id>/' key to drop.
        if count > 1:
            filename = '%s/' % item_id
            bucket.delete_key(S3_DEFAULT_FOLDER + filename)
    except Exception:
        # Deliberately best-effort. `Exception` (not a bare except) so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        pass

    # Step 2: CloudSearch document. Only attempted when a domain is
    # configured, matching the guard used when documents are written.
    if CLOUDSEARCH_ITEM_DOMAIN is not None:
        try:
            cloudsearch = getCloudSearch(CLOUDSEARCH_ITEM_DOMAIN, 'document')
            cloudsearch.delete(hashlib.sha512(item_id).hexdigest()[:128])
            cloudsearch.commit()
        except Exception:
            pass

    # Step 3: the item record itself.
    try:
        Item(item_id).delete()
    except Exception:
        pass
def finalizeItem(batch_id, item_id, item_tasks_count):
    """Merge the results of an item's tasks and publish, delete or fail it.

    The tasks ``0 .. item_tasks_count - 1`` of the item are loaded and the
    last one supplies the item data. Unless the whole item is flagged as
    deleted, the per-image metadata of the tasks is merged into the item's
    ``image_meta``. If any task is still ``'pending'`` or in ``'error'``, the
    item is cleaned up via ``cleanErrItem`` and reported as failed.

    Otherwise the CloudSearch document is added or deleted (only when
    ``CLOUDSEARCH_ITEM_DOMAIN`` is set). A failing CloudSearch call is
    rescheduled through ``ingestQueue`` until ``MAX_TASK_REPEAT * 2`` attempts
    were made; after that the last task is marked as ``'error'`` and the item
    is cleaned up. On success the item is saved, or the previously stored item
    is deleted when a whole-item delete was requested.

    Args:
        batch_id: identifier of the batch the tasks belong to.
        item_id: identifier of the item being finalized.
        item_tasks_count: number of tasks of the item; must be at least 1
            because the last task is read unconditionally.

    Returns:
        The result of ``ingestQueue.apply_async`` when the CloudSearch update
        was rescheduled, otherwise ``None``.
    """
    item_tasks = [
        Task(batch_id, item_id, task_order)
        for task_order in range(item_tasks_count)
    ]

    # the task with highest id for the specific item has all item data
    last_task = item_tasks[-1]
    item_data = last_task.item_data
    item_data['timestamp'] = datetime.utcnow().isoformat("T") + "Z"

    whole_item_delete = (
        'status' in item_data and item_data['status'] == 'deleted'
    )

    # Constructing an Item from its id alone may raise; in that case the
    # item is handled as not previously stored.
    try:
        old_item = Item(item_id)
    except Exception:
        old_item = None

    # Seed image_meta: continue from the stored item, or start empty for a
    # new one. For an existing item that is being deleted nothing is seeded.
    if old_item:
        if not whole_item_delete:
            item_data['image_meta'] = old_item.image_meta
    else:
        item_data['image_meta'] = {}

    error = False

    if not whole_item_delete:
        for task in item_tasks:
            if task.status in ('pending', 'error'):
                error = True
            # modification tasks never changes image_meta
            elif task.type == 'mod':
                pass
            elif task.status == 'deleted':
                # if the image is being really deleted not only being reshuffled
                if task.url not in item_data['url']:
                    item_data['image_meta'].pop(task.url, None)
            elif task.status == 'ok':
                item_data['image_meta'][task.url] = task.image_meta

    if error:
        cleanErrItem(item_id, len(item_data.get('image_meta', {})))
        print("Item '%s' failed" % item_id)
        return

    # True only when a stored item exists and the whole item is to be deleted.
    remove_existing = bool(old_item and whole_item_delete)

    if not remove_existing:
        item = Item(item_id, item_data)

        # Image metadata in the order of the item's url list.
        # NOTE(review): 'url' is written into the item's own image_meta
        # entries (no copy is made), so it is also part of what item.save()
        # stores below -- confirm this is intended.
        ordered_image_meta = []
        for url in item.url:
            tmp = item.image_meta[url]
            tmp['url'] = url
            ordered_image_meta.append(tmp)

    if CLOUDSEARCH_ITEM_DOMAIN is not None:
        try:
            cloudsearch = getCloudSearch(CLOUDSEARCH_ITEM_DOMAIN, 'document')
            # Computed inside the try so that a hashing failure is retried
            # exactly like a CloudSearch failure.
            search_id = hashlib.sha512(item_id).hexdigest()[:128]

            if remove_existing:
                cloudsearch.delete(search_id)
            else:
                cloudsearch.add(search_id, {
                    'id': item.id,
                    'title': item.title,
                    'creator': item.creator,
                    'source': item.source,
                    'institution': item.institution,
                    'institution_link': item.institution_link,
                    'license': item.license,
                    'description': item.description,
                    'url': json.dumps(item.url),
                    'timestamp': item.timestamp,
                    'image_meta': json.dumps(ordered_image_meta),
                })

            cloudsearch.commit()
        except Exception:
            if last_task.attempts < MAX_TASK_REPEAT * 2:
                # last_task is used for the item id: the loop variable of the
                # merge loop above does not exist on whole-item deletes.
                print('\nFailed Cloud Search attempt numb.: %s\nItem: %s\nError message:\n###\n%s###' % (last_task.attempts + 1, last_task.item_id, traceback.format_exc()))
                last_task.attempts += 1
                last_task.status = 'pending'
                last_task.type = 'cloud_search'
                last_task.save()

                # Retry delay in seconds: grows with the attempt number and
                # has a random component.
                rand = (last_task.attempts * 60) + random.randint(last_task.attempts * 60, last_task.attempts * 60 * 2)

                return ingestQueue.apply_async(args=[batch_id, item_id, last_task.task_id], countdown=rand)

            last_task.status = 'error'
            last_task.message = ERR_MESSAGE_CLOUDSEARCH
            last_task.save()

    if last_task.status == 'error':
        # image_meta is not seeded when an existing item is being deleted,
        # hence the tolerant lookup.
        cleanErrItem(item_id, len(item_data.get('image_meta', {})))
        print("Item '%s' failed" % item_id)
    elif remove_existing:
        old_item.delete()
        print("Item '%s' deleted" % item_id)
    else:
        item.save()
        print("Item '%s' finalized" % item_id)