def clean_dst(self):
    """Validate that the submitted destination path exists in SmartFile.

    Builds a SmartFile client from the OAuth token pair stored in the
    session and attempts to list the destination folder; any failure is
    surfaced to the user as a form validation error.

    Returns:
        The cleaned destination path.

    Raises:
        forms.ValidationError: if the folder listing fails (path missing,
            bad credentials, network error, ...).
    """
    dst = self.cleaned_data.get('dst')
    # Assumes the token was stored at login as a (token, secret) pair.
    token = self.request.session['SMARTFILE_ACCESS_TOKEN']
    try:
        folder_list(get_client(token[0], token[1]), dst)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are no longer swallowed and reported as a validation failure.
        raise forms.ValidationError('The path does not exist in SmartFile.')
    return dst
def handle(self, *args, **kwargs):
    """Claim the oldest unstarted Job and archive its IMAP folders to SmartFile.

    Picks the lowest-id NEW job, marks it RUNNING, streams each source
    folder's messages in per-sender chunks, uploads each chunk to a
    per-sender SmartFile directory, and finally marks the job COMPLETE
    (or FAILED on any error).
    """
    try:
        # TODO: lock or atomic nabbing of job. Only important if more than
        # one archiver.
        job = Job.objects.filter(state=Job.STATE_NEW,
                                 started=None).order_by('id')[0]
    except IndexError:
        # No pending work; nothing to do.
        return
    LOGGER.info('processing job %s' % job.uuid, exc_info=True)
    # TODO: do an update instead of save().
    job.state = Job.STATE_RUNNING
    job.pid = os.getpid()
    job.started = datetime.datetime.now()
    job.save()
    try:
        base_dir = os.path.join(job.dest, u'archive')
        imap = imap_open(job.url)
        # First pass: record each folder's message count for progress.
        for source in job.sources.all().iterator():
            # TODO: update instead of save()
            source.total = imap_folder_size(imap, source.name)
            source.save()
        # Optimizations:
        # - modify SmartFile API so that a POST to a missing directory
        #   creates it. This will save a round-trip to mkdir().
        # - parallelize the operations (thread pool).
        # TODO: preserve the user's folder structure.
        client = get_client(job.token, job.secret)
        for source in job.sources.all().iterator():
            def chunker():
                """Yield (sender, messages) chunks of at most 25 messages.

                Relies on the IMAP iterator returning messages sorted by
                sender, so a sender change means that sender is finished.
                """
                chunk, lsender = [], None
                for message in imap_folder_iter(imap, source.name,
                                                chunk_size=32):
                    n, sender = parseaddr(message['from'])
                    LOGGER.debug('message from %s' % sender)
                    # Random file name; collisions are statistically unlikely.
                    name = u'%s.email' % ''.join(random.sample(
                        string.ascii_letters + string.digits, 10))
                    # If the chunk size exceeds 25 messages, or the sender
                    # changed, yield and reset our chunk.
                    if (lsender and lsender != sender) or len(chunk) >= 25:
                        yield lsender, chunk
                        del chunk[:]
                    # Accumulate messages from the same sender.
                    lsender = sender
                    chunk.append((name, StringIO(message.as_string(True))))
                # BUG FIX: flush the trailing partial chunk; previously the
                # last <=24 messages of every folder were silently dropped.
                if chunk:
                    yield lsender, chunk
            for sender, chunk in chunker():
                LOGGER.debug('chunked %s messages from %s'
                             % (len(chunk), sender))
                try:
                    path = os.path.join(base_dir, sender)
                    client.path.oper.mkdir.put(path)
                    files = {}
                    for fname, message in chunk:
                        files['file%s' % fname] = message
                    client.path.data.post(path, **files)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    # Best-effort: log and count the chunk as failed, then
                    # keep going with the next chunk.
                    LOGGER.error('continuing', exc_info=True)
                    source.failed += len(chunk)
                else:
                    source.archived += len(chunk)
                # Persist progress for both the success and failure paths.
                source.save()
        job.finished = datetime.datetime.now()
        job.state = Job.STATE_COMPLETE
        job.save()
    except BaseException:
        # Intentionally broad (was a bare ``except:``): even an interrupt
        # must leave the job marked FAILED rather than stuck RUNNING.
        LOGGER.error('ERROR, aborting', exc_info=True)
        job.finished = datetime.datetime.now()
        job.state = Job.STATE_FAILED
        job.save()