def run(self, interval):
    self.log.info('Starting updater service')
    last_update = 0
    while True:
        # sleep long enough that updates run no more often than `interval` seconds
        now = time.time()
        sleep = interval + last_update - now
        if sleep > 0:
            self.log.info('Sleeping for %s', human_readable_duration(sleep))
            time.sleep(sleep)

        # run the scanner and feed builders, trapping any exception so the
        # service loop survives a failed pass
        try:
            with TrapErrors():
                scanner.run(logger=self.log)
                feeds.run(logger=self.log, force=False)
        except TrapError, exc:
            self.log.error('Processing Error', exc_info=exc.args)
        last_update = now
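# The loop above relies on a TrapErrors context manager and a matching
# TrapError exception defined elsewhere in the project. The sketch below is
# only an assumption about their shape, written so that exc.args lines up
# with the exc_info=exc.args logging call in run(); the real implementation
# may differ.

class TrapError(Exception):
    """Carries the original (type, value, traceback) tuple in .args."""


class TrapErrors(object):
    """Convert any exception raised inside the block into a TrapError."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        if exc_type is not None:
            # hypothetical: re-raise with the original exc_info as args
            raise TrapError(exc_type, exc_value, exc_tb)
        return False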
def handle_noargs(self, keep_days=KEEP_DAYS, keep_links=KEEP_LINKS, **kwargs):
    now = datetime.now()
    cutoff = now - timedelta(days=keep_days)
    published_links = Link.objects.filter(state='published').order_by('-created')
    keep1 = published_links[:keep_links]
    keep2 = published_links.filter(created__gte=cutoff)
    keep_ids = {link.id for qs in [keep1, keep2] for link in qs}
    archive = published_links.exclude(id__in=keep_ids)
    archive_count = archive.count()
    print 'Number of published items to retain: %d' % len(keep_ids)
    print 'Number of published items to archive: %d' % archive_count

    # get some verification first, this is destructive as hell.
    if archive_count and get_yesno('\nContinue (y/N)? ', default=False):

        # make unique directory inside the archive root
        base, fmt = ['memebot', 'archive', now.strftime('%Y%m%d')], '%%0%dd' % len(str(MAX_UNIQUE_DIRS - 1))
        if not os.path.exists(settings.ARCHIVE_DIR):
            os.makedirs(settings.ARCHIVE_DIR)
        for i in xrange(MAX_UNIQUE_DIRS):
            archive_dir = os.path.join(settings.ARCHIVE_DIR, '.'.join(base + [fmt % i]))
            try:
                os.mkdir(archive_dir)
                break
            except OSError, exc:
                if exc.errno != errno.EEXIST:
                    raise
        else:
            raise OSError(errno.EEXIST, os.strerror(errno.EEXIST), archive_dir)

        # rudimentary meter
        def update(desc, i):
            sys.stderr.write('\r%s ... %d / %d' % (desc, i + 1, archive_count))
            sys.stderr.flush()

        print '\nBeginning dump ...'
        start = time.time()
        try:
            # dump the contents of the fields we're about to nuke
            for i, link in enumerate(archive.only('id', *FIELDS).values('id', *FIELDS)):
                id = link.pop('id')
                pickle_file = os.path.join(archive_dir, 'link-%08d.pkl' % id)
                with open(pickle_file, 'wb') as fp:
                    pickle.dump(link, fp)
                update('Dumping', i)
            print

            # nuke 'em
            for i, link in enumerate(archive.only('id')):
                link.state = 'archived'
                for field in FIELDS:
                    setattr(link, field, None)
                link.save()
                update('Cleaning Content', i)

            # compress archive dir
            tar_file = archive_dir + '.tar'
            if os.path.exists(tar_file):
                os.remove(tar_file)
            print '\nCreating %s ...' % os.path.basename(tar_file)
            with tarfile.open(tar_file, 'w') as tar:
                for basedir, subdirs, filenames in os.walk(archive_dir):
                    for filename in filenames:
                        file = os.path.join(basedir, filename)
                        arcname = os.path.relpath(file, settings.ARCHIVE_DIR)
                        tar.add(file, arcname)
            shutil.rmtree(archive_dir)

            bz2_file = tar_file + '.bz2'
            print 'Creating %s ...' % os.path.basename(bz2_file)
            with bz2.BZ2File(bz2_file, 'w', 0, 9) as out_fp:
                with open(tar_file, 'rb') as in_fp:
                    shutil.copyfileobj(in_fp, out_fp)
            os.remove(tar_file)
            print 'Archive is: ' + os.path.relpath(bz2_file, os.curdir)
        finally:
            print '\nFinished in ' + human_readable_duration(time.time() - start)
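# The command above is one-way: once the fields in FIELDS are nulled, the only
# copy of the content lives in the .tar.bz2 dump. No restore helper is part of
# the command; the sketch below is hypothetical and simply assumes the
# link-%08d.pkl naming and tar layout produced by the dump step.
import pickle
import tarfile


def load_archived_link(bz2_file, link_id):
    """Return the pickled field dict for link_id from a .tar.bz2 archive,
    or None if that link is not present in the archive."""
    wanted = 'link-%08d.pkl' % link_id
    with tarfile.open(bz2_file, 'r:bz2') as tar:
        for member in tar.getmembers():
            if member.name.endswith(wanted):
                return pickle.load(tar.extractfile(member))
    return None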
def format_date(date):
    return '%s (%s ago)' % (date.strftime('%Y-%m-%d %H:%M:%S'),
                            human_readable_duration(date, precision=2))