def scrub(self):
    ''' Perform a thorough scrub and cleanup of the database

    Duplicate Reason rows are collapsed onto the first copy seen: the
    interactions that reference a duplicate are re-pointed at the kept row
    and the duplicate is deleted.  Afterwards orphaned Reason and Entries
    records are pruned.  Returns None; if the initial Reason count cannot
    be read the error is logged and nothing else is done.  Any failure of
    the raw SQL statements is logged and re-raised.
    '''
    # Currently only reasons are a problem
    try:
        total_reasons = Reason.objects.count()
    except Exception:
        # sys.exc_info() keeps this working on interpreters that lack
        # the "except ... as" syntax
        err = sys.exc_info()[1]
        self.log.error("Failed to load reason objects: %s" % err)
        return

    first_seen = {}     # fingerprint -> id of the first Reason carrying it
    repoint_args = []   # [kept id, duplicate id] rows for the UPDATE
    delete_args = []    # [duplicate id] rows for the DELETE

    for reason in BatchFetch(Reason.objects):
        # Build a key out of the row's data with the id blanked out, so
        # duplicates can be detected with a fast hash lookup.
        saved_id = reason.id
        reason.id = None
        digest = md5(pickle.dumps(reason)).hexdigest()
        reason.id = saved_id

        if digest not in first_seen:
            first_seen[digest] = reason.id
        else:
            self.log.debug("Update interactions from %d to %d"
                           % (reason.id, first_seen[digest]))
            delete_args.append([reason.id])
            repoint_args.append([first_seen[digest], reason.id])
        self.log.debug("key %d" % reason.id)

    self.log.debug("Done with updates, deleting dupes")
    try:
        cursor = connection.cursor()
        cursor.executemany(
            'update reports_entries_interactions set reason_id=%s where reason_id=%s',
            repoint_args)
        cursor.executemany('delete from reports_reason where id = %s',
                           delete_args)
        # NOTE(review): presumably needed so the raw-cursor writes are
        # committed by Django's transaction handling -- confirm
        transaction.set_dirty()
    except Exception:
        failure = sys.exc_info()[1]
        self.log.error("Failed to delete reasons: %s" % failure)
        raise

    self.log.info("Found %d dupes out of %d"
                  % (len(delete_args), total_reasons))

    # Cleanup orphans
    reasons_before = Reason.objects.count()
    Reason.prune_orphans()
    reasons_pruned = reasons_before - Reason.objects.count()
    self.log.info("Pruned %d Reason records" % reasons_pruned)

    entries_before = Entries.objects.count()
    Entries.prune_orphans()
    entries_pruned = entries_before - Entries.objects.count()
    self.log.info("Pruned %d Entries records" % entries_pruned)
def scrub(self):
    ''' Perform a thorough scrub and cleanup of the database

    Pass 1 fingerprints every Reason (md5 of the pickled row with its id
    cleared) and records which rows repeat an earlier fingerprint.
    Pass 2 issues raw SQL that moves interactions off the repeated rows
    and deletes those rows.  Finally orphaned Reason and Entries records
    are pruned and the counts are logged.

    Returns None.  A failure to count Reason objects is logged and ends
    the scrub early; a failure in the raw SQL is logged and re-raised.
    '''
    # Currently only reasons are a problem
    try:
        start_count = Reason.objects.count()
    except Exception:
        # exc_info() rather than "except ... as" for old interpreters
        problem = sys.exc_info()[1]
        self.log.error("Failed to load reason objects: %s" % problem)
        return

    duplicate_ids = []
    interaction_moves = []
    canonical_by_key = dict()
    for reason in BatchFetch(Reason.objects):
        # The id is the only thing that differs between duplicates, so it
        # is blanked while the row is hashed and restored right after.
        original_id, reason.id = reason.id, None
        key = md5(pickle.dumps(reason)).hexdigest()
        reason.id = original_id

        if key in canonical_by_key:
            self.log.debug("Update interactions from %d to %d" %
                           (reason.id, canonical_by_key[key]))
            duplicate_ids.append([reason.id])
            interaction_moves.append([canonical_by_key[key], reason.id])
        else:
            canonical_by_key[key] = reason.id
        self.log.debug("key %d" % reason.id)

    self.log.debug("Done with updates, deleting dupes")
    update_sql = 'update reports_entries_interactions set reason_id=%s where reason_id=%s'
    delete_sql = 'delete from reports_reason where id = %s'
    try:
        cursor = connection.cursor()
        cursor.executemany(update_sql, interaction_moves)
        cursor.executemany(delete_sql, duplicate_ids)
        # NOTE(review): looks like this flags the transaction so the
        # raw SQL above gets committed -- confirm against Django version
        transaction.set_dirty()
    except Exception:
        ex = sys.exc_info()[1]
        self.log.error("Failed to delete reasons: %s" % ex)
        raise

    self.log.info("Found %d dupes out of %d" % (len(duplicate_ids), start_count))

    # Cleanup orphans: same count / prune / report sequence per model
    for model, report in ((Reason, "Pruned %d Reason records"),
                          (Entries, "Pruned %d Entries records")):
        start_count = model.objects.count()
        model.prune_orphans()
        self.log.info(report % (start_count - model.objects.count()))
def purge(self, client=None, maxdate=None, state=None):
    '''Purge historical data from the database

    At least one filter must be given; with none, self.errExit() is
    called instead of purging everything.

    Arguments:
    client  -- name of a client; only that client's interactions (and
               ping data, when maxdate is given) are purged.  When it is
               the only filter, the Client record itself is deleted too.
    maxdate -- datetime.datetime; only interactions with a timestamp
               (and ping data with an endtime) before it are purged.
    state   -- one of 'dirty', 'clean' or 'modified'; only interactions
               in that state are purged.

    Raises SystemExit(-1) if the named client is not in the database,
    and TypeError if maxdate is not a datetime.datetime or state is not
    one of the accepted values.  All arguments are validated before
    anything is deleted.  Errors raised while deleting interactions or
    the client are logged and not propagated.
    '''
    # True once maxdate or state narrows the purge; in that case the
    # client record itself must be kept
    filtered = False

    if not client and not maxdate and not state:
        self.errExit("Reports.prune: Refusing to prune all data")

    ipurge = Interaction.objects
    cobj = None

    if client:
        try:
            cobj = Client.objects.get(name=client)
            ipurge = ipurge.filter(client=cobj)
        except Client.DoesNotExist:
            self.log.error("Client %s not in database" % client)
            raise SystemExit(-1)
        self.log.debug("Filtering by client: %s" % client)

    if maxdate:
        filtered = True
        if not isinstance(maxdate, datetime.datetime):
            raise TypeError("maxdate is not a DateTime object")
        self.log.debug("Filtering by maxdate: %s" % maxdate)
        ipurge = ipurge.filter(timestamp__lt=maxdate)

    if state:
        filtered = True
        if state not in ('dirty', 'clean', 'modified'):
            raise TypeError("state is not one of the following values "
                            "('dirty','clean','modified')")
        self.log.debug("Filtering by state: %s" % state)
        ipurge = ipurge.filter(state=state)

    # Every argument has been validated by now.  The ping delete used to
    # run before the state check, so a bad state destroyed ping data and
    # then raised; it is deliberately deferred to this point.
    if maxdate:
        # Handle ping data as well
        ping = Ping.objects.filter(endtime__lt=maxdate)
        if client:
            ping = ping.filter(client=cobj)
        ping.delete()

    count = ipurge.count()
    rnum = 0
    try:
        # Delete in batches of 1000 ids.  The queryset is re-sliced from
        # the start each pass because the previous batch no longer exists.
        while rnum < count:
            grp = list(ipurge[:1000].values("id"))
            # just in case...
            if not grp:
                break
            Interaction.objects.filter(id__in=[x['id'] for x in grp]).delete()
            rnum += len(grp)
            self.log.debug("Deleted %s of %s" % (rnum, count))
    except Exception:
        # Exception rather than a bare except, so Ctrl-C / SystemExit
        # abort the purge instead of being logged and ignored
        self.log.error("Failed to remove interactions")
        (etype, evalue, etb) = sys.exc_info()
        # the last formatted line is the "ExcType: message" summary
        msg = traceback.format_exception(etype, evalue, etb, limit=2)[-1][:-1]
        del etype, evalue, etb
        self.log.error(msg)

    # bulk operations bypass the Interaction.delete method
    self.log.debug("Pruning orphan Performance objects")
    Performance.prune_orphans()
    self.log.debug("Pruning orphan Reason objects")
    Reason.prune_orphans()

    if client and not filtered:
        # Delete the client; per the original note its ping data is
        # removed automatically along with it
        try:
            self.log.debug("Purging client %s" % client)
            cobj.delete()
        except Exception:
            self.log.error("Failed to delete client %s" % client)
            (etype, evalue, etb) = sys.exc_info()
            msg = traceback.format_exception(etype, evalue, etb, limit=2)[-1][:-1]
            del etype, evalue, etb
            self.log.error(msg)
self.log.debug("Done with updates, deleting dupes") try: cursor = connection.cursor() cursor.executemany('update reports_entries_interactions set reason_id=%s where reason_id=%s', batch_update) cursor.executemany('delete from reports_reason where id = %s', dup_reasons) transaction.set_dirty() except Exception, ex: self.log.error("Failed to delete reasons: %s" % ex) raise self.log.info("Found %d dupes out of %d" % (len(dup_reasons), start_count)) # Cleanup orphans start_count = Reason.objects.count() Reason.prune_orphans() self.log.info("Pruned %d Reason records" % (start_count - Reason.objects.count())) start_count = Entries.objects.count() Entries.prune_orphans() self.log.info("Pruned %d Entries records" % (start_count - Entries.objects.count())) def django_command_proxy(self, command): '''Call a django command''' if command == 'sqlall': django.core.management.call_command(command, 'reports') else: django.core.management.call_command(command) def load_stats(self, stats_file=None, clientspath=None, verb=0, quick=False): '''Load statistics data into the database'''