def handle(self, **options):
    """Interactively reset pillow checkpoints to a historical sequence.

    Reads ``options['date']`` and ``options['pillows']``. With no pillow
    arguments every pillow is selected (after a confirmation prompt);
    otherwise a pillow is selected when its id equals one of the arguments
    or one of the arguments matches it as a case-insensitive regular
    expression (``re.match``, i.e. anchored at the start of the id).

    For each selected pillow, the first ``HistoricalPillowCheckpoint``
    returned for its checkpoint id with ``date_updated`` earlier than the
    given date is looked up, the change is displayed, and the checkpoint is
    updated only after the operator confirms it.

    Raises:
        CommandError: if the operator declines one of the bulk prompts or
            no pillow matches the given arguments.
    """
    date = options['date']
    pillow_args = set(options['pillows'] or [])

    # Resetting everything is only allowed after an explicit confirmation.
    if not pillow_args:
        if not confirm('Reset checkpoints ALL pillows?'):
            raise CommandError('Abort')

    def _pillow_match(pillow_id):
        # An exact id match wins; otherwise each argument is tried as a regex.
        if pillow_id in pillow_args:
            return True
        for pattern in pillow_args:
            if re.match(pattern, pillow_id, re.IGNORECASE):
                return True
        return False

    def _describe_change(as_kafka_diff, old_seq, new_seq):
        # Kafka sequences are compared line by line; others are shown as-is.
        if as_kafka_diff:
            before = pprint.pformat(old_seq).splitlines()
            after = pprint.pformat(new_seq).splitlines()
            return '\n'.join(difflib.ndiff(before, after))
        return 'from: {}\nto : {}'.format(old_seq, new_seq)

    candidates = get_all_pillow_instances()
    if pillow_args:
        pillows = [
            candidate for candidate in candidates
            if _pillow_match(candidate.pillow_id)
        ]
    else:
        pillows = candidates

    if not pillows:
        raise CommandError('No pillows match: {}'.format(options['pillows']))

    selected_ids = '\n '.join(p.pillow_id for p in pillows)
    if not confirm('Update checkpoints for {}?'.format(selected_ids)):
        raise CommandError('abort')

    for pillow in pillows:
        checkpoint = pillow.checkpoint
        history = HistoricalPillowCheckpoint.objects.filter(
            checkpoint_id=checkpoint.checkpoint_id,
            date_updated__lt=date,
        )
        historical_checkpoint = history.first()
        if not historical_checkpoint:
            message = 'No historical checkpoints for {} before {}'.format(
                checkpoint.checkpoint_id, date)
            print(self.style.ERROR(message))
            continue

        old_seq = pillow.get_last_checkpoint_sequence()
        new_seq = historical_checkpoint.seq
        as_kafka_diff = (
            checkpoint.sequence_format == 'json' and isinstance(old_seq, dict)
        )
        if as_kafka_diff:
            # Convert the stored string so it is comparable with the dict.
            new_seq = str_to_kafka_seq(new_seq)
        diff = _describe_change(as_kafka_diff, old_seq, new_seq)

        pillow_id = pillow.pillow_id
        if old_seq == new_seq:
            print('Sequences for {} are identical, moving on.'.format(pillow_id))
            continue

        prompt = "\nReset checkpoint for '{}' pillow to sequence from {}:\n\n{}\n".format(
            pillow_id, historical_checkpoint.date_updated, diff)
        if confirm(prompt):
            pillow.checkpoint.update_to(new_seq)
            print(self.style.SUCCESS("Checkpoint for {} updated\n".format(pillow_id)))
def rewind_pillows(date):
    """Set every pillow's checkpoint to the sequence recorded on ``date``.

    For each pillow, the ``HistoricalPillowCheckpoint`` whose
    ``date_updated`` equals ``date`` is fetched. When one exists its stored
    sequence is used (converted with ``str_to_kafka_seq`` for checkpoints
    whose ``sequence_format`` is ``'json'``); when none exists, the default
    empty sequence for that sequence format is used instead.
    """
    for pillow in get_all_pillow_instances():
        current = pillow.checkpoint
        try:
            historical = HistoricalPillowCheckpoint.objects.get(
                checkpoint_id=current.checkpoint_id,
                date_updated=date,
            )
        except HistoricalPillowCheckpoint.DoesNotExist:
            # Nothing recorded for that date: fall back to the empty default.
            sequence_format = pillow.checkpoint.sequence_format
            seq = DEFAULT_EMPTY_CHECKPOINT_SEQUENCE_FOR_RESTORE[sequence_format]
        else:
            is_json = pillow.checkpoint.sequence_format == 'json'
            seq = str_to_kafka_seq(historical.seq) if is_json else historical.seq
        pillow.checkpoint.update_to(seq)
def handle(self, **options):
    """Copy JSON-format Django checkpoints into ``KafkaCheckpoint`` rows.

    Every ``DjangoPillowCheckpoint`` whose ``sequence_format`` is ``'json'``
    has its ``sequence`` parsed with ``str_to_kafka_seq``. For each
    (topic/partition, offset) pair in the parsed sequence, a
    ``KafkaCheckpoint`` row keyed by checkpoint id, topic and partition is
    created or updated with that offset.

    Checkpoints whose sequence cannot be parsed (``ValueError``) are
    reported and skipped; the remaining checkpoints are still processed.
    """
    for checkpoint in DjangoPillowCheckpoint.objects.filter(sequence_format='json'):
        try:
            kafka_seq = str_to_kafka_seq(checkpoint.sequence)
        except ValueError:
            # The id has to be formatted into the message; passing it as a
            # second print() argument left a literal "{}" in the output.
            print("unable to migrate {}".format(checkpoint.checkpoint_id))
            continue

        for topic_partition, offset in kafka_seq.items():
            KafkaCheckpoint.objects.update_or_create(
                checkpoint_id=checkpoint.checkpoint_id,
                topic=topic_partition.topic,
                partition=topic_partition.partition,
                defaults={'offset': offset},
            )
def handle(self, **options):
    """Mirror JSON-format Django checkpoints into ``KafkaCheckpoint`` rows.

    Iterates over all ``DjangoPillowCheckpoint`` objects with
    ``sequence_format='json'``, parses each ``sequence`` with
    ``str_to_kafka_seq`` and, for every (topic/partition, offset) pair in
    the result, creates or updates the ``KafkaCheckpoint`` identified by
    checkpoint id, topic and partition so that it holds that offset.

    A checkpoint whose sequence fails to parse (``ValueError``) is reported
    on stdout and skipped without interrupting the run.
    """
    json_checkpoints = DjangoPillowCheckpoint.objects.filter(sequence_format='json')
    for checkpoint in json_checkpoints:
        try:
            kafka_seq = str_to_kafka_seq(checkpoint.sequence)
        except ValueError:
            # Format the id into the message; previously it was passed as a
            # separate print() argument, so "{}" was printed verbatim.
            print("unable to migrate {}".format(checkpoint.checkpoint_id))
        else:
            for topic_partition, offset in kafka_seq.items():
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=checkpoint.checkpoint_id,
                    topic=topic_partition.topic,
                    partition=topic_partition.partition,
                    defaults={'offset': offset},
                )
def handle(self, topic, num_partitions, **options):
    """Register newly added partitions of ``topic`` in stored checkpoints.

    After prompting the operator about stopping the pillows and running the
    Kafka partition-alter command, every JSON-format
    ``DjangoPillowCheckpoint`` that already tracks ``topic`` gets an offset
    of ``0`` for each partition in ``range(num_partitions)`` it does not yet
    contain. Changed checkpoints keep their previous value in
    ``old_sequence``, are saved, and have their offsets written to
    ``KafkaCheckpoint``.

    Args:
        topic: name of the Kafka topic whose partition count was raised.
        num_partitions: total number of partitions; passed to ``range``.
    """
    # NOTE(review): a negative answer to either prompt only prints a
    # reminder; the command still goes on to modify checkpoints. Confirm
    # whether it should abort instead.
    stop_pillows = raw_input("did you stop pillows? [y/n]")
    if stop_pillows not in ['y', 'yes']:
        print("then stop them")

    kafka_command = (
        "./kafka-topics.sh --alter --zookeeper <zk IP>:2181 --partitions={} --topic={}"
        .format(num_partitions, topic))
    added_partition = raw_input("have you run {} ? [y/n]".format(kafka_command))
    if added_partition not in ['y', 'yes']:
        print("then run it")

    for checkpoint in DjangoPillowCheckpoint.objects.filter(sequence_format='json'):
        try:
            kafka_seq = str_to_kafka_seq(checkpoint.sequence)
        except ValueError:
            # Format the id into the message instead of passing it as a
            # second print() argument, which left "{}" unfilled.
            print("unable to parse {}".format(checkpoint.checkpoint_id))
            continue

        # Only checkpoints that already follow this topic are extended.
        if not any(tp.topic == topic for tp in kafka_seq):
            print("topic does not exist in {}".format(checkpoint.checkpoint_id))
            continue

        changed = False
        for partition in range(num_partitions):
            tp = TopicAndPartition(topic, partition)
            if tp not in kafka_seq:
                # New partitions start from offset 0.
                kafka_seq[tp] = 0
                changed = True

        if not changed:
            continue

        # Keep the previous value so the change can be inspected or undone.
        checkpoint.old_sequence = checkpoint.sequence
        checkpoint.sequence = kafka_seq_to_str(kafka_seq)
        checkpoint.save()

        for topic_partition, offset in kafka_seq.items():
            KafkaCheckpoint.objects.update_or_create(
                checkpoint_id=checkpoint.checkpoint_id,
                topic=topic_partition.topic,
                partition=topic_partition.partition,
                defaults={'offset': offset},
            )

    print("please restart the pillows")
def update_to(self, seq):
    """Store ``seq`` as this checkpoint's per-partition offsets.

    ``seq`` may be a string, which is parsed with ``str_to_kafka_seq``, or
    an already parsed mapping of (topic, partition) keys to offsets, which
    is serialised with ``kafka_seq_to_str`` for the log message only.

    All ``KafkaCheckpoint`` writes happen inside one ``transaction.atomic``
    block; nothing is written when the parsed sequence is empty.
    """
    if isinstance(seq, six.string_types):
        offsets_by_partition, seq_text = str_to_kafka_seq(seq), seq
    else:
        offsets_by_partition, seq_text = seq, kafka_seq_to_str(seq)

    message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq_text)
    pillow_logging.info(message)

    with transaction.atomic():
        if offsets_by_partition:
            for key, offset in offsets_by_partition.items():
                # key[0] is the topic and key[1] the partition number.
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=self.checkpoint_id,
                    topic=key[0],
                    partition=key[1],
                    defaults={'offset': offset},
                )
def handle(self, doc_type, since, **kwargs):
    """Re-run deleted changes since a historical checkpoint through ES processors.

    Args:
        doc_type: ``'form'`` or ``'case'``; selects which set of pillows is
            processed. Any other value prints a message and returns.
        since: date string in ``%Y-%m-%d`` format. The
            ``HistoricalPillowCheckpoint`` with exactly this ``date_updated``
            provides the sequence the change feed is read from.

    For each pillow, every change from the feed that is flagged deleted and
    has an id is passed to each of the pillow's ``ElasticProcessor``
    instances. Progress is printed every 100 changes. Pillows without a
    matching historical checkpoint are reported and skipped.
    """
    since = datetime.strptime(since, '%Y-%m-%d')

    if doc_type not in ('form', 'case'):
        print("Unknown doc type {}. Specify form or case doc-type".format(doc_type))
        return
    pillows = get_form_es_pillows() if doc_type == 'form' else get_case_es_pillows()

    for pillow in pillows:
        print("Processing for pillow {}".format(pillow.pillow_id))
        try:
            checkpoint = HistoricalPillowCheckpoint.objects.get(
                date_updated=since,
                checkpoint_id=pillow.checkpoint.checkpoint_id,
            )
        except HistoricalPillowCheckpoint.DoesNotExist:
            print("No HistoricalPillowCheckpoint data available for pillow {}\n"
                  .format(pillow.pillow_id))
            continue

        deleted_changes = 0
        seq = str_to_kafka_seq(checkpoint.seq)
        # Only the ElasticProcessor instances receive the replayed deletes.
        es_processors = [
            candidate for candidate in pillow.processors
            if isinstance(candidate, ElasticProcessor)
        ]

        feed = pillow.get_change_feed().iter_changes(since=seq, forever=False)
        for total_changes, change in enumerate(feed, 1):
            if change.deleted and change.id:
                deleted_changes += 1
                for processor in es_processors:
                    processor.process_change(change)
            if total_changes % 100 == 0:
                print("Processed {} deletes out of total {} changes for pillow {}\n"
                      .format(deleted_changes, total_changes, pillow.pillow_id))

    print("Finished processing all deletes sucessfully!")
def update_to(self, seq, change=None):
    """Store ``seq`` as this checkpoint's per-partition offsets.

    Args:
        seq: either a string, parsed with ``str_to_kafka_seq``, or an
            already parsed mapping of (topic, partition) keys to offsets,
            which is serialised with ``kafka_seq_to_str`` for logging only.
        change: optional change object; when given, its
            ``metadata.publish_timestamp`` is saved as
            ``doc_modification_time`` on every row, otherwise ``None`` is.

    All ``KafkaCheckpoint`` writes happen inside one ``transaction.atomic``
    block; nothing is written when the parsed sequence is empty.
    """
    if isinstance(seq, six.string_types):
        offsets_by_partition, seq_text = str_to_kafka_seq(seq), seq
    else:
        offsets_by_partition, seq_text = seq, kafka_seq_to_str(seq)

    message = "(%s) setting checkpoint: %s" % (self.checkpoint_id, seq_text)
    pillow_logging.info(message)

    if change:
        doc_modification_time = change.metadata.publish_timestamp
    else:
        doc_modification_time = None

    with transaction.atomic():
        if offsets_by_partition:
            for key, offset in offsets_by_partition.items():
                # key[0] is the topic and key[1] the partition number.
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=self.checkpoint_id,
                    topic=key[0],
                    partition=key[1],
                    defaults={
                        'offset': offset,
                        'doc_modification_time': doc_modification_time,
                    },
                )
def update_to(self, seq, change=None):
    """Persist ``seq`` as this checkpoint's per-partition offsets.

    Args:
        seq: a ``str``, parsed with ``str_to_kafka_seq``, or an already
            parsed mapping of (topic, partition) keys to offsets, which is
            serialised with ``kafka_seq_to_str`` for the log message only.
        change: optional change object; when given, its
            ``metadata.publish_timestamp`` is saved as
            ``doc_modification_time`` on every row, otherwise ``None`` is.

    The ``KafkaCheckpoint`` rows are written inside a single
    ``transaction.atomic`` block; an empty sequence writes nothing.
    """
    if isinstance(seq, str):
        parsed_seq, printable_seq = str_to_kafka_seq(seq), seq
    else:
        parsed_seq, printable_seq = seq, kafka_seq_to_str(seq)

    log_line = "(%s) setting checkpoint: %s" % (self.checkpoint_id, printable_seq)
    pillow_logging.info(log_line)

    doc_modification_time = None
    if change:
        doc_modification_time = change.metadata.publish_timestamp

    with transaction.atomic():
        if parsed_seq:
            for partition_key, offset in parsed_seq.items():
                # partition_key[0] is the topic, partition_key[1] the partition.
                row_values = {
                    'offset': offset,
                    'doc_modification_time': doc_modification_time,
                }
                KafkaCheckpoint.objects.update_or_create(
                    checkpoint_id=self.checkpoint_id,
                    topic=partition_key[0],
                    partition=partition_key[1],
                    defaults=row_values,
                )
def handle(self, **options):
    """Interactively roll pillow checkpoints back to a historical sequence.

    Uses ``options['date']`` and ``options['pillows']``. When no pillow
    arguments are given, all pillows are selected once the operator
    confirms; otherwise a pillow is selected if its id equals an argument
    or an argument matches it as a case-insensitive regular expression
    (``re.match``, so the match is anchored at the start of the id).

    For every selected pillow, the first ``HistoricalPillowCheckpoint``
    returned for its checkpoint id with ``date_updated`` before the given
    date is fetched, the pending change is shown, and the checkpoint is
    updated only if the operator confirms.

    Raises:
        CommandError: when a bulk prompt is declined or no pillow matches.
    """
    date = options['date']
    pillow_args = set(options['pillows'] or [])

    # A run without arguments touches everything, so ask first.
    if not pillow_args:
        if not confirm('Reset checkpoints ALL pillows?'):
            raise CommandError('Abort')

    def _pillow_match(pillow_id):
        # Exact id first, then each argument interpreted as a regex.
        if pillow_id in pillow_args:
            return True
        for pattern in pillow_args:
            if re.match(pattern, pillow_id, re.IGNORECASE):
                return True
        return False

    def _describe_change(as_kafka_diff, old_seq, new_seq):
        # Kafka sequences get a line-by-line diff; others a from/to summary.
        if as_kafka_diff:
            before = pprint.pformat(old_seq).splitlines()
            after = pprint.pformat(new_seq).splitlines()
            return '\n'.join(difflib.ndiff(before, after))
        return 'from: {}\nto : {}'.format(old_seq, new_seq)

    candidates = get_all_pillow_instances()
    if pillow_args:
        pillows = [
            candidate for candidate in candidates
            if _pillow_match(candidate.pillow_id)
        ]
    else:
        pillows = candidates

    if not pillows:
        raise CommandError('No pillows match: {}'.format(options['pillows']))

    selected_ids = '\n '.join(p.pillow_id for p in pillows)
    if not confirm('Update checkpoints for {}?'.format(selected_ids)):
        raise CommandError('abort')

    for pillow in pillows:
        checkpoint = pillow.checkpoint
        history = HistoricalPillowCheckpoint.objects.filter(
            checkpoint_id=checkpoint.checkpoint_id,
            date_updated__lt=date,
        )
        historical_checkpoint = history.first()
        if not historical_checkpoint:
            message = 'No historical checkpoints for {} before {}'.format(
                checkpoint.checkpoint_id, date)
            print(self.style.ERROR(message))
            continue

        old_seq = pillow.get_last_checkpoint_sequence()
        new_seq = historical_checkpoint.seq
        as_kafka_diff = (
            checkpoint.sequence_format == 'json' and isinstance(old_seq, dict)
        )
        if as_kafka_diff:
            # Parse the stored string so it can be compared with the dict.
            new_seq = str_to_kafka_seq(new_seq)
        diff = _describe_change(as_kafka_diff, old_seq, new_seq)

        pillow_id = pillow.pillow_id
        if old_seq == new_seq:
            print('Sequences for {} are identical, moving on.'.format(pillow_id))
            continue

        prompt = "\nReset checkpoint for '{}' pillow to sequence from {}:\n\n{}\n".format(
            pillow_id, historical_checkpoint.date_updated, diff)
        if confirm(prompt):
            pillow.checkpoint.update_to(new_seq)
            print(self.style.SUCCESS("Checkpoint for {} updated\n".format(pillow_id)))