def update_record(self, old_cluster_id, new_cluster_id, message, updated_message):
    """Replace a stored message with its updated version, possibly moving clusters.

    The encoded form of ``message`` is removed from
    ``self.clusters[old_cluster_id]`` and from
    ``self.clusters_by_day[message.day][old_cluster_id]``. The encoded form of
    ``updated_message`` is then appended to ``self.clusters[new_cluster_id]``
    and registered via ``self.add_to_clusters_by_day``.

    Args:
        old_cluster_id: cluster currently holding ``message``.
        new_cluster_id: cluster that should hold ``updated_message``.
        message: the message as it is currently stored (decoded form).
        updated_message: the replacement message (decoded form).

    Raises:
        ValueError: propagated from the removal when the encoded ``message``
            is not stored under ``old_cluster_id`` (assuming the per-cluster
            containers are lists -- TODO confirm).
    """
    # Encode each message exactly once and reuse the results below.
    old_m_enc = encode_message(message)
    new_m_enc = encode_message(updated_message)

    # Drop the first occurrence of the old message from both indexes.
    self.clusters[old_cluster_id].remove(old_m_enc)
    self.clusters_by_day[message.day][old_cluster_id].remove(old_m_enc)

    # Store the updated message under its (possibly different) cluster.
    self.clusters[new_cluster_id].append(new_m_enc)
    self.add_to_clusters_by_day(new_cluster_id, updated_message.day, new_m_enc)
def test_purge(self):
    """Check that successive purge calls shrink the cluster collection.

    Two messages (uid 0 on day 0, uid 15 on day 1) are added, then
    ``purge`` is called with 13, 14 and 15; the expected ``len(clusters)``
    after each call is 2, 1 and 0 respectively.
    """
    clusters = Clusters()
    for stored in (Message(0, 0, 0, "human:0"), Message(15, 0, 1, "human:0")):
        clusters.add_messages([encode_message(stored)], 0)

    # (argument passed to purge, expected number of clusters afterwards)
    expectations = ((13, 2), (14, 1), (15, 0))
    for purge_arg, remaining in expectations:
        clusters.purge(purge_arg)
        self.assertEqual(len(clusters), remaining)
def test_add_message_to_cluster_new_cluster_run(self):
    """Check that a same-day message with a different uid opens a new cluster.

    A message with uid 0 is stored first; adding a second message with
    uid 1 on the same day is expected to leave two clusters.
    """
    clusters = Clusters()

    # Seed the collection with the existing message.
    existing = Message(0, 0, 0, "human:1")
    clusters.add_messages([encode_message(existing)], 0)

    # Add the incoming message, which differs only in its uid.
    incoming = Message(1, 0, 0, "human:1")
    clusters.add_messages([encode_message(incoming)], 0)

    self.assertEqual(len(clusters), 2)
def update_records(self, update_messages):
    """Apply risk updates to previously stored encounter messages.

    For every update message, the encounter message it refers to is rebuilt
    from the update's ``uid``, ``risk``, ``day`` and ``unobs_id``, located in
    ``self.clusters_by_day[update_message.day]``, and replaced (through
    ``self.update_record``) by a copy carrying ``update_message.new_risk``.
    The replacement is stored in the cluster chosen by ``hash_to_cluster``.

    Args:
        update_messages: collection of update messages exposing ``uid``,
            ``risk``, ``new_risk``, ``day`` and ``unobs_id``. A falsy value
            (``None`` or empty) makes the call a no-op.

    Returns:
        ``self``, so calls can be chained.
    """
    if not update_messages:
        return self

    # TODO: implement a 24-hour cycle of message updates and batch the update messages by this.
    grouped_update_messages = self.group_by_received_at(update_messages)

    # Only the batches themselves are needed here, not their received-at keys.
    for batch in grouped_update_messages.values():
        for update_message in batch:
            old_message_dec = Message(
                update_message.uid,
                update_message.risk,
                update_message.day,
                update_message.unobs_id,
            )
            old_message_enc = encode_message(old_message_dec)

            # TODO: don't use the secret info when finding the message to update. Also, optimize this loop.
            # Take the first cluster of that day holding the encoded message.
            # "Not found" is signalled with None rather than falsiness,
            # because 0 is a legitimate cluster id.
            clusters_for_day = self.clusters_by_day[update_message.day]
            old_cluster = next(
                (
                    cluster
                    for cluster, messages in clusters_for_day.items()
                    if old_message_enc in messages
                ),
                None,
            )
            # NOTE(review): when no stored message matches, old_cluster stays
            # None and update_record is still called with it, as before --
            # confirm whether such updates should instead be skipped.

            # Build the encounter message with the updated risk and swap it in.
            updated_message = Message(
                old_message_dec.uid,
                update_message.new_risk,
                old_message_dec.day,
                old_message_dec.unobs_id,
            )
            new_cluster = hash_to_cluster(updated_message)
            self.update_record(old_cluster, new_cluster, old_message_dec, updated_message)
    return self
def test_score_bad_match_same_day_run(self):
    """Check that same-day messages with different uids get a score of -1.

    The only stored message is still expected to be reported as the best
    candidate.
    """
    # Message arguments as used here: uid, risk, day, unobs_id
    day = 0
    stored = Message(0, 0, day, "human:0")
    incoming = Message(1, 0, day, "human:1")

    clusters = Clusters()
    clusters.add_messages([encode_message(stored)], day)

    # The best cluster is returned as well but is not checked by this test.
    _, best_message, best_score = clusters.score_matches(incoming, day)

    self.assertEqual(best_score, -1)
    self.assertEqual(stored, best_message)
def test_score_bad_match_one_day_run(self):
    """Check that uids 0 and 6, one day apart, get a score of -1.

    The stored message and its cluster (id 0) are still expected to be
    returned as the best candidate.
    """
    # Message arguments as used here: uid, risk, day, unobs_id
    stored = Message(0, 0, 0, "human:1")
    incoming = Message(6, 0, 1, "human:1")

    clusters = Clusters()
    clusters.add_messages([encode_message(stored)], 0)

    outcome = clusters.score_matches(incoming, 1)
    best_cluster, best_message, best_score = outcome

    self.assertEqual(best_cluster, 0)
    self.assertEqual(best_message, stored)
    self.assertEqual(best_score, -1)
def test_score_good_match_same_day_run(self):
    """Check that identical uids on the same day get a score of 3.

    The stored message and its cluster (id 0) are expected to be returned
    as the best candidate.
    """
    # Message arguments as used here: uid, risk, day, unobs_id
    day = 0
    clusters = Clusters()

    stored = Message(0, 0, day, "human:1")
    clusters.add_messages([encode_message(stored)], day)

    # Same field values as the stored message, but a separate object.
    incoming = Message(0, 0, day, "human:1")
    outcome = clusters.score_matches(incoming, day)
    best_cluster, best_message, best_score = outcome

    self.assertEqual(best_cluster, 0)
    self.assertEqual(best_message, stored)
    self.assertEqual(best_score, 3)
def test_score_good_match_one_day_run(self):
    """Check that uids 0 and 1, one day apart, get a score of 2.

    The stored message and its cluster (id 0) are expected to be returned
    as the best candidate.
    """
    # Message arguments as used here: uid, risk, day, unobs_id
    first_day = 0
    next_day = 1

    stored = Message(0, 0, 0, "human:1")
    incoming = Message(1, 0, 1, "human:1")

    clusters = Clusters()
    clusters.add_messages([encode_message(stored)], first_day)

    outcome = clusters.score_matches(incoming, next_day)
    best_cluster, best_message, best_score = outcome

    self.assertEqual(best_cluster, 0)
    self.assertEqual(best_message, stored)
    self.assertEqual(best_score, 2)