    def dumpData(self):

        print("Start logging data to tmp folder")
        # ClientState
        # utilities.dump_data_to_file(self.padding_ds, "tmp", "paddingset")
        utilities.dump_data_to_file(self.getClientState(), "tmp", "client")
        # dump a snapshot of the local cache to tmp/cache and print its stats
        self.cached_data_clusters.getAllLocalCacheWithoutLock()
        # utilities.dump_data_to_file(self.cached_data_clusters.getAllLocalCacheWithoutLock(), "tmp", "cache")
        utilities.dump_data_to_file(self.keywords_tracking, "tmp", "keywords_tracking")

        # print out:
        # throughput, bogus pairs, local cache pairs and cache size,
        # total pairs in the EDB, and average result length in the padded DB
        print("Throughput " + str(self.accumulated_throughput))
        print("Batch count " + str(self.batch_count))
        print("Average batch processing time (ms) " + str((self.total_time_pad_enc / self.batch_count) * 1000))
        print("Bogus pairs " + str(self.accumulated_bogus))
        # content = self.cached_data_clusters.getClusterSizeAllWithoutLock()
        # print("Cache pairs " + str(content))
        # print("Cache (mb) " + str(os.path.getsize(os.path.join("tmp", "cache")) / (1024 * 1024.0)))

        print("EDB size " + str(utilities.monitor_dict_size(self.keywords_tracking)))
        print("EDB size (mb) " + str(utilities.get_size("shield.db") / (1024 * 1024.0)))
        print("Avg result length " + str(utilities.monitor_dict_size(self.keywords_tracking) / len(self.keywords_tracking)))
Example #2
import random

import utilities  # project-local helpers (dump_data_to_file, ...)


def generatePaddingData(clusters_keywords_props, *args):
    # format: [[('a', 0.23), ('b', 0.2), ...], [], [], [], []]
    # the options dict ({'max_bogus_size', 'writable', 'dir', 'file_name'})
    # is passed as the first positional argument
    options = args[0]
    max_bogus_size = options['max_bogus_size']
    file_count = [(100000 + i) for i in range(1, max_bogus_size + 1)]
    padding_ds = {}

    count = 0

    for cluster in clusters_keywords_props:
        count += 1
        print("Processing cluster " + str(count) + "/" +
              str(len(clusters_keywords_props)))

        for keyword_prop in cluster:
            keyword = keyword_prop[0]
            probability = keyword_prop[1]

            # scale the keyword probability by the padding budget;
            # 1500 is used here as the average number of documents per file
            bogus_file_no = int(probability * max_bogus_size * 1500)

            # every keyword gets at least half of the padding budget
            if bogus_file_no < max_bogus_size / 2:
                bogus_file_no = int(max_bogus_size / 2)

            random.shuffle(file_count)

            bogus_ids = file_count[0:bogus_file_no]
            padding_ds[keyword] = bogus_ids

    # dump to file
    if options['writable']:
        utilities.dump_data_to_file(padding_ds, options['dir'],
                                    options['file_name'])
    # also return the dataset so it can be used without writing to disk
    return padding_ds
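# Example invocation (assumed): `clusters` is a placeholder list of clusters of
# (keyword, probability) pairs, and the options dict is passed positionally to
# match the signature above.
clusters = [[('price', 0.23), ('stock', 0.20)], [('memo', 0.05)]]
padding_ds = generatePaddingData(clusters, {
    'max_bogus_size': 1000,
    'writable': False,
    'dir': 'tmp',
    'file_name': 'paddingset',
})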
Example #3
    def getAllLocalCacheWithoutLock(self):
        # despite the name, the lock is held briefly to take a consistent
        # deep-copy snapshot of the cache before it is dumped
        self.lock.acquire()
        img = copy.deepcopy(self.cached_data_clusters)
        self.lock.release()
        utilities.dump_data_to_file(img, "tmp", "cache")
        content = self.getClusterSizeAllWithoutLock()
        print("Cache pairs " + str(content))
        print("Cache (mb) " + str(os.path.getsize(os.path.join("tmp", "cache")) / (1024 * 1024.0)))
Example #4
    def run(self):

        try:
            self.service_connector.open_connection()
            n_query_number = int(0.1 * len(self.keywords_tracking))
            print("No. keywords " + str(len(self.keywords_tracking)))
            # serving counting attack
            access_patterns = []

            # total ids
            # total_ids = 0

            # select keywords; search tokens are generated later to perform the search
            n_query_keywords = random.sample(
                list(self.keywords_tracking.keys()), n_query_number)

            query_time = 0
            for query_keyword in n_query_keywords:
                start_time = timer()  # in seconds

                search_token = self.sse_client.generateToken(query_keyword)
                encrypted_IDs = self.service_connector.search_connect(search_token)
                if encrypted_IDs is not None:
                    raw_ids = self.sse_client.decryptIDs(encrypted_IDs)
                else:
                    raw_ids = []
                    print("Keyword " + query_keyword + " is not tracked")
                _ = self.search_local_cache(query_keyword)

                end_time = timer()

                access_patterns.append(raw_ids)
                # total_ids += len(encrypted_IDs)

                query_time += (end_time - start_time)

            avg_cur_trial = (query_time / n_query_number) * 1000

            print("Avg search time (ms) per keyword: " + str(avg_cur_trial))
            # print("Total ids " + str(total_ids))

            # write search data to file
            utilities.dump_data_to_file(n_query_keywords, "tmp", "query")
            utilities.dump_data_to_file(access_patterns, "tmp", "access")
        # except:
        #     print("Search exception")
        finally:
            self.service_connector.close_connection()
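# A counterpart loader (assumed), matching the pickle-based dump sketched
# earlier, for reading back the "query" and "access" files that run() writes.
import os
import pickle


def load_data_from_file(dir_name, file_name):
    # assumed inverse of utilities.dump_data_to_file
    with open(os.path.join(dir_name, file_name), "rb") as f:
        return pickle.load(f)


queried_keywords = load_data_from_file("tmp", "query")
access_patterns = load_data_from_file("tmp", "access")
print("queried keywords: " + str(len(queried_keywords)) +
      ", access patterns: " + str(len(access_patterns)))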
Example #5
    def generatePaddingData(self, clusters_keywords_props, max_bogus_size=550000, **kwargs):
        # format: [[('a', 0.23), ('b', 0.2), ...], [], [], [], []]

        file_count = [(100000 + i) for i in range(1, max_bogus_size + 1)]
        self.padding_ds = {}

        for cluster in clusters_keywords_props:
            for keyword_prop in cluster:
                keyword = keyword_prop[0]
                probability = keyword_prop[1]

                bogus_file_no = int(probability * max_bogus_size * 100)  # 100 is the average number of documents per file

                random.shuffle(file_count)

                bogus_ids = file_count[0:bogus_file_no]
                self.padding_ds[keyword] = bogus_ids

        # dump to file
        if kwargs.get('writable'):
            utilities.dump_data_to_file(self.padding_ds, kwargs['dir'], kwargs['file_name'])
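# Example invocation of the method-style variant (assumed): max_bogus_size is a
# regular keyword argument with a default, while 'writable', 'dir' and
# 'file_name' are picked up through **kwargs. `builder` and `clusters` stand in
# for whatever object owns this method and its cluster input.
builder.generatePaddingData(clusters, max_bogus_size=1000,
                            writable=True, dir="tmp", file_name="paddingset")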
Example #6
    def exportClusters(self, datadir, cluster_keyword_file, cluster_prob_file):

        utilities.dump_data_to_file(self.clusters_keywords, datadir,
                                    cluster_keyword_file)
        utilities.dump_data_to_file(self.cluster_probabilities, datadir,
                                    cluster_prob_file)
Example #7
    def run(self):

        # real ids per keyword, which we later pad and transfer back to the server
        cur_cluster_real_ids = {}
        outsourced_key_count = 0
        padding_count = 0

        self.service_connector.open_connection()
        ######## test
        total_ids = 0
        temp_total_search_time = 0.0
        for queried_keyword in self.queried_keywords:

            start_temp_search_time = timer()
            search_token = self.sse_client.generateToken(queried_keyword)
            if search_token is not None:
                encrypted_IDs = self.service_connector.search_connect(
                    search_token)
                # and time to decryptID as well
                _ = self.sse_client.decryptIDs(
                    encrypted_IDs)  # considered as access patterns.
                total_ids += len(encrypted_IDs)

            # FARE COMPARE: LOCAL CACHE SEARCH IS NOT INCLUDED
            # to include the time spent searching the local cache, uncomment:
            # _ = self.search_local_cache(queried_keyword)
            end_temp_search_time = timer()
            temp_total_search_time += (end_temp_search_time -
                                       start_temp_search_time)

        print(">>>GROUND TEST searching queried keywords (ms) " +
              str((temp_total_search_time / len(self.queried_keywords)) *
                  1000))
        print(">>>Total ids " + str(total_ids))

        self.service_connector.close_connection()

        ##########     ending test

        print(
            "> Phase 1: Query back and delete from the EDB; add bogus ids back to the padding dataset"
        )
        self.service_connector.open_connection()

        start_time = timer()

        for keyword in self.rebuild_keywords:
            search_token = self.sse_client.generateToken(keyword)
            if search_token is not None:
                encrypted_IDs = self.service_connector.search_del_connect(
                    search_token)
                raw_ids = self.sse_client.decryptIDs(encrypted_IDs)

                # update counters
                outsourced_key_count += 1
                # reset tracking: these keywords are added back later with a
                # different number of real and padding ids
                self.keywords_tracking[keyword] = 0

                # delete the keyword from the client state
                self.sse_client.deleteState(keyword)
                # separate real documents from bogus padding (bogus ids sit
                # above the 100000 offset)
                (real_ids,
                 bogus_ids) = utilities.split_real_documents(raw_ids, 100000)

                padding_count += len(bogus_ids)
                if keyword in self.padding_dataset:
                    cur_padding = self.padding_dataset[keyword]
                    mergedlist = list(set(cur_padding + bogus_ids))
                    self.padding_dataset[keyword] = mergedlist

                # keep the real ids for later re-encryption
                cur_cluster_real_ids[keyword] = real_ids

        end_time = timer()

        self.service_connector.close_connection()

        #print("updated keyword state " + str(len(self.sse_client.getKeywordState())))

        # rebuild time in phase 1 includes:
        # query time + deletion at the server and in the client state +
        # filtering and putting bogus ids back into the padding dataset +
        # resetting the tracking counters

        print(">>> rebuild time (ms) phase 1 " +
              str((end_time - start_time) * 1000))
        print(">>> no. outsourced keys of the cluster " +
              str(outsourced_key_count))
        print(">>> no. of original keys of the cluster " +
              str(len(self.rebuild_keywords)))
        print(">>> no. real downloaded ids " +
              str(utilities.monitor_len_dataset(cur_cluster_real_ids)))
        print(">>> no. bogus downloaded ids " + str(padding_count))

        # re-test the queried keywords

        self.service_connector.open_connection()

        # how many of the rebuilt keywords occur in the query test
        count_in_query = len(
            set(self.rebuild_keywords).intersection(set(
                self.queried_keywords)))

        total_ids = 0
        temp_total_search_time = 0.0
        for queried_keyword in self.queried_keywords:

            start_temp_search_time = timer()
            search_token = self.sse_client.generateToken(queried_keyword)
            if search_token is not None:
                encrypted_IDs = self.service_connector.search_connect(
                    search_token)
                # and time to decryptID as well
                _ = self.sse_client.decryptIDs(
                    encrypted_IDs)  # considered as access patterns.
                total_ids += len(encrypted_IDs)

            # to include the time spent searching the local cache, uncomment:
            # _ = self.search_local_cache(queried_keyword)
            end_temp_search_time = timer()
            temp_total_search_time += (end_temp_search_time -
                                       start_temp_search_time)

        self.service_connector.close_connection()

        print(">>> no. of queried keywords " + str(len(self.queried_keywords)))
        print(">>> rebuild keywords in queried set " + str(count_in_query))
        print(">>> re-test searching queried keywords (ms) " +
              str((temp_total_search_time / len(self.queried_keywords)) *
                  1000))
        print(">>> total ids " + str(total_ids))

        print(
            "> Phase 2: Pad the downloaded keyword ids using high mode, and outsource encrypted data to server"
        )

        padding_count = 0
        self.service_connector.open_connection()

        start_re_encrypt_time = timer()

        # pad every keyword in the cluster up to the longest real-id list
        max_length = max(
            (len(value) for value in cur_cluster_real_ids.values()), default=0)

        # finalise the bogus padding for each keyword
        for keyword, value in cur_cluster_real_ids.items():
            bogus_count = max_length - len(value)
            padding_count += bogus_count

            if bogus_count > 0:
                # draw random bogus ids and append them to the real ids
                random.shuffle(self.padding_dataset[keyword])
                bogus_files = self.padding_dataset[keyword][0:bogus_count]
                value.extend(bogus_files)

                # remove the used bogus ids from the padding dataset
                self.padding_dataset[keyword] = self.padding_dataset[keyword][
                    bogus_count:]

            # reset tracking to the padded result length
            self.keywords_tracking[keyword] = len(value)

        #encrypt and transfer to server
        encrypted_batch = self.sse_client.streaming(cur_cluster_real_ids)
        #transfer to server
        self.service_connector.streaming_connect(encrypted_batch)

        end_re_encrypt_time = timer()

        self.service_connector.close_connection()

        print(">>> Re-encrypt time (ms) " +
              str((end_re_encrypt_time - start_re_encrypt_time) * 1000))
        print(">>> Bogus used " + str(padding_count))

        print("> Phase 3: Re-search queried keywords")

        self.service_connector.open_connection()

        total_ids = 0
        temp_total_search_time = 0.0
        for queried_keyword in self.queried_keywords:

            start_temp_search_time = timer()
            search_token = self.sse_client.generateToken(queried_keyword)
            if search_token is not None:
                encrypted_IDs = self.service_connector.search_connect(
                    search_token)
                # and time to decryptID as well
                _ = self.sse_client.decryptIDs(
                    encrypted_IDs)  # considered as access patterns.
                total_ids += len(encrypted_IDs)

            # to include the time spent searching the local cache, uncomment:
            # _ = self.search_local_cache(queried_keyword)
            end_temp_search_time = timer()
            temp_total_search_time += (end_temp_search_time -
                                       start_temp_search_time)
        print(">>> re-test searching queried keywords (ms) " +
              str((temp_total_search_time / len(self.queried_keywords)) *
                  1000))
        print(">>> total ids " + str(total_ids))

        self.service_connector.close_connection()

        # overwrite self.keywords_tracking, self.padding_dataset, and the
        # client state on disk
        utilities.dump_data_to_file(self.padding_dataset, "tmp", "paddingset")
        utilities.dump_data_to_file(self.sse_client.exportClientState(), "tmp",
                                    "client")
        utilities.dump_data_to_file(self.keywords_tracking, "tmp",
                                    "keywords_tracking")