Example #1
0
 def generate_Lk(min_sup_src, L_k1, C_k, k):
     """Build the level-k candidate table C_k from the level-(k-1) table L_k1.

     For every prefix key in L_k1, each item is paired with every later item
     under the same prefix; the pair's tid lists are intersected, and pairs
     whose intersection meets ``min_sup_src`` are collected and inserted
     into ``C_k`` under the extended key.
     """
     for prefix, siblings in L_k1.get_items():
         for i in range(siblings.size() - 1):
             pivot = siblings.get_item(i)

             # Extend the shared prefix with the pivot's last element.
             if prefix == '':
                 candidate_key = pivot.last_item
             else:
                 candidate_key = prefix + ',' + pivot.last_item

             survivors = HashItemCollection()

             # Pair the pivot with every item that follows it; keep only
             # pairs whose tid intersection reaches the support threshold.
             for other in siblings.get_items_from(i + 1):
                 extension = HashItem(other.last_item)
                 shared_tids = set(pivot.tids).intersection(other.tids)
                 if len(shared_tids) >= min_sup_src:
                     extension.add_tids(list(shared_tids))
                     survivors.add_item(extension)

             # Register the new itemsets at the next level, if any survived.
             if survivors.size() > 0:
                 C_k.insert(candidate_key, survivors)
Example #2
0
 def deserialize(self, file_name, reset_table=True):
     """Load ``self.table`` from a serialized text file.

     The file alternates lines: even lines (0-based) hold an itemset key,
     odd lines hold the serialized HashItemCollection for the key on the
     preceding line.

     Parameters:
         file_name: path of the file to read.
         reset_table: when true, clear the existing table first; when
             false, loaded entries are merged into (and may overwrite)
             the current table.
     """
     # Idiomatic truthiness test instead of `== True`.
     if reset_table:
         self.table = {}
     with open(file_name, "r") as text_file:
         collection_key = None
         # enumerate replaces the hand-rolled `k` line counter.
         for line_no, line in enumerate(text_file):
             if line_no % 2 == 0:
                 # Even line: the itemset key for the following payload.
                 collection_key = line.strip()
             else:
                 # Odd line: serialized collection belonging to that key.
                 collection = HashItemCollection()
                 collection.deserialize(line.strip())
                 self.table[collection_key] = collection
Example #3
0
 def generate_Lk_vw(min_sup, L_k1, C_k_file, k):
     """Generate level-k candidate itemsets and stream them to ``C_k_file``.

     Each qualifying candidate is written as two lines: the extended key,
     then the serialized HashItemCollection of items that met ``min_sup``.

     Parameters:
         min_sup: minimum number of shared tids for a pair to survive.
         L_k1: level-(k-1) table, iterated via ``get_items()``.
         C_k_file: output file path.
         k: candidate size, used only for the progress message.
     """
     print('generate candidates with ' + str(k) + ' items')
     # Context manager guarantees the file is closed even if an exception
     # is raised mid-generation (the original leaked the handle on error).
     with open(C_k_file, 'w') as file_writer:
         for key, hash_item_collection in L_k1.get_items():
             for index in range(hash_item_collection.size() - 1):

                 index_th_item = hash_item_collection.get_item(index)
                 # Extend the shared prefix with this item's last element.
                 if key == '':
                     new_key = index_th_item.last_item
                 else:
                     new_key = key + ',' + index_th_item.last_item
                 new_hash_collection = HashItemCollection()

                 # Pair with every later item; keep pairs whose tid
                 # intersection meets the minimum support.
                 for item in hash_item_collection.get_items_from(index + 1):
                     new_item = HashItem(item.last_item)
                     inter_items = set(index_th_item.tids).intersection(item.tids)
                     if len(inter_items) >= min_sup:
                         new_item.add_tids(list(inter_items))
                         new_hash_collection.add_item(new_item)

                 # Persist surviving itemsets as a key line + payload line.
                 if new_hash_collection.size() > 0:
                     file_writer.write(new_key)
                     file_writer.write('\n')
                     file_writer.write(new_hash_collection.serialize())
                     file_writer.write('\n')
Example #4
0
    def generate_Lk_w(min_sup_src, L_k1, C_k_file, k, inclusive_items_dict):
        """Generate level-k candidate itemsets, skipping inclusive pairs,
        and write the survivors to ``C_k_file``.

        Like generate_Lk_vw, but candidates containing an "inclusive" pair
        of items (per ``inclusive_items_dict``, tested by
        ``Apriori.checkInclusiveItems``) are filtered out before the
        support check.

        Parameters:
            min_sup_src: minimum number of shared tids for a pair to survive.
            L_k1: level-(k-1) table, iterated via ``get_items()``.
            C_k_file: output file path.
            k: candidate size (unused here beyond documentation).
            inclusive_items_dict: mapping describing mutually inclusive items.
        """
        #print('generate candidates with ' + str(k) + ' items')
        # Context manager guarantees the file is closed even if an exception
        # is raised mid-generation (the original leaked the handle on error).
        with open(C_k_file, 'w') as file_writer:
            for key, hash_item_collection in L_k1.get_items():
                for index in range(hash_item_collection.size() - 1):

                    index_th_item = hash_item_collection.get_item(index)
                    # Extend the shared prefix with this item's last element.
                    if key == '':
                        new_key = index_th_item.last_item
                    else:
                        new_key = key + ',' + index_th_item.last_item
                    new_hash_collection = HashItemCollection()

                    previous_itemset = string_2_itemset(new_key)
                    for item in hash_item_collection.get_items_from(index + 1):
                        # Skip candidates containing an inclusive item pair.
                        if Apriori.checkInclusiveItems(previous_itemset,
                                                       item.last_item,
                                                       inclusive_items_dict):
                            continue
                        # Build the extended itemset and check its support.
                        new_item = HashItem(item.last_item)
                        inter_items = set(index_th_item.tids).intersection(
                            item.tids)
                        if len(inter_items) >= min_sup_src:
                            new_item.add_tids(list(inter_items))
                            new_hash_collection.add_item(new_item)

                    # Persist surviving itemsets as a key line + payload line.
                    if new_hash_collection.size() > 0:
                        file_writer.write(new_key)
                        file_writer.write('\n')
                        file_writer.write(new_hash_collection.serialize())
                        file_writer.write('\n')
Example #5
0
 def insert_key(self, key):
     """Bind *key* in the table to a fresh, empty HashItemCollection."""
     self.table[key] = HashItemCollection()