Пример #1
0
 def mine(self,tuple_size,num_tuples,weights=None,expand=False,table_size=2**19):
     """
     Mine this object's list database with sampled min-hashing.

     Parameters:
         tuple_size: forwarded unchanged to the `sa` mining routine.
         num_tuples: forwarded unchanged to the `sa` mining routine.
         weights: optional object exposing a `weights` attribute; when truthy
             the weighted routine (`sa.sampledmh_mine_weighted`) is used.
         expand: optional object exposing an `ldb` attribute; when truthy,
             `self.ldb` is first expanded with `sa.mh_expand_listdb` using the
             cumulative frequencies computed against `expand.ldb`.
         table_size: forwarded unchanged to the `sa` mining routine.

     Returns:
         A new SMH wrapping the mined list database.
     """
     if not weights and not expand:
         ldb=sa.sampledmh_mine(self.ldb,tuple_size,num_tuples,table_size)
     elif expand and not weights:
         max_freq=sa.mh_get_cumulative_frequency(self.ldb,expand.ldb)
         # NOTE(review): the expanded database is never released here --
         # confirm whether the `sa` module expects an explicit destroy call.
         ldb_=sa.mh_expand_listdb(self.ldb,max_freq)
         ldb=sa.sampledmh_mine(ldb_,tuple_size,num_tuples,table_size)
     elif not expand and weights:
         # Same argument layout as the expanded+weighted call below; the
         # previous version passed an undefined name (`ifindex`) as an extra
         # positional argument, which raised NameError on this path.
         ldb=sa.sampledmh_mine_weighted(self.ldb,tuple_size,num_tuples,table_size,weights.weights)
     elif expand and weights:
         max_freq=sa.mh_get_cumulative_frequency(self.ldb,expand.ldb)
         # NOTE(review): as above, `ldb_` is not released after mining.
         ldb_=sa.mh_expand_listdb(self.ldb, max_freq)
         # The weights are expanded with the same frequencies so they line up
         # with the expanded database.
         weights_=sa.mh_expand_weights(expand.ldb.size, max_freq, weights.weights)
         ldb=sa.sampledmh_mine_weighted(ldb_,tuple_size,num_tuples,table_size,weights_)

     return SMH(ldb=ldb)
Пример #2
0
    def mine(self,
             listdb,
             weights = None,
             expand = None):
        """
        Samples an inverted file to mine sets of highly co-occurring items.

        Parameters:
            listdb: object exposing the inverted file as its `ldb` attribute.
            weights: optional object exposing a `weights` attribute; when
                truthy, `sa.sampledmh_mine_weighted` is used instead of
                `sa.sampledmh_mine`.
            expand: optional object exposing an `ldb` attribute; when truthy,
                `listdb.ldb` is expanded with `sa.mh_expand_listdb` (using the
                cumulative frequencies against `expand.ldb`) before mining.

        Returns:
            A ListDB wrapping the mined result, after it has been passed
            through `sa.listdb_delete_smallest` with `self.min_set_size_`.
        """
        use_weights = bool(weights)
        use_expand = bool(expand)

        source = listdb.ldb
        if use_expand:
            max_freq = sa.mh_get_cumulative_frequency(listdb.ldb, expand.ldb)
            source = sa.mh_expand_listdb(listdb.ldb, max_freq)

        try:
            if use_weights:
                if use_expand:
                    # Expand the weights with the same frequencies so they
                    # line up with the expanded database.
                    # NOTE(review): the expanded weights are never released;
                    # confirm whether `sa` requires an explicit free.
                    mining_weights = sa.mh_expand_weights(expand.ldb.size,
                                                          max_freq,
                                                          weights.weights)
                else:
                    mining_weights = weights.weights
                mined = sa.sampledmh_mine_weighted(source,
                                                   self.tuple_size_,
                                                   self.number_of_tuples_,
                                                   self.table_size_,
                                                   mining_weights,
                                                   self.min_set_size_)
            else:
                mined = sa.sampledmh_mine(source,
                                          self.tuple_size_,
                                          self.number_of_tuples_,
                                          self.table_size_,
                                          self.min_set_size_)
        finally:
            # The expanded database is a temporary owned by this call; release
            # it even when mining raises so it is not leaked.
            if use_expand:
                sa.listdb_destroy(source)

        sa.listdb_delete_smallest(mined, self.min_set_size_)

        return ListDB(ldb = mined)
Пример #3
0
    def mine(self,
             listdb,
             weights = None,
             expand = None):
        """
        Samples an inverted file to mine sets of highly co-occurring items.

        Parameters:
            listdb: object whose `ldb` attribute is the inverted file to mine.
            weights: optional object with a `weights` attribute; a truthy
                value selects `sa.sampledmh_mine_weighted`.
            expand: optional object with an `ldb` attribute; a truthy value
                makes the method mine an expanded copy of `listdb.ldb` built
                by `sa.mh_expand_listdb`.

        Returns:
            A ListDB wrapping the mined result, after
            `sa.listdb_delete_smallest` has been applied to it with
            `self.min_set_size_`.
        """
        if expand:
            max_freq = sa.mh_get_cumulative_frequency(listdb.ldb, expand.ldb)
            ldb_ = sa.mh_expand_listdb(listdb.ldb, max_freq)
            try:
                if weights:
                    # NOTE(review): `weights_` is never released here; confirm
                    # whether `sa` requires an explicit free for it.
                    weights_ = sa.mh_expand_weights(expand.ldb.size,
                                                    max_freq,
                                                    weights.weights)
                    mined = sa.sampledmh_mine_weighted(ldb_,
                                                       self.tuple_size_,
                                                       self.number_of_tuples_,
                                                       self.table_size_,
                                                       weights_,
                                                       self.min_set_size_)
                else:
                    mined = sa.sampledmh_mine(ldb_,
                                              self.tuple_size_,
                                              self.number_of_tuples_,
                                              self.table_size_,
                                              self.min_set_size_)
            finally:
                # Always release the temporary expanded database, including
                # when mining raises, so it does not leak.
                sa.listdb_destroy(ldb_)
        elif weights:
            mined = sa.sampledmh_mine_weighted(listdb.ldb,
                                               self.tuple_size_,
                                               self.number_of_tuples_,
                                               self.table_size_,
                                               weights.weights,
                                               self.min_set_size_)
        else:
            mined = sa.sampledmh_mine(listdb.ldb,
                                      self.tuple_size_,
                                      self.number_of_tuples_,
                                      self.table_size_,
                                      self.min_set_size_)

        sa.listdb_delete_smallest(mined, self.min_set_size_)

        return ListDB(ldb = mined)
Пример #4
0
 def invert(self):
     """
     Return a new SMH built from `sa.sampledmh_mine(self.ldb)`.

     Before wrapping, the raw result is tagged with `_inverted = True` and
     `_original = self` so it keeps a reference back to this object.

     NOTE(review): this calls the mining routine with a single argument
     rather than an inversion routine -- confirm this is the intended call.
     """
     result = sa.sampledmh_mine(self.ldb)
     # Tag the raw result before handing it to the wrapper.
     result._inverted = True
     result._original = self
     wrapped = SMH(ldb=result)
     return wrapped