def _finalize(self):
    """Compute per-dimension n-tile boundary values and store them in ``self._state``.

    Thirty-one row-id thresholds are derived as ``int(rowcount * i / 32)`` for
    ``i`` in 1..31.  For every ``(key, db)`` pair in ``self._dbs`` the entries
    yielded by ``db.iterator()`` are walked in order while a running total of
    their counts is kept; whenever an entry moves the running total across a
    threshold, the entry's decoded value is appended to that key's list.

    The result, a dict mapping each key of ``self._dbs`` to its list of
    boundary values, is assigned to ``self._state``.

    Raises:
        AssertionError: if ``Frontend._finalize`` returns something other
            than ``None`` (only when assertions are enabled).
    """
    # The base-class call must not live inside the assert expression:
    # `python -O` strips assert statements, which would skip the call itself.
    base_result = Frontend._finalize(self)
    assert base_result is None

    total_rows = float(self._rowcount)
    rowids_for_ntile_bounds = [
        int(total_rows * float(i) / 32.0) for i in range(1, 32)
    ]
    # NOTE(review): a threshold of 0 (possible when the row count is below 32)
    # can never satisfy `rowid_old < bound` because the running total starts
    # at 0, so such thresholds yield no boundary value -- confirm this is
    # intended.

    ntile_bound_by_dim = {dim: [] for dim in self._dbs}

    for dim, db in self._dbs.items():
        bounds_hit = ntile_bound_by_dim[dim]
        rowid = 0
        with db.iterator() as it:
            for raw_val, raw_cnt in it:
                # Both fields are big-endian unsigned 32-bit integers.  The
                # value has 2**31 subtracted after decoding -- presumably to
                # undo an offset applied when it was stored; confirm against
                # the writer.
                xval = unpack('>I', raw_val)[0] - (1 << 31)
                xcnt = unpack('>I', raw_cnt)[0]
                rowid_old = rowid
                rowid = rowid_old + xcnt
                # One entry may span several thresholds; its value is then
                # recorded once per threshold crossed.
                for bound in rowids_for_ntile_bounds:
                    if rowid_old < bound <= rowid:
                        bounds_hit.append(xval)

    self._state = ntile_bound_by_dim
def _finalize(self):
    """Repeatedly merge clusters until a merge pass leaves their number unchanged.

    ``self._data`` is converted to a NumPy array and transposed.  One
    singleton cluster ``{i}`` is created per row of the transposed array,
    labelled 1..len(data).  ``self._merge(data, clusters)`` is then applied
    until a pass returns the same number of clusters it was given; the
    final cluster list is stored in ``self._state``.

    Raises:
        AssertionError: if ``Frontend._finalize`` returns something other
            than ``None`` (only when assertions are enabled).
    """
    # The base-class call must not live inside the assert expression:
    # `python -O` strips assert statements, which would skip the call itself.
    base_result = Frontend._finalize(self)
    assert base_result is None

    data = np.array(self._data).T

    # Labels are 1-based.  NOTE(review): the disabled diagnostics below index
    # `data` with `abs(label) - 1`, which suggests `_merge` may produce
    # negative labels -- confirm against `_merge`.
    rest = [{label} for label in range(1, len(data) + 1)]

    while True:
        len_rest_before = len(rest)
        rest = self._merge(data, rest)
        if len(rest) == len_rest_before:
            break

    self._state = rest

    # --- Disabled diagnostics; flip a guard to True to enable locally. ---
    if False:
        print("-- CLUSTERS --")
        print(len(rest), rest)

    if False:
        print("-- INSIDE CORRELATIONS --")
        for dc in rest:
            print(dc)
            for di in dc:
                for dj in dc:
                    if abs(di) >= abs(dj):
                        continue
                    corr = np.corrcoef(data[abs(di) - 1], data[abs(dj) - 1])[0][1]
                    print(di, dj, corr)

    if False:
        print("-- OUTSIDE CORRELATIONS --")
        for (i, dci) in enumerate(rest):
            ivals = self._cluster_val(data, dci)
            for (j, dcj) in enumerate(rest):
                if i >= j:
                    continue
                jvals = self._cluster_val(data, dcj)
                corr = np.corrcoef(ivals, jvals)[0][1]
                print(i, j, corr)
def _finalize(self):
    """Partition the indices ``0 .. self._lenrow - 1`` into clusters.

    Starting from the full index set, ``self._split(rest)`` is called while
    at least two indices remain.  Each call returns ``(left, right, rest)``;
    ``left`` and ``right`` are appended to ``self._state`` and the loop
    continues with the returned ``rest``.  A final single leftover index, if
    any, is appended as its own set.

    Raises:
        AssertionError: if ``Frontend._finalize`` returns something other
            than ``None`` (only when assertions are enabled).
    """
    # The base-class call must not live inside the assert expression:
    # `python -O` strips assert statements, which would skip the call itself.
    base_result = Frontend._finalize(self)
    assert base_result is None

    self._state = []
    rest = set(range(0, self._lenrow))

    while len(rest) >= 2:
        (left, right, rest) = self._split(rest)
        self._state.append(left)
        self._state.append(right)

    # At most one index can be left over; it forms a cluster of its own.
    if rest:
        self._state.append(rest)

    # --- Disabled diagnostics; flip the guard to True to enable locally. ---
    if False:
        for (cluster_id, cluster) in enumerate(self._state):
            print("-->", cluster_id, cluster)
            for i in cluster:
                a = {i}
                b = cluster - a
                print(" {:d} {:1.4f}".format(i, self._i_corr(a, b)))
def _finalize(self):
    """Copy all records from ``self._kdb`` to ``self._ldb`` and compute info-content tables.

    Steps, in order:

    1. Walk ``self._kdb`` with a cursor and ``put`` every ``(key, value)``
       record into ``self._ldb``.
    2. Fill ``self._ic[i]`` with ``self._get_info_content_by_dimension(i)``
       for every ``i`` in ``range(self._lenrow)``.
    3. Fill ``self._icbp[(i, j)]`` with
       ``self._get_info_content_by_pair(i, j)`` for every pair ``i < j``.
    4. Store ``{"ic", "icbp", "c", "b", "x"}`` in ``self._state``.

    Raises:
        AssertionError: if ``Frontend._finalize`` returns something other
            than ``None`` (only when assertions are enabled).
        struct.error: if a record's key or value does not match the
            ``'>IIIII'`` / ``'>Q'`` layouts.
    """
    # The base-class call must not live inside the assert expression:
    # `python -O` strips assert statements, which would skip the call itself.
    base_result = Frontend._finalize(self)
    assert base_result is None

    if False:  # disabled diagnostic
        print("unique combinations = ", self._kdb.count())

    keyfmt = '>IIIII'  # five big-endian unsigned 32-bit integers
    valfmt = '>Q'      # one big-endian unsigned 64-bit integer

    # NOTE(review): the cursor is never explicitly released here; if the
    # backing store requires that (some key-value bindings do), add it.
    c = self._kdb.cursor()
    c.jump()

    # Tallies of values above each threshold; consumed only by the disabled
    # diagnostic print below.
    gt2 = gt4 = gt8 = gt16 = gt32 = 0

    while True:
        # Loop ends on the first falsy result.  Presumably the True argument
        # makes the cursor step to the next record -- confirm with the
        # store's API.
        r = c.get(True)
        if not r:
            break
        self._ldb.put(r[0], r[1])

        # The decoded key is not used, but the call is kept because it raises
        # on keys that do not match the expected layout.
        unpack(keyfmt, r[0])
        val = unpack(valfmt, r[1])[0]

        if val > 2:
            gt2 += 1
        if val > 4:
            gt4 += 1
        if val > 8:
            gt8 += 1
        if val > 16:
            gt16 += 1
        if val > 32:
            gt32 += 1

    if False:  # disabled diagnostic
        print(gt2, gt4, gt8, gt16, gt32)

    # The tables are filled incrementally (not via a comprehension) so that
    # `self._ic` / `self._icbp` already exist while the helpers run, exactly
    # as before.
    self._ic = {}
    for i in range(0, self._lenrow):
        self._ic[i] = self._get_info_content_by_dimension(i)

    self._icbp = {}
    for i in range(0, self._lenrow):
        for j in range(i + 1, self._lenrow):
            self._icbp[(i, j)] = self._get_info_content_by_pair(i, j)

    self._state = {
        "ic": self._ic,
        "icbp": self._icbp,
        "c": self._len_c,
        "b": self._len_b,
        "x": self._len_x,
    }