def createCliqueFinder(self, text, *args):
    """Return a FORANative.CliqueFinder bound to a freshly prepared context.

    `text` and `args` are forwarded unchanged to
    `self.createCliqueFinderContext`, which yields a 3-tuple; only its first
    and last elements are used here.
    """
    finderContext, _, dataManager = self.createCliqueFinderContext(text, *args)

    # Unload whatever the third tuple element allows before resuming the
    # context (presumably a vector data manager -- confirm against
    # createCliqueFinderContext).
    dataManager.unloadAllPossible()
    finderContext.resume()

    return FORANative.CliqueFinder(finderContext)
def disable_big_lm(self): rows = 20 cols = 20 passes = 3 def shouldDrop(row, col): return col > 1 or row < 20 randomSeed = 1 pageCounts = {} vecText = """ Vector.range(%s, fun(colIx) { Vector.range(%s, { Vector.range(1000).paged}).sum() }) """ % (cols, rows) context, argIVs, vdm = self.createCliqueFinderContext( """fun(v) { math.regression.linear.computeXTX(dataframe.DataFrame(v), splitLimit:100) }""", vecText) vecOfVecs = argIVs[0] droppedPages = set() pageCoordinates = {} coordsToPage = {} for col in range(len(vecOfVecs)): vec = vecOfVecs[col] for row, vdid in enumerate( vec.getVectorDataIdsForSlice(0, len(vec), vdm)): pageCoordinates[vdid.page] = (row, col) coordsToPage[(row, col)] = vdid.page rows = max(rows, row + 1) for col in range(cols): vec = vecOfVecs[col] for row, vdid in enumerate( vec.getVectorDataIdsForSlice(0, len(vec), vdm)): if shouldDrop(row, col): vdm.dropPageWithoutWritingToDisk(vdid.page) droppedPages.add(vdid.page) print "dropped ", len(droppedPages), " pages" context.resume() for passIx in range(3): def inc(d, e): if e not in d: d[e] = 0 d[e] += 1 for index in range(passes): t0 = time.time() added = 0 while time.time() - t0 < 2.0: randomSeed += 1 cliqueFinder = FORANative.CliqueFinder(context) task = cliqueFinder.getRootTask() cliqueFinder.searchFromTopOfTreeReturningCliquesCreated( time.time() + 2.0, randomSeed) for n in task.cliques(): added += 1 for p in n: inc(pageCounts, p) print "ix: %s. 
cliques: %s" % (index, added) pageCountsSorted = sorted(list(pageCounts.values())) c1 = pageCountsSorted[len(pageCountsSorted) / 4] c2 = pageCountsSorted[len(pageCountsSorted) * 2 / 4] c3 = pageCountsSorted[len(pageCountsSorted) * 3 / 4] if c2 <= c1: c2 = c1 + 1 if c3 <= c2: c3 = c2 + 1 print "rows: ", rows print "cols: ", cols print "passes: ", passes print for row in range(rows): print "row %3s: " % row, for col in range(cols): if (row, col) in coordsToPage: p = coordsToPage[(row, col)] if p not in pageCounts or pageCounts[p] == 0: sym = ' ' elif pageCounts[p] < c1: sym = '.' elif pageCounts[p] < c2: sym = '-' elif pageCounts[p] < c3: sym = '*' else: sym = '#' else: sym = '?' print sym, print print print print print print "Total pages mentioned: ", len(pageCounts), " of ", len( pageCoordinates) self.assertTrue(len(pageCounts) * 2 > len(pageCoordinates))