def main():
    """Refresh both checkouts, rebuild with ant, and compile the nightly competitor.

    Steps, in order:

    1. ``hg pull -u`` inside ``constants.BENCH_BASE_DIR`` (up to 30 attempts,
       sleeping 60 seconds after each failed attempt).
    2. ``svn cleanup`` / ``svn update`` inside
       ``constants.BASE_DIR/NIGHTLY_DIR`` (same retry policy); the revision is
       parsed from ``update.log`` with ``reSVNRev`` and printed.
    3. ``ant clean`` and ``ant compile`` via ``constants.ANT_EXE``.
    4. Build the 'wikimedium' data source, a Competition, an index and a
       competitor, and compile the competitor with ``benchUtil.RunAlgs``.

    Raises:
        RuntimeError: if hg reports 'not updating', if either update command
            still fails after 30 attempts, or if no revision can be parsed
            from ``update.log``.
    """
    # --- benchmark checkout (hg) ---
    os.chdir(constants.BENCH_BASE_DIR)
    for _ in range(30):
        try:
            runCommand('hg pull -u > hgupdate.log')
        except RuntimeError:
            message(' retry...')
            time.sleep(60.0)
        else:
            # Close the log promptly instead of relying on garbage collection.
            with open('hgupdate.log', 'r') as logFile:
                s = logFile.read()
            if s.find('not updating') != -1:
                raise RuntimeError('hg did not update: %s' % s)
            break
    else:
        # Only reached when all 30 attempts raised RuntimeError.
        raise RuntimeError('failed to run hg pull -u')

    # --- nightly checkout (svn) ---
    os.chdir('%s/%s' % (constants.BASE_DIR, NIGHTLY_DIR))
    runCommand('svn cleanup')
    # NOTE(review): the '>' redirect in the svn command below truncates
    # update.log, so this appended header does not survive; '>>' may have
    # been intended.  The command is left unchanged here.
    with open('update.log', 'ab') as logFile:
        logFile.write('\n\n[%s]: update' % datetime.datetime.now())
    for _ in range(30):
        try:
            runCommand('svn update > update.log 2>&1')
        except RuntimeError:
            message(' retry...')
            time.sleep(60.0)
        else:
            with open('update.log', 'rb') as logFile:
                m = reSVNRev.search(logFile.read())
            if m is None:
                # Previously this surfaced as an AttributeError on None.
                raise RuntimeError('could not parse SVN revision from update.log')
            svnRev = int(m.group(1))
            # Single-argument call form prints the same on Python 2 and 3.
            print('SVN rev is %s' % svnRev)
            break
    else:
        raise RuntimeError('svn update failed')

    # --- rebuild ---
    runCommand('%s clean > clean.log 2>&1' % constants.ANT_EXE)
    runCommand('%s compile > compile.log 2>&1' % constants.ANT_EXE)

    # --- competitor setup ---
    MEDIUM_LINE_FILE = constants.NIGHTLY_MEDIUM_LINE_FILE
    MEDIUM_INDEX_NUM_DOCS = constants.NIGHTLY_MEDIUM_INDEX_NUM_DOCS

    mediumSource = competition.Data('wikimedium',
                                    MEDIUM_LINE_FILE,
                                    MEDIUM_INDEX_NUM_DOCS,
                                    constants.WIKI_MEDIUM_TASKS_FILE)

    comp = competition.Competition()
    index = comp.newIndex(NIGHTLY_DIR, mediumSource)
    # NOTE(review): `id` is not assigned in this function; unless the module
    # defines a global named `id`, this passes the builtin id() function.
    c = comp.competitor(id, NIGHTLY_DIR, index=index)
    r = benchUtil.RunAlgs(constants.JAVA_COMMAND, True)
    r.compile(c)
# Script fragment: build a single index from the trunk checkout, then read the
# run parameters from the command line.  `sourceData` is defined outside this
# fragment, and the body of the final `for` loop is not visible here.
comp = competition.Competition(randomSeed=0)

# Index definition: Lucene90 postings, 'Memory' postings for the id field,
# doc-values fields added, no grouping and no deletions.
index = comp.newIndex(
    constants.TRUNK_CHECKOUT,
    sourceData,
    postingsFormat='Lucene90',
    idFieldPostingsFormat='Memory',
    grouping=False,
    doDeletions=False,
    addDVFields=True,
)

c = competition.Competitor('base', constants.TRUNK_CHECKOUT)
# NOTE(review): the two False arguments presumably disable score/count
# verification -- confirm against benchUtil.RunAlgs.
r = benchUtil.RunAlgs(constants.JAVA_COMMAND, False, False)
r.compile(c)
# The competitor name ('base') is passed as the first argument to makeIndex.
r.makeIndex(c.name, index, False)

# Classpath string for the checkout, and the path of the index built above.
cp = '%s' % r.classPathToString(benchUtil.getClassPath(c.checkout))
fip = '%s/index' % benchUtil.nameToIndexPath(index.getName())

# Command-line options, each with a fallback value.
# NOTE(review): the defaults mix str ('1', '0.2') and int (60, 1); check how
# benchUtil.getArg converts values before doing arithmetic on these.
# NOTE(review): meaning of the trailing True argument is not visible here --
# confirm in benchUtil.getArg.
modes = benchUtil.getArg('-mode', 'update', True)
docsPerSec = benchUtil.getArg('-dps', '1', True)
reopenPerSec = benchUtil.getArg('-rps', '0.2', True)
runTimeSec = benchUtil.getArg('-rts', 60, True)
numSearchThreads = benchUtil.getArg('-nst', 1, True) # default to 1 search thread
numIndexThreads = benchUtil.getArg('-nit', constants.INDEX_NUM_THREADS, True)
# `modes` is a comma-separated list; one pass of the loop per mode.
for mode in modes.split(','):
def _describeTasksFile(tasksFile, pos, neg):
    """Return the ' tasks file: ...' banner for the given pos/neg pattern lists."""
    if pos is None:
        if neg is None:
            return ' tasks file: %s' % tasksFile
        return ' tasks file: NOT %s from %s' % (','.join(neg), tasksFile)
    if neg is None:
        return ' tasks file: %s from %s' % (','.join(pos), tasksFile)
    return ' tasks file: %s, NOT %s from %s' % (','.join(pos), ','.join(neg), tasksFile)


def _writeFilteredTasks(srcPath, dstPath, pos, neg):
    """Copy srcPath to dstPath (UTF-8), dropping lines whose category is filtered out.

    The category of a line is the text before its first ':'.  When `pos` is
    not None the category must match at least one `pos` regex; when `neg` is
    not None it must match none of the `neg` regexes.
    """
    posPatterns = None if pos is None else [re.compile(x) for x in pos]
    negPatterns = None if neg is None else [re.compile(x) for x in neg]

    # `with` closes both files even if a read or write fails midway.
    with open(srcPath) as f, open(dstPath, 'wb') as fOut:
        for line in f:
            colon = line.find(':')
            if colon != -1:
                cat = line[:colon]
                if posPatterns is not None and not any(
                        p.search(cat) is not None for p in posPatterns):
                    continue
                if negPatterns is not None and any(
                        p.search(cat) is not None for p in negPatterns):
                    continue
            # NOTE(review): lines without a ':' are copied through unfiltered;
            # this nesting was inferred from whitespace-collapsed source.
            fOut.write(line.encode('utf-8'))


def run(id, base, challenger, coldRun=False, doCharts=False, search=False, index=False,
        verifyScores=True, verifyCounts=True, taskPatterns=None, randomSeed=None,
        requireOverlap=1.0):
    """Compile, optionally index, then search-benchmark or report base vs. challenger.

    Args:
        id: run identifier, forwarded to makeIndex, getSearchLogFiles,
            runSimpleSearchBench and getAggregateProfilerResult.
        base, challenger: the two competitors being compared.
        coldRun: forwarded to runSimpleSearchBench.
        doCharts: forwarded to makeIndex.
        search: run the search benchmark; also enabled by '-search' in sys.argv.
        index: build indices first; also enabled by '-index' in sys.argv.
        verifyScores, verifyCounts: forwarded to benchUtil.RunAlgs.
        taskPatterns: (pos, neg) pair; each item is None or a list of regex
            strings applied to task categories.  None is treated as
            (None, None), i.e. no filtering.
        randomSeed: required; seeds the per-run and per-iteration seeds.
        requireOverlap: minimum acceptable value of cmpDiffs[2] in search mode.

    Raises:
        RuntimeError: if randomSeed is missing, the competitors' tasks files or
            index segment counts are inconsistent, or the comparison reports
            errors or differing results.

    Compilation is skipped when '-noc' is in sys.argv.  When `search` is false,
    existing search logs are reported instead and any non-None cmpDiffs raises.
    """
    competitors = [challenger, base]

    if randomSeed is None:
        raise RuntimeError('missing randomSeed')

    r = benchUtil.RunAlgs(constants.JAVA_COMMAND, verifyScores, verifyCounts)
    if '-noc' not in sys.argv:
        print()
        print('Compile:')
        for c in competitors:
            r.compile(c)

    if not search:
        search = '-search' in sys.argv
    if not index:
        index = '-index' in sys.argv

    if index:
        seen = set()
        indexSegCount = None
        indexCommit = None
        announced = False
        tasksFile = None
        for c in competitors:
            if tasksFile is None:
                tasksFile = c.tasksFile
            elif tasksFile != c.tasksFile:
                # Fixed: the message used undefined names (taskFile / c.taskFile),
                # so this path raised NameError instead of RuntimeError.
                raise RuntimeError('inconsistent taskFile %s vs %s' % (tasksFile, c.tasksFile))
            if c.index not in seen:
                if not announced:
                    print()
                    print('Create indices:')
                    announced = True
                seen.add(c.index)
                r.makeIndex(id, c.index, doCharts)
                segCount = benchUtil.getSegmentCount(
                    benchUtil.nameToIndexPath(c.index.getName()))
                if indexSegCount is None:
                    indexSegCount = segCount
                    indexCommit = c.commitPoint
                elif indexCommit == c.commitPoint and indexSegCount != segCount:
                    raise RuntimeError(
                        'segment counts differ across indices: %s vs %s'
                        % (indexSegCount, segCount))

    if search:
        # The default taskPatterns=None used to crash on tuple-unpacking.
        patterns = (None, None) if taskPatterns is None else taskPatterns

        # `base` is the last competitor, which is what the leaked loop variable
        # `c` (and the index-only `tasksFile`) referred to; naming it directly
        # also works with '-noc' and without indexing.
        sourceTasksFile = base.tasksFile

        if patterns != (None, None):
            pos, neg = patterns
            print(_describeTasksFile(sourceTasksFile, pos, neg))
            newTasksFile = '%s/%s.tasks' % (constants.BENCH_BASE_DIR, os.getpid())
            _writeFilteredTasks(sourceTasksFile, newTasksFile, pos, neg)
            # Competitors keep pointing at the filtered copy after this call.
            for c in competitors:
                c.tasksFile = newTasksFile
        else:
            print(' tasks file: %s' % sourceTasksFile)
            newTasksFile = None

        try:
            results = {}

            if constants.JAVA_COMMAND.find(' -ea') != -1:
                print()
                print('WARNING: *** assertions are enabled *** JAVA_COMMAND=%s'
                      % constants.JAVA_COMMAND)
                print()

            print()
            print('Search:')

            rand = random.Random(randomSeed)
            # NOTE(review): the range is asymmetric (-10000000 .. 1000000); kept
            # as-is so existing seeds reproduce the same runs.
            staticSeed = rand.randint(-10000000, 1000000)

            # Remove old log files:
            for c in competitors:
                for fileName in r.getSearchLogFiles(id, c):
                    if os.path.exists(fileName):
                        os.remove(fileName)

            for jvmIter in range(base.competition.jvmCount):
                print(' iter %d' % jvmIter)
                seed = rand.randint(-10000000, 1000000)

                for c in competitors:
                    print(' %s:' % c.name)
                    logFile = r.runSimpleSearchBench(jvmIter, id, c, coldRun, seed,
                                                     staticSeed, filter=None,
                                                     taskPatterns=patterns)
                    results.setdefault(c, []).append(logFile)

                # NOTE(review): reported after every iteration; this nesting was
                # inferred from whitespace-collapsed source.
                print()
                print('Report after iter %d:' % jvmIter)
                details, cmpDiffs, cmpHeap = r.simpleReport(
                    results[base], results[challenger],
                    '-jira' in sys.argv, '-html' in sys.argv,
                    cmpDesc=challenger.name, baseDesc=base.name)
                if cmpDiffs is not None:
                    if cmpDiffs[1]:
                        raise RuntimeError('errors occurred: %s' % str(cmpDiffs))
                    if cmpDiffs[2] < requireOverlap:
                        raise RuntimeError('results differ: %s' % str(cmpDiffs))
        finally:
            # Always remove the per-process filtered tasks file.
            if newTasksFile is not None and os.path.exists(newTasksFile):
                os.remove(newTasksFile)

        # TODO: maybe print this after each iter, not just in the end, for the impatient/progressive?
        for mode in 'cpu', 'heap':
            for c in competitors:
                print(f'\n{mode.upper()} merged search profile for {c.name}:')
                print(c.getAggregateProfilerResult(id, mode)[0][1])

    else:
        # Report-only: compare whatever search logs already exist.
        results = {}
        for c in competitors:
            results[c] = r.getSearchLogFiles(id, c)

        details, cmpDiffs, cmpHeap = r.simpleReport(
            results[base], results[challenger],
            '-jira' in sys.argv, '-html' in sys.argv,
            cmpDesc=challenger.name, baseDesc=base.name)
        if cmpDiffs is not None:
            raise RuntimeError('results differ: %s' % str(cmpDiffs))
# MAX_BUFFERED_DOCS = 49774 # INDEXING_BUFFER_MB = -1 MAX_BUFFERED_DOCS = 5000 INDEXING_BUFFER_MB = -1 BODY_FIELD_TERM_VECTORS = True # DOC_COUNT = 27625038 # DOC_COUNT = 100000 DOC_COUNT = 10000000 print('Compile luceneutil and %s/%s...' % (constants.BASE_DIR, LUCENE_TRUNK_ROOT)) r = benchUtil.RunAlgs(JAVA_CMD, False) c = competition.Competitor('foo', LUCENE_TRUNK_ROOT) c.compile(r.classPathToString(r.getClassPath(c.checkout))) while True: print print('%s: create index' % datetime.datetime.now()) shutil.rmtree(INDEX_PATH) cmd = '%s -classpath "ROOT/lucene/build/core/classes/java:ROOT/lucene/build/core/classes/test:ROOT/lucene/build/sandbox/classes/java:ROOT/lucene/build/misc/classes/java:ROOT/lucene/build/facet/classes/java:/home/mike/src/lucene-c-boost/dist/luceneCBoost-SNAPSHOT.jar:ROOT/lucene/build/analysis/common/classes/java:ROOT/lucene/build/analysis/icu/classes/java:ROOT/lucene/build/queryparser/classes/java:ROOT/lucene/build/grouping/classes/java:ROOT/lucene/build/suggest/classes/java:ROOT/lucene/build/highlighter/classes/java:ROOT/lucene/build/codecs/classes/java:ROOT/lucene/build/queries/classes/java:lib/HdrHistogram.jar:build" perf.Indexer -dirImpl MMapDirectory -indexPath "%s" -analyzer StandardAnalyzerNoStopWords -lineDocsFile %s -docCountLimit %s -threadCount %d -maxConcurrentMerges 3 -dvfields -ramBufferMB %s -maxBufferedDocs %d -postingsFormat Lucene50 -waitForMerges -mergePolicy LogDocMergePolicy -facets Date -facetDVFormat Lucene50 -idFieldPostingsFormat Memory'.replace( 'ROOT', '%s/%s' % (constants.BASE_DIR, LUCENE_TRUNK_ROOT)) % ( JAVA_CMD, INDEX_PATH, LINE_DOCS_FILE, DOC_COUNT, INDEX_THREADS, INDEXING_BUFFER_MB, MAX_BUFFERED_DOCS) if USE_CMS: