def do_calibration( device, baud, name, cal, base, numer, denom ):
    """Iteratively adjust the Emon setting `name` until two measurements agree.

    `denom(count)` and `numer(count)` must each return a ``(mean, err)`` pair
    for `count` samples.  The denominator is re-sampled with a growing sample
    count until its relative error drops to the module-level `threshold`;
    the numerator is then sampled with the same count.  When the two
    ``mean +/- err`` intervals overlap the current value is returned,
    otherwise it is scaled by ``nmean / dmean``, written to the device, and
    the procedure repeats.

    If `cal` is NaN, `base` is used as the starting value.
    """
    def _write_setting( value ):
        # Each write opens its own Emon session, as the original code did.
        with Emon( device, baud ) as emon:
            emon.set( name, value )

    if isnan( cal ):
        cal = base
    _write_setting( cal )

    next_count = 25
    while True:
        infomsg( name, "=", cal )
        dmean, derr = 1, 1
        while threshold < derr / dmean:
            sample = next_count
            infomsg( "collecting %d denominator samples" % sample, "@", datetime.now() )
            dmean, derr = denom( sample )
            infomsg( "sample =", sample, "dmean =", dmean, "derr =", derr )
            # Estimate how many samples are needed to reach the threshold;
            # never shrink the sample count.
            estimate = int( np.ceil( sample * ( derr / dmean / threshold )**2 ) )
            next_count = max( estimate, sample )

        infomsg( "collecting %d numerator samples" % sample, "@", datetime.now() )
        nmean, nerr = numer( sample )
        infomsg( "sample =", sample, "nmean =", nmean, "nerr =", nerr )

        intervals_overlap = (
            dmean - derr < nmean + nerr and nmean - nerr < dmean + derr
        )
        if intervals_overlap:
            return cal

        cal *= nmean / dmean
        _write_setting( cal )
def mkconfig( test_command ):
    """Yield a temporary copy of `configfile` with a test command appended.

    The copy is made with ``cp`` and a ``--test-command <test_command>`` line
    is appended to it.  The temporary path is yielded while the `mktemp()`
    context is still open.
    """
    with mktemp() as tmp_config:
        check_call( [ "cp", configfile, tmp_config ] )
        infomsg( "using test-command:", test_command )
        with open( tmp_config, 'a' ) as out:
            infomsg( "--test-command", test_command, file = out )
        yield tmp_config
def read_wattsup( wu_device, counter, samples ):
    """Run ``bin/wu.py`` for `samples` seconds and summarise the readings.

    `counter` is a comma-separated list of counter names; the last
    ``len(counter.split(","))`` comma-separated fields of every output line
    except the first are parsed as floats.

    Returns ``(mean, err)`` where both are per-column arrays and `err` is
    ``1.96 * std / sqrt(n)``.

    Raises `_CalledProcessError` if ``wu.py`` exits with a non-zero status.
    """
    cmd = [
        os.path.join( root, "bin", "wu.py" ), wu_device,
        "--log-style", "external", "-i", "-e", counter,
        "-o", "/dev/stdout", "--", "sleep", str( samples ),
    ]
    infomsg( "DEBUG:", *cmd )
    # universal_newlines makes communicate() return text on Python 3 too, so
    # the str-based split below works on both major versions.
    p = Popen( cmd, stdout = PIPE, universal_newlines = True )
    output = p.communicate()[ 0 ]
    if p.returncode != 0:
        raise _CalledProcessError( p.returncode, cmd )

    terms = len( counter.split( "," ) )
    # Build real lists (not lazy map objects) so numpy sees a 2-D table.
    # The first output line is skipped, exactly as before.
    values = [
        [ float( field ) for field in line.strip().split( "," )[ -terms: ] ]
        for line in output.splitlines()[ 1: ]
    ]
    mean = np.mean( values, 0 )
    err = 1.96 * np.std( values, 0 ) / np.sqrt( len( values ) )
    return mean, err
def get_localization_files():
    """Return the sorted localization partition files, generating them if absent.

    Files are looked up as ``<datadir>/<options.localization>.*``.  When none
    exist, ``bin/partition-func-localization.py`` is run (timed under the
    "partitioning" record); this needs a pin root from ``--pin-root`` or the
    ``PIN_ROOT`` environment variable.

    Exits with status 2 if no pin root is available or if the number of files
    found differs from ``options.partitions``.
    """
    pattern = os.path.join( datadir, options.localization + ".*" )
    files = glob( pattern )
    if len( files ) == 0:
        if options.pin_root is None:
            if "PIN_ROOT" in os.environ:
                options.pin_root = os.environ[ "PIN_ROOT" ]
            else:
                infomsg(
                    "you must set --pin-root to generate localization files",
                    file = sys.stderr
                )
                exit( 2 )
        record.time( "partitioning", check_call, [
            os.path.join( root, "bin", "partition-func-localization.py" ),
            "--output", os.path.join( datadir, options.localization ),
            "--num", str( options.partitions ),
            "--pin-root", options.pin_root,
            "--source", options.source,
            config
        ] )
        files = glob( pattern )
    if len( files ) != options.partitions:
        # Report on stderr like the other fatal error in this function.
        infomsg(
            "ERROR: expected", options.partitions,
            "partitions, but got", len( files ), "partitions",
            file = sys.stderr
        )
        exit( 2 )
    return sorted( files )
def get_builder( deltas ):
    """Return ``(deltas, builder)`` for a list of gene strings.

    Unless ``options.compound_edits`` is set, compound genes are lowered to
    append/delete genes first:

    * ``a…`` and ``d…`` genes are kept as they are;
    * ``r(dst,src)`` is expanded to ``d(dst)`` and ``a(dst,src)``;
    * ``s(dst,src)`` is expanded to ``r(dst,src)`` and ``r(src,dst)``, which
      are then lowered in turn.

    The resulting genes are returned as ``(index, gene)`` pairs together with
    a `GenomeBuilder`.  Exits with status 1 on an unrecognized or malformed
    gene and with status 2 when ``options.sources`` is set (unsupported).
    """
    if options.sources is not None:
        # Use the same messaging helper as every other error in this file
        # rather than the Python 2 only ``print >>`` statement.
        infomsg( "file-based differences not implemented yet", file = sys.stderr )
        exit( 2 )

    if not options.compound_edits:
        fieldpat = re.compile( r'[a-z]\((\d+),(\d+)\)' )
        # `pending` is a stack: genes are popped from the end, so expansions
        # pushed onto it are processed before the remaining input genes.
        pending = list( reversed( deltas ) )
        deltas = list()
        while len( pending ) > 0:
            gene = pending.pop()
            kind = gene[ :1 ]
            if kind in ( 'a', 'd' ):
                deltas.append( gene )
                continue
            m = fieldpat.match( gene ) if kind in ( 'r', 's' ) else None
            if m is None:
                # Covers unknown gene letters and r/s genes whose operands do
                # not parse (previously an AttributeError on None).
                infomsg( "ERROR: unrecognized gene:", gene )
                exit( 1 )
            dst, src = m.group( 1, 2 )
            if kind == 'r':
                pending += [ 'd(%s)' % dst, 'a(%s,%s)' % ( dst, src ) ]
            else:
                pending += [
                    'r(%s,%s)' % ( dst, src ), 'r(%s,%s)' % ( src, dst )
                ]

    deltas = list( enumerate( deltas ) )
    builder = GenomeBuilder( genprog )
    return deltas, builder
def validateCorrectness(self, outfile):
    """Compare `outfile` against the golden output and report whether it matches.

    If the golden path from ``self.getGolden()`` does not exist and
    ``--create-golden`` was given, it is first created from the first entry
    of `outfile` (``rsync -a`` for a directory, ``cp -p`` for a file).

    Returns the result of ``self.diff(golden, outfile)``, or ``False`` if the
    diff raised `CalledProcessError`.  A golden output created by this call
    is removed again when the diff raises.

    Raises `IOError` (errno 2) when there is no golden output and creation
    was not requested; any non-`CalledProcessError` exception from the diff
    is re-raised.
    """
    golden = self.getGolden()
    new_golden = False
    if not os.path.exists(golden):
        if not self.options.create_golden:
            infomsg("ERROR: no golden output to compare against", file=sys.stderr)
            infomsg("ERROR: try running with original program and --create-golden", file=sys.stderr)
            raise IOError(2, "No such file or directory", golden)

        parent = os.path.dirname(golden)
        # A bare file name has an empty dirname; os.makedirs("") would raise.
        if parent and not os.path.exists(parent):
            os.makedirs(parent)
        fname = next(iter(outfile))
        if os.path.isdir(fname):
            check_call(["rsync", "-a", fname + "/", golden])
        else:
            check_call(["cp", "-p", fname, golden])
        new_golden = True

    try:
        return self.diff(golden, outfile)
    except Exception as e:
        # Do not keep a freshly created golden that could not be diffed.
        if new_golden:
            check_call(["rm", "-rf", golden])
        if isinstance(e, CalledProcessError):
            return False
        raise
def _test( self, deltas ):
    """Classify `deltas` by comparing its fitness samples with the optimized ones.

    Returns ``self.UNRESOLVED`` when there are no fitness samples or any
    sample is 0; ``self.PASS`` when, for some column, the Mann-Whitney U
    p-value is below ``options.alpha`` and the column mean is below the
    stored optimized mean; ``self.FAIL`` otherwise.
    """
    # "Passing" behavior is more like the original (slower, more energy).
    # "Failing" behavior is more optimized (faster, less energy).
    fitness = np.array( self.get_fitness( deltas ) )
    if len( fitness ) == 0 or np.any( fitness == 0 ):
        return self.UNRESOLVED

    col_mean = np.mean( fitness, axis = 0 )
    col_std = np.std( fitness, axis = 0 )
    root_n = np.sqrt( fitness.shape[ 0 ] )
    columns = fitness.shape[ 1 ]

    for col in range( columns ):
        infomsg( " ", col_mean[ col ], "+/-", 1.96 * col_std[ col ] / root_n )

    for col in range( columns ):
        reference = self.optimized[ ::, col ]
        observed = fitness[ ::, col ]
        same_constant = (
            np.ptp( reference ) == 0
            and np.ptp( observed ) == 0
            and self.optimized[ 0, col ] == fitness[ 0, col ]
        )
        if same_constant:
            # Optimized and fitness are all the same value, likely because
            # we are comparing the optimized variant to itself. This counts
            # as a fail, since they are clearly drawn from the same distro.
            continue
        pval = mannwhitneyu( reference, observed )[ 1 ]
        if pval < options.alpha and col_mean[ col ] < self.mean[ col ]:
            return self.PASS
    return self.FAIL
def get_repair_log( d ):
    """Return the path of the ``repair.debug.*`` log inside directory `d`.

    When the surrounding `cfg` has a ``--seed`` entry the name is derived
    from it; otherwise the directory must contain exactly one matching file.
    Exits with status 2 if the log cannot be determined.
    """
    if "--seed" in cfg:
        return os.path.join( d, "repair.debug.%s" % cfg[ "--seed" ] )

    candidates = glob( os.path.join( d, "repair.debug.*" ) )
    if len( candidates ) != 1:
        infomsg( "ERROR: could not determine repair log", file = sys.stderr )
        exit( 2 )
    return candidates[ 0 ]
def __init__( self, genprog, builder, deltas ):
    """Store the build helpers and measure the fitness of `deltas`.

    The samples returned by ``self.get_fitness(deltas)`` are kept in
    ``self.optimized`` and their overall mean in ``self.mean``.
    """
    DD.__init__( self )
    self.builder = builder
    self.genprog = genprog

    infomsg( "INFO: computing optimized energy usage" )
    baseline = self.get_fitness( deltas )
    self.optimized = baseline
    self.mean = numpy.mean( baseline )
def check_call(cmd, **kwargs):
    """Run `cmd`, fanning out over any `Multitmp` arguments, and check exit codes.

    Each element of `cmd` and each keyword value is either a plain value
    (repeated for every run) or a `Multitmp` (one value per run).  With no
    `Multitmp` present the command runs exactly once.  `Multitmp` keyword
    values are treated as names: directories are passed through, anything
    else is opened for writing and the file object is passed to `Popen`.

    The keyword ``verbose`` (default False) echoes each command via
    `infomsg` and is not forwarded to `Popen`.

    Raises `ValueError` if the `Multitmp` arguments differ in length, and
    `CalledProcessError` (for the last failing process waited on) if any
    process exits with a non-zero status.
    """
    verbose = kwargs.pop("verbose", False)
    lengths = set()

    def expand(wrap, arg):
        # Plain values repeat forever; Multitmp values are iterated via `wrap`.
        if not isinstance(arg, Multitmp):
            return repeat(arg)
        lengths.add(len(arg))
        return wrap(arg)

    opened = list()

    def open_outputs(names):
        for name in names:
            if os.path.isdir(name):
                yield name
                continue
            handle = open(name, "w")
            opened.append(handle)
            yield handle

    arg_iters = [expand(iter, arg) for arg in cmd]
    kw_iters = [(key, expand(open_outputs, val)) for key, val in kwargs.items()]
    if len(lengths) > 1:
        raise ValueError("Multitmp args have different lengths")

    failure = None
    running = list()
    try:
        while True:
            try:
                one_cmd = [next(it) for it in arg_iters]
                one_kw = {key: next(it) for key, it in kw_iters}
                if verbose:
                    infomsg("+", *one_cmd)
                running.append((Popen(one_cmd, **one_kw), one_cmd))
                if len(lengths) == 0:
                    # Nothing to fan out over: a single run is all there is.
                    break
            except StopIteration:
                # A Multitmp iterator ran dry: every run has been started.
                break
        while running:
            proc, one_cmd = running.pop()
            proc.wait()
            if proc.returncode != 0:
                failure = CalledProcessError(cmd=one_cmd, returncode=proc.returncode)
    finally:
        # Only non-empty if an exception interrupted the loops above.
        for proc, _ in running:
            proc.kill()
        for handle in opened:
            handle.close()
    if failure is not None:
        raise failure
def reduce_error(f, alpha, probes=5):
    """Yield fitness samples from `f()` until their relative standard error <= `alpha`.

    `f` is called repeatedly; every item it produces is yielded unchanged
    and folded into a running per-dimension mean and variance.  After each
    batch of `probes` samples (rounded up to an odd number, doubled after
    every batch) the relative standard error of the mean is recomputed;
    sampling stops once it is at most `alpha` in every dimension.

    Sampling also stops early, with a warning, when a sample has more
    dimensions than the first one or when a sample is all zeros.

    While the generator is active the module-level `debug_file` is set to
    None; samples and the final summary are written to its previous value.
    The previous value is restored when the generator finishes, returns
    early, or is closed by its consumer.
    """
    global debug_file

    # variance computation adapted from:
    # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    # correction computation adapted from:
    # https://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation

    if probes % 2 == 0:
        probes += 1

    log = debug_file
    debug_file = None
    try:
        n = None
        mean = None
        M2 = None
        errp = np.ones(1)
        while np.any(alpha < errp):
            infomsg("errp =", errp, ": Attempting", probes, "more probes")
            i = 0
            while i < probes:
                for x in f():
                    if log is not None:
                        infomsg(*x, file=log)
                    yield x
                    x = np.array(x)
                    if n is None:
                        n = np.zeros(len(x))
                        mean = np.zeros(len(x))
                        M2 = np.zeros(len(x))
                        errp = np.zeros(len(x))
                    if len(errp) < len(x):
                        infomsg("warning: change in fitness dimnsions!")
                        return
                    if np.all(x == 0):
                        infomsg("warning: 0 fitness: terminating evaluation")
                        return
                    # Online (Welford) update of mean and sum of squares.
                    n += 1
                    delta = x - mean
                    mean += delta / n
                    M2 += delta * (x - mean)
                    i = i + 1
            var = M2 / (n - 1)
            errp = np.sqrt(var / n) / mean
            if np.any(n < 100):
                # Small-sample correction factor for the standard deviation.
                c4 = np.sqrt(2 / (n - 1)) * gamma(n / 2) / gamma((n - 1) / 2)
                errp = errp / c4
            probes += probes
        # `n` is still None if the loop never ran (alpha >= 1).
        if log is not None and n is not None:
            for j in range(len(n)):
                infomsg(n[j], "probes: relative standard error =", errp[j], file=log)
    finally:
        # Previously skipped on the early returns above, which left the
        # global set to None for the rest of the program.
        debug_file = log
def check_cmd( cmd ):
    """Run `cmd` and terminate the program if it cannot run or fails.

    Exits with status 2 when `call` raises `OSError` (after printing its
    ``strerror``), and with the command's own status when that is non-zero
    (after printing the command on stderr).  Returns None on success.
    """
    try:
        status = call( cmd )
    except OSError as err:
        infomsg( err.strerror )
        exit( 2 )

    if status == 0:
        return

    printable = cmd if isinstance( cmd, str ) else " ".join( cmd )
    infomsg( "ERROR:", printable, file = sys.stderr )
    exit( status )
def parse_log( infile, outfile ):
    """Run the `parselog` tool on `infile`, writing CSV output to `outfile`.

    The tool is invoked with ``--filter options.filter --final
    --no-confidence``; ``--stop-after`` is added when
    ``options.stop_after`` is set.
    """
    infomsg( "INFO: parsing", infile )
    command = [
        parselog, infile,
        "--filter", options.filter,
        "--final",
        "--no-confidence",
        "--csv", outfile,
    ]
    stop_after = options.stop_after
    if stop_after is not None:
        command.extend( [ "--stop-after", str( stop_after ) ] )
    check_call( command )
def __init__( self, genprog, builder, deltas ):
    """Store the build helpers and measure the fitness of `deltas`.

    The samples from ``self.get_fitness(deltas)`` are stored as an array in
    ``self.optimized`` and their per-column mean in ``self.mean``.  With
    ``options.disable_cache`` set, ``self.cache_outcomes`` is set to 0.

    Asserts that every column mean is positive.
    """
    DD.__init__( self )
    self.builder = builder
    self.genprog = genprog
    if options.disable_cache:
        self.cache_outcomes = 0
    self.duration = 0

    infomsg( "INFO: computing optimized energy usage" )
    baseline = np.array( self.get_fitness( deltas ) )
    self.optimized = baseline
    self.mean = np.mean( baseline, axis = 0 )
    assert np.all( self.mean > 0 ), "'optimized' variant has 0 fitness!"
def get_builder( deltas ):
    """Return ``(deltas, builder)`` for a list of gene strings.

    Unless ``options.compound_edits`` is set, the genes are first passed
    through `lower_genome`.  The genes are returned as ``(index, gene)``
    pairs together with a `GenomeBuilder`.

    Exits with status 1 if `lower_genome` raises `ValueError` and with
    status 2 when ``options.sources`` is set (unsupported).
    """
    if options.sources is not None:
        # Use the file's messaging helper instead of the Python 2 only
        # ``print >>`` statement.
        infomsg( "file-based differences not implemented yet", file = sys.stderr )
        exit( 2 )

    if not options.compound_edits:
        try:
            deltas = lower_genome( deltas )
        except ValueError as e:
            # Exceptions have no `.message` attribute on Python 3; printing
            # the exception itself gives its text on both major versions.
            infomsg( "ERROR:", e, file = sys.stderr )
            exit( 1 )

    deltas = list( enumerate( deltas ) )
    builder = GenomeBuilder( genprog )
    return deltas, builder
def align_functions( f1, f2, name ):
    """Pair up matching instructions of two listings of function `name`.

    `f1` and `f2` are sequences of ``(data, instruction)`` pairs.  The
    listings are walked in parallel: instructions equal under `instreq` are
    paired, and an instruction that `isnop` accepts is skipped on its own
    side.  Any other mismatch is logged and both sides advance; after the
    walk the most recent such mismatch is raised.

    Returns a list of ``(data1, data2, instr1, instr2)`` tuples.

    Raises `MisalignedCodeError` for a mismatched pair or for a non-nop
    instruction left over at the end of either listing.
    """
    pairs = list()
    pos1 = pos2 = 0
    mismatch = None

    while pos1 < len( f1 ) and pos2 < len( f2 ):
        data1, instr1 = f1[ pos1 ]
        data2, instr2 = f2[ pos2 ]
        if instreq( instr1, instr2 ):
            pairs.append( ( data1, data2, instr1, instr2 ) )
            pos1 += 1
            pos2 += 1
            continue
        if isnop( instr1 ):
            pos1 += 1
            continue
        if isnop( instr2 ):
            pos2 += 1
            continue
        # Keep scanning so that every mismatch gets logged; the error is
        # raised only once the walk is over.
        mismatch = MisalignedCodeError(
            "unexpected instruction pair:", f1[ pos1 ], f2[ pos2 ]
        )
        infomsg( "ERROR: unexpected instruction pair:" )
        infomsg( " ", pos1, ":", f1[ pos1 ] )
        infomsg( " ", pos2, ":", f2[ pos2 ] )
        infomsg( "in function", name )
        pos1 += 1
        pos2 += 1

    if mismatch is not None:
        raise mismatch

    # Whatever remains on either side must consist solely of nops.
    for start, listing in ( ( pos1, f1 ), ( pos2, f2 ) ):
        for entry in listing[ start: ]:
            if not isnop( entry[ 1 ] ):
                raise MisalignedCodeError(
                    "unexpected final instruction:", entry[ 1 ]
                )
    return pairs
def calcReplayGain(cls, pieces):
    """Run ``wvgain -a`` over the files in `pieces`.

    The tool's standard output is discarded.

    Raises `ReplayGainError` if ``wvgain`` exits with a non-zero status.
    """
    import os

    command = ['wvgain', '-a']
    command.extend(pieces)
    infomsg("calculating replaygain info for wavpack files...")
    # subprocess.call never reads from a PIPE, so a chatty child could fill
    # the pipe buffer and block forever; send the output to the null device.
    with open(os.devnull, 'w') as devnull:
        exitcode = subprocess.call(
            command,
            shell=False,
            stdout=devnull,
        )
    if exitcode != 0:
        raise ReplayGainError(
            "fail to calulate replaygain for wavpack files. ")
def get_localized_results( localfile, index ):
    """Return the repair log for search `index`, running GenProg if needed.

    The fitness log ``fitness<index>.csv`` is looked for inside the storage
    directory from `get_storage_dir(index)` when that directory exists, and
    in the current directory otherwise.  If it is missing, `genprog` is run
    (timed under the "GenProg" record); when `localfile` is given it is
    passed as both the line-based fix and fault localization file.

    If the storage directory does not exist afterwards it is created and
    the fitness log, ``multi.cache``, ``repair.cache`` and the repair log
    are moved into it.

    Exits with status 2 if the repair log cannot be determined.
    """
    storage = get_storage_dir( index )
    fitness_log = "fitness%s.csv" % index
    cfg = Config()
    cfg.load( config )

    if os.path.isdir( storage ):
        expected = os.path.join( storage, fitness_log )
        run_search = not os.path.exists( expected )
        if run_search:
            infomsg( "no such file:", os.path.join( storage, fitness_log ) )
    else:
        run_search = not os.path.exists( fitness_log )
        if run_search:
            infomsg( "no such file:", fitness_log )

    if run_search:
        command = [
            genprog, config,
            "--max-evals", str( options.max_evals ),
            "--fitness-log", fitness_log
        ]
        if localfile is not None:
            command += [
                "--fix-scheme", "line",
                "--fix-file", localfile,
                "--fault-scheme", "line",
                "--fault-file", localfile,
            ]
        record.time( "GenProg", check_call, command )

    def _find_repair_log( directory ):
        # Prefer the seed-derived name; otherwise require a unique match.
        if "--seed" in cfg:
            return os.path.join( directory, "repair.debug.%s" % cfg[ "--seed" ] )
        found = glob( os.path.join( directory, "repair.debug.*" ) )
        if len( found ) == 1:
            return found[ 0 ]
        infomsg( "ERROR: could not determine repair log", file = sys.stderr )
        exit( 2 )

    if os.path.isdir( storage ):
        return _find_repair_log( storage )

    repairlog = _find_repair_log( "." )
    os.makedirs( storage )
    artifacts = [ fitness_log, "multi.cache", "repair.cache", repairlog ]
    check_call( [ "mv" ] + artifacts + [ storage ] )
    return os.path.join( storage, repairlog )
def read_emon( device, baud, columns, num_samples ):
    """Read `num_samples` rows from the Emon device and summarise `columns`.

    The device is opened, ``calibrate`` is set to 1 and ``period`` to 1000,
    and each row obtained with ``next(emon)`` is logged; only the entries
    indexed by `columns` are kept.

    Returns ``(mean, err)`` as per-column arrays, where `err` is
    ``1.96 * std / sqrt(n)``.
    """
    rows = list()
    with Emon( device, baud ) as emon:
        emon.set( "calibrate", 1 )
        emon.set( "period", 1000 )
        for _ in range( num_samples ):
            row = next( emon )
            infomsg( row )
            rows.append( [ row[ col ] for col in columns ] )

    mean = np.mean( rows, 0 )
    err = 1.96 * np.std( rows, 0 ) / np.sqrt( len( rows ) )
    return mean, err
def rename_by_taginfo(self, scheme=default_scheme):
    """Rename this file on disk according to its tags and `scheme`.

    The new name is ``eval_scheme(scheme, taginfo)`` plus ``self.extension``,
    passed through `normalize_filename`.  ``self.filename`` and
    ``self.basename`` are updated to match.
    """
    taginfo = self.extract_taginfo()
    stem = eval_scheme(scheme, taginfo)
    filename_good = normalize_filename("%s%s" % (stem, self.extension))

    infomsg("filename_good: %s => %s" % (self.filename, filename_good))
    os.rename(self.filename, filename_good)

    self.filename = filename_good
    self.basename, _ = os.path.splitext(filename_good)
def get_line_coverage( metrics, asmfuns ):
    """Yield ``(fname, line, coverage)`` for every aligned instruction.

    Walks the functions of every file in the module-level `asmfiles`.
    Functions without an entry in `metrics` and functions that cannot be
    aligned are reported and skipped.

    NOTE(review): the `asmfuns` parameter is not used by this function.
    """
    for asm_path in asmfiles:
        for ( fname, fun ), instrs in assembly_funs( asm_path ):
            if fun not in metrics:
                infomsg(
                    "Warning: no annotations for %s:%s" % ( fname, fun ),
                    file = sys.stderr
                )
                continue
            try:
                aligned = list( align_functions( instrs, metrics[ fun ], fun ) )
            except MisalignedCodeError as err:
                infomsg( "ERROR:", err )
                continue
            for entry in aligned:
                yield fname, entry[ 0 ], entry[ 1 ]
def build_variant( self, genome ):
    """Build the variant described by `genome` and yield its executable path.

    The genes (or ``original`` for an empty genome) are written to a
    temporary ``--oracle-genome`` file and `self.genprog` is run in oracle
    search mode.  Yields ``"000000/000000"`` if the build produced it and
    None otherwise.

    Existing ``000000`` and ``repair.debug.<seed>`` entries in the current
    directory are moved aside first and moved back afterwards; whatever the
    build left under those names is deleted.

    Raises `CalledProcessError` if the build command fails; its captured
    output is echoed to stderr first.
    """
    if len( genome ) == 0:
        genome = [ "original" ]
    # NOTE(review): the seed is looked up under "--config", not "--seed" --
    # confirm that this key is the intended one.
    seed = self.config.get( "--config", "0" )

    with mktemp() as genome_file:
        with open( genome_file, 'w' ) as out:
            print( "--oracle-genome", " ".join( genome ), file = out )

        command = [
            self.genprog, self.configfile, genome_file,
            "--seed", seed,
            "--keep-source",
            "--no-test-cache",
            "--search", "oracle",
            "--test-command", "true",
        ]
        preserved = [ "000000", "repair.debug." + seed ]
        stash = tempfile.mkdtemp( dir = "." )
        try:
            # Move pre-existing outputs out of the way of the build.
            for entry in preserved:
                if os.path.exists( entry ):
                    os.rename( entry, os.path.join( stash, entry ) )

            with mktemp() as log:
                try:
                    with open( log, 'w' ) as sink:
                        check_call( command, stdout = sink, stderr = sink )
                except CalledProcessError:
                    with open( log ) as captured:
                        infomsg( captured.read(), file = sys.stderr )
                    raise

            if os.path.exists( genome_file ):
                os.remove( genome_file )

            built = "000000/000000"
            yield built if os.path.exists( built ) else None
        finally:
            # Drop what the build produced and restore what was there before.
            for entry in preserved:
                stashed = os.path.join( stash, entry )
                if os.path.exists( entry ):
                    check_call( [ "rm", "-rf", entry ] )
                if os.path.exists( stashed ):
                    os.rename( stashed, entry )
            check_call( [ "rm", "-rf", stash ] )
def calcReplayGain(cls, pieces):
    """Run ``metaflac --add-replay-gain`` over the files in `pieces`.

    The tool gets the null device as standard input and its standard output
    is discarded.

    Raises `ReplayGainError` if ``metaflac`` exits with a non-zero status.
    """
    import os

    command = ['metaflac', '--add-replay-gain']
    command.extend(pieces)
    # A stray ``return`` used to sit here, which made the metaflac call
    # below unreachable so no replay gain was ever computed for flac files.
    infomsg("calculating replaygain info for flac files...")
    # subprocess.call never services PIPEs, so a child that writes enough
    # output would block forever; use the null device on both ends instead.
    with open(os.devnull, 'r') as null_in, open(os.devnull, 'w') as null_out:
        exitcode = subprocess.call(
            command,
            shell=False,
            stdin=null_in,
            stdout=null_out,
        )
    if exitcode != 0:
        raise ReplayGainError(
            "fail to calulate replaygain for flac files. ")
def energy_future():
    """Yield a list that is filled with per-plane energy deltas on exit.

    For every plane in ``options.plane`` the integer on the first line of
    ``planes[plane][0]`` is read up front.  The yielded list initially holds
    None per plane; when the generator is resumed or closed the files are
    read again and each slot receives ``(end - start) * 1e-6``.  A negative
    difference has ``planes[plane][1]`` added to it first.
    """
    selected = options.plane
    energy = [ None ] * len( selected )

    start = list()
    for plane in selected:
        path = planes[ plane ][ 0 ]
        with open( path ) as counter:
            infomsg( "reading from plane:", path )
            start.append( int( next( counter ) ) )

    try:
        yield energy
    finally:
        for slot, plane in enumerate( selected ):
            with open( planes[ plane ][ 0 ] ) as counter:
                delta = int( next( counter ) ) - start[ slot ]
            if delta < 0:
                delta += planes[ plane ][ 1 ]
            energy[ slot ] = delta * 1e-6
def _test( self, deltas ):
    """Build and test the variant described by `deltas`; return the outcome.

    `deltas` is a sequence of ``(index, gene)`` pairs.  Outcomes are cached
    in the module-level `cache`, keyed by the space-joined genes, unless
    ``options.disable_cache`` is set.

    Returns ``self.UNRESOLVED`` when the build yields no executable for a
    partial edit set, ``self.FAIL`` when it yields none for the full edit
    set (``len(deltas) == self.num_edits``), when any fitness value is 0,
    or when the test raises `CalledProcessError`; ``self.PASS`` otherwise.
    """
    global cache

    # A comprehension instead of ``map(lambda (_, y): y, ...)``: tuple
    # parameters are a syntax error on Python 3, and a real list is needed
    # for the len() call below.
    genome = [ gene for _, gene in deltas ]
    key = " ".join( genome )
    if key in cache:
        return cache[ key ]

    if len( genome ) == 0:
        infomsg( "INFO: genome: original" )
    else:
        infomsg( "INFO: genome:", *genome )

    build = self.genprog.build_variant
    with swallow( CalledProcessError, build, genome ) as exe:
        if exe is None:
            fitness = "compile error"
            if len( deltas ) == self.num_edits:
                result = self.FAIL
            else:
                result = self.UNRESOLVED
        else:
            try:
                result = self.PASS
                for fitness in self.genprog.run_test( exe ):
                    if any( f == 0 for f in fitness ):
                        result = self.FAIL
            except CalledProcessError:
                fitness = "test failure"
                result = self.FAIL

    infomsg( " ", fitness, "=", result )
    if not options.disable_cache:
        cache[ key ] = result
    return result
def split (self, cuefile, cmd_args ):
    """Split this audio file into tracks according to a cue sheet.

    The cue sheet is parsed from `cuefile`; if that raises
    `NoCuedataError`, the cue data embedded in the audio file is used
    instead.  The pieces are written in ``cmd_args.format`` (default
    ``flac``), then tagged, given replay-gain data and renamed following
    ``cmd_args.scheme`` (default `default_scheme`).

    Raises `NoCuedataError` if neither source provides cue data.
    """
    out_format = cmd_args.format or "flac"
    scheme = cmd_args.scheme or default_scheme
    target = getLossLessAudio("xyz." + out_format)

    self.check_decodable()
    target.check_encodable()

    try:
        cuesheet = parsecuefile(cuefile)
    except NoCuedataError as err:
        # No usable external cue file: fall back to the embedded cue sheet.
        infomsg(err.message)
        infomsg("trying embeded cuesheet...")
        embedded = self.embeded_cuedata()
        if not embedded:
            raise NoCuedataError(
                "%s does not contain embeded cuedata." % self.filename)
        cuesheet = parsecuedata(conv2unicode(embedded))

    infomsg("splitting audio chunk: %s..." % self.filename)
    pieces = shnsplit(self.filename, cuesheet.breakpoints(), out_format)
    target.tag_pieces(pieces, cuesheet)
    target.calcReplayGain(pieces)
    target.rename_pieces(pieces, scheme)
def get_localization( cmd, asmfiles, repeat = 1, smooth = None ):
    """Profile `cmd` `repeat` times and return the averaged line coverage.

    The functions of every file in `asmfiles` are indexed by function name
    and file.  Each profiling run's coverage is accumulated per
    ``(fname, line)``; only the first run writes to the real stdout/stderr,
    later runs are sent to ``/dev/null``.

    Returns a list of ``(fname, line, coverage / repeat)`` tuples sorted by
    file name and line.
    """
    asmfuns = defaultdict( dict )
    for asm_path in asmfiles:
        for ( fname, fun ), instrs in assembly_funs( asm_path ):
            asmfuns[ fun ][ fname ] = instrs

    accum = defaultdict( lambda: 0.0 )
    stdout = sys.stdout
    stderr = sys.stderr
    with open( "/dev/null", 'w' ) as fh:
        for i in range( repeat ):
            if i > 0:
                infomsg( "profiling run", i + 1 )
            # Profile the command that was passed in; the previous code
            # referenced an undefined name `args` and left `cmd` unused.
            metrics = profile( cmd, stdout = stdout, stderr = stderr )
            for fname, line, cvg in get_line_coverage( metrics, asmfuns, smooth ):
                accum[ fname, line ] += cvg
            # Silence every run after the first.
            stdout = fh
            stderr = fh

    return [
        ( fname, line, accum[ fname, line ] / repeat )
        for fname, line in sorted( accum )
    ]
def build( self, genome ):
    """Yield the executable built for `genome`, or None if the build fails.

    The genes from `get_genes(genome)` are logged and passed to
    ``self.genprog.build_variant``; the executable it provides is yielded
    from inside that context.  A `CalledProcessError` is logged and None is
    yielded instead.
    """
    if len( genome ) == 0:
        infomsg( "INFO: genome: original" )
    else:
        infomsg( "INFO: genome:", *get_genes( genome ) )

    try:
        variant = self.genprog.build_variant( get_genes( genome ) )
        with variant as exe:
            yield exe
    except CalledProcessError as err:
        infomsg( "ERROR:", err )
        yield None
def get_minimized_fitness( genome ):
    """Return the fitness of the minimized form of the genome file `genome`.

    ``bin/minimize.py`` is run first (timed under the "minimize.py" record)
    unless ``<genome>.min`` already exists.

    With ``options.regenerate`` set, the configured ``--test-command`` is
    run ``options.rows`` times against ``<genome>.bin`` and the first value
    of each fitness file is collected; the list of values is returned.  The
    program exits with status 2 if a run writes no fitness.

    Otherwise the value stored in the ``<genome>.cache`` shelf under the
    whitespace-normalised contents of ``<genome>.min`` is returned.
    """
    infomsg( "getting minimized genome for", genome )
    if not os.path.exists( genome + ".min" ):
        record.time( "minimize.py", check_call, [
            os.path.join( root, "bin", "minimize.py" ),
            genprog, config,
            "--genome-file", genome,
            "--cache", genome + ".cache",
            "--save-binary", genome + ".bin",
            "--save-source", genome + ".src",
            "--save-genome", genome + ".min"
        ] )

    if options.regenerate is None:
        with open( genome + ".min" ) as fh:
            key = " ".join( fh.readlines() )
        key = " ".join( key.split() )
        d = shelve.open( genome + ".cache" )
        try:
            return d[ key ]
        finally:
            # The shelf used to be left open, leaking its database handle.
            d.close()

    cfg = Config()
    cfg.load( config )
    cmd = cfg[ "--test-command" ]
    cmd = cmd.replace( "__EXE_NAME__", genome + ".bin" )
    if options.regenerate == "wall":
        cmd += " -j 1 --wall --repeat 100 --no-limit"

    with record.context( "fitness eval" ):
        fitnesses = list()
        for i in range( options.rows ):
            with mktemp() as fitnessfile:
                # use call instead of check_call because test scripts always
                # return non-zero status
                tmp = cmd.replace( "__FITNESS_FILE__", fitnessfile )
                call( [ "sh", "-c", tmp ] )
                with open( fitnessfile ) as fh:
                    for line in fh:
                        # Only the first line of the fitness file is used.
                        value = float( line.split()[ 0 ] )
                        infomsg( " ", value )
                        fitnesses.append( value )
                        break
                    else:
                        infomsg(
                            "ERROR: no fitness for", genome + ".bin",
                            file = sys.stderr
                        )
                        exit( 2 )
    return fitnesses
def run(self, root, argv=sys.argv):
    """Parse `argv`, measure fitness with the selected metrics, write results.

    Metrics are enabled by the ``emon``, ``model``, ``rapl``, ``time`` and
    ``wu`` options.  When no CPU is pinned and every metric reports
    ``inParallel()``, all jobs are measured in a single call; otherwise the
    jobs are measured one at a time and their values concatenated per
    dimension.

    One row per job is appended to ``options.csv`` (if given) and written
    to ``self.fitnessfile``.  If any dimension does not hold exactly one
    value per job, a single row containing ``0`` is written instead.

    Returns 0 or 1 when `checkArgs` raises `ValueError` (0 only if no
    positional arguments were given) and None otherwise.  Raises
    `ValueError` if the executable cannot be found, and exits with the
    errno of any `IOError` raised while measuring.
    """
    parser = self.getParser()
    self.addCommonOptions(parser)
    self.options, args = parser.parse_args(args=argv[1:])
    try:
        self.checkArgs(parser, args)
    except ValueError:
        return 0 if len(args) == 0 else 1

    exe_path = find_executable(self.exe)
    if exe_path is None:
        raise ValueError("%s: command not found" % self.exe)
    self.exe = exe_path
    if "/" not in self.exe:
        self.exe = os.path.join(".", self.exe)

    metrics = list()
    if self.options.emon is not None:
        metrics.append(EmonMetric(root, self.options.emon))
    if self.options.model:
        metrics.append(ModelMetric(root))
    if len(self.options.rapl) > 0:
        metrics.append(RaplMetric(root, self.options.rapl))
    if self.options.time:
        metrics.append(TimeMetric(root))
    if self.options.wu is not None:
        metrics.append(WuMetric(root, self.options.wu))

    try:
        if self.options.cpu is None and all(metric.inParallel() for metric in metrics):
            fitness = self.getParallelFitness(root, metrics)
        else:
            # Run the jobs serially: one single-job measurement per job.
            jobs = self.options.jobs
            self.options.jobs = 1
            try:
                fitness = list()
                for _ in range(jobs):
                    result = self.getParallelFitness(root, metrics)
                    while len(fitness) < len(result):
                        fitness.append(list())
                    for dim, values in zip(fitness, result):
                        dim.extend(values)
            finally:
                # Restore the job count even if a measurement raised.
                self.options.jobs = jobs
    except IOError as e:
        exit(e.errno)

    # Transpose per-dimension values into one row per job.
    results = [list() for _ in range(self.options.jobs)]
    for dim in fitness:
        if len(dim) != self.options.jobs:
            # Incomplete data: report a single zero fitness.  This must be a
            # list of rows; the previous ``[0]`` made the writers below fail
            # with "'int' object is not iterable".
            results = [[0]]
            break
        for i, x in enumerate(dim):
            results[i].append(x)

    if self.options.csv is not None:
        with open(self.options.csv, "a") as fh:
            writer = csv.writer(fh)
            for row in results:
                writer.writerow(["%g" % y for y in row])
    with open(self.fitnessfile, "w") as fh:
        for row in results:
            infomsg(*["%g" % y for y in row], file=fh)