def queryHelper(self, regex, commandToRunFmt):
  """query() might benefit from this.

  regex: the libLF.Regex to query
  commandToRunFmt: format string with the invocation.
    We apply commandToRunFmt.format(inFile, outFile)
      inFile: contains a libLF.Regex, NDJSON formatted
      outFile: contains a libLF.RegexPatternAndInputs, NDJSON formatted
  @returns: GeneratorQueryResponse[] (empty if the command fails)
  """
  libLF.log('queryHelper for {}:\n regex /{}/\n command {}' \
    .format(self.name, regex.pattern, commandToRunFmt))
  gqrs = []
  # Round-trip through tmp files: write the regex, run the generator, read its output.
  with tempfile.NamedTemporaryFile(prefix='GenInput-DriverQueryFile-', suffix='.json', delete=DELETE_TMP_FILES) as inFile, \
       tempfile.NamedTemporaryFile(prefix='GenInput-DriverOutFile-', suffix='.json', delete=DELETE_TMP_FILES) as outFile:
    libLF.writeToFile(inFile.name, regex.toNDJSON())
    rc, out = libLF.runcmd(commandToRunFmt.format(inFile.name, outFile.name))
    if rc == 0:
      with open(outFile.name, 'r') as inStream:
        contents = inStream.read()
      rpai = libLF.RegexPatternAndInputs().initFromNDJSON(contents)
      # One GeneratorQueryResponse per producer
      for producer in rpai.stringsByProducer:
        gqr = GeneratorQueryResponse(producer, rpai.stringsByProducer[producer])
        gqrs.append(gqr)
    else:
      # Don't fail silently: a generator that errors out contributes no inputs,
      # but we should at least record that it happened.
      libLF.log('queryHelper: {} command exited with rc {}; returning no inputs' \
        .format(self.name, rc))
  return gqrs
def main(regexFile, outFile, seed, nInputs, timeout):
  """Drive Rex: generate inputs for the regex in regexFile, write NDJSON to outFile."""
  libLF.log('regexFile {} outFile {} seed {} nInputs {} timeout {}' \
    .format(regexFile, outFile, seed, nInputs, timeout))

  # Load the libLF.Regex under test
  with open(regexFile, 'r') as regexStream:
    regex = libLF.Regex().initFromNDJSON(regexStream.read())
  libLF.log('Generating inputs for regex /{}/'.format(regex.pattern))

  # Ask Rex for inputs
  stringsByProducer = getRexInputs(regex.pattern, seed, nInputs, timeout)

  # Package the results and report
  rpai = libLF.RegexPatternAndInputs().initFromRaw(regex.pattern, stringsByProducer)
  libLF.log('Rex generated {} unique inputs for regex /{}/ ({} including duplicates)' \
    .format(len(rpai.getUniqueInputs()), regex.pattern, rpai.getNTotalInputs()))

  # Emit
  with open(outFile, 'w') as emitStream:
    emitStream.write(rpai.toNDJSON())
def main(regexFile, outFile, timeout):
  """Drive ReScue: generate inputs for the regex in regexFile, write NDJSON to outFile."""
  libLF.log('regexFile {} outFile {} timeout {}' \
    .format(regexFile, outFile, timeout))

  # Get the libLF.Regex
  with open(regexFile, 'r') as inStream:
    regex = libLF.Regex().initFromNDJSON(inStream.read())
  libLF.log('Generating inputs for regex /{}/'.format(regex.pattern))

  # Query ReScue
  # (renamed from the misleading `mutRexInputs` — these come from ReScue, not MutRex)
  reScueInputs = getReScueInputs(regex.pattern, timeout)
  libLF.log('ReScue generated {} inputs for regex /{}/'.format(
    len(reScueInputs), regex.pattern))

  # Emit
  stringsByProducer = {"ReScue": reScueInputs}
  with open(outFile, 'w') as outStream:
    rpai = libLF.RegexPatternAndInputs().initFromRaw(
      regex.pattern, stringsByProducer)
    outStream.write(rpai.toNDJSON())
def _getInputs(self):
  """inputs: unique str[], collapsing the result from INPUT_GENERATOR.

  Invokes INPUT_GENERATOR on self.regex (NDJSON via tmp files), applies the
  per-producer cap self.maxInputsPerGenerator, and returns the de-duplicated
  union of all producers' inputs plus "a" so at least one string is tested.

  @raises RuntimeError if INPUT_GENERATOR exits non-zero
  """
  # Query from tempfile
  with tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-genInputs-', suffix='.json', delete=DELETE_TMP_FILES) as queryFile, \
       tempfile.NamedTemporaryFile(prefix='SemanticAnalysis-outFile-', suffix='.json', delete=DELETE_TMP_FILES) as outFile:
    libLF.writeToFile(queryFile.name, self.regex.toNDJSON())
    rc, out = libLF.runcmd("'{}' --regex-file '{}' --out-file '{}' --parallelism 1 --seed {} --max-inputs-per-generator {} --generator-timeout {} 2>/dev/null" \
      .format(INPUT_GENERATOR, queryFile.name, outFile.name,
        self.rngSeed, # Propagate reproducibility into the generators
        self.maxInputsPerGenerator, # Reduce the size of intermediate tmp files
        self.timeoutPerGenerator, # Ensure reasonable time is taken
        ))
    out = out.strip()
    rpaiFileContents = outFile.read().decode("utf-8")
    #libLF.log('Got rc {} scriptOut {} rpai as JSON {}'.format(rc, out, rpaiFileContents))

    # This should never fail. Raise (not assert) so the check survives `python -O`.
    if rc != 0:
      raise RuntimeError('INPUT_GENERATOR exited with rc {}: {}'.format(rc, out))

    rpai = libLF.RegexPatternAndInputs().initFromNDJSON(rpaiFileContents)

    inputs = []
    libLF.log('_getInputs: The {} producers yielded {} total inputs' \
      .format(len(rpai.stringsByProducer), len(rpai.getUniqueInputs())))
    for producer in rpai.stringsByProducer:
      # Apply per-generator input limit
      producerInputs = rpai.stringsByProducer[producer]
      if 0 < self.maxInputsPerGenerator < len(producerInputs):
        libLF.log('_getInputs: producer {} yielded {} inputs, reducing to {}' \
          .format(producer, len(producerInputs), self.maxInputsPerGenerator))
        producerInputs = random.sample(producerInputs, self.maxInputsPerGenerator)
      # Add these inputs
      inputs += producerInputs

    return list(set(inputs + ["a"])) # Always test at least one string
def run(self):
  """Generate inputs for self.regex from every generator in INPUT_GENERATORS.

  @returns: libLF.RegexPatternAndInputs on success, or the caught exception on failure
  """
  try:
    libLF.log('Working on regex: /{}/'.format(self.regex.pattern))

    # Drive the various input generators
    stringsByProducer = {}
    nStrings = 0
    for inputGen in INPUT_GENERATORS:
      libLF.log('Getting inputs from {}'.format(inputGen.name))

      # Query the generator
      gqrs = inputGen.query(self.regex, self.rngSeed, self.inputsPerGenerator, self.generatorTimeout)

      # Unpack the responses
      for gqr in gqrs:
        # Enforce inputsPerGenerator
        _inputs = gqr.inputs
        if len(_inputs) > self.inputsPerGenerator:
          _inputs = random.sample(_inputs, self.inputsPerGenerator)
        stringsByProducer['{}-{}'.format(inputGen.name, gqr.name)] = _inputs
        nStrings += len(_inputs)
        libLF.log('Got {} inputs from {}-{}'.format(len(_inputs), inputGen.name, gqr.name))
    #libLF.log('sbp = {}'.format(stringsByProducer))

    # TODO Consider introducing mutants here

    rpai = libLF.RegexPatternAndInputs().initFromRaw(self.regex.pattern, stringsByProducer)
    #libLF.log('rpai {}: {}'.format(rpai, rpai.toNDJSON()))

    # Return
    libLF.log('Completed regex /{}/ -- {} inputs'.format(self.regex.pattern, nStrings))
    return rpai
  except KeyboardInterrupt:
    raise
  except BaseException as err:
    # The original called the undefined name `Sys.exit(1)` here, which raised a
    # NameError that masked the real error and made the subsequent log/return
    # unreachable. Log the failure and return it so the caller can handle it.
    libLF.log('ERROR: {}'.format(err))
    return err