def main(semanticsTestsFile, semanticOnly, performanceTestsFile, perfOnly):
    """Run the semantic and/or performance test suites and log a summary.

    semanticsTestsFile: str: path to the semantic tests file
    semanticOnly: bool: if True, run only the semantic tests
    performanceTestsFile: str: path to the performance tests file
    perfOnly: bool: if True, run only the performance tests
    """
    # Fix: the original format string was missing the '{}' after 'semanticOnly',
    # which shifted every subsequent value one slot left and dropped perfOnly.
    libLF.log('semanticsTestsFile {} semanticOnly {} performanceTestsFile {} perfOnly {}' \
      .format(semanticsTestsFile, semanticOnly, performanceTestsFile, perfOnly))

    #### Check dependencies
    libLF.checkShellDependencies(shellDeps)

    #### Load and run each test
    summary = []

    for testType, testsFile in [(TestSuite.SEMANTIC_TEST, semanticsTestsFile),
                                (TestSuite.PERF_TEST, performanceTestsFile)]:
        # Honor the "only run one suite" filters
        if perfOnly and testType != TestSuite.PERF_TEST:
            continue
        if semanticOnly and testType != TestSuite.SEMANTIC_TEST:
            continue

        libLF.log("Loading {} tests from {}".format(testType, testsFile))
        ts = TestSuite(testsFile, testType)

        libLF.log("Running {} tests".format(testType))
        nFailures = ts.run()
        summary.append("{} tests from {}: {} failures".format(
            testType, testsFile, nFailures))

    libLF.log("****************************************")
    for line in summary:
        libLF.log("  " + line)
def checkRegistryDependencies(registry):
  """Verify that the tooling for this registry is present.

  registry: str: key into registryToPaths
  """
  # 'cloc' must be on PATH and runnable
  libLF.checkShellDependencies(['cloc'], mustBeExecutable=True)

  registryPaths = registryToPaths[registry]
  paths = [registryPaths['preprocessor']]
  paths.extend(registryPaths['instrumentor'].values())
  paths.append(registryPaths['moduleRunner'])
  libLF.log("Checking paths for registry {}: {}".format(registry, paths))
  # These are support files, not necessarily executable
  libLF.checkShellDependencies(paths, mustBeExecutable=False)
def main(caseStudyFile):
  """Run the configured case studies and log where each wrote its results.

  caseStudyFile: str: path to the case-study description file
  """
  libLF.log('caseStudyFile {}' \
    .format(caseStudyFile))

  #### Check dependencies
  libLF.checkShellDependencies(shellDeps)

  #### Load data
  caseStudies = loadCaseStudies(caseStudyFile)

  #### Run
  # NOTE(review): the original assigned studiesToRun three times in a row;
  # only the last assignment took effect. The dead lists are kept here as
  # comments in case they were meant as alternative configurations:
  #   ["REWZWA-1", "REWZWA-2", "REWBR-2", "Microsoft", "Cloudflare"]
  #   ["REWZWA-1", "REWZWA-2"]
  studiesToRun = ["REWBR-6"]

  # Map case-study nickname -> file name reported by its run()
  nick2fname = {}
  for caseStudy in caseStudies:
    if caseStudy.nick in studiesToRun:
      nick2fname[caseStudy.nick] = caseStudy.run()
  for nick, fname in nick2fname.items():
    libLF.log("Case study {} -- See {}".format(nick, fname))
def main(regexFile, outFile, parallelism):
  """Run MemoizationStaticAnalysis on each regex in regexFile, in parallel.

  regexFile: str: file of regexes; one task is built per regex
  outFile: str: NDJSON output file for the successful analyses
  parallelism: int: number of parallel workers
  """
  libLF.log('regexFile {} outFile {} parallelism {}' \
    .format(regexFile, outFile, parallelism))

  #### Check dependencies
  libLF.checkShellDependencies(shellDeps)

  #### Load data
  tasks = getTasks(regexFile)
  libLF.log('{} regexes'.format(len(tasks)))

  #### Process data

  # CPU-bound, no limits
  libLF.log('Submitting to map')
  results = libLF.parallel.map(tasks, parallelism,
    libLF.parallel.RateLimitEnums.NO_RATE_LIMIT, libLF.parallel.RateLimitEnums.NO_RATE_LIMIT,
    jitter=False)

  #### Emit results

  libLF.log('Writing results to {}'.format(outFile))
  nSuccesses = 0
  nExceptions = 0
  with open(outFile, 'w') as outStream:
    for msa in results:
      # A successful worker yields a MemoizationStaticAnalysis; anything
      # else is treated as an exception object and counted/logged.
      # (idiom fix: isinstance instead of 'type(x) is'; also normalized
      # the previously inconsistent indentation in this loop)
      if isinstance(msa, libMemo.MemoizationStaticAnalysis):
        nSuccesses += 1
        outStream.write(msa.toNDJSON() + '\n')
      else:
        nExceptions += 1
        libLF.log("Error message: " + str(msa))
  libLF.log('Successfully performed MemoizationStaticAnalysis on {} regexes, {} exceptions'.format(nSuccesses, nExceptions))

  #### Analysis
  # TODO Any preliminary analysis
def main(rustc, rustFile, dumpTokenTree):
    """Extract regex patterns from a Rust source file and print them as NDJSON.

    rustc: str: path to the rustc binary used to obtain the token tree
    rustFile: str: path to the Rust source file to scan
    dumpTokenTree: bool: if True, also log the token tree as pretty JSON
    """
    libLF.checkShellDependencies([rustc])

    # Fix: tokenTree was previously left undefined when the file could not
    # contain regexes, causing a NameError at the dumpTokenTree step below.
    tokenTree = None
    if fileMightContainRegexes(rustFile):
        libLF.log('File might contain regexes, proceeding...')
        try:
            libLF.log('Getting token tree')
            tokenTree = getTokenTree(rustc, rustFile)
        except BaseException as err:
            libLF.log('Error getting token tree: {}'.format(err))
            sys.exit(1)

        try:
            libLF.log('Walking token tree')
            visitor = FrontierVisitor()
            walkTokenTree(tokenTree, visitor)
            patterns = visitor.getRegexPatterns()
            libLF.log('Extracted {} patterns'.format(len(patterns)))
        except BaseException as err:
            libLF.log('Error walking token tree: {}'.format(err))
            sys.exit(1)
    else:
        libLF.log('File does not contain "Regex", no regexes possible')
        patterns = []

    # Emit the patterns in the project's SimpleFileWithRegexes NDJSON format
    regexes = [{'pattern': p, 'flags': ''} for p in patterns]
    sfwr = libLF.SimpleFileWithRegexes()
    sfwr.initFromRaw(fileName=rustFile,
                     language='rust',
                     couldParse=1,
                     regexes=regexes)
    print(sfwr.toNDJSON())

    if dumpTokenTree and tokenTree is not None:
        # "Pretty" JSON makes it easier for humans to decode
        asJSON = json.dumps(tokenTree, indent=2, separators=(',', ':'))
        libLF.log('\n' + asJSON)
def main(regexFile, useCSharpToFindMostEI, perfPumps, maxAttackStringLen,
         queryPrototype, runSecurityAnalysis, nTrialsPerCondition,
         queryProductionEngines, timeSensitive, parallelism, outFile):
    """Measure SL (super-linear) regex behavior and pickle the results.

    Builds one task per regex, runs them (serially if timeSensitive),
    and writes the concatenated per-regex DataFrames to outFile.
    If runSecurityAnalysis, only logs the pass/fail tally and exits.
    """
    # Fix: the .format() arguments previously passed queryPrototype and
    # maxAttackStringLen in swapped order relative to the placeholders.
    libLF.log('regexFile {} useCSharpToFindMostEI {} perfPumps {} maxAttackStringLen {} queryPrototype {} runSecurityAnalysis {} nTrialsPerCondition {} queryProductionEngines {} timeSensitive {} parallelism {} outFile {}' \
      .format(regexFile, useCSharpToFindMostEI, perfPumps, maxAttackStringLen, queryPrototype, runSecurityAnalysis, nTrialsPerCondition, queryProductionEngines, timeSensitive, parallelism, outFile))

    #### Check dependencies
    libLF.checkShellDependencies(shellDeps)

    #### Load data
    taskConfig = TaskConfig(useCSharpToFindMostEI, queryPrototype,
                            runSecurityAnalysis, queryProductionEngines)
    tasks = getTasks(regexFile, perfPumps, maxAttackStringLen,
                     nTrialsPerCondition, taskConfig)
    nRegexes = len(tasks)

    #### Collect data

    dfs = []  # per-regex DataFrames; concatenated once at the end
    nNonSL = 0
    nExceptions = 0

    # Timing measurements are noisy under contention, so run serially
    # when timeSensitive is set.
    nWorkers = 1 if timeSensitive else parallelism
    libLF.log("timeSensitive {}, so using {} workers".format(
        timeSensitive, nWorkers))
    results = libLF.parallel.map(tasks,
                                 nWorkers,
                                 libLF.parallel.RateLimitEnums.NO_RATE_LIMIT,
                                 libLF.parallel.RateLimitEnums.NO_RATE_LIMIT,
                                 jitter=False)

    if runSecurityAnalysis:
        # (removed the unused allSL list the original computed here)
        nSucceeded = len([res for res in results if res])
        nFailed = len([res for res in results if not res])
        libLF.log("{} succeeded in sec'ty analysis, {} failed".format(
            nSucceeded, nFailed))
        sys.exit(0)

    for t, res in zip(tasks, results):
        if isinstance(res, pd.DataFrame):
            dfs.append(res)
        elif type(res) is type(MyTask.NOT_SL) and res == MyTask.NOT_SL:
            nNonSL += 1
        else:
            libLF.log("Exception on /{}/: {}".format(t.regex.pattern, res))
            nExceptions += 1

    nSL = len(dfs)
    libLF.log("{} regexes were SL, {} non-SL, {} exceptions".format(
        nSL, nNonSL, nExceptions))

    #### Emit results
    if dfs:
        # Fix: DataFrame.append is removed in pandas 2.x; build the frame
        # with a single concat instead of repeated appends (also O(n)).
        df = pd.concat(dfs)
        libLF.log('Writing results to {}'.format(outFile))
        df.to_pickle(outFile)
        libLF.log("Data columns: {}".format(df.columns))
    else:
        # Fix: the original crashed (None.to_pickle) when no regex was SL.
        libLF.log("No SL regexes; nothing to write to {}".format(outFile))
import os
import sys
import re

sys.path.append('{}/lib'.format(
    os.environ['REGEX_GENERALIZABILITY_PROJECT_ROOT']))
import libLF
import argparse
import subprocess
import sys
import json
import stat

# Fix: MAVEN_CLI was assigned twice in a row; the hard-coded developer path
# ('/home/davisjam/local-install/apache-maven-3.6.0/bin/mvn') was dead code.
# Rely on 'mvn' being on PATH, and verify it up front.
MAVEN_CLI = 'mvn'
libLF.checkShellDependencies([MAVEN_CLI], mustBeExecutable=True)

# Gradle cache/config directory -- presumably a throwaway location; confirm
GRADLE_USER_HOME = '/tmp/.gradle'

#########
# Classes to drive the Maven and Gradle build systems


class BuildSystem:
    """Common state for a build-system driver (Maven or Gradle).

    NOTE(review): only the constructor is visible here; the class presumably
    continues elsewhere in the original file with driver methods.
    """
    # Identifiers for the supported build systems
    BUILD_SYSTEM_MAVEN = "maven"
    BUILD_SYSTEM_GRADLE = "gradle"

    def __init__(self):
        # name: expected to be one of the BUILD_SYSTEM_* identifiers
        self.name = None
        # cli: command used to invoke the build tool -- TODO confirm
        self.cli = None
        # buildFile: the build description file -- TODO confirm
        self.buildFile = None
def checkRegistryDeps(registry):
    """Verify that each language extractor needed for this registry exists."""
    extractorPaths = [
        langToExtractorPath[lang.lower()]
        for lang in registryToLangs[registry]
    ]
    # Extractors need only exist, not be executable
    libLF.checkShellDependencies(extractorPaths, mustBeExecutable=False)
Пример #9
0
# Batching and time budget for the AutomataCLI tool
AUTOMATACLI_BATCH_SIZE = 10
AUTOMATACLI_MAX_SECONDS_PER_REGEX = 5
AUTOMATACLI_TIMEOUT_SEC = AUTOMATACLI_BATCH_SIZE * AUTOMATACLI_MAX_SECONDS_PER_REGEX
# Cap the simple-path enumeration (see SIMPLE_PATH_COUNT_LIMIT rationale below)
LIMIT_SIMPLE_PATHS = True
SIMPLE_PATH_COUNT_LIMIT = 5000 # Based on a sample of 70K regexes, the distribution is heavily weighted towards 1-10 paths per regex. <=100 regexes fall above 50K simple paths. No need to exhaustively count for these outliers.
SIMPLE_PATH_TIME_LIMIT = 5 # seconds

# Dependencies
WINDOWS_OS = os.name == 'nt'
# wine is used to run the .NET AutomataCLI.exe on non-Windows hosts (see below)
WINE_PATH = shutil.which("wine")
AutomataCLI = os.path.join(os.environ['REGEX_GENERALIZABILITY_PROJECT_ROOT'], 'bin', 'AutomataCLI.exe')
if WINDOWS_OS:
    # Workaround for broken symlink
    AutomataCLI = os.path.join(os.environ['REGEX_GENERALIZABILITY_PROJECT_ROOT'], 'measurement-instruments', 'automata', 'AutomataCLI.exe')
else:
    # Off Windows we need an executable wine; WINE_PATH is None if not found
    libLF.checkShellDependencies([WINE_PATH], mustBeExecutable=True)

# The CLI binary itself only needs to exist (wine/Windows runs it)
libLF.checkShellDependencies([AutomataCLI], mustBeExecutable=False)

# Control analysis
class AnalysisStages:
  """String identifiers for the stages of the regex analysis."""
  ANALYZE_AUTOMATON = 'automaton'
  ANALYZE_SIMPLE_PATHS = 'simple paths'
  ANALYZE_WORST_CASE = 'worst case'

# Misc
reg2lang = {
  'npm': 'JavaScript', # TypeScript is evaluated on a JS engine
  'crates.io': 'Rust',
  'packagist': 'PHP',
  'pypi': 'Python',
Пример #10
0
import subprocess
import tempfile
import argparse
import shutil
import shlex

import re
import subprocess

################
# Dependencies
################

# Jar that generates candidate input strings for a regex (ReScue tool)
RESCUE_PATH = os.path.join(os.environ['ECOSYSTEM_REGEXP_PROJECT_ROOT'], 'bin',
                           'ReScueInputGenerator.jar')
libLF.checkShellDependencies([RESCUE_PATH], mustBeExecutable=False)

# Genetic-algorithm parameters passed to ReScue -- presumably percentages;
# TODO confirm against the ReScue CLI
CROSSOVER_PROBABILITY = 10
MUTATE_PROBABILITY = 10

################
# Helpers
################


def getReScueInputs(pattern, timeout):
    """Return inputs: str[]"""
    # Use subprocess.run directly instead of libLF.runcmd because the regex is delivered on command line
    # and might be unescaped, contain newlines, etc.
    # Also, we want to be able to capture stderr cleanly with a timeout.
    libLF.log('Command: ' + str([
Пример #11
0
import subprocess
import tempfile
import argparse
import shutil
import shlex

import re
import subprocess

################
# Dependencies
################

# Jar that generates input strings via the Brics automaton library
BRICS_PATH = os.path.join(os.environ['ECOSYSTEM_REGEXP_PROJECT_ROOT'], 'bin',
                          'BricsInputGenerator.jar')
libLF.checkShellDependencies([BRICS_PATH], mustBeExecutable=False)

# Generator tuning -- presumably max generated string length and the
# probability of emitting over-long strings; TODO confirm against the jar
MAX_STRING_LEN = 128
PROB_EXCESSIVE_STRINGS = 0

################
# Helpers
################


def convertPatternToBrics(pattern):
    """Convert to Brics style
  
  The Brics language is fairly minimal:
    http://www.brics.dk/automaton/doc/index.html?dk/brics/automaton/RegExp.html
  In particular, it supports *no* character classes like \d or \s.
Пример #12
0
import subprocess
import tempfile
import argparse
import shutil
import shlex

import re
import subprocess

################
# Dependencies
################

# Jar that generates mutation-based input strings (MutRex tool)
MUTREX_PATH = os.path.join(os.environ['ECOSYSTEM_REGEXP_PROJECT_ROOT'], 'bin',
                           'MutRexInputGenerator.jar')
libLF.checkShellDependencies([MUTREX_PATH], mustBeExecutable=False)

################
# Helpers
################


def getMutRexInputs(pattern, timeout):
    """Return inputs: str[]"""

    # Build command to run
    cmd = ["java", "-jar", MUTREX_PATH, pattern]
    libLF.log('cmd: ' + " ".join(cmd))

    # Get inputs, guarded by a timeout
    tmo = None if timeout < 0 else timeout
libLF.log('Config:\n  INPUT_GENERATOR {}'.format(INPUT_GENERATOR))

# Per-language CLIs used to query a regex's match behavior in each language
langCLIDir = os.path.join(os.environ['ECOSYSTEM_REGEXP_PROJECT_ROOT'], 'bin')
lang2cli = {
    'go': os.path.join(langCLIDir, 'check-regex-behavior-in-go'),
    'java': os.path.join(langCLIDir, 'check-regex-behavior-in-java.pl'),
    'javascript': os.path.join(langCLIDir, 'check-regex-behavior-in-node.js'),
    'perl': os.path.join(langCLIDir, 'check-regex-behavior-in-perl.pl'),
    'php': os.path.join(langCLIDir, 'check-regex-behavior-in-php.php'),
    'python': os.path.join(langCLIDir, 'check-regex-behavior-in-python.py'),
    'ruby': os.path.join(langCLIDir, 'check-regex-behavior-in-ruby.rb'),
    'rust': os.path.join(langCLIDir, 'check-regex-behavior-in-rust'),
}
libLF.log('Config:\n  language CLIs: {}'.format(json.dumps(lang2cli)))

# Fail fast if the input generator or any language CLI is missing
libLF.checkShellDependencies([INPUT_GENERATOR] + list(lang2cli.values()))


class MyTask(libLF.parallel.ParallelTask):
    def __init__(self, regex, maxInputsPerGenerator, rngSeed,
                 timeoutPerGenerator):
        """Store the per-regex task configuration; no work happens here."""
        # regex: the regex object under test (has .pattern per usage below)
        self.regex = regex
        # Cap on the number of inputs taken from each input generator
        self.maxInputsPerGenerator = maxInputsPerGenerator
        # RNG seed -- presumably for reproducible input generation; confirm
        self.rngSeed = rngSeed
        # Per-generator timeout -- presumably seconds; confirm against callers
        self.timeoutPerGenerator = timeoutPerGenerator

    def _queryRegexInLang(self, pattern, queryFile, language):
        """Query behavior of <pattern, input[]> in language

    pattern: str: regex pattern
    queryFile: str: name of file containing the query to use