Exemplo n.º 1
0
def cli():
    """Command-line entry point of the ruleset checker.

    Parses the command line (checker config path, optional rule files,
    optional ``--json_file``), reads the checker configuration, configures
    logging, loads every selected ruleset into a ``RuleTrie`` and runs the
    static checks enabled in the ``[rulesets]`` section (coverage, target
    validity, non-match groups, test formatting).  When ``[http] enabled``
    is not switched off, worker threads are started and, if no ``url_list``
    is configured, one ``ComparisonTask`` per loaded ruleset is queued.

    Returns:
        1 if any enabled static check reported a problem, otherwise 0.

    Raises:
        RuntimeError: if ``<certificates basedir>/default`` is not a
            directory.

    The process may also end through ``sys.exit(0)`` right after the
    graphviz dump when ``[debug] exit_after_dump`` is true.
    """
    parser = argparse.ArgumentParser(
        description='Check HTTPs rules for validity')
    # The previous help text was a leftover from the argparse tutorial.
    parser.add_argument('checker_config',
                        help='path to the checker configuration file')
    parser.add_argument('rule_files',
                        nargs="*",
                        default=[],
                        help="Specific XML rule files")
    parser.add_argument('--json_file',
                        default=None,
                        help='write results in json file')
    args = parser.parse_args()

    config = SafeConfigParser()
    config.read(args.checker_config)

    def optionalBool(section, option, default=False):
        # Boolean setting that may be absent from the config file.
        if config.has_option(section, option):
            return config.getboolean(section, option)
        return default

    logfile = config.get("log", "logfile")
    loglevel = convertLoglevel(config.get("log", "loglevel"))
    if logfile == "-":
        # "-" selects stderr instead of a log file.
        logging.basicConfig(stream=sys.stderr,
                            level=loglevel,
                            format="%(levelname)s %(message)s")
    else:
        logging.basicConfig(
            filename=logfile,
            level=loglevel,
            format="%(asctime)s %(levelname)s %(message)s "
                   "[%(pathname)s:%(lineno)d]")

    autoDisable = optionalBool("rulesets", "auto_disable")
    # Test rules even if they have default_off=...
    includeDefaultOff = optionalBool("rulesets", "include_default_off")
    ruledir = config.get("rulesets", "rulesdir")
    checkCoverage = optionalBool("rulesets", "check_coverage")
    checkTargetValidity = optionalBool("rulesets", "check_target_validity")
    checkNonmatchGroups = optionalBool("rulesets", "check_nonmatch_groups")
    checkTestFormatting = optionalBool("rulesets", "check_test_formatting")
    certdir = config.get("certificates", "basedir")

    if config.has_option("rulesets", "skiplist") and config.has_option(
            "rulesets", "skipfield"):
        skiplist = config.get("rulesets", "skiplist")
        skipIndex = config.getint("rulesets", "skipfield")
        with open(skiplist) as f:
            # The first line is discarded (presumably a CSV header row).
            f.readline()
            for line in f:
                # Drop the line terminator so a flag stored in the last
                # column still compares equal to "1".
                fields = line.rstrip("\r\n").split(",")
                if fields[skipIndex] == "1":
                    skipdict[binascii.unhexlify(fields[0])] = 1

    threadCount = config.getint("http", "threads")
    httpEnabled = optionalBool("http", "enabled", default=True)

    metricName = config.get("thresholds", "metric")
    thresholdDistance = config.getfloat("thresholds", "max_distance")
    metricClass = getMetricClass(metricName)
    metric = metricClass()

    # Debugging options, graphviz dump
    dumpGraphvizTrie = optionalBool("debug", "dump_graphviz_trie")
    if dumpGraphvizTrie:
        graphvizFile = config.get("debug", "graphviz_file")
        exitAfterDump = config.getboolean("debug", "exit_after_dump")

    if args.rule_files:
        xmlFnames = args.rule_files
    else:
        xmlFnames = glob.glob(os.path.join(ruledir, "*.xml"))
    trie = RuleTrie()

    rulesets = []
    # Result of the most recent static check; it is handed to json_output()
    # below.  Initialised here so that call cannot hit an unbound name when
    # no check is enabled or no ruleset was loaded.
    # NOTE(review): only the last check of the last ruleset is kept, as
    # before - confirm whether json_output() expects the full list.
    problems = []
    coverageProblemsExist = False
    targetValidityProblemExist = False
    nonmatchGroupProblemsExist = False
    testFormattingProblemsExist = False
    for xmlFname in xmlFnames:
        logging.debug("Parsing %s", xmlFname)
        if skipFile(xmlFname):
            logging.debug("Skipping rule file '%s', matches skiplist.",
                          xmlFname)
            continue

        # Close the rule file as soon as it has been parsed.
        with open(xmlFname, "rb") as xmlFile:
            root = etree.parse(xmlFile).getroot()
        ruleset = Ruleset(root, xmlFname)
        if ruleset.defaultOff and not includeDefaultOff:
            logging.debug("Skipping rule '%s', reason: %s", ruleset.name,
                          ruleset.defaultOff)
            continue
        # Check whether ruleset coverage by tests was sufficient.
        if checkCoverage:
            logging.debug("Checking coverage for '%s'.", ruleset.name)
            problems = ruleset.getCoverageProblems()
            for problem in problems:
                coverageProblemsExist = True
                logging.error(problem)
        if checkTargetValidity:
            logging.debug("Checking target validity for '%s'.", ruleset.name)
            problems = ruleset.getTargetValidityProblems()
            for problem in problems:
                targetValidityProblemExist = True
                logging.error(problem)
        if checkNonmatchGroups:
            logging.debug("Checking non-match groups for '%s'.", ruleset.name)
            problems = ruleset.getNonmatchGroupProblems()
            for problem in problems:
                nonmatchGroupProblemsExist = True
                logging.error(problem)
        if checkTestFormatting:
            logging.debug("Checking test formatting for '%s'.", ruleset.name)
            problems = ruleset.getTestFormattingProblems()
            for problem in problems:
                testFormattingProblemsExist = True
                logging.error(problem)
        trie.addRuleset(ruleset)
        rulesets.append(ruleset)

    # Trie is built now, dump it if it's set in config
    if dumpGraphvizTrie:
        logging.debug("Dumping graphviz ruleset trie")
        graph = trie.generateGraphizGraph()
        if graphvizFile == "-":
            graph.dot()
        else:
            with open(graphvizFile, "w") as gvFd:
                graph.dot(gvFd)
        if exitAfterDump:
            sys.exit(0)
    fetchOptions = http_client.FetchOptions(config)
    fetchers = []

    # Ensure "default" is in the platform dirs
    defaultCertDir = os.path.join(certdir, "default")
    if not os.path.isdir(defaultCertDir):
        raise RuntimeError(
            "Platform 'default' is missing from certificate directories")

    platforms = http_client.CertificatePlatforms(defaultCertDir)
    fetchers.append(
        http_client.HTTPFetcher("default", platforms, fetchOptions, trie))
    # fetches pages with unrewritten URLs
    fetcherPlain = http_client.HTTPFetcher("default", platforms, fetchOptions)

    urlList = []
    if config.has_option("http", "url_list"):
        with open(config.get("http", "url_list")) as urlFile:
            urlList = [line.rstrip() for line in urlFile]

    if httpEnabled:
        taskQueue = queue.Queue(1000)
        resQueue = queue.Queue()
        startTime = time.time()
        testedUrlPairCount = 0

        for _ in range(threadCount):
            worker = UrlComparisonThread(taskQueue, metric, thresholdDistance,
                                         autoDisable, resQueue)
            # Daemon threads do not keep the process alive on exit.
            worker.daemon = True
            worker.start()

        # If list of URLs to test/scan was not defined, use the test URL
        # extraction methods built into the Ruleset implementation.
        # NOTE(review): when a url_list IS configured nothing is queued
        # here - confirm that this is intended.
        if not urlList:
            for ruleset in rulesets:
                platformDir = os.path.join(certdir, ruleset.platform)
                if ruleset.platform != "default" and os.path.isdir(
                        platformDir):
                    # Platform-specific rulesets get their own fetcher list
                    # so the shared default list is left untouched.
                    theseFetchers = copy.deepcopy(fetchers)
                    platforms.addPlatform(ruleset.platform, platformDir)
                    theseFetchers.append(
                        http_client.HTTPFetcher(ruleset.platform, platforms,
                                                fetchOptions, trie))
                else:
                    theseFetchers = fetchers
                testUrls = []
                for test in ruleset.tests:
                    if ruleset.excludes(test.url):
                        # TODO: We should fetch the non-rewritten exclusion
                        # URLs to make sure they still exist.
                        logging.debug("Skipping excluded URL %s", test.url)
                        continue
                    testedUrlPairCount += 1
                    testUrls.append(test.url)
                taskQueue.put(
                    ComparisonTask(testUrls, fetcherPlain, theseFetchers,
                                   ruleset))

        # Block until the workers have processed every queued task.
        taskQueue.join()
        logging.info(
            "Finished in %.2f seconds. Loaded rulesets: %s, URL pairs: %s.",
            time.time() - startTime, len(xmlFnames), testedUrlPairCount)
        if args.json_file:
            json_output(resQueue, args.json_file, problems)

    # Each flag can only be set while its check is enabled, so testing the
    # flags alone is sufficient.
    if (coverageProblemsExist or targetValidityProblemExist
            or nonmatchGroupProblemsExist or testFormattingProblemsExist):
        return 1  # exit with error code
    return 0  # exit with success
def cli():
    """Command-line entry point of the ruleset checker (loading phase).

    Parses the command line (checker config path, optional rule files,
    optional ``--json_file``), reads the checker configuration, configures
    logging, verifies that a "default" certificate platform directory
    exists, then loads every selected ruleset into a ``RuleTrie`` while
    running the static checks enabled in the ``[rulesets]`` section.

    Rule files that fail to parse are logged and skipped.

    Raises:
        RuntimeError: if no "default" directory exists below the
            certificate base directory.

    NOTE(review): this variant ends after the loading loop and returns
    nothing; several settings read below (threads, metric, graphviz
    options, ...) are not used further in the visible code.
    """
    parser = argparse.ArgumentParser(
        description='Check HTTPs rules for validity')
    # The previous help text was a leftover from the argparse tutorial.
    parser.add_argument('checker_config',
                        help='path to the checker configuration file')
    parser.add_argument('rule_files', nargs="*", default=[],
                        help="Specific XML rule files")
    parser.add_argument('--json_file', default=None,
                        help='write results in json file')
    args = parser.parse_args()

    config = SafeConfigParser()
    config.read(args.checker_config)

    def optionalBool(section, option, default=False):
        # Boolean setting that may be absent from the config file.
        if config.has_option(section, option):
            return config.getboolean(section, option)
        return default

    logfile = config.get("log", "logfile")
    loglevel = convertLoglevel(config.get("log", "loglevel"))
    if logfile == "-":
        # "-" selects stderr instead of a log file.
        logging.basicConfig(stream=sys.stderr, level=loglevel,
                            format="%(levelname)s %(message)s")
    else:
        logging.basicConfig(
            filename=logfile, level=loglevel,
            format="%(asctime)s %(levelname)s %(message)s "
                   "[%(pathname)s:%(lineno)d]")

    autoDisable = optionalBool("rulesets", "auto_disable")
    # Test rules even if they have default_off=...
    includeDefaultOff = optionalBool("rulesets", "include_default_off")
    ruledir = config.get("rulesets", "rulesdir")
    checkCoverage = optionalBool("rulesets", "check_coverage")
    checkTargetValidity = optionalBool("rulesets", "check_target_validity")
    checkNonmatchGroups = optionalBool("rulesets", "check_nonmatch_groups")
    checkTestFormatting = optionalBool("rulesets", "check_test_formatting")
    certdir = config.get("certificates", "basedir")

    if config.has_option("rulesets", "skiplist") and config.has_option(
            "rulesets", "skipfield"):
        skiplist = config.get("rulesets", "skiplist")
        skipIndex = config.getint("rulesets", "skipfield")
        with open(skiplist) as f:
            # The first line is discarded (presumably a CSV header row).
            f.readline()
            for line in f:
                # Drop the line terminator so a flag stored in the last
                # column still compares equal to "1".
                fields = line.rstrip("\r\n").split(",")
                if fields[skipIndex] == "1":
                    skipdict[binascii.unhexlify(fields[0])] = 1

    threadCount = config.getint("http", "threads")
    httpEnabled = optionalBool("http", "enabled", default=True)

    # Get all platform dirs, make sure "default" is among them
    certdirFiles = glob.glob(os.path.join(certdir, "*"))
    havePlatforms = set(
        os.path.basename(fname) for fname in certdirFiles
        if os.path.isdir(fname))
    logging.debug("Loaded certificate platforms: %s", ",".join(havePlatforms))
    if "default" not in havePlatforms:
        raise RuntimeError(
            "Platform 'default' is missing from certificate directories")

    metricName = config.get("thresholds", "metric")
    thresholdDistance = config.getfloat("thresholds", "max_distance")
    metricClass = getMetricClass(metricName)
    metric = metricClass()

    # Debugging options, graphviz dump
    dumpGraphvizTrie = optionalBool("debug", "dump_graphviz_trie")
    if dumpGraphvizTrie:
        graphvizFile = config.get("debug", "graphviz_file")
        exitAfterDump = config.getboolean("debug", "exit_after_dump")

    if args.rule_files:
        xmlFnames = args.rule_files
    else:
        xmlFnames = glob.glob(os.path.join(ruledir, "*.xml"))
    trie = RuleTrie()

    rulesets = []
    coverageProblemsExist = False
    targetValidityProblemExist = False
    nonmatchGroupProblemsExist = False
    testFormattingProblemsExist = False
    for xmlFname in xmlFnames:
        logging.debug("Parsing %s", xmlFname)
        if skipFile(xmlFname):
            logging.debug("Skipping rule file '%s', matches skiplist.",
                          xmlFname)
            continue

        try:
            # open() works on Python 2 and 3 (file() is Python 2 only); the
            # with-block closes the handle once parsing is done.
            with open(xmlFname, "rb") as xmlFile:
                ruleset = Ruleset(etree.parse(xmlFile).getroot(), xmlFname)
        except Exception as e:
            logging.error("Exception parsing %s: %s", xmlFname, e)
            # Without this, the code below would run against the ruleset of
            # the previous iteration (or an unbound name on the first one).
            continue
        if ruleset.defaultOff and not includeDefaultOff:
            logging.debug("Skipping rule '%s', reason: %s", ruleset.name,
                          ruleset.defaultOff)
            continue
        # Check whether ruleset coverage by tests was sufficient.
        if checkCoverage:
            logging.debug("Checking coverage for '%s'.", ruleset.name)
            problems = ruleset.getCoverageProblems()
            for problem in problems:
                coverageProblemsExist = True
                logging.error(problem)
        if checkTargetValidity:
            logging.debug("Checking target validity for '%s'.", ruleset.name)
            problems = ruleset.getTargetValidityProblems()
            for problem in problems:
                targetValidityProblemExist = True
                logging.error(problem)
        if checkNonmatchGroups:
            logging.debug("Checking non-match groups for '%s'.", ruleset.name)
            problems = ruleset.getNonmatchGroupProblems()
            for problem in problems:
                nonmatchGroupProblemsExist = True
                logging.error(problem)
        if checkTestFormatting:
            logging.debug("Checking test formatting for '%s'.", ruleset.name)
            problems = ruleset.getTestFormattingProblems()
            for problem in problems:
                testFormattingProblemsExist = True
                logging.error(problem)
        trie.addRuleset(ruleset)
        rulesets.append(ruleset)
Exemplo n.º 3
0
def cli():
    """Command-line entry point of the ruleset checker.

    Parses the command line (checker config path, optional rule files,
    optional ``--json_file``), reads the checker configuration, configures
    logging, loads every selected ruleset into a ``RuleTrie`` and runs the
    static checks enabled in the ``[rulesets]`` section (coverage, target
    validity, non-match groups, test formatting).  When ``[http] enabled``
    is not switched off, worker threads are started and, if no ``url_list``
    is configured, one ``ComparisonTask`` per loaded ruleset is queued.

    Returns:
        1 if any enabled static check reported a problem, otherwise 0.

    Raises:
        RuntimeError: if ``<certificates basedir>/default`` is not a
            directory.

    The process may also end through ``sys.exit(0)`` right after the
    graphviz dump when ``[debug] exit_after_dump`` is true.
    """
    parser = argparse.ArgumentParser(
        description='Check HTTPs rules for validity')
    # The previous help text was a leftover from the argparse tutorial.
    parser.add_argument(
        'checker_config', help='path to the checker configuration file')
    parser.add_argument('rule_files', nargs="*", default=[],
                        help="Specific XML rule files")
    parser.add_argument('--json_file', default=None,
                        help='write results in json file')
    args = parser.parse_args()

    config = SafeConfigParser()
    config.read(args.checker_config)

    def optionalBool(section, option, default=False):
        # Boolean setting that may be absent from the config file.
        if config.has_option(section, option):
            return config.getboolean(section, option)
        return default

    logfile = config.get("log", "logfile")
    loglevel = convertLoglevel(config.get("log", "loglevel"))
    if logfile == "-":
        # "-" selects stderr instead of a log file.
        logging.basicConfig(stream=sys.stderr, level=loglevel,
                            format="%(levelname)s %(message)s")
    else:
        logging.basicConfig(
            filename=logfile, level=loglevel,
            format="%(asctime)s %(levelname)s %(message)s "
                   "[%(pathname)s:%(lineno)d]")

    autoDisable = optionalBool("rulesets", "auto_disable")
    # Test rules even if they have default_off=...
    includeDefaultOff = optionalBool("rulesets", "include_default_off")
    ruledir = config.get("rulesets", "rulesdir")
    checkCoverage = optionalBool("rulesets", "check_coverage")
    checkTargetValidity = optionalBool("rulesets", "check_target_validity")
    checkNonmatchGroups = optionalBool("rulesets", "check_nonmatch_groups")
    checkTestFormatting = optionalBool("rulesets", "check_test_formatting")
    certdir = config.get("certificates", "basedir")

    if config.has_option("rulesets", "skiplist") and config.has_option(
            "rulesets", "skipfield"):
        skiplist = config.get("rulesets", "skiplist")
        skipIndex = config.getint("rulesets", "skipfield")
        with open(skiplist) as f:
            # The first line is discarded (presumably a CSV header row).
            f.readline()
            for line in f:
                # Drop the line terminator so a flag stored in the last
                # column still compares equal to "1".
                fields = line.rstrip("\r\n").split(",")
                if fields[skipIndex] == "1":
                    skipdict[binascii.unhexlify(fields[0])] = 1

    threadCount = config.getint("http", "threads")
    httpEnabled = optionalBool("http", "enabled", default=True)

    metricName = config.get("thresholds", "metric")
    thresholdDistance = config.getfloat("thresholds", "max_distance")
    metricClass = getMetricClass(metricName)
    metric = metricClass()

    # Debugging options, graphviz dump
    dumpGraphvizTrie = optionalBool("debug", "dump_graphviz_trie")
    if dumpGraphvizTrie:
        graphvizFile = config.get("debug", "graphviz_file")
        exitAfterDump = config.getboolean("debug", "exit_after_dump")

    if args.rule_files:
        xmlFnames = args.rule_files
    else:
        xmlFnames = glob.glob(os.path.join(ruledir, "*.xml"))
    trie = RuleTrie()

    rulesets = []
    # Result of the most recent static check; it is handed to json_output()
    # below.  Initialised here so that call cannot hit an unbound name when
    # no check is enabled or no ruleset was loaded.
    # NOTE(review): only the last check of the last ruleset is kept, as
    # before - confirm whether json_output() expects the full list.
    problems = []
    coverageProblemsExist = False
    targetValidityProblemExist = False
    nonmatchGroupProblemsExist = False
    testFormattingProblemsExist = False
    for xmlFname in xmlFnames:
        logging.debug("Parsing %s", xmlFname)
        if skipFile(xmlFname):
            logging.debug("Skipping rule file '%s', matches skiplist.",
                          xmlFname)
            continue

        # Close the rule file as soon as it has been parsed.
        with open(xmlFname, "rb") as xmlFile:
            root = etree.parse(xmlFile).getroot()
        ruleset = Ruleset(root, xmlFname)
        if ruleset.defaultOff and not includeDefaultOff:
            logging.debug("Skipping rule '%s', reason: %s", ruleset.name,
                          ruleset.defaultOff)
            continue
        # Check whether ruleset coverage by tests was sufficient.
        if checkCoverage:
            logging.debug("Checking coverage for '%s'.", ruleset.name)
            problems = ruleset.getCoverageProblems()
            for problem in problems:
                coverageProblemsExist = True
                logging.error(problem)
        if checkTargetValidity:
            logging.debug("Checking target validity for '%s'.", ruleset.name)
            problems = ruleset.getTargetValidityProblems()
            for problem in problems:
                targetValidityProblemExist = True
                logging.error(problem)
        if checkNonmatchGroups:
            logging.debug("Checking non-match groups for '%s'.", ruleset.name)
            problems = ruleset.getNonmatchGroupProblems()
            for problem in problems:
                nonmatchGroupProblemsExist = True
                logging.error(problem)
        if checkTestFormatting:
            logging.debug("Checking test formatting for '%s'.", ruleset.name)
            problems = ruleset.getTestFormattingProblems()
            for problem in problems:
                testFormattingProblemsExist = True
                logging.error(problem)
        trie.addRuleset(ruleset)
        rulesets.append(ruleset)

    # Trie is built now, dump it if it's set in config
    if dumpGraphvizTrie:
        logging.debug("Dumping graphviz ruleset trie")
        graph = trie.generateGraphizGraph()
        if graphvizFile == "-":
            graph.dot()
        else:
            with open(graphvizFile, "w") as gvFd:
                graph.dot(gvFd)
        if exitAfterDump:
            sys.exit(0)
    fetchOptions = http_client.FetchOptions(config)
    fetchers = []

    # Ensure "default" is in the platform dirs
    defaultCertDir = os.path.join(certdir, "default")
    if not os.path.isdir(defaultCertDir):
        raise RuntimeError(
            "Platform 'default' is missing from certificate directories")

    platforms = http_client.CertificatePlatforms(defaultCertDir)
    fetchers.append(http_client.HTTPFetcher(
        "default", platforms, fetchOptions, trie))
    # fetches pages with unrewritten URLs
    fetcherPlain = http_client.HTTPFetcher("default", platforms, fetchOptions)

    urlList = []
    if config.has_option("http", "url_list"):
        with open(config.get("http", "url_list")) as urlFile:
            urlList = [line.rstrip() for line in urlFile]

    if httpEnabled:
        taskQueue = queue.Queue(1000)
        resQueue = queue.Queue()
        startTime = time.time()
        testedUrlPairCount = 0

        for _ in range(threadCount):
            worker = UrlComparisonThread(
                taskQueue, metric, thresholdDistance, autoDisable, resQueue)
            # Daemon threads do not keep the process alive on exit.
            worker.daemon = True
            worker.start()

        # If list of URLs to test/scan was not defined, use the test URL
        # extraction methods built into the Ruleset implementation.
        # NOTE(review): when a url_list IS configured nothing is queued
        # here - confirm that this is intended.
        if not urlList:
            for ruleset in rulesets:
                platformDir = os.path.join(certdir, ruleset.platform)
                if ruleset.platform != "default" and os.path.isdir(
                        platformDir):
                    # Platform-specific rulesets get their own fetcher list
                    # so the shared default list is left untouched.
                    theseFetchers = copy.deepcopy(fetchers)
                    platforms.addPlatform(ruleset.platform, platformDir)
                    theseFetchers.append(http_client.HTTPFetcher(
                        ruleset.platform, platforms, fetchOptions, trie))
                else:
                    theseFetchers = fetchers
                testUrls = []
                for test in ruleset.tests:
                    if ruleset.excludes(test.url):
                        # TODO: We should fetch the non-rewritten exclusion
                        # URLs to make sure they still exist.
                        logging.debug("Skipping excluded URL %s", test.url)
                        continue
                    testedUrlPairCount += 1
                    testUrls.append(test.url)
                taskQueue.put(ComparisonTask(
                    testUrls, fetcherPlain, theseFetchers, ruleset))

        # Block until the workers have processed every queued task.
        taskQueue.join()
        logging.info(
            "Finished in %.2f seconds. Loaded rulesets: %s, URL pairs: %s.",
            time.time() - startTime, len(xmlFnames), testedUrlPairCount)
        if args.json_file:
            json_output(resQueue, args.json_file, problems)

    # Each flag can only be set while its check is enabled, so testing the
    # flags alone is sufficient.
    if (coverageProblemsExist or targetValidityProblemExist
            or nonmatchGroupProblemsExist or testFormattingProblemsExist):
        return 1  # exit with error code
    return 0  # exit with success