def ignore_changing_fields(clues):
    """Tries to detect and ignore MIME fields with ever changing content.

    Some servers might include fields varying with time, randomly, etc. Those
    fields are likely to alter the clue's digest and interfere with
    L{analyze}, producing many false positives and making the scan useless.
    This function detects those fields and recalculates each clue's digest so
    they can be safely analyzed again.

    While the clues are being re-parsed, a no-op C{_get_<field>} handler is
    installed on the C{Clue} class for every varying field which does not
    already have one. Those handlers are always removed again before
    returning, even if re-parsing raises.

    @param clues: Sequence of clues.
    @type clues: C{list} or C{tuple}

    @return: The very same sequence of clues, after re-parsing each clue.
    @rtype: C{list} or C{tuple}
    """
    from Halberd.clues.Clue import Clue

    different = diff_fields(clues)

    # Names of the handlers installed by this call (and only by this call);
    # handlers that already existed on Clue are never touched.
    ignored = []
    try:
        # First alter Clue to be able to cope with the varying fields.
        for field in different:
            method = "_get_" + Clue.normalize(field)
            if not hasattr(Clue, method):
                logger.debug("ignoring %s", field)
                # The handler discards its input and returns None.
                # NOTE(review): presumably Clue.parse looks these handlers up
                # by name for each header field -- confirm against Clue.
                setattr(Clue, method, lambda s, f: None)
                # Register only after a successful setattr so the cleanup
                # below never tries to delete an attribute that is missing.
                ignored.append(method)

        for clue in clues:
            Clue.parse(clue, clue.headers)
    finally:
        # We want to leave the Clue class as before because a MIME field
        # causing trouble for the current scan might be the source of precious
        # information for another scan. Doing it in a finally clause keeps
        # that guarantee when parsing fails half-way through.
        for method in ignored:
            delattr(Clue, method)

    return clues
def ignore_changing_fields(clues):
    """Tries to detect and ignore MIME fields with ever changing content.

    Some servers might include fields varying with time, randomly, etc. Those
    fields are likely to alter the clue's digest and interfere with
    L{analyze}, producing many false positives and making the scan useless.
    This function detects those fields and recalculates each clue's digest so
    they can be safely analyzed again.

    While the clues are being re-parsed, a no-op C{_get_<field>} handler is
    installed on the C{Clue} class for every varying field which does not
    already have one. Those handlers are always removed again before
    returning, even if re-parsing raises.

    @param clues: Sequence of clues.
    @type clues: C{list} or C{tuple}

    @return: The very same sequence of clues, after re-parsing each clue.
    @rtype: C{list} or C{tuple}
    """
    from Halberd.clues.Clue import Clue

    different = diff_fields(clues)

    # Names of the handlers installed by this call (and only by this call);
    # handlers that already existed on Clue are never touched.
    ignored = []
    try:
        # First alter Clue to be able to cope with the varying fields.
        for field in different:
            method = '_get_' + Clue.normalize(field)
            if not hasattr(Clue, method):
                logger.debug('ignoring %s', field)
                # The handler discards its input and returns None.
                # NOTE(review): presumably Clue.parse looks these handlers up
                # by name for each header field -- confirm against Clue.
                setattr(Clue, method, lambda s, f: None)
                # Register only after a successful setattr so the cleanup
                # below never tries to delete an attribute that is missing.
                ignored.append(method)

        for clue in clues:
            Clue.parse(clue, clue.headers)
    finally:
        # We want to leave the Clue class as before because a MIME field
        # causing trouble for the current scan might be the source of precious
        # information for another scan. Doing it in a finally clause keeps
        # that guarantee when parsing fails half-way through.
        for method in ignored:
            delattr(Clue, method)

    return clues
def reanalyze(clues, analyzed, threshold):
    """Identify and ignore changing header fields.

    After initial analysis one must check that there aren't as many
    realservers as obtained clues. If there were it could be a sign of
    something wrong happening: each clue is different from the others due to
    one or more MIME header fields which change unexpectedly.

    When the clue-to-realserver ratio reaches C{threshold}, the changing
    fields are ignored through L{ignore_changing_fields} and the clues are
    analyzed again. If the ratio is still at or above the threshold afterwards
    and there are more than 10 clues, a warning is logged.

    @param clues: Raw sequence of clues. Must not be empty.
    @type clues: C{list}

    @param analyzed: Result from the first analysis phase.
    @type analyzed: C{list}

    @param threshold: Minimum clue-to-realserver ratio in order to trigger
    field inspection.
    @type threshold: C{float}

    @return: C{analyzed} untouched when the ratio is below the threshold,
    otherwise the result of analyzing the clues again.
    @rtype: C{list}

    @raise AssertionError: If C{clues} is empty.
    """
    def ratio():
        # Reads the enclosing 'analyzed', so after a reanalysis it reflects
        # the fresh result rather than the one passed in.
        return len(analyzed) / float(len(clues))

    # Guards the division in ratio() against an empty clue sequence.
    assert len(clues) > 0

    r = ratio()
    if r >= threshold:
        logger.debug("clue-to-realserver ratio is high (%.3f)", r)
        logger.debug("reanalyzing clues...")

        ignore_changing_fields(clues)
        analyzed = analyze(clues)

        logger.debug("clue reanalysis done.")

    # Check again to see if we solved the problem but only warn the user if
    # there's a significant amount of evidence.
    if ratio() >= threshold and len(clues) > 10:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(
            "The following results might be incorrect. It could be because "
            "the remote host keeps changing its server version string or "
            "because halberd didn't have enough samples."
        )

    return analyzed
def reanalyze(clues, analyzed, threshold):
    """Identify and ignore changing header fields.

    After initial analysis one must check that there aren't as many
    realservers as obtained clues. If there were it could be a sign of
    something wrong happening: each clue is different from the others due to
    one or more MIME header fields which change unexpectedly.

    When the clue-to-realserver ratio reaches C{threshold}, the changing
    fields are ignored through L{ignore_changing_fields} and the clues are
    analyzed again. If the ratio is still at or above the threshold afterwards
    and there are more than 10 clues, a warning is logged.

    @param clues: Raw sequence of clues. Must not be empty.
    @type clues: C{list}

    @param analyzed: Result from the first analysis phase.
    @type analyzed: C{list}

    @param threshold: Minimum clue-to-realserver ratio in order to trigger
    field inspection.
    @type threshold: C{float}

    @return: C{analyzed} untouched when the ratio is below the threshold,
    otherwise the result of analyzing the clues again.
    @rtype: C{list}

    @raise AssertionError: If C{clues} is empty.
    """
    def ratio():
        # Reads the enclosing 'analyzed', so after a reanalysis it reflects
        # the fresh result rather than the one passed in.
        return len(analyzed) / float(len(clues))

    # Guards the division in ratio() against an empty clue sequence.
    assert len(clues) > 0

    r = ratio()
    if r >= threshold:
        logger.debug('clue-to-realserver ratio is high (%.3f)', r)
        logger.debug('reanalyzing clues...')

        ignore_changing_fields(clues)
        analyzed = analyze(clues)

        logger.debug('clue reanalysis done.')

    # Check again to see if we solved the problem but only warn the user if
    # there's a significant amount of evidence.
    if ratio() >= threshold and len(clues) > 10:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(
            'The following results might be incorrect. It could be because '
            'the remote host keeps changing its server version string or '
            "because halberd didn't have enough samples."
        )

    return analyzed