def add_be_data(stats):
    """
    Run bulk_extractor over /dev/sda and record its histogram results in
    the stats dict.

    Each reported histogram is handed to add_freq_uniq together with the
    prefix it is filed under.
    """
    # "bulk" is deliberately left out of the enabled scanners
    enabled_scanners = ["email", "accts", "exif", "zip", "gzip", "rar"]
    bulkextractor.soft_init(enabled_scanners)

    session = bulkextractor.Session()
    session.analyze_device("/dev/sda", 0.01, 65535)
    session.finalize()
    found = session.histograms()

    # (stats prefix, bulk_extractor histogram name); SSN is not reported
    reported = (
        ("Email", "email_histogram"),
        ("Edom", "email_domain_histogram"),
        ("CCN", "ccn_histogram"),
        ("URL", "url_histogram"),
    )
    for label, histogram_name in reported:
        add_freq_uniq(stats, label, found[histogram_name])
def add_be_data(config, stats, heartbeat):
    """
    Gather the stats bulk_extractor is responsible for and place them into
    the stats dict.  Occasionally call a given no-arg heartbeat function to
    indicate the process is not frozen.

    Histograms that bulk_extractor did not produce are skipped rather than
    raising KeyError.

    config values:
        bulk.scanners: bulk_extractor scanners to enable
        bulk.disks: raw disk paths to run bulk_extractor over
        bulk.sample_rate: fraction of disk to analyze
        bulk.sample_size: individual sample size
    """
    c = config
    bulkextractor.soft_init(c["bulk.scanners"])
    be = bulkextractor.Session(heartbeat_callback=heartbeat)

    disks = c["bulk.disks"]
    c.debug_print("running bulk_extractor on:", ",".join(disks))
    for disk in disks:
        be.analyze_device(disk, c["bulk.sample_rate"], c["bulk.sample_size"])
    be.finalize()

    histograms = be.histograms()
    heartbeat()

    # (stats prefix, bulk_extractor histogram name).
    # SSN and Udom (url_domain_histogram) are not reported.
    reported = (
        ("Email", "email_histogram"),
        ("Edom", "email_domain_histogram"),
        ("CCN", "ccn_histogram"),
        ("URL", "url_histogram"),
    )
    for prefix, hname in reported:
        if hname in histograms:
            # Pass the single histogram, not the whole histograms mapping.
            add_freq_uniq(stats, prefix, histograms[hname])
    heartbeat()
def add_be_data(config, stats, heartbeat):
    """
    Gather the stats bulk_extractor is responsible for and place them into
    the stats dict.  Occasionally call a given no-arg heartbeat function to
    indicate the process is not frozen.

    Only histograms present in the bulk_extractor results are reported.

    config values:
        bulk.scanners: bulk_extractor scanners to enable
        bulk.disks: raw disk paths to run bulk_extractor over
        bulk.sample_rate: fraction of disk to analyze
        bulk.sample_size: individual sample size
    """
    c = config
    bulkextractor.soft_init(c["bulk.scanners"])
    be = bulkextractor.Session(heartbeat_callback=heartbeat)

    disks = c["bulk.disks"]
    c.debug_print("running bulk_extractor on:", ",".join(disks))
    for disk in disks:
        be.analyze_device(disk, c["bulk.sample_rate"], c["bulk.sample_size"])
    be.finalize()

    histograms = be.histograms()
    heartbeat()

    # Stats prefix paired with the histogram it is computed from.
    # Disabled for now: "SSN" and "Udom" (url_domain_histogram).
    for prefix, hname in [
        ("Email", "email_histogram"),
        ("Edom", "email_domain_histogram"),
        ("CCN", "ccn_histogram"),
        ("URL", "url_histogram"),
    ]:
        if hname in histograms:
            # add_freq_uniq takes one histogram, so index by name here.
            add_freq_uniq(stats, prefix, histograms[hname])
    heartbeat()
def add_be_data(stats, heartbeat):
    """
    Run bulk_extractor over /dev/sda and record its histogram results in
    the stats dict.

    heartbeat is a no-arg function.  It is given to the bulk_extractor
    session as its heartbeat callback and is also called here before and
    after the histograms are recorded, to show the process is not frozen.
    """
    # "bulk" is deliberately left out of the enabled scanners
    enabled_scanners = ["email", "accts", "exif", "zip", "gzip", "rar"]
    bulkextractor.soft_init(enabled_scanners)

    session = bulkextractor.Session(heartbeat_callback=heartbeat)
    session.analyze_device("/dev/sda", 0.01, 65535)
    session.finalize()
    found = session.histograms()
    heartbeat()

    # (stats prefix, bulk_extractor histogram name).
    # SSN and Udom (url_domain_histogram) reporting is disabled.
    reported = (
        ("Email", "email_histogram"),
        ("Edom", "email_domain_histogram"),
        ("CCN", "ccn_histogram"),
        ("URL", "url_histogram"),
    )
    for label, histogram_name in reported:
        add_freq_uniq(stats, label, found[histogram_name])
    heartbeat()
def add_be_data(stats, heartbeat):
    """
    Collect bulk_extractor histogram figures for /dev/sda into the stats
    dict.

    The no-arg heartbeat function is installed as the session's heartbeat
    callback and is additionally invoked once the histograms are available
    and once more after they have been recorded, so a watcher can tell the
    process is still alive.
    """
    # scanner "bulk" stays disabled
    bulkextractor.soft_init(["email", "accts", "exif", "zip", "gzip", "rar"])

    extractor = bulkextractor.Session(heartbeat_callback=heartbeat)
    extractor.analyze_device("/dev/sda", 0.01, 65535)
    extractor.finalize()
    results = extractor.histograms()
    heartbeat()

    # Parallel tuples: stats prefix <-> histogram name.
    # "SSN" and "Udom" (url_domain_histogram) are not recorded.
    prefixes = ("Email", "Edom", "CCN", "URL")
    histogram_names = (
        "email_histogram",
        "email_domain_histogram",
        "ccn_histogram",
        "url_histogram",
    )
    for prefix, histogram_name in zip(prefixes, histogram_names):
        histogram = results[histogram_name]
        add_freq_uniq(stats, prefix, histogram)
    heartbeat()
# handle is opaque to python module for simplicity and loose coupling
BeHandle = c_void_p

# ctypes prototype for the callback: a C function returning int that takes
# the nine arguments listed below, in this order.
BeCallback = CFUNCTYPE(
    c_int,     # return type
    c_void_p,  # arbitrary user data
    c_uint32,  # callback type code
    c_uint64,  # multi-use callback argument
    c_char_p,  # feature recorder name or other
    c_char_p,  # feature forensic path
    c_char_p,  # feature data
    c_size_t,  # feature length
    c_char_p,  # feature context data
    c_size_t,  # context length
)

if __name__ == "__main__":
    # Small self-demo: scan an in-memory buffer and print the histograms.
    print("Program to demonstrate the python module")
    import bulkextractor
    bulkextractor.lib_init('libbulkextractor.so')
    bulkextractor.soft_init(['email', 'accts'])
    be = bulkextractor.Session()
    # The sample must contain a real-looking email address for the 'email'
    # scanner enabled above; example.com is a reserved documentation domain.
    be.analyze_buffer(b" user@example.com 617-555-1212 ")
    be.finalize()
    histograms = be.histograms()
    print(histograms)
DISABLE_ALL = 7
FEATURE_LIST = 8
SCANNER_LIST = 9

# handle is opaque to python module for simplicity and loose coupling
BeHandle = c_void_p

# ctypes prototype for the callback: a C function returning int that takes
# the nine arguments listed below, in this order.
BeCallback = CFUNCTYPE(
    c_int,     # return type
    c_void_p,  # arbitrary user data
    c_uint32,  # callback type code
    c_uint64,  # multi-use callback argument
    c_char_p,  # feature recorder name or other
    c_char_p,  # feature forensic path
    c_char_p,  # feature data
    c_size_t,  # feature length
    c_char_p,  # feature context data
    c_size_t,  # context length
)

if __name__ == "__main__":
    # Small self-demo: scan an in-memory buffer and print the histograms.
    print("Program to demonstrate the python module")
    import bulkextractor
    bulkextractor.lib_init('libbulkextractor.so')
    bulkextractor.soft_init(['email', 'accts'])
    be = bulkextractor.Session()
    # The sample must contain a real-looking email address for the 'email'
    # scanner enabled above; example.com is a reserved documentation domain.
    be.analyze_buffer(b" user@example.com 617-555-1212 ")
    be.finalize()
    histograms = be.histograms()
    print(histograms)