def create_neo4j_container(container_name, volume_home=VOLUME_HOME):
    """
    @param {string} container_name: name given to the new docker container
    @param {string} volume_home: host folder under which the neo4j data, logs and
        plugins volumes are mounted; it is created if it does not exist yet
    @description creates a detached neo4j:4.2.3 docker container that publishes
        ports 7474 and 7687, requests the APOC plugin, and mounts the analyzer
        outputs folder as the neo4j import directory
    @return {void} None
    """
    # exist_ok avoids the race of a separate exists() check followed by makedirs()
    os.makedirs(volume_home, exist_ok=True)

    # TODO: should the neo4j conf folder be mounted as well (-v {1}/neo4j/conf:/conf)?
    # See: https://neo4j.com/labs/apoc/4.2/installation/#restricted
    # and https://github.com/neo4j-contrib/neo4j-apoc-procedures/issues/451
    # TODO: should the whitelist option below be added too?
    # -e NEO4J_dbms_security_procedures_whitelist=apoc.coll.\\\*,apoc.load.\\\*

    # NOTE(review): the --user value looks like a masked placeholder -- confirm the
    # intended user/group before relying on this command.
    # The unrestricted-procedures value is spelled with four backslashes so that the
    # resulting text (two backslashes followed by '*') no longer depends on the
    # invalid escape sequence "\*", which newer Python versions warn about.
    command = """docker run \
    --name {0} \
    -p7474:7474 -p7687:7687 \
    -d \
    -v {1}/neo4j/data:/data \
    -v {1}/neo4j/logs:/logs \
    -v {4}:/var/lib/neo4j/import \
    -v {1}/neo4j/plugins:/plugins \
    -e NEO4J_apoc_export_file_enabled=true \
    -e NEO4J_apoc_import_file_enabled=true \
    -e NEO4J_apoc_import_file_use__neo4j__config=true \
    -e NEO4JLABS_PLUGINS='["apoc"]' \
    -e NEO4J_dbms_security_procedures_unrestricted=apoc.\\\\* \
    -e PYTHONUNBUFFERED=1 \
    --env NEO4J_AUTH={2}/{3} \
    --user="******" \
    neo4j:4.2.3
    """.format(
        container_name,
        volume_home,
        constants.NEO4J_USER,
        constants.NEO4J_PASS,
        # the analyzer outputs folder is passed as the import directory of neo4j
        constants.OUTPUT_NODES_RELS_PATH,
    )

    utilityModule.run_os_command(command, print_stdout=False)
    logger.info('Docker container %s is starting.' % str(container_name))
def main():
    """
    command-line entry point: builds the property graph of a JavaScript program
    with the analyzer driver, optionally adds the dynamic (hybrid mode) information
    to the generated CSV, and optionally imports the result into neo4j
    """
    cli = argparse.ArgumentParser(
        description='This script builds a property graph from a given JavaScript program.')
    cli.add_argument(
        'path', metavar='P',
        help='base path to the folder containing the program files for analysis (must be under the outputs folder).')
    cli.add_argument(
        '--js',
        help='name of the JavaScript program for analysis (default: js_program.js)',
        default='js_program.js')
    cli.add_argument(
        '--import',
        help='whether the constructed property graph should be imported to an active neo4j database (default: true)',
        default='true')
    cli.add_argument(
        '--hybrid',
        help='whether the hybrid mode is enabled (default: false)',
        default='false')
    cli.add_argument(
        '--reqs',
        help='for hybrid mode only, name of the file containing the sequence of obsevered network requests, pass the string false to exclude (default: request_logs_short.out)',
        default='request_logs_short.out')
    cli.add_argument(
        '--evts',
        help='for hybrid mode only, name of the file containing the sequence of fired events, pass the string false to exclude (default: events.out)',
        default='events.out')
    cli.add_argument(
        '--cookies',
        help='for hybrid mode only, name of the file containing the cookies, pass the string false to exclude (default: cookies.pkl)',
        default='cookies.pkl')
    cli.add_argument(
        '--html',
        help='for hybrid mode only, name of the file containing the DOM tree snapshot, pass the string false to exclude (default: html_rendered.html)',
        default='html_rendered.html')

    # the parsed arguments as a plain dictionary
    options = vars(cli.parse_args())
    program_folder = options['path']

    if constantsModule.OUTPUT_NODES_RELS_PATH not in program_folder:
        logger.error('Path of the program under analysis must be within the outputs folder.')
        sys.exit(1)

    js_program = os.path.join(program_folder, options['js'])

    # keep only the part of the path that follows the /outputs/ folder
    marker = '/outputs/'
    relative_output_path = program_folder[program_folder.index(marker) + len(marker):]

    # build the property graph for the js program
    command = "node --max-old-space-size=32000 %s -js %s -o %s" % (
        constantsModule.ANALYZER_DRIVER_PATH, js_program, relative_output_path)
    utilityModule.run_os_command(command, timeout=15 * 60)

    # store the dynamic info inside the csv as well; each source is skipped when
    # its option is the string 'false'
    if options['hybrid'] == 'true':
        hybrid_sources = (
            ('reqs', StateValues.add_requests_to_graph),
            ('evts', StateValues.add_events_to_graph),
            ('cookies', StateValues.add_cookies_to_graph),
            ('html', StateValues.add_dom_tree_snapshot_to_graph),
        )
        for option_name, add_to_graph in hybrid_sources:
            file_name = options[option_name]
            if file_name != 'false':
                add_to_graph(program_folder, file_name)

    # import the constructed csv into an active neo4j database
    if options['import'] == 'true':
        API_neo4j_prepare(program_folder)
def API_build_property_graph_for_file(file_absolute_path, file_name, timeout=30 * 60):
    """
    builds a property graph database for a given file
    @param {string} file_absolute_path: path of the folder containing the file; it
        must contain 'hpg_construction/outputs' or 'hpg_construction/unit_tests'
    @param {string} file_name: name of the JavaScript file inside that folder
    @param {int} timeout: timeout forwarded to run_os_command (default: 30 * 60)
    @return the value returned by run_os_command for the analyzer command;
        the process exits with status 1 when the path is under neither folder
    """
    file_absolute_path_name = os.path.join(file_absolute_path, file_name)

    # Remove only a trailing '.js' extension. str.rstrip('.js') strips any trailing
    # run of the characters '.', 'j' and 's', so e.g. 'utils.js' became 'util'.
    extension = '.js'
    if file_name.endswith(extension):
        program_name = file_name[:-len(extension)]
    else:
        program_name = file_name

    outputs_marker = 'hpg_construction/outputs'
    unit_tests_marker = 'hpg_construction/unit_tests'

    if outputs_marker in file_absolute_path:
        # the +1 skips the separator that follows the marker
        e_index = file_absolute_path.index(outputs_marker) + len(outputs_marker) + 1
        output_folder = file_absolute_path[e_index:]
    elif unit_tests_marker in file_absolute_path:
        e_index = file_absolute_path.index(unit_tests_marker) + len(unit_tests_marker) + 1
        output_folder = os.path.join("unit_tests", file_absolute_path[e_index:])
    else:
        logger.error(
            'input file for graph construction must be under the hpg_construction/outputs/ folder!'
        )
        sys.exit(1)

    output_folder = os.path.join(output_folder, program_name)

    command = "node --max-old-space-size=32000 %s -js %s -o %s" % (
        constantsModule.ANALYZER_DRIVER_PATH, file_absolute_path_name, output_folder)
    output = run_os_command(command, timeout=timeout)
    return output
def _analyze():
    """
    builds the web property graph by running the analyzer driver with node

    note: file_path is neither a parameter nor a local of this function; it is
    resolved from an outer scope
    """
    # computed as in the original code, although the value is not used afterwards
    library_name_with_extension = utilityModule.get_directory_last_part(file_path)

    # drop the outputs folder prefix, then the last path component, to obtain the
    # output directory handed to the driver
    outputs_prefix = constantsModule.OUTPUT_NODES_RELS_PATH + '/'
    path_without_prefix = utilityModule.remove_part_from_str(file_path, outputs_prefix)
    output_directory = utilityModule.get_directory_without_last_part(path_without_prefix)

    driver_arguments = (constantsModule.ANALYZER_DRIVER_PATH, file_path, output_directory)
    utilityModule.run_os_command("node %s -js %s -o %s" % driver_arguments)
def instrument_for_dynamic_js_constructs(
        file_path_name,
        create_new_file=False,
        new_file_name=constantsModule.NAME_JS_PROGRAM_INSTRUMENTED):
    """
    @param {string} file_path_name: system path of the input JS file
    @param {bool} create_new_file: when False, the input file path is also used as
        the output path; when True, the output path is the directory of the input
        file followed by new_file_name
    @param {string} new_file_name: name of the output file, used only when
        create_new_file is True
    @description: runs the normalization/dynamic.js driver with node, passing the
        input path and the output path
    @return None
    """
    output = (
        utilityModule.get_directory_without_last_part(file_path_name) + new_file_name
        if create_new_file
        else file_path_name
    )

    driver_path = os.path.join(
        constantsModule.BASE_DIR,
        'hpg_construction/lib/jaw/normalization/dynamic.js')

    utilityModule.run_os_command('node %s %s %s' % (driver_path, file_path_name, output))
def import_data_inside_container(container_name, database_name, relative_import_path, mode='graphML'):
    """
    @param {string} container_name: docker container in which the import is executed
    @param {string} database_name: name of the target neo4j database
    @param {string} relative_import_path: path relative to ./hpg_construction/outputs/
        for CSV: the folder containing the nodes and relationships csv files
        for graphML: the graphML file
    @param {string} mode: type of input (options are 'CSV' or 'graphML')
    @return 1 after running the CSV import command, the result of
        DU.exec_fn_within_transaction for graphML, None for any other mode
    """
    if mode == 'graphML':
        return DU.exec_fn_within_transaction(
            import_data_inside_container_with_cypher,
            database_name,
            relative_import_path)

    if mode != 'CSV':
        # any other mode is ignored
        return None

    # the csv files are addressed through the import directory inside the container
    import_folder = os.path.join('/var/lib/neo4j/import', relative_import_path)
    nodes_path = os.path.join(import_folder, constants.NODE_INPUT_FILE_NAME)
    rels_path = os.path.join(import_folder, constants.RELS_INPUT_FILE_NAME)

    # see: https://neo4j.com/docs/operations-manual/current/tools/neo4j-admin-import/#import-tool-option-skip-duplicate-nodes
    # the --mode=csv flag is only passed for versions other than 4.x
    if constants.NEO4J_VERSION.startswith(constants.NEOJ_VERSION_4X):
        import_template = "neo4j-admin import --database=%s --nodes=%s --relationships=%s --delimiter='¿' --skip-bad-relationships=true --skip-duplicate-nodes=true"
    else:
        import_template = "neo4j-admin import --mode=csv --database=%s --nodes=%s --relationships=%s --delimiter='¿' --skip-bad-relationships=true --skip-duplicate-nodes=true"
    neo4j_import_cmd = import_template % (database_name, nodes_path, rels_path)

    # run the import directly inside the neo4j container with docker exec
    docker_cmd = "docker exec -it %s %s" % (container_name, neo4j_import_cmd)
    utilityModule.run_os_command(docker_cmd, print_stdout=True, prettify=True)
    return 1
def activate_existing_neo4j_db(database_name):
    """
    activates a neo4j database that has been previously imported
    @param {string} database_name which has the .db extension included
    @description validates the request first, then stops neo4j, rewrites the
        dbms.active_database entry of the neo4j configuration file with sed, and
        starts neo4j again
    @return {bool} whether or not the given database name is activated
    """
    # Validate everything BEFORE touching the server: previously neo4j was stopped
    # first, so both failure paths below returned False with neo4j left stopped.
    db_absolute_path = os.path.join(constantsModule.NEO4J_DB_PATH, database_name)
    if not os.path.exists(db_absolute_path):
        if constantsModule.DEBUG_PRINTS:
            logger.warning(
                "No neo4j database with name \'%s\' exists for activation!" % database_name)
        return False

    # the sed invocation differs per platform ("-i ''" on Mac OS X, "-i" on Linux)
    new_config_line = "dbms.active_database=%s" % database_name
    if constantsModule.CURRENT_PLATFORM == constantsModule.PLATFORMS['MAC_OS_X']:
        CHANGE_ACTIVE_GRAPH_COMMAND = """sed -i '' 's/dbms.active_database=.*db/%s/1' %s""" % (
            new_config_line, constantsModule.NEO4J_CONF)
    elif constantsModule.CURRENT_PLATFORM == constantsModule.PLATFORMS['Linux']:
        CHANGE_ACTIVE_GRAPH_COMMAND = """sed -i 's/dbms.active_database=.*db/%s/1' %s""" % (
            new_config_line, constantsModule.NEO4J_CONF)
    else:
        logger.error(
            'Detected unsupported platform. Check your enviornment variables (.env) file if your platform is supported but it is set wrong.'
        )
        return False

    # stop neo4j
    STOP_NEO4J_COMMAND = "neo4j stop"
    run_os_command(STOP_NEO4J_COMMAND)

    # change the active db
    run_os_command(CHANGE_ACTIVE_GRAPH_COMMAND, print_stdout=False)

    # start neo4j
    START_NEO4J_COMMAND = "neo4j start"
    run_os_command(START_NEO4J_COMMAND)

    if constantsModule.DEBUG_PRINTS:
        logger.info("Neo4J DB setup successful.")
    # short pause after issuing the start command
    time.sleep(3)
    return True
def API_neo4j_prepare(csv_absolute_path,
                      nodes_name=constantsModule.NODE_INPUT_FILE_NAME,
                      relationships_name=constantsModule.RELS_INPUT_FILE_NAME,
                      load_dom_tree_if_exists=True):
    """
    @param {string} csv_absolute_path: absolute path to the graph node and relation csv files
    @param {string} nodes_name: the name of the CSV file for nodes
    @param {string} relationships_name: the name of the CSV file for edges or relationships
    @param {boolean} load_dom_tree_if_exists: if set, the function loads the HTML code
        of the dom snapshot to the property graph in addition to the html path
    @description imports graph csv files, and prepares the db; the process exits
        with status 1 when a csv file is missing or the platform is unsupported
    @return {void} None
    """
    # the last sub-path of the csv folder is used as the database name
    folder_name_as_db_name = _get_last_subpath(csv_absolute_path)

    nodes_path = os.path.join(csv_absolute_path, nodes_name)
    rels_path = os.path.join(csv_absolute_path, relationships_name)

    if (not os.path.isfile(nodes_path)) or (not os.path.isfile(rels_path)):
        logger.error(
            'CSV nodes or relationships file does not exist for importing in the given directory: %s'
            % csv_absolute_path)
        sys.exit(1)

    # Pick the platform-specific sed command up front, so that an unsupported
    # platform aborts before neo4j is stopped and the old database is deleted.
    new_config_line = "dbms.active_database=%s.db" % folder_name_as_db_name
    if constantsModule.CURRENT_PLATFORM == constantsModule.PLATFORMS['MAC_OS_X']:
        CHANGE_ACTIVE_GRAPH_COMMAND = """sed -i '' 's/dbms.active_database=.*db/%s/1' %s""" % (
            new_config_line, constantsModule.NEO4J_CONF)
    elif constantsModule.CURRENT_PLATFORM == constantsModule.PLATFORMS['Linux']:
        CHANGE_ACTIVE_GRAPH_COMMAND = """sed -i 's/dbms.active_database=.*db/%s/1' %s""" % (
            new_config_line, constantsModule.NEO4J_CONF)
    else:
        logger.error(
            'Detected unsupported platform. Check your enviornment variables (.env) file if your platform is supported but it is set wrong.'
        )
        sys.exit(1)

    # stop neo4j
    STOP_NEO4J_COMMAND = "neo4j stop"
    run_os_command(STOP_NEO4J_COMMAND)

    # if a db for this folder already exists, delete the old one before importing
    DELETE_OLD_DB_IF_EXISTS_COMMAND = "rm -rf %s.db" % (os.path.join(
        constantsModule.NEO4J_DB_PATH, folder_name_as_db_name))
    run_os_command(DELETE_OLD_DB_IF_EXISTS_COMMAND)

    # import the data
    NEO4J_IMPORT_COMMAND = """neo4j-admin import --mode=csv --database=%s.db --nodes=%s --relationships=%s --delimiter='¿'""" % (
        folder_name_as_db_name, nodes_path, rels_path)
    run_os_command(NEO4J_IMPORT_COMMAND)

    # change the active db
    run_os_command(CHANGE_ACTIVE_GRAPH_COMMAND, print_stdout=False)

    # start neo4j
    START_NEO4J_COMMAND = "neo4j start"
    run_os_command(START_NEO4J_COMMAND)

    if constantsModule.DEBUG_PRINTS:
        logger.info("Neo4J DB setup in progress. Waiting for 10 seconds.")
    time.sleep(10)

    if load_dom_tree_if_exists:
        for node in ORMModule.DOMSnapshot.nodes.all():
            # the context manager closes the snapshot file even when reading fails
            with open(node.Location, 'r') as fd:
                node.Code = fd.read()
            node.save()
def stop_neo4j_container(container_name):
    """
    @param {string} container_name: name of the docker container
    @description issues `docker stop` for the given container and logs a warning
    @return {void} None
    """
    name = str(container_name)
    utilityModule.run_os_command("docker stop %s" % name, print_stdout=False)
    logger.warning('Docker container %s is being stopped.' % name)
def start_neo4j_container(container_name):
    """
    @param {string} container_name: name of the docker container
    @description issues `docker start` for the given container and logs the action
    @return {void} None
    """
    name = str(container_name)
    utilityModule.run_os_command("docker start %s" % name, print_stdout=False)
    logger.info('Docker container %s is starting.' % name)