def _build_html_for_file_ids(tree, start, end):
    """Write HTML files for file IDs from ``start`` to ``end``.

    Return None if all goes well, a tuple of (stringified exception, exc
    type, exc value, file ID, file path) if something goes wrong while
    htmlifying a file.

    This is the top-level function of an HTML worker process. Log progress
    to a file named "build-html-<start>-<end>.log".

    """
    # Defaults reported in the error tuple if we fail before reaching the
    # first row:
    path = '(no file yet)'
    file_id = -1  # renamed from ``id`` so the builtin isn't shadowed
    try:
        # We might as well have this write its log directly rather than
        # returning them to the master process, since it's already writing the
        # built HTML directly, since that probably yields better parallelism.
        conn = connect_db(tree.target_folder)

        # TODO: Replace this ad hoc logging with the logging module (or
        # something more humane) so we can get some automatic timestamps. If
        # we get timestamps spit out in the parent process, we don't need any
        # of the timing or counting code here.
        with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log:
            # Load htmlifier plugins:
            plugins = load_htmlifiers(tree)
            for plugin in plugins:
                plugin.load(tree, conn)

            start_time = datetime.now()

            # BUG FIX: initialize the counter so the final log line below
            # doesn't raise NameError when the ID range matches no rows
            # (previously ``num_files`` was bound only inside the loop).
            num_files = 0

            # Fetch and htmlify each document:
            for num_files, (file_id, path, icon, text) in enumerate(
                    conn.execute(
                        """
                        SELECT files.id, path, icon, trg_index.text
                        FROM trg_index, files
                        WHERE trg_index.id = files.id
                          AND trg_index.id >= ?
                          AND trg_index.id <= ?
                        """,
                        [start, end]),
                    1):
                dst_path = os.path.join(tree.target_folder, path + '.html')
                log.write('Starting %s.\n' % path)
                htmlify(tree, conn, icon, path, text, dst_path, plugins)

            conn.commit()
            conn.close()

            # Write time information:
            elapsed = datetime.now() - start_time
            log.write('Finished %s files in %s.\n' % (num_files, elapsed))
    except Exception:
        # Ship the failure back to the master process. exc_info() supplies
        # the type and value for a useful report; the traceback object itself
        # isn't picklable, so only format_exc()'s string rendition travels.
        exc_type, exc_value, _ = exc_info()
        return format_exc(), exc_type, exc_value, file_id, path
def _build_html_for_file_ids(tree, start, end):
    """Write HTML files for file IDs from ``start`` to ``end``.

    Return None if all goes well, a tuple of (stringified exception, exc
    type, exc value, file ID, file path) if something goes wrong while
    htmlifying a file.

    This is the top-level function of an HTML worker process. Log progress
    to a file named "build-html-<start>-<end>.log".

    """
    # Placeholders reported in the error tuple if we fail before the first row:
    path = '(no file yet)'
    id = -1  # NOTE(review): shadows the ``id`` builtin
    try:
        # We might as well have this write its log directly rather than returning
        # them to the master process, since it's already writing the built HTML
        # directly, since that probably yields better parallelism.
        conn = connect_db(tree.target_folder)

        # TODO: Replace this ad hoc logging with the logging module (or something
        # more humane) so we can get some automatic timestamps. If we get
        # timestamps spit out in the parent process, we don't need any of the
        # timing or counting code here.
        with open_log(tree, 'build-html-%s-%s.log' % (start, end)) as log:
            # Load htmlifier plugins:
            plugins = load_htmlifiers(tree)
            for plugin in plugins:
                plugin.load(tree, conn)

            start_time = datetime.now()

            # Fetch and htmlify each document.
            # NOTE(review): ``num_files`` is bound only by this loop; the
            # final log.write below raises NameError if the query matches
            # zero rows — confirm the ID ranges handed to workers are never
            # empty.
            for num_files, (id, path, icon, text) in enumerate(
                    conn.execute(
                        """
                        SELECT files.id, path, icon, trg_index.text
                        FROM trg_index, files
                        WHERE trg_index.id = files.id
                          AND trg_index.id >= ?
                          AND trg_index.id <= ?
                        """,
                        [start, end]),
                    1):
                dst_path = os.path.join(tree.target_folder, path + '.html')
                log.write('Starting %s.\n' % path)
                htmlify(tree, conn, icon, path, text, dst_path, plugins)

            conn.commit()
            conn.close()

            # Write time information:
            time = datetime.now() - start_time
            log.write('Finished %s files in %s.\n' % (num_files, time))
    except Exception as exc:
        # NOTE(review): ``type`` shadows the builtin; the traceback object is
        # unpacked but unused (format_exc() renders it as a string instead).
        type, value, traceback = exc_info()
        return format_exc(), type, value, id, path
def build_instance(config_path, nb_jobs=None, tree=None, verbose=False): """Build a DXR instance. :arg config_path: The path to a config file :arg nb_jobs: The number of parallel jobs to pass into ``make``. Defaults to whatever the config file says. :arg tree: A single tree to build. Defaults to all the trees in the config file. """ # Load configuration file # (this will abort on inconsistencies) overrides = {} if nb_jobs: # TODO: Remove this brain-dead cast when we get the types right in the # Config object: overrides['nb_jobs'] = str(nb_jobs) config = Config(config_path, **overrides) skip_indexing = 'index' in config.skip_stages # Find trees to make, fail if requested tree isn't available if tree: trees = [t for t in config.trees if t.name == tree] if len(trees) == 0: print >> sys.stderr, "Tree '%s' is not defined in config file!" % tree sys.exit(1) else: # Build everything if no tree is provided trees = config.trees # Create config.target_folder (if not exists) print "Generating target folder" ensure_folder(config.target_folder, False) ensure_folder(config.temp_folder, not skip_indexing) ensure_folder(config.log_folder, not skip_indexing) jinja_env = load_template_env(config.temp_folder, config.dxrroot) # We don't want to load config file on the server, so we just write all the # setting into the config.py script, simple as that. _fill_and_write_template( jinja_env, 'config.py.jinja', os.path.join(config.target_folder, 'config.py'), dict(trees=repr(OrderedDict((t.name, t.description) for t in config.trees)), wwwroot=repr(config.wwwroot), generated_date=repr(config.generated_date), directory_index=repr(config.directory_index), default_tree=repr(config.default_tree), filter_language=repr(config.filter_language))) # Create jinja cache folder in target folder ensure_folder(os.path.join(config.target_folder, 'jinja_dxr_cache')) # TODO Make open-search.xml things (or make the server so it can do them!) 
# Build trees requested ensure_folder(os.path.join(config.target_folder, 'trees')) for tree in trees: # Note starting time start_time = datetime.now() # Create folders (delete if exists) ensure_folder(tree.target_folder, not skip_indexing) # <config.target_folder>/<tree.name> ensure_folder(tree.object_folder, # Object folder (user defined!) tree.source_folder != tree.object_folder) # Only clean if not the srcdir ensure_folder(tree.temp_folder, not skip_indexing) # <config.temp_folder>/<tree.name> # (or user defined) ensure_folder(tree.log_folder, not skip_indexing) # <config.log_folder>/<tree.name> # (or user defined) # Temporary folders for plugins ensure_folder(os.path.join(tree.temp_folder, 'plugins'), not skip_indexing) for plugin in tree.enabled_plugins: # <tree.config>/plugins/<plugin> ensure_folder(os.path.join(tree.temp_folder, 'plugins', plugin), not skip_indexing) # Connect to database (exits on failure: sqlite_version, tokenizer, etc) conn = connect_db(tree.target_folder) if skip_indexing: print " - Skipping indexing (due to 'index' in 'skip_stages')" else: # Create database tables create_tables(tree, conn) # Index all source files (for full text search) # Also build all folder listing while we're at it index_files(tree, conn) # Build tree build_tree(tree, conn, verbose) # Optimize and run integrity check on database finalize_database(conn) # Commit database conn.commit() if 'html' in config.skip_stages: print " - Skipping htmlifying (due to 'html' in 'skip_stages')" else: print "Building HTML for the '%s' tree." % tree.name max_file_id = conn.execute("SELECT max(files.id) FROM files").fetchone()[0] if config.disable_workers: print " - Worker pool disabled (due to 'disable_workers')" _build_html_for_file_ids(tree, 0, max_file_id) else: run_html_workers(tree, config, max_file_id) # Close connection conn.commit() conn.close() # Save the tree finish time delta = datetime.now() - start_time print "(finished building '%s' in %s)" % (tree.name, delta)
def direct_result_eq(self, query_text, line_num):
    """Assert that *query_text*'s direct result is line *line_num* of
    main.cpp, or that there is no direct result when *line_num* is None."""
    db_path = os.path.join(self._config_dir_path, 'target', 'trees', 'code')
    actual = Query(connect_db(db_path), query_text).direct_result()
    expected = None if line_num is None else ('main.cpp', line_num)
    eq_(actual, expected)
def build_instance(config_path, nb_jobs=None, tree=None, verbose=False): """Build a DXR instance. :arg config_path: The path to a config file :arg nb_jobs: The number of parallel jobs to pass into ``make``. Defaults to whatever the config file says. :arg tree: A single tree to build. Defaults to all the trees in the config file. """ # Load configuration file # (this will abort on inconsistencies) overrides = {} if nb_jobs: # TODO: Remove this brain-dead cast when we get the types right in the # Config object: overrides['nb_jobs'] = str(nb_jobs) config = Config(config_path, **overrides) skip_indexing = 'index' in config.skip_stages # Find trees to make, fail if requested tree isn't available if tree: trees = [t for t in config.trees if t.name == tree] if len(trees) == 0: print >> sys.stderr, "Tree '%s' is not defined in config file!" % tree sys.exit(1) else: # Build everything if no tree is provided trees = config.trees # Create config.target_folder (if not exists) print "Generating target folder" ensure_folder(config.target_folder, False) ensure_folder(config.temp_folder, not skip_indexing) ensure_folder(config.log_folder, not skip_indexing) jinja_env = load_template_env(config.temp_folder, config.dxrroot) # We don't want to load config file on the server, so we just write all the # setting into the config.py script, simple as that. _fill_and_write_template( jinja_env, 'config.py.jinja', os.path.join(config.target_folder, 'config.py'), dict(trees=repr( OrderedDict((t.name, t.description) for t in config.trees)), wwwroot=repr(config.wwwroot), generated_date=repr(config.generated_date), directory_index=repr(config.directory_index), default_tree=repr(config.default_tree), filter_language=repr(config.filter_language))) # Create jinja cache folder in target folder ensure_folder(os.path.join(config.target_folder, 'jinja_dxr_cache')) # TODO Make open-search.xml things (or make the server so it can do them!) 
# Build trees requested ensure_folder(os.path.join(config.target_folder, 'trees')) for tree in trees: # Note starting time start_time = datetime.now() # Create folders (delete if exists) ensure_folder(tree.target_folder, not skip_indexing) # <config.target_folder>/<tree.name> ensure_folder( tree.object_folder, # Object folder (user defined!) tree.source_folder != tree.object_folder) # Only clean if not the srcdir ensure_folder(tree.temp_folder, not skip_indexing) # <config.temp_folder>/<tree.name> # (or user defined) ensure_folder(tree.log_folder, not skip_indexing) # <config.log_folder>/<tree.name> # (or user defined) # Temporary folders for plugins ensure_folder(os.path.join(tree.temp_folder, 'plugins'), not skip_indexing) for plugin in tree.enabled_plugins: # <tree.config>/plugins/<plugin> ensure_folder(os.path.join(tree.temp_folder, 'plugins', plugin), not skip_indexing) # Connect to database (exits on failure: sqlite_version, tokenizer, etc) conn = connect_db(tree.target_folder) if skip_indexing: print " - Skipping indexing (due to 'index' in 'skip_stages')" else: # Create database tables create_tables(tree, conn) # Index all source files (for full text search) # Also build all folder listing while we're at it index_files(tree, conn) # Build tree build_tree(tree, conn, verbose) # Optimize and run integrity check on database finalize_database(conn) # Commit database conn.commit() if 'html' in config.skip_stages: print " - Skipping htmlifying (due to 'html' in 'skip_stages')" else: print "Building HTML for the '%s' tree." % tree.name max_file_id = conn.execute( "SELECT max(files.id) FROM files").fetchone()[0] if config.disable_workers: print " - Worker pool disabled (due to 'disable_workers')" _build_html_for_file_ids(tree, 0, max_file_id) else: run_html_workers(tree, config, max_file_id) # Close connection conn.commit() conn.close() # Save the tree finish time delta = datetime.now() - start_time print "(finished building '%s' in %s)" % (tree.name, delta)
def main():
    """Entry point for the dxrtags query tool.

    Parse command-line arguments, locate the enclosing DXR tree, and
    dispatch to the query function matching ``--query_type``.

    Returns 0 on success, 1 if no DXR tree can be found.
    """
    # Crude invocation trace for debugging.
    debugfile_path = os.path.abspath('/tmp/dxr-ctags.out')
    debugfile = open(debugfile_path, 'w')
    try:
        # ' '.join works on both Python 2 and 3 (the original used the
        # Python-2-only ``string.join``).
        debugfile.write(' '.join(sys.argv) + "\n")
        debugfile.write(os.path.abspath(os.path.curdir))
    finally:
        # BUG FIX: the file handle was previously leaked.
        debugfile.close()

    dxr_tree = find_dxr_tree()
    if dxr_tree is None:
        return 1
    conn = connect_db(dxr_tree.target_folder)
    clear_tags_file()

    # Dispatch table: query-type name -> implementation.
    query_functions = {'defs': query_for_defs,
                       'decls': query_for_decls,
                       'refs': query_for_refs,
                       'files': query_for_files}

    parser = ArgumentParser(description='Parse command-line arguments for dxrtags')
    parser.add_argument('-t', '--token', help='The token to search for',
                        required=True)
    parser.add_argument('-q', '--query_type', choices=query_functions.keys(),
                        help='The type of query to perform', required=True)
    parser.add_argument('-f', '--from_file',
                        help='The file the token was discovered in')
    parser.add_argument('-l', '--from_line', type=int,
                        help='The line the token was discovered on')
    parser.add_argument('-w', '--wiggle_room', type=int, default=0,
                        help='Wiggle room for line number')
    args = parser.parse_args()

    # Widen the line range by the requested wiggle room.
    from_line_start = args.from_line
    from_line_end = args.from_line
    if args.wiggle_room is not None and args.from_line is not None:
        from_line_start -= args.wiggle_room
        from_line_end += args.wiggle_room

    # Trim off leading path components until the remainder exists relative to
    # the current directory.
    # Example:
    # leading_path = /tmp/snapshot.9p-8uq348ihj9d289/directory-in-source-tree/file_we_are_interested_in.c
    # iter 0 -> (/tmp/snapshot.9p-8uq348ihj9d289/directory-in-source-tree, file_we_are_interested_in.c)
    # iter 1 -> (/tmp/snapshot.9p-8uq348ihj9d289, directory-in-source-tree)
    file_from_here = None
    leading_path = args.from_file
    trailing_path = None
    # BUG FIX: the original tested ``leading_path is not ""`` — an identity
    # comparison against a string literal, whose result is implementation-
    # defined. Use an equality test.
    while leading_path is not None and not is_root(leading_path) and leading_path != "":
        print(leading_path)
        (leading_path, next_trailing_path) = os.path.split(leading_path)
        print(leading_path)
        print(next_trailing_path)
        if trailing_path is None:
            trailing_path = next_trailing_path
        else:
            trailing_path = os.path.join(next_trailing_path, trailing_path)
        print("Does " + trailing_path + " exist?")
        # iter 0: file_we_are_interested_in.c
        # iter 1: directory-in-source-tree/file_we_are_interested_in.c
        if os.path.exists(trailing_path):
            # Keep iterating even after a hit, since we might have the same
            # filename in multiple places; a longer matching path prefix found
            # later is a better match.
            file_from_here = trailing_path

    if file_from_here is not None:
        print("Using " + file_from_here)
        query_functions[args.query_type](conn, args.token, file_from_here,
                                         from_line_start, from_line_end)
    return 0
def direct_result_eq(self, query_text, line_num):
    """Assert that ``query_text``'s direct result is line ``line_num`` of
    main.cpp, or that there is no direct result when ``line_num`` is None.
    """
    conn = connect_db(os.path.join(self._config_dir_path, 'target', 'trees',
                                   'code'))
    # Consistency fix: the sibling versions of this helper treat
    # line_num=None as "expect no direct result"; without this branch we
    # would compare against ('main.cpp', None), which direct_result()
    # never returns.
    if line_num is None:
        eq_(Query(conn, query_text).direct_result(), None)
    else:
        eq_(Query(conn, query_text).direct_result(), ('main.cpp', line_num))
def search(tree):
    """Search by regex, caller, superclass, or whatever.

    Render the search page for ``tree`` — HTML by default, JSON when the
    querystring says ``format=json`` — or redirect straight to a single
    source line when ``redirect=true`` and the query has a direct result.
    """
    # TODO: This function still does too much.
    querystring = request.values
    offset = non_negative_int(querystring.get('offset'), 0)
    # Cap the page size at 1000 regardless of what the client requests:
    limit = min(non_negative_int(querystring.get('limit'), 100), 1000)

    config = current_app.config
    www_root = config['WWW_ROOT']
    trees = config['TREES']

    # Arguments for the template:
    arguments = {
        # Common template variables
        'wwwroot': www_root,
        'generated_date': config['GENERATED_DATE']}

    error = warning = ''
    status_code = None

    if tree in trees:
        arguments['tree'] = tree

        # Connect to database
        try:
            conn = connect_db(join(current_app.instance_path, 'trees', tree))
        except sqlite3.Error:
            error = 'Failed to establish database connection.'
        else:
            # Parse the search query
            qtext = querystring.get('q', '')
            is_case_sensitive = querystring.get('case') == 'true'
            q = Query(conn,
                      qtext,
                      should_explain='explain' in querystring,
                      is_case_sensitive=is_case_sensitive)

            # Try for a direct result:
            if querystring.get('redirect') == 'true':
                result = q.direct_result()
                if result:
                    path, line = result
                    # TODO: Does this escape qtext properly?
                    return redirect(
                        '%s/%s/source/%s?from=%s%s#%i' %
                        (www_root,
                         tree,
                         path,
                         qtext,
                         '&case=true' if is_case_sensitive else '',
                         line))

            # Return multiple results:
            template = 'search.html'  # NOTE(review): assigned but never read
            start = time()
            try:
                results = list(q.results(offset, limit))
            except sqlite3.OperationalError as e:
                # NOTE(review): ``e.message`` is Python-2-only; prefixes on
                # the message distinguish user-visible warnings from real
                # database errors.
                if e.message.startswith('REGEXP:'):
                    # Malformed regex
                    warning = e.message[7:]
                    results = []
                elif e.message.startswith('QUERY:'):
                    warning = e.message[6:]
                    results = []
                else:
                    error = 'Database error: %s' % e.message

            if not error:
                # Search template variables:
                arguments['time'] = time() - start
                arguments['query'] = qtext
                arguments['search_url'] = search_url(www_root,
                                                     arguments['tree'],
                                                     qtext,
                                                     redirect=False)
                arguments['results'] = results
                arguments['offset'] = offset
                arguments['limit'] = limit
                arguments['is_case_sensitive'] = is_case_sensitive
                # One (tree, url, description) triple per configured tree, so
                # the template can offer the same query against other trees:
                arguments['tree_tuples'] = [
                    (t,
                     search_url(www_root,
                                t,
                                qtext,
                                case=True if is_case_sensitive else None),
                     description)
                    for t, description in trees.iteritems()]
    else:
        # Unknown tree: fall back to the first configured one for the template.
        arguments['tree'] = trees.keys()[0]
        error = "Tree '%s' is not a valid tree." % tree
        status_code = 404

    if warning or error:
        arguments['error'] = error or warning

    if querystring.get('format') == 'json':
        if error:
            # Return a non-OK code so the live search doesn't try to replace
            # the results with our empty ones:
            return jsonify(arguments), status_code or 500
        # Tuples are encoded as lists in JSON, and these are not real
        # easy to unpack or read in Javascript. So for ease of use, we
        # convert to dictionaries before returning the json results.
        # If further discrepancies are introduced, please document them in
        # templating.mkd.
        arguments['results'] = [
            {'icon': icon,
             'path': path,
             'lines': [{'line_number': nb, 'line': l} for nb, l in lines]}
            for icon, path, lines in arguments['results']]
        return jsonify(arguments)

    if error:
        return render_template('error.html', **arguments), status_code or 500
    else:
        arguments['filters'] = filter_menu_items(config['FILTER_LANGUAGE'])
        return render_template('search.html', **arguments)
def get_original(tree, path=''):
    """Return the stored original text of *path* from *tree*'s index DB."""
    db = connect_db(join(current_app.instance_path, 'trees', tree))
    cursor = db.execute(
        """
        SELECT trg_index_content.text from trg_index_content
        INNER JOIN files ON files.id=trg_index_content.id
        WHERE files.path=?
        """, (path,))
    rows = cursor.fetchall()
    return rows[0][0]
def search(tree):
    """Search by regex, caller, superclass, or whatever.

    Render the search page for ``tree`` — HTML by default, JSON when the
    querystring says ``format=json`` — or redirect straight to a single
    source line when ``redirect=true`` and the query has a direct result.
    """
    # TODO: This function still does too much.
    querystring = request.values
    offset = non_negative_int(querystring.get("offset"), 0)
    # Cap the page size at 1000 regardless of what the client requests:
    limit = min(non_negative_int(querystring.get("limit"), 100), 1000)

    config = current_app.config
    www_root = config["WWW_ROOT"]
    trees = config["TREES"]
    google_analytics_key = config["GOOGLE_ANALYTICS_KEY"]

    # Arguments for the template:
    arguments = {
        # Common template variables
        "wwwroot": www_root,
        "google_analytics_key": google_analytics_key,
        "generated_date": config["GENERATED_DATE"],
    }

    error = warning = ""
    status_code = None

    if tree in trees:
        arguments["tree"] = tree

        # Connect to database
        try:
            conn = connect_db(join(current_app.instance_path, "trees", tree))
        except sqlite3.Error:
            error = "Failed to establish database connection."
        else:
            # Parse the search query
            qtext = querystring.get("q", "")
            is_case_sensitive = querystring.get("case") == "true"
            q = Query(conn,
                      qtext,
                      should_explain="explain" in querystring,
                      is_case_sensitive=is_case_sensitive)

            # Try for a direct result:
            if querystring.get("redirect") == "true":
                result = q.direct_result()
                if result:
                    path, line = result
                    # TODO: Does this escape qtext properly?
                    return redirect(
                        "%s/%s/source/%s?from=%s%s#%i" %
                        (www_root,
                         tree,
                         path,
                         qtext,
                         "&case=true" if is_case_sensitive else "",
                         line)
                    )

            # Return multiple results:
            template = "search.html"  # NOTE(review): assigned but never read
            start = time()
            try:
                results = list(q.results(offset, limit))
            except sqlite3.OperationalError as e:
                # NOTE(review): ``e.message`` is Python-2-only; prefixes on
                # the message distinguish user-visible warnings from real
                # database errors.
                if e.message.startswith("REGEXP:"):
                    # Malformed regex
                    warning = e.message[7:]
                    results = []
                elif e.message.startswith("QUERY:"):
                    warning = e.message[6:]
                    results = []
                else:
                    error = "Database error: %s" % e.message

            if not error:
                # Search template variables:
                arguments["time"] = time() - start
                arguments["query"] = qtext
                arguments["search_url"] = search_url(www_root,
                                                     arguments["tree"],
                                                     qtext,
                                                     redirect=False)
                arguments["results"] = results
                arguments["offset"] = offset
                arguments["limit"] = limit
                arguments["is_case_sensitive"] = is_case_sensitive
                # One (tree, url, description) triple per configured tree, so
                # the template can offer the same query against other trees:
                arguments["tree_tuples"] = [
                    (t,
                     search_url(www_root,
                                t,
                                qtext,
                                case=True if is_case_sensitive else None),
                     description)
                    for t, description in trees.iteritems()
                ]
    else:
        # Unknown tree: fall back to the first configured one for the template.
        arguments["tree"] = trees.keys()[0]
        error = "Tree '%s' is not a valid tree." % tree
        status_code = 404

    if warning or error:
        arguments["error"] = error or warning

    if querystring.get("format") == "json":
        if error:
            # Return a non-OK code so the live search doesn't try to replace
            # the results with our empty ones:
            return jsonify(arguments), status_code or 500
        # Tuples are encoded as lists in JSON, and these are not real
        # easy to unpack or read in Javascript. So for ease of use, we
        # convert to dictionaries before returning the json results.
        # If further discrepancies are introduced, please document them in
        # templating.mkd.
        arguments["results"] = [
            {"icon": icon,
             "path": path,
             "lines": [{"line_number": nb, "line": l} for nb, l in lines]}
            for icon, path, lines in arguments["results"]
        ]
        return jsonify(arguments)

    if error:
        return render_template("error.html", **arguments), status_code or 500
    else:
        arguments["filters"] = filter_menu_items(config["FILTER_LANGUAGE"])
        return render_template("search.html", **arguments)
def direct_result_eq(self, query_text, line_num):
    """Assert that *query_text*'s direct result is line *line_num* of
    main.cpp, or that there is no direct result when *line_num* is None."""
    db_path = os.path.join(self._config_dir_path, "target", "trees", "code")
    actual = Query(connect_db(db_path), query_text).direct_result()
    expected = None if line_num is None else ("main.cpp", line_num)
    eq_(actual, expected)