def _hit_context(self, hit):
    """Build a string of context lines for the query words matched in this file.

    For every distinct query word, the (stemmed) word is mapped back to the
    concrete words that occur in the document, those words are located in the
    file contents with a case-insensitive regex, and the surrounding line is
    collected via ``fragment_text``.  At most ``self.max_sub_results`` context
    lines are gathered.
    """
    qparser = self._searcher.parser
    query = self._searcher.query
    contents = read_file(self.path)
    lines = []
    # For each query word,
    for query_word in set(query):
        # Enforce the cap across query words too: previously the break below
        # only exited the inner loop, so later query words could push the
        # result past max_sub_results.
        if len(lines) >= self.max_sub_results:
            break
        # Reverse map query words to document words
        document_words = list(qparser.unstemlist(query_word))
        # If the query word is not in the document, skip it
        if not document_words:
            continue
        # Escape each word before joining: document words may contain regex
        # metacharacters, which previously could raise re.error or match the
        # wrong text.
        search_expression = r'|'.join(re.escape(word) for word in document_words)
        pattern = re.compile(search_expression, re.IGNORECASE)
        for match in pattern.finditer(contents):
            token = self.Token()
            token.startchar = match.start()
            token.endchar = match.end()
            # get the context line surrounding this match
            context = fragment_text(token, contents)
            self.append_line(lines, context)
            if len(lines) >= self.max_sub_results:
                break
    return u''.join(lines)
def process_hit(self, hit):
    """Populate ``self.context`` with the highlighted matches for *hit*."""
    file_text = read_file(self.path)
    highlighted = hit.highlights('content', text=file_text)
    # Fall back to the path itself: the query may have matched the file
    # path rather than the file contents.
    self.context = highlighted if highlighted else self.path
def _hit_context(self, hit):
    """Build a string of context lines for the query words matched in this file.

    Each stemmed query word is mapped back to the concrete document words,
    which are located in the file contents with a case-insensitive regex; the
    line around each match is collected via ``fragment_text``, capped at
    ``self.max_sub_results`` lines.
    """
    qparser = self._searcher.parser
    query = self._searcher.query
    contents = read_file(self.path)
    lines = []
    # For each query word,
    for queryWord in set(query):
        # Enforce the cap across query words too: previously the break below
        # only exited the inner loop, so later query words could push the
        # result past max_sub_results.
        if len(lines) >= self.max_sub_results:
            break
        # Reverse map query words to document words
        documentWords = list(qparser.unstemlist(queryWord))
        # If the query word is not in the document, skip it
        if not documentWords:
            continue
        # Escape each word before joining: document words may contain regex
        # metacharacters, which previously could raise re.error or match the
        # wrong text.
        searchExpression = r'|'.join(
            re.escape(_ensure_str(word)) for word in documentWords)
        pattern = re.compile(searchExpression, re.IGNORECASE)
        for match in pattern.finditer(contents):
            token = self.Token()
            token.startchar = match.start()
            token.endchar = match.end()
            # get the context line surrounding this match
            context = fragment_text(token, contents)
            self.append_line(lines, context)
            if len(lines) >= self.max_sub_results:
                break
    return u''.join(lines)
def document():
    """Handle a document display request.

    Query-string parameters:
        path       -- document path, absolute or relative to the index root
        raw        -- 'true' to return plain text instead of rendered HTML
        lines / hl -- line-range specification to highlight
        q          -- the originating search text (echoed to the template)
        p          -- the originating result page number (echoed back)
    """
    http_status = 200
    root_dir = FULL_INDEX_PATH
    # Default to '' so a missing ?path= produces a clean 404 below rather
    # than a TypeError when the path is inspected.
    full_path = request.args.get('path') or ''
    is_raw = (request.args.get('raw') == 'true')
    # allow `lines` or `hl` to highlight the target lines
    hl_str = request.args.get('lines') or request.args.get('hl', '')
    # If the path is not already anchored at the index root, anchor it.
    # startswith() is used instead of a substring test: a path that merely
    # *contains* the root somewhere in the middle is not anchored.
    if not full_path.startswith(root_dir):
        full_path = os.path.join(root_dir, full_path)
    search_text = request.args.get('q')
    pagenum = request.args.get('p')
    # perform the text search, get wrapped results
    results = results_from_search_text(full_path, isPath=True)
    if not results:
        app.logger.error('Unable to find document: %s' % full_path)
        abort(404)
    doc = results.items[0]
    # grab contents; if the file is gone, send a 404 error message
    try:
        doc_contents = read_file(full_path)
    except IOError:
        app.logger.error('Document no longer exists: %s' % full_path)
        doc_contents = "Document does not exist"
        http_status = 404
    if is_raw:
        # dump the document text
        return Response(doc_contents, mimetype='text/plain')
    db_record = db.get_raw_file_record(full_path)
    if http_status == 200:
        # get syntax highlighted html
        trn = transformer.Transformer()
        doc_html = trn.to_html(doc_contents, doc.result.filename,
                               highlight_lines=hl_str)
    else:
        doc_html = doc_contents
    # build response
    response = {
        "title": doc.result.filename,
        'html_css_class': 'document',
        'doc': doc,
        'contents': doc_html,
        'search_text': search_text,
        'page_number': pagenum,
        'last_modified': db_record.get('mod_date'),
        'http_status': http_status
    }
    add_default_response(response)
    return render_template('document.html', **response), http_status
def document():
    """Handle a document display request.

    Query-string parameters:
        path       -- document path, absolute or relative to the index root
        raw        -- 'true' to return plain text instead of rendered HTML
        lines / hl -- line-range specification to highlight
        q          -- the originating search text (echoed to the template)
        p          -- the originating result page number (echoed back)
    """
    http_status = 200
    root_dir = FULL_INDEX_PATH
    # Default to '' so a missing ?path= produces a clean 404 below rather
    # than a TypeError when the path is inspected.
    full_path = request.args.get('path') or ''
    is_raw = (request.args.get('raw') == 'true')
    # allow `lines` or `hl` to highlight the target lines
    hl_str = request.args.get('lines') or request.args.get('hl', '')
    # If the path is not already anchored at the index root, anchor it.
    # startswith() is used instead of a substring test: a path that merely
    # *contains* the root somewhere in the middle is not anchored.
    if not full_path.startswith(root_dir):
        full_path = os.path.join(root_dir, full_path)
    search_text = request.args.get('q')
    pagenum = request.args.get('p')
    # perform the text search, get wrapped results
    results = results_from_search_text(full_path, isPath=True)
    if not results:
        app.logger.error('Unable to find document: %s' % full_path)
        abort(404)
    doc = results.items[0]
    # grab contents; if the file is gone, send a 404 error message
    try:
        doc_contents = read_file(full_path)
    except IOError:
        app.logger.error('Document no longer exists: %s' % full_path)
        doc_contents = "Document does not exist"
        http_status = 404
    if is_raw:
        # dump the document text
        return Response(doc_contents, mimetype='text/plain')
    db_record = db.get_raw_file_record(full_path)
    if http_status == 200:
        # get syntax highlighted html
        trn = transformer.Transformer()
        doc_html = trn.to_html(doc_contents, doc.result.filename,
                               highlight_lines=hl_str)
    else:
        doc_html = doc_contents
    # build response (PEP 8 dict spacing: no space before the colon)
    response = {
        "title": doc.result.filename,
        'html_css_class': 'document',
        'doc': doc,
        'contents': doc_html,
        'search_text': search_text,
        'page_number': pagenum,
        'last_modified': db_record.get('mod_date'),
        'http_status': http_status
    }
    add_default_response(response)
    return render_template('document.html', **response), http_status
def is_valid_template_file(file_path):
    """Return the file's contents if *file_path* is a scannable template
    file, otherwise ``None``."""
    name = ntpath.basename(file_path)
    extension = os.path.splitext(name)[1].lower()
    in_ignored_dir = any(part in file_path for part in settings.IGNORE_DIRS)
    is_ignored_file = name.lower() in settings.IGNORE_FILES
    # Only read files whose extension is registered for template scanning
    # and that are not excluded by the ignore lists.
    if (extension in settings.OTHER_SCAN_FILE_EXTENSIONS
            and not in_ignored_dir
            and not is_ignored_file):
        return utils.read_file(file_path)
    return None
def is_valid_node(file_path):
    """Return the file's contents if it looks like Node.js source code,
    otherwise ``None``."""
    name = ntpath.basename(file_path)
    extension = os.path.splitext(name)[1].lower()
    is_js = extension in settings.JS_SCAN_FILE_EXTENSIONS
    in_ignored_dir = any(part in file_path for part in settings.IGNORE_DIRS)
    is_ignored_file = name.lower() in settings.IGNORE_FILES
    # Executable entry points conventionally live at "bin/www" in Express apps.
    is_node_www = file_path.lower().endswith("bin/www")
    if (is_js or is_node_www) and not is_ignored_file and not in_ignored_dir:
        data = utils.read_file(file_path)
        # Heuristic check: the contents must actually match the Node.js
        # source-code pattern before we treat the file as Node code.
        if re.search(NODE_RGX, data):
            return data
    return None
def view_file():
    """Serve the contents of a scanned file, guarding against path traversal.

    Expects ``path`` and ``scan_hash`` form fields; responds with a JSON
    object whose ``contents`` key holds the file text, ``"not_found"``, or a
    traversal warning.
    """
    context = {"contents": "not_found"}
    path = request.form["path"]
    scan_hash = request.form["scan_hash"]
    if utils.sha2_match_regex(scan_hash):
        res = Results.query.filter(Results.scan_hash == scan_hash).first()
        if res:
            # Realpath both sides so symlinks and '..' segments are resolved
            # before comparison.
            safe_dir = os.path.realpath(settings.UPLOAD_FOLDER)
            req_path = os.path.realpath(os.path.join(safe_dir, path))
            # os.path.commonprefix() compares character-by-character, so a
            # sibling such as "<safe_dir>_evil" shares the string prefix and
            # would bypass the check.  commonpath() compares whole path
            # components, closing that hole.
            if os.path.commonpath((req_path, safe_dir)) != safe_dir:
                context = {"contents": "Path Traversal Detected!"}
            elif os.path.isfile(req_path):
                contents = utils.read_file(req_path)
                context = {"contents": contents}
    return jsonify(**context)
import time

from core.mapping import mapping_xml2sql
from core.utils import read_file

__author__ = 'sunshine'


if __name__ == '__main__':
    start = time.time()
    # Raw string for the Windows path: the previous literal relied on the
    # invalid escape sequences \w, \p and \d being left intact, which emits
    # a DeprecationWarning (SyntaxWarning in newer CPython) and is fragile.
    # The resulting string value is unchanged.
    content = read_file(r'E:\work\projects\pybatis\database.xml')
    # Map the XML mapping definition to a concrete SQL statement and time it.
    res = mapping_xml2sql(content, 'select', 'select_user_by_id', {'age': 20})
    print(res)
    print(time.time() - start)