def get_all_docs():
    """Return every row of doc_tbl, each augmented with the file's on-disk content.

    Returns:
        A utils.create_res_obj payload holding a list of document dicts;
        on any failure, a success=False payload carrying the traceback.
    """
    try:
        data = []
        cursor = db.cnx.cursor()
        query = "SELECT * FROM doc_tbl"
        cursor.execute(query)
        for row in cursor:
            # Read the document body from disk. `with` guarantees the file
            # handle is closed even if .read() raises — the original called
            # open(...).read() inline and leaked one handle per row.
            with open(row[3], 'r') as doc_file:
                content = doc_file.read()
            data.append({
                'docid': row[0],
                'docname': row[1],
                'author': row[2],
                'path': row[3],        # row[3] is the filesystem path column
                'year': row[4],
                'intro': row[5],
                'hidden': row[6],
                'content': content
            })
        cursor.close()
        return utils.create_res_obj(data)
    except Exception as e:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            }, success=False)
def restore_doc(docname):
    """Un-hide a previously hidden document.

    Looks up the doc's id by name; if a matching row exists in hidden_files,
    deletes it and clears the hidden flag on doc_tbl.

    Args:
        docname: document name as stored in doc_tbl.docname.

    Returns:
        utils.create_res_obj([{'file_restored': 'True'|'False'}]); on any
        failure (including an unknown docname, which makes fetchone() return
        None and raise TypeError) a success=False payload with the traceback.
    """
    try:
        cursor = db.cnx.cursor()
        query = ("SELECT docid FROM doc_tbl WHERE docname=%s")
        data = (docname, )
        cursor.execute(query, data)
        docid = cursor.fetchone()[0]
        query = ("SELECT * FROM hidden_files WHERE docid = %s")
        data = (docid, )
        cursor.execute(query, data)
        # The original branched on cursor.rowcount == -1, but for an
        # unbuffered MySQL cursor rowcount stays -1 until the result set is
        # consumed, so it never actually tested whether a hidden row exists.
        # Fetch the rows and branch on their presence instead.
        hidden_rows = cursor.fetchall()
        if hidden_rows:
            cursor = db.cnx.cursor()
            query = ("DELETE FROM hidden_files WHERE docid = %s")
            data = (docid, )
            cursor.execute(query, data)
            query = ("UPDATE doc_tbl SET hidden = 0 WHERE docid = %s ")
            data = (docid, )
            cursor.execute(query, data)
            db.cnx.commit()
            data = [{'file_restored': 'True'}]
        else:
            data = [{'file_restored': 'False'}]
        cursor.close()
        return utils.create_res_obj(data)
    except Exception as e:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            }, success=False)
def init():
    """Initialise the database schema via api_handler.db.init_db().

    Returns a success result object, or a success=False payload carrying
    the traceback if initialisation raises.
    """
    try:
        api_handler.db.init_db()
        result = utils.create_res_obj({'status': 'init success'})
    except Exception as exc:
        result = utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(exc.args)
            }, success=False)
    return result
def delete_doc(docname):
    """Delete a document and all of its index bookkeeping.

    For each posting referencing the doc, either deletes the indextable row
    (hit == 1) or decrements its hit counter; then removes the doc's rows
    from postfiletable, doc_tbl and hidden_files, and finally deletes the
    file from disk.

    Args:
        docname: document name as stored in doc_tbl.docname.

    Returns:
        utils.create_res_obj([{'file_deleted': 'True'}]) on success; on any
        failure a success=False payload with the traceback.
    """
    try:
        postid_list = []
        query = ("SELECT path FROM doc_tbl WHERE docname=%s")
        data = (docname, )
        doc_path = db.run_query(query, data, one=True)[0]
        query = ("SELECT docid FROM doc_tbl WHERE path=%s")
        data = (doc_path, )
        docid = db.run_query(query, data, one=True)[0]
        query = ("SELECT postid FROM postfiletable WHERE docid=%s")
        data = (docid, )
        for row in db.run_query(query, data):
            postid_list.append(row[0])
        for postid in postid_list:
            query = ("SELECT hit FROM indextable WHERE postid=%s")
            data = (postid, )
            hit = db.run_query(query, data, one=True)[0]
            if hit == 1:
                # Last reference: drop the posting entirely.
                query = ("DELETE FROM indextable WHERE postid=%s")
                data = (postid, )
                db.run_query(query, data, commit=True)
            else:
                # Bind the new hit count as a parameter; the original spliced
                # it into the SQL string with str.format.
                query = ("UPDATE `indextable` SET `hit` = %s "
                         "WHERE `indextable`.`postid` = %s")
                data = (hit - 1, postid)
                db.run_query(query, data, commit=True)
        query = ("DELETE FROM postfiletable WHERE docid=%s")
        data = (docid, )
        db.run_query(query, data, commit=True)
        query = ("DELETE FROM doc_tbl WHERE docid=%s")
        data = (docid, )
        db.run_query(query, data, commit=True)
        query = ("DELETE FROM hidden_files WHERE docid=%s")
        data = (docid, )
        db.run_query(query, data, commit=True)
        if os.path.exists(doc_path):
            os.remove(doc_path)
        # Report a meaningful result; the original accidentally returned the
        # parameter tuple (docid,) of the last DELETE query.
        return utils.create_res_obj([{'file_deleted': 'True'}])
    except Exception as e:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            }, success=False)
def getfile(filename):
    """Fetch a file record through api_handler and return it as a JSON response.

    On failure, returns a success=False result object with the traceback.
    """
    try:
        return jsonify(api_handler.getfile(filename))
    except Exception as exc:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(exc.args)
            }, success=False)
def getfile(docname):
    """Look up one doc_tbl row by name and return it with its file content.

    Args:
        docname: document name as stored in doc_tbl.docname.

    Returns:
        A utils.create_res_obj payload with the row's columns plus the file
        body; on any failure (including an unknown docname, which makes
        run_query return None) a success=False payload with the traceback.
    """
    query = ("SELECT * FROM doc_tbl WHERE docname=%s")
    data = (docname, )
    try:
        row = db.run_query(query, data, one=True)
        db.disconnect()
        # `with` closes the file handle even if .read() raises — the
        # original called open(...).read() inline and leaked it.
        with open(row[3], 'r') as doc_file:
            content = doc_file.read()
        return utils.create_res_obj({
            'docid': row[0],
            'docname': row[1],
            'author': row[2],
            'path': row[3],
            'year': row[4],
            'intro': row[5],
            'hidden': row[6],
            'content': content
        })
    except Exception as e:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            }, success=False)
def drop_all_tables(self):
    """Drop every table reported by self.get_all_tables().

    Returns:
        None when all drops succeed; on the first failure, a success=False
        result object naming the offending table (remaining tables are not
        attempted).
    """
    cursor = self.cnx.cursor()
    tables_names = self.get_all_tables()
    for table_name in tables_names:
        # get_all_tables() yields row tuples; flatten to the bare name.
        # (The original bound this to `str`, shadowing the builtin.)
        name = ''.join(table_name)
        try:
            # Table identifiers cannot be bound as query parameters; the
            # names come from our own schema query, not from user input.
            cursor.execute("drop table " + name)
        except Exception as e:
            return utils.create_res_obj(
                {
                    'traceback': traceback.format_exc(),
                    'msg': "{}".format(e.args),
                    'text': "DROP TABLE failed WITH TABLE {} ".format(name)
                }, success=False)
def res_query(query):
    """Parse a boolean search query (AND/OR/NOT, quoted phrases, parens),
    resolve each term to a set of matching doc ids, evaluate the resulting
    set expression, rank the hits, and return the ranked documents.

    The pipeline rewrites the query string in several ordered passes, builds
    a Python expression whose string literals are the terms, parses it with
    `ast`, swaps each literal for its doc-id set via MyTransformer, and
    eval()s the compiled set algebra. NOTE(review): `eval` on a compiled,
    transformed query is a potential injection surface if the query string
    is attacker-controlled — each pass below assumes the earlier sanitising
    passes have stripped everything but terms, operators and parens; confirm.
    """

    class MyTransformer(ast.NodeTransformer):
        # Replace every string literal in the parsed expression with the
        # ast.Set of doc ids previously computed for that term in words_dict
        # (a closure over res_query's local).
        def visit_Str(self, node):
            return ast.Set(words_dict[node.s])

    try:
        data = []
        hidden_files = list_hidden_files()
        operator = ['OR', 'AND', 'NOT']
        data = []
        query = query.replace("\'", "'")
        # Detect more than one operator in a row (bad query): every ordered
        # operator pair that is NOT "in order" bumps the counter.
        splited_query = query.split()
        duplicate_op_counter = 0
        for first in operator:
            for second in operator:
                if not is_in_order(first, second, splited_query):
                    duplicate_op_counter += 1
        if duplicate_op_counter < 9:  # bad query detected: strip all operators
            for op in operator:
                query = re.sub(r'\b' + op + r'\b', ' ', query)
        tmp_quote = ''
        # Split multi-word quoted phrases into individually quoted words:
        # "a b" -> "a" "b" (first occurrence only per phrase).
        quotes_string = re.findall(r'"([^"]*)"', query)
        if quotes_string:
            for text in quotes_string:
                if len(text.split()) > 1:
                    words_in_quotes = text.split()
                    for item in words_in_quotes:
                        tmp_quote += ' \"' + item + '\" '
                    query = query.replace('\"{}\"'.format(text), tmp_quote, 1)
                    tmp_quote = ''
        # Check for terms only without operators / trailing operator.
        tmp_query = re.sub(' +', ' ', query)
        if tmp_query.endswith(' AND') or tmp_query.endswith(
                ' OR') or tmp_query.endswith(' NOT'):
            tmp_query = query.replace('AND', '').replace('OR',
                                                         '').replace('NOT', '')
        # Nothing left after removing parens/operators/quotes -> invalid.
        if not query.replace(')', '').replace('(', '').replace(
                'AND', '').replace('OR', '').replace('NOT', '').replace(
                    '"', '').strip():
            query = 'error'
        # No operators at all: treat a bare word list as word OR word OR ...
        if not ((findWholeWord(operator[0])(query))
                or (findWholeWord(operator[1])(query))
                or (findWholeWord(operator[2])(query))):
            query = query.strip()
            query = ' OR '.join(query.split())
        tmp_query = re.split(r'(OR|AND|NOT)',
                             query)  # split to text OP text OP text
        # Rebuild the query, wrapping each multi-word segment in parens and
        # OR-joining its words.
        new_query = ''
        for text in tmp_query:
            if text in operator:
                new_query += text + ' '
                continue
            if len(text.split()) > 1:
                # if len(text.replace('(', '').replace(')', '').split()) > 1:
                new_query += '('
                for word in text.split():
                    new_query += word + ' OR '
                new_query = new_query[:-3]  # drop the trailing ' OR'
                new_query += ') '
            else:
                new_query += text
        # Remove stop-list terms. Quoted stop words are kept but their quotes
        # are replaced by '$' sentinels; unquoted ones become 'stoppedword'.
        query = new_query
        quotes_words_indexs = []
        for word in new_query.split():
            for term in conf.STOP_LIST:
                tmp_word = word.replace(')', '').replace('(',
                                                         '').replace('"', '')
                if term == tmp_word:
                    if word[0] == '\"' and word[-1] == '\"':
                        # Quoted stop word: mark its quotes with '$' so the
                        # later replacement pass skips it.
                        quotes_words_indexs = [
                            (m.start(0), m.end(0))
                            for m in re.finditer(r'\b{}\b'.format(term), query)
                        ]
                        if quotes_words_indexs:
                            for tup in quotes_words_indexs:
                                if query[tup[0] -
                                         1] == '\"' and query[tup[1]] == '\"':
                                    query = query[:tup[0] -
                                                  1] + '$' + query[tup[0]:]
                                    query = query[:tup[1]] + '$' + query[
                                        tup[1] + 1:]
                                    break
                                else:
                                    continue
                    else:
                        quotes_words_indexs = [
                            (m.start(0), m.end(0)) for m in re.finditer(
                                r'\b{}\b'.format(tmp_word), query)
                        ]
                        # query = new_query.replace(tmp_word, 'STOPPED')
                        for tup in quotes_words_indexs:
                            if query[tup[0] - 1] == '$':
                                # '$'-marked occurrence was quoted: keep it.
                                continue
                            else:
                                start = query[:tup[0]]
                                mid = ' stoppedword '
                                end = query[tup[1] + 1:]
                                query_helper = start + mid + end
                                query = query_helper
                                break
                        quotes_words_indexs = []
        query = re.sub(' +', ' ', query)
        query = query.replace('$', ' ')  # careful: drop sentinels
        query = query.replace('\"', '')
        # Collect the remaining bare terms (lowercased) and build a map from
        # each term to its single-quoted literal form for the expression.
        tmp_query = query.replace(')', '').replace('(', '').replace(
            'AND', '').replace('OR', '').replace('NOT', '')
        tmp_query = tmp_query.lower()
        words_list = tmp_query.split()
        words_list_in_quotes = [
            '\'' + re.sub("'", "\\'", w) + '\'' for w in words_list
        ]
        words_dict = {}
        for i in range(len(words_list)):
            words_dict[words_list[i]] = words_list_in_quotes[i]
        # Rebuild the query with every term replaced by its quoted literal,
        # preserving any parens glued to the token.
        processed_query = ''
        for item in query.split():
            if item.lower() in words_dict:
                processed_query += words_dict[item.lower()]
            elif item.replace(')', '').lower() in words_dict:
                b = item.count(')')
                processed_query += words_dict[item.replace(')', '').lower()]
                processed_query += b * ')'
            elif item.replace('(', '').lower() in words_dict:
                b = item.count('(')
                processed_query += b * '('
                processed_query += words_dict[item.replace('(', '').lower()]
            else:
                processed_query += item
            processed_query += ' '
        # Resolve each term to its doc-id set (stop words resolve to the
        # empty set), replacing the quoted-literal values in words_dict.
        for k, v in words_dict.items():
            if k == 'stoppedword':
                ast_list = create_ast_list([])
            else:
                doc_list = get_doc_list_by_term(k, hidden_files, words_list)
                ast_list = create_ast_list(doc_list)
            words_dict[k] = ast_list
        # Strip wildcard markers everywhere so keys/terms/query line up.
        words_dict = dict(
            {k.replace('*', ''): v for k, v in words_dict.items()})
        words_list = list([word.replace('*', '') for word in words_list])
        processed_query = processed_query.replace('*', '')
        # Map boolean operators onto Python set operators: AND->&, OR->|,
        # NOT->- (set difference).
        processed_query = processed_query.replace('AND', '&')
        processed_query = processed_query.replace('OR', '|')
        processed_query = processed_query.replace('NOT', '-')
        # Parse the set expression, swap string literals for doc-id sets,
        # compile and evaluate it.
        input_code = ast.parse(processed_query, mode='eval')
        MyTransformer().visit(input_code)
        fixed = ast.fix_missing_locations(input_code)
        code = compile(fixed, '<string>', 'eval')
        result = eval(code)
        result = list(result)
        # Rank the matching doc ids and sort descending by rank.
        ranked_doc = _rank(result, words_list)
        sorted_by_rank = sorted(ranked_doc, key=lambda tup: tup[1])
        sorted_docit = [tup[0] for tup in sorted_by_rank]
        sorted_docit = sorted_docit[::-1]
        result = sorted_docit
        for doc_id in result:
            data.append(get_data_by_docid(doc_id, words_list))
        return utils.create_res_obj(data)
    except Exception as e:
        return utils.create_res_obj(
            {
                'traceback': traceback.format_exc(),
                'msg': "{}".format(e.args)
            }, success=False)