# Imports this module needs. The project-local import paths below are
# assumptions inferred from the calls in this file, not confirmed paths.
import json
import os
import sqlite3 as lite

import pandas as pd

import crawler_app
from crawler_app.data.dbsession import DbSessionFactory
from crawler_app.data.account import Account
from crawler_app.data.history import History
from crawler_app.services.account_service import AccountService
from crawler_app.services.graph_service import build_json_graph


def init_db(_):
    '''Initialize the SQLite database (NOTE: does not overwrite).'''
    top_folder = os.path.dirname(crawler_app.__file__)
    rel_folder = os.path.join('db', 'crawler.sqlite')
    db_file = os.path.join(top_folder, rel_folder)
    DbSessionFactory.global_init(db_file)

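# Illustrative sketch (not part of the original module): init_db only needs to
# run once at application startup, before any session is created.
def _example_startup():
    init_db(None)  # the argument is unused; the db path is derived from the package
    session = DbSessionFactory.create_session()
    session.close()
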
def create_account(username, plain_text_password):
    session = DbSessionFactory.create_session()
    account = Account()
    account.username = username
    account.password_hash = AccountService.hash_text(plain_text_password)
    session.add(account)
    session.commit()
    return account

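# Illustrative usage (assumes init_db has already run): only the output of
# AccountService.hash_text is stored; the plaintext password never hits disk.
def _example_signup(username, plain_text_password):
    account = create_account(username, plain_text_password)
    return account.id  # primary key assigned when session.commit() flushes
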
def find_account_by_id(cls, user_id):
    if not user_id:
        return None
    session = DbSessionFactory.create_session()
    account = session.query(Account) \
        .filter(Account.id == user_id) \
        .first()
    return account

def get_params_by_history_id(lookup_id):
    '''Return parameters from an old search to seed a new crawl.'''
    session = DbSessionFactory.create_session()
    history = session.query(History) \
        .filter(History.auto_id == lookup_id) \
        .first()
    if history is None:  # unknown lookup_id; mirror the find_account_* helpers
        return None
    return dict(url=history.url,
                search_type=history.search_type,
                search_limit=history.search_limit,
                keyword=history.keyword)

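# Illustrative sketch: the dict above is shaped so it can be splatted straight
# into whatever starts a crawl. `start_crawl` is a hypothetical callable, taken
# as a parameter here because no such entry point is defined in this module.
def _example_reseed_crawl(lookup_id, start_crawl):
    params = get_params_by_history_id(lookup_id)
    if params is None:
        return None
    return start_crawl(**params)  # url=..., search_type=..., search_limit=..., keyword=...
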
def get_archived_graph_data(lookup_id):
    '''Get the JSON representation of an archived graph.'''
    query = ("SELECT domain, node_depth, node_id, parent_node, url, found "
             "FROM Graph_Data WHERE lookup_id=?")
    conn = lite.connect(DbSessionFactory.get_db_file_path())
    df = pd.read_sql_query(query, conn, params=(lookup_id,))
    conn.close()
    if df.empty:  # no archived graph for this lookup_id
        return None
    if len(df) > 1:
        graph = build_json_graph(df)
        return json.dumps(graph)
    # if the graph only had one node, build it from the seed url
    return json.dumps(dict(url=df['url'].values[0],
                           domain=df['domain'].values[0]))

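# A minimal sketch of what build_json_graph (defined elsewhere) plausibly does,
# judging from the columns queried above: node_id/parent_node form a
# parent-pointer tree that can be nested into a JSON-serializable dict. This is
# an assumption about its shape, not the project's actual implementation.
def _example_nest_rows(df):
    nodes = {r['node_id']: dict(url=r['url'], domain=r['domain'],
                                found=r['found'], children=[])
             for _, r in df.iterrows()}
    root = None
    for _, r in df.iterrows():
        parent = nodes.get(r['parent_node'])
        if parent is not None:
            parent['children'].append(nodes[r['node_id']])
        else:
            root = nodes[r['node_id']]  # no known parent: treat as the seed node
    return root
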
def find_account_by_username(cls, username):
    if not username or not username.strip():
        return None
    username = username.lower().strip()
    session = DbSessionFactory.create_session()
    account = session.query(Account) \
        .filter(Account.username == username) \
        .first()
    return account

def add_history(user_id, url, search_type, search_limit, keyword):
    '''Add valid parameters that a user crawled for.'''
    session = DbSessionFactory.create_session()
    history = History()
    history.user_id = user_id
    history.url = url
    history.search_type = search_type
    history.search_limit = search_limit
    history.keyword = keyword
    session.add(history)
    session.commit()
    return history.auto_id

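# Illustrative round-trip (assumes init_db has run): the auto_id returned by
# add_history is the same lookup_id that get_params_by_history_id and
# get_archived_graph_data accept later. Argument values are placeholders.
def _example_record_search(user_id):
    lookup_id = add_history(user_id, 'https://example.com', 'crawl', 50, 'python')
    return get_params_by_history_id(lookup_id)
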
def get_history(user_id):
    '''Query the history table to build the search-history view for returning users.'''
    query = ("SELECT auto_id, url, search_type, search_limit, keyword, created "
             "FROM History WHERE user_id=?")
    conn = lite.connect(DbSessionFactory.get_db_file_path())
    df = pd.read_sql_query(query, conn, params=(user_id,))
    conn.close()
    history_dict_list = []
    for _, r in df.iterrows():
        history_dict_list.append(dict(auto_id=r['auto_id'],
                                      url=r['url'],
                                      search_type=r['search_type'],
                                      search_limit=r['search_limit'],
                                      keyword=r['keyword'],
                                      created=r['created']))
    return history_dict_list

def add_data(dataframe):
    '''Add crawl graph data to the database for future queries.'''
    conn = lite.connect(DbSessionFactory.get_db_file_path())
    dataframe.to_sql('Graph_Data', conn, if_exists='append', index=False)
    conn.close()

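# Illustrative sketch: Graph_Data rows need at least the columns that
# get_archived_graph_data selects, plus the lookup_id they are filtered on.
# Values below are placeholders, not a schema definition.
def _example_store_graph(lookup_id):
    df = pd.DataFrame([dict(lookup_id=lookup_id, domain='example.com',
                            node_depth=0, node_id=1, parent_node=0,
                            url='https://example.com', found=True)])
    add_data(df)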