Example #1
def init_db(_):
    '''Initialize the SQLite database (NOTE: does not overwrite)'''
    top_folder = os.path.dirname(crawler_app.__file__)
    rel_folder = os.path.join('db', 'crawler.sqlite')

    db_file = os.path.join(top_folder, rel_folder)
    DbSessionFactory.global_init(db_file)
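All of these examples lean on DbSessionFactory, which is not part of the excerpts. Its interface can be read off the calls above and below (global_init, create_session, get_db_file_path); the code below is only a minimal sketch of how such a factory is commonly written with SQLAlchemy, not the project's actual implementation.

import sqlalchemy
import sqlalchemy.orm

# assumed declarative base that the Account/History models would inherit from
SqlAlchemyBase = sqlalchemy.orm.declarative_base()


class DbSessionFactory:
    _db_file = None
    _factory = None

    @classmethod
    def global_init(cls, db_file):
        # create the engine once and build any missing tables
        # (consistent with the "does not overwrite" note in init_db)
        cls._db_file = db_file
        engine = sqlalchemy.create_engine('sqlite:///' + db_file, echo=False)
        cls._factory = sqlalchemy.orm.sessionmaker(bind=engine)
        SqlAlchemyBase.metadata.create_all(engine)

    @classmethod
    def create_session(cls):
        # hand out a fresh session per unit of work
        return cls._factory()

    @classmethod
    def get_db_file_path(cls):
        # the raw-SQL helpers (Examples #5, #8, #9) connect to the file directly
        return cls._db_file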
Example #2
    @staticmethod
    def create_account(username, plain_text_password):
        '''create a new account with a hashed password'''
        session = DbSessionFactory.create_session()

        account = Account()
        account.username = username
        account.password_hash = AccountService.hash_text(plain_text_password)

        session.add(account)
        session.commit()

        return account
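Example #2 calls AccountService.hash_text, which is not shown in these excerpts. Below is a standard-library sketch of salted password hashing, purely as an assumption; the real project may well use passlib, bcrypt, or similar.

import binascii
import hashlib
import os


class AccountService:
    @staticmethod
    def hash_text(plain_text_password):
        # salted PBKDF2; the salt is prepended so a verifier can re-derive the digest
        salt = os.urandom(16)
        digest = hashlib.pbkdf2_hmac('sha256',
                                     plain_text_password.encode('utf-8'),
                                     salt, 100000)
        return binascii.hexlify(salt + digest).decode('ascii')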
Example #3
    @classmethod
    def find_account_by_id(cls, user_id):
        '''look up an account by its primary key; returns None for a falsy id or no match'''
        if not user_id:
            return None

        session = DbSessionFactory.create_session()

        account = session.query(Account) \
            .filter(Account.id == user_id) \
            .first()

        return account
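The Account model itself is not in these excerpts. Its column names can be inferred from the attributes used in Examples #2, #3, and #6; the types, constraints, and table name below are assumptions, reusing the SqlAlchemyBase from the sketch under Example #1.

import sqlalchemy


class Account(SqlAlchemyBase):
    __tablename__ = 'Account'   # assumed; only Graph_Data and History appear in the raw SQL

    id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True, autoincrement=True)
    username = sqlalchemy.Column(sqlalchemy.String, unique=True, index=True)
    password_hash = sqlalchemy.Column(sqlalchemy.String)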
Example #4
    @staticmethod
    def get_params_by_history_id(lookup_id):
        '''return the parameters of an old search to seed a new crawl'''

        session = DbSessionFactory.create_session()

        history = session.query(History) \
            .filter(History.auto_id == lookup_id) \
            .first()

        return dict(url=history.url,
                    search_type=history.search_type,
                    search_limit=history.search_limit,
                    keyword=history.keyword)
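Likewise, the History model can be reconstructed from the columns touched in Examples #4, #7, and #8 (auto_id, user_id, url, search_type, search_limit, keyword, created). The sketch below assumes the same SqlAlchemyBase; the types and default are guesses.

import datetime

import sqlalchemy


class History(SqlAlchemyBase):
    __tablename__ = 'History'   # matches the raw SQL in Example #8

    auto_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True, autoincrement=True)
    user_id = sqlalchemy.Column(sqlalchemy.Integer, index=True)
    url = sqlalchemy.Column(sqlalchemy.String)
    search_type = sqlalchemy.Column(sqlalchemy.String)
    search_limit = sqlalchemy.Column(sqlalchemy.Integer)
    keyword = sqlalchemy.Column(sqlalchemy.String)
    created = sqlalchemy.Column(sqlalchemy.DateTime, default=datetime.datetime.now)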
Example #5
import json
import sqlite3 as lite

import pandas as pd


def get_archived_graph_data(lookup_id):
    '''get the JSON representation of an archived graph'''
    query = "SELECT domain, node_depth, node_id, parent_node, url, found FROM Graph_Data WHERE lookup_id=?"
    conn = lite.connect(DbSessionFactory.get_db_file_path())
    df = pd.read_sql_query(query, conn, params=(lookup_id,))
    conn.close()

    if len(df) > 1:
        graph = build_json_graph(df)
        return json.dumps(graph)
    else:
        # if the graph only had one node, build it from the seed URL alone
        graph = json.dumps(
            dict(url=df['url'].values[0], domain=df['domain'].values[0]))
        return graph
Example #6
    @classmethod
    def find_account_by_username(cls, username):
        '''look up an account by its normalized (lowercased, stripped) username'''
        if not username or not username.strip():
            return None

        username = username.lower().strip()

        session = DbSessionFactory.create_session()

        account = session.query(Account) \
            .filter(Account.username == username) \
            .first()

        return account
Example #7
    @staticmethod
    def add_history(user_id, url, search_type, search_limit, keyword):
        '''record the validated parameters of a user's crawl and return its auto_id'''
        session = DbSessionFactory.create_session()

        history = History()
        history.user_id = user_id
        history.url = url
        history.search_type = search_type
        history.search_limit = search_limit
        history.keyword = keyword

        session.add(history)
        session.commit()

        return history.auto_id
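Taken together with Example #4, the auto_id returned here is what lets a returning user re-run an old crawl. A small usage sketch follows; the values are placeholders, and the functions are called unqualified for brevity (in the project they presumably live on a service class).

# record a crawl, then later seed a new crawl from the stored parameters
history_id = add_history(user_id=1,
                         url='https://example.com',
                         search_type='breadth',
                         search_limit=50,
                         keyword='python')

params = get_params_by_history_id(history_id)
# params -> {'url': 'https://example.com', 'search_type': 'breadth',
#            'search_limit': 50, 'keyword': 'python'}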
Example #8
import sqlite3 as lite

import pandas as pd


def get_history(user_id):
    '''query the History table to build the search-history view for returning users'''
    query = "SELECT auto_id, url, search_type, search_limit, keyword, created FROM History WHERE user_id=?"
    conn = lite.connect(DbSessionFactory.get_db_file_path())
    df = pd.read_sql_query(query, conn, params=(user_id,))
    conn.close()

    # one dict per previous search, in the shape the history view expects
    history_dict_list = list()
    for i, r in df.iterrows():
        history_dict_list.append(
            dict(auto_id=r['auto_id'],
                 url=r['url'],
                 search_type=r['search_type'],
                 search_limit=r['search_limit'],
                 keyword=r['keyword'],
                 created=r['created']))
    return history_dict_list
Example #9
import sqlite3 as lite


def add_data(dataframe):
    '''append crawled graph data to the database for later queries'''
    conn = lite.connect(DbSessionFactory.get_db_file_path())
    # pandas handles the INSERTs; append so earlier crawls are preserved
    dataframe.to_sql('Graph_Data', conn, if_exists='append', index=False)
    conn.close()
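The Graph_Data columns can be read off the SELECT in Example #5, plus the lookup_id used in its WHERE clause. Below is a sketch of handing such a frame to add_data; the row values are placeholders.

import pandas as pd

# one row per crawled node; lookup_id ties the rows back to a History entry
frame = pd.DataFrame([
    dict(lookup_id=1, domain='example.com', node_depth=0, node_id=0,
         parent_node=None, url='https://example.com', found=0),
    dict(lookup_id=1, domain='example.com', node_depth=1, node_id=1,
         parent_node=0, url='https://example.com/about', found=1),
])

add_data(frame)                       # append the rows to Graph_Data
print(get_archived_graph_data(1))     # later: JSON for the archived graph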