Exemplo n.º 1
0
def test_insert_many(connection_string_database):
    time1 = datetime.datetime.now()
    time2 = datetime.datetime.now() - datetime.timedelta(1)
    time3 = datetime.datetime.now() - datetime.timedelta(2)

    records_to_insert = [("Göteborg", time1, "boll",
                          "annons/goteborg/aktiv_panther_lang__84/93336788",
                          "kolla in värsta dealen!", 93336788, 123, "123kr"),
                         ("Malmö", time2, "racket",
                          "annons/malmo/aktiv_panther_lang__84/93336789",
                          "kom och köp!", 93336789, 456, "456kr"),
                         ("Stockholm", time3, "nät",
                          "annons/stockholm/aktiv_panther_lang__84/93336787",
                          "billigt, billigt billigt!", 93336787, 789, "789kr")]

    list_of_columns = [
        "location", "time", "top_info", "href", "subject_text", "item_id",
        "price", "price_text"
    ]

    connection = mysql_scripts.create_connection(connection_string_database,
                                                 "local_database")
    cursor = mysql_scripts.create_cursor(connection)
    mysql_scripts.insert_many_data(cursor, connection, list_of_columns,
                                   records_to_insert, table_name)
    # mysql_scripts.drop_table(cursor, table_name)
    # mysql_scripts.create_table(cursor, table_name, columns)
    mysql_scripts.close_cursor(cursor)
    mysql_scripts.close_connection(connection)
Exemplo n.º 2
0
def test_delete_data_from_table(connection_string_database):
    table_name = "scrape_log"
    connection = mysql_scripts.create_connection(connection_string_database,
                                                 "local_database")
    cursor = mysql_scripts.create_cursor(connection)
    mysql_scripts.delete_data(cursor, connection, table_name)
    mysql_scripts.close_cursor(cursor)
    mysql_scripts.close_connection(connection)
Exemplo n.º 3
0
def alter_tables_add_id(connection):
    cursor = mysql_scripts.create_cursor(connection)
    cursor.execute(
        "ALTER TABLE scrape_log ADD id INT UNSIGNED NOT NULL AUTO_INCREMENT, ADD INDEX (id)"
    )
    cursor.execute(
        "ALTER TABLE articles ADD id INT UNSIGNED NOT NULL AUTO_INCREMENT, ADD INDEX (id)"
    )
    connection.commit()
    mysql_scripts.close_cursor(cursor)
Exemplo n.º 4
0
def scrape_history_start_up_date(connection):
    # Desc: adding last scraped date as three days ago to avoid heavy load on inital scrape with new database
    # @Parms no parms
    # @output no output
    # Insert Scrape details to scrape log

    time_of_first_article = datetime.datetime.now() - datetime.timedelta(3)
    time_of_scrape = datetime.datetime.now()
    no_of_articles = 0

    cursor = mysql_scripts.create_cursor(connection)
    temporary_scrape_history_insert(connection, cursor, time_of_scrape,
                                    time_of_first_article, no_of_articles)
    mysql_scripts.close_cursor(cursor)
Exemplo n.º 5
0
def test_select_data(connection_string_database):
    connection = mysql_scripts.create_connection(connection_string_database,
                                                 "local_database")
    cursor = mysql_scripts.create_cursor(connection)
    table_name = "scrape_log"
    condition_dict = None
    selection_columns_list = [
        'time_of_first_article', 'time_of_scrape', 'no_of_articles'
    ]
    data = mysql_scripts.select_data(cursor, connection,
                                     selection_columns_list, table_name,
                                     condition_dict)
    print(data[-1][0])
    mysql_scripts.close_cursor(cursor)
    mysql_scripts.close_connection(connection)
Exemplo n.º 6
0
def test_insert_single(connection_string_database):
    connection = mysql_scripts.create_connection(connection_string_database,
                                                 "local_database")
    cursor = mysql_scripts.create_cursor(connection)

    table_name = "scrape_log"
    list_of_columns = ["time_of_scrape", "time_of_scrape", "no_of_articles"]

    time_of_first_article = datetime.datetime(2020, 12, 9, 9, 15, 00, 00)
    time_of_scrape = time_of_first_article = datetime.datetime(
        2020, 12, 9, 9, 15, 00, 00)
    no_of_articles = 98276
    list_of_values = [time_of_first_article, time_of_scrape, no_of_articles]

    cursor.execute(
        "INSERT INTO scrape_log (time_of_first_article, time_of_scrape, no_of_articles) VALUES (%s, %s, %s)",
        (time_of_first_article, time_of_scrape, '90000'))
    connection.commit()
    # mysql_scripts.insert_data(connection, cursor, table_name, list_of_columns, list_of_values)
    mysql_scripts.close_cursor(cursor)
    mysql_scripts.close_connection(connection)
Exemplo n.º 7
0
def check_duplicates(connection_string_database):
    connection = mysql_scripts.create_connection(connection_string_database,
                                                 "local_database")
    cursor = mysql_scripts.create_cursor(connection)
    table_name = "articles"
    condition_dict = None
    selection_columns_list = ['item_id', 'subject_text']
    data = mysql_scripts.select_data(cursor, connection,
                                     selection_columns_list, table_name,
                                     condition_dict)
    mysql_scripts.close_cursor(cursor)
    mysql_scripts.close_connection(connection)
    duplicate_list = []
    for item in data:
        counter = 0
        for item_check in data:
            if item[0] == item_check[0]:
                counter += 1
        if counter > 1:
            duplicate_list.append(item)
    print(duplicate_list)
Exemplo n.º 8
0
def create_tables(connection):
    tables_and_columns = [
        {
            'articles': [{
                "column_name": "location",
                "data_type": "VARCHAR",
                "column_lenght": 255,
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "time",
                "data_type": "DATETIME",
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "top_info",
                "data_type": "VARCHAR",
                "column_lenght": 255,
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "href",
                "data_type": "VARCHAR",
                "column_lenght": 255,
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "subject_text",
                "data_type": "VARCHAR",
                "column_lenght": 255,
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "item_id",
                "data_type": "INT",
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "price",
                "data_type": "INT",
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "price_text",
                "data_type": "VARCHAR",
                "column_lenght": 255,
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }]
        },
        {
            'scrape_log': [{
                "column_name": "time_of_scrape",
                "data_type": "DATETIME",
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "time_of_first_article",
                "data_type": "DATETIME",
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }, {
                "column_name": "no_of_articles",
                "data_type": "INT",
                "primary_key": False,
                "auto_increment": False,
                "not_null": False,
                "unique": False
            }]
        },
    ]
    cursor = mysql_scripts.create_cursor(connection)
    for table in tables_and_columns:
        for key, value in table.items():
            mysql_scripts.create_table(cursor, key, value)
    mysql_scripts.close_cursor(cursor)