def makeDistancedTable():
    conn, cursor = establishConnection()
    cursor.execute(
        f'''CREATE TABLE IF NOT EXISTS Distance_Table (id, type, url, created_at, company, location, title,
                    latitude REAL, longitude REAL);''')
    conn.commit()
    jobs.close_db(conn)
def makeFiftyMileRadiusTable():
    conn, cursor = establishConnection()
    cursor.execute(
        f''' CREATE TABLE IF NOT EXISTS Fifty_Mile_Radius(id, type, url, created_at, company, location, title,
                    latitude REAL, longitude REAL);''')
    conn.commit()
    jobs.close_db(conn)
def make_combined_map():
    conn, cursor = establishConnection()
    cursor.execute(
        f'''CREATE TABLE IF NOT EXISTS combined_table (id, type, url, created_at, company, location,
                        title, latitude, longitude);''')
    conn.commit()
    jobs.close_db(conn)
def insertIntoFiftyMileRadiusTable():
    makeFiftyMileRadiusTable()
    conn, cursor = establishConnection()
    cursor.execute(f'''DELETE FROM Fifty_Mile_Radius;''')
    cursor.execute(
        f'''INSERT INTO Fifty_Mile_Radius SELECT * FROM github_jobs;''')
    conn.commit()
    jobs.close_db(conn)
Exemplo n.º 5
0
def test_open_db():
    # Checking to see if the database exists, so we can delete it to check
    # if the open_db function actually creates the database.
    if os.path.exists(databaseFileName):
        os.remove(databaseFileName)

    connection, cursor = jobs.open_db(databaseFileName)
    assert os.path.exists('jobs.db')
    jobs.close_db(connection)
Exemplo n.º 6
0
def test_excel_import_jobs():
    jobs_data = jobs.excel_jobs_import("state_job_data.xlsx")
    assert len(jobs_data) > 1000
    conn, cursor = jobs.open_db("excel_import_test.sqlite")
    jobs.setup_db(cursor)
    jobs.insert_data(jobs_data, "jobs", cursor)
    jobs.close_db(conn)
    conn, cursor = jobs.open_db("excel_import_test.sqlite")
    state_query = jobs.query_run("SELECT count(DISTINCT state_name) from" + " jobs;", cursor)
    for element in state_query:
        assert element[0] > 50
 def text_visualization(self):
     self.list_control.clear()
     conn, cursor = jobs.open_db("jobs_db.sqlite")
     if self.order_selector_text.currentText() == "ASC":
         data_visualization_per_state = jobs.query_run('''SELECT state_abrev, state_name, ''' + '''
         total(jobs.employment) as employment,
         total(school.size_2018/4),
         round(avg(school.repayment_cohort),3) as repayment_cohort,
         round(avg(jobs.salary_25th_percentile)) as averge_entry_salary
         FROM school
         JOIN states using(state_abrev)
         JOIN jobs using(state_name)
         GROUP BY state_name
         ORDER BY employment ASC;''', cursor)
     elif self.order_selector_text.currentText() == "DESC":
         data_visualization_per_state = jobs.query_run('''SELECT state_abrev, state_name, ''' + '''
                     total(jobs.employment) as employment,
                     total(school.size_2018/4),
                     round(avg(school.repayment_cohort),3) as repayment_cohort,
                     round(avg(jobs.salary_25th_percentile)) as averge_entry_salary
                     FROM school
                     JOIN states using(state_abrev)
                     JOIN jobs using(state_name)
                     GROUP BY state_name
                     ORDER BY employment DESC;''', cursor)
     else:
         data_visualization_per_state = jobs.query_run('''SELECT state_abrev, state_name, ''' + '''
                                 total(jobs.employment) as employment,
                                 total(school.size_2018/4),
                                 round(avg(school.repayment_cohort),3) as repayment_cohort,
                                 round(avg(jobs.salary_25th_percentile)) as averge_entry_salary
                                 FROM school
                                 JOIN states using(state_abrev)
                                 JOIN jobs using(state_name)
                                 GROUP BY state_name
                                 ;''', cursor)
     QListWidgetItem("State", listview=self.list_control)
     for state in data_visualization_per_state:
         state_display_data = f"{state[0]}, {state[1]}"
         grad_employ_data = f"Employment/Graduates: {state[2]/state[3]}"
         repayment_data = f"Average Entry Salary/Average Declining Balance Percent: {state[5]/state[4]}"
         state_item = QListWidgetItem(state_display_data, listview=self.list_control)
         grad_item = QListWidgetItem(grad_employ_data, listview=self.list_control)
         repayment_item = QListWidgetItem(repayment_data, listview=self.list_control)
         grad_item.setForeground(Qt.darkGreen)
         repayment_item.setForeground(Qt.blue)
         state_item.setForeground(Qt.white)
         state_item.setBackground(Qt.black)
     self.list_control.show()
     jobs.close_db(conn)
Exemplo n.º 8
0
def test_create_table():
    # Checking to see if the database exists, so we can delete it to check
    # if the create_table function actually creates the database table with the appropriate columns.
    if os.path.exists(databaseFileName):
        os.remove(databaseFileName)

    connection, cursor = jobs.open_db(databaseFileName)
    jobs.create_table(connection, cursor)
    columnExists = False

    try:
        cursor.execute('ALTER TABLE jobs ADD COLUMN Description;')
    except sqlite3.OperationalError:
        columnExists = True

    assert columnExists
    jobs.close_db(connection)
def insertIntoDistance():
    starting_coords = (41.9667679, -70.9661533)
    insertIntoFiftyMileRadiusTable()
    conn, cursor = establishConnection()
    cursor.execute(f'''DELETE FROM Distance_Table;''')
    cursor.execute(
        f'''SELECT location, latitude, longitude FROM Fifty_Mile_Radius;''')
    rows = cursor.fetchall()
    for data in rows:
        comparative_coord = (data[1], data[2])
        distance = geoDist.distance(comparative_coord, starting_coords).miles
        if distance <= 50:
            print(data[0])
            cursor.execute(
                f'''INSERT OR IGNORE INTO  Distance_Table SELECT * FROM Fifty_Mile_Radius where location = "{data[0]}"'''
            )
    conn.commit()
    jobs.close_db(conn)
Exemplo n.º 10
0
def test_db_creation():
    school_test_data = [
        {'school.name': 'Test School', '2017.student.size': 5, '2018.student.size': 1, 'school.state': 'NJ',
         'id': 1, '2017.earnings.3_yrs_after_completion.overall_count_over_poverty_line': 1,
         '2016.repayment.3_yr_repayment.overall': 1, '2016.repayment.repayment_cohort.3_year_declining_balance': 0.5},
        {'school.name': 'Test School 1', '2017.student.size': 4, '2018.student.size': 6, 'school.state': 'MA',
         'id': 2, '2017.earnings.3_yrs_after_completion.overall_count_over_poverty_line': 1,
         '2016.repayment.3_yr_repayment.overall': 1, '2016.repayment.repayment_cohort.3_year_declining_balance': 0.5}]
    job_test_data = [{'state': 'Massachusetts', 'code': '00-0001', 'title': 'testttl', 'employment': 5, 'salary': 1000}]
    expected_tables = ['school', 'jobs']

    conn, cursor = jobs.open_db("test_db.sqlite")
    jobs.setup_db(cursor)
    jobs.insert_data(job_test_data, "jobs", cursor)
    jobs.insert_data(school_test_data, "school", cursor)
    jobs.close_db(conn)
    conn, cursor = jobs.open_db("test_db.sqlite")

    tables_query = jobs.query_run("SELECT name FROM " + "sqlite_master WHERE type='table' and name NOT LIKE 'sqlite_%';"
                                                        "", cursor)
    for (table, returned_tables) in zip(expected_tables, tables_query):
        assert table == returned_tables[0]
    school_query = jobs.query_run("SELECT * FROM" + " SCHOOL;", cursor)
    for (row, element) in zip(school_query, school_test_data):
        assert row[0] == element['id']
        assert row[1] == element['school.name']
        assert row[2] == element['school.state']
        assert row[3] == element['2017.student.size']
        assert row[4] == element['2018.student.size']
        assert row[5] == element['2017.earnings.3_yrs_after_completion.overall_count_over_poverty_line']
        assert row[6] == element['2016.repayment.3_yr_repayment.overall']
        assert row[7] == element['2016.repayment.repayment_cohort.3_year_declining_balance']
    jobs_query = jobs.query_run("SELECT * FROM" + " JOBS;", cursor)
    for (row, element) in zip(jobs_query, job_test_data):
        assert row[0] == 1
        assert row[1] == element['state']
        assert row[2] == element['code']
        assert row[3] == element['title']
        assert row[4] == element['employment']
        assert row[5] == element['salary']

    jobs.close_db(conn)
Exemplo n.º 11
0
    def run_map_visualization(self):
        conn, cursor = jobs.open_db("jobs_db.sqlite")
        data_visualization_per_state = jobs.query_run('''SELECT state_abrev, state_name, ''' + '''
                total(jobs.employment) as employment,
                total(school.size_2018/4),
                round(avg(school.repayment_cohort),3) as repayment_cohort,
                round(avg(jobs.salary_25th_percentile)) as averge_entry_salary
                FROM school
                JOIN states using(state_abrev)
                JOIN jobs using(state_name)
                GROUP BY state_name
                ;''', cursor)
        state_abrev = []
        state_grads = []
        state_repayment = []

        for state in data_visualization_per_state:
            state_abrev.append(state[0])
            state_grads.append(state[2]/state[3])
            state_repayment.append(state[5]/state[4])

        if self.data_selector_map.currentText() == "Graduates to Employment":
            us_map = px.Figure(data=px.Choropleth(locations=state_abrev, z=state_grads,
                                                  locationmode='USA-states', colorbar_title="Employment/Graduates"
                                                  ))
            us_map.update_layout(geo_scope='usa', title_text='Employment VS Graduates By State')
            us_map.show()
        elif self.data_selector_map.currentText() == "Average Declining Balance Percent":
            us_map = px.Figure(data=px.Choropleth(locations=state_abrev, z=state_repayment,
                                                  locationmode='USA-states', colorbar_title="Salary/Average Percent"
                                                  ))
            us_map.update_layout(geo_scope='usa', title_text='Average Salary VS Average '
                                                             'Percent of People with Declining Loans')
            us_map.show()
        else:
            us_map = px.Figure(data=px.Choropleth(locations=state_abrev, z=state_grads,
                                                  locationmode='USA-states', colorbar_title="Graduates"
                                                  ))
            us_map.update_layout(geo_scope='usa', title_text='Graduates By State')
            us_map.show()
        jobs.close_db(conn)
        self.list_control.hide()
Exemplo n.º 12
0
def test_add_to_database_with_good_data():
    # Adding good data to the database.
    additionalTestData = [
        {"id": "TESTID1",
         "type": "Full Time",
         "url": "test1.com",
         "created_at": "Sat Feb 01 12:53:40 UTC 2020",
         "company": "TESTCOMPANY",
         "company_url": "test1.com",
         "location": "Boston, MA",
         "title": "Senior Python/Django Developer ",
         "description": "Test Description",
         "how_to_apply": "Yes.",
         "company_logo": "test.png"}]

    # Checking if the new data is added to the database.
    connection, cursor = jobs.open_db(databaseFileName)
    jobs.save_to_database(additionalTestData, connection, cursor)
    cursor.execute("SELECT * FROM jobs WHERE jobs.id = 'TESTID1'")
    assert cursor.fetchone()
    jobs.close_db(connection)
Exemplo n.º 13
0
def test_update_from_list():
    job_test_data = [{'state': 'Massachusetts', 'code': 99, 'title': 'testttl', 'employment': 5, 'salary': 1000}]
    jobs_data_to_replace = [1, 'Rhode Island', 10, 'title', 10, 500]
    conn, cursor = jobs.open_db("update_test.sqlite")
    jobs.setup_db(cursor)
    jobs.insert_data(job_test_data, "jobs", cursor)
    jobs.close_db(conn)
    conn, cursor = jobs.open_db("update_test.sqlite")
    original_data = jobs.query_run("SELECT * " + "FROM jobs;", cursor)
    for (element, query_data) in zip(job_test_data, original_data):
        assert query_data[0] == 1
        assert query_data[1] == element['state']
        assert query_data[2] == element['code']
        assert query_data[3] == element['title']
        assert query_data[4] == element['employment']
        assert query_data[5] == element['salary']
    jobs.close_db(conn)
    jobs.update_data_from_list(jobs_data_to_replace, "Jobs", "update_test.sqlite")
    conn, cursor = jobs.open_db("update_test.sqlite")
    updated_data = jobs.query_run("SELECT *"+" FROM jobs;", cursor)
    for element in updated_data:
        assert jobs_data_to_replace[0] == element[0]
        assert jobs_data_to_replace[1] == element[1]
        assert jobs_data_to_replace[2] == element[2]
        assert jobs_data_to_replace[3] == element[3]
        assert jobs_data_to_replace[4] == element[4]
        assert jobs_data_to_replace[5] == element[5]

    jobs.close_db(conn)
Exemplo n.º 14
0
def test_save_to_database(get_data_github, get_data_stackoverflow):
    # Checking to see if the database exists, so we can delete it to check
    # if the save_to_database function actually saves the data to a fresh, new database.
    if os.path.exists(databaseFileName):
        os.remove(databaseFileName)

    # The amount of jobs is compressed to 30 per source, so the test doesn't take an eternity to run.
    connection, cursor = jobs.open_db(databaseFileName)
    jobs.create_table(connection, cursor)
    jobs.save_to_database(get_data_github[:30], connection, cursor)
    jobs.save_to_database(get_data_stackoverflow[:30], connection, cursor)

    # Checking if the database has some values that should be expected there. In this case, I know
    # that there is a job where the title is 'Lead Data Acquisition Design Engineer'.
    # It also picks a random one from the retrieved list and checks if it exists in the database.
    # UPDATE: It only checks for random titles from the StackOverFlow and Github lists.
    testTitle1 = random.choice(get_data_github[:30])['title']
    testTitle2 = random.choice(get_data_stackoverflow[:30])['title']
    cursor.execute("SELECT * FROM jobs WHERE jobs.Title = ?", (testTitle1,))
    assert cursor.fetchone()
    cursor.execute("SELECT * FROM jobs WHERE jobs.Title = ?", (testTitle2,))
    assert cursor.fetchone()
    jobs.close_db(connection)
Exemplo n.º 15
0
def test_specific_excel_data():
    excel_test_data = ["massachusetts", 0, "test", "major", 5, 5]
    excel_book = openpyxl.load_workbook(filename="test_workbook.xlsx")
    test_sheet = excel_book.active
    test_sheet['B2'] = excel_test_data[0]
    test_sheet['H2'] = excel_test_data[1]
    test_sheet['I2'] = excel_test_data[2]
    test_sheet['J2'] = excel_test_data[3]
    test_sheet['K2'] = excel_test_data[4]
    test_sheet['Y2'] = excel_test_data[5]
    excel_book.save(filename="test_workbook.xlsx")
    test_dict = jobs.excel_jobs_import("test_workbook.xlsx")
    conn, cursor = jobs.open_db("test_specific_excel_data.sqlite")
    jobs.setup_db(cursor)
    jobs.insert_data(test_dict, "jobs", cursor)
    jobs.close_db(conn)
    conn, cursor = jobs.open_db("test_specific_excel_data.sqlite")
    specific_data_query = jobs.query_run("SELECT * FROM " + "JOBS;", cursor)
    for element in specific_data_query:
        assert element[1] == excel_test_data[0]
        assert element[2] == excel_test_data[1]
        assert element[3] == excel_test_data[2]
        assert element[4] == excel_test_data[4]
        assert element[5] == excel_test_data[5]
Exemplo n.º 16
0
def test_update_from_excel():
    test_data = [{'state': 'Massachusetts', 'code': 99, 'title': 'testttl', 'employment': 5, 'salary': 1000}]
    data_to_replace = [1, 'Rhode Island', 10, 'title', 10, 500]
    conn, cursor = jobs.open_db("test_excel_update.sqlite")
    jobs.setup_db(cursor)
    jobs.insert_data(test_data, "jobs", cursor)
    jobs.close_db(conn)
    conn, cursor = jobs.open_db("test_excel_update.sqlite")
    original_data = jobs.query_run("SELECT * " + "FROM jobs;", cursor)
    for (element, query_data) in zip(test_data, original_data):
        assert query_data[0] == 1
        assert query_data[1] == element['state']
        assert query_data[2] == element['code']
        assert query_data[3] == element['title']
        assert query_data[4] == element['employment']
        assert query_data[5] == element['salary']
    jobs.close_db(conn)
    excel_book = openpyxl.load_workbook(filename="update_by_excel.xlsx")
    active_sheet = excel_book.active
    active_sheet['A1'] = data_to_replace[0]
    active_sheet['B1'] = data_to_replace[1]
    active_sheet['C1'] = data_to_replace[2]
    active_sheet['D1'] = data_to_replace[3]
    active_sheet['E1'] = data_to_replace[4]
    active_sheet['F1'] = data_to_replace[5]
    excel_book.save("update_by_excel.xlsx")
    jobs.update_data_from_excel("update_by_excel.xlsx", "Jobs", "test_excel_update.sqlite")
    conn, cursor = jobs.open_db("test_excel_update.sqlite")
    updated_data = jobs.query_run("SELECT * "+"FROM jobs", cursor)
    for element in updated_data:
        assert data_to_replace[0] == element[0]
        assert data_to_replace[1] == element[1]
        assert data_to_replace[2] == element[2]
        assert data_to_replace[3] == element[3]
        assert data_to_replace[4] == element[4]
        assert data_to_replace[5] == element[5]