示例#1
0
def grab_data_from_local_files():
    companies = pd.read_csv('../data/us_companies.csv')
    print(companies.head())

    cmp = pd.read_csv('../data/companies/microsoft.csv')
    print(cmp.head())

    cur.execute("select * from positions limit 10")
    result = cur.fetchall()
    print(result)

    cur.execute("select * from visa_records limit 10")
    result = cur.fetchall()
    print(result)
示例#2
0
def save2mysql(data=[], if_cache=True):
    if if_cache:
        data = pickle.load(open('sss', 'rb'))

    for row in data:
        sql = "INSERT INTO salaries (title, company, base_pay, positions) VALUES "
        sql += '("{}", "{}", {}, {})'.format(row[0], row[1], row[2], row[3])
        try:
            cur.execute(sql)
        except Exception as e:
            print(e)
            print(sql)
            continue

    conn.commit()
示例#3
0
def clean_save_job(data):
    github_jobs = data['github_jobs']
    adzuna_jobs = data['adzuna_jobs']
    jobs = [] # company, title, location, description, created, url
    for item in github_jobs:
        jobs.append({
            'company': item['company'],
            'title': item['title'],
            'location': item['location'],
            'description': item['description'],
            'created': item['created_at'],
            'url': item['company_url']
        })

    for item in adzuna_jobs:
        jobs.append({
            'company': item['company']['display_name'],
            'title': item['title'],
            'location': item['location']['display_name'],
            'description': item['description'],
            'created': item['created'],
            'url': item['redirect_url']
        })

    for item in jobs:
        reg = re.compile('<[^>]*>')
        content = reg.sub('', item['description'])
        content = re.sub('\"', '', content).strip()
        item['description'] = content
        sql = 'INSERT INTO positions (company, title, location, created, url, description) VALUES \
                ("{}", "{}", "{}", "{}", "{}", """{}""")'.format(item['company'], item['title'],
                                                  item['location'],
                                                  item['created'], item['url'], item['description'])
        # sql = 'INSERT INTO positions (company, title, location, created, url) VALUES \
        #         ("{}\", '{}', '{}', '{}', '{}')""".format(item['company'], item['title'],
        #                                           item['location'],
        #                                           item['created'], item['url'])
        try:
            cur.execute(sql)
        except Exception as e:
            print(e)
            # print(sql)
            continue

    conn.commit()
示例#4
0
def clean_save_visa(data):
    for i in range(len(data)):
        line = data[i]
        for item in line:
            sql = "INSERT INTO visa_records (rank, sponsor, num_of_LCA, ave_salary, ryear) VALUES "

            sql += "({}, '{}', {}, '{}', '{}')".format(item['rank'], item['sponsor'],
                                                    int(item['num_of_LCA'].replace(',', '')),
                                                    item['ave_salary'], item['year'])
            try:
                cur.execute(sql)
            except Exception as e:
                # print(e)
                # print(sql)
                continue

    cur.execute(sql)
    conn.commit()
示例#5
0
def clean_save_company():
    df = pd.read_csv('../data/us_companies.csv')
    for i, row in df.iterrows():
        row['description'] = re.sub('\"', '', row['description']).strip()
        if type(row['city']) != str:
            location = row['state']
        else:
            location = row['city'] + "," + row['state']

        sql = "INSERT INTO companies (id, name, location, year_founded, category, description) VALUES "
        sql += '("{}", "{}", "{}", "{}", "{}", """{}""")'.format(row['company_name_id'], row['company_name'],
                                                location,
                                                row['year_founded'], row['company_category'], row['description'])
        try:
            cur.execute(sql)
        except Exception as e:
            print(e)
            print(sql)
            continue

    conn.commit()