Exemplo n.º 1
0
def readfile(name):
    """Export the pickled data store to ``./results/<name>_results.xlsx``.

    Loads ``./data_store.pkl`` and indexes the loaded object as
    ``[columns, rows]``: element 0 supplies the DataFrame column labels and
    element 1 supplies the data. The resulting frame is handed to
    ``print_df_to_excel`` together with the last sheet of the workbook
    returned by ``create_excel_file``, and the workbook is then saved.

    Args:
        name: Prefix substituted into the results workbook's file name.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('./data_store.pkl', 'rb') as pkl_file:
        stored = pickle.load(pkl_file)

    workbook_path = create_excel_file('./results/{}_results.xlsx'.format(name))
    workbook = openpyxl.load_workbook(workbook_path)
    # Write into whichever sheet is listed last in the workbook.
    last_sheet = workbook[workbook.sheetnames[-1]]

    frame = pd.DataFrame(data=stored[1], columns=stored[0])
    print_df_to_excel(df=frame, ws=last_sheet)
    workbook.save(workbook_path)
Exemplo n.º 2
0
    # Checkpoint: (over)write this run's pickle with the column list and the
    # current contents of citystatedataoverall. The file name embeds
    # indextostart, so each starting index gets its own checkpoint file.
    with open('IDEAScitystatedata{}.pkl'.format(indextostart), 'wb') as handle:
        pickle.dump([data_store_columns2, citystatedataoverall],
                    handle,
                    protocol=pickle.HIGHEST_PROTOCOL)

    # Progress report. Presumably `authors` is the enclosing loop's counter
    # and `indextostart` a resume offset -- confirm against the loop header,
    # which is outside this excerpt.
    print('Progress: {} out of {} done'.format(authors + indextostart,
                                               len(delimitedlocation)))

    # Disabled early exit (looks like a debugging aid for short test runs).
    #if authors > 2:
    #    break

# Dump each accumulated table into its own workbook under ./results/.
# The country table goes first, then the city/state table, so write_excel,
# wb and ws end up referring to the city/state workbook.
for _label, _rows, _header in (
        ('IDEAScountrydata', countrydataoverall, data_store_columns),
        ('IDEAScitystatedata', citystatedataoverall, data_store_columns2),
):
    write_excel = create_excel_file('./results/{}_results.xlsx'.format(_label))
    wb = openpyxl.load_workbook(write_excel)
    # Target the sheet that is listed last in the workbook.
    ws = wb[wb.sheetnames[-1]]
    _frame = pd.DataFrame(data=_rows, columns=_header)
    print_df_to_excel(df=_frame, ws=ws)
    wb.save(write_excel)

# Time elapsed since `start`, converted from seconds to hours
# (assumes `start` was taken with time.time() earlier in the script).
elapsed = (time.time() - start) / 3600
print(f"Elapsed time: {elapsed} hours")
Exemplo n.º 3
0
        # Placeholder values set on this branch; the condition or handler
        # that leads here is above the visible excerpt.
        title = 'NA'
        email = 'NA'

    # Append name, title and email to the current record, then add the
    # record to the overall table.
    personaldetails.append(name)
    personaldetails.append(title)
    personaldetails.append(email)

    personaldata.append(personaldetails)

    # Checkpoint: (over)write this run's pickle with the column list and the
    # current contents of personaldata. The file name embeds indextostart.
    with open('GSaffiliationscrap{}.pkl'.format(indextostart), 'wb') as handle:
        pickle.dump([data_store_columns, personaldata],
                    handle,
                    protocol=pickle.HIGHEST_PROTOCOL)

    # Progress report. Presumably `authors` is the enclosing loop's counter
    # and `indextostart` a resume offset -- confirm against the loop header.
    print('Progress: {} out of {} for {} done'.format(authors + indextostart,
                                                      numberofauthors, name))

    # Disabled early exit (looks like a debugging aid for short test runs).
    #if authors > 3:
    #    break

# Create the results workbook and open it for writing.
write_excel = create_excel_file(
    './results/{}_results.xlsx'.format('GSAffiliationScrap'))
wb = openpyxl.load_workbook(write_excel)
# Target the sheet that is listed last in the workbook.
ws = wb[wb.sheetnames[-1]]

# Tabulate the collected records and hand them to the Excel writer.
_frame = pd.DataFrame(data=personaldata, columns=data_store_columns)
print_df_to_excel(df=_frame, ws=ws)
wb.save(write_excel)

# Time elapsed since `start`, converted from seconds to hours
# (assumes `start` was taken with time.time() earlier in the script).
elapsed = (time.time() - start) / 3600
print(f"Elapsed time: {elapsed} hours")
Exemplo n.º 4
0
        # A NaN entry is recorded as 'NA' in all three output columns
        # (presumably NaN marks a missing title -- confirm against the code
        # that builds GStitles).
        if np.isnan(GStitles[i]):
            state1 = 'NA'
            state2 = 'NA'
            country = 'NA'
    # NOTE(review): bare except. Presumably intended to catch the TypeError
    # np.isnan raises for non-numeric (string) entries, but as written it
    # intercepts every exception raised in the try body above.
    except:
        # The geocode call sits outside the inner try, so an error raised by
        # gmaps.geocode itself propagates out of this handler.
        geocode_result = gmaps.geocode(GStitles[i])
        try:
            state1, state2, country = getstatecountry(geocode_result)
        # NOTE(review): bare except. Any failure in getstatecountry (or in
        # unpacking its result) is recorded as a sentinel string rather than
        # raised.
        except:
            state1 = 'Cannot retrieve'
            state2 = 'Cannot retrieve'
            country = 'Cannot retrieve'

    # Append this entry's values to the three parallel output lists.
    state1data.append(state1)
    state2data.append(state2)
    countrydata.append(country)
    # Disabled early exit (looks like a debugging aid for short test runs).
    #if i == 10:
    #    break

# Column name -> collected values, in the order the columns are emitted.
data = dict(state1=state1data, state2=state2data, country=countrydata)

# Create the results workbook and open it for writing.
write_excel = create_excel_file(
    './results/{}_results.xlsx'.format('GScitystatedata'))
wb = openpyxl.load_workbook(write_excel)
# Target the sheet that is listed last in the workbook.
ws = wb[wb.sheetnames[-1]]

# Tabulate the collected columns and hand them to the Excel writer.
_frame = pd.DataFrame(data)
print_df_to_excel(df=_frame, ws=ws)
wb.save(write_excel)

# Time elapsed since `start`, converted from seconds to hours
# (assumes `start` was taken with time.time() earlier in the script).
elapsed = (time.time() - start) / 3600
print(f"Elapsed time: {elapsed} hours")
# twitterscraper from:JustinWolfers -bd 2018-07-01 -ed 2019-07-01 --output=JustinWolferspart8.json  ----error 1114
# twitterscraper from:JustinWolfers -bd 2019-07-01 -ed 2020-05-19 --output=JustinWolferspart9.json  ----error 1383
# twitterscraper from:JustinWolfers -bd 2011-07-01 -ed 2012-07-01 --output=JustinWolferspart1.json && twitterscraper from:JustinWolfers -bd 2012-07-01 -ed 2013-07-01 --output=JustinWolferspart2.json && twitterscraper from:JustinWolfers -bd 2013-07-01 -ed 2014-07-01 --output=JustinWolferspart3.json && twitterscraper from:JustinWolfers -bd 2014-07-01 -ed 2015-07-01 --output=JustinWolferspart4.json && twitterscraper from:JustinWolfers -bd 2015-07-01 -ed 2016-07-01 --output=JustinWolferspart5.json && twitterscraper from:JustinWolfers -bd 2016-07-01 -ed 2017-07-01 --output=JustinWolferspart6.json && twitterscraper from:JustinWolfers -bd 2017-07-01 -ed 2018-07-01 --output=JustinWolferspart7.json && twitterscraper from:JustinWolfers -bd 2018-07-01 -ed 2019-07-01 --output=JustinWolferspart8.json && twitterscraper from:JustinWolfers -bd 2019-07-01 -ed 2020-05-19 --output=JustinWolferspart9.json

import codecs, json
import pandas as pd
import openpyxl
from others import create_excel_file, print_df_to_excel

# Number of twitterscraper output files to merge
# (JustinWolferspart1.json .. JustinWolferspart<parts>.json).
parts = 9

# Read every part into its own DataFrame, echoing each one as it is loaded.
# Each file is parsed once, by pandas. The previous extra json.load pass
# produced a value that was never used, and its `encoding` keyword is
# rejected with a TypeError on Python 3.9+.
frames = []
for partnumber in range(1, parts + 1):
    dfread = pd.read_json('JustinWolferspart{}.json'.format(partnumber),
                          encoding='utf-8')
    print(dfread)
    frames.append(dfread)

# DataFrame.append was removed in pandas 2.0. A single concat yields the same
# stacked frame (each part keeps its own row labels) without re-copying the
# accumulated data on every iteration.
df = pd.concat(frames)

# Write the merged tweets to ./results/JustinWolfers_results.xlsx.
name = 'JustinWolfers'
write_excel = create_excel_file('./results/{}_results.xlsx'.format(name))
wb = openpyxl.load_workbook(write_excel)
# Target the sheet that is listed last in the workbook.
ws = wb[wb.sheetnames[-1]]
print_df_to_excel(df=df, ws=ws)
wb.save(write_excel)