def getData(cty_code): global client # Submit a query. print "Downloading data for: ", cty_code job_id, results = client.query(""" SELECT Source, Target, Year, QuadClass, Count(EventCode) as ECount, SUM(NumSources) as NSources, MAX(NumSources) as MaxSources, AVG(NumSources) as AvgSources, STDDEV(NumSources) as StdDevSources FROM ( SELECT Actor1CountryCode as Source, Actor2CountryCode as Target, Year, QuadClass,EventCode,NumSources FROM [gdelt-bq:full.events] WHERE (Actor1CountryCode = '%s') AND (Actor1CountryCode != Actor2CountryCode) AND (Actor1Type1Code = "GOV" OR Actor1Type2Code = "GOV" OR Actor1Type3Code = "GOV") ), ( SELECT Actor2CountryCode as Source, Actor1CountryCode as Target, Year, QuadClass,EventCode,NumSources FROM [gdelt-bq:full.events] WHERE (Actor2CountryCode = '%s') AND (Actor2CountryCode != Actor1CountryCode) AND (Actor2Type1Code = "GOV" OR Actor2Type2Code = "GOV" OR Actor2Type3Code = "GOV") ) GROUP BY Source, Target, Year, QuadClass ORDER BY MaxSources DESC; """ % (cty_code, cty_code)) # Check if the query has finished running. complete, row_count = client.check_job(job_id) # Retrieve the results. results = client.get_query_rows(job_id) # print results with open(cty_code+'.csv', 'w') as output_file: w = csv.DictWriter(output_file, results[0].keys()) w.writeheader() w.writerows(results)
import pdb import csv import random from connection import connection from bigquery import client results = client.query(""" SELECT * FROM `harvard-599-trendsetters.Genie.final_table_output_to_ui` """) with connection: with connection.cursor() as cur: cur.execute("DELETE FROM relationships;") connection.commit() count = 0 for row in results: count += 1 if count % 5000 == 0: print(count) connection.commit() change_recent = None if row[6] == "N": change_recent = False elif row[6] == "Y": change_recent = True cur.execute( "INSERT INTO relationships VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING;", (row[0], row[1][:-1], row[2], row[3], row[4] or "", row[5] or "", change_recent, row[7] and row[7][:-1] or 0, row[8] and row[8][:-1] or 0, row[9], row[10] or 0) )
import pdb import csv import random from connection import connection from bigquery import client results = client.query(""" SELECT * FROM `harvard-599-trendsetters.Genie.ui_papers_links` """) with connection: with connection.cursor() as cur: cur.execute("DELETE FROM paper_links;") connection.commit() count = 0 for row in results: count += 1 if count % 10000 == 0: print(count) connection.commit() if row[6]: cur.execute( "INSERT INTO paper_links VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING;", (row[0], row[1], row[2], row[3], row[4], row[5] or 0, row[6], row[7]))
import pdb import csv import random from connection import connection from bigquery import client results = client.query(""" SELECT * FROM `harvard-599-trendsetters.aact.cumulative_journals` """) with connection: with connection.cursor() as cur: cur.execute("DELETE FROM journal_sums;") connection.commit() count = 0 for row in results: count += 1 if count % 10000 == 0: print(count) connection.commit() cur.execute("INSERT INTO journal_sums VALUES (%s, %s, %s);", (row[0], row[1], row[2]))
import pdb import csv import random from connection import connection from bigquery import client results = client.query(""" SELECT * FROM `harvard-599-trendsetters.aact.publications_cumulative_sum` """) with connection: with connection.cursor() as cur: cur.execute("DELETE FROM pub_sums;") connection.commit() count = 0 for row in results: count += 1 if count % 10000 == 0: print(count) connection.commit() cur.execute("INSERT INTO pub_sums VALUES (%s, %s, %s, %s);", (row[0], row[1], row[2] or 0, row[3] or 0))