Exemplo n.º 1
0
def getData(cty_code):
    global client
    # Submit a query.
    print "Downloading data for: ", cty_code
    job_id, results = client.query("""
                        SELECT Source, Target, Year, QuadClass, Count(EventCode) as ECount, SUM(NumSources) as NSources, MAX(NumSources) as MaxSources, AVG(NumSources) as AvgSources, STDDEV(NumSources) as StdDevSources
                        FROM
                            (
                                SELECT  Actor1CountryCode as Source, Actor2CountryCode as Target, Year, QuadClass,EventCode,NumSources
                                FROM [gdelt-bq:full.events]
                                WHERE (Actor1CountryCode = '%s') AND (Actor1CountryCode != Actor2CountryCode)
                                AND (Actor1Type1Code = "GOV" OR Actor1Type2Code = "GOV" OR Actor1Type3Code = "GOV")
                            ),
                            (
                                SELECT  Actor2CountryCode as Source, Actor1CountryCode as Target, Year, QuadClass,EventCode,NumSources
                                FROM [gdelt-bq:full.events]
                                WHERE (Actor2CountryCode = '%s') AND (Actor2CountryCode != Actor1CountryCode)
                                AND (Actor2Type1Code = "GOV" OR Actor2Type2Code = "GOV" OR Actor2Type3Code = "GOV")
                            )
                        GROUP BY Source, Target, Year, QuadClass ORDER BY MaxSources DESC;
                        """ % (cty_code, cty_code))

    # Check if the query has finished running.
    complete, row_count = client.check_job(job_id)

    # Retrieve the results.
    results = client.get_query_rows(job_id)

    # print results

    with open(cty_code+'.csv', 'w') as output_file:
        w = csv.DictWriter(output_file, results[0].keys())
        w.writeheader()
        w.writerows(results)
Exemplo n.º 2
0
import pdb
import csv
import random
from connection import connection
from bigquery import client

results = client.query("""
    SELECT * FROM `harvard-599-trendsetters.Genie.final_table_output_to_ui`
""")

with connection:
    with connection.cursor() as cur:
        cur.execute("DELETE FROM relationships;")
        connection.commit()

        count = 0
        for row in results:
            count += 1
            if count % 5000 == 0:
                print(count)
                connection.commit()

            change_recent = None
            if row[6] == "N":
                change_recent = False
            elif row[6] == "Y":
                change_recent = True
            cur.execute(
                "INSERT INTO relationships VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING;",
                (row[0], row[1][:-1], row[2], row[3], row[4] or "", row[5] or "", change_recent, row[7] and row[7][:-1] or 0, row[8] and row[8][:-1] or 0, row[9], row[10] or 0)
            )
Exemplo n.º 3
0
import pdb
import csv
import random
from connection import connection
from bigquery import client

results = client.query("""
    SELECT * FROM `harvard-599-trendsetters.Genie.ui_papers_links`
""")

with connection:
    with connection.cursor() as cur:
        cur.execute("DELETE FROM paper_links;")
        connection.commit()
        count = 0

        for row in results:
            count += 1
            if count % 10000 == 0:
                print(count)
                connection.commit()
            if row[6]:
                cur.execute(
                    "INSERT INTO paper_links VALUES (%s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING;",
                    (row[0], row[1], row[2], row[3], row[4], row[5]
                     or 0, row[6], row[7]))
Exemplo n.º 4
0
import pdb
import csv
import random
from connection import connection
from bigquery import client

results = client.query("""
    SELECT * FROM `harvard-599-trendsetters.aact.cumulative_journals`
""")

with connection:
    with connection.cursor() as cur:
        cur.execute("DELETE FROM journal_sums;")
        connection.commit()
        count = 0

        for row in results:
            count += 1
            if count % 10000 == 0:
                print(count)
                connection.commit()
            cur.execute("INSERT INTO journal_sums VALUES (%s, %s, %s);",
                        (row[0], row[1], row[2]))
Exemplo n.º 5
0
import pdb
import csv
import random
from connection import connection
from bigquery import client

results = client.query("""
    SELECT * FROM `harvard-599-trendsetters.aact.publications_cumulative_sum`
""")

with connection:
    with connection.cursor() as cur:
        cur.execute("DELETE FROM pub_sums;")
        connection.commit()
        count = 0

        for row in results:
            count += 1
            if count % 10000 == 0:
                print(count)
                connection.commit()
            cur.execute("INSERT INTO pub_sums VALUES (%s, %s, %s, %s);",
                        (row[0], row[1], row[2] or 0, row[3] or 0))