def test4(self):
    """matchdata returns True for a row whose seventh field is "Squirrel Hill South 2".

    The row is a comma-separated record split into eight string fields.
    The test talks to the live PostgreSQL server, so it needs network
    access and valid credentials.
    """
    conn = psycopg2.connect(host="sculptor.stat.cmu.edu", database="jiayuz1",
                            user="******", password="******")
    try:
        cur = conn.cursor()
        line = "20885,OFFENSE 2.0,3304,Identity Theft,2015-03-10T00:01:00,1400 block Centre Ave,Squirrel Hill South 2,2"
        row = line.split(',')
        self.assertEqual(match_data.matchdata(cur, row), True)
    finally:
        # Release the connection even when the assertion (or the query
        # inside matchdata) fails, so test runs do not leak sessions.
        conn.close()
def test1(self):
    """matchdata returns False for a row whose seventh field is "Golden Triangle/Civic Arena".

    The row is a comma-separated record split into eight string fields.
    The test talks to the live PostgreSQL server, so it needs network
    access and valid credentials.
    """
    conn = psycopg2.connect(host="sculptor.stat.cmu.edu", database="jiayuz1",
                            user="******", password="******")
    try:
        cur = conn.cursor()
        line = "20885,OFFENSE 2.0,3304,Identity Theft,2015-03-10T00:01:00,1400 block Centre Ave,Golden Triangle/Civic Arena,2"
        row = line.split(',')
        self.assertEqual(match_data.matchdata(cur, row), False)
    finally:
        # Release the connection even when the assertion (or the query
        # inside matchdata) fails, so test runs do not leak sessions.
        conn.close()
import sys

import psycopg2

import match_data

# Ingest blotter records from stdin: drop the duplicated records, match the
# neighborhood names and catch other errors.

ERROR_LOG = "ingest_errors.txt"

# PostgreSQL SQLSTATE for unique_violation (here: a duplicated primary key).
UNIQUE_VIOLATION = "23505"

INSERT_SQL = """INSERT into blotter (id, report_name, section, description,
                arrest_time, address, neighborhood, zone)
                values (%s, %s, %s, %s, %s, %s, %s, %s)"""


def _log_error(*parts):
    """Append one line built from *parts* to the ingest error log.

    The file is opened and closed per message so no handle is leaked and
    the log is only created once there is something to report.
    """
    with open(ERROR_LOG, 'a') as log:
        print(*parts, file=log)


conn = psycopg2.connect(host="sculptor.stat.cmu.edu", database="jiayuz1",
                        user="******", password="******")
try:
    cur = conn.cursor()
    # Stream stdin line by line instead of loading it all with readlines().
    for line in sys.stdin:
        # Drop the line terminator so it does not end up in the last field.
        line = line.rstrip("\r\n")
        if not line.strip():
            # Blank lines carry no record; skip them.
            continue
        row = line.split(",")
        # rematch the unmatched neighborhoods
        if match_data.matchdata(cur, row) is not True:
            continue
        try:
            cur.execute(INSERT_SQL, row)
            conn.commit()
        except Exception as error:
            # Undo the failed statement so the next row starts from a clean
            # transaction.
            conn.rollback()
            # Only psycopg2 errors carry ``pgcode``; getattr keeps other
            # exception types from raising AttributeError in this handler.
            if getattr(error, "pgcode", None) == UNIQUE_VIOLATION:
                # catch duplicated id errors
                _log_error("duplicated id:", str(row))
            else:
                # catch other errors
                _log_error("other error:", str(row), repr(error))
finally:
    # Always release the database connection, even if ingestion aborts.
    conn.close()