示例#1
0
 def test4(self):
     """Expect ``match_data.matchdata`` to return True for a sample row.

     The sample row's seventh comma-separated field is
     "Squirrel Hill South 2". What ``matchdata`` checks internally is not
     visible here -- presumably it decides whether that neighborhood name
     can be matched; confirm against ``match_data``.
     """
     conn = psycopg2.connect(host="sculptor.stat.cmu.edu",
                             database="jiayuz1",
                             user="******",
                             password="******")
     # Close the connection even when the assertion fails or matchdata
     # raises, so a failing test does not leave a server session open.
     try:
         cur = conn.cursor()
         line = "20885,OFFENSE 2.0,3304,Identity Theft,2015-03-10T00:01:00,1400 block Centre Ave,Squirrel Hill South 2,2"
         row = line.split(',')
         self.assertEqual(match_data.matchdata(cur, row), True)
     finally:
         conn.close()
示例#2
0
 def test1(self):
     """Expect ``match_data.matchdata`` to return False for a sample row.

     The sample row's seventh comma-separated field is
     "Golden Triangle/Civic Arena". What ``matchdata`` checks internally is
     not visible here -- presumably this neighborhood name is one it cannot
     match; confirm against ``match_data``.
     """
     conn = psycopg2.connect(host="sculptor.stat.cmu.edu",
                             database="jiayuz1",
                             user="******",
                             password="******")
     # Close the connection even when the assertion fails or matchdata
     # raises, so a failing test does not leave a server session open.
     try:
         cur = conn.cursor()
         line = "20885,OFFENSE 2.0,3304,Identity Theft,2015-03-10T00:01:00,1400 block Centre Ave,Golden Triangle/Civic Arena,2"
         row = line.split(',')
         self.assertEqual(match_data.matchdata(cur, row), False)
     finally:
         conn.close()
示例#3
0
import sys
import psycopg2
import match_data

# Ingest script: reads comma-separated rows from stdin, re-matches the
# neighborhood names, inserts the matched rows into the blotter table, and
# records duplicated ids and other errors.
#
# A single connection and cursor are opened here and shared by every row
# processed below.
conn = psycopg2.connect(
    host="sculptor.stat.cmu.edu",
    database="jiayuz1",
    user="******",
    password="******",
)
cur = conn.cursor()

for line in sys.stdin.readlines():
    row = line.split(",")

    # rematch the unmatched neighborhoods
    match = match_data.matchdata(cur, row)

    if match is True:
        try:
            cur.execute(
                """INSERT into blotter (id, report_name, section, description, arrest_time, address, neighborhood, zone) 
							values (%s, %s, %s, %s, %s, %s, %s, %s)""", (row))
            conn.commit()
        except Exception as error:
            conn.rollback()
            # catch duplicated id errors
            if error.pgcode == "23505":
                print("duplicated id:",
                      str(row),
                      file=open("ingest_errors.txt", 'a'))
            # catch other errors