import json

import lib.pg


def load_data(dump_file):
    pg = lib.pg.pg_sync()
    query_queue = []
    with open(dump_file) as f:
        for course in json.load(f):
            # Convert each schema field to its database type; fields needing
            # custom handling are delegated to _special_treatment.
            pairs = [(name, _typify(course[name], data_type))
                     for (name, data_type) in schema
                     if name not in special_fields]
            pairs += _special_treatment(course, schema)
            [columns, values] = zip(*pairs)
            db_query = 'INSERT INTO courses_t (%s) VALUES (%s);' % (
                ', '.join(columns), ', '.join(["%s"] * len(values)))
            query_queue.append(values)
            # Flush inserts to Postgres in batches of 1000 rows.
            if len(query_queue) == 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
    # Flush any remaining rows that did not fill a full batch.
    if query_queue:
        print 'submitting a batch'
        cursor = pg.cursor()
        cursor.executemany(db_query, query_queue)
        pg.commit()
        cursor.close()
        query_queue = []
import csv

import lib.pg


def load_data(dump_file):
    pg = lib.pg.pg_sync()
    query_queue = []
    with open(dump_file) as f:
        reader = csv.reader(f)
        reader.next()  # skip header categories row
        reader.next()  # skip header row
        # Reuse the same reader so the two skipped header rows stay skipped
        # (constructing a second csv.reader here was a bug).
        for row in reader:
            columns = [name for (name, data_type) in schema]
            values = [_typify(value, data_type)
                      for ((name, data_type), value) in zip(schema, row)]
            db_query = 'INSERT INTO housing_amenities_t (%s) VALUES (%s);' % (
                ', '.join(columns), ', '.join(["%s"] * len(values)))
            query_queue.append(values)
            # Flush inserts to Postgres in batches of 1000 rows.
            if len(query_queue) == 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
    # Flush any remaining rows that did not fill a full batch.
    if query_queue:
        print 'submitting a batch'
        cursor = pg.cursor()
        cursor.executemany(db_query, query_queue)
        pg.commit()
        cursor.close()
        query_queue = []
import json

import lib.pg


def load_data(dump_file):
    pg = lib.pg.pg_sync()
    query_queue = []
    # First pass: upsert course-level records into courses_v2_t.
    with open(dump_file) as f:
        for course in json.load(f):
            pairs = [(name, _typify(course[name], data_type))
                     for (name, data_type) in course_schema
                     if name not in special_fields]
            pairs += _special_treatment(course, course_schema)
            [columns, values] = zip(*pairs)
            cols = ', '.join(columns)
            vals = ', '.join(["%s"] * len(values))
            insert = 'INSERT INTO courses_v2_t (%s) SELECT %s' % (cols, vals)
            colmapping = ', '.join(colstr + '=%s' for colstr in columns)
            update = 'UPDATE courses_v2_t SET %s WHERE course=%%s' % colmapping
            # Pre-9.5 Postgres upsert pattern: attempt the UPDATE inside a
            # CTE, and only INSERT when the UPDATE matched no existing row.
            db_query = ('WITH upsert AS (%s RETURNING *) '
                        '%s WHERE NOT EXISTS (SELECT * FROM upsert);'
                        % (update, insert))
            # Parameters: the SET values, the course key for the WHERE
            # clause, then the same values again for the INSERT's SELECT.
            query_queue.append(values + (course['Course'][:8],) + values)
            if len(query_queue) >= 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
    if query_queue:
        print 'submitting a batch'
        cursor = pg.cursor()
        cursor.executemany(db_query, query_queue)
        pg.commit()
        cursor.close()
        query_queue = []
    # Second pass: plain batched inserts of the section-level fields into
    # sections_v2_t (each record in the dump also carries section data).
    with open(dump_file) as f:
        for course in json.load(f):
            pairs = [(name, _typify(course[name], data_type))
                     for (name, data_type) in section_schema
                     if name not in special_fields]
            pairs += _special_treatment(course, section_schema)
            [columns, values] = zip(*pairs)
            db_query = 'INSERT INTO sections_v2_t (%s) VALUES (%s);' % (
                ', '.join(columns), ', '.join(["%s"] * len(values)))
            query_queue.append(values)
            if len(query_queue) == 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
    if query_queue:
        print 'submitting a batch'
        cursor = pg.cursor()
        cursor.executemany(db_query, query_queue)
        pg.commit()
        cursor.close()
        query_queue = []
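# Illustration only (not from the original source): with a hypothetical
# two-column course_schema of [('course', 'text'), ('title', 'text')], the
# generated upsert statement above would read:
#
#   WITH upsert AS (
#       UPDATE courses_v2_t SET course=%s, title=%s WHERE course=%s
#       RETURNING *
#   )
#   INSERT INTO courses_v2_t (course, title) SELECT %s, %s
#   WHERE NOT EXISTS (SELECT * FROM upsert);
#
# and the matching parameter tuple is values + (course_key,) + values,
# filling the placeholders left to right.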
def drop_table():
    print 'Dropping courses, sections table...'
    pg = lib.pg.pg_sync()
    db_query = 'DROP TABLE sections_v2_t;'
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
    db_query = 'DROP TABLE courses_v2_t;'
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
def create_table():
    print 'Creating courses, sections table with proper schema...'
    pg = lib.pg.pg_sync()
    db_query = 'CREATE TABLE IF NOT EXISTS courses_v2_t (%s);' % (", ".join(
        ['%s %s' % column for column in course_schema]))
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
    db_query = 'CREATE TABLE IF NOT EXISTS sections_v2_t (%s);' % (", ".join(
        ['%s %s' % column for column in section_schema]))
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
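# Illustration only (not from the original source): each schema is a list of
# (column_name, column_type) tuples, so if course_schema were the hypothetical
# [('course', 'text'), ('num_students', 'integer')], the first generated
# statement would be:
#
#   CREATE TABLE IF NOT EXISTS courses_v2_t (course text, num_students integer);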
def drop_table():
    print 'Dropping housing_amenities table...'
    pg = lib.pg.pg_sync()
    db_query = 'DROP TABLE housing_amenities_t;'
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
def create_table():
    print 'Creating housing_amenities table with proper schema...'
    pg = lib.pg.pg_sync()
    db_query = 'CREATE TABLE IF NOT EXISTS housing_amenities_t (%s);' % (
        ", ".join(['%s %s' % column for column in schema]))
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
def load_data(dump_file): pg = lib.pg.pg_sync() db_query = """UPDATE courses_t SET description=%s WHERE course~~*%s;""" query_queue = [] doc = etree.parse(dump_file) courses = doc.findall('course') for course in courses: department = course.findtext('subject_area_code') number = course.findtext('course_number_1') description = course.findtext('course_description') description = description.replace('\'', '\'\'') query_queue.append( (description, '%%%s%%' % (department + str(number)))) if query_queue: print 'submitting a batch' cursor = pg.cursor() cursor.executemany(db_query, query_queue) pg.commit() cursor.close() query_queue = []
def load_data(dump_file): pg = lib.pg.pg_sync() db_query = """UPDATE courses_v2_t SET description=%s WHERE course=%s""" query_queue = [] doc = etree.parse(dump_file) courses = doc.findall('course') for course in courses: department = course.findtext('subject_area_code') number = course.findtext('course_number_1') description = course.findtext('course_description') description = description.replace('\'', '\'\'') description = HTMLParser.HTMLParser().unescape(description) query_queue.append((description, '%s' % (department + str(number)))) if query_queue: print 'submitting a batch' cursor = pg.cursor() cursor.executemany(db_query, query_queue) pg.commit() cursor.close() query_queue = []