Example #1
def load_data(dump_file):
    # Relies on module-level names not shown in the snippet: json, lib.pg,
    # schema, special_fields, _typify and _special_treatment.
    pg = lib.pg.pg_sync()
    query_queue = []
    with open(dump_file) as f:
        for course in json.load(f):
            # Convert each schema field to its database type; fields that
            # need custom handling come from _special_treatment instead.
            pairs = [(name, _typify(course[name], data_type))
                     for (name, data_type) in schema
                     if name not in special_fields]
            pairs += _special_treatment(course, schema)
            [columns, values] = zip(*pairs)
            db_query = 'INSERT INTO courses_t (%s) VALUES (%s);' % (
                ', '.join(columns), ', '.join(["%s"] * len(values)))
            query_queue.append(values)
            # Flush to PostgreSQL in batches of 1000 rows.
            if len(query_queue) == 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
        # Flush whatever is left after the last full batch.
        if query_queue:
            print 'submitting a batch'
            cursor = pg.cursor()
            cursor.executemany(db_query, query_queue)
            pg.commit()
            cursor.close()
            query_queue = []
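The batching above caps each executemany call at 1000 rows. If lib.pg.pg_sync() returns a psycopg2 connection (an assumption; the helper is not shown in these examples), psycopg2's execute_values can flush a batch in a single round trip, which is usually much faster than executemany. A minimal sketch with a hypothetical flush_batch helper:

from psycopg2.extras import execute_values

def flush_batch(pg, columns, rows):
    # execute_values expands one VALUES list server-side instead of
    # running the INSERT once per row, as executemany does.
    cursor = pg.cursor()
    execute_values(
        cursor,
        'INSERT INTO courses_t (%s) VALUES %%s' % ', '.join(columns),
        rows)
    pg.commit()
    cursor.close()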
Example #2
def load_data(dump_file):
    pg = lib.pg.pg_sync()
    query_queue = []
    with open(dump_file) as f:
        reader = csv.reader(f)
        reader.next()  # skip header categories row
        reader.next()  # skip header row
        # Reuse the reader that already consumed the two header rows;
        # the original's fresh csv.reader(f) only worked by sharing file position.
        for row in reader:
            columns = [name for (name, data_type) in schema]
            values = [_typify(value, data_type)
                      for ((name, data_type), value) in zip(schema, row)]
            db_query = 'INSERT INTO housing_amenities_t (%s) VALUES (%s);' % (
                    ', '.join(columns), ', '.join(["%s"] * len(values)))
            query_queue.append(values)
            if len(query_queue) == 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
        if query_queue:
            print 'submitting a batch'
            cursor = pg.cursor()
            cursor.executemany(db_query, query_queue)
            pg.commit()
            cursor.close()
            query_queue = []
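Because the file carries two header rows, the same load can be written with csv.DictReader, which pairs each cell with its column name. A hedged sketch, assuming the second header row uses the same names that appear in schema (not confirmed by the snippet):

import csv

def read_typed_rows(dump_file):
    with open(dump_file) as f:
        f.readline()  # discard the header categories row
        for row in csv.DictReader(f):  # field names come from the next row
            yield [_typify(row[name], data_type) for (name, data_type) in schema]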
Example #3
def load_data(dump_file):
    pg = lib.pg.pg_sync()
    query_queue = []
    with open(dump_file) as f:
        for course in json.load(f):
            pairs = [(name, _typify(course[name], data_type))
                     for (name, data_type) in schema
                     if name not in special_fields]
            pairs += _special_treatment(course, schema)
            [columns, values] = zip(*pairs)
            db_query = 'INSERT INTO courses_t (%s) VALUES (%s);' % (
                ', '.join(columns), ', '.join(["%s"] * len(values)))
            query_queue.append(values)
            if len(query_queue) == 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
        if query_queue:
            print 'submitting a batch'
            cursor = pg.cursor()
            cursor.executemany(db_query, query_queue)
            pg.commit()
            cursor.close()
            query_queue = []
Example #4
def load_data(dump_file):
    pg = lib.pg.pg_sync()
    query_queue = []
    with open(dump_file) as f:
        reader = csv.reader(f)
        reader.next()  # skip header categories row
        reader.next()  # skip header row
        for row in reader:  # reuse the reader that already skipped the headers
            columns = [name for (name, data_type) in schema]
            values = [
                _typify(value, data_type)
                for ((name, data_type), value) in zip(schema, row)
            ]
            db_query = 'INSERT INTO housing_amenities_t (%s) VALUES (%s);' % (
                ', '.join(columns), ', '.join(["%s"] * len(values)))
            query_queue.append(values)
            if len(query_queue) == 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
        if query_queue:
            print 'submitting a batch'
            cursor = pg.cursor()
            cursor.executemany(db_query, query_queue)
            pg.commit()
            cursor.close()
            query_queue = []
Example #5
def load_data(dump_file):
    pg = lib.pg.pg_sync()
    query_queue = []
    with open(dump_file) as f:
        for course in json.load(f):
            pairs = [(name, _typify(course[name], data_type))
                     for (name, data_type) in course_schema
                     if name not in special_fields]
            pairs += _special_treatment(course, course_schema)

            [columns, values] = zip(*pairs)

            cols = ', '.join(columns)
            vals = ', '.join(["%s"] * len(values))
            insert = 'INSERT INTO courses_v2_t (%s) SELECT %s' % (cols, vals)

            colmapping = ', '.join(colstr + '=%s' for colstr in columns)
            update = 'UPDATE courses_v2_t SET %s WHERE course=%%s' % colmapping

            # Emulate an upsert: run the UPDATE first, and let the INSERT
            # fire only when the UPDATE matched no row (pre-9.5 CTE pattern).
            db_query = ('WITH upsert AS (%s RETURNING *) '
                        '%s WHERE NOT EXISTS (SELECT * FROM upsert);' % (update, insert))

            # Parameter order: the SET values, the WHERE course key, then
            # the values selected by the INSERT.
            query_queue.append(values + (course['Course'][:8],) + values)

            if len(query_queue) >= 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
        if query_queue:
            print 'submitting a batch'
            cursor = pg.cursor()
            cursor.executemany(db_query, query_queue)
            pg.commit()
            cursor.close()
            query_queue = []
    with open(dump_file) as f:
        for course in json.load(f):
            pairs = [(name, _typify(course[name], data_type))
                     for (name, data_type) in section_schema
                     if name not in special_fields]
            pairs += _special_treatment(course, section_schema)
            [columns, values] = zip(*pairs)
            db_query = 'INSERT INTO sections_v2_t (%s) VALUES (%s);' % (
                    ', '.join(columns), ', '.join(["%s"] * len(values)))
            query_queue.append(values)
            if len(query_queue) == 1000:
                print 'submitting a batch'
                cursor = pg.cursor()
                cursor.executemany(db_query, query_queue)
                pg.commit()
                cursor.close()
                query_queue = []
        if query_queue:
            print 'submitting a batch'
            cursor = pg.cursor()
            cursor.executemany(db_query, query_queue)
            pg.commit()
            cursor.close()
            query_queue = []
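The WITH upsert CTE above emulates an upsert on PostgreSQL versions that lack native support. On PostgreSQL 9.5 or newer, and assuming courses_v2_t has a unique constraint on course (neither is confirmed by the snippet), INSERT ... ON CONFLICT does the same job with a single parameter set per row. A sketch of what the query construction inside the first loop could become:

assignments = ', '.join('%s=EXCLUDED.%s' % (c, c) for c in columns)
db_query = ('INSERT INTO courses_v2_t (%s) VALUES (%s) '
            'ON CONFLICT (course) DO UPDATE SET %s;' % (
                ', '.join(columns), ', '.join(['%s'] * len(values)), assignments))
query_queue.append(values)  # no need to repeat values for a separate UPDATE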
Example #6
def drop_table():
    print 'Dropping courses, sections tables...'
    pg = lib.pg.pg_sync()
    db_query = 'DROP TABLE sections_v2_t;'
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
    db_query = 'DROP TABLE courses_v2_t;'
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
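A bare DROP TABLE raises an error when the table is missing, which aborts teardown scripts run against a fresh database. Adding IF EXISTS makes the operation idempotent; a small variant of the query above:

db_query = 'DROP TABLE IF EXISTS sections_v2_t;'  # succeeds even if the table is absent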
Example #7
def create_table():
    print 'Creating courses, sections tables with proper schema...'
    pg = lib.pg.pg_sync()
    db_query = 'CREATE TABLE IF NOT EXISTS courses_v2_t (%s);' % (", ".join(
            ['%s %s' % column for column in course_schema]))
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
    db_query = 'CREATE TABLE IF NOT EXISTS sections_v2_t (%s);' % (", ".join(
            ['%s %s' % column for column in section_schema]))
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
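The '%s %s' % column formatting implies that each schema entry is a (column_name, sql_type) pair. A purely hypothetical illustration of that shape (the real course_schema is not shown in these examples):

course_schema = [
    ('course', 'VARCHAR(8)'),   # hypothetical column
    ('description', 'TEXT'),    # hypothetical column
]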
Example #8
def drop_table():
    print 'Dropping housing_amenities table...'
    pg = lib.pg.pg_sync()
    db_query = 'DROP TABLE housing_amenities_t;'
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
Example #9
def drop_table():
    print 'Dropping housing_amenities table...'
    pg = lib.pg.pg_sync()
    db_query = 'DROP TABLE housing_amenities_t;'
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
def create_table():
    print 'Creating housing_amenities table with proper schema...'
    pg = lib.pg.pg_sync()
    db_query = 'CREATE TABLE IF NOT EXISTS housing_amenities_t (%s);' % (
            ", ".join(['%s %s' % column for column in schema]))
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
Example #11
def create_table():
    print 'Creating housing_amenities table with proper schema...'
    pg = lib.pg.pg_sync()
    db_query = 'CREATE TABLE IF NOT EXISTS housing_amenities_t (%s);' % (
        ", ".join(['%s %s' % column for column in schema]))
    cursor = pg.cursor()
    cursor.execute(db_query)
    pg.commit()
Example #12
def load_data(dump_file):
    pg = lib.pg.pg_sync()
    db_query = """UPDATE courses_t SET description=%s WHERE course~~*%s;"""
    query_queue = []
    doc = etree.parse(dump_file)
    courses = doc.findall('course')
    for course in courses:
        department = course.findtext('subject_area_code')
        number = course.findtext('course_number_1')
        description = course.findtext('course_description')
        # No manual quote doubling: executemany binds description as a
        # parameter, so the driver handles escaping itself.
        query_queue.append(
            (description, '%%%s%%' % (department + str(number))))  # %DEPTNUM% pattern for the ILIKE match
    if query_queue:
        print 'submitting a batch'
        cursor = pg.cursor()
        cursor.executemany(db_query, query_queue)
        pg.commit()
        cursor.close()
        query_queue = []
def load_data(dump_file):
    pg = lib.pg.pg_sync()
    db_query = """UPDATE courses_v2_t SET description=%s WHERE course=%s"""
    query_queue = []
    doc = etree.parse(dump_file)
    courses = doc.findall('course')
    for course in courses:
        department = course.findtext('subject_area_code')
        number = course.findtext('course_number_1')
        description = course.findtext('course_description')
        description = HTMLParser.HTMLParser().unescape(description)  # decode HTML entities
        # Parameter binding handles quoting; no manual doubling of quotes.
        query_queue.append((description, department + str(number)))
    if query_queue:
        print 'submitting a batch'
        cursor = pg.cursor()
        cursor.executemany(db_query, query_queue)
        pg.commit()
        cursor.close()
        query_queue = []
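Parameter binding makes manual quote escaping unnecessary in both versions above. If pg is a psycopg2 connection (an assumption), cursor.mogrify shows the exact statement the driver would send, with quoting already applied; the sample values here are purely illustrative:

cursor = pg.cursor()
print cursor.mogrify('UPDATE courses_v2_t SET description=%s WHERE course=%s',
                     ("O'Brien's seminar", 'COMS4111'))  # the driver escapes the quote itself
cursor.close()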