def FileFlush(file_handle: io.BufferedRandom) -> None:
    """Flush *file_handle* by calling its ``flush()`` method.

    Args:
        file_handle: The buffered stream to flush.
    """
    flush = file_handle.flush
    flush()
class BinaryCopy():
    """Build a PostgreSQL binary ``COPY`` stream in memory and load it.

    Rows are serialized into ``self.fs`` (a ``BufferedRandom`` over a
    ``BytesIO``) and then handed to ``copy_expert``. The same buffer is
    reused for every ``copy`` call and is emptied after each one.

    Each ``row`` passed to the row writers is indexable: ``row[0]`` is the
    text written to the jsonb column and ``row[1]`` is a mapping holding the
    individual fields (``id``, ``created_utc``, ``author``, ...).
    """

    def __init__(self, chunk_size=256, buffer_size=64 * 1024 * 1024):
        """Create the reusable buffer and pre-compile the struct formats.

        Args:
            chunk_size: Stored on the instance; not read by any method of
                this class.
            buffer_size: Buffer size passed to ``BufferedRandom``.
        """
        self.chunk_size = chunk_size
        self.buffer_size = buffer_size
        self.fs = BufferedRandom(BytesIO(), buffer_size=buffer_size)
        # int16 field count followed by two (int32 length, int64 value) pairs.
        self._row_header_struct = Struct("!hiqiq")
        # int32 byte length that precedes a variable-length field.
        self._row_int_struct = Struct("!i")
        # int32 length plus the int64 value of a bigint field.
        self._row_bigint_struct = Struct("!iq")
        # A length of -1 marks a NULL field.
        self._row_null_val = pack("!i", -1)

    def write_binary_header(self):
        """Write the 11-byte signature followed by two zero int32 fields."""
        self.fs.write(pack('!11sii', b'PGCOPY\n\xff\r\n\0', 0, 0))

    def write_binary_string(self, obj, keyname):
        """Write ``obj[keyname]`` as a length-prefixed encoded string.

        A missing key or a ``None`` value is written as NULL.

        Args:
            obj: Mapping holding the row's fields.
            keyname: Key of the string field to write.
        """
        # Keep the try minimal so a KeyError raised while encoding or writing
        # is not mistaken for a missing key.
        try:
            val = obj[keyname]
        except KeyError:
            val = None
        if val is None:
            self.fs.write(self._row_null_val)
            return
        encoded = val.encode()
        self.fs.write(self._row_int_struct.pack(len(encoded)))
        self.fs.write(encoded)

    def write_binary_bigint(self, val):
        """Write ``val`` as an 8-byte integer field, or NULL when it is None."""
        if val is None:
            self.fs.write(self._row_null_val)
        else:
            self.fs.write(self._row_bigint_struct.pack(8, val))

    def _write_row_header(self, field_count, obj):
        """Write the field count plus the ``id`` and ``created_utc`` fields.

        ``obj["id"]`` is parsed as a base-36 integer. ``created_utc`` goes
        through ``timestamp_to_pgtimestamp`` (defined outside this class;
        presumably converts to the PostgreSQL timestamp representation --
        confirm against that helper).

        Raises:
            KeyError: If ``obj`` lacks ``id`` or ``created_utc``.
        """
        self.fs.write(
            self._row_header_struct.pack(
                field_count,
                8, int(obj["id"], 36),
                8, timestamp_to_pgtimestamp(int(obj["created_utc"]))))

    def _write_jsonb(self, text):
        """Write ``text`` as a jsonb field: length, the byte 1, then the data."""
        data = text.encode()
        # The length covers the extra leading byte as well as the payload.
        self.fs.write(pack("!ib", len(data) + 1, 1))
        self.fs.write(data)

    def write_comment_row(self, row):
        """Serialize one comment as an 8-field row.

        Field order: id, created_utc, article id, subreddit id, parent id,
        author, subreddit, jsonb data.
        """
        obj = row[1]
        self._write_row_header(8, obj)
        # These are written separately because each of them may be null.
        self.write_binary_bigint(get_article(obj.get("link_id", None)))
        self.write_binary_bigint(get_sub_id(obj.get("subreddit_id", None)))
        self.write_binary_bigint(get_parent(obj.get("parent_id", None)))
        self.write_binary_string(obj, "author")
        self.write_binary_string(obj, "subreddit")
        self._write_jsonb(row[0])

    def write_submission_row(self, row):
        """Serialize one submission as a 6-field row.

        Field order: id, created_utc, subreddit id, author, subreddit,
        jsonb data.
        """
        obj = row[1]
        self._write_row_header(6, obj)
        # Written separately because it may be null.
        self.write_binary_bigint(get_sub_id(obj.get("subreddit_id", None)))
        self.write_binary_string(obj, "author")
        self.write_binary_string(obj, "subreddit")
        self._write_jsonb(row[0])

    def copy_comments(self, conn, table, lines):
        """Copy comment rows into ``table``; same as ``copy(..., "comments")``."""
        self.copy(conn, table, lines, "comments")

    def copy(self, conn, table, lines, thing_type):
        """Serialize ``lines`` and load them into ``table`` via binary COPY.

        Args:
            conn: Object whose ``cursor`` attribute provides ``copy_expert``.
            table: Target table name, interpolated into the COPY statement
                as-is.
            lines: Iterable of rows accepted by the selected row writer.
            thing_type: ``"comments"`` or ``"submissions"``.

        Raises:
            ValueError: If ``thing_type`` is not a known kind. Raised before
                anything is written to the buffer.
        """
        if thing_type == "comments":
            write_row = self.write_comment_row
        elif thing_type == "submissions":
            write_row = self.write_submission_row
        else:
            raise ValueError("Unknown thing type {}".format(thing_type))

        try:
            self.write_binary_header()
            for line in lines:
                write_row(line)
            # An int16 of -1 terminates the stream.
            self.fs.write(pack('!h', -1))
            self.fs.flush()
            self.fs.seek(0)
            # NOTE(review): this reads `cursor` as an attribute, not a call;
            # confirm `conn` is a wrapper exposing a cursor object.
            conn.cursor.copy_expert("copy %s from stdin with binary " % (table), self.fs)
        finally:
            # Always empty the shared buffer so a failed batch cannot leak
            # partial rows into the next stream.
            self.fs.seek(0)
            self.fs.truncate()