def _build_protocol_factory(protocol):
    """Return a new protocol factory matching the given ``Protocol`` member.

    ``Protocol.binary`` yields a ``TBinaryProtocolFactory`` instance and
    ``Protocol.compact`` yields a ``TCompactProtocolFactory`` instance.
    Any other value raises ``Exception("Unknown protocol")``.
    """
    if protocol == Protocol.binary:
        return TBinaryProtocolFactory()
    if protocol == Protocol.compact:
        return TCompactProtocolFactory()
    raise Exception("Unknown protocol")
def read_thrift(file_obj, ttype):
    """Deserialize one thrift structure of type ``ttype`` from ``file_obj``.

    Parameters
    ----------
    file_obj: file-like object
        Wrapped in a ``TFileTransport`` and decoded with the protocol
        produced by ``TCompactProtocolFactory``.
    ttype: callable
        Called with no arguments to create the empty structure to populate.

    Returns
    -------
    The ``ttype()`` instance after its ``read`` method has consumed the input.
    """
    transport = TFileTransport(file_obj)
    protocol = TCompactProtocolFactory().get_protocol(transport)
    struct = ttype()
    struct.read(protocol)
    return struct
def _read_page_header(file_obj):
    """Decode a ``parquet_thrift.PageHeader`` from ``file_obj``.

    The file object is wrapped in a ``TFileTransport`` and read through the
    protocol produced by ``TCompactProtocolFactory``. The populated header
    object is returned.
    """
    transport = TFileTransport(file_obj)
    protocol = TCompactProtocolFactory().get_protocol(transport)
    header = parquet_thrift.PageHeader()
    header.read(protocol)
    return header
def write_thrift(fobj, thrift):
    """Write binary compact representation of thriftpy structured object

    Parameters
    ----------
    fobj: open file-like object (binary mode)
    thrift: thriftpy object to write

    Returns
    -------
    Number of bytes written

    Raises
    ------
    ParquetException
        If ``thrift.write`` raises ``TProtocolException``. The message names
        the object and field taken from the failing ``write_struct`` frame,
        or ``None`` for each when no such frame or local can be found.
    """
    t0 = fobj.tell()
    tout = TFileTransport(fobj)
    pout = TCompactProtocolFactory().get_protocol(tout)
    failure = None
    try:
        thrift.write(pout)
    except TProtocolException as e:
        # Walk the traceback from the outermost frame inwards and stop at the
        # first frame whose code object mentions 'write_struct'; its locals
        # are expected to hold the object ('obj') and field name ('fname')
        # being written when validation failed.
        obj = name = None
        tb = sys.exc_info()[2]
        while tb is not None:
            if 'write_struct' in str(tb.tb_frame.f_code):
                variables = tb.tb_frame.f_locals
                # .get() so a frame without these locals cannot mask the
                # original thrift error with a KeyError.
                obj = variables.get('obj')
                name = variables.get('fname')
                break
            tb = tb.tb_next
        # Fall back to the exception itself when it carries no args.
        reason = e.args[0] if e.args else e
        failure = (reason, obj, name)
    # Raised after the except block (as the original did), so on Python 3 the
    # new exception is not implicitly chained to the thrift one.
    if failure is not None:
        raise ParquetException('Thrift parameter validation failure %s'
                               ' when writing: %s-> Field: %s' % failure)
    return fobj.tell() - t0
def _read_page_header(fo):
    """Read a ``parquet_thrift.PageHeader`` from the file object ``fo``.

    ``fo`` is wrapped in a ``TFileTransport`` and decoded using the protocol
    obtained from ``TCompactProtocolFactory``. Returns the populated header.
    """
    transport = TFileTransport(fo)
    protocol = TCompactProtocolFactory().get_protocol(transport)
    header = parquet_thrift.PageHeader()
    header.read(protocol)
    return header
def _read_footer(fo):
    """Read the footer of ``fo`` and return it as a ``FileMetaData`` object.

    ``fo`` is assumed to reference a valid parquet file. The footer size is
    obtained from ``_get_footer_size``, and the file position is moved to
    ``8 + footer_size`` bytes before the end of the file before decoding.
    """
    size = _get_footer_size(fo)
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Footer size in bytes: %s", size)
    # whence=2 seeks relative to the end of the file, landing on the first
    # byte of the footer.
    # NOTE(review): the extra 8 bytes are presumably the trailing footer
    # length plus magic marker - confirm against the Parquet format spec.
    fo.seek(-(8 + size), 2)
    transport = TFileTransport(fo)
    protocol = TCompactProtocolFactory().get_protocol(transport)
    metadata = parquet_thrift.FileMetaData()
    metadata.read(protocol)
    return metadata
def write_thrift(fobj, thrift):
    """Serialize a thriftpy object to ``fobj`` using the compact protocol.

    Parameters
    ----------
    fobj: open file-like object (binary mode)
    thrift: thriftpy object to write

    Returns
    -------
    Number of bytes written, measured as the change in ``fobj.tell()``
    """
    start = fobj.tell()
    transport = TFileTransport(fobj)
    protocol = TCompactProtocolFactory().get_protocol(transport)
    thrift.write(protocol)
    end = fobj.tell()
    return end - start