def create_table(wf_module, table, metadata=None):
    """Store `table` for `wf_module`, routing empty input to the empty-table path.

    Args:
        wf_module: Passed through unchanged to the internal creator.
        table: Table to store. ``None``, or a table whose ``.empty`` is
            truthy, is handled by ``StoredObject.__create_empty_table``.
            (Presumably a pandas DataFrame -- TODO confirm.)
        metadata: Optional value passed through unchanged.

    Returns:
        Whatever ``StoredObject.__create_empty_table`` or
        ``StoredObject.__create_table_internal`` returns.
    """
    # Guard clause: nothing to hash when there is no data at all.
    if table is None or table.empty:
        return StoredObject.__create_empty_table(wf_module, metadata)

    table_hash = hash_table(table)
    return StoredObject.__create_table_internal(
        wf_module, table, metadata, table_hash
    )
def create_table_if_different(wf_module, old_so, table, metadata=None):
    """Store `table` only if it differs from what `old_so` already holds.

    Args:
        wf_module: Passed through unchanged to the creator functions.
        old_so: Previously stored object to compare against, or ``None``
            when there is nothing to compare with. Must expose ``.hash``
            and ``.get_table()``.
        table: Candidate table.
        metadata: Optional value passed through unchanged.

    Returns:
        The newly created object when `table` is considered different,
        otherwise ``None``.
    """
    # No previous version: always store.
    if old_so is None:
        return StoredObject.create_table(wf_module, table, metadata=metadata)

    new_hash = hash_table(table)
    hashes_differ = new_hash != old_so.hash

    # NOTE(review): matching hashes are trusted as "unchanged" without
    # comparing contents; the full comparison below only runs when the
    # hashes differ. Confirm this is the intended direction.
    if not hashes_differ:
        return None

    # Hashes differ; confirm against the stored table before writing.
    if old_so.get_table().equals(table):
        return None

    return StoredObject.__create_table_internal(
        wf_module, table, metadata, new_hash
    )
def create_table_if_different(wf_module, old_so, table, metadata=None):
    """Store `table` unless its hash matches the hash recorded on `old_so`.

    Args:
        wf_module: Passed through unchanged to ``StoredObject.create_table``.
        old_so: Previously stored object exposing ``.hash``, or ``None``
            when there is no previous version.
        table: Candidate table.
        metadata: Optional value passed through unchanged.

    Returns:
        The result of ``StoredObject.create_table`` when there is no
        previous object or the hashes differ; otherwise ``None``.
    """
    # Short-circuit keeps hash_table() from running when old_so is None.
    has_changed = old_so is None or hash_table(table) != old_so.hash
    if not has_changed:
        return None
    return StoredObject.create_table(wf_module, table, metadata=metadata)
def create_table_if_different(cls, wf_module, old_so, table):
    """Store `table` unless it is identical to the table held by `old_so`.

    Args:
        cls: Object providing ``__create_table_internal``.
        wf_module: Passed through unchanged to the internal creator.
        old_so: Previously stored object exposing ``.hash`` and
            ``.get_table()``, or ``None`` when there is no previous version.
        table: Candidate table.

    Returns:
        ``None`` when `table` matches the stored table; otherwise the
        result of ``cls.__create_table_internal``.
    """
    table_hash = hash_table(table)

    # Fast path: when the hashes differ (or there is no old object) we
    # never need to read the stored table.
    if old_so is not None and table_hash == old_so.hash:
        # Slow path: compare contents. Expensive: reads a file from S3,
        # holds both DataFrames in RAM, uses lots of CPU.
        if old_so.get_table().equals(table):
            # `table` is identical to what was in `old_so`.
            return None

    # `table` is new.
    return cls.__create_table_internal(wf_module, table, table_hash)
def create_table(wf_module, table, metadata=None):
    """Hash `table` and store it for `wf_module`.

    Args:
        wf_module: Passed through unchanged to the internal creator.
        table: Table to hash and store.
        metadata: Optional value passed through unchanged.

    Returns:
        Whatever ``StoredObject.__create_table_internal`` returns.
    """
    table_hash = hash_table(table)
    return StoredObject.__create_table_internal(
        wf_module,
        table,
        metadata,
        table_hash,
    )
def create_table(cls, wf_module, table):
    """Hash `table` and store it for `wf_module`.

    Args:
        cls: Object providing ``__create_table_internal``.
        wf_module: Passed through unchanged to the internal creator.
        table: Table to hash and store.

    Returns:
        Whatever ``cls.__create_table_internal`` returns.
    """
    table_hash = hash_table(table)
    return cls.__create_table_internal(
        wf_module,
        table,
        table_hash,
    )