def retrieve_portal_manifest(etcd, portal_name):
    """Load and parse the manifest stored in etcd for a portal.

    Args:
        etcd: client object exposing ``get_data(key)``.
        portal_name: name of the portal; the manifest is read from the
            key ``os.path.join(portal_name, 'manifest')``.

    Returns:
        The result of parsing the stored text-format payload into a
        ``common_pb.DataJoinPortalManifest``.

    Raises:
        ValueError: if ``etcd.get_data`` returns ``None`` for the key.
    """
    manifest_key = os.path.join(portal_name, 'manifest')
    serialized = etcd.get_data(manifest_key)
    if serialized is not None:
        return text_format.Parse(
            serialized, common_pb.DataJoinPortalManifest()
        )
    # Nothing stored under the key: the manifest must be committed first.
    raise ValueError(
        "the manifest of {} should be stored "
        "in etcd".format(portal_name)
    )
def _setUpPortalManifest(self):
    """Clear old portal state in both etcd stores and commit fresh manifests.

    Sets ``self._portal_name``, deletes every key under that prefix in
    ``self._etcd_l`` and ``self._etcd_f``, then builds
    ``self._portal_manifest_l`` / ``self._portal_manifest_f`` and commits
    each to its own etcd store. The ``_l`` / ``_f`` suffixes presumably
    denote the leader and follower sides -- confirm against the test class.
    """
    self._portal_name = 'test_portal'
    self._etcd_l.delete_prefix(self._portal_name)
    self._etcd_f.delete_prefix(self._portal_name)

    # Read the clock once: two separate datetime.now() calls could fall on
    # opposite sides of an hour boundary and give the two manifests
    # different hour-trimmed begin timestamps.
    begin_timestamp = common.trim_timestamp_by_hourly(
        common.convert_datetime_to_timestamp(datetime.now()))

    self._portal_manifest_l = common_pb.DataJoinPortalManifest(
        name=self._portal_name,
        input_partition_num=4,
        output_partition_num=2,
        input_data_base_dir='./portal_input_l',
        output_data_base_dir='./portal_output_l',
        begin_timestamp=begin_timestamp)
    self._portal_manifest_f = common_pb.DataJoinPortalManifest(
        name=self._portal_name,
        input_partition_num=2,
        output_partition_num=2,
        input_data_base_dir='./portal_input_f',
        output_data_base_dir='./portal_output_f',
        begin_timestamp=begin_timestamp)

    common.commit_portal_manifest(self._etcd_l, self._portal_manifest_l)
    common.commit_portal_manifest(self._etcd_f, self._portal_manifest_f)
def _update_portal_commited_timestamp(self, new_committed_datetime):
    """Commit a copy of the portal manifest with a newer committed timestamp.

    The current manifest is copied while ``self._lock`` is held; the copy's
    ``committed_timestamp`` is then set to ``new_committed_datetime``
    trimmed to the hour and the copy is committed through
    ``common.commit_portal_manifest``. This method does not assign
    ``self._portal_manifest``; it only returns the new manifest.

    Args:
        new_committed_datetime: datetime that must be strictly later than
            the hour-trimmed committed timestamp of the current manifest.

    Returns:
        The newly committed ``common_pb.DataJoinPortalManifest``.

    Raises:
        AssertionError: if ``new_committed_datetime`` is not later than the
            current committed time.
    """
    with self._lock:
        old_committed_datetime = common.convert_timestamp_to_datetime(
            common.trim_timestamp_by_hourly(
                self._portal_manifest.committed_timestamp))
        assert new_committed_datetime > old_committed_datetime
        # Work on a private copy so the shared manifest is not mutated.
        new_manifest = common_pb.DataJoinPortalManifest()
        new_manifest.MergeFrom(self._portal_manifest)

    # CopyFrom rather than MergeFrom: merging skips fields that hold their
    # default value in the source, so stale sub-fields (e.g. nanos) copied
    # from the old timestamp would otherwise survive in the new one.
    new_manifest.committed_timestamp.CopyFrom(
        common.trim_timestamp_by_hourly(
            common.convert_datetime_to_timestamp(new_committed_datetime)))
    common.commit_portal_manifest(self._etcd, new_manifest)
    return new_manifest
def _prepare_test(self):
    """Build the manifest, options and reference time, then generate input.

    Populates ``self._portal_manifest``, ``self._portal_options`` and
    ``self._date_time`` (the current time after a round trip through the
    hourly trim helper), and finally calls ``self._generate_input_data()``.
    """
    manifest_fields = dict(
        name='test_portal',
        input_partition_num=4,
        output_partition_num=8,
        input_data_base_dir='./portal_input',
        output_data_base_dir='./portal_output',
    )
    self._portal_manifest = common_pb.DataJoinPortalManifest(
        **manifest_fields
    )

    validator_options = dj_pb.ExampleValidatorOptions(
        example_validator='EXAMPLE_VALIDATOR',
        validate_event_time=True,
    )
    raw_data_options = dj_pb.RawDataOptions(raw_data_iter='TF_RECORD')
    self._portal_options = dj_pb.DataJoinPotralOptions(
        example_validator=validator_options,
        reducer_buffer_size=128,
        raw_data_options=raw_data_options,
        use_mock_etcd=True,
    )

    # Current time, converted to a timestamp, trimmed by the hourly helper
    # and converted back to a datetime.
    now_timestamp = common.convert_datetime_to_timestamp(datetime.now())
    hourly_timestamp = common.trim_timestamp_by_hourly(now_timestamp)
    self._date_time = common.convert_timestamp_to_datetime(hourly_timestamp)

    self._generate_input_data()