def _align_data_index(self, data_instance, flowid, data_application=None):
    """Exchange sample ids between guest and host for one fold.

    The guest sends the keys of ``data_instance`` (every value replaced
    by ``1``) to the host; the host joins the received table with its own
    ``data_instance`` and keeps its own values.

    Args:
        data_instance: Table-like object exposing ``schema``,
            ``mapValues`` and ``join``.
        flowid: Identifier used as the single-element transfer suffix.
        data_application: ``consts.TRAIN_DATA`` or ``consts.TEST_DATA``;
            selects which transfer variable carries the ids.

    Returns:
        On the host, the joined table with the original header restored
        in its schema. ``None`` on the guest, for any other role, and
        when ``data_application`` is missing or unrecognised (a warning
        is logged in the latter two cases).
    """
    # Read first: the joined table built below gets this header put back.
    original_header = data_instance.schema.get('header')

    if data_application is None:
        LOGGER.warning("not data_application!")
        return None

    sid_variables = CrossValidationTransferVariable()
    if data_application == consts.TRAIN_DATA:
        sid_channel = sid_variables.train_sid
    elif data_application == consts.TEST_DATA:
        sid_channel = sid_variables.test_sid
    else:
        LOGGER.warning("data_application error!")
        return None

    transfer_suffix = (flowid, )

    if self.role == consts.GUEST:
        # Only the keys matter to the peer, so the values are dropped.
        guest_sids = data_instance.mapValues(lambda v: 1)
        sid_channel.remote(guest_sids,
                           role=consts.HOST,
                           idx=-1,
                           suffix=transfer_suffix)
        LOGGER.info("remote {} to host".format(data_application))
        return None

    if self.role == consts.HOST:
        guest_sids = sid_channel.get(idx=0, suffix=transfer_suffix)
        LOGGER.info("get {} from guest".format(data_application))
        # The combiner keeps the host-side value for every matched key.
        aligned = guest_sids.join(data_instance, lambda s, d: d)
        aligned.schema['header'] = original_header
        return aligned

    # Any other role takes no part in the exchange.
    return None
def _align_data_index(self, data_instance, flowid, data_application=None): schema = data_instance.schema if data_application is None: # LOGGER.warning("not data_application!") # return raise ValueError( "In _align_data_index, data_application should be provided.") transfer_variable = CrossValidationTransferVariable() if data_application == consts.TRAIN_DATA: transfer_id = transfer_variable.train_sid elif data_application == consts.TEST_DATA: transfer_id = transfer_variable.test_sid else: raise ValueError( "In _align_data_index, data_application should be provided.") if self.role == consts.GUEST: data_sid = data_instance.mapValues(lambda v: 1) transfer_id.remote(data_sid, role=consts.HOST, idx=-1, suffix=(flowid, )) LOGGER.info("remote {} to host".format(data_application)) return data_instance elif self.role == consts.HOST: data_sid = transfer_id.get(idx=0, suffix=(flowid, )) LOGGER.info("get {} from guest".format(data_application)) join_data_insts = data_sid.join(data_instance, lambda s, d: d) join_data_insts.schema = schema return join_data_insts