示例#1
0
 def load_udf(self, module_name, func_name):
     """Import ``module_name`` and return its attribute named ``func_name``.

     The import level is left at the interpreter default instead of being
     forced to -1.  Python 2 already defaults to -1, so behaviour there is
     unchanged, while Python 3 rejects negative levels with ValueError,
     which made every load end up in the error path.

     On any failure the exception is reported through
     ``write_user_exception`` on ``self.stream_error`` and
     ``self.close_controller(-1)`` is called; in that case nothing is
     returned explicitly.
     """
     try:
         module = __import__(module_name, globals(), locals(), [func_name])
         return module.__dict__[func_name]
     except:
         # These errors should always be caused by user code.
         # NOTE(review): the bare except also traps SystemExit and
         # KeyboardInterrupt; kept because user modules may raise anything
         # while being imported.
         write_user_exception(module_name, self.stream_error, NUM_LINES_OFFSET_TRACE)
         self.close_controller(-1)
示例#2
0
 def load_udf(self, module_name, func_name):
     """Return the object called ``func_name`` from module ``module_name``.

     ``__import__`` is called without an explicit ``level``.  The previous
     hard-coded ``-1`` equals the Python 2 default, but Python 3 raises
     ValueError for negative levels, so the lookup could never succeed
     there.

     Any exception raised while importing or looking up the name is handed
     to ``write_user_exception`` (writing to ``self.stream_error``) and the
     controller is closed with status -1.
     """
     try:
         udf_module = __import__(module_name, globals(), locals(),
                                 [func_name])
         return udf_module.__dict__[func_name]
     except:
         # These errors should always be caused by user code.
         # NOTE(review): bare except is kept on purpose; it also catches
         # SystemExit/KeyboardInterrupt raised during the user import.
         write_user_exception(module_name, self.stream_error,
                              NUM_LINES_OFFSET_TRACE)
         self.close_controller(-1)
示例#3
0
 def load_udaf(self, module_name, class_name, func_name):
     """Return the attribute ``func_name`` of the aggregate instance.

     The instance is created on first use by importing ``module_name``,
     looking up ``class_name`` in it and calling it with no arguments; it
     is then kept in ``self.udaf_instance`` and reused by later calls.

     Any exception is reported via ``write_user_exception`` on
     ``self.stream_error`` followed by ``self.close_controller(-1)``.
     """
     try:
         if self.udaf_instance is not None:
             return getattr(self.udaf_instance, func_name)

         udaf_module = __import__(module_name, globals(), locals(), [class_name])
         udaf_class = udaf_module.__dict__[class_name]
         self.udaf_instance = udaf_class()
         return getattr(self.udaf_instance, func_name)
     except:
         # These errors should always be caused by user code.
         write_user_exception(module_name, self.stream_error, NUM_LINES_OFFSET_TRACE)
         self.close_controller(-1)
示例#4
0
 def load_udaf(self, module_name, class_name, func_name):
     """Look up ``func_name`` on the (lazily created) aggregate object.

     When ``self.udaf_instance`` is still ``None`` the class ``class_name``
     is fetched from module ``module_name``, instantiated without
     arguments and stored on ``self``.  Later calls reuse that object.

     Failures of any kind are passed to ``write_user_exception`` (output
     goes to ``self.stream_error``) and the controller is closed with
     status -1.
     """
     try:
         instance = self.udaf_instance
         if instance is None:
             owner = __import__(module_name, globals(), locals(),
                                [class_name])
             instance = owner.__dict__[class_name]()
             self.udaf_instance = instance
         return getattr(instance, func_name)
     except:
         # These errors should always be caused by user code.
         write_user_exception(module_name, self.stream_error,
                              NUM_LINES_OFFSET_TRACE)
         self.close_controller(-1)
示例#5
0
文件: controller.py 项目: sirpkt/tajo
    def process_input(self, func_name, func, input_str):
        """Decode one record, call ``func`` and write the serialized result.

        ``input_str`` is always deserialized first.  ``func`` is then called
        without arguments when ``func_name`` is ``GET_PARTIAL_RESULT_FUNC``
        (result encoded with ``json.dumps``) or ``GET_FINAL_RESULT_FUNC``
        (result encoded with ``serialize_output``); otherwise it receives
        the deserialized values as positional arguments.

        Deserialization failures close the controller with -3, failures in
        the call/serialization step with -2.  Any other ``Exception`` has
        its traceback printed to ``self.stream_error`` before
        ``sys.exit(-3)``.  All four streams are flushed at the end.
        """
        try:
            try:
                if self.should_log:
                    raw_note = "Serialized Input: %s" % (input_str)
                    self.log_message(raw_note)
                call_args = deserialize_input(input_str)
                if self.should_log:
                    parsed_note = "Deserialized Input: %s" % (unicode(call_args))
                    self.log_message(parsed_note)
            except:
                # Capture errors where the user passes in bad data.
                write_user_exception(self.module_name, self.stream_error,
                                     NUM_LINES_OFFSET_TRACE)
                self.close_controller(-3)

            try:
                if func_name == GET_PARTIAL_RESULT_FUNC:
                    record = json.dumps(func())
                else:
                    # Only the final-result call takes no arguments here.
                    if func_name == GET_FINAL_RESULT_FUNC:
                        result = func()
                    else:
                        result = func(*call_args)
                    record = serialize_output(result, self.output_schema)

                if self.should_log:
                    self.log_message("Serialized Output: %s" % record)
            except:
                # These errors should always be caused by user code.
                write_user_exception(self.module_name, self.stream_error,
                                     NUM_LINES_OFFSET_TRACE)
                self.close_controller(-2)

            framed = "%s%s" % (record, END_RECORD_DELIM)
            self.stream_output.write(framed)
        except Exception:
            # This should only catch internal exceptions with the controller
            # and pig- not with user code.
            import traceback
            traceback.print_exc(file=self.stream_error)
            sys.exit(-3)

        for stream in (sys.stdout, sys.stderr,
                       self.stream_output, self.stream_error):
            stream.flush()
示例#6
0
    def process_input(self, func_name, func, input_str):
        """Run ``func`` for a single serialized record and emit the output.

        Steps, in order: deserialize ``input_str``; call ``func`` (with no
        arguments for ``GET_PARTIAL_RESULT_FUNC`` and
        ``GET_FINAL_RESULT_FUNC``, with the deserialized values otherwise);
        serialize the result (``json.dumps`` for the partial result,
        ``serialize_output`` with ``self.output_schema`` for the rest);
        write it followed by ``END_RECORD_DELIM`` to ``self.stream_output``;
        flush stdout, stderr and both controller streams.

        Errors while deserializing close the controller with -3, errors
        while calling/serializing with -2, and any remaining ``Exception``
        prints a traceback to ``self.stream_error`` and exits with -3.
        """
        try:
            try:
                if self.should_log:
                    self.log_message("Serialized Input: %s" % (input_str))
                inputs = deserialize_input(input_str)
                if self.should_log:
                    shown = unicode(inputs)
                    self.log_message("Deserialized Input: %s" % (shown))
            except:
                # Capture errors where the user passes in bad data.
                write_user_exception(self.module_name, self.stream_error, NUM_LINES_OFFSET_TRACE)
                self.close_controller(-3)

            try:
                if func_name == GET_PARTIAL_RESULT_FUNC:
                    output = json.dumps(func())
                elif func_name == GET_FINAL_RESULT_FUNC:
                    output = serialize_output(func(), self.output_schema)
                else:
                    output = serialize_output(func(*inputs), self.output_schema)

                if self.should_log:
                    self.log_message("Serialized Output: %s" % output)
            except:
                # These errors should always be caused by user code.
                write_user_exception(self.module_name, self.stream_error, NUM_LINES_OFFSET_TRACE)
                self.close_controller(-2)

            self.stream_output.write("%s%s" % (output, END_RECORD_DELIM))
        except Exception:
            # This should only catch internal exceptions with the controller
            # and pig- not with user code.
            import traceback
            traceback.print_exc(file=self.stream_error)
            sys.exit(-3)

        # Flush the interpreter-level streams first, then the controller's own.
        sys.stdout.flush()
        sys.stderr.flush()
        self.stream_output.flush()
        self.stream_error.flush()
示例#7
0
    def main(self,
             module_name, file_path, func_name, cache_path,
             output_stream_path, error_stream_path, log_file_name, output_schema):
        """Load one user function and apply it to every record read from input.

        Setup: ``sys.stdin`` is reopened unbuffered in binary mode, and
        separate unbuffered handles on the stdout/stderr descriptors are
        stored as ``self.stream_output`` / ``self.stream_error``.
        ``file_path``, ``cache_path`` and ``'.'`` are appended to
        ``sys.path`` before ``func_name`` is imported from ``module_name``.

        Loop: records come from ``self.get_next_input()`` until it returns
        ``END_OF_STREAM``.  Each record is deserialized, passed to the user
        function as positional arguments, serialized against
        ``output_schema`` and written to ``self.stream_output`` followed by
        ``END_RECORD_DELIM``.

        Failure handling (status passed to ``self.close_controller``):
        -1 when the function cannot be loaded, -3 when the input cannot be
        deserialized, -2 when the user function raises.  Any other
        ``Exception`` inside the loop has its traceback printed to
        ``self.stream_error`` and ends the process with ``sys.exit(-3)``.

        ``output_stream_path`` and ``error_stream_path`` are currently only
        referenced by the commented-out logging setup below.

        The import no longer forces ``level=-1``: that is the Python 2
        default anyway, and Python 3 raises ValueError for negative levels,
        which sent every start-up into the load-failure path.
        """
        sys.stdin = os.fdopen(sys.stdin.fileno(), 'rb', 0)

        # Need to ensure that user functions can't write to the streams we use to communicate with pig.
        self.stream_output = os.fdopen(sys.stdout.fileno(), 'wb', 0)
        self.stream_error = os.fdopen(sys.stderr.fileno(), 'wb', 0)

        self.input_stream = sys.stdin
        # TODO: support controller logging
        # self.log_stream = open(output_stream_path, 'a')
        # sys.stderr = open(error_stream_path, 'w')

        sys.path.append(file_path)
        sys.path.append(cache_path)
        sys.path.append('.')

        # Logging is switched off here; the guarded blocks below only run
        # when this flag is flipped by hand.
        should_log = False
        if should_log:
            logging.basicConfig(filename=log_file_name, format="%(asctime)s %(levelname)s %(message)s", level=udf_logging.udf_log_level)
            logging.info("To reduce the amount of information being logged only a small subset of rows are logged at the "
                         "INFO level.  Call udf_logging.set_log_level_debug in tajo_util to see all rows being processed.")

        # The first record is read before the user function is imported.
        input_str = self.get_next_input()

        try:
            func = __import__(module_name, globals(), locals(), [func_name]).__dict__[func_name]
        except:
            # These errors should always be caused by user code.
            write_user_exception(module_name, self.stream_error, NUM_LINES_OFFSET_TRACE)
            self.close_controller(-1)

        log_message = logging.info
        if udf_logging.udf_log_level == logging.DEBUG:
            log_message = logging.debug

        while input_str != END_OF_STREAM:
            try:
                try:
                    if should_log:
                        log_message("Serialized Input: %s" % (input_str))
                    inputs = deserialize_input(input_str)
                    if should_log:
                        log_message("Deserialized Input: %s" % (unicode(inputs)))
                except:
                    # Capture errors where the user passes in bad data.
                    write_user_exception(module_name, self.stream_error, NUM_LINES_OFFSET_TRACE)
                    self.close_controller(-3)

                try:
                    func_output = func(*inputs)
                    if should_log:
                        log_message("UDF Output: %s" % (unicode(func_output)))
                except:
                    # These errors should always be caused by user code.
                    write_user_exception(module_name, self.stream_error, NUM_LINES_OFFSET_TRACE)
                    self.close_controller(-2)

                # Serialization sits outside the user-code handler, so a
                # failure here is treated as an internal error below.
                output = serialize_output(func_output, output_schema)
                if should_log:
                    log_message("Serialized Output: %s" % (output))

                self.stream_output.write("%s%s" % (output, END_RECORD_DELIM))
            except Exception:
                # This should only catch internal exceptions with the controller
                # and pig- not with user code.
                import traceback
                traceback.print_exc(file=self.stream_error)
                sys.exit(-3)

            sys.stdout.flush()
            sys.stderr.flush()
            self.stream_output.flush()
            self.stream_error.flush()

            input_str = self.get_next_input()