class SparkMagicBase(Magics): def __init__(self, shell, data=None, spark_events=None): # You must call the parent constructor super(SparkMagicBase, self).__init__(shell) self.logger = SparkLog(u"SparkMagics") self.ipython_display = IpythonDisplay() self.spark_controller = SparkController(self.ipython_display) self.logger.debug("Initialized spark magics.") if spark_events is None: spark_events = SparkEvents() spark_events.emit_library_loaded_event() def _get_session_name_by_session(self, session): session_name = self.spark_controller.session_manager.get_session_name_by_id( session.id) # 如果session不存在,则将session激活并加入session_list if not session_name: session_name = session.session_name if session_name: self.spark_controller.session_manager.add_session( session_name, session) session.already_start() return session_name else: return session_name return None def init_livy_session(self, language="python"): ''' 执行sql时自动初始化sql :return: ''' return self.__get_or_create_session(language) def __get_or_create_session(self, language): proxy_user = getpass.getuser() self.session_language = language endpoint = build_endpoint(self.session_language) kernel_instance_id = id(self.shell.kernel) session_name_seleted = self.spark_controller.generate_livy_session_name( kernel_instance_id) properties = conf.get_session_properties(self.session_language) properties["proxyUser"] = proxy_user properties["session_language"] = self.session_language properties["session_name"] = session_name_seleted session_info_list = self.spark_controller.get_all_sessions_endpoint( endpoint) for session in session_info_list: # session kind 必须一致 if session.kind != properties['kind']: continue # 区分pyspark 及 pyspark3 if session.session_language != properties['session_language']: continue session_name = self._get_session_name_by_session(session) if session_name == session_name_seleted: if session.status in constants.HEALTHY_SESSION_STATUS: return session_name_seleted elif session.status in constants.FINAL_STATEMENT_STATUS: # FINAL, recreate new session self.spark_controller.add_session(session_name_seleted, endpoint, False, properties) return session_name_seleted else: # 如果livy中没有session,则创建session self.spark_controller.add_session(session_name_seleted, endpoint, False, properties) return session_name_seleted def execute_spark(self, cell, output_var, samplemethod, maxrows, samplefraction, session_name, coerce): (success, out) = self.spark_controller.run_command(Command(cell), session_name) if not success: self.ipython_display.send_error(out) else: if isinstance(out, string_types): self.ipython_display.write(out) elif isinstance(out, dict): df = convert_data_struct_to_dataframe(out) html = df.fillna('NULL').astype(str).to_html(notebook=True) self.ipython_display.html(html) else: self.ipython_display.write(out) if output_var is not None: spark_store_command = self._spark_store_command( output_var, samplemethod, maxrows, samplefraction, coerce) df = self.spark_controller.run_command(spark_store_command, session_name) self.shell.user_ns[output_var] = df @staticmethod def _spark_store_command(output_var, samplemethod, maxrows, samplefraction, coerce): return SparkStoreCommand(output_var, samplemethod, maxrows, samplefraction, coerce=coerce) def execute_sqlquery(self, cell, samplemethod, maxrows, samplefraction, session, output_var, quiet, coerce): sqlquery = self._sqlquery(cell, samplemethod, maxrows, samplefraction, coerce) df = self.spark_controller.run_sqlquery(sqlquery, session) if output_var is not None: self.shell.user_ns[output_var] = df if quiet: return None else: return df @staticmethod def _sqlquery(cell, samplemethod, maxrows, samplefraction, coerce): return SQLQuery(cell, samplemethod, maxrows, samplefraction, coerce=coerce) def _print_endpoint_info(self, info_sessions, current_session_id): if info_sessions: info_sessions = sorted(info_sessions, key=lambda s: s.id) html = get_sessions_info_html(info_sessions, current_session_id) self.ipython_display.html(html) else: self.ipython_display.html(u'No active sessions.')