Browse Source

Backport PR #11358: Fix Handling of WebSocket Startup Errors

Steven Silvester 3 years ago
parent
commit
143aae3a49

+ 36 - 5
packages/apputils/src/sessioncontext.tsx

@@ -652,12 +652,19 @@ export class SessionContext implements ISessionContext {
    */
   async restartKernel(): Promise<void> {
     const kernel = this.session?.kernel || null;
+    if (this._isRestarting) {
+      return;
+    }
     this._isRestarting = true;
     this._isReady = false;
     this._statusChanged.emit('restarting');
-    await this.session?.kernel?.restart();
+    try {
+      await this.session?.kernel?.restart();
+      this._isReady = true;
+    } catch (e) {
+      console.error(e);
+    }
     this._isRestarting = false;
-    this._isReady = true;
     this._statusChanged.emit(this.session?.kernel?.status || 'unknown');
     this._kernelChanged.emit({
       name: 'kernel',
@@ -832,9 +839,7 @@ export class SessionContext implements ISessionContext {
       this._pendingKernelName = model.name;
     }
 
-    if (this._session && !this._isTerminating) {
-      await this._shutdownSession();
-    } else if (!this._session) {
+    if (!this._session) {
       this._kernelChanged.emit({
         name: 'kernel',
         oldValue: null,
@@ -848,6 +853,17 @@ export class SessionContext implements ISessionContext {
       this._initStarted.resolve(void 0);
     }
 
+    // If we already have a session, just change the kernel.
+    if (this._session && !this._isTerminating) {
+      try {
+        await this._session.changeKernel(model);
+        return this._session.kernel;
+      } catch (err) {
+        void this._handleSessionError(err);
+        throw err;
+      }
+    }
+
     // Use a UUID for the path to overcome a race condition on the server
     // where it will re-use a session for a given path but only after
     // the kernel finishes starting.
@@ -965,6 +981,13 @@ export class SessionContext implements ISessionContext {
     } catch (err) {
       // no-op
     }
+    await this._displayKernelError(message, traceback);
+  }
+
+  /**
+   * Display a kernel error built from the given message and traceback.
+   */
+  private async _displayKernelError(message: string, traceback: string) {
     const body = (
       <div>
         {message && <pre>{message}</pre>}
@@ -1037,6 +1060,14 @@ export class SessionContext implements ISessionContext {
     sender: Session.ISessionConnection,
     status: Kernel.Status
   ): void {
+    if (status === 'dead') {
+      const model = sender.kernel?.model;
+      if (model?.reason) {
+        const traceback = (model as any).traceback || '';
+        void this._displayKernelError(model.reason, traceback);
+      }
+    }
+
     // Set that this kernel is busy, if we haven't already
     // If we have already, and now we aren't busy, dispose
     // of the busy disposable.

+ 44 - 7
packages/services/src/kernel/default.ts

@@ -119,10 +119,13 @@ export class KernelConnection implements Kernel.IKernelConnection {
    * The kernel model
    */
   get model(): Kernel.IModel {
-    return {
-      id: this.id,
-      name: this.name
-    };
+    return (
+      this._model || {
+        id: this.id,
+        name: this.name,
+        reason: this._reason
+      }
+    );
   }
 
   /**
@@ -1257,10 +1260,40 @@ export class KernelConnection implements Kernel.IKernelConnection {
     // Ensure incoming binary messages are not Blobs
     this._ws.binaryType = 'arraybuffer';
 
+    let alreadyCalledOnclose = false;
+
+    const earlyClose = async (evt: Event) => {
+      // If the websocket was closed early, that could mean
+      // that the kernel is actually dead. Try getting the
+      // kernel model from the REST API -- if that fails, or
+      // the model reports a 'dead' execution state, mark the
+      // kernel as dead; otherwise just follow the typical
+      // websocket closed protocol.
+      if (alreadyCalledOnclose) {
+        return;
+      }
+      alreadyCalledOnclose = true;
+      this._reason = '';
+      this._model = undefined;
+      try {
+        const model = await restapi.getKernelModel(this._id, settings);
+        this._model = model;
+        if (model?.execution_state === 'dead') {
+          this._updateStatus('dead');
+        } else {
+          this._onWSClose(evt);
+        }
+      } catch (e) {
+        this._reason = 'Kernel died unexpectedly';
+        this._updateStatus('dead');
+      }
+      return;
+    };
+
     this._ws.onmessage = this._onWSMessage;
     this._ws.onopen = this._onWSOpen;
-    this._ws.onclose = this._onWSClose;
-    this._ws.onerror = this._onWSClose;
+    this._ws.onclose = earlyClose;
+    this._ws.onerror = earlyClose;
   };
 
   /**
@@ -1469,6 +1502,8 @@ export class KernelConnection implements Kernel.IKernelConnection {
    * Handle a websocket open event.
    */
   private _onWSOpen = (evt: Event) => {
+    this._ws!.onclose = this._onWSClose;
+    this._ws!.onerror = this._onWSClose;
     this._updateConnectionStatus('connected');
   };
 
@@ -1513,7 +1548,7 @@ export class KernelConnection implements Kernel.IKernelConnection {
   /**
    * Handle a websocket close event.
    */
-  private _onWSClose = (evt: CloseEvent) => {
+  private _onWSClose = (evt: Event) => {
     if (!this.isDisposed) {
       this._reconnect();
     }
@@ -1529,6 +1564,7 @@ export class KernelConnection implements Kernel.IKernelConnection {
 
   private _id = '';
   private _name = '';
+  private _model: Kernel.IModel | undefined;
   private _status: KernelMessage.Status = 'unknown';
   private _connectionStatus: Kernel.ConnectionStatus = 'connecting';
   private _kernelSession = '';
@@ -1573,6 +1609,7 @@ export class KernelConnection implements Kernel.IKernelConnection {
   private _msgIdToDisplayIds = new Map<string, string[]>();
   private _msgChain: Promise<void> = Promise.resolve();
   private _hasPendingInput = false;
+  private _reason = '';
   private _noOp = () => {
     /* no-op */
   };

+ 26 - 0
packages/services/src/kernel/restapi.ts

@@ -21,6 +21,32 @@ export interface IModel {
    * The name of the kernel.
    */
   readonly name: string;
+
+  /**
+   * The kernel execution state.
+   */
+  readonly execution_state?: string;
+
+  /**
+   * The timestamp of the last activity on the kernel.
+   */
+  // eslint-disable-next-line camelcase
+  readonly last_activity?: string;
+
+  /**
+   * The number of active connections to the kernel.
+   */
+  readonly connections?: number;
+
+  /**
+   * The reason the kernel died, if applicable.
+   */
+  readonly reason?: string;
+
+  /**
+   * The traceback for a dead kernel, if applicable.
+   */
+  readonly traceback?: string;
 }
 
 /**