Ver Fonte

[LIVY-472][SERVER] Improve the logs for fail-to-create session

## What changes were proposed in this pull request?

Livy currently doesn't log a clear message when it fails to create a session: it only says that the session-related app tag cannot be found in RM, but doesn't tell the user how to search for and find the true root cause. This change makes the log messages clearer.

## How was this patch tested?

Local verification.

Author: jerryshao <sshao@hortonworks.com>

Closes #96 from jerryshao/LIVY-472.
jerryshao há 7 anos atrás
pai
commit
ca4cad2296

+ 1 - 1
server/src/main/scala/org/apache/livy/server/interactive/InteractiveSessionServlet.scala

@@ -70,7 +70,7 @@ class InteractiveSessionServlet(
         Option(session.logLines())
           .map { lines =>
             val size = 10
-            var from = math.max(0, lines.length - size)
+            val from = math.max(0, lines.length - size)
             val until = from + size
 
             lines.view(from, until)

+ 11 - 8
server/src/main/scala/org/apache/livy/utils/SparkYarnApp.scala

@@ -25,6 +25,7 @@ import scala.concurrent._
 import scala.concurrent.duration._
 import scala.language.postfixOps
 import scala.util.Try
+import scala.util.control.NonFatal
 
 import org.apache.hadoop.yarn.api.records.{ApplicationId, ApplicationReport, FinalApplicationStatus, YarnApplicationState}
 import org.apache.hadoop.yarn.client.api.YarnClient
@@ -70,7 +71,7 @@ object SparkYarnApp extends Logging {
     override def run(): Unit = {
       while (true) {
         if (!leakedAppTags.isEmpty) {
-          // kill the app if found it and remove it if exceeding a threashold
+          // kill the app if found it and remove it if exceeding a threshold
           val iter = leakedAppTags.entrySet().iterator()
           var isRemoved = false
           val now = System.currentTimeMillis()
@@ -179,9 +180,11 @@ class SparkYarnApp private[utils] (
         if (deadline.isOverdue) {
           process.foreach(_.destroy())
           leakedAppTags.put(appTag, System.currentTimeMillis())
-          throw new Exception(s"No YARN application is found with tag $appTagLowerCase in " +
-            livyConf.getTimeAsMs(LivyConf.YARN_APP_LOOKUP_TIMEOUT)/1000 + " seconds. " +
-            "Please check your cluster status, it is may be very busy.")
+          throw new IllegalStateException(s"No YARN application is found with tag" +
+            s" $appTagLowerCase in ${livyConf.getTimeAsMs(LivyConf.YARN_APP_LOOKUP_TIMEOUT)/1000}" +
+            " seconds. This may be because 1) spark-submit fail to submit application to YARN; " +
+            "or 2) YARN cluster doesn't have enough resources to start the application in time. " +
+            "Please check Livy log and YARN log to know the details.")
         } else {
           Clock.sleep(pollInterval.toMillis)
           getAppIdFromTag(appTagLowerCase, pollInterval, deadline)
@@ -290,12 +293,12 @@ class SparkYarnApp private[utils] (
 
       debug(s"$appId $state ${yarnDiagnostics.mkString(" ")}")
     } catch {
-      case e: InterruptedException =>
+      case _: InterruptedException =>
         yarnDiagnostics = ArrayBuffer("Session stopped by user.")
         changeState(SparkApp.State.KILLED)
-      case e: Throwable =>
-        error(s"Error whiling refreshing YARN state: $e")
-        yarnDiagnostics = ArrayBuffer(e.toString, e.getStackTrace().mkString(" "))
+      case NonFatal(e) =>
+        error(s"Error whiling refreshing YARN state", e)
+        yarnDiagnostics = ArrayBuffer(e.getMessage)
         changeState(SparkApp.State.FAILED)
     }
   }