Merge branch 'yili' of http://gogsb.soaringnova.com/ylproj/jupyterlab into yili

Leo · 2 years ago · commit 429e01d34d

+ 20 - 4
Dockerfile.dev

@@ -83,7 +83,7 @@ RUN /opt/conda/bin/jupyter lab --generate-config && \
 
 FROM builder2 as builder3
 
-RUN apt-get update &&  apt-get install -y --no-install-recommends openjdk-8-jdk  krb5-user
+RUN apt-get update &&  apt-get install -y --no-install-recommends openjdk-8-jdk  krb5-user net-tools iputils-ping
 RUN mamba install nodejs  sparkmagic  pyhive thrift sasl thrift_sasl -y
 # RUN /opt/conda/bin/pip install sparkmagipc -i https://pypi.douban.com/simple
 ENV PATH /opt/conda/bin/:$PATH
@@ -100,11 +100,27 @@ RUN /opt/conda/bin/pip install dist/*.whl -i https://mirror.baidu.com/pypi/simpl
 EXPOSE 8888
 
 
-FROM builder3 as image-test
-ADD confs/dev/krb5.conf /etc/
+FROM builder3 as image-dev
+# ADD confs/dev/krb5.conf /etc/
 ADD confs/dev/config.json .
 RUN mkdir -p $HOME/.sparkmagic && cp config.json $HOME/.sparkmagic
-ADD confs/dev/user.keytab /
+# ADD confs/dev/user.keytab /
+RUN echo "\
+[program:jupyter]\n\
+directory=/workspace\n\
+command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser' \n\
+autorestart=true\n\
+startretries=0\n\
+redirect_stderr=true\n\
+stdout_logfile=/var/log/jupyter.log\n\
+stdout_logfile_maxbytes=50MB\n\
+" > /etc/supervisor/conf.d/jupyter.conf
+
+FROM builder3 as image-test
+ADD confs/test/krb5.conf /etc/
+ADD confs/test/config.json .
+RUN mkdir -p $HOME/.sparkmagic && cp config.json $HOME/.sparkmagic
+ADD confs/test/user.keytab /
 RUN echo "\
 [program:jupyter]\n\
 directory=/workspace\n\

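For reference, the echo in the new image-dev stage (and the matching one in image-test) expands, via dash's escape-interpreting echo, to roughly this /etc/supervisor/conf.d/jupyter.conf:

    [program:jupyter]
    directory=/workspace
    command=/bin/bash -c '/opt/conda/bin/jupyter lab --ip 0.0.0.0 --port 8888 --allow-root --no-browser'
    autorestart=true
    startretries=0
    redirect_stderr=true
    stdout_logfile=/var/log/jupyter.log
    stdout_logfile_maxbytes=50MB
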
+ 4 - 0
Makefile

@@ -10,3 +10,7 @@ prod:
 test:
 	@DOCKER_BUILDKIT=1 docker build -f Dockerfile.dev  --build-arg BUILDKIT_INLINE_CACHE=1  --target image-test -t jupyterlab:test .
 
+
+dev:
+	@DOCKER_BUILDKIT=1 docker build -f Dockerfile.dev  --build-arg BUILDKIT_INLINE_CACHE=1  --target image-dev -t jupyterlab:dev .
+

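With this target, "make dev" builds the new image-dev stage of Dockerfile.dev and tags it jupyterlab:dev, the image the docker-compose.yml change below switches to.
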
+ 71 - 73
confs/dev/config.json

@@ -1,77 +1,75 @@
 {
-  "kernel_python_credentials" : {
-    "username": "",
-    "password": "",
-    "url": "http://10.254.7.8:30998",
-    "auth": "None"
-  },
-
-  "kernel_scala_credentials" : {
-    "username": "",
-    "password": "",
-    "url": "http://10.254.7.8:30998",
-    "auth": "None"
-  },
-  "kernel_r_credentials": {
-    "username": "",
-    "password": "",
-    "url": "http://10.254.7.8:30998"
-  },
-
-  "logging_config": {
-    "version": 1,
-    "formatters": {
-      "magicsFormatter": {
-        "format": "%(asctime)s\t%(levelname)s\t%(message)s",
-        "datefmt": ""
-      }
+    "kernel_python_credentials": {
+        "username": "",
+        "password": "",
+        "url": "http://192.168.199.27:8998",
+        "auth": "None"
     },
-    "handlers": {
-      "magicsHandler": {
-        "class": "hdijupyterutils.filehandler.MagicsFileHandler",
-        "formatter": "magicsFormatter",
-        "home_path": "~/.sparkmagic"
-      }
+    "kernel_scala_credentials": {
+        "username": "",
+        "password": "",
+        "url": "http://192.168.199.27:8998",
+        "auth": "None"
     },
-    "loggers": {
-      "magicsLogger": {
-        "handlers": ["magicsHandler"],
-        "level": "DEBUG",
-        "propagate": 0
-      }
-    }
-  },
-  "authenticators": {
-    "Kerberos": "sparkmagic.auth.kerberos.Kerberos",
-    "None": "sparkmagic.auth.customauth.Authenticator",
-    "Basic_Access": "sparkmagic.auth.basic.Basic"
-  },
-
-  "wait_for_idle_timeout_seconds": 15,
-  "livy_session_startup_timeout_seconds": 60,
-
-  "fatal_error_suggestion": "The code failed because of a fatal error:\n\t{}.\n\nSome things to try:\na) Make sure Spark has enough available resources for Jupyter to create a Spark context.\nb) Contact your Jupyter administrator to make sure the Spark magics library is configured correctly.\nc) Restart the kernel.",
-
-  "ignore_ssl_errors": false,
-
-  "session_configs": {
-    "driverMemory": "1000M",
-    "executorCores": 2
-  },
-
-  "use_auto_viz": true,
-  "coerce_dataframe": true,
-  "max_results_sql": 2500,
-  "pyspark_dataframe_encoding": "utf-8",
-
-  "heartbeat_refresh_seconds": 30,
-  "livy_server_heartbeat_timeout_seconds": 0,
-  "heartbeat_retry_seconds": 10,
-
-  "server_extension_default_kernel_name": "pysparkkernel",
-  "custom_headers": {},
-
-  "retry_policy": "configurable",
-  "retry_seconds_to_sleep_list": [0.2, 0.5, 1, 3, 5],
-  "configurable_retry_policy_max_retries": 8
+    "kernel_r_credentials": {
+        "username": "",
+        "password": "",
+        "url": "http://192.168.199.27:8998"
+    },
+    "logging_config": {
+        "version": 1,
+        "formatters": {
+            "magicsFormatter": {
+                "format": "%(asctime)s\t%(levelname)s\t%(message)s",
+                "datefmt": ""
+            }
+        },
+        "handlers": {
+            "magicsHandler": {
+                "class": "hdijupyterutils.filehandler.MagicsFileHandler",
+                "formatter": "magicsFormatter",
+                "home_path": "~/.sparkmagic"
+            }
+        },
+        "loggers": {
+            "magicsLogger": {
+                "handlers": [
+                    "magicsHandler"
+                ],
+                "level": "DEBUG",
+                "propagate": 0
+            }
+        }
+    },
+    "authenticators": {
+        "Kerberos": "sparkmagic.auth.kerberos.Kerberos",
+        "None": "sparkmagic.auth.customauth.Authenticator",
+        "Basic_Access": "sparkmagic.auth.basic.Basic"
+    },
+    "wait_for_idle_timeout_seconds": 15,
+    "livy_session_startup_timeout_seconds": 60,
+    "fatal_error_suggestion": "The code failed because of a fatal error:\n\t{}.\n\nSome things to try:\na) Make sure Spark has enough available resources for Jupyter to create a Spark context.\nb) Contact your Jupyter administrator to make sure the Spark magics library is configured correctly.\nc) Restart the kernel.",
+    "ignore_ssl_errors": false,
+    "session_configs": {
+        "driverMemory": "1000M",
+        "executorCores": 2
+    },
+    "use_auto_viz": true,
+    "coerce_dataframe": true,
+    "max_results_sql": 2500,
+    "pyspark_dataframe_encoding": "utf-8",
+    "heartbeat_refresh_seconds": 30,
+    "livy_server_heartbeat_timeout_seconds": 0,
+    "heartbeat_retry_seconds": 10,
+    "server_extension_default_kernel_name": "pysparkkernel",
+    "custom_headers": {},
+    "retry_policy": "configurable",
+    "retry_seconds_to_sleep_list": [
+        0.2,
+        0.5,
+        1,
+        3,
+        5
+    ],
+    "configurable_retry_policy_max_retries": 8
 }

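The dev config now points sparkmagic at a LAN Livy endpoint (http://192.168.199.27:8998, auth None); the old 10.254.7.8:30998 address moves to the new confs/test/config.json below. A minimal reachability check, as a TypeScript sketch assuming Livy's standard GET /sessions REST endpoint and a runtime with global fetch (Node 18+):

    const livyUrl = 'http://192.168.199.27:8998'; // from the dev config above

    async function checkLivy(): Promise<void> {
      // Livy answers GET /sessions with { from, total, sessions: [...] }.
      const res = await fetch(`${livyUrl}/sessions`);
      if (!res.ok) throw new Error(`Livy returned HTTP ${res.status}`);
      const body = (await res.json()) as { total: number };
      console.log(`Livy reachable, ${body.total} session(s) active`);
    }

    checkLivy().catch(err => console.error('Livy check failed:', err));
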
+ 12 - 14
confs/prod/config.json

@@ -1,12 +1,11 @@
 {
-  "kernel_python_credentials" : {
+  "kernel_python_credentials": {
     "username": "",
     "password": "",
     "url": "http://172.23.7.140:30998",
     "auth": "None"
   },
-
-  "kernel_scala_credentials" : {
+  "kernel_scala_credentials": {
     "username": "",
     "password": "",
     "url": "http://172.23.7.140:30998",
@@ -17,7 +16,6 @@
     "password": "",
     "url": "http://172.23.7.140:30998"
   },
-
   "logging_config": {
     "version": 1,
     "formatters": {
@@ -35,7 +33,9 @@
     },
     "loggers": {
       "magicsLogger": {
-        "handlers": ["magicsHandler"],
+        "handlers": [
+          "magicsHandler"
+        ],
         "level": "DEBUG",
         "propagate": 0
       }
@@ -46,32 +46,30 @@
     "None": "sparkmagic.auth.customauth.Authenticator",
     "Basic_Access": "sparkmagic.auth.basic.Basic"
   },
-
   "wait_for_idle_timeout_seconds": 15,
   "livy_session_startup_timeout_seconds": 60,
-
   "fatal_error_suggestion": "The code failed because of a fatal error:\n\t{}.\n\nSome things to try:\na) Make sure Spark has enough available resources for Jupyter to create a Spark context.\nb) Contact your Jupyter administrator to make sure the Spark magics library is configured correctly.\nc) Restart the kernel.",
-
   "ignore_ssl_errors": false,
-
   "session_configs": {
     "driverMemory": "1000M",
     "executorCores": 2
   },
-
   "use_auto_viz": true,
   "coerce_dataframe": true,
   "max_results_sql": 2500,
   "pyspark_dataframe_encoding": "utf-8",
-
   "heartbeat_refresh_seconds": 30,
   "livy_server_heartbeat_timeout_seconds": 0,
   "heartbeat_retry_seconds": 10,
-
   "server_extension_default_kernel_name": "pysparkkernel",
   "custom_headers": {},
-
   "retry_policy": "configurable",
-  "retry_seconds_to_sleep_list": [0.2, 0.5, 1, 3, 5],
+  "retry_seconds_to_sleep_list": [
+    0.2,
+    0.5,
+    1,
+    3,
+    5
+  ],
   "configurable_retry_policy_max_retries": 8
 }

+ 77 - 0
confs/test/config.json

@@ -0,0 +1,77 @@
+{
+  "kernel_python_credentials" : {
+    "username": "",
+    "password": "",
+    "url": "http://10.254.7.8:30998",
+    "auth": "None"
+  },
+
+  "kernel_scala_credentials" : {
+    "username": "",
+    "password": "",
+    "url": "http://10.254.7.8:30998",
+    "auth": "None"
+  },
+  "kernel_r_credentials": {
+    "username": "",
+    "password": "",
+    "url": "http://10.254.7.8:30998"
+  },
+
+  "logging_config": {
+    "version": 1,
+    "formatters": {
+      "magicsFormatter": {
+        "format": "%(asctime)s\t%(levelname)s\t%(message)s",
+        "datefmt": ""
+      }
+    },
+    "handlers": {
+      "magicsHandler": {
+        "class": "hdijupyterutils.filehandler.MagicsFileHandler",
+        "formatter": "magicsFormatter",
+        "home_path": "~/.sparkmagic"
+      }
+    },
+    "loggers": {
+      "magicsLogger": {
+        "handlers": ["magicsHandler"],
+        "level": "DEBUG",
+        "propagate": 0
+      }
+    }
+  },
+  "authenticators": {
+    "Kerberos": "sparkmagic.auth.kerberos.Kerberos",
+    "None": "sparkmagic.auth.customauth.Authenticator",
+    "Basic_Access": "sparkmagic.auth.basic.Basic"
+  },
+
+  "wait_for_idle_timeout_seconds": 15,
+  "livy_session_startup_timeout_seconds": 60,
+
+  "fatal_error_suggestion": "The code failed because of a fatal error:\n\t{}.\n\nSome things to try:\na) Make sure Spark has enough available resources for Jupyter to create a Spark context.\nb) Contact your Jupyter administrator to make sure the Spark magics library is configured correctly.\nc) Restart the kernel.",
+
+  "ignore_ssl_errors": false,
+
+  "session_configs": {
+    "driverMemory": "1000M",
+    "executorCores": 2
+  },
+
+  "use_auto_viz": true,
+  "coerce_dataframe": true,
+  "max_results_sql": 2500,
+  "pyspark_dataframe_encoding": "utf-8",
+
+  "heartbeat_refresh_seconds": 30,
+  "livy_server_heartbeat_timeout_seconds": 0,
+  "heartbeat_retry_seconds": 10,
+
+  "server_extension_default_kernel_name": "pysparkkernel",
+  "custom_headers": {},
+
+  "retry_policy": "configurable",
+  "retry_seconds_to_sleep_list": [0.2, 0.5, 1, 3, 5],
+  "configurable_retry_policy_max_retries": 8
+}

+ 0 - 0
confs/dev/core-site.xml → confs/test/core-site.xml


+ 0 - 0
confs/dev/emr-5xjsy31f_ailab.keytab → confs/test/emr-5xjsy31f_ailab.keytab


+ 0 - 0
confs/dev/hdfs-site.xml → confs/test/hdfs-site.xml


+ 0 - 0
confs/dev/hive-site.xml → confs/test/hive-site.xml


+ 0 - 0
confs/dev/hosts → confs/test/hosts


+ 0 - 0
confs/dev/krb5.conf → confs/test/krb5.conf


+ 0 - 0
confs/dev/user.keytab → confs/test/user.keytab


+ 0 - 0
confs/dev/yarn-site.xml → confs/test/yarn-site.xml


+ 0 - 0
confs/dev/信息.txt → confs/test/信息.txt


+ 2 - 4
docker-compose.yml

@@ -1,10 +1,10 @@
 version: '2'
 services:
   jupyter:
-    # hostname: jupyter
+    hostname: jupyter
     container_name: jupyter
     restart: always
-    image: jupyter:dag
+    image: jupyterlab:dev
     privileged: true
     ipc: host
     tty: true
@@ -18,5 +18,3 @@ services:
     volumes:
       # - ./config.json:/home/sxkj/.sparkmagic/config.json
       - ./:/workspace
-      - ./confs/krb5.conf:/etc/krb5.conf
-      - ./confs/config.json:/root/.sparkmagic/config.json

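The dev compose service no longer bind-mounts krb5.conf or the sparkmagic config: the image-dev stage above bakes confs/dev/config.json into $HOME/.sparkmagic and comments the Kerberos files out. A typical dev loop under these changes, assuming the Compose v1 CLI that the version '2' file implies:

    make dev && docker-compose up -d jupyter
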
+ 1 - 1
docker/Dockerfile

@@ -10,7 +10,7 @@ RUN echo "deb http://mirror.nju.edu.cn/debian/ bullseye main contrib non-free" >
     && pip config set global.index-url https://mirror.nju.edu.cn/pypi/web/simple \
     && pip install -U pip setuptools && pip install -r /src/docker_build/requirements.txt \
     && yarn config set registry https://registry.npmmirror.com \ 
-    && rm -rf sparkmagic && git clone  -b yili http://gogsb.soaringnova.com/ylproj/sparkmagic.git \
+    && rm -rf sparkmagic && git clone  -b yili http://gogsb.soaringnova.com/ylproj/aihub-sparkmagic.git sparkmagic \ 
     && ./bdist_wheel.sh && cd sparkmagic && ./bdist_wheel.sh
 
 FROM jupyterhub/jupyterhub:2.3

+ 17 - 3
packages/yili-dag/src/Dag.tsx

@@ -10,6 +10,8 @@ import OutputNodeInfo from './OutputNodeInfo';
 import ContextMenuView from './ContextMenu';
 import ReactDOM from 'react-dom';
 import { nanoid } from 'nanoid'
+import { signNodesSkip } from './utils';
+// getNodesSourceAndTarget
 
 // Sidebar UI component
 const { Stencil } = Addon;
@@ -281,13 +283,17 @@ export default class Dag extends React.Component<any, any> {
 
   // Run this node
   handleNodeRun = () => {
-    const data = this.state.contextMenuNode.getData() as NodeStatus;
-    this.state.contextMenuNode.setData({
+    const menuNode = this.state.contextMenuNode
+    const {script_nodes, script_edges} = signNodesSkip(this.state.dagGraph,menuNode, 'handle_run')
+    console.log('script_nodes:', script_nodes);
+    console.log('script_edges:', script_edges);
+    const data = menuNode.getData() as NodeStatus;
+    menuNode.setData({
       ...data,
       status: 'running'
     });
     setTimeout(() => {
-      this.state.contextMenuNode.setData({
+      menuNode.setData({
         ...data,
         status: 'success'
       });
@@ -297,10 +303,18 @@ export default class Dag extends React.Component<any, any> {
 
   // Run up to this node
   handleRunEndNode = () => {
+    const menuNode = this.state.contextMenuNode
+    const {script_nodes, script_edges} = signNodesSkip(this.state.dagGraph,menuNode, 'handle_run_end')
+    console.log('script_nodes:', script_nodes);
+    console.log('script_edges:', script_edges);
     this.setState({ contextMenu: null });
   };
   // Run starting from this node
   handleRunBeginNode = () => {
+    const menuNode = this.state.contextMenuNode
+    const {script_nodes, script_edges} = signNodesSkip(this.state.dagGraph,menuNode, 'handle_run_begin')
+    console.log('script_nodes:', script_nodes);
+    console.log('script_edges:', script_edges);
     this.setState({ contextMenu: null });
   };
 

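All three context-menu handlers (run this node, run up to this node, run from this node) now route through the new signNodesSkip helper from utils.ts; for the moment they only log the resulting script_nodes/script_edges rather than submitting them. The per-action skip semantics are illustrated in a sketch after the utils.ts diff below.
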
+ 146 - 63
packages/yili-dag/src/utils.ts

@@ -1,4 +1,74 @@
+// Data transformation
 export const DagToData = (graph: any, dagId: any) => {
+  const dagData = graph.toJSON()
+  const {edges, nodes} = getEdgesAndNodes(graph)
+  const {script_nodes, script_edges} = getScriptEdgesAndNodes(graph)
+  return {
+    dag_id: dagId,
+    user_name: "XXX",
+    user_id: 1,
+    nodes_task_name: "dfs",
+    nodes_task_id: 123,
+    itermidate_data: ["hdfs://host:port/uri"],
+    nodes,
+    edges,
+    dag_script: {
+      sub_nodes: script_nodes,
+      edges: script_edges,
+    },
+    graph: dagData
+  }
+  /* dagData.cells.forEach((item: any) => {
+    if (item?.shape === 'dag-edge') {
+      edges.push({
+        id: item.id,
+        source: item.source.cell,
+        target: item.target.cell
+      })
+    } else {
+      switch (item.data?.type) {
+        case "datasource":
+          nodes.push({
+            id: item.id,
+            name: item.data.nodeName,
+            op: "datasource",
+            data: {
+              input_source: item.data.inputSource,
+              input_table: item.data.dataTable,
+            }
+          })
+          break;
+        case "outputsource":
+          nodes.push({
+            id: item.id,
+            name: item.data.nodeName,
+            op: "datasource",
+            data: {
+              output_source: item.data.outputSource,
+            }
+          })
+          break;
+        default:
+          nodes.push({
+            id: item.id,
+            name: item.data.nodeName,
+            op: item.data.label,
+            data: {
+              input_number: item.data.inputNumber,
+              output: item.data.outputData,
+              script: item.data.scriptText,
+              param: item.data.paramText,
+              package: item.data.packageData
+            }
+          })
+          break;
+      }
+    }
+  }); */
+}
+
+// Collect all edges and nodes
+const getEdgesAndNodes = (graph: any) => {
   const dagData = graph.toJSON()
   const edges: any = []
   const nodes: any = []
@@ -50,6 +120,12 @@ export const DagToData = (graph: any, dagId: any) => {
       }
     }
   });
+  return {edges, nodes}
+}
+
+// Collect all script edges and nodes
+const getScriptEdgesAndNodes = (graph: any) => {
+  const dagData = graph.toJSON()
   const script_nodes: any = []
   const script_edges: any = []
   dagData.cells.forEach((item: any) => {
@@ -91,69 +167,7 @@ export const DagToData = (graph: any, dagId: any) => {
       }
     }
   })
-  return {
-    dag_id: dagId,
-    user_name: "XXX",
-    user_id: 1,
-    nodes_task_name: "dfs",
-    nodes_task_id: 123,
-    itermidate_data: ["hdfs://host:port/uri"],
-    nodes,
-    edges,
-    dag_script: {
-      sub_nodes: script_nodes,
-      edges: script_edges,
-    },
-    graph: dagData
-  }
-  /* dagData.cells.forEach((item: any) => {
-    if (item?.shape === 'dag-edge') {
-      edges.push({
-        id: item.id,
-        source: item.source.cell,
-        target: item.target.cell
-      })
-    } else {
-      switch (item.data?.type) {
-        case "datasource":
-          nodes.push({
-            id: item.id,
-            name: item.data.nodeName,
-            op: "datasource",
-            data: {
-              input_source: item.data.inputSource,
-              input_table: item.data.dataTable,
-            }
-          })
-          break;
-        case "outputsource":
-          nodes.push({
-            id: item.id,
-            name: item.data.nodeName,
-            op: "datasource",
-            data: {
-              output_source: item.data.outputSource,
-            }
-          })
-          break;
-        default:
-          nodes.push({
-            id: item.id,
-            name: item.data.nodeName,
-            op: item.data.label,
-            data: {
-              input_number: item.data.inputNumber,
-              output: item.data.outputData,
-              script: item.data.scriptText,
-              param: item.data.paramText,
-              package: item.data.packageData
-            }
-          })
-          break;
-      }
-    }
-  }); */
-
+  return {script_nodes, script_edges}
 }
 
 const datasourceToSql = (nodeData: any) => {
@@ -189,4 +203,73 @@ const generateInputs = (graph: any, id: any) => {
     })
   }
   return inputsResult
+}
+
+// Mark nodes to skip
+export const signNodesSkip = (graph: any, node: any, action: any) => {
+  const skipNodes = getSkipNodes(graph, node, action) as any
+  const {script_nodes, script_edges} = getScriptEdgesAndNodes(graph)
+  script_nodes.forEach((item: any) => {
+    item.skip = skipNodes[item.id]
+  })
+  return {script_nodes, script_edges}
+}
+
+// Compute the skip map
+const getSkipNodes = (graph: any, node: any, action: any) => {
+  // Result map: { nodeId: skip }
+  const result = {} as any
+  // Default every node to skip = true
+  const { nodes } = getEdgesAndNodes(graph)
+  // Index each node's predecessors and successors
+  const preEndNodes = getNodesSourceAndTarget(graph)
+  nodes.forEach((item: any) => {
+    result[item.id] = true
+  })
+  switch (action) {
+    // Run only this node
+    case 'handle_run':
+      result[node.data.nodeId] = false
+      break;
+    // Run up to this node
+    case 'handle_run_end':
+      traverseNodes(result, node.data.nodeId, preEndNodes, 'sourceNodes')
+      break
+    // 从此处开始执行
+    case 'handle_run_begin':
+      traverseNodes(result, node.data.nodeId, preEndNodes, 'targetNodes')
+      result[node.data.nodeId] = true
+      break
+    default:
+      break;
+  }
+  
+  return result
+}
+
+// Recursively walk predecessors or successors
+const traverseNodes = (result: any, nodeId: any, preEndNodes: any, nodes: any) => {
+  result[nodeId] = false
+  const sourceNodes = preEndNodes[nodeId][nodes]
+  sourceNodes.forEach((item: any) => {
+    traverseNodes(result, item, preEndNodes, nodes)
+  })
+}
+
+// Build the predecessor/successor index
+export const getNodesSourceAndTarget = (graph: any) => {
+  const { nodes, edges } = getEdgesAndNodes(graph)
+  const resultNodes = {} as any
+  nodes.forEach((item: any) => {
+    resultNodes[item.id] = {} as any
+    resultNodes[item.id]['sourceNodes'] = []
+    resultNodes[item.id]['targetNodes'] = []
+  })
+  edges.forEach((item: any) => {
+    const sourceNodeId = item.source
+    const targetNodeId = item.target
+    resultNodes[sourceNodeId]['targetNodes'].push(targetNodeId)
+    resultNodes[targetNodeId]['sourceNodes'].push(sourceNodeId)
+  })
+  return resultNodes
 }
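
A minimal TypeScript sketch (not part of the commit) of the skip semantics that signNodesSkip and getSkipNodes implement, run against a hand-built three-node chain instead of a real X6 graph. Note that, as committed, handle_run_begin re-marks the start node as skipped after the traversal (result[node.data.nodeId] = true), so only its strict descendants end up running; if the start node itself is meant to run, that assignment may be unintended.

    type SkipMap = Record<string, boolean>;
    type NodeIndex = Record<string, { sourceNodes: string[]; targetNodes: string[] }>;

    // Hand-built chain A -> B -> C standing in for getEdgesAndNodes(graph) output.
    const edges = [
      { source: 'A', target: 'B' },
      { source: 'B', target: 'C' }
    ];

    // Mirrors getNodesSourceAndTarget: predecessors/successors per node.
    const index: NodeIndex = {
      A: { sourceNodes: [], targetNodes: [] },
      B: { sourceNodes: [], targetNodes: [] },
      C: { sourceNodes: [], targetNodes: [] }
    };
    edges.forEach(e => {
      index[e.source].targetNodes.push(e.target);
      index[e.target].sourceNodes.push(e.source);
    });

    // Mirrors traverseNodes: un-skip a node and its transitive neighbours
    // (assumes an acyclic graph, as the committed helper does).
    const traverse = (result: SkipMap, id: string, key: 'sourceNodes' | 'targetNodes') => {
      result[id] = false;
      index[id][key].forEach(next => traverse(result, next, key));
    };

    // Mirrors getSkipNodes for each context-menu action.
    const skipFor = (action: string, nodeId: string): SkipMap => {
      const result: SkipMap = { A: true, B: true, C: true }; // default: skip everything
      if (action === 'handle_run') {
        result[nodeId] = false;                  // run only this node
      } else if (action === 'handle_run_end') {
        traverse(result, nodeId, 'sourceNodes'); // run this node and its ancestors
      } else if (action === 'handle_run_begin') {
        traverse(result, nodeId, 'targetNodes'); // run this node and its descendants...
        result[nodeId] = true;                   // ...then the committed code re-skips the start node
      }
      return result;
    };

    console.log(skipFor('handle_run', 'B'));       // { A: true,  B: false, C: true }
    console.log(skipFor('handle_run_end', 'B'));   // { A: false, B: false, C: true }
    console.log(skipFor('handle_run_begin', 'B')); // { A: true,  B: true,  C: false }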