diff --git a/plugins/tensorboard-plugins/tb_plugin/.flake8 b/plugins/tensorboard-plugins/tb_plugin/.flake8 deleted file mode 100644 index 1c5254b9f84568ba37d21c8e77e803558a6dae54..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -max-line-length = 120 -per-file-ignores = __init__.py:F401 torch_tb_profiler/io/file.py: F401 diff --git a/plugins/tensorboard-plugins/tb_plugin/.gitignore b/plugins/tensorboard-plugins/tb_plugin/.gitignore deleted file mode 100644 index dc7d4e6278beafdf41c8304cbc5915b3334095b3..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/build -/dist -/*.egg-info -__pycache__ diff --git a/plugins/tensorboard-plugins/tb_plugin/.pre-commit-config.yaml b/plugins/tensorboard-plugins/tb_plugin/.pre-commit-config.yaml deleted file mode 100644 index a650ec83269e596e6a4634045da8a0eff17830b9..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/.pre-commit-config.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -default_language_version: - python: python3.8 - -ci: - autofix_prs: true - autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions' - autoupdate_schedule: quarterly - # submodules: true - -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.1.0 - hooks: - - id: end-of-file-fixer - exclude: torch_tb_profiler/static/index.html - - id: trailing-whitespace - - id: double-quote-string-fixer - - - repo: https://github.com/pre-commit/mirrors-autopep8 - rev: v1.6.0 - hooks: - - id: autopep8 - name: Format code - - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: - - "--max-line-length=120" - - "--per-file-ignores=__init__.py:F401 tb_plugin/torch_tb_profiler/io/file.py: F401" - name: Check PEP8 diff --git a/plugins/tensorboard-plugins/tb_plugin/LICENSE b/plugins/tensorboard-plugins/tb_plugin/LICENSE deleted file mode 100644 index edb179715b5213644cfe903d43294f54892e707e..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/LICENSE +++ /dev/null @@ -1,33 +0,0 @@ -BSD License - -For Kineto software - -Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. - -All contributions by Microsoft: -Copyright (c) Microsoft Corporation. (The Azure AI Platform team) - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - - * Neither the name Facebook nor the names of its contributors may be used to - endorse or promote products derived from this software without specific - prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/plugins/tensorboard-plugins/tb_plugin/README.md b/plugins/tensorboard-plugins/tb_plugin/README.md deleted file mode 100644 index 6b9a3aee08efded9c219b8b5264cbf03fc3d7e39..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/README.md +++ /dev/null @@ -1,346 +0,0 @@ -# PyTorch Profiler TensorBoard NPU Plugin - -### 介绍 -此工具是PyTorch profiling数据以及可视化的TensorBoard的插件。 \ -它支持将Ascend平台采集、解析的Pytorch Profiling数据可视化呈现,也兼容GPU数据采集、解析可视化,现已支持PyTorch 2.0GPU版本的profiling数据可视化。同时集成了精度比对的功能,支持查看loss曲线和比对两个网络的loss收敛趋势。 - -### 快速安装说明 -* 相关依赖: - pandas >= 1.0.0 ,tensorboard >= 2.11.0,protobuf <= 3.20.3 -* 安装方式 - 1. pip安装(推荐) \ - * 现本插件已经上传到pypi社区,用户可在python环境下直接通过以下pip指令进行安装:\ - `pip install torch-tb-profiler-ascend` - * 也可在pypi社区上下载离线whl包,传输到无法访问公网的环境上离线安装使用。访问[下载链接](https://pypi.org/project/torch-tb-profiler-ascend/#files)选择whl包进行下载,之后便可使用指令安装(此处{version}为whl包实际版本)\ - `pip install torch-tb-profiler_ascend_{version}_py3_none_any.whl` - - 2. 从源代码安装 - * 从仓库下载源码: - - `git clone https://gitee.com/ascend/mstt.git` - - * 进入目录 `/plugins/tensorboard-plugins/tb_plugin` 下. - * 编译前端代码 - ``` - cd fe - # 此步骤为安装前端依赖 - yarn - # 回到上层目录执行构建指令 - cd .. - python setup.py build_fe - ``` - **注意**: 编译前端步骤需要安装[Node.js](https://nodejs.org/zh-cn/download)和[yarn](https://yarn.bootcss.com/docs/install/index.html)环境 - * 执行安装命令可直接安装: - - `pip install .` - * 或: 构建whl包安装 - - `python setup.py build_fe sdist bdist_wheel` - - 在 `/tb_plugins/profiling/tb_plugin/dist` 目录下取出whl包,使用以下指令安装(此处{version}为whl包实际版本) - - `pip install torch-tb-profiler-ascend-{version}-py3-none-any.whl` - -### 解析数据说明 - -* 准备profiling数据 - - 需要在读取的目录下放置指定格式的profiling数据。格式为包含3个层级的目录结构:runs层级为最外层目录(我们将一个完整的Profiling数据视为一个runs进行可视化处理),其子目录为worker_span层级(命名格式为{worker}_{span}_ascend_pt,注:此处span为数字,代表时间戳),下一层级为规定命名的ASCEND_PROFILER_OUTPUT目录,此目录中包含此插件加载展示的数据文件,如trace_view.json、kernel_details.csv、operator_details.csv等。 - 目录结构如下: -* E.g. there are 2 runs: run1, run2 \ - `run1` \ - `--[worker1]_[span1]_ascend_pt` \ - `----ASCEND_PROFILER_OUTPUT` \ - `------trace_view.json` \ - `------kernel_details.csv` \ - `--[worker2]_[span1]_ascend_pt` \ - `----ASCEND_PROFILER_OUTPUT` \ - `------trace_view.json` \ - `------operator_details.csv` \ - `run2` \ - `--[worker1]_[span1]_ascend_pt` \ - `----ASCEND_PROFILER_OUTPUT` \ - `------memory_record.csv` \ - `------operator_memory.csv` - -### 启动方式 - -1. 启动TensorBoard - - `tensorboard --logdir=./samples` - - 如果网络浏览器与启动TensorBoard的机器不在同一台机器上,则需要在尾部加上`--bind_all`命令,如: - - `tensorboard --logdir=./samples --bind_all` - - 注意:确保默认端口6006对浏览器的主机打开。 - - 如果需要切换端口号需要在尾部加上指定的端口号,如`--port=6007` - - `tensorboard --logdir=./samples --port=6007` - -2. 
在浏览器上打开tensorboard - - 在浏览器中打开URL: `http://localhost:6006`。 - 如果tensorboard启动命令使用`--bind_all` , 主机名不是`localhost`,而是绑定的主机ip,可以在cmd之后打印的日志中查找。 - - 注意:如果`--logdir` 指定目录下的文件太大或太多,请等候,刷新浏览器查看加载结果。 - -### PyTorch Profiling -#### 页面展示说明 - - 页面加载完成后,左侧视图如图。每个Runs都对应于`--logdir`指定的文件夹下的一个子文件夹(三层目录中的第一层run1, run2等)。 - 每个子文件夹包含一个或多个profiling数据文件夹。 - - ![Alt text](./docs/images/control_panel.PNG) - - Runs: `--logdir`下包含三层目录的所有数据。 - - Views: 展示数据分析的多个视图,包含Operator、NPU Kernel、Trace、Memory等多个view。 - - Workers-Spans: 多线程的情况下Profiling可能包含多组数据,通过Workers和Spans下拉框来选择不同线程和不同时间采集的数据产生的结果。 - -##### Operator View - - Operator View展示的是运行在host侧和device侧的Pytorch算子、计算算子的详细信息。 - - ![Alt text](./docs/images/operator_view.PNG) - - Calls: 表示的是运行过程中此算子被调用的次数。 - - Input Shapes: shapes信息。 - - Device Self Duration: 算子在device侧的耗时(除去子算子)。 - - Device Total Duration: 算子在device侧的耗时。 - - Host Self Duration: 算子在host侧的耗时(除去子算子)。 - - Host Total Duration: 算子在host侧的耗时。 - - AI Cores Eligible: 此算子是否在AICore上运行。 - - AI Cores Self (%): 算子在AICore上的耗时(除去子算子) / Device Self Duration。 - - AI Cores Total (%): 算子在AICore上的耗时 / Device Total Duration。 - - CallStack: 此算子的所有调用堆栈信息。 - - 说明: 由于一些算子之间存在父子关系(在trace上显示为包含关系),Self表示除去子算子的耗时,Total表示包含所有子算子的耗时。 - - ![Alt text](./docs/images/vscode_stack.PNG) - - 页面展示了四个饼图和两张表,通过界面的Group By切换表格和饼图。当切换为Operator时,表格以算子名称的维度进行展示,点击某个算子的View CallStack后,此算子会按照Call Stack分类展示算子信息。点击View call frames可以查看算子的调用信息。 - 当Group By切换为Operator + Input Shape时,算子以name和Input Shape为维度进行展示。 - - ![Alt text](./docs/images/operator_view_group_by_inputshape.PNG) - -##### Kernel View - - Kernel View 展示算子在加速核上运行的详细信息。此视图包含两张饼图和两张表,可通过 Group By 切换表格数据:算子的详情表以及统计表。 - - * 上方为饼图,展示耗时最多的数个算子耗时比例信息(左侧饼图)和算子执行在各类加速核上耗时百分比(右侧饼图) - - ![Alt text](./docs/images/kernel_view.PNG) - - * 选择 Group By 为 All 时,展示算子详情表,部分字段说明如下: - - | 字段名 | 说明 | - | ---------------- | -------------------------------------- | - | Step Id | 标识在哪个 Step 采集的数据 | - | Name | 运行在 npu 上的算子名称 | - | Type | 算子类型 | - | Accelerator Core | AI 加速核类型,包括 AI Core、AI CPU 等 | - | Start Time(us) | 算子执行开始时间 | - | Duration(us) | 当前算子执行耗时 | - | Wait Time(us) | 算子执行等待时间 | - | Block Dim | 运行切分数量,对应任务执行时的核数 | - - ![Alt text](./docs/images/kernel_view_group_by_statistic.PNG) - - * 选择 Group By 为 Statistic 时,展示算子信息统计表,此表格展示各算子的执行统计信息,字段说明如下: - - | 字段名 | 说明 | - | ---------------- | -------| - | Name | 运行在 npu 上的算子名称 | - | Calls | 算子执行次数 | - | Total Duration(us) | 算子执行总时间 | - | Min Duration(us) | 算子执行的最小时间 | - | Max Duration(us) | 算子执行的最大时间 | - | Avg Duration(us) | 算子执行平均时间 | - -##### Trace View - - 此视图显示使用chrome插件,展示在整个训练过程中的时序图。 - - ![Alt text](./docs/images/trace_view.PNG) - - Trace View主要包含三个层级以及层级下各个线程上执行的算子的时序排布。 - - ![Alt text](./docs/images/trace_view_one_step.PNG) - - 目前主要包括三个层级,PTA、CANN和Ascend Hardware。可以通过选择Processes来选择要展示的层级。 - - ![Alt text](./docs/images/trace_view_launch.PNG) - - 选择只展示async_npu,可以查看框架侧算子与昇腾硬件上执行的算子的下发执行关系。 - - ![Alt text](./docs/images/trace_view_npu_utilization.PNG) - - ![Alt text](./docs/images/trace_view_fwd_bwd_correlation.PNG) - - Tips:通过键盘的'W/S'键可以以光标位置为中心放大/缩小当前区域,通过'A/D'可以左移/右移当前可视域。 - -##### Memory View - - 展示的是Pytorch Profiler执行过程中内存申请和释放的信息。 - 主要包括两张折线图和两张表。可以在 'Device' 下拉框下选择要展示的NPU卡的内存使用信息。Group By可以切换总的内存使用和各个组件内存使用图表。 - - * Operator - - 整个采集过程中,算子内存使用情况汇总。 - - ![Alt text](./docs/images/memory_view.PNG) - 表格数据代表含义: - - * Name: 算子名称。 - - * Size: 申请的内存大小。 - - * Allocation Time: 内存申请时间。 - - * Release Time: 内存释放时间。 - - * Duration: 内存持有时间。 - - * Component - - 折线图为算子级上报的PTA侧和GE侧的内存持有和实际使用信息,以及进程级内存申请的趋势变化。表格为组件级内存峰值信息表,展示各NPU组件的内存峰值以及达到峰值的时刻。 - - ![Alt 
text](./docs/images/memory_view_component.PNG) - 表格数据代表含义: - - * Component: 组件名称。 - - * Peak Memory Reserved: 组件内存持有峰值。 - - * Time: 达到内存峰值的时刻(若存在多个相同峰值则取首次达到峰值时刻)。 - -##### Diff View - - Diff视图提供了Profiling数据比对功能。适用于同一网络不同迭代之间采集数据比对算子耗时情况,网络进行优化前后相同位置算子耗时情况比对、单机多卡不同卡之间采集数据比对以及相同网络不同硬件平台上运行性能情况比对等场景。 - ![Alt text](./docs/images/diff_view.png) - - * 最上方为整体比对,以采集的step为周期比较两份数据各类算子的耗时情况以及累计耗时变化趋势。点击其中某块柱形,可以单点查看对应详情。 - - ![Alt text](./docs/images/diff_detail.png) - - * 中间视图为差异图,由红蓝两块区域构成。横坐标与上方视图对应,蓝色区域为每类算子的耗时差值,红色区域表示当前所有算子耗时差的累加值。 - - * 最下方为算子比对明细表,显示相关差值以及相差比例信息。由于数据条目较多,支持选择是否显示Host Duration、Self Host Duration、Device Duration以及Self Device Duration相关比对信息。 - * Host Duration:算子在Host侧的累计耗时,包括子算子耗时。 - * Self Host Duration:算子在Host侧的累计耗时,不包括子算子耗时。 - * Device Duration:算子在Device侧的累计耗时,包括子算子耗时。 - * Self Device Duration:算子在Device侧的累计耗时,不包括子算子耗时。 - -##### Distributed View - - Distributed视图展示的是多卡采集数据情况,包括每张卡的计算、通信信息以及通信算子的详细信息,界面由两张柱状图和一个通信算子信息表构成,如下图。 - ![Alt text](./docs/images/distributed_view.PNG) - - * 左侧柱状图呈现了每张卡计算和通信等项的耗时,各项定义如下: - - | 字段 | 含义 | - |------|------| - | Computation | 计算时间:在NPU上的计算时间减去和通信重叠的时间。| - | Communication | 通信时间:总通讯时间减去和计算重叠的时间。| - | Overlapp | 重叠时间:计算和通信重叠的时间。此项占比越大代表计算和通信的并行性越好,理想情况下计算和通信完全重叠。| - | Other | 除去计算和通信的其他部分耗时,包括初始化、数据加载等。| - - * 右侧柱状图将通信时间分为数据传输时间和同步时间进行统计,定义如下: - - | 字段 | 含义 | - |------|------| - | Data Transfer Time | 通信时间中实际的数据传输时间。 | - | Synchronizing Time | 通信时间中等待以及和其他卡同步的时间。 | - - * 界面下方为通信算子信息表,统计了各张卡的通信算子详情。 - - | 字段 | 含义 | - |------|------| - | Name | 通信算子名称 | - | Calls | 调用次数。 | - | Total Transit Size(bytes) | 传输的总数据大小。 | - | Avg Transit Size(bytes) | 平均每次传输的数据大小。 | - | Elapse Time(us) | 此类算子总耗时。 | - | Avg Elapse Time(us) | 单个算子平均耗时。 | - | Transit Time(us) | 此类算子传输总耗时。 | - | Avg Transit Time(us) | 单个算子平均传输耗时。 | - -### Loss Comparison -#### 工具介绍 - - Loss Comparison是集成在该插件上的精度比对工具,提供了对loss曲线的可视化,loss数据匹配导出csv,以及两份数据比对等功能。 - -#### 页面展示说明 - 切换顶部页签栏至ACCURACY页签,即可进入精度比对工具页面。 - -##### 文件配置 -###### 文件导入 - 界面分为左侧边栏和右侧展示界面。点击左侧的Import Files或在左侧未勾选文件时点击右侧界面中心的Import Files字体,将会弹出系统文件资源管理窗,可以上传需要比对的模型网络训练日志文件。 - - **注:当前最多支持上传6个文件,单个文件大小不能超过50MB。** - ![Alt text](./docs/images/accuracy.PNG) - -###### 已上传文件操作 - 文件上传后,在左侧侧边栏出现文件列表。每个文件栏内都有配置数据匹配条件、导出CSV以及删除三种操作图标。 - - ![Alt text](./docs/images/accuracy_file_operator.PNG) - - * 点击配置数据匹配条件图标后,出现匹配条件配置弹框,需要设置Loss Tag和Iteration Tag两个配置项,弹框内每个Tag都包含一个输入框。 - ![Alt text](./docs/images/accuracy_config_modal.PNG) - 根据2个Tag的取值有如下3点匹配规则: - 1. 匹配数据时将逐行读取文件,查找是否存在输入框内设定的文本,若找到该文本,若为Loss Tag则查找其后是否存在数字或以科学计数法表示的数字(忽略两者中间空格),若为Iteration Tag则查找其后是否存在整数(忽略两者中间空格)。 - 2. 若存在多个匹配项,将第一项作为匹配值。 - 3. 只有当Loss Tag和Iteration都存在匹配值时,该行的Iteration和Loss才会为有效数据。 - - E.g. 
- - ![Alt text](./docs/images/accuracy_file.PNG) - - 对于以上这份txt文件,当设定Loss Tag为`loss:`以及Iteration Tag为`iteration`时: - * 根据上方第1点规则,Iteration Tag可匹配图中区域1内的整数,但无法匹配区域3内的整数,因为`iteration`和整数中间多了非数字字符`:`。 - * Loss Tag可匹配图中区域2和4内的数字,但区域2内为第一项匹配值,根据上方第2点规则,因此只取区域2内数字。 - * Loss Tag在图中区域5内有匹配数据,Iteration Tag在图中区域6内有匹配数据,但由于Iteration Tag在区域5内没有匹配数据,Loss Tag在图中区域6内没有匹配数据,根据上方第3点规则,区域5和区域6内不存在有效数据。 - - 因此上方这张图中最终提取出的有效数据为区域1和区域2内的同一行数字的集合。 - - * 点击导出CSV图标后,将导出找到的Iteration和Loss数据为csv文件。 \ - ![Alt text](./docs/images/accuracy_csv.PNG) - - * 点击删除图标后,界面弹出确认删除框,确认后可移除该文件。 - ![Alt text](./docs/images/accuracy_delete.PNG) - -##### Loss数据看板 - 已上传文件后,可在左侧侧边栏勾选该文件,右侧则会展示该文件的Loss数据看板,包含loss折线图和明细表格。 - - * 勾选单个文件时,Loss数据看板将会占满整个右侧展示界面。 - ![Alt text](./docs/images/accuracy_single_file.PNG) - - * 勾选两个以上文件时,右侧将会展示Loss数据看板和Loss比对看板。 - ![Alt text](./docs/images/accuracy_multiple_files.PNG) - - * Loss数据看板为全量展示,折线图内展示的是所有勾选文件的所有数据,表格内展示的同样为勾选文件的全量数据,若表格内iteration为某些文件独有,则其他文件该行显示为`NA`。 - -##### Loss比对看板 - 当勾选文件为2个以上时,将展示Loss比对看板,Loss比对看板基于iteration取两份比对数据的交集进行展示。 - - * 在Comparison objects中选择两个文件,则展示该两个文件的比对信息。 - ![Alt text](./docs/images/accuracy_loss_comparison.png) - - * 比对方式有三种,通过Comparison Setting进行设定。 - * Comparison Normal:相同iteration,后选择文件的loss值减去先选择文件的loss值。 - * Comparison Absolute:相同iteration,两个文件的loss的差值的绝对值。 - * Comparison Relative:相同iteration,两个文件的loss的差值的绝对值 / 先选择文件的loss值。 - -### 公网URL说明 - -[公网URL说明](./docs/公网URL说明.xlsx) \ No newline at end of file diff --git a/plugins/tensorboard-plugins/tb_plugin/ci_scripts/install_env.sh b/plugins/tensorboard-plugins/tb_plugin/ci_scripts/install_env.sh deleted file mode 100644 index 04406ae6e495fbe715c3d8cce54f7a16ee6c3992..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/ci_scripts/install_env.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -set -ex - -# install pytorch -pip install numpy tensorboard typing-extensions pillow pytest -if [ "$PYTORCH_VERSION" = "nightly" ]; then - pip install --pre torch -f "https://download.pytorch.org/whl/nightly/$CUDA_VERSION/torch_nightly.html" - pip install --pre torchvision --no-deps -f "https://download.pytorch.org/whl/nightly/$CUDA_VERSION/torch_nightly.html" -elif [ "$PYTORCH_VERSION" = "1.11rc" ]; then - pip install --pre torch -f "https://download.pytorch.org/whl/test/$CUDA_VERSION/torch_test.html" - pip install --pre torchvision --no-deps -f "https://download.pytorch.org/whl/test/$CUDA_VERSION/torch_test.html" -elif [ "$PYTORCH_VERSION" = "stable" ]; then - pip install torch torchvision -fi - -python -c "import torch; print(torch.__version__, torch.version.git_version); from torch.autograd import kineto_available; print(kineto_available())" diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/gpu_utilization.md b/plugins/tensorboard-plugins/tb_plugin/docs/gpu_utilization.md deleted file mode 100644 index c4f45b880c71fd25f3d2d727408390e4751b72da..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/docs/gpu_utilization.md +++ /dev/null @@ -1,22 +0,0 @@ -* GPU Utilization: GPU busy time / all steps time. The higher, the better. All steps time is the total time of all profiler steps(or called as iterations). - GPU busy time is the time during “all steps time” when is at least one GPU kernel running on this GPU. - However, this high-level utilization metric is coarse. It can’t tell how many SMs(Stream Multiprocessors) are in use. - For example, a kernel with a single thread running continuously will get 100% GPU utilization. - -* Est. 
SM Efficiency: Estimated Stream Multiprocessor Efficiency. The higher, the better. This metric for a kernel is SM_Eff_K = min(blocks of this kernel / SM count of this GPU, 100%). - The overall number is the sum of all kernels' SM_Eff_K weighted by each kernel's execution duration, divided by “all steps time”. - It shows the utilization of the GPU's Stream Multiprocessors. - Although it is finer grained than the “GPU Utilization” above, it still can't tell the whole story. - For example, a kernel with only one thread per block can't fully utilize each SM. - -* Est. Achieved Occupancy: For most cases, such as memory-bandwidth-bound kernels, a higher value often translates to better performance, especially when the initial value is very low. [Reference](http://developer.download.nvidia.com/GTC/PDF/GTC2012/PresentationPDF/S0514-GTC2012-GPU-Performance-Analysis.pdf). The definition of occupancy is [here](https://docs.nvidia.com/gameworks/content/developertools/desktop/analysis/report/cudaexperiments/kernellevel/achievedoccupancy.htm). - Occupancy is the ratio of active warps on an SM to the maximum number of - active warps supported by the SM. The theoretical occupancy of a kernel is the upper limit of that kernel's occupancy, constrained by multiple - factors such as kernel shape, the resources the kernel uses, and the GPU compute capability. - Est. Achieved Occupancy of a kernel, OCC_K = min(threads of the kernel / SM count / max threads per SM, theoretical occupancy of the kernel). - The overall number is the weighted sum of all kernels' OCC_K, using each kernel's execution duration as the weight. It shows fine-grained, low-level GPU utilization. - - * Kernel Time using Tensor Cores: Total GPU time of Tensor Core kernels / total GPU time of all kernels. The higher, the better. - Tensor Cores provide mixed-precision floating-point operations, available on Volta GPUs (Titan V) and later. - The cuDNN and cuBLAS libraries contain several Tensor Core-enabled GPU kernels for most convolution and GEMM operations. - This number shows the share of GPU time spent in Tensor Core kernels among all kernels on a GPU.
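For readers who want to sanity-check the metrics defined in the deleted gpu_utilization.md above, a minimal sketch of the duration-weighted aggregation is shown below. This is not the plugin's implementation: the `Kernel` record and its fields (`duration`, `blocks`, `threads`, `theoretical_occupancy`) are hypothetical stand-ins for whatever per-kernel data is available, and Est. Achieved Occupancy is assumed to be a duration-weighted average over kernels.

```python
# Minimal sketch (not the plugin's code) of the duration-weighted GPU metrics
# described above. The Kernel fields are hypothetical stand-ins.
from dataclasses import dataclass
from typing import List


@dataclass
class Kernel:
    duration: float               # kernel execution time, same unit as all_steps_time
    blocks: int                   # thread blocks launched by this kernel
    threads: int                  # total threads launched by this kernel
    theoretical_occupancy: float  # 0.0..1.0, from the kernel's launch configuration


def est_sm_efficiency(kernels: List[Kernel], sm_count: int, all_steps_time: float) -> float:
    # SM_Eff_K = min(blocks / SM count, 100%); sum weighted by duration, divided by all steps time.
    return sum(min(k.blocks / sm_count, 1.0) * k.duration for k in kernels) / all_steps_time


def est_achieved_occupancy(kernels: List[Kernel], sm_count: int, max_threads_per_sm: int) -> float:
    # OCC_K = min(threads / SM count / max threads per SM, theoretical occupancy),
    # aggregated here as a duration-weighted average over all kernels.
    total_time = sum(k.duration for k in kernels)
    if total_time == 0:
        return 0.0
    weighted = sum(
        min(k.threads / sm_count / max_threads_per_sm, k.theoretical_occupancy) * k.duration
        for k in kernels
    )
    return weighted / total_time
```

As a worked example under these assumptions, on a card with 108 SMs, a single kernel that launches 54 blocks and runs for 10 µs out of a 100 µs "all steps time" contributes min(54/108, 1.0) × 10 / 100 = 5% to Est. SM Efficiency.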
diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy.PNG deleted file mode 100644 index a2f5fb486beea241d39c016bdf59f98e9475560b..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_config_modal.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_config_modal.PNG deleted file mode 100644 index 5a4006ad33cb66d8892c2e3e3c183eb1da20eeab..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_config_modal.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_csv.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_csv.PNG deleted file mode 100644 index 8581c592d5fa644d672f4ee4728f1be412f0ab6a..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_csv.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_delete.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_delete.PNG deleted file mode 100644 index f8720e7a687980535c085317476653d9b5589ead..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_delete.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_file.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_file.PNG deleted file mode 100644 index 063f876d97e4e4d8c288e7cccc310cc8b6e21268..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_file.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_file_operator.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_file_operator.PNG deleted file mode 100644 index ff923095d40986c53976cc6d327969d504d303bb..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_file_operator.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_loss_chart.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_loss_chart.PNG deleted file mode 100644 index f98b8e48444ba74b0a80ef1793a554c6cd229e55..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_loss_chart.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_loss_comparison.png b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_loss_comparison.png deleted file mode 100644 index a7d6b86548ba7e1ed8401299125d7fefcf0ebbe1..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_loss_comparison.png and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_multiple_files.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_multiple_files.PNG deleted file mode 100644 index 3df173a73c5bc92fa49ef5d86b70629ee8384cca..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_multiple_files.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_single_file.PNG 
b/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_single_file.PNG deleted file mode 100644 index 6f2ab6c7b8cb4ff8fb5689a980f63474390cd8cb..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/accuracy_single_file.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/control_panel.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/control_panel.PNG deleted file mode 100644 index bf101db7088d52a209fea2424d7da6cfe305aeab..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/control_panel.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/diff_detail.png b/plugins/tensorboard-plugins/tb_plugin/docs/images/diff_detail.png deleted file mode 100644 index 60ca175c121261bc28d8a9843c3fd7202c0d4734..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/diff_detail.png and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/diff_view.png b/plugins/tensorboard-plugins/tb_plugin/docs/images/diff_view.png deleted file mode 100644 index bceb77afb2aad1c3757bba25d90c2c7302f43b40..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/diff_view.png and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/distributed_view.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/distributed_view.PNG deleted file mode 100644 index bdc60d2d61de8d84c8ead341171b195630eda412..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/distributed_view.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view.PNG deleted file mode 100644 index 8ba95c165d667158eeec109a757b492b8541671e..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view_group_by_properties_and_op.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view_group_by_properties_and_op.PNG deleted file mode 100644 index 5001f28c75a1257689758a15553c9594cd86edd7..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view_group_by_properties_and_op.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view_group_by_statistic.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view_group_by_statistic.PNG deleted file mode 100644 index 04e269b17ca0b442c56e9d84c96203293ccfb766..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/kernel_view_group_by_statistic.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/lightning_view.png b/plugins/tensorboard-plugins/tb_plugin/docs/images/lightning_view.png deleted file mode 100644 index 03a5004f16bf188755640b9de2fce5545af7940f..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/lightning_view.png and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/memory_view.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/memory_view.PNG deleted file mode 100644 index 
b89d59e7e7b6a3a60d8d7a30b22babde416c8461..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/memory_view.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/memory_view_component.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/memory_view_component.PNG deleted file mode 100644 index e4c9e27ad8969f330a3dfeb2b73809dc8989181b..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/memory_view_component.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/module_view.png b/plugins/tensorboard-plugins/tb_plugin/docs/images/module_view.png deleted file mode 100644 index 610cf202a3bc6154c79085fe9f6ab12c79c2151a..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/module_view.png and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/operator_view.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/operator_view.PNG deleted file mode 100644 index 5dbc7d26ab1d69c0d0260ec7a72c9841bca2a229..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/operator_view.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/operator_view_group_by_inputshape.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/operator_view_group_by_inputshape.PNG deleted file mode 100644 index 68b4784f1a54497219c32bcdaa0128adde5f5163..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/operator_view_group_by_inputshape.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/overall_view.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/overall_view.PNG deleted file mode 100644 index 916be90c0674f8fba5ac36d0c6e07cfd258f49e5..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/overall_view.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/time_breakdown_priority.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/time_breakdown_priority.PNG deleted file mode 100644 index c8574772786b2e38f52793c51e96fee8e9299aa7..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/time_breakdown_priority.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view.PNG deleted file mode 100644 index 975cec55df98669dd573629ee680e9f1645dbdde..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_fwd_bwd_correlation.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_fwd_bwd_correlation.PNG deleted file mode 100644 index 57fa003cae6f5759dd42c871563ef53c45f73988..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_fwd_bwd_correlation.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_launch.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_launch.PNG deleted file mode 100644 index 
a381dd17b63b059dddb4db8138fc997d4032fa87..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_launch.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_npu_utilization.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_npu_utilization.PNG deleted file mode 100644 index 4a542138f84ce106425c1cfa7afeabd9e810bf4f..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_npu_utilization.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_one_step.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_one_step.PNG deleted file mode 100644 index 687af618899884c5f2f666fa67e66dcabd67e25f..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/trace_view_one_step.PNG and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/docs/images/vscode_stack.PNG b/plugins/tensorboard-plugins/tb_plugin/docs/images/vscode_stack.PNG deleted file mode 100644 index a109c97a1aaf0011c817e07c0ecb3224fae986f6..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/docs/images/vscode_stack.PNG and /dev/null differ diff --git "a/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" "b/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" deleted file mode 100644 index de0bb25fe155aa188e5670a377311e96168586e8..0000000000000000000000000000000000000000 Binary files "a/plugins/tensorboard-plugins/tb_plugin/docs/\345\205\254\347\275\221URL\350\257\264\346\230\216.xlsx" and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/examples/datapipe_example.py b/plugins/tensorboard-plugins/tb_plugin/examples/datapipe_example.py deleted file mode 100644 index a6eac79b9d993decb3ecea7aa90b1b1fecf8d228..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/examples/datapipe_example.py +++ /dev/null @@ -1,50 +0,0 @@ -import torch -import torch.nn as nn -import torch.optim -from torch.utils.data.dataloader_experimental import DataLoader2 - -from torchvision import transforms as T -import torchvision.prototype.datasets as pdatasets -import torchvision.prototype.models as models -from torchvision.prototype.datasets._builtin import Cifar10 - - -if __name__ == "__main__": - model = models.resnet50(models.ResNet50_Weights.ImageNet1K_V1) - trainset = Cifar10().to_datapipe(root='./data', decoder=pdatasets.decoder.raw) - transform = T.Compose([T.Resize(256), T.CenterCrop(224)]) - trainset = trainset.map(transform, input_col="image") - trainset = trainset.map(fn=T.functional.convert_image_dtype, input_col="image") - dl = DataLoader2(trainset, batch_size=64) - criterion = nn.CrossEntropyLoss().cuda(0) - optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) - device = torch.device("cuda:0") - model.to(device=device).train() - - with torch.profiler.profile( - activities=[ - torch.profiler.ProfilerActivity.CPU, - torch.profiler.ProfilerActivity.CUDA], - schedule=torch.profiler.schedule( - wait=1, - warmup=1, - active=2), - on_trace_ready=torch.profiler.tensorboard_trace_handler('./result', worker_name='datapipe0'), - record_shapes=True, - profile_memory=True, # This will take 1 to 2 minutes. Setting it to False could greatly speedup. 
- with_stack=True - ) as p: - for step, data in enumerate(dl, 0): - print("step:{}".format(step)) - input_tensors = data['image'] - label_tensors = data['label'] - inputs, labels = input_tensors.to(device=device), label_tensors.to(device=device) - outputs = model(inputs) - loss = criterion(outputs, labels) - optimizer.zero_grad() - loss.backward() - optimizer.step() - if step + 1 >= 4: - break - p.step() - print("done") diff --git a/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_autograd_api.py b/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_autograd_api.py deleted file mode 100644 index 9ff5d89aaaab1dc2802c9be16e7f75a37bc2f803..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_autograd_api.py +++ /dev/null @@ -1,46 +0,0 @@ -import os -import torch -import torch.nn as nn -import torch.backends.cudnn as cudnn -import torch.optim -import torch.utils.data -import torchvision -import torchvision.transforms as T -import torchvision.models as models - -from torch.autograd.profiler import profile - -model = models.resnet50(pretrained=True) -model.cuda() -cudnn.benchmark = True - -transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()]) -trainset = torchvision.datasets.CIFAR10(root='./data', train=True, - download=True, transform=transform) -trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, - shuffle=True, num_workers=0) - -criterion = nn.CrossEntropyLoss().cuda() -optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) -device = torch.device("cuda:0") -model.train() - -with profile(use_cuda=True, use_kineto=True, record_shapes=True) as p: - for step, data in enumerate(trainloader, 0): - print("step:{}".format(step)) - inputs, labels = data[0].to(device=device), data[1].to(device=device) - - outputs = model(inputs) - loss = criterion(outputs, labels) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - if step >= 5: - break - -try: - os.mkdir("result") -except Exception: - pass -p.export_chrome_trace("./result/worker0.pt.trace.json") diff --git a/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_ddp_profiler.py b/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_ddp_profiler.py deleted file mode 100644 index 0ebcfe989e8261026da74cf0d83189b715182fba..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_ddp_profiler.py +++ /dev/null @@ -1,95 +0,0 @@ -import os - -import torch -import torch.backends.cudnn as cudnn -import torch.distributed as dist -import torch.multiprocessing as mp -import torch.nn as nn -import torch.optim -import torch.profiler -import torch.utils.data -import torchvision -import torchvision.models as models -import torchvision.transforms as T -from torch.nn.parallel import DistributedDataParallel as DDP - - -def example(rank, use_gpu=True): - if use_gpu: - torch.cuda.set_device(rank) - model = models.resnet50(pretrained=True).to(rank) - model.cuda() - cudnn.benchmark = True - model = DDP(model, device_ids=[rank]) - else: - model = models.resnet50(pretrained=True) - model = DDP(model) - - # Use gradient compression to reduce communication - # model.register_comm_hook(None, default.fp16_compress_hook) - # or - # state = powerSGD_hook.PowerSGDState(process_group=None,matrix_approximation_rank=1,start_powerSGD_iter=2) - # model.register_comm_hook(state, powerSGD_hook.powerSGD_hook) - - transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()]) - trainset = 
torchvision.datasets.CIFAR10(root='./data', train=True, - download=True, transform=transform) - train_sampler = torch.utils.data.distributed.DistributedSampler(trainset) - trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, sampler=train_sampler, - shuffle=False, num_workers=4) - - if use_gpu: - criterion = nn.CrossEntropyLoss().to(rank) - else: - criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) - model.train() - - with torch.profiler.profile( - activities=[ - torch.profiler.ProfilerActivity.CPU, - torch.profiler.ProfilerActivity.CUDA], - schedule=torch.profiler.schedule( - wait=2, - warmup=2, - active=5), - with_stack=False, - on_trace_ready=torch.profiler.tensorboard_trace_handler('./result'), - record_shapes=True - ) as p: - for step, data in enumerate(trainloader, 0): - print("step:{}".format(step)) - if use_gpu: - inputs, labels = data[0].to(rank), data[1].to(rank) - else: - inputs, labels = data[0], data[1] - outputs = model(inputs) - loss = criterion(outputs, labels) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - p.step() - if step + 1 >= 10: - break - - -def init_process(rank, size, fn, backend='nccl'): - """ Initialize the distributed environment. """ - os.environ['MASTER_ADDR'] = '127.0.0.1' - os.environ['MASTER_PORT'] = '29500' - dist.init_process_group(backend, rank=rank, world_size=size) - fn(rank, size) - - -if __name__ == "__main__": - size = 4 - processes = [] - mp.set_start_method("spawn") - for rank in range(size): - p = mp.Process(target=init_process, args=(rank, size, example)) - p.start() - processes.append(p) - - for p in processes: - p.join() diff --git a/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_profiler_api.py b/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_profiler_api.py deleted file mode 100644 index cdfa14aa77e1b82101a2083acff86c9e8de2890d..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/examples/resnet50_profiler_api.py +++ /dev/null @@ -1,52 +0,0 @@ -import torch -import torch.nn as nn -import torch.backends.cudnn as cudnn -import torch.optim -import torch.utils.data -import torchvision -import torchvision.transforms as T -import torchvision.models as models - -import torch.profiler - -model = models.resnet50(pretrained=True) -model.cuda() -cudnn.benchmark = True - -transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()]) -trainset = torchvision.datasets.CIFAR10(root='./data', train=True, - download=True, transform=transform) -trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, - shuffle=True, num_workers=4) - -criterion = nn.CrossEntropyLoss().cuda() -optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) -device = torch.device("cuda:0") -model.train() - -with torch.profiler.profile( - activities=[ - torch.profiler.ProfilerActivity.CPU, - torch.profiler.ProfilerActivity.CUDA], - schedule=torch.profiler.schedule( - wait=1, - warmup=1, - active=2), - on_trace_ready=torch.profiler.tensorboard_trace_handler('./result', worker_name='worker0'), - record_shapes=True, - profile_memory=True, # This will take 1 to 2 minutes. Setting it to False could greatly speedup. 
- with_stack=True -) as p: - for step, data in enumerate(trainloader, 0): - print("step:{}".format(step)) - inputs, labels = data[0].to(device=device), data[1].to(device=device) - - outputs = model(inputs) - loss = criterion(outputs, labels) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - if step + 1 >= 4: - break - p.step() diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/.gitignore b/plugins/tensorboard-plugins/tb_plugin/fe/.gitignore deleted file mode 100644 index e6a92696825082a8ff08815b553822cf7a4c4c8e..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/dist -/node_modules -*.log diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/README.md b/plugins/tensorboard-plugins/tb_plugin/fe/README.md deleted file mode 100644 index aa9cf2587780da8cd911a88c10f763516fc8867f..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Pytorch Profiler - -### Install & Build - -1. install [Node.js](https://nodejs.org/) - * ```bash - curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash - - sudo apt-get install -y nodejs``` -2. install [Yarn](https://yarnpkg.com/) - * ```bash - curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add - - echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list - sudo apt update && sudo apt install yarn - ``` -3. shell `yarn` to prepare JS dependency -4. shell `yarn build:copy` -5. Go to `tb_plugin` folder and install the package using `python setup.py develop` -6. Launch tensorboard diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/index.html b/plugins/tensorboard-plugins/tb_plugin/fe/index.html deleted file mode 100644 index a58ddc088336085b78597616844ebb131f49ad51..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/index.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - -
- - diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/package.json b/plugins/tensorboard-plugins/tb_plugin/fe/package.json deleted file mode 100644 index 5edca28fa780b15afbb9c8e286ba4176a79d83f4..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/package.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "fe", - "private": "true", - "version": "1.0.0", - "main": "index.js", - "scripts": { - "build": "cross-env NODE_ENV=production webpack", - "build:copy": "yarn build && node ./update-static.js", - "build:dev": "webpack", - "dev": "webpack serve", - "prettier": "prettier --config ./prettier.json --write ./src/**/*.{ts,tsx} ./*.js" - }, - "dependencies": { - "@ant-design/icons": "^5.2.6", - "@babel/runtime": "^7.13.10", - "@material-ui/core": "^4.11.3", - "@material-ui/icons": "^4.11.2", - "antd": "^4.23.0", - "clsx": "^1.1.1", - "echarts": "^5.4.2", - "portable-fetch": "^3.0.0", - "react": "^16.13.1", - "react-dom": "^16.13.1", - "react-flame-graph": "^1.4.0", - "url": "^0.11.1" - }, - "devDependencies": { - "@types/react": "^17.0.69", - "@types/react-dom": "^18.2.14", - "cross-env": "^7.0.2", - "css-loader": "^5.2.4", - "html-webpack-plugin": "^5.3.1", - "inline-chunk-html-plugin": "^1.1.1", - "prettier": "^2.1.2", - "style-loader": "^2.0.0", - "ts-loader": "^8.0.18", - "typescript": "^4.0.3", - "webpack": "^5.28.0", - "webpack-cli": "^4.5.0", - "webpack-dev-server": "^4.7.4" - }, - "resolutions": { - "portable-fetch/**/node-fetch": "^2.6.1", - "webpack/**/browserslist": "^4.16.5", - "postcss/**/nanoid": "^3.1.31" - } -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/prettier.json b/plugins/tensorboard-plugins/tb_plugin/fe/prettier.json deleted file mode 100644 index ef5789da9458a66e7dacc1dfdeeb764642331734..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/prettier.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "parser": "typescript", - "semi": true, - "singleQuote": true, - "jsxSingleQuote": false, - "bracketSpacing": true, - "tabWidth": 2, - "useTabs": false, - "trailingComma": "all", - "proseWrap": "always", - "endOfLine": "lf" -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/scripts/add_header.py b/plugins/tensorboard-plugins/tb_plugin/fe/scripts/add_header.py deleted file mode 100644 index 69bc6c05541cbaff0fc88eb7456f501fb5bd4f71..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/scripts/add_header.py +++ /dev/null @@ -1,51 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling. 
-# -------------------------------------------------------------------------- -# !/usr/bin/env python -import glob -import os -import sys - -HEADER = '''/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -''' - - -def add_header(file): - with open(file, 'r') as f: - contents = f.readlines() - - # do nothing if there is already header - if contents and contents[0].startswith('/*-'): - return - - with open(file, 'w') as out: - out.write(HEADER) - out.writelines(contents) - - -if __name__ == '__main__': - directory = sys.argv[1] - if not os.path.isdir(directory): - raise ValueError('{} is not a directory'.format(directory)) - - for ts_file in glob.glob(directory + '/*.ts'): - add_header(ts_file) diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/scripts/build.sh b/plugins/tensorboard-plugins/tb_plugin/fe/scripts/build.sh deleted file mode 100644 index 014a26e26c3b58421b878c886fd1899cae2758b3..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/scripts/build.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -set -e - -current_dir="$( cd "$( dirname "$0" )" && pwd )" -FE_ROOT="$(dirname "$current_dir")" -cd $FE_ROOT/ - -java -jar $FE_ROOT/swagger-codegen-cli.jar generate -i $FE_ROOT/src/api/openapi.yaml -l typescript-fetch -o $FE_ROOT/src/api/generated/ --additional-properties modelPropertyNaming=original -rm $FE_ROOT/src/api/generated/api_test.spec.ts -yarn prettier --end-of-line lf -python $FE_ROOT/scripts/add_header.py $FE_ROOT/src/api/generated/ - -yarn build:copy diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/scripts/setup.sh b/plugins/tensorboard-plugins/tb_plugin/fe/scripts/setup.sh deleted file mode 100644 index fb2680d9816139854c58b87c6293a8c11ce685f4..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/scripts/setup.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e - -current_dir="$( cd "$( dirname "$0" )" && pwd )" -FE_ROOT="$(dirname "$current_dir")" - -# # install nodejs -if ! command -v node &> /dev/null -then - curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash - - sudo apt-get install -y nodejs -fi - -# install yarn -if ! command -v yarn &> /dev/null -then - curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add - - echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list - sudo apt update && sudo apt install yarn -fi - -# download swagger-codegen-cli -if [[ ! -f "$FE_ROOT/swagger-codegen-cli.jar" ]]; then - wget https://repo1.maven.org/maven2/io/swagger/codegen/v3/swagger-codegen-cli/3.0.25/swagger-codegen-cli-3.0.25.jar -O swagger-codegen-cli.jar -fi diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/README.md b/plugins/tensorboard-plugins/tb_plugin/fe/src/api/README.md deleted file mode 100644 index 06208c419e1c72c4d49e3dc06f8304d4198b27c2..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# How to generate the api.ts - -## Prerequisites -1. install java -2. 
run command -```bash - cd fe - wget https://repo1.maven.org/maven2/io/swagger/codegen/v3/swagger-codegen-cli/3.0.25/swagger-codegen-cli-3.0.25.jar -O swagger-codegen-cli.jar - java -jar swagger-codegen-cli.jar generate -i ./src/api/openapi.yaml -l typescript-fetch -o ./src/api/generated/ --additional-properties modelPropertyNaming=original - rm ./src/api/generated/api_test.spec.ts - yarn prettier --end-of-line lf - python ./scripts/add_header.py ./src/api/generated/ -``` diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/api.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/api.ts deleted file mode 100644 index 29cde96ebbde928cde967b3b1b365d12e74ee734..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/api.ts +++ /dev/null @@ -1,4724 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Add visualization of PyTorch Ascend profiling. - *--------------------------------------------------------------------------------------------*/ - -/// -// tslint:disable -/** - * Pytorch profile API - * No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) - * - * OpenAPI spec version: 1.0.0 - * - * - * NOTE: This file is auto generated by the swagger code generator program. - * https://github.com/swagger-api/swagger-codegen.git - * Do not edit the file manually. 
- */ - -import * as url from 'url'; -import * as portableFetch from 'portable-fetch'; -import { Configuration } from './configuration'; - -const BASE_PATH = '.'.replace(/\/+$/, ''); - -/** - * - * @export - */ -export const COLLECTION_FORMATS = { - csv: ',', - ssv: ' ', - tsv: '\t', - pipes: '|', -}; - -/** - * - * @export - * @interface FetchAPI - */ -export interface FetchAPI { - (url: string, init?: any): Promise; -} - -/** - * - * @export - * @interface FetchArgs - */ -export interface FetchArgs { - url: string; - options: any; -} - -/** - * - * @export - * @class BaseAPI - */ -export class BaseAPI { - protected configuration: Configuration; - - constructor( - configuration?: Configuration, - protected basePath: string = BASE_PATH, - protected fetch: FetchAPI = portableFetch - ) { - if (configuration) { - this.configuration = configuration; - this.basePath = configuration.basePath || this.basePath; - } - } -} - -/** - * - * @export - * @class RequiredError - * @extends {Error} - */ -export class RequiredError extends Error { - name: 'RequiredError'; - constructor(public field: string, msg?: string) { - super(msg); - } -} - -/** - * - * @export - * @interface CallStackTableData - */ -export interface CallStackTableData extends Array {} -/** - * - * @export - * @interface CallStackTableDataInner - */ -export interface CallStackTableDataInner { - /** - * - * @type {string} - * @memberof CallStackTableDataInner - */ - name: string; - /** - * - * @type {string} - * @memberof CallStackTableDataInner - */ - input_shape?: string; - /** - * - * @type {number} - * @memberof CallStackTableDataInner - */ - calls: number; - /** - * - * @type {number} - * @memberof CallStackTableDataInner - */ - device_self_duration?: number; - /** - * - * @type {number} - * @memberof CallStackTableDataInner - */ - device_total_duration?: number; - /** - * - * @type {number} - * @memberof CallStackTableDataInner - */ - host_self_duration: number; - /** - * - * @type {number} - * @memberof CallStackTableDataInner - */ - host_total_duration: number; - /** - * - * @type {string} - * @memberof CallStackTableDataInner - */ - call_stack?: string; - /** - * - * @type {string} - * @memberof CallStackTableDataInner - */ - tc_eligible?: string; - /** - * - * @type {number} - * @memberof CallStackTableDataInner - */ - tc_self_ratio?: number; - /** - * - * @type {number} - * @memberof CallStackTableDataInner - */ - tc_total_ratio?: number; -} -/** - * - * @export - * @interface DiffNode - */ -export interface DiffNode { - /** - * - * @type {OpStats} - * @memberof DiffNode - */ - left: OpStats; - /** - * - * @type {OpStats} - * @memberof DiffNode - */ - right: OpStats; - /** - * - * @type {string} - * @memberof DiffNode - */ - path: string; - /** - * - * @type {Array} - * @memberof DiffNode - */ - children: Array; -} -/** - * - * @export - * @interface DistributedGraph - */ -export interface DistributedGraph { - /** - * - * @type {DistributedGraphMetadata} - * @memberof DistributedGraph - */ - metadata: DistributedGraphMetadata; - /** - * - * @type {any} - * @memberof DistributedGraph - */ - data: any; -} -/** - * - * @export - * @interface DistributedGraphMetadata - */ -export interface DistributedGraphMetadata { - /** - * - * @type {string} - * @memberof DistributedGraphMetadata - */ - title: string; - /** - * - * @type {Array} - * @memberof DistributedGraphMetadata - */ - legends: Array; - /** - * - * @type {string} - * @memberof DistributedGraphMetadata - */ - units: string; -} -/** - * - * @export - * @interface Environment 
- */ -export interface Environment { - /** - * - * @type {string} - * @memberof Environment - */ - title: string; - /** - * - * @type {string} - * @memberof Environment - */ - value: string; -} -/** - * - * @export - * @interface GpuInfo - */ -export interface GpuInfo { - /** - * - * @type {GpuInfoMetadata} - * @memberof GpuInfo - */ - metadata: GpuInfoMetadata; - /** - * - * @type {any} - * @memberof GpuInfo - */ - data: any; -} -/** - * - * @export - * @interface GpuInfoMetadata - */ -export interface GpuInfoMetadata { - /** - * - * @type {string} - * @memberof GpuInfoMetadata - */ - title: string; -} -/** - * - * @export - * @interface GpuMetric - */ -export interface GpuMetric { - /** - * - * @type {string} - * @memberof GpuMetric - */ - title: string; - /** - * - * @type {string} - * @memberof GpuMetric - */ - value: string; -} -/** - * - * @export - * @interface GpuMetrics - */ -export interface GpuMetrics { - /** - * - * @type {Array} - * @memberof GpuMetrics - */ - data: Array; - /** - * - * @type {string} - * @memberof GpuMetrics - */ - tooltip: string; -} -/** - * - * @export - * @interface Graph - */ -export interface Graph { - /** - * - * @type {string} - * @memberof Graph - */ - title?: string; - /** - * - * @type {Array} - * @memberof Graph - */ - columns: Array; - /** - * - * @type {Array>} - * @memberof Graph - */ - rows: Array>; -} -/** - * - * @export - * @interface ValueAndTooltip - */ -export interface ValueAndTooltip { - /** - * - * @type {string | number} - * @memberof ValueAndTooltip - */ - value: string | number; - /** - * - * @type {string} - * @memberof ValueAndTooltip - */ - tooltip?: string; -} -/** - * - * @export - * @interface StepedGraph - */ -export interface StepedGraph { - /** - * - * @type {string} - * @memberof StepedGraph - */ - title?: string; - /** - * - * @type {Array} - * @memberof StepedGraph - */ - columns: Array; - /** - * - * @type {Array>} - * @memberof StepedGraph - */ - rows: Array>; -} -/** - * - * @export - * @interface GraphAscend - */ -export interface GraphAscend { - /** - * - * @type {string} - * @memberof GraphAscend - */ - title?: string; - /** - * - * @type {Array} - * @memberof GraphAscend - */ - columns: Array; - /** - * - * @type {any} - * @memberof GraphAscend - */ - rows: any; -} -/** - * - * @export - * @interface GraphColumn - */ -export interface GraphColumn { - /** - * - * @type {string} - * @memberof GraphColumn - */ - type: string; - /** - * - * @type {string} - * @memberof GraphColumn - */ - name: string; - /** - * - * @type {string} - * @memberof GraphColumn - */ - role?: string; - /** - * - * @type {GraphColumnP} - * @memberof GraphColumn - */ - p?: GraphColumnP; -} -/** - * - * @export - * @interface GraphColumnP - */ -export interface GraphColumnP { - /** - * - * @type {boolean} - * @memberof GraphColumnP - */ - html?: boolean; -} -/** - * - * @export - * @interface InlineResponse200 - */ -export interface InlineResponse200 { - /** - * - * @type {TableMetadata} - * @memberof InlineResponse200 - */ - metadata: TableMetadata; - /** - * - * @type {OperationTableData} - * @memberof InlineResponse200 - */ - data: OperationTableData; -} -/** - * - * @export - * @interface InlineResponse2001 - */ -export interface InlineResponse2001 { - /** - * - * @type {TableMetadata} - * @memberof InlineResponse2001 - */ - metadata: TableMetadata; - /** - * - * @type {CallStackTableData} - * @memberof InlineResponse2001 - */ - data: CallStackTableData; -} -/** - * - * @export - * @interface InlineResponse2002 - */ -export interface 
InlineResponse2002 { - /** - * - * @type {GpuInfoMetadata} - * @memberof InlineResponse2002 - */ - metadata: GpuInfoMetadata; - /** - * - * @type {any} - * @memberof InlineResponse2002 - */ - data: any; -} -/** - * - * @export - * @interface KernelGraph - */ -export interface KernelGraph { - /** - * - * @type {Graph} - * @memberof KernelGraph - */ - total: Graph; - device_target: string; -} -/** - * - * @export - * @interface KeyedColumn - */ -export interface KeyedColumn { - /** - * - * @type {string} - * @memberof KeyedColumn - */ - type: string; - /** - * - * @type {string} - * @memberof KeyedColumn - */ - name: string; - /** - * - * @type {string} - * @memberof KeyedColumn - */ - key: string; -} -/** - * - * @export - * @interface MemoryCurveDataAll - */ -export interface MemoryCurveDataAll { - /** - * - * @type {string} - * @memberof MemoryCurveDataAll - */ - default_device: string; - /** - * - * @type {Array} - * @memberof MemoryCurveDataAll - */ - devices: Array; - /** - * - * @type {MemoryCurveDataAscend} - * @memberof MemoryCurveDataAll - */ - total: MemoryCurveDataAscend; - /** - * - * @type {MemoryCurveDataAscend} - * @memberof MemoryCurveDataAll - */ - ptaGe: MemoryCurveDataAscend; -} -/** - * - * @export - * @interface MemoryCurveData - */ -export interface MemoryCurveData { - /** - * - * @type {MemoryCurveDataMetadata} - * @memberof MemoryCurveData - */ - metadata: MemoryCurveDataMetadata; - /** - * - * @type {Array} - * @memberof MemoryCurveData - */ - columns: Array; - /** - * - * @type {any} - * @memberof MemoryCurveData - */ - rows: any; -} -/** - * - * @export - * @interface MemoryCurveDataAscend - */ -export interface MemoryCurveDataAscend { - /** - * - * @type {MemoryCurveDataMetadata} - * @memberof MemoryCurveDataAscend - */ - metadata: MemoryCurveDataMetadata; - /** - * - * @type {any} - * @memberof MemoryCurveDataAscend - */ - columns: any; - /** - * - * @type {any} - * @memberof MemoryCurveDataAscend - */ - rows: any; -} -/** - * - * @export - * @interface MemoryCurveDataMetadata - */ -export interface MemoryCurveDataMetadata { - /** - * - * @type {string} - * @memberof MemoryCurveDataMetadata - */ - default_device: string; - /** - * - * @type {Array} - * @memberof MemoryCurveDataMetadata - */ - devices: Array; - /** - * - * @type {any} - * @memberof MemoryCurveDataMetadata - */ - peaks: any; - /** - * - * @type {any} - * @memberof MemoryCurveDataMetadata - */ - totals: any; - /** - * - * @type {number} - * @memberof MemoryCurveDataMetadata - */ - first_ts: number; - /** - * - * @type {string} - * @memberof MemoryCurveDataMetadata - */ - time_metric: string; - /** - * - * @type {string} - * @memberof MemoryCurveDataMetadata - */ - memory_metric: string; - /** - * - * @type {number} - * @memberof MemoryCurveDataMetadata - */ - time_factor: number; - /** - * - * @type {number} - * @memberof MemoryCurveDataMetadata - */ - memory_factor: number; -} -/** - * - * @export - * @interface MemoryEventsData - */ -export interface MemoryEventsData { - /** - * - * @type {MemoryEventsTableMetadata} - * @memberof MemoryEventsData - */ - metadata: MemoryEventsTableMetadata; - /** - * - * @type {Array} - * @memberof MemoryEventsData - */ - columns: Array; - /** - * - * @type {any} - * @memberof MemoryEventsData - */ - rows: any; -} -/** - * - * @exports - * @interface MemoryEventsDataAll - */ -export interface MemoryEventsDataAll { - /** - * - * @type {MemoryEventsData} - * @memberof MemoryEventsDataAll - */ - operator: MemoryEventsData; - /** - * - * @type {MemoryEventsData} - * 
@memberof MemoryEventsDataAll - */ - component: MemoryEventsData; -} -/** - * - * @export - * @interface MemoryEventsTableMetadata - */ -export interface MemoryEventsTableMetadata { - /** - * - * @type {string} - * @memberof MemoryEventsTableMetadata - */ - title: string; - /** - * - * @type {string} - * @memberof MemoryEventsTableMetadata - */ - default_device: string; - /** - * - * @type {string} - * @memberof MemoryEventsTableMetadata - */ - search?: string; - /** - * - * @type {string} - * @memberof MemoryEventsTableMetadata - */ - sort?: string; -} -/** - * - * @export - * @interface MemoryStatsData - */ -export interface MemoryStatsData { - /** - * - * @type {MemoryStatsTableMetadata} - * @memberof MemoryStatsData - */ - metadata: MemoryStatsTableMetadata; - /** - * - * @type {Array} - * @memberof MemoryStatsData - */ - columns: Array; - /** - * - * @type {any} - * @memberof MemoryStatsData - */ - rows: any; -} -/** - * - * @export - * @interface MemoryStatsTableMetadata - */ -export interface MemoryStatsTableMetadata { - /** - * - * @type {string} - * @memberof MemoryStatsTableMetadata - */ - title: string; - /** - * - * @type {string} - * @memberof MemoryStatsTableMetadata - */ - default_device: string; - /** - * - * @type {string} - * @memberof MemoryStatsTableMetadata - */ - search: string; - /** - * - * @type {string} - * @memberof MemoryStatsTableMetadata - */ - sort: string; -} -/** - * - * @export - * @interface ModuleStats - */ -export interface ModuleStats { - /** - * - * @type {string} - * @memberof ModuleStats - */ - name: string; - /** - * - * @type {string} - * @memberof ModuleStats - */ - id: string; - /** - * - * @type {number} - * @memberof ModuleStats - */ - occurences: number; - /** - * - * @type {number} - * @memberof ModuleStats - */ - operators: number; - /** - * - * @type {number} - * @memberof ModuleStats - */ - host_duration: number; - /** - * - * @type {number} - * @memberof ModuleStats - */ - self_host_duration: number; - /** - * - * @type {number} - * @memberof ModuleStats - */ - device_duration: number; - /** - * - * @type {number} - * @memberof ModuleStats - */ - self_device_duration: number; - /** - * - * @type {number} - * @memberof ModuleStats - */ - avg_duration: number; - /** - * - * @type {Array} - * @memberof ModuleStats - */ - children: Array; -} -/** - * - * @export - * @interface ModuleViewData - */ -export interface ModuleViewData { - /** - * - * @type {Array} - * @memberof ModuleViewData - */ - columns: Array; - /** - * - * @type {Array} - * @memberof ModuleViewData - */ - data: Array; -} -/** - * - * @export - * @interface OpAgg - */ -export interface OpAgg { - /** - * - * @type {string} - * @memberof OpAgg - */ - name: string; - /** - * - * @type {number} - * @memberof OpAgg - */ - calls: number; - /** - * - * @type {number} - * @memberof OpAgg - */ - host_duration: number; - /** - * - * @type {number} - * @memberof OpAgg - */ - device_duration: number; - /** - * - * @type {number} - * @memberof OpAgg - */ - self_host_duration: number; - /** - * - * @type {number} - * @memberof OpAgg - */ - self_device_duration: number; -} -/** - * - * @export - * @interface OpStats - */ -export interface OpStats { - /** - * - * @type {string} - * @memberof OpStats - */ - name: string; - /** - * - * @type {number} - * @memberof OpStats - */ - duration: number; - /** - * - * @type {number} - * @memberof OpStats - */ - device_duration: number; - /** - * - * @type {number} - * @memberof OpStats - */ - total_duration: number; - /** - * - * @type {Array} - * 
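// A hedged illustration with made-up numbers: an object literal matching the
// OpAgg interface defined above (presumably the element type of OpStats.aggs),
// showing the per-operator aggregation shape the diff-node endpoints return.
const exampleOpAgg: OpAgg = {
  name: 'aten::mm',
  calls: 128,
  host_duration: 5120,
  device_duration: 20480,
  self_host_duration: 2048,
  self_device_duration: 20480,
};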
@memberof OpStats - */ - aggs: Array; -} -/** - * - * @export - * @interface OperationTableData - */ -export interface OperationTableData extends Array {} -/** - * - * @export - * @interface OperationTableDataInner - */ -export interface OperationTableDataInner { - /** - * - * @type {string} - * @memberof OperationTableDataInner - */ - name: string; - /** - * - * @type {string} - * @memberof OperationTableDataInner - */ - input_shape?: string; - /** - * - * @type {number} - * @memberof OperationTableDataInner - */ - calls: number; - /** - * - * @type {number} - * @memberof OperationTableDataInner - */ - device_self_duration?: number; - /** - * - * @type {number} - * @memberof OperationTableDataInner - */ - device_total_duration?: number; - /** - * - * @type {number} - * @memberof OperationTableDataInner - */ - host_self_duration: number; - /** - * - * @type {number} - * @memberof OperationTableDataInner - */ - host_total_duration: number; - /** - * - * @type {boolean} - * @memberof OperationTableDataInner - */ - has_call_stack: boolean; - /** - * - * @type {string} - * @memberof OperationTableDataInner - */ - tc_eligible?: string; - /** - * - * @type {number} - * @memberof OperationTableDataInner - */ - tc_self_ratio?: number; - /** - * - * @type {number} - * @memberof OperationTableDataInner - */ - tc_total_ratio?: number; -} -/** - * - * @export - * @interface OperatorGraph - */ -export interface OperatorGraph { - /** - * - * @type {Graph} - * @memberof OperatorGraph - */ - device_total_time: Graph; - /** - * - * @type {Graph} - * @memberof OperatorGraph - */ - device_self_time: Graph; - /** - * - * @type {Graph} - * @memberof OperatorGraph - */ - host_total_time: Graph; - /** - * - * @type {Graph} - * @memberof OperatorGraph - */ - host_self_time: Graph; -} -/** - * - * @export - * @interface OperatorNode - */ -export interface OperatorNode { - /** - * - * @type {string} - * @memberof OperatorNode - */ - name: string; - /** - * - * @type {number} - * @memberof OperatorNode - */ - start_time: number; - /** - * - * @type {number} - * @memberof OperatorNode - */ - end_time: number; - /** - * - * @type {string} - * @memberof OperatorNode - */ - type: string; - /** - * - * @type {number} - * @memberof OperatorNode - */ - tid: number; - /** - * - * @type {Array} - * @memberof OperatorNode - */ - children: Array; -} -/** - * - * @export - * @interface Overview - */ -export interface Overview { - /** - * - * @type {Array} - * @memberof Overview - */ - performance: Array; - /** - * - * @type {Array} - * @memberof Overview - */ - environments: Array; - /** - * - * @type {StepedGraph} - * @memberof Overview - */ - steps: StepedGraph; - /** - * - * @type {string} - * @memberof Overview - */ - recommendations: string; - /** - * - * @type {GpuMetrics} - * @memberof Overview - */ - gpu_metrics?: GpuMetrics; -} -/** - * - * @export - * @interface Performance - */ -export interface Performance { - /** - * - * @type {string} - * @memberof Performance - */ - name: string; - /** - * - * @type {string} - * @memberof Performance - */ - description?: string; - /** - * - * @type {string} - * @memberof Performance - */ - value?: string; - /** - * - * @type {string} - * @memberof Performance - */ - extra?: string; - /** - * - * @type {Array} - * @memberof Performance - */ - children?: Array; -} -/** - * - * @export - * @interface Runs - */ -export interface Runs { - /** - * - * @type {Array} - * @memberof Runs - */ - runs: Array; - /** - * - * @type {boolean} - * @memberof Runs - */ - loading: boolean; -} -/** - 
* - * @export - * @interface TableData - */ -export interface TableData { - /** - * - * @type {Graph} - * @memberof TableData - */ - data: Graph; - /** - * - * @type {TableMetadata} - * @memberof TableData - */ - metadata: TableMetadata; -} -/** - * - * @export - * @interface TableMetadata - */ -export interface TableMetadata { - /** - * - * @type {string} - * @memberof TableMetadata - */ - sort: string; - /** - * - * @type {any} - * @memberof TableMetadata - */ - tooltips?: any; -} -/** - * - * @export - * @interface TensorCoresGraph - */ -export interface TensorCoresGraph { - /** - * - * @type {Graph} - * @memberof TensorCoresGraph - */ - total: Graph; -} -/** - * - * @export - * @interface ValueAndFormat - */ -export interface ValueAndFormat { - /** - * - * @type {string | number | boolean} - * @memberof ValueAndFormat - */ - v: string | number | boolean; - /** - * - * @type {string} - * @memberof ValueAndFormat - */ - f: string; -} -/** - * - * @exports - * @interface Views - */ -export interface Views { - /** - * - * @type {string} - * @memberof Views - */ - device_target: string; - /** - * - * @type {Array} - * @memberof Views - */ - views: Array; -} -/** - * DefaultApi - fetch parameter creator - * @export - */ -export const DefaultApiFetchParamCreator = function ( - configuration?: Configuration -) { - return { - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} exp_run - * @param {string} exp_worker - * @param {string} exp_span - * @param {string} [path] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - diffnodeGet( - run: string, - worker: string, - span: string, - exp_run: string, - exp_worker: string, - exp_span: string, - path?: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling diffnodeGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling diffnodeGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling diffnodeGet.' - ); - } - // verify required parameter 'exp_run' is not null or undefined - if (exp_run === null || exp_run === undefined) { - throw new RequiredError( - 'exp_run', - 'Required parameter exp_run was null or undefined when calling diffnodeGet.' - ); - } - // verify required parameter 'exp_worker' is not null or undefined - if (exp_worker === null || exp_worker === undefined) { - throw new RequiredError( - 'exp_worker', - 'Required parameter exp_worker was null or undefined when calling diffnodeGet.' - ); - } - // verify required parameter 'exp_span' is not null or undefined - if (exp_span === null || exp_span === undefined) { - throw new RequiredError( - 'exp_span', - 'Required parameter exp_span was null or undefined when calling diffnodeGet.' 
- ); - } - const localVarPath = `/diffnode`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - if (exp_run !== undefined) { - localVarQueryParameter.exp_run = exp_run; - } - - if (exp_worker !== undefined) { - localVarQueryParameter.exp_worker = exp_worker; - } - - if (exp_span !== undefined) { - localVarQueryParameter.exp_span = exp_span; - } - - if (path !== undefined) { - localVarQueryParameter.path = path; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedCommopsGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling distributedCommopsGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling distributedCommopsGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling distributedCommopsGet.' - ); - } - const localVarPath = `/distributed/commops`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. 
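// A hedged sketch of why the generated code deletes `localVarUrlObj.search`
// before calling url.format(): with Node's legacy url API, format() prefers an
// existing `search` string over the mutated `query` object, so stale query
// parameters would otherwise win. The path and values here are placeholders.
const demoUrlObj = url.parse('/kernel?group_by=Kernel', true);
demoUrlObj.query = Object.assign({}, demoUrlObj.query, { run: 'run1' });
delete demoUrlObj.search; // without this, format() would reuse '?group_by=Kernel'
// url.format(demoUrlObj) -> '/kernel?group_by=Kernel&run=run1'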
- * @throws {RequiredError} - */ - distributedGpuinfoGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling distributedGpuinfoGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling distributedGpuinfoGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling distributedGpuinfoGet.' - ); - } - const localVarPath = `/distributed/gpuinfo`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedOverlapGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling distributedOverlapGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling distributedOverlapGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling distributedOverlapGet.' 
- ); - } - const localVarPath = `/distributed/overlap`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedWaittimeGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling distributedWaittimeGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling distributedWaittimeGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling distributedWaittimeGet.' - ); - } - const localVarPath = `/distributed/waittime`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - kernelGet( - run: string, - worker: string, - span: string, - group_by: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling kernelGet.' 
- ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling kernelGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling kernelGet.' - ); - } - // verify required parameter 'group_by' is not null or undefined - if (group_by === null || group_by === undefined) { - throw new RequiredError( - 'group_by', - 'Required parameter group_by was null or undefined when calling kernelGet.' - ); - } - const localVarPath = `/kernel`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - if (group_by !== undefined) { - localVarQueryParameter.group_by = group_by; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} [group_by] Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - kernelTableGet( - run: string, - worker: string, - span: string, - group_by?: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling kernelTableGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling kernelTableGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling kernelTableGet.' 
- ); - } - const localVarPath = `/kernel/table`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - if (group_by !== undefined) { - localVarQueryParameter.group_by = group_by; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - kernelTcPieGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling kernelTcPieGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling kernelTcPieGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling kernelTcPieGet.' - ); - } - const localVarPath = `/kernel/tc_pie`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - memoryCurveGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling memoryCurveGet.' 
- ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling memoryCurveGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling memoryCurveGet.' - ); - } - const localVarPath = `/memory_curve`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {number} [start_ts] - * @param {number} [end_ts] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - memoryEventsGet( - run: string, - worker: string, - span: string, - start_ts?: number, - end_ts?: number, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling memoryEventsGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling memoryEventsGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling memoryEventsGet.' 
- ); - } - const localVarPath = `/memory_events`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - if (start_ts !== undefined) { - localVarQueryParameter.start_ts = start_ts; - } - - if (end_ts !== undefined) { - localVarQueryParameter.end_ts = end_ts; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {number} [start_ts] - * @param {number} [end_ts] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - memoryGet( - run: string, - worker: string, - span: string, - start_ts?: number, - end_ts?: number, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling memoryGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling memoryGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling memoryGet.' - ); - } - const localVarPath = `/memory`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - if (start_ts !== undefined) { - localVarQueryParameter.start_ts = start_ts; - } - - if (end_ts !== undefined) { - localVarQueryParameter.end_ts = end_ts; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. 
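// A hedged usage sketch (placeholder values): optional parameters such as
// start_ts/end_ts are only appended to the query string when they are defined,
// so omitting them simply leaves them out of the request.
const memoryEventsArgs = DefaultApiFetchParamCreator().memoryEventsGet('run1', 'worker_0', '1');
// memoryEventsArgs.url -> '/memory_events?run=run1&worker=worker_0&span=1'
// (no start_ts/end_ts, because both were left undefined)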
- * @throws {RequiredError} - */ - moduleGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling moduleGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling moduleGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling moduleGet.' - ); - } - const localVarPath = `/module`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - operationGet( - run: string, - worker: string, - span: string, - group_by: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling operationGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling operationGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling operationGet.' - ); - } - // verify required parameter 'group_by' is not null or undefined - if (group_by === null || group_by === undefined) { - throw new RequiredError( - 'group_by', - 'Required parameter group_by was null or undefined when calling operationGet.' 
- ); - } - const localVarPath = `/operation`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - if (group_by !== undefined) { - localVarQueryParameter.group_by = group_by; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {string} op_name - * @param {string} [input_shape] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - operationStackGet( - run: string, - worker: string, - span: string, - group_by: string, - op_name: string, - input_shape?: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling operationStackGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling operationStackGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling operationStackGet.' - ); - } - // verify required parameter 'group_by' is not null or undefined - if (group_by === null || group_by === undefined) { - throw new RequiredError( - 'group_by', - 'Required parameter group_by was null or undefined when calling operationStackGet.' - ); - } - // verify required parameter 'op_name' is not null or undefined - if (op_name === null || op_name === undefined) { - throw new RequiredError( - 'op_name', - 'Required parameter op_name was null or undefined when calling operationStackGet.' 
- ); - } - const localVarPath = `/operation/stack`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - if (group_by !== undefined) { - localVarQueryParameter.group_by = group_by; - } - - if (op_name !== undefined) { - localVarQueryParameter.op_name = op_name; - } - - if (input_shape !== undefined) { - localVarQueryParameter.input_shape = input_shape; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - operationTableGet( - run: string, - worker: string, - span: string, - group_by: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling operationTableGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling operationTableGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling operationTableGet.' - ); - } - // verify required parameter 'group_by' is not null or undefined - if (group_by === null || group_by === undefined) { - throw new RequiredError( - 'group_by', - 'Required parameter group_by was null or undefined when calling operationTableGet.' 
- ); - } - const localVarPath = `/operation/table`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - if (group_by !== undefined) { - localVarQueryParameter.group_by = group_by; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - overviewGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling overviewGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling overviewGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling overviewGet.' - ); - } - const localVarPath = `/overview`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {*} [options] Override http request option. 
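// A hedged sketch (placeholder values): the required-parameter checks above
// throw a RequiredError synchronously, before any request is built. The cast
// only bypasses compile-time checking for the sake of the illustration.
try {
  DefaultApiFetchParamCreator().overviewGet(undefined as any, 'worker_0', '1');
} catch (e) {
  // e instanceof RequiredError, with e.field === 'run'
}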
- * @throws {RequiredError} - */ - runsGet(options: any = {}): FetchArgs { - const localVarPath = `/runs`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - spansGet(run: string, worker: string, options: any = {}): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling spansGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling spansGet.' - ); - } - const localVarPath = `/spans`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - traceGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling traceGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling traceGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling traceGet.' 
- ); - } - const localVarPath = `/trace`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - treeGet( - run: string, - worker: string, - span: string, - options: any = {} - ): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling treeGet.' - ); - } - // verify required parameter 'worker' is not null or undefined - if (worker === null || worker === undefined) { - throw new RequiredError( - 'worker', - 'Required parameter worker was null or undefined when calling treeGet.' - ); - } - // verify required parameter 'span' is not null or undefined - if (span === null || span === undefined) { - throw new RequiredError( - 'span', - 'Required parameter span was null or undefined when calling treeGet.' - ); - } - const localVarPath = `/tree`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (worker !== undefined) { - localVarQueryParameter.worker = worker; - } - - if (span !== undefined) { - localVarQueryParameter.span = span; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - viewsGet(run: string, options: any = {}): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling viewsGet.' 
- ); - } - const localVarPath = `/views`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @param {string} run - * @param {string} view - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - workersGet(run: string, view: string, options: any = {}): FetchArgs { - // verify required parameter 'run' is not null or undefined - if (run === null || run === undefined) { - throw new RequiredError( - 'run', - 'Required parameter run was null or undefined when calling workersGet.' - ); - } - // verify required parameter 'view' is not null or undefined - if (view === null || view === undefined) { - throw new RequiredError( - 'view', - 'Required parameter view was null or undefined when calling workersGet.' - ); - } - const localVarPath = `/workers`; - const localVarUrlObj = url.parse(localVarPath, true); - const localVarRequestOptions = Object.assign({ method: 'GET' }, options); - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - if (run !== undefined) { - localVarQueryParameter.run = run; - } - - if (view !== undefined) { - localVarQueryParameter.view = view; - } - - localVarUrlObj.query = Object.assign( - {}, - localVarUrlObj.query, - localVarQueryParameter, - options.query - ); - // fix override query string Detail: https://stackoverflow.com/a/7517673/1077943 - delete localVarUrlObj.search; - localVarRequestOptions.headers = Object.assign( - {}, - localVarHeaderParameter, - options.headers - ); - - return { - url: url.format(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - }; -}; - -/** - * DefaultApi - functional programming interface - * @export - */ -export const DefaultApiFp = function (configuration?: Configuration) { - return { - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} exp_run - * @param {string} exp_worker - * @param {string} exp_span - * @param {string} [path] - * @param {*} [options] Override http request option. 
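// A hedged sketch of the two-layer design (placeholder values): the param
// creator above only builds FetchArgs (URL plus request options), while
// DefaultApiFp below is the layer that actually issues the request.
const workersArgs = DefaultApiFetchParamCreator().workersGet('run1', 'Operator');
// workersArgs.url -> '/workers?run=run1&view=Operator'
// workersArgs.options -> { method: 'GET', headers: {} }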
- * @throws {RequiredError} - */ - diffnodeGet( - run: string, - worker: string, - span: string, - exp_run: string, - exp_worker: string, - exp_span: string, - path?: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).diffnodeGet( - run, - worker, - span, - exp_run, - exp_worker, - exp_span, - path, - options - ); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedCommopsGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).distributedCommopsGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedGpuinfoGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).distributedGpuinfoGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedOverlapGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).distributedOverlapGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - distributedWaittimeGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).distributedWaittimeGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - kernelGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).kernelGet(run, worker, span, group_by, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} [group_by] Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - kernelTableGet( - run: string, - worker: string, - span: string, - group_by?: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).kernelTableGet(run, worker, span, group_by, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - kernelTcPieGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).kernelTcPieGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - memoryCurveGet( - run: string, - worker: string, - span: string, - options?: any - ): ( - fetch?: FetchAPI, - basePath?: string - ) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).memoryCurveGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {number} [start_ts] - * @param {number} [end_ts] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - memoryEventsGet( - run: string, - worker: string, - span: string, - start_ts?: number, - end_ts?: number, - options?: any - ): ( - fetch?: FetchAPI, - basePath?: string - ) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).memoryEventsGet(run, worker, span, start_ts, end_ts, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {number} [start_ts] - * @param {number} [end_ts] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - memoryGet( - run: string, - worker: string, - span: string, - start_ts?: number, - end_ts?: number, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).memoryGet(run, worker, span, start_ts, end_ts, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - moduleGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).moduleGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - operationGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).operationGet(run, worker, span, group_by, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {string} op_name - * @param {string} [input_shape] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - operationStackGet( - run: string, - worker: string, - span: string, - group_by: string, - op_name: string, - input_shape?: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).operationStackGet( - run, - worker, - span, - group_by, - op_name, - input_shape, - options - ); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - operationTableGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).operationTableGet(run, worker, span, group_by, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - overviewGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).overviewGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - runsGet( - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = - DefaultApiFetchParamCreator(configuration).runsGet(options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - spansGet( - run: string, - worker: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise> { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).spansGet(run, worker, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - traceGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).traceGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - treeGet( - run: string, - worker: string, - span: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).treeGet(run, worker, span, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - viewsGet( - run: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).viewsGet(run, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - /** - * - * @param {string} run - * @param {string} view - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - workersGet( - run: string, - view: string, - options?: any - ): (fetch?: FetchAPI, basePath?: string) => Promise> { - const localVarFetchArgs = DefaultApiFetchParamCreator( - configuration - ).workersGet(run, view, options); - return ( - fetch: FetchAPI = portableFetch, - basePath: string = BASE_PATH - ) => { - return fetch( - basePath + localVarFetchArgs.url, - localVarFetchArgs.options - ).then((response) => { - if (response.status >= 200 && response.status < 300) { - return response.json(); - } else { - throw response; - } - }); - }; - }, - }; -}; - -/** - * DefaultApi - factory interface - * @export - */ -export const DefaultApiFactory = function ( - configuration?: Configuration, - fetch?: FetchAPI, - basePath?: string -) { - return { - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} exp_run - * @param {string} exp_worker - * @param {string} exp_span - * @param {string} [path] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - diffnodeGet( - run: string, - worker: string, - span: string, - exp_run: string, - exp_worker: string, - exp_span: string, - path?: string, - options?: any - ) { - return DefaultApiFp(configuration).diffnodeGet( - run, - worker, - span, - exp_run, - exp_worker, - exp_span, - path, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedCommopsGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(configuration).distributedCommopsGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedGpuinfoGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(configuration).distributedGpuinfoGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedOverlapGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(configuration).distributedOverlapGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - distributedWaittimeGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(configuration).distributedWaittimeGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - kernelGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ) { - return DefaultApiFp(configuration).kernelGet( - run, - worker, - span, - group_by, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} [group_by] Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - kernelTableGet( - run: string, - worker: string, - span: string, - group_by?: string, - options?: any - ) { - return DefaultApiFp(configuration).kernelTableGet( - run, - worker, - span, - group_by, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - kernelTcPieGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(configuration).kernelTcPieGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - memoryCurveGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(configuration).memoryCurveGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {number} [start_ts] - * @param {number} [end_ts] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - memoryEventsGet( - run: string, - worker: string, - span: string, - start_ts?: number, - end_ts?: number, - options?: any - ) { - return DefaultApiFp(configuration).memoryEventsGet( - run, - worker, - span, - start_ts, - end_ts, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {number} [start_ts] - * @param {number} [end_ts] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - memoryGet( - run: string, - worker: string, - span: string, - start_ts?: number, - end_ts?: number, - options?: any - ) { - return DefaultApiFp(configuration).memoryGet( - run, - worker, - span, - start_ts, - end_ts, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - moduleGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(configuration).moduleGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - operationGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ) { - return DefaultApiFp(configuration).operationGet( - run, - worker, - span, - group_by, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {string} op_name - * @param {string} [input_shape] - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - operationStackGet( - run: string, - worker: string, - span: string, - group_by: string, - op_name: string, - input_shape?: string, - options?: any - ) { - return DefaultApiFp(configuration).operationStackGet( - run, - worker, - span, - group_by, - op_name, - input_shape, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - operationTableGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ) { - return DefaultApiFp(configuration).operationTableGet( - run, - worker, - span, - group_by, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - overviewGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(configuration).overviewGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - runsGet(options?: any) { - return DefaultApiFp(configuration).runsGet(options)(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - spansGet(run: string, worker: string, options?: any) { - return DefaultApiFp(configuration).spansGet( - run, - worker, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - traceGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(configuration).traceGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - treeGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(configuration).treeGet( - run, - worker, - span, - options - )(fetch, basePath); - }, - /** - * - * @param {string} run - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - viewsGet(run: string, options?: any) { - return DefaultApiFp(configuration).viewsGet(run, options)( - fetch, - basePath - ); - }, - /** - * - * @param {string} run - * @param {string} view - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - workersGet(run: string, view: string, options?: any) { - return DefaultApiFp(configuration).workersGet( - run, - view, - options - )(fetch, basePath); - }, - }; -}; - -/** - * DefaultApi - object-oriented interface - * @export - * @class DefaultApi - * @extends {BaseAPI} - */ -export class DefaultApi extends BaseAPI { - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} exp_run - * @param {string} exp_worker - * @param {string} exp_span - * @param {string} [path] - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - * @memberof DefaultApi - */ - public diffnodeGet( - run: string, - worker: string, - span: string, - exp_run: string, - exp_worker: string, - exp_span: string, - path?: string, - options?: any - ) { - return DefaultApiFp(this.configuration).diffnodeGet( - run, - worker, - span, - exp_run, - exp_worker, - exp_span, - path, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public distributedCommopsGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(this.configuration).distributedCommopsGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public distributedGpuinfoGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(this.configuration).distributedGpuinfoGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public distributedOverlapGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(this.configuration).distributedOverlapGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public distributedWaittimeGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(this.configuration).distributedWaittimeGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public kernelGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ) { - return DefaultApiFp(this.configuration).kernelGet( - run, - worker, - span, - group_by, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} [group_by] Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public kernelTableGet( - run: string, - worker: string, - span: string, - group_by?: string, - options?: any - ) { - return DefaultApiFp(this.configuration).kernelTableGet( - run, - worker, - span, - group_by, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - * @memberof DefaultApi - */ - public kernelTcPieGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(this.configuration).kernelTcPieGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public memoryCurveGet( - run: string, - worker: string, - span: string, - options?: any - ) { - return DefaultApiFp(this.configuration).memoryCurveGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {number} [start_ts] - * @param {number} [end_ts] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public memoryEventsGet( - run: string, - worker: string, - span: string, - start_ts?: number, - end_ts?: number, - options?: any - ) { - return DefaultApiFp(this.configuration).memoryEventsGet( - run, - worker, - span, - start_ts, - end_ts, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {number} [start_ts] - * @param {number} [end_ts] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public memoryGet( - run: string, - worker: string, - span: string, - start_ts?: number, - end_ts?: number, - options?: any - ) { - return DefaultApiFp(this.configuration).memoryGet( - run, - worker, - span, - start_ts, - end_ts, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public moduleGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(this.configuration).moduleGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public operationGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ) { - return DefaultApiFp(this.configuration).operationGet( - run, - worker, - span, - group_by, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {string} op_name - * @param {string} [input_shape] - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public operationStackGet( - run: string, - worker: string, - span: string, - group_by: string, - op_name: string, - input_shape?: string, - options?: any - ) { - return DefaultApiFp(this.configuration).operationStackGet( - run, - worker, - span, - group_by, - op_name, - input_shape, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {string} group_by Group By - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - * @memberof DefaultApi - */ - public operationTableGet( - run: string, - worker: string, - span: string, - group_by: string, - options?: any - ) { - return DefaultApiFp(this.configuration).operationTableGet( - run, - worker, - span, - group_by, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public overviewGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(this.configuration).overviewGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public runsGet(options?: any) { - return DefaultApiFp(this.configuration).runsGet(options)( - this.fetch, - this.basePath - ); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public spansGet(run: string, worker: string, options?: any) { - return DefaultApiFp(this.configuration).spansGet( - run, - worker, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public traceGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(this.configuration).traceGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {string} worker - * @param {string} span - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public treeGet(run: string, worker: string, span: string, options?: any) { - return DefaultApiFp(this.configuration).treeGet( - run, - worker, - span, - options - )(this.fetch, this.basePath); - } - - /** - * - * @param {string} run - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public viewsGet(run: string, options?: any) { - return DefaultApiFp(this.configuration).viewsGet(run, options)( - this.fetch, - this.basePath - ); - } - - /** - * - * @param {string} run - * @param {string} view - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public workersGet(run: string, view: string, options?: any) { - return DefaultApiFp(this.configuration).workersGet( - run, - view, - options - )(this.fetch, this.basePath); - } -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/configuration.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/configuration.ts deleted file mode 100644 index 85b77bf651c049ec5a2ec85379414f619904c6dd..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/configuration.ts +++ /dev/null @@ -1,68 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. 
- *--------------------------------------------------------------------------------------------*/ - -// tslint:disable -/** - * Pytorch profile API - * No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) - * - * OpenAPI spec version: 1.0.0 - * - * - * NOTE: This file is auto generated by the swagger code generator program. - * https://github.com/swagger-api/swagger-codegen.git - * Do not edit the file manually. - */ -export interface ConfigurationParameters { - apiKey?: string | ((name: string) => string); - username?: string; - password?: string; - accessToken?: string | ((name: string, scopes?: string[]) => string); - basePath?: string; -} - -export class Configuration { - /** - * parameter for apiKey security - * @param name security name - * @memberof Configuration - */ - apiKey?: string | ((name: string) => string); - /** - * parameter for basic security - * - * @type {string} - * @memberof Configuration - */ - username?: string; - /** - * parameter for basic security - * - * @type {string} - * @memberof Configuration - */ - password?: string; - /** - * parameter for oauth2 security - * @param name security name - * @param scopes oauth2 scope - * @memberof Configuration - */ - accessToken?: string | ((name: string, scopes?: string[]) => string); - /** - * override base path - * - * @type {string} - * @memberof Configuration - */ - basePath?: string; - - constructor(param: ConfigurationParameters = {}) { - this.apiKey = param.apiKey; - this.username = param.username; - this.password = param.password; - this.accessToken = param.accessToken; - this.basePath = param.basePath; - } -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/custom.d.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/custom.d.ts deleted file mode 100644 index 992af468898f15bee4f609a8cb752e21f0a9ad48..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/custom.d.ts +++ /dev/null @@ -1,6 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -declare module 'portable-fetch'; -declare module 'url'; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/index.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/index.ts deleted file mode 100644 index 7ad784e60de2777174cea9d902ad9cf2550fad68..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/generated/index.ts +++ /dev/null @@ -1,18 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -// tslint:disable -/** - * Pytorch profile API - * No description provided (generated by Swagger Codegen https://github.com/swagger-api/swagger-codegen) - * - * OpenAPI spec version: 1.0.0 - * - * - * NOTE: This file is auto generated by the swagger code generator program. - * https://github.com/swagger-api/swagger-codegen.git - * Do not edit the file manually. 
- */ -export * from './api'; -export * from './configuration'; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/index.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/api/index.ts deleted file mode 100644 index 98b35abfbc09785ffa09b1bbaa48c73685ec84f5..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/index.ts +++ /dev/null @@ -1,8 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as api from './generated'; - -export const defaultApi = new api.DefaultApi(undefined, undefined, fetch); -export * from './generated/api'; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/mock.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/api/mock.ts deleted file mode 100644 index 4b4b447d97192b7c7c00784dd9176faeed25d64b..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/mock.ts +++ /dev/null @@ -1,6684 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -export class MockAPI { - runsGet() { - return { - runs: ['resnet50_num_workers_0', 'resnet50_num_workers_4'], - loading: false, - }; - } - - viewsGet(run: string) { - return Promise.resolve([ - 'Overview', - 'Operator', - 'Kernel', - 'Trace', - 'Memory', - ]); - } - - spansGet(run: string, view: string): Promise { - return Promise.resolve(['1', '2']); - } - - workersGet(run: string, view: string): Promise { - return Promise.resolve(['worker0']); - } - - overviewGet(run: string, worker: string, span: string) { - return Promise.resolve({ - steps: { - columns: [ - { type: 'string', name: 'Step' }, - { type: 'number', name: 'Kernel' }, - { type: 'string', role: 'tooltip', p: { html: 'true' } }, - { type: 'number', name: 'Memcpy' }, - { type: 'string', role: 'tooltip', p: { html: 'true' } }, - { type: 'number', name: 'Memset' }, - { type: 'string', role: 'tooltip', p: { html: 'true' } }, - { type: 'number', name: 'Runtime' }, - { type: 'string', role: 'tooltip', p: { html: 'true' } }, - { type: 'number', name: 'DataLoader' }, - { type: 'string', role: 'tooltip', p: { html: 'true' } }, - { type: 'number', name: 'CPU Exec' }, - { type: 'string', role: 'tooltip', p: { html: 'true' } }, - { type: 'number', name: 'Other' }, - { type: 'string', role: 'tooltip', p: { html: 'true' } }, - ], - rows: [ - [ - '5', - 98598, - '
Step 5<br>Total: 187948us<br>Kernel: 98598us<br>Percentage: 52.46%', - 1941, - 'Step 5<br>Total: 187948us<br>Memcpy: 1941us<br>Percentage: 1.03%', - 90, - 'Step 5<br>Total: 187948us<br>Memset: 90us<br>Percentage: 0.05%', - 2796, - 'Step 5<br>Total: 187948us<br>Runtime: 2796us<br>Percentage: 1.49%', - 69317, - 'Step 5<br>Total: 187948us<br>DataLoader: 69317us<br>Percentage: 36.88%', - 14091, - 'Step 5<br>Total: 187948us<br>CPU Exec: 14091us<br>Percentage: 7.5%', - 1115, - 'Step 5<br>Total: 187948us<br>Other: 1115us<br>Percentage: 0.59%', - ], - [ - '6', - 98570, - 'Step 6<br>Total: 175153us<br>Kernel: 98570us<br>Percentage: 56.28%', - 1947, - 'Step 6<br>Total: 175153us<br>Memcpy: 1947us<br>Percentage: 1.11%', - 89, - 'Step 6<br>Total: 175153us<br>Memset: 89us<br>Percentage: 0.05%', - 2762, - 'Step 6<br>Total: 175153us<br>Runtime: 2762us<br>Percentage: 1.58%', - 57669, - 'Step 6<br>Total: 175153us<br>DataLoader: 57669us<br>Percentage: 32.92%', - 12968, - 'Step 6<br>Total: 175153us<br>CPU Exec: 12968us<br>Percentage: 7.4%', - 1148, - 'Step 6<br>Total: 175153us<br>Other: 1148us<br>Percentage: 0.66%', - ], - [ - '7', - 98596, - 'Step 7<br>Total: 179733us<br>Kernel: 98596us<br>Percentage: 54.86%', - 1931, - 'Step 7<br>Total: 179733us<br>Memcpy: 1931us<br>Percentage: 1.07%', - 91, - 'Step 7<br>Total: 179733us<br>Memset: 91us<br>Percentage: 0.05%', - 2877, - 'Step 7<br>Total: 179733us<br>Runtime: 2877us<br>Percentage: 1.6%', - 61257, - 'Step 7<br>Total: 179733us<br>DataLoader: 61257us<br>Percentage: 34.08%', - 13768, - 'Step 7<br>Total: 179733us<br>CPU Exec: 13768us<br>Percentage: 7.66%', - 1213, - 'Step 7<br>Total: 179733us<br>Other: 1213us<br>Percentage: 0.67%', - ], - [ - '8', - 98623, - 'Step 8<br>Total: 174564us<br>Kernel: 98623us<br>Percentage: 56.5%', - 1938, - 'Step 8<br>Total: 174564us<br>Memcpy: 1938us<br>Percentage: 1.11%', - 89, - 'Step 8<br>Total: 174564us<br>Memset: 89us<br>Percentage: 0.05%', - 2841, - 'Step 8<br>Total: 174564us<br>Runtime: 2841us<br>Percentage: 1.63%', - 56453, - 'Step 8<br>Total: 174564us<br>DataLoader: 56453us<br>Percentage: 32.34%', - 13420, - 'Step 8<br>Total: 174564us<br>CPU Exec: 13420us<br>Percentage: 7.69%', - 1200, - 'Step 8<br>Total: 174564us<br>Other: 1200us<br>Percentage: 0.69%', - ], - [ - '9', - 98504, - 'Step 9<br>Total: 182172us<br>Kernel: 98504us<br>Percentage: 54.07%', - 1937, - 'Step 9<br>Total: 182172us<br>Memcpy: 1937us<br>Percentage: 1.06%', - 87, - 'Step 9<br>Total: 182172us<br>Memset: 87us<br>Percentage: 0.05%', - 2788, - 'Step 9<br>Total: 182172us<br>Runtime: 2788us<br>Percentage: 1.53%', - 62690, - 'Step 9<br>Total: 182172us<br>DataLoader: 62690us<br>Percentage: 34.41%', - 15025, - 'Step 9<br>Total: 182172us<br>CPU Exec: 15025us<br>Percentage: 8.25%', - 1141, - 'Step 9<br>Total: 182172us<br>Other: 1141us<br>Percentage: 0.63%', - ], - [ - '10', - 98641, - 'Step 10<br>Total: 165983us<br>Kernel: 98641us<br>Percentage: 59.43%', - 1798, - 'Step 10<br>Total: 165983us<br>Memcpy: 1798us<br>Percentage: 1.08%', - 88, - 'Step 10<br>Total: 165983us<br>Memset: 88us<br>Percentage: 0.05%', - 3381, - 'Step 10<br>Total: 165983us<br>Runtime: 3381us<br>Percentage: 2.04%', - 48185, - 'Step 10<br>Total: 165983us<br>DataLoader: 48185us<br>Percentage: 29.03%', - 12773, - 'Step 10<br>Total: 165983us<br>CPU Exec: 12773us<br>Percentage: 7.7%', - 1117, - 'Step 10<br>Total: 165983us<br>Other: 1117us<br>Percentage: 0.67%
', - ], - ], - }, - performance: [ - { - name: 'Average Step Time', - description: '', - value: 177592, - extra: 100, - children: [ - { name: 'Kernel', description: '', value: 98589, extra: 55.51 }, - { name: 'Memcpy', description: '', value: 1915, extra: 1.08 }, - { name: 'Memset', description: '', value: 89, extra: 0.05 }, - { name: 'Runtime', description: '', value: 2908, extra: 1.64 }, - { name: 'DataLoader', description: '', value: 59262, extra: 33.37 }, - { name: 'CPU Exec', description: '', value: 13674, extra: 7.7 }, - { name: 'Other', description: '', value: 1156, extra: 0.65 }, - ], - }, - ], - recommendations: - '
• This run has high time cost on input data loading. 33.4% of the step time is in DataLoader. You could try to set num_workers on DataLoader\'s construction and enable multi-processes on data loading.<br>• Kernels with 68% time are launched by Tensor Cores eligible operators. You could enable Automatic Mixed Precision to speedup by using FP16.
', - environments: [ - { title: 'Number of Worker(s)', value: '1' }, - { title: 'Device Type', value: 'GPU' }, - ], - gpu_metrics: { - title: 'GPU Summary', - data: [ - { title: 'GPU 0:', value: '' }, - { title: 'Name', value: 'Tesla V100-DGXS-32GB' }, - { title: 'Memory', value: '31.74 GB' }, - { title: 'Compute Capability', value: '7.0' }, - { title: 'GPU Utilization', value: '55.51 %' }, - { title: 'Est. SM Efficiency', value: '54.68 %' }, - { title: 'Est. Achieved Occupancy', value: '49.13 %' }, - { title: 'Kernel Time using Tensor Cores', value: '0.0 %' }, - ], - tooltip: - "The GPU usage metrics:\n\nGPU Utilization:\nGPU busy time / All steps time. The higher, the better. GPU busy time is the time during which there is at least one GPU kernel running on it. All steps time is the total time of all profiler steps(or called as iterations).\n\nEst. SM Efficiency:\nEstimated Stream Multiprocessor Efficiency. The higher, the better. This metric of a kernel, SM_Eff_K = min(blocks of this kernel / SM number of this GPU, 100%). This overall number is the sum of all kernels' SM_Eff_K weighted by kernel's execution duration, divided by all steps time.\n\nEst. Achieved Occupancy:\nFor most cases such as memory bandwidth bounded kernels, the higher the better. Occupancy is the ratio of active warps on an SM to the maximum number of active warps supported by the SM. The theoretical occupancy of a kernel is upper limit occupancy of this kernel, limited by multiple factors such as kernel shape, kernel used resource, and the GPU compute capability.\nEst. Achieved Occupancy of a kernel, OCC_K = min(threads of the kernel / SM number / max threads per SM, theoretical occupancy of the kernel). This overall number is the weighted average of all kernels' OCC_K using kernel's execution duration as weight. 
It shows fine-grained low-level GPU utilization.\n\nKernel using Tensor Cores:\nTotal GPU Time for Tensor Core kernels / Total GPU Time for all kernels.\n", - }, - }); - } - - diffnodeGet( - run: string, - worker: string, - span: string, - exp_run: string, - exp_worker: string, - exp_span: string, - path?: string - ) { - return Promise.resolve({ - left: { - name: 'multiple nodes', - duration: 4246748, - device_duration: 376761, - total_duration: 3823182, - aggs: [ - { - name: 'aten::empty', - calls: 4214, - host_duration: 186312, - device_duration: 0, - self_host_duration: 186312, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 846, - host_duration: 31902, - device_duration: 736, - self_host_duration: 17460, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 520, - host_duration: 62713, - device_duration: 0, - self_host_duration: 32640, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 2696, - host_duration: 1711486, - device_duration: 8796, - self_host_duration: 37162, - self_device_duration: 0, - }, - { - name: 'detach', - calls: 256, - host_duration: 4379, - device_duration: 0, - self_host_duration: 4379, - self_device_duration: 0, - }, - { - name: 'aten::detach', - calls: 256, - host_duration: 10596, - device_duration: 0, - self_host_duration: 6217, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 914, - host_duration: 8470, - device_duration: 0, - self_host_duration: 8470, - self_device_duration: 0, - }, - { - name: 'aten::unsqueeze', - calls: 384, - host_duration: 19150, - device_duration: 0, - self_host_duration: 16142, - self_device_duration: 0, - }, - { - name: 'aten::empty_strided', - calls: 1158, - host_duration: 50043, - device_duration: 0, - self_host_duration: 50043, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 1412, - host_duration: 1518205, - device_duration: 8796, - self_host_duration: 1509009, - self_device_duration: 8796, - }, - { - name: 'aten::_to_copy', - calls: 1284, - host_duration: 1674324, - device_duration: 8796, - self_host_duration: 104788, - self_device_duration: 0, - }, - { - name: 'aten::upsample_bilinear2d', - calls: 128, - host_duration: 460479, - device_duration: 0, - self_host_duration: 421547, - self_device_duration: 0, - }, - { - name: 'aten::squeeze', - calls: 128, - host_duration: 9401, - device_duration: 0, - self_host_duration: 8211, - self_device_duration: 0, - }, - { - name: 'aten::round', - calls: 128, - host_duration: 31311, - device_duration: 0, - self_host_duration: 31311, - self_device_duration: 0, - }, - { - name: 'aten::slice', - calls: 260, - host_duration: 17762, - device_duration: 0, - self_host_duration: 15082, - self_device_duration: 0, - }, - { - name: 'detach_', - calls: 512, - host_duration: 4194, - device_duration: 0, - self_host_duration: 4194, - self_device_duration: 0, - }, - { - name: 'aten::detach_', - calls: 512, - host_duration: 14514, - device_duration: 0, - self_host_duration: 10320, - self_device_duration: 0, - }, - { - name: 'aten::result_type', - calls: 640, - host_duration: 1734, - device_duration: 0, - self_host_duration: 1734, - self_device_duration: 0, - }, - { - name: 'aten::pow', - calls: 640, - host_duration: 86249, - device_duration: 0, - self_host_duration: 78373, - self_device_duration: 0, - }, - { - name: 'aten::sub', - calls: 640, - host_duration: 183533, - device_duration: 0, - self_host_duration: 75637, - self_device_duration: 0, - }, - { - name: 'aten::gt', - calls: 640, - host_duration: 71284, - 
device_duration: 0, - self_host_duration: 49575, - self_device_duration: 0, - }, - { - name: 'aten::_local_scalar_dense', - calls: 768, - host_duration: 4948, - device_duration: 0, - self_host_duration: 4948, - self_device_duration: 0, - }, - { - name: 'aten::item', - calls: 768, - host_duration: 20922, - device_duration: 0, - self_host_duration: 15974, - self_device_duration: 0, - }, - { - name: 'aten::is_nonzero', - calls: 640, - host_duration: 27934, - device_duration: 0, - self_host_duration: 10747, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 130, - host_duration: 168214, - device_duration: 75, - self_host_duration: 146203, - self_device_duration: 75, - }, - { - name: 'aten::resize_', - calls: 6, - host_duration: 248, - device_duration: 0, - self_host_duration: 248, - self_device_duration: 0, - }, - { - name: 'aten::narrow', - calls: 4, - host_duration: 280, - device_duration: 0, - self_host_duration: 99, - self_device_duration: 0, - }, - { - name: 'aten::_cat', - calls: 4, - host_duration: 92993, - device_duration: 0, - self_host_duration: 92405, - self_device_duration: 0, - }, - { - name: 'aten::cat', - calls: 4, - host_duration: 93282, - device_duration: 0, - self_host_duration: 289, - self_device_duration: 0, - }, - { - name: 'aten::stack', - calls: 4, - host_duration: 124757, - device_duration: 0, - self_host_duration: 22050, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution', - calls: 106, - host_duration: 44043, - device_duration: 71832, - self_host_duration: 35027, - self_device_duration: 71832, - }, - { - name: 'aten::_convolution', - calls: 106, - host_duration: 51312, - device_duration: 71832, - self_host_duration: 7269, - self_device_duration: 0, - }, - { - name: 'aten::convolution', - calls: 106, - host_duration: 55287, - device_duration: 71832, - self_host_duration: 3975, - self_device_duration: 0, - }, - { - name: 'aten::conv2d', - calls: 106, - host_duration: 59323, - device_duration: 71832, - self_host_duration: 4036, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 138, - host_duration: 17461, - device_duration: 10540, - self_host_duration: 15188, - self_device_duration: 10540, - }, - { - name: 'aten::empty_like', - calls: 108, - host_duration: 11504, - device_duration: 0, - self_host_duration: 4865, - self_device_duration: 0, - }, - { - name: 'aten::view', - calls: 214, - host_duration: 3589, - device_duration: 0, - self_host_duration: 3589, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm', - calls: 106, - host_duration: 71328, - device_duration: 25802, - self_host_duration: 40944, - self_device_duration: 25802, - }, - { - name: 'aten::_batch_norm_impl_index', - calls: 106, - host_duration: 76354, - device_duration: 25802, - self_host_duration: 5026, - self_device_duration: 0, - }, - { - name: 'aten::batch_norm', - calls: 106, - host_duration: 79832, - device_duration: 25802, - self_host_duration: 3478, - self_device_duration: 0, - }, - { - name: 'aten::clamp_min', - calls: 98, - host_duration: 5417, - device_duration: 12000, - self_host_duration: 3885, - self_device_duration: 12000, - }, - { - name: 'aten::clamp_min_', - calls: 98, - host_duration: 8537, - device_duration: 12000, - self_host_duration: 3120, - self_device_duration: 0, - }, - { - name: 'aten::relu_', - calls: 98, - host_duration: 16708, - device_duration: 12000, - self_host_duration: 8171, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices', - calls: 2, - host_duration: 442, - device_duration: 940, - 
self_host_duration: 405, - self_device_duration: 940, - }, - { - name: 'aten::max_pool2d', - calls: 2, - host_duration: 542, - device_duration: 940, - self_host_duration: 100, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 998, - host_duration: 72931, - device_duration: 13090, - self_host_duration: 57558, - self_device_duration: 13090, - }, - { - name: 'aten::mean', - calls: 2, - host_duration: 376, - device_duration: 133, - self_host_duration: 339, - self_device_duration: 133, - }, - { - name: 'aten::adaptive_avg_pool2d', - calls: 2, - host_duration: 465, - device_duration: 133, - self_host_duration: 89, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 4, - host_duration: 170, - device_duration: 0, - self_host_duration: 170, - self_device_duration: 0, - }, - { - name: 'aten::flatten', - calls: 2, - host_duration: 207, - device_duration: 0, - self_host_duration: 103, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 10, - host_duration: 587, - device_duration: 0, - self_host_duration: 465, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 10, - host_duration: 1068, - device_duration: 0, - self_host_duration: 481, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 4, - host_duration: 277, - device_duration: 0, - self_host_duration: 227, - self_device_duration: 0, - }, - { - name: 'aten::addmm', - calls: 2, - host_duration: 809, - device_duration: 84, - self_host_duration: 604, - self_device_duration: 84, - }, - { - name: 'aten::linear', - calls: 2, - host_duration: 1185, - device_duration: 84, - self_host_duration: 137, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax', - calls: 2, - host_duration: 308, - device_duration: 14, - self_host_duration: 271, - self_device_duration: 14, - }, - { - name: 'aten::log_softmax', - calls: 2, - host_duration: 472, - device_duration: 14, - self_host_duration: 153, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_forward', - calls: 2, - host_duration: 522, - device_duration: 8, - self_host_duration: 476, - self_device_duration: 8, - }, - { - name: 'aten::nll_loss', - calls: 2, - host_duration: 590, - device_duration: 8, - self_host_duration: 68, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_nd', - calls: 2, - host_duration: 641, - device_duration: 8, - self_host_duration: 51, - self_device_duration: 0, - }, - { - name: 'aten::cross_entropy_loss', - calls: 2, - host_duration: 1234, - device_duration: 22, - self_host_duration: 121, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 328, - host_duration: 14541, - device_duration: 738, - self_host_duration: 10083, - self_device_duration: 738, - }, - { - name: 'aten::ones_like', - calls: 2, - host_duration: 516, - device_duration: 2, - self_host_duration: 142, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_backward', - calls: 2, - host_duration: 573, - device_duration: 8, - self_host_duration: 310, - self_device_duration: 6, - }, - { - name: 'NllLossBackward0', - calls: 2, - host_duration: 774, - device_duration: 8, - self_host_duration: 201, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: NllLossBackward0', - calls: 2, - host_duration: 1025, - device_duration: 8, - self_host_duration: 251, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax_backward_data', - calls: 2, - host_duration: 236, - device_duration: 18, - self_host_duration: 196, - self_device_duration: 18, - }, - { - name: 
'LogSoftmaxBackward0', - calls: 2, - host_duration: 385, - device_duration: 18, - self_host_duration: 149, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: LogSoftmaxBackward0', - calls: 2, - host_duration: 632, - device_duration: 18, - self_host_duration: 247, - self_device_duration: 0, - }, - { - name: 'aten::mm', - calls: 4, - host_duration: 668, - device_duration: 140, - self_host_duration: 547, - self_device_duration: 140, - }, - { - name: 'AddmmBackward0', - calls: 2, - host_duration: 1698, - device_duration: 140, - self_host_duration: 417, - self_device_duration: 0, - }, - { - name: 'aten::sum', - calls: 2, - host_duration: 370, - device_duration: 15, - self_host_duration: 328, - self_device_duration: 15, - }, - { - name: 'autograd::engine::evaluate_function: AddmmBackward0', - calls: 2, - host_duration: 2710, - device_duration: 155, - self_host_duration: 567, - self_device_duration: 0, - }, - { - name: 'torch::autograd::AccumulateGrad', - calls: 322, - host_duration: 41184, - device_duration: 997, - self_host_duration: 16159, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: torch::autograd::AccumulateGrad', - calls: 322, - host_duration: 70946, - device_duration: 997, - self_host_duration: 29762, - self_device_duration: 0, - }, - { - name: 'TBackward0', - calls: 2, - host_duration: 280, - device_duration: 0, - self_host_duration: 64, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: TBackward0', - calls: 2, - host_duration: 428, - device_duration: 0, - self_host_duration: 148, - self_device_duration: 0, - }, - { - name: 'aten::reshape', - calls: 2, - host_duration: 170, - device_duration: 0, - self_host_duration: 104, - self_device_duration: 0, - }, - { - name: 'ReshapeAliasBackward0', - calls: 2, - host_duration: 264, - device_duration: 0, - self_host_duration: 94, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReshapeAliasBackward0', - calls: 2, - host_duration: 402, - device_duration: 0, - self_host_duration: 138, - self_device_duration: 0, - }, - { - name: 'MeanBackward1', - calls: 2, - host_duration: 1036, - device_duration: 75, - self_host_duration: 231, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MeanBackward1', - calls: 2, - host_duration: 1254, - device_duration: 75, - self_host_duration: 218, - self_device_duration: 0, - }, - { - name: 'aten::threshold_backward', - calls: 98, - host_duration: 13838, - device_duration: 17984, - self_host_duration: 12131, - self_device_duration: 17984, - }, - { - name: 'ReluBackward0', - calls: 98, - host_duration: 21183, - device_duration: 17984, - self_host_duration: 7345, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReluBackward0', - calls: 98, - host_duration: 33492, - device_duration: 17984, - self_host_duration: 12309, - self_device_duration: 0, - }, - { - name: 'AddBackward0', - calls: 32, - host_duration: 251, - device_duration: 0, - self_host_duration: 251, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddBackward0', - calls: 32, - host_duration: 2579, - device_duration: 0, - self_host_duration: 2328, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm_backward', - calls: 106, - host_duration: 62175, - device_duration: 44433, - self_host_duration: 36053, - self_device_duration: 44433, - }, - { - name: 'CudnnBatchNormBackward0', - calls: 106, - host_duration: 69160, - 
device_duration: 44433, - self_host_duration: 6985, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnBatchNormBackward0', - calls: 106, - host_duration: 88613, - device_duration: 44433, - self_host_duration: 19453, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution_backward_input', - calls: 104, - host_duration: 40820, - device_duration: 76620, - self_host_duration: 30768, - self_device_duration: 76620, - }, - { - name: 'aten::cudnn_convolution_backward_weight', - calls: 106, - host_duration: 44875, - device_duration: 90108, - self_host_duration: 27458, - self_device_duration: 90108, - }, - { - name: 'aten::cudnn_convolution_backward', - calls: 106, - host_duration: 101020, - device_duration: 166728, - self_host_duration: 15325, - self_device_duration: 0, - }, - { - name: 'CudnnConvolutionBackward0', - calls: 106, - host_duration: 107964, - device_duration: 166728, - self_host_duration: 6944, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnConvolutionBackward0', - calls: 106, - host_duration: 129129, - device_duration: 177161, - self_host_duration: 16746, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices_backward', - calls: 2, - host_duration: 483, - device_duration: 3048, - self_host_duration: 257, - self_device_duration: 2588, - }, - { - name: 'MaxPool2DWithIndicesBackward0', - calls: 2, - host_duration: 599, - device_duration: 3048, - self_host_duration: 116, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MaxPool2DWithIndicesBackward0', - calls: 2, - host_duration: 836, - device_duration: 3048, - self_host_duration: 237, - self_device_duration: 0, - }, - { - name: 'aten::mul_', - calls: 322, - host_duration: 23818, - device_duration: 797, - self_host_duration: 19073, - self_device_duration: 797, - }, - ], - }, - right: { - name: 'multiple nodes', - duration: 468427, - device_duration: 374211, - total_duration: 644686, - aggs: [ - { - name: 'aten::empty', - calls: 4214, - host_duration: 31594, - device_duration: 0, - self_host_duration: 31594, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 846, - host_duration: 6010, - device_duration: 864, - self_host_duration: 1910, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 520, - host_duration: 10338, - device_duration: 0, - self_host_duration: 2951, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 2696, - host_duration: 47031, - device_duration: 8684, - self_host_duration: 4258, - self_device_duration: 0, - }, - { - name: 'detach', - calls: 256, - host_duration: 701, - device_duration: 0, - self_host_duration: 698, - self_device_duration: 0, - }, - { - name: 'aten::detach', - calls: 256, - host_duration: 1374, - device_duration: 0, - self_host_duration: 676, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 914, - host_duration: 1013, - device_duration: 0, - self_host_duration: 1013, - self_device_duration: 0, - }, - { - name: 'aten::unsqueeze', - calls: 384, - host_duration: 2074, - device_duration: 0, - self_host_duration: 1723, - self_device_duration: 0, - }, - { - name: 'aten::empty_strided', - calls: 1158, - host_duration: 6859, - device_duration: 0, - self_host_duration: 6859, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 1412, - host_duration: 25248, - device_duration: 8684, - self_host_duration: 16166, - self_device_duration: 8684, - }, - { - name: 'aten::_to_copy', - calls: 1284, - 
host_duration: 42773, - device_duration: 8684, - self_host_duration: 10227, - self_device_duration: 0, - }, - { - name: 'aten::upsample_bilinear2d', - calls: 128, - host_duration: 51788, - device_duration: 0, - self_host_duration: 46788, - self_device_duration: 0, - }, - { - name: 'aten::squeeze', - calls: 128, - host_duration: 1035, - device_duration: 0, - self_host_duration: 895, - self_device_duration: 0, - }, - { - name: 'aten::round', - calls: 128, - host_duration: 11074, - device_duration: 0, - self_host_duration: 11074, - self_device_duration: 0, - }, - { - name: 'aten::slice', - calls: 260, - host_duration: 1892, - device_duration: 0, - self_host_duration: 1600, - self_device_duration: 0, - }, - { - name: 'detach_', - calls: 512, - host_duration: 278, - device_duration: 0, - self_host_duration: 244, - self_device_duration: 0, - }, - { - name: 'aten::detach_', - calls: 512, - host_duration: 1341, - device_duration: 0, - self_host_duration: 1097, - self_device_duration: 0, - }, - { - name: 'aten::result_type', - calls: 640, - host_duration: 317, - device_duration: 0, - self_host_duration: 317, - self_device_duration: 0, - }, - { - name: 'aten::pow', - calls: 640, - host_duration: 8857, - device_duration: 0, - self_host_duration: 7959, - self_device_duration: 0, - }, - { - name: 'aten::sub', - calls: 640, - host_duration: 17840, - device_duration: 0, - self_host_duration: 7688, - self_device_duration: 0, - }, - { - name: 'aten::gt', - calls: 640, - host_duration: 6903, - device_duration: 0, - self_host_duration: 4901, - self_device_duration: 0, - }, - { - name: 'aten::_local_scalar_dense', - calls: 768, - host_duration: 395, - device_duration: 0, - self_host_duration: 395, - self_device_duration: 0, - }, - { - name: 'aten::item', - calls: 768, - host_duration: 2532, - device_duration: 0, - self_host_duration: 2130, - self_device_duration: 0, - }, - { - name: 'aten::is_nonzero', - calls: 640, - host_duration: 3601, - device_duration: 0, - self_host_duration: 1427, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 130, - host_duration: 11707, - device_duration: 75, - self_host_duration: 9531, - self_device_duration: 75, - }, - { - name: 'aten::resize_', - calls: 6, - host_duration: 79, - device_duration: 0, - self_host_duration: 79, - self_device_duration: 0, - }, - { - name: 'aten::narrow', - calls: 4, - host_duration: 37, - device_duration: 0, - self_host_duration: 16, - self_device_duration: 0, - }, - { - name: 'aten::_cat', - calls: 4, - host_duration: 9241, - device_duration: 0, - self_host_duration: 9113, - self_device_duration: 0, - }, - { - name: 'aten::cat', - calls: 4, - host_duration: 9286, - device_duration: 0, - self_host_duration: 45, - self_device_duration: 0, - }, - { - name: 'aten::stack', - calls: 4, - host_duration: 16195, - device_duration: 0, - self_host_duration: 6105, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution', - calls: 106, - host_duration: 17357, - device_duration: 71414, - self_host_duration: 13601, - self_device_duration: 71414, - }, - { - name: 'aten::_convolution', - calls: 106, - host_duration: 18514, - device_duration: 71414, - self_host_duration: 1157, - self_device_duration: 0, - }, - { - name: 'aten::convolution', - calls: 106, - host_duration: 19185, - device_duration: 71414, - self_host_duration: 671, - self_device_duration: 0, - }, - { - name: 'aten::conv2d', - calls: 106, - host_duration: 19750, - device_duration: 71414, - self_host_duration: 565, - self_device_duration: 0, - }, - { - name: 'aten::add', - 
calls: 138, - host_duration: 4973, - device_duration: 10567, - self_host_duration: 3157, - self_device_duration: 10567, - }, - { - name: 'aten::empty_like', - calls: 108, - host_duration: 1924, - device_duration: 0, - self_host_duration: 598, - self_device_duration: 0, - }, - { - name: 'aten::view', - calls: 214, - host_duration: 596, - device_duration: 0, - self_host_duration: 596, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm', - calls: 106, - host_duration: 11083, - device_duration: 25737, - self_host_duration: 5031, - self_device_duration: 25737, - }, - { - name: 'aten::_batch_norm_impl_index', - calls: 106, - host_duration: 11856, - device_duration: 25737, - self_host_duration: 773, - self_device_duration: 0, - }, - { - name: 'aten::batch_norm', - calls: 106, - host_duration: 12386, - device_duration: 25737, - self_host_duration: 530, - self_device_duration: 0, - }, - { - name: 'aten::clamp_min', - calls: 98, - host_duration: 2189, - device_duration: 12010, - self_host_duration: 1030, - self_device_duration: 12010, - }, - { - name: 'aten::clamp_min_', - calls: 98, - host_duration: 2614, - device_duration: 12010, - self_host_duration: 425, - self_device_duration: 0, - }, - { - name: 'aten::relu_', - calls: 98, - host_duration: 3880, - device_duration: 12010, - self_host_duration: 1266, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices', - calls: 2, - host_duration: 112, - device_duration: 938, - self_host_duration: 82, - self_device_duration: 938, - }, - { - name: 'aten::max_pool2d', - calls: 2, - host_duration: 127, - device_duration: 938, - self_host_duration: 15, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 998, - host_duration: 21459, - device_duration: 13178, - self_host_duration: 11041, - self_device_duration: 13178, - }, - { - name: 'aten::mean', - calls: 2, - host_duration: 104, - device_duration: 126, - self_host_duration: 76, - self_device_duration: 126, - }, - { - name: 'aten::adaptive_avg_pool2d', - calls: 2, - host_duration: 117, - device_duration: 126, - self_host_duration: 13, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 4, - host_duration: 26, - device_duration: 0, - self_host_duration: 26, - self_device_duration: 0, - }, - { - name: 'aten::flatten', - calls: 2, - host_duration: 31, - device_duration: 0, - self_host_duration: 15, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 10, - host_duration: 85, - device_duration: 0, - self_host_duration: 68, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 10, - host_duration: 145, - device_duration: 0, - self_host_duration: 60, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 4, - host_duration: 30, - device_duration: 0, - self_host_duration: 25, - self_device_duration: 0, - }, - { - name: 'aten::addmm', - calls: 2, - host_duration: 334, - device_duration: 84, - self_host_duration: 234, - self_device_duration: 84, - }, - { - name: 'aten::linear', - calls: 2, - host_duration: 386, - device_duration: 84, - self_host_duration: 19, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax', - calls: 2, - host_duration: 83, - device_duration: 14, - self_host_duration: 55, - self_device_duration: 14, - }, - { - name: 'aten::log_softmax', - calls: 2, - host_duration: 106, - device_duration: 14, - self_host_duration: 20, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_forward', - calls: 2, - host_duration: 96, - device_duration: 8, - self_host_duration: 68, - 
self_device_duration: 8, - }, - { - name: 'aten::nll_loss', - calls: 2, - host_duration: 105, - device_duration: 8, - self_host_duration: 9, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_nd', - calls: 2, - host_duration: 113, - device_duration: 8, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::cross_entropy_loss', - calls: 2, - host_duration: 243, - device_duration: 22, - self_host_duration: 24, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 328, - host_duration: 4140, - device_duration: 866, - self_host_duration: 1851, - self_device_duration: 866, - }, - { - name: 'aten::ones_like', - calls: 2, - host_duration: 104, - device_duration: 2, - self_host_duration: 14, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_backward', - calls: 2, - host_duration: 192, - device_duration: 9, - self_host_duration: 84, - self_device_duration: 6, - }, - { - name: 'NllLossBackward0', - calls: 2, - host_duration: 297, - device_duration: 9, - self_host_duration: 105, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: NllLossBackward0', - calls: 2, - host_duration: 352, - device_duration: 9, - self_host_duration: 55, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax_backward_data', - calls: 2, - host_duration: 71, - device_duration: 18, - self_host_duration: 43, - self_device_duration: 18, - }, - { - name: 'LogSoftmaxBackward0', - calls: 2, - host_duration: 91, - device_duration: 18, - self_host_duration: 20, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: LogSoftmaxBackward0', - calls: 2, - host_duration: 126, - device_duration: 18, - self_host_duration: 35, - self_device_duration: 0, - }, - { - name: 'aten::mm', - calls: 4, - host_duration: 283, - device_duration: 134, - self_host_duration: 186, - self_device_duration: 134, - }, - { - name: 'AddmmBackward0', - calls: 2, - host_duration: 418, - device_duration: 134, - self_host_duration: 47, - self_device_duration: 0, - }, - { - name: 'aten::sum', - calls: 2, - host_duration: 92, - device_duration: 14, - self_host_duration: 62, - self_device_duration: 14, - }, - { - name: 'autograd::engine::evaluate_function: AddmmBackward0', - calls: 2, - host_duration: 594, - device_duration: 148, - self_host_duration: 75, - self_device_duration: 0, - }, - { - name: 'torch::autograd::AccumulateGrad', - calls: 322, - host_duration: 10317, - device_duration: 1069, - self_host_duration: 2127, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: torch::autograd::AccumulateGrad', - calls: 322, - host_duration: 15128, - device_duration: 1069, - self_host_duration: 4811, - self_device_duration: 0, - }, - { - name: 'TBackward0', - calls: 2, - host_duration: 30, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: TBackward0', - calls: 2, - host_duration: 45, - device_duration: 0, - self_host_duration: 15, - self_device_duration: 0, - }, - { - name: 'aten::reshape', - calls: 2, - host_duration: 20, - device_duration: 0, - self_host_duration: 10, - self_device_duration: 0, - }, - { - name: 'ReshapeAliasBackward0', - calls: 2, - host_duration: 31, - device_duration: 0, - self_host_duration: 11, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReshapeAliasBackward0', - calls: 2, - host_duration: 48, - device_duration: 0, - self_host_duration: 17, - self_device_duration: 0, - }, - { - name: 
'MeanBackward1', - calls: 2, - host_duration: 172, - device_duration: 75, - self_host_duration: 18, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MeanBackward1', - calls: 2, - host_duration: 201, - device_duration: 75, - self_host_duration: 29, - self_device_duration: 0, - }, - { - name: 'aten::threshold_backward', - calls: 98, - host_duration: 3652, - device_duration: 18018, - self_host_duration: 2361, - self_device_duration: 18018, - }, - { - name: 'ReluBackward0', - calls: 98, - host_duration: 4567, - device_duration: 18018, - self_host_duration: 915, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReluBackward0', - calls: 98, - host_duration: 6457, - device_duration: 18018, - self_host_duration: 1890, - self_device_duration: 0, - }, - { - name: 'AddBackward0', - calls: 32, - host_duration: 26, - device_duration: 0, - self_host_duration: 26, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddBackward0', - calls: 32, - host_duration: 261, - device_duration: 0, - self_host_duration: 235, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm_backward', - calls: 106, - host_duration: 9943, - device_duration: 44401, - self_host_duration: 4355, - self_device_duration: 44401, - }, - { - name: 'CudnnBatchNormBackward0', - calls: 106, - host_duration: 11132, - device_duration: 44401, - self_host_duration: 1189, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnBatchNormBackward0', - calls: 106, - host_duration: 14696, - device_duration: 44401, - self_host_duration: 3564, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution_backward_input', - calls: 104, - host_duration: 18813, - device_duration: 75568, - self_host_duration: 13997, - self_device_duration: 75568, - }, - { - name: 'aten::cudnn_convolution_backward_weight', - calls: 106, - host_duration: 18792, - device_duration: 88992, - self_host_duration: 11101, - self_device_duration: 88992, - }, - { - name: 'aten::cudnn_convolution_backward', - calls: 106, - host_duration: 40064, - device_duration: 164560, - self_host_duration: 2459, - self_device_duration: 0, - }, - { - name: 'CudnnConvolutionBackward0', - calls: 106, - host_duration: 41205, - device_duration: 164560, - self_host_duration: 1141, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnConvolutionBackward0', - calls: 106, - host_duration: 45209, - device_duration: 175014, - self_host_duration: 2826, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices_backward', - calls: 2, - host_duration: 145, - device_duration: 3016, - self_host_duration: 61, - self_device_duration: 2556, - }, - { - name: 'MaxPool2DWithIndicesBackward0', - calls: 2, - host_duration: 165, - device_duration: 3016, - self_host_duration: 20, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MaxPool2DWithIndicesBackward0', - calls: 2, - host_duration: 209, - device_duration: 3016, - self_host_duration: 44, - self_device_duration: 0, - }, - { - name: 'aten::mul_', - calls: 322, - host_duration: 6835, - device_duration: 803, - self_host_duration: 3630, - self_device_duration: 803, - }, - ], - }, - path: '0', - children: [ - { - left: { - name: 'multiple nodes', - duration: 168, - device_duration: 0, - total_duration: 168, - aggs: [ - { - name: 'aten::empty', - calls: 2, - host_duration: 100, - device_duration: 0, - self_host_duration: 100, - self_device_duration: 
0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 4, - device_duration: 0, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 119, - device_duration: 0, - self_host_duration: 64, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'multiple nodes', - duration: 24, - device_duration: 0, - total_duration: 24, - aggs: [ - { - name: 'aten::empty', - calls: 2, - host_duration: 17, - device_duration: 0, - self_host_duration: 17, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 1, - device_duration: 0, - self_host_duration: 1, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 15, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - ], - }, - path: '0-0', - }, - { - left: { - name: 'enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__', - duration: 1766103, - device_duration: 0, - total_duration: 1766103, - aggs: [ - { - name: 'aten::empty', - calls: 1413, - host_duration: 62288, - device_duration: 0, - self_host_duration: 62288, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 257, - host_duration: 959, - device_duration: 0, - self_host_duration: 959, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 257, - host_duration: 35273, - device_duration: 0, - self_host_duration: 16154, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 1344, - host_duration: 877101, - device_duration: 0, - self_host_duration: 18482, - self_device_duration: 0, - }, - { - name: 'detach', - calls: 128, - host_duration: 2191, - device_duration: 0, - self_host_duration: 2191, - self_device_duration: 0, - }, - { - name: 'aten::detach', - calls: 128, - host_duration: 5301, - device_duration: 0, - self_host_duration: 3110, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 450, - host_duration: 4175, - device_duration: 0, - self_host_duration: 4175, - self_device_duration: 0, - }, - { - name: 'aten::unsqueeze', - calls: 192, - host_duration: 9560, - device_duration: 0, - self_host_duration: 8045, - self_device_duration: 0, - }, - { - name: 'aten::empty_strided', - calls: 576, - host_duration: 24689, - device_duration: 0, - self_host_duration: 24689, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 704, - host_duration: 780214, - device_duration: 0, - self_host_duration: 780214, - self_device_duration: 0, - }, - { - name: 'aten::_to_copy', - calls: 640, - host_duration: 858619, - device_duration: 0, - self_host_duration: 53009, - self_device_duration: 0, - }, - { - name: 'aten::upsample_bilinear2d', - calls: 64, - host_duration: 224031, - device_duration: 0, - self_host_duration: 204660, - self_device_duration: 0, - }, - { - name: 'aten::squeeze', - calls: 64, - host_duration: 4719, - device_duration: 0, - self_host_duration: 4119, - self_device_duration: 0, - }, - { - name: 'aten::round', - calls: 64, - host_duration: 16028, - device_duration: 0, - self_host_duration: 16028, - self_device_duration: 0, - }, - { - name: 'aten::slice', - calls: 130, - host_duration: 8918, - device_duration: 0, - self_host_duration: 7569, - self_device_duration: 0, - }, - { - name: 'detach_', - calls: 256, - host_duration: 2092, - device_duration: 0, - self_host_duration: 2092, - self_device_duration: 0, - }, - { - name: 'aten::detach_', - calls: 256, - host_duration: 7228, - device_duration: 0, - self_host_duration: 5136, - self_device_duration: 0, - }, - { - 
name: 'aten::result_type', - calls: 320, - host_duration: 884, - device_duration: 0, - self_host_duration: 884, - self_device_duration: 0, - }, - { - name: 'aten::pow', - calls: 320, - host_duration: 43030, - device_duration: 0, - self_host_duration: 39068, - self_device_duration: 0, - }, - { - name: 'aten::sub', - calls: 320, - host_duration: 91440, - device_duration: 0, - self_host_duration: 37676, - self_device_duration: 0, - }, - { - name: 'aten::gt', - calls: 320, - host_duration: 35514, - device_duration: 0, - self_host_duration: 24706, - self_device_duration: 0, - }, - { - name: 'aten::_local_scalar_dense', - calls: 384, - host_duration: 2467, - device_duration: 0, - self_host_duration: 2467, - self_device_duration: 0, - }, - { - name: 'aten::item', - calls: 384, - host_duration: 10375, - device_duration: 0, - self_host_duration: 7908, - self_device_duration: 0, - }, - { - name: 'aten::is_nonzero', - calls: 320, - host_duration: 13905, - device_duration: 0, - self_host_duration: 5383, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 64, - host_duration: 87841, - device_duration: 0, - self_host_duration: 76794, - self_device_duration: 0, - }, - { - name: 'aten::resize_', - calls: 2, - host_duration: 117, - device_duration: 0, - self_host_duration: 117, - self_device_duration: 0, - }, - { - name: 'aten::narrow', - calls: 2, - host_duration: 142, - device_duration: 0, - self_host_duration: 51, - self_device_duration: 0, - }, - { - name: 'aten::_cat', - calls: 2, - host_duration: 51526, - device_duration: 0, - self_host_duration: 51229, - self_device_duration: 0, - }, - { - name: 'aten::cat', - calls: 2, - host_duration: 51674, - device_duration: 0, - self_host_duration: 148, - self_device_duration: 0, - }, - { - name: 'aten::stack', - calls: 2, - host_duration: 75677, - device_duration: 0, - self_host_duration: 19330, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__', - duration: 146745, - device_duration: 0, - total_duration: 146745, - aggs: [ - { - name: 'aten::empty', - calls: 1413, - host_duration: 12399, - device_duration: 0, - self_host_duration: 12399, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 257, - host_duration: 98, - device_duration: 0, - self_host_duration: 98, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 257, - host_duration: 7665, - device_duration: 0, - self_host_duration: 1689, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 1344, - host_duration: 21137, - device_duration: 0, - self_host_duration: 2377, - self_device_duration: 0, - }, - { - name: 'detach', - calls: 128, - host_duration: 364, - device_duration: 0, - self_host_duration: 361, - self_device_duration: 0, - }, - { - name: 'aten::detach', - calls: 128, - host_duration: 745, - device_duration: 0, - self_host_duration: 384, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 450, - host_duration: 527, - device_duration: 0, - self_host_duration: 527, - self_device_duration: 0, - }, - { - name: 'aten::unsqueeze', - calls: 192, - host_duration: 1050, - device_duration: 0, - self_host_duration: 869, - self_device_duration: 0, - }, - { - name: 'aten::empty_strided', - calls: 576, - host_duration: 3689, - device_duration: 0, - self_host_duration: 3689, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 704, - host_duration: 8695, - device_duration: 0, - self_host_duration: 8695, - self_device_duration: 0, - }, - { - name: 
'aten::_to_copy', - calls: 640, - host_duration: 18760, - device_duration: 0, - self_host_duration: 6122, - self_device_duration: 0, - }, - { - name: 'aten::upsample_bilinear2d', - calls: 64, - host_duration: 20349, - device_duration: 0, - self_host_duration: 17634, - self_device_duration: 0, - }, - { - name: 'aten::squeeze', - calls: 64, - host_duration: 562, - device_duration: 0, - self_host_duration: 487, - self_device_duration: 0, - }, - { - name: 'aten::round', - calls: 64, - host_duration: 6658, - device_duration: 0, - self_host_duration: 6658, - self_device_duration: 0, - }, - { - name: 'aten::slice', - calls: 130, - host_duration: 1028, - device_duration: 0, - self_host_duration: 870, - self_device_duration: 0, - }, - { - name: 'detach_', - calls: 256, - host_duration: 142, - device_duration: 0, - self_host_duration: 129, - self_device_duration: 0, - }, - { - name: 'aten::detach_', - calls: 256, - host_duration: 755, - device_duration: 0, - self_host_duration: 626, - self_device_duration: 0, - }, - { - name: 'aten::result_type', - calls: 320, - host_duration: 168, - device_duration: 0, - self_host_duration: 168, - self_device_duration: 0, - }, - { - name: 'aten::pow', - calls: 320, - host_duration: 4922, - device_duration: 0, - self_host_duration: 4440, - self_device_duration: 0, - }, - { - name: 'aten::sub', - calls: 320, - host_duration: 9959, - device_duration: 0, - self_host_duration: 4339, - self_device_duration: 0, - }, - { - name: 'aten::gt', - calls: 320, - host_duration: 3848, - device_duration: 0, - self_host_duration: 2737, - self_device_duration: 0, - }, - { - name: 'aten::_local_scalar_dense', - calls: 384, - host_duration: 209, - device_duration: 0, - self_host_duration: 209, - self_device_duration: 0, - }, - { - name: 'aten::item', - calls: 384, - host_duration: 1398, - device_duration: 0, - self_host_duration: 1187, - self_device_duration: 0, - }, - { - name: 'aten::is_nonzero', - calls: 320, - host_duration: 2013, - device_duration: 0, - self_host_duration: 812, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 64, - host_duration: 7421, - device_duration: 0, - self_host_duration: 6234, - self_device_duration: 0, - }, - { - name: 'aten::resize_', - calls: 2, - host_duration: 36, - device_duration: 0, - self_host_duration: 36, - self_device_duration: 0, - }, - { - name: 'aten::narrow', - calls: 2, - host_duration: 19, - device_duration: 0, - self_host_duration: 9, - self_device_duration: 0, - }, - { - name: 'aten::_cat', - calls: 2, - host_duration: 4628, - device_duration: 0, - self_host_duration: 4566, - self_device_duration: 0, - }, - { - name: 'aten::cat', - calls: 2, - host_duration: 4649, - device_duration: 0, - self_host_duration: 21, - self_device_duration: 0, - }, - { - name: 'aten::stack', - calls: 2, - host_duration: 10884, - device_duration: 0, - self_host_duration: 5859, - self_device_duration: 0, - }, - ], - }, - path: '0-1', - }, - { - left: { - name: 'multiple nodes', - duration: 5170, - device_duration: 4402, - total_duration: 4402, - aggs: [ - { - name: 'aten::empty_strided', - calls: 2, - host_duration: 209, - device_duration: 0, - self_host_duration: 209, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 2, - host_duration: 4696, - device_duration: 4402, - self_host_duration: 93, - self_device_duration: 4402, - }, - { - name: 'aten::_to_copy', - calls: 2, - host_duration: 5111, - device_duration: 4402, - self_host_duration: 206, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 2, - host_duration: 
5170, - device_duration: 4402, - self_host_duration: 59, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'multiple nodes', - duration: 4681, - device_duration: 4350, - total_duration: 4350, - aggs: [ - { - name: 'aten::empty_strided', - calls: 2, - host_duration: 65, - device_duration: 0, - self_host_duration: 65, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 2, - host_duration: 4575, - device_duration: 4350, - self_host_duration: 26, - self_device_duration: 4350, - }, - { - name: 'aten::_to_copy', - calls: 2, - host_duration: 4670, - device_duration: 4350, - self_host_duration: 30, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 2, - host_duration: 4681, - device_duration: 4350, - self_host_duration: 11, - self_device_duration: 0, - }, - ], - }, - path: '0-2', - }, - { - left: { - name: 'nn.Module: ResNet', - duration: 113664, - device_duration: 61356, - total_duration: 61356, - aggs: [ - { - name: 'aten::empty', - calls: 318, - host_duration: 14161, - device_duration: 0, - self_host_duration: 14161, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution', - calls: 53, - host_duration: 22091, - device_duration: 36599, - self_host_duration: 17567, - self_device_duration: 36599, - }, - { - name: 'aten::_convolution', - calls: 53, - host_duration: 25744, - device_duration: 36599, - self_host_duration: 3653, - self_device_duration: 0, - }, - { - name: 'aten::convolution', - calls: 53, - host_duration: 27753, - device_duration: 36599, - self_host_duration: 2009, - self_device_duration: 0, - }, - { - name: 'aten::conv2d', - calls: 53, - host_duration: 29777, - device_duration: 36599, - self_host_duration: 2024, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 53, - host_duration: 6519, - device_duration: 54, - self_host_duration: 5666, - self_device_duration: 54, - }, - { - name: 'aten::empty_like', - calls: 53, - host_duration: 5624, - device_duration: 0, - self_host_duration: 2390, - self_device_duration: 0, - }, - { - name: 'aten::view', - calls: 53, - host_duration: 826, - device_duration: 0, - self_host_duration: 826, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm', - calls: 53, - host_duration: 35818, - device_duration: 12974, - self_host_duration: 20557, - self_device_duration: 12974, - }, - { - name: 'aten::_batch_norm_impl_index', - calls: 53, - host_duration: 38324, - device_duration: 12974, - self_host_duration: 2506, - self_device_duration: 0, - }, - { - name: 'aten::batch_norm', - calls: 53, - host_duration: 40105, - device_duration: 12974, - self_host_duration: 1781, - self_device_duration: 0, - }, - { - name: 'aten::clamp_min', - calls: 49, - host_duration: 2702, - device_duration: 6002, - self_host_duration: 1935, - self_device_duration: 6002, - }, - { - name: 'aten::clamp_min_', - calls: 49, - host_duration: 4273, - device_duration: 6002, - self_host_duration: 1571, - self_device_duration: 0, - }, - { - name: 'aten::relu_', - calls: 49, - host_duration: 8371, - device_duration: 6002, - self_host_duration: 4098, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices', - calls: 1, - host_duration: 230, - device_duration: 474, - self_host_duration: 212, - self_device_duration: 474, - }, - { - name: 'aten::max_pool2d', - calls: 1, - host_duration: 280, - device_duration: 474, - self_host_duration: 50, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 16, - host_duration: 1546, - device_duration: 5141, - self_host_duration: 1290, - 
self_device_duration: 5141, - }, - { - name: 'aten::mean', - calls: 1, - host_duration: 189, - device_duration: 69, - self_host_duration: 170, - self_device_duration: 69, - }, - { - name: 'aten::adaptive_avg_pool2d', - calls: 1, - host_duration: 234, - device_duration: 69, - self_host_duration: 45, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 1, - host_duration: 52, - device_duration: 0, - self_host_duration: 52, - self_device_duration: 0, - }, - { - name: 'aten::flatten', - calls: 1, - host_duration: 106, - device_duration: 0, - self_host_duration: 54, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 2, - host_duration: 23, - device_duration: 0, - self_host_duration: 23, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 1, - host_duration: 55, - device_duration: 0, - self_host_duration: 41, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 1, - host_duration: 119, - device_duration: 0, - self_host_duration: 64, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 1, - host_duration: 49, - device_duration: 0, - self_host_duration: 40, - self_device_duration: 0, - }, - { - name: 'aten::addmm', - calls: 1, - host_duration: 404, - device_duration: 43, - self_host_duration: 302, - self_device_duration: 43, - }, - { - name: 'aten::linear', - calls: 1, - host_duration: 591, - device_duration: 43, - self_host_duration: 68, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'nn.Module: ResNet', - duration: 28725, - device_duration: 60899, - total_duration: 60899, - aggs: [ - { - name: 'aten::empty', - calls: 318, - host_duration: 2292, - device_duration: 0, - self_host_duration: 2292, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution', - calls: 53, - host_duration: 8713, - device_duration: 36205, - self_host_duration: 6819, - self_device_duration: 36205, - }, - { - name: 'aten::_convolution', - calls: 53, - host_duration: 9298, - device_duration: 36205, - self_host_duration: 585, - self_device_duration: 0, - }, - { - name: 'aten::convolution', - calls: 53, - host_duration: 9653, - device_duration: 36205, - self_host_duration: 355, - self_device_duration: 0, - }, - { - name: 'aten::conv2d', - calls: 53, - host_duration: 9932, - device_duration: 36205, - self_host_duration: 279, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 53, - host_duration: 1897, - device_duration: 58, - self_host_duration: 1201, - self_device_duration: 58, - }, - { - name: 'aten::empty_like', - calls: 53, - host_duration: 933, - device_duration: 0, - self_host_duration: 284, - self_device_duration: 0, - }, - { - name: 'aten::view', - calls: 53, - host_duration: 130, - device_duration: 0, - self_host_duration: 130, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm', - calls: 53, - host_duration: 5540, - device_duration: 12913, - self_host_duration: 2504, - self_device_duration: 12913, - }, - { - name: 'aten::_batch_norm_impl_index', - calls: 53, - host_duration: 5942, - device_duration: 12913, - self_host_duration: 402, - self_device_duration: 0, - }, - { - name: 'aten::batch_norm', - calls: 53, - host_duration: 6219, - device_duration: 12913, - self_host_duration: 277, - self_device_duration: 0, - }, - { - name: 'aten::clamp_min', - calls: 49, - host_duration: 1108, - device_duration: 6006, - self_host_duration: 523, - self_device_duration: 6006, - }, - { - name: 'aten::clamp_min_', - calls: 49, - host_duration: 1315, - device_duration: 6006, - 
self_host_duration: 207, - self_device_duration: 0, - }, - { - name: 'aten::relu_', - calls: 49, - host_duration: 1939, - device_duration: 6006, - self_host_duration: 624, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices', - calls: 1, - host_duration: 53, - device_duration: 472, - self_host_duration: 38, - self_device_duration: 472, - }, - { - name: 'aten::max_pool2d', - calls: 1, - host_duration: 61, - device_duration: 472, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 16, - host_duration: 448, - device_duration: 5140, - self_host_duration: 268, - self_device_duration: 5140, - }, - { - name: 'aten::mean', - calls: 1, - host_duration: 53, - device_duration: 63, - self_host_duration: 39, - self_device_duration: 63, - }, - { - name: 'aten::adaptive_avg_pool2d', - calls: 1, - host_duration: 59, - device_duration: 63, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 1, - host_duration: 8, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::flatten', - calls: 1, - host_duration: 15, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 2, - host_duration: 3, - device_duration: 0, - self_host_duration: 3, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 1, - host_duration: 8, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 1, - host_duration: 15, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 1, - host_duration: 6, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'aten::addmm', - calls: 1, - host_duration: 173, - device_duration: 42, - self_host_duration: 123, - self_device_duration: 42, - }, - { - name: 'aten::linear', - calls: 1, - host_duration: 198, - device_duration: 42, - self_host_duration: 10, - self_device_duration: 0, - }, - ], - }, - path: '0-3', - }, - { - left: { - name: 'nn.Module: CrossEntropyLoss', - duration: 711, - device_duration: 11, - total_duration: 11, - aggs: [ - { - name: 'aten::to', - calls: 1, - host_duration: 5, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax', - calls: 1, - host_duration: 158, - device_duration: 7, - self_host_duration: 139, - self_device_duration: 7, - }, - { - name: 'aten::log_softmax', - calls: 1, - host_duration: 241, - device_duration: 7, - self_host_duration: 78, - self_device_duration: 0, - }, - { - name: 'aten::resize_', - calls: 1, - host_duration: 5, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_forward', - calls: 1, - host_duration: 256, - device_duration: 4, - self_host_duration: 233, - self_device_duration: 4, - }, - { - name: 'aten::nll_loss', - calls: 1, - host_duration: 290, - device_duration: 4, - self_host_duration: 34, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_nd', - calls: 1, - host_duration: 313, - device_duration: 4, - self_host_duration: 23, - self_device_duration: 0, - }, - { - name: 'aten::cross_entropy_loss', - calls: 1, - host_duration: 614, - device_duration: 11, - self_host_duration: 60, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'nn.Module: CrossEntropyLoss', - duration: 156, - device_duration: 11, - total_duration: 11, - aggs: [ - { - 
name: 'aten::to', - calls: 1, - host_duration: 2, - device_duration: 0, - self_host_duration: 2, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax', - calls: 1, - host_duration: 42, - device_duration: 7, - self_host_duration: 28, - self_device_duration: 7, - }, - { - name: 'aten::log_softmax', - calls: 1, - host_duration: 54, - device_duration: 7, - self_host_duration: 10, - self_device_duration: 0, - }, - { - name: 'aten::resize_', - calls: 1, - host_duration: 0, - device_duration: 0, - self_host_duration: 0, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_forward', - calls: 1, - host_duration: 47, - device_duration: 4, - self_host_duration: 34, - self_device_duration: 4, - }, - { - name: 'aten::nll_loss', - calls: 1, - host_duration: 52, - device_duration: 4, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_nd', - calls: 1, - host_duration: 56, - device_duration: 4, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'aten::cross_entropy_loss', - calls: 1, - host_duration: 119, - device_duration: 11, - self_host_duration: 9, - self_device_duration: 0, - }, - ], - }, - path: '0-4', - }, - { - left: { - name: 'aten::zeros', - duration: 119, - device_duration: 0, - total_duration: 119, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 47, - device_duration: 0, - self_host_duration: 47, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 4, - device_duration: 0, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 119, - device_duration: 0, - self_host_duration: 68, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'aten::zeros', - duration: 17, - device_duration: 0, - total_duration: 17, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 8, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 2, - device_duration: 0, - self_host_duration: 2, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 17, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - ], - }, - path: '0-5', - }, - { - left: { - name: 'Optimizer.zero_grad#SGD.zero_grad', - duration: 22960, - device_duration: 142, - total_duration: 142, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 38, - device_duration: 0, - self_host_duration: 38, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 161, - host_duration: 7097, - device_duration: 142, - self_host_duration: 4914, - self_device_duration: 142, - }, - { - name: 'aten::zero_', - calls: 161, - host_duration: 14725, - device_duration: 142, - self_host_duration: 7628, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'Optimizer.zero_grad#SGD.zero_grad', - duration: 4075, - device_duration: 264, - total_duration: 264, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 6, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 161, - host_duration: 2036, - device_duration: 264, - self_host_duration: 909, - self_device_duration: 264, - }, - { - name: 'aten::zero_', - calls: 161, - host_duration: 2855, - device_duration: 264, - self_host_duration: 819, - self_device_duration: 0, - }, - ], - }, - path: '0-6', - }, - { - left: { - name: 'aten::ones_like', - duration: 253, - device_duration: 1, - total_duration: 1, - 
aggs: [ - { - name: 'aten::empty_strided', - calls: 1, - host_duration: 79, - device_duration: 0, - self_host_duration: 79, - self_device_duration: 0, - }, - { - name: 'aten::empty_like', - calls: 1, - host_duration: 126, - device_duration: 0, - self_host_duration: 47, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 1, - host_duration: 50, - device_duration: 1, - self_host_duration: 35, - self_device_duration: 1, - }, - { - name: 'aten::ones_like', - calls: 1, - host_duration: 253, - device_duration: 1, - self_host_duration: 77, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'aten::ones_like', - duration: 53, - device_duration: 1, - total_duration: 1, - aggs: [ - { - name: 'aten::empty_strided', - calls: 1, - host_duration: 18, - device_duration: 0, - self_host_duration: 18, - self_device_duration: 0, - }, - { - name: 'aten::empty_like', - calls: 1, - host_duration: 26, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 1, - host_duration: 20, - device_duration: 1, - self_host_duration: 8, - self_device_duration: 1, - }, - { - name: 'aten::ones_like', - calls: 1, - host_duration: 53, - device_duration: 1, - self_host_duration: 7, - self_device_duration: 0, - }, - ], - }, - path: '0-7', - }, - { - left: { - name: 'nn.Module: CrossEntropyLoss.backward', - duration: 898, - device_duration: 13, - total_duration: 13, - aggs: [ - { - name: 'aten::fill_', - calls: 1, - host_duration: 69, - device_duration: 1, - self_host_duration: 43, - self_device_duration: 1, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 120, - device_duration: 1, - self_host_duration: 51, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_backward', - calls: 1, - host_duration: 304, - device_duration: 4, - self_host_duration: 168, - self_device_duration: 3, - }, - { - name: 'NllLossBackward0', - calls: 1, - host_duration: 368, - device_duration: 4, - self_host_duration: 64, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: NllLossBackward0', - calls: 1, - host_duration: 503, - device_duration: 4, - self_host_duration: 135, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax_backward_data', - calls: 1, - host_duration: 127, - device_duration: 9, - self_host_duration: 105, - self_device_duration: 9, - }, - { - name: 'LogSoftmaxBackward0', - calls: 1, - host_duration: 207, - device_duration: 9, - self_host_duration: 80, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: LogSoftmaxBackward0', - calls: 1, - host_duration: 349, - device_duration: 9, - self_host_duration: 142, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'nn.Module: CrossEntropyLoss.backward', - duration: 214, - device_duration: 14, - total_duration: 14, - aggs: [ - { - name: 'aten::fill_', - calls: 1, - host_duration: 36, - device_duration: 2, - self_host_duration: 13, - self_device_duration: 2, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 45, - device_duration: 2, - self_host_duration: 9, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_backward', - calls: 1, - host_duration: 99, - device_duration: 5, - self_host_duration: 43, - self_device_duration: 3, - }, - { - name: 'NllLossBackward0', - calls: 1, - host_duration: 112, - device_duration: 5, - self_host_duration: 13, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: NllLossBackward0', - calls: 1, - host_duration: 141, - device_duration: 5, - 
self_host_duration: 29, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax_backward_data', - calls: 1, - host_duration: 35, - device_duration: 9, - self_host_duration: 21, - self_device_duration: 9, - }, - { - name: 'LogSoftmaxBackward0', - calls: 1, - host_duration: 46, - device_duration: 9, - self_host_duration: 11, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: LogSoftmaxBackward0', - calls: 1, - host_duration: 64, - device_duration: 9, - self_host_duration: 18, - self_device_duration: 0, - }, - ], - }, - path: '0-8', - }, - { - left: { - name: 'nn.Module: ResNet.backward', - duration: 180998, - device_duration: 123177, - total_duration: 123177, - aggs: [ - { - name: 'aten::as_strided', - calls: 5, - host_duration: 61, - device_duration: 0, - self_host_duration: 61, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 4, - host_duration: 226, - device_duration: 0, - self_host_duration: 180, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 4, - host_duration: 399, - device_duration: 0, - self_host_duration: 173, - self_device_duration: 0, - }, - { - name: 'aten::mm', - calls: 2, - host_duration: 345, - device_duration: 72, - self_host_duration: 282, - self_device_duration: 72, - }, - { - name: 'AddmmBackward0', - calls: 1, - host_duration: 854, - device_duration: 72, - self_host_duration: 208, - self_device_duration: 0, - }, - { - name: 'aten::sum', - calls: 1, - host_duration: 173, - device_duration: 8, - self_host_duration: 153, - self_device_duration: 8, - }, - { - name: 'aten::view', - calls: 54, - host_duration: 971, - device_duration: 0, - self_host_duration: 971, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddmmBackward0', - calls: 1, - host_duration: 1333, - device_duration: 80, - self_host_duration: 271, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 161, - host_duration: 12621, - device_duration: 501, - self_host_duration: 9839, - self_device_duration: 501, - }, - { - name: 'torch::autograd::AccumulateGrad', - calls: 161, - host_duration: 20767, - device_duration: 501, - self_host_duration: 8146, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: torch::autograd::AccumulateGrad', - calls: 161, - host_duration: 35735, - device_duration: 501, - self_host_duration: 14968, - self_device_duration: 0, - }, - { - name: 'TBackward0', - calls: 1, - host_duration: 128, - device_duration: 0, - self_host_duration: 30, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: TBackward0', - calls: 1, - host_duration: 197, - device_duration: 0, - self_host_duration: 69, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 1, - host_duration: 31, - device_duration: 0, - self_host_duration: 31, - self_device_duration: 0, - }, - { - name: 'aten::reshape', - calls: 1, - host_duration: 79, - device_duration: 0, - self_host_duration: 48, - self_device_duration: 0, - }, - { - name: 'ReshapeAliasBackward0', - calls: 1, - host_duration: 131, - device_duration: 0, - self_host_duration: 52, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReshapeAliasBackward0', - calls: 1, - host_duration: 197, - device_duration: 0, - self_host_duration: 66, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 1, - host_duration: 84, - device_duration: 0, - self_host_duration: 69, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 1, - 
host_duration: 6, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 1, - host_duration: 289, - device_duration: 38, - self_host_duration: 267, - self_device_duration: 38, - }, - { - name: 'MeanBackward1', - calls: 1, - host_duration: 489, - device_duration: 38, - self_host_duration: 110, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MeanBackward1', - calls: 1, - host_duration: 592, - device_duration: 38, - self_host_duration: 103, - self_device_duration: 0, - }, - { - name: 'aten::threshold_backward', - calls: 49, - host_duration: 6958, - device_duration: 8972, - self_host_duration: 6094, - self_device_duration: 8972, - }, - { - name: 'ReluBackward0', - calls: 49, - host_duration: 10647, - device_duration: 8972, - self_host_duration: 3689, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReluBackward0', - calls: 49, - host_duration: 16826, - device_duration: 8972, - self_host_duration: 6179, - self_device_duration: 0, - }, - { - name: 'AddBackward0', - calls: 16, - host_duration: 129, - device_duration: 0, - self_host_duration: 129, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddBackward0', - calls: 16, - host_duration: 1301, - device_duration: 0, - self_host_duration: 1172, - self_device_duration: 0, - }, - { - name: 'aten::empty', - calls: 370, - host_duration: 20319, - device_duration: 0, - self_host_duration: 20319, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm_backward', - calls: 53, - host_duration: 31300, - device_duration: 22267, - self_host_duration: 18144, - self_device_duration: 22267, - }, - { - name: 'CudnnBatchNormBackward0', - calls: 53, - host_duration: 34805, - device_duration: 22267, - self_host_duration: 3505, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnBatchNormBackward0', - calls: 53, - host_duration: 44607, - device_duration: 22267, - self_host_duration: 9802, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution_backward_input', - calls: 52, - host_duration: 20324, - device_duration: 38733, - self_host_duration: 15252, - self_device_duration: 38733, - }, - { - name: 'aten::cudnn_convolution_backward_weight', - calls: 53, - host_duration: 21997, - device_duration: 45837, - self_host_duration: 13786, - self_device_duration: 45837, - }, - { - name: 'aten::cudnn_convolution_backward', - calls: 53, - host_duration: 50059, - device_duration: 84570, - self_host_duration: 7738, - self_device_duration: 0, - }, - { - name: 'CudnnConvolutionBackward0', - calls: 53, - host_duration: 53558, - device_duration: 84570, - self_host_duration: 3499, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnConvolutionBackward0', - calls: 53, - host_duration: 64252, - device_duration: 89775, - self_host_duration: 8462, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 16, - host_duration: 2232, - device_duration: 5205, - self_host_duration: 1944, - self_device_duration: 5205, - }, - { - name: 'aten::fill_', - calls: 1, - host_duration: 61, - device_duration: 230, - self_host_duration: 44, - self_device_duration: 230, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 104, - device_duration: 230, - self_host_duration: 43, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices_backward', - calls: 1, - host_duration: 246, - device_duration: 1544, - self_host_duration: 
128, - self_device_duration: 1314, - }, - { - name: 'MaxPool2DWithIndicesBackward0', - calls: 1, - host_duration: 304, - device_duration: 1544, - self_host_duration: 58, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MaxPool2DWithIndicesBackward0', - calls: 1, - host_duration: 425, - device_duration: 1544, - self_host_duration: 121, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'nn.Module: ResNet.backward', - duration: 43714, - device_duration: 120604, - total_duration: 120604, - aggs: [ - { - name: 'aten::as_strided', - calls: 5, - host_duration: 9, - device_duration: 0, - self_host_duration: 9, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 4, - host_duration: 38, - device_duration: 0, - self_host_duration: 31, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 4, - host_duration: 59, - device_duration: 0, - self_host_duration: 21, - self_device_duration: 0, - }, - { - name: 'aten::mm', - calls: 2, - host_duration: 139, - device_duration: 67, - self_host_duration: 90, - self_device_duration: 67, - }, - { - name: 'AddmmBackward0', - calls: 1, - host_duration: 210, - device_duration: 67, - self_host_duration: 23, - self_device_duration: 0, - }, - { - name: 'aten::sum', - calls: 1, - host_duration: 47, - device_duration: 7, - self_host_duration: 32, - self_device_duration: 7, - }, - { - name: 'aten::view', - calls: 54, - host_duration: 166, - device_duration: 0, - self_host_duration: 166, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddmmBackward0', - calls: 1, - host_duration: 299, - device_duration: 74, - self_host_duration: 37, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 161, - host_duration: 4087, - device_duration: 534, - self_host_duration: 2037, - self_device_duration: 534, - }, - { - name: 'torch::autograd::AccumulateGrad', - calls: 161, - host_duration: 5134, - device_duration: 534, - self_host_duration: 1047, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: torch::autograd::AccumulateGrad', - calls: 161, - host_duration: 7473, - device_duration: 534, - self_host_duration: 2339, - self_device_duration: 0, - }, - { - name: 'TBackward0', - calls: 1, - host_duration: 14, - device_duration: 0, - self_host_duration: 3, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: TBackward0', - calls: 1, - host_duration: 21, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 1, - host_duration: 5, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'aten::reshape', - calls: 1, - host_duration: 10, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'ReshapeAliasBackward0', - calls: 1, - host_duration: 14, - device_duration: 0, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReshapeAliasBackward0', - calls: 1, - host_duration: 21, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 1, - host_duration: 9, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 1, - host_duration: 1, - device_duration: 0, - self_host_duration: 1, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 1, - host_duration: 70, - device_duration: 38, - self_host_duration: 49, - 
self_device_duration: 38, - }, - { - name: 'MeanBackward1', - calls: 1, - host_duration: 89, - device_duration: 38, - self_host_duration: 9, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MeanBackward1', - calls: 1, - host_duration: 102, - device_duration: 38, - self_host_duration: 13, - self_device_duration: 0, - }, - { - name: 'aten::threshold_backward', - calls: 49, - host_duration: 1789, - device_duration: 9015, - self_host_duration: 1158, - self_device_duration: 9015, - }, - { - name: 'ReluBackward0', - calls: 49, - host_duration: 2237, - device_duration: 9015, - self_host_duration: 448, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReluBackward0', - calls: 49, - host_duration: 3144, - device_duration: 9015, - self_host_duration: 907, - self_device_duration: 0, - }, - { - name: 'AddBackward0', - calls: 16, - host_duration: 12, - device_duration: 0, - self_host_duration: 12, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddBackward0', - calls: 16, - host_duration: 126, - device_duration: 0, - self_host_duration: 114, - self_device_duration: 0, - }, - { - name: 'aten::empty', - calls: 370, - host_duration: 3292, - device_duration: 0, - self_host_duration: 3292, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm_backward', - calls: 53, - host_duration: 4896, - device_duration: 22157, - self_host_duration: 2136, - self_device_duration: 22157, - }, - { - name: 'CudnnBatchNormBackward0', - calls: 53, - host_duration: 5495, - device_duration: 22157, - self_host_duration: 599, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnBatchNormBackward0', - calls: 53, - host_duration: 7289, - device_duration: 22157, - self_host_duration: 1794, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution_backward_input', - calls: 52, - host_duration: 9468, - device_duration: 37714, - self_host_duration: 7052, - self_device_duration: 37714, - }, - { - name: 'aten::cudnn_convolution_backward_weight', - calls: 53, - host_duration: 8906, - device_duration: 44342, - self_host_duration: 5723, - self_device_duration: 44342, - }, - { - name: 'aten::cudnn_convolution_backward', - calls: 53, - host_duration: 19611, - device_duration: 82056, - self_host_duration: 1237, - self_device_duration: 0, - }, - { - name: 'CudnnConvolutionBackward0', - calls: 53, - host_duration: 20205, - device_duration: 82056, - self_host_duration: 594, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnConvolutionBackward0', - calls: 53, - host_duration: 22185, - device_duration: 87283, - self_host_duration: 1386, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 16, - host_duration: 594, - device_duration: 5227, - self_host_duration: 380, - self_device_duration: 5227, - }, - { - name: 'aten::fill_', - calls: 1, - host_duration: 24, - device_duration: 230, - self_host_duration: 11, - self_device_duration: 230, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 32, - device_duration: 230, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices_backward', - calls: 1, - host_duration: 72, - device_duration: 1503, - self_host_duration: 31, - self_device_duration: 1273, - }, - { - name: 'MaxPool2DWithIndicesBackward0', - calls: 1, - host_duration: 82, - device_duration: 1503, - self_host_duration: 10, - self_device_duration: 0, - }, - { - name: 
'autograd::engine::evaluate_function: MaxPool2DWithIndicesBackward0', - calls: 1, - host_duration: 103, - device_duration: 1503, - self_host_duration: 21, - self_device_duration: 0, - }, - ], - }, - path: '0-9', - }, - { - left: { - name: 'aten::zeros', - duration: 154, - device_duration: 0, - total_duration: 154, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 75, - device_duration: 0, - self_host_duration: 75, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 4, - device_duration: 0, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 154, - device_duration: 0, - self_host_duration: 75, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'aten::zeros', - duration: 42, - device_duration: 0, - total_duration: 42, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 32, - device_duration: 0, - self_host_duration: 32, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 1, - device_duration: 0, - self_host_duration: 1, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 42, - device_duration: 0, - self_host_duration: 9, - self_device_duration: 0, - }, - ], - }, - path: '0-10', - }, - { - left: { - name: 'Optimizer.step#SGD.step', - duration: 75880, - device_duration: 1289, - total_duration: 1289, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 40, - device_duration: 0, - self_host_duration: 40, - self_device_duration: 0, - }, - { - name: 'aten::mul_', - calls: 161, - host_duration: 11873, - device_duration: 396, - self_host_duration: 9505, - self_device_duration: 396, - }, - { - name: 'aten::add_', - calls: 322, - host_duration: 22327, - device_duration: 893, - self_host_duration: 17668, - self_device_duration: 893, - }, - ], - }, - right: { - name: 'Optimizer.step#SGD.step', - duration: 16441, - device_duration: 1305, - total_duration: 1305, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 6, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'aten::mul_', - calls: 161, - host_duration: 3395, - device_duration: 399, - self_host_duration: 1806, - self_device_duration: 399, - }, - { - name: 'aten::add_', - calls: 322, - host_duration: 6217, - device_duration: 906, - self_host_duration: 3246, - self_device_duration: 906, - }, - ], - }, - path: '0-11', - }, - { - left: { - name: 'multiple nodes', - duration: 145, - device_duration: 0, - total_duration: 145, - aggs: [ - { - name: 'aten::empty', - calls: 2, - host_duration: 79, - device_duration: 0, - self_host_duration: 79, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 4, - device_duration: 0, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 106, - device_duration: 0, - self_host_duration: 62, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'multiple nodes', - duration: 15, - device_duration: 0, - total_duration: 15, - aggs: [ - { - name: 'aten::empty', - calls: 2, - host_duration: 10, - device_duration: 0, - self_host_duration: 10, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 0, - device_duration: 0, - self_host_duration: 0, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 9, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - ], - }, - 
path: '0-12', - }, - { - left: { - name: 'enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__', - duration: 1679463, - device_duration: 0, - total_duration: 1679463, - aggs: [ - { - name: 'aten::empty', - calls: 1413, - host_duration: 53837, - device_duration: 0, - self_host_duration: 53837, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 257, - host_duration: 955, - device_duration: 0, - self_host_duration: 955, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 257, - host_duration: 26673, - device_duration: 0, - self_host_duration: 16083, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 1344, - host_duration: 824006, - device_duration: 0, - self_host_duration: 18525, - self_device_duration: 0, - }, - { - name: 'detach', - calls: 128, - host_duration: 2188, - device_duration: 0, - self_host_duration: 2188, - self_device_duration: 0, - }, - { - name: 'aten::detach', - calls: 128, - host_duration: 5295, - device_duration: 0, - self_host_duration: 3107, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 450, - host_duration: 4123, - device_duration: 0, - self_host_duration: 4123, - self_device_duration: 0, - }, - { - name: 'aten::unsqueeze', - calls: 192, - host_duration: 9590, - device_duration: 0, - self_host_duration: 8097, - self_device_duration: 0, - }, - { - name: 'aten::empty_strided', - calls: 576, - host_duration: 24764, - device_duration: 0, - self_host_duration: 24764, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 704, - host_duration: 728608, - device_duration: 0, - self_host_duration: 728608, - self_device_duration: 0, - }, - { - name: 'aten::_to_copy', - calls: 640, - host_duration: 805481, - device_duration: 0, - self_host_duration: 51350, - self_device_duration: 0, - }, - { - name: 'aten::upsample_bilinear2d', - calls: 64, - host_duration: 236448, - device_duration: 0, - self_host_duration: 216887, - self_device_duration: 0, - }, - { - name: 'aten::squeeze', - calls: 64, - host_duration: 4682, - device_duration: 0, - self_host_duration: 4092, - self_device_duration: 0, - }, - { - name: 'aten::round', - calls: 64, - host_duration: 15283, - device_duration: 0, - self_host_duration: 15283, - self_device_duration: 0, - }, - { - name: 'aten::slice', - calls: 130, - host_duration: 8844, - device_duration: 0, - self_host_duration: 7513, - self_device_duration: 0, - }, - { - name: 'detach_', - calls: 256, - host_duration: 2102, - device_duration: 0, - self_host_duration: 2102, - self_device_duration: 0, - }, - { - name: 'aten::detach_', - calls: 256, - host_duration: 7286, - device_duration: 0, - self_host_duration: 5184, - self_device_duration: 0, - }, - { - name: 'aten::result_type', - calls: 320, - host_duration: 850, - device_duration: 0, - self_host_duration: 850, - self_device_duration: 0, - }, - { - name: 'aten::pow', - calls: 320, - host_duration: 43219, - device_duration: 0, - self_host_duration: 39305, - self_device_duration: 0, - }, - { - name: 'aten::sub', - calls: 320, - host_duration: 92093, - device_duration: 0, - self_host_duration: 37961, - self_device_duration: 0, - }, - { - name: 'aten::gt', - calls: 320, - host_duration: 35770, - device_duration: 0, - self_host_duration: 24869, - self_device_duration: 0, - }, - { - name: 'aten::_local_scalar_dense', - calls: 384, - host_duration: 2481, - device_duration: 0, - self_host_duration: 2481, - self_device_duration: 0, - }, - { - name: 'aten::item', - calls: 384, - host_duration: 10547, - device_duration: 0, - 
self_host_duration: 8066, - self_device_duration: 0, - }, - { - name: 'aten::is_nonzero', - calls: 320, - host_duration: 14029, - device_duration: 0, - self_host_duration: 5364, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 64, - host_duration: 79760, - device_duration: 0, - self_host_duration: 68841, - self_device_duration: 0, - }, - { - name: 'aten::resize_', - calls: 2, - host_duration: 121, - device_duration: 0, - self_host_duration: 121, - self_device_duration: 0, - }, - { - name: 'aten::narrow', - calls: 2, - host_duration: 138, - device_duration: 0, - self_host_duration: 48, - self_device_duration: 0, - }, - { - name: 'aten::_cat', - calls: 2, - host_duration: 41467, - device_duration: 0, - self_host_duration: 41176, - self_device_duration: 0, - }, - { - name: 'aten::cat', - calls: 2, - host_duration: 41608, - device_duration: 0, - self_host_duration: 141, - self_device_duration: 0, - }, - { - name: 'aten::stack', - calls: 2, - host_duration: 49080, - device_duration: 0, - self_host_duration: 2720, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__', - duration: 123490, - device_duration: 0, - total_duration: 123490, - aggs: [ - { - name: 'aten::empty', - calls: 1413, - host_duration: 6528, - device_duration: 0, - self_host_duration: 6528, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 257, - host_duration: 94, - device_duration: 0, - self_host_duration: 94, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 257, - host_duration: 2448, - device_duration: 0, - self_host_duration: 1214, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 1344, - host_duration: 16544, - device_duration: 0, - self_host_duration: 1856, - self_device_duration: 0, - }, - { - name: 'detach', - calls: 128, - host_duration: 337, - device_duration: 0, - self_host_duration: 337, - self_device_duration: 0, - }, - { - name: 'aten::detach', - calls: 128, - host_duration: 629, - device_duration: 0, - self_host_duration: 292, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 450, - host_duration: 464, - device_duration: 0, - self_host_duration: 464, - self_device_duration: 0, - }, - { - name: 'aten::unsqueeze', - calls: 192, - host_duration: 1024, - device_duration: 0, - self_host_duration: 854, - self_device_duration: 0, - }, - { - name: 'aten::empty_strided', - calls: 576, - host_duration: 3009, - device_duration: 0, - self_host_duration: 3009, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 704, - host_duration: 7419, - device_duration: 0, - self_host_duration: 7419, - self_device_duration: 0, - }, - { - name: 'aten::_to_copy', - calls: 640, - host_duration: 14688, - device_duration: 0, - self_host_duration: 4039, - self_device_duration: 0, - }, - { - name: 'aten::upsample_bilinear2d', - calls: 64, - host_duration: 31439, - device_duration: 0, - self_host_duration: 29154, - self_device_duration: 0, - }, - { - name: 'aten::squeeze', - calls: 64, - host_duration: 473, - device_duration: 0, - self_host_duration: 408, - self_device_duration: 0, - }, - { - name: 'aten::round', - calls: 64, - host_duration: 4416, - device_duration: 0, - self_host_duration: 4416, - self_device_duration: 0, - }, - { - name: 'aten::slice', - calls: 130, - host_duration: 864, - device_duration: 0, - self_host_duration: 730, - self_device_duration: 0, - }, - { - name: 'detach_', - calls: 256, - host_duration: 136, - device_duration: 0, - self_host_duration: 
115, - self_device_duration: 0, - }, - { - name: 'aten::detach_', - calls: 256, - host_duration: 586, - device_duration: 0, - self_host_duration: 471, - self_device_duration: 0, - }, - { - name: 'aten::result_type', - calls: 320, - host_duration: 149, - device_duration: 0, - self_host_duration: 149, - self_device_duration: 0, - }, - { - name: 'aten::pow', - calls: 320, - host_duration: 3935, - device_duration: 0, - self_host_duration: 3519, - self_device_duration: 0, - }, - { - name: 'aten::sub', - calls: 320, - host_duration: 7881, - device_duration: 0, - self_host_duration: 3349, - self_device_duration: 0, - }, - { - name: 'aten::gt', - calls: 320, - host_duration: 3055, - device_duration: 0, - self_host_duration: 2164, - self_device_duration: 0, - }, - { - name: 'aten::_local_scalar_dense', - calls: 384, - host_duration: 186, - device_duration: 0, - self_host_duration: 186, - self_device_duration: 0, - }, - { - name: 'aten::item', - calls: 384, - host_duration: 1134, - device_duration: 0, - self_host_duration: 943, - self_device_duration: 0, - }, - { - name: 'aten::is_nonzero', - calls: 320, - host_duration: 1588, - device_duration: 0, - self_host_duration: 615, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 64, - host_duration: 4153, - device_duration: 0, - self_host_duration: 3203, - self_device_duration: 0, - }, - { - name: 'aten::resize_', - calls: 2, - host_duration: 42, - device_duration: 0, - self_host_duration: 42, - self_device_duration: 0, - }, - { - name: 'aten::narrow', - calls: 2, - host_duration: 18, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::_cat', - calls: 2, - host_duration: 4613, - device_duration: 0, - self_host_duration: 4547, - self_device_duration: 0, - }, - { - name: 'aten::cat', - calls: 2, - host_duration: 4637, - device_duration: 0, - self_host_duration: 24, - self_device_duration: 0, - }, - { - name: 'aten::stack', - calls: 2, - host_duration: 5311, - device_duration: 0, - self_host_duration: 246, - self_device_duration: 0, - }, - ], - }, - path: '0-13', - }, - { - left: { - name: 'multiple nodes', - duration: 5185, - device_duration: 4394, - total_duration: 4394, - aggs: [ - { - name: 'aten::empty_strided', - calls: 2, - host_duration: 203, - device_duration: 0, - self_host_duration: 203, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 2, - host_duration: 4687, - device_duration: 4394, - self_host_duration: 94, - self_device_duration: 4394, - }, - { - name: 'aten::_to_copy', - calls: 2, - host_duration: 5113, - device_duration: 4394, - self_host_duration: 223, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 2, - host_duration: 5185, - device_duration: 4394, - self_host_duration: 72, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'multiple nodes', - duration: 4664, - device_duration: 4334, - total_duration: 4334, - aggs: [ - { - name: 'aten::empty_strided', - calls: 2, - host_duration: 60, - device_duration: 0, - self_host_duration: 60, - self_device_duration: 0, - }, - { - name: 'aten::copy_', - calls: 2, - host_duration: 4559, - device_duration: 4334, - self_host_duration: 26, - self_device_duration: 4334, - }, - { - name: 'aten::_to_copy', - calls: 2, - host_duration: 4655, - device_duration: 4334, - self_host_duration: 36, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 2, - host_duration: 4664, - device_duration: 4334, - self_host_duration: 9, - self_device_duration: 0, - }, - ], - }, - path: '0-14', - }, - { - 
left: { - name: 'nn.Module: ResNet', - duration: 112761, - device_duration: 59848, - total_duration: 59848, - aggs: [ - { - name: 'aten::empty', - calls: 318, - host_duration: 13992, - device_duration: 0, - self_host_duration: 13992, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution', - calls: 53, - host_duration: 21952, - device_duration: 35233, - self_host_duration: 17460, - self_device_duration: 35233, - }, - { - name: 'aten::_convolution', - calls: 53, - host_duration: 25568, - device_duration: 35233, - self_host_duration: 3616, - self_device_duration: 0, - }, - { - name: 'aten::convolution', - calls: 53, - host_duration: 27534, - device_duration: 35233, - self_host_duration: 1966, - self_device_duration: 0, - }, - { - name: 'aten::conv2d', - calls: 53, - host_duration: 29546, - device_duration: 35233, - self_host_duration: 2012, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 53, - host_duration: 6523, - device_duration: 53, - self_host_duration: 5669, - self_device_duration: 53, - }, - { - name: 'aten::empty_like', - calls: 53, - host_duration: 5605, - device_duration: 0, - self_host_duration: 2378, - self_device_duration: 0, - }, - { - name: 'aten::view', - calls: 53, - host_duration: 829, - device_duration: 0, - self_host_duration: 829, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm', - calls: 53, - host_duration: 35510, - device_duration: 12828, - self_host_duration: 20387, - self_device_duration: 12828, - }, - { - name: 'aten::_batch_norm_impl_index', - calls: 53, - host_duration: 38030, - device_duration: 12828, - self_host_duration: 2520, - self_device_duration: 0, - }, - { - name: 'aten::batch_norm', - calls: 53, - host_duration: 39727, - device_duration: 12828, - self_host_duration: 1697, - self_device_duration: 0, - }, - { - name: 'aten::clamp_min', - calls: 49, - host_duration: 2715, - device_duration: 5998, - self_host_duration: 1950, - self_device_duration: 5998, - }, - { - name: 'aten::clamp_min_', - calls: 49, - host_duration: 4264, - device_duration: 5998, - self_host_duration: 1549, - self_device_duration: 0, - }, - { - name: 'aten::relu_', - calls: 49, - host_duration: 8337, - device_duration: 5998, - self_host_duration: 4073, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices', - calls: 1, - host_duration: 212, - device_duration: 466, - self_host_duration: 193, - self_device_duration: 466, - }, - { - name: 'aten::max_pool2d', - calls: 1, - host_duration: 262, - device_duration: 466, - self_host_duration: 50, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 16, - host_duration: 1553, - device_duration: 5165, - self_host_duration: 1297, - self_device_duration: 5165, - }, - { - name: 'aten::mean', - calls: 1, - host_duration: 187, - device_duration: 64, - self_host_duration: 169, - self_device_duration: 64, - }, - { - name: 'aten::adaptive_avg_pool2d', - calls: 1, - host_duration: 231, - device_duration: 64, - self_host_duration: 44, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 1, - host_duration: 52, - device_duration: 0, - self_host_duration: 52, - self_device_duration: 0, - }, - { - name: 'aten::flatten', - calls: 1, - host_duration: 101, - device_duration: 0, - self_host_duration: 49, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 2, - host_duration: 21, - device_duration: 0, - self_host_duration: 21, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 1, - host_duration: 51, - 
device_duration: 0, - self_host_duration: 40, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 1, - host_duration: 120, - device_duration: 0, - self_host_duration: 69, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 1, - host_duration: 49, - device_duration: 0, - self_host_duration: 39, - self_device_duration: 0, - }, - { - name: 'aten::addmm', - calls: 1, - host_duration: 405, - device_duration: 41, - self_host_duration: 302, - self_device_duration: 41, - }, - { - name: 'aten::linear', - calls: 1, - host_duration: 594, - device_duration: 41, - self_host_duration: 69, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'nn.Module: ResNet', - duration: 28459, - device_duration: 59832, - total_duration: 59832, - aggs: [ - { - name: 'aten::empty', - calls: 318, - host_duration: 2234, - device_duration: 0, - self_host_duration: 2234, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution', - calls: 53, - host_duration: 8644, - device_duration: 35209, - self_host_duration: 6782, - self_device_duration: 35209, - }, - { - name: 'aten::_convolution', - calls: 53, - host_duration: 9216, - device_duration: 35209, - self_host_duration: 572, - self_device_duration: 0, - }, - { - name: 'aten::convolution', - calls: 53, - host_duration: 9532, - device_duration: 35209, - self_host_duration: 316, - self_device_duration: 0, - }, - { - name: 'aten::conv2d', - calls: 53, - host_duration: 9818, - device_duration: 35209, - self_host_duration: 286, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 53, - host_duration: 1898, - device_duration: 55, - self_host_duration: 1202, - self_device_duration: 55, - }, - { - name: 'aten::empty_like', - calls: 53, - host_duration: 941, - device_duration: 0, - self_host_duration: 300, - self_device_duration: 0, - }, - { - name: 'aten::view', - calls: 53, - host_duration: 137, - device_duration: 0, - self_host_duration: 137, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm', - calls: 53, - host_duration: 5543, - device_duration: 12824, - self_host_duration: 2527, - self_device_duration: 12824, - }, - { - name: 'aten::_batch_norm_impl_index', - calls: 53, - host_duration: 5914, - device_duration: 12824, - self_host_duration: 371, - self_device_duration: 0, - }, - { - name: 'aten::batch_norm', - calls: 53, - host_duration: 6167, - device_duration: 12824, - self_host_duration: 253, - self_device_duration: 0, - }, - { - name: 'aten::clamp_min', - calls: 49, - host_duration: 1081, - device_duration: 6004, - self_host_duration: 507, - self_device_duration: 6004, - }, - { - name: 'aten::clamp_min_', - calls: 49, - host_duration: 1299, - device_duration: 6004, - self_host_duration: 218, - self_device_duration: 0, - }, - { - name: 'aten::relu_', - calls: 49, - host_duration: 1941, - device_duration: 6004, - self_host_duration: 642, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices', - calls: 1, - host_duration: 59, - device_duration: 466, - self_host_duration: 44, - self_device_duration: 466, - }, - { - name: 'aten::max_pool2d', - calls: 1, - host_duration: 66, - device_duration: 466, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 16, - host_duration: 443, - device_duration: 5169, - self_host_duration: 267, - self_device_duration: 5169, - }, - { - name: 'aten::mean', - calls: 1, - host_duration: 51, - device_duration: 63, - self_host_duration: 37, - self_device_duration: 63, - }, - { - name: 'aten::adaptive_avg_pool2d', - 
calls: 1, - host_duration: 58, - device_duration: 63, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 1, - host_duration: 8, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::flatten', - calls: 1, - host_duration: 16, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::as_strided', - calls: 2, - host_duration: 3, - device_duration: 0, - self_host_duration: 3, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 1, - host_duration: 10, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 1, - host_duration: 18, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 1, - host_duration: 5, - device_duration: 0, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'aten::addmm', - calls: 1, - host_duration: 161, - device_duration: 42, - self_host_duration: 111, - self_device_duration: 42, - }, - { - name: 'aten::linear', - calls: 1, - host_duration: 188, - device_duration: 42, - self_host_duration: 9, - self_device_duration: 0, - }, - ], - }, - path: '0-15', - }, - { - left: { - name: 'nn.Module: CrossEntropyLoss', - duration: 712, - device_duration: 11, - total_duration: 11, - aggs: [ - { - name: 'aten::to', - calls: 1, - host_duration: 6, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax', - calls: 1, - host_duration: 150, - device_duration: 7, - self_host_duration: 132, - self_device_duration: 7, - }, - { - name: 'aten::log_softmax', - calls: 1, - host_duration: 231, - device_duration: 7, - self_host_duration: 75, - self_device_duration: 0, - }, - { - name: 'aten::resize_', - calls: 1, - host_duration: 5, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_forward', - calls: 1, - host_duration: 266, - device_duration: 4, - self_host_duration: 243, - self_device_duration: 4, - }, - { - name: 'aten::nll_loss', - calls: 1, - host_duration: 300, - device_duration: 4, - self_host_duration: 34, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_nd', - calls: 1, - host_duration: 328, - device_duration: 4, - self_host_duration: 28, - self_device_duration: 0, - }, - { - name: 'aten::cross_entropy_loss', - calls: 1, - host_duration: 620, - device_duration: 11, - self_host_duration: 61, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'nn.Module: CrossEntropyLoss', - duration: 156, - device_duration: 11, - total_duration: 11, - aggs: [ - { - name: 'aten::to', - calls: 1, - host_duration: 1, - device_duration: 0, - self_host_duration: 1, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax', - calls: 1, - host_duration: 41, - device_duration: 7, - self_host_duration: 27, - self_device_duration: 7, - }, - { - name: 'aten::log_softmax', - calls: 1, - host_duration: 52, - device_duration: 7, - self_host_duration: 10, - self_device_duration: 0, - }, - { - name: 'aten::resize_', - calls: 1, - host_duration: 1, - device_duration: 0, - self_host_duration: 1, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_forward', - calls: 1, - host_duration: 49, - device_duration: 4, - self_host_duration: 34, - self_device_duration: 4, - }, - { - name: 'aten::nll_loss', - calls: 1, - host_duration: 53, - device_duration: 4, - self_host_duration: 4, - 
self_device_duration: 0, - }, - { - name: 'aten::nll_loss_nd', - calls: 1, - host_duration: 57, - device_duration: 4, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'aten::cross_entropy_loss', - calls: 1, - host_duration: 124, - device_duration: 11, - self_host_duration: 15, - self_device_duration: 0, - }, - ], - }, - path: '0-16', - }, - { - left: { - name: 'aten::zeros', - duration: 109, - device_duration: 0, - total_duration: 109, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 39, - device_duration: 0, - self_host_duration: 39, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 5, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 109, - device_duration: 0, - self_host_duration: 65, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'aten::zeros', - duration: 23, - device_duration: 0, - total_duration: 23, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 13, - device_duration: 0, - self_host_duration: 13, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 1, - device_duration: 0, - self_host_duration: 1, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 23, - device_duration: 0, - self_host_duration: 9, - self_device_duration: 0, - }, - ], - }, - path: '0-17', - }, - { - left: { - name: 'Optimizer.zero_grad#SGD.zero_grad', - duration: 24374, - device_duration: 132, - total_duration: 132, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 44, - device_duration: 0, - self_host_duration: 44, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 161, - host_duration: 7104, - device_duration: 132, - self_host_duration: 4941, - self_device_duration: 132, - }, - { - name: 'aten::zero_', - calls: 161, - host_duration: 14806, - device_duration: 132, - self_host_duration: 7702, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'Optimizer.zero_grad#SGD.zero_grad', - duration: 4461, - device_duration: 137, - total_duration: 137, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 6, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 161, - host_duration: 1945, - device_duration: 137, - self_host_duration: 878, - self_device_duration: 137, - }, - { - name: 'aten::zero_', - calls: 161, - host_duration: 2805, - device_duration: 137, - self_host_duration: 860, - self_device_duration: 0, - }, - ], - }, - path: '0-18', - }, - { - left: { - name: 'aten::ones_like', - duration: 263, - device_duration: 1, - total_duration: 1, - aggs: [ - { - name: 'aten::empty_strided', - calls: 1, - host_duration: 99, - device_duration: 0, - self_host_duration: 99, - self_device_duration: 0, - }, - { - name: 'aten::empty_like', - calls: 1, - host_duration: 149, - device_duration: 0, - self_host_duration: 50, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 1, - host_duration: 49, - device_duration: 1, - self_host_duration: 34, - self_device_duration: 1, - }, - { - name: 'aten::ones_like', - calls: 1, - host_duration: 263, - device_duration: 1, - self_host_duration: 65, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'aten::ones_like', - duration: 51, - device_duration: 1, - total_duration: 1, - aggs: [ - { - name: 'aten::empty_strided', - calls: 1, - host_duration: 18, - device_duration: 0, - self_host_duration: 18, 
- self_device_duration: 0, - }, - { - name: 'aten::empty_like', - calls: 1, - host_duration: 24, - device_duration: 0, - self_host_duration: 6, - self_device_duration: 0, - }, - { - name: 'aten::fill_', - calls: 1, - host_duration: 20, - device_duration: 1, - self_host_duration: 8, - self_device_duration: 1, - }, - { - name: 'aten::ones_like', - calls: 1, - host_duration: 51, - device_duration: 1, - self_host_duration: 7, - self_device_duration: 0, - }, - ], - }, - path: '0-19', - }, - { - left: { - name: 'nn.Module: CrossEntropyLoss.backward', - duration: 845, - device_duration: 13, - total_duration: 13, - aggs: [ - { - name: 'aten::fill_', - calls: 1, - host_duration: 58, - device_duration: 1, - self_host_duration: 36, - self_device_duration: 1, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 112, - device_duration: 1, - self_host_duration: 54, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_backward', - calls: 1, - host_duration: 269, - device_duration: 4, - self_host_duration: 142, - self_device_duration: 3, - }, - { - name: 'NllLossBackward0', - calls: 1, - host_duration: 406, - device_duration: 4, - self_host_duration: 137, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: NllLossBackward0', - calls: 1, - host_duration: 522, - device_duration: 4, - self_host_duration: 116, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax_backward_data', - calls: 1, - host_duration: 109, - device_duration: 9, - self_host_duration: 91, - self_device_duration: 9, - }, - { - name: 'LogSoftmaxBackward0', - calls: 1, - host_duration: 178, - device_duration: 9, - self_host_duration: 69, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: LogSoftmaxBackward0', - calls: 1, - host_duration: 283, - device_duration: 9, - self_host_duration: 105, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'nn.Module: CrossEntropyLoss.backward', - duration: 283, - device_duration: 13, - total_duration: 13, - aggs: [ - { - name: 'aten::fill_', - calls: 1, - host_duration: 33, - device_duration: 1, - self_host_duration: 12, - self_device_duration: 1, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 41, - device_duration: 1, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::nll_loss_backward', - calls: 1, - host_duration: 93, - device_duration: 4, - self_host_duration: 41, - self_device_duration: 3, - }, - { - name: 'NllLossBackward0', - calls: 1, - host_duration: 185, - device_duration: 4, - self_host_duration: 92, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: NllLossBackward0', - calls: 1, - host_duration: 211, - device_duration: 4, - self_host_duration: 26, - self_device_duration: 0, - }, - { - name: 'aten::_log_softmax_backward_data', - calls: 1, - host_duration: 36, - device_duration: 9, - self_host_duration: 22, - self_device_duration: 9, - }, - { - name: 'LogSoftmaxBackward0', - calls: 1, - host_duration: 45, - device_duration: 9, - self_host_duration: 9, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: LogSoftmaxBackward0', - calls: 1, - host_duration: 62, - device_duration: 9, - self_host_duration: 17, - self_device_duration: 0, - }, - ], - }, - path: '0-20', - }, - { - left: { - name: 'nn.Module: ResNet.backward', - duration: 180218, - device_duration: 120676, - total_duration: 120676, - aggs: [ - { - name: 'aten::as_strided', - calls: 5, - host_duration: 67, - device_duration: 0, - 
self_host_duration: 67, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 4, - host_duration: 255, - device_duration: 0, - self_host_duration: 204, - self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 4, - host_duration: 430, - device_duration: 0, - self_host_duration: 175, - self_device_duration: 0, - }, - { - name: 'aten::mm', - calls: 2, - host_duration: 323, - device_duration: 68, - self_host_duration: 265, - self_device_duration: 68, - }, - { - name: 'AddmmBackward0', - calls: 1, - host_duration: 844, - device_duration: 68, - self_host_duration: 209, - self_device_duration: 0, - }, - { - name: 'aten::sum', - calls: 1, - host_duration: 197, - device_duration: 7, - self_host_duration: 175, - self_device_duration: 7, - }, - { - name: 'aten::view', - calls: 54, - host_duration: 963, - device_duration: 0, - self_host_duration: 963, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddmmBackward0', - calls: 1, - host_duration: 1377, - device_duration: 75, - self_host_duration: 296, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 161, - host_duration: 12404, - device_duration: 496, - self_host_duration: 9659, - self_device_duration: 496, - }, - { - name: 'torch::autograd::AccumulateGrad', - calls: 161, - host_duration: 20417, - device_duration: 496, - self_host_duration: 8013, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: torch::autograd::AccumulateGrad', - calls: 161, - host_duration: 35211, - device_duration: 496, - self_host_duration: 14794, - self_device_duration: 0, - }, - { - name: 'TBackward0', - calls: 1, - host_duration: 152, - device_duration: 0, - self_host_duration: 34, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: TBackward0', - calls: 1, - host_duration: 231, - device_duration: 0, - self_host_duration: 79, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 1, - host_duration: 35, - device_duration: 0, - self_host_duration: 35, - self_device_duration: 0, - }, - { - name: 'aten::reshape', - calls: 1, - host_duration: 91, - device_duration: 0, - self_host_duration: 56, - self_device_duration: 0, - }, - { - name: 'ReshapeAliasBackward0', - calls: 1, - host_duration: 133, - device_duration: 0, - self_host_duration: 42, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReshapeAliasBackward0', - calls: 1, - host_duration: 205, - device_duration: 0, - self_host_duration: 72, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 1, - host_duration: 95, - device_duration: 0, - self_host_duration: 79, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 1, - host_duration: 7, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 1, - host_duration: 324, - device_duration: 37, - self_host_duration: 301, - self_device_duration: 37, - }, - { - name: 'MeanBackward1', - calls: 1, - host_duration: 547, - device_duration: 37, - self_host_duration: 121, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MeanBackward1', - calls: 1, - host_duration: 662, - device_duration: 37, - self_host_duration: 115, - self_device_duration: 0, - }, - { - name: 'aten::threshold_backward', - calls: 49, - host_duration: 6880, - device_duration: 9012, - self_host_duration: 6037, - self_device_duration: 9012, - }, - { - name: 'ReluBackward0', - calls: 49, - host_duration: 10536, - 
device_duration: 9012, - self_host_duration: 3656, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReluBackward0', - calls: 49, - host_duration: 16666, - device_duration: 9012, - self_host_duration: 6130, - self_device_duration: 0, - }, - { - name: 'AddBackward0', - calls: 16, - host_duration: 122, - device_duration: 0, - self_host_duration: 122, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddBackward0', - calls: 16, - host_duration: 1278, - device_duration: 0, - self_host_duration: 1156, - self_device_duration: 0, - }, - { - name: 'aten::empty', - calls: 370, - host_duration: 21126, - device_duration: 0, - self_host_duration: 21126, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm_backward', - calls: 53, - host_duration: 30875, - device_duration: 22166, - self_host_duration: 17909, - self_device_duration: 22166, - }, - { - name: 'CudnnBatchNormBackward0', - calls: 53, - host_duration: 34355, - device_duration: 22166, - self_host_duration: 3480, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnBatchNormBackward0', - calls: 53, - host_duration: 44006, - device_duration: 22166, - self_host_duration: 9651, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution_backward_input', - calls: 52, - host_duration: 20496, - device_duration: 37887, - self_host_duration: 15516, - self_device_duration: 37887, - }, - { - name: 'aten::cudnn_convolution_backward_weight', - calls: 53, - host_duration: 22878, - device_duration: 44271, - self_host_duration: 13672, - self_device_duration: 44271, - }, - { - name: 'aten::cudnn_convolution_backward', - calls: 53, - host_duration: 50961, - device_duration: 82158, - self_host_duration: 7587, - self_device_duration: 0, - }, - { - name: 'CudnnConvolutionBackward0', - calls: 53, - host_duration: 54406, - device_duration: 82158, - self_host_duration: 3445, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnConvolutionBackward0', - calls: 53, - host_duration: 64877, - device_duration: 87386, - self_host_duration: 8284, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 16, - host_duration: 2187, - device_duration: 5228, - self_host_duration: 1909, - self_device_duration: 5228, - }, - { - name: 'aten::fill_', - calls: 1, - host_duration: 53, - device_duration: 230, - self_host_duration: 36, - self_device_duration: 230, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 96, - device_duration: 230, - self_host_duration: 43, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices_backward', - calls: 1, - host_duration: 237, - device_duration: 1504, - self_host_duration: 129, - self_device_duration: 1274, - }, - { - name: 'MaxPool2DWithIndicesBackward0', - calls: 1, - host_duration: 295, - device_duration: 1504, - self_host_duration: 58, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MaxPool2DWithIndicesBackward0', - calls: 1, - host_duration: 411, - device_duration: 1504, - self_host_duration: 116, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'nn.Module: ResNet.backward', - duration: 45132, - device_duration: 121137, - total_duration: 121137, - aggs: [ - { - name: 'aten::as_strided', - calls: 5, - host_duration: 7, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::transpose', - calls: 4, - host_duration: 29, - device_duration: 0, - self_host_duration: 23, - 
self_device_duration: 0, - }, - { - name: 'aten::t', - calls: 4, - host_duration: 53, - device_duration: 0, - self_host_duration: 24, - self_device_duration: 0, - }, - { - name: 'aten::mm', - calls: 2, - host_duration: 144, - device_duration: 67, - self_host_duration: 96, - self_device_duration: 67, - }, - { - name: 'AddmmBackward0', - calls: 1, - host_duration: 208, - device_duration: 67, - self_host_duration: 24, - self_device_duration: 0, - }, - { - name: 'aten::sum', - calls: 1, - host_duration: 45, - device_duration: 7, - self_host_duration: 30, - self_device_duration: 7, - }, - { - name: 'aten::view', - calls: 54, - host_duration: 163, - device_duration: 0, - self_host_duration: 163, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddmmBackward0', - calls: 1, - host_duration: 295, - device_duration: 74, - self_host_duration: 38, - self_device_duration: 0, - }, - { - name: 'aten::add_', - calls: 161, - host_duration: 4103, - device_duration: 535, - self_host_duration: 2037, - self_device_duration: 535, - }, - { - name: 'torch::autograd::AccumulateGrad', - calls: 161, - host_duration: 5183, - device_duration: 535, - self_host_duration: 1080, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: torch::autograd::AccumulateGrad', - calls: 161, - host_duration: 7655, - device_duration: 535, - self_host_duration: 2472, - self_device_duration: 0, - }, - { - name: 'TBackward0', - calls: 1, - host_duration: 16, - device_duration: 0, - self_host_duration: 3, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: TBackward0', - calls: 1, - host_duration: 24, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::_reshape_alias', - calls: 1, - host_duration: 5, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'aten::reshape', - calls: 1, - host_duration: 10, - device_duration: 0, - self_host_duration: 5, - self_device_duration: 0, - }, - { - name: 'ReshapeAliasBackward0', - calls: 1, - host_duration: 17, - device_duration: 0, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReshapeAliasBackward0', - calls: 1, - host_duration: 27, - device_duration: 0, - self_host_duration: 10, - self_device_duration: 0, - }, - { - name: 'aten::expand', - calls: 1, - host_duration: 10, - device_duration: 0, - self_host_duration: 9, - self_device_duration: 0, - }, - { - name: 'aten::to', - calls: 1, - host_duration: 1, - device_duration: 0, - self_host_duration: 1, - self_device_duration: 0, - }, - { - name: 'aten::div', - calls: 1, - host_duration: 63, - device_duration: 37, - self_host_duration: 45, - self_device_duration: 37, - }, - { - name: 'MeanBackward1', - calls: 1, - host_duration: 83, - device_duration: 37, - self_host_duration: 9, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MeanBackward1', - calls: 1, - host_duration: 99, - device_duration: 37, - self_host_duration: 16, - self_device_duration: 0, - }, - { - name: 'aten::threshold_backward', - calls: 49, - host_duration: 1863, - device_duration: 9003, - self_host_duration: 1203, - self_device_duration: 9003, - }, - { - name: 'ReluBackward0', - calls: 49, - host_duration: 2330, - device_duration: 9003, - self_host_duration: 467, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: ReluBackward0', - calls: 49, - host_duration: 3313, - device_duration: 9003, - 
self_host_duration: 983, - self_device_duration: 0, - }, - { - name: 'AddBackward0', - calls: 16, - host_duration: 14, - device_duration: 0, - self_host_duration: 14, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: AddBackward0', - calls: 16, - host_duration: 135, - device_duration: 0, - self_host_duration: 121, - self_device_duration: 0, - }, - { - name: 'aten::empty', - calls: 370, - host_duration: 4638, - device_duration: 0, - self_host_duration: 4638, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_batch_norm_backward', - calls: 53, - host_duration: 5047, - device_duration: 22244, - self_host_duration: 2219, - self_device_duration: 22244, - }, - { - name: 'CudnnBatchNormBackward0', - calls: 53, - host_duration: 5637, - device_duration: 22244, - self_host_duration: 590, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnBatchNormBackward0', - calls: 53, - host_duration: 7407, - device_duration: 22244, - self_host_duration: 1770, - self_device_duration: 0, - }, - { - name: 'aten::cudnn_convolution_backward_input', - calls: 52, - host_duration: 9345, - device_duration: 37854, - self_host_duration: 6945, - self_device_duration: 37854, - }, - { - name: 'aten::cudnn_convolution_backward_weight', - calls: 53, - host_duration: 9886, - device_duration: 44650, - self_host_duration: 5378, - self_device_duration: 44650, - }, - { - name: 'aten::cudnn_convolution_backward', - calls: 53, - host_duration: 20453, - device_duration: 82504, - self_host_duration: 1222, - self_device_duration: 0, - }, - { - name: 'CudnnConvolutionBackward0', - calls: 53, - host_duration: 21000, - device_duration: 82504, - self_host_duration: 547, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: CudnnConvolutionBackward0', - calls: 53, - host_duration: 23024, - device_duration: 87731, - self_host_duration: 1440, - self_device_duration: 0, - }, - { - name: 'aten::add', - calls: 16, - host_duration: 584, - device_duration: 5227, - self_host_duration: 374, - self_device_duration: 5227, - }, - { - name: 'aten::fill_', - calls: 1, - host_duration: 26, - device_duration: 230, - self_host_duration: 12, - self_device_duration: 230, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 33, - device_duration: 230, - self_host_duration: 7, - self_device_duration: 0, - }, - { - name: 'aten::max_pool2d_with_indices_backward', - calls: 1, - host_duration: 73, - device_duration: 1513, - self_host_duration: 30, - self_device_duration: 1283, - }, - { - name: 'MaxPool2DWithIndicesBackward0', - calls: 1, - host_duration: 83, - device_duration: 1513, - self_host_duration: 10, - self_device_duration: 0, - }, - { - name: 'autograd::engine::evaluate_function: MaxPool2DWithIndicesBackward0', - calls: 1, - host_duration: 106, - device_duration: 1513, - self_host_duration: 23, - self_device_duration: 0, - }, - ], - }, - path: '0-21', - }, - { - left: { - name: 'aten::zeros', - duration: 160, - device_duration: 0, - total_duration: 160, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 87, - device_duration: 0, - self_host_duration: 87, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 4, - device_duration: 0, - self_host_duration: 4, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 160, - device_duration: 0, - self_host_duration: 69, - self_device_duration: 0, - }, - ], - }, - right: { - name: 'aten::zeros', - duration: 119, - 
device_duration: 0, - total_duration: 119, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 105, - device_duration: 0, - self_host_duration: 105, - self_device_duration: 0, - }, - { - name: 'aten::zero_', - calls: 1, - host_duration: 2, - device_duration: 0, - self_host_duration: 2, - self_device_duration: 0, - }, - { - name: 'aten::zeros', - calls: 1, - host_duration: 119, - device_duration: 0, - self_host_duration: 12, - self_device_duration: 0, - }, - ], - }, - path: '0-22', - }, - { - left: { - name: 'Optimizer.step#SGD.step', - duration: 75435, - device_duration: 1295, - total_duration: 1295, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 40, - device_duration: 0, - self_host_duration: 40, - self_device_duration: 0, - }, - { - name: 'aten::mul_', - calls: 161, - host_duration: 11945, - device_duration: 401, - self_host_duration: 9568, - self_device_duration: 401, - }, - { - name: 'aten::add_', - calls: 322, - host_duration: 22480, - device_duration: 894, - self_host_duration: 17805, - self_device_duration: 894, - }, - ], - }, - right: { - name: 'Optimizer.step#SGD.step', - duration: 16687, - device_duration: 1298, - total_duration: 1298, - aggs: [ - { - name: 'aten::empty', - calls: 1, - host_duration: 8, - device_duration: 0, - self_host_duration: 8, - self_device_duration: 0, - }, - { - name: 'aten::mul_', - calls: 161, - host_duration: 3440, - device_duration: 404, - self_host_duration: 1824, - self_device_duration: 404, - }, - { - name: 'aten::add_', - calls: 322, - host_duration: 6161, - device_duration: 894, - self_host_duration: 3186, - self_device_duration: 894, - }, - ], - }, - path: '0-23', - }, - ], - }); - } -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/openapi.yaml b/plugins/tensorboard-plugins/tb_plugin/fe/src/api/openapi.yaml deleted file mode 100644 index 0218adb9d74ed1505d6b86b5ac6550ec33539144..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/api/openapi.yaml +++ /dev/null @@ -1,1204 +0,0 @@ -openapi: 3.0.1 -info: - title: Pytorch profile API - version: 1.0.0 -servers: - - url: . 
-paths: - /runs: - get: - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/Runs' - /views: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - responses: - '200': - description: successful views - content: - '*/*': - schema: - type: array - items: - type: string - enum: - - Overview - - Operator - - Kernel - - Trace - - Distributed - - Memory - /workers: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: view - required: true - schema: - type: string - responses: - '200': - description: successful workers - content: - '*/*': - schema: - type: array - items: - type: string - /spans: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - responses: - '200': - description: successful spans - content: - '*/*': - schema: - type: array - items: - type: string - /overview: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/Overview' - /operation: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - - in: query - name: group_by - required: true - schema: - type: string - enum: - - Operation - - OperationAndInputShape - description: Group By - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/OperatorGraph' - /operation/table: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - - in: query - name: group_by - required: true - schema: - type: string - enum: - - Operation - - OperationAndInputShape - description: Group By - responses: - '200': - description: successful operation - content: - '*/*': - schema: - type: object - required: - - metadata - - data - properties: - metadata: - $ref: '#/components/schemas/TableMetadata' - data: - $ref: '#/components/schemas/OperationTableData' - /operation/stack: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - - in: query - name: group_by - required: true - schema: - type: string - enum: - - Operation - - OperationAndInputShape - description: Group By - - in: query - name: op_name - required: true - schema: - type: string - - in: query - name: input_shape - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - type: object - required: - - metadata - - data - properties: - metadata: - $ref: '#/components/schemas/TableMetadata' - data: - $ref: '#/components/schemas/CallStackTableData' - /distributed/overlap: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - 
type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/DistributedGraph' - /distributed/waittime: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/DistributedGraph' - /distributed/commops: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - type: object - required: - - metadata - - data - properties: - metadata: - type: object - required: - - title - properties: - title: - type: string - data: - type: object - /distributed/gpuinfo: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/GpuInfo' - /memory: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - - in: query - name: start_ts - required: false - schema: - type: number - - in: query - name: end_ts - required: false - schema: - type: number - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/MemoryStatsData' - /memory_curve: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/MemoryCurveData' - /memory_events: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - - in: query - name: start_ts - required: false - schema: - type: number - - in: query - name: end_ts - required: false - schema: - type: number - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/MemoryEventsData' - /kernel: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - - in: query - name: group_by - required: true - schema: - type: string - enum: - - Kernel - - KernelNameAndOpName - description: Group By - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/KernelGraph' - /kernel/table: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - 
in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - - in: query - name: group_by - required: false - schema: - type: string - enum: - - Kernel - - KernelNameAndOpName - description: Group By - responses: - '200': - description: successful kernel - content: - '*/*': - schema: - $ref: '#/components/schemas/TableData' - /kernel/tc_pie: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/TensorCoresGraph' - /trace: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful trace data - content: - '*/*': - schema: - type: object - /module: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/ModuleViewData' - /tree: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/OperatorNode' - /diffnode: - get: - parameters: - - in: query - name: run - required: true - schema: - type: string - - in: query - name: worker - required: true - schema: - type: string - - in: query - name: span - required: true - schema: - type: string - - in: query - name: exp_run - required: true - schema: - type: string - - in: query - name: exp_worker - required: true - schema: - type: string - - in: query - name: exp_span - required: true - schema: - type: string - - in: query - name: path - required: false - schema: - type: string - responses: - '200': - description: successful operation - content: - '*/*': - schema: - $ref: '#/components/schemas/DiffNode' -components: - schemas: - Runs: - type: object - required: - - runs - - loading - properties: - runs: - type: array - items: - type: string - loading: - type: boolean - Performance: - type: object - required: - - name - properties: - name: - type: string - description: - type: string - value: - type: string - extra: - type: string - children: - type: array - items: - $ref: '#/components/schemas/Performance' - Environment: - type: object - required: - - title - - value - properties: - title: - type: string - value: - type: string - GraphColumn: - type: object - required: - - type - - name - properties: - type: - type: string - name: - type: string - role: - type: string - p: - type: object - properties: - html: - type: boolean - ValueAndFormat: - type: object - required: - - v - - f - properties: - v: - oneOf: - - type: string - - type: number - - type: boolean - f: - type: string - Graph: - type: object - required: - - columns - - rows - properties: - title: - type: string - columns: - type: array - items: 
- $ref: '#/components/schemas/GraphColumn' - rows: - type: array - items: - type: array - items: - oneOf: - - type: string - - type: number - - type: boolean - - $ref: '#/components/schemas/ValueAndFormat' - Overview: - type: object - required: - - performance - - environments - - steps - - recommendations - properties: - performance: - type: array - items: - $ref: '#/components/schemas/Performance' - environments: - type: array - items: - $ref: '#/components/schemas/Environment' - steps: - $ref: '#/components/schemas/Graph' - recommendations: - type: string - gpu_metrics: - $ref: '#/components/schemas/GpuMetrics' - OperatorGraph: - type: object - required: - - device_total_time - - device_self_time - - host_total_time - - host_self_time - properties: - device_total_time: - $ref: '#/components/schemas/Graph' - device_self_time: - $ref: '#/components/schemas/Graph' - host_total_time: - $ref: '#/components/schemas/Graph' - host_self_time: - $ref: '#/components/schemas/Graph' - TableMetadata: - type: object - required: - - sort - properties: - sort: - type: string - tooltips: - type: object - TableData: - type: object - required: - - metadata - - data - properties: - data: - $ref: '#/components/schemas/Graph' - metadata: - $ref: '#/components/schemas/TableMetadata' - KernelGraph: - type: object - required: - - total - properties: - total: - $ref: '#/components/schemas/Graph' - TensorCoresGraph: - type: object - required: - - total - properties: - total: - $ref: '#/components/schemas/Graph' - OperationTableData: - type: array - items: - type: object - required: - - name - - calls - - host_self_duration - - host_total_duration - - has_call_stack - properties: - name: - type: string - input_shape: - type: string - calls: - type: number - device_self_duration: - type: number - device_total_duration: - type: number - host_self_duration: - type: number - host_total_duration: - type: number - has_call_stack: - type: boolean - tc_eligible: - type: string - tc_self_ratio: - type: number - tc_total_ratio: - type: number - CallStackTableData: - type: array - items: - type: object - required: - - name - - calls - - host_self_duration - - host_total_duration - properties: - name: - type: string - input_shape: - type: string - calls: - type: number - device_self_duration: - type: number - device_total_duration: - type: number - host_self_duration: - type: number - host_total_duration: - type: number - call_stack: - type: string - tc_eligible: - type: string - tc_self_ratio: - type: number - tc_total_ratio: - type: number - DistributedGraph: - type: object - required: - - metadata - - data - properties: - metadata: - type: object - required: - - title - - legends - - units - properties: - title: - type: string - legends: - type: array - items: - type: string - units: - type: string - data: - type: object - GpuInfo: - type: object - required: - - metadata - - data - properties: - metadata: - type: object - required: - - title - properties: - title: - type: string - data: - type: object - GpuMetrics: - type: object - required: - - data - - tooltip - properties: - data: - type: array - items: - $ref: '#/components/schemas/GpuMetric' - tooltip: - type: string - GpuMetric: - type: object - required: - - title - - value - properties: - title: - type: string - value: - type: string - MemoryStatsData: - type: object - required: - - metadata - - columns - - rows - properties: - metadata: - $ref: '#/components/schemas/MemoryStatsTableMetadata' - columns: - type: array - items: - $ref: 
'#/components/schemas/GraphColumn' - rows: - type: object - MemoryEventsData: - type: object - required: - - metadata - - columns - - rows - properties: - metadata: - $ref: '#/components/schemas/MemoryEventsTableMetadata' - columns: - type: array - items: - $ref: '#/components/schemas/GraphColumn' - rows: - type: object - MemoryEventsTableMetadata: - type: object - required: - - title - - default_device - - value - properties: - title: - type: string - default_device: - type: string - search: - type: string - sort: - type: string - MemoryStatsTableMetadata: - type: object - required: - - title - - default_device - - search - - sort - - value - properties: - title: - type: string - default_device: - type: string - search: - type: string - sort: - type: string - MemoryCurveDataMetadata: - type: object - required: - - default_device - - devices - - peaks - - totals - - first_ts - - time_metric - - memory_metric - - time_factor - - memory_factor - properties: - default_device: - type: string - devices: - type: array - items: - type: string - peaks: - type: object - totals: - type: object - first_ts: - type: number - time_metric: - type: string - memory_metric: - type: string - time_factor: - type: number - memory_factor: - type: number - MemoryCurveData: - type: object - required: - - metadata - - columns - - rows - properties: - metadata: - $ref: '#/components/schemas/MemoryCurveDataMetadata' - columns: - type: array - items: - $ref: '#/components/schemas/GraphColumn' - rows: - type: object - KeyedColumn: - type: object - required: - - type - - name - - key - properties: - type: - type: string - name: - type: string - key: - type: string - ModuleViewData: - type: object - required: - - columns - - data - properties: - columns: - type: array - items: - $ref: '#/components/schemas/KeyedColumn' - data: - type: array - items: - $ref: '#/components/schemas/ModuleStats' - ModuleStats: - type: object - required: - - name - - id - - occurences - - operators - - host_duration - - self_host_duration - - device_duration - - self_device_duration - - avg_duration - - children - properties: - name: - type: string - id: - type: string - occurences: - type: number - operators: - type: number - host_duration: - type: number - self_host_duration: - type: number - device_duration: - type: number - self_device_duration: - type: number - avg_duration: - type: number - children: - type: array - items: - $ref: '#/components/schemas/ModuleStats' - OperatorNode: - type: object - required: - - name - - start_time - - end_time - - type - - tid - - children - properties: - name: - type: string - start_time: - type: number - end_time: - type: number - type: - type: string - tid: - type: number - children: - type: array - items: - $ref: '#/components/schemas/OperatorNode' - OpAgg: - type: object - required: - - name - - calls - - host_duration - - device_duration - - self_host_duration - - self_device_duration - properties: - name: - type: string - calls: - type: number - host_duration: - type: number - device_duration: - type: number - self_host_duration: - type: number - self_device_duration: - type: number - OpStats: - type: object - required: - - name - - duration - - device_duration - - total_duration - - aggs - properties: - name: - type: string - duration: - type: number - device_duration: - type: number - total_duration: - type: number - aggs: - type: array - items: - $ref: '#/components/schemas/OpAgg' - DiffNode: - type: object - required: - - left - - right - - children - - path - properties: - left: - $ref: 
'#/components/schemas/OpStats' - right: - $ref: '#/components/schemas/OpStats' - path: - type: string - children: - type: array - items: - $ref: '#/components/schemas/DiffNode' diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/app.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/app.tsx deleted file mode 100644 index 19eb4b112529073c6b8db9a86b8d68a7633598db..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/app.tsx +++ /dev/null @@ -1,625 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Add visualization of PyTorch Ascend profiling. - *--------------------------------------------------------------------------------------------*/ - -import Box from '@material-ui/core/Box'; -import Card from '@material-ui/core/Card'; -import CardContent from '@material-ui/core/CardContent'; -import CardHeader from '@material-ui/core/CardHeader'; -import ClickAwayListener from '@material-ui/core/ClickAwayListener'; -import CssBaseline from '@material-ui/core/CssBaseline'; -import Divider from '@material-ui/core/Divider'; -import Drawer from '@material-ui/core/Drawer'; -import Fab from '@material-ui/core/Fab'; -import FormControl from '@material-ui/core/FormControl'; -import IconButton from '@material-ui/core/IconButton'; -import ListSubheader from '@material-ui/core/ListSubheader'; -import MenuItem from '@material-ui/core/MenuItem'; -import Select, { SelectProps } from '@material-ui/core/Select'; -import { makeStyles } from '@material-ui/core/styles'; -import Tab from '@material-ui/core/Tab'; -import Tabs from '@material-ui/core/Tabs'; -import Typography from '@material-ui/core/Typography'; -import ChevronLeftIcon from '@material-ui/icons/ChevronLeft'; -import ChevronRightIcon from '@material-ui/icons/ChevronRight'; -import { message } from 'antd'; -import 'antd/es/button/style/css'; -import 'antd/es/list/style/css'; -import 'antd/es/table/style/css'; -import clsx from 'clsx'; -import * as React from 'react'; -import * as api from './api'; -import { AccuracyLeftPanel } from './components/Accuracy/AccuracyLeftPanel'; -import { FileInfo } from './components/Accuracy/entity'; -import { LossComparison } from './components/Accuracy/LossComparison'; -import { DiffOverview } from './components/DiffOverview'; -import { DistributedView } from './components/DistributedView'; -import { FullCircularProgress } from './components/FullCircularProgress'; -import { Kernel as KernelView } from './components/Kernel'; -import { MemoryView } from './components/MemoryView'; -import { ModuleView } from './components/ModuleView'; -import { Operator as OperatorView } from './components/Operator'; -import { Overview as 
OverviewPage } from './components/Overview'; -import { TraceView } from './components/TraceView'; -import { setup } from './setup'; -import './styles.css'; -import { firstOrUndefined, sleep } from './utils'; - -export enum Views { - Overview = 'Overview', - Operator = 'Operator', - Kernel = 'Kernel', - Trace = 'Trace', - Distributed = 'Distributed', - Memory = 'Memory', - Module = 'Module', - Lightning = 'Lightning', -} - -const viewNames = { - [Views.Overview]: Views.Overview, - [Views.Operator]: Views.Operator, - [Views.Kernel]: 'Kernel', - [Views.Trace]: Views.Trace, - [Views.Distributed]: Views.Distributed, - [Views.Memory]: Views.Memory, - [Views.Module]: Views.Module, - [Views.Lightning]: Views.Lightning, -}; - -const drawerWidth = 340; -const useStyles = makeStyles((theme) => ({ - root: { - display: 'flex', - height: '100%', - }, - appBar: { - zIndex: theme.zIndex.drawer + 1, - transition: theme.transitions.create(['width', 'margin'], { - easing: theme.transitions.easing.sharp, - duration: theme.transitions.duration.leavingScreen, - }), - }, - appBarShift: { - marginLeft: drawerWidth, - width: `calc(100% - ${drawerWidth}px)`, - transition: theme.transitions.create(['width', 'margin'], { - easing: theme.transitions.easing.sharp, - duration: theme.transitions.duration.enteringScreen, - }), - }, - menuButton: { - marginRight: 36, - }, - hide: { - display: 'none', - }, - drawer: { - width: drawerWidth, - flexShrink: 0, - whiteSpace: 'nowrap', - }, - drawerOpen: { - width: drawerWidth, - zIndex: 999, - transition: theme.transitions.create('width', { - easing: theme.transitions.easing.sharp, - duration: theme.transitions.duration.enteringScreen, - }), - }, - drawerClose: { - transition: theme.transitions.create('width', { - easing: theme.transitions.easing.sharp, - duration: theme.transitions.duration.leavingScreen, - }), - overflowX: 'hidden', - width: 0, - [theme.breakpoints.up('sm')]: { - width: 0, - }, - }, - toolbar: { - display: 'flex', - alignItems: 'center', - justifyContent: 'flex-end', - padding: theme.spacing(0, 1), - // necessary for content to be below app bar - ...theme.mixins.toolbar, - }, - content: { - flexGrow: 1, - padding: theme.spacing(3), - overflowX: 'hidden', - }, - formControl: { - margin: theme.spacing(1), - minWidth: 120, - }, - fab: { - marginLeft: theme.spacing(1), - marginTop: theme.spacing(1), - position: 'absolute', - }, - iconButton: { - padding: '8px', - }, -})); - -export const App = (): JSX.Element => { - const classes = useStyles(); - - // #region - State - const [selectedTab, setSelectedTab] = React.useState(0); - - const [run, setRun] = React.useState(''); - const [runs, setRuns] = React.useState([]); - const [runsLoading, setRunsLoading] = React.useState(true); - - const [workers, setWorkers] = React.useState([]); - const [worker, setWorker] = React.useState(''); - - const [spans, setSpans] = React.useState([]); - const [span, setSpan] = React.useState(''); - - const [views, setViews] = React.useState([]); - const [view, setView] = React.useState(''); - const [loaded, setLoaded] = React.useState(false); - const iframeRef = React.useRef(null); - const [deviceTarget, setDeviceTarget] = React.useState('GPU'); - - const [diffLeftWorkerOptions, setDiffLeftWorkerOptions] = React.useState([]); - const [diffLeftSpansOptions, setDiffLeftSpansOptions] = React.useState([]); - const [diffLeftRun, setDiffLeftRun] = React.useState(''); - const [diffLeftWorker, setDiffLeftWorker] = React.useState(''); - const [diffLeftSpan, setDiffLeftSpan] = React.useState(''); - 
- const [diffRightWorkerOptions, setDiffRightWorkerOptions] = React.useState([]); - const [diffRightSpansOptions, setDiffRightSpansOptions] = React.useState([]); - const [diffRightRun, setDiffRightRun] = React.useState(''); - const [diffRightWorker, setDiffRightWorker] = React.useState(''); - const [diffRightSpan, setDiffRightSpan] = React.useState(''); - - const [open, setOpen] = React.useState(true); - - const [topTab, setTopTab] = React.useState(0); - const [fileList, setFileList] = React.useState([]); - const [uploadedCount, setUploadedCount] = React.useState(0); // #endregion - - React.useEffect(() => { - setup() - .catch(() => { - message.warning('google chart is not supported offline'); - }) - .finally(() => { - setLoaded(true); - }); - }, []); - - const continuouslyFetchRuns = async (): Promise => { - while (true) { - try { - const result = await api.defaultApi.runsGet(); - setRuns(result.runs); - setRunsLoading(result.loading); - } catch (e) { - message.warning(`Cannot fetch runs: ${e}`); - } - await sleep(5000); - } - }; - - React.useEffect(() => { - continuouslyFetchRuns(); - }, []); - - React.useEffect(() => { - if (!run || !runs.includes(run)) { - setRun(firstOrUndefined(runs) ?? ''); - } - }, [runs]); // #region - Diff Left - - React.useEffect(() => { - if (diffLeftRun) { - api.defaultApi.workersGet(diffLeftRun, Views.Overview).then((data) => { - setDiffLeftWorkerOptions(data); - }); - } - }, [diffLeftRun]); - - React.useEffect(() => { - if (diffLeftRun && diffLeftWorker) { - api.defaultApi.spansGet(diffLeftRun, diffLeftWorker).then((data) => { - setDiffLeftSpansOptions(data); - }); - } - }, [diffLeftRun, diffLeftWorker]); - - // #endregion - // #region - Diff Right - React.useEffect(() => { - if (diffRightRun) { - api.defaultApi.workersGet(diffRightRun, Views.Overview).then((data) => { - setDiffRightWorkerOptions(data); - }); - } - }, [diffRightRun]); - - React.useEffect(() => { - if (diffRightRun && diffRightWorker) { - api.defaultApi.spansGet(diffRightRun, diffRightWorker).then((data) => { - setDiffRightSpansOptions(data); - }); - } - }, [diffRightRun, diffRightWorker]); - - // #endregion - // #region - normal - React.useEffect(() => { - if (run) { - api.defaultApi.viewsGet(run).then((rawViews) => { - const result = rawViews.views.map((v) => Views[Views[v as Views]]).filter(Boolean); - setDeviceTarget(rawViews.device_target); - setViews(result); - }); - } - }, [run]); - - React.useEffect(() => { - setView(firstOrUndefined(views) ?? ''); - }, [views]); - - React.useEffect(() => { - if (run && view) { - api.defaultApi.workersGet(run, view).then((data) => { - setWorkers(data); - }); - } - }, [run, view]); - - React.useEffect(() => { - setWorker(firstOrUndefined(workers) ?? ''); - }, [workers]); - - React.useEffect(() => { - if (run && worker) { - api.defaultApi.spansGet(run, worker).then((data) => { - setSpans(data); - }); - } - }, [run, worker]); - - React.useEffect(() => { - setSpan(firstOrUndefined(spans) ?? 
''); - }, [spans]); - - // #endregion - - // #region - Event Handler - const handleTabChange = (event: React.ChangeEvent>, value: any): void => { - setSelectedTab(value as number); - }; - - const handleTopTabChange = (event: React.ChangeEvent>, value: any): void => { - setTopTab(value as number); - }; - - const handleRunChange: SelectProps['onChange'] = (event) => { - setRun(event.target.value as string); - setView(''); - setWorker(''); - setSpan(''); - }; - - const handleViewChange: SelectProps['onChange'] = (event) => { - setView(event.target.value as Views); - setWorker(''); - setSpan(''); - }; - - const handleWorkerChange: SelectProps['onChange'] = (event) => { - setWorker(event.target.value as string); - setSpan(''); - }; - - const handleSpanChange: SelectProps['onChange'] = (event) => { - setSpan(event.target.value as string); - }; - - const handleDiffLeftRunChange: SelectProps['onChange'] = (event) => { - setDiffLeftRun(event.target.value as string); - setDiffLeftWorker(''); - setDiffLeftSpan(''); - }; - - const handleDiffLeftWorkerChange: SelectProps['onChange'] = (event) => { - setDiffLeftWorker(event.target.value as string); - setDiffLeftSpan(''); - }; - - const handleDiffLeftSpanChange: SelectProps['onChange'] = (event) => { - setDiffLeftSpan(event.target.value as string); - }; - - const handleDiffRightRunChange: SelectProps['onChange'] = (event) => { - setDiffRightRun(event.target.value as string); - setDiffRightWorker(''); - setDiffRightSpan(''); - }; - - const handleDiffRightWorkerChange: SelectProps['onChange'] = (event) => { - setDiffRightWorker(event.target.value as string); - setDiffRightSpan(''); - }; - - const handleDiffRightSpanChange: SelectProps['onChange'] = (event) => { - setDiffRightSpan(event.target.value as string); - }; - - const handleDrawerOpen = (): void => { - setOpen(true); - setIframeActive(); - }; - - const handleDrawerClose = (): void => { - setOpen(false); - setIframeActive(); - }; - - const setIframeActive = (): void => { - iframeRef.current?.focus(); - }; - - const _changeFileList = (files: FileInfo[]): void => { - if (JSON.stringify(files) !== JSON.stringify(fileList)) { - setFileList(files); - } - }; - - const _getViews = (viewName: Views): string => { - if (viewName === Views.Kernel) { - return deviceTarget === 'Ascend' ? `NPU ${viewNames[viewName]}` : `GPU ${viewNames[viewName]}`; - } else { - return viewNames[viewName]; - } - }; - - const _changeUploadCount = (count: number): void => { - setUploadedCount(count); - }; // #endregion - - const renderContent = (): JSX.Element => { - if (!runsLoading && runs.length === 0) { - return ( - - - - There are not any runs in the log folder. - - - ); - } - const notReady = !loaded || !run || !worker || !view || !span; - if (notReady) { - return ; - } - - if (selectedTab === 0) { - switch (view) { - case Views.Overview: - return ; - case Views.Operator: - return ; - case Views.Kernel: - return ; - case Views.Trace: - return ; - case Views.Distributed: - return ; - case Views.Memory: - return ; - case Views.Module: - case Views.Lightning: - return ; - default: - return <>; - } - } else { - return ( - - ); - } - }; - - const spanComponent = (): JSX.Element => { - const spanFragment = ( - - Spans - - - - - - - ); - - if (!spans || spans.length <= 1) { - return
{spanFragment}
; - } else { - return spanFragment; - } - }; - - return ( -
- - -
- - - -
- - - - - - - - {topTab === 0 ? ( - <> - - - - - - - {selectedTab === 0 ? ( - <> - Runs - - - - - - Views - - - - - - Workers - - - - - - {spanComponent()} - - ) : ( - <> -   Baseline - Runs - - - - Workers - - - - - Spans - - - - - - -   Experimental - Runs - - - - Workers - - - - Spans - - - - - )} - - ) : ( - - )} -
- {!open && ( - - - - )} -
- {topTab === 0 ? renderContent() : } -
-
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/AccuracyLeftPanel.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/AccuracyLeftPanel.tsx deleted file mode 100644 index c7b7d7cf0841e7dc3686138b584e101e5052f4a6..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/AccuracyLeftPanel.tsx +++ /dev/null @@ -1,334 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the 'License') - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an 'AS IS' BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { useState, useEffect, useCallback, useRef } from 'react'; -import { makeStyles } from '@material-ui/core/styles'; -import { Button, Checkbox, Spin, Modal, message } from 'antd'; -import { CheckboxChangeEvent } from 'antd/es/checkbox'; -import { DeleteOutlined, DownloadOutlined, ImportOutlined, SettingOutlined, WarningTwoTone } from '@ant-design/icons'; -import { RegexConfigModal } from './RegexConfigModal'; -import { FileInfo } from './entity'; - -interface IProps { - onChangeCheckedFileList: (files: FileInfo[]) => void; - onChangeUploadedCount: (count: number) => void; -} - -// Match numbers, including scientific notation -const LOSS_REG_EXP = /[+-]?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?/; -// Match natural numbers -const ITER_REG_EXP = /\d+/; -// Maximum size of a single file -const FILE_MAX_SIZE = 50 * 1024 * 1024; -// Maximum number of files that can be uploaded -export const MAX_FILE_COUNT = 6; - -const useStyles = makeStyles(() => ({ - root: { - height: '100%', - }, - btnPanel: { - height: 50, - lineHeight: '50px', - borderBottom: '1px solid #DFE5EF', - display: 'flex', - '& .ant-btn': { - margin: 'auto', - }, - }, - fileContainer: { - height: 54, - padding: '0 24px', - display: 'flex', - alignItems: 'center', - '& .fileNameLabel': { - display: 'inline-block', - marginLeft: 12, - width: 200, - fontSize: 14, - overflow: 'hidden', - textOverflow: 'ellipsis', - whiteSpace: 'nowrap', - }, - '& .btns': { - display: 'inline-block', - marginLeft: 'auto', - '& .icon': { - cursor: 'pointer', - '&:hover': { - color: '#1890ff', - }, - }, - '& .iconLeft': { - marginRight: 8, - }, - }, - }, - deleteModal: { - '& .ant-modal-title': { - fontWeight: 'bold', - }, - '& .deleteModalBody': { - display: 'flex', - alignItems: 'center', - height: 80, - '& .warningIcon': { - display: 'inline-block', - fontSize: 50, - }, - '& .warningText': { - display: 'inline-block', - marginLeft: 16, - overflow: 'hidden', - wordBreak: 'break-all', - flex: 1, - }, - }, - }, -})); - -export const AccuracyLeftPanel: React.FC = (props) => { - const { onChangeCheckedFileList, onChangeUploadedCount } = props; - const classes = useStyles(); - const [configModalVis, setConfigModalVis] = 
useState(false); - const [deleteModalVis, setDeleteModalVis] = useState(false); - const [fileList, setFileList] = useState([]); - const [importSpin, setImportSpin] = useState(false); - const [selectedFile, setSelectedFile] = useState(undefined); - const downLoadRef = useRef(null); - - const parseFile = (file: FileInfo): FileInfo => { - file.losses = []; - file.iterLosses = {}; - file.iters = []; - const lines = file.fileContent.split(/\r\n|\n|\r/); - for (let i = 0; i < lines.length; i++) { - const iter = parseByTag(lines[i], file.iterTag, false); - const loss = parseByTag(lines[i], file.lossTag, true); - if (iter !== null && loss !== null) { - file.iters.push(iter); - file.losses.push([iter, loss]); - file.iterLosses[iter] = loss; - } - } - return file; - }; - - const parseByTag = (line: string, tag: string, isLoss: boolean): number | null => { - let pos = line.indexOf(tag); - let result: number | null = null; - if (pos !== -1) { - const res = (isLoss ? LOSS_REG_EXP : ITER_REG_EXP).exec( - line - .substring(pos + tag.length) - .trim() - .split(/\s+/)[0] - ); - if (res !== null) { - if (isLoss) { - result = parseFloat(res[0]); - } else { - result = parseInt(res[0]); - } - } else { - console.warn(`Found ${isLoss ? 'loss' : 'iteration'} text, but failed to parse its value: [${line}]`); - } - } - return result; - }; - - const importFile = (): void => { - document.getElementById('accComparisonSelectFile')?.click(); - }; - - const uploadFile = (e: React.ChangeEvent): void => { - setImportSpin(true); - const file = e.target.files?.[0]; - if (file) { - if (file.size > FILE_MAX_SIZE) { - message.warn('Sorry, the file size cannot be greater than 50MB.'); - setImportSpin(false); - // Clear the value so that re-selecting a file with the same name still triggers the change event - e.target.value = ''; - return; - } - const reader = new FileReader(); - reader.onload = ((loadedFile) => { - return (event) => { - addFile(loadedFile.name.trim(), event.target?.result as string); - setImportSpin(false); - }; - })(file); - reader.readAsText(file); - } - // Clear the value so that re-selecting a file with the same name still triggers the change event - e.target.value = ''; - }; - - const addFile = (fileName: string, fileContent: string): void => { - const fileLength = fileName.length; - const tempList: FileInfo[] = JSON.parse(JSON.stringify(fileList)); - let updatedFileName = fileName; // New variable to store the updated file name - // If a file with the same name was already uploaded, append an index (1 to the maximum file count minus 1) to the name - if (!!tempList.find((item) => item.fileName === fileName)) { - for (let i = 1; i < MAX_FILE_COUNT; i++) { - let temp = `${fileName.slice(0, fileLength - 4)}(${i})${fileName.slice(fileLength - 4)}`; - if (tempList.find((item) => item.fileName === temp) === undefined) { - updatedFileName = temp; - break; - } - } - } - const file: FileInfo = { - id: fileList.length, - fileName: updatedFileName, - fileContent, - checked: true, - lossTag: 'loss:', - iterTag: 'iteration', - iters: [], - losses: [], - iterLosses: {}, - }; - tempList.push(parseFile(file)); - setFileList(tempList); - }; - - const exportCsv = (data: FileInfo): void => { - let csvContent = `data:text/csv;charset=utf-8,${data.iterTag},${data.lossTag}\n`; - data.losses.forEach((item) => { - csvContent += `${item[0]},${item[1]}\n`; - }); - downLoadRef.current?.setAttribute('href', encodeURI(csvContent)); - downLoadRef.current?.setAttribute('download', `${data.fileName}.csv`); - downLoadRef.current?.click(); - }; - - const onCheckChange = (e: CheckboxChangeEvent, index: number): void => { - const tempList: FileInfo[] = JSON.parse(JSON.stringify(fileList)); - tempList[index].checked = e.target.checked; - setFileList(tempList); - }; - - const onConfigIconClick = (data: FileInfo): void => { - 
setSelectedFile(data); - setConfigModalVis(true); - }; - - const onDeleteIconClick = (data: FileInfo): void => { - setSelectedFile(data); - setDeleteModalVis(true); - }; - - const configModalOk = (data: FileInfo): void => { - const tempList = fileList.map((item) => { - return item.id === data.id ? parseFile(data) : item; - }); - setFileList(tempList); - setConfigModalVis(false); - }; - - const configModalCancel = (): void => { - setConfigModalVis(false); - }; - - const deleteModalOk = (): void => { - const tempList = JSON.parse(JSON.stringify(fileList)); - let founded = false; - let index = 0; - for (let i = 0; i < tempList.length; i++) { - if (founded) { - tempList[i].id -= 1; - continue; - } - if (tempList[i].id === selectedFile?.id) { - founded = true; - index = i; - } - } - tempList.splice(index, 1); - setFileList(tempList); - setSelectedFile(undefined); - setDeleteModalVis(false); - }; - - const renderFileItems = useCallback(() => { - return fileList.map((item) => { - return ( -
- onCheckChange(e, item.id)} /> - - {item.fileName} - -
- onConfigIconClick(item)} /> - exportCsv(item)} /> - onDeleteIconClick(item)} /> -
-
- ); - }); - }, [JSON.stringify(fileList)]); - - useEffect(() => { - onChangeCheckedFileList(fileList.filter((item) => item.checked)); - onChangeUploadedCount(fileList.length); - }, [JSON.stringify(fileList)]); - - return ( -
- -
- - -
- {renderFileItems()} -
- {configModalVis && ( - - )} - setDeleteModalVis(false)} - onOk={deleteModalOk} - width={500} - className={classes.deleteModal} - > -
- - - Are you sure you want to delete "{selectedFile?.fileName}"? -
-
- -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/ComparisonPanel.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/ComparisonPanel.tsx deleted file mode 100644 index 500d29764c5209958ba19630ac1d4e08c10f24a5..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/ComparisonPanel.tsx +++ /dev/null @@ -1,322 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the 'License') - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an 'AS IS' BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { useState, useLayoutEffect, useRef, useEffect } from 'react'; -import { makeStyles } from '@material-ui/core/styles'; -import { FileInfo } from './entity'; -import { Empty, Popover, Radio, RadioChangeEvent, Select, Table } from 'antd'; -import { ColumnsType } from 'antd/es/table'; -import * as echarts from 'echarts'; -import { InfoCircleOutlined } from '@ant-design/icons'; - -interface IProps { - fileList: FileInfo[]; -} - -interface ILineDataList { - normal: number[][]; - absolute: number[][]; - relative: number[][]; -} - -const useStyles = makeStyles(() => ({ - root: { - height: '50%', - width: '100%', - padding: '0 24px 24px', - display: 'flex', - flexDirection: 'column', - }, - title: { - height: 24, - lineHeight: '24px', - fontFamily: 'sans-serif', - fontSize: 16, - fontWeight: 700, - }, - filter: { - height: 40, - lineHeight: '40px', - '& .comparisonSelect': { - margin: '0 8px', - }, - '& .comparisonLabel': { - marginRight: 8, - }, - '& .comparisonBtn': { - marginLeft: 20, - }, - '& .infoLabel': { - fontSize: 20, - }, - }, - empty: { - marginTop: 60, - }, - content: { - flex: 1, - display: 'flex', - }, - lossChart: { - height: '100%', - flex: 1, - }, - lossTable: { - height: '100%', - width: '32%', - }, - tableHeader: { - display: 'inline-block', - width: 134, - position: 'absolute', - top: '50%', - transform: 'translateY(-50%)', - overflow: 'hidden', - textOverflow: 'ellipsis', - whiteSpace: 'nowrap', - }, -})); - -export const ComparisonPanel: React.FC = (props) => { - const { fileList } = props; - const classes = useStyles(); - const [selectedFiles, setSelectedFiles] = useState([]); - const [compareWay, setCompareWay] = useState(0); - const [pageSize, setPageSize] = useState(20); - const [lineData, setLineData] = useState(undefined); - const [tableData, setTableData] = useState([]); - const chartRef = useRef(null); - - const getColumns = (): ColumnsType => { - const columns: ColumnsType = [ - { - title: 'Iteration', - key: 'iter', - dataIndex: 'iter', - }, - ]; - selectedFiles.forEach((item, index) => { - columns.push({ - title: () => ( -
- {item} -
- ), - key: index, - dataIndex: item, - width: '40%', - }); - }); - return columns; - }; - - const compareFile = (fileNames: string[]): void => { - if (fileNames.length < 2) { - return; - } - const baseFile = fileList.find((item) => item.fileName === fileNames[0]); - const expFile = fileList.find((item) => item.fileName === fileNames[1]); - if (!!baseFile && !!expFile) { - const commonIters: number[] = []; - const lessIters = baseFile.iters.length <= expFile.iters.length ? baseFile.iters : expFile.iters; - const moreIters = baseFile.iters.length > expFile.iters.length ? baseFile.iters : expFile.iters; - lessIters.forEach((iter) => { - if (moreIters.includes(iter)) { - commonIters.push(iter); - } - }); - commonIters.sort((a, b) => a - b); - const tempTableData: any[] = []; - const tempChartData: ILineDataList = { - normal: [], - absolute: [], - relative: [], - }; - commonIters.forEach((iter, index) => { - const baseLoss = baseFile.iterLosses[iter]; - const expLoss = expFile.iterLosses[iter]; - tempTableData.push({ - key: `${iter}_${index}`, - iter, - [baseFile.fileName]: baseLoss, - [expFile.fileName]: expLoss, - }); - tempChartData.normal.push([iter, expLoss - baseLoss]); - tempChartData.absolute.push([iter, Math.abs(expLoss - baseLoss)]); - tempChartData.relative.push([iter, baseLoss === 0 ? 0 : Math.abs(expLoss - baseLoss) / baseLoss]); - }); - setTableData(tempTableData); - setLineData(tempChartData); - } - }; - - const onSelectChange = (value: string[]): void => { - setSelectedFiles(value); - compareFile(value); - }; - - const onRadioChange = (e: RadioChangeEvent): void => { - setCompareWay(e.target.value); - }; - - const onShowSizeChange = (current: number, size: number): void => { - setPageSize(size); - }; - - useLayoutEffect(() => { - const element = chartRef.current; - if (!element || !lineData) { - return undefined; - } - const echart = echarts.init(element); - let dataSource: number[][] = []; - if (compareWay === 0) { - dataSource = lineData.normal; - } else if (compareWay === 1) { - dataSource = lineData.absolute; - } else { - dataSource = lineData.relative; - } - const option: echarts.EChartsOption = { - title: { - text: 'Comparison Chart', - textStyle: { - fontSize: 12, - color: '#000', - }, - }, - legend: { bottom: 0 }, - xAxis: { - type: 'category', - boundaryGap: false, - name: 'Iteration', - }, - yAxis: { - type: 'value', - name: 'Difference', - scale: true, - }, - tooltip: { - trigger: 'axis', - valueFormatter: (value) => (value as number).toFixed(6), - }, - dataZoom: { - type: 'inside', - }, - dataset: { - source: dataSource, - }, - series: { - type: 'line', - name: 'Difference', - symbol: 'none', - }, - }; - - if (option) { - echart.setOption(option, true); - } - return () => { - echart.dispose(); - }; - }, [compareWay, lineData]); - - useEffect(() => { - const tempValue = selectedFiles.filter((item) => { - return !!fileList.find((file) => file.fileName === item); - }); - if (JSON.stringify(tempValue) === JSON.stringify(selectedFiles)) { - compareFile(tempValue); - } - setSelectedFiles(tempValue); - }, [fileList]); - - return ( -
-
Comparison Data
-
- Comparison objects: - -
-
- Iteration Tag - -
- - ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/entity.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/entity.ts deleted file mode 100644 index 270c4cb6535633f9a03e5b9fe02dca6121cd3ba7..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Accuracy/entity.ts +++ /dev/null @@ -1,30 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the 'License') - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an 'AS IS' BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *--------------------------------------------------------------------------------------------*/ - -export interface FileInfo { - id: number; - fileName: string; - fileContent: string; - checked: boolean; - lossTag: string; - iterTag: string; - iters: number[]; - losses: number[][]; - iterLosses: { [iter: number]: number }; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DataLoading.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DataLoading.tsx deleted file mode 100644 index 3c5d353ce641c409b51a7aaef8c00ff2f57df6e8..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DataLoading.tsx +++ /dev/null @@ -1,19 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { FullCircularProgress } from './FullCircularProgress'; - -interface IProps { - value?: T | null; - children: (t: T) => JSX.Element; -} - -export function DataLoading(props: IProps): JSX.Element { - if (props.value === undefined || props.value === null) { - return ; - } - - return props.children(props.value); -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DiffOverview.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DiffOverview.tsx deleted file mode 100644 index ed029d5020ed1eaf8caea159b25d33c7a5ad03e3..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DiffOverview.tsx +++ /dev/null @@ -1,942 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. 
- *--------------------------------------------------------------------------------------------*/ -import Button from '@material-ui/core/Button'; -import Card from '@material-ui/core/Card'; -import CardContent from '@material-ui/core/CardContent'; -import CardHeader from '@material-ui/core/CardHeader'; -import Grid from '@material-ui/core/Grid'; -import { makeStyles } from '@material-ui/core/styles'; -import Typography from '@material-ui/core/Typography'; -import ChevronLeftIcon from '@material-ui/icons/ChevronLeft'; -import { Select, Table } from 'antd'; -import * as React from 'react'; -import * as api from '../api'; -import { useResizeEventDependency } from '../utils/resize'; -import { FullCircularProgress } from './FullCircularProgress'; -import * as echarts from 'echarts'; - -const { Option } = Select; - -const topGraphHeight = 230; - -const useStyles = makeStyles((theme) => ({ - root: { - flexGrow: 1, - }, - pre: { - '& ul': { - margin: 0, - paddingLeft: theme.spacing(3), - ...theme.typography.body1, - }, - '& li': {}, - '& a': { - color: '#ffa726', - }, - '& a:active': { - color: '#ffa726', - }, - '& p': { - margin: 0, - ...theme.typography.subtitle1, - fontWeight: theme.typography.fontWeightBold, - }, - }, - topGraph: { - height: topGraphHeight + 40, - }, - iconButton: { - padding: '8px', - }, -})); - -const getAngleByDataLength = (data: number): number => { - if (data < 10) { - return 0; - } else { - // The more data points there are, the closer the label rotation gets to 90 degrees - return 90 * (1 - (10 / data)); - } -}; - -export interface DiffColumnChartIProps { - rawData: any[]; - selectCallback: (row: number, column: number) => void; -} - -export interface DiffStepChartIProps { - rawData: any[]; -} - -const DiffColumnChart: React.FC = (props: DiffColumnChartIProps) => { - const { rawData, selectCallback } = props; - const graphRef = React.useRef(null); - const [resizeEventDependency] = useResizeEventDependency(); - - React.useLayoutEffect(() => { - const element = graphRef.current; - if (!element) { - return undefined; - } - - const chart = echarts.init(element); - - const options: echarts.EChartsOption = { - title: { - text: 'Execution Comparison', - }, - legend: { - top: 10, - right: 10, - }, - tooltip: { - trigger: 'axis', - formatter: function (params: any) { - const index = params[0].name.indexOf('@'); - const safeName = params[0].name.replace(/</g, '&lt;').replace(/>/g, '&gt;'); - let res = `${index > -1 ? safeName.slice(index + 1) : safeName}
`; - for (const item of params) { - if (typeof item.value[item.encode.y[0]] === 'number') { - res += ` - - ${item.seriesName}: ${item.value[item.encode.y[0]]}
`; - } - } - return res; - }, - }, - series: [ - { - type: 'bar', - itemStyle: { - color: '#3366cc', - }, - yAxisIndex: 0, - }, - { - type: 'bar', - itemStyle: { - color: '#dc3912', - }, - yAxisIndex: 0, - }, - { - type: 'line', - itemStyle: { - color: '#ff9900', - }, - yAxisIndex: 1, - }, - { - type: 'line', - itemStyle: { - color: '#109618', - }, - yAxisIndex: 1, - }, - ], - xAxis: { - type: 'category', - axisLabel: { - interval: 0, - rotate: getAngleByDataLength(rawData.length), - formatter: (name: string) => { - const index = name.indexOf('@'); - const displayName = index > -1 ? name.slice(index + 1) : name; // Use a new variable for the display name - return displayName.length > 16 ? `${displayName.slice(0, 14)}...` : displayName; - }, - }, - }, - yAxis: [ - { - type: 'value', - name: 'Time Difference(us)', - scale: true, - }, - { - type: 'value', - name: 'Accumulated Difference(us)', - scale: true, - }, - ], - dataset: { - source: rawData.map((item, idx) => { - // Prepend the index so that x-axis categories stay unique - let param: any[] = [...item]; - param[0] = `${idx}@${param[0]}`; - return param; - }), - }, - }; - - if (options) { - chart.setOption(options, true); - } - chart.on('click', (param) => { - if (param.seriesIndex !== undefined) { - selectCallback(param.dataIndex, param.seriesIndex + 1); - } - }); - - return () => { - chart.dispose(); - }; - }, [rawData, resizeEventDependency]); - - return ( -
-
-
- ); -}; - -const DiffStepChart: React.FC = (props: DiffStepChartIProps) => { - const { rawData } = props; - const graphRef = React.useRef(null); - const [resizeEventDependency] = useResizeEventDependency(); - - React.useLayoutEffect(() => { - const element = graphRef.current; - if (!element) { - return undefined; - } - const chart = echarts.init(element); - const options: echarts.EChartsOption = { - title: { - text: 'Execution Diff', - }, - legend: { - top: 10, - right: 10, - }, - dataset: { - source: rawData.map((item, idx) => { - // Prepend the index so that x-axis categories stay unique - let param: any[] = [...item]; - param[0] = `${idx}@${param[0]}`; - return param; - }), - }, - xAxis: { - type: 'category', - axisLabel: { - interval: 0, - rotate: getAngleByDataLength(rawData.length), - formatter: (name: string) => { - const index = name.indexOf('@'); - const displayName = index > -1 ? name.slice(index + 1) : name; // Use a new variable for the display name - return displayName.length > 16 ? `${displayName.slice(0, 14)}...` : displayName; - }, - }, - }, - yAxis: { - type: 'value', - scale: true, - }, - tooltip: { - trigger: 'axis', - formatter: function (params: any) { - const index = params[0].name.indexOf('@'); - const safeName = params[0].name.replace(/</g, '&lt;').replace(/>/g, '&gt;'); - let res = `${index > -1 ? safeName.slice(index + 1) : safeName}
`; - for (const item of params) { - if (typeof item.value[item.encode.y[0]] === 'number') { - res += ` - - ${item.seriesName}: ${item.value[item.encode.y[0]]}
`; - } - } - return res; - }, - }, - series: [ - { - type: 'line', - color: '#3366cc', - symbolSize: 0, - step: 'middle', - areaStyle: { - color: '#c1d1ef', - opacity: 1, - }, - }, - { - type: 'line', - color: '#dc3912', - symbolSize: 0, - step: 'middle', - areaStyle: { - color: '#f4c3b7', - opacity: 1, - }, - }, - ], - }; - - if (options) { - chart.setOption(options, true); - } - return () => { - chart.dispose(); - }; - }, [rawData, resizeEventDependency]); - - return ( -
-
-
- ); -}; - -export interface IProps { - run: string; - worker: string; - span: string; - expRun: string; - expWorker: string; - expSpan: string; -} - -export interface ColumnUnderlyingData { - name: string; - path: string; - leftAggs: any[]; - rightAggs: any[]; -} - -export interface TableRow { - key: number; - - operator: string; - baselineCalls?: number; - expCalls?: number; - deltaCalls?: number; - deltaCallsPercentNumber?: number; - deltaCallsPercent?: string; - - baselineHostDuration: number; - expHostDuration: number; - deltaHostDuration: number; - deltaHostDurationPercentNumber: number; - deltaHostDurationPercent: string; - - baselineSelfHostDuration: number; - expSelfHostDuration: number; - deltaSelfHostDuration: number; - deltaSelfHostDurationPercentNumber: number; - deltaSelfHostDurationPercent: string; - - baselineDeviceDuration: number; - expDeviceDuration: number; - deltaDeviceDuration: number; - deltaDeviceDurationPercentNumber: number; - deltaDeviceDurationPercent: string; - - baselineSelfDeviceDuration: number; - expSelfDeviceDuration: number; - deltaSelfDeviceDuration: number; - deltaSelfDeviceDurationPercentNumber: number; - deltaSelfDeviceDurationPercent: string; -} - -let columnChartDataStack: any[][] = []; -let stepChartDataStack: any[][] = []; -let columnUnderlyingDataStack: ColumnUnderlyingData[][] = []; -let columnTableDataSourceStack: TableRow[][] = []; - -export const DiffOverview: React.FC = (props: IProps) => { - // #region - Constant - const COMPOSITE_NODES_NAME = 'CompositeNodes'; - - const hostDurationColumns = [ - { - title: 'Baseline Host Duration (us)', - dataIndex: 'baselineHostDuration', - key: 'baselineHostDuration', - sorter: (a: TableRow, b: TableRow): number => { - const aBaselineHost = a.baselineHostDuration ?? 0; - const bBaselineHost = b.baselineHostDuration ?? 0; - return aBaselineHost - bBaselineHost; - }, - }, - { - title: 'Exp Host Duration (us)', - dataIndex: 'expHostDuration', - key: 'expHostDuration', - sorter: (a: TableRow, b: TableRow): number => { - const aExpHost = a.expHostDuration ?? 0; - const bExpHost = b.expHostDuration ?? 0; - return aExpHost - bExpHost; - }, - }, - { - title: 'Delta Host Duration (us)', - dataIndex: 'deltaHostDuration', - key: 'deltaHostDuration', - sorter: (a: TableRow, b: TableRow): number => { - const aDeltaHost = a.deltaHostDuration ?? 0; - const bDeltaHost = b.deltaHostDuration ?? 0; - return aDeltaHost - bDeltaHost; - }, - }, - { - title: 'Delta Host Duration%', - dataIndex: 'deltaHostDurationPercent', - key: 'deltaHostDurationPercent', - sorter: (a: TableRow, b: TableRow): number => { - const aPercent = a.deltaHostDurationPercentNumber ?? 0; - const bPercent = b.deltaHostDurationPercentNumber ?? 0; - return aPercent - bPercent; - }, - }, - ]; - - const selfHostDurationColumns = [ - { - title: 'Baseline Self Host Duration (us)', - dataIndex: 'baselineSelfHostDuration', - key: 'baselineSelfHostDuration', - sorter: (a: TableRow, b: TableRow): number => a.baselineSelfHostDuration - b.baselineSelfHostDuration, - }, - { - title: 'Exp Self Host Duration (us)', - dataIndex: 'expSelfHostDuration', - key: 'expSelfHostDuration', - sorter: (a: TableRow, b: TableRow): number => a.expSelfHostDuration - b.expSelfHostDuration, - }, - { - title: 'Delta Self Host Duration (us)', - dataIndex: 'deltaSelfHostDuration', - key: 'deltaSelfHostDuration', - sorter: (a: TableRow, b: TableRow): number => { - const aDeltaSelfHost = a.deltaSelfHostDuration ?? 0; - const bDeltaSelfHost = b.deltaSelfHostDuration ?? 
0; - return aDeltaSelfHost - bDeltaSelfHost; - }, - }, - { - title: 'Delta Self Host Duration%', - dataIndex: 'deltaSelfHostDurationPercent', - key: 'deltaSelfHostDurationPercent', - sorter: (a: TableRow, b: TableRow): number => { - const aSelfPercent = a.deltaSelfHostDurationPercentNumber ?? 0; - const bSelfPercent = b.deltaSelfHostDurationPercentNumber ?? 0; - return aSelfPercent - bSelfPercent; - }, - }, - ]; - - const deviceDurationColumns = [ - { - title: 'Baseline Device Duration (us)', - dataIndex: 'baselineDeviceDuration', - key: 'baselineDeviceDuration', - sorter: (a: TableRow, b: TableRow): number => a.baselineDeviceDuration - b.baselineDeviceDuration, - }, - { - title: 'Exp Device Duration (us)', - dataIndex: 'expDeviceDuration', - key: 'expDeviceDuration', - sorter: (a: TableRow, b: TableRow): number => a.expDeviceDuration - b.expDeviceDuration, - }, - { - title: 'Delta Device Duration (us)', - dataIndex: 'deltaDeviceDuration', - key: 'deltaDeviceDuration', - sorter: (a: TableRow, b: TableRow): number => { - const aDeltaDeviceDuration = a.deltaDeviceDuration ?? 0; - const bdeltaDeviceDuration = b.deltaDeviceDuration ?? 0; - return aDeltaDeviceDuration - bdeltaDeviceDuration; - }, - }, - { - title: 'Delta Device Duration%', - dataIndex: 'deltaDeviceDurationPercent', - key: 'deltaDeviceDurationPercent', - sorter: (a: TableRow, b: TableRow): number => { - const aDeltaDeviceDurationPercentNumber = a.deltaDeviceDurationPercentNumber ?? 0; - const bDeltaDeviceDurationPercentNumber = b.deltaDeviceDurationPercentNumber ?? 0; - return aDeltaDeviceDurationPercentNumber - bDeltaDeviceDurationPercentNumber; - }, - }, - ]; - - const selfDeviceDurationColumns = [ - { - title: 'Baseline Self Device Duration (us)', - dataIndex: 'baselineSelfDeviceDuration', - key: 'baselineSelfDeviceDuration', - sorter: (a: TableRow, b: TableRow): number => a.baselineSelfDeviceDuration - b.baselineSelfDeviceDuration, - }, - { - title: 'Exp Self Device Duration (us)', - dataIndex: 'expSelfDeviceDuration', - key: 'expSelfDeviceDuration', - sorter: (a: TableRow, b: TableRow): number => a.expSelfDeviceDuration - b.expSelfDeviceDuration, - }, - { - title: 'Delta Self Device Duration (us)', - dataIndex: 'deltaSelfDeviceDuration', - key: 'deltaSelfDeviceDuration', - sorter: (a: TableRow, b: TableRow): number => { - const aDeltaSelfDeviceDuration = a.deltaSelfDeviceDuration ?? 0; - const bDeltaSelfDeviceDuration = b.deltaSelfDeviceDuration ?? 0; - return aDeltaSelfDeviceDuration - bDeltaSelfDeviceDuration; - }, - }, - { - title: 'Delta Self Device Duration%', - dataIndex: 'deltaSelfDeviceDurationPercent', - key: 'deltaSelfDeviceDurationPercent', - sorter: (a: TableRow, b: TableRow): number => { - const aDeltaSelfDeviceDurationPercentNumber = a.deltaSelfDeviceDurationPercentNumber ?? 0; - const bDeltaSelfDeviceDurationPercentNumber = b.deltaSelfDeviceDurationPercentNumber ?? 
0; - return aDeltaSelfDeviceDurationPercentNumber - bDeltaSelfDeviceDurationPercentNumber; - }, - }, - ]; - - interface IColumnMap { - [key: string]: any; - } - type IColumnMapType = IColumnMap; - - const tableSourceColumnMap: IColumnMapType = { - selfHostDuration: selfHostDurationColumns, - hostDuration: hostDurationColumns, - deviceDuration: deviceDurationColumns, - selfDeviceDuration: selfDeviceDurationColumns, - }; - - const baseTableColumns = [ - { - title: 'Operator', - dataIndex: 'operator', - key: 'operator', - sorter: (a: TableRow, b: TableRow) => a.operator.localeCompare(b.operator), - }, - { - title: 'Baseline Calls', - dataIndex: 'baselineCalls', - key: 'baselineCalls', - sorter: (a: TableRow, b: TableRow) => a.baselineCalls ?? 0 - (b.baselineCalls ?? 0), - }, - { - title: 'Exp Calls', - dataIndex: 'expCalls', - key: 'expCalls', - sorter: (a: TableRow, b: TableRow) => a.expCalls ?? 0 - (b.expCalls ?? 0), - }, - { - title: 'Delta Calls', - dataIndex: 'deltaCalls', - key: 'deltaCalls', - sorter: (a: TableRow, b: TableRow) => a.deltaCalls ?? 0 - (b.deltaCalls ?? 0), - }, - { - title: 'Delta Calls%', - dataIndex: 'deltaCallsPercent', - key: 'deltaCallsPercent', - sorter: (a: TableRow, b: TableRow) => a.deltaCallsPercentNumber ?? 0 - (b.deltaCallsPercentNumber ?? 0), - }, - ]; - - // #endregion - - // #region - State - const [tableDataSource, setTableDataSource] = React.useState([]); - const { run, worker, span, expRun, expWorker, expSpan } = props; - - const [columnUnderlyingData, setColumnUnderlyingData] = React.useState([]); - - const [rootUnderlyingData, setRootUnderlyingData] = React.useState(); - - const [columnChartData, setColumnChartData] = React.useState([]); - const [stepChartData, setStepChartData] = React.useState([]); - - const [selectedTableColumnsOptions, setSelectedTableColumnsOptions] = React.useState<[key: string]>(['hostDuration']); - const [selectedTableColumns, setSelectedTableColumns] = React.useState([ - ...baseTableColumns, - ...hostDurationColumns, - ]); - - const [dataStackLevel, setDataStackLevel] = React.useState(0); - const [loading, setLoading] = React.useState(false); - - // #endregion - const classes = useStyles(); - - // #region - Event Handler - const handleChartColumnSelect = (row: number, column: number): void => { - if (columnUnderlyingData.length === 0) { - return; - } - - let selectedUnderlyingData = columnUnderlyingData[row]; - if (!selectedUnderlyingData) { - return; - } - - let tableDataSource1 = generateDataSourceFromUnderlyingData(selectedUnderlyingData); - setTableDataSource(tableDataSource1); - columnTableDataSourceStack.push(tableDataSource1); - - setLoading(true); - - api.defaultApi - .diffnodeGet(run, worker, span, expRun, expWorker, expSpan, selectedUnderlyingData.path) - .then((resp) => handleDiffNodeResp(resp)) - .finally(() => setLoading(false)); - }; - - const handleGoBack = (): void => { - if (columnChartDataStack.length > 1) { - columnChartDataStack.pop(); - let top = columnChartDataStack[columnChartDataStack.length - 1]; - setColumnChartData(top); - } - - if (stepChartDataStack.length > 1) { - stepChartDataStack.pop(); - let top = stepChartDataStack[stepChartDataStack.length - 1]; - setStepChartData(top); - } - - if (columnUnderlyingDataStack.length > 0) { - columnUnderlyingDataStack.pop(); - let top = columnUnderlyingDataStack[columnUnderlyingDataStack.length - 1]; - setColumnUnderlyingData(top); - } - - if (columnTableDataSourceStack.length > 0) { - columnTableDataSourceStack.pop(); - let top = 
columnTableDataSourceStack[columnTableDataSourceStack.length - 1]; - - if (top) { - setTableDataSource(top); - } else { - let tableDataSource2 = generateDataSourceFromUnderlyingData(rootUnderlyingData); - setTableDataSource(tableDataSource2); - } - } - - setDataStackLevel(dataStackLevel - 1); - }; - - const toPercentString = (percentNumber: number): string => { - if (isNaN(percentNumber)) { - return 'N/A'; - } - - return `${percentNumber.toFixed(2)}%`; - }; - - const handleColumnSelectionChange = (value: [key: string]): void => { - let columns = value.map((x) => tableSourceColumnMap[x]).flat(); - let r = [...baseTableColumns, ...columns]; - setSelectedTableColumnsOptions(value); - setSelectedTableColumns(r); - }; - - const generateDataSourceFromUnderlyingData = (selectedUnderlyingData?: ColumnUnderlyingData): TableRow[] => { - if (!selectedUnderlyingData) { - return []; - } - let newTableDataSource: TableRow[] = []; - - for (let i = 0; i < selectedUnderlyingData.leftAggs.length; i++) { - let left = selectedUnderlyingData.leftAggs[i]; - let right = selectedUnderlyingData.rightAggs[i]; - - let deltaCallsPercentNumber = ((right.calls - left.calls) / left.calls) * 100; - - let deltaHostDurationPercentNumber = ((right.host_duration - left.host_duration) / left.host_duration) * 100; - - let deltaSelfHostDurationPercentNumber = - ((right.self_host_duration - left.self_host_duration) / left.self_host_duration) * 100; - - let deltaDeviceDurationPercentNumber = - ((right.device_duration - left.device_duration) / left.device_duration) * 100; - - let deltaSelfDeviceDurationPercentNumber = - ((right.self_device_duration - left.self_device_duration) / left.self_device_duration) * 100; - - newTableDataSource.push({ - key: i, - operator: left.name, - baselineCalls: left.calls, - expCalls: right.calls, - deltaCalls: right.calls - left.calls, - deltaCallsPercentNumber: deltaCallsPercentNumber, - deltaCallsPercent: toPercentString(deltaCallsPercentNumber), - - baselineHostDuration: left.host_duration, - expHostDuration: right.host_duration, - deltaHostDuration: parseFloat((right.host_duration - left.host_duration).toFixed(3)), - deltaHostDurationPercentNumber: deltaHostDurationPercentNumber, - deltaHostDurationPercent: toPercentString(deltaHostDurationPercentNumber), - - baselineSelfHostDuration: left.self_host_duration, - expSelfHostDuration: right.self_host_duration, - deltaSelfHostDuration: parseFloat((right.self_host_duration - left.self_host_duration).toFixed(3)), - deltaSelfHostDurationPercentNumber: deltaSelfHostDurationPercentNumber, - deltaSelfHostDurationPercent: toPercentString(deltaSelfHostDurationPercentNumber), - - baselineDeviceDuration: left.device_duration, - expDeviceDuration: right.device_duration, - deltaDeviceDuration: parseFloat((right.device_duration - left.device_duration).toFixed(3)), - deltaDeviceDurationPercentNumber: deltaDeviceDurationPercentNumber, - deltaDeviceDurationPercent: toPercentString(deltaDeviceDurationPercentNumber), - - baselineSelfDeviceDuration: left.self_device_duration, - expSelfDeviceDuration: right.self_device_duration, - deltaSelfDeviceDuration: parseFloat((right.self_device_duration - left.self_device_duration).toFixed(3)), - deltaSelfDeviceDurationPercentNumber: deltaSelfDeviceDurationPercentNumber, - deltaSelfDeviceDurationPercent: toPercentString(deltaSelfDeviceDurationPercentNumber), - }); - } - - return newTableDataSource; - }; - - React.useEffect(() => { - const hasData = - run.length > 0 && - worker.length > 0 && - span.length > 0 && - expRun.length > 
0 && - expWorker.length > 0 && - expSpan.length > 0; - if (hasData) { - setLoading(true); - - columnChartDataStack = []; - stepChartDataStack = []; - columnUnderlyingDataStack = []; - columnTableDataSourceStack = []; - - api.defaultApi - .diffnodeGet(run, worker, span, expRun, expWorker, expSpan) - .then((resp) => { - handleDiffNodeResp(resp); - let newRootUnderlyingData = { - name: 'rootNode', - path: resp.path, - leftAggs: resp.left.aggs, - rightAggs: resp.right.aggs, - }; - - setRootUnderlyingData(newRootUnderlyingData); - let tableDataSource3 = generateDataSourceFromUnderlyingData(newRootUnderlyingData); - setTableDataSource(tableDataSource3); - }) - .finally(() => setLoading(false)); - - setSelectedTableColumns([...baseTableColumns, ...hostDurationColumns]); - } - }, [run, worker, span, expRun, expWorker, expSpan]); - - const handleDiffNodeResp = (resp: any): void => { - let newColumnChartData: any[] = []; - let newStepChartData: any[] = []; - let underlyingData: ColumnUnderlyingData[] = []; - - newColumnChartData.push(['Call', 'Baseline', 'Experiment', 'Baseline Trend', 'Exp Trend']); - newStepChartData.push(['Call', 'Diff', 'Accumulated Diff']); - - if (resp.children.length > 0) { - let accumulatedLeftDuration = 0; - let accumulatedRightDuration = 0; - let accumulatedStepDiff = 0; - for (let i = 0; i < resp.children.length; i++) { - let left = resp.children[i].left; - let right = resp.children[i].right; - let currColumn: any[] = []; - let currStep: any[] = []; - - let name = left.name; - if (name === COMPOSITE_NODES_NAME) { - continue; - } - - if (name.startsWith('aten::')) { - // Ignore aten operators - continue; - } - - if (name.startsWith('enumerate(DataLoader)')) { - name = name.substring(21); - } - - if (name.startsWith('enumerate(DataPipe)')) { - name = name.substring(19); - } - - if (name.startsWith('nn.Module: ')) { - name = name.substring(11); - } - - if (name.startsWith('Optimizer.zero_grad')) { - name = 'Optimizer.zero_grad'; - } - - if (name.startsWith('Optimizer.step')) { - name = 'Optimizer.step'; - } - - currColumn.push(name); - currColumn.push(left.total_duration); - currColumn.push(right.total_duration); - - accumulatedLeftDuration += left.total_duration; - currColumn.push(accumulatedLeftDuration); - - accumulatedRightDuration += right.total_duration; - currColumn.push(accumulatedRightDuration); - newColumnChartData.push(currColumn); - - underlyingData.push({ - name: name, - path: resp.children[i].path, - leftAggs: left.aggs, - rightAggs: right.aggs, - }); - - currStep.push(name); - let stepDiff = right.total_duration - left.total_duration; - currStep.push(stepDiff); - - accumulatedStepDiff += stepDiff; - currStep.push(accumulatedStepDiff); - - newStepChartData.push(currStep); - } - } else { - let left = resp.left; - let right = resp.right; - let currColumn: any[] = []; - let currStep: any[] = []; - let name = left.name; - - if (name.startsWith('nn.Module: ')) { - name = name.substring(11); - } - - currColumn.push(name); - currColumn.push(left.total_duration); - currColumn.push(right.total_duration); - currColumn.push(left.total_duration); - currColumn.push(right.total_duration); - - newColumnChartData.push(currColumn); - - currStep.push(name); - let stepDiff = right.total_duration - left.total_duration; - currStep.push(stepDiff); - currStep.push(stepDiff); - newStepChartData.push(currStep); - } - - setColumnChartData(newColumnChartData); - columnChartDataStack.push(newColumnChartData); - - setStepChartData(newStepChartData); - 
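// A minimal standalone sketch of the operator-name normalization performed in handleDiffNodeResp
// above, assuming the same prefix conventions; the helper name normalizeOpName is hypothetical.
// (In the component, COMPOSITE_NODES_NAME and 'aten::' operators are filtered out before this step.)
const normalizeOpName = (rawName: string): string => {
  // Strip wrapper prefixes so baseline and experiment rows line up under a stable label.
  if (rawName.startsWith('enumerate(DataLoader)')) {
    return rawName.substring('enumerate(DataLoader)'.length);
  }
  if (rawName.startsWith('enumerate(DataPipe)')) {
    return rawName.substring('enumerate(DataPipe)'.length);
  }
  if (rawName.startsWith('nn.Module: ')) {
    return rawName.substring('nn.Module: '.length);
  }
  // Collapse optimizer calls to one canonical label per phase.
  if (rawName.startsWith('Optimizer.zero_grad')) {
    return 'Optimizer.zero_grad';
  }
  if (rawName.startsWith('Optimizer.step')) {
    return 'Optimizer.step';
  }
  return rawName;
};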
stepChartDataStack.push(newStepChartData); - - setColumnUnderlyingData(underlyingData); - columnUnderlyingDataStack.push(underlyingData); - - setDataStackLevel(columnChartDataStack.length); - }; // #endregion - - if (!loading && columnUnderlyingDataStack.length === 0) { - return ( - - - - There is no run selected for diff. - - - ); - } - - if (loading) { - return ; - } - - return ( -
- - - - - - - - {columnChartData.length > 1 && ( - <> - - - - )} - {columnChartData.length === 1 && No more level to show.} - - - - - - - - - - -   - - - - - - - - ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DistributedView.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DistributedView.tsx deleted file mode 100644 index 096501b61bc9ce41978c65dc24f6b3640ab960f3..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/DistributedView.tsx +++ /dev/null @@ -1,313 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import Card from '@material-ui/core/Card'; -import CardContent from '@material-ui/core/CardContent'; -import CardHeader from '@material-ui/core/CardHeader'; -import Grid from '@material-ui/core/Grid'; -import InputLabel from '@material-ui/core/InputLabel'; -import MenuItem from '@material-ui/core/MenuItem'; -import Select, { SelectProps } from '@material-ui/core/Select'; -import { makeStyles } from '@material-ui/core/styles'; -import { Table } from 'antd'; -import { ColumnsType } from 'antd/es/table'; -import * as React from 'react'; -import * as api from '../api'; -import { DistributedGraph, GpuInfo, Graph } from '../api'; -import { firstOrUndefined } from '../utils'; -import { ColumnChart } from './charts/ColumnChart'; -import { DataLoading } from './DataLoading'; -import { GpuInfoTable } from './GpuInfoTable'; -import { makeChartHeaderRenderer, useTooltipCommonStyles } from './helpers'; -import { - distributedCommopsTableTooltip, - distributedGpuInfoTableTooltip, - distributedOverlapGraphTooltip, - distributedWaittimeGraphTooltip, -} from './TooltipDescriptions'; - -export interface IProps { - run: string; - worker: string; - span: string; -} - -const useStyles = makeStyles((theme) => ({ - root: { - flexGrow: 1, - }, - verticalInput: { - display: 'flex', - alignItems: 'center', - }, - inputWidth: { - width: '4em', - }, - inputWidthOverflow: { - minWidth: '15em', - whiteSpace: 'nowrap', - }, - description: { - marginLeft: theme.spacing(1), - }, - table: { - height: '100%', - border: '1px solid #efefef', - '& .ant-table-tbody > tr': { - height: 20, - fontSize: '10pt', - '& > td': { - padding: '0 8px!important', - }, - }, - }, -})); - -export const DistributedView: React.FC = (props) => { - const tooltipCommonClasses = useTooltipCommonStyles(); - const chartHeaderRenderer = React.useMemo( - () => makeChartHeaderRenderer(tooltipCommonClasses), - [tooltipCommonClasses] - ); - - let { run, worker, span } = props; - const classes = useStyles(); - - const [overlapGraph, setOverlapGraph] = React.useState(undefined); - const [waittimeGraph, setWaittimeGraph] = React.useState(undefined); - const [commopsTableData, setCommopsTableData] = React.useState(undefined); - const [gpuInfo, setGpuInfo] = React.useState(undefined); - const [commopsTableTitle, setCommopsTableTitle] = React.useState(''); - const [commopsWorkers, setCommopsWorkers] = React.useState([]); - const [overlapSteps, setOverlapSteps] = React.useState([]); - const [waittimeSteps, setWaittimeSteps] = React.useState([]); - const [overlapStep, setOverlapStep] = React.useState(''); - const [waittimeStep, setWaittimeStep] = React.useState(''); - const [commopsWorker, setCommopsWorker] = React.useState(''); - const [columns, 
setColumns] = React.useState>([]); - const [pageSize, setPageSize] = React.useState(30); - - React.useEffect(() => { - if (waittimeSteps.includes('all')) { - setWaittimeStep('all'); - } else { - setWaittimeStep(firstOrUndefined(waittimeSteps) ?? ''); - } - }, [waittimeSteps]); - - React.useEffect(() => { - if (overlapSteps.includes('all')) { - setOverlapStep('all'); - } else { - setOverlapStep(firstOrUndefined(overlapSteps) ?? ''); - } - }, [overlapSteps]); - - React.useEffect(() => { - setCommopsWorker(firstOrUndefined(commopsWorkers) ?? ''); - }, [commopsWorkers]); - - React.useEffect(() => { - api.defaultApi.distributedOverlapGet(run, 'All', span).then((resp) => { - setOverlapGraph(resp); - setOverlapSteps(Object.keys(resp.data)); - }); - api.defaultApi.distributedWaittimeGet(run, 'All', span).then((resp) => { - setWaittimeGraph(resp); - setWaittimeSteps(Object.keys(resp.data)); - }); - api.defaultApi.distributedCommopsGet(run, 'All', span).then((resp) => { - setCommopsTableData(resp.data); - setCommopsWorkers(Object.keys(resp.data)); - setCommopsTableTitle(resp.metadata.title); - }); - api.defaultApi.distributedGpuinfoGet(run, 'All', span).then((resp) => { - setGpuInfo(resp); - }); - }, [run, worker, span]); - - const onCommopsWorkerChanged: SelectProps['onChange'] = (event) => { - setCommopsWorker(event.target.value as string); - }; - - const onOverlapStepChanged: SelectProps['onChange'] = (event) => { - setOverlapStep(event.target.value as string); - }; - - const onWaittimeStepChanged: SelectProps['onChange'] = (event) => { - setWaittimeStep(event.target.value as string); - }; - - const getColumnChartData = (distributedGraph?: DistributedGraph, step?: string): any => { - if (!distributedGraph || !step) { - return undefined; - } - const barLabels = Object.keys(distributedGraph.data[step]); - return { - legends: distributedGraph.metadata.legends, - barLabels, - barHeights: barLabels.map((label) => distributedGraph.data[step][label]), - }; - }; - const overlapData = React.useMemo(() => getColumnChartData(overlapGraph, overlapStep), [overlapGraph, overlapStep]); - const waittimeData = React.useMemo( - () => getColumnChartData(waittimeGraph, waittimeStep), - [waittimeGraph, waittimeStep] - ); - - const getTableData = (tableData?: any, opsWorker?: string): any[] => { - if (!tableData || !opsWorker) { - return []; - } - let dataInfo: api.Graph = tableData[opsWorker]; - const stringCompare = (a: string, b: string): number => a.localeCompare(b); - const numberCompare = (a: number, b: number): number => a - b; - let column: any[] = dataInfo.columns.map((item) => { - return { - title: item.name, - key: item.name, - dataIndex: item.name, - sorter: - item.type === 'string' - ? (a: any, b: any): number => stringCompare(a[item.name], b[item.name]) - : (a: any, b: any): number => numberCompare(a[item.name], b[item.name]), - }; - }); - setColumns(column); - return dataInfo.rows.map((row, index) => { - if (row.length !== dataInfo.columns.length) { - return null; - } - const dataRow: { [column: string]: number | string } = { key: index }; - dataInfo.columns.forEach((item, idx) => { - dataRow[item.name] = row[idx] as string | number; - }); - return dataRow; - }); - }; - const commopsTable: any[] = React.useMemo(() => { - return getTableData(commopsTableData, commopsWorker); - }, [commopsTableData, commopsWorker]); - - const onShowSizeChange = (current: number, size: number): void => { - setPageSize(size); - }; - - return ( -
- - - - - {gpuInfo && ( - - - - - - - - - )} - - - {(chartData): JSX.Element => ( - - - - - Step - - - - - - - {overlapGraph?.metadata?.title && ( - - )} - - - )} - - - - - {(chartData): JSX.Element => ( - - - - - Step - - - - - - - {waittimeGraph?.metadata?.title && ( - - )} - - - )} - - - - - - - - - - Worker - - - - - - - - -
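// A minimal standalone sketch of the Graph-to-antd conversion used by getTableData above,
// assuming the columns/rows shape of api.Graph; the helper name toAntdTable and the GraphLike
// interface are hypothetical stand-ins.
interface GraphLike {
  columns: Array<{ name: string; type: string }>;
  rows: Array<Array<string | number>>;
}

const toAntdTable = (graph: GraphLike): { columns: any[]; dataSource: any[] } => {
  // One antd column per Graph column, with a comparator chosen from the declared column type.
  const columns = graph.columns.map((col) => ({
    title: col.name,
    key: col.name,
    dataIndex: col.name,
    sorter:
      col.type === 'string'
        ? (a: any, b: any): number => String(a[col.name]).localeCompare(String(b[col.name]))
        : (a: any, b: any): number => Number(a[col.name]) - Number(b[col.name]),
  }));
  // Each positional row array becomes an object keyed by column name, plus a stable antd row key.
  const dataSource = graph.rows.map((row, index) => {
    const record: { [column: string]: string | number } = { key: index };
    graph.columns.forEach((col, idx) => {
      record[col.name] = row[idx];
    });
    return record;
  });
  return { columns, dataSource };
};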
- - - - - - - - ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/FullCircularProgress.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/FullCircularProgress.tsx deleted file mode 100644 index 3f4c0fbaf15a15d402aa205574a28df045d24aec..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/FullCircularProgress.tsx +++ /dev/null @@ -1,23 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ -import CircularProgress from '@material-ui/core/CircularProgress'; -import { makeStyles } from '@material-ui/core/styles'; -import * as React from 'react'; - -const useStyles = makeStyles(() => ({ - root: { - width: '100%', - display: 'flex', - justifyContent: 'center', - }, -})); - -export const FullCircularProgress: React.FC = () => { - const classes = useStyles(); - return ( -
<div className={classes.root}> - <CircularProgress /> - </div>
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/GpuInfoTable.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/GpuInfoTable.tsx deleted file mode 100644 index 07f6f1d78c88abab5f62f844356b47ca517a2561..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/GpuInfoTable.tsx +++ /dev/null @@ -1,130 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import { makeStyles } from '@material-ui/core/styles'; -import * as React from 'react'; - -export interface IProps { - gpuInfo: any; -} - -const useStyles = makeStyles((theme) => ({ - root: { - border: '1px solid #E0E0E0', - borderCollapse: 'collapse', - width: '100%', - }, - td: { - borderTop: '1px solid #E0E0E0', - borderBottom: '1px solid #E0E0E0', - borderCollapse: 'collapse', - paddingLeft: 10, - paddingRight: 10, - }, - nodeTd: { - fontWeight: 'bold', - }, - pidTd: { - fontWeight: 'normal', - }, - gpuTd: { - fontWeight: 'normal', - }, - keyTd: { - fontWeight: 'normal', - textAlign: 'right', - }, - valueTd: { - fontWeight: 'bold', - }, -})); - -interface TableCellInfo { - content: string; - rowspan: number; - cellType: 'node' | 'pid' | 'gpu' | 'key' | 'value'; - last?: boolean; -} - -function makeTableCellInfo(gpuInfo: any): TableCellInfo[][] { - const rows: TableCellInfo[][] = []; - let currRow: TableCellInfo[] = []; - rows.push(currRow); - Object.keys(gpuInfo.data).forEach((nodeName) => { - const nodeCell = { - content: nodeName, - rowspan: 0, - cellType: 'node' as const, - }; - const i = rows.length; - currRow.push(nodeCell); - Object.keys(gpuInfo.data[nodeName]).forEach((pid) => { - const pidCell = { content: pid, rowspan: 0, cellType: 'pid' as const }; - const j = rows.length; - currRow.push(pidCell); - Object.keys(gpuInfo.data[nodeName][pid]).forEach((gpu) => { - const gpuCell = { content: gpu, rowspan: 0, cellType: 'gpu' as const }; - const k = rows.length; - currRow.push(gpuCell); - Object.keys(gpuInfo.data[nodeName][pid][gpu]).forEach((keyName) => { - currRow.push({ - content: keyName, - rowspan: 1, - cellType: 'key' as const, - }); - const value: string = gpuInfo.data[nodeName][pid][gpu][keyName]; - currRow.push({ - content: value, - rowspan: 1, - cellType: 'value' as const, - }); - currRow = []; - rows.push(currRow); - }); - gpuCell.rowspan = rows.length - k; - }); - pidCell.rowspan = rows.length - j; - }); - nodeCell.rowspan = rows.length - i; - }); - rows.pop(); - return rows; -} - -export const GpuInfoTable: React.FC = (props) => { - const classes = useStyles(); - interface TableCellInfoNoLast { - content: string; - rowspan: number; - cellType: 'node' | 'pid' | 'gpu' | 'key' | 'value'; - } - - const rows = React.useMemo(() => makeTableCellInfo(props.gpuInfo), [props.gpuInfo]); - - const cellToClass = { - node: classes.nodeTd, - pid: classes.pidTd, - gpu: classes.gpuTd, - key: classes.keyTd, - value: classes.valueTd, - }; - - const renderCell = function (info: TableCellInfoNoLast): JSX.Element { - let cellClass = cellToClass[info.cellType]; - let content = info.cellType === 'key' ? `${info.content}:` : info.content; - return ( - - ); - }; - - return ( -
- {content} -
- {rows.map((row) => ( - {row.map(renderCell)} - ))} -
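// A minimal standalone sketch of the rowspan back-patching used by makeTableCellInfo above,
// shown for a two-level object instead of the full node/pid/gpu hierarchy; the Cell shape and
// the makeRows name are simplified, hypothetical stand-ins.
interface Cell {
  content: string;
  rowspan: number;
}

const makeRows = (data: { [group: string]: { [key: string]: string } }): Cell[][] => {
  const rows: Cell[][] = [];
  let currRow: Cell[] = [];
  rows.push(currRow);
  Object.keys(data).forEach((group) => {
    // Push the group cell with a placeholder rowspan, remember how many rows exist right now,
    // and patch the rowspan once all of the group's key/value rows have been emitted.
    const groupCell: Cell = { content: group, rowspan: 0 };
    const firstRowIndex = rows.length;
    currRow.push(groupCell);
    Object.keys(data[group]).forEach((key) => {
      currRow.push({ content: key, rowspan: 1 });
      currRow.push({ content: data[group][key], rowspan: 1 });
      // Start a fresh row for the next key/value pair.
      currRow = [];
      rows.push(currRow);
    });
    groupCell.rowspan = rows.length - firstRowIndex;
  });
  rows.pop(); // drop the trailing empty row
  return rows;
};

// Example: { gpu0: { Name: 'A100', Memory: '40 GB' } } yields two rows, with the gpu0 cell
// spanning both (rowspan = 2).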
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Kernel.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Kernel.tsx deleted file mode 100644 index 66e05695153a853f68d382a2f3b6a68931861abf..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Kernel.tsx +++ /dev/null @@ -1,301 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Add visualization of PyTorch Ascend profiling. - *--------------------------------------------------------------------------------------------*/ - -import Card from '@material-ui/core/Card'; -import CardContent from '@material-ui/core/CardContent'; -import CardHeader from '@material-ui/core/CardHeader'; -import FormControlLabel from '@material-ui/core/FormControlLabel'; -import Grid from '@material-ui/core/Grid'; -import InputLabel from '@material-ui/core/InputLabel'; -import MenuItem from '@material-ui/core/MenuItem'; -import Radio from '@material-ui/core/Radio'; -import RadioGroup, { RadioGroupProps } from '@material-ui/core/RadioGroup'; -import Select, { SelectProps } from '@material-ui/core/Select'; -import { makeStyles } from '@material-ui/core/styles'; -import TextField, { StandardTextFieldProps, TextFieldProps } from '@material-ui/core/TextField'; -import * as React from 'react'; -import * as api from '../api'; -import { Graph } from '../api'; -import { KernelGroupBy } from '../constants/groupBy'; -import { useSearch } from '../utils/search'; -import { topIsValid, UseTop, useTopN } from '../utils/top'; -import { AntTableChart } from './charts/AntTableChart'; -import { PieChart } from './charts/PieChart'; -import { DataLoading } from './DataLoading'; -import { makeChartHeaderRenderer, useTooltipCommonStyles } from './helpers'; -import { - gpuKernelTotalTimeTooltip, - tensorCoresPieChartTooltip, - tensorCoresPieChartTooltipAscend, -} from './TooltipDescriptions'; - -export interface IProps { - run: string; - worker: string; - span: string; - deviceTarget: string; -} - -const useStyles = makeStyles((theme) => ({ - root: { - flexGrow: 1, - }, - verticalInput: { - display: 'flex', - alignItems: 'center', - }, - inputWidth: { - width: '4em', - }, - inputWidthOverflow: { - minWidth: '15em', - whiteSpace: 'nowrap', - }, - description: { - marginLeft: theme.spacing(1), - }, -})); - -export const Kernel: React.FC = (props) => { - const { run, worker, span, deviceTarget } = props; - const classes = useStyles(); - const tooltipCommonClasses = useTooltipCommonStyles(); - const chartHeaderRenderer = React.useMemo( - () => makeChartHeaderRenderer(tooltipCommonClasses), - [tooltipCommonClasses] - ); - - const [kernelGraph, 
setKernelGraph] = React.useState(undefined); - const [tcGraph, setTcGraph] = React.useState(undefined); - const [kernelTable, setKernelTable] = React.useState(undefined); - const [groupBy, setGroupBy] = React.useState(KernelGroupBy.KERNEL); - const [searchKernelName, setSearchKernelName] = React.useState(''); - const [searchOpName, setSearchOpName] = React.useState(''); - const [sortColumn, setSortColumn] = React.useState(''); - const [hasStep, setHasStep] = React.useState(false); - - const [topText, actualTop, useTop, setTopText, setUseTop] = useTopN({ - defaultUseTop: UseTop.USE, - defaultTop: 10, - }); - - React.useEffect(() => { - setSearchOpName(''); - }, [groupBy]); - - React.useEffect(() => { - if (kernelGraph) { - setTopText(String(Math.min(kernelGraph.rows?.length, 10))); - } - }, [kernelGraph]); - - React.useEffect(() => { - api.defaultApi.kernelTableGet(run, worker, span, groupBy).then((resp) => { - setSortColumn(resp.metadata.sort); - setKernelTable(resp.data); - const nameColumnIdx = resp.data.columns.findIndex((c) => c.name.toLowerCase() === 'step id'); - setHasStep(nameColumnIdx > -1); - }); - }, [run, worker, span, groupBy]); - - React.useEffect(() => { - api.defaultApi.kernelGet(run, worker, span, KernelGroupBy.KERNEL).then((resp) => { - setKernelGraph(resp.total); - setGroupBy(resp.device_target === 'Ascend' ? KernelGroupBy.KERNEL_NAME_AND_OP_NAME : KernelGroupBy.KERNEL); - }); - }, [run, worker, span]); - - React.useEffect(() => { - api.defaultApi.kernelTcPieGet(run, worker, span).then((resp) => { - setTcGraph(resp.total); - }); - }, [run, worker, span]); - - const [searchedKernelTable] = useSearch(searchKernelName, 'name', kernelTable); - const [searchedOpTable] = useSearch( - searchOpName, - deviceTarget === 'Ascend' ? 'step id' : 'operator', - searchedKernelTable - ); - - const onGroupByChanged: SelectProps['onChange'] = (event) => { - setGroupBy(event.target.value as KernelGroupBy); - }; - - const onSearchKernelChanged: TextFieldProps['onChange'] = (event) => { - setSearchKernelName(event.target.value as string); - }; - - const onSearchOpChanged: TextFieldProps['onChange'] = (event) => { - setSearchOpName(event.target.value as string); - }; - - const onUseTopChanged: RadioGroupProps['onChange'] = (event) => { - setUseTop(event.target.value as UseTop); - }; - - const onTopChanged = (event: React.ChangeEvent): void => { - setTopText(event.target.value); - }; - - const inputProps: StandardTextFieldProps['inputProps'] = { - min: 1, - }; - - const GPUKernelTotalTimeTitle = React.useMemo( - () => chartHeaderRenderer('Total Time (us)', gpuKernelTotalTimeTooltip), - [chartHeaderRenderer] - ); - - const TensorCoresTitle = React.useMemo( - () => - deviceTarget === 'Ascend' - ? chartHeaderRenderer('Accelerator Core Utilization', tensorCoresPieChartTooltipAscend) - : chartHeaderRenderer('Tensor Cores Utilization', tensorCoresPieChartTooltip), - [chartHeaderRenderer, deviceTarget] - ); - - return ( -
- - - - - - - - } label='All kernels' /> - } label='Top kernels to show' /> - - - {useTop === UseTop.USE && ( - - - - )} - - - - {(graph): JSX.Element => ( - - - - - )} - - - - - {(graph): JSX.Element => ( - - - - - )} - - - - - - - Group By - - - - - - - - {deviceTarget === 'Ascend' - ? groupBy === KernelGroupBy.KERNEL && - hasStep && ( - - - - ) - : groupBy === KernelGroupBy.KERNEL_NAME_AND_OP_NAME && ( - - - - )} - - - - - - {(graph): JSX.Element => } - - - - - - - -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx deleted file mode 100644 index 225f28a931e969d7cfd40d3f490e7cb45c64a305..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/MemoryView.tsx +++ /dev/null @@ -1,531 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Add visualization of PyTorch Ascend profiling. - *--------------------------------------------------------------------------------------------*/ - -import Card from '@material-ui/core/Card'; -import CardContent from '@material-ui/core/CardContent'; -import CardHeader from '@material-ui/core/CardHeader'; -import Grid from '@material-ui/core/Grid'; -import InputLabel from '@material-ui/core/InputLabel'; -import MenuItem from '@material-ui/core/MenuItem'; -import Select, { SelectProps } from '@material-ui/core/Select'; -import Slider from '@material-ui/core/Slider'; -import { makeStyles } from '@material-ui/core/styles'; -import TextField, { TextFieldProps } from '@material-ui/core/TextField'; -import * as React from 'react'; -import * as api from '../api'; -import { - Graph, - GraphAscend, - MemoryCurveDataAll, - MemoryCurveData, - MemoryCurveDataAscend, - MemoryEventsData, - MemoryEventsDataAll, - MemoryStatsData, -} from '../api'; -import { useSearchDirectly } from '../utils/search'; -import { AntTableChart } from './charts/AntTableChart'; -import { LineChart } from './charts/NewLineChart'; -import { DataLoading } from './DataLoading'; -import { MemoryStatsTable } from './tables/MemoryStatsTable'; - -const useStyles = makeStyles((theme) => ({ - root: { - flexGrow: 1, - }, - curve: { - marginBottom: 20, - }, - verticalInput: { - display: 'flex', - alignItems: 'center', - }, - inputWidth: { - width: '4em', - }, - inputWidthOverflow: { - minWidth: '15em', - whiteSpace: 'nowrap', - }, - full: { - width: '100%', - }, - description: { - marginLeft: theme.spacing(1), - }, - filterSlider: { - marginTop: 15, - marginRight: 6, - width: 250, - }, - filterInput: { - width: 100, - }, -})); - -export interface IProps { - run: string; - worker: string; - span: string; - deviceTarget: string; -} - -const tags = ['Operator', 'Component']; - -export const MemoryView: React.FC = React.memo((props) => { - interface EventSizeFilter { - [deviceName: string]: Array; - } - - interface MaxEventSize { - [deviceName: string]: number; - } - - const { run, worker, span, deviceTarget } = props; - const classes = useStyles(); - - const [memoryStatsData, setMemoryStatsData] = React.useState(undefined); - - // for backward compatability, old 
profile do not have events to show - const showEvents = (): boolean | undefined => { - return memoryEventsData && Object.keys(memoryEventsData.rows).length !== 0; - }; - const [memoryEventsData, setMemoryEventsData] = React.useState(undefined); - - // for backward compatability, old profile do not have curve to show - const showCurve = (): boolean | undefined => { - return memoryCurveData && Object.keys(memoryCurveData.rows).length !== 0; - }; - const [memoryCurveData, setMemoryCurveData] = React.useState( - undefined - ); - - const [lineChartData, setLineChartData] = React.useState(undefined); - - const [devices, setDevices] = React.useState([]); - const [device, setDevice] = React.useState(''); - const [tag, setTag] = React.useState('Operator'); - const memoryCurveDataAllRef = React.useRef(undefined); - const memoryEventDataAllRef = React.useRef(undefined); - - interface SelectedRange { - start: number; - end: number; - startTs: number; - endTs: number; - } - const [selectedRange, setSelectedRange] = React.useState(); - const [searchOperatorName, setSearchOperatorName] = React.useState(''); - const [searchEventOperatorName, setSearchEventOperatorName] = React.useState(''); - const [filterEventSize, setFilterEventSize] = React.useState({}); - const [maxSize, setMaxSize] = React.useState({}); - - const getSearchIndex = function (): number { - if (!memoryStatsData) { - return -1; - } - for (let i = 0; i < memoryStatsData.columns.length; i++) { - if (memoryStatsData.columns[i].name === memoryStatsData.metadata.search) { - return i; - } - } - return -1; - }; - - const getStep = (size: number, indexBias: number): number => { - return 10 ** (Math.floor(Math.log10(size !== 0 ? size : 1)) - indexBias); - }; - - const filterByEventSize = (rows: T[] | undefined, size: Array): T[] | undefined => { - const result = React.useMemo(() => { - if (!rows) { - return undefined; - } - - // workaround type system - const field = (row: any): number => { - const sizeColIndex = 1; - return row[sizeColIndex]; - }; - - return rows.filter((row) => { - return field(row) >= size[0] && field(row) <= size[1]; - }); - }, [rows, size]); - - return result; - }; - - const searchIndex = getSearchIndex(); - const getName = React.useCallback((row: any) => row[searchIndex], [searchIndex]); - const getNameAscend = (row: any): any => row[0]; - const [searchedTableDataRows] = useSearchDirectly(searchOperatorName, getName, memoryStatsData?.rows[device] ?? []); - const [searchedEventsTableDataRows] = useSearchDirectly( - searchEventOperatorName, - deviceTarget === 'Ascend' ? getNameAscend : getName, - filterByEventSize(memoryEventsData?.rows[device], filterEventSize[device] ?? [0, Infinity]) ?? 
[] - ); - - const onSearchOperatorChanged: TextFieldProps['onChange'] = (event) => { - setSearchOperatorName(event.target.value as string); - }; - - const onSearchEventOperatorChanged: TextFieldProps['onChange'] = (event) => { - setSearchEventOperatorName(event.target.value as string); - }; - - const [selectedRecord, setSelectedRecord] = React.useState(); - const onRowSelected = (record?: object, rowIndex?: number): void => { - setSelectedRecord(record); - }; - - const onFilterEventSizeChanged = (event: any, newValue: number | number[]): void => { - setFilterEventSize({ - ...filterEventSize, - [device]: newValue as number[], - }); - }; - - const onFilterEventMinSizeInputChanged = (event: React.ChangeEvent): void => { - setFilterEventSize({ - ...filterEventSize, - [device]: [Number(event.target.value), filterEventSize[device][1]], - }); - }; - - const onFilterEventMaxSizeInputChanged = (event: React.ChangeEvent): void => { - setFilterEventSize({ - ...filterEventSize, - [device]: [filterEventSize[device][0], Number(event.target.value)], - }); - }; - - React.useEffect(() => { - if (deviceTarget !== 'Ascend') { - api.defaultApi.memoryGet(run, worker, span, selectedRange?.startTs, selectedRange?.endTs).then((resp) => { - setMemoryStatsData(resp); - if (!devices || devices.length === 0) { - // setDevices only executes on view load. Since a selection on the curve - // might filter out all events later, some devices might be missing. - setDevices(Object.keys(resp.rows)); - setDevice(resp.metadata.default_device); - } - }); - } - }, [run, worker, span, selectedRange]); - - React.useEffect(() => { - api.defaultApi.memoryEventsGet(run, worker, span, selectedRange?.startTs, selectedRange?.endTs).then((resp) => { - const tempRes = deviceTarget === 'Ascend' ? (resp as MemoryEventsDataAll).operator : (resp as MemoryEventsData); - if (deviceTarget === 'Ascend') { - memoryEventDataAllRef.current = resp as MemoryEventsDataAll; - } - let curMaxSize: MaxEventSize = {}; - let curFilterEventSize: EventSizeFilter = {}; - Object.keys(tempRes.rows).forEach((deviceName) => { - curMaxSize[deviceName] = 0; - for (let i = 0; i < tempRes.rows[deviceName].length; i++) { - curMaxSize[deviceName] = Math.max(curMaxSize[deviceName], tempRes.rows[deviceName][i][1]); - } - curFilterEventSize[deviceName] = [curMaxSize[deviceName] / 4, curMaxSize[deviceName]]; - }); - setMaxSize(curMaxSize); - setFilterEventSize(curFilterEventSize); - setMemoryEventsData(tempRes); - }); - }, [run, worker, span, selectedRange]); - - React.useEffect(() => { - api.defaultApi.memoryCurveGet(run, worker, span).then((resp) => { - // Reset the selected range to null whenever run/worker/span changes - setSelectedRange(undefined); - if (deviceTarget === 'Ascend') { - const allCurveData = resp as MemoryCurveDataAll; - memoryCurveDataAllRef.current = allCurveData; - setDevice(allCurveData.default_device); - setDevices(allCurveData.devices); - setMemoryCurveData(allCurveData.total); - setTag('Operator'); - } else { - setMemoryCurveData(resp as MemoryCurveData); - } - }); - }, [run, worker, span]); - - React.useEffect(() => { - if (memoryCurveData !== undefined) { - if (deviceTarget === 'Ascend') { - setLineChartData({ - title: memoryCurveData.metadata.peaks[device] ?? '', - columns: memoryCurveData.columns[device] ?? [], - rows: memoryCurveData.rows[device] ??
{}, - }); - } else { - setLineChartData({ - title: memoryCurveData.metadata.peaks[device], - columns: memoryCurveData.columns, - rows: memoryCurveData.rows[device] ?? [], - }); - } - } - }, [memoryCurveData, device]); - - const onDeviceChanged: SelectProps['onChange'] = (event) => { - setDevice(event.target.value as string); - setSelectedRange(undefined); - }; - - const onTagChanged: SelectProps['onChange'] = (event) => { - setTag(event.target.value as string); - if (event.target.value === 'Operator') { - setMemoryCurveData(memoryCurveDataAllRef.current?.total); - setMemoryEventsData(memoryEventDataAllRef.current?.operator); - setSelectedRange(undefined); - } else { - setMemoryCurveData(memoryCurveDataAllRef.current?.ptaGe); - setMemoryEventsData(memoryEventDataAllRef.current?.component); - } - }; - - const onSelectedRangeChanged = (start: number, end: number): void => { - if (start > end) { - setSelectedRange(undefined); - return; - } - - let allDatas = deviceTarget === 'Ascend' ? memoryCurveData?.rows[device]?.Allocated : memoryCurveData?.rows[device]; - if (allDatas.length <= 1) { - setSelectedRange(undefined); - return; - } - - let startTs = 0; - let endTs = 0; - let realStart = 0; - let realEnd = 0; - let startId = 1; - let endId = 0; - let needLoopStart = true; - for (let i = 1; i < allDatas.length; i++) { - if (startId > start && needLoopStart) { - needLoopStart = false; - realStart = i - 1; - } - if (allDatas[i][0] !== allDatas[i - 1][0]) { - if (startId <= start) { - startId += 1; - } - endId += 1; - } - if (endId > end) { - realEnd = i - 1; - break; - } else { - realEnd = i; - if (needLoopStart) { - realStart = i; - } - } - } - - if (deviceTarget === 'Ascend') { - startTs = allDatas[realStart][0]; - endTs = allDatas[realEnd][0]; - } else { - let bias = memoryCurveData?.metadata.first_ts ?? 0; - let scale = 1 / (memoryCurveData?.metadata.time_factor ?? 1); - startTs = Math.round((allDatas[realStart][0] * scale) + bias); - endTs = Math.round((allDatas[realEnd][0] * scale) + bias); - } - - setSelectedRange({ start, end, startTs, endTs }); - }; - - return ( -
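// A minimal standalone sketch of the chart-x-to-trace-timestamp mapping used at the end of
// onSelectedRangeChanged above for the non-Ascend path, assuming first_ts and time_factor come
// from the memory curve metadata; the helper name chartXToTraceTs is hypothetical.
interface CurveMetadataLike {
  first_ts?: number; // absolute timestamp of the first curve sample
  time_factor?: number; // scaling applied when the curve x-axis was produced
}

const chartXToTraceTs = (x: number, metadata: CurveMetadataLike): number => {
  const bias = metadata.first_ts ?? 0;
  const scale = 1 / (metadata.time_factor ?? 1);
  // Undo the scaling applied to the curve, then shift back to absolute trace time.
  return Math.round(x * scale + bias);
};

// Example: with time_factor = 0.001 the curve x-axis is in milliseconds relative to first_ts,
// so an x of 12.5 maps back to first_ts + 12500 in the original (microsecond) unit.
const exampleTs = chartXToTraceTs(12.5, { first_ts: 1_700_000_000_000, time_factor: 0.001 });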
- - - - - - - {(graph): JSX.Element => ( - - - - Device - - - {deviceTarget === 'Ascend' && ( - - Group By - - - )} - - {showCurve() && lineChartData && lineChartData.columns.length > 0 && ( - -
- -
-
- )} -
- )} -
-
- {showEvents() && ( - <> - {(deviceTarget !== 'Ascend' || tag === 'Operator') && ( - - - - - - - - - - - - - - - - - - - - - )} - - - {(data): JSX.Element => { - return ( - - ); - }} - - - - )} - {deviceTarget !== 'Ascend' && ( - <> - - - - - - - - {(data): JSX.Element => ( - - )} - - - - )} -
-
-
-
- ); -}); diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/ModuleView.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/ModuleView.tsx deleted file mode 100644 index a66a825365fd3c813e58865c609643ab547b4c49..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/ModuleView.tsx +++ /dev/null @@ -1,244 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ -import Card from '@material-ui/core/Card'; -import CardHeader from '@material-ui/core/CardHeader'; -import InputLabel from '@material-ui/core/InputLabel'; -import MenuItem from '@material-ui/core/MenuItem'; -import Select, { SelectProps } from '@material-ui/core/Select'; -import { makeStyles } from '@material-ui/core/styles'; -import { message, Table } from 'antd'; -import * as React from 'react'; -import { FlameGraph } from 'react-flame-graph'; -import { defaultApi, KeyedColumn, ModuleStats, ModuleViewData, OperatorNode } from '../api'; - -const useStyles = makeStyles((theme) => ({ - root: { - flexGrow: 1, - }, - hide: { - display: 'none', - }, -})); - -export interface IProps { - run: string; - worker: string; - span: string; -} - -const getKeyedTableColumns = (columns: KeyedColumn[]): any[] => { - return columns.map((col) => { - return { - dataIndex: col.key, - key: col.key, - title: col.name, - }; - }); -}; - -const getTableRows = (key: number, rows: ModuleStats[]): any[] => { - let initialKey = key; - return rows.map((row) => { - const currentKey = initialKey++; - const data: any = { - key: currentKey, - name: row.name, - occurences: row.occurences, - operators: row.operators, - host_duration: row.host_duration, - self_host_duration: row.self_host_duration, - device_duration: row.device_duration, - self_device_duration: row.self_device_duration, - }; - - if (row.children.length) { - data.children = getTableRows(key, row.children); - } - - return data; - }); -}; - -const getFlameGraphData = (rows: ModuleStats[]): any[] => { - return rows.map((row) => { - const data: any = { - name: row.name, - value: row.avg_duration, - tooltip: `${row.name} (module id: ${row.id}): ${row.avg_duration} us`, - }; - - if (row.children.length) { - data.children = getFlameGraphData(row.children); - } - - return data; - }); -}; - -const getTreeHeight = (row: ModuleStats): number => { - if (row.children?.length) { - return 1 + Math.max(...row.children.map((child) => getTreeHeight(child))); - } else { - return 1; - } -}; - -const getOperatorTree = (level: number, row: OperatorNode, result: object[]): void => { - result.push({ - level: level, - name: row.name, - start: row.start_time, - end: row.end_time, - }); - if (row.children.length) { - row.children.forEach((child) => getOperatorTree(level + 1, child, result)); - } -}; - -export const ModuleView: React.FC = (props) => { - const { run, worker, span } = props; - const classes = useStyles(); - - const [moduleView, setModuleView] = React.useState(undefined); - const [flameData, setFlameData] = React.useState([]); - const [flameHeight, setFlameHeight] = React.useState(0); - const [modules, setModules] = React.useState([]); - const [module, setModule] = React.useState(0); - - const [columns, setColumns] = React.useState([]); - const [rows, setRows] = React.useState([]); - - const cardRef = React.useRef(null); - const 
[cardWidth, setCardWidth] = React.useState(undefined); - const timelineRef = React.useRef(null); - - React.useEffect(() => { - defaultApi - .moduleGet(run, worker, span) - .then((resp) => { - setModuleView(resp); - if (resp) { - // set the flamegraph data - const flameGraphData: any[] = getFlameGraphData(resp.data); - setFlameData(flameGraphData); - const flameGraphHeight = Math.max(...flameGraphData.map((x) => getTreeHeight(x))); - setFlameHeight(flameGraphHeight * 25); - setModules(Array.from(Array(flameGraphData.length).keys())); - setModule(0); - - // set the tree table data - setColumns(getKeyedTableColumns(resp.columns)); - setRows(getTableRows(1, resp.data)); - } - }) - .catch((e) => { - if (e.status === 404) { - setModules([]); - setFlameData([]); - setRows([]); - } - }); - - if (cardRef.current) { - setCardWidth(cardRef.current.offsetWidth - 10); - } - try { - if (timelineRef.current) { - defaultApi.treeGet(run, worker, span).then((resp) => { - if (resp) { - const data = new google.visualization.DataTable(); - data.addColumn({ type: 'string', id: 'Layer' }); - data.addColumn({ type: 'string', id: 'Name' }); - data.addColumn({ type: 'string', role: 'tooltip' }); - data.addColumn({ type: 'number', id: 'Start' }); - data.addColumn({ type: 'number', id: 'End' }); - - let timelineData: any[] = []; - getOperatorTree(0, resp, timelineData); - timelineData.sort((a, b) => a.level - b.level); - const maxLevel = timelineData[timelineData.length - 1].level; - timelineData.forEach((d) => { - data.addRow([ - d.level.toString(), - d.name, - `${d.name} Duration: ${d.end - d.start} us`, - d.start / 1000.0, // the time unit is us returned from server, but the google charts only accept milliseconds here - d.end / 1000.0, - ]); - }); - - const chart = new google.visualization.Timeline(timelineRef.current); - const options = { - height: (maxLevel + 1) * 50, - tooltip: { - isHtml: true, - }, - timeline: { - showRowLabels: false, - }, - }; - chart.draw(data, options); - } - }); - } - } catch (e) { - message.warning('Timeline in module view is not supported offline.'); - } - }, [run, worker, span]); - - const handleModuleChange: SelectProps['onChange'] = (event) => { - setModule(event.target.value as number); - }; - - const moduleComponent = (): JSX.Element => { - const moduleFragment = ( - - Module - - - ); - - if (!modules || modules.length <= 1) { - return
<div className={classes.hide}>{moduleFragment}</div>
; - } else { - return moduleFragment; - } - }; - - return ( -
- - - {rows && rows.length > 0 && ( - - )} - - {moduleComponent()} - - {flameData && flameData.length > 0 && ( - {}} - /> - )} - -
- -
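// A minimal standalone sketch of how the operator tree collected by getOperatorTree above is
// flattened into timeline rows, with microsecond timestamps converted to the milliseconds the
// timeline chart expects; the OperatorNodeLike/TimelineRow shapes and names are hypothetical.
interface OperatorNodeLike {
  name: string;
  start_time: number; // microseconds, as reported by the server
  end_time: number; // microseconds
  children: OperatorNodeLike[];
}

interface TimelineRow {
  level: string;
  name: string;
  tooltip: string;
  startMs: number;
  endMs: number;
}

const flattenOperatorTree = (node: OperatorNodeLike, level = 0, out: TimelineRow[] = []): TimelineRow[] => {
  out.push({
    level: level.toString(),
    name: node.name,
    tooltip: `${node.name} Duration: ${node.end_time - node.start_time} us`,
    // The timeline chart consumes milliseconds, so divide the microsecond values by 1000.
    startMs: node.start_time / 1000.0,
    endMs: node.end_time / 1000.0,
  });
  node.children.forEach((child) => flattenOperatorTree(child, level + 1, out));
  return out;
};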
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Operator.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Operator.tsx deleted file mode 100644 index b19bef1967a31915c3c1d660b699b11c83ebb226..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Operator.tsx +++ /dev/null @@ -1,282 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Add visualization of PyTorch Ascend profiling. - *--------------------------------------------------------------------------------------------*/ - -import Card from '@material-ui/core/Card'; -import CardContent from '@material-ui/core/CardContent'; -import CardHeader from '@material-ui/core/CardHeader'; -import FormControlLabel from '@material-ui/core/FormControlLabel'; -import Grid from '@material-ui/core/Grid'; -import GridList from '@material-ui/core/GridList'; -import GridListTile from '@material-ui/core/GridListTile'; -import InputLabel from '@material-ui/core/InputLabel'; -import MenuItem from '@material-ui/core/MenuItem'; -import Radio from '@material-ui/core/Radio'; -import RadioGroup, { RadioGroupProps } from '@material-ui/core/RadioGroup'; -import Select, { SelectProps } from '@material-ui/core/Select'; -import { makeStyles } from '@material-ui/core/styles'; -import TextField, { StandardTextFieldProps, TextFieldProps } from '@material-ui/core/TextField'; -import * as React from 'react'; -import * as api from '../api'; -import { OperationTableData, OperationTableDataInner, OperatorGraph } from '../api'; -import { OperationGroupBy } from '../constants/groupBy'; -import { useSearchDirectly } from '../utils/search'; -import { topIsValid, UseTop, useTopN } from '../utils/top'; -import { PieChart } from './charts/PieChart'; -import { DataLoading } from './DataLoading'; -import { makeChartHeaderRenderer, useTooltipCommonStyles } from './helpers'; -import { OperationTable } from './tables/OperationTable'; -import { - deviceSelfTimeTooltip, - deviceSelfTimeTooltipAscend, - deviceTotalTimeTooltip, - deviceTotalTimeTooltipAscend, - hostSelfTimeTooltip, - hostTotalTimeTooltip, -} from './TooltipDescriptions'; - -const useStyles = makeStyles((theme) => ({ - root: { - flexGrow: 1, - }, - verticalInput: { - display: 'flex', - alignItems: 'center', - }, - inputWidth: { - width: '4em', - }, - inputWidthOverflow: { - minWidth: '15em', - whiteSpace: 'nowrap', - }, - full: { - width: '100%', - }, - description: { - marginLeft: theme.spacing(1), - }, -})); - -export interface IProps { - run: string; - worker: string; - span: string; - deviceTarget: string; -} - -export const Operator: React.FC = (props) => { - const { run, 
worker, span, deviceTarget } = props; - const classes = useStyles(); - const tooltipCommonClasses = useTooltipCommonStyles(); - const chartHeaderRenderer = React.useMemo( - () => makeChartHeaderRenderer(tooltipCommonClasses), - [tooltipCommonClasses] - ); - - const [operatorGraph, setOperatorGraph] = React.useState(undefined); - const [operatorTable, setOperatorTable] = React.useState(undefined); - const [sortColumn, setSortColumn] = React.useState(''); - const [tableTooltips, setTableTooltips] = React.useState(undefined); - const [groupBy, setGroupBy] = React.useState(OperationGroupBy.OPERATION); - const [searchOperatorName, setSearchOperatorName] = React.useState(''); - const [topText, actualTop, useTop, setTopText, setUseTop] = useTopN({ - defaultUseTop: UseTop.USE, - defaultTop: 10, - }); - - const getName = React.useCallback((row: OperationTableDataInner) => row.name, []); - const [searchedOperatorTable] = useSearchDirectly(searchOperatorName, getName, operatorTable); - - const onSearchOperatorChanged: TextFieldProps['onChange'] = (event) => { - setSearchOperatorName(event.target.value as string); - }; - - React.useEffect(() => { - if (operatorGraph) { - const counts = [ - operatorGraph.device_self_time?.rows.length ?? 0, - operatorGraph.device_total_time?.rows.length ?? 0, - operatorGraph.host_self_time.rows?.length ?? 0, - operatorGraph.host_total_time.rows?.length ?? 0, - ]; - setTopText(String(Math.min(Math.max(...counts), 10))); - } - }, [operatorGraph]); - - React.useEffect(() => { - api.defaultApi.operationTableGet(run, worker, span, groupBy).then((resp) => { - setSortColumn(resp.metadata.sort); - setTableTooltips(resp.metadata.tooltips); - setOperatorTable(resp.data); - }); - }, [run, worker, span, groupBy]); - - React.useEffect(() => { - api.defaultApi.operationGet(run, worker, span, groupBy).then((resp) => { - setOperatorGraph(resp); - }); - }, [run, worker, span, groupBy]); - - const onGroupByChanged: SelectProps['onChange'] = (event) => { - setGroupBy(event.target.value as OperationGroupBy); - }; - - const onUseTopChanged: RadioGroupProps['onChange'] = (event) => { - setUseTop(event.target.value as UseTop); - }; - - const onTopChanged = (event: React.ChangeEvent): void => { - setTopText(event.target.value); - }; - - const inputProps: StandardTextFieldProps['inputProps'] = { - min: 1, - }; - - const renderCharts = (graph: api.OperatorGraph): JSX.Element => { - return ( - - {graph.device_self_time && ( - - - {graph.device_self_time.title && ( - - )} - - - - )} - {graph.device_total_time && ( - - - {graph.device_total_time.title && ( - - )} - - - - )} - - - {graph.host_self_time.title && ( - - )} - - - - - - {graph.host_total_time.title && ( - - )} - - - - - ); - }; - - return ( -
- - - - - - - - } label='All operators' /> - } label='Top operators to show' /> - - - {useTop === UseTop.USE && ( - - - - )} - - - {renderCharts} - - - - - - Group By - - - - - - - - - - {(table): JSX.Element => ( - - )} - - - - - - -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Overview.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Overview.tsx deleted file mode 100644 index 6a81c567bc5e44b1dd6eb4746135d61268cadb81..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/Overview.tsx +++ /dev/null @@ -1,225 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import Card from '@material-ui/core/Card'; -import CardContent from '@material-ui/core/CardContent'; -import CardHeader from '@material-ui/core/CardHeader'; -import Grid from '@material-ui/core/Grid'; -import { makeStyles } from '@material-ui/core/styles'; -import { Table } from 'antd'; -import { ColumnsType } from 'antd/es/table'; -import * as React from 'react'; -import * as api from '../api'; -import { PieChart } from './charts/PieChart'; -import { SteppedAreaChart } from './charts/SteppedAreaChart'; -import { DataLoading } from './DataLoading'; -import { makeChartHeaderRenderer, useTooltipCommonStyles } from './helpers'; -import { TextListItem } from './TextListItem'; -import { stepTimeBreakDownTooltip } from './TooltipDescriptions'; -import { transformPerformanceIntoPie, transformPerformanceIntoTable } from './transform'; - -const topGraphHeight = 230; - -const useStyles = makeStyles((theme) => ({ - root: { - flexGrow: 1, - }, - pre: { - '& ul': { - margin: 0, - paddingLeft: theme.spacing(3), - ...theme.typography.body1, - }, - '& li': {}, - '& a': { - color: '#ffa726', - }, - '& a:active': { - color: '#ffa726', - }, - '& p': { - margin: 0, - ...theme.typography.subtitle1, - fontWeight: theme.typography.fontWeightBold, - }, - }, - topGraph: { - height: topGraphHeight + 40, - }, - table: { - height: '100%', - border: '1px solid #efefef', - '& .ant-table-tbody > tr': { - height: 20, - fontSize: '10pt', - '& > td': { - padding: '0 8px!important', - }, - }, - }, -})); - -export interface IProps { - run: string; - worker: string; - span: string; -} - -export const Overview: React.FC = (props) => { - const { run, worker, span } = props; - - const [steps, setSteps] = React.useState(undefined); - const [performances, setPerformances] = React.useState([]); - const [environments, setEnvironments] = React.useState([]); - const [gpuMetrics, setGpuMetrics] = React.useState(undefined); - const [recommendations, setRecommendations] = React.useState(''); - const [columns, setColumns] = React.useState>([]); - - const tableRows = React.useMemo(() => { - let dataInfo: api.Graph = transformPerformanceIntoTable(performances); - if (dataInfo.columns.length < 3) { - return []; - } - const stringCompare = (a: string, b: string): number => a.localeCompare(b); - const numberCompare = (a: number, b: number): number => a - b; - let column: any[] = dataInfo.columns.map((item) => { - return { - title: item.name, - key: item.name, - dataIndex: item.name, - sorter: - item.type === 'string' - ? 
(a: any, b: any): number => stringCompare(a[item.name], b[item.name]) - : (a: any, b: any): number => numberCompare(a[item.name], b[item.name]), - }; - }); - setColumns(column); - return dataInfo.rows.map((row, index) => { - if (row.length < 3) { - return null; - } - return { - key: index, - [dataInfo.columns[0].name]: row[0], - [dataInfo.columns[1].name]: row[1], - [dataInfo.columns[2].name]: row[2], - }; - }); - }, [performances]); - - const synthesizedPieGraph = React.useMemo(() => { - return transformPerformanceIntoPie(performances); - }, [performances]); - - React.useEffect(() => { - api.defaultApi.overviewGet(run, worker, span).then((resp) => { - setPerformances(resp.performance); - setEnvironments(resp.environments); - setSteps(resp.steps); - setRecommendations(resp.recommendations); - setGpuMetrics(resp.gpu_metrics); - }); - }, [run, worker, span]); - - const classes = useStyles(); - const tooltipCommonClasses = useTooltipCommonStyles(); - const chartHeaderRenderer = React.useMemo( - () => makeChartHeaderRenderer(tooltipCommonClasses, false), - [tooltipCommonClasses] - ); - - const stepTimeBreakDownTitle = React.useMemo( - () => chartHeaderRenderer('Step Time Breakdown', stepTimeBreakDownTooltip), - [tooltipCommonClasses, chartHeaderRenderer] - ); - - const cardSizes = gpuMetrics ? ([2, 3, 7] as const) : ([4, undefined, 8] as const); - - return ( -
- - - - {React.useMemo( - () => ( - - - - {environments.map((environment) => ( - - ))} - - - ), - [environments] - )} - - {gpuMetrics && ( - - - - - {gpuMetrics.data.map((metric) => ( - - ))} - - - - )} - - - - - - -
- - - - - - - - - - - - - - - - {(graph): JSX.Element => ( - - )} - - - - - - - - - - -
-
-
- - - - - -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TextListItem.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TextListItem.tsx deleted file mode 100644 index 59eb79c2a8f05cc750d264880bb66ab646c4bbb4..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TextListItem.tsx +++ /dev/null @@ -1,82 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import Grid from '@material-ui/core/Grid'; -import { makeStyles } from '@material-ui/core/styles'; -import * as React from 'react'; - -export interface IStylesProps { - root?: string; - name?: string; -} - -export interface IProps { - name: string; - value?: string; - description?: string; - extra?: string; - classes?: IStylesProps; - dangerouslyAllowHtml?: boolean; -} - -const useStyles = makeStyles((theme) => ({ - label: { - ...theme.typography.subtitle2, - fontWeight: 'bolder', - }, - value: { - textAlign: 'right', - ...theme.typography.subtitle2, - fontWeight: 'bolder', - }, -})); - -export const TextListItem: React.FC = (props) => { - const classes = useStyles(); - - const getSizes = function (): readonly any[] { - if (props.value && props.extra) { - return [4, 4, 4] as const; - } - if (props.value) { - if (props.value.length > props.name.length) { - return [4, 8, undefined] as const; - } - return [8, 4, undefined] as const; - } - return [12, undefined, undefined] as const; - }; - - const sizes = getSizes(); - - const renderSpan = function (content: string, className?: string): React.JSX.Element { - if (props.dangerouslyAllowHtml) { - return ; - } - return {content}; - }; - - return ( - - - - - {renderSpan(props.name, props.classes?.name)} - - {props.description && {renderSpan(props.description)}} - - - {props.value && ( - - {renderSpan(props.value)} - - )} - {props.extra && ( - - {renderSpan(props.extra)} - - )} - - ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TooltipDescriptions.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TooltipDescriptions.ts deleted file mode 100644 index 6d3631fee97a4dd8da5ebde1550573d8c6e501fa..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TooltipDescriptions.ts +++ /dev/null @@ -1,38 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -export const stepTimeBreakDownTooltip = `The time spent on each step is broken down into multiple categories as follows: -Kernel: Kernels execution time on GPU device; -Memcpy: GPU involved memory copy time (either D2D, D2H or H2D); -Memset: GPU involved memory set time; -Runtime: CUDA runtime execution time on host side; Such as cudaLaunchKernel, cudaMemcpyAsync, cudaStreamSynchronize, ... 
-DataLoader: The data loading time spent in PyTorch DataLoader object; -CPU Exec: Host compute time, including every PyTorch operator running time; -Other: The time not included in any of the above.`; - -export const deviceSelfTimeTooltip = `The accumulated time spent on GPU, not including this operator’s child operators.`; - -export const deviceSelfTimeTooltipAscend = `The accumulated time spent on NPU, not including this operator’s child operators.`; - -export const deviceTotalTimeTooltip = `The accumulated time spent on GPU, including this operator’s child operators.`; - -export const deviceTotalTimeTooltipAscend = `The accumulated time spent on NPU, including this operator’s child operators.`; - -export const hostSelfTimeTooltip = `The accumulated time spent on Host, not including this operator’s child operators.`; - -export const hostTotalTimeTooltip = `The accumulated time spent on Host, including this operator’s child operators.`; - -export const gpuKernelTotalTimeTooltip = `The accumulated time of all calls of this kernel.`; - -export const tensorCoresPieChartTooltip = `The accumulated time of all kernels using or not using Tensor Cores.`; - -export const tensorCoresPieChartTooltipAscend = `The accumulated time of all kernels group by Accelerator Core.`; - -export const distributedGpuInfoTableTooltip = `Information about GPU hardware used during the run.`; - -export const distributedOverlapGraphTooltip = `The time spent on computation vs communication.`; - -export const distributedWaittimeGraphTooltip = `The time spent waiting vs communicating between devices.`; - -export const distributedCommopsTableTooltip = `Statistics for operations managing communications between nodes.`; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TraceView.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TraceView.tsx deleted file mode 100644 index be499794936a085ed72740eea8bac5f33df37171..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/TraceView.tsx +++ /dev/null @@ -1,79 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. 
- *--------------------------------------------------------------------------------------------*/ - -import ClickAwayListener from '@material-ui/core/ClickAwayListener'; -import { makeStyles } from '@material-ui/core/styles'; -import * as React from 'react'; -import * as api from '../api'; - -export interface IProps { - run: string; - worker: string; - span: string; - iframeRef: React.RefObject; -} - -const useStyles = makeStyles(() => ({ - root: { - flexGrow: 1, - }, - frame: { - width: '100%', - height: 'calc(100vh - 48px)', - border: 'none', - }, -})); - -export const TraceView: React.FC = (props) => { - const { run, worker, span, iframeRef } = props; - const classes = useStyles(); - - const [traceData, setTraceData] = React.useState | null>(null); - const [traceViewReady, setTraceViewReady] = React.useState(false); - - React.useEffect(() => { - setTraceData( - api.defaultApi.traceGet(run, worker, span).then((resp) => { - return JSON.stringify(resp); - }) - ); - }, [run, worker, span]); - - React.useEffect(() => { - function callback(event: MessageEvent): void { - const data = event.data || {}; - if (data.msg === 'ready') { - setTraceViewReady(true); - } - } - - window.addEventListener('message', callback); - return () => { - window.removeEventListener('message', callback); - }; - }, []); - - React.useEffect(() => { - if (traceData && traceViewReady) { - traceData.then((data) => { - iframeRef.current?.contentWindow?.postMessage({ msg: 'data', data }, window.origin); - }); - } - }, [traceData, traceViewReady]); - const setIframeActive = (): void => { - iframeRef.current?.focus(); - }; - return ( -
- {React.useMemo( - () => ( - - - - ), - [] - )} -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/AntTableChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/AntTableChart.tsx deleted file mode 100644 index 83618064b55223ab06d4d1fec8b8b5eeab8d3268..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/AntTableChart.tsx +++ /dev/null @@ -1,111 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import { makeStyles } from '@material-ui/core/styles'; -import { Table } from 'antd'; -import * as React from 'react'; -import { Graph } from '../../api'; - -interface IProps { - graph: Graph; - sortColumn?: string; - initialPageSize?: number; - onRowSelected?: (record?: object, rowIndex?: number) => void; -} - -const useStyles = makeStyles((theme) => ({ - tooltip: { - whiteSpace: 'pre-wrap', - }, - row: { - wordBreak: 'break-word', - }, -})); - -const getTableColumns = function (columns: any, sort: string | undefined, tooltipClass: string): any { - let i = 0; - return columns.map((col: any) => { - const key = `col${i++}`; - const stringCompare = (a: any, b: any): number => a[key].localeCompare(b[key]); - const numberCompare = (a: any, b: any): number => (a[key] || 0) - (b[key] || 0); - return { - dataIndex: key, - key: key, - title: col.name, - sorter: col.type === 'string' ? stringCompare : numberCompare, - defaultSortOrder: sort === col.name ? ('descend' as const) : undefined, - showSorterTooltip: col.tooltip ? { title: col.tooltip, overlayClassName: tooltipClass } : true, - }; - }); -}; - -const getTableRows = function (rows: any): any { - return rows.map((row: any) => { - let i = 0; - const res: any = {}; - row.forEach((entry: any) => { - res[`col${i++}`] = entry; - }); - return res; - }); -}; - -export const AntTableChart: React.FC = (props) => { - const { graph, sortColumn, initialPageSize, onRowSelected } = props; - const classes = useStyles(props); - - const rows = React.useMemo(() => getTableRows(graph.rows), [graph.rows]); - - const columns = React.useMemo( - () => getTableColumns(graph.columns, sortColumn, classes.tooltip), - [graph.columns, sortColumn, classes.tooltip] - ); - - // key is used to reset the Table state (page and sort) if the columns change - const key: string = React.useMemo(() => `${Math.random()}`, [graph.columns]); - - const [pageSize, setPageSize] = React.useState(initialPageSize ?? 30); - const onShowSizeChange = (current: number, size: number): void => { - setPageSize(size); - }; - - const onRow = ( - record: object, - rowIndex?: number - ): { - onMouseEnter: (event: any) => void; - onMouseLeave: (event: any) => void; - } => { - return { - onMouseEnter: (event: any): void => { - if (onRowSelected) { - onRowSelected(record, rowIndex); - } - }, - onMouseLeave: (event: any): void => { - if (onRowSelected) { - onRowSelected(undefined, undefined); - } - }, - }; - }; - - return ( -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/AreaChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/AreaChart.tsx deleted file mode 100644 index cda12860c2fba41f5a15c5d9e73fb92093c0371b..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/AreaChart.tsx +++ /dev/null @@ -1,72 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import { makeStyles } from '@material-ui/core/styles'; -import * as React from 'react'; -import { Graph } from '../../api'; -import { useResizeEventDependency } from '../../utils/resize'; - -interface IProps { - graph: Graph; - height?: number; - hAxisTitle?: string; -} - -const useStyles = makeStyles(() => ({ - root: { - height: (props: Pick): number | undefined => props.height, - }, -})); - -export const AreaChart: React.FC = (props) => { - const { graph, height = 400, hAxisTitle } = props; - const classes = useStyles({ height }); - const graphRef = React.useRef(null); - const [resizeEventDependency] = useResizeEventDependency(); - - React.useLayoutEffect(() => { - const element = graphRef.current; - if (!element) { - return undefined; - } - - const data = new google.visualization.DataTable(); - data.addColumn('string', 'step'); - graph.columns.forEach((column) => { - data.addColumn({ - type: column.type, - label: column.name, - role: column.role, - p: column.p, - }); - }); - data.addRows(graph.rows.map((x, i) => [(i + 1).toString(), ...x])); - - const options = { - title: graph.title, - isStacked: true, - height, - legend: { position: 'bottom' }, - tooltip: { isHtml: true }, - chartArea: { left: '15%', width: '80%', top: '10%' }, - hAxis: { - title: hAxisTitle, - }, - }; - - const chart = new google.visualization.AreaChart(element); - - chart.draw(data, options); - - return () => { - chart.clearChart(); - }; - }, [graph, height, resizeEventDependency]); - - return ( -
- <div ref={graphRef}></div>
- </div>
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/ColumnChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/ColumnChart.tsx deleted file mode 100644 index ae51dc1a34e94b1c91eab2fe502ffe2cbc20f618..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/ColumnChart.tsx +++ /dev/null @@ -1,118 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Offer offline supporting. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { useResizeEventDependency } from '../../utils/resize'; -import * as echarts from 'echarts'; - -interface IProps { - title?: string; - units?: string; - colors?: Array; - chartData: ColumnChartData; -} - -export interface ColumnChartData { - legends: Array; - barLabels: Array; - barHeights: Array>; -} - -export const ColumnChart: React.FC = (props) => { - const { title, units, colors, chartData } = props; - const { legends, barLabels, barHeights } = chartData; - const graphRef = React.useRef(null); - const [resizeEventDependency] = useResizeEventDependency(); - - const getAngleByDataLength = (data: number): number => { - if (data < 10) { - return 0; - } else { - // 数量越大越趋近于旋转90度 - return 90 * (1 - (10 / data)); - } - }; - - React.useLayoutEffect(() => { - const element = graphRef.current; - if (!element) { - return undefined; - } - - const chart = echarts.init(element); - const dataSource: Array> = []; - dataSource.push(['worker', ...legends]); - barHeights.forEach((item, index) => { - if (barLabels[index] !== undefined) { - dataSource.push([barLabels[index], ...item]); - } - }); - const options: echarts.EChartsOption = { - title: { - text: title, - }, - legend: { - bottom: 0, - }, - xAxis: { - type: 'category', - axisLabel: { - interval: 0, - rotate: getAngleByDataLength(barLabels.length), - formatter: (name: string) => { - const index = name.indexOf('@'); - const processedName = index > -1 ? name.slice(index + 1) : name; // 使用新变量处理 - return processedName.length > 16 ? `${processedName.slice(0, 14)}...` : processedName; - }, - }, - }, - yAxis: { - type: 'value', - name: units, - nameTextStyle: { - fontSize: 16, - }, - }, - tooltip: { - trigger: 'item', - }, - dataset: { - source: dataSource, - }, - series: Array(legends.length).fill({ - type: 'bar', - stack: 'samesign', - }), - }; - if (colors) { - options.color = colors.slice(0, barLabels.length); - } - - if (options) { - chart.setOption(options, true); - } - return () => { - chart.dispose(); - }; - }, [title, chartData, resizeEventDependency]); - - return
; -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx deleted file mode 100644 index a6e222a6cc9d04b3b0c9031be60b91b75fe9ab37..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/NewLineChart.tsx +++ /dev/null @@ -1,386 +0,0 @@ -/*-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { Graph, GraphAscend } from '../../api'; -import { useResizeEventDependency } from '../../utils/resize'; -import { binarySearch } from '../../utils/binarysearch'; -import * as echarts from 'echarts'; - -interface IProps { - graph: Graph | GraphAscend; - height?: number; - deviceTarget: string; - tag: string; - hAxisTitle?: string; - vAxisTitle?: string; - onSelectionChanged?: (start: number, end: number) => void; - record?: any; -} - -export const LineChart: React.FC = (props) => { - const { graph, height = 400, deviceTarget, tag, hAxisTitle, vAxisTitle, onSelectionChanged, record } = props; - const graphRef = React.useRef(null); - const [resizeEventDependency] = useResizeEventDependency(); - const [chartObj, setChartObj] = React.useState(); - const selectedPoints = React.useRef>([]); - - React.useLayoutEffect(() => { - const element = graphRef.current; - if (!element) { - return undefined; - } - element.oncontextmenu = (): boolean => { - return false; - }; - - let myChart = echarts.init(element); - - let option: echarts.EChartsOption = { - title: { - text: graph.title, - textStyle: { - fontSize: 16, - }, - }, - tooltip: { trigger: 'axis' }, - legend: { - type: 'scroll', - bottom: 0, - }, - xAxis: { - type: 'category', - boundaryGap: false, - name: hAxisTitle, - }, - yAxis: { - type: 'value', - name: vAxisTitle, - scale: true, - }, - toolbox: { - feature: { - dataZoom: { - yAxisIndex: 'none', - }, - restore: {}, - }, - }, - }; - - if (deviceTarget === 'Ascend') { - if (tag === 'Component') { - const mixedTooltip: echarts.TooltipComponentOption = { - trigger: 'axis', - formatter: function (params: any) { - let res = `${params[0].name}
`; - for (const item of params) { - if (typeof item.value[item.encode.y[0]] === 'number') { - res += ` - - ${item.seriesName}: ${item.value[item.encode.y[0]]}
`; - } - } - return res; - }, - }; - if (graph.columns.length <= 4) { - let finalRows = graph.rows.PTA ?? graph.rows.GE; - if (graph.columns.length === 4) { - const mergedAPPRows = graph.rows.APP.map((item: Array) => { - return [item[0], null, null, item[1]]; - }); - finalRows = finalRows.concat(mergedAPPRows).sort((a: any, b: any) => { - return a[0] - b[0]; - }); - } - option = { - ...option, - tooltip: mixedTooltip, - dataset: { - source: [graph.columns.map((column) => column.name), ...finalRows], - }, - series: Array(graph.columns.length - 1).fill({ - type: 'line', - select: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - emphasis: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - selectedMode: 'single', - }), - }; - } else if (graph.columns.length <= 6) { - const datasetTitle = graph.columns.map((item) => item.name); - let mergedGERows = graph.rows.GE.map((item: Array) => { - return [item[0], null, null, item[1], item[2]]; - }); - if (graph.columns.length === 6) { - const mergedAPPRows = graph.rows.APP.map((item: Array) => { - return [item[0], null, null, null, null, item[2]]; - }); - mergedGERows = mergedGERows.concat(mergedAPPRows); - } - const finalRows = graph.rows.PTA.concat(mergedGERows).sort((a: any, b: any) => { - return a[0] - b[0]; - }); - option = { - ...option, - tooltip: mixedTooltip, - dataset: { - source: [datasetTitle, ...finalRows], - }, - series: Array(graph.columns.length - 1).fill({ - type: 'line', - connectNulls: true, - select: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - emphasis: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - selectedMode: 'single', - datasetIndex: 0, - }), - }; - } - } else { - if (graph.columns.length === 3) { - const datasetTitle1: Array = []; - const datasetTitle2: Array = []; - graph.columns.forEach((column, index) => { - if (index === 0 || index < 2) { - datasetTitle1.push(column.name); - } - if (index === 0 || index >= 2) { - datasetTitle2.push(column.name); - } - }); - option = { - ...option, - dataset: [ - { - source: [datasetTitle1, ...graph.rows.Allocated], - }, - { - source: [datasetTitle2, ...graph.rows.Reserved], - }, - ], - series: [ - { - type: 'line', - name: 'Allocated', - emphasis: { - label: { - show: true, - }, - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - select: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - datasetIndex: 0, - }, - { - type: 'line', - name: 'Reserved', - select: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - emphasis: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - selectedMode: 'single', - datasetIndex: 1, - }, - ], - }; - } - } - } else { - option = { - ...option, - dataset: { - source: [graph.columns.map((column) => column.name), ...graph.rows], - }, - series: [ - { - type: 'line', - name: 'Allocated', - select: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - emphasis: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - selectedMode: 'single', - }, - { - type: 'line', - name: 'Reserved', - select: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - emphasis: { - itemStyle: { - borderWidth: 5, - shadowBlur: 5, - }, - }, - selectedMode: 'single', - }, - ], - }; - } - - if (option) { - myChart.setOption(option, true); - } - myChart.dispatchAction({ - type: 'takeGlobalCursor', - key: 'dataZoomSelect', - dataZoomSelectActive: true, - }); - - myChart.on('dataZoom', (param: any) => { - if (onSelectionChanged) { - 
onSelectionChanged(param.batch[0].startValue, param.batch[0].endValue); - } - }); - - myChart.on('restore', () => { - if (onSelectionChanged) { - // Set startId greater than endId to query all memory events. - onSelectionChanged(0, -1); - } - }); - - myChart.on('click', (param) => { - myChart.dispatchAction({ - type: 'unselect', - seriesId: param.seriesId, - dataIndex: selectedPoints.current, - }); - myChart.dispatchAction({ - type: 'select', - seriesId: param.seriesId, - dataIndex: param.dataIndex, - }); - - selectedPoints.current = [param.dataIndex]; - }); - - myChart.getZr().on('contextmenu', () => { - myChart.dispatchAction({ - type: 'restore', - }); - myChart.dispatchAction({ - type: 'takeGlobalCursor', - key: 'dataZoomSelect', - dataZoomSelectActive: true, - }); - }); - - setChartObj(myChart); - return () => { - myChart.dispose(); - }; - }, [graph, height, resizeEventDependency]); - - React.useEffect(() => { - const compareFn = (key: number, mid: Array): number => key - mid[0]; - if (chartObj && tag === 'Operator') { - if (record) { - let startId = -1; - let endId = -1; - if (deviceTarget === 'Ascend') { - startId = binarySearch(graph.rows.Allocated, record.col2, compareFn); - endId = binarySearch(graph.rows.Allocated, record.col3, compareFn); - } else { - startId = binarySearch(graph.rows, record.col2, compareFn); - endId = binarySearch(graph.rows, record.col3, compareFn); - } - let selection = []; - if (startId >= 0) { - selection.push(startId); - } - if (endId >= 0) { - selection.push(endId); - } - chartObj.dispatchAction({ - type: 'downplay', - seriesName: 'Allocated', - dataIndex: selectedPoints.current, - }); - chartObj.dispatchAction({ - type: 'highlight', - seriesName: 'Allocated', - dataIndex: selection, - }); - selectedPoints.current = selection; - } else { - chartObj.dispatchAction({ - type: 'downplay', - seriesName: 'Allocated', - dataIndex: selectedPoints.current, - }); - selectedPoints.current = []; - } - } - }, [graph, record, chartObj]); - - return
; -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/PieChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/PieChart.tsx deleted file mode 100644 index 49c59ff02e91f7b7fe0d90ddff4239478ca19a0a..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/PieChart.tsx +++ /dev/null @@ -1,173 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Offer offline supporting. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { Graph } from '../../api'; -import { value } from '../../utils'; -import { useResizeEventDependency } from '../../utils/resize'; -import * as echarts from 'echarts'; - -interface IProps { - graph: Graph; - height?: number; - top?: number; - noLegend?: boolean; - title?: string; - colors?: Array; - tooltipMode?: string; -} - -interface IAreaPosition { - left: string; - width: string; - top?: string; - height?: string; -} - -const noLegendArea: IAreaPosition = { - left: '5%', - width: '90%', - top: '5%', - height: '90%', -}; -const normalArea: IAreaPosition = { left: '5%', width: '95%' }; -const noTitleArea: IAreaPosition = { - left: '5%', - width: '95%', - top: '10%', - height: '80%', -}; - -export const PieChart: React.FC = (props) => { - const { graph, height = 300, top, noLegend, title, colors, tooltipMode = 'both' } = props; - const graphRef = React.useRef(null); - - const [resizeEventDependency] = useResizeEventDependency(); - - React.useLayoutEffect(() => { - const element = graphRef.current; - if (!element) { - return undefined; - } - - const chart = echarts.init(element); - - let totalValue = 0; - const rowsWithUniqueName: Array<{ name: string; value: number }> = - top === undefined - ? graph.rows.map((item, index) => { - totalValue += item[1] as number; - return { name: `${index}_${item[0]}`, value: item[1] as number }; - }) - : graph.rows - .sort((a, b) => (value(b[1]) as number) - (value(a[1]) as number)) - .slice(0, top) - .map((item, index) => { - totalValue += item[1] as number; - return { name: `${index}_${item[0]}`, value: item[1] as number }; - }); - - const option: echarts.EChartsOption = { - height, - width: '100%', - title: { - text: title, - }, - tooltip: { - trigger: 'item', - formatter: (data) => { - const typedData = data as echarts.DefaultLabelFormatterCallbackParams; - const index = typedData.name.indexOf('_'); - const safeName = typedData.name.replace(//g, '>'); - return `${index > -1 ? safeName.slice(index + 1) : safeName}
${ - tooltipMode === 'both' ? typedData.value : '' - }(${typedData.percent}%)`; - }, - confine: true, - extraCssText: `max-width: 300px; - word-wrap:break-word; - white-space:pre-wrap; - padding-right: 10px`, - }, - chartArea: ((): IAreaPosition => { - if (noLegend) { - return noLegendArea; - } - if (!title) { - return noTitleArea; - } else { - return normalArea; - } - })(), - legend: { - type: noLegend ? 'plain' : 'scroll', - orient: 'vertical', - left: 'right', - z: 10, - // Display at most 36 characters. - formatter: (name) => { - // Show legends for datas with the same name. - const index = name.indexOf('_'); - const processedName = index > -1 ? name.slice(index + 1) : name; // 使用新变量处理 - return processedName.length > 36 ? `${processedName.slice(0, 34)}...` : processedName; - }, - tooltip: { - show: true, - triggerOn: 'mousemove', - formatter: (data) => { - const currentItem = rowsWithUniqueName.find((item) => item.name === data.name); - const index = data.name.indexOf('_'); - const percent = (((currentItem?.value || 0) * 100) / totalValue).toFixed(2); - const safeName = data.name.replace(//g, '>'); - return `${index > -1 ? safeName.slice(index + 1) : safeName}
${ - tooltipMode === 'both' ? currentItem?.value || 0 : '' - }(${percent}%)`; - }, - }, - }, - sliceVisibilityThreshold: 0, - colors, - series: [ - { - type: 'pie', - radius: ['32%', '80%'], - center: ['32%', '50%'], - label: { - position: 'inside', - formatter: `{d}%`, - color: '#ffffff', - }, - data: rowsWithUniqueName, - }, - ], - }; - - if (option) { - chart.setOption(option, true); - } - - return () => { - chart.dispose(); - }; - }, [graph, height, top, resizeEventDependency]); - - return
; -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/SteppedAreaChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/SteppedAreaChart.tsx deleted file mode 100644 index 3e3b01ccb112aeb80795246bd6f3e2ad83aa2a66..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/SteppedAreaChart.tsx +++ /dev/null @@ -1,106 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Offer offline supporting. - *--------------------------------------------------------------------------------------------*/ - -import { makeStyles } from '@material-ui/core/styles'; -import * as React from 'react'; -import { StepedGraph } from '../../api'; -import { useResizeEventDependency } from '../../utils/resize'; -import * as echarts from 'echarts'; - -interface IProps { - graph: StepedGraph; - height?: number; - hAxisTitle?: string; - vAxisTitle?: string; -} - -const useStyles = makeStyles(() => ({ - root: { - height: (props: Pick): number | undefined => props.height, - }, -})); - -export const SteppedAreaChart: React.FC = (props) => { - const { graph, height = 400, hAxisTitle, vAxisTitle } = props; - const classes = useStyles({ height }); - const graphRef = React.useRef(null); - const [resizeEventDependency] = useResizeEventDependency(); - - React.useLayoutEffect(() => { - const element = graphRef.current; - if (!element) { - return undefined; - } - - const chart = echarts.init(element); - const dataSource: Array> = []; - dataSource.push(graph.columns); - graph.rows.forEach((row) => { - dataSource.push(row.map((item) => item.value)); - }); - const options: echarts.EChartsOption = { - title: { - text: graph.title, - }, - legend: { - bottom: 0, - }, - xAxis: { - type: 'category', - name: hAxisTitle, - axisLabel: { - interval: 0, - }, - }, - yAxis: { - type: 'value', - name: vAxisTitle, - }, - tooltip: { - trigger: 'item', - formatter: (params: any) => { - return graph.rows[params.dataIndex][params.seriesIndex + 1]?.tooltip || ''; - }, - }, - dataset: { - source: dataSource, - }, - series: Array(graph.columns.length - 1).fill({ - type: 'bar', - stack: 'samesign', - }), - }; - - if (options) { - chart.setOption(options, true); - } - - return () => { - chart.dispose(); - }; - }, [graph, height, resizeEventDependency]); - - return ( -
- <div ref={graphRef}></div>
- </div>
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/TableChart.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/TableChart.tsx deleted file mode 100644 index 444b41b196c162340b846ac488d70eb908c7b717..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/charts/TableChart.tsx +++ /dev/null @@ -1,85 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import { makeStyles } from '@material-ui/core/styles'; -import * as React from 'react'; -import { Graph } from '../../api'; -import { useResizeEventDependency } from '../../utils/resize'; - -interface IProps { - graph: Graph; - sortColumn?: number; - height?: number; - allowHtml?: boolean; - setCellProperty?: (row: number, column: number, cb: (key: string, value: any) => void) => void; -} - -const useStyles = makeStyles(() => ({ - root: { - height: (props: IProps): number | undefined => props.height, - }, -})); - -export const TableChart: React.FC = (props) => { - const { graph, sortColumn, setCellProperty, allowHtml } = props; - const classes = useStyles(props); - const graphRef = React.useRef(null); - const [resizeEventDependency] = useResizeEventDependency(); - - React.useLayoutEffect(() => { - const element = graphRef.current; - if (!element || !element.parentElement) { - return; - } - - const data = new google.visualization.DataTable(); - graph.columns.forEach((column) => { - data.addColumn({ - type: column.type, - label: column.name, - role: column.role, - p: column.p, - }); - }); - data.addRows(graph.rows); - - if (setCellProperty) { - for (let row = 0; row < graph.rows.length; ++row) { - for (let column = 0; column < graph.columns.length; ++column) { - setCellProperty(row, column, (key: string, value: any) => { - data.setProperty(row, column, key, value); - }); - } - } - } - - const options = { - width: '100%', - height: '100%', - page: 'enable', - allowHtml, - pageSize: 30, - tooltip: { isHtml: true }, - sortColumn: sortColumn, - sortAscending: false, - }; - - const chart = new google.visualization.Table(element); - - /* `chart.draw()` removes the contents of `element` and rebuilds it. This can cause a jump in the scroll position - * if the height/width change to 0. Since we can't change the code of Google Charts, we temporarily lock the dims - * of the parent container. */ - if (element.offsetHeight > 0) { - element.parentElement.style.height = `${element.offsetHeight}px`; - } - chart.draw(data, options); - element.parentElement.style.height = ''; - }, [graph, resizeEventDependency]); - - return ( -
- <div ref={graphRef}></div>
- </div>
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/helpers.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/helpers.tsx deleted file mode 100644 index bfbb346e4b3daf65247e6e954346ed7245993f31..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/helpers.tsx +++ /dev/null @@ -1,41 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import { makeStyles } from '@material-ui/core/styles'; -import Tooltip from '@material-ui/core/Tooltip'; -import HelpOutline from '@material-ui/icons/HelpOutline'; -import clsx from 'clsx'; -import * as React from 'react'; - -export const useTooltipCommonStyles = makeStyles((theme) => ({ - tooltip: { - maxWidth: '600px', - whiteSpace: 'pre-wrap', - fontSize: '14px', - }, - cardTitle: { - display: 'flex', - alignItems: 'center', - }, - titleText: { - marginRight: theme.spacing(0.5), - }, - smallTitleText: { - fontSize: '.8rem', - fontWeight: 'bold', - }, -})); - -export const makeChartHeaderRenderer = - (classes: ReturnType, smallTitleText = true) => - (title: string, tooltip: string): JSX.Element => { - return ( - - {title} - - - - - ); - }; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/CallFrameList.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/CallFrameList.tsx deleted file mode 100644 index 0334d29e511399664d5204224e47cf1b88d50655..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/CallFrameList.tsx +++ /dev/null @@ -1,35 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { CallStackFrame } from './transform'; -import { List } from 'antd'; -import { NavToCodeButton } from './NavToCodeButton'; -import { makeStyles } from '@material-ui/core/styles'; - -interface IProps { - callFrames: CallStackFrame[]; -} - -const useStyles = makeStyles(() => ({ - item: { - paddingTop: '1px !important', - paddingBottom: '1px !important', - }, -})); - -export const CallFrameList = (props: IProps): React.JSX.Element => { - const classes = useStyles(); - - const renderItem = React.useCallback( - (item: CallStackFrame) => ( - - - - ), - [classes.item] - ); - - return ; -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/CallStackTable.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/CallStackTable.tsx deleted file mode 100644 index c3176428d11b8b40c691947b2f0da8fc15674c16..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/CallStackTable.tsx +++ /dev/null @@ -1,103 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Add visualization of PyTorch Ascend profiling. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { makeStyles } from '@material-ui/core/styles'; -import { CallStackTableData, OperationTableDataInner } from '../../api'; -import { Table, TableProps } from 'antd'; - -import * as api from '../../api'; -import { transformTableData, TransformedCallStackDataInner } from './transform'; -import { attachId, getCommonOperationColumns } from './common'; -import { OperationGroupBy } from '../../constants/groupBy'; -import { makeExpandIcon } from './ExpandIcon'; -import { CallFrameList } from './CallFrameList'; - -export interface IProps { - data: OperationTableDataInner; - run: string; - worker: string; - span: string; - groupBy: OperationGroupBy; - deviceTarget: string; -} - -const useStyles = makeStyles((theme) => ({ - tooltip: { - whiteSpace: 'pre-wrap', - }, -})); - -const expandIcon = makeExpandIcon( - 'View call frames', - (record) => !record.callStackFrames.length -); - -const rowExpandable = (record: TransformedCallStackDataInner): boolean => !!record.callStackFrames.length; -const expandedRowRender = (record: TransformedCallStackDataInner): React.JSX.Element => ( - -); - -export const CallStackTable = (props: IProps): React.JSX.Element => { - const { data, run, worker, span, groupBy, deviceTarget } = props; - const { name, input_shape } = data; - const classes = useStyles(props); - - const [stackData, setStackData] = React.useState(undefined); - const [tooltips, setTooltips] = React.useState(); - - React.useEffect(() => { - api.defaultApi.operationStackGet(run, worker, span, groupBy, name, input_shape).then((resp) => { - setTooltips(resp.metadata.tooltips); - setStackData(resp.data); - }); - }, [name, input_shape, run, worker, span, groupBy]); - - const transformedData = React.useMemo(() => stackData && transformTableData(attachId(stackData)), [stackData]); - - const columns = React.useMemo( - () => transformedData && getCommonOperationColumns(transformedData, deviceTarget, undefined, tooltips, classes), - [transformedData] - ); - - const expandIconColumnIndex = columns?.length; - - const expandable: TableProps['expandable'] = React.useMemo( - () => ({ - expandIconColumnIndex, - expandIcon, - expandedRowRender, - rowExpandable, - }), - [expandIconColumnIndex] - ); - - return ( -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/ExpandIcon.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/ExpandIcon.tsx deleted file mode 100644 index 422bb781630c24c6dc4915c3aed8c1f341dba363..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/ExpandIcon.tsx +++ /dev/null @@ -1,35 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { Button, TableProps } from 'antd'; -import { OperationTableDataInner, CallStackTableDataInner } from '../../api'; -import { Arguments } from '../../utils/type'; - -type Types = NonNullable['expandable']>['expandIcon']; -type BasePropType = Arguments>>[0]; -type PropType = BasePropType & { text: string; disabled?: boolean }; - -export function ExpandIcon( - props: PropType -): React.JSX.Element { - const onClick = (e: React.MouseEvent): void => { - props.onExpand(props.record, e); - }; - - return ( - - ); -} - -export function makeExpandIcon( - text: string, - disabled?: (v: T) => boolean -) { - return (props: BasePropType): React.JSX.Element => ( - - ); -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/MemoryStatsTable.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/MemoryStatsTable.tsx deleted file mode 100644 index c7e1809a3c0b58297ca99066243cf7d65fbe4c8c..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/MemoryStatsTable.tsx +++ /dev/null @@ -1,77 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { Table } from 'antd'; -import { makeStyles } from '@material-ui/core'; - -export interface IProps { - data: any; - sort: string; -} - -const useStyles = makeStyles((theme) => ({ - tooltip: { - whiteSpace: 'pre-wrap', - }, -})); - -const getMemoryStatsTableColumns = function (columns: any, sort: string, tooltipClass: string): any { - let i = 0; - return columns.map((col: any) => { - const key = `col${i++}`; - const stringCompare = (a: any, b: any): number => a[key].localeCompare(b[key]); - const numberCompare = (a: any, b: any): number => (a[key] || 0) - (b[key] || 0); - return { - dataIndex: key, - key: key, - title: col.name, - sorter: col.type === 'string' ? stringCompare : numberCompare, - defaultSortOrder: sort === col.name ? ('descend' as const) : undefined, - showSorterTooltip: col.tooltip ? 
{ title: col.tooltip, overlayClassName: tooltipClass } : true, - }; - }); -}; - -const getMemoryStatsTableRows = function (rows: any): any { - return rows.map((row: any) => { - let i = 0; - const res: any = {}; - row.forEach((entry: any) => { - res[`col${i++}`] = entry; - }); - return res; - }); -}; - -export const MemoryStatsTable = (props: IProps): React.JSX.Element => { - const { data, sort } = props; - const classes = useStyles(); - - const rows = React.useMemo(() => getMemoryStatsTableRows(data.rows), [data.rows]); - - const columns = React.useMemo( - () => getMemoryStatsTableColumns(data.columns, sort, classes.tooltip), - [data.columns, sort, classes.tooltip] - ); - - const [pageSize, setPageSize] = React.useState(30); - const onShowSizeChange = (current: number, size: number): void => { - setPageSize(size); - }; - - return ( -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/NavToCodeButton.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/NavToCodeButton.tsx deleted file mode 100644 index 2c999aa12a49726aad12321f260b31b6f331eda2..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/NavToCodeButton.tsx +++ /dev/null @@ -1,29 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { CallStackFrame } from './transform'; -import { Button } from 'antd'; -import { navToCode } from '../../utils/vscode'; - -interface IProps { - frame: CallStackFrame; -} - -export const NavToCodeButton = (props: IProps): React.JSX.Element => { - const { raw, line, file } = props.frame; - const couldNavToFile = line && file; - - const onClick = (): void => { - if (line && file) { - navToCode(file, line - 1); - } - }; - - return ( - - ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/OperationTable.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/OperationTable.tsx deleted file mode 100644 index 1ce77ee817967ee69961ccd8c91dbc3b0357bed7..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/OperationTable.tsx +++ /dev/null @@ -1,105 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Add visualization of PyTorch Ascend profiling. 
- *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { makeStyles } from '@material-ui/core/styles'; -import { OperationTableData, OperationTableDataInner, TableMetadata } from '../../api'; -import { OperationGroupBy } from '../../constants/groupBy'; -import { attachId, getCommonOperationColumns } from './common'; -import { Table, TableProps } from 'antd'; -import { makeExpandIcon } from './ExpandIcon'; -import { CallStackTable } from './CallStackTable'; - -export interface IProps { - data: OperationTableData; - run: string; - worker: string; - span: string; - groupBy: OperationGroupBy; - sortColumn: string; - tooltips?: any; - deviceTarget: string; -} - -const useStyles = makeStyles((theme) => ({ - tooltip: { - whiteSpace: 'pre-wrap', - }, -})); - -const rowExpandable = (record: OperationTableDataInner): boolean => record.has_call_stack; -const expandIcon = makeExpandIcon('View CallStack', (record) => !record.has_call_stack); -export const OperationTable = (props: IProps): React.JSX.Element => { - const { data, run, worker, span, groupBy, sortColumn, tooltips, deviceTarget } = props; - const classes = useStyles(props); - - const rows = React.useMemo(() => attachId(data), [data]); - - const columns = React.useMemo( - () => getCommonOperationColumns(rows, deviceTarget, sortColumn, tooltips, classes), - [rows] - ); - - const [pageSize, setPageSize] = React.useState(30); - const onShowSizeChange = (current: number, size: number): void => { - setPageSize(size); - }; - - const expandIconColumnIndex = columns.length; - const expandedRowRender = React.useCallback( - (record: OperationTableDataInner) => ( - - ), - [run, worker, span, groupBy] - ); - - const expandable: TableProps['expandable'] = React.useMemo( - () => ({ - expandIconColumnIndex, - expandIcon, - expandedRowRender, - rowExpandable, - }), - [expandIconColumnIndex, expandedRowRender] - ); - - return ( -
- ); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/common.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/common.tsx deleted file mode 100644 index a84a1a3bb3ff96fd5df257af51bdcd302dc318e2..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/common.tsx +++ /dev/null @@ -1,150 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *-------------------------------------------------------------------------------------------- - * Copyright (c) 2023, Huawei Technologies. - * All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications: Add visualization of PyTorch Ascend profiling. - *--------------------------------------------------------------------------------------------*/ - -import { firstOrUndefined, isDef } from '../../utils/def'; -import { CallStackTableDataInner, OperationTableDataInner } from '../../api'; -import type { ColumnsType } from 'antd/es/table'; -import { ClassNameMap } from '@material-ui/styles'; - -export function getCommonOperationColumns( - data?: T[], - deviceTarget?: string, - defaultSort?: string, - tooltips?: any, - classes?: ClassNameMap<'tooltip'> -): ColumnsType { - const firstData = firstOrUndefined(data); - - const hasInputShape = !firstData || isDef(firstData.input_shape); - const hasDeviceSelfDuration = !firstData || isDef(firstData.device_self_duration); - const hasDeviceTotalDuration = !firstData || isDef(firstData.device_total_duration); - const hasTcEligible = !firstData || isDef(firstData.tc_eligible); - const hasTcSelfRatio = !firstData || isDef(firstData.tc_self_ratio); - const hasTcTotalRatio = !firstData || isDef(firstData.tc_total_ratio); - - const nameCompare = (a: T, b: T): number => a.name.localeCompare(b.name); - const callsCompare = (a: T, b: T): number => a.calls - b.calls; - const deviceSelfDurationCompare = (a: T, b: T): number => - (a.device_self_duration || 0) - (b.device_self_duration || 0); - const deviceTotalDurationCompare = (a: T, b: T): number => - (a.device_total_duration || 0) - (b.device_total_duration || 0); - const hostSelfDurationCompare = (a: T, b: T): number => (a.host_self_duration || 0) - (b.host_self_duration || 0); - const hostTotalDurationCompare = (a: T, b: T): number => (a.host_total_duration || 0) - (b.host_total_duration || 0); - const tcEligibleCompare = (a: T, b: T): number => (a.tc_eligible ?? '').localeCompare(b.tc_eligible ?? ''); - const tcSelfRatioCompare = (a: T, b: T): number => (a.tc_self_ratio || 0) - (b.tc_self_ratio || 0); - const tcTotalRatioCompare = (a: T, b: T): number => (a.tc_total_ratio || 0) - (b.tc_total_ratio || 0); - - const columns: ColumnsType = [ - { - dataIndex: 'name', - key: 'name', - title: 'Name', - sorter: nameCompare, - }, - hasInputShape - ? 
{ - dataIndex: 'input_shape', - key: 'input_shape', - title: 'Input Shape', - } - : undefined, - { - dataIndex: 'calls', - sorter: callsCompare, - key: 'calls', - title: 'Calls', - }, - hasDeviceSelfDuration - ? { - dataIndex: 'device_self_duration', - key: 'device_self_duration', - title: 'Device Self Duration (us)', - sorter: deviceSelfDurationCompare, - // Use device_self_duration as default sort if defaultSort is unspecified - defaultSortOrder: defaultSort ? undefined : ('descend' as const), - } - : undefined, - hasDeviceTotalDuration - ? { - dataIndex: 'device_total_duration', - key: 'device_total_duration', - title: 'Device Total Duration (us)', - sorter: deviceTotalDurationCompare, - } - : undefined, - { - dataIndex: 'host_self_duration', - key: 'host_self_duration', - title: 'Host Self Duration (us)', - sorter: hostSelfDurationCompare, - }, - { - dataIndex: 'host_total_duration', - key: 'host_total_duration', - title: 'Host Total Duration (us)', - sorter: hostTotalDurationCompare, - }, - hasTcEligible - ? { - dataIndex: 'tc_eligible', - key: 'tc_eligible', - title: deviceTarget === 'Ascend' ? 'AI Cores Eligible' : 'Tensor Cores Eligible', - sorter: tcEligibleCompare, - } - : undefined, - hasTcSelfRatio - ? { - dataIndex: 'tc_self_ratio', - key: 'tc_self_ratio', - title: deviceTarget === 'Ascend' ? 'AI Cores Self(%)' : 'Tensor Cores Self(%)', - sorter: tcSelfRatioCompare, - } - : undefined, - hasTcTotalRatio - ? { - dataIndex: 'tc_total_ratio', - key: 'tc_total_ratio', - title: deviceTarget === 'Ascend' ? 'AI Cores Total(%)' : 'Tensor Cores Total(%)', - sorter: tcTotalRatioCompare, - } - : undefined, - ].filter(isDef); - columns.forEach((column) => { - if (column.key === defaultSort) { - column.defaultSortOrder = 'descend' as const; - } - if (tooltips[column.key as string]) { - column.showSorterTooltip = { - title: tooltips[column.key as string], - overlayClassName: classes?.tooltip, - }; - } - }); - return columns; -} - -let uid = 1; -export function attachId(data: T[]): T[] { - return data.map((d) => ({ - ...d, - key: uid++, - })); -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/transform.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/transform.ts deleted file mode 100644 index 5f59728feb30ef6d3230c3eec9803b08cdd72779..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/tables/transform.ts +++ /dev/null @@ -1,63 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. 
- *--------------------------------------------------------------------------------------------*/ - -import { CallStackTableData, CallStackTableDataInner } from '../../api'; - -export interface CallStackFrame { - file?: string; - line?: number; - raw: string; -} - -export interface TransformedCallStackDataInner extends CallStackTableDataInner { - callStackFrames: CallStackFrame[]; -} - -const lineRegex = /\([0-9]+\)$/; - -function parseCallStackLine(raw: string): CallStackFrame { - let rawResult = raw.trim(); - const results = rawResult.split(':'); - const location = results.slice(0, results.length - 1).join(':'); - - const result = lineRegex.exec(location); - if (!result) { - return { raw: rawResult }; - } - - const lineWithParens = result[0].trim(); - const file = rawResult.slice(0, result.index).trim(); - const line = Number( - lineWithParens.substr(1, lineWithParens.length - 2).trim() - ); - - return { - raw: rawResult, - file, - line, - }; -} - -function parseCallStack(callStack?: string): CallStackFrame[] { - const lines = (callStack ?? '') - .trim() - .split(';') - .map((x) => x.trim()); - return lines.map(parseCallStackLine); -} - -function transformCallStackData( - data: CallStackTableDataInner -): TransformedCallStackDataInner { - return { - ...data, - callStackFrames: parseCallStack(data.call_stack), - }; -} - -export function transformTableData( - data: CallStackTableData -): TransformedCallStackDataInner[] { - return data.map(transformCallStackData); -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/transform.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/components/transform.ts deleted file mode 100644 index 94ee9f384ebde3a3ddb057c88fc42beb69b0c908..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/components/transform.ts +++ /dev/null @@ -1,83 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. 
- *--------------------------------------------------------------------------------------------*/ - -import * as api from '../api'; -import { assertDef, isDef } from '../utils/def'; - -export function transformPerformanceIntoTable(performances: api.Performance[]): api.Graph { - const columns: api.GraphColumn[] = [ - { type: 'string', name: 'Category' }, - { type: 'number', name: 'Time Duration (us)' }, - { type: 'number', name: 'Percentage (%)' }, - ]; - - const rows: api.Graph['rows'] = []; - const queue = [...performances]; - - while (queue.length) { - const first = queue.shift(); - assertDef(first); - - const row: api.Graph['rows'][number] = []; - const { name, value, extra, children } = first; - assertDef(value); - assertDef(extra); - - row.push(name); - row.push(value); - row.push(extra); - - if (isDef(children) && children.length) { - queue.push(...children); - } - - rows.push(row); - } - - return { - columns, - rows, - }; -} - -export function transformPerformanceIntoPie(performances: api.Performance[]): { - columns: api.GraphColumn[]; - rows: Array>; -} { - const columns: api.GraphColumn[] = [ - { type: 'string', name: 'Name' }, - { type: 'number', name: 'Value' }, - ]; - - const rows: api.Graph['rows'] = []; - const queue: api.Performance[] = []; - performances.forEach((topLevel) => { - if (topLevel.children) { - queue.push(...topLevel.children); - } - }); - - while (queue.length) { - const first = queue.shift(); - assertDef(first); - - const row: api.Graph['rows'][number] = []; - const { name, value, children } = first; - assertDef(value); - - row.push(name); - row.push(Number.parseInt(value, 10)); - - if (isDef(children) && children.length) { - queue.push(...children); - } - - rows.push(row); - } - - return { - columns, - rows, - }; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/constants/groupBy.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/constants/groupBy.ts deleted file mode 100644 index 88ea9e3f42adfecd2a829384cc78b7ddc88d11aa..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/constants/groupBy.ts +++ /dev/null @@ -1,13 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -export enum OperationGroupBy { - OPERATION = 'Operation', - OPERATION_AND_INPUT_SHAPE = 'OperationAndInputShape', -} - -export enum KernelGroupBy { - KERNEL = 'Kernel', - KERNEL_NAME_AND_OP_NAME = 'KernelNameAndOpName', -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/gstatic.d.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/gstatic.d.ts deleted file mode 100644 index 521c5fbb8d985136529d8233f8a65dffb8acca95..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/gstatic.d.ts +++ /dev/null @@ -1,6 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. 
- *--------------------------------------------------------------------------------------------*/ - -declare const google: any; -declare module 'react-flame-graph'; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/index.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/index.tsx deleted file mode 100644 index 851474766de5d9adee682e66ed752c85ffd6d4bf..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/index.tsx +++ /dev/null @@ -1,10 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { render } from 'react-dom'; -import { App } from './app'; -import 'antd/dist/antd.css'; - -render(, document.getElementById('app')); diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/setup.tsx b/plugins/tensorboard-plugins/tb_plugin/fe/src/setup.tsx deleted file mode 100644 index c811ae1524ec7cc6f82410e8aeb999f2ea22476b..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/setup.tsx +++ /dev/null @@ -1,9 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -export async function setup(): Promise { - await google.charts.load('current', { - packages: ['corechart', 'table', 'timeline'], - }); -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/styles.css b/plugins/tensorboard-plugins/tb_plugin/fe/src/styles.css deleted file mode 100644 index f7705b54a81898a2caf859211dc44e7d9ae4b429..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/styles.css +++ /dev/null @@ -1,16 +0,0 @@ -#app { - height: 100%; -} -.goog-tooltip { - display: none !important; -} - -.visualization-tooltip { - padding: 4px 10px; - white-space: nowrap; -} - -div.google-visualization-tooltip { - pointer-events: none; - max-width: 90%; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/binarysearch.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/binarysearch.ts deleted file mode 100644 index 41382dcdb7acc8cb9e2b1b4f856e1855fb7ed88f..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/binarysearch.ts +++ /dev/null @@ -1,20 +0,0 @@ -export function binarySearch( - arr: Array, - key: any, - compareFn: (key: number, mid: Array) => number -): number { - let low = 0; - let high = arr.length - 1; - while (low <= high) { - let mid = Math.round((high + low) / 2); - let cmp = compareFn(key, arr[mid]); - if (cmp > 0) { - low = mid + 1; - } else if (cmp < 0) { - high = mid - 1; - } else { - return mid; - } - } - return -1; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/debounce.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/debounce.ts deleted file mode 100644 index 82c7f04a98b788ab2c7c7647c292f163b8a92783..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/debounce.ts +++ /dev/null @@ -1,21 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. 
- *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; - -export function useDebounce<T>(value: T, delay: number): T { - const [debouncedValue, setDebouncedValue] = React.useState(value); - - React.useEffect(() => { - const handler = setTimeout(() => { - setDebouncedValue(value); - }, delay); - - return () => { - clearTimeout(handler); - }; - }, [value, delay]); - - return debouncedValue; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/def.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/def.ts deleted file mode 100644 index df6bef8eab076d13c0785902127f46a472ff9fa6..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/def.ts +++ /dev/null @@ -1,20 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -export function isDef<T>(v?: T | null): v is T { - return v !== null && v !== undefined; -} - -export function assertDef<T>(v?: T | null): asserts v is T { - if (!isDef(v)) { - throw new Error('Must be defined'); - } -} - -export function firstOrUndefined<T>(v?: T[]): T | undefined { - if (!v || !v.length) { - return undefined; - } - return v[0]; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/hooks.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/hooks.ts deleted file mode 100644 index 473b393d9fa270438be85a7b528d78107c5f87f5..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/hooks.ts +++ /dev/null @@ -1,27 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; - -const cbs: Array<() => void> = []; -// Single shared listener so it can be detached again once the last subscriber unmounts. -const onWindowResize = (): void => { - cbs.forEach((callback) => callback()); -}; -export const useOnResize = (cb: () => void): void => { - React.useEffect(() => { - if (cbs.length === 0) { - window.addEventListener('resize', onWindowResize); - } - cbs.push(cb); - - return (): void => { - const idx = cbs.indexOf(cb); - if (idx > -1) { - cbs.splice(idx, 1); - } - if (cbs.length === 0) { - window.removeEventListener('resize', onWindowResize); - } - }; - }, [cb]); -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/index.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/index.ts deleted file mode 100644 index 5da446721e9d1cac3729d8aea03bca2615031f41..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/index.ts +++ /dev/null @@ -1,24 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved.
- *--------------------------------------------------------------------------------------------*/ - -import { ValueAndFormat } from '../api'; - -export function firstOrUndefined<T>(v?: T[] | null): T | undefined { - if (!v || !v.length) { - return undefined; - } - return v[0]; -} - -export function sleep(delay: number): Promise<void> { - return new Promise<void>((resolve) => setTimeout(resolve, delay)); -} - -export function isValueAndFormat(v: any): v is ValueAndFormat { - return 'f' in v && 'v' in v; -} - -export function value(v: boolean | number | string | ValueAndFormat): boolean | number | string { - return typeof v === 'object' && isValueAndFormat(v) ? v.v : v; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/resize.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/resize.ts deleted file mode 100644 index 766a10d54143fecd637b1d0dff33db17f22bee0d..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/resize.ts +++ /dev/null @@ -1,27 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import debounce from '@material-ui/core/utils/debounce'; - -export function useResizeEventDependency(): readonly [number] { - const [version, setVersion] = React.useState(0); - - const increaseVersion = React.useCallback( - debounce(() => { - setVersion((prev) => prev + 1); - }, 100), - [] - ); - - React.useEffect(() => { - window.addEventListener('resize', increaseVersion); - - return (): void => { - window.removeEventListener('resize', increaseVersion); - }; - }, []); - - return [version] as const; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/search.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/search.ts deleted file mode 100644 index 8a2efc36ddf505aee50171affd722bd5ef0a5b86..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/search.ts +++ /dev/null @@ -1,68 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved.
- *--------------------------------------------------------------------------------------------*/ - -import * as React from 'react'; -import { value } from '.'; -import * as api from '../api'; -import { useDebounce } from './debounce'; - -export function useSearch( - searchName: string, - columnName: string, - table?: api.Graph -): [api.Graph | undefined] { - const searchNameDebounce = useDebounce(searchName.trim(), 500); - - const searchedTable: api.Graph | undefined = React.useMemo(() => { - if (!searchNameDebounce) { - return table; - } - - if (!table) { - return undefined; - } - - const columnNameToFind = columnName.toLowerCase(); - const nameColumnIdx = table.columns.findIndex( - (c) => c.name.toLowerCase() === columnNameToFind - ); - if (nameColumnIdx < 0) { - return table; - } - - return { - ...table, - rows: table.rows.filter((x) => { - const cell = value(x[nameColumnIdx]); - return typeof cell === 'string' && cell.includes(searchNameDebounce); - }), - }; - }, [table, searchNameDebounce]); - return [searchedTable]; -} - -export function useSearchDirectly<T>( - searchName: string, - field: (v: T) => string, - table?: T[] -): [T[] | undefined] { - const searchNameDebounce = useDebounce(searchName.trim(), 500); - - const result = React.useMemo(() => { - if (!searchNameDebounce) { - return table; - } - - if (!table) { - return undefined; - } - - return table.filter((row) => { - return field(row) - .toLowerCase() - .includes(searchNameDebounce.toLowerCase()); - }); - }, [table, field, searchNameDebounce]); - return [result]; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/top.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/top.ts deleted file mode 100644 index 4af19968d637d6c13bf64caa94f09fff104f6091..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/top.ts +++ /dev/null @@ -1,54 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -import debounce from '@material-ui/core/utils/debounce'; -import * as React from 'react'; - -export enum UseTop { - NOT_USE = 'NotUse', - USE = 'Use', -} - -interface IOptions { - defaultTop?: number; - defaultUseTop?: UseTop; - noDebounce?: boolean; - wait?: number; -} - -export function useTopN( - options?: IOptions -): readonly [ - string, - number | undefined, - UseTop, - React.Dispatch<React.SetStateAction<string>>, - React.Dispatch<React.SetStateAction<UseTop>> -] { - let realOptions = options ?? {}; - - const [topText, setTopText] = React.useState(String(realOptions.defaultTop ?? 15)); - const [actualTop, setActualTop] = React.useState<number | undefined>(Number(topText)); - const [useTop, setUseTop] = React.useState(realOptions.defaultUseTop ?? UseTop.NOT_USE); - - const setActualDebounce = !realOptions.noDebounce - ? React.useCallback(debounce(setActualTop, realOptions.wait ??
500), []) - : setActualTop; - React.useEffect(() => { - if (useTop !== UseTop.USE) { - setActualDebounce(undefined); - } else if (topIsValid(topText)) { - setActualDebounce(Number(topText)); - } else { - setActualDebounce(actualTop); - } - }, [topText, useTop]); - - return [topText, actualTop, useTop, setTopText, setUseTop] as const; -} - -export function topIsValid(topText: string): boolean { - const top = Number(topText); - return !Number.isNaN(top) && top > 0 && Number.isInteger(top); -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/type.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/type.ts deleted file mode 100644 index ccd45fd16e11043abe40a4235a7b39a5d18afcdd..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/type.ts +++ /dev/null @@ -1,9 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -export type Arguments<T extends (...args: any[]) => void> = T extends ( - ...args: infer A -) => void - ? A - : never; diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/vscode.ts b/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/vscode.ts deleted file mode 100644 index 2a763adca54ef3eba96837aa111df627e3f8b116..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/src/utils/vscode.ts +++ /dev/null @@ -1,13 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) Microsoft Corporation. All rights reserved. - *--------------------------------------------------------------------------------------------*/ - -export function navToCode(filename: string, line: number): void { - window.parent.parent.postMessage( - { - filename, - line, - }, - window.origin - ); -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/tsconfig.json b/plugins/tensorboard-plugins/tb_plugin/fe/tsconfig.json deleted file mode 100644 index 182aafbe127c2c3209eacae7483ec02a2cd622cc..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/tsconfig.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "compilerOptions": { - "target": "es5", - "lib": [ - "dom", - "dom.iterable", - "esnext" - ], - "strict": true, - "strictPropertyInitialization": false, - "module": "esnext", - "moduleResolution": "node", - "forceConsistentCasingInFileNames": true, - "rootDir": "src", - "outDir": "dist", - "jsx": "react", - } -} diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/update-static.js b/plugins/tensorboard-plugins/tb_plugin/fe/update-static.js deleted file mode 100644 index 67c9be6ccc266ca2470705ad7bb990e550769e96..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/update-static.js +++ /dev/null @@ -1,7 +0,0 @@ -const fs = require('fs'); -const path = require('path'); - -fs.copyFileSync( - path.resolve(__dirname, 'dist/index.html'), - path.resolve(__dirname, '../torch_tb_profiler/static/index.html') -); diff --git a/plugins/tensorboard-plugins/tb_plugin/fe/webpack.config.js b/plugins/tensorboard-plugins/tb_plugin/fe/webpack.config.js deleted file mode 100644 index a47f8b319e83a9c96c80c11afe5adf09e308fbfa..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/fe/webpack.config.js +++ /dev/null @@ -1,36 +0,0 @@ -const path = require('path'); -const HtmlWebpackPlugin =
require('html-webpack-plugin'); -const InlineChunkHtmlPlugin = require('inline-chunk-html-plugin'); - -const isDev = process.env.NODE_ENV !== 'production'; - -/** - * @type {import('webpack').Configuration & import('webpack-dev-server').Configuration} - */ -module.exports = { - mode: isDev ? 'development' : 'production', - entry: './src/index.tsx', - output: { - path: path.resolve(__dirname, 'dist'), - filename: 'index.js', - }, - resolve: { - // Add `.ts` and `.tsx` as a resolvable extension. - extensions: ['.ts', '.tsx', '.js'], - }, - module: { - rules: [ - { test: /\.tsx?$/i, use: 'ts-loader' }, - { test: /\.css$/i, use: ['style-loader', 'css-loader'] }, - ], - }, - plugins: [ - new HtmlWebpackPlugin({ - inject: true, - scriptLoading: 'blocking', - template: 'index.html', - }), - !isDev ? new InlineChunkHtmlPlugin(HtmlWebpackPlugin, [/.*/]) : undefined, - ].filter(Boolean), - devServer: {}, -}; diff --git a/plugins/tensorboard-plugins/tb_plugin/packaging/torch_tb_profiler/meta.yaml b/plugins/tensorboard-plugins/tb_plugin/packaging/torch_tb_profiler/meta.yaml deleted file mode 100644 index ab105fd04d21315cf57376e947bd52ddd98d417b..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/packaging/torch_tb_profiler/meta.yaml +++ /dev/null @@ -1,39 +0,0 @@ -{% set data = load_setup_py_data(setup_file='../../setup.py', from_recipe_dir=True) %} - -package: - name: torch_tb_profiler - version: "{{ data.get('version') }}" - -source: - path: "{{ environ.get('SOURCE_ROOT_DIR', '../..') }}" - -requirements: - host: - - python - - setuptools - - pytorch - - run: - - python - - pandas - - tensorboard - -build: - noarch: python - script: python setup.py install --single-version-externally-managed --record=record.txt - -test: - imports: - - torch_tb_profiler - source_files: - - test - requires: - - tensorboard - - pandas - - -about: - home: https://github.com/pytorch/kineto/tree/main/tb_plugin - license: BSD - license_file: LICENSE - summary: 'TensorBoard Plugin that provides visualization of PyTorch profiling' diff --git a/plugins/tensorboard-plugins/tb_plugin/setup.py b/plugins/tensorboard-plugins/tb_plugin/setup.py deleted file mode 100644 index 2d4260b2133ae00a91831a7e2867b467e029d108..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/setup.py +++ /dev/null @@ -1,125 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling.
-# -------------------------------------------------------------------------- -import os -import pathlib -import subprocess -from configparser import ConfigParser - -import setuptools - -config = ConfigParser() -config.read('./torch_tb_profiler/config/config.ini') - - -def read(rel_path): - here = os.path.abspath(os.path.dirname(__file__)) - with open(os.path.join(here, rel_path)) as fp: - return fp.read() - - -def get_version(rel_path): - for line in read(rel_path).splitlines(): - if line.startswith("__version__"): - delim = '"' if '"' in line else "'" - version = line.split(delim)[1] - - if os.getenv('TORCH_TB_PROFILER_BUILD_VERSION'): - version = os.getenv('TORCH_TB_PROFILER_BUILD_VERSION') - return version - - -INSTALL_REQUIRED = [ - "pandas >= 1.0.0", - "tensorboard >= 2.11.0" -] - -TESTS_REQUIRED = INSTALL_REQUIRED + [ - "torch >= 1.8", - "torchvision >= 0.8" -] - -EXTRAS = { - "s3": ["boto3"], - "blob": ["azure-storage-blob"], - "gs": ["google-cloud-storage"] -} - - -class build_fe(setuptools.Command): - """Build the frontend""" - description = "run yarn build on frontend directory" - - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - cwd = pathlib.Path().absolute() - root = pathlib.Path(__file__).parent.absolute() - os.chdir(root / "fe") - subprocess.run(["yarn", "build:copy"], check=True) - # restore the working directory - os.chdir(cwd) - - -setuptools.setup( - name="torch-tb-profiler-ascend", - version=get_version(os.path.join('torch_tb_profiler', '__init__.py')), - description="PyTorch Ascend Profiler TensorBoard Plugin", - long_description=f"PyTorch Ascend Profiler TensorBoard Plugin: {config.get('URL', 'repository_url')}", - url=config.get('URL', 'repository_url'), - author="Ascend Team", - author_email=config.get('EMAIL', 'author_email'), - cmdclass={ - "build_fe": build_fe - }, - packages=setuptools.find_packages(), - package_data={ - "torch_tb_profiler": ["static/**", "config/**"], - }, - entry_points={ - "tensorboard_plugins": [ - "torch_profiler = torch_tb_profiler.plugin:TorchProfilerPlugin", - ], - }, - python_requires=">=3.6.2", - install_requires=INSTALL_REQUIRED, - tests_require=TESTS_REQUIRED, - classifiers=[ - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python :: 3', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], - license='BSD-3', - keywords='pytorch tensorboard profile plugin', - extras_require=EXTRAS -) diff --git a/plugins/tensorboard-plugins/tb_plugin/test/gpu_metrics_expected.json b/plugins/tensorboard-plugins/tb_plugin/test/gpu_metrics_expected.json deleted file mode 100644 index 81f03632c83fa6abb41fa42011f7d7616b7d9d3a..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/test/gpu_metrics_expected.json +++ /dev/null @@ -1,3105 +0,0 @@ - -{ - "schemaVersion": 1, - - "computeProperties": [ - - { - "id": 0, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34084028416, - "major": 7, "minor": 0, - "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048, - "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, - "sharedMemPerBlock": 49152, 
"sharedMemPerMultiprocessor": 98304, - "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304 - }, - - { - "id": 1, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216, - "major": 7, "minor": 0, - "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048, - "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, - "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304, - "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304 - }, - - { - "id": 2, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216, - "major": 7, "minor": 0, - "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048, - "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, - "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304, - "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304 - }, - - { - "id": 3, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216, - "major": 7, "minor": 0, - "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048, - "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, - "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304, - "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304 - } - ], - "traceEvents": [ - - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223197, "dur": 21, - "args": { - "Device": 24572, "External id": 2, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187223264, "dur": 5, - "args": { - "Device": 24572, "External id": 3, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros", "pid": 24572, "tid": "24572", - "ts": 1621401187223182, "dur": 99, - "args": { - "Device": 24572, "External id": 1, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223376, "dur": 19, - "args": { - "Device": 24572, "External id": 5, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223480, "dur": 18, - "args": { - "Device": 24572, "External id": 7, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187223530, "dur": 5, - "args": { - "Device": 24572, "External id": 8, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros", "pid": 24572, "tid": "24572", - "ts": 1621401187223469, "dur": 72, - "args": { - "Device": 24572, "External id": 6, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223622, "dur": 19, - "args": { - "Device": 24572, "External id": 10, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187223790, "dur": 12, - "args": { - "Device": 24572, "External id": 13, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::unsqueeze", "pid": 24572, 
"tid": "24572", - "ts": 1621401187223777, "dur": 50, - "args": { - "Device": 24572, "External id": 12, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187223850, "dur": 7, - "args": { - "Device": 24572, "External id": 15, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::unsqueeze", "pid": 24572, "tid": "24572", - "ts": 1621401187223841, "dur": 24, - "args": { - "Device": 24572, "External id": 14, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223904, "dur": 16, - "args": { - "Device": 24572, "External id": 18, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::resize_", "pid": 24572, "tid": "24572", - "ts": 1621401187223945, "dur": 14, - "args": { - "Device": 24572, "External id": 19, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_cat", "pid": 24572, "tid": "24572", - "ts": 1621401187223888, "dur": 87, - "args": { - "Device": 24572, "External id": 17, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::cat", "pid": 24572, "tid": "24572", - "ts": 1621401187223876, "dur": 106, - "args": { - "Device": 24572, "External id": 16, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::stack", "pid": 24572, "tid": "24572", - "ts": 1621401187223752, "dur": 245, - "args": { - "Device": 24572, "External id": 11, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224094, "dur": 12, - "args": { - "Device": 24572, "External id": 22, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::unsqueeze", "pid": 24572, "tid": "24572", - "ts": 1621401187224074, "dur": 43, - "args": { - "Device": 24572, "External id": 21, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224137, "dur": 6, - "args": { - "Device": 24572, "External id": 24, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::unsqueeze", "pid": 24572, "tid": "24572", - "ts": 1621401187224128, "dur": 21, - "args": { - "Device": 24572, "External id": 23, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187224184, "dur": 15, - "args": { - "Device": 24572, "External id": 27, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::resize_", "pid": 24572, "tid": "24572", - "ts": 1621401187224223, "dur": 12, - "args": { - "Device": 24572, "External id": 28, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_cat", "pid": 24572, "tid": "24572", - "ts": 
1621401187224169, "dur": 79, - "args": { - "Device": 24572, "External id": 26, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::cat", "pid": 24572, "tid": "24572", - "ts": 1621401187224159, "dur": 96, - "args": { - "Device": 24572, "External id": 25, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::stack", "pid": 24572, "tid": "24572", - "ts": 1621401187224056, "dur": 213, - "args": { - "Device": 24572, "External id": 20, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__", "pid": 24572, "tid": "24572", - "ts": 1621401187223604, "dur": 725, - "args": { - "Device": 24572, "External id": 9, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224415, "dur": 54, - "args": { - "Device": 24572, "External id": 30, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::copy_", "pid": 24572, "tid": "24572", - "ts": 1621401187224496, "dur": 80, - "args": { - "Device": 24572, "External id": 31, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 24572, "tid": "24572", - "ts": 1621401187224398, "dur": 193, - "args": { - "Device": 24572, "External id": 29, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224645, "dur": 51, - "args": { - "Device": 24572, "External id": 33, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::copy_", "pid": 24572, "tid": "24572", - "ts": 1621401187224720, "dur": 65, - "args": { - "Device": 24572, "External id": 34, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 24572, "tid": "24572", - "ts": 1621401187224631, "dur": 168, - "args": { - "Device": 24572, "External id": 32, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224956, "dur": 14, - "args": { - "Device": 24572, "External id": 38, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24572", - "ts": 1621401187224945, "dur": 37, - "args": { - "Device": 24572, "External id": 37, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24572", - "ts": 1621401187224917, "dur": 101, - "args": { - "Device": 24572, "External id": 36, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24572", - "ts": 1621401187225058, "dur": 33, 
- "args": { - "Device": 24572, "External id": 40, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 23 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187225181, "dur": 41, - "args": { - "Device": 24572, "External id": 42, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24572", - "ts": 1621401187225112, "dur": 197, - "args": { - "Device": 24572, "External id": 41, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 23 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24572", - "ts": 1621401187225367, "dur": 17, - "args": { - "Device": 24572, "External id": 44, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_unsafe_view", "pid": 24572, "tid": "24572", - "ts": 1621401187225336, "dur": 79, - "args": { - "Device": 24572, "External id": 43, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 24 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::matmul", "pid": 24572, "tid": "24572", - "ts": 1621401187225037, "dur": 394, - "args": { - "Device": 24572, "External id": 39, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 23 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187225449, "dur": 107, - "args": { - "Device": 24572, "External id": 45, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 25 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::linear", "pid": 24572, "tid": "24572", - "ts": 1621401187224907, "dur": 664, - "args": { - "Device": 24572, "External id": 35, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187225662, "dur": 25, - "args": { - "Device": 24572, "External id": 47, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::resize_", "pid": 24572, "tid": "24572", - "ts": 1621401187225746, "dur": 30, - "args": { - "Device": 24572, "External id": 50, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::clamp_min", "pid": 24572, "tid": "24572", - "ts": 1621401187225721, "dur": 105, - "args": { - "Device": 24572, "External id": 49, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::clamp", "pid": 24572, "tid": "24572", - "ts": 1621401187225709, "dur": 128, - "args": { - "Device": 24572, "External id": 48, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::clamp", "pid": 24572, "tid": "24572", - "ts": 1621401187225606, "dur": 263, - "args": { - "Device": 24572, "External id": 46, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 26 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187225978, "dur": 14, - "args": { - "Device": 
24572, "External id": 54, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24572", - "ts": 1621401187225968, "dur": 36, - "args": { - "Device": 24572, "External id": 53, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24572", - "ts": 1621401187225941, "dur": 98, - "args": { - "Device": 24572, "External id": 52, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 27 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24572", - "ts": 1621401187226077, "dur": 60, - "args": { - "Device": 24572, "External id": 56, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 28 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187226233, "dur": 41, - "args": { - "Device": 24572, "External id": 58, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24572", - "ts": 1621401187226161, "dur": 197, - "args": { - "Device": 24572, "External id": 57, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 29 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24572", - "ts": 1621401187226416, "dur": 17, - "args": { - "Device": 24572, "External id": 60, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_unsafe_view", "pid": 24572, "tid": "24572", - "ts": 1621401187226384, "dur": 79, - "args": { - "Device": 24572, "External id": 59, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 30 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::matmul", "pid": 24572, "tid": "24572", - "ts": 1621401187226057, "dur": 422, - "args": { - "Device": 24572, "External id": 55, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 28 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187226497, "dur": 103, - "args": { - "Device": 24572, "External id": 61, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 31 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::linear", "pid": 24572, "tid": "24572", - "ts": 1621401187225932, "dur": 683, - "args": { - "Device": 24572, "External id": 51, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 27 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::broadcast_tensors", "pid": 24572, "tid": "24572", - "ts": 1621401187226708, "dur": 11, - "args": { - "Device": 24572, "External id": 62, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 32 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187226827, "dur": 41, - "args": { - "Device": 24572, "External id": 64, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187226955, "dur": 35, - "args": 
{ - "Device": 24572, "External id": 66, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187227020, "dur": 11, - "args": { - "Device": 24572, "External id": 67, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::sum", "pid": 24572, "tid": "24572", - "ts": 1621401187226930, "dur": 176, - "args": { - "Device": 24572, "External id": 65, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mse_loss", "pid": 24572, "tid": "24572", - "ts": 1621401187226753, "dur": 445, - "args": { - "Device": 24572, "External id": 63, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 32 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187227327, "dur": 21, - "args": { - "Device": 24572, "External id": 69, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227368, "dur": 5, - "args": { - "Device": 24572, "External id": 70, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros", "pid": 24572, "tid": "24572", - "ts": 1621401187227314, "dur": 65, - "args": { - "Device": 24572, "External id": 68, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187227464, "dur": 18, - "args": { - "Device": 24572, "External id": 72, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187227576, "dur": 49, - "args": { - "Device": 24572, "External id": 74, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227553, "dur": 97, - "args": { - "Device": 24572, "External id": 73, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 33 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187227707, "dur": 43, - "args": { - "Device": 24572, "External id": 76, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227689, "dur": 79, - "args": { - "Device": 24572, "External id": 75, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 33 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187227823, "dur": 42, - "args": { - "Device": 24572, "External id": 78, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227805, "dur": 77, - "args": { - "Device": 24572, "External id": 77, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 33 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", 
"pid": 24572, "tid": "24572", - "ts": 1621401187227937, "dur": 41, - "args": { - "Device": 24572, "External id": 80, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227919, "dur": 77, - "args": { - "Device": 24572, "External id": 79, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 33 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "Optimizer.zero_grad#SGD.zero_grad", "pid": 24572, "tid": "24572", - "ts": 1621401187227446, "dur": 606, - "args": { - "Device": 24572, "External id": 71, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187228150, "dur": 53, - "args": { - "Device": 24572, "External id": 83, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_like", "pid": 24572, "tid": "24572", - "ts": 1621401187228137, "dur": 81, - "args": { - "Device": 24572, "External id": 82, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187228235, "dur": 50, - "args": { - "Device": 24572, "External id": 84, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::ones_like", "pid": 24572, "tid": "24572", - "ts": 1621401187228128, "dur": 169, - "args": { - "Device": 24572, "External id": 81, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187228708, "dur": 79, - "args": { - "Device": 24572, "External id": 89, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_like", "pid": 24572, "tid": "24610", - "ts": 1621401187228680, "dur": 146, - "args": { - "Device": 24572, "External id": 88, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24610", - "ts": 1621401187228885, "dur": 93, - "args": { - "Device": 24572, "External id": 91, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24610", - "ts": 1621401187228858, "dur": 147, - "args": { - "Device": 24572, "External id": 90, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros_like", "pid": 24572, "tid": "24610", - "ts": 1621401187228647, "dur": 369, - "args": { - "Device": 24572, "External id": 87, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mse_loss_backward", "pid": 24572, "tid": "24610", - "ts": 1621401187229048, "dur": 122, - "args": { - "Device": 24572, "External id": 92, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mse_loss_backward", "pid": 24572, "tid": "24610", - "ts": 1621401187228603, "dur": 614, - "args": { - "Device": 24572, "External id": 86, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": 
"MseLossBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187228516, "dur": 727, - "args": { - "Device": 24572, "External id": 85, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 32 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "AddBackward1", "pid": 24572, "tid": "24610", - "ts": 1621401187229384, "dur": 17, - "args": { - "Device": 24572, "External id": 93, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 31 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187229506, "dur": 73, - "args": { - "Device": 24572, "External id": 95, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::sum", "pid": 24572, "tid": "24610", - "ts": 1621401187229459, "dur": 279, - "args": { - "Device": 24572, "External id": 94, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187229788, "dur": 65, - "args": { - "Device": 24572, "External id": 96, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24610", - "ts": 1621401187230059, "dur": 131, - "args": { - "Device": 24572, "External id": 98, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610", - "ts": 1621401187230028, "dur": 228, - "args": { - "Device": 24572, "External id": 97, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187230405, "dur": 61, - "args": { - "Device": 24572, "External id": 101, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::reshape", "pid": 24572, "tid": "24610", - "ts": 1621401187230383, "dur": 107, - "args": { - "Device": 24572, "External id": 100, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "UnsafeViewBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187230354, "dur": 146, - "args": { - "Device": 24572, "External id": 99, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 30 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187230751, "dur": 22, - "args": { - "Device": 24572, "External id": 105, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187230732, "dur": 65, - "args": { - "Device": 24572, "External id": 104, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187230710, "dur": 124, - "args": { - "Device": 24572, "External id": 103, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::conj", "pid": 24572, "tid": "24610", - "ts": 1621401187230862, "dur": 7, - "args": { - "Device": 24572, "External id": 106, - "Trace name": "PyTorch Profiler", "Trace 
iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187230935, "dur": 73, - "args": { - "Device": 24572, "External id": 108, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24610", - "ts": 1621401187230889, "dur": 235, - "args": { - "Device": 24572, "External id": 107, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187231211, "dur": 23, - "args": { - "Device": 24572, "External id": 111, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187231191, "dur": 69, - "args": { - "Device": 24572, "External id": 110, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187231168, "dur": 129, - "args": { - "Device": 24572, "External id": 109, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187231376, "dur": 17, - "args": { - "Device": 24572, "External id": 114, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187231360, "dur": 49, - "args": { - "Device": 24572, "External id": 113, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187231340, "dur": 100, - "args": { - "Device": 24572, "External id": 112, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::conj", "pid": 24572, "tid": "24610", - "ts": 1621401187231465, "dur": 6, - "args": { - "Device": 24572, "External id": 115, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187231534, "dur": 72, - "args": { - "Device": 24572, "External id": 117, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24610", - "ts": 1621401187231491, "dur": 225, - "args": { - "Device": 24572, "External id": 116, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "MmBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187230626, "dur": 1124, - "args": { - "Device": 24572, "External id": 102, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 29 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187231992, "dur": 61, - "args": { - "Device": 24572, "External id": 120, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::reshape", "pid": 24572, "tid": "24610", - "ts": 1621401187231970, "dur": 108, - "args": { - "Device": 24572, "External id": 119, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", 
"cat": "Operator", - "name": "ViewBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187231941, "dur": 166, - "args": { - "Device": 24572, "External id": 118, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 28 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187232305, "dur": 21, - "args": { - "Device": 24572, "External id": 124, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187232286, "dur": 62, - "args": { - "Device": 24572, "External id": 123, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187232265, "dur": 123, - "args": { - "Device": 24572, "External id": 122, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "TBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187232239, "dur": 161, - "args": { - "Device": 24572, "External id": 121, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 27 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24610", - "ts": 1621401187232535, "dur": 85, - "args": { - "Device": 24572, "External id": 126, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610", - "ts": 1621401187232515, "dur": 148, - "args": { - "Device": 24572, "External id": 125, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187232790, "dur": 47, - "args": { - "Device": 24572, "External id": 129, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24610", - "ts": 1621401187232866, "dur": 68, - "args": { - "Device": 24572, "External id": 130, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::scalar_tensor", "pid": 24572, "tid": "24610", - "ts": 1621401187232776, "dur": 174, - "args": { - "Device": 24572, "External id": 128, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187233023, "dur": 27, - "args": { - "Device": 24572, "External id": 132, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_local_scalar_dense", "pid": 24572, "tid": "24610", - "ts": 1621401187233192, "dur": 6, - "args": { - "Device": 24572, "External id": 135, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::item", "pid": 24572, "tid": "24610", - "ts": 1621401187233184, "dur": 24, - "args": { - "Device": 24572, "External id": 134, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::resize_", "pid": 24572, "tid": "24610", - "ts": 1621401187233251, "dur": 41, - "args": { - "Device": 24572, "External id": 136, - "Trace name": "PyTorch Profiler", "Trace 
iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::ge", "pid": 24572, "tid": "24610", - "ts": 1621401187233168, "dur": 182, - "args": { - "Device": 24572, "External id": 133, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::ge", "pid": 24572, "tid": "24610", - "ts": 1621401187232971, "dur": 404, - "args": { - "Device": 24572, "External id": 131, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187233430, "dur": 15, - "args": { - "Device": 24572, "External id": 139, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::expand", "pid": 24572, "tid": "24610", - "ts": 1621401187233414, "dur": 62, - "args": { - "Device": 24572, "External id": 138, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187233508, "dur": 10, - "args": { - "Device": 24572, "External id": 141, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::expand", "pid": 24572, "tid": "24610", - "ts": 1621401187233494, "dur": 48, - "args": { - "Device": 24572, "External id": 140, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187233571, "dur": 10, - "args": { - "Device": 24572, "External id": 143, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::expand", "pid": 24572, "tid": "24610", - "ts": 1621401187233558, "dur": 43, - "args": { - "Device": 24572, "External id": 142, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187233649, "dur": 46, - "args": { - "Device": 24572, "External id": 145, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_s_where", "pid": 24572, "tid": "24610", - "ts": 1621401187233620, "dur": 167, - "args": { - "Device": 24572, "External id": 144, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::where", "pid": 24572, "tid": "24610", - "ts": 1621401187233398, "dur": 409, - "args": { - "Device": 24572, "External id": 137, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "ClampBackward1", "pid": 24572, "tid": "24610", - "ts": 1621401187232724, "dur": 1110, - "args": { - "Device": 24572, "External id": 127, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 26 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "AddBackward1", "pid": 24572, "tid": "24610", - "ts": 1621401187233941, "dur": 12, - "args": { - "Device": 24572, "External id": 146, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 25 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187234021, "dur": 46, - "args": { - "Device": 24572, "External id": 148, - "Trace name": "PyTorch 
Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::sum", "pid": 24572, "tid": "24610", - "ts": 1621401187233990, "dur": 182, - "args": { - "Device": 24572, "External id": 147, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187234208, "dur": 43, - "args": { - "Device": 24572, "External id": 149, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24610", - "ts": 1621401187234378, "dur": 84, - "args": { - "Device": 24572, "External id": 151, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610", - "ts": 1621401187234357, "dur": 144, - "args": { - "Device": 24572, "External id": 150, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187234593, "dur": 39, - "args": { - "Device": 24572, "External id": 154, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::reshape", "pid": 24572, "tid": "24610", - "ts": 1621401187234580, "dur": 67, - "args": { - "Device": 24572, "External id": 153, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "UnsafeViewBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187234561, "dur": 92, - "args": { - "Device": 24572, "External id": 152, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 24 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187234803, "dur": 14, - "args": { - "Device": 24572, "External id": 158, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187234792, "dur": 41, - "args": { - "Device": 24572, "External id": 157, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187234778, "dur": 79, - "args": { - "Device": 24572, "External id": 156, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::conj", "pid": 24572, "tid": "24610", - "ts": 1621401187234874, "dur": 4, - "args": { - "Device": 24572, "External id": 159, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187234918, "dur": 47, - "args": { - "Device": 24572, "External id": 161, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24610", - "ts": 1621401187234890, "dur": 149, - "args": { - "Device": 24572, "External id": 160, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187235092, "dur": 15, - "args": { - "Device": 24572, "External id": 164, - "Trace name": "PyTorch Profiler", "Trace 
iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187235080, "dur": 39, - "args": { - "Device": 24572, "External id": 163, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187235067, "dur": 75, - "args": { - "Device": 24572, "External id": 162, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "MmBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187234734, "dur": 424, - "args": { - "Device": 24572, "External id": 155, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 23 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187235312, "dur": 13, - "args": { - "Device": 24572, "External id": 168, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187235301, "dur": 40, - "args": { - "Device": 24572, "External id": 167, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187235288, "dur": 78, - "args": { - "Device": 24572, "External id": 166, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "TBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187235271, "dur": 103, - "args": { - "Device": 24572, "External id": 165, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24610", - "ts": 1621401187235487, "dur": 85, - "args": { - "Device": 24572, "External id": 170, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610", - "ts": 1621401187235467, "dur": 147, - "args": { - "Device": 24572, "External id": 169, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187235803, "dur": 24, - "args": { - "Device": 24572, "External id": 172, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187235850, "dur": 5, - "args": { - "Device": 24572, "External id": 173, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros", "pid": 24572, "tid": "24572", - "ts": 1621401187235787, "dur": 75, - "args": { - "Device": 24572, "External id": 171, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187235954, "dur": 20, - "args": { - "Device": 24572, "External id": 175, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187236091, "dur": 82, - "args": { - "Device": 24572, "External id": 176, - "Trace name": "PyTorch 
Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187236221, "dur": 70, - "args": { - "Device": 24572, "External id": 177, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187236334, "dur": 68, - "args": { - "Device": 24572, "External id": 178, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187236444, "dur": 68, - "args": { - "Device": 24572, "External id": 179, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "Optimizer.step#SGD.step", "pid": 24572, "tid": "24572", - "ts": 1621401187235935, "dur": 663, - "args": { - "Device": 24572, "External id": 174, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#2", "pid": 24572, "tid": "24572", - "ts": 1621401187223358, "dur": 13410, - "args": { - "Device": 24572, "External id": 4, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Memcpy", - "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7", - "ts": 1621401187224556, "dur": 1, - "args": { - "device": 0, "context": 1, - "stream": 7, "correlation": 311, "external id": 31, - "bytes": 640, "memory bandwidth (GB/s)": 0.46511627906976744 - } - }, - { - "ph": "f", "id": 311, "pid": 0, "tid": "stream 7", "ts": 1621401187224556, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 24572, "tid": "24572", - "ts": 1621401187224533, "dur": 20, - "args": { - "cbid": 41, "correlation": 311, - "external id": 31, "external ts": 1621401187224496 - } - }, - { - "ph": "s", "id": 311, "pid": 24572, "tid": 24572, "ts": 1621401187224533, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaStreamSynchronize", "pid": 24572, "tid": "24572", - "ts": 1621401187224554, "dur": 8, - "args": { - "cbid": 131, "correlation": 312, - "external id": 31, "external ts": 1621401187224496 - } - }, - { - "ph": "X", "cat": "Memcpy", - "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7", - "ts": 1621401187224767, "dur": 1, - "args": { - "device": 0, "context": 1, - "stream": 7, "correlation": 323, "external id": 34, - "bytes": 128, "memory bandwidth (GB/s)": 0.09523809523809523 - } - }, - { - "ph": "f", "id": 323, "pid": 0, "tid": "stream 7", "ts": 1621401187224767, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 24572, "tid": "24572", - "ts": 1621401187224752, "dur": 12, - "args": { - "cbid": 41, "correlation": 323, - "external id": 34, "external ts": 1621401187224720 - } - }, - { - "ph": "s", "id": 323, "pid": 24572, "tid": 24572, "ts": 1621401187224752, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaStreamSynchronize", "pid": 24572, "tid": "24572", - "ts": 1621401187224765, "dur": 7, - "args": { - "cbid": 131, "correlation": 324, - "external id": 34, "external ts": 1621401187224720 - } - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24572", - "ts": 1621401187225253, "dur": 
2, - "args": { - "cbid": 251, "correlation": 332, - "external id": 41, "external ts": 1621401187225112 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr, cublasGemvTensorStridedBatched >(cublasGemmSmallNParams, cublasGemvTensorStridedBatched, float>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187225275, "dur": 3, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 333, "external id": 41, - "registers per thread": 72, - "shared memory": 13824, - "blocks per SM": 0.025, - "warps per SM": 0.1, - "grid": [1, 2, 1], - "block": [128, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 333, "pid": 0, "tid": "stream 7", "ts": 1621401187225275, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187225258, "dur": 16, - "args": { - "cbid": 211, "correlation": 333, - "external id": 41, "external ts": 1621401187225112 - } - }, - { - "ph": "s", "id": 333, "pid": 24572, "tid": 24572, "ts": 1621401187225258, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor, at::detail::Array, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "pid": 0, "tid": "stream 7", - "ts": 1621401187225530, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 338, "external id": 45, - "registers per thread": 22, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 338, "pid": 0, "tid": "stream 7", "ts": 1621401187225530, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187225512, "dur": 16, - "args": { - "cbid": 211, "correlation": 338, - "external id": 45, "external ts": 1621401187225449 - } - }, - { - "ph": "s", "id": 338, "pid": 24572, "tid": 24572, "ts": 1621401187225512, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array >(int, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187225820, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 352, "external id": 49, - "registers per thread": 18, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 352, "pid": 0, "tid": "stream 7", "ts": 1621401187225820, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 
1621401187225803, "dur": 15, - "args": { - "cbid": 211, "correlation": 352, - "external id": 49, "external ts": 1621401187225721 - } - }, - { - "ph": "s", "id": 352, "pid": 24572, "tid": 24572, "ts": 1621401187225803, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24572", - "ts": 1621401187226305, "dur": 2, - "args": { - "cbid": 251, "correlation": 363, - "external id": 57, "external ts": 1621401187226161 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr, cublasGemvTensorStridedBatched >(cublasGemmSmallNParams, cublasGemvTensorStridedBatched, float>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187226325, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 364, "external id": 57, - "registers per thread": 72, - "shared memory": 13824, - "blocks per SM": 0.025, - "warps per SM": 0.1, - "grid": [1, 2, 1], - "block": [128, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 364, "pid": 0, "tid": "stream 7", "ts": 1621401187226325, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187226309, "dur": 15, - "args": { - "cbid": 211, "correlation": 364, - "external id": 57, "external ts": 1621401187226161 - } - }, - { - "ph": "s", "id": 364, "pid": 24572, "tid": 24572, "ts": 1621401187226309, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor, at::detail::Array, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "pid": 0, "tid": "stream 7", - "ts": 1621401187226575, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 369, "external id": 61, - "registers per thread": 22, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 369, "pid": 0, "tid": "stream 7", "ts": 1621401187226575, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187226558, "dur": 15, - "args": { - "cbid": 211, "correlation": 369, - "external id": 61, "external ts": 1621401187226497 - } - }, - { - "ph": "s", "id": 369, "pid": 24572, "tid": 24572, "ts": 1621401187226558, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::mse_kernel_cuda(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float)#1}, at::detail::Array >(int, at::native::mse_kernel_cuda(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float)#1}, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187226912, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 377, "external id": 63, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - 
"warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 377, "pid": 0, "tid": "stream 7", "ts": 1621401187226912, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187226895, "dur": 16, - "args": { - "cbid": 211, "correlation": 377, - "external id": 63, "external ts": 1621401187226753 - } - }, - { - "ph": "s", "id": 377, "pid": 24572, "tid": 24572, "ts": 1621401187226895, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227092, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 388, "external id": 65, - "registers per thread": 32, - "shared memory": 16, - "blocks per SM": 0.0125, - "warps per SM": 0.0125, - "grid": [1, 1, 1], - "block": [32, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 388, "pid": 0, "tid": "stream 7", "ts": 1621401187227092, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227075, "dur": 15, - "args": { - "cbid": 211, "correlation": 388, - "external id": 65, "external ts": 1621401187226930 - } - }, - { - "ph": "s", "id": 388, "pid": 24572, "tid": 24572, "ts": 1621401187227075, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227619, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 395, "external id": 74, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 395, "pid": 0, "tid": "stream 7", "ts": 1621401187227619, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227601, "dur": 16, - "args": { - "cbid": 211, "correlation": 395, - "external id": 74, "external ts": 1621401187227576 - } - }, - { - "ph": "s", "id": 395, "pid": 24572, "tid": 24572, "ts": 1621401187227601, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227745, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 402, "external id": 76, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 402, "pid": 0, "tid": "stream 7", "ts": 1621401187227745, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", 
"pid": 24572, "tid": "24572", - "ts": 1621401187227729, "dur": 14, - "args": { - "cbid": 211, "correlation": 402, - "external id": 76, "external ts": 1621401187227707 - } - }, - { - "ph": "s", "id": 402, "pid": 24572, "tid": 24572, "ts": 1621401187227729, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227859, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 409, "external id": 78, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 409, "pid": 0, "tid": "stream 7", "ts": 1621401187227859, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227844, "dur": 13, - "args": { - "cbid": 211, "correlation": 409, - "external id": 78, "external ts": 1621401187227823 - } - }, - { - "ph": "s", "id": 409, "pid": 24572, "tid": 24572, "ts": 1621401187227844, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227973, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 416, "external id": 80, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 416, "pid": 0, "tid": "stream 7", "ts": 1621401187227973, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227958, "dur": 13, - "args": { - "cbid": 211, "correlation": 416, - "external id": 80, "external ts": 1621401187227937 - } - }, - { - "ph": "s", "id": 416, "pid": 24572, "tid": 24572, "ts": 1621401187227958, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187228279, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 429, "external id": 84, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 429, "pid": 0, "tid": "stream 7", "ts": 1621401187228279, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187228262, "dur": 15, - "args": { - "cbid": 211, "correlation": 429, - "external id": 84, "external ts": 1621401187228235 - } - }, - { - "ph": "s", "id": 429, "pid": 24572, "tid": 24572, "ts": 1621401187228262, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, 
at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187228962, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 440, "external id": 91, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 440, "pid": 0, "tid": "stream 7", "ts": 1621401187228962, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187228932, "dur": 30, - "args": { - "cbid": 211, "correlation": 440, - "external id": 91, "external ts": 1621401187228885 - } - }, - { - "ph": "s", "id": 440, "pid": 24572, "tid": 24610, "ts": 1621401187228932, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::unrolled_elementwise_kernel, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast>(int, at::native::mse_backward_cuda_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float, float)#1}, at::detail::Array, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast)", "pid": 0, "tid": "stream 7", - "ts": 1621401187229153, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 446, "external id": 92, - "registers per thread": 28, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 446, "pid": 0, "tid": "stream 7", "ts": 1621401187229153, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187229127, "dur": 26, - "args": { - "cbid": 211, "correlation": 446, - "external id": 92, "external ts": 1621401187229048 - } - }, - { - "ph": "s", "id": 446, "pid": 24572, "tid": 24610, "ts": 1621401187229127, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::reduce_kernel<256, 2, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187229711, "dur": 4, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 460, "external id": 94, - "registers per thread": 35, - "shared memory": 16, - "blocks per SM": 0.0125, - "warps per SM": 0.00625, - "grid": [1, 1, 1], - "block": [1, 16, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 460, "pid": 0, "tid": "stream 7", "ts": 1621401187229711, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187229681, "dur": 30, - "args": { - "cbid": 211, "correlation": 460, - "external id": 94, "external ts": 1621401187229459 - } - }, - { - "ph": "s", "id": 460, "pid": 24572, "tid": 24610, "ts": 1621401187229681, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": 
"Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187230162, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 467, "external id": 98, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 467, "pid": 0, "tid": "stream 7", "ts": 1621401187230162, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187230133, "dur": 29, - "args": { - "cbid": 211, "correlation": 467, - "external id": 98, "external ts": 1621401187230059 - } - }, - { - "ph": "s", "id": 467, "pid": 24572, "tid": 24610, "ts": 1621401187230133, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187231063, "dur": 4, - "args": { - "cbid": 251, "correlation": 480, - "external id": 107, "external ts": 1621401187230889 - } - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187231069, "dur": 1, - "args": { - "cbid": 251, "correlation": 481, - "external id": 107, "external ts": 1621401187230889 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "volta_sgemm_128x32_nt", "pid": 0, "tid": "stream 7", - "ts": 1621401187231100, "dur": 3, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 482, "external id": 107, - "registers per thread": 55, - "shared memory": 16384, - "blocks per SM": 0.0125, - "warps per SM": 0.1, - "grid": [1, 1, 1], - "block": [256, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 482, "pid": 0, "tid": "stream 7", "ts": 1621401187231100, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187231073, "dur": 27, - "args": { - "cbid": 211, "correlation": 482, - "external id": 107, "external ts": 1621401187230889 - } - }, - { - "ph": "s", "id": 482, "pid": 24572, "tid": 24610, "ts": 1621401187231073, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187231658, "dur": 3, - "args": { - "cbid": 251, "correlation": 491, - "external id": 116, "external ts": 1621401187231491 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_NN_kernel, cublasGemvTensorStridedBatched >(cublasGemmSmallNParams, cublasGemvTensorStridedBatched, float>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187231692, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 492, "external id": 116, - "registers per thread": 64, - "shared memory": 12288, - "blocks per SM": 0.05, - "warps per SM": 0.4, - "grid": [1, 4, 1], - "block": [256, 1, 1], - "theoretical occupancy %": 1 - } - }, - { - "ph": "f", "id": 492, "pid": 0, "tid": "stream 7", "ts": 1621401187231692, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 
1621401187231665, "dur": 27, - "args": { - "cbid": 211, "correlation": 492, - "external id": 116, "external ts": 1621401187231491 - } - }, - { - "ph": "s", "id": 492, "pid": 24572, "tid": 24610, "ts": 1621401187231665, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187232603, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 503, "external id": 126, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 503, "pid": 0, "tid": "stream 7", "ts": 1621401187232603, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187232583, "dur": 19, - "args": { - "cbid": 211, "correlation": 503, - "external id": 126, "external ts": 1621401187232535 - } - }, - { - "ph": "s", "id": 503, "pid": 24572, "tid": 24610, "ts": 1621401187232583, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187232921, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 513, "external id": 130, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 513, "pid": 0, "tid": "stream 7", "ts": 1621401187232921, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187232901, "dur": 19, - "args": { - "cbid": 211, "correlation": 513, - "external id": 130, "external ts": 1621401187232866 - } - }, - { - "ph": "s", "id": 513, "pid": 24572, "tid": 24610, "ts": 1621401187232901, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor >, at::detail::Array >(int, at::native::BUnaryFunctor >, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187233342, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 526, "external id": 133, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 526, "pid": 0, "tid": "stream 7", "ts": 1621401187233342, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187233323, "dur": 18, - "args": { - "cbid": 211, "correlation": 526, - "external id": 133, "external ts": 1621401187233168 - } - }, - { - "ph": "s", "id": 526, "pid": 24572, "tid": 24610, "ts": 1621401187233323, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::unrolled_elementwise_kernel, OffsetCalculator<3, unsigned int>, at::detail::Array<1, 
unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast>(int, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&, c10::ScalarType)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(bool, float, float)#1}, at::detail::Array, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast)", "pid": 0, "tid": "stream 7", - "ts": 1621401187233770, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 535, "external id": 144, - "registers per thread": 26, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 535, "pid": 0, "tid": "stream 7", "ts": 1621401187233770, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187233751, "dur": 19, - "args": { - "cbid": 211, "correlation": 535, - "external id": 144, "external ts": 1621401187233620 - } - }, - { - "ph": "s", "id": 535, "pid": 24572, "tid": 24610, "ts": 1621401187233751, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187234156, "dur": 3, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 548, "external id": 147, - "registers per thread": 32, - "shared memory": 16, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [4, 16, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 548, "pid": 0, "tid": "stream 7", "ts": 1621401187234156, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187234135, "dur": 19, - "args": { - "cbid": 211, "correlation": 548, - "external id": 147, "external ts": 1621401187233990 - } - }, - { - "ph": "s", "id": 548, "pid": 24572, "tid": 24610, "ts": 1621401187234135, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187234445, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 555, "external id": 151, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 555, "pid": 0, "tid": "stream 7", "ts": 1621401187234445, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187234425, "dur": 19, - "args": { - "cbid": 211, "correlation": 555, - "external id": 151, "external ts": 1621401187234378 - } - }, - { - "ph": "s", "id": 555, "pid": 24572, "tid": 24610, "ts": 1621401187234425, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": 
"cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187235000, "dur": 2, - "args": { - "cbid": 251, "correlation": 568, - "external id": 160, "external ts": 1621401187234890 - } - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187235004, "dur": 0, - "args": { - "cbid": 251, "correlation": 569, - "external id": 160, "external ts": 1621401187234890 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "volta_sgemm_128x32_nt", "pid": 0, "tid": "stream 7", - "ts": 1621401187235025, "dur": 3, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 570, "external id": 160, - "registers per thread": 55, - "shared memory": 16384, - "blocks per SM": 0.0125, - "warps per SM": 0.1, - "grid": [1, 1, 1], - "block": [256, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 570, "pid": 0, "tid": "stream 7", "ts": 1621401187235025, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187235006, "dur": 17, - "args": { - "cbid": 211, "correlation": 570, - "external id": 160, "external ts": 1621401187234890 - } - }, - { - "ph": "s", "id": 570, "pid": 24572, "tid": 24610, "ts": 1621401187235006, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187235555, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 579, "external id": 170, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 579, "pid": 0, "tid": "stream 7", "ts": 1621401187235555, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187235535, "dur": 19, - "args": { - "cbid": 211, "correlation": 579, - "external id": 170, "external ts": 1621401187235487 - } - }, - { - "ph": "s", "id": 579, "pid": 24572, "tid": 24610, "ts": 1621401187235535, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187236158, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 585, "external id": 176, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 585, "pid": 0, "tid": "stream 7", "ts": 1621401187236158, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187236138, "dur": 18, - "args": { - "cbid": 211, "correlation": 585, - "external id": 176, "external ts": 1621401187236091 - } - }, - { - "ph": "s", "id": 585, "pid": 24572, "tid": 24572, "ts": 1621401187236138, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": 
"Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187236278, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 590, "external id": 177, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 590, "pid": 0, "tid": "stream 7", "ts": 1621401187236278, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187236261, "dur": 15, - "args": { - "cbid": 211, "correlation": 590, - "external id": 177, "external ts": 1621401187236221 - } - }, - { - "ph": "s", "id": 590, "pid": 24572, "tid": 24572, "ts": 1621401187236261, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187236390, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 595, "external id": 178, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 595, "pid": 0, "tid": "stream 7", "ts": 1621401187236390, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187236373, "dur": 15, - "args": { - "cbid": 211, "correlation": 595, - "external id": 178, "external ts": 1621401187236334 - } - }, - { - "ph": "s", "id": 595, "pid": 24572, "tid": 24572, "ts": 1621401187236373, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187236501, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 600, "external id": 179, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 600, "pid": 0, "tid": "stream 7", "ts": 1621401187236501, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187236483, "dur": 15, - "args": { - "cbid": 211, "correlation": 600, - "external id": 179, "external ts": 1621401187236444 - } - }, - { - "ph": "s", "id": 600, "pid": 24572, "tid": 24572, "ts": 1621401187236483, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaDeviceSynchronize", "pid": 24572, "tid": "24572", - "ts": 1621401187236853, "dur": 10, - "args": { - "cbid": 165, "correlation": 605, - "external id": 0, "external ts": 0 - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 24572, 
"tid": 0, - "args": { - "labels": "CPU" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 0, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 0, "tid": 0, - "args": { - "labels": "GPU 0" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 1, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 1, "tid": 0, - "args": { - "labels": "GPU 1" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 2, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 2, "tid": 0, - "args": { - "labels": "GPU 2" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 3, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 3, "tid": 0, - "args": { - "labels": "GPU 3" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 4, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 4, "tid": 0, - "args": { - "labels": "GPU 4" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 5, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 5, "tid": 0, - "args": { - "labels": "GPU 5" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 6, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 6, "tid": 0, - "args": { - "labels": "GPU 6" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 7, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 7, "tid": 0, - "args": { - "labels": "GPU 7" - } - }, - { - "name": "thread_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": "24610", - "args": { - "name": "thread 24610 (python)" - } - }, - { - "name": "thread_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": "24572", - "args": { - "name": "thread 24572 (python)" - } - }, - { - "ph": "X", "cat": "Trace", "ts": 1621401187223005, "dur": 13896, - "pid": "Traces", "tid": "PyTorch Profiler", - "name": "PyTorch Profiler (0)", - "args": { - "Op count": 0 - } - }, - { - "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", - "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 1621401187223005 - }, - { - "name": "Record Window End", "ph": "i", "s": "g", - "pid": "", "tid": "", "ts": 1621401187237108 - } -, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187223005, "args":{"GPU Utilization":1}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187223005, "args":{"GPU Utilization":0}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187223005, "args":{"GPU Utilization":0.0}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187224005, "args":{"GPU Utilization":0.0}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187225005, "args":{"GPU Utilization":0.6}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187226005, "args":{"GPU Utilization":0.5}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187227005, "args":{"GPU Utilization":0.6}}, 
{"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187228005, "args":{"GPU Utilization":0.2}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187229005, "args":{"GPU Utilization":0.6}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187230005, "args":{"GPU Utilization":0.1}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187231005, "args":{"GPU Utilization":0.5}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187232005, "args":{"GPU Utilization":0.2}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187233005, "args":{"GPU Utilization":0.3}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187234005, "args":{"GPU Utilization":0.4}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187235005, "args":{"GPU Utilization":0.4219409282700422}}, {"ph":"C", "name":"GPU 0 Utilization", "pid":0, "ts":1621401187236901, "args":{"GPU Utilization":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187223005, "args":{"Est. SM Efficiency":1}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187223005, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225275, "args":{"Est. SM Efficiency":0.25}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225278, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225530, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225532, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225820, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187225821, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226325, "args":{"Est. SM Efficiency":0.25}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226327, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226575, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226577, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226912, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187226913, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227092, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227094, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227619, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227620, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227745, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227746, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227859, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227860, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227973, "args":{"Est. 
SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187227974, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187228279, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187228280, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187228962, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187228963, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187229153, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187229155, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187229711, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187229715, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187230162, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187230163, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187231100, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187231103, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187231692, "args":{"Est. SM Efficiency":0.5}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187231694, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187232603, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187232604, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187232921, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187232922, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187233342, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187233343, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187233770, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187233772, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187234156, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187234159, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187234445, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187234446, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187235025, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187235028, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187235555, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187235556, "args":{"Est. 
SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236158, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236159, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236278, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236279, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236390, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236391, "args":{"Est. SM Efficiency":0}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236501, "args":{"Est. SM Efficiency":0.125}}, {"ph":"C", "name":"GPU 0 Est. SM Efficiency", "pid":0, "ts":1621401187236502, "args":{"Est. SM Efficiency":0}}]} \ No newline at end of file diff --git a/plugins/tensorboard-plugins/tb_plugin/test/gpu_metrics_input.json b/plugins/tensorboard-plugins/tb_plugin/test/gpu_metrics_input.json deleted file mode 100644 index 71530b1d6e5602c3ef1decf2bee33c0a3f98cc1c..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/test/gpu_metrics_input.json +++ /dev/null @@ -1,3105 +0,0 @@ - -{ - "schemaVersion": 1, - - "computeProperties": [ - - { - "id": 0, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34084028416, - "major": 7, "minor": 0, - "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048, - "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, - "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304, - "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304 - }, - - { - "id": 1, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216, - "major": 7, "minor": 0, - "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048, - "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, - "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304, - "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304 - }, - - { - "id": 2, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216, - "major": 7, "minor": 0, - "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048, - "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, - "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304, - "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304 - }, - - { - "id": 3, "name": "Tesla V100-DGXS-32GB", "totalGlobalMem": 34087305216, - "major": 7, "minor": 0, - "maxThreadsPerBlock": 1024, "maxThreadsPerMultiProcessor": 2048, - "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, - "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 98304, - "multiProcessorCount": 80, "sharedMemPerBlockOptin": 98304 - } - ], - "traceEvents": [ - - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223197, "dur": 21, - "args": { - "Device": 24572, "External id": 2, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187223264, "dur": 5, - "args": { - "Device": 24572, "External id": 3, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros", "pid": 24572, "tid": "24572", - "ts": 1621401187223182, "dur": 99, - "args": { - 
"Device": 24572, "External id": 1, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223376, "dur": 19, - "args": { - "Device": 24572, "External id": 5, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223480, "dur": 18, - "args": { - "Device": 24572, "External id": 7, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187223530, "dur": 5, - "args": { - "Device": 24572, "External id": 8, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros", "pid": 24572, "tid": "24572", - "ts": 1621401187223469, "dur": 72, - "args": { - "Device": 24572, "External id": 6, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223622, "dur": 19, - "args": { - "Device": 24572, "External id": 10, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187223790, "dur": 12, - "args": { - "Device": 24572, "External id": 13, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::unsqueeze", "pid": 24572, "tid": "24572", - "ts": 1621401187223777, "dur": 50, - "args": { - "Device": 24572, "External id": 12, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187223850, "dur": 7, - "args": { - "Device": 24572, "External id": 15, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::unsqueeze", "pid": 24572, "tid": "24572", - "ts": 1621401187223841, "dur": 24, - "args": { - "Device": 24572, "External id": 14, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187223904, "dur": 16, - "args": { - "Device": 24572, "External id": 18, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::resize_", "pid": 24572, "tid": "24572", - "ts": 1621401187223945, "dur": 14, - "args": { - "Device": 24572, "External id": 19, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_cat", "pid": 24572, "tid": "24572", - "ts": 1621401187223888, "dur": 87, - "args": { - "Device": 24572, "External id": 17, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::cat", "pid": 24572, "tid": "24572", - "ts": 1621401187223876, "dur": 106, - "args": { - "Device": 24572, "External id": 16, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::stack", "pid": 24572, "tid": "24572", - "ts": 1621401187223752, "dur": 245, - "args": { - "Device": 24572, "External id": 11, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - 
"Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224094, "dur": 12, - "args": { - "Device": 24572, "External id": 22, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::unsqueeze", "pid": 24572, "tid": "24572", - "ts": 1621401187224074, "dur": 43, - "args": { - "Device": 24572, "External id": 21, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224137, "dur": 6, - "args": { - "Device": 24572, "External id": 24, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::unsqueeze", "pid": 24572, "tid": "24572", - "ts": 1621401187224128, "dur": 21, - "args": { - "Device": 24572, "External id": 23, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187224184, "dur": 15, - "args": { - "Device": 24572, "External id": 27, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::resize_", "pid": 24572, "tid": "24572", - "ts": 1621401187224223, "dur": 12, - "args": { - "Device": 24572, "External id": 28, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_cat", "pid": 24572, "tid": "24572", - "ts": 1621401187224169, "dur": 79, - "args": { - "Device": 24572, "External id": 26, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::cat", "pid": 24572, "tid": "24572", - "ts": 1621401187224159, "dur": 96, - "args": { - "Device": 24572, "External id": 25, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::stack", "pid": 24572, "tid": "24572", - "ts": 1621401187224056, "dur": 213, - "args": { - "Device": 24572, "External id": 20, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__", "pid": 24572, "tid": "24572", - "ts": 1621401187223604, "dur": 725, - "args": { - "Device": 24572, "External id": 9, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224415, "dur": 54, - "args": { - "Device": 24572, "External id": 30, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::copy_", "pid": 24572, "tid": "24572", - "ts": 1621401187224496, "dur": 80, - "args": { - "Device": 24572, "External id": 31, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 24572, "tid": "24572", - "ts": 1621401187224398, "dur": 193, - "args": { - "Device": 24572, "External id": 29, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_strided", "pid": 24572, "tid": "24572", - 
"ts": 1621401187224645, "dur": 51, - "args": { - "Device": 24572, "External id": 33, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::copy_", "pid": 24572, "tid": "24572", - "ts": 1621401187224720, "dur": 65, - "args": { - "Device": 24572, "External id": 34, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 24572, "tid": "24572", - "ts": 1621401187224631, "dur": 168, - "args": { - "Device": 24572, "External id": 32, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187224956, "dur": 14, - "args": { - "Device": 24572, "External id": 38, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24572", - "ts": 1621401187224945, "dur": 37, - "args": { - "Device": 24572, "External id": 37, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24572", - "ts": 1621401187224917, "dur": 101, - "args": { - "Device": 24572, "External id": 36, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24572", - "ts": 1621401187225058, "dur": 33, - "args": { - "Device": 24572, "External id": 40, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 23 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187225181, "dur": 41, - "args": { - "Device": 24572, "External id": 42, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24572", - "ts": 1621401187225112, "dur": 197, - "args": { - "Device": 24572, "External id": 41, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 23 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24572", - "ts": 1621401187225367, "dur": 17, - "args": { - "Device": 24572, "External id": 44, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_unsafe_view", "pid": 24572, "tid": "24572", - "ts": 1621401187225336, "dur": 79, - "args": { - "Device": 24572, "External id": 43, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 24 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::matmul", "pid": 24572, "tid": "24572", - "ts": 1621401187225037, "dur": 394, - "args": { - "Device": 24572, "External id": 39, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 23 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187225449, "dur": 107, - "args": { - "Device": 24572, "External id": 45, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 25 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::linear", "pid": 24572, "tid": 
"24572", - "ts": 1621401187224907, "dur": 664, - "args": { - "Device": 24572, "External id": 35, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187225662, "dur": 25, - "args": { - "Device": 24572, "External id": 47, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::resize_", "pid": 24572, "tid": "24572", - "ts": 1621401187225746, "dur": 30, - "args": { - "Device": 24572, "External id": 50, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::clamp_min", "pid": 24572, "tid": "24572", - "ts": 1621401187225721, "dur": 105, - "args": { - "Device": 24572, "External id": 49, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::clamp", "pid": 24572, "tid": "24572", - "ts": 1621401187225709, "dur": 128, - "args": { - "Device": 24572, "External id": 48, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::clamp", "pid": 24572, "tid": "24572", - "ts": 1621401187225606, "dur": 263, - "args": { - "Device": 24572, "External id": 46, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 26 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187225978, "dur": 14, - "args": { - "Device": 24572, "External id": 54, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24572", - "ts": 1621401187225968, "dur": 36, - "args": { - "Device": 24572, "External id": 53, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24572", - "ts": 1621401187225941, "dur": 98, - "args": { - "Device": 24572, "External id": 52, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 27 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24572", - "ts": 1621401187226077, "dur": 60, - "args": { - "Device": 24572, "External id": 56, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 28 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187226233, "dur": 41, - "args": { - "Device": 24572, "External id": 58, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24572", - "ts": 1621401187226161, "dur": 197, - "args": { - "Device": 24572, "External id": 57, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 29 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24572", - "ts": 1621401187226416, "dur": 17, - "args": { - "Device": 24572, "External id": 60, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_unsafe_view", "pid": 24572, "tid": "24572", - "ts": 1621401187226384, "dur": 79, - "args": { - "Device": 24572, "External id": 59, - "Trace name": 
"PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 30 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::matmul", "pid": 24572, "tid": "24572", - "ts": 1621401187226057, "dur": 422, - "args": { - "Device": 24572, "External id": 55, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 28 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187226497, "dur": 103, - "args": { - "Device": 24572, "External id": 61, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 31 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::linear", "pid": 24572, "tid": "24572", - "ts": 1621401187225932, "dur": 683, - "args": { - "Device": 24572, "External id": 51, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 27 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::broadcast_tensors", "pid": 24572, "tid": "24572", - "ts": 1621401187226708, "dur": 11, - "args": { - "Device": 24572, "External id": 62, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 32 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187226827, "dur": 41, - "args": { - "Device": 24572, "External id": 64, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187226955, "dur": 35, - "args": { - "Device": 24572, "External id": 66, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187227020, "dur": 11, - "args": { - "Device": 24572, "External id": 67, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::sum", "pid": 24572, "tid": "24572", - "ts": 1621401187226930, "dur": 176, - "args": { - "Device": 24572, "External id": 65, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mse_loss", "pid": 24572, "tid": "24572", - "ts": 1621401187226753, "dur": 445, - "args": { - "Device": 24572, "External id": 63, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 32 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187227327, "dur": 21, - "args": { - "Device": 24572, "External id": 69, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227368, "dur": 5, - "args": { - "Device": 24572, "External id": 70, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros", "pid": 24572, "tid": "24572", - "ts": 1621401187227314, "dur": 65, - "args": { - "Device": 24572, "External id": 68, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187227464, "dur": 18, - "args": { - "Device": 24572, "External id": 72, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", 
"cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187227576, "dur": 49, - "args": { - "Device": 24572, "External id": 74, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227553, "dur": 97, - "args": { - "Device": 24572, "External id": 73, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 33 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187227707, "dur": 43, - "args": { - "Device": 24572, "External id": 76, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227689, "dur": 79, - "args": { - "Device": 24572, "External id": 75, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 33 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187227823, "dur": 42, - "args": { - "Device": 24572, "External id": 78, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227805, "dur": 77, - "args": { - "Device": 24572, "External id": 77, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 33 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187227937, "dur": 41, - "args": { - "Device": 24572, "External id": 80, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187227919, "dur": 77, - "args": { - "Device": 24572, "External id": 79, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 0, "Sequence number": 33 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "Optimizer.zero_grad#SGD.zero_grad", "pid": 24572, "tid": "24572", - "ts": 1621401187227446, "dur": 606, - "args": { - "Device": 24572, "External id": 71, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_strided", "pid": 24572, "tid": "24572", - "ts": 1621401187228150, "dur": 53, - "args": { - "Device": 24572, "External id": 83, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_like", "pid": 24572, "tid": "24572", - "ts": 1621401187228137, "dur": 81, - "args": { - "Device": 24572, "External id": 82, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24572", - "ts": 1621401187228235, "dur": 50, - "args": { - "Device": 24572, "External id": 84, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::ones_like", "pid": 24572, "tid": "24572", - "ts": 1621401187228128, "dur": 169, - "args": { - "Device": 24572, "External id": 81, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187228708, "dur": 79, - "args": { - "Device": 
24572, "External id": 89, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty_like", "pid": 24572, "tid": "24610", - "ts": 1621401187228680, "dur": 146, - "args": { - "Device": 24572, "External id": 88, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24610", - "ts": 1621401187228885, "dur": 93, - "args": { - "Device": 24572, "External id": 91, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24610", - "ts": 1621401187228858, "dur": 147, - "args": { - "Device": 24572, "External id": 90, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros_like", "pid": 24572, "tid": "24610", - "ts": 1621401187228647, "dur": 369, - "args": { - "Device": 24572, "External id": 87, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mse_loss_backward", "pid": 24572, "tid": "24610", - "ts": 1621401187229048, "dur": 122, - "args": { - "Device": 24572, "External id": 92, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mse_loss_backward", "pid": 24572, "tid": "24610", - "ts": 1621401187228603, "dur": 614, - "args": { - "Device": 24572, "External id": 86, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "MseLossBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187228516, "dur": 727, - "args": { - "Device": 24572, "External id": 85, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 32 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "AddBackward1", "pid": 24572, "tid": "24610", - "ts": 1621401187229384, "dur": 17, - "args": { - "Device": 24572, "External id": 93, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 31 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187229506, "dur": 73, - "args": { - "Device": 24572, "External id": 95, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::sum", "pid": 24572, "tid": "24610", - "ts": 1621401187229459, "dur": 279, - "args": { - "Device": 24572, "External id": 94, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187229788, "dur": 65, - "args": { - "Device": 24572, "External id": 96, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24610", - "ts": 1621401187230059, "dur": 131, - "args": { - "Device": 24572, "External id": 98, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610", - "ts": 1621401187230028, "dur": 228, - "args": { - "Device": 24572, "External id": 97, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 
1621401187230405, "dur": 61, - "args": { - "Device": 24572, "External id": 101, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::reshape", "pid": 24572, "tid": "24610", - "ts": 1621401187230383, "dur": 107, - "args": { - "Device": 24572, "External id": 100, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "UnsafeViewBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187230354, "dur": 146, - "args": { - "Device": 24572, "External id": 99, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 30 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187230751, "dur": 22, - "args": { - "Device": 24572, "External id": 105, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187230732, "dur": 65, - "args": { - "Device": 24572, "External id": 104, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187230710, "dur": 124, - "args": { - "Device": 24572, "External id": 103, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::conj", "pid": 24572, "tid": "24610", - "ts": 1621401187230862, "dur": 7, - "args": { - "Device": 24572, "External id": 106, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187230935, "dur": 73, - "args": { - "Device": 24572, "External id": 108, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24610", - "ts": 1621401187230889, "dur": 235, - "args": { - "Device": 24572, "External id": 107, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187231211, "dur": 23, - "args": { - "Device": 24572, "External id": 111, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187231191, "dur": 69, - "args": { - "Device": 24572, "External id": 110, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187231168, "dur": 129, - "args": { - "Device": 24572, "External id": 109, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187231376, "dur": 17, - "args": { - "Device": 24572, "External id": 114, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187231360, "dur": 49, - "args": { - "Device": 24572, "External id": 113, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187231340, "dur": 100, 
- "args": { - "Device": 24572, "External id": 112, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::conj", "pid": 24572, "tid": "24610", - "ts": 1621401187231465, "dur": 6, - "args": { - "Device": 24572, "External id": 115, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187231534, "dur": 72, - "args": { - "Device": 24572, "External id": 117, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24610", - "ts": 1621401187231491, "dur": 225, - "args": { - "Device": 24572, "External id": 116, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "MmBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187230626, "dur": 1124, - "args": { - "Device": 24572, "External id": 102, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 29 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187231992, "dur": 61, - "args": { - "Device": 24572, "External id": 120, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::reshape", "pid": 24572, "tid": "24610", - "ts": 1621401187231970, "dur": 108, - "args": { - "Device": 24572, "External id": 119, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "ViewBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187231941, "dur": 166, - "args": { - "Device": 24572, "External id": 118, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 28 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187232305, "dur": 21, - "args": { - "Device": 24572, "External id": 124, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187232286, "dur": 62, - "args": { - "Device": 24572, "External id": 123, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187232265, "dur": 123, - "args": { - "Device": 24572, "External id": 122, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "TBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187232239, "dur": 161, - "args": { - "Device": 24572, "External id": 121, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 27 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24610", - "ts": 1621401187232535, "dur": 85, - "args": { - "Device": 24572, "External id": 126, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610", - "ts": 1621401187232515, "dur": 148, - "args": { - "Device": 24572, "External id": 125, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", 
"pid": 24572, "tid": "24610", - "ts": 1621401187232790, "dur": 47, - "args": { - "Device": 24572, "External id": 129, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::fill_", "pid": 24572, "tid": "24610", - "ts": 1621401187232866, "dur": 68, - "args": { - "Device": 24572, "External id": 130, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::scalar_tensor", "pid": 24572, "tid": "24610", - "ts": 1621401187232776, "dur": 174, - "args": { - "Device": 24572, "External id": 128, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187233023, "dur": 27, - "args": { - "Device": 24572, "External id": 132, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_local_scalar_dense", "pid": 24572, "tid": "24610", - "ts": 1621401187233192, "dur": 6, - "args": { - "Device": 24572, "External id": 135, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::item", "pid": 24572, "tid": "24610", - "ts": 1621401187233184, "dur": 24, - "args": { - "Device": 24572, "External id": 134, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::resize_", "pid": 24572, "tid": "24610", - "ts": 1621401187233251, "dur": 41, - "args": { - "Device": 24572, "External id": 136, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::ge", "pid": 24572, "tid": "24610", - "ts": 1621401187233168, "dur": 182, - "args": { - "Device": 24572, "External id": 133, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::ge", "pid": 24572, "tid": "24610", - "ts": 1621401187232971, "dur": 404, - "args": { - "Device": 24572, "External id": 131, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187233430, "dur": 15, - "args": { - "Device": 24572, "External id": 139, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::expand", "pid": 24572, "tid": "24610", - "ts": 1621401187233414, "dur": 62, - "args": { - "Device": 24572, "External id": 138, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187233508, "dur": 10, - "args": { - "Device": 24572, "External id": 141, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::expand", "pid": 24572, "tid": "24610", - "ts": 1621401187233494, "dur": 48, - "args": { - "Device": 24572, "External id": 140, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187233571, "dur": 10, - "args": { - "Device": 24572, "External id": 143, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::expand", "pid": 24572, "tid": "24610", - "ts": 1621401187233558, 
"dur": 43, - "args": { - "Device": 24572, "External id": 142, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187233649, "dur": 46, - "args": { - "Device": 24572, "External id": 145, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::_s_where", "pid": 24572, "tid": "24610", - "ts": 1621401187233620, "dur": 167, - "args": { - "Device": 24572, "External id": 144, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::where", "pid": 24572, "tid": "24610", - "ts": 1621401187233398, "dur": 409, - "args": { - "Device": 24572, "External id": 137, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "ClampBackward1", "pid": 24572, "tid": "24610", - "ts": 1621401187232724, "dur": 1110, - "args": { - "Device": 24572, "External id": 127, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 26 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "AddBackward1", "pid": 24572, "tid": "24610", - "ts": 1621401187233941, "dur": 12, - "args": { - "Device": 24572, "External id": 146, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 25 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187234021, "dur": 46, - "args": { - "Device": 24572, "External id": 148, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::sum", "pid": 24572, "tid": "24610", - "ts": 1621401187233990, "dur": 182, - "args": { - "Device": 24572, "External id": 147, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187234208, "dur": 43, - "args": { - "Device": 24572, "External id": 149, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24610", - "ts": 1621401187234378, "dur": 84, - "args": { - "Device": 24572, "External id": 151, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610", - "ts": 1621401187234357, "dur": 144, - "args": { - "Device": 24572, "External id": 150, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::view", "pid": 24572, "tid": "24610", - "ts": 1621401187234593, "dur": 39, - "args": { - "Device": 24572, "External id": 154, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::reshape", "pid": 24572, "tid": "24610", - "ts": 1621401187234580, "dur": 67, - "args": { - "Device": 24572, "External id": 153, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "UnsafeViewBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187234561, "dur": 92, - "args": { - "Device": 24572, "External id": 152, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 24 - } - }, - { - "ph": "X", "cat": "Operator", - 
"name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187234803, "dur": 14, - "args": { - "Device": 24572, "External id": 158, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187234792, "dur": 41, - "args": { - "Device": 24572, "External id": 157, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187234778, "dur": 79, - "args": { - "Device": 24572, "External id": 156, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::conj", "pid": 24572, "tid": "24610", - "ts": 1621401187234874, "dur": 4, - "args": { - "Device": 24572, "External id": 159, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24610", - "ts": 1621401187234918, "dur": 47, - "args": { - "Device": 24572, "External id": 161, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 24572, "tid": "24610", - "ts": 1621401187234890, "dur": 149, - "args": { - "Device": 24572, "External id": 160, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187235092, "dur": 15, - "args": { - "Device": 24572, "External id": 164, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187235080, "dur": 39, - "args": { - "Device": 24572, "External id": 163, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187235067, "dur": 75, - "args": { - "Device": 24572, "External id": 162, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "MmBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187234734, "dur": 424, - "args": { - "Device": 24572, "External id": 155, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 23 - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::as_strided", "pid": 24572, "tid": "24610", - "ts": 1621401187235312, "dur": 13, - "args": { - "Device": 24572, "External id": 168, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 24572, "tid": "24610", - "ts": 1621401187235301, "dur": 40, - "args": { - "Device": 24572, "External id": 167, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 24572, "tid": "24610", - "ts": 1621401187235288, "dur": 78, - "args": { - "Device": 24572, "External id": 166, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "TBackward", "pid": 24572, "tid": "24610", - "ts": 1621401187235271, "dur": 103, - "args": { - "Device": 24572, "External id": 165, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 , - "Fwd thread id": 1, "Sequence number": 22 - } - }, - { - "ph": "X", "cat": "Operator", 
- "name": "aten::add_", "pid": 24572, "tid": "24610", - "ts": 1621401187235487, "dur": 85, - "args": { - "Device": 24572, "External id": 170, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "torch::autograd::AccumulateGrad", "pid": 24572, "tid": "24610", - "ts": 1621401187235467, "dur": 147, - "args": { - "Device": 24572, "External id": 169, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187235803, "dur": 24, - "args": { - "Device": 24572, "External id": 172, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zero_", "pid": 24572, "tid": "24572", - "ts": 1621401187235850, "dur": 5, - "args": { - "Device": 24572, "External id": 173, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::zeros", "pid": 24572, "tid": "24572", - "ts": 1621401187235787, "dur": 75, - "args": { - "Device": 24572, "External id": 171, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::empty", "pid": 24572, "tid": "24572", - "ts": 1621401187235954, "dur": 20, - "args": { - "Device": 24572, "External id": 175, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187236091, "dur": 82, - "args": { - "Device": 24572, "External id": 176, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187236221, "dur": 70, - "args": { - "Device": 24572, "External id": 177, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187236334, "dur": 68, - "args": { - "Device": 24572, "External id": 178, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::add_", "pid": 24572, "tid": "24572", - "ts": 1621401187236444, "dur": 68, - "args": { - "Device": 24572, "External id": 179, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "Optimizer.step#SGD.step", "pid": 24572, "tid": "24572", - "ts": 1621401187235935, "dur": 663, - "args": { - "Device": 24572, "External id": 174, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#2", "pid": 24572, "tid": "24572", - "ts": 1621401187223358, "dur": 13410, - "args": { - "Device": 24572, "External id": 4, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - - } - }, - { - "ph": "X", "cat": "Memcpy", - "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7", - "ts": 1621401187224556, "dur": 1, - "args": { - "device": 0, "context": 1, - "stream": 7, "correlation": 311, "external id": 31, - "bytes": 640, "memory bandwidth (GB/s)": 0.46511627906976744 - } - }, - { - "ph": "f", "id": 311, "pid": 0, "tid": "stream 7", "ts": 1621401187224556, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 24572, "tid": "24572", - "ts": 1621401187224533, "dur": 20, - "args": { - 
"cbid": 41, "correlation": 311, - "external id": 31, "external ts": 1621401187224496 - } - }, - { - "ph": "s", "id": 311, "pid": 24572, "tid": 24572, "ts": 1621401187224533, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaStreamSynchronize", "pid": 24572, "tid": "24572", - "ts": 1621401187224554, "dur": 8, - "args": { - "cbid": 131, "correlation": 312, - "external id": 31, "external ts": 1621401187224496 - } - }, - { - "ph": "X", "cat": "Memcpy", - "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7", - "ts": 1621401187224767, "dur": 1, - "args": { - "device": 0, "context": 1, - "stream": 7, "correlation": 323, "external id": 34, - "bytes": 128, "memory bandwidth (GB/s)": 0.09523809523809523 - } - }, - { - "ph": "f", "id": 323, "pid": 0, "tid": "stream 7", "ts": 1621401187224767, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 24572, "tid": "24572", - "ts": 1621401187224752, "dur": 12, - "args": { - "cbid": 41, "correlation": 323, - "external id": 34, "external ts": 1621401187224720 - } - }, - { - "ph": "s", "id": 323, "pid": 24572, "tid": 24572, "ts": 1621401187224752, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaStreamSynchronize", "pid": 24572, "tid": "24572", - "ts": 1621401187224765, "dur": 7, - "args": { - "cbid": 131, "correlation": 324, - "external id": 34, "external ts": 1621401187224720 - } - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24572", - "ts": 1621401187225253, "dur": 2, - "args": { - "cbid": 251, "correlation": 332, - "external id": 41, "external ts": 1621401187225112 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr, cublasGemvTensorStridedBatched >(cublasGemmSmallNParams, cublasGemvTensorStridedBatched, float>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187225275, "dur": 3, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 333, "external id": 41, - "registers per thread": 72, - "shared memory": 13824, - "blocks per SM": 0.025, - "warps per SM": 0.1, - "grid": [1, 2, 1], - "block": [128, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 333, "pid": 0, "tid": "stream 7", "ts": 1621401187225275, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187225258, "dur": 16, - "args": { - "cbid": 211, "correlation": 333, - "external id": 41, "external ts": 1621401187225112 - } - }, - { - "ph": "s", "id": 333, "pid": 24572, "tid": 24572, "ts": 1621401187225258, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor, at::detail::Array, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "pid": 0, "tid": "stream 7", - "ts": 1621401187225530, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 338, "external id": 45, - "registers per thread": 22, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": 
[1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 338, "pid": 0, "tid": "stream 7", "ts": 1621401187225530, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187225512, "dur": 16, - "args": { - "cbid": 211, "correlation": 338, - "external id": 45, "external ts": 1621401187225449 - } - }, - { - "ph": "s", "id": 338, "pid": 24572, "tid": 24572, "ts": 1621401187225512, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array >(int, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187225820, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 352, "external id": 49, - "registers per thread": 18, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 352, "pid": 0, "tid": "stream 7", "ts": 1621401187225820, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187225803, "dur": 15, - "args": { - "cbid": 211, "correlation": 352, - "external id": 49, "external ts": 1621401187225721 - } - }, - { - "ph": "s", "id": 352, "pid": 24572, "tid": 24572, "ts": 1621401187225803, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24572", - "ts": 1621401187226305, "dur": 2, - "args": { - "cbid": 251, "correlation": 363, - "external id": 57, "external ts": 1621401187226161 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr, cublasGemvTensorStridedBatched >(cublasGemmSmallNParams, cublasGemvTensorStridedBatched, float>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187226325, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 364, "external id": 57, - "registers per thread": 72, - "shared memory": 13824, - "blocks per SM": 0.025, - "warps per SM": 0.1, - "grid": [1, 2, 1], - "block": [128, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 364, "pid": 0, "tid": "stream 7", "ts": 1621401187226325, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187226309, "dur": 15, - "args": { - "cbid": 211, "correlation": 364, - "external id": 57, "external ts": 1621401187226161 - } - }, - { - "ph": "s", "id": 364, "pid": 24572, "tid": 24572, "ts": 1621401187226309, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::AddFunctor, at::detail::Array, 
OffsetCalculator<2, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "pid": 0, "tid": "stream 7", - "ts": 1621401187226575, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 369, "external id": 61, - "registers per thread": 22, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 369, "pid": 0, "tid": "stream 7", "ts": 1621401187226575, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187226558, "dur": 15, - "args": { - "cbid": 211, "correlation": 369, - "external id": 61, "external ts": 1621401187226497 - } - }, - { - "ph": "s", "id": 369, "pid": 24572, "tid": 24572, "ts": 1621401187226558, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::mse_kernel_cuda(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float)#1}, at::detail::Array >(int, at::native::mse_kernel_cuda(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float)#1}, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187226912, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 377, "external id": 63, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 377, "pid": 0, "tid": "stream 7", "ts": 1621401187226912, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187226895, "dur": 16, - "args": { - "cbid": 211, "correlation": 377, - "external id": 63, "external ts": 1621401187226753 - } - }, - { - "ph": "s", "id": 377, "pid": 24572, "tid": 24572, "ts": 1621401187226895, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227092, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 388, "external id": 65, - "registers per thread": 32, - "shared memory": 16, - "blocks per SM": 0.0125, - "warps per SM": 0.0125, - "grid": [1, 1, 1], - "block": [32, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 388, "pid": 0, "tid": "stream 7", "ts": 1621401187227092, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227075, "dur": 15, - "args": { - "cbid": 211, "correlation": 388, - "external id": 65, "external ts": 1621401187226930 - } - }, - { - "ph": "s", "id": 388, "pid": 24572, "tid": 24572, "ts": 1621401187227075, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, 
at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227619, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 395, "external id": 74, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 395, "pid": 0, "tid": "stream 7", "ts": 1621401187227619, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227601, "dur": 16, - "args": { - "cbid": 211, "correlation": 395, - "external id": 74, "external ts": 1621401187227576 - } - }, - { - "ph": "s", "id": 395, "pid": 24572, "tid": 24572, "ts": 1621401187227601, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227745, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 402, "external id": 76, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 402, "pid": 0, "tid": "stream 7", "ts": 1621401187227745, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227729, "dur": 14, - "args": { - "cbid": 211, "correlation": 402, - "external id": 76, "external ts": 1621401187227707 - } - }, - { - "ph": "s", "id": 402, "pid": 24572, "tid": 24572, "ts": 1621401187227729, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227859, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 409, "external id": 78, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 409, "pid": 0, "tid": "stream 7", "ts": 1621401187227859, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227844, "dur": 13, - "args": { - "cbid": 211, "correlation": 409, - "external id": 78, "external ts": 1621401187227823 - } - }, - { - "ph": "s", "id": 409, "pid": 24572, "tid": 24572, "ts": 1621401187227844, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187227973, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 416, "external id": 80, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - 
"theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 416, "pid": 0, "tid": "stream 7", "ts": 1621401187227973, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187227958, "dur": 13, - "args": { - "cbid": 211, "correlation": 416, - "external id": 80, "external ts": 1621401187227937 - } - }, - { - "ph": "s", "id": 416, "pid": 24572, "tid": 24572, "ts": 1621401187227958, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187228279, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 429, "external id": 84, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 429, "pid": 0, "tid": "stream 7", "ts": 1621401187228279, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187228262, "dur": 15, - "args": { - "cbid": 211, "correlation": 429, - "external id": 84, "external ts": 1621401187228235 - } - }, - { - "ph": "s", "id": 429, "pid": 24572, "tid": 24572, "ts": 1621401187228262, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187228962, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 440, "external id": 91, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 440, "pid": 0, "tid": "stream 7", "ts": 1621401187228962, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187228932, "dur": 30, - "args": { - "cbid": 211, "correlation": 440, - "external id": 91, "external ts": 1621401187228885 - } - }, - { - "ph": "s", "id": 440, "pid": 24572, "tid": 24610, "ts": 1621401187228932, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::unrolled_elementwise_kernel, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast>(int, at::native::mse_backward_cuda_kernel(at::TensorIterator&, c10::Scalar const&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(float, float, float)#1}, at::detail::Array, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast)", "pid": 0, "tid": "stream 7", - "ts": 1621401187229153, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 446, "external id": 92, - "registers per thread": 28, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy 
%": 0 - } - }, - { - "ph": "f", "id": 446, "pid": 0, "tid": "stream 7", "ts": 1621401187229153, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187229127, "dur": 26, - "args": { - "cbid": 211, "correlation": 446, - "external id": 92, "external ts": 1621401187229048 - } - }, - { - "ph": "s", "id": 446, "pid": 24572, "tid": 24610, "ts": 1621401187229127, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::reduce_kernel<256, 2, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187229711, "dur": 4, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 460, "external id": 94, - "registers per thread": 35, - "shared memory": 16, - "blocks per SM": 0.0125, - "warps per SM": 0.00625, - "grid": [1, 1, 1], - "block": [1, 16, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 460, "pid": 0, "tid": "stream 7", "ts": 1621401187229711, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187229681, "dur": 30, - "args": { - "cbid": 211, "correlation": 460, - "external id": 94, "external ts": 1621401187229459 - } - }, - { - "ph": "s", "id": 460, "pid": 24572, "tid": 24610, "ts": 1621401187229681, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187230162, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 467, "external id": 98, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 467, "pid": 0, "tid": "stream 7", "ts": 1621401187230162, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187230133, "dur": 29, - "args": { - "cbid": 211, "correlation": 467, - "external id": 98, "external ts": 1621401187230059 - } - }, - { - "ph": "s", "id": 467, "pid": 24572, "tid": 24610, "ts": 1621401187230133, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187231063, "dur": 4, - "args": { - "cbid": 251, "correlation": 480, - "external id": 107, "external ts": 1621401187230889 - } - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187231069, "dur": 1, - "args": { - "cbid": 251, "correlation": 481, - "external id": 107, "external ts": 1621401187230889 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "volta_sgemm_128x32_nt", "pid": 0, "tid": "stream 7", - "ts": 1621401187231100, "dur": 3, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 482, "external id": 107, - "registers per thread": 55, - "shared 
memory": 16384, - "blocks per SM": 0.0125, - "warps per SM": 0.1, - "grid": [1, 1, 1], - "block": [256, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 482, "pid": 0, "tid": "stream 7", "ts": 1621401187231100, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187231073, "dur": 27, - "args": { - "cbid": 211, "correlation": 482, - "external id": 107, "external ts": 1621401187230889 - } - }, - { - "ph": "s", "id": 482, "pid": 24572, "tid": 24610, "ts": 1621401187231073, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187231658, "dur": 3, - "args": { - "cbid": 251, "correlation": 491, - "external id": 116, "external ts": 1621401187231491 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_NN_kernel, cublasGemvTensorStridedBatched >(cublasGemmSmallNParams, cublasGemvTensorStridedBatched, float>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187231692, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 492, "external id": 116, - "registers per thread": 64, - "shared memory": 12288, - "blocks per SM": 0.05, - "warps per SM": 0.4, - "grid": [1, 4, 1], - "block": [256, 1, 1], - "theoretical occupancy %": 1 - } - }, - { - "ph": "f", "id": 492, "pid": 0, "tid": "stream 7", "ts": 1621401187231692, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187231665, "dur": 27, - "args": { - "cbid": 211, "correlation": 492, - "external id": 116, "external ts": 1621401187231491 - } - }, - { - "ph": "s", "id": 492, "pid": 24572, "tid": 24610, "ts": 1621401187231665, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187232603, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 503, "external id": 126, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 503, "pid": 0, "tid": "stream 7", "ts": 1621401187232603, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187232583, "dur": 19, - "args": { - "cbid": 211, "correlation": 503, - "external id": 126, "external ts": 1621401187232535 - } - }, - { - "ph": "s", "id": 503, "pid": 24572, "tid": 24610, "ts": 1621401187232583, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187232921, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 513, "external id": 130, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - 
"ph": "f", "id": 513, "pid": 0, "tid": "stream 7", "ts": 1621401187232921, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187232901, "dur": 19, - "args": { - "cbid": 211, "correlation": 513, - "external id": 130, "external ts": 1621401187232866 - } - }, - { - "ph": "s", "id": 513, "pid": 24572, "tid": 24610, "ts": 1621401187232901, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor >, at::detail::Array >(int, at::native::BUnaryFunctor >, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187233342, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 526, "external id": 133, - "registers per thread": 16, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 526, "pid": 0, "tid": "stream 7", "ts": 1621401187233342, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187233323, "dur": 18, - "args": { - "cbid": 211, "correlation": 526, - "external id": 133, "external ts": 1621401187233168 - } - }, - { - "ph": "s", "id": 526, "pid": 24572, "tid": 24610, "ts": 1621401187233323, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::unrolled_elementwise_kernel, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast>(int, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&, c10::ScalarType)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(bool, float, float)#1}, at::detail::Array, OffsetCalculator<3, unsigned int>, at::detail::Array<1, unsigned int>, at::native::memory::LoadWithoutCast, OffsetCalculator::StoreWithoutCast)", "pid": 0, "tid": "stream 7", - "ts": 1621401187233770, "dur": 2, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 535, "external id": 144, - "registers per thread": 26, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 535, "pid": 0, "tid": "stream 7", "ts": 1621401187233770, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187233751, "dur": 19, - "args": { - "cbid": 211, "correlation": 535, - "external id": 144, "external ts": 1621401187233620 - } - }, - { - "ph": "s", "id": 535, "pid": 24572, "tid": 24610, "ts": 1621401187233751, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": "stream 7", - "ts": 1621401187234156, "dur": 3, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 548, "external id": 147, - "registers per thread": 32, - "shared memory": 16, - "blocks per SM": 0.0125, - 
"warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [4, 16, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 548, "pid": 0, "tid": "stream 7", "ts": 1621401187234156, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187234135, "dur": 19, - "args": { - "cbid": 211, "correlation": 548, - "external id": 147, "external ts": 1621401187233990 - } - }, - { - "ph": "s", "id": 548, "pid": 24572, "tid": 24610, "ts": 1621401187234135, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187234445, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 555, "external id": 151, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 555, "pid": 0, "tid": "stream 7", "ts": 1621401187234445, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187234425, "dur": 19, - "args": { - "cbid": 211, "correlation": 555, - "external id": 151, "external ts": 1621401187234378 - } - }, - { - "ph": "s", "id": 555, "pid": 24572, "tid": 24610, "ts": 1621401187234425, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187235000, "dur": 2, - "args": { - "cbid": 251, "correlation": 568, - "external id": 160, "external ts": 1621401187234890 - } - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 24572, "tid": "24610", - "ts": 1621401187235004, "dur": 0, - "args": { - "cbid": 251, "correlation": 569, - "external id": 160, "external ts": 1621401187234890 - } - }, - { - "ph": "X", "cat": "Kernel", - "name": "volta_sgemm_128x32_nt", "pid": 0, "tid": "stream 7", - "ts": 1621401187235025, "dur": 3, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 570, "external id": 160, - "registers per thread": 55, - "shared memory": 16384, - "blocks per SM": 0.0125, - "warps per SM": 0.1, - "grid": [1, 1, 1], - "block": [256, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 570, "pid": 0, "tid": "stream 7", "ts": 1621401187235025, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187235006, "dur": 17, - "args": { - "cbid": 211, "correlation": 570, - "external id": 160, "external ts": 1621401187234890 - } - }, - { - "ph": "s", "id": 570, "pid": 24572, "tid": 24610, "ts": 1621401187235006, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187235555, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 579, "external id": 170, - "registers per thread": 20, - "shared memory": 0, - 
"blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 579, "pid": 0, "tid": "stream 7", "ts": 1621401187235555, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24610", - "ts": 1621401187235535, "dur": 19, - "args": { - "cbid": 211, "correlation": 579, - "external id": 170, "external ts": 1621401187235487 - } - }, - { - "ph": "s", "id": 579, "pid": 24572, "tid": 24610, "ts": 1621401187235535, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187236158, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 585, "external id": 176, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 585, "pid": 0, "tid": "stream 7", "ts": 1621401187236158, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187236138, "dur": 18, - "args": { - "cbid": 211, "correlation": 585, - "external id": 176, "external ts": 1621401187236091 - } - }, - { - "ph": "s", "id": 585, "pid": 24572, "tid": 24572, "ts": 1621401187236138, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187236278, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 590, "external id": 177, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 590, "pid": 0, "tid": "stream 7", "ts": 1621401187236278, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187236261, "dur": 15, - "args": { - "cbid": 211, "correlation": 590, - "external id": 177, "external ts": 1621401187236221 - } - }, - { - "ph": "s", "id": 590, "pid": 24572, "tid": 24572, "ts": 1621401187236261, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187236390, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 595, "external id": 178, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 595, "pid": 0, "tid": "stream 7", "ts": 1621401187236390, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187236373, "dur": 15, - "args": { - "cbid": 
211, "correlation": 595, - "external id": 178, "external ts": 1621401187236334 - } - }, - { - "ph": "s", "id": 595, "pid": 24572, "tid": 24572, "ts": 1621401187236373, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "pid": 0, "tid": "stream 7", - "ts": 1621401187236501, "dur": 1, - "args": { - "queued": 0, "device": 0, "context": 1, - "stream": 7, "correlation": 600, "external id": 179, - "registers per thread": 20, - "shared memory": 0, - "blocks per SM": 0.0125, - "warps per SM": 0.025, - "grid": [1, 1, 1], - "block": [64, 1, 1], - "theoretical occupancy %": 0 - } - }, - { - "ph": "f", "id": 600, "pid": 0, "tid": "stream 7", "ts": 1621401187236501, - "cat": "async", "name": "launch", "bp": "e" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 24572, "tid": "24572", - "ts": 1621401187236483, "dur": 15, - "args": { - "cbid": 211, "correlation": 600, - "external id": 179, "external ts": 1621401187236444 - } - }, - { - "ph": "s", "id": 600, "pid": 24572, "tid": 24572, "ts": 1621401187236483, - "cat": "async", "name": "launch" - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaDeviceSynchronize", "pid": 24572, "tid": "24572", - "ts": 1621401187236853, "dur": 10, - "args": { - "cbid": 165, "correlation": 605, - "external id": 0, "external ts": 0 - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": 0, - "args": { - "labels": "CPU" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 0, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 0, "tid": 0, - "args": { - "labels": "GPU 0" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 1, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 1, "tid": 0, - "args": { - "labels": "GPU 1" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 2, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 2, "tid": 0, - "args": { - "labels": "GPU 2" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 3, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 3, "tid": 0, - "args": { - "labels": "GPU 3" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 4, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 4, "tid": 0, - "args": { - "labels": "GPU 4" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 5, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 5, "tid": 0, - "args": { - "labels": "GPU 5" - } - }, - { - "name": "process_name", "ph": "M", "ts": 1621401187223005, "pid": 6, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 6, "tid": 0, - "args": { - "labels": "GPU 6" - } - }, - { - "name": "process_name", "ph": "M", 
"ts": 1621401187223005, "pid": 7, "tid": 0, - "args": { - "name": "python" - } - }, - { - "name": "process_labels", "ph": "M", "ts": 1621401187223005, "pid": 7, "tid": 0, - "args": { - "labels": "GPU 7" - } - }, - { - "name": "thread_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": "24610", - "args": { - "name": "thread 24610 (python)" - } - }, - { - "name": "thread_name", "ph": "M", "ts": 1621401187223005, "pid": 24572, "tid": "24572", - "args": { - "name": "thread 24572 (python)" - } - }, - { - "ph": "X", "cat": "Trace", "ts": 1621401187223005, "dur": 13896, - "pid": "Traces", "tid": "PyTorch Profiler", - "name": "PyTorch Profiler (0)", - "args": { - "Op count": 0 - } - }, - { - "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", - "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 1621401187223005 - }, - { - "name": "Record Window End", "ph": "i", "s": "g", - "pid": "", "tid": "", "ts": 1621401187237108 - } -]} \ No newline at end of file diff --git a/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_0/worker0.1623143089861.pt.trace.json.gz b/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_0/worker0.1623143089861.pt.trace.json.gz deleted file mode 100644 index 769c3eb7843639ac114d183e07304b6d44931452..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_0/worker0.1623143089861.pt.trace.json.gz and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_0/worker0.1623143566756.pt.trace.json.gz b/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_0/worker0.1623143566756.pt.trace.json.gz deleted file mode 100644 index 383a066433725823a57b0dfd047d718b65d4741b..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_0/worker0.1623143566756.pt.trace.json.gz and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_4/worker0.1623212756351.pt.trace.json.gz b/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_4/worker0.1623212756351.pt.trace.json.gz deleted file mode 100644 index 234cf25df597fb99765ee2a49978f5f3c4bbbaf9..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_4/worker0.1623212756351.pt.trace.json.gz and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_4/worker0.1623213129365.pt.trace.json.gz b/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_4/worker0.1623213129365.pt.trace.json.gz deleted file mode 100644 index 3e633b5fde9d2e99ea8b07a67771183b7011bbc5..0000000000000000000000000000000000000000 Binary files a/plugins/tensorboard-plugins/tb_plugin/test/resources/resnet50_num_workers_4/worker0.1623213129365.pt.trace.json.gz and /dev/null differ diff --git a/plugins/tensorboard-plugins/tb_plugin/test/result_check_file.txt b/plugins/tensorboard-plugins/tb_plugin/test/result_check_file.txt deleted file mode 100644 index 845aae2420fd3c75808a58937b0a7a794777914d..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/test/result_check_file.txt +++ /dev/null @@ -1,10 +0,0 @@ -{"steps": {"columns": [{"type": "string", "name": "Step"}, {"type": "number", "name": "Kernel"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, 
{"type": "number", "name": "Memcpy"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "Memset"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "Runtime"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "DataLoader"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "CPU Exec"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "Other"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}], "rows": [["5", 98598, "
Step 5
Total: 187948us
Kernel: 98598us
Percentage: 52.46%
", 1941, "
Step 5
Total: 187948us
Memcpy: 1941us
Percentage: 1.03%
", 90, "
Step 5
Total: 187948us
Memset: 90us
Percentage: 0.05%
", 2796, "
Step 5
Total: 187948us
Runtime: 2796us
Percentage: 1.49%
", 69317, "
Step 5
Total: 187948us
DataLoader: 69317us
Percentage: 36.88%
", 14091, "
Step 5
Total: 187948us
CPU Exec: 14091us
Percentage: 7.5%
", 1115, "
Step 5
Total: 187948us
Other: 1115us
Percentage: 0.59%
"], ["6", 98570, "
Step 6
Total: 175153us
Kernel: 98570us
Percentage: 56.28%
", 1947, "
Step 6
Total: 175153us
Memcpy: 1947us
Percentage: 1.11%
", 89, "
Step 6
Total: 175153us
Memset: 89us
Percentage: 0.05%
", 2762, "
Step 6
Total: 175153us
Runtime: 2762us
Percentage: 1.58%
", 57669, "
Step 6
Total: 175153us
DataLoader: 57669us
Percentage: 32.92%
", 12968, "
Step 6
Total: 175153us
CPU Exec: 12968us
Percentage: 7.4%
", 1148, "
Step 6
Total: 175153us
Other: 1148us
Percentage: 0.66%
"], ["7", 98596, "
Step 7
Total: 179733us
Kernel: 98596us
Percentage: 54.86%
", 1931, "
Step 7
Total: 179733us
Memcpy: 1931us
Percentage: 1.07%
", 91, "
Step 7
Total: 179733us
Memset: 91us
Percentage: 0.05%
", 2877, "
Step 7
Total: 179733us
Runtime: 2877us
Percentage: 1.6%
", 61257, "
Step 7
Total: 179733us
DataLoader: 61257us
Percentage: 34.08%
", 13768, "
Step 7
Total: 179733us
CPU Exec: 13768us
Percentage: 7.66%
", 1213, "
Step 7
Total: 179733us
Other: 1213us
Percentage: 0.67%
"], ["8", 98623, "
Step 8
Total: 174564us
Kernel: 98623us
Percentage: 56.5%
", 1938, "
Step 8
Total: 174564us
Memcpy: 1938us
Percentage: 1.11%
", 89, "
Step 8
Total: 174564us
Memset: 89us
Percentage: 0.05%
", 2841, "
Step 8
Total: 174564us
Runtime: 2841us
Percentage: 1.63%
", 56453, "
Step 8
Total: 174564us
DataLoader: 56453us
Percentage: 32.34%
", 13420, "
Step 8
Total: 174564us
CPU Exec: 13420us
Percentage: 7.69%
", 1200, "
Step 8
Total: 174564us
Other: 1200us
Percentage: 0.69%
"], ["9", 98504, "
Step 9
Total: 182172us
Kernel: 98504us
Percentage: 54.07%
", 1937, "
Step 9
Total: 182172us
Memcpy: 1937us
Percentage: 1.06%
", 87, "
Step 9
Total: 182172us
Memset: 87us
Percentage: 0.05%
", 2788, "
Step 9
Total: 182172us
Runtime: 2788us
Percentage: 1.53%
", 62690, "
Step 9
Total: 182172us
DataLoader: 62690us
Percentage: 34.41%
", 15025, "
Step 9
Total: 182172us
CPU Exec: 15025us
Percentage: 8.25%
", 1141, "
Step 9
Total: 182172us
Other: 1141us
Percentage: 0.63%
"], ["10", 98641, "
Step 10
Total: 165983us
Kernel: 98641us
Percentage: 59.43%
", 1798, "
Step 10
Total: 165983us
Memcpy: 1798us
Percentage: 1.08%
", 88, "
Step 10
Total: 165983us
Memset: 88us
Percentage: 0.05%
", 3381, "
Step 10
Total: 165983us
Runtime: 3381us
Percentage: 2.04%
", 48185, "
Step 10
Total: 165983us
DataLoader: 48185us
Percentage: 29.03%
", 12773, "
Step 10
Total: 165983us
CPU Exec: 12773us
Percentage: 7.7%
", 1117, "
Step 10
Total: 165983us
Other: 1117us
Percentage: 0.67%
"]]}, "performance": [{"name": "Average Step Time", "description": "", "value": 177592, "extra": 100, "children": [{"name": "Kernel", "description": "", "value": 98589, "extra": 55.51}, {"name": "Memcpy", "description": "", "value": 1915, "extra": 1.08}, {"name": "Memset", "description": "", "value": 89, "extra": 0.05}, {"name": "Runtime", "description": "", "value": 2908, "extra": 1.64}, {"name": "DataLoader", "description": "", "value": 59262, "extra": 33.37}, {"name": "CPU Exec", "description": "", "value": 13674, "extra": 7.7}, {"name": "Other", "description": "", "value": 1156, "extra": 0.65}]}], "recommendations": "
  • This run has a high time cost on input data loading: 33.4% of the step time is spent in DataLoader. You could try setting num_workers on the DataLoader's construction to enable multi-process data loading.
  • Kernels accounting for 68% of kernel time are launched by Tensor Cores eligible operators. You could enable Automatic Mixed Precision to speed up training by using FP16.
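The two recommendations above come straight from the expected overview output: raise num_workers on the DataLoader and turn on Automatic Mixed Precision. A minimal sketch of both, assuming a generic CUDA-capable PyTorch setup in which model, dataset, optimizer and loss_fn are placeholder names rather than anything defined in this repository:
```
import torch
from torch.utils.data import DataLoader, TensorDataset

# Placeholder model/data/optimizer; in practice these come from the training script being profiled.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(10, 2).to(device)
dataset = TensorDataset(torch.randn(64, 10), torch.randint(0, 2, (64,)))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

# Recommendation 1: move data loading into worker processes instead of the training process.
loader = DataLoader(dataset, batch_size=8, num_workers=4, pin_memory=True)

# Recommendation 2: Automatic Mixed Precision, so eligible kernels can run in FP16 on Tensor Cores.
scaler = torch.cuda.amp.GradScaler(enabled=(device == "cuda"))

if __name__ == "__main__":
    for inputs, targets in loader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=(device == "cuda")):
            loss = loss_fn(model(inputs), targets)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
```
Judging by their names, the resnet50_num_workers_0 and resnet50_num_workers_4 trace fixtures removed above differ in exactly this DataLoader setting.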
", "environments": [{"title": "Number of Worker(s)", "value": "1"}, {"title": "Device Type", "value": "GPU"}], "gpu_metrics": {"title": "GPU Summary", "data": [{"title": "GPU 0:", "value": ""}, {"title": "Name", "value": "Tesla V100-DGXS-32GB"}, {"title": "Memory", "value": "31.74 GB"}, {"title": "Compute Capability", "value": "7.0"}, {"title": "GPU Utilization", "value": "55.51 %"}, {"title": "Est. SM Efficiency", "value": "54.68 %"}, {"title": "Est. Achieved Occupancy", "value": "49.13 %"}, {"title": "Kernel Time using Tensor Cores", "value": "0.0 %"}], "tooltip": "The GPU usage metrics:\n\nGPU Utilization:\nGPU busy time / All steps time. The higher, the better. GPU busy time is the time during which there is at least one GPU kernel running on it. All steps time is the total time of all profiler steps(or called as iterations).\n\nEst. SM Efficiency:\nEstimated Stream Multiprocessor Efficiency. The higher, the better. This metric of a kernel, SM_Eff_K = min(blocks of this kernel / SM number of this GPU, 100%). This overall number is the sum of all kernels' SM_Eff_K weighted by kernel's execution duration, divided by all steps time.\n\nEst. Achieved Occupancy:\nFor most cases such as memory bandwidth bounded kernels, the higher the better. Occupancy is the ratio of active warps on an SM to the maximum number of active warps supported by the SM. The theoretical occupancy of a kernel is upper limit occupancy of this kernel, limited by multiple factors such as kernel shape, kernel used resource, and the GPU compute capability.\nEst. Achieved Occupancy of a kernel, OCC_K = min(threads of the kernel / SM number / max threads per SM, theoretical occupancy of the kernel). This overall number is the weighted average of all kernels' OCC_K using kernel's execution duration as weight. 
It shows fine-grained low-level GPU utilization.\n\nKernel using Tensor Cores:\nTotal GPU Time for Tensor Core kernels / Total GPU Time for all kernels.\n"}} -{"device_total_time": {"title": "Device Total Time (us)", "columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["aten::cudnn_convolution_backward", 273428], ["CudnnConvolutionBackward", 273428], ["aten::cudnn_convolution_backward_weight", 142461], ["aten::cudnn_convolution_backward_input", 130967], ["aten::cudnn_convolution", 126619], ["aten::_convolution", 126619], ["aten::convolution", 126619], ["aten::conv2d", 126619], ["aten::cudnn_batch_norm_backward", 61939], ["CudnnBatchNormBackward", 61939], ["aten::cudnn_batch_norm", 34245], ["aten::_batch_norm_impl_index", 34245], ["aten::batch_norm", 34245], ["aten::threshold_backward", 27298], ["ReluBackward1", 27298], ["aten::add_", 24098], ["aten::clamp_min", 17860], ["aten::clamp_min_", 17860], ["aten::relu_", 17860], ["aten::add", 16038], ["aten::copy_", 11492], ["aten::to", 11492], ["aten::max_pool2d_with_indices_backward", 4677], ["MaxPool2DWithIndicesBackward", 4677], ["torch::autograd::AccumulateGrad", 3030], ["aten::mul_", 2409], ["aten::fill_", 1887], ["aten::zero_", 1881], ["aten::max_pool2d_with_indices", 1420], ["aten::max_pool2d", 1420], ["aten::mm", 275], ["AddmmBackward", 275], ["aten::mean", 212], ["aten::adaptive_avg_pool2d", 212], ["aten::addmm", 197], ["aten::linear", 197], ["aten::div", 144], ["MeanBackward1", 144], ["aten::cross_entropy_loss", 60], ["aten::_log_softmax_backward_data", 53], ["LogSoftmaxBackward", 53], ["aten::sum", 44], ["aten::_log_softmax", 42], ["aten::log_softmax", 42], ["aten::nll_loss_forward", 18], ["aten::nll_loss", 18], ["aten::nll_loss_nd", 18], ["aten::nll_loss_backward", 18], ["NllLossBackward", 18], ["aten::ones_like", 6]]}, "device_self_time": {"title": "Device Self Time (us)", "columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["aten::cudnn_convolution_backward_weight", 142461], ["aten::cudnn_convolution_backward_input", 130967], ["aten::cudnn_convolution", 126619], ["aten::cudnn_batch_norm_backward", 61939], ["aten::cudnn_batch_norm", 34245], ["aten::threshold_backward", 27298], ["aten::add_", 24098], ["aten::clamp_min", 17860], ["aten::add", 16038], ["aten::copy_", 11492], ["aten::max_pool2d_with_indices_backward", 3822], ["aten::mul_", 2409], ["aten::fill_", 1887], ["aten::max_pool2d_with_indices", 1420], ["aten::mm", 275], ["aten::mean", 212], ["aten::addmm", 197], ["aten::div", 144], ["aten::_log_softmax_backward_data", 53], ["aten::sum", 44], ["aten::_log_softmax", 42], ["aten::nll_loss_forward", 18], ["aten::nll_loss_backward", 18]]}, "host_total_time": {"title": "Host Total Time (us)", "columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["CudnnConvolutionBackward", 90989], ["aten::batch_norm", 87977], ["aten::cudnn_convolution_backward", 87772], ["aten::add_", 78125], ["aten::_batch_norm_impl_index", 78071], ["aten::conv2d", 77781], ["aten::cudnn_batch_norm", 71527], ["aten::convolution", 70394], ["aten::empty", 68147], ["aten::to", 64332], ["aten::_convolution", 64243], ["aten::cudnn_convolution", 56998], ["aten::copy_", 52853], ["aten::cudnn_convolution_backward_input", 41445], ["aten::cudnn_convolution_backward_weight", 40246], ["aten::div", 35158], ["CudnnBatchNormBackward", 34608], ["aten::contiguous", 31137], ["aten::cudnn_batch_norm_backward", 30460], ["aten::mul_", 29081], 
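The gpu_metrics tooltip above states how the summary numbers are aggregated: GPU Utilization is busy time over all-steps time, while Est. SM Efficiency and Est. Achieved Occupancy cap a per-kernel value and then weight it by each kernel's execution duration. A small self-contained sketch of that arithmetic; the field names and sample records are hypothetical, not the plugin's internal format, and busy time is approximated by assuming kernels do not overlap:
```
# Duration-weighted GPU summary metrics, following the formulas quoted in the tooltip.
def weighted_gpu_metrics(kernels, sm_count, max_threads_per_sm, all_steps_time_us):
    # Busy time approximated as the sum of kernel durations (assumes non-overlapping kernels).
    busy_time = sum(k["dur"] for k in kernels)
    gpu_utilization = busy_time / all_steps_time_us

    # SM_Eff_K = min(blocks of this kernel / SM count, 100%),
    # summed with each kernel's duration as weight and divided by all-steps time.
    sm_efficiency = sum(
        min(k["blocks"] / sm_count, 1.0) * k["dur"] for k in kernels
    ) / all_steps_time_us

    # OCC_K = min(threads / SM count / max threads per SM, theoretical occupancy),
    # averaged over kernels with duration as weight.
    occupancy = sum(
        min(k["threads"] / sm_count / max_threads_per_sm, k["theoretical_occupancy"]) * k["dur"]
        for k in kernels
    ) / busy_time

    return gpu_utilization, sm_efficiency, occupancy


# Two made-up kernels on a V100-like device (80 SMs, 2048 threads per SM).
kernels = [
    {"dur": 500, "blocks": 80, "threads": 80 * 1024, "theoretical_occupancy": 0.5},
    {"dur": 100, "blocks": 1, "threads": 64, "theoretical_occupancy": 0.1},
]
print(weighted_gpu_metrics(kernels, sm_count=80, max_threads_per_sm=2048, all_steps_time_us=1000))
```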
["torch::autograd::AccumulateGrad", 28494], ["aten::zero_", 27597], ["aten::empty_like", 26064], ["aten::stack", 24346], ["aten::relu_", 24181], ["aten::add", 19289], ["aten::cat", 17085], ["aten::fill_", 17059], ["aten::_cat", 16933], ["aten::clamp_min_", 15665], ["aten::view", 14027], ["aten::resize_", 12406], ["aten::empty_strided", 11829], ["ReluBackward1", 11656], ["aten::clamp_min", 10311], ["aten::permute", 9775], ["aten::threshold_backward", 9482], ["aten::as_strided", 7600], ["aten::unsqueeze", 6603], ["aten::linear", 1408], ["AddmmBackward", 1303], ["aten::cross_entropy_loss", 1180], ["aten::zeros", 1105], ["aten::addmm", 1034], ["MeanBackward1", 987], ["aten::mm", 860], ["NllLossBackward", 716], ["aten::max_pool2d", 687], ["aten::nll_loss_backward", 614], ["aten::t", 584], ["aten::log_softmax", 567], ["aten::max_pool2d_with_indices", 562], ["aten::adaptive_avg_pool2d", 561], ["aten::nll_loss_nd", 495], ["MaxPool2DWithIndicesBackward", 484], ["aten::ones_like", 452], ["aten::mean", 445], ["aten::_log_softmax", 433], ["aten::nll_loss", 414], ["aten::max_pool2d_with_indices_backward", 411], ["LogSoftmaxBackward", 359], ["aten::narrow", 350], ["aten::nll_loss_forward", 346], ["aten::transpose", 329], ["aten::sum", 327], ["aten::_log_softmax_backward_data", 306], ["aten::expand", 229], ["aten::slice", 223], ["aten::detach_", 208], ["AddBackward0", 175], ["aten::flatten", 164], ["TBackward", 103], ["detach_", 100], ["ViewBackward", 80], ["aten::reshape", 55], ["aten::conj", 12]]}, "host_self_time": {"title": "Host Self Time (us)", "columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["aten::empty", 68147], ["aten::add_", 51013], ["aten::copy_", 40255], ["aten::cudnn_convolution", 33121], ["aten::cudnn_convolution_backward_input", 29324], ["aten::cudnn_convolution_backward_weight", 22804], ["aten::mul_", 20515], ["aten::div", 20135], ["aten::cudnn_batch_norm", 19843], ["aten::_cat", 16282], ["aten::to", 14834], ["aten::add", 14329], ["aten::view", 14027], ["aten::resize_", 12406], ["aten::cudnn_batch_norm_backward", 12238], ["aten::empty_strided", 11829], ["aten::empty_like", 11742], ["aten::zero_", 10693], ["aten::batch_norm", 9906], ["aten::fill_", 9879], ["aten::relu_", 8516], ["aten::as_strided", 7600], ["aten::conv2d", 7387], ["aten::_convolution", 7245], ["aten::clamp_min", 7106], ["aten::_batch_norm_impl_index", 6544], ["aten::convolution", 6151], ["aten::threshold_backward", 6090], ["aten::cudnn_convolution_backward", 6081], ["aten::permute", 5515], ["aten::contiguous", 5510], ["torch::autograd::AccumulateGrad", 5457], ["aten::clamp_min_", 5354], ["CudnnBatchNormBackward", 4148], ["aten::unsqueeze", 3574], ["CudnnConvolutionBackward", 3217], ["ReluBackward1", 2174], ["aten::zeros", 659], ["aten::stack", 658], ["aten::addmm", 639], ["aten::mm", 575], ["MeanBackward1", 541], ["aten::max_pool2d_with_indices", 477], ["aten::nll_loss_backward", 388], ["aten::nll_loss_forward", 266], ["aten::t", 255], ["aten::mean", 234], ["aten::transpose", 197], ["AddmmBackward", 182], ["aten::max_pool2d_with_indices_backward", 176], ["AddBackward0", 175], ["aten::_log_softmax", 170], ["aten::sum", 153], ["aten::cat", 152], ["aten::expand", 150], ["aten::narrow", 127], ["aten::max_pool2d", 125], ["aten::linear", 124], ["aten::slice", 123], ["aten::cross_entropy_loss", 118], ["aten::adaptive_avg_pool2d", 116], ["aten::detach_", 108], ["aten::_log_softmax_backward_data", 108], ["NllLossBackward", 102], ["detach_", 100], ["aten::ones_like", 95], 
["aten::log_softmax", 90], ["aten::flatten", 84], ["aten::nll_loss_nd", 81], ["MaxPool2DWithIndicesBackward", 73], ["aten::nll_loss", 68], ["LogSoftmaxBackward", 53], ["aten::reshape", 29], ["ViewBackward", 25], ["TBackward", 18], ["aten::conj", 12]]}} -{"metadata": {"sort": "device_self_duration", "tooltips": {"tc_eligible": "Whether this operator is eligible to use Tensor Cores.", "tc_self_ratio": "Time of self-kernels with Tensor Cores / Time of self-kernels.", "tc_total_ratio": "Time of kernels with Tensor Cores / Time of kernels."}}, "data": [{"name": "aten::cudnn_convolution_backward_weight", "calls": 318, "device_self_duration": 142461, "device_total_duration": 142461, "host_self_duration": 22804, "host_total_duration": 40246, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::cudnn_convolution_backward_input", "calls": 312, "device_self_duration": 130967, "device_total_duration": 130967, "host_self_duration": 29324, "host_total_duration": 41445, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::cudnn_convolution", "calls": 318, "device_self_duration": 126619, "device_total_duration": 126619, "host_self_duration": 33121, "host_total_duration": 56998, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::cudnn_batch_norm_backward", "calls": 318, "device_self_duration": 61939, "device_total_duration": 61939, "host_self_duration": 12238, "host_total_duration": 30460, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::cudnn_batch_norm", "calls": 318, "device_self_duration": 34245, "device_total_duration": 34245, "host_self_duration": 19843, "host_total_duration": 71527, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::threshold_backward", "calls": 294, "device_self_duration": 27298, "device_total_duration": 27298, "host_self_duration": 6090, "host_total_duration": 9482, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::add_", "calls": 2994, "device_self_duration": 24098, "device_total_duration": 24098, "host_self_duration": 51013, "host_total_duration": 78125, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::clamp_min", "calls": 294, "device_self_duration": 17860, "device_total_duration": 17860, "host_self_duration": 7106, "host_total_duration": 10311, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::add", "calls": 414, "device_self_duration": 16038, "device_total_duration": 16038, "host_self_duration": 14329, "host_total_duration": 19289, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::copy_", "calls": 588, "device_self_duration": 11492, "device_total_duration": 11492, "host_self_duration": 40255, "host_total_duration": 52853, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::max_pool2d_with_indices_backward", "calls": 6, "device_self_duration": 3822, "device_total_duration": 4677, "host_self_duration": 176, "host_total_duration": 411, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::mul_", "calls": 966, "device_self_duration": 2409, "device_total_duration": 
2409, "host_self_duration": 20515, "host_total_duration": 29081, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::fill_", "calls": 978, "device_self_duration": 1887, "device_total_duration": 1887, "host_self_duration": 9879, "host_total_duration": 17059, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::max_pool2d_with_indices", "calls": 6, "device_self_duration": 1420, "device_total_duration": 1420, "host_self_duration": 477, "host_total_duration": 562, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::mm", "calls": 12, "device_self_duration": 275, "device_total_duration": 275, "host_self_duration": 575, "host_total_duration": 860, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::mean", "calls": 6, "device_self_duration": 212, "device_total_duration": 212, "host_self_duration": 234, "host_total_duration": 445, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::addmm", "calls": 6, "device_self_duration": 197, "device_total_duration": 197, "host_self_duration": 639, "host_total_duration": 1034, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::div", "calls": 198, "device_self_duration": 144, "device_total_duration": 144, "host_self_duration": 20135, "host_total_duration": 35158, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::_log_softmax_backward_data", "calls": 6, "device_self_duration": 53, "device_total_duration": 53, "host_self_duration": 108, "host_total_duration": 306, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::sum", "calls": 6, "device_self_duration": 44, "device_total_duration": 44, "host_self_duration": 153, "host_total_duration": 327, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::_log_softmax", "calls": 6, "device_self_duration": 42, "device_total_duration": 42, "host_self_duration": 170, "host_total_duration": 433, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::nll_loss_forward", "calls": 6, "device_self_duration": 18, "device_total_duration": 18, "host_self_duration": 266, "host_total_duration": 346, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::nll_loss_backward", "calls": 6, "device_self_duration": 18, "device_total_duration": 18, "host_self_duration": 388, "host_total_duration": 614, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::empty", "calls": 4404, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 68147, "host_total_duration": 68147, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::zero_", "calls": 996, "device_self_duration": 0, "device_total_duration": 1881, "host_self_duration": 10693, "host_total_duration": 27597, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::zeros", "calls": 24, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 659, "host_total_duration": 1105, "tc_eligible": "No", "tc_self_ratio": 0, 
"tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::view", "calls": 846, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 14027, "host_total_duration": 14027, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::as_strided", "calls": 432, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 7600, "host_total_duration": 7600, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::permute", "calls": 192, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 5515, "host_total_duration": 9775, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::empty_like", "calls": 528, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 11742, "host_total_duration": 26064, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::contiguous", "calls": 192, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 5510, "host_total_duration": 31137, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::empty_strided", "calls": 402, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 11829, "host_total_duration": 11829, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::to", "calls": 414, "device_self_duration": 0, "device_total_duration": 11492, "host_self_duration": 14834, "host_total_duration": 64332, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::unsqueeze", "calls": 192, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 3574, "host_total_duration": 6603, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::resize_", "calls": 1902, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 12406, "host_total_duration": 12406, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::slice", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 123, "host_total_duration": 223, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::narrow", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 127, "host_total_duration": 350, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::_cat", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 16282, "host_total_duration": 16933, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::cat", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 152, "host_total_duration": 17085, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::stack", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 658, "host_total_duration": 24346, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "detach_", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 100, "host_total_duration": 100, "tc_eligible": 
"No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::detach_", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 108, "host_total_duration": 208, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::_convolution", "calls": 318, "device_self_duration": 0, "device_total_duration": 126619, "host_self_duration": 7245, "host_total_duration": 64243, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::convolution", "calls": 318, "device_self_duration": 0, "device_total_duration": 126619, "host_self_duration": 6151, "host_total_duration": 70394, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::conv2d", "calls": 318, "device_self_duration": 0, "device_total_duration": 126619, "host_self_duration": 7387, "host_total_duration": 77781, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::_batch_norm_impl_index", "calls": 318, "device_self_duration": 0, "device_total_duration": 34245, "host_self_duration": 6544, "host_total_duration": 78071, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::batch_norm", "calls": 318, "device_self_duration": 0, "device_total_duration": 34245, "host_self_duration": 9906, "host_total_duration": 87977, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::clamp_min_", "calls": 294, "device_self_duration": 0, "device_total_duration": 17860, "host_self_duration": 5354, "host_total_duration": 15665, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::relu_", "calls": 294, "device_self_duration": 0, "device_total_duration": 17860, "host_self_duration": 8516, "host_total_duration": 24181, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::max_pool2d", "calls": 6, "device_self_duration": 0, "device_total_duration": 1420, "host_self_duration": 125, "host_total_duration": 687, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::adaptive_avg_pool2d", "calls": 6, "device_self_duration": 0, "device_total_duration": 212, "host_self_duration": 116, "host_total_duration": 561, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::flatten", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 84, "host_total_duration": 164, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::transpose", "calls": 30, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 197, "host_total_duration": 329, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::t", "calls": 30, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 255, "host_total_duration": 584, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::expand", "calls": 12, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 150, "host_total_duration": 229, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::linear", "calls": 6, "device_self_duration": 
0, "device_total_duration": 197, "host_self_duration": 124, "host_total_duration": 1408, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::log_softmax", "calls": 6, "device_self_duration": 0, "device_total_duration": 42, "host_self_duration": 90, "host_total_duration": 567, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::nll_loss", "calls": 6, "device_self_duration": 0, "device_total_duration": 18, "host_self_duration": 68, "host_total_duration": 414, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::nll_loss_nd", "calls": 6, "device_self_duration": 0, "device_total_duration": 18, "host_self_duration": 81, "host_total_duration": 495, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::cross_entropy_loss", "calls": 6, "device_self_duration": 0, "device_total_duration": 60, "host_self_duration": 118, "host_total_duration": 1180, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::ones_like", "calls": 6, "device_self_duration": 0, "device_total_duration": 6, "host_self_duration": 95, "host_total_duration": 452, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "NllLossBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 18, "host_self_duration": 102, "host_total_duration": 716, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "LogSoftmaxBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 53, "host_self_duration": 53, "host_total_duration": 359, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::conj", "calls": 12, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 12, "host_total_duration": 12, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "AddmmBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 275, "host_self_duration": 182, "host_total_duration": 1303, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "torch::autograd::AccumulateGrad", "calls": 966, "device_self_duration": 0, "device_total_duration": 3030, "host_self_duration": 5457, "host_total_duration": 28494, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "TBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 18, "host_total_duration": 103, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "aten::reshape", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 29, "host_total_duration": 55, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "ViewBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 25, "host_total_duration": 80, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "MeanBackward1", "calls": 6, "device_self_duration": 0, "device_total_duration": 144, "host_self_duration": 541, "host_total_duration": 987, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, 
{"name": "ReluBackward1", "calls": 294, "device_self_duration": 0, "device_total_duration": 27298, "host_self_duration": 2174, "host_total_duration": 11656, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "AddBackward0", "calls": 96, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 175, "host_total_duration": 175, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "CudnnBatchNormBackward", "calls": 318, "device_self_duration": 0, "device_total_duration": 61939, "host_self_duration": 4148, "host_total_duration": 34608, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::cudnn_convolution_backward", "calls": 318, "device_self_duration": 0, "device_total_duration": 273428, "host_self_duration": 6081, "host_total_duration": 87772, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "CudnnConvolutionBackward", "calls": 318, "device_self_duration": 0, "device_total_duration": 273428, "host_self_duration": 3217, "host_total_duration": 90989, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "MaxPool2DWithIndicesBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 4677, "host_self_duration": 73, "host_total_duration": 484, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}]} -{"metadata": {"sort": "Total Duration (us)"}, "data": {"columns": [{"type": "string", "name": "Name"}, {"type": "string", "name": "Tensor Cores Used", "tooltip": "Whether this kernel uses Tensor Cores."}, {"type": "number", "name": "Calls"}, {"type": "number", "name": "Total Duration (us)"}, {"type": "number", "name": "Mean Duration (us)"}, {"type": "number", "name": "Max Duration (us)"}, {"type": "number", "name": "Min Duration (us)"}, {"type": "number", "name": "Mean Blocks Per SM", "tooltip": "Blocks Per SM = blocks of this kernel / SM number of this GPU.\nIf this number is less than 1, it indicates the GPU multiprocessors are not fully utilized.\n\"Mean Blocks per SM\" is the weighted average of all calls of this kernel, using each call's execution duration as weight."}, {"type": "number", "name": "Mean Est. Achieved Occupancy (%)", "tooltip": "Est. Achieved Occupancy:\nFor most cases such as memory bandwidth bounded kernels, the higher the better. Occupancy is the ratio of active warps on an SM to the maximum number of active warps supported by the SM. The theoretical occupancy of a kernel is upper limit occupancy of this kernel, limited by multiple factors such as kernel shape, kernel used resource, and the GPU compute capability.\nEst. Achieved Occupancy of a kernel, OCC_K = min(threads of the kernel / SM number / max threads per SM, theoretical occupancy of the kernel). This \"Mean\" number is the weighted average of all calls' OCC_K of the kernel, using each call's execution duration as weight. 
It shows fine-grained low-level GPU utilization."}], "rows": [["void cudnn::detail::dgrad_engine(int, int, int, float const*, int, float const*, int, float*, kernel_grad_params, unsigned long long, int, unsigned long long, int, float, int, int, int)", "No", 162, 80756, 498, 1017, 323, 42.25, 29.97], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", "No", 156, 66472, 426, 745, 345, 9.78, 38.0], ["void cudnn::bn_bw_1C11_kernel_new(float, float, float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float*, float*, float const*, float const*, float)", "No", 264, 59642, 226, 915, 45, 4.34, 67.98], ["void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "No", 3090, 39814, 13, 378, 1, 641.54, 92.32], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", "No", 90, 36957, 411, 748, 347, 12.34, 50.0], ["void at::native::vectorized_elementwise_kernel<4, at::native::threshold_kernel_impl(at::TensorIteratorBase&, float, float)::{lambda(float, float)#1}, at::detail::Array >(int, at::native::threshold_kernel_impl(at::TensorIteratorBase&, float, float)::{lambda(float, float)#1}, at::detail::Array)", "No", 294, 27298, 93, 377, 13, 653.06, 100.0], ["void cudnn::bn_fw_tr_1C11_kernel_NCHW(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float const*, float, float, float*, float*, float*, float*, float, float)", "No", 150, 27060, 180, 452, 53, 3.12, 64.06], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", "No", 60, 25782, 430, 729, 352, 3.9, 42.09], ["volta_sgemm_64x64_nt", "No", 102, 21084, 207, 279, 184, 10.24, 19.38], ["volta_scudnn_128x128_stridedB_splitK_small_nn_v1", "No", 48, 20448, 426, 676, 307, 6.83, 25.0], ["void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array >(int, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array)", "No", 294, 17860, 61, 252, 5, 666.65, 100.0], ["volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1", "No", 36, 12704, 353, 362, 344, 22.4, 25.0], ["volta_scudnn_128x64_stridedB_interior_nn_v1", "No", 30, 9597, 320, 510, 252, 12.9, 19.0], ["volta_sgemm_128x32_nt", "No", 24, 8629, 360, 477, 18, 0.97, 11.51], ["volta_sgemm_64x64_nn", "No", 42, 8551, 204, 217, 195, 12.34, 24.14], ["volta_scudnn_128x64_relu_interior_nn_v1", "No", 30, 8022, 267, 316, 94, 37.1, 25.0], ["volta_scudnn_128x64_stridedB_splitK_xregs_large_nn_v1", "No", 12, 7817, 651, 671, 635, 15.96, 19.0], ["void cudnn::bn_fw_tr_1C11_singleread(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float const*, float, float, float*, float*, float*, float*, float, float, cudnn::reduced_divisor, int, cudnn::reduced_divisor, cudnn::bnFwPersistentState*, int, float, float, 
float, int, float, float, cudnnStatus_t*, bool)", "No", 168, 7185, 43, 89, 13, 12.57, 75.0], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", "No", 12, 7068, 589, 987, 193, 85.34, 37.5], ["void cudnn::winograd_nonfused::winogradForwardOutput4x4(cudnn::winograd_nonfused::WinogradOutputParams)", "No", 120, 5369, 45, 73, 19, 10.0, 50.0], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", "No", 12, 5219, 435, 437, 432, 9.8, 31.0], ["void explicit_convolve_sgemm(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, unsigned long long, int, unsigned long long, int, float, float, int, float const*, float const*)", "No", 6, 4759, 793, 796, 790, 9.8, 31.0], ["void cudnn::winograd_nonfused::winogradForwardData4x4(cudnn::winograd_nonfused::WinogradDataParams)", "No", 120, 4710, 39, 66, 17, 10.11, 50.0], ["volta_scudnn_128x128_stridedB_interior_nn_v1", "No", 18, 4693, 261, 281, 252, 9.8, 25.0], ["void cudnn::winograd_nonfused::winogradWgradData4x4(cudnn::winograd_nonfused::WinogradDataParams)", "No", 78, 4692, 60, 126, 20, 15.46, 38.0], ["void cudnn::ops::scalePackedTensor_kernel(cudnnTensor4dStruct, float*, float)", "No", 162, 4631, 29, 143, 5, 496.39, 100.0], ["void cudnn::winograd_nonfused::winogradWgradDelta4x4(cudnn::winograd_nonfused::WinogradDeltaParams)", "No", 78, 4573, 59, 125, 17, 15.69, 50.0], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", "No", 6, 4065, 678, 692, 652, 6.4, 25.0], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", "No", 6, 3917, 653, 686, 595, 4.9, 25.0], ["void at::native::(anonymous namespace)::max_pool_backward_nchw(int, float const*, long const*, int, int, int, int, int, int, int, int, int, int, int, int, int, int, float*)", "No", 6, 3822, 637, 638, 636, 1254.4, 100.0], ["volta_scudnn_128x128_stridedB_splitK_medium_nn_v1", "No", 6, 3720, 620, 623, 614, 5.6, 25.0], ["volta_scudnn_128x64_relu_medium_nn_v1", "No", 6, 3627, 604, 606, 603, 39.2, 25.0], ["volta_scudnn_128x128_stridedB_medium_nn_v1", "No", 12, 3501, 292, 296, 286, 19.6, 25.0], ["volta_scudnn_128x32_sliced1x4_ldg4_relu_exp_medium_nhwc_tn_v1", "No", 6, 3270, 545, 627, 526, 4.9, 25.0], ["volta_scudnn_128x64_relu_small_nn_v1", "No", 12, 3265, 272, 279, 254, 9.8, 25.0], ["volta_scudnn_128x64_relu_xregs_large_nn_v1", "No", 6, 3200, 533, 607, 516, 4.9, 19.0], ["volta_sgemm_32x128_nn", "No", 18, 3053, 170, 171, 168, 22.05, 50.0], ["volta_scudnn_128x128_relu_interior_nn_v1", "No", 6, 3010, 502, 508, 495, 9.8, 25.0], ["volta_scudnn_128x128_stridedB_small_nn_v1", "No", 6, 2995, 499, 505, 493, 19.6, 25.0], ["volta_sgemm_32x128_nt", "No", 18, 2843, 158, 159, 156, 22.05, 50.0], ["void cudnn::winograd_nonfused::winogradForwardFilter4x4(cudnn::winograd_nonfused::WinogradFilterParams)", "No", 120, 2662, 22, 67, 5, 8.68, 73.22], ["void at::native::vectorized_elementwise_kernel<4, at::native::MulScalarFunctor, at::detail::Array >(int, at::native::MulScalarFunctor, at::detail::Array)", "No", 966, 2409, 2, 25, 1, 43.72, 58.39], ["void cudnn::bn_bw_1C11_singleread(float, float, float, 
float, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float*, float*, float const*, float const*, float, cudnn::reduced_divisor, int, cudnn::reduced_divisor, cudnn::bnBwPersistentState*, int, float, float, float, int, float, cudnnStatus_t*, bool)", "No", 54, 2297, 43, 73, 18, 20.81, 75.0], ["void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "No", 978, 1887, 2, 143, 0, 599.07, 86.78], ["void cudnn::winograd_nonfused::winogradWgradOutput4x4(cudnn::winograd_nonfused::WinogradWgradOutputParams)", "No", 78, 1504, 19, 69, 5, 8.06, 41.33], ["void at::native::(anonymous namespace)::max_pool_forward_nchw(int, float const*, int, int, int, int, int, int, int, int, int, int, int, int, int, int, float*, long*)", "No", 6, 1420, 237, 239, 234, 313.6, 100.0], ["void cudnn::cnn::im2col4d_kernel(cudnn::cnn::im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*)", "No", 6, 614, 102, 103, 101, 0.95, 24.0], ["volta_scudnn_128x64_stridedB_small_nn_v1", "No", 6, 584, 97, 100, 93, 9.8, 19.0], ["void nchwToNhwcKernel(int, int, int, int, float const*, float*, float, float)", "No", 12, 453, 38, 68, 9, 73.28, 100.0], ["cask_cudnn::computeOffsetsKernel(cask_cudnn::ComputeOffsetsParams)", "No", 138, 342, 2, 4, 1, 0.13, 1.73], ["void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor >, at::detail::Array >(int, at::native::BUnaryFunctor >, at::detail::Array)", "No", 318, 322, 1, 2, 1, 0.01, 0.0], ["void at::native::reduce_kernel<512, 1, at::native::ReduceOp, unsigned int, float, 4> >(at::native::ReduceOp, unsigned int, float, 4>)", "No", 6, 212, 35, 36, 35, 51.2, 100.0], ["volta_sgemm_64x32_sliced1x4_nn", "No", 6, 150, 25, 26, 24, 2.0, 25.0], ["volta_sgemm_64x32_sliced1x4_tn", "No", 6, 149, 25, 26, 24, 1.0, 13.0], ["void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<1, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::MulScalarFunctor, at::detail::Array, OffsetCalculator<1, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "No", 6, 144, 24, 24, 24, 156.8, 100.0], ["void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams)", "No", 36, 134, 4, 5, 2, 0.4, 3.0], ["void nhwcToNchwKernel(int, int, int, int, float const*, float*, float, float)", "No", 6, 105, 18, 18, 17, 22.4, 100.0], ["cask_cudnn::computeWgradSplitKOffsetsKernel(cask_cudnn::ComputeSplitKOffsetsParams)", "No", 66, 81, 1, 2, 1, 0.15, 1.68], ["cask_cudnn::computeWgradBOffsetsKernel(cask_cudnn::ComputeWgradBOffsetsParams)", "No", 66, 81, 1, 2, 1, 0.02, 0.0], ["cask_cudnn::computeBOffsetsKernel(cask_cudnn::ComputeBOffsetsParams)", "No", 72, 73, 1, 2, 1, 0.02, 0.0], ["void (anonymous namespace)::softmax_warp_backward(float*, float const*, float const*, int, int, int)", "No", 6, 53, 9, 9, 8, 0.1, 1.0], ["void at::native::reduce_kernel<128, 4, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "No", 6, 44, 7, 8, 7, 0.03, 0.0], ["void (anonymous namespace)::softmax_warp_forward(float*, float const*, int, int, int)", "No", 6, 42, 7, 7, 7, 0.1, 1.0], 
["void splitKreduce_kernel(cublasSplitKParams, float const*, float const*, float*, float const*, float const*, float const*)", "No", 12, 30, 2, 3, 2, 4.44, 28.0], ["void at::native::unrolled_elementwise_kernel, OffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithoutCast, at::detail::Array::StoreWithoutCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array, OffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithoutCast, at::detail::Array::StoreWithoutCast)", "No", 6, 30, 5, 5, 5, 1.56, 5.0], ["void cunn_ClassNLLCriterion_updateOutput_kernel(float*, float*, float*, long*, float*, int, int, int, int, long)", "No", 6, 18, 3, 3, 3, 0.01, 0.0], ["void cunn_ClassNLLCriterion_updateGradInput_kernel(float*, float*, long*, float*, float*, int, int, int, int, long)", "No", 6, 12, 2, 2, 2, 0.01, 0.0]]}} -{"total": {"columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["void cudnn::detail::dgrad_engine(int, int, int, float const*, int, float const*, int, float*, kernel_grad_params, unsigned long long, int, unsigned long long, int, float, int, int, int)", 80756], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", 66472], ["void cudnn::bn_bw_1C11_kernel_new(float, float, float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float*, float*, float const*, float const*, float)", 59642], ["void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", 39814], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", 36957], ["void at::native::vectorized_elementwise_kernel<4, at::native::threshold_kernel_impl(at::TensorIteratorBase&, float, float)::{lambda(float, float)#1}, at::detail::Array >(int, at::native::threshold_kernel_impl(at::TensorIteratorBase&, float, float)::{lambda(float, float)#1}, at::detail::Array)", 27298], ["void cudnn::bn_fw_tr_1C11_kernel_NCHW(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float const*, float, float, float*, float*, float*, float*, float, float)", 27060], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", 25782], ["volta_sgemm_64x64_nt", 21084], ["volta_scudnn_128x128_stridedB_splitK_small_nn_v1", 20448], ["void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array >(int, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array)", 17860], ["volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1", 12704], ["volta_scudnn_128x64_stridedB_interior_nn_v1", 9597], ["volta_sgemm_128x32_nt", 8629], ["volta_sgemm_64x64_nn", 8551], ["volta_scudnn_128x64_relu_interior_nn_v1", 8022], 
["volta_scudnn_128x64_stridedB_splitK_xregs_large_nn_v1", 7817], ["void cudnn::bn_fw_tr_1C11_singleread(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float const*, float, float, float*, float*, float*, float*, float, float, cudnn::reduced_divisor, int, cudnn::reduced_divisor, cudnn::bnFwPersistentState*, int, float, float, float, int, float, float, cudnnStatus_t*, bool)", 7185], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", 7068], ["void cudnn::winograd_nonfused::winogradForwardOutput4x4(cudnn::winograd_nonfused::WinogradOutputParams)", 5369], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", 5219], ["void explicit_convolve_sgemm(int, int, int, float const*, int, float const*, int, float*, kernel_conv_params, unsigned long long, int, unsigned long long, int, float, float, int, float const*, float const*)", 4759], ["void cudnn::winograd_nonfused::winogradForwardData4x4(cudnn::winograd_nonfused::WinogradDataParams)", 4710], ["volta_scudnn_128x128_stridedB_interior_nn_v1", 4693], ["void cudnn::winograd_nonfused::winogradWgradData4x4(cudnn::winograd_nonfused::WinogradDataParams)", 4692], ["void cudnn::ops::scalePackedTensor_kernel(cudnnTensor4dStruct, float*, float)", 4631], ["void cudnn::winograd_nonfused::winogradWgradDelta4x4(cudnn::winograd_nonfused::WinogradDeltaParams)", 4573], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", 4065], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", 3917], ["void at::native::(anonymous namespace)::max_pool_backward_nchw(int, float const*, long const*, int, int, int, int, int, int, int, int, int, int, int, int, int, int, float*)", 3822], ["volta_scudnn_128x128_stridedB_splitK_medium_nn_v1", 3720], ["volta_scudnn_128x64_relu_medium_nn_v1", 3627], ["volta_scudnn_128x128_stridedB_medium_nn_v1", 3501], ["volta_scudnn_128x32_sliced1x4_ldg4_relu_exp_medium_nhwc_tn_v1", 3270], ["volta_scudnn_128x64_relu_small_nn_v1", 3265], ["volta_scudnn_128x64_relu_xregs_large_nn_v1", 3200], ["volta_sgemm_32x128_nn", 3053], ["volta_scudnn_128x128_relu_interior_nn_v1", 3010], ["volta_scudnn_128x128_stridedB_small_nn_v1", 2995], ["volta_sgemm_32x128_nt", 2843], ["void cudnn::winograd_nonfused::winogradForwardFilter4x4(cudnn::winograd_nonfused::WinogradFilterParams)", 2662], ["void at::native::vectorized_elementwise_kernel<4, at::native::MulScalarFunctor, at::detail::Array >(int, at::native::MulScalarFunctor, at::detail::Array)", 2409], ["void cudnn::bn_bw_1C11_singleread(float, float, float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float*, float*, float const*, float const*, float, cudnn::reduced_divisor, int, cudnn::reduced_divisor, cudnn::bnBwPersistentState*, int, float, float, float, int, float, cudnnStatus_t*, bool)", 2297], ["void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", 1887], ["void 
cudnn::winograd_nonfused::winogradWgradOutput4x4(cudnn::winograd_nonfused::WinogradWgradOutputParams)", 1504], ["void at::native::(anonymous namespace)::max_pool_forward_nchw(int, float const*, int, int, int, int, int, int, int, int, int, int, int, int, int, int, float*, long*)", 1420], ["void cudnn::cnn::im2col4d_kernel(cudnn::cnn::im2col4d_params, cudnnConvolutionStruct, cudnnTensor4dStruct, float const*, float*)", 614], ["volta_scudnn_128x64_stridedB_small_nn_v1", 584], ["void nchwToNhwcKernel(int, int, int, int, float const*, float*, float, float)", 453], ["cask_cudnn::computeOffsetsKernel(cask_cudnn::ComputeOffsetsParams)", 342], ["void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor >, at::detail::Array >(int, at::native::BUnaryFunctor >, at::detail::Array)", 322], ["void at::native::reduce_kernel<512, 1, at::native::ReduceOp, unsigned int, float, 4> >(at::native::ReduceOp, unsigned int, float, 4>)", 212], ["volta_sgemm_64x32_sliced1x4_nn", 150], ["volta_sgemm_64x32_sliced1x4_tn", 149], ["void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<1, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::MulScalarFunctor, at::detail::Array, OffsetCalculator<1, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", 144], ["void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams)", 134], ["void nhwcToNchwKernel(int, int, int, int, float const*, float*, float, float)", 105], ["cask_cudnn::computeWgradSplitKOffsetsKernel(cask_cudnn::ComputeSplitKOffsetsParams)", 81], ["cask_cudnn::computeWgradBOffsetsKernel(cask_cudnn::ComputeWgradBOffsetsParams)", 81], ["cask_cudnn::computeBOffsetsKernel(cask_cudnn::ComputeBOffsetsParams)", 73], ["void (anonymous namespace)::softmax_warp_backward(float*, float const*, float const*, int, int, int)", 53], ["void at::native::reduce_kernel<128, 4, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", 44], ["void (anonymous namespace)::softmax_warp_forward(float*, float const*, int, int, int)", 42], ["void splitKreduce_kernel(cublasSplitKParams, float const*, float const*, float*, float const*, float const*, float const*)", 30], ["void at::native::unrolled_elementwise_kernel, OffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithoutCast, at::detail::Array::StoreWithoutCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array, OffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithoutCast, at::detail::Array::StoreWithoutCast)", 30], ["void cunn_ClassNLLCriterion_updateOutput_kernel(float*, float*, float*, long*, float*, int, int, int, int, long)", 18], ["void cunn_ClassNLLCriterion_updateGradInput_kernel(float*, float*, long*, float*, float*, int, int, int, int, long)", 12]]}} -{"steps": {"columns": [{"type": "string", "name": "Step"}, {"type": "number", "name": "Kernel"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "Memcpy"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "Memset"}, {"type": "string", "role": 
"tooltip", "p": {"html": "true"}}, {"type": "number", "name": "Runtime"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "DataLoader"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "CPU Exec"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}, {"type": "number", "name": "Other"}, {"type": "string", "role": "tooltip", "p": {"html": "true"}}], "rows": [["5", 99778, "
\nStep 5\nTotal: 182306us\nKernel: 99778us\nPercentage: 54.73%\n", 3606, "\nStep 5\nTotal: 182306us\nMemcpy: 3606us\nPercentage: 1.98%\n", 98, "\nStep 5\nTotal: 182306us\nMemset: 98us\nPercentage: 0.05%\n", 41028, "\nStep 5\nTotal: 182306us\nRuntime: 41028us\nPercentage: 22.51%\n", 4341, "\nStep 5\nTotal: 182306us\nDataLoader: 4341us\nPercentage: 2.38%\n", 27460, "\nStep 5\nTotal: 182306us\nCPU Exec: 27460us\nPercentage: 15.06%\n", 5995, "\nStep 5\nTotal: 182306us\nOther: 5995us\nPercentage: 3.29%\n"], 
["6", 99208, "\nStep 6\nTotal: 126183us\nKernel: 99208us\nPercentage: 78.62%\n", 2948, "\nStep 6\nTotal: 126183us\nMemcpy: 2948us\nPercentage: 2.34%\n", 98, "\nStep 6\nTotal: 126183us\nMemset: 98us\nPercentage: 0.08%\n", 3406, "\nStep 6\nTotal: 126183us\nRuntime: 3406us\nPercentage: 2.7%\n", 0, "\nStep 6\nTotal: 126183us\nDataLoader: 0us\nPercentage: 0.0%\n", 16404, "\nStep 6\nTotal: 126183us\nCPU Exec: 16404us\nPercentage: 13.0%\n", 4119, "\nStep 6\nTotal: 126183us\nOther: 4119us\nPercentage: 3.26%\n"], 
["7", 99114, "\nStep 7\nTotal: 127181us\nKernel: 99114us\nPercentage: 77.93%\n", 2949, "\nStep 7\nTotal: 127181us\nMemcpy: 2949us\nPercentage: 2.32%\n", 98, "\nStep 7\nTotal: 127181us\nMemset: 98us\nPercentage: 0.08%\n", 3417, "\nStep 7\nTotal: 127181us\nRuntime: 3417us\nPercentage: 2.69%\n", 6, "\nStep 7\nTotal: 127181us\nDataLoader: 6us\nPercentage: 0.0%\n", 19521, "\nStep 7\nTotal: 127181us\nCPU Exec: 19521us\nPercentage: 15.35%\n", 2076, "\nStep 7\nTotal: 127181us\nOther: 2076us\nPercentage: 1.63%\n"], 
["8", 99021, "\nStep 8\nTotal: 123079us\nKernel: 99021us\nPercentage: 80.45%\n", 2975, "\nStep 8\nTotal: 123079us\nMemcpy: 2975us\nPercentage: 2.42%\n", 97, "\nStep 8\nTotal: 123079us\nMemset: 97us\nPercentage: 0.08%\n", 3544, "\nStep 8\nTotal: 123079us\nRuntime: 3544us\nPercentage: 2.88%\n", 0, "\nStep 8\nTotal: 123079us\nDataLoader: 0us\nPercentage: 0.0%\n", 15464, "\nStep 8\nTotal: 123079us\nCPU Exec: 15464us\nPercentage: 12.56%\n", 1978, "\nStep 8\nTotal: 123079us\nOther: 1978us\nPercentage: 1.61%\n"], 
["9", 98791, "\nStep 9\nTotal: 163461us\nKernel: 98791us\nPercentage: 60.44%\n", 3596, "\nStep 9\nTotal: 163461us\nMemcpy: 3596us\nPercentage: 2.2%\n", 97, "\nStep 9\nTotal: 163461us\nMemset: 97us\nPercentage: 0.06%\n", 8275, "\nStep 9\nTotal: 163461us\nRuntime: 8275us\nPercentage: 5.06%\n", 1370, "\nStep 9\nTotal: 163461us\nDataLoader: 1370us\nPercentage: 0.84%\n", 43905, "\nStep 9\nTotal: 163461us\nCPU Exec: 43905us\nPercentage: 26.86%\n", 7427, "\nStep 9\nTotal: 163461us\nOther: 7427us\nPercentage: 4.54%\n"], 
["10", 98956, "\nStep 10\nTotal: 124198us\nKernel: 98956us\nPercentage: 79.68%\n", 2885, "\nStep 10\nTotal: 124198us\nMemcpy: 2885us\nPercentage: 2.32%\n", 98, "\nStep 10\nTotal: 124198us\nMemset: 98us\nPercentage: 0.08%\n", 3714, "\nStep 10\nTotal: 124198us\nRuntime: 3714us\nPercentage: 2.99%\n", 1400, "\nStep 10\nTotal: 124198us\nDataLoader: 1400us\nPercentage: 1.13%\n", 13235, "\nStep 10\nTotal: 124198us\nCPU Exec: 13235us\nPercentage: 10.66%\n", 3910, "\nStep 10\nTotal: 124198us\nOther: 3910us\nPercentage: 3.15%\n"]]}, "performance": [{"name": "Average Step Time", "description": "", "value": 141068, "extra": 100, "children": [{"name": "Kernel", "description": "", "value": 99145, "extra": 70.28}, {"name": "Memcpy", "description": "", "value": 3160, "extra": 2.24}, {"name": "Memset", "description": "", "value": 98, "extra": 0.07}, {"name": "Runtime", "description": "", "value": 10564, "extra": 7.49}, {"name": "DataLoader", "description": "", "value": 1186, "extra": 0.84}, {"name": "CPU Exec", "description": "", "value": 22665, "extra": 16.07}, {"name": "Other", "description": "", "value": 4251, "extra": 3.01}]}], "recommendations": "\n  • Kernels with 68% time are launched by Tensor Cores eligible operators. You could enable Automatic Mixed Precision to speedup by using FP16.\n
", "environments": [{"title": "Number of Worker(s)", "value": "1"}, {"title": "Device Type", "value": "GPU"}], "gpu_metrics": {"title": "GPU Summary", "data": [{"title": "GPU 0:", "value": ""}, {"title": "Name", "value": "Tesla V100-DGXS-32GB"}, {"title": "Memory", "value": "31.74 GB"}, {"title": "Compute Capability", "value": "7.0"}, {"title": "GPU Utilization", "value": "70.27 %"}, {"title": "Est. SM Efficiency", "value": "69.22 %"}, {"title": "Est. Achieved Occupancy", "value": "48.91 %"}, {"title": "Kernel Time using Tensor Cores", "value": "0.0 %"}], "tooltip": "The GPU usage metrics:\n\nGPU Utilization:\nGPU busy time / All steps time. The higher, the better. GPU busy time is the time during which there is at least one GPU kernel running on it. All steps time is the total time of all profiler steps(or called as iterations).\n\nEst. SM Efficiency:\nEstimated Stream Multiprocessor Efficiency. The higher, the better. This metric of a kernel, SM_Eff_K = min(blocks of this kernel / SM number of this GPU, 100%). This overall number is the sum of all kernels' SM_Eff_K weighted by kernel's execution duration, divided by all steps time.\n\nEst. Achieved Occupancy:\nFor most cases such as memory bandwidth bounded kernels, the higher the better. Occupancy is the ratio of active warps on an SM to the maximum number of active warps supported by the SM. The theoretical occupancy of a kernel is upper limit occupancy of this kernel, limited by multiple factors such as kernel shape, kernel used resource, and the GPU compute capability.\nEst. Achieved Occupancy of a kernel, OCC_K = min(threads of the kernel / SM number / max threads per SM, theoretical occupancy of the kernel). This overall number is the weighted average of all kernels' OCC_K using kernel's execution duration as weight. 
It shows fine-grained low-level GPU utilization.\n\nKernel using Tensor Cores:\nTotal GPU Time for Tensor Core kernels / Total GPU Time for all kernels.\n"}} -{"device_total_time": {"title": "Device Total Time (us)", "columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["aten::cudnn_convolution_backward", 274794], ["CudnnConvolutionBackward", 274794], ["aten::cudnn_convolution_backward_weight", 141300], ["aten::cudnn_convolution_backward_input", 133494], ["aten::cudnn_convolution", 128683], ["aten::_convolution", 128683], ["aten::convolution", 128683], ["aten::conv2d", 128683], ["aten::cudnn_batch_norm_backward", 61899], ["CudnnBatchNormBackward", 61899], ["aten::cudnn_batch_norm", 34315], ["aten::_batch_norm_impl_index", 34315], ["aten::batch_norm", 34315], ["aten::threshold_backward", 27280], ["ReluBackward1", 27280], ["aten::add_", 24052], ["aten::to", 18959], ["aten::copy_", 18959], ["aten::clamp_min", 17862], ["aten::clamp_min_", 17862], ["aten::relu_", 17862], ["aten::add", 16026], ["aten::max_pool2d_with_indices_backward", 4695], ["MaxPool2DWithIndicesBackward", 4695], ["torch::autograd::AccumulateGrad", 3012], ["aten::mul_", 2395], ["aten::fill_", 1888], ["aten::zero_", 1882], ["aten::max_pool2d_with_indices", 1422], ["aten::max_pool2d", 1422], ["aten::mm", 274], ["AddmmBackward", 274], ["aten::mean", 210], ["aten::adaptive_avg_pool2d", 210], ["aten::addmm", 197], ["aten::linear", 197], ["aten::div", 145], ["MeanBackward1", 145], ["aten::cross_entropy_loss", 60], ["aten::_log_softmax_backward_data", 51], ["LogSoftmaxBackward", 51], ["aten::sum", 45], ["aten::_log_softmax", 42], ["aten::log_softmax", 42], ["aten::nll_loss_forward", 18], ["aten::nll_loss", 18], ["aten::nll_loss_nd", 18], ["aten::nll_loss_backward", 18], ["NllLossBackward", 18], ["aten::ones_like", 6]]}, "device_self_time": {"title": "Device Self Time (us)", "columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["aten::cudnn_convolution_backward_weight", 141300], ["aten::cudnn_convolution_backward_input", 133494], ["aten::cudnn_convolution", 128683], ["aten::cudnn_batch_norm_backward", 61899], ["aten::cudnn_batch_norm", 34315], ["aten::threshold_backward", 27280], ["aten::add_", 24052], ["aten::copy_", 18959], ["aten::clamp_min", 17862], ["aten::add", 16026], ["aten::max_pool2d_with_indices_backward", 3838], ["aten::mul_", 2395], ["aten::fill_", 1888], ["aten::max_pool2d_with_indices", 1422], ["aten::mm", 274], ["aten::mean", 210], ["aten::addmm", 197], ["aten::div", 145], ["aten::_log_softmax_backward_data", 51], ["aten::sum", 45], ["aten::_log_softmax", 42], ["aten::nll_loss_forward", 18], ["aten::nll_loss_backward", 18]]}, "host_total_time": {"title": "Host Total Time (us)", "columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["CudnnConvolutionBackward", 119890], ["aten::cudnn_convolution_backward", 115797], ["aten::batch_norm", 105589], ["aten::add_", 97540], ["aten::_batch_norm_impl_index", 95925], ["aten::conv2d", 91000], ["aten::cudnn_batch_norm", 87823], ["aten::empty", 82024], ["aten::convolution", 81781], ["aten::_convolution", 74086], ["aten::cudnn_convolution", 64167], ["aten::cudnn_convolution_backward_weight", 60712], ["aten::to", 57776], ["aten::copy_", 56915], ["aten::cudnn_convolution_backward_input", 47359], ["CudnnBatchNormBackward", 41825], ["torch::autograd::AccumulateGrad", 37189], ["aten::cudnn_batch_norm_backward", 36641], ["aten::mul_", 35389], ["aten::relu_", 29432], 
["aten::zero_", 28309], ["aten::add", 23831], ["aten::clamp_min_", 19059], ["aten::empty_like", 18591], ["aten::fill_", 17657], ["aten::resize_", 15019], ["ReluBackward1", 14944], ["aten::clamp_min", 12503], ["aten::threshold_backward", 12062], ["aten::view", 9046], ["AddmmBackward", 2026], ["aten::linear", 1463], ["aten::mm", 1424], ["aten::zeros", 1319], ["aten::cross_entropy_loss", 1225], ["aten::addmm", 1060], ["NllLossBackward", 889], ["aten::nll_loss_backward", 747], ["aten::t", 725], ["MeanBackward1", 663], ["aten::max_pool2d", 599], ["MaxPool2DWithIndicesBackward", 590], ["aten::adaptive_avg_pool2d", 581], ["aten::log_softmax", 580], ["aten::nll_loss_nd", 507], ["LogSoftmaxBackward", 500], ["aten::max_pool2d_with_indices_backward", 493], ["aten::ones_like", 470], ["aten::div", 469], ["aten::mean", 454], ["aten::empty_strided", 453], ["aten::_log_softmax_backward_data", 424], ["aten::max_pool2d_with_indices", 422], ["aten::_log_softmax", 420], ["aten::nll_loss", 418], ["aten::transpose", 413], ["aten::sum", 411], ["aten::nll_loss_forward", 343], ["aten::detach_", 323], ["aten::as_strided", 244], ["aten::expand", 237], ["aten::set_", 221], ["AddBackward0", 200], ["aten::flatten", 163], ["detach_", 156], ["TBackward", 151], ["ViewBackward", 132], ["aten::reshape", 88], ["aten::conj", 15]]}, "host_self_time": {"title": "Host Self Time (us)", "columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["aten::empty", 82024], ["aten::add_", 62385], ["aten::cudnn_convolution", 35632], ["aten::cudnn_convolution_backward_input", 31902], ["aten::cudnn_convolution_backward_weight", 30672], ["aten::mul_", 24617], ["aten::cudnn_batch_norm", 23800], ["aten::add", 17808], ["aten::cudnn_batch_norm_backward", 15118], ["aten::resize_", 15019], ["aten::zero_", 10815], ["aten::relu_", 10373], ["aten::_convolution", 9919], ["aten::batch_norm", 9664], ["aten::fill_", 9660], ["aten::conv2d", 9219], ["aten::view", 9046], ["aten::clamp_min", 8409], ["aten::empty_like", 8385], ["aten::_batch_norm_impl_index", 8102], ["aten::threshold_backward", 7820], ["aten::cudnn_convolution_backward", 7726], ["aten::convolution", 7695], ["torch::autograd::AccumulateGrad", 7181], ["aten::clamp_min_", 6556], ["CudnnBatchNormBackward", 5184], ["CudnnConvolutionBackward", 4093], ["ReluBackward1", 2882], ["aten::mm", 1032], ["aten::zeros", 877], ["aten::addmm", 652], ["aten::to", 547], ["aten::nll_loss_backward", 463], ["aten::empty_strided", 453], ["aten::div", 343], ["aten::max_pool2d_with_indices", 325], ["aten::t", 312], ["aten::nll_loss_forward", 264], ["aten::transpose", 254], ["aten::as_strided", 244], ["AddmmBackward", 244], ["aten::mean", 233], ["aten::copy_", 230], ["aten::set_", 221], ["aten::max_pool2d_with_indices_backward", 213], ["aten::sum", 201], ["AddBackward0", 200], ["aten::max_pool2d", 177], ["aten::_log_softmax", 168], ["aten::detach_", 167], ["detach_", 156], ["aten::expand", 152], ["NllLossBackward", 142], ["aten::_log_softmax_backward_data", 142], ["aten::linear", 139], ["aten::cross_entropy_loss", 138], ["aten::adaptive_avg_pool2d", 127], ["aten::log_softmax", 106], ["MaxPool2DWithIndicesBackward", 97], ["aten::ones_like", 96], ["MeanBackward1", 95], ["aten::nll_loss_nd", 89], ["aten::flatten", 88], ["LogSoftmaxBackward", 76], ["aten::nll_loss", 75], ["ViewBackward", 44], ["aten::reshape", 43], ["TBackward", 33], ["aten::conj", 15]]}} -{"metadata": {"sort": "device_self_duration", "tooltips": {"tc_eligible": "Whether this operator is eligible to use Tensor Cores.", 
"tc_self_ratio": "Time of self-kernels with Tensor Cores / Time of self-kernels.", "tc_total_ratio": "Time of kernels with Tensor Cores / Time of kernels."}}, "data": [{"name": "aten::cudnn_convolution_backward_weight", "calls": 318, "device_self_duration": 141300, "device_total_duration": 141300, "host_self_duration": 30672, "host_total_duration": 60712, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::cudnn_convolution_backward_input", "calls": 312, "device_self_duration": 133494, "device_total_duration": 133494, "host_self_duration": 31902, "host_total_duration": 47359, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::cudnn_convolution", "calls": 318, "device_self_duration": 128683, "device_total_duration": 128683, "host_self_duration": 35632, "host_total_duration": 64167, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::cudnn_batch_norm_backward", "calls": 318, "device_self_duration": 61899, "device_total_duration": 61899, "host_self_duration": 15118, "host_total_duration": 36641, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::cudnn_batch_norm", "calls": 318, "device_self_duration": 34315, "device_total_duration": 34315, "host_self_duration": 23800, "host_total_duration": 87823, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::threshold_backward", "calls": 294, "device_self_duration": 27280, "device_total_duration": 27280, "host_self_duration": 7820, "host_total_duration": 12062, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::add_", "calls": 2994, "device_self_duration": 24052, "device_total_duration": 24052, "host_self_duration": 62385, "host_total_duration": 97540, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::copy_", "calls": 12, "device_self_duration": 18959, "device_total_duration": 18959, "host_self_duration": 230, "host_total_duration": 56915, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::clamp_min", "calls": 294, "device_self_duration": 17862, "device_total_duration": 17862, "host_self_duration": 8409, "host_total_duration": 12503, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::add", "calls": 414, "device_self_duration": 16026, "device_total_duration": 16026, "host_self_duration": 17808, "host_total_duration": 23831, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::max_pool2d_with_indices_backward", "calls": 6, "device_self_duration": 3838, "device_total_duration": 4695, "host_self_duration": 213, "host_total_duration": 493, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::mul_", "calls": 966, "device_self_duration": 2395, "device_total_duration": 2395, "host_self_duration": 24617, "host_total_duration": 35389, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::fill_", "calls": 978, "device_self_duration": 1888, "device_total_duration": 1888, "host_self_duration": 9660, "host_total_duration": 17657, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, 
"has_call_stack": true}, {"name": "aten::max_pool2d_with_indices", "calls": 6, "device_self_duration": 1422, "device_total_duration": 1422, "host_self_duration": 325, "host_total_duration": 422, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::mm", "calls": 12, "device_self_duration": 274, "device_total_duration": 274, "host_self_duration": 1032, "host_total_duration": 1424, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::mean", "calls": 6, "device_self_duration": 210, "device_total_duration": 210, "host_self_duration": 233, "host_total_duration": 454, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::addmm", "calls": 6, "device_self_duration": 197, "device_total_duration": 197, "host_self_duration": 652, "host_total_duration": 1060, "tc_eligible": "Yes", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::div", "calls": 6, "device_self_duration": 145, "device_total_duration": 145, "host_self_duration": 343, "host_total_duration": 469, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::_log_softmax_backward_data", "calls": 6, "device_self_duration": 51, "device_total_duration": 51, "host_self_duration": 142, "host_total_duration": 424, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::sum", "calls": 6, "device_self_duration": 45, "device_total_duration": 45, "host_self_duration": 201, "host_total_duration": 411, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::_log_softmax", "calls": 6, "device_self_duration": 42, "device_total_duration": 42, "host_self_duration": 168, "host_total_duration": 420, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::nll_loss_forward", "calls": 6, "device_self_duration": 18, "device_total_duration": 18, "host_self_duration": 264, "host_total_duration": 343, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::nll_loss_backward", "calls": 6, "device_self_duration": 18, "device_total_duration": 18, "host_self_duration": 463, "host_total_duration": 747, "tc_eligible": "No", "tc_self_ratio": 0.0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::empty", "calls": 4212, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 82024, "host_total_duration": 82024, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::zero_", "calls": 996, "device_self_duration": 0, "device_total_duration": 1882, "host_self_duration": 10815, "host_total_duration": 28309, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::zeros", "calls": 24, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 877, "host_total_duration": 1319, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::to", "calls": 36, "device_self_duration": 0, "device_total_duration": 18959, "host_self_duration": 547, "host_total_duration": 57776, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "detach_", "calls": 12, "device_self_duration": 0, "device_total_duration": 0, 
"host_self_duration": 156, "host_total_duration": 156, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::detach_", "calls": 12, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 167, "host_total_duration": 323, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::set_", "calls": 12, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 221, "host_total_duration": 221, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::empty_strided", "calls": 18, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 453, "host_total_duration": 453, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::resize_", "calls": 1896, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 15019, "host_total_duration": 15019, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::_convolution", "calls": 318, "device_self_duration": 0, "device_total_duration": 128683, "host_self_duration": 9919, "host_total_duration": 74086, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::convolution", "calls": 318, "device_self_duration": 0, "device_total_duration": 128683, "host_self_duration": 7695, "host_total_duration": 81781, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::conv2d", "calls": 318, "device_self_duration": 0, "device_total_duration": 128683, "host_self_duration": 9219, "host_total_duration": 91000, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::empty_like", "calls": 336, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 8385, "host_total_duration": 18591, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::view", "calls": 654, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 9046, "host_total_duration": 9046, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::_batch_norm_impl_index", "calls": 318, "device_self_duration": 0, "device_total_duration": 34315, "host_self_duration": 8102, "host_total_duration": 95925, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::batch_norm", "calls": 318, "device_self_duration": 0, "device_total_duration": 34315, "host_self_duration": 9664, "host_total_duration": 105589, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::clamp_min_", "calls": 294, "device_self_duration": 0, "device_total_duration": 17862, "host_self_duration": 6556, "host_total_duration": 19059, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::relu_", "calls": 294, "device_self_duration": 0, "device_total_duration": 17862, "host_self_duration": 10373, "host_total_duration": 29432, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::max_pool2d", "calls": 6, "device_self_duration": 0, "device_total_duration": 1422, "host_self_duration": 177, "host_total_duration": 599, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, 
"has_call_stack": true}, {"name": "aten::adaptive_avg_pool2d", "calls": 6, "device_self_duration": 0, "device_total_duration": 210, "host_self_duration": 127, "host_total_duration": 581, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::flatten", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 88, "host_total_duration": 163, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::as_strided", "calls": 42, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 244, "host_total_duration": 244, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::transpose", "calls": 30, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 254, "host_total_duration": 413, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::t", "calls": 30, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 312, "host_total_duration": 725, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::expand", "calls": 12, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 152, "host_total_duration": 237, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": true}, {"name": "aten::linear", "calls": 6, "device_self_duration": 0, "device_total_duration": 197, "host_self_duration": 139, "host_total_duration": 1463, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::log_softmax", "calls": 6, "device_self_duration": 0, "device_total_duration": 42, "host_self_duration": 106, "host_total_duration": 580, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::nll_loss", "calls": 6, "device_self_duration": 0, "device_total_duration": 18, "host_self_duration": 75, "host_total_duration": 418, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::nll_loss_nd", "calls": 6, "device_self_duration": 0, "device_total_duration": 18, "host_self_duration": 89, "host_total_duration": 507, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::cross_entropy_loss", "calls": 6, "device_self_duration": 0, "device_total_duration": 60, "host_self_duration": 138, "host_total_duration": 1225, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "aten::ones_like", "calls": 6, "device_self_duration": 0, "device_total_duration": 6, "host_self_duration": 96, "host_total_duration": 470, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": true}, {"name": "NllLossBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 18, "host_self_duration": 142, "host_total_duration": 889, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "LogSoftmaxBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 51, "host_self_duration": 76, "host_total_duration": 500, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::conj", "calls": 12, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 15, "host_total_duration": 15, "tc_eligible": "No", 
"tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "AddmmBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 274, "host_self_duration": 244, "host_total_duration": 2026, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "torch::autograd::AccumulateGrad", "calls": 966, "device_self_duration": 0, "device_total_duration": 3012, "host_self_duration": 7181, "host_total_duration": 37189, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "TBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 33, "host_total_duration": 151, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "aten::reshape", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 43, "host_total_duration": 88, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "ViewBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 44, "host_total_duration": 132, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "MeanBackward1", "calls": 6, "device_self_duration": 0, "device_total_duration": 145, "host_self_duration": 95, "host_total_duration": 663, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "ReluBackward1", "calls": 294, "device_self_duration": 0, "device_total_duration": 27280, "host_self_duration": 2882, "host_total_duration": 14944, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "AddBackward0", "calls": 96, "device_self_duration": 0, "device_total_duration": 0, "host_self_duration": 200, "host_total_duration": 200, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0, "has_call_stack": false}, {"name": "CudnnBatchNormBackward", "calls": 318, "device_self_duration": 0, "device_total_duration": 61899, "host_self_duration": 5184, "host_total_duration": 41825, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "aten::cudnn_convolution_backward", "calls": 318, "device_self_duration": 0, "device_total_duration": 274794, "host_self_duration": 7726, "host_total_duration": 115797, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "CudnnConvolutionBackward", "calls": 318, "device_self_duration": 0, "device_total_duration": 274794, "host_self_duration": 4093, "host_total_duration": 119890, "tc_eligible": "Yes", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}, {"name": "MaxPool2DWithIndicesBackward", "calls": 6, "device_self_duration": 0, "device_total_duration": 4695, "host_self_duration": 97, "host_total_duration": 590, "tc_eligible": "No", "tc_self_ratio": 0, "tc_total_ratio": 0.0, "has_call_stack": false}]} -{"metadata": {"sort": "Total Duration (us)"}, "data": {"columns": [{"type": "string", "name": "Name"}, {"type": "string", "name": "Tensor Cores Used", "tooltip": "Whether this kernel uses Tensor Cores."}, {"type": "number", "name": "Calls"}, {"type": "number", "name": "Total Duration (us)"}, {"type": "number", "name": "Mean Duration (us)"}, {"type": "number", "name": "Max Duration (us)"}, {"type": "number", "name": "Min Duration (us)"}, {"type": "number", "name": "Mean Blocks Per SM", "tooltip": "Blocks Per SM = 
blocks of this kernel / SM number of this GPU.\nIf this number is less than 1, it indicates the GPU multiprocessors are not fully utilized.\n\"Mean Blocks per SM\" is the weighted average of all calls of this kernel, using each call's execution duration as weight."}, {"type": "number", "name": "Mean Est. Achieved Occupancy (%)", "tooltip": "Est. Achieved Occupancy:\nFor most cases such as memory bandwidth bounded kernels, the higher the better. Occupancy is the ratio of active warps on an SM to the maximum number of active warps supported by the SM. The theoretical occupancy of a kernel is upper limit occupancy of this kernel, limited by multiple factors such as kernel shape, kernel used resource, and the GPU compute capability.\nEst. Achieved Occupancy of a kernel, OCC_K = min(threads of the kernel / SM number / max threads per SM, theoretical occupancy of the kernel). This \"Mean\" number is the weighted average of all calls' OCC_K of the kernel, using each call's execution duration as weight. It shows fine-grained low-level GPU utilization."}], "rows": [["void cudnn::detail::dgrad_engine(int, int, int, float const*, int, float const*, int, float*, kernel_grad_params, unsigned long long, int, unsigned long long, int, float, int, int, int)", "No", 180, 86855, 483, 1023, 323, 45.33, 30.04], ["void cudnn::bn_bw_1C11_kernel_new(float, float, float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float*, float*, float const*, float const*, float)", "No", 264, 59568, 226, 923, 45, 4.33, 67.92], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", "No", 90, 43471, 483, 742, 363, 8.18, 38.0], ["void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", "No", 3090, 39753, 13, 376, 1, 641.51, 92.35], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", "No", 90, 37016, 411, 735, 346, 12.39, 50.0], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", "No", 72, 35106, 488, 822, 350, 3.83, 41.64], ["void at::native::vectorized_elementwise_kernel<4, at::native::threshold_kernel_impl(at::TensorIteratorBase&, float, float)::{lambda(float, float)#1}, at::detail::Array >(int, at::native::threshold_kernel_impl(at::TensorIteratorBase&, float, float)::{lambda(float, float)#1}, at::detail::Array)", "No", 294, 27280, 93, 377, 13, 653.26, 100.0], ["void cudnn::bn_fw_tr_1C11_kernel_NCHW(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float const*, float, float, float*, float*, float*, float*, float, float)", "No", 150, 27084, 181, 454, 53, 3.12, 64.02], ["volta_scudnn_128x128_stridedB_splitK_medium_nn_v1", "No", 72, 25342, 352, 629, 323, 3.21, 25.0], ["volta_sgemm_64x64_nt", "No", 102, 21125, 207, 281, 184, 10.28, 19.38], ["volta_scudnn_128x128_stridedB_splitK_small_nn_v1", "No", 48, 20473, 427, 681, 309, 6.82, 25.0], ["void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() 
const::{lambda(float)#1}, at::detail::Array >(int, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array)", "No", 294, 17862, 61, 252, 5, 666.77, 100.0], ["volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1", "No", 36, 12761, 354, 365, 344, 22.4, 25.0], ["volta_scudnn_128x64_stridedB_interior_nn_v1", "No", 30, 9559, 319, 508, 255, 12.91, 19.0], ["volta_sgemm_128x32_nt", "No", 24, 8658, 361, 479, 18, 0.97, 11.51], ["volta_sgemm_64x64_nn", "No", 42, 8544, 203, 210, 197, 12.35, 24.14], ["volta_scudnn_128x64_relu_interior_nn_v1", "No", 30, 7976, 266, 316, 92, 37.08, 25.0], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", "No", 12, 7939, 662, 733, 584, 7.54, 25.0], ["volta_scudnn_128x64_stridedB_splitK_xregs_large_nn_v1", "No", 12, 7819, 652, 670, 634, 15.96, 19.0], ["void cudnn::bn_fw_tr_1C11_singleread(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float const*, float, float, float*, float*, float*, float*, float, float, cudnn::reduced_divisor, int, cudnn::reduced_divisor, cudnn::bnFwPersistentState*, int, float, float, float, int, float, float, cudnnStatus_t*, bool)", "No", 168, 7231, 43, 89, 11, 12.63, 75.0], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", "No", 12, 7068, 589, 990, 192, 85.38, 37.51], ["void cudnn::ops::scalePackedTensor_kernel(cudnnTensor4dStruct, float*, float)", "No", 180, 5901, 33, 142, 5, 525.02, 100.0], ["void cudnn::winograd_nonfused::winogradForwardOutput4x4(cudnn::winograd_nonfused::WinogradOutputParams)", "No", 120, 5314, 44, 72, 20, 10.02, 50.0], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", "No", 12, 5221, 435, 440, 431, 9.8, 31.0], ["void cudnn::winograd_nonfused::winogradWgradData4x4(cudnn::winograd_nonfused::WinogradDataParams)", "No", 78, 4681, 60, 126, 20, 15.46, 38.0], ["void cudnn::winograd_nonfused::winogradForwardData4x4(cudnn::winograd_nonfused::WinogradDataParams)", "No", 120, 4648, 39, 67, 17, 10.15, 50.0], ["void cudnn::winograd_nonfused::winogradWgradDelta4x4(cudnn::winograd_nonfused::WinogradDeltaParams)", "No", 78, 4559, 58, 126, 17, 15.71, 50.0], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", "No", 6, 4038, 673, 691, 649, 6.4, 25.0], ["void at::native::(anonymous namespace)::max_pool_backward_nchw(int, float const*, long const*, int, int, int, int, int, int, int, int, int, int, int, int, int, int, float*)", "No", 6, 3838, 640, 643, 637, 1254.4, 100.0], ["volta_scudnn_128x32_sliced1x4_ldg4_relu_exp_small_nhwc_tn_v1", "No", 6, 3697, 616, 621, 614, 2.6, 25.0], ["volta_scudnn_128x64_relu_medium_nn_v1", "No", 6, 3647, 608, 620, 602, 39.2, 25.0], ["volta_scudnn_128x128_stridedB_medium_nn_v1", "No", 12, 3550, 296, 309, 286, 19.6, 25.0], ["volta_scudnn_128x64_relu_small_nn_v1", "No", 12, 3273, 273, 286, 258, 9.8, 25.0], ["volta_sgemm_32x128_nn", "No", 18, 3059, 170, 173, 167, 22.05, 50.0], ["volta_scudnn_128x128_stridedB_small_nn_v1", "No", 6, 
3034, 506, 520, 491, 19.6, 25.0], ["volta_sgemm_32x128_nt", "No", 18, 2837, 158, 159, 156, 22.05, 50.0], ["void cudnn::winograd_nonfused::winogradForwardFilter4x4(cudnn::winograd_nonfused::WinogradFilterParams)", "No", 120, 2632, 22, 67, 4, 8.75, 73.78], ["void at::native::vectorized_elementwise_kernel<4, at::native::MulScalarFunctor, at::detail::Array >(int, at::native::MulScalarFunctor, at::detail::Array)", "No", 966, 2395, 2, 25, 1, 44.01, 58.56], ["void cudnn::bn_bw_1C11_singleread(float, float, float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float*, float*, float const*, float const*, float, cudnn::reduced_divisor, int, cudnn::reduced_divisor, cudnn::bnBwPersistentState*, int, float, float, float, int, float, cudnnStatus_t*, bool)", "No", 54, 2331, 43, 75, 19, 20.83, 75.0], ["void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", "No", 978, 1888, 2, 143, 0, 600.2, 86.95], ["void cudnn::winograd_nonfused::winogradWgradOutput4x4(cudnn::winograd_nonfused::WinogradWgradOutputParams)", "No", 78, 1484, 19, 69, 3, 8.13, 41.71], ["void at::native::(anonymous namespace)::max_pool_forward_nchw(int, float const*, int, int, int, int, int, int, int, int, int, int, int, int, int, int, float*, long*)", "No", 6, 1422, 237, 243, 234, 313.6, 100.0], ["volta_scudnn_128x64_stridedB_small_nn_v1", "No", 6, 582, 97, 99, 94, 9.8, 19.0], ["void nchwToNhwcKernel(int, int, int, int, float const*, float*, float, float)", "No", 12, 383, 32, 34, 29, 71.72, 100.0], ["void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor >, at::detail::Array >(int, at::native::BUnaryFunctor >, at::detail::Array)", "No", 318, 325, 1, 2, 1, 0.01, 0.0], ["cask_cudnn::computeOffsetsKernel(cask_cudnn::ComputeOffsetsParams)", "No", 108, 216, 2, 5, 1, 0.16, 2.0], ["void at::native::reduce_kernel<512, 1, at::native::ReduceOp, unsigned int, float, 4> >(at::native::ReduceOp, unsigned int, float, 4>)", "No", 6, 210, 35, 35, 35, 51.2, 100.0], ["cask_cudnn::computeWgradSplitKOffsetsKernel(cask_cudnn::ComputeSplitKOffsetsParams)", "No", 132, 155, 1, 2, 1, 0.16, 1.83], ["cask_cudnn::computeWgradBOffsetsKernel(cask_cudnn::ComputeWgradBOffsetsParams)", "No", 132, 150, 1, 2, 1, 0.02, 0.0], ["volta_sgemm_64x32_sliced1x4_nn", "No", 6, 149, 25, 25, 24, 2.0, 25.0], ["volta_sgemm_64x32_sliced1x4_tn", "No", 6, 148, 25, 25, 24, 1.0, 13.0], ["void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<1, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::MulScalarFunctor, at::detail::Array, OffsetCalculator<1, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", "No", 6, 145, 24, 25, 24, 156.8, 100.0], ["void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams)", "No", 36, 126, 4, 5, 2, 0.4, 3.0], ["cask_cudnn::computeBOffsetsKernel(cask_cudnn::ComputeBOffsetsParams)", "No", 54, 57, 1, 2, 1, 0.02, 0.0], ["void nhwcToNchwKernel(int, int, int, int, float const*, float*, float, float)", "No", 6, 54, 9, 10, 8, 12.8, 100.0], ["void (anonymous namespace)::softmax_warp_backward(float*, float const*, float const*, int, int, int)", "No", 6, 51, 8, 9, 8, 0.1, 1.0], ["void at::native::reduce_kernel<128, 4, 
at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "No", 6, 45, 8, 8, 7, 0.03, 0.0], ["void (anonymous namespace)::softmax_warp_forward(float*, float const*, int, int, int)", "No", 6, 42, 7, 7, 7, 0.1, 1.0], ["void splitKreduce_kernel(cublasSplitKParams, float const*, float const*, float*, float const*, float const*, float const*)", "No", 12, 31, 3, 4, 2, 4.39, 27.74], ["void at::native::unrolled_elementwise_kernel, OffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithoutCast, at::detail::Array::StoreWithoutCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array, OffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithoutCast, at::detail::Array::StoreWithoutCast)", "No", 6, 30, 5, 5, 5, 1.56, 5.0], ["void cunn_ClassNLLCriterion_updateOutput_kernel(float*, float*, float*, long*, float*, int, int, int, int, long)", "No", 6, 18, 3, 3, 3, 0.01, 0.0], ["void cunn_ClassNLLCriterion_updateGradInput_kernel(float*, float*, long*, float*, float*, int, int, int, int, long)", "No", 6, 12, 2, 2, 2, 0.01, 0.0]]}} -{"total": {"columns": [{"type": "string", "name": "name"}, {"type": "number", "name": "value"}], "rows": [["void cudnn::detail::dgrad_engine(int, int, int, float const*, int, float const*, int, float*, kernel_grad_params, unsigned long long, int, unsigned long long, int, float, int, int, int)", 86855], ["void cudnn::bn_bw_1C11_kernel_new(float, float, float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float*, float*, float const*, float const*, float)", 59568], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", 43471], ["void at::native::vectorized_elementwise_kernel<4, at::native::AddFunctor, at::detail::Array >(int, at::native::AddFunctor, at::detail::Array)", 39753], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", 37016], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", 35106], ["void at::native::vectorized_elementwise_kernel<4, at::native::threshold_kernel_impl(at::TensorIteratorBase&, float, float)::{lambda(float, float)#1}, at::detail::Array >(int, at::native::threshold_kernel_impl(at::TensorIteratorBase&, float, float)::{lambda(float, float)#1}, at::detail::Array)", 27280], ["void cudnn::bn_fw_tr_1C11_kernel_NCHW(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float const*, float, float, float*, float*, float*, float*, float, float)", 27084], ["volta_scudnn_128x128_stridedB_splitK_medium_nn_v1", 25342], ["volta_sgemm_64x64_nt", 21125], ["volta_scudnn_128x128_stridedB_splitK_small_nn_v1", 20473], ["void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array >(int, 
at::native::(anonymous namespace)::clamp_min_scalar_kernel_impl(at::TensorIterator&, c10::Scalar)::{lambda()#1}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array)", 17862], ["volta_scudnn_winograd_128x128_ldg1_ldg4_relu_tile148t_nt_v1", 12761], ["volta_scudnn_128x64_stridedB_interior_nn_v1", 9559], ["volta_sgemm_128x32_nt", 8658], ["volta_sgemm_64x64_nn", 8544], ["volta_scudnn_128x64_relu_interior_nn_v1", 7976], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", 7939], ["volta_scudnn_128x64_stridedB_splitK_xregs_large_nn_v1", 7819], ["void cudnn::bn_fw_tr_1C11_singleread(cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float const*, float, float, float*, float*, float*, float*, float, float, cudnn::reduced_divisor, int, cudnn::reduced_divisor, cudnn::bnFwPersistentState*, int, float, float, float, int, float, float, cudnnStatus_t*, bool)", 7231], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", 7068], ["void cudnn::ops::scalePackedTensor_kernel(cudnnTensor4dStruct, float*, float)", 5901], ["void cudnn::winograd_nonfused::winogradForwardOutput4x4(cudnn::winograd_nonfused::WinogradOutputParams)", 5314], ["void implicit_convolve_sgemm(int, int, int, float const*, int, float*, float const*, kernel_conv_params, unsigned long long, int, float, float, int, float const*, float const*, bool, int, int)", 5221], ["void cudnn::winograd_nonfused::winogradWgradData4x4(cudnn::winograd_nonfused::WinogradDataParams)", 4681], ["void cudnn::winograd_nonfused::winogradForwardData4x4(cudnn::winograd_nonfused::WinogradDataParams)", 4648], ["void cudnn::winograd_nonfused::winogradWgradDelta4x4(cudnn::winograd_nonfused::WinogradDeltaParams)", 4559], ["void cudnn::cnn::wgrad_alg0_engine(int, int, int, float const*, int, float*, float const*, kernel_grad_params, unsigned long long, int, float, int, int, int, int)", 4038], ["void at::native::(anonymous namespace)::max_pool_backward_nchw(int, float const*, long const*, int, int, int, int, int, int, int, int, int, int, int, int, int, int, float*)", 3838], ["volta_scudnn_128x32_sliced1x4_ldg4_relu_exp_small_nhwc_tn_v1", 3697], ["volta_scudnn_128x64_relu_medium_nn_v1", 3647], ["volta_scudnn_128x128_stridedB_medium_nn_v1", 3550], ["volta_scudnn_128x64_relu_small_nn_v1", 3273], ["volta_sgemm_32x128_nn", 3059], ["volta_scudnn_128x128_stridedB_small_nn_v1", 3034], ["volta_sgemm_32x128_nt", 2837], ["void cudnn::winograd_nonfused::winogradForwardFilter4x4(cudnn::winograd_nonfused::WinogradFilterParams)", 2632], ["void at::native::vectorized_elementwise_kernel<4, at::native::MulScalarFunctor, at::detail::Array >(int, at::native::MulScalarFunctor, at::detail::Array)", 2395], ["void cudnn::bn_bw_1C11_singleread(float, float, float, float, cudnnTensorStruct, float const*, cudnnTensorStruct, float const*, cudnnTensorStruct, float*, float const*, float*, float*, float const*, float const*, float, cudnn::reduced_divisor, int, cudnn::reduced_divisor, cudnn::bnBwPersistentState*, int, float, float, float, int, float, cudnnStatus_t*, bool)", 2331], ["void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor, at::detail::Array >(int, at::native::FillFunctor, at::detail::Array)", 1888], ["void 
cudnn::winograd_nonfused::winogradWgradOutput4x4(cudnn::winograd_nonfused::WinogradWgradOutputParams)", 1484], ["void at::native::(anonymous namespace)::max_pool_forward_nchw(int, float const*, int, int, int, int, int, int, int, int, int, int, int, int, int, int, float*, long*)", 1422], ["volta_scudnn_128x64_stridedB_small_nn_v1", 582], ["void nchwToNhwcKernel(int, int, int, int, float const*, float*, float, float)", 383], ["void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor >, at::detail::Array >(int, at::native::BUnaryFunctor >, at::detail::Array)", 325], ["cask_cudnn::computeOffsetsKernel(cask_cudnn::ComputeOffsetsParams)", 216], ["void at::native::reduce_kernel<512, 1, at::native::ReduceOp, unsigned int, float, 4> >(at::native::ReduceOp, unsigned int, float, 4>)", 210], ["cask_cudnn::computeWgradSplitKOffsetsKernel(cask_cudnn::ComputeSplitKOffsetsParams)", 155], ["cask_cudnn::computeWgradBOffsetsKernel(cask_cudnn::ComputeWgradBOffsetsParams)", 150], ["volta_sgemm_64x32_sliced1x4_nn", 149], ["volta_sgemm_64x32_sliced1x4_tn", 148], ["void at::native::unrolled_elementwise_kernel, at::detail::Array, OffsetCalculator<1, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast>(int, at::native::MulScalarFunctor, at::detail::Array, OffsetCalculator<1, unsigned int>, OffsetCalculator<1, unsigned int>, at::native::memory::LoadWithoutCast, at::native::memory::StoreWithoutCast)", 145], ["void cudnn::winograd::generateWinogradTilesKernel<0, float, float>(cudnn::winograd::GenerateWinogradTilesParams)", 126], ["cask_cudnn::computeBOffsetsKernel(cask_cudnn::ComputeBOffsetsParams)", 57], ["void nhwcToNchwKernel(int, int, int, int, float const*, float*, float, float)", 54], ["void (anonymous namespace)::softmax_warp_backward(float*, float const*, float const*, int, int, int)", 51], ["void at::native::reduce_kernel<128, 4, at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", 45], ["void (anonymous namespace)::softmax_warp_forward(float*, float const*, int, int, int)", 42], ["void splitKreduce_kernel(cublasSplitKParams, float const*, float const*, float*, float const*, float const*, float const*)", 31], ["void at::native::unrolled_elementwise_kernel, OffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithoutCast, at::detail::Array::StoreWithoutCast>(int, at::native::copy_device_to_device(at::TensorIterator&, bool)::{lambda()#2}::operator()() const::{lambda()#8}::operator()() const::{lambda(float)#1}, at::detail::Array, OffsetCalculator<1, unsigned int>, char*, at::native::memory::LoadWithoutCast, at::detail::Array::StoreWithoutCast)", 30], ["void cunn_ClassNLLCriterion_updateOutput_kernel(float*, float*, float*, long*, float*, int, int, int, int, long)", 18], ["void cunn_ClassNLLCriterion_updateGradInput_kernel(float*, float*, long*, float*, float*, int, int, int, int, long)", 12]]}} diff --git a/plugins/tensorboard-plugins/tb_plugin/test/test_compare_with_autograd.py b/plugins/tensorboard-plugins/tb_plugin/test/test_compare_with_autograd.py deleted file mode 100644 index d097fbd3ccc52e9d750b8c8f618198393b5ebc7b..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/test/test_compare_with_autograd.py +++ /dev/null @@ -1,301 +0,0 @@ -import os -import time -import unittest -import pytest -import torch -import 
torch.nn as nn -import torch.backends.cudnn as cudnn -import torch.optim -import torch.utils.data -import torchvision -import torchvision.transforms as T -import torchvision.models as models -import torch_tb_profiler.io as io -from torch_tb_profiler.profiler import RunLoader - - -def create_log_dir(): - log_dir_name = './log{}'.format(str(int(time.time()*1000))) - try: - os.makedirs(log_dir_name) - except Exception: - raise RuntimeError("Can't create directory: " + log_dir_name) - return log_dir_name - - -def get_autograd_result(p, worker_name, record_shapes=False, with_stack=False): - avgs = p.key_averages() - sort_by = 'self_cuda_time_total' - avgs = sorted( - avgs, key=lambda evt: getattr(evt, sort_by), reverse=True - ) - is_gpu = False - if avgs[0].self_cuda_time_total > 0: - is_gpu = True - others_prefix = {'enumerate(DataLoader)#', 'Optimizer.zero_grad#', 'Optimizer.step#', - 'ProfilerStep*', - 'Memcpy', 'Memset', - 'cuda'} - postfix_to_type = {'CPU': 'operator', 'CUDA': 'kernel'} - - def get_type(evt): - s = str(evt.device_type) - postfix = s[s.index('.') + 1:] - evt_type = postfix_to_type[postfix] - for prefix in others_prefix: - if evt.key.startswith(prefix): - evt_type = 'Other' - break - return evt_type - - result_dict = dict() - result_dict[worker_name + '#operator'] = list() - if is_gpu: - result_dict[worker_name + '#kernel'] = list() - for avg in avgs: - evt_type = get_type(avg) - if evt_type == 'operator': - line = [avg.key, int(avg.count)] - if is_gpu: - line.extend([int(avg.self_cuda_time_total), int(avg.cuda_time_total)]) - line.extend([int(avg.self_cpu_time_total), int(avg.cpu_time_total)]) - result_dict[worker_name + '#operator'].append(line) - elif is_gpu and evt_type == 'kernel': - line = [avg.key, int(avg.count), int(avg.self_cuda_time_total)] - result_dict[worker_name + '#kernel'].append(line) - if record_shapes: - result_dict[worker_name + '#operator#input_shape'] = list() - avgs = p.key_averages(True) - sort_by = 'self_cuda_time_total' - avgs = sorted( - avgs, key=lambda evt: getattr(evt, sort_by), reverse=True - ) - for avg in avgs: - evt_type = get_type(avg) - if evt_type == 'operator': - line = [avg.key, str(avg.input_shapes) if avg.input_shapes else '[]', int(avg.count)] - if is_gpu: - line.extend([int(avg.self_cuda_time_total), int(avg.cuda_time_total)]) - line.extend([int(avg.self_cpu_time_total), int(avg.cpu_time_total)]) - result_dict[worker_name + '#operator#input_shape'].append(line) - # The call stack for legacy and kineto profiler is different for now, - # The legacy profiler has stack for backward while kineto not - # So, just disable call stack compare for the moment - if False and with_stack: - result_dict[worker_name + '#operator#stack'] = list() - avgs = p.key_averages(False, 100) - sort_by = 'self_cuda_time_total' - avgs = sorted( - avgs, key=lambda evt: getattr(evt, sort_by), reverse=True - ) - for avg in avgs: - evt_type = get_type(avg) - if evt_type == 'operator' and avg.stack: - line = [avg.key, int(avg.count)] - if is_gpu: - line.extend([int(avg.self_cuda_time_total), int(avg.cuda_time_total)]) - line.extend([int(avg.self_cpu_time_total), int(avg.cpu_time_total), ''.join(avg.stack)]) - result_dict[worker_name + '#operator#stack'].append(line) - - result_dict[worker_name + '#operator#stack#input_shape'] = list() - avgs = p.key_averages(True, 100) - sort_by = 'self_cuda_time_total' - avgs = sorted( - avgs, key=lambda evt: getattr(evt, sort_by), reverse=True - ) - for avg in avgs: - evt_type = get_type(avg) - if evt_type == 'operator' and 
avg.stack: - line = [avg.key, str(avg.input_shapes), int(avg.count)] - if is_gpu: - line.extend([int(avg.self_cuda_time_total), int(avg.cuda_time_total)]) - line.extend([int(avg.self_cpu_time_total), int(avg.cpu_time_total), ''.join(avg.stack)]) - result_dict[worker_name + '#operator#stack#input_shape'].append(line) - - return result_dict - - -def generate_plugin_result_row(data): - row = list() - row.append(data['name']) - if 'input_shape' in data: - row.append(data['input_shape']) - row.append(data['calls']) - if 'device_self_duration' in data: - row.append(data['device_self_duration']) - row.append(data['device_total_duration']) - row.extend([data['host_self_duration'], data['host_total_duration']]) - if 'call_stack' in data: - row.append(data['call_stack']) - return row - - -def get_plugin_result(run, record_shapes=False, with_stack=False): - result_dict = dict() - for (worker_name, span), profile in run.profiles.items(): - worker_name = worker_name.split('.')[0] - assert profile.operation_table_by_name is not None - result_dict[worker_name + '#operator'] = list() - for data in profile.operation_table_by_name['data']: - row = generate_plugin_result_row(data) - result_dict[worker_name + '#operator'].append(row) - if profile.kernel_table is not None: - rows = profile.kernel_table['data']['rows'] - result_dict[worker_name + '#kernel'] = list() - for row in rows: - result_dict[worker_name + '#kernel'].append([row[0], row[2], row[3]]) # row[1] is 'Tensor Cores Used'. - if record_shapes: - assert profile.operation_table_by_name_input is not None - result_dict[worker_name + '#operator#input_shape'] = list() - for data in profile.operation_table_by_name_input['data']: - row = generate_plugin_result_row(data) - result_dict[worker_name + '#operator#input_shape'].append(row) - # The call stack for legacy and kineto profiler is different for now, - # The legacy profiler has stack for backward while kineto not - # So, just disable call stack compare for the moment - if False and with_stack: - assert profile.operation_stack_by_name is not None - assert profile.operation_stack_by_name_input is not None - result_dict[worker_name + '#operator#stack'] = list() - op_stack_dict = profile.operation_stack_by_name - for k, datalist in op_stack_dict.items(): - for data in datalist: - row = generate_plugin_result_row(data) - result_dict[worker_name + '#operator#stack'].append(row) - if record_shapes: - result_dict[worker_name + '#operator#stack#input_shape'] = list() - op_stack_dict = profile.operation_stack_by_name_input - for k, datalist in op_stack_dict.items(): - for data in datalist: - row = generate_plugin_result_row(data) - result_dict[worker_name + '#operator#stack#input_shape'].append(row) - - return result_dict - - -def get_train_func(use_gpu=True): - model = models.resnet50(pretrained=True) - if use_gpu: - model.cuda() - cudnn.benchmark = True - - transform = T.Compose([T.Resize(256), T.CenterCrop(224), T.ToTensor()]) - trainset = torchvision.datasets.CIFAR10(root='./data', train=True, - download=True, transform=transform) - trainloader = torch.utils.data.DataLoader(trainset, batch_size=2, - shuffle=True, num_workers=0) - - if use_gpu: - criterion = nn.CrossEntropyLoss().cuda() - else: - criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9) - if use_gpu: - device = torch.device('cuda:0') - else: - device = torch.device('cpu') - model.train() - - def train(train_step, prof=None): - for step, data in enumerate(trainloader, 0): - 
print('step:{}'.format(step)) - inputs, labels = data[0].to(device=device), data[1].to(device=device) - - outputs = model(inputs) - loss = criterion(outputs, labels) - - optimizer.zero_grad() - loss.backward() - optimizer.step() - if prof is not None: - prof.step() - if step >= train_step: - break - return train - - -def get_output_fn(dir_name, profilers_dict): - def output_fn(p): - # In current torch.profiler.profile, at beginning of each span, a new p.profiler will be created. - # So the same p.profiler will not be shared among different spans - worker_name = 'worker{}'.format(p.step_num) - profilers_dict[worker_name] = p.profiler - tb_trace_handler = torch.profiler.tensorboard_trace_handler(dir_name, worker_name) - tb_trace_handler(p) - return output_fn - - -class TestCompareWithAutogradResult(unittest.TestCase): - - def compare_results(self, log_dir, profilers_dict, use_gpu=True, record_shapes=False, with_stack=False): - cache = io.Cache() - loader = RunLoader(os.path.split(log_dir)[-1], log_dir, cache) - run = loader.load() - plugin_result = get_plugin_result(run, record_shapes, with_stack) - count = 0 - for worker_name, p in profilers_dict.items(): - autograd_result = get_autograd_result(p, worker_name, record_shapes, with_stack) - for key in autograd_result.keys(): - count += 1 - self.assertTrue(key in plugin_result.keys()) - self.assertEqual(len(plugin_result[key]), len(autograd_result[key])) - for line in plugin_result[key]: - self.assertTrue(line in autograd_result[key]) - self.assertEqual(count, len(plugin_result.keys())) - - @pytest.mark.skipif(not torch.cuda.is_available(), reason='') - def test_autograd_api(self): - with torch.autograd.profiler.profile(use_cuda=True, use_kineto=True, record_shapes=True) as p: - get_train_func()(5) - log_dir = create_log_dir() - p.export_chrome_trace(os.path.join(log_dir, 'worker0.{}.pt.trace.json'.format(int(time.time() * 1000)))) - self.compare_results(log_dir, {'worker0': p}) - - def base_profiler_api(self, use_gpu, record_shapes, profile_memory, with_stack): - log_dir = create_log_dir() - profilers_dict = dict() - if use_gpu: - activities = [ - torch.profiler.ProfilerActivity.CPU, - torch.profiler.ProfilerActivity.CUDA] - else: - activities = [torch.profiler.ProfilerActivity.CPU] - - with torch.profiler.profile( - activities=activities, - schedule=torch.profiler.schedule( - wait=2, - warmup=2, - active=3), - on_trace_ready=get_output_fn(log_dir, profilers_dict), - record_shapes=record_shapes, - profile_memory=profile_memory, - with_stack=with_stack - ) as p: - get_train_func(use_gpu)(13, p) - self.compare_results(log_dir, profilers_dict, use_gpu, record_shapes, with_stack) - - def test_profiler_api_without_gpu(self): - self.base_profiler_api(False, True, True, False) - - @pytest.mark.skipif(not torch.cuda.is_available(), reason='') - def test_profiler_api_with_record_shapes_memory_stack(self): - self.base_profiler_api(True, True, True, True) - - @pytest.mark.skipif(not torch.cuda.is_available(), reason='') - def test_profiler_api_without_record_shapes_memory_stack(self): - self.base_profiler_api(True, False, False, False) - - @pytest.mark.skipif(not torch.cuda.is_available(), reason='') - def test_profiler_api_without_step(self): - log_dir = create_log_dir() - profilers_dict = dict() - with torch.profiler.profile( - activities=[ - torch.profiler.ProfilerActivity.CPU, - torch.profiler.ProfilerActivity.CUDA], - on_trace_ready=get_output_fn(log_dir, profilers_dict), - record_shapes=True - ): - get_train_func()(7) - 
self.compare_results(log_dir, profilers_dict) diff --git a/plugins/tensorboard-plugins/tb_plugin/test/test_diffrun.py b/plugins/tensorboard-plugins/tb_plugin/test/test_diffrun.py deleted file mode 100644 index 06fbf398a7d5dacc26c4edffb5796387e8a37258..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/test/test_diffrun.py +++ /dev/null @@ -1,51 +0,0 @@ -import os -import unittest - -import pytest -from torch_tb_profiler.profiler.data import RunProfileData -from torch_tb_profiler.profiler.diffrun import (compare_op_tree, diff_summary, - print_node, print_ops) -from torch_tb_profiler.utils import timing - - -def load_profile(worker, span, path): - return RunProfileData.parse_gpu(worker, span, path, '.') - - -class TestDiffRun(unittest.TestCase): - - @pytest.mark.skipif(not (os.path.isfile(os.path.expanduser('~/profile_result/worker0.pt.trace.json')) and - os.path.isfile(os.path.expanduser('~/profile_result/worker1.pt.trace.json'))), - reason="file doesn't exist") - def test_happy_path(self): - # path1 = os.path.expanduser('~/profile_result/worker0.pt.trace.json') - path1 = '/home/mike/git/kineto/tb_plugin/examples/result/datapipe0.1638760942588.pt.trace.json' - profile1 = load_profile('worker0', 1, path1) - roots = list(profile1.tid2tree.values()) - root = roots[0] - - # path2 = os.path.expanduser('~/profile_result/worker1.pt.trace.json') - path2 = '/home/mike/git/kineto/tb_plugin/examples/result/datapipe0.1638835897553.pt.trace.json' - profile2 = load_profile('worker0', 1, path2) - roots1 = list(profile2.tid2tree.values()) - root1 = roots1[0] - - with timing('Compare operator tree', True): - node = compare_op_tree(root, root1) - - print_ops(node.children[4].left, prefix=' ') - print('========================================================') - print_ops(node.children[4].right) - - print('*********************** summary *************************') - with timing('Diff summary', True): - stats = diff_summary(node) - - # result = stats.flatten_diff_tree() - # path = '0-1-1' - # json_data = result[path].get_diff_node_summary(path) - print_node(stats, 0, 0) - - -if __name__ == '__main__': - unittest.main() diff --git a/plugins/tensorboard-plugins/tb_plugin/test/test_profiler.py b/plugins/tensorboard-plugins/tb_plugin/test/test_profiler.py deleted file mode 100644 index fda2208aab5f786af3fa1dbc08efdd43653073a2..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/test/test_profiler.py +++ /dev/null @@ -1,2752 +0,0 @@ -import gzip -import json -import os -import unittest - -from torch_tb_profiler.profiler.data import (DistributedRunProfileData, - RunProfileData) -from torch_tb_profiler.profiler.loader import RunLoader -from torch_tb_profiler.profiler.overall_parser import ProfileRole -from torch_tb_profiler.profiler.gpu_metrics_parser import GPUMetricsParser -from torch_tb_profiler.run import RunProfile - -SCHEMA_VERSION = 1 -WORKER_NAME = 'worker0' - - -def parse_json_trace(json_content, worker_name=WORKER_NAME) -> RunProfileData: - trace_json = json.loads(json_content) - trace_json = {'schemaVersion': 1, 'traceEvents': trace_json} - return RunProfileData.from_json(worker_name, 0, trace_json) - - -''' -All the events in json string are only simulation, not actual generated events. -We removed the data fields that not used by current version of our profiler, -for easy to check correctness and shorter in length. -We even renamed the data values such as kernel name or 'ts', to simplify the string. 
-''' - - -class TestProfiler(unittest.TestCase): - # A test case including all 7 event categories. - def test_all_categories(self): - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__", "pid": 13721, "tid": "123", - "ts": 100, "dur": 180, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 200, "dur": 60, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::nll_loss_backward", "pid": 13721, "tid": "456", - "ts": 340, "dur": 70, - "args": {"Input Dims": [[], [32, 1000], [32], [], [], [], []], "External id": 4} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#1", "pid": 13721, "tid": "123", - "ts": 50, "dur": 400, - "args": {"Input Dims": [], "External id": 1} - }, - { - "ph": "X", "cat": "Memcpy", - "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7", - "ts": 405, "dur": 10, - "args": {"stream": 7, "correlation": 334, "external id": 4} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 13721, "tid": "456", - "ts": 360, "dur": 20, - "args": {"correlation": 334, "external id": 4} - }, - { - "ph": "X", "cat": "Memset", - "name": "Memset (Device)", "pid": 0, "tid": "stream 7", - "ts": 420, "dur": 5, - "args": {"stream": 7, "correlation": 40344, "external id": 4} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemsetAsync", "pid": 13721, "tid": "456", - "ts": 390, "dur": 10, - "args": {"correlation": 40344, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 430, "dur": 15, - "args": {"correlation": 40348, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 405, "dur": 5, - "args": {"correlation": 40348, "external id": 4} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - - self.assertTrue(profile.has_runtime) - self.assertTrue(profile.has_kernel) - self.assertTrue(profile.has_memcpy_or_memset) - step = profile.steps_costs[0] - self.assertEqual(step.costs[ProfileRole.Kernel], 15) - self.assertEqual(step.costs[ProfileRole.Memcpy], 10) - self.assertEqual(step.costs[ProfileRole.Memset], 5) - self.assertEqual(step.costs[ProfileRole.Runtime], 30) - self.assertEqual(step.costs[ProfileRole.DataLoader], 180) - self.assertEqual(step.costs[ProfileRole.CpuOp], 35) - self.assertEqual(step.costs[ProfileRole.Other], 125) - - self.assertEqual(len(profile.op_list_groupby_name), 2) - self.assertEqual(len(profile.op_list_groupby_name_input), 2) - - def test_op_list(op_list): - op_count = 0 - for op_agg in op_list: - if op_agg.name == 'aten::to': - op_count += 1 - self.assertEqual(op_agg.input_shape, - '[[2, 8, 5], [], [], [], [], [], [], []]') - self.assertEqual(op_agg.calls, 1) - self.assertEqual(op_agg.host_duration, 60) - self.assertEqual(op_agg.device_duration, 0) - self.assertEqual(op_agg.self_host_duration, 60) - self.assertEqual(op_agg.self_device_duration, 0) - if op_agg.name == 'aten::nll_loss_backward': - op_count += 1 - self.assertEqual(op_agg.input_shape, - '[[], [32, 1000], [32], [], [], [], []]') - self.assertEqual(op_agg.calls, 1) - self.assertEqual(op_agg.host_duration, 70) - self.assertEqual(op_agg.device_duration, 30) - self.assertEqual( - 
op_agg.self_host_duration, 70 - 20 - 10 - 5) - self.assertEqual(op_agg.self_device_duration, 30) - self.assertEqual(op_count, 2) - - test_op_list(profile.op_list_groupby_name) - test_op_list(profile.op_list_groupby_name_input) - - self.assertEqual(len(profile.kernel_list_groupby_name_op), 1) - self.assertEqual(profile.kernel_stat.shape[0], 1) - self.assertEqual(profile.kernel_list_groupby_name_op[0].name, - 'void cunn_ClassNLLCriterion_updateGradInput_kernel') - self.assertEqual( - profile.kernel_list_groupby_name_op[0].op_name, 'aten::nll_loss_backward') - self.assertEqual(profile.kernel_list_groupby_name_op[0].calls, 1) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].total_duration, 15) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].min_duration, 15) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].max_duration, 15) - self.assertEqual(profile.kernel_stat.iloc[0]['count'], 1) - self.assertEqual(profile.kernel_stat.iloc[0]['sum'], 15) - self.assertEqual(profile.kernel_stat.iloc[0]['mean'], 15) - self.assertEqual(profile.kernel_stat.iloc[0]['min'], 15) - self.assertEqual(profile.kernel_stat.iloc[0]['max'], 15) - - # Test using external_id to build relationship between Operator and Runtime. - # Use external_id to build correlation with its father OperatorNode or ProfilerStepNode. - # Because in the case when RuntimeNode has duration 0 and starts at same time as a OperatorNode, - # just use interval containing relationship can't tell it is child or brother of the OperatorNode. - def test_external_id(self): - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "aten::mat_mul", "pid": 13721, "tid": "456", - "ts": 100, "dur": 100, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 120, "dur": 70, - "args": {"Input Dims": [], "External id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 130, "dur": 5, - "args": {"correlation": 334, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 120, "dur": 0, - "args": {"correlation": 334, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 130, "dur": 6, - "args": {"correlation": 335, "external id": 2, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 120, "dur": 0, - "args": {"correlation": 335, "external id": 2} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 130, "dur": 7, - "args": {"correlation": 336, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 190, "dur": 0, - "args": {"correlation": 336, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 130, "dur": 8, - "args": {"correlation": 337, "external id": 2, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 190, "dur": 0, - "args": {"correlation": 337, "external id": 2} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - - op_count = 0 
- for op_agg in profile.op_list_groupby_name: - if op_agg.name == 'aten::mat_mul': - op_count += 1 - self.assertEqual(op_agg.device_duration, 5 + 6 + 7 + 8) - self.assertEqual(op_agg.self_device_duration, 6 + 8) - if op_agg.name == 'aten::mm': - op_count += 1 - self.assertEqual(op_agg.device_duration, 5 + 7) - self.assertEqual(op_agg.self_device_duration, 5 + 7) - self.assertEqual(op_count, 2) - - # Test operator's father-child relationship when they have same start time or end time. - def test_operator_relation(self): - # 2 events with same start time. - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "aten::mat_mul", "pid": 13721, "tid": "456", - "ts": 100, "dur": 100, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 100, "dur": 70, - "args": {"Input Dims": [], "External id": 4} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - op_count = 0 - for op_agg in profile.op_list_groupby_name: - if op_agg.name == 'aten::mat_mul': - op_count += 1 - self.assertEqual(op_agg.self_host_duration, 100 - 70) - if op_agg.name == 'aten::mm': - op_count += 1 - self.assertEqual(op_agg.self_host_duration, 70) - self.assertEqual(op_count, 2) - - # 2 events with same end time. - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "aten::mat_mul", "pid": 13721, "tid": "456", - "ts": 100, "dur": 100, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 130, "dur": 70, - "args": {"Input Dims": [], "External id": 4} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - op_count = 0 - for op_agg in profile.op_list_groupby_name: - if op_agg.name == 'aten::mat_mul': - op_count += 1 - self.assertEqual(op_agg.self_host_duration, 100 - 70) - if op_agg.name == 'aten::mm': - op_count += 1 - self.assertEqual(op_agg.self_host_duration, 70) - self.assertEqual(op_count, 2) - - # Test multiple father-child operators with same name. - # In this case, all the operators except the top operator should be removed, - # and all runtime/kernels belong to the children operators should be attached to the only kept one. - # This behavior is to keep consistent with _remove_dup_nodes in torch/autograd/profiler.py. - def test_remove_dup_nodes(self): - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 100, "dur": 100, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 110, "dur": 80, - "args": {"Input Dims": [], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 120, "dur": 60, - "args": {"Input Dims": [], "External id": 4} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 130, "dur": 20, - "args": {"correlation": 335, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr", "pid": 0, "tid": "stream 7", - "ts": 220, "dur": 8, - "args": {"correlation": 335, "external id": 4, "device": 0} - } - ] - """ - profile = parse_json_trace(json_content) - profile.process() - self.assertEqual(len(profile.op_list_groupby_name), 1) - self.assertEqual( - profile.op_list_groupby_name[0].self_device_duration, 8) - - # Test Runtime with 'external id' 0. 
- # This kind of Runtime should not be attached to any operator, - # and should be included in accumulating device time. - def test_top_level_runtime(self): - # This operator is different thread with the runtime. - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "123", - "ts": 100, "dur": 100, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 130, "dur": 20, - "args": {"correlation": 335, "external id": 0} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr", "pid": 0, "tid": "stream 7", - "ts": 220, "dur": 8, - "args": {"correlation": 335, "external id": 0, "device": 0} - } - ] - """ - profile = parse_json_trace(json_content) - profile.process() - self.assertEqual(profile.op_list_groupby_name[0].device_duration, 0) - self.assertEqual( - profile.op_list_groupby_name[0].self_device_duration, 0) - self.assertEqual(profile.kernel_stat.iloc[0]['count'], 1) - - # Test Runtime directly called in ProfilerStep, not inside any operator. - def test_runtime_called_by_profilerstep(self): - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#1", "pid": 13721, "tid": "456", - "ts": 100, "dur": 300, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 130, "dur": 20, - "args": {"correlation": 335, "external id": 2} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr", "pid": 0, "tid": "stream 7", - "ts": 220, "dur": 8, - "args": {"correlation": 335, "external id": 2, "device": 0} - } - ] - """ - profile = parse_json_trace(json_content) - profile.process() - step = profile.steps_costs[0] - self.assertEqual(step.costs[ProfileRole.Kernel], 8) - self.assertEqual(step.costs[ProfileRole.Runtime], 20) - self.assertEqual(step.costs[ProfileRole.CpuOp], 0) - self.assertEqual(step.costs[ProfileRole.Other], 300 - 8 - 20) - # ProfilerStep is not regarded as an operator. - self.assertEqual(len(profile.op_list_groupby_name), 0) - self.assertEqual(len(profile.op_list_groupby_name_input), 0) - self.assertEqual(profile.kernel_stat.iloc[0]['count'], 1) - self.assertEqual(len(profile.kernel_list_groupby_name_op), 1) - - # Test one Runtime lauch more than one Kernels. - # Sometimes such as running Bert using DataParallel mode(1 process, 2GPUs), - # one runtime such as cudaLaunchCooperativeKernelMultiDevice could trigger more than one kernel, - # each Kernel runs at a seperate GPU card. 
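- # As a concrete illustration from the simulated trace in this test: one
- # cudaLaunchCooperativeKernelMultiDevice runtime (correlation 335) is correlated with two
- # ncclBroadcastRingLLKernel instances on stream 13 and stream 22, so the Broadcast operator
- # accumulates 120318 + 132800 us of device time and the kernel count is 2, while both kernels
- # still group under a single kernel name.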
- def test_runtime_launch_multipe_kernels(self): - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "Broadcast", "pid": 13721, "tid": "456", - "ts": 100, "dur": 300, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchCooperativeKernelMultiDevice", "pid": 13721, "tid": "456", - "ts": 130, "dur": 20, - "args": {"correlation": 335, "external id": 2} - }, - { - "ph": "X", "cat": "Kernel", - "name": "ncclBroadcastRingLLKernel_copy_i8(ncclColl)", "pid": 0, "tid": "stream 13", - "ts": 160, "dur": 120318, - "args": {"device": 0, "context": 1, "stream": 13, - "correlation": 335, "external id": 2, "device": 0} - }, - { - "ph": "X", "cat": "Kernel", - "name": "ncclBroadcastRingLLKernel_copy_i8(ncclColl)", "pid": 0, "tid": "stream 22", - "ts": 170, "dur": 132800, - "args": {"device": 0, "context": 2, "stream": 22, - "correlation": 335, "external id": 2} - } - ] - """ - profile = parse_json_trace(json_content) - profile.process() - self.assertEqual( - profile.op_list_groupby_name[0].device_duration, 120318 + 132800) - self.assertEqual(profile.kernel_stat.iloc[0]['count'], 2) - self.assertEqual(len(profile.kernel_list_groupby_name_op), 1) - - # Test when there is no ProfilerStep#. - def test_no_profilerstep(self): - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 100, "dur": 60, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::nll_loss_backward", "pid": 13721, "tid": "456", - "ts": 300, "dur": 70, - "args": {"Input Dims": [[], [32, 1000], [32], [], [], [], []], "External id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 320, "dur": 100, - "args": {"correlation": 40348, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 310, "dur": 20, - "args": {"correlation": 40348, "external id": 4} - } - ] - """ - profile = parse_json_trace(json_content) - profile.process() - - self.assertTrue(profile.has_runtime) - self.assertTrue(profile.has_kernel) - self.assertTrue(not profile.has_memcpy_or_memset) - self.assertEqual(len(profile.steps_costs), 1) - step = profile.steps_costs[0] - - self.assertEqual(step.costs[ProfileRole.Kernel], 100) - self.assertEqual(step.costs[ProfileRole.Memcpy], 0) - self.assertEqual(step.costs[ProfileRole.Memset], 0) - self.assertEqual(step.costs[ProfileRole.Runtime], 320 - 310) - self.assertEqual(step.costs[ProfileRole.DataLoader], 0) - self.assertEqual(step.costs[ProfileRole.CpuOp], 60 + (310 - 300)) - # If no ProfilerStep, all events will be regarded as a step. 
- self.assertEqual(step.costs[ProfileRole.Other], 300 - (100 + 60)) - self.assertEqual(step.costs[ProfileRole.Total], (320 + 100) - 100) - self.assertEqual(len(profile.op_list_groupby_name), 2) - self.assertEqual(len(profile.op_list_groupby_name_input), 2) - self.assertEqual(profile.kernel_stat.iloc[0]['count'], 1) - self.assertEqual(len(profile.kernel_list_groupby_name_op), 1) - - def test_op_list(op_list): - op_count = 0 - for op_agg in op_list: - if op_agg.name == 'aten::to': - op_count += 1 - self.assertEqual(op_agg.input_shape, - '[[2, 8, 5], [], [], [], [], [], [], []]') - self.assertEqual(op_agg.calls, 1) - self.assertEqual(op_agg.host_duration, 60) - self.assertEqual(op_agg.device_duration, 0) - self.assertEqual(op_agg.self_host_duration, 60) - self.assertEqual(op_agg.self_device_duration, 0) - if op_agg.name == 'aten::nll_loss_backward': - op_count += 1 - self.assertEqual(op_agg.input_shape, - '[[], [32, 1000], [32], [], [], [], []]') - self.assertEqual(op_agg.calls, 1) - self.assertEqual(op_agg.host_duration, 70) - self.assertEqual(op_agg.device_duration, 100) - self.assertEqual(op_agg.self_host_duration, 70 - 20) - self.assertEqual(op_agg.self_device_duration, 100) - self.assertEqual(op_count, 2) - - test_op_list(profile.op_list_groupby_name) - test_op_list(profile.op_list_groupby_name_input) - - self.assertEqual(profile.kernel_list_groupby_name_op[0].name, - 'void cunn_ClassNLLCriterion_updateGradInput_kernel') - self.assertEqual( - profile.kernel_list_groupby_name_op[0].op_name, 'aten::nll_loss_backward') - self.assertEqual(profile.kernel_list_groupby_name_op[0].calls, 1) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].total_duration, 100) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].min_duration, 100) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].max_duration, 100) - self.assertEqual(profile.kernel_stat.iloc[0]['count'], 1) - self.assertEqual(profile.kernel_stat.iloc[0]['sum'], 100) - self.assertEqual(profile.kernel_stat.iloc[0]['mean'], 100) - self.assertEqual(profile.kernel_stat.iloc[0]['min'], 100) - self.assertEqual(profile.kernel_stat.iloc[0]['max'], 100) - - # 2 steps without overlap with each other. 
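- # Worked numbers from the simulated trace in this test: step 1 runs on the host from ts 100 to
- # 300, but the memcpy it launched only finishes at ts 280 + 40 = 320, so its total step time is
- # 320 - 100 = 220 us; step 2's kernel finishes at ts 410 + 200 = 610, giving 610 - 350 = 260 us,
- # and the average step total is (220 + 260) / 2 = 240 us.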
- def test_multiple_profilersteps_no_overlap(self): - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#1", "pid": 13721, "tid": "123", - "ts": 100, "dur": 200, - "args": {"Input Dims": [], "External id": 1} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 200, "dur": 60, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#2", "pid": 13721, "tid": "123", - "ts": 350, "dur": 150, - "args": {"Input Dims": [], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "123", - "ts": 360, "dur": 50, - "args": {"Input Dims": [], "External id": 4} - }, - { - "ph": "X", "cat": "Memcpy", - "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7", - "ts": 280, "dur": 40, - "args": {"stream": 7, "correlation": 334, "external id": 2} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 13721, "tid": "123", - "ts": 250, "dur": 5, - "args": {"correlation": 334, "external id": 2} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 410, "dur": 200, - "args": {"correlation": 40348, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "123", - "ts": 400, "dur": 5, - "args": {"correlation": 40348, "external id": 4} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - - self.assertTrue(profile.has_runtime) - self.assertTrue(profile.has_kernel) - self.assertTrue(profile.has_memcpy_or_memset) - self.assertEqual(len(profile.steps_costs), 2) - step = profile.steps_costs[0] - self.assertEqual(step.costs[ProfileRole.Kernel], 0) - self.assertEqual(step.costs[ProfileRole.Memcpy], 40) - self.assertEqual(step.costs[ProfileRole.Memset], 0) - self.assertEqual(step.costs[ProfileRole.Runtime], 5) - self.assertEqual(step.costs[ProfileRole.DataLoader], 0) - self.assertEqual(step.costs[ProfileRole.CpuOp], 60 - 5) - self.assertEqual(step.costs[ProfileRole.Other], 200 - 60 - 20) - # Device side takes effect. - self.assertEqual(step.costs[ProfileRole.Total], 320 - 100) - step = profile.steps_costs[1] - self.assertEqual(step.costs[ProfileRole.Kernel], 200) - self.assertEqual(step.costs[ProfileRole.Memcpy], 0) - self.assertEqual(step.costs[ProfileRole.Memset], 0) - self.assertEqual(step.costs[ProfileRole.Runtime], 5) - self.assertEqual(step.costs[ProfileRole.DataLoader], 0) - self.assertEqual(step.costs[ProfileRole.CpuOp], 50 - 5) - self.assertEqual(step.costs[ProfileRole.Other], 360 - 350) - # Device side takes effect. 
- self.assertEqual(step.costs[ProfileRole.Total], 610 - 350) - self.assertEqual( - profile.avg_costs.costs[ProfileRole.Total], ((320 - 100) + (610 - 350)) / 2) - - self.assertEqual(len(profile.op_list_groupby_name), 2) - self.assertEqual(len(profile.op_list_groupby_name_input), 2) - - def test_op_list(op_list): - op_count = 0 - for op_agg in op_list: - if op_agg.name == 'aten::to': - op_count += 1 - self.assertEqual(op_agg.input_shape, - '[[2, 8, 5], [], [], [], [], [], [], []]') - self.assertEqual(op_agg.calls, 1) - self.assertEqual(op_agg.host_duration, 60) - self.assertEqual(op_agg.device_duration, 40) - self.assertEqual(op_agg.self_host_duration, 60 - 5) - self.assertEqual(op_agg.self_device_duration, 40) - if op_agg.name == 'aten::mm': - op_count += 1 - self.assertEqual(op_agg.input_shape, '[]') - self.assertEqual(op_agg.calls, 1) - self.assertEqual(op_agg.host_duration, 50) - self.assertEqual(op_agg.device_duration, 200) - self.assertEqual(op_agg.self_host_duration, 50 - 5) - self.assertEqual(op_agg.self_device_duration, 200) - self.assertEqual(op_count, 2) - - test_op_list(profile.op_list_groupby_name) - test_op_list(profile.op_list_groupby_name_input) - - self.assertEqual(len(profile.kernel_list_groupby_name_op), 1) - self.assertEqual(profile.kernel_stat.shape[0], 1) - self.assertEqual(profile.kernel_list_groupby_name_op[0].name, - 'void cunn_ClassNLLCriterion_updateGradInput_kernel') - self.assertEqual( - profile.kernel_list_groupby_name_op[0].op_name, 'aten::mm') - self.assertEqual(profile.kernel_list_groupby_name_op[0].calls, 1) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].total_duration, 200) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].min_duration, 200) - self.assertEqual( - profile.kernel_list_groupby_name_op[0].max_duration, 200) - self.assertEqual(profile.kernel_stat.iloc[0]['count'], 1) - self.assertEqual(profile.kernel_stat.iloc[0]['sum'], 200) - self.assertEqual(profile.kernel_stat.iloc[0]['mean'], 200) - self.assertEqual(profile.kernel_stat.iloc[0]['min'], 200) - self.assertEqual(profile.kernel_stat.iloc[0]['max'], 200) - - # Test self time and total time on operator with nested operator. 
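- # Worked numbers from the simulated trace in this test: aten::mat_mul spans 100 us on the host
- # and directly launches a 25 us cudaLaunchKernel, while its nested child aten::mm spans 40 us,
- # so the parent's self host duration is 100 - 40 - 25 = 35 us. On the device side, the parent's
- # self duration is only the 16 us kernel correlated with its own external id, while its total
- # device duration also includes the child's 20 us kernel, i.e. 20 + 16 = 36 us.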
- def test_self_time(self): - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "aten::mat_mul", "pid": 13721, "tid": "456", - "ts": 100, "dur": 100, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 120, "dur": 40, - "args": {"Input Dims": [], "External id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 155, "dur": 20, - "args": {"correlation": 334, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 150, "dur": 10, - "args": {"correlation": 334, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 210, "dur": 16, - "args": {"correlation": 335, "external id": 2, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 170, "dur": 25, - "args": {"correlation": 335, "external id": 2} - }] - """ - profile = parse_json_trace(json_content) - - op_count = 0 - for op_agg in profile.op_list_groupby_name: - if op_agg.name == 'aten::mat_mul': - op_count += 1 - self.assertEqual(op_agg.host_duration, 100) - self.assertEqual(op_agg.device_duration, 20 + 16) - self.assertEqual(op_agg.self_host_duration, 100 - 40 - 25) - self.assertEqual(op_agg.self_device_duration, 16) - if op_agg.name == 'aten::mm': - op_count += 1 - self.assertEqual(op_agg.host_duration, 40) - self.assertEqual(op_agg.device_duration, 20) - self.assertEqual(op_agg.self_host_duration, 30) - self.assertEqual(op_agg.self_device_duration, 20) - self.assertEqual(op_count, 2) - - # 2 steps with overlap with each other. - def test_multiple_profilersteps_with_overlap(self): - # The kernel with 'correlation' as 123 is launched by previous step, - # its end time is bigger than 'ProfilerStep#1''s start time, - # so it is regarded as beginning of 'ProfilerStep#1'. - # The memcpy with 'correlation' as 334 is launched by 'ProfilerStep#1', - # its end time is bigger than 'ProfilerStep#2''s start time, - # so it is regarded as beginning of 'ProfilerStep#2'. 
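- # Concretely, in the trace below the kernel launched before step 1 ends at ts 150 + 90 = 240,
- # which becomes the effective device-side start of 'ProfilerStep#1', and the memcpy launched
- # inside step 1 ends at ts 280 + 100 = 380, which becomes the effective start of
- # 'ProfilerStep#2'; so step 1's total is 380 - 240 = 140 us and step 2's is 610 - 380 = 230 us.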
- json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#1", "pid": 13721, "tid": "123", - "ts": 100, "dur": 200, - "args": {"Input Dims": [], "External id": 1} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 200, "dur": 60, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#2", "pid": 13721, "tid": "123", - "ts": 350, "dur": 150, - "args": {"Input Dims": [], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "123", - "ts": 360, "dur": 50, - "args": {"Input Dims": [], "External id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 150, "dur": 90, - "args": {"correlation": 123, "external id": 0, "device": 0} - }, - { - "ph": "X", "cat": "Memcpy", - "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7", - "ts": 280, "dur": 100, - "args": {"stream": 7, "correlation": 334, "external id": 2} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 13721, "tid": "123", - "ts": 250, "dur": 5, - "args": {"correlation": 334, "external id": 2} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 410, "dur": 200, - "args": {"correlation": 40348, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "123", - "ts": 400, "dur": 5, - "args": {"correlation": 40348, "external id": 4} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - - self.assertTrue(profile.has_runtime) - self.assertTrue(profile.has_kernel) - self.assertTrue(profile.has_memcpy_or_memset) - self.assertEqual(len(profile.steps_costs), 2) - step = profile.steps_costs[0] - self.assertEqual(step.costs[ProfileRole.Kernel], 0) - self.assertEqual(step.costs[ProfileRole.Memcpy], 100) - self.assertEqual(step.costs[ProfileRole.Memset], 0) - self.assertEqual(step.costs[ProfileRole.Runtime], 5) - self.assertEqual(step.costs[ProfileRole.DataLoader], 0) - self.assertEqual(step.costs[ProfileRole.CpuOp], - (200 + 60) - (150 + 90) - 5) - self.assertEqual(step.costs[ProfileRole.Other], 280 - (200 + 60)) - # Device side takes effect. - self.assertEqual(step.costs[ProfileRole.Total], - (280 + 100) - (150 + 90)) - step = profile.steps_costs[1] - self.assertEqual(step.costs[ProfileRole.Kernel], 200) - self.assertEqual(step.costs[ProfileRole.Memcpy], 0) - self.assertEqual(step.costs[ProfileRole.Memset], 0) - self.assertEqual(step.costs[ProfileRole.Runtime], 5) - self.assertEqual(step.costs[ProfileRole.DataLoader], 0) - self.assertEqual(step.costs[ProfileRole.CpuOp], - (280 + 100) - 360 + (410 - 405)) - self.assertEqual(step.costs[ProfileRole.Other], 0) - # Device side takes effect. - self.assertEqual(step.costs[ProfileRole.Total], 610 - (280 + 100)) - - # Test whether step time is calculated correctly when the last 2 steps have no kernels launched. 
- def test_last_steps_no_kernel(self): - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#1", "pid": 13721, "tid": "123", - "ts": 100, "dur": 200, - "args": {"Input Dims": [], "External id": 1} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 120, "dur": 10, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#2", "pid": 13721, "tid": "123", - "ts": 300, "dur": 100, - "args": {"Input Dims": [], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#3", "pid": 13721, "tid": "123", - "ts": 400, "dur": 50, - "args": {"Input Dims": [], "External id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 90, "dur": 20, - "args": {"correlation": 123, "external id": 0, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 13721, "tid": "123", - "ts": 125, "dur": 5, - "args": {"correlation": 334, "external id": 2} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 150, "dur": 180, - "args": {"correlation": 334, "external id": 2, "device": 0} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - - # The last 2 steps without kernels are removed from overall view. - self.assertEqual(len(profile.steps_costs), 1) - step = profile.steps_costs[0] - self.assertEqual( - step.costs[ProfileRole.Total], (150 + 180) - (90 + 20)) - - def test_pure_cpu(self): - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#1", "pid": 13721, "tid": "123", - "ts": 100, "dur": 200, - "args": {"Input Dims": [], "External id": 1} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 120, "dur": 10, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#2", "pid": 13721, "tid": "123", - "ts": 300, "dur": 100, - "args": {"Input Dims": [], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "123", - "ts": 350, "dur": 40, - "args": {"Input Dims": [], "External id": 4} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - - self.assertEqual(len(profile.steps_costs), 2) - step = profile.steps_costs[0] - self.assertEqual(step.costs[ProfileRole.Kernel], 0) - self.assertEqual(step.costs[ProfileRole.Memcpy], 0) - self.assertEqual(step.costs[ProfileRole.Memset], 0) - self.assertEqual(step.costs[ProfileRole.Runtime], 0) - self.assertEqual(step.costs[ProfileRole.DataLoader], 0) - self.assertEqual(step.costs[ProfileRole.CpuOp], 10) - self.assertEqual(step.costs[ProfileRole.Other], 200 - 10) - self.assertEqual(step.costs[ProfileRole.Total], 200) - step = profile.steps_costs[1] - self.assertEqual(step.costs[ProfileRole.Kernel], 0) - self.assertEqual(step.costs[ProfileRole.Memcpy], 0) - self.assertEqual(step.costs[ProfileRole.Memset], 0) - self.assertEqual(step.costs[ProfileRole.Runtime], 0) - self.assertEqual(step.costs[ProfileRole.DataLoader], 0) - self.assertEqual(step.costs[ProfileRole.CpuOp], 40) - self.assertEqual(step.costs[ProfileRole.Other], 100 - 40) - self.assertEqual(step.costs[ProfileRole.Total], 100) - - # Test GPU utilization, est. SM efficiency, and occupancy. 
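The formulas implied by the expected values in the next test can be sketched as follows; they are inferred from the assertions, not taken from the plugin's source. GPU utilization is the fraction of the profiled window during which at least one kernel runs; estimated SM efficiency is the time-weighted `min(blocks per SM, 1.0)` of the running kernels averaged over the window; occupancy is the per-kernel "est. achieved occupancy %" weighted by kernel duration. A minimal sketch of the first and last of these:

```
def gpu_utilization(kernels, window_start, window_end):
    """kernels: iterable of (ts, dur) pairs on one device; zero-duration kernels
    contribute nothing."""
    busy = sorted((ts, ts + dur) for ts, dur in kernels if dur > 0)
    covered, cur_start, cur_end = 0, None, None
    for start, end in busy:
        if cur_end is not None and start <= cur_end:   # merge overlapping kernels
            cur_end = max(cur_end, end)
        else:                                          # close the previous busy span
            if cur_end is not None:
                covered += cur_end - cur_start
            cur_start, cur_end = start, end
    if cur_end is not None:
        covered += cur_end - cur_start
    return covered / (window_end - window_start)


def avg_occupancy(kernels):
    """kernels: iterable of (dur, occupancy) pairs; zero-duration kernels are ignored."""
    total = sum(dur for dur, _ in kernels if dur > 0)
    return sum(dur * occ for dur, occ in kernels if dur > 0) / total


# The device-1 kernels below keep the GPU busy over [130, 170) and [200, 220) out of
# a profiled window of length 120.
assert abs(gpu_utilization([(130, 10), (135, 15), (150, 0), (145, 25), (200, 20)],
                           100, 220) - (40 + 20) / 120) < 1e-9
assert abs(avg_occupancy([(10, 0.6), (15, 0.1), (0, 0.2), (25, 1.0), (20, 0.3)])
           - (0.6 * 10 + 0.1 * 15 + 1.0 * 25 + 0.3 * 20) / (10 + 15 + 25 + 20)) < 1e-9
```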
- def test_gpu_utilization(self): - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "aten::mat_mul", "pid": 13721, "tid": "456", - "ts": 100, "dur": 100, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 120, "dur": 70, - "args": {"Input Dims": [], "External id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 1, "tid": "stream 7", - "ts": 130, "dur": 10, - "args": {"correlation": 334, "external id": 4, "device": 1, - "blocks per SM": 0.5, "est. achieved occupancy %": 0.6} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 120, "dur": 0, - "args": {"correlation": 334, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr", "pid": 1, "tid": "stream 8", - "ts": 135, "dur": 15, - "args": {"correlation": 335, "external id": 2, "device": 1, - "blocks per SM": 0.6, "est. achieved occupancy %": 0.1} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void gemmSN_TN_kernel_64addr", "pid": 1, "tid": "stream 8", - "ts": 150, "dur": 0, - "args": {"correlation": 335, "external id": 2, "device": 1, - "blocks per SM": 0.3, "est. achieved occupancy %": 0.2} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 120, "dur": 0, - "args": {"correlation": 335, "external id": 2} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 1, "tid": "stream 7", - "ts": 145, "dur": 25, - "args": {"correlation": 336, "external id": 4, "device": 1, - "blocks per SM": 0.3, "est. achieved occupancy %": 1.0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 125, "dur": 3, - "args": {"correlation": 336, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 1, "tid": "stream 7", - "ts": 200, "dur": 20, - "args": {"correlation": 337, "external id": 2, "device": 1, - "blocks per SM": 10.5, "est. 
achieved occupancy %": 0.3} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 195, "dur": 1, - "args": {"correlation": 337, "external id": 2} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - - self.assertEqual(len(profile.gpu_metrics_parser.gpu_ids), 1) - self.assertAlmostEqual(profile.gpu_metrics_parser.gpu_utilization[1], (40 + 20) / 120) - self.assertAlmostEqual(profile.gpu_metrics_parser.avg_approximated_sm_efficiency_per_device[1], - (0.5 * (135 - 130) - + 1.0 * (140 - 135) - + 0.6 * (145 - 140) - + 0.9 * (150 - 145) - + 0.3 * (170 - 150) - + 1.0 * (220 - 200)) / (220 - 100)) - self.assertAlmostEqual(profile.gpu_metrics_parser.avg_occupancy_per_device[1], - (0.6 * 10 + 0.1 * 15 + 1.0 * 25 + 0.3 * 20) / (10 + 15 + 25 + 20)) - - gpu_util_expected = [(100, 0), (110, 0), (120, 0), (130, 1.0), (140, 1.0), (150, 1.0), (160, 1.0), - (170, 0), (180, 0), (190, 0), (200, 1.0), (210, 1.0), (220, 0)] - for gpu_id in profile.gpu_metrics_parser.gpu_ids: - buckets = profile.gpu_metrics_parser.gpu_util_buckets[gpu_id] - gpu_util_id = 0 - for b in buckets: - self.assertEqual(b[0], gpu_util_expected[gpu_util_id][0]) - self.assertAlmostEqual(b[1], gpu_util_expected[gpu_util_id][1]) - gpu_util_id += 1 - self.assertEqual(gpu_util_id, len(gpu_util_expected)) - - sm_efficiency_expected = [(130, 0.5), (135, 0), (135, 1.0), (140, 0), (140, 0.6), (145, 0), (145, 0.9), - (150, 0), (150, 0.3), (170, 0), (170, 0), (200, 0), (200, 1.0), (220, 0)] - for gpu_id in profile.gpu_metrics_parser.gpu_ids: - ranges = profile.gpu_metrics_parser.approximated_sm_efficiency_ranges[gpu_id] - sm_efficiency_id = 0 - for r in ranges: - self.assertEqual( - r[0], sm_efficiency_expected[sm_efficiency_id][0]) - self.assertAlmostEqual( - r[2], sm_efficiency_expected[sm_efficiency_id][1]) - sm_efficiency_id += 1 - self.assertEqual( - r[1], sm_efficiency_expected[sm_efficiency_id][0]) - self.assertAlmostEqual( - 0, sm_efficiency_expected[sm_efficiency_id][1]) - sm_efficiency_id += 1 - self.assertEqual(sm_efficiency_id, len(sm_efficiency_expected)) - - count = 0 - for agg_by_op in profile.kernel_list_groupby_name_op: - if agg_by_op.name == 'void gemmSN_TN_kernel_64addr' and agg_by_op.op_name == 'aten::mat_mul': - self.assertAlmostEqual(agg_by_op.avg_blocks_per_sm, 0.6) - self.assertAlmostEqual(agg_by_op.avg_occupancy, 0.1) - count += 1 - if agg_by_op.name == 'void cunn_ClassNLLCriterion_updateGradInput_kernel' and \ - agg_by_op.op_name == 'aten::mm': - self.assertAlmostEqual( - agg_by_op.avg_blocks_per_sm, (0.5 * 10 + 0.3 * 25) / (10 + 25)) - self.assertAlmostEqual( - agg_by_op.avg_occupancy, (0.6 * 10 + 1.0 * 25) / (10 + 25)) - count += 1 - if agg_by_op.name == 'void cunn_ClassNLLCriterion_updateGradInput_kernel' and \ - agg_by_op.op_name == 'aten::mat_mul': - self.assertAlmostEqual(agg_by_op.avg_blocks_per_sm, 10.5) - self.assertAlmostEqual(agg_by_op.avg_occupancy, 0.3) - count += 1 - self.assertEqual(count, 3) - - count = 0 - for _id, (name, row) in enumerate(profile.kernel_stat.iterrows()): - # The kernel with zero 'dur' should be ignored. 
- if name == 'void gemmSN_TN_kernel_64addr': - self.assertAlmostEqual(row['blocks_per_sm'], 0.6) - self.assertAlmostEqual(row['occupancy'], 0.1) - count += 1 - if name == 'void cunn_ClassNLLCriterion_updateGradInput_kernel': - self.assertAlmostEqual( - row['blocks_per_sm'], (0.5 * 10 + 0.3 * 25 + 10.5 * 20) / (10 + 25 + 20)) - self.assertAlmostEqual( - row['occupancy'], (0.6 * 10 + 1.0 * 25 + 0.3 * 20) / (10 + 25 + 20)) - count += 1 - self.assertEqual(count, 2) - - # Test GPU utilization 3 metrics works fine if kernel out of ProfilerStep. - def test_gpu_utilization_kernel_out_of_step(self): - json_content = """ - [{ - "ph": "X", "cat": "Operator", - "name": "aten::mat_mul", "pid": 13721, "tid": "456", - "ts": 10, "dur": 10, - "args": {"Input Dims": [], "External id": 1} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 120, "dur": 70, - "args": {"Input Dims": [], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::mm", "pid": 13721, "tid": "456", - "ts": 220, "dur": 20, - "args": {"Input Dims": [], "External id": 4} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#2", "pid": 13721, "tid": "456", - "ts": 100, "dur": 100, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 1, "tid": "stream 7", - "ts": 60, "dur": 20, - "args": {"correlation": 334, "external id": 1, "device": 1, - "blocks per SM": 0.5, "est. achieved occupancy %": 0.6} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 15, "dur": 5, - "args": {"correlation": 334, "external id": 1} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 1, "tid": "stream 7", - "ts": 240, "dur": 25, - "args": {"correlation": 337, "external id": 4, "device": 1, - "blocks per SM": 10.5, "est. achieved occupancy %": 0.3} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 230, "dur": 10, - "args": {"correlation": 337, "external id": 4} - }] - """ - profile = parse_json_trace(json_content) - profile.process() - - self.assertEqual(len(profile.gpu_metrics_parser.gpu_ids), 1) - self.assertAlmostEqual(profile.gpu_metrics_parser.gpu_utilization[1], 0.0) - self.assertTrue(profile.gpu_metrics_parser.avg_approximated_sm_efficiency_per_device[1] is None) - self.assertTrue(profile.gpu_metrics_parser.avg_occupancy_per_device[1] is None) - self.assertTrue(profile.gpu_metrics_parser.blocks_per_sm_count[1] > 0) - self.assertTrue(profile.gpu_metrics_parser.occupancy_count[1] > 0) - - count = 0 - for agg_by_op in profile.kernel_list_groupby_name_op: - if agg_by_op.name == 'void cunn_ClassNLLCriterion_updateGradInput_kernel' \ - and agg_by_op.op_name == 'aten::mat_mul': - self.assertAlmostEqual(agg_by_op.avg_blocks_per_sm, 0.5) - self.assertAlmostEqual(agg_by_op.avg_occupancy, 0.6) - count += 1 - if agg_by_op.name == 'void cunn_ClassNLLCriterion_updateGradInput_kernel' and \ - agg_by_op.op_name == 'aten::mm': - self.assertAlmostEqual( - agg_by_op.avg_blocks_per_sm, 10.5) - self.assertAlmostEqual( - agg_by_op.avg_occupancy, 0.3) - count += 1 - self.assertEqual(count, 2) - - count = 0 - for _id, (name, row) in enumerate(profile.kernel_stat.iterrows()): - # The kernel with zero 'dur' should be ignored. 
- if name == 'void cunn_ClassNLLCriterion_updateGradInput_kernel': - self.assertAlmostEqual(row['blocks_per_sm'], (20 * 0.5 + 25 * 10.5) / (20 + 25)) - self.assertAlmostEqual(row['occupancy'], (20 * 0.6 + 25 * 0.3) / (20 + 25)) - count += 1 - self.assertEqual(count, 1) - - def test_dump_gpu_metrics(self): - profile = RunProfile('test_dump_gpu_metrics', None) - # Faked data for easy to see in UI. Real data values are 1/100 of these. - gpu_util_buckets = [[(1621401187223005, 0.0), (1621401187224005, 0.0), - (1621401187225005, 0.6), (1621401187226005, 0.5), - (1621401187227005, 0.6), (1621401187228005, 0.2), - (1621401187229005, 0.6), (1621401187230005, 0.1), - (1621401187231005, 0.5), (1621401187232005, 0.2), - (1621401187233005, 0.3), (1621401187234005, 0.4), - (1621401187235005, 0.4219409282700422), - (1621401187236901, 0)]] - # Faked data for easy to see in UI. Real data values are 1/10 of these. - approximated_sm_efficiency_ranges = \ - [[(1621401187225275, 1621401187225278, 0.25), (1621401187225530, 1621401187225532, 0.125), - (1621401187225820, 1621401187225821, 0.125), (1621401187226325, 1621401187226327, 0.25), - (1621401187226575, 1621401187226577, 0.125), (1621401187226912, 1621401187226913, 0.125), - (1621401187227092, 1621401187227094, 0.125), (1621401187227619, 1621401187227620, 0.125), - (1621401187227745, 1621401187227746, 0.125), (1621401187227859, 1621401187227860, 0.125), - (1621401187227973, 1621401187227974, 0.125), (1621401187228279, 1621401187228280, 0.125), - (1621401187228962, 1621401187228963, 0.125), (1621401187229153, 1621401187229155, 0.125), - (1621401187229711, 1621401187229715, 0.125), (1621401187230162, 1621401187230163, 0.125), - (1621401187231100, 1621401187231103, 0.125), (1621401187231692, 1621401187231694, 0.5), - (1621401187232603, 1621401187232604, 0.125), (1621401187232921, 1621401187232922, 0.125), - (1621401187233342, 1621401187233343, 0.125), (1621401187233770, 1621401187233772, 0.125), - (1621401187234156, 1621401187234159, 0.125), (1621401187234445, 1621401187234446, 0.125), - (1621401187235025, 1621401187235028, 0.125), (1621401187235555, 1621401187235556, 0.125), - (1621401187236158, 1621401187236159, 0.125), (1621401187236278, 1621401187236279, 0.125), - (1621401187236390, 1621401187236391, 0.125), (1621401187236501, 1621401187236502, 0.125)]] - - basedir = os.path.dirname(os.path.realpath(__file__)) - trace_json_flat_path = os.path.join(basedir, 'gpu_metrics_input.json') - gpu_metrics_parser = GPUMetricsParser() - gpu_metrics_parser.gpu_util_buckets = gpu_util_buckets - gpu_metrics_parser.approximated_sm_efficiency_ranges = approximated_sm_efficiency_ranges - profile.gpu_metrics = gpu_metrics_parser.get_gpu_metrics() - with open(trace_json_flat_path, 'rb') as file: - raw_data = file.read() - data_with_gpu_metrics_compressed = profile.append_gpu_metrics(raw_data) - data_with_gpu_metrics_flat = gzip.decompress( - data_with_gpu_metrics_compressed) - - trace_json_expected_path = os.path.join(basedir, 'gpu_metrics_expected.json') - with open(trace_json_expected_path, 'rb') as file: - data_expected = file.read() - - # Parse to json in order to ignore text format difference. 
- data_with_gpu_metrics_json = json.loads( - data_with_gpu_metrics_flat.decode('utf8')) - data_expected_json = json.loads(data_expected.decode('utf8')) - data_with_gpu_metrics_str = json.dumps( - data_with_gpu_metrics_json, sort_keys=True) - data_expected_str = json.dumps(data_expected_json, sort_keys=True) - - self.assertEqual(data_with_gpu_metrics_str, data_expected_str) - - try: - _ = json.loads(data_with_gpu_metrics_flat.decode('utf8')) - except Exception: - self.assertTrue( - False, 'The string fails to be parsed by json after appending gpu metrics.') - - def test_memory_view(self): - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 10, "dur": 10, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "enumerate(DataLoader)#_SingleProcessDataLoaderIter.__next__", "pid": 13721, "tid": "123", - "ts": 100, "dur": 180, - "args": {"Input Dims": [], "External id": 2} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 200, "dur": 60, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::nll_loss_backward", "pid": 13721, "tid": "123", - "ts": 340, "dur": 70, - "args": {"Input Dims": [[], [32, 1000], [32], [], [], [], []], "External id": 4} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#1", "pid": 13721, "tid": "123", - "ts": 50, "dur": 400, - "args": {"Input Dims": [], "External id": 1} - }, - { - "ph": "X", "cat": "Operator", - "name": "ProfilerStep#2", "pid": 13721, "tid": "123", - "ts": 500, "dur": 500, - "args": {"Input Dims": [], "External id": 1} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 510, "dur": 150, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::copy_", "pid": 13721, "tid": "123", - "ts": 520, "dur": 100, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - - { - "ph": "X", "cat": "Operator", - "name": "aten::liner", "pid": 13721, "tid": "123", - "ts": 700, "dur": 100, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::t", "pid": 13721, "tid": "123", - "ts": 705, "dur": 40, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::transpose", "pid": 13721, "tid": "123", - "ts": 710, "dur": 30, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::tranas_stride", "pid": 13721, "tid": "123", - "ts": 720, "dur": 10, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::addmm", "pid": 13721, "tid": "123", - "ts": 750, "dur": 40, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::to", "pid": 13721, "tid": "123", - "ts": 900, "dur": 100, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Memcpy", - "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": "stream 7", - "ts": 405, "dur": 10, - "args": {"stream": 7, "correlation": 334, "external 
id": 4} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemcpyAsync", "pid": 13721, "tid": "456", - "ts": 360, "dur": 20, - "args": {"correlation": 334, "external id": 4} - }, - { - "ph": "X", "cat": "Memset", - "name": "Memset (Device)", "pid": 0, "tid": "stream 7", - "ts": 420, "dur": 5, - "args": {"stream": 7, "correlation": 40344, "external id": 4} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaMemsetAsync", "pid": 13721, "tid": "456", - "ts": 390, "dur": 10, - "args": {"correlation": 40344, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void cunn_ClassNLLCriterion_updateGradInput_kernel", "pid": 0, "tid": "stream 7", - "ts": 430, "dur": 15, - "args": {"correlation": 40348, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 405, "dur": 5, - "args": {"correlation": 40348, "external id": 4} - }, - - - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 90, - "args": { - "Device Type": 0, "Device Id": -1, "Addr": 90, "Bytes": 4 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 150, - "args": { - "Device Type": 0, "Device Id": -1, "Addr": 150, "Bytes": 4 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 200, - "args": { - "Device Type": 0, "Device Id": -1, "Addr": 200, "Bytes": 4 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 210, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 210, "Bytes": 4 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 265, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 265, "Bytes": 4 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 300, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 300, "Bytes": 4 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 350, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 350, "Bytes": 10 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 360, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 350, "Bytes": -10 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 450, - "args": { - "Device Type": 0, "Device Id": -1, "Addr": 450, "Bytes": 1000000 - } - }, - - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 515, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 515, "Bytes": 100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 520, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 520, "Bytes": 100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 600, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 520, "Bytes": -100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 690, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 690, "Bytes": 100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 701, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 701, "Bytes": 100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 796, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 515, "Bytes": -100 - } - }, - - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 
13721, "tid": 123, - "ts": 708, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 708, "Bytes": 100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 742, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 708, "Bytes": -100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 715, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 715, "Bytes": 50 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 735, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 715, "Bytes": -50 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 725, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 725, "Bytes": 50 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 728, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 725, "Bytes": -50 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 729, - "args": { - "Device Type": 0, "Device Id": -1, "Addr": 729, "Bytes": 50 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 746, - "args": { - "Device Type": 0, "Device Id": -1, "Addr": 746, "Bytes": 100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 747, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 747, "Bytes": 20 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 749, - "args": { - "Device Type": 0, "Device Id": -1, "Addr": 690, "Bytes": -100 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 760, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 760, "Bytes": 30 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 780, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 760, "Bytes": -30 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 795, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 795, "Bytes": 10 - } - }, - { - "ph": "i", "s": "t", "name": "[memory]", - "pid": 13721, "tid": 123, - "ts": 799, - "args": { - "Device Type": 1, "Device Id": 0, "Addr": 795, "Bytes": -10 - } - } - ] - """ - import logging - - from torch_tb_profiler.utils import get_logger - logger = get_logger() - logger.addHandler(logging.StreamHandler()) - - profile = parse_json_trace(json_content) - profile.process() - memory_stats = profile.memory_snapshot.get_memory_statistics(profile.tid2tree) - - self.assertEqual(len(memory_stats), 2) - self.assertIn('GPU0', memory_stats) - - # validation - gpu_expected_data = { - # self increase size, self allocation size, self allocation count, increase size, allocation size, allocation count, call # noqa: E501 - 'aten::to': [104, 104, 2, 104, 204, 3, 4], - 'aten::nll_loss_backward': [0, 10, 1, 0, 10, 1, 1], - 'aten::copy_': [0, 100, 1, 0, 100, 1, 1], - 'aten::addmm': [0, 30, 1, 0, 30, 1, 1], - 'aten::tranas_stride': [0, 50, 1, 0, 50, 1, 1], - 'aten::transpose': [0, 50, 1, 0, 100, 2, 1], - 'aten::t': [0, 100, 1, 0, 200, 3, 1], - 'aten::liner': [20, 130, 3, 20, 360, 7, 1] - } - - cpu_expected_data = { - 'aten::to': [4, 4, 1, 4, 4, 1, 4], - 'aten::liner': [0, 100, 1, 50, 150, 2, 1], - 'aten::tranas_stride': [50, 50, 1, 50, 50, 1, 1], - 'aten::transpose': [0, 0, 0, 50, 50, 1, 1], - 'aten::t': [0, 0, 0, 50, 50, 1, 1] - } - - validate_data = [ - (memory_stats['CPU'], 
cpu_expected_data), - (memory_stats['GPU0'], gpu_expected_data) - ] - for (mem_stat, expected_data) in validate_data: - for name, values in expected_data.items(): - self.assertEqual(mem_stat[name], values) - - # Test group by 'kernel detail + op name'. - def test_group_by_kernel_columns(self): - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "op1", "pid": 13721, "tid": "123", - "ts": 200, "dur": 60, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "op2", "pid": 13721, "tid": "456", - "ts": 340, "dur": 70, - "args": {"Input Dims": [[], [32, 1000], [32], [], [], [], []], "External id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 230, "dur": 15, - "args": {"correlation": 1000, "external id": 3, "device": 0, - "grid": [16, 1, 1], "block": [16, 16, 16], "registers per thread": 18, "shared memory": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 210, "dur": 5, - "args": {"correlation": 1000, "external id": 3} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 250, "dur": 10, - "args": {"correlation": 1001, "external id": 3, "device": 0, - "grid": [16, 1, 1], "block": [16, 16, 16], "registers per thread": 18, "shared memory": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 215, "dur": 5, - "args": {"correlation": 1001, "external id": 3} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 250, "dur": 13, - "args": {"correlation": 1002, "external id": 3, "device": 0, - "grid": [16, 1, 1], "block": [16, 16, 64], "registers per thread": 18, "shared memory": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 220, "dur": 5, - "args": {"correlation": 1002, "external id": 3} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 250, "dur": 17, - "args": {"correlation": 1003, "external id": 4, "device": 0, - "grid": [16, 1, 1], "block": [16, 16, 64], "registers per thread": 18, "shared memory": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 350, "dur": 5, - "args": {"correlation": 1003, "external id": 4} - } - ] - """ - profile = parse_json_trace(json_content) - profile.process() - expected_agg_kernels = [ - { - 'name': 'kernel1', - 'op_name': 'op1', - 'grid': '[16, 1, 1]', - 'block': '[16, 16, 16]', - 'registers per thread': 18, - 'shared memory': 0, - 'calls': 2, - 'total_duration': 15 + 10, - 'avg_duration': (15 + 10) / 2, - 'min_duration': min(15, 10), - 'max_duration': max(15, 10) - }, - { - 'name': 'kernel1', - 'op_name': 'op1', - 'grid': '[16, 1, 1]', - 'block': '[16, 16, 64]', # Only changed this. - 'registers per thread': 18, - 'shared memory': 0, - 'calls': 1, - 'total_duration': 13, - 'avg_duration': 13, - 'min_duration': 13, - 'max_duration': 13 - }, - { - 'name': 'kernel1', - 'op_name': 'op2', # Only changed this. 
- 'grid': '[16, 1, 1]', - 'block': '[16, 16, 64]', - 'registers per thread': 18, - 'shared memory': 0, - 'calls': 1, - 'total_duration': 17, - 'avg_duration': 17, - 'min_duration': 17, - 'max_duration': 17 - } - ] - index = 0 - self.assertEqual(len(profile.kernel_list_groupby_name_op), len(expected_agg_kernels)) - for agg_kernel in profile.kernel_list_groupby_name_op: - expected_agg_kernel = expected_agg_kernels[index] - self.assertEqual(agg_kernel.name, expected_agg_kernel['name']) - self.assertEqual(agg_kernel.op_name, expected_agg_kernel['op_name']) - self.assertEqual(str(agg_kernel.grid), expected_agg_kernel['grid']) - self.assertEqual(str(agg_kernel.block), expected_agg_kernel['block']) - self.assertEqual(agg_kernel.regs_per_thread, expected_agg_kernel['registers per thread']) - self.assertEqual(agg_kernel.shared_memory, expected_agg_kernel['shared memory']) - self.assertEqual(agg_kernel.calls, expected_agg_kernel['calls']) - self.assertEqual(agg_kernel.total_duration, expected_agg_kernel['total_duration']) - self.assertAlmostEqual(agg_kernel.avg_duration, expected_agg_kernel['avg_duration']) - self.assertEqual(agg_kernel.min_duration, expected_agg_kernel['min_duration']) - self.assertEqual(agg_kernel.max_duration, expected_agg_kernel['max_duration']) - index += 1 - - # Test group by 'kernel detail + op name' with invalid input lack of some kernel field - def test_group_by_kernel_columns_invalid_input(self): - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "op1", "pid": 13721, "tid": "123", - "ts": 200, "dur": 60, - "args": {"Input Dims": [[2, 8, 5], [], [], [], [], [], [], []], "External id": 3} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 220, "dur": 1, - "args": {"correlation": 1000, "external id": 3, "device": 0, - "block": [16, 16, 16], "registers per thread": 18, "shared memory": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 210, "dur": 5, - "args": {"correlation": 1000, "external id": 3} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 230, "dur": 2, - "args": {"correlation": 1001, "external id": 3, "device": 0, - "grid": [16, 1, 1], "registers per thread": 18, "shared memory": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 220, "dur": 5, - "args": {"correlation": 1001, "external id": 3} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 240, "dur": 3, - "args": {"correlation": 1002, "external id": 3, "device": 0, - "grid": [16, 1, 1], "block": [16, 16, 16], "shared memory": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 230, "dur": 5, - "args": {"correlation": 1002, "external id": 3} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 250, "dur": 4, - "args": {"correlation": 1003, "external id": 3, "device": 0, - "grid": [16, 1, 1], "block": [16, 16, 16], "registers per thread": 18} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 240, "dur": 5, - "args": {"correlation": 1003, "external id": 3} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel1", "pid": 0, "tid": "stream 7", - "ts": 260, "dur": 5, - "args": {"correlation": 1004, "external id": 3, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": 
"cudaLaunchKernel", "pid": 13721, "tid": "456", - "ts": 250, "dur": 5, - "args": {"correlation": 1004, "external id": 3} - } - ] - """ - profile = parse_json_trace(json_content) - profile.process() - expected_agg_kernels = [ - { - 'name': 'kernel1', - 'op_name': 'op1', - 'grid': None, - 'block': [16, 16, 16], - 'registers per thread': 18, - 'shared memory': 0, - 'calls': 1, - 'total_duration': 1, - 'avg_duration': 1, - 'min_duration': 1, - 'max_duration': 1 - }, - { - 'name': 'kernel1', - 'op_name': 'op1', - 'grid': [16, 1, 1], - 'block': None, - 'registers per thread': 18, - 'shared memory': 0, - 'calls': 1, - 'total_duration': 2, - 'avg_duration': 2, - 'min_duration': 2, - 'max_duration': 2 - }, - { - 'name': 'kernel1', - 'op_name': 'op1', - 'grid': [16, 1, 1], - 'block': [16, 16, 16], - 'registers per thread': None, - 'shared memory': 0, - 'calls': 1, - 'total_duration': 3, - 'avg_duration': 3, - 'min_duration': 3, - 'max_duration': 3 - }, - { - 'name': 'kernel1', - 'op_name': 'op1', - 'grid': [16, 1, 1], - 'block': [16, 16, 16], - 'registers per thread': 18, - 'shared memory': None, - 'calls': 1, - 'total_duration': 4, - 'avg_duration': 4, - 'min_duration': 4, - 'max_duration': 4 - }, - { - 'name': 'kernel1', - 'op_name': 'op1', - 'grid': None, - 'block': None, - 'registers per thread': None, - 'shared memory': None, - 'calls': 1, - 'total_duration': 5, - 'avg_duration': 5, - 'min_duration': 5, - 'max_duration': 5 - } - ] - index = 0 - self.assertEqual(len(profile.kernel_list_groupby_name_op), len(expected_agg_kernels)) - for agg_kernel in profile.kernel_list_groupby_name_op: - expected_agg_kernel = expected_agg_kernels[index] - self.assertEqual(agg_kernel.name, expected_agg_kernel['name']) - self.assertEqual(agg_kernel.op_name, expected_agg_kernel['op_name']) - self.assertEqual(agg_kernel.grid, expected_agg_kernel['grid']) - self.assertEqual(agg_kernel.block, expected_agg_kernel['block']) - self.assertEqual(agg_kernel.regs_per_thread, expected_agg_kernel['registers per thread']) - print(agg_kernel.name, agg_kernel.grid, agg_kernel.block, agg_kernel.shared_memory) - self.assertEqual(agg_kernel.shared_memory, expected_agg_kernel['shared memory']) - self.assertEqual(agg_kernel.calls, expected_agg_kernel['calls']) - self.assertEqual(agg_kernel.total_duration, expected_agg_kernel['total_duration']) - self.assertAlmostEqual(agg_kernel.avg_duration, expected_agg_kernel['avg_duration']) - self.assertEqual(agg_kernel.min_duration, expected_agg_kernel['min_duration']) - self.assertEqual(agg_kernel.max_duration, expected_agg_kernel['max_duration']) - index += 1 - - # Test tensor core related feature. 
- def test_tensor_core(self): - json_content = """[ - { - "ph": "X", "cat": "Operator", - "name": "aten::conv2d", "pid": 13721, "tid": "123", - "ts": 200, "dur": 100, - "args": {"Input Dims": [[]], "External id": 3} - }, - { - "ph": "X", "cat": "Operator", - "name": "op_no_tc", "pid": 13721, "tid": "123", - "ts": 205, "dur": 10, - "args": {"Input Dims": [[]], "External id": 4} - }, - { - "ph": "X", "cat": "Operator", - "name": "aten::cudnn_convolution", "pid": 13721, "tid": "123", - "ts": 215, "dur": 10, - "args": {"Input Dims": [[]], "External id": 5} - }, - { - "ph": "X", "cat": "Kernel", - "name": "kernel_no_tc", "pid": 0, "tid": "stream 7", - "ts": 210, "dur": 10, - "args": {"correlation": 1000, "external id": 4, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "123", - "ts": 205, "dur": 5, - "args": {"correlation": 1000, "external id": 4} - }, - { - "ph": "X", "cat": "Kernel", - "name": "volta_fp16_s884cudnn_fp16_128x128_ldg8_splitK_relu_f2f_exp_small_nhwc_tn_v1", - "pid": 0, "tid": "stream 7", - "ts": 220, "dur": 15, - "args": {"correlation": 1001, "external id": 5, "device": 0} - }, - { - "ph": "X", "cat": "Runtime", - "name": "cudaLaunchKernel", "pid": 13721, "tid": "123", - "ts": 215, "dur": 5, - "args": {"correlation": 1001, "external id": 5} - } - ] - """ - profile = parse_json_trace(json_content) - profile.process() - - expected_agg_ops = { - 'aten::conv2d': { - 'tc_eligible': True, - 'tc_self_ratio': 0, - 'tc_total_ratio': 15 / (15 + 10) - }, - 'op_no_tc': { - 'tc_eligible': False, - 'tc_self_ratio': 0, - 'tc_total_ratio': 0 - }, - 'aten::cudnn_convolution': { - 'tc_eligible': True, - 'tc_self_ratio': 1.0, - 'tc_total_ratio': 1.0 - } - } - self.assertEqual(len(profile.op_list_groupby_name), len(expected_agg_ops)) - for agg_op in profile.op_list_groupby_name: - expected_agg_op = expected_agg_ops[agg_op.name] - self.assertEqual(agg_op.tc_eligible, expected_agg_op['tc_eligible']) - self.assertAlmostEqual(agg_op.tc_self_ratio, expected_agg_op['tc_self_ratio']) - self.assertAlmostEqual(agg_op.tc_total_ratio, expected_agg_op['tc_total_ratio']) - - expected_kernels_groupby_op = { - 'kernel_no_tc': { - 'op_name': 'op_no_tc', - 'tc_used': False, - 'op_tc_eligible': False - }, - 'volta_fp16_s884cudnn_fp16_128x128_ldg8_splitK_relu_f2f_exp_small_nhwc_tn_v1': { - 'op_name': 'aten::cudnn_convolution', - 'tc_used': True, - 'op_tc_eligible': True - } - } - self.assertEqual(len(profile.kernel_list_groupby_name_op), len(expected_kernels_groupby_op)) - for agg_kernel in profile.kernel_list_groupby_name_op: - expected_agg_kernel = expected_kernels_groupby_op[agg_kernel.name] - self.assertEqual(agg_kernel.op_name, expected_agg_kernel['op_name']) - self.assertEqual(agg_kernel.tc_used, expected_agg_kernel['tc_used']) - self.assertEqual(agg_kernel.op_tc_eligible, expected_agg_kernel['op_tc_eligible']) - - self.assertAlmostEqual(profile.tc_ratio[0], 15 / (15 + 10)) - self.assertAlmostEqual(profile.tc_eligible_ops_kernel_ratio, 15 / (15 + 10)) - - -class TestDistributed(unittest.TestCase): - - def test_distributed_nccl(self): - json_content0 = """[ - { - "ph": "X", "cat": "cpu_op", - "name": "nccl:broadcast", "pid": 23803, "tid": "23803", - "ts": 0, "dur": 75, - "args": {"External id": 146, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "Kernel", - "name": "ncclKernel_Broadcast_RING_LL_Sum_int8_t(ncclWorkElem)", "pid": 0, "tid": "stream 16", - "ts": 16, "dur": 16, - "args": {"device": 0, "correlation": 28506, 
"external id": 146} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "aten::add_", "pid": 23803, "tid": "23803", - "ts": 100, "dur": 20, - "args": {"External id": 24504, "Input Dims": [[1000], [1000], []], "Input type": ["float", "float", "Int"]} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel", "pid": 0, "tid": "stream 7", - "ts": 130, "dur": 161, - "args": {"device": 0, "correlation": 99765, "external id": 24504} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "nccl:all_reduce", "pid": 23803, "tid": "25166", - "ts": 160, "dur": 75, - "args": {"External id": 2513, "Input Dims": [[2049000]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "Kernel", - "name": "ncclKernel_AllReduce_RING_LL_Sum_float(ncclWorkElem)", "pid": 0, "tid": "stream 16", - "ts": 162, "dur": 1556, - "args": {"device": 0, "correlation": 33218, "external id": 2513} - } - ] - """ - json_content1 = """[ - { - "ph": "X", "cat": "cpu_op", - "name": "nccl:broadcast", "pid": 23803, "tid": "23803", - "ts": 0, "dur": 20, - "args": {"External id": 146, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "Kernel", - "name": "ncclKernel_Broadcast_RING_LL_Sum_int8_t(ncclWorkElem)", "pid": 0, "tid": "stream 16", - "ts": 8, "dur": 31, - "args": {"device": 0, "correlation": 28506, "external id": 146} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "aten::add_", "pid": 23803, "tid": "23803", - "ts": 25, "dur": 20, - "args": {"External id": 24504, "Input Dims": [[1000], [1000], []], "Input type": ["float", "float", "Int"]} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel", "pid": 0, "tid": "stream 7", - "ts": 30, "dur": 161, - "args": {"device": 0, "correlation": 99765, "external id": 24504} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "nccl:all_reduce", "pid": 23803, "tid": "25166", - "ts": 160, "dur": 75, - "args": {"External id": 2513, "Input Dims": [[2049000]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "Kernel", - "name": "ncclKernel_AllReduce_RING_LL_Sum_float(ncclWorkElem)", "pid": 0, "tid": "stream 16", - "ts": 562, "dur": 1058, - "args": {"device": 0, "correlation": 33218, "external id": 2513} - } - ] - """ - - profile0 = parse_json_trace(json_content0, 'worker0') - dist_data0 = DistributedRunProfileData(profile0) - self.assertTrue(profile0.has_communication) - self.assertEqual(len(profile0.comm_node_list), 2) - self.assertEqual(profile0.steps_costs[0].costs, [105, 0, 0, 16, 0, 0, 79, 35, 235]) - - profile1 = parse_json_trace(json_content1, 'worker1') - dist_data1 = DistributedRunProfileData(profile1) - self.assertTrue(profile1.has_communication) - self.assertEqual(len(profile1.comm_node_list), 2) - self.assertEqual(profile1.steps_costs[0].costs[3], 22) - - loader = RunLoader('test_nccl', '', None) - dist_profile = loader._process_distributed_profiles([dist_data0, dist_data1], 0) - self.assertEqual(dist_profile.steps_to_overlap['data']['0']['worker0'], [32, 73, 16, 114]) - self.assertEqual(dist_profile.steps_to_overlap['data']['0']['worker1'], [152, 9, 22, 52]) - self.assertEqual(dist_profile.steps_to_wait['data']['0']['worker0'], [1074, 498]) - self.assertEqual(dist_profile.steps_to_wait['data']['0']['worker1'], [1074, 15]) - self.assertEqual(dist_profile.comm_ops['data']['worker0']['rows'], - [['nccl:broadcast', 1, 212480, 212480, 16, 16, 16, 16], - ['nccl:all_reduce', 1, 8196000, 8196000, 1556, 1556, 1058, 1058]]) - self.assertEqual(dist_profile.comm_ops['data']['worker1']['rows'], 
- [['nccl:broadcast', 1, 212480, 212480, 31, 31, 16, 16], - ['nccl:all_reduce', 1, 8196000, 8196000, 1058, 1058, 1058, 1058]]) - - def test_distributed_gloo_gpu(self): - json_content0 = """[ - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23803", - "ts": 16, "dur": 38, - "args": {"External id": 165, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23805", - "ts": 25, "dur": 36, - "args": {"External id": 166, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23803", - "ts": 66, "dur": 18, - "args": {"External id": 167, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "aten::add_", "pid": 23803, "tid": "23800", - "ts": 0, "dur": 20, - "args": {"External id": 24504, "Input Dims": [[1000], [1000], []], "Input type": ["float", "float", "Int"]} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel", "pid": 0, "tid": "stream 7", - "ts": 30, "dur": 101, - "args": {"device": 0, "correlation": 99765, "external id": 24504} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:all_reduce", "pid": 23803, "tid": "23805", - "ts": 110, "dur": 18, - "args": {"External id": 2513, "Input Dims": [[2049000]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:all_reduce", "pid": 23803, "tid": "23803", - "ts": 120, "dur": 36, - "args": {"External id": 2516, "Input Dims": [[2049000]], "Input type": ["float"]} - } - ] - """ - json_content1 = """[ - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23803", - "ts": 20, "dur": 28, - "args": {"External id": 256, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23805", - "ts": 28, "dur": 30, - "args": {"External id": 257, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23803", - "ts": 77, "dur": 6, - "args": {"External id": 258, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "aten::add_", "pid": 23803, "tid": "23800", - "ts": 0, "dur": 30, - "args": {"External id": 24504, "Input Dims": [[1000], [1000], []], "Input type": ["float", "float", "Int"]} - }, - { - "ph": "X", "cat": "Kernel", - "name": "void at::native::vectorized_elementwise_kernel", "pid": 0, "tid": "stream 7", - "ts": 70, "dur": 70, - "args": {"device": 0, "correlation": 99765, "external id": 24504} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:all_reduce", "pid": 23803, "tid": "23805", - "ts": 88, "dur": 38, - "args": {"External id": 2513, "Input Dims": [[2049000]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:all_reduce", "pid": 23803, "tid": "23803", - "ts": 130, "dur": 16, - "args": {"External id": 2516, "Input Dims": [[2049000]], "Input type": ["float"]} - } - ] - """ - - profile0 = parse_json_trace(json_content0, 'worker0') - dist_data0 = DistributedRunProfileData(profile0) - self.assertTrue(profile0.has_communication) - self.assertEqual(len(profile0.comm_node_list), 5) - self.assertEqual(profile0.steps_costs[0].costs, [101, 0, 0, 39, 0, 0, 16, 0, 156]) - - profile1 = parse_json_trace(json_content1, 'worker1') - dist_data1 = DistributedRunProfileData(profile1) - 
self.assertTrue(profile1.has_communication) - self.assertEqual(len(profile1.comm_node_list), 5) - self.assertEqual(profile1.steps_costs[0].costs, [70, 0, 0, 44, 0, 0, 20, 12, 146]) - - loader = RunLoader('test_gloo_gpu', '', None) - dist_profile = loader._process_distributed_profiles([dist_data0, dist_data1], 0) - self.assertEqual(dist_profile.steps_to_overlap['data']['0']['worker0'], [31, 70, 39, 16]) - self.assertEqual(dist_profile.steps_to_overlap['data']['0']['worker1'], [16, 54, 44, 32]) - self.assertEqual(dist_profile.steps_to_wait['data']['0']['worker0'], [75, 34]) - self.assertEqual(dist_profile.steps_to_wait['data']['0']['worker1'], [78, 20]) - self.assertEqual(dist_profile.comm_ops['data']['worker0']['rows'], - [['gloo:broadcast', 3, 637440, 212480, 63, 21, 41, 14], - ['gloo:all_reduce', 2, 16392000, 8196000, 46, 23, 34, 17]]) - self.assertEqual(dist_profile.comm_ops['data']['worker1']['rows'], - [['gloo:broadcast', 3, 637440, 212480, 44, 15, 44, 15], - ['gloo:all_reduce', 2, 16392000, 8196000, 54, 27, 34, 17]]) - - def test_distributed_gloo_cpu(self): - json_content0 = """[ - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23803", - "ts": 16, "dur": 38, - "args": {"External id": 165, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23805", - "ts": 25, "dur": 36, - "args": {"External id": 166, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23803", - "ts": 66, "dur": 18, - "args": {"External id": 167, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "aten::add_", "pid": 23803, "tid": "23800", - "ts": 0, "dur": 20, - "args": {"External id": 24504, "Input Dims": [[1000], [1000], []], "Input type": ["float", "float", "Int"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "aten::mul", "pid": 23803, "tid": "23800", - "ts": 30, "dur": 101, - "args": {"External id": 24505} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:all_reduce", "pid": 23803, "tid": "23805", - "ts": 110, "dur": 18, - "args": {"External id": 2513, "Input Dims": [[2049000]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:all_reduce", "pid": 23803, "tid": "23803", - "ts": 120, "dur": 36, - "args": {"External id": 2516, "Input Dims": [[2049000]], "Input type": ["float"]} - } - ] - """ - json_content1 = """[ - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23803", - "ts": 20, "dur": 28, - "args": {"External id": 256, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23805", - "ts": 28, "dur": 30, - "args": {"External id": 257, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:broadcast", "pid": 23803, "tid": "23803", - "ts": 77, "dur": 6, - "args": {"External id": 258, "Input Dims": [[53120]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "aten::add_", "pid": 23803, "tid": "23800", - "ts": 0, "dur": 30, - "args": {"External id": 24504, "Input Dims": [[1000], [1000], []], "Input type": ["float", "float", "Int"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "aten::mul", "pid": 23803, "tid": "23800", - "ts": 70, "dur": 70, - "args": {"External id": 24505} - }, - { - "ph": "X", "cat": "cpu_op", - "name": 
"gloo:all_reduce", "pid": 23803, "tid": "23805", - "ts": 88, "dur": 38, - "args": {"External id": 2513, "Input Dims": [[2049000]], "Input type": ["float"]} - }, - { - "ph": "X", "cat": "cpu_op", - "name": "gloo:all_reduce", "pid": 23803, "tid": "23803", - "ts": 130, "dur": 16, - "args": {"External id": 2516, "Input Dims": [[2049000]], "Input type": ["float"]} - } - ] - """ - - profile0 = parse_json_trace(json_content0, 'worker0') - dist_data0 = DistributedRunProfileData(profile0) - self.assertTrue(profile0.has_communication) - self.assertEqual(len(profile0.comm_node_list), 5) - self.assertEqual(profile0.steps_costs[0].costs, [0, 0, 0, 109, 0, 0, 47, 0, 156]) - - profile1 = parse_json_trace(json_content1, 'worker1') - dist_data1 = DistributedRunProfileData(profile1) - self.assertTrue(profile1.has_communication) - self.assertEqual(len(profile1.comm_node_list), 5) - self.assertEqual(profile1.steps_costs[0].costs, [0, 0, 0, 98, 0, 0, 36, 12, 146]) - - loader = RunLoader('test_gloo_cpu', '', None) - dist_profile = loader._process_distributed_profiles([dist_data0, dist_data1], 0) - self.assertEqual(dist_profile.steps_to_overlap['data']['0']['worker0'], [47, 74, 35, 0]) - self.assertEqual(dist_profile.steps_to_overlap['data']['0']['worker1'], [36, 64, 34, 12]) - self.assertEqual(dist_profile.steps_to_wait['data']['0']['worker0'], [75, 34]) - self.assertEqual(dist_profile.steps_to_wait['data']['0']['worker1'], [78, 20]) - self.assertEqual(dist_profile.comm_ops['data']['worker0']['rows'], - [['gloo:broadcast', 3, 637440, 212480, 63, 21, 41, 14], - ['gloo:all_reduce', 2, 16392000, 8196000, 46, 23, 34, 17]]) - self.assertEqual(dist_profile.comm_ops['data']['worker1']['rows'], - [['gloo:broadcast', 3, 637440, 212480, 44, 15, 44, 15], - ['gloo:all_reduce', 2, 16392000, 8196000, 54, 27, 34, 17]]) - - -class TestMemoryCurve(unittest.TestCase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.event_data_cpu = [ - [1, 0, 0, 1, 4, 4, 0], # alloc 1 - [20, 0, 0, 1, -4, 0, 0], # free 1 - [100, 0, 0, 2, 8000, 8000, 0], # alloc 2 - [200, 0, 0, 2, -8000, 0, 0], # free 2 - [300, 0, 0, 3, 4, 4, 0], # alloc 3 - [400, 0, 0, 4, 16, 20, 0], # alloc 4 - [500, 0, 0, 5, 4000, 4020, 0], # alloc 5 - [600, 0, 0, 4, -16, 4004, 0], # free 4 - [700, 0, 0, 7, 80, 4084, 0], # alloc 7 - [800, 0, 0, 3, -4, 4080, 0], # free 3 - [900, 0, 0, 7, -80, 4000, 0], # free 7 - [905, 0, 0, 4, -4000, 0, 0], # free 5 - ] - - self.event_data_gpu = [ - [2, 1, 0, 11, 400, 400, 512], # alloc 11 - [22, 1, 0, 11, -400, 0, 512], # free 11 - [105, 1, 0, 12, 5000, 5000, 10240], # alloc 12 - [106, 1, 0, 13, 3000, 8000, 10240], # alloc 13 - [205, 1, 0, 12, -5000, 3000, 10240], # free 12 - [401, 1, 0, 14, 1024, 4024, 10240], # alloc 14 - [499, 1, 0, 15, 4, 4028, 10240], # alloc 15 - [501, 1, 0, 13, -3000, 1028, 10240], # free 13 - [502, 1, 0, 15, -4, 1024, 10240], # free 15 - [906, 1, 0, 14, -1024, 0, 10240], # free 14 - ] - - self.all_events = sorted(self.event_data_cpu + self.event_data_gpu, key=lambda e: e[0]) - - def entry(self, ts, dev, dev_id, addr, alloc_size, total_allocated, total_reserved): - return { - 'ph': 'i', 's': 't', 'name': '[memory]', 'pid': 0, 'tid': 0, 'ts': ts, - 'args': { - 'Device Type': dev, - 'Device Id': dev_id, - 'Addr': addr, - 'Bytes': alloc_size, - 'Total Allocated': total_allocated, - 'Total Reserved': total_reserved, - }, - } - - def test_memory_curve_no_step_plot(self): - json_content = json.dumps([self.entry(*data) for data in self.all_events]) - - profile = 
parse_json_trace(json_content) - profile.process() - result = RunProfile.get_memory_curve(profile, time_metric='us', memory_metric='B', patch_for_step_plot=False) - - start_ts = profile.profiler_start_ts - self.assertEqual(1, start_ts) - - curves = result['rows'] - - self.assertIn('CPU', curves) - self.assertIn('GPU0', curves) - - self.assertEqual(len(self.event_data_cpu), len(curves['CPU'])) - for i in range(len(self.event_data_cpu)): - # adjusted timestamp - self.assertEqual(self.event_data_cpu[i][0] - start_ts, curves['CPU'][i][0]) - # total allocated - self.assertEqual(self.event_data_cpu[i][-2], curves['CPU'][i][1]) - # total reserved - self.assertEqual(self.event_data_cpu[i][-1], curves['CPU'][i][2]) - - self.assertEqual(len(self.event_data_gpu), len(curves['GPU0'])) - for i in range(len(self.event_data_gpu)): - self.assertEqual(self.event_data_gpu[i][0] - start_ts, curves['GPU0'][i][0]) - self.assertEqual(self.event_data_gpu[i][-2], curves['GPU0'][i][1]) - self.assertEqual(self.event_data_gpu[i][-1], curves['GPU0'][i][2]) - - def test_memory_curve_step_plot(self): - json_content = json.dumps([self.entry(*data) for data in self.all_events]) - - profile = parse_json_trace(json_content) - profile.process() - result = RunProfile.get_memory_curve(profile, time_metric='us', memory_metric='B', patch_for_step_plot=True) - - start_ts = profile.profiler_start_ts - self.assertEqual(1, start_ts) - - curves = result['rows'] - - self.assertIn('CPU', curves) - self.assertIn('GPU0', curves) - - self.assertEqual(2 * len(self.event_data_cpu) - 1, len(curves['CPU'])) - for i in range(len(curves['CPU'])): - if i % 2 == 0: # original values - # adjusted timestamp - self.assertEqual(self.event_data_cpu[i//2][0] - start_ts, curves['CPU'][i][0]) - # total allocated - self.assertEqual(self.event_data_cpu[i//2][-2], curves['CPU'][i][1]) - # total reserved - self.assertEqual(self.event_data_cpu[i//2][-1], curves['CPU'][i][2]) - else: # interpolated values - self.assertEqual(self.event_data_cpu[i//2+1][0] - start_ts, curves['CPU'][i][0]) - self.assertEqual(self.event_data_cpu[i//2][-2], curves['CPU'][i][1]) - self.assertEqual(self.event_data_cpu[i//2][-1], curves['CPU'][i][2]) - - self.assertEqual(2 * len(self.event_data_gpu) - 1, len(curves['GPU0'])) - for i in range(len(self.event_data_gpu)): - if i % 2 == 0: # original values - self.assertEqual(self.event_data_gpu[i//2][0] - start_ts, curves['GPU0'][i][0]) - self.assertEqual(self.event_data_gpu[i//2][-2], curves['GPU0'][i][1]) - self.assertEqual(self.event_data_gpu[i//2][-1], curves['GPU0'][i][2]) - else: # interpolated values - self.assertEqual(self.event_data_gpu[i//2+1][0] - start_ts, curves['GPU0'][i][0]) - self.assertEqual(self.event_data_gpu[i//2][-2], curves['GPU0'][i][1]) - self.assertEqual(self.event_data_gpu[i//2][-1], curves['GPU0'][i][2]) - - -class TestModuleView(unittest.TestCase): - - def test_build_module_hierarchy(self): - from torch_tb_profiler.profiler import trace - from torch_tb_profiler.profiler.module_op import ( - _build_module_hierarchy, aggegate_module_view) - - json_content = """[ - { - "ph": "X", - "cat": "python_function", - "name": "test_root", - "pid": 1908, - "tid": 1908, - "ts": 1, - "dur": 19367, - "args": { - "External id": 0, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Python id": 1, - "Python thread": 0 - } - }, - { - "ph": "X", - "cat": "python_function", - "name": "nn.Module: MyModule", - "pid": 1908, - "tid": 1908, - "ts": 2, - "dur": 211, - "args": { - "External id": 0, - "Trace name": "PyTorch 
Profiler", - "Trace iteration": 0, - "Python id": 2, - "Python parent id": 1, - "Python module id": 0 - } - }, - { - "ph": "X", - "cat": "python_function", - "name": "nn.Module: Linear", - "pid": 1908, - "tid": 1908, - "ts": 5, - "dur": 62, - "args": { - "External id": 0, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Python id": 3, - "Python parent id": 2, - "Python thread": 0, - "Python module id": 1 - } - }, - { - "ph": "X", - "cat": "cpu_op", - "name": "aten::addmm", - "pid": 1908, - "tid": 1908, - "ts": 10, - "dur": 31, - "args": { - "External id": 12182, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Fwd thread id": 0, - "Sequence number": 4006, - "python_caller_id": 3 - } - }, - { - "ph": "X", - "cat": "python_function", - "name": "nn.Module: MyModule", - "pid": 1908, - "tid": 1908, - "ts": 1000, - "dur": 211, - "args": { - "External id": 0, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Python id": 4, - "Python parent id": 1, - "Python module id": 0 - } - }, - { - "ph": "X", - "cat": "python_function", - "name": "nn.Module: Linear", - "pid": 1908, - "tid": 1908, - "ts": 1001, - "dur": 62, - "args": { - "External id": 0, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Python id": 5, - "Python parent id": 4, - "Python thread": 0, - "Python module id": 1 - } - }, - { - "ph": "X", - "cat": "cpu_op", - "name": "aten::addmm", - "pid": 1908, - "tid": 1908, - "ts": 1002, - "dur": 32, - "args": { - "External id": 12182, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Fwd thread id": 0, - "Sequence number": 4006, - "python_caller_id": 5 - } - }, - { - "ph": "X", - "cat": "python_function", - "name": "nn.Module: MyModule", - "pid": 1908, - "tid": 1908, - "ts": 2000, - "dur": 211, - "args": { - "External id": 0, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Python id": 6, - "Python parent id": 1, - "Python module id": 0 - } - }, - { - "ph": "X", - "cat": "python_function", - "name": "nn.Module: Linear", - "pid": 1908, - "tid": 1908, - "ts": 2001, - "dur": 62, - "args": { - "External id": 0, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Python id": 7, - "Python parent id": 6, - "Python thread": 0, - "Python module id": 1 - } - }, - { - "ph": "X", - "cat": "cpu_op", - "name": "aten::addmm", - "pid": 1908, - "tid": 1908, - "ts": 2002, - "dur": 33, - "args": { - "External id": 12182, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Fwd thread id": 0, - "Sequence number": 4006, - "python_caller_id": 7 - } - }, - { - "ph": "X", - "cat": "python_function", - "name": "nn.Module: Conv2", - "pid": 1908, - "tid": 1908, - "ts": 3000, - "dur": 211, - "args": { - "External id": 0, - "Trace name": "PyTorch Profiler", - "Trace iteration": 0, - "Python id": 8, - "Python parent id": 1, - "Python module id": 100 - } - } - ] - """ - data = parse_json_trace(json_content) - stats = aggegate_module_view(data.tid2tree, data.events) - stats.sort(key=lambda x: x.name) - self.assertEqual(2, len(stats)) - self.assertEqual('Conv2', stats[0].name) - self.assertEqual('MyModule', stats[1].name) - self.assertEqual(1, len(stats[1].children)) - self.assertEqual('Linear', stats[1].children[0].name) - - content = json.loads(json_content) - - events = [] - for data in content: - event = trace.create_event(data, False) - events.append(event) - - roots = _build_module_hierarchy(events) - roots.sort(key=lambda x: x.name) - self.assertEqual(2, len(roots)) - self.assertEqual('nn.Module: Conv2', roots[0].name) - 
self.assertEqual('nn.Module: MyModule', roots[1].name) - self.assertEqual(1, len(roots[1].children)) - self.assertEqual('nn.Module: Linear', roots[1].children[0].name) - - -class TestDataPipe(unittest.TestCase): - - def test_datapipe(self): - json_content = """[ - { - "ph": "X", "cat": "cpu_op", - "name": "enumerate(DataPipe)#ShufflerIterDataPipe", "pid": 7557, "tid": 7557, - "ts": 100, "dur": 23, - "args": { - "External id": 34, - "Trace name": "PyTorch Profiler", "Trace iteration": 0 - } - } - ]""" - profile = parse_json_trace(json_content) - profile.process() - - dataloader_ranges = profile.role_ranges[ProfileRole.DataLoader] - datapipe_range = None - for range in dataloader_ranges: - if range[0] == 100 and range[1] == 123: - datapipe_range = range - break - self.assertTrue(datapipe_range is not None) - - root = next(iter(profile.tid2tree.values())) - ops, _ = root.get_operator_and_kernels() - datapipe_op = None - for op in ops: - if op.name.startswith('enumerate(DataPipe)'): - datapipe_op = op - break - - self.assertTrue(datapipe_op is None) - - -if __name__ == '__main__': - unittest.main() diff --git a/plugins/tensorboard-plugins/tb_plugin/test/test_ranges.py b/plugins/tensorboard-plugins/tb_plugin/test/test_ranges.py deleted file mode 100644 index d40fc3e02ce3dac96f4b87ec9336dc5c1bb37b36..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/test/test_ranges.py +++ /dev/null @@ -1,50 +0,0 @@ -import unittest -import math - -from torch_tb_profiler.profiler.overall_parser import ( - merge_ranges, subtract_ranges_lists, intersection_ranges_lists, get_ranges_sum -) - - -def check_ranges_equal(ranges1, ranges2): - if len(ranges1) != len(ranges2): - return False - for i in range(len(ranges1)): - if ranges1[i][0] != ranges2[i][0] or ranges1[i][1] != ranges2[i][1]: - return False - return True - - -class TestOverallParser(unittest.TestCase): - def test_merge_ranges(self): - src_ranges = [(1.1, 2.2), (1.5, 2.3), (3.3, 3.9), (3.5, 3.6), (3.7, 3.8), (4.1, 4.2)] - expected_ranges = [(1.1, 2.3), (3.3, 3.9), (4.1, 4.2)] - dst_ranges = merge_ranges(src_ranges, True) - is_equal = check_ranges_equal(dst_ranges, expected_ranges) - self.assertTrue(is_equal) - - def test_subtract_ranges_lists(self): - ranges1 = [(1.1, 2.2), (3.3, 4.4), (5.5, 6.6)] - ranges2 = [(0, 0.1), (1.0, 1.4), (1.5, 1.6), (1.9, 3.4), (4.3, 4.6)] - expected_ranges = [(1.4, 1.5), (1.6, 1.9), (3.4, 4.3), (5.5, 6.6)] - dst_ranges = subtract_ranges_lists(ranges1, ranges2) - is_equal = check_ranges_equal(dst_ranges, expected_ranges) - self.assertTrue(is_equal) - - def test_intersection_ranges_lists(self): - ranges1 = [(1.1, 2.2), (3.3, 4.4), (5.5, 6.6)] - ranges2 = [(0, 0.1), (1.0, 1.4), (1.5, 1.6), (1.9, 3.4), (4.3, 4.6)] - expected_ranges = [(1.1, 1.4), (1.5, 1.6), (1.9, 2.2), (3.3, 3.4), (4.3, 4.4)] - dst_ranges = intersection_ranges_lists(ranges1, ranges2) - is_equal = check_ranges_equal(dst_ranges, expected_ranges) - self.assertTrue(is_equal) - - def test_get_ranges_sum(self): - ranges = [(1.1, 2.2), (3.3, 4.4), (5.5, 6.6)] - expected_sum = 3.3 - dst_sum = get_ranges_sum(ranges) - self.assertTrue(math.isclose(dst_sum, expected_sum)) - - -if __name__ == '__main__': - unittest.main() diff --git a/plugins/tensorboard-plugins/tb_plugin/test/test_tensorboard_end2end.py b/plugins/tensorboard-plugins/tb_plugin/test/test_tensorboard_end2end.py deleted file mode 100644 index 46636d11801a739935b4f385c6ce548009d09916..0000000000000000000000000000000000000000 --- 
a/plugins/tensorboard-plugins/tb_plugin/test/test_tensorboard_end2end.py +++ /dev/null @@ -1,170 +0,0 @@ -import json -import os -import random -import shutil -import socket -import tempfile -import time -import unittest -import urllib -import urllib.request -from subprocess import Popen -from urllib.error import HTTPError - - -def get_samples_dir(): - return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'resources') - - -class TestEnd2End(unittest.TestCase): - - # def test_tensorboard_gs(self): - # test_folder = 'gs://pe-tests-public/tb_samples/' - # expected_runs = b'["resnet50_profiler_api_num_workers_0", "resnet50_profiler_api_num_workers_4"]' - # self._test_tensorboard_with_arguments(test_folder, expected_runs, {'TORCH_PROFILER_START_METHOD':'spawn'}) - - def test_tensorboard_end2end(self): - test_folder = get_samples_dir() - expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]' - - print('starting spawn mode testing...') - self._test_tensorboard_with_arguments(test_folder, expected_runs, {'TORCH_PROFILER_START_METHOD': 'spawn'}) - - @unittest.skip('fork is not use anymore') - def test_tensorboard_fork(self): - test_folder = get_samples_dir() - expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]' - - print('starting fork mode testing') - self._test_tensorboard_with_arguments(test_folder, expected_runs) - - def test_tensorboard_with_path_prefix(self): - test_folder = get_samples_dir() - expected_runs = b'["resnet50_num_workers_0", "resnet50_num_workers_4"]' - self._test_tensorboard_with_arguments(test_folder, expected_runs, path_prefix='/tensorboard/viewer/') - - def test_tensorboard_with_symlinks(self): - logdir = tempfile.mkdtemp(prefix='tensorboard_logdir') - - samples_dir = get_samples_dir() - - # Create the following layout, with 1 symlink to a run dir, and 1 regular run dir: - # logdir/ - # run_concrete/ - # run_symlink/ --> path/to/samples/resnet50_num_workers_4/ - shutil.copytree(os.path.join(samples_dir, 'resnet50_num_workers_0'), os.path.join(logdir, 'run_concrete')) - os.symlink(os.path.join(samples_dir, 'resnet50_num_workers_4'), os.path.join(logdir, 'run_symlink')) - - expected_runs = b'["run_concrete", "run_symlink"]' - self._test_tensorboard_with_arguments(logdir, expected_runs) - - shutil.rmtree(logdir) - - def _test_tensorboard_with_arguments(self, test_folder, expected_runs, env=None, path_prefix=None): - host = 'localhost' - port = random.randint(6008, 65535) - - try: - if env: - env_copy = os.environ.copy() - env_copy.update(env) - env = env_copy - if not path_prefix: - tb = Popen(['tensorboard', '--logdir='+test_folder, '--port='+str(port)], env=env) - else: - tb = Popen(['tensorboard', '--logdir='+test_folder, '--port='+str(port), - '--path_prefix='+path_prefix], env=env) - self._test_tensorboard(host, port, expected_runs, path_prefix) - finally: - pid = tb.pid - print('tensorboard process {} is terminating.'.format(pid)) - tb.terminate() - - def _test_tensorboard(self, host, port, expected_runs, path_prefix): - if not path_prefix: - link_prefix = 'http://{}:{}/data/plugin/pytorch_profiler/'.format(host, port) - else: - path_prefix = path_prefix.strip('/') - link_prefix = 'http://{}:{}/{}/data/plugin/pytorch_profiler/'.format(host, port, path_prefix) - run_link = link_prefix + 'runs' - - expected_links_format = [ - link_prefix + 'overview?run={}&worker=worker0&span=1&view=Overview', - link_prefix + 'operation?run={}&worker=worker0&span=1&view=Operator&group_by=Operation', - link_prefix + 
'operation/table?run={}&worker=worker0&span=1&view=Operator&group_by=Operation', - link_prefix + 'kernel/table?run={}&worker=worker0&span=1&view=Kernel&group_by=Kernel', - link_prefix + 'kernel?run={}&worker=worker0&span=1&view=Kernel&group_by=Kernel' - ] - - retry_times = 60 - while True: - try: - socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect((host, port)) - print('tensorboard start successfully') - break - except socket.error: - time.sleep(2) - retry_times -= 1 - if retry_times < 0: - self.fail('tensorboard start timeout') - continue - - retry_times = 60 - - while True: - try: - response = urllib.request.urlopen(run_link) - data = response.read() - runs = None - if data: - data = json.loads(data) - runs = data.get('runs') - if runs: - runs = '[{}]'.format(', '.join(['"{}"'.format(i) for i in runs])) - runs = runs.encode('utf-8') - if runs == expected_runs: - break - if retry_times % 10 == 0: - print('receive mismatched data, retrying', data) - time.sleep(2) - retry_times -= 1 - if retry_times < 0: - self.fail('Load run timeout') - except Exception: - if retry_times > 0: - continue - else: - raise - - links = [] - for run in json.loads(expected_runs): - for expected_link in expected_links_format: - links.append(expected_link.format(run)) - - if os.environ.get('TORCH_PROFILER_REGEN_RESULT_CHECK') == '1': - with open('result_check_file.txt', 'w', encoding='utf-8') as f: - # NOTE: result_check_file.txt is manually generated and verified. - # And then checked-in so that we can make sure that frontend - # content change can be detected on code change. - for link in links: - response = urllib.request.urlopen(link) - f.write(response.read().decode('utf-8')) - f.write('\n') - else: - with open('result_check_file.txt', 'r') as f: - lines = f.readlines() - i = 0 - print('starting testing...') - for link in links: - try: - response = urllib.request.urlopen(link) - self.assertEqual(response.read(), lines[i].strip().encode(encoding='utf-8')) - i = i + 1 - except HTTPError as e: - self.fail(e) - self.assertEqual(i, 10) - print('ending testing...') - - -if __name__ == '__main__': - unittest.main() diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/__init__.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/__init__.py deleted file mode 100644 index f7b951e609e5c65895a6db82d391e8d584eb37c8..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -------------------------------------------------------------------------- - -# Entry point for Pytorch TensorBoard plugin package. 
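
The end-to-end test above (test_tensorboard_end2end.py) launches `tensorboard` as a subprocess and then repeatedly polls the plugin's `runs` endpoint until the expected run list appears. A minimal sketch of that polling pattern is shown below; the host, port, timeout and interval values are illustrative assumptions, not the test's exact configuration.

```python
import json
import time
import urllib.request


def wait_for_runs(host, port, expected_runs, timeout_s=120, poll_interval_s=2):
    """Poll the pytorch_profiler 'runs' endpoint until it returns the expected run names.

    Simplified sketch of the retry loop used by the end-to-end test; the URL layout
    matches the plugin routes exercised above, while the timeout and interval are
    assumptions chosen for illustration.
    """
    url = 'http://{}:{}/data/plugin/pytorch_profiler/runs'.format(host, port)
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url) as response:
                data = json.loads(response.read())
            if sorted(data.get('runs', [])) == sorted(expected_runs):
                return True
        except OSError:
            pass  # TensorBoard may not be listening yet; retry until the deadline.
        time.sleep(poll_interval_s)
    return False
```

The real test additionally compares the rendered view responses against a checked-in `result_check_file.txt`, so front-end content changes are caught by the same run.
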
- -__version__ = '0.4.0.11' diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/config/config.ini b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/config/config.ini deleted file mode 100644 index 500d472d27b2ca574e07829a64c50d6eb2ab7e71..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/config/config.ini +++ /dev/null @@ -1,11 +0,0 @@ -[URL] -pytorch_data_loading_url = https://pytorch.org/docs/stable/data.html#single-and-multi-process-data-loading -pytorch_amp_url = https://pytorch.org/docs/stable/amp.html -pytorch_ckp_url = https://pytorch.org/docs/stable/checkpoint.html -cuda_nn_ddp_instead_url = https://pytorch.org/docs/stable/notes/cuda.html#cuda-nn-ddp-instead -compress_url = https://pytorch.org/docs/stable/ddp_comm_hooks.html -grad_acc_url = https://towardsdatascience.com/what-is-gradient-accumulation-in-deep-learning-ec034122cfa -lamb_url = https://nvidia.github.io/apex/optimizers.html#apex.optimizers.FusedLAMB -repository_url = https://gitee.com/ascend/att/tree/master/plugins/tensorboard-plugins/tb_plugin -[EMAIL] -author_email = pmail_mindstudio@huawei.com \ No newline at end of file diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/consts.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/consts.py deleted file mode 100644 index b3e202af61eb9df1d210cd366e7d172075e1e570..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/consts.py +++ /dev/null @@ -1,116 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling. -# -------------------------------------------------------------------------- -import enum -import re -from collections import namedtuple - -PLUGIN_NAME = 'pytorch_profiler' - -WORKER_PATTERN = re.compile(r"""^(.*?) # worker name - (\.\d+)? 
# optional timestamp like 1619499959628 used as span name - \.pt\.trace\.json$""", re.X) - -TRACE_PATTERN = re.compile(r"""^trace_view\.json$""") -WORKER_SPAN_PATTERN = re.compile(r"""^(.*?)_(\d+(\.\d+)?)_ascend_pt$""") - -NODE_PROCESS_PATTERN = re.compile(r"""^(.*)_(\d+)""") -MONITOR_RUN_REFRESH_INTERNAL_IN_SECONDS = 10 -MAX_GPU_PER_NODE = 64 -MAX_FILE_SIZE = 500 * 1024 * 1024 -MAX_LINUX_PATH_LENGTH = 4096 -MAX_WINDOWS_PATH_LENGTH = 260 - -View = namedtuple('View', 'id, name, display_name') -OVERALL_VIEW = View(1, 'overall', 'Overview') -OP_VIEW = View(2, 'operator', 'Operator') -KERNEL_VIEW = View(3, 'kernel', 'Kernel') -TRACE_VIEW = View(4, 'trace', 'Trace') -DISTRIBUTED_VIEW = View(5, 'distributed', 'Distributed') -MEMORY_VIEW = View(6, 'memory', 'Memory') -MODULE_VIEW = View(7, 'module', 'Module') -LIGHTNING_VIEW = View(8, 'lightning', 'Lightning') - -TOOLTIP_GPU_UTIL = \ - 'GPU Utilization:\n' \ - 'GPU busy time / All steps time. The higher, the better. ' \ - 'GPU busy time is the time during which there is at least one GPU kernel running on it. ' \ - 'All steps time is the total time of all profiler steps(or called as iterations).\n' -TOOLTIP_SM_EFFICIENCY = \ - 'Est. SM Efficiency:\n' \ - 'Estimated Stream Multiprocessor Efficiency. The higher, the better. ' \ - 'This metric of a kernel, SM_Eff_K = min(blocks of this kernel / SM number of this GPU, 100%). ' \ - "This overall number is the sum of all kernels' SM_Eff_K weighted by kernel's execution duration, " \ - 'divided by all steps time.\n' -TOOLTIP_OCCUPANCY_COMMON = \ - 'Est. Achieved Occupancy:\n' \ - 'For most cases such as memory bandwidth bounded kernels, the higher the better. ' \ - 'Occupancy is the ratio of active warps on an SM ' \ - 'to the maximum number of active warps supported by the SM. ' \ - 'The theoretical occupancy of a kernel is upper limit occupancy of this kernel, ' \ - 'limited by multiple factors such as kernel shape, kernel used resource, ' \ - 'and the GPU compute capability.\n' \ - 'Est. Achieved Occupancy of a kernel, OCC_K = ' \ - 'min(threads of the kernel / SM number / max threads per SM, theoretical occupancy of the kernel). ' -TOOLTIP_OCCUPANCY_OVERVIEW = \ - "This overall number is the weighted average of all kernels' OCC_K " \ - "using kernel's execution duration as weight. " \ - 'It shows fine-grained low-level GPU utilization.\n' -TOOLTIP_TENSOR_CORES = \ - 'Kernel using Tensor Cores:\n' \ - 'Total GPU Time for Tensor Core kernels / Total GPU Time for all kernels.\n' -TOOLTIP_OCCUPANCY_TABLE = \ - "This \"Mean\" number is the weighted average of all calls' OCC_K of the kernel, " \ - "using each call's execution duration as weight. " \ - 'It shows fine-grained low-level GPU utilization.' -TOOLTIP_BLOCKS_PER_SM = \ - 'Blocks Per SM = blocks of this kernel / SM number of this GPU.\n' \ - 'If this number is less than 1, it indicates the GPU multiprocessors are not fully utilized.\n' \ - '\"Mean Blocks per SM\" is the weighted average of all calls of this kernel, ' \ - "using each call's execution duration as weight." -TOOLTIP_OP_TC_ELIGIBLE = \ - 'Whether this operator is eligible to use Tensor Cores.' -TOOLTIP_OP_TC_SELF = \ - 'Time of self-kernels with Tensor Cores / Time of self-kernels.' -TOOLTIP_OP_TC_TOTAL = \ - 'Time of kernels with Tensor Cores / Time of kernels.' -TOOLTIP_KERNEL_USES_TC = \ - 'Whether this kernel uses Tensor Cores.' -TOOLTIP_KERNEL_OP_TC_ELIGIBLE = \ - 'Whether the operator launched this kernel is eligible to use Tensor Cores.' 
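
The tooltip strings above spell out how the overall GPU metrics are aggregated: per-kernel values such as SM_Eff_K and OCC_K are weighted by each kernel's execution duration, then normalized by the total steps time (for Est. SM Efficiency) or by the total kernel time (for Est. Achieved Occupancy). The sketch below restates those formulas in code; the kernel record fields are hypothetical names chosen for illustration, not the plugin's internal data structures.

```python
def weighted_gpu_metrics(kernels, all_steps_time_us, sm_count, max_threads_per_sm):
    """Aggregate per-kernel metrics the way the tooltips above describe.

    `kernels` is assumed to be an iterable of dicts with 'duration_us', 'blocks',
    'threads' and 'theoretical_occupancy' keys -- illustrative field names only.
    """
    sm_eff_weighted = 0.0   # sum of SM_Eff_K * duration
    occ_weighted = 0.0      # sum of OCC_K * duration
    total_kernel_time = 0.0

    for k in kernels:
        duration = k['duration_us']
        total_kernel_time += duration

        # SM_Eff_K = min(blocks of this kernel / SM number, 100%)
        sm_eff_k = min(k['blocks'] / sm_count, 1.0)
        sm_eff_weighted += sm_eff_k * duration

        # OCC_K = min(threads / SM number / max threads per SM, theoretical occupancy)
        occ_k = min(k['threads'] / sm_count / max_threads_per_sm,
                    k['theoretical_occupancy'])
        occ_weighted += occ_k * duration

    est_sm_efficiency = sm_eff_weighted / all_steps_time_us if all_steps_time_us else 0.0
    est_achieved_occupancy = occ_weighted / total_kernel_time if total_kernel_time else 0.0
    return est_sm_efficiency, est_achieved_occupancy
```
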
-TOOLTIP_OP_TC_ELIGIBLE_AICORE = \ - 'Whether this operator is eligible to use AICore.' -TOOLTIP_OP_TC_SELF_AICORE = \ - 'Time of Device Self Duration With AICore / Device Self Duration.' -TOOLTIP_OP_TC_TOTAL_AICORE = \ - 'Time of Device Total Duration With AICore / Device Total Duration.' - - -class InputFilesType(enum.Enum): - KERNEL_DETAILS_CSV = 'kernel_details.csv' - MEMORY_RECORD_CSV = 'memory_record.csv' - MEMORY_OPERATOR_CSV = 'operator_memory.csv' - MEMORY_COMPONENT_CSV = 'npu_module_mem.csv' - OPERATOR_DETAILS_CSV = 'operator_details.csv' - DISTRIBUTED_STEP_CSV = 'step_trace_time.csv' - DISTRIBUTED_COMMUNICATION_JSON = 'communication.json' - - -INPUT_FILE_LIST = [e.value for e in InputFilesType] diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/__init__.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/__init__.py deleted file mode 100644 index 296f53b7c813b2c97b498469f49b973438d9f3ae..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling. -# -------------------------------------------------------------------------- -from .cache import Cache -from .file import (BaseFileSystem, StatData, abspath, basename, download_file, - exists, get_filesystem, glob, isdir, join, listdir, - makedirs, read, register_filesystem, relpath, walk, stat, check_file_valid) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/azureblob.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/azureblob.py deleted file mode 100644 index 2fcd69fee8c24393458875635c17bd74a71b0fc4..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/azureblob.py +++ /dev/null @@ -1,187 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -import os - -from azure.storage.blob import ContainerClient - -from .. 
import utils -from .base import BaseFileSystem, RemotePath, StatData -from .utils import as_bytes, as_text, parse_blob_url - -logger = utils.get_logger() - - -class AzureBlobSystem(RemotePath, BaseFileSystem): - """Provides filesystem access to S3.""" - - def __init__(self): - if not ContainerClient: - raise ImportError('azure-storage-blob must be installed for Azure Blob support.') - self.connection_string = os.environ.get('AZURE_STORAGE_CONNECTION_STRING', None) - - def exists(self, filename): - """Returns whether the path is a directory or not.""" - basename, parts = self.split_blob_path(filename) - if basename is None or parts is None: - return False - if basename == '': - # root container case - return True - else: - return basename == parts[0] - - def read(self, file, binary_mode=False, size=None, continue_from=None): - """Reads contents of a file to a string.""" - logger.info('azure blob: starting reading file %s' % file) - account, container, path = self.container_and_path(file) - client = self.create_container_client(account, container) - blob_client = client.get_blob_client(path) - if not blob_client.exists(): - raise FileNotFoundError("file %s doesn't exist!" % path) - - downloader = blob_client.download_blob(offset=continue_from, length=size) - if continue_from is not None: - continuation_token = continue_from + downloader.size - else: - continuation_token = downloader.size - - data = downloader.readall() - logger.info('azure blob: file %s download is done, size is %d' % (file, len(data))) - if binary_mode: - return as_bytes(data), continuation_token - else: - return as_text(data), continuation_token - - def write(self, filename, file_content, binary_mode=False): - """Writes string file contents to a file.""" - account, container, path = self.container_and_path(filename) - client = self.create_container_client(account, container) - - if binary_mode: - if not isinstance(file_content, bytes): - raise TypeError('File content type must be bytes') - else: - file_content = as_bytes(file_content) - client.upload_blob(path, file_content) - - def download_file(self, file_to_download, file_to_save): - logger.info('azure blob: starting downloading file %s as %s' % (file_to_download, file_to_save)) - account, container, path = self.container_and_path(file_to_download) - client = self.create_container_client(account, container) - blob_client = client.get_blob_client(path) - if not blob_client.exists(): - raise FileNotFoundError("file %s doesn't exist!" % path) - - downloader = blob_client.download_blob() - with open(file_to_save, 'wb') as downloaded_file: - data = downloader.readall() - downloaded_file.write(data) - logger.info('azure blob: file %s is downloaded as %s, size is %d' % - (file_to_download, file_to_save, len(data))) - - def glob(self, filename): - """Returns a list of files that match the given pattern(s).""" - # Only support prefix with * at the end and no ? 
in the string - star_i = filename.find('*') - quest_i = filename.find('?') - if quest_i >= 0: - raise NotImplementedError( - '{} not supported by compat glob'.format(filename) - ) - if star_i != len(filename) - 1: - return [] - - filename = filename[:-1] - - account, container, path = self.container_and_path(filename) - client = self.create_container_client(account, container) - blobs = client.list_blobs(name_starts_with=path) - return [blob.name for blob in blobs] - - def isdir(self, dirname): - """Returns whether the path is a directory or not.""" - basename, parts = self.split_blob_path(dirname) - if basename is None or parts is None: - return False - if basename == '': - # root container case - return True - else: - return basename == parts[0] and len(parts) > 1 - - def listdir(self, dirname): - """Returns a list of entries contained within a directory.""" - account, container, path = self.container_and_path(dirname) - client = self.create_container_client(account, container) - blob_iter = client.list_blobs(name_starts_with=path) - items = [] - for blob in blob_iter: - item = self.relpath(blob.name, path) - if items not in items: - items.append(item) - return items - - def makedirs(self, path): - """No need create directory since the upload blob will automatically create""" - pass - - def stat(self, filename): - """Returns file statistics for a given path.""" - account, container, path = self.container_and_path(filename) - client = self.create_container_client(account, container) - blob_client = client.get_blob_client(path) - props = blob_client.get_blob_properties() - return StatData(props.size) - - def walk(self, top, topdown=True, onerror=None): - account, container, path = self.container_and_path(top) - client = self.create_container_client(account, container) - blobs = client.list_blobs(name_starts_with=path) - results = {} - for blob in blobs: - dirname, basename = self.split(blob.name) - dirname = 'https://{}/{}/{}'.format(account, container, dirname) - results.setdefault(dirname, []).append(basename) - for key, value in results.items(): - yield key, None, value - - def split_blob_path(self, blob_path): - """ Find the first blob start with blob_path, then get the relative path starting from dirname(blob_path). - Finally, split the relative path. 
- return (basename(blob_path), [relative splitted paths]) - If blob_path doesn't exist, return (None, None) - For example, - For blob https://trainingdaemon.blob.core.windows.net/tests/test1/test2/test.txt - * If the blob_path is '', return ('', [test1, test2, test.txt]) - * If the blob_path is test1, return (test1, [test2, test.txt]) - * If the blob_path is test1/test2, return (test2, [test2, test.txt]) - * If the blob_path is test1/test2/test.txt, return (test.txt, [test.txt]) - """ - account, container, path = self.container_and_path(blob_path) - client = self.create_container_client(account, container) - blobs = client.list_blobs(name_starts_with=path, maxresults=1) - - for blob in blobs: - dir_path, basename = self.split(path) - if dir_path: - rel_path = blob.name[len(dir_path):] - parts = rel_path.lstrip('/').split('/') - else: - parts = blob.name.split('/') - return (basename, parts) - return (None, None) - - def container_and_path(self, url): - """Split an Azure blob -prefixed URL into container and blob path.""" - root, parts = parse_blob_url(url) - if len(parts) != 2: - raise ValueError('Invalid azure blob url %s' % url) - return root, parts[0], parts[1] - - def create_container_client(self, account, container): - if self.connection_string: - client = ContainerClient.from_connection_string(self.connection_string, container) - else: - client = ContainerClient.from_container_url('https://{}/{}'.format(account, container)) - return client diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/base.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/base.py deleted file mode 100644 index ab0a40f04216b3b7766f7e6fb109b448c1fdba7a..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/base.py +++ /dev/null @@ -1,114 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -import os -from abc import ABC, abstractmethod -from collections import namedtuple - -# Data returned from the Stat call. 
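
`container_and_path` above relies on `parse_blob_url` (defined later in `io/utils.py`) to decompose an Azure blob URL into account host, container, and blob path. A standalone sketch of that decomposition, using the same example URL as the `split_blob_path` docstring:

```python
from urllib import parse


def container_and_path(url):
    """Split an Azure-blob URL into (account host, container, blob path).

    Mirrors the behaviour of parse_blob_url + container_and_path shown above;
    the example URL is the one from the docstring, used purely for illustration.
    """
    parsed = parse.urlparse(url)
    parts = parsed.path.lstrip('/').split('/', 1)
    if len(parts) != 2:
        raise ValueError('Invalid azure blob url %s' % url)
    return parsed.netloc, parts[0], parts[1]


url = 'https://trainingdaemon.blob.core.windows.net/tests/test1/test2/test.txt'
print(container_and_path(url))
# ('trainingdaemon.blob.core.windows.net', 'tests', 'test1/test2/test.txt')
```
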
-StatData = namedtuple('StatData', ['length']) - - -class BaseFileSystem(ABC): - def support_append(self): - return False - - def append(self, filename, file_content, binary_mode=False): - pass - - def download_file(self, file_to_download, file_to_save): - pass - - @abstractmethod - def exists(self, filename): - raise NotImplementedError - - @abstractmethod - def read(self, file, binary_mode=False, size=None, continue_from=None): - raise NotImplementedError - - @abstractmethod - def write(self, filename, file_content, binary_mode=False): - raise NotImplementedError - - @abstractmethod - def glob(self, filename): - raise NotImplementedError - - @abstractmethod - def isdir(self, dirname): - raise NotImplementedError - - @abstractmethod - def listdir(self, dirname): - raise NotImplementedError - - @abstractmethod - def makedirs(self, path): - raise NotImplementedError - - @abstractmethod - def stat(self, filename): - raise NotImplementedError - - -class BasePath(ABC): - @abstractmethod - def join(self, path, *paths): - pass - - @abstractmethod - def abspath(self, path): - pass - - @abstractmethod - def basename(self, path): - pass - - @abstractmethod - def relpath(self, path, start): - pass - - -class LocalPath(BasePath): - def abspath(self, path): - return os.path.abspath(os.path.expanduser(os.path.expandvars(path))) - - def basename(self, path): - return os.path.basename(path) - - def relpath(self, path, start): - return os.path.relpath(path, start) - - def join(self, path, *paths): - return os.path.join(path, *paths) - - -class RemotePath(BasePath): - def split(self, path): - """ - Split a pathname. Returns tuple '(head, tail)' where 'tail' is - everything after the final slash. Either part may be empty. - """ - sep = '/' - i = path.rfind(sep) + 1 - head, tail = path[:i], path[i:] - head = head.rstrip(sep) - return (head, tail) - - def join(self, path, *paths): - """Join paths with a slash.""" - return '/'.join((path,) + paths) - - def abspath(self, path): - return path - - def basename(self, path): - return path.split('/')[-1] - - def relpath(self, path, start): - if not path.startswith(start): - return path - start = start.rstrip('/') - begin = len(start) + 1 # include the ending slash '/' - return path[begin:] diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/cache.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/cache.py deleted file mode 100644 index ea9afab669d79885227e8c0dd165721a73a124bb..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/cache.py +++ /dev/null @@ -1,81 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -import tempfile - -from .. import utils -from .. import multiprocessing as mp -from . import file -from .file import basename, is_local, download_file, read - -logger = utils.get_logger() - - -class Cache: - def __init__(self, cache_dir=None): - self._lock = mp.Lock() - self._manager = mp.Manager() - self._cache_dict = self._manager.dict() - self._cache_dir = cache_dir - - def __getstate__(self): - """The multiprocessing module can start one of three ways: spawn, fork, or forkserver. - The default mode is fork in Unix and spawn on Windows and macOS. - Therefore, the __getstate__ and __setstate__ are used to pickle/unpickle the state in spawn mode. 
- """ - data = self.__dict__.copy() - # remove the _manager to bypass the following pickle error - # TypeError: cannot pickle 'weakref' object - if hasattr(self, '_manager'): - del data['_manager'] - logger.debug('Cache.__getstate__: %s ' % data) - return data, file._REGISTERED_FILESYSTEMS - - def __setstate__(self, state): - """The default logging level in new process is warning. Only warning and error log can be written to - streams. - So, we need call use_absl_handler in the new process. - """ - from absl import logging - logging.use_absl_handler() - logger.debug('Cache.__setstate__ %s ' % (state,)) - data, file._REGISTERED_FILESYSTEMS = state - self.__dict__.update(data) - - def read(self, filename): - local_file = self.get_remote_cache(filename) - return read(local_file) - - @property - def cache_dir(self): - return self._cache_dir - - def get_remote_cache(self, filename): - """Try to get the local file in the cache. download it to local if it cannot be found in cache.""" - local_file = self.get_file(filename) - if local_file is None: - if is_local(filename): - return filename - else: - local_file = tempfile.NamedTemporaryFile( - 'w+t', suffix='.%s' % basename(filename), dir=self._cache_dir, delete=False) - local_file.close() - download_file(filename, local_file.name) - self.add_file(filename, local_file.name) - return local_file.name - - return local_file - - def get_file(self, filename): - return self._cache_dict.get(filename) - - def add_file(self, source_file, local_file): - with self._lock: - logger.debug('add local cache %s for file %s' % (local_file, source_file)) - self._cache_dict[source_file] = local_file - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self._manager.__exit__(exc_type, exc_value, traceback) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/file.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/file.py deleted file mode 100644 index 9ef5d8485264f18426c18147663f2e1b9fb6900e..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/file.py +++ /dev/null @@ -1,658 +0,0 @@ -""" -This file is forked from -https://github.com/tensorflow/tensorboard/blob/master/tensorboard/compat/tensorflow_stub/io/gfile.py. -The following functionalities are added after forking: -* Check Azure Blob & Google Cloud available or not -* get_filesystem changes to support Azure Blobs -* add BaseFileSystem and PathBase abstracted class for the filesystem. -* add download_file for each file system to cache the remote file to local temporary folder. -* add AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for S3 file system which is not supported by tensorboard. -* add Azure blob file system -* add Google Cloud file system -* add specialized walk for Local file system, Azure Blob and Google Cloud to improve the walk performance. -* add global wrapper for abspath, basename, join, download_file. -* change the global walk wrapper to support specialized walk. -""" -import glob as py_glob -import os -import platform -import sys -import tempfile - -from .. 
import utils -from .base import BaseFileSystem, LocalPath, RemotePath, StatData -from .utils import as_bytes, as_text, parse_blob_url -from ..consts import MAX_FILE_SIZE, MAX_WINDOWS_PATH_LENGTH, MAX_LINUX_PATH_LENGTH - -logger = utils.get_logger() - -S3_ENABLED = True -try: - import boto3 - import botocore.exceptions -except ImportError: - S3_ENABLED = False - -BLOB_ENABLED = True -try: - from azure.storage.blob import ContainerClient -except ImportError: - BLOB_ENABLED = False - -GS_ENABLED = True -try: - # Imports the Google Cloud client library - from google.cloud import storage -except ImportError: - GS_ENABLED = False - - -_DEFAULT_BLOCK_SIZE = 16 * 1024 * 1024 - -# Registry of filesystems by prefix. -# -# Currently supports: -# * "s3://" URLs for S3 based on boto3 -# * "https://.blob.core.windows.net" for Azure Blob based on azure-storage-blob -# * "gs://" URLs for Google Cloud based on google-cloud-storage -# * Local filesystem when not match any prefix. -_REGISTERED_FILESYSTEMS = {} - - -def register_filesystem(prefix, filesystem): - if ":" in prefix: - raise ValueError("Filesystem prefix cannot contain a :") - _REGISTERED_FILESYSTEMS[prefix] = filesystem - - -def get_filesystem(filename): - """Return the registered filesystem for the given file.""" - prefix = "" - index = filename.find("://") - if index >= 0: - prefix = filename[:index] - if prefix.upper() in ('HTTP', 'HTTPS'): - root, _ = parse_blob_url(filename) - if root.lower().endswith('.blob.core.windows.net'): - fs = _REGISTERED_FILESYSTEMS.get('blob', None) - else: - raise ValueError("Not supported file system for prefix %s" % root) - else: - fs = _REGISTERED_FILESYSTEMS.get(prefix, None) - if fs is None: - raise ValueError("No recognized filesystem for prefix %s" % prefix) - return fs - - -class LocalFileSystem(LocalPath, BaseFileSystem): - def __init__(self): - pass - - @staticmethod - def islink(path): - return os.path.islink(path) - - def exists(self, filename): - return os.path.exists(filename) - - def read(self, file, binary_mode=False, size=None, continue_from=None): - mode = "rb" if binary_mode else "r" - encoding = None if binary_mode else "utf8" - if not self.exists(file): - raise FileNotFoundError(file) - - offset = None - if continue_from is not None: - offset = continue_from.get("opaque_offset", None) - with open(file, mode, encoding=encoding) as f: - if offset is not None: - f.seek(offset) - data = f.read(size) - # The new offset may not be `offset + len(data)`, due to decoding - # and newline translation. - # So, just measure it in whatever terms the underlying stream uses. - continuation_token = {"opaque_offset": f.tell()} - return (data, continuation_token) - - def write(self, filename, file_content, binary_mode=False): - """Writes string file contents to a file, overwriting any existing contents. - """ - self._write(filename, file_content, "wb" if binary_mode else "w") - - def support_append(self): - return True - - def append(self, filename, file_content, binary_mode=False): - """Append string file contents to a file. 
- """ - self._write(filename, file_content, "ab" if binary_mode else "a") - - def _write(self, filename, file_content, mode): - encoding = None if "b" in mode else "utf8" - with open(filename, mode, encoding=encoding) as f: - compatify = as_bytes if "b" in mode else as_text - f.write(compatify(file_content)) - - def glob(self, filename): - """Returns a list of files that match the given pattern(s).""" - if isinstance(filename, str): - return [ - matching_filename - for matching_filename in py_glob.glob(filename) - ] - else: - return [ - matching_filename - for single_filename in filename - for matching_filename in py_glob.glob(single_filename) - ] - - def isdir(self, dirname): - return os.path.isdir(dirname) - - def listdir(self, dirname): - entries = os.listdir(dirname) - entries = [item for item in entries] - return entries - - def makedirs(self, path): - os.makedirs(path, exist_ok=True) - - def stat(self, filename): - """Returns file statistics for a given path.""" - # NOTE: Size of the file is given by .st_size as returned from - # os.stat(), but we convert to .length - file_length = os.stat(filename).st_size - return StatData(file_length) - - def walk(self, top, topdown=True, onerror=None): - yield from os.walk(top, topdown, onerror, followlinks=True) - - -class S3FileSystem(RemotePath, BaseFileSystem): - """Provides filesystem access to S3.""" - - def __init__(self): - if not boto3: - raise ImportError("boto3 must be installed for S3 support.") - self._s3_endpoint = os.environ.get("S3_ENDPOINT", None) - access_key = os.environ.get("AWS_ACCESS_KEY_ID") - secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY") - if access_key and secret_key: - boto3.setup_default_session( - aws_access_key_id=access_key, aws_secret_access_key=secret_key) - - def bucket_and_path(self, url): - """Split an S3-prefixed URL into bucket and path.""" - if url.startswith("s3://"): - url = url[len("s3://"):] - idx = url.index("/") - bucket = url[:idx] - path = url[(idx + 1):] - return bucket, path - - def exists(self, filename): - """Determines whether a path exists or not.""" - client = boto3.client("s3", endpoint_url=self._s3_endpoint) - bucket, path = self.bucket_and_path(filename) - r = client.list_objects(Bucket=bucket, Prefix=path, Delimiter="/") - if r.get("Contents") or r.get("CommonPrefixes"): - return True - return False - - def read(self, file, binary_mode=False, size=None, continue_from=None): - """Reads contents of a file to a string.""" - s3 = boto3.resource("s3", endpoint_url=self._s3_endpoint) - bucket, path = self.bucket_and_path(file) - args = {} - - # S3 use continuation tokens of the form: {byte_offset: number} - offset = 0 - if continue_from is not None: - offset = continue_from.get("byte_offset", 0) - - endpoint = "" - if size is not None: - endpoint = offset + size - - if offset != 0 or endpoint != "": - args["Range"] = "bytes={}-{}".format(offset, endpoint) - - logger.info("s3: starting reading file %s" % file) - try: - stream = s3.Object(bucket, path).get(**args)["Body"].read() - except botocore.exceptions.ClientError as exc: - if exc.response["Error"]["Code"] in ["416", "InvalidRange"]: - if size is not None: - # Asked for too much, so request just to the end. Do this - # in a second request so we don't check length in all cases. 
- client = boto3.client("s3", endpoint_url=self._s3_endpoint) - obj = client.head_object(Bucket=bucket, Key=path) - content_length = obj["ContentLength"] - endpoint = min(content_length, offset + size) - if offset == endpoint: - # Asked for no bytes, so just return empty - stream = b"" - else: - args["Range"] = "bytes={}-{}".format(offset, endpoint) - stream = s3.Object(bucket, path).get(**args)["Body"].read() - else: - raise - - logger.info("s3: file %s download is done, size is %d" % - (file, len(stream))) - # `stream` should contain raw bytes here (i.e., there has been neither decoding nor newline translation), - # so the byte offset increases by the expected amount. - continuation_token = {"byte_offset": (offset + len(stream))} - if binary_mode: - return (bytes(stream), continuation_token) - else: - return (stream.decode("utf-8"), continuation_token) - - def write(self, filename, file_content, binary_mode=False): - """Writes string file contents to a file.""" - client = boto3.client("s3", endpoint_url=self._s3_endpoint) - bucket, path = self.bucket_and_path(filename) - if binary_mode: - if not isinstance(file_content, bytes): - raise TypeError("File content type must be bytes") - else: - file_content = as_bytes(file_content) - client.put_object(Body=file_content, Bucket=bucket, Key=path) - - def download_file(self, file_to_download, file_to_save): - logger.info("s3: starting downloading file %s as %s" % - (file_to_download, file_to_save)) - s3 = boto3.resource("s3", endpoint_url=self._s3_endpoint) - bucket, path = self.bucket_and_path(file_to_download) - s3.Bucket(bucket).download_file(path, file_to_save) - logger.info("s3: file %s is downloaded as %s" % (file_to_download, file_to_save)) - return - - def glob(self, filename): - """Returns a list of files that match the given pattern(s).""" - # Only support prefix with * at the end and no ? 
in the string - star_i = filename.find("*") - quest_i = filename.find("?") - if quest_i >= 0: - raise NotImplementedError("{} not supported".format(filename)) - if star_i != len(filename) - 1: - return [] - - filename = filename[:-1] - client = boto3.client("s3", endpoint_url=self._s3_endpoint) - bucket, path = self.bucket_and_path(filename) - p = client.get_paginator("list_objects") - keys = [] - for r in p.paginate(Bucket=bucket, Prefix=path): - for content in r.get("Contents", []): - key = content["Key"][len(path):] - if key: - keys.append(filename + key) - return keys - - def isdir(self, dirname): - """Returns whether the path is a directory or not.""" - client = boto3.client("s3", endpoint_url=self._s3_endpoint) - bucket, path = self.bucket_and_path(dirname) - if not path.endswith("/"): - path += "/" - r = client.list_objects(Bucket=bucket, Prefix=path, Delimiter="/") - if r.get("Contents") or r.get("CommonPrefixes"): - return True - return False - - def listdir(self, dirname): - """Returns a list of entries contained within a directory.""" - client = boto3.client("s3", endpoint_url=self._s3_endpoint) - bucket, path = self.bucket_and_path(dirname) - p = client.get_paginator("list_objects") - if not path.endswith("/"): - path += "/" - keys = [] - for r in p.paginate(Bucket=bucket, Prefix=path, Delimiter="/"): - keys.extend( - prefixes["Prefix"][len(path): -1] for prefixes in r.get("CommonPrefixes", []) - ) - for content in r.get("Contents", []): - key = content["Key"][len(path):] - if key: - keys.append(key) - return keys - - def makedirs(self, path): - """Creates a directory and all parent/intermediate directories.""" - if not self.exists(path): - client = boto3.client("s3", endpoint_url=self._s3_endpoint) - bucket, dir_path = self.bucket_and_path(path) - if not dir_path.endswith("/"): - dir_path += "/" - client.put_object(Body="", Bucket=bucket, Key=dir_path) - - def stat(self, filename): - """Returns file statistics for a given path.""" - # Size of the file is given by ContentLength from S3 - client = boto3.client("s3", endpoint_url=self._s3_endpoint) - bucket, path = self.bucket_and_path(filename) - - obj = client.head_object(Bucket=bucket, Key=path) - return StatData(obj["ContentLength"]) - - -register_filesystem("", LocalFileSystem()) -if S3_ENABLED: - register_filesystem("s3", S3FileSystem()) - -if BLOB_ENABLED: - from .azureblob import AzureBlobSystem - register_filesystem("blob", AzureBlobSystem()) - -if GS_ENABLED: - from .gs import GoogleBlobSystem - register_filesystem("gs", GoogleBlobSystem()) - - -class File(object): - def __init__(self, filename, mode): - if mode not in ("r", "rb", "br", "w", "wb", "bw"): - raise ValueError("mode {} not supported by File".format(mode)) - self.filename = filename - self.fs = get_filesystem(self.filename) - self.fs_supports_append = self.fs.support_append() - self.buff = None - self.buff_chunk_size = _DEFAULT_BLOCK_SIZE - self.buff_offset = 0 - self.continuation_token = None - self.write_temp = None - self.write_started = False - self.binary_mode = "b" in mode - self.write_mode = "w" in mode - self.closed = False - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - self.buff = None - self.buff_offset = 0 - self.continuation_token = None - - def __iter__(self): - return self - - def _read_buffer_to_offset(self, new_buff_offset): - old_buff_offset = self.buff_offset - read_size = min(len(self.buff), new_buff_offset) - old_buff_offset - self.buff_offset += read_size - return self.buff[old_buff_offset: 
old_buff_offset + read_size] - - def read(self, n=None): - """Reads contents of file to a string. - - Args: - n: int, number of bytes or characters to read, otherwise - read all the contents of the file - - Returns: - Subset of the contents of the file as a string or bytes. - """ - if self.write_mode: - raise OSError("File not opened in read mode") - - result = None - if self.buff and len(self.buff) > self.buff_offset: - # read from local buffer - if n is not None: - chunk = self._read_buffer_to_offset(self.buff_offset + n) - if len(chunk) == n: - return chunk - result = chunk - n -= len(chunk) - else: - # add all local buffer and update offsets - result = self._read_buffer_to_offset(len(self.buff)) - - # read from filesystem - read_size = max(self.buff_chunk_size, n) if n is not None else None - (self.buff, self.continuation_token) = self.fs.read( - self.filename, self.binary_mode, read_size, self.continuation_token) - self.buff_offset = 0 - - # add from filesystem - if n is not None: - chunk = self._read_buffer_to_offset(n) - else: - # add all local buffer and update offsets - chunk = self._read_buffer_to_offset(len(self.buff)) - result = result + chunk if result else chunk - - return result - - def write(self, file_content): - """Writes string file contents to file, clearing contents of the file - on first write and then appending on subsequent calls. - """ - if not self.write_mode: - raise OSError("File not opened in write mode") - - if self.closed: - raise OSError("File already closed") - - if self.fs_supports_append: - if not self.write_started: - # write the first chunk to truncate file if it already exists - self.fs.write(self.filename, file_content, self.binary_mode) - self.write_started = True - else: - # append the later chunks - self.fs.append(self.filename, file_content, self.binary_mode) - else: - # add to temp file, but wait for flush to write to final filesystem - if self.write_temp is None: - mode = "w+b" if self.binary_mode else "w+" - self.write_temp = tempfile.TemporaryFile(mode) - - compatify = as_bytes if self.binary_mode else as_text - self.write_temp.write(compatify(file_content)) - - def __next__(self): - line = None - while True: - if not self.buff: - # read one unit into the buffer - line = self.read(1) - if line and (line[-1] == "\n" or not self.buff): - return line - if not self.buff: - return None - else: - index = self.buff.find("\n", self.buff_offset) - if index != -1: - # include line until now plus newline - chunk = self.read(index + 1 - self.buff_offset) - line = line + chunk if line else chunk - return line - - # read one unit past end of buffer - chunk = self.read(len(self.buff) + 1 - self.buff_offset) - line = line + chunk if line else chunk - if line and (line[-1] == "\n" or not self.buff): - return line - if not self.buff: - return None - - def next(self): - return self.__next__() - - def flush(self): - if self.closed: - raise OSError("File already closed") - - if not self.fs_supports_append: - if self.write_temp is not None: - # read temp file from the beginning - self.write_temp.flush() - self.write_temp.seek(0) - chunk = self.write_temp.read() - if chunk is not None: - # write full contents and keep in temp file - self.fs.write(self.filename, chunk, self.binary_mode) - self.write_temp.seek(len(chunk)) - - def close(self): - self.flush() - if self.write_temp is not None: - self.write_temp.close() - self.write_temp = None - self.write_started = False - self.closed = True - - -def exists(filename): - """Determines whether a path exists or not.""" - 
return get_filesystem(filename).exists(filename) - - -def abspath(path): - return get_filesystem(path).abspath(path) - - -def basename(path): - return get_filesystem(path).basename(path) - - -def relpath(path, start): - return get_filesystem(path).relpath(path, start) - - -def join(path, *paths): - return get_filesystem(path).join(path, *paths) - - -def download_file(file_to_download, file_to_save): - """Downloads the file, returning a temporary path to the file after finishing.""" - get_filesystem(file_to_download).download_file(file_to_download, file_to_save) - - -def glob(filename): - """Returns a list of files that match the given pattern(s).""" - return get_filesystem(filename).glob(filename) - - -def is_local(path): - """Returns whether the path is a local path""" - return isinstance(get_filesystem(path), LocalFileSystem) - - -def isdir(dirname): - """Returns whether the path is a directory or not.""" - return get_filesystem(dirname).isdir(dirname) - - -def listdir(dirname): - """Returns a list of entries contained within a directory. - - The list is in arbitrary order. It does not contain the special entries "." - and "..". - """ - return get_filesystem(dirname).listdir(dirname) - - -def makedirs(path): - """Creates a directory and all parent/intermediate directories.""" - return get_filesystem(path).makedirs(path) - - -def walk(top, topdown=True, onerror=None): - """Recursive directory tree generator for directories. - - Args: - top: string, a Directory name - topdown: bool, Traverse pre order if True, post order if False. - onerror: optional handler for errors. Should be a function, it will be - called with the error as argument. Rethrowing the error aborts the walk. - - Errors that happen while listing directories are ignored. - - Yields: - Each yield is a 3-tuple: the pathname of a directory, followed by lists - of all its subdirectories and leaf files. 
- (dirname, [subdirname, subdirname, ...], [filename, filename, ...]) - as strings - """ - fs = get_filesystem(top) - if hasattr(fs, "walk"): - yield from fs.walk(top, topdown, onerror) - else: - top = fs.abspath(top) - listing = fs.listdir(top) - - files = [] - subdirs = [] - for item in listing: - full_path = fs.join(top, item) - if fs.isdir(full_path): - subdirs.append(item) - else: - files.append(item) - - here = (top, subdirs, files) - - if topdown: - yield here - - for subdir in subdirs: - joined_subdir = fs.join(top, subdir) - for subitem in walk(joined_subdir, topdown, onerror=onerror): - yield subitem - - if not topdown: - yield here - - -def stat(filename): - """Returns file statistics for a given path.""" - return get_filesystem(filename).stat(filename) - - -def read(file): - with File(file, 'rb') as f: - return f.read() - - -def is_link(path): - return LocalFileSystem.islink(path) - - -def is_too_big_file(filepath): - return stat(filepath).length > MAX_FILE_SIZE - - -def has_too_long_path(filepath): - if platform.system() == 'Windows' and len(filepath) > MAX_WINDOWS_PATH_LENGTH: - logger.warning( - f'The path length of the file "{filepath}" exceeds the maximum limit of {MAX_WINDOWS_PATH_LENGTH} ' - f'and will be skipped.') - return True - elif len(filepath) > MAX_WINDOWS_PATH_LENGTH: - logger.warning( - f'The path length of the file "{filepath}" exceeds the maximum limit of {MAX_LINUX_PATH_LENGTH} ' - f'and will be skipped.') - return True - else: - return False - - -def check_file_valid(filepath): - if is_link(filepath): - logger.warning(f'File "{filepath}" is a soft link and will be skipped.') - return False - if is_too_big_file(filepath): - logger.warning( - f'File "{filepath}" exceeds the maximum limit size of 500MB and will be skipped.') - return False - if has_too_long_path(filepath): - return False - return True - diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/gs.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/gs.py deleted file mode 100644 index 8596bce2b892b7188155d05330a6356a83323eff..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/gs.py +++ /dev/null @@ -1,126 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -from google.cloud import storage - -from .. 
import utils -from .base import BaseFileSystem, RemotePath, StatData - -logger = utils.get_logger() - - -class GoogleBlobSystem(RemotePath, BaseFileSystem): - """Provides filesystem access to S3.""" - - def __init__(self): - if not storage: - raise ImportError('google-cloud-storage must be installed for Google Cloud Blob support.') - - def exists(self, filename): - """Returns whether the path is a directory or not.""" - bucket_name, path = self.bucket_and_path(filename) - client = self.create_google_cloud_client() - bucket = client.bucket(bucket_name) - return bucket.blob(path).exists() - - def read(self, file, binary_mode=False, size=None, continue_from=None): - raise NotImplementedError - - def write(self, filename, file_content, binary_mode=False): - raise NotImplementedError - - def glob(self, filename): - raise NotImplementedError - - def download_file(self, file_to_download, file_to_save): - bucket_name, path = self.bucket_and_path(file_to_download) - client = self.create_google_cloud_client() - bucket = client.bucket(bucket_name) - blob = bucket.blob(path) - blob.download_to_filename(file_to_save) - - def isdir(self, dirname): - """Returns whether the path is a directory or not.""" - basename, parts = self.split_blob_path(dirname) - if basename is None or parts is None: - return False - if basename == '': - # root container case - return True - else: - return basename == parts[0] and len(parts) > 1 - - def listdir(self, dirname): - """Returns a list of entries contained within a directory.""" - bucket_name, path = self.bucket_and_path(dirname) - client = self.create_google_cloud_client() - blobs = client.list_blobs(bucket_name, prefix=path) - items = [] - for blob in blobs: - item = self.relpath(blob.name, path) - if items not in items: - items.append(item) - return items - - def makedirs(self, path): - """No need create directory since the upload blob will automatically create""" - pass - - def stat(self, filename): - """Returns file statistics for a given path.""" - bucket_name, path = self.bucket_and_path(filename) - client = self.create_google_cloud_client() - bucket = client.bucket(bucket_name) - blob = bucket.get_blob(path) - return StatData(blob.size) - - def walk(self, top, topdown=True, onerror=None): - bucket_name, path = self.bucket_and_path(top) - client = self.create_google_cloud_client() - blobs = client.list_blobs(bucket_name, prefix=path) - results = {} - for blob in blobs: - dirname, basename = self.split(blob.name) - dirname = 'gs://{}/{}'.format(bucket_name, dirname) - results.setdefault(dirname, []).append(basename) - for key, value in results.items(): - yield key, None, value - - def split_blob_path(self, blob_path): - """ Find the first blob start with blob_path, then get the relative path starting from dirname(blob_path). - Finally, split the relative path. 
- return (basename(blob_path), [relative splitted paths]) - If blob_path doesn't exist, return (None, None) - For example, - For blob gs://tests/test1/test2/test.txt - * If the blob_path is '', return ('', [test1, test2, test.txt]) - * If the blob_path is test1, return (test1, [test2, test.txt]) - * If the blob_path is test1/test2, return (test2, [test2, test.txt]) - * If the blob_path is test1/test2/test.txt, return (test.txt, [test.txt]) - """ - bucket_name, path = self.bucket_and_path(blob_path) - client = self.create_google_cloud_client() - blobs = client.list_blobs(bucket_name, prefix=path, delimiter=None, max_results=1) - - for blob in blobs: - dir_path, basename = self.split(path) - if dir_path: - rel_path = blob.name[len(dir_path):] - parts = rel_path.lstrip('/').split('/') - else: - parts = blob.name.split('/') - return (basename, parts) - return (None, None) - - def bucket_and_path(self, url): - """Split an S3-prefixed URL into bucket and path.""" - if url.startswith('gs://'): - url = url[len('gs://'):] - idx = url.index('/') - bucket = url[:idx] - path = url[(idx + 1):] - return bucket, path - - def create_google_cloud_client(self): - client = storage.Client.create_anonymous_client() - return client diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/utils.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/utils.py deleted file mode 100644 index 79e9afc391bdc41785850e0ebb3522c97cc4ad53..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/io/utils.py +++ /dev/null @@ -1,72 +0,0 @@ -def as_str_any(value): - """Converts to `str` as `str(value)`, but use `as_str` for `bytes`. - - Args: - value: A object that can be converted to `str`. - - Returns: - A `str` object. - """ - if isinstance(value, bytes): - return as_str(value) - else: - return str(value) - - -def as_text(bytes_or_text, encoding="utf-8"): - """Returns the given argument as a unicode string. - - Args: - bytes_or_text: A `bytes`, `str`, or `unicode` object. - encoding: A string indicating the charset for decoding unicode. - - Returns: - A `str` (Python 3) object. - - Raises: - TypeError: If `bytes_or_text` is not a binary or unicode string. - """ - if isinstance(bytes_or_text, str): - return bytes_or_text - elif isinstance(bytes_or_text, bytes): - return bytes_or_text.decode(encoding) - else: - raise TypeError( - "Expected binary or unicode string, got %r" % bytes_or_text - ) - - -# Convert an object to a `str` in both Python 2 and 3. -as_str = as_text - - -def as_bytes(bytes_or_text, encoding="utf-8"): - """Converts either bytes or unicode to `bytes`, using utf-8 encoding for - text. - - Args: - bytes_or_text: A `bytes`, `str`, or `unicode` object. - encoding: A string indicating the charset for encoding unicode. - - Returns: - A `bytes` object. - - Raises: - TypeError: If `bytes_or_text` is not a binary or unicode string. 
- """ - if isinstance(bytes_or_text, str): - return bytes_or_text.encode(encoding) - elif isinstance(bytes_or_text, bytes): - return bytes_or_text - else: - raise TypeError( - "Expected binary or unicode string, got %r" % (bytes_or_text,) - ) - - -def parse_blob_url(url): - from urllib import parse - url_path = parse.urlparse(url) - - parts = url_path.path.lstrip('/').split('/', 1) - return url_path.netloc, tuple(parts) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/multiprocessing.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/multiprocessing.py deleted file mode 100644 index b71773505c4473934340a0e573ebfcfe3db6f6a4..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/multiprocessing.py +++ /dev/null @@ -1,13 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -import multiprocessing as mp -import os - - -def get_start_method(): - return os.getenv('TORCH_PROFILER_START_METHOD', 'spawn') - - -__all__ = [x for x in dir(mp.get_context(get_start_method())) if not x.startswith('_')] -globals().update((name, getattr(mp.get_context(get_start_method()), name)) for name in __all__) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/plugin.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/plugin.py deleted file mode 100644 index 2651f87c087a419c950f93b201606e7601a33a08..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/plugin.py +++ /dev/null @@ -1,651 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling. -# -------------------------------------------------------------------------- -import atexit -import copy -import gzip -import json -import os -import shutil -import sys -import tempfile -import threading -import time -from collections import OrderedDict -from queue import Queue - -import werkzeug -from tensorboard.plugins import base_plugin -from werkzeug import exceptions, wrappers - -from . 
import consts, io, utils -from .profiler import RunLoader -from .run import DistributedRunProfile, Run, RunProfile - -logger = utils.get_logger() - - -def decorate_headers(func): - def wrapper(*args, **kwargs): - headers = func(*args, **kwargs) - headers.extend(TorchProfilerPlugin.headers) - return headers - - return wrapper - - -exceptions.HTTPException.get_headers = decorate_headers(exceptions.HTTPException.get_headers) - - -class TorchProfilerPlugin(base_plugin.TBPlugin): - """TensorBoard plugin for Torch Profiler.""" - - plugin_name = consts.PLUGIN_NAME - headers = [('X-Content-Type-Options', 'nosniff')] - CONTENT_TYPE = 'application/json' - - def __init__(self, context: base_plugin.TBContext): - """Instantiates TorchProfilerPlugin. - Args: - context: A base_plugin.TBContext instance. - """ - super(TorchProfilerPlugin, self).__init__(context) - self.logdir = io.abspath(context.logdir.rstrip('/')) - - self._load_lock = threading.Lock() - self._load_threads = [] - - self._runs = OrderedDict() - self._runs_lock = threading.Lock() - - self._temp_dir = tempfile.mkdtemp() - self._cache = io.Cache(self._temp_dir) - self._queue = Queue() - self._gpu_metrics_file_dict = {} - monitor_runs = threading.Thread(target=self._monitor_runs, name='monitor_runs', daemon=True) - monitor_runs.start() - - receive_runs = threading.Thread(target=self._receive_runs, name='receive_runs', daemon=True) - receive_runs.start() - - self.diff_run_cache = {} - self.diff_run_flatten_cache = {} - - def clean(): - logger.debug('starting cleanup...') - self._cache.__exit__(*sys.exc_info()) - logger.debug('remove temporary cache directory %s' % self._temp_dir) - shutil.rmtree(self._temp_dir) - - atexit.register(clean) - - def is_active(self): - """Returns whether there is relevant data for the plugin to process. 
- If there is no any pending run, hide the plugin - """ - if self.is_loading: - return True - else: - with self._runs_lock: - return bool(self._runs) - - def get_plugin_apps(self): - return { - '/index.js': self.static_file_route, - '/index.html': self.static_file_route, - '/trace_viewer_full.html': self.static_file_route, - '/trace_embedding.html': self.static_file_route, - '/trace_script.js': self.static_file_route, - '/runs': self.runs_route, - '/views': self.views_route, - '/workers': self.workers_route, - '/spans': self.spans_route, - '/overview': self.overview_route, - '/operation': self.operation_pie_route, - '/operation/table': self.operation_table_route, - '/operation/stack': self.operation_stack_route, - '/kernel': self.kernel_pie_route, - '/kernel/table': self.kernel_table_route, - '/kernel/tc_pie': self.kernel_tc_route, - '/trace': self.trace_route, - '/distributed/gpuinfo': self.dist_gpu_info_route, - '/distributed/overlap': self.comm_overlap_route, - '/distributed/waittime': self.comm_wait_route, - '/distributed/commops': self.comm_ops_route, - '/memory': self.memory_route, - '/memory_curve': self.memory_curve_route, - '/memory_events': self.memory_events_route, - '/module': self.module_route, - '/tree': self.op_tree_route, - '/diff': self.diff_run_route, - '/diffnode': self.diff_run_node_route, - } - - def frontend_metadata(self): - return base_plugin.FrontendMetadata(es_module_path='/index.js', disable_reload=True) - - @wrappers.Request.application - def runs_route(self, request: werkzeug.Request): - with self._runs_lock: - names = list(self._runs.keys()) - - data = { - 'runs': names, - 'loading': self.is_loading - } - return self.respond_as_json(data) - - @wrappers.Request.application - def views_route(self, request: werkzeug.Request): - name = request.args.get('run') - self._validate(run=name) - run = self._get_run(name) - views_list = [view.display_name for view in run.views] - data = { - 'device_target': run.device_target, - 'views': views_list - } - return self.respond_as_json(data) - - @wrappers.Request.application - def workers_route(self, request: werkzeug.Request): - name = request.args.get('run') - view = request.args.get('view') - self._validate(run=name, view=view) - run = self._get_run(name) - # Required datas are in file 'trace_view.json' in Ascend. 
- if run.device_target == 'Ascend' and view == 'Overview': - view = 'Trace' - return self.respond_as_json(run.get_workers(view)) - - @wrappers.Request.application - def spans_route(self, request: werkzeug.Request): - name = request.args.get('run') - worker = request.args.get('worker') - self._validate(run=name, worker=worker) - run = self._get_run(name) - return self.respond_as_json(run.get_spans(worker)) - - @wrappers.Request.application - def overview_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - name = request.args.get('run') - run = self._get_run(name) - data = profile.overview - is_gpu_used = profile.has_runtime or profile.has_kernel or profile.has_memcpy_or_memset - normal_workers = [worker for worker in run.workers if worker != 'All'] - data['environments'] = [{'title': 'Number of Worker(s)', 'value': str(len(normal_workers))}, - {'title': 'Device Type', 'value': 'GPU' if is_gpu_used else 'CPU'}] - if profile.gpu_summary and profile.gpu_tooltip: - data['gpu_metrics'] = {'title': 'GPU Summary', - 'data': profile.gpu_summary, - 'tooltip': profile.gpu_tooltip} - - return self.respond_as_json(data) - - @wrappers.Request.application - def operation_pie_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - - group_by = request.args.get('group_by') - if group_by == 'OperationAndInputShape': - return self.respond_as_json(profile.operation_pie_by_name_input) - else: - return self.respond_as_json(profile.operation_pie_by_name) - - @wrappers.Request.application - def operation_table_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - - group_by = request.args.get('group_by') - if group_by == 'OperationAndInputShape': - return self.respond_as_json(profile.operation_table_by_name_input) - else: - return self.respond_as_json(profile.operation_table_by_name) - - @wrappers.Request.application - def operation_stack_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - - op_name = request.args.get('op_name') - self._validate(op_name=op_name) - group_by = request.args.get('group_by') - input_shape = request.args.get('input_shape') - if group_by == 'OperationAndInputShape': - return self.respond_as_json(profile.operation_stack_by_name_input[str(op_name) + '###' + str(input_shape)]) - else: - return self.respond_as_json(profile.operation_stack_by_name[str(op_name)]) - - @wrappers.Request.application - def kernel_pie_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - - return self.respond_as_json(profile.kernel_pie) - - @wrappers.Request.application - def kernel_table_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - - group_by = request.args.get('group_by') - if group_by == 'Kernel': - return self.respond_as_json(profile.kernel_table) - else: - return self.respond_as_json(profile.kernel_op_table) - - @wrappers.Request.application - def kernel_tc_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - - return self.respond_as_json(profile.tc_pie) - - @wrappers.Request.application - def trace_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - - if not profile.has_kernel: # Pure CPU. 
- raw_data = self._cache.read(profile.trace_file_path) - if not profile.trace_file_path.endswith('.gz'): - raw_data = gzip.compress(raw_data, 1) - else: - file_with_gpu_metrics = self._gpu_metrics_file_dict.get(profile.trace_file_path) - if file_with_gpu_metrics: - raw_data = io.read(file_with_gpu_metrics) - else: - raw_data = self._cache.read(profile.trace_file_path) - if not profile.trace_file_path.endswith('.gz'): - raw_data = gzip.compress(raw_data, 1) - - # write the data to temp file - fp = tempfile.NamedTemporaryFile('w+b', suffix='.json.gz', dir=self._temp_dir, delete=False) - fp.close() - # Already compressed, no need to gzip.open - with open(fp.name, mode='wb') as file: - file.write(raw_data) - self._gpu_metrics_file_dict[profile.trace_file_path] = fp.name - - headers = [('Content-Encoding', 'gzip')] - headers.extend(TorchProfilerPlugin.headers) - return werkzeug.Response(raw_data, content_type=TorchProfilerPlugin.CONTENT_TYPE, headers=headers) - - @wrappers.Request.application - def dist_gpu_info_route(self, request: werkzeug.Request): - profile = self._get_distributed_profile_for_request(request) - return self.respond_as_json(profile.gpu_info) - - @wrappers.Request.application - def comm_overlap_route(self, request: werkzeug.Request): - profile = self._get_distributed_profile_for_request(request) - return self.respond_as_json(profile.steps_to_overlap) - - @wrappers.Request.application - def comm_wait_route(self, request: werkzeug.Request): - profile = self._get_distributed_profile_for_request(request) - return self.respond_as_json(profile.steps_to_wait) - - @wrappers.Request.application - def comm_ops_route(self, request: werkzeug.Request): - profile = self._get_distributed_profile_for_request(request) - return self.respond_as_json(profile.comm_ops) - - @wrappers.Request.application - def memory_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - start_ts = request.args.get('start_ts', None) - end_ts = request.args.get('end_ts', None) - memory_metric = request.args.get('memory_metric', 'KB') - if start_ts is not None: - start_ts = int(start_ts) - if end_ts is not None: - end_ts = int(end_ts) - - return self.respond_as_json( - profile.get_memory_stats(start_ts=start_ts, end_ts=end_ts, memory_metric=memory_metric), True) - - @wrappers.Request.application - def memory_curve_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - if profile.device_target == 'Ascend': - return self.respond_as_json(profile.memory_all_curve, True) - else: - time_metric = request.args.get('time_metric', 'ms') - memory_metric = request.args.get('memory_metric', 'MB') - return self.respond_as_json( - profile.get_memory_curve(time_metric=time_metric, memory_metric=memory_metric), True) - - @wrappers.Request.application - def memory_events_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - start_ts = request.args.get('start_ts', None) - end_ts = request.args.get('end_ts', None) - time_metric = request.args.get('time_metric', 'ms') - memory_metric = request.args.get('memory_metric', 'KB') - if profile.device_target == 'Ascend': - temp_memory_events = copy.deepcopy(profile.memory_events) - operator_memory_events = temp_memory_events['operator']['rows'] - if start_ts is not None: - start_ts = float(start_ts) - if end_ts is not None: - end_ts = float(end_ts) - for key in operator_memory_events: - if start_ts is not None and end_ts is not None: - operator_memory_events[key] = [ - i - for i in 
operator_memory_events[key] - if i[2] and start_ts <= i[2] <= end_ts - ] - elif start_ts is not None: - operator_memory_events[key] = [ - i - for i in operator_memory_events[key] - if i[2] and start_ts <= i[2] - ] - elif end_ts is not None: - operator_memory_events[key] = [ - i - for i in operator_memory_events[key] - if i[2] and end_ts >= i[2] - ] - return self.respond_as_json(temp_memory_events, True) - else: - if start_ts is not None: - start_ts = int(start_ts) - if end_ts is not None: - end_ts = int(end_ts) - return self.respond_as_json( - profile.get_memory_events(start_ts, end_ts, time_metric=time_metric, - memory_metric=memory_metric), True) - - @wrappers.Request.application - def module_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - content = profile.get_module_view() - if content: - return self.respond_as_json(content, True) - else: - name = request.args.get('run') - worker = request.args.get('worker') - span = request.args.get('span') - raise exceptions.NotFound('could not find the run for %s/%s/%s' % (name, worker, span)) - - @wrappers.Request.application - def op_tree_route(self, request: werkzeug.Request): - profile = self._get_profile_for_request(request) - content = profile.get_operator_tree() - return self.respond_as_json(content, True) - - @wrappers.Request.application - def diff_run_route(self, request: werkzeug.Request): - base, exp = self.get_diff_runs(request) - diff_stats = self.get_diff_status(base, exp) - content = diff_stats.get_diff_tree_summary() - return self.respond_as_json(content, True) - - @wrappers.Request.application - def diff_run_node_route(self, request: werkzeug.Request): - base, exp = self.get_diff_runs(request) - path = request.args.get('path', '0') - stats_dict = self.get_diff_stats_dict(base, exp) - diff_stat = stats_dict.get(path) - if diff_stat is None: - raise exceptions.NotFound('could not find diff run for %s' % (path)) - content = diff_stat.get_diff_node_summary(path) - return self.respond_as_json(content, True) - - @wrappers.Request.application - def static_file_route(self, request: werkzeug.Request): - filename = os.path.basename(request.path) - extension = os.path.splitext(filename)[1] - if extension == '.html': - mimetype = 'text/html' - elif extension == '.css': - mimetype = 'text/css' - elif extension == '.js': - mimetype = 'application/javascript' - else: - mimetype = 'application/octet-stream' - filepath = os.path.join(os.path.dirname(__file__), 'static', filename) - try: - with open(filepath, 'rb') as infile: - contents = infile.read() - except IOError as e: - raise exceptions.NotFound('404 Not Found') from e - return werkzeug.Response( - contents, content_type=mimetype, headers=TorchProfilerPlugin.headers - ) - - @staticmethod - def respond_as_json(obj, compress: bool = False): - content = json.dumps(obj) - headers = [] - headers.extend(TorchProfilerPlugin.headers) - if compress: - content_bytes = content.encode('utf-8') - raw_data = gzip.compress(content_bytes, 1) - headers.append(('Content-Encoding', 'gzip')) - return werkzeug.Response(raw_data, content_type=TorchProfilerPlugin.CONTENT_TYPE, headers=headers) - else: - return werkzeug.Response(content, content_type=TorchProfilerPlugin.CONTENT_TYPE, headers=headers) - - @property - def is_loading(self): - with self._load_lock: - return bool(self._load_threads) - - def get_diff_runs(self, request: werkzeug.Request): - name = request.args.get('run') - span = request.args.get('span') - worker = request.args.get('worker') - 
self._validate(run=name, worker=worker, span=span) - base = self._get_profile(name, worker, span) - - exp_name = request.args.get('exp_run') - exp_span = request.args.get('exp_span') - exp_worker = request.args.get('exp_worker') - self._validate(exp_run=exp_name, exp_worker=exp_worker, exp_span=exp_span) - exp = self._get_profile(exp_name, exp_worker, exp_span) - - return base, exp - - def get_diff_status(self, base: RunProfile, exp: RunProfile): - key = (base, exp) - diff_stats = self.diff_run_cache.get(key) - if diff_stats is None: - diff_stats = base.compare_run(exp) - self.diff_run_cache[key] = diff_stats - - return diff_stats - - def get_diff_stats_dict(self, base: RunProfile, exp: RunProfile): - key = (base, exp) - stats_dict = self.diff_run_flatten_cache.get(key) - if stats_dict is None: - diff_stats = self.get_diff_status(base, exp) - stats_dict = diff_stats.flatten_diff_tree() - self.diff_run_flatten_cache[key] = stats_dict - return stats_dict - - def _monitor_runs(self): - logger.info('Monitor runs begin') - touched = set() - try: - while True: - try: - logger.debug('Scan run dir') - run_dirs = self._get_run_dirs() - - has_dir = False - # Assume no deletion on run directories, trigger async load if find a new run - for run_dir in run_dirs: - has_dir = True - run_device = f'{run_dir["name"]}_{run_dir["device_target"]}' - if run_device not in touched: - touched.add(run_device) - logger.info('Find run directory %s', run_dir['name']) - # Use threading to avoid UI stall and reduce data parsing time - t = threading.Thread(target=self._load_run, args=(run_dir,)) - t.start() - with self._load_lock: - self._load_threads.append(t) - - if not has_dir: - # handle directory removed case. - self._runs.clear() - except Exception as ex: - logger.warning('Failed to scan runs. Exception=%s', ex, exc_info=True) - - time.sleep(consts.MONITOR_RUN_REFRESH_INTERNAL_IN_SECONDS) - except Exception: - logger.exception('Failed to start monitor_runs') - - def _receive_runs(self): - while True: - run: Run = self._queue.get() - if run is None: - continue - - logger.info('Add run %s', run.name) - with self._runs_lock: - is_new = run.name not in self._runs - self._runs[run.name] = run - if is_new: - self._runs = OrderedDict(sorted(self._runs.items())) - - def _get_run_dirs(self): - """Scan logdir, find PyTorch Profiler run directories. - A directory is considered to be a gpu run if it contains 1 or more *.pt.trace.json[.gz]. - E.g. there are 2 runs: run1, run2 - /run1 - /[worker1].pt.trace.json.gz - /[worker2].pt.trace.json.gz - /run2 - /[worker1].pt.trace.json - A directory is considered to be an ascend run if it satisfies the following two conditions: - 1.At least one subdirectory with the name in this format: {worker}_{span}_ascend_pt. - 2.The subdirectory in condition 1 has a 'ASCEND_PROFILER_OUTPUT' subdirectory which - contains at least one of these 4 kind of files: - [ - 'trace_view.json(.gz)', - 'kernel_details.csv', - 'operator_details.csv', - 'operator_memory.csv' & 'memory_record.csv' - ] - E.g. 
there are 2 runs: run1, run2 - /run1 - /[worker1]_[span1]_ascend_pt - /ASCEND_PROFILER_OUTPUT - /trace_view.json - /kernel_details.csv - /operator_details.csv - /operator_memory.csv - /memory_record.csv - /[worker2]_[span1]_ascend_pt - /ASCEND_PROFILER_OUTPUT - /trace_view.json - /operator_details.csv - /run2 - /[worker1]_[span1]_ascend_pt - /ASCEND_PROFILER_OUTPUT - /memory_record.csv - /operator_memory.csv - """ - for root, subdirs, files in io.walk(self.logdir): - for subdir in subdirs: - if str(subdir) == 'ASCEND_PROFILER_OUTPUT': - match = consts.WORKER_SPAN_PATTERN.match(io.basename(root)) - if match is not None: - run_name = io.abspath(io.join(root, '..')) - yield {'name': run_name, 'device_target': 'Ascend'} - break - - for file in files: - if utils.is_gpu_chrome_trace_file(file): - yield {'name': root, 'device_target': 'GPU'} - break - - def _load_run(self, run_dir): - try: - name = self._get_run_name(run_dir['name']) - logger.info('Load run %s', name) - # Currently, assume run data is immutable, so just load once - loader = RunLoader(name, run_dir['name'], self._cache, run_dir['device_target']) - run = loader.load() - if run.profiles: - self._queue.put(run) - logger.info('Run %s loaded', name) - else: - logger.warning(f'Run {name} skipped') - except Exception as ex: - logger.warning('Failed to load run %s. Exception=%s', ex, name, exc_info=True) - - t = threading.current_thread() - with self._load_lock: - try: - self._load_threads.remove(t) - except ValueError: - logger.warning('could not find the thread {}'.format(run_dir)) - - def _get_run(self, name) -> Run: - with self._runs_lock: - run = self._runs.get(name, None) - - if run is None: - raise exceptions.NotFound('could not find the run for %s' % (name)) - - return run - - def _get_run_name(self, run_dir): - logdir = io.abspath(self.logdir) - if run_dir == logdir: - name = io.basename(run_dir) - else: - name = io.relpath(run_dir, logdir) - return name - - def _get_profile_for_request(self, request: werkzeug.Request) -> RunProfile: - name = request.args.get('run') - span = request.args.get('span') - worker = request.args.get('worker') - self._validate(run=name, worker=worker) - profile = self._get_profile(name, worker, span) - if not isinstance(profile, RunProfile): - raise exceptions.BadRequest('Get an unexpected profile type %s for %s/%s' % (type(profile), name, worker)) - - return profile - - def _get_distributed_profile_for_request(self, request: werkzeug.Request) -> DistributedRunProfile: - name = request.args.get('run') - span = request.args.get('span') - self._validate(run=name) - profile = self._get_profile(name, 'All', span) - if not isinstance(profile, DistributedRunProfile): - raise exceptions.BadRequest('Get an unexpected distributed profile type %s for %s' % (type(profile), name)) - - return profile - - def _get_profile(self, name, worker, span): - run = self._get_run(name) - profile = run.get_profile(worker, span) - if profile is None: - raise exceptions.NotFound('could not find the profile for %s/%s/%s ' % (name, worker, span)) - return profile - - def _validate(self, **kwargs): - for name, v in kwargs.items(): - if v is None: - raise exceptions.BadRequest('Must specify %s in request url' % (name)) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/__init__.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/__init__.py deleted file mode 100644 index 59a0e64155546ce75e1c4607cf35c3144a28271f..0000000000000000000000000000000000000000 --- 
a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -------------------------------------------------------------------------- -__all__ = ['RunLoader'] - -from .loader import RunLoader diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/communication.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/communication.py deleted file mode 100644 index 0afcdb11a66f89b8a448713bf140e3293db7e503..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/communication.py +++ /dev/null @@ -1,95 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -from typing import Dict, List, Tuple - -from .. import utils -from .node import CommunicationNode -from .range_utils import get_ranges_sum, merge_ranges - -logger = utils.get_logger() - - -def generate_communication_nodes( - communication_data: Dict[int, CommunicationNode], - steps: List[Tuple[int, int]], - steps_names: List[str]): - comm_node_list: List[CommunicationNode] = [] - - # Sort the communication node according the start time, this is for correlating communication node between workers - for comm_node in communication_data.values(): - comm_node.kernel_ranges.sort(key=lambda x: (x[0], -x[1])) - comm_node_list.append(comm_node) - comm_node_list.sort(key=lambda x: (x.start_time, -x.end_time)) - - # Find each communication node belong to which step - index = 0 - valid_steps = len(steps) - for comm_node in comm_node_list: - while index < valid_steps: - if comm_node.start_time >= steps[index][0] and comm_node.end_time <= steps[index][1]: - comm_node.step_name = steps_names[index] - break - elif comm_node.start_time >= steps[index][1]: - index += 1 - else: - logger.error('Found a communication op not belong to any step.') - break - if index >= valid_steps: - logger.error('Found communication ops not belong to any step. 
') - break - - return comm_node_list - - -def analyze_communication_nodes(comm_node_list: List[CommunicationNode])\ - -> Tuple[Dict[str, Tuple[int, int]], Dict[str, List[int]]]: - step_comm_stats: Dict[str, Tuple[int, int]] = {} - total_comm_stats: Dict[str, Tuple[int, int, List, List]] = {} - - step_to_comm_ranges: Dict[str, Tuple[List, List]] = {} - for comm_node in comm_node_list: - if comm_node.step_name not in step_to_comm_ranges: - step_to_comm_ranges[comm_node.step_name] = [[], []] - step_to_comm_ranges[comm_node.step_name][0].extend(comm_node.kernel_ranges) - step_to_comm_ranges[comm_node.step_name][1].extend(comm_node.real_time_ranges) - - if comm_node.name not in total_comm_stats: - total_comm_stats[comm_node.name] = [0, 0, [], []] - total_comm_stats[comm_node.name][0] += 1 - bytes_one_value = 0 - if comm_node.input_shape: - for i, shape in enumerate(comm_node.input_shape): - if comm_node.input_type[i] == 'long int': - bytes_one_value = 8 - elif comm_node.input_type[i] == 'float': - bytes_one_value = 4 - elif comm_node.input_type[i] == 'int': - bytes_one_value = 4 - elif comm_node.input_type[i] == 'c10::Half': - bytes_one_value = 2 - elif comm_node.input_type[i] == 'c10:BFloat16': - bytes_one_value = 2 - elif comm_node.input_type[i] == 'unsigned char': - bytes_one_value = 1 - else: - logger.warning('Found an unknown tensor type: {}'.format(comm_node.input_type[i])) - bytes_one_value = 0 - total_size = 1 - for size in shape: - total_size *= size - total_comm_stats[comm_node.name][1] += total_size * bytes_one_value - total_comm_stats[comm_node.name][2].extend(comm_node.kernel_ranges) - total_comm_stats[comm_node.name][3].extend(comm_node.real_time_ranges) - - for step, comm_ranges in step_to_comm_ranges.items(): - step_comm_stats[step] = [ - get_ranges_sum(merge_ranges(comm_ranges[0])), - get_ranges_sum(merge_ranges(comm_ranges[1])) - ] - - for _, stats in total_comm_stats.items(): - stats[2] = get_ranges_sum(merge_ranges(stats[2])) - stats[3] = get_ranges_sum(merge_ranges(stats[3])) - - return step_comm_stats, total_comm_stats diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py deleted file mode 100644 index 00544e635340c556d5346fc307bb29913c08929c..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/data.py +++ /dev/null @@ -1,488 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling. 
-# -------------------------------------------------------------------------- -import gzip -import io as sysio -import json -import math -import os.path -import re -import tempfile -from json.decoder import JSONDecodeError -from typing import Dict, List, Optional -from configparser import ConfigParser - -from .op_tree import OpTreeBuilder -from .. import io, utils -from ..consts import InputFilesType, INPUT_FILE_LIST -from ..utils import href -from . import trace -from .communication import analyze_communication_nodes -from .event_parser import CommLibTypes, EventParser, ProfileRole -from .gpu_metrics_parser import GPUMetricsParser -from .kernel_parser import KernelParser -from .memory_parser import MemoryParser, MemorySnapshot -from .node import OperatorNode -from .op_agg import ModuleAggregator -from .overall_parser import OverallParser -from .tensor_cores_parser import TensorCoresParser -from .trace import BaseEvent, EventTypes, MemoryEvent - -logger = utils.get_logger() -config = ConfigParser() -config_path = os.path.join(os.getcwd(), 'torch_tb_profiler', 'config', '../config/config.ini') -config.read(config_path) - - -class RunProfileData(object): - def __init__(self, worker: str, span: str, trace_json: Dict): - self.worker = worker - self.span = span - - # metadatas - self.is_pytorch_lightning = trace_json.get('Framework', None) == 'pytorch-lightning' - self.data_schema_version = trace_json.get('schemaVersion', None) - self.distributed_info = trace_json.get('distributedInfo', None) - self.device_props = trace_json.get('deviceProperties', None) - - self.profiler_start_ts = float('inf') - self.events: List[BaseEvent] = [] - - trace_body = trace_json.get('traceEvents', None) - fwd_bwd_events = [] - if trace_body is not None: - for data in trace_body: - if data.get('ts') is not None: - try: - self.profiler_start_ts = min(self.profiler_start_ts, float(data.get('ts'))) - except ValueError: - logger.warning(f'The operator {data.get("name")} has wrong "ts" format, expected a number.') - if data.get('cat') == 'forward_backward': - fwd_bwd_events.append(data) - else: - event = trace.create_event(data, self.is_pytorch_lightning) - if event is not None: - event.ts = float(event.ts) - self.events.append(event) - - self.events.sort(key=lambda e: e.ts) - self.forward_backward_events = trace.create_association_events(fwd_bwd_events) - - self.trace_file_path: str = None - self.kernel_file_path: str = None - self.kernel_statistic_path: str = None - - # Event Parser results - self.tid2tree: Dict[int, OperatorNode] = None - self.pl_tid2tree: Dict[int, OperatorNode] = None - self.used_devices = [] - self.use_dp: bool = False - self.use_ddp: bool = False - self.comm_lib = None - self.has_runtime: bool = False - self.has_kernel: bool = True - self.has_trace: bool = True - self.has_communication: bool = False - self.has_memcpy_or_memset: bool = False - self.role_ranges = None - self.steps_costs = None - self.steps_names = None - self.avg_costs = None - self.has_memory: bool = False - self.has_operator_view: bool = False - - # GPU parser - self.gpu_metrics_parser: GPUMetricsParser = None - - # Operator aggregator - self.op_list_groupby_name = None - self.op_list_groupby_name_input = None - self.stack_lists_group_by_name = None - self.stack_lists_group_by_name_input = None - self.kernel_list_groupby_name_op = None - - # Kernel and Tensor Core - self.kernel_stat = None - self.tc_ratio = None - self.tc_eligible_ops_kernel_ratio = None - self.tc_used_ratio = None # If it's a pure CPU run, then this keeps as 
None. - - # Communicator - self.comm_node_list = None - self.comm_overlap_costs = None - self.memory_snapshot: Optional[MemorySnapshot] = None - - # recommendation based on analysis result. - self.recommendations = [] - - # npu memory data - self.memory_operator_path: str = None - self.memory_curve_path: str = None - self.memory_component_path: str = None - - # npu operator data - self.operator_path: str = None - - # npu communication data - self.distributed_csv_path: str = None - self.communication_json_path: str = None - - self.step_to_overlap = None - self.step_to_wait = None - self.comm_op = None - - @staticmethod - def parse_gpu(worker, span, path, cache_dir): - trace_path, trace_json = RunProfileData._preprocess_file(path, cache_dir, 'GPU') - if not trace_json: - return None - - profile = RunProfileData.from_json(worker, span, trace_json) - profile.trace_file_path = trace_path - return profile - - @staticmethod - def parse_npu(worker, span, path, cache_dir): - trace_json = {} - trace_path = path - has_trace = False - has_kernel = False - has_memory_record = False - has_memory_operator = False - has_communication_overlap = False - has_communication_wait_ops = False - - for file in io.listdir(path): - if utils.is_npu_trace_path(file) and io.check_file_valid(io.join(path, file)): - has_trace = True - trace_file = io.join(path, file) - trace_path, trace_json = RunProfileData._preprocess_file(trace_file, cache_dir, 'Ascend') - break - - profile = RunProfileData(worker, span, trace_json) - with utils.timing('EventParser.parse'): - parser = EventParser() - with utils.timing('EventParser: parse nodes'): - tid2list, tid2zero_rt_list, staled_device_nodes, _ = parser.parse_nodes(profile.events) - - with utils.timing('EventParser: build operator tree'): - builder = OpTreeBuilder() - profile.tid2tree = builder.build_tree(tid2list, tid2zero_rt_list, staled_device_nodes, - fwd_bwd_map=profile.forward_backward_events, is_ascend=True) - profile.trace_file_path = trace_path - profile.has_trace = has_trace - if math.isinf(profile.profiler_start_ts): - profile.profiler_start_ts = 0 - - for file in io.listdir(path): - if str(file) in INPUT_FILE_LIST and io.check_file_valid(io.join(path, file)): - if InputFilesType(file) == InputFilesType.KERNEL_DETAILS_CSV: - has_kernel = True - profile.kernel_file_path = io.join(path, file) - if InputFilesType(file) == InputFilesType.MEMORY_RECORD_CSV: - has_memory_record = True - profile.memory_curve_path = io.join(path, file) - if InputFilesType(file) == InputFilesType.MEMORY_OPERATOR_CSV: - has_memory_operator = True - profile.memory_operator_path = io.join(path, file) - if InputFilesType(file) == InputFilesType.MEMORY_COMPONENT_CSV: - profile.memory_component_path = io.join(path, file) - if InputFilesType(file) == InputFilesType.OPERATOR_DETAILS_CSV: - profile.has_operator_view = True - profile.operator_path = io.join(path, file) - if InputFilesType(file) == InputFilesType.DISTRIBUTED_STEP_CSV: - has_communication_overlap = True - profile.distributed_csv_path = io.join(path, file) - if InputFilesType(file) == InputFilesType.DISTRIBUTED_COMMUNICATION_JSON: - has_communication_wait_ops = True - profile.communication_json_path = io.join(path, file) - - profile.has_kernel = has_kernel - profile.has_memory = has_memory_operator and has_memory_record - profile.has_communication = has_communication_wait_ops and has_communication_overlap - if profile.has_communication: - with utils.timing('EventParser.parse'): - parser = EventParser() - with utils.timing('EventParser: parse 
steps times'): - # Process steps - parser.parse_steps(profile.events, parser.communication_data) - - profile.steps_names = parser.steps_names - return profile - - @staticmethod - def from_json(worker, span, trace_json: Dict): - profile = RunProfileData(worker, span, trace_json) - with utils.timing('Data processing'): - profile.process() - profile.analyze() - return profile - - @staticmethod - def _preprocess_file(trace_path, cache_dir, device_target): - if not io.exists(trace_path): - raise FileNotFoundError(trace_path) - - data = io.read(trace_path) - if trace_path.endswith('.gz'): - data = gzip.decompress(data) - - json_reencode = False - try: - trace_json = json.loads(data) - except JSONDecodeError as e: - # Kineto may export json file with non-ascii code. before this is fixed, use a workaround - # to handle JSONDecodeError, re-encode it and save to a temp file - try: - trace_json = json.loads(data, strict=False) - except JSONDecodeError: - with sysio.StringIO() as fout: - str_data = data.decode('utf-8') - # only replace the N/A without surrounding double quote - fout.write(re.sub(r'(? 24 * 3600 * 1000: - del trace_json['traceEvents'][end_index] - json_reencode = True - - if json_reencode: - fp = tempfile.NamedTemporaryFile('w+t', suffix='.json.gz', dir=cache_dir, delete=False) - fp.close() - with gzip.open(fp.name, mode='wt') as fzip: - fzip.write(json.dumps(trace_json)) - trace_path = fp.name - - return trace_path, trace_json - - def process(self): - with utils.timing('EventParser.parse'): - parser = EventParser() - self.tid2tree, self.pl_tid2tree = parser.parse(self.events, self.forward_backward_events) - - self.has_runtime = parser.has_runtime - self.has_kernel = parser.has_kernel - self.has_memcpy_or_memset = parser.has_memcpy_or_memset - self.steps_names = parser.steps_names - self.used_devices = sorted(list(parser.used_devices)) - self.use_dp = parser.use_dp - self.use_ddp = parser.use_ddp - self.role_ranges = parser.role_ranges - - self.comm_lib = parser.comm_lib - self.has_communication = parser.has_communication - self.comm_node_list = parser.comm_node_list - - # Starting aggregate - logger.debug('ModuleAggregator') - with utils.timing('ModuleAggregator aggegation'): - module_aggregator = ModuleAggregator() - module_aggregator.aggregate(self.tid2tree) - self.op_list_groupby_name = module_aggregator.op_list_groupby_name - self.op_list_groupby_name_input = module_aggregator.op_list_groupby_name_input - self.stack_lists_group_by_name = module_aggregator.stack_lists_group_by_name - self.stack_lists_group_by_name_input = module_aggregator.stack_lists_group_by_name_input - self.kernel_list_groupby_name_op = module_aggregator.kernel_list_groupby_name_op - - logger.debug('OverallParser') - with utils.timing('OverallParser aggegation'): - overall_parser = OverallParser() - overall_parser.aggregate(parser.steps, parser.role_ranges) - self.avg_costs = overall_parser.avg_costs - self.steps_costs = overall_parser.steps_costs - self.comm_overlap_costs = overall_parser.communication_overlap - - logger.debug('GPUMetricsParser') - self.gpu_metrics_parser = GPUMetricsParser.parse_events( - self.events, parser.global_start_ts, parser.global_end_ts, parser.steps[0][0], parser.steps[-1][1]) - - logger.debug('TensorCoresParser') - tensorcores_parser = TensorCoresParser.parse_events( - self.tid2tree, module_aggregator.ops, self.gpu_metrics_parser.gpu_ids) - self.tc_eligible_ops_kernel_ratio = tensorcores_parser.tc_eligible_ops_kernel_ratio - self.tc_ratio = tensorcores_parser.tc_ratio - - if 
self.has_kernel: - logger.debug('KernelParser') - with utils.timing('parse kernels'): - kernel_parser = KernelParser() - kernel_parser.parse_events(self.events) - self.kernel_stat = kernel_parser.kernel_stat - self.tc_used_ratio = kernel_parser.tc_used_ratio - - memory_events = self._memory_events() - if memory_events: - memory_parser = MemoryParser(memory_events) - self.memory_snapshot = memory_parser.find_memory_nodes(self.tid2tree) - - def analyze(self): - self.recommendations = [] - - dataloader_ratio = self.avg_costs.costs[ProfileRole.DataLoader] / self.avg_costs.costs[ProfileRole.Total] - if dataloader_ratio > 0.05: - percentage = dataloader_ratio * 100 - url = config.get('URL', 'pytorch_data_loading_url') - self.recommendations.append( - f'This run has high time cost on input data loading. {percentage:.1f}% of the step ' + - "time is in DataLoader. You could try to set num_workers on DataLoader's construction " + - f"and {href('enable multi-processes on data loading', url)}." - ) - - self._analyze_distributed_metrics() - self._analyze_gpu_metrics() - - if self.device_props: - # Tensor Cores feature is available on GPU cards with compute capability >= 7.0 - major = self.device_props[0].get('computeMajor') - # If it's a pure CPU run, then self.tc_used_ratio is None, this rule will not be triggered. - if major is not None and major >= 7: - if math.isclose(self.tc_used_ratio, 0.0) and self.tc_eligible_ops_kernel_ratio > 0.0: - url = config.get('URL', 'pytorch_amp_url') - self.recommendations.append( - f'Kernels with {round(self.tc_eligible_ops_kernel_ratio * 100)}%' - ' time are launched by Tensor Cores eligible operators. ' - f"You could enable {href('Automatic Mixed Precision', url)} to speedup by using FP16.") - - # Memory related - if self.memory_snapshot: - for (dev_type, dev_id), peak_mem in self.memory_snapshot.get_peak_memory().items(): - if dev_type == -1: # ignore cpu - continue - total_mem = self.device_props[dev_id].get('totalGlobalMem') - if total_mem is not None and peak_mem > total_mem * 0.9: - percentage = peak_mem / total_mem * 100 if total_mem > 0 else 0 - total_mem_gb = total_mem / 1024 / 1024 / 1024 - ckp_url = config.get('URL', 'pytorch_ckp_url') - amp_url = config.get('URL', 'pytorch_amp_url') - self.recommendations.append( - f'Device memory usage is at the limit of device memory capacity ' - f'({percentage:.1f}% of {total_mem_gb:.1f}GB on GPU{dev_id}). 
' - 'To get better value of your GPU or to use larger batch size for training, please refer to ' - f"{href('Gradient Checkpoint', ckp_url)} or {href('Automatic Mixed Precision', amp_url)}.") - break - - def _analyze_distributed_metrics(self): - if self.use_dp and len(self.used_devices) > 1: - url = config.get('URL', 'cuda_nn_ddp_instead_url') - self.recommendations.append( - f"It is recommended to {href('use DistributedDataParallel instead of DataParallel', url)}" - ' to do multi-GPU training.') - - if self.use_ddp and CommLibTypes.Nccl not in self.comm_lib and self.device_props: - for device_prop in self.device_props: - major = device_prop.get('computeMajor') - minor = device_prop.get('computeMinor') - if major is None or minor is None: - continue - compute_capability = '{}.{}'.format(major, minor) - if float(compute_capability) >= 3.5: - text = ( - 'Nccl backend is currently the fastest and highly recommended backend' - ' when using DDP for training.') - self.recommendations.append(text) - break - - communication_ratio = self.avg_costs.costs[ProfileRole.Communication] / self.avg_costs.costs[ProfileRole.Total] - if communication_ratio > 0.1: - percentage = communication_ratio * 100 - compress_url = config.get('URL', 'compress_url') - grad_acc_url = config.get('URL', 'grad_acc_url') - lamb_url = config.get('URL', 'lamb_url') - self.recommendations.append( - f'This run has high time cost on communication. {percentage:.1f}% of the step time is in ' - f"communication. You could try {href('Gradient Compression', compress_url)} or " - f"{href('Gradient Accumulation', grad_acc_url)} or increase the batch size. " - 'Note: Gradient accumulation will increase global effective batch size, which may hurt model ' - f"convergence and accuracy. For such case, you may want to evaluate {href('LAMB optimizer', lamb_url)}." - ) - - def _memory_events(self) -> List[MemoryEvent]: - memory_events = [e for e in self.events if e.type == EventTypes.MEMORY] - memory_events.sort(key=lambda e: e.ts) - return memory_events - - def _analyze_gpu_metrics(self): - def get_gpus_str(gpus): - gpu_list_str = str(gpus[0]) - for i in range(1, len(gpus)): - if i == len(gpus) - 1: - gpu_list_str += 'and {}'.format(gpus[i]) - else: - gpu_list_str += ', {}'.format(gpus[i]) - has_str = 'has' if len(gpu_list_str) == 1 else 'have' - return gpu_list_str, has_str - - low_util_gpus = [] - for gpu_id in self.gpu_metrics_parser.gpu_ids: - if self.gpu_metrics_parser.gpu_utilization[gpu_id] < 0.5: - low_util_gpus.append(gpu_id) - if len(low_util_gpus) > 0: - gpu_list_str, has_str = get_gpus_str(low_util_gpus) - text = 'GPU {} {} low utilization. You could try to ' \ - 'increase batch size to improve. 
Note: Increasing batch size ' \ - 'may affect the speed and stability of model convergence.'.format(gpu_list_str, has_str) - self.recommendations.append(text) - - -class DistributedRunProfileData: - def __init__(self, run_profile_data: RunProfileData): - self.worker = run_profile_data.worker - self.span = run_profile_data.span - self.steps_names = run_profile_data.steps_names - self.has_communication = run_profile_data.has_communication - self.comm_lib = run_profile_data.comm_lib - self.comm_node_list = run_profile_data.comm_node_list - self.comm_overlap_costs = run_profile_data.comm_overlap_costs - self.used_devices = run_profile_data.used_devices - self.device_props = run_profile_data.device_props - self.distributed_info = run_profile_data.distributed_info - - self.step_to_overlap = run_profile_data.step_to_overlap - self.step_to_wait = run_profile_data.step_to_wait - self.comm_op = run_profile_data.comm_op - - self.total_comm_stats = None - self.step_comm_stats = None - - def communication_parse(self): - self.step_comm_stats, self.total_comm_stats = analyze_communication_nodes(self.comm_node_list) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/__init__.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/__init__.py deleted file mode 100644 index 3dae52e351eaa75fc65788904de951cc68aba4ab..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .contract import DiffStats, OpAgg -from .tree import (DiffNode, compare_op_tree, diff_summary, print_node, - print_ops) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/contract.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/contract.py deleted file mode 100644 index ce0cba35de7b752ea6dab966fd7febec77bf6f02..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/contract.py +++ /dev/null @@ -1,99 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. 
-# ------------------------------------------------------------------------- -from collections import namedtuple -from typing import Dict, List - -OpAgg = namedtuple('OpAgg', [ - 'name', - 'calls', - 'host_duration', - 'device_duration', - 'self_host_duration', - 'self_device_duration']) - - -class OpStats: - def __init__(self, - name, - duration, - device_duration, - total_duration, - aggs: List[OpAgg]): - self.name = name - self.duration = duration - self.device_duration = device_duration - self.total_duration = total_duration - self.op_aggs = aggs - - def __str__(self) -> str: - return f'{self.name}: {self.duration}/{self.device_duration}/{len(self.op_aggs)}' - - -class DiffStats: - def __init__(self, left: OpStats, right: OpStats): - self.left = left - self.right = right - self.children: List[DiffStats] = [] - - def flatten_diff_tree(self) -> Dict[str, 'DiffStats']: - result: Dict[str, DiffStats] = {} - - def traverse(node: DiffStats, path: str): - result[path] = node - for i, child in enumerate(node.children): - traverse(child, f'{path}-{i}') - - traverse(self, '0') - return result - - def to_dict(self): - d = { - 'left': { - 'name': self.left.name, - 'duration': self.left.duration, - 'device_duration': self.left.device_duration, - 'total_duration': self.left.total_duration, - 'aggs': [] - }, - 'right': { - 'name': self.right.name, - 'duration': self.right.duration, - 'device_duration': self.right.device_duration, - 'total_duration': self.right.total_duration, - 'aggs': [] - } - } - - for agg in self.left.op_aggs: - d['left']['aggs'].append(agg._asdict()) - - for agg in self.right.op_aggs: - d['right']['aggs'].append(agg._asdict()) - - return d - - def get_diff_tree_summary(self): - def traverse_node_recursive(node: DiffStats): - d = node.to_dict() - - d['children'] = [] - for c in node.children: - d['children'].append(traverse_node_recursive(c)) - - return d - - return traverse_node_recursive(self) - - def get_diff_node_summary(self, path: str): - def traverse_node(node: DiffStats, path: str): - d = node.to_dict() - d['path'] = path - return d - - d = traverse_node(self, path) - d['children'] = [] - for i, c in enumerate(self.children): - d['children'].append(traverse_node(c, f'{path}-{i}')) - - return d diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/operator.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/operator.py deleted file mode 100644 index 4434c65ad6ea575bd5deb010e0bde9f7a8d24a9d..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/operator.py +++ /dev/null @@ -1,124 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. 
-# ------------------------------------------------------------------------- -from abc import ABCMeta -from typing import List, Tuple, Union - -from ..node import DeviceNode, OperatorNode -from ..op_agg import aggregate_ops -from .contract import OpAgg - - -class Operator(metaclass=ABCMeta): - def __init__(self, name) -> None: - self.name: str = name - - def __str__(self) -> str: - return f'{self.name}: {self.duration}' - - @property - def duration(self) -> int: - return 0 - - @property - def device_duration(self) -> int: - return 0 - - @property - def total_duration(self): - return self.device_duration or self.duration - - def aggregate_ops(self): - ops, _ = self.get_operators_and_kernels() - agg_result = aggregate_ops(ops, [lambda x: x.name])[0] - for agg in agg_result.values(): - yield OpAgg( - agg.name, - agg.calls, - round(agg.host_duration, 3), - round(agg.device_duration, 3), - round(agg.self_host_duration, 3), - round(agg.self_device_duration, 3)) - - def get_operators_and_kernels(self) -> Tuple[List[OperatorNode], List[DeviceNode]]: - return [], [] - - -class BlankOp(Operator): - def __init__(self) -> None: - super().__init__('Blank') - - -class UnknownOp(Operator): - def __init__(self, device_duration: int, duration: int) -> None: - super().__init__('Unknown') - # Store in private attributes so the read-only properties below do not - # shadow themselves or recurse. - self._device_duration = device_duration - self._duration = duration - - @property - def duration(self) -> int: - return self._duration - - @property - def device_duration(self) -> int: - return self._device_duration - - -class Operators(Operator): - def __init__(self, nodes: Union[OperatorNode, List[OperatorNode]]): - if not nodes: - raise ValueError('the operator node is None or empty') - if isinstance(nodes, OperatorNode): - super().__init__(nodes.name) - elif isinstance(nodes, list): - super().__init__('CompositeNodes') - - self.op_nodes: Union[OperatorNode, List[OperatorNode]] = nodes - - @property - def duration(self): - if isinstance(self.op_nodes, list): - return sum(n.duration for n in self.op_nodes) - else: - return self.op_nodes.duration - - @property - def device_duration(self): - if isinstance(self.op_nodes, list): - return sum(n.device_duration for n in self.op_nodes) - else: - return self.op_nodes.device_duration - - @property - def total_duration(self): - if isinstance(self.op_nodes, list): - return sum(n.device_duration or n.duration for n in self.op_nodes) - else: - return self.op_nodes.device_duration or self.op_nodes.duration - - def __str__(self) -> str: - if isinstance(self.op_nodes, list): - return f'{self.name}: {len(self.op_nodes)}: {self.op_nodes[0].name}: {self.total_duration}' - else: - return f'{self.name}: {self.op_nodes.__class__.__name__}: {self.total_duration}' - - def get_operators_and_kernels(self) -> Tuple[List[OperatorNode], List[DeviceNode]]: - if isinstance(self.op_nodes, list): - nodes = self.op_nodes - else: - nodes = [self.op_nodes] - - ops: List[OperatorNode] = [] - kernels: List[DeviceNode] = [] - for n in nodes: - op, k = n.get_operator_and_kernels() - ops.extend(op) - kernels.extend(k) - return ops, kernels - - -def create_operator(op_nodes: Union[OperatorNode, List[OperatorNode]]) -> Operator: - if op_nodes: - return Operators(op_nodes) - else: - return BlankOp() diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/tree.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/tree.py deleted file mode 100644 index c5cf5fad448122c74db46467cb0c70b8ce4f727e..0000000000000000000000000000000000000000 ---
a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/diffrun/tree.py +++ /dev/null @@ -1,163 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -import sys -from typing import Generator, List, Union - -from ..node import (BackwardNode, DataLoaderNode, ModuleNode, OperatorNode, - OptimizerNode, ProfilerStepNode) -from .contract import DiffStats, OpStats -from .operator import Operator, Operators, create_operator - -INDENT = ' ' -RUN_NODE_TYPES = (BackwardNode, DataLoaderNode, ModuleNode, OptimizerNode, ProfilerStepNode) - - -class DiffNode: - def __init__(self, left: Operator, right: Operator): - self.left: Operator = left - self.right: Operator = right - self.children: List[DiffNode] = [] - - def build_tree(self): - """build the children from the left_node and right_node""" - if not isinstance(self.left, Operators) or not isinstance(self.right, Operators): - return - - if isinstance(self.left.op_nodes, OperatorNode) and isinstance(self.right.op_nodes, OperatorNode): - # simple node match. - diff_nodes = list(DiffNode.compare_operator_nodes( - self.left.op_nodes.children, self.right.op_nodes.children)) - if diff_nodes: - self.children.extend(diff_nodes) - elif isinstance(self.left.op_nodes, list) and isinstance(self.right.op_nodes, list): - # compare two list - diff_nodes = list(DiffNode.compare_operator_nodes(self.left.op_nodes, self.right.op_nodes)) - if diff_nodes: - self.children.extend(diff_nodes) - else: - # one single item and one list - pass - - @staticmethod - def create_node( - left: Union[OperatorNode, List[OperatorNode]], - right: Union[OperatorNode, List[OperatorNode]]) -> 'DiffNode': - if isinstance(left, list) and len(left) == 1: - left = left[0] - if isinstance(right, list) and len(right) == 1: - right = right[0] - - node = DiffNode(create_operator(left), create_operator(right)) - node.build_tree() - return node - - @staticmethod - def compare_operator_nodes( - left_nodes: List[OperatorNode], - right_nodes: List[OperatorNode]) -> Generator['DiffNode', None, None]: - """Given two OperatorNode lists, find the DataLoader/Module/Backward/Optimizer node and - create the child list DiffNode - """ - right_keys = [(type(r), r.name) for r in right_nodes] - - # find matching points in the two list - matched_paris = [] - key_index = 0 - for i, left_node in enumerate(left_nodes): - if not isinstance(left_node, RUN_NODE_TYPES): - # only handle DataLoader/Module/Backward/Optimizer nodes - continue - - for j in range(key_index, len(right_keys)): - if right_keys[j] == (type(left_node), left_node.name): - matched_paris.append((i, j)) - key_index = j + 1 - break - - if not matched_paris: - # there is not any matching points. - return - - # split the two list by the matching points - l_iter = 0 - r_iter = 0 - - for (left, r) in matched_paris: - left_child = left_nodes[l_iter:left] - right_child = right_nodes[r_iter:r] - if left_child or right_child: - yield DiffNode.create_node(left_child, right_child) - - yield DiffNode.create_node(left_nodes[left], right_nodes[r]) - l_iter = left + 1 - r_iter = r + 1 - # end time is bigger than threshold. - # Or do we need move the logic into frondend for visualization? 
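A minimal standalone sketch of the anchor-matching step implemented above, using plain strings in place of DataLoader/Module/Backward/Optimizer nodes; all names below are made up for illustration and are not taken from a real profile.

```python
# Pair up "anchor" entries that appear in both runs, keeping their relative
# order; the gaps between anchors become the composite children compared above.
left = ['aten::add', 'DataLoader', 'aten::mm', 'Optimizer.step']
right = ['DataLoader', 'aten::mm', 'aten::relu', 'Optimizer.step']
anchors = {'DataLoader', 'Optimizer.step'}  # stands in for RUN_NODE_TYPES

matched_pairs = []
key_index = 0
for i, name in enumerate(left):
    if name not in anchors:
        continue
    for j in range(key_index, len(right)):
        if right[j] == name:
            matched_pairs.append((i, j))
            key_index = j + 1
            break

# Prints [(1, 0), (3, 3)]: the anchors pair up, and the spans between them
# ([aten::add] vs [] and [aten::mm] vs [aten::mm, aten::relu]) are the pieces
# that get wrapped into composite diff nodes.
print(matched_pairs)
```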
- - # process the remaining nodes - left_remaining = left_nodes[l_iter:] - right_remaining = right_nodes[r_iter:] - if left_remaining or right_remaining: - yield DiffNode.create_node(left_remaining, right_remaining) - - -def compare_op_tree(left: OperatorNode, right: OperatorNode) -> DiffNode: - '''Create the diff tree from two root node - TODO: need handle the different threads case - need add runtimes besides of children? - ''' - left_children = list(get_tree_operators(left)) - right_children = list(get_tree_operators(right)) - return DiffNode.create_node(left_children, right_children) - - -def get_tree_operators(root: OperatorNode) -> Generator[OperatorNode, None, None]: - '''Get the operators by the root operators by excluding the ProfilerStepNode - ''' - profiler_nodes = [c for c in root.children if isinstance(c, ProfilerStepNode)] - if not profiler_nodes: - # there is no ProfilerStepNode at all - yield from root.children - else: - yield from (child for p in profiler_nodes for child in p.children) - - -def diff_summary(node: DiffNode) -> DiffStats: - if not node: - return None - - left = OpStats( - node.left.name, - node.left.duration, - node.left.device_duration, - node.left.total_duration, - list(node.left.aggregate_ops())) - right = OpStats( - node.right.name, - node.right.duration, - node.right.device_duration, - node.right.total_duration, - list(node.right.aggregate_ops())) - - stats = DiffStats(left, right) - for child in node.children: - stats.children.append(diff_summary(child)) - - return stats - - -def print_node(node: Union[DiffNode, DiffStats], level: int, index: int, file=sys.stdout): - file.write(f'{INDENT * level}level {level}, index {index}:\n') - file.write(f'{INDENT * (level + 1)}left : {node.left}\n') - file.write(f'{INDENT * (level + 1)}right: {node.right}\n') - for i, child in enumerate(node.children): - print_node(child, level + 1, i, file=file) - - -def print_ops(op: Operators, prefix: str = INDENT, file=sys.stdout): - if isinstance(op.op_nodes, list): - for n in op.op_nodes: - file.write(f'{prefix}{n.name}\n') - else: - file.write(f'{prefix}{op.op_nodes.name}\n') diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/event_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/event_parser.py deleted file mode 100644 index 9b364e0dbba55e07b939690d45123bbf6dc6fe23..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/event_parser.py +++ /dev/null @@ -1,467 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -import sys -from collections import defaultdict -from dataclasses import dataclass -from enum import IntEnum -from typing import Dict, Iterable, List, Optional, Tuple - -from .. 
import utils -from .communication import generate_communication_nodes -from .node import (CommunicationNode, DeviceNode, ModuleNode, OperatorNode, PLModuleNode, PLProfileNode, - ProfilerStepNode, RuntimeNode, create_operator_node) -from .op_tree import OpTreeBuilder -from .range_utils import merge_ranges -from .trace import BaseEvent, DurationEvent, EventTypes, KernelEvent, NcclOpNameSet, GlooOpNameSet - -logger = utils.get_logger() - -CommLibTypes = IntEnum('CommLibTypes', ['Nccl', 'Gloo'], start=0) - - -class ProfileRole(IntEnum): - Kernel = 0 - Memcpy = 1 - Memset = 2 - Communication = 3 - Runtime = 4 - DataLoader = 5 - CpuOp = 6 - Other = 7 - Total = 8 - - -@dataclass -class NodeInfoParams: - event: DurationEvent - corrid_to_device: Dict[int, List[DeviceNode]] - corrid_to_runtime: Dict[int, RuntimeNode] - externalid_to_runtime: Dict[int, List[RuntimeNode]] - tid2list: Dict[int, List[OperatorNode]] - pl_tid2list: Dict[int, List[PLProfileNode]] - tid2zero_rt_list: Dict[int, List[RuntimeNode]] - - -class NodeParserMixin: - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.communication_data: Dict[int, CommunicationNode] = {} - self.device_node_list: List[DeviceNode] = [] - self.runtime_node_list: List[RuntimeNode] = [] - self.used_devices = set() - self.use_dp = False - self.use_ddp = False - self.comm_lib = set() - - def parse_nodes(self, events: Iterable[BaseEvent]): - # For OperatorNode and ProfilerStepNode: - # Use time interval containing relationship to build father-child correlation, - # which is consistent with autograd profiler. - # For RuntimeNode: - # Use external_id to build correlation with its father OperatorNode or ProfilerStepNode. - # Because in the case when RuntimeNode has duration 0 and starts at same time as a OperatorNode, - # just use interval containing relationship can't tell it is child or brother of the OperatorNode. - # value is a list of OperatorNode and ProfilerStepNode. Do not include RuntimeNode - tid2list: Dict[int, List[OperatorNode]] = defaultdict(list) - # value is a list of PLProfileNode. Do not include RuntimeNode - pl_tid2list: Dict[int, List[PLProfileNode]] = defaultdict(list) - # value is a list of RuntimeNode with external_id=0. They will be attached to root nodes. 
- tid2zero_rt_list: Dict[int, List[RuntimeNode]] = defaultdict(list) - corrid_to_device: Dict[int, List[DeviceNode]] = defaultdict(list) # value is a list of DeviceNode - - corrid_to_runtime: Dict[int, RuntimeNode] = {} # value is a RuntimeNode - externalid_to_runtime: Dict[int, List[RuntimeNode]] = defaultdict(list) # value is a list of RuntimeNode - - for event in events: - if event.type == EventTypes.MEMORY: - continue - params = NodeInfoParams(event, corrid_to_device, corrid_to_runtime, externalid_to_runtime, tid2list, - pl_tid2list, tid2zero_rt_list) - self._parse_node(params) - - if CommLibTypes.Nccl in self.comm_lib: - for event in events: - if event.type == EventTypes.KERNEL: - self._update_communication_node(event) - - # associate CUDA Runtimes with CPU events - for op_list in tid2list.values(): - for op in op_list: - runtime_nodes = externalid_to_runtime.pop(op.external_id, []) - if runtime_nodes: - op.runtimes.extend(runtime_nodes) - - if len(corrid_to_device) > 0: - node_count_dict = defaultdict(int) - for nodes in corrid_to_device.values(): - for n in nodes: - node_count_dict[n.type] += 1 - - logger.debug(("Some events doesn't belongs to any operators: " - f"{', '.join([':'.join((k, str(v))) for k, v in node_count_dict.items()])}")) - - staled_device_nodes = [] - for device_nodes in corrid_to_device.values(): - staled_device_nodes.extend([n for n in device_nodes if n.type == EventTypes.KERNEL]) - - return tid2list, tid2zero_rt_list, staled_device_nodes, pl_tid2list - - def _update_communication_node(self, event: KernelEvent): - """Update the communication node by using the TraceEvent instance""" - external_id = event.external_id - comm_node = self.communication_data.get(external_id) - if comm_node: - ts = event.ts - dur = event.duration - comm_node.kernel_ranges.append((ts, ts + dur)) - comm_node.total_time += dur - - return comm_node is not None - - def _parse_node(self, params: NodeInfoParams): - event = params.event - corrid_to_device = params.corrid_to_device - corrid_to_runtime = params.corrid_to_runtime - externalid_to_runtime = params.externalid_to_runtime - tid2list = params.tid2list - pl_tid2list = params.pl_tid2list - tid2zero_rt_list = params.tid2zero_rt_list - corrid = event.correlation_id - tid = event.tid - if event.type in [EventTypes.KERNEL, EventTypes.MEMCPY, EventTypes.MEMSET]: - self.used_devices.add(event.pid) - device_node = DeviceNode.create(event) - if corrid in corrid_to_runtime: - rt_node = corrid_to_runtime[corrid] # Don't pop it because it may be used by next kernel. - if rt_node.device_nodes is None: - rt_node.device_nodes = [] - rt_node.device_nodes.append(device_node) - else: - corrid_to_device[corrid].append(device_node) - self.device_node_list.append(device_node) - elif event.type == EventTypes.RUNTIME: - device_nodes = corrid_to_device.pop(corrid, None) - rt_node = RuntimeNode.create(event, device_nodes) - corrid_to_runtime[corrid] = rt_node - externalid_to_runtime[rt_node.external_id].append(rt_node) - # Some runtimes has external_id 0, which will not be correlated to any operator. - # So get them and attach them to root node. - if rt_node.external_id == 0: - tid2zero_rt_list[tid].append(rt_node) - self.runtime_node_list.append(rt_node) - - # check the external_id - if device_nodes: - for device_node in device_nodes: - if rt_node.external_id != device_node.external_id: - logger.warning( - 'Runtime and Device-op have same correlation id %s but with different external id!' 
- ' (rt external_id, device external_id): (%s, %s)' % - (corrid, rt_node.external_id, device_node.external_id)) - elif event.type in [EventTypes.PYTHON, - EventTypes.OPERATOR, - EventTypes.PL_MODULE, - EventTypes.PROFILER_STEP, - EventTypes.MODULE, - EventTypes.USER_ANNOTATION]: - if event.type == EventTypes.PROFILER_STEP: - op_node = ProfilerStepNode.create(event) - elif event.type == EventTypes.MODULE: - op_node = ModuleNode.create(event) - elif event.type == EventTypes.PL_MODULE: - op_node = PLModuleNode.create(event) - else: - op_node = create_operator_node(event) - if event.name in NcclOpNameSet or event.name in GlooOpNameSet: - comm_node = CommunicationNode.create(event) - if event.name in NcclOpNameSet: - self.comm_lib.add(CommLibTypes.Nccl) - if event.name in GlooOpNameSet: - self.comm_lib.add(CommLibTypes.Gloo) - ts = event.ts - dur = event.duration - comm_node.kernel_ranges.append((ts, ts + dur)) - comm_node.total_time = dur - self.communication_data[op_node.external_id] = comm_node - if event.name == 'DataParallel.forward': - self.use_dp = True - if event.name == 'DistributedDataParallel.forward': - self.use_ddp = True - if op_node: - tid2list[int(tid)].append(op_node) - elif event.type == EventTypes.PL_PROFILE: - op_node = PLProfileNode.create(event) - pl_tid2list[int(tid)].append(op_node) - - -class StepParser: - def __init__(self): - # we could not use [[]] * len here since they all point to same memory - self.role_ranges: List[List[Tuple[int, int]]] = [[] for _ in range(ProfileRole.Total - 1)] - self.steps: List[Tuple[int, int]] = [] - self.steps_names: List[str] = [] - self.cpu_min_ts = sys.maxsize # Min time of CPU side events. - self.cpu_max_ts = -sys.maxsize - 1 # Max time of CPU side events. - self.global_min_ts = sys.maxsize # Min time of all events. - self.global_max_ts = -sys.maxsize - 1 # Max time of all events. - # The below two form time range for adding gpu utilization to trace view. - # Use 'PyTorch Profiler (0)' as them. - # If not exists, assign global_min_ts and global_max_ts to them. 
- self.global_start_ts = sys.maxsize - self.global_end_ts = -sys.maxsize - 1 - - def parse_steps(self, events: Iterable[DurationEvent], comm_nodes: Dict[int, CommunicationNode]): - for event in events: - if event.type == EventTypes.MEMORY: - continue - - self._parse_step(event, comm_nodes) - if event.type == EventTypes.TRACE and event.name == 'PyTorch Profiler (0)': - self.global_start_ts = event.ts - self.global_end_ts = event.ts + event.duration - if self.global_start_ts == sys.maxsize: - self.global_start_ts = self.global_min_ts - if self.global_end_ts == -sys.maxsize - 1: - self.global_end_ts = self.global_max_ts - - if len(self.steps) == 0: - self.steps.append((self.cpu_min_ts, self.cpu_max_ts)) - self.steps_names.append('0') - - for i, role_range in enumerate(self.role_ranges): - self.role_ranges[i] = merge_ranges(role_range) - - def update_device_steps(self, runtime_node_list: List[RuntimeNode]): - self._update_steps_duration(*self._find_device_steps(runtime_node_list)) - - @property - def has_runtime(self): - return bool(self.role_ranges[ProfileRole.Runtime]) - - @property - def has_kernel(self): - return bool(self.role_ranges[ProfileRole.Kernel]) - - @property - def has_communication(self): - return bool(self.role_ranges[ProfileRole.Communication]) - - @property - def has_memcpy_or_memset(self): - return bool(self.role_ranges[ProfileRole.Memcpy] or self.role_ranges[ProfileRole.Memset]) - - def _parse_step(self, event: DurationEvent, comm_nodes: Dict[int, CommunicationNode]): - def check_name(name: str): - return (name.startswith('enumerate(DataLoader)#') and name.endswith('.__next__')) or name.startswith( - 'enumerate(DataPipe)#') - - ts = event.ts - dur = event.duration - evt_type = event.type - if evt_type == EventTypes.KERNEL: - if event.external_id in comm_nodes: - self.role_ranges[ProfileRole.Communication].append((ts, ts + dur)) - else: - self.role_ranges[ProfileRole.Kernel].append((ts, ts + dur)) - elif evt_type == EventTypes.MEMCPY: - self.role_ranges[ProfileRole.Memcpy].append((ts, ts + dur)) - elif evt_type == EventTypes.MEMSET: - self.role_ranges[ProfileRole.Memset].append((ts, ts + dur)) - elif evt_type == EventTypes.RUNTIME: - self.role_ranges[ProfileRole.Runtime].append((ts, ts + dur)) - elif evt_type in [EventTypes.OPERATOR, EventTypes.USER_ANNOTATION] and check_name(event.name): - self.role_ranges[ProfileRole.DataLoader].append((ts, ts + dur)) - elif event.type == EventTypes.PROFILER_STEP: - self.steps.append((ts, ts + dur)) - self.steps_names.append(str(event.step)) - elif evt_type in [EventTypes.PYTHON, EventTypes.OPERATOR, EventTypes.USER_ANNOTATION]: - if event.name in GlooOpNameSet or event.name in NcclOpNameSet: - self.role_ranges[ProfileRole.Communication].append((ts, ts + dur)) - else: - self.role_ranges[ProfileRole.CpuOp].append((ts, ts + dur)) - - # Record host side min and max time. - if evt_type in [EventTypes.PYTHON, EventTypes.OPERATOR, EventTypes.PROFILER_STEP]: - self.cpu_min_ts = min(self.cpu_min_ts, ts) - self.cpu_max_ts = max(self.cpu_max_ts, ts + dur) - # Record global wise min and max time. - self.global_min_ts = min(self.global_min_ts, ts) - self.global_max_ts = max(self.global_max_ts, ts + dur) - - def _find_device_steps(self, runtime_node_list: List[RuntimeNode]): - """return steps associated with device nodes. 
- """ - runtime_node_list = sorted(runtime_node_list, key=lambda x: x.start_time) - - # Use similar code with two-way merge to get all runtimes inside each host-side step span, - # then record each step's min kernel start time and max kernel end time: - steps_device: List[Tuple[int, int]] = [(sys.maxsize, -sys.maxsize - 1)] * len(self.steps) - # where the steps associated with devcie node, if yes, the related array item is larger than 0. - steps_matched_device_nodes: List[int] = [0] * len(self.steps) - - i_step = 0 - i_runtime = 0 - step_device_min_ts = sys.maxsize - step_device_max_ts = -sys.maxsize - 1 - matched_device_nodes = set() - - while i_step < len(self.steps) and i_runtime < len(runtime_node_list): - step_host_start_time = self.steps[i_step][0] - step_host_end_time = self.steps[i_step][1] - if runtime_node_list[i_runtime].start_time < step_host_start_time: - # This runtime is ahead of or intersects with this step span. Skip this runtime. - i_runtime += 1 - elif runtime_node_list[i_runtime].end_time <= step_host_end_time: - # and runtime_node_list[i_runtime].start_time >= step_host_start_time - # This runtime is inside this step span. Scan its device_nodes. - rt = runtime_node_list[i_runtime] - if rt.device_nodes is not None: - for device_node in rt.device_nodes: - step_device_min_ts = min(device_node.start_time, step_device_min_ts) - step_device_max_ts = max(device_node.end_time, step_device_max_ts) - matched_device_nodes.add(device_node) - steps_matched_device_nodes[i_step] += 1 - i_runtime += 1 - elif runtime_node_list[i_runtime].start_time < step_host_end_time: - # and runtime_node_list[i_runtime].end_time > step_host_end_time - # This runtime intersects with this step span. Skip this runtime. - i_runtime += 1 - else: - # runtime_node_list[i_runtime].start_time >= step_host_end_time - # This runtime starts after this step's end. Record and move forward this step. - steps_device[i_step] = (step_device_min_ts, step_device_max_ts) - i_step += 1 - step_device_min_ts = sys.maxsize - step_device_max_ts = -sys.maxsize - 1 - - while i_step < len(self.steps): - # This step doesn't launch any device side event, just assign it as empty. - steps_device[i_step] = (step_device_min_ts, step_device_max_ts) - step_device_min_ts = sys.maxsize - step_device_max_ts = -sys.maxsize - 1 - i_step += 1 - - # If there are matched device, find the first step end time before steps_device[0][0] - prev_step_end_time: Optional[int] = None - if len(matched_device_nodes) > 0: - prev_step_end_time = self.steps[0][0] - if steps_device[0][0] != sys.maxsize: # When step 0 has device event. - for device_node in self.device_node_list: - if device_node not in matched_device_nodes: - # Now this device_node is not launched inside any step span. - if device_node.end_time < steps_device[0][0]: - prev_step_end_time = max(prev_step_end_time, device_node.end_time) - - return prev_step_end_time, steps_device, steps_matched_device_nodes - - def _update_steps_duration(self, - prev_step_end_time: Optional[int], - steps_device: List[Tuple[int, int]], - steps_matched_device_nodes: List[int]): - """Update self.steps considering device side events launched by each host side step. - Update self.steps_names if some tail steps are removed.""" - - # Change step time to device side on the condition that any step have device time. 
- is_use_gpu = prev_step_end_time is not None - if is_use_gpu: - for i_step, step in enumerate(self.steps): - step_start_time = max(prev_step_end_time, step[0]) - step_end_time = step[1] - if steps_device[i_step][0] == sys.maxsize: # When step i_step has no device event. - # Assign to step_start_time when kernel is behind host step end. - step_end_time = max(step_end_time, step_start_time) - else: - step_end_time = max(step_end_time, steps_device[i_step][1]) - if step_end_time < step_start_time: - logger.warning( - 'Abnormal step_end_time of step {}: [{}, {}]'.format( - i_step, step_start_time, step_end_time)) - step_end_time = step_start_time - self.steps[i_step] = (step_start_time, step_end_time) # Update step time considering device side. - prev_step_end_time = step_end_time - - is_remove_tail_steps = True - if is_use_gpu and len(self.steps) > 1 and is_remove_tail_steps: - i_step = len(self.steps) - 1 - while i_step >= 0: - if steps_matched_device_nodes[i_step] > 0: - break - i_step -= 1 - if i_step >= 0: - keep_steps = i_step + 1 - if i_step > 0 and steps_matched_device_nodes[i_step - 1] * 0.8 > steps_matched_device_nodes[i_step]: - keep_steps = i_step - if keep_steps < len(self.steps): - logger.warning( - 'Remove the last {} steps from overview. ' - 'Because the profiler may fail to capture all the kernels launched by these steps.'.format( - len(self.steps) - keep_steps - )) - self.steps = self.steps[:keep_steps] - self.steps_names = self.steps_names[:keep_steps] - - -class EventParser(NodeParserMixin, StepParser): - def __init__(self): - super().__init__() - self.comm_node_list: List[CommunicationNode] = None - - def parse(self, events: Iterable[BaseEvent], fwd_bwd_map: Dict[int, int]) -> Dict[int, List[OperatorNode]]: - with utils.timing('EventParser: parse nodes'): - tid2list, tid2zero_rt_list, staled_device_nodes, pl_tid2list = self.parse_nodes(events) - - with utils.timing('EventParser: build operator tree'): - builder = OpTreeBuilder() - tid2tree = builder.build_tree(tid2list, tid2zero_rt_list, staled_device_nodes, fwd_bwd_map=fwd_bwd_map) - pl_tid2tree = builder.build_tree(pl_tid2list, {}, [], {}) - - with utils.timing('EventParser: parse steps times'): - # Process steps - self.parse_steps(events, self.communication_data) - if len(self.comm_lib) > 1: - logger.warning( - 'Multiple communication libs are found. 
To avoid confusion, we disable the distributed view.') - self.communication_data.clear() - - # Move the interleaved logic out of each NodeParser and StepParser - self.update_device_steps(self.runtime_node_list) - - self.comm_node_list = generate_communication_nodes(self.communication_data, self.steps, self.steps_names) - return tid2tree, pl_tid2tree - - @staticmethod - def print_tree(root): - class Ctx: - tid: int = -1 - name_stack: list = [] - - ctx = Ctx() - - def print_node_set_prefix(node: OperatorNode): - header = f'[{ctx.tid}]' + '.'.join(ctx.name_stack[1:]) # omit the CallTreeRoot - prefix_len = len(ctx.name_stack) * 4 - 4 - 1 - if len(ctx.name_stack) > 1: - logger.info(header) - prefix = ' ' * prefix_len - logger.info(prefix, node.name) - logger.info(prefix, 'time:', node.start_time, '-->', node.end_time) - - def push(node: OperatorNode): - ctx.name_stack.append(node.name) - - def pop(): - ctx.name_stack.pop() - - def traverse_operator_node(node: OperatorNode): - print_node_set_prefix(node) - - push(node) - for n in node.children: - traverse_operator_node(n) - pop() - - ctx.tid = root.tid - traverse_operator_node(root) - ctx.tid = -1 diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/gpu_metrics_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/gpu_metrics_parser.py deleted file mode 100644 index 1321d5f6df8d0a6a06abc9342d50ab843a5d41ea..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/gpu_metrics_parser.py +++ /dev/null @@ -1,314 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -------------------------------------------------------------------------- -from typing import Iterable, List - -from .. import consts, utils -from .range_utils import (get_ranges_sum, intersection_ranges_lists, - intersection_ranges_lists_with_value, merge_ranges, - merge_ranges_with_value) -from .trace import BaseEvent, EventTypes, KernelEvent - -logger = utils.get_logger() - - -# For calculating GPU utilization, and approximated SM efficiency. -class GPUMetricsParser(object): - def __init__(self): - # All GPU ids used by any kernel. - self.gpu_ids = set() - # For calculating GPU utilization. - self.kernel_ranges_per_device = [[] for _ in range(consts.MAX_GPU_PER_NODE)] - self.gpu_utilization = [None] * consts.MAX_GPU_PER_NODE - self.gpu_util_timeline_unit_size = 0 - self.gpu_util_timeline_unit_name = '' - self.gpu_util_buckets = [[] for _ in range(consts.MAX_GPU_PER_NODE)] - # For calculating approximated SM efficiency. - self.blocks_per_sm_per_device = [[] for _ in range(consts.MAX_GPU_PER_NODE)] - self.avg_approximated_sm_efficiency_per_device = [None] * consts.MAX_GPU_PER_NODE - self.approximated_sm_efficiency_ranges = [[] for _ in range(consts.MAX_GPU_PER_NODE)] - self.gpu_sm_efficiency_json = None - self.blocks_per_sm_count = [0] * consts.MAX_GPU_PER_NODE - # For calculating averaged occupancy. - self.occupancy_per_device = [[] for _ in range(consts.MAX_GPU_PER_NODE)] - self.avg_occupancy_per_device = [None] * consts.MAX_GPU_PER_NODE - self.occupancy_count = [0] * consts.MAX_GPU_PER_NODE - - def calculate_gpu_utilization(self, global_start_time, global_end_time, steps_start_time, steps_end_time): - # Make bucket_size a power of 10 in microseconds, and keep the number of buckets in (10, 100]. - # Powers of 10 in microseconds are straightforward for users to understand.
- # If number of buckets are too many, the value of gpu utilization will be either 0 or 1. - def get_bucket_info(range_micro_seconds): - max_buckets = 100 - bucket_size = 1 - while range_micro_seconds / bucket_size > max_buckets: - bucket_size *= 10 - buckets = int(range_micro_seconds / bucket_size) - unit = bucket_size - unit_str = 'us' - if unit >= 1000: - unit /= 1000 - unit_str = 'ms' - if unit >= 1000: - unit /= 1000 - unit_str = 's' - return int(bucket_size), int(buckets), int(unit), unit_str - - gpu_utilization_timeline = [[] for _ in range(consts.MAX_GPU_PER_NODE)] - for gpu_id in self.gpu_ids: - self.kernel_ranges_per_device[gpu_id] = merge_ranges(self.kernel_ranges_per_device[gpu_id]) - - # Top-level number still consider steps, to be consistent with overview's breakdown. - kernel_ranges_all_steps = intersection_ranges_lists( - self.kernel_ranges_per_device[gpu_id], [(steps_start_time, steps_end_time)]) - ranges_sum = get_ranges_sum(kernel_ranges_all_steps) - self.gpu_utilization[gpu_id] = ranges_sum / (steps_end_time - steps_start_time) - - # The timeline will use 'PyTorch Profiler (0)' as start, - # in order to draw previous step's kernels' gpu utilization. - bucket_size, buckets, self.gpu_util_timeline_unit_size, self.gpu_util_timeline_unit_name = \ - get_bucket_info(global_end_time - global_start_time) - buckets_ranges = [] - for i in range(buckets): - buckets_ranges.append((global_start_time + i * bucket_size, - global_start_time + (i + 1) * bucket_size if i < buckets - 1 - else global_end_time)) # The last bucket may be longer. - gpu_utilization_timeline[gpu_id] = [0] * buckets - if len(self.kernel_ranges_per_device[gpu_id]) > 0: - current_range_index = 0 - current_range = self.kernel_ranges_per_device[gpu_id][current_range_index] - current_bucket_index = 0 - current_bucket = buckets_ranges[0] - while (current_range_index < len(self.kernel_ranges_per_device[gpu_id]) - and current_bucket_index < buckets): - if current_bucket[1] <= current_range[0]: - current_bucket_index += 1 - current_bucket = buckets_ranges[current_bucket_index] if current_bucket_index < buckets \ - else None - elif current_bucket[0] >= current_range[1]: - current_range_index += 1 - if current_range_index < len(self.kernel_ranges_per_device[gpu_id]): - current_range = self.kernel_ranges_per_device[gpu_id][current_range_index] - else: - left_bound = max(current_range[0], current_bucket[0]) - right_bound = min(current_range[1], current_bucket[1]) - gpu_utilization_timeline[gpu_id][current_bucket_index] += (right_bound - left_bound) - if current_bucket[1] < current_range[1]: - current_bucket_index += 1 - current_bucket = buckets_ranges[current_bucket_index] if current_bucket_index < buckets \ - else None - else: - current_range_index += 1 - if current_range_index < len(self.kernel_ranges_per_device[gpu_id]): - current_range = self.kernel_ranges_per_device[gpu_id][current_range_index] - for i_bucket in range(buckets): - bucket_size = buckets_ranges[i_bucket][1] - buckets_ranges[i_bucket][0] - gpu_utilization_timeline[gpu_id][i_bucket] /= bucket_size - start_time = buckets_ranges[i_bucket][0] - self.gpu_util_buckets[gpu_id].append((start_time, gpu_utilization_timeline[gpu_id][i_bucket])) - start_time = buckets_ranges[-1][1] - self.gpu_util_buckets[gpu_id].append((start_time, 0)) - - self.kernel_ranges_per_device = None # Release memory. 
- - def calculate_approximated_sm_efficiency(self, steps_start_time, steps_end_time): - def calculate_avg(approximated_sm_efficiency_ranges, total_dur): - total_weighted_sm_efficiency = 0.0 - for r in approximated_sm_efficiency_ranges: - dur = r[1] - r[0] - total_weighted_sm_efficiency += r[2] * dur - avg_approximated_sm_efficiency = total_weighted_sm_efficiency / total_dur - return avg_approximated_sm_efficiency - - total_dur = steps_end_time - steps_start_time - for gpu_id in self.gpu_ids: - blocks_per_sm_ranges = self.blocks_per_sm_per_device[gpu_id] - approximated_sm_efficiency_ranges = merge_ranges_with_value(blocks_per_sm_ranges) - # To be consistent with GPU utilization, here it must also intersect with all steps, - # in order to remove the kernels out of steps range. - approximated_sm_efficiency_ranges_all_steps = intersection_ranges_lists_with_value( - approximated_sm_efficiency_ranges, [(steps_start_time, steps_end_time)]) - if len(approximated_sm_efficiency_ranges_all_steps) > 0: - avg_approximated_sm_efficiency = calculate_avg(approximated_sm_efficiency_ranges_all_steps, total_dur) - self.avg_approximated_sm_efficiency_per_device[gpu_id] = avg_approximated_sm_efficiency - - # The timeline still uses all kernels including out of steps scope's. - if len(approximated_sm_efficiency_ranges) > 0: - self.approximated_sm_efficiency_ranges[gpu_id] = approximated_sm_efficiency_ranges - - self.blocks_per_sm_per_device = None # Release memory. - - # Weighted average. Weighted by kernel's time duration. - def calculate_occupancy(self, steps_start_time, steps_end_time): - for gpu_id in self.gpu_ids: - occupancys_on_a_device = self.occupancy_per_device[gpu_id] - total_time = 0 - total_occupancy = 0.0 - for r in occupancys_on_a_device: - min_time = max(r[0], steps_start_time) - max_time = min(r[1], steps_end_time) - if min_time < max_time: - dur = max_time - min_time - total_occupancy += r[2] * dur - total_time += dur - if total_time > 0: - self.avg_occupancy_per_device[gpu_id] = total_occupancy / total_time - - @classmethod - def parse_events(cls, - events: Iterable[BaseEvent], - global_start_time: int, - global_end_time: int, - steps_start_time: int, - steps_end_time: int): - parser = GPUMetricsParser() - logger.debug('GPU Metrics, parse events') - for event in events: - if event.type == EventTypes.KERNEL: - parser.parse_event(event) - - parser.calculate_gpu_utilization(global_start_time, global_end_time, steps_start_time, steps_end_time) - parser.calculate_approximated_sm_efficiency(steps_start_time, steps_end_time) - parser.calculate_occupancy(steps_start_time, steps_end_time) - return parser - - def parse_event(self, event: KernelEvent): - ts = event.ts - dur = event.duration - gpu_id = event.device_id - if gpu_id != event.pid: - logger.warning("pid '{}' is not equal to args.device '{}' on event with ts '{}'".format( - event.pid, gpu_id, event.ts)) - if gpu_id is not None: - if gpu_id not in self.gpu_ids: - self.gpu_ids.add(gpu_id) - self.kernel_ranges_per_device[gpu_id].append((ts, ts + dur)) - if event.blocks_per_sm is not None: - if event.blocks_per_sm > 0.0: - self.blocks_per_sm_per_device[gpu_id].append((ts, ts + dur, event.blocks_per_sm)) - self.blocks_per_sm_count[gpu_id] += 1 - else: - # Workaround for negative value input. 
- logger.warning('blocks per SM {} with ts {} is not positive!'.format(event.blocks_per_sm, ts)) - if event.occupancy is not None: - if event.occupancy >= 0.0: - self.occupancy_per_device[gpu_id].append((ts, ts + dur, event.occupancy)) - self.occupancy_count[gpu_id] += 1 - else: - # Workaround for negative value input. - logger.warning('est. achieved occupancy % {} with ts {} is negative!'.format(event.occupancy, ts)) - - def get_gpu_metrics_columns(self): - columns = [] - if self.has_blocks_per_sm: - columns.append({'type': 'number', 'name': 'Mean Blocks Per SM', - 'tooltip': consts.TOOLTIP_BLOCKS_PER_SM}) - if self.has_occupancy: - columns.append({'type': 'number', 'name': 'Mean Est. Achieved Occupancy (%)', - 'tooltip': consts.TOOLTIP_OCCUPANCY_COMMON + consts.TOOLTIP_OCCUPANCY_TABLE}) - return columns - - @property - def has_blocks_per_sm(self): - return sum(self.blocks_per_sm_count) > 0 - - @property - def has_occupancy(self): - return sum(self.occupancy_count) > 0 - - def get_gpu_metrics(self): - def build_trace_counter_gpu_util(gpu_id, start_time, counter_value): - util_json = ("{{\"ph\":\"C\", \"name\":\"GPU {} Utilization\", \"pid\":{}, \"ts\":{}, " - "\"args\":{{\"GPU Utilization\":{}}}}}").format(gpu_id, gpu_id, start_time, counter_value) - return util_json - - def build_trace_counter_sm_efficiency(gpu_id, start_time, counter_value): - util_json = ("{{\"ph\":\"C\", \"name\":\"GPU {} Est. SM Efficiency\", \"pid\":{}, \"ts\":{}, " - "\"args\":{{\"Est. SM Efficiency\":{}}}}}").format(gpu_id, gpu_id, start_time, counter_value) - return util_json - - def add_trace_counter_gpu_util(gpu_id, start_time, counter_value, counter_json_list: List): - json_str = build_trace_counter_gpu_util(gpu_id, start_time, counter_value) - counter_json_list.append(json_str) - - def add_trace_counter_sm_efficiency(gpu_id, start_time, end_time, value, counter_json_list: List): - efficiency_json_start = build_trace_counter_sm_efficiency(gpu_id, start_time, value) - efficiency_json_finish = build_trace_counter_sm_efficiency(gpu_id, end_time, 0) - counter_json_list.append(efficiency_json_start) - counter_json_list.append(efficiency_json_finish) - - counter_json_list = [] - for gpu_id, buckets in enumerate(self.gpu_util_buckets): - if len(buckets) > 0: - # Adding 1 as baseline. To avoid misleading virtualization when the max value is less than 1. - add_trace_counter_gpu_util(gpu_id, buckets[0][0], 1, counter_json_list) - add_trace_counter_gpu_util(gpu_id, buckets[0][0], 0, counter_json_list) - for b in buckets: - add_trace_counter_gpu_util(gpu_id, b[0], b[1], counter_json_list) - for gpu_id, ranges in enumerate(self.approximated_sm_efficiency_ranges): - buckets = self.gpu_util_buckets[gpu_id] - if len(ranges) > 0 and len(buckets) > 0: - # Adding 1 as baseline. To avoid misleading virtualization when the max value is less than 1. 
- add_trace_counter_sm_efficiency(gpu_id, buckets[0][0], buckets[0][0], 1, counter_json_list) - for r in ranges: - add_trace_counter_sm_efficiency(gpu_id, r[0], r[1], r[2], counter_json_list) - - return counter_json_list - - def get_gpu_metrics_data_tooltip( - self, - gpu_infos, - tc_ratio): - if not self.gpu_ids: - return None, None - - has_sm_efficiency = False - has_occupancy = False - has_tc = False - - gpu_metrics_data = [] - gpu_info_columns = ['Name', 'Memory', 'Compute Capability'] - - def process_gpu(gpu_id: int): - nonlocal has_sm_efficiency, has_occupancy, has_tc - gpu_metrics_data.append({'title': 'GPU {}:'.format(gpu_id), 'value': ''}) - gpu_info = gpu_infos.get(gpu_id, None) - if gpu_info is not None: - for key in gpu_info_columns: - if key in gpu_info: - gpu_metrics_data.append({'title': key, 'value': gpu_info[key]}) - else: - # the legacy chrome tracing file would not have gpu info. - pass - gpu_metrics_data.append({'title': 'GPU Utilization', 'value': '{} %'.format( - round(self.gpu_utilization[gpu_id] * 100, 2))}) - if self.avg_approximated_sm_efficiency_per_device[gpu_id] is not None: - gpu_metrics_data.append({'title': 'Est. SM Efficiency', 'value': '{} %'.format( - round(self.avg_approximated_sm_efficiency_per_device[gpu_id] * 100, 2))}) - has_sm_efficiency = True - if self.avg_occupancy_per_device[gpu_id] is not None: - gpu_metrics_data.append({'title': 'Est. Achieved Occupancy', 'value': '{} %'.format( - round(self.avg_occupancy_per_device[gpu_id], 2))}) - has_occupancy = True - if tc_ratio[gpu_id] is not None: - gpu_metrics_data.append({'title': 'Kernel Time using Tensor Cores', 'value': '{} %'.format( - round(tc_ratio[gpu_id] * 100, 2))}) - has_tc = True - - gpu_ids = list(self.gpu_ids) - process_gpu(gpu_ids[0]) - for idx in range(1, len(gpu_ids)): - # Append separator line for beautiful to see. - gpu_metrics_data.append({'title': '
', 'value': ''}) - process_gpu(gpu_ids[idx]) - - tooltip_summary = 'The GPU usage metrics:\n' - tooltip = '{}\n{}'.format(tooltip_summary, consts.TOOLTIP_GPU_UTIL) - if has_sm_efficiency: - tooltip += '\n' + consts.TOOLTIP_SM_EFFICIENCY - if has_occupancy: - tooltip += '\n' + consts.TOOLTIP_OCCUPANCY_COMMON + consts.TOOLTIP_OCCUPANCY_OVERVIEW - if has_tc: - tooltip += '\n' + consts.TOOLTIP_TENSOR_CORES - - return gpu_metrics_data, tooltip diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/kernel_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/kernel_parser.py deleted file mode 100644 index 229251e60a90d5bf4fed514d5f175199b92d3870..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/kernel_parser.py +++ /dev/null @@ -1,45 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -------------------------------------------------------------------------- -from typing import Optional - -import numpy as np -import pandas as pd - -from .tensor_core import TcAllowlist -from .trace import EventTypes - - -class KernelParser: - def __init__(self): - self.kernel_stat: Optional[pd.DataFrame] = None - self.tc_used_ratio = 0.0 - - def parse_events(self, events): - events = [vars(event) for event in events if event.type == EventTypes.KERNEL] - events = pd.DataFrame(events) - events = events.astype({'type': 'category', 'name': 'string'}, copy=False) - events['tc_used'] = events['name'].map(lambda name: name in TcAllowlist) - - def weighted_avg(x: pd.Series): - try: - # fill these None as zero - x = x.fillna(0) - return np.average(x, weights=events.loc[x.index, 'duration']) - except ZeroDivisionError: - return 0 - - self.kernel_stat = events.groupby('name').agg( - tc_used=('tc_used', 'first'), - count=('duration', 'count'), - sum=('duration', 'sum'), - mean=('duration', 'mean'), - max=('duration', 'max'), - min=('duration', 'min'), - blocks_per_sm=('blocks_per_sm', weighted_avg), - occupancy=('occupancy', weighted_avg)).sort_values('sum', ascending=False) - - tc_total = self.kernel_stat['sum'].sum() - tc_self = self.kernel_stat[self.kernel_stat['tc_used']]['sum'].sum() - if tc_total > 0: - self.tc_used_ratio = tc_self / tc_total diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py deleted file mode 100644 index b31374ea493fa2fcffa93024b89694d00534dee9..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/loader.py +++ /dev/null @@ -1,230 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Modifications: Add visualization of PyTorch Ascend profiling. -# -------------------------------------------------------------------------- -import bisect -import os -import sys -from collections import defaultdict -from typing import List, Tuple - -from .. import consts, io, utils -from ..multiprocessing import Process, Queue -from ..run import Run, RunProfile -from .data import DistributedRunProfileData, RunProfileData -from .node import CommunicationNode -from .run_generator import DistributedRunGenerator, RunGenerator - -logger = utils.get_logger() - - -class RunLoader(object): - def __init__(self, name, run_dir, caches: io.Cache, device_target="GPU"): - self.run_name = name - self.run_dir = run_dir - self.caches = caches - self.queue = Queue() - self.device_target = device_target - - def load(self): - workers = [] - spans_by_workers = defaultdict(list) - if self.device_target == 'Ascend': - for path in io.listdir(self.run_dir): - if io.isdir(io.join(self.run_dir, path)) and utils.is_worker_span_dir(path) and io.isdir( - io.join(self.run_dir, path, 'ASCEND_PROFILER_OUTPUT')): - data_path = io.join(self.run_dir, path, 'ASCEND_PROFILER_OUTPUT') - for file in io.listdir(data_path): - if utils.is_npu_trace_path(file) or str(file) in consts.INPUT_FILE_LIST: - match = consts.WORKER_SPAN_PATTERN.match(path) - worker = match.group(1) - span = match.group(2) - if span is not None: - bisect.insort(spans_by_workers[worker], span) - workers.append((worker, span, io.join(path, 'ASCEND_PROFILER_OUTPUT'))) - break - else: - for path in io.listdir(self.run_dir): - if io.isdir(io.join(self.run_dir, path)): - continue - match = consts.WORKER_PATTERN.match(path) - if not match: - continue - absolute_path = io.join(self.run_dir, path) - if io.stat(absolute_path).length > consts.MAX_FILE_SIZE: - logger.warning( - f'File "{absolute_path}" exceeds the maximum limit size of 500MB and will be skipped.') - continue - - worker = match.group(1) - span = match.group(2) - if span is not None: - # remove the starting dot (.) - span = span[1:] - bisect.insort(spans_by_workers[worker], span) - - workers.append((worker, span, path)) - - span_index_map = {} - for worker, span_array in spans_by_workers.items(): - for i, span in enumerate(span_array, 1): - span_index_map[(worker, span)] = i - - for worker, span, path in workers: - # convert the span timestamp to the index. 
- span_index = None if span is None else span_index_map[(worker, span)] - p = Process(target=self._process_data, args=(worker, span, span_index, path)) - p.start() - logger.info('started all processing') - - distributed_run = Run(self.run_name, self.run_dir, self.device_target) - run = Run(self.run_name, self.run_dir, self.device_target) - num_items = len(workers) - while num_items > 0: - item: Tuple[RunProfile, DistributedRunProfileData] = self.queue.get() - num_items -= 1 - r, d = item - if r or d: - logger.debug('Loaded profile via mp.Queue') - if r is not None: - run.add_profile(r) - if d is not None: - distributed_run.add_profile(d) - - distributed_profiles = self._process_spans(distributed_run) - for d in distributed_profiles: - if d is not None: - run.add_profile(d) - - # for no daemon process, no need to join them since it will automatically join - return run - - def _process_data(self, worker, span_name, span, path): - import absl.logging - absl.logging.use_absl_handler() - - try: - logger.debug('Parse trace, run_dir=%s, data_dir=%s', self.run_dir, path) - local_file = self.caches.get_remote_cache(io.join(self.run_dir, path)) - if self.device_target == 'Ascend': - data = RunProfileData.parse_npu(worker, span, local_file, self.caches.cache_dir) - else: - data = RunProfileData.parse_gpu(worker, span, local_file, self.caches.cache_dir) - if not data: - self.queue.put((None, None)) - logger.debug('finishing process data') - return - if data.trace_file_path != local_file: - self.caches.add_file(local_file, data.trace_file_path) - - generator = RunGenerator(worker, span, data, self.device_target) - profile = generator.generate_run_profile() - if self.device_target == 'Ascend': - data.step_to_overlap = profile.step_to_overlap - data.step_to_wait = profile.step_to_wait - data.comm_op = profile.comm_op - dist_data = DistributedRunProfileData(data) - - logger.debug('Sending back profile via mp.Queue') - self.queue.put((profile, dist_data)) - except KeyboardInterrupt: - logger.warning('tb_plugin receive keyboard interrupt signal, process %d will exit' % (os.getpid())) - sys.exit(1) - except Exception as ex: - if self.device_target == 'Ascend': - worker_name = f'{worker}_{span_name}_ascend_pt' - else: - worker_name = worker - logger.warning('Failed to parse profile data for Run %s on %s. 
Exception=%s', - self.run_name, worker_name, ex, exc_info=True) - self.queue.put((None, None)) - logger.debug('finishing process data') - - def _process_spans(self, distributed_run: Run): - spans = distributed_run.get_spans() - if spans is None: - return [self._process_distributed_profiles(distributed_run.get_profiles(), None)] - else: - span_profiles = [] - for span in spans: - profiles = distributed_run.get_profiles(span=span) - p = self._process_distributed_profiles(profiles, span) - if p is not None: - span_profiles.append(p) - return span_profiles - - def _process_distributed_profiles(self, profiles: List[DistributedRunProfileData], span): - if self.device_target != 'Ascend': - return self._gpu_distributed(profiles, span) - else: - for data in profiles: - if not data.has_communication: - logger.debug('There is no communication profile in this NPU run.') - return None - generator = DistributedRunGenerator(profiles, span, self.device_target) - profile = generator.generate_run_profile() - return profile - - def _gpu_distributed(self, profiles, span): - has_communication = True - comm_node_lists: List[List[CommunicationNode]] = [] - for data in profiles: - logger.debug('Processing profile data') - # Set has_communication to False and disable distributed view if any one worker has no communication - if data.has_communication and data.comm_node_list: - comm_node_lists.append(data.comm_node_list) - if len(comm_node_lists[-1]) != len(comm_node_lists[0]): - logger.error("Number of communication operation nodes don't match between workers in run: %s" - % self.run_name) - has_communication = False - else: - has_communication = False - logger.debug('Processing profile data finish') - - if not has_communication: - logger.debug('There is no communication profile in this GPU run.') - return None - - worker_num = len(comm_node_lists) - for i, node in enumerate(comm_node_lists[0]): - kernel_range_size = len(node.kernel_ranges) - # loop for all communication kernel ranges in order - for j in range(kernel_range_size): - min_range = sys.maxsize - # For each kernel_range, find the minist between workers as the real communication time - for k in range(worker_num): - kernel_ranges = comm_node_lists[k][i].kernel_ranges - if len(kernel_ranges) != kernel_range_size: - logger.error("Number of communication kernels don't match between workers in run: %s" - % self.run_name) - has_communication = False - return None - if kernel_ranges: - if kernel_ranges[j][1] - kernel_ranges[j][0] < min_range: - min_range = kernel_ranges[j][1] - kernel_ranges[j][0] - for k in range(worker_num): - kernel_range = comm_node_lists[k][i].kernel_ranges[j] - comm_node_lists[k][i].real_time_ranges.append((kernel_range[1] - min_range, kernel_range[1])) - - for data in profiles: - data.communication_parse() - - generator = DistributedRunGenerator(profiles, span, self.device_target) - profile = generator.generate_run_profile() - return profile diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/memory_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/memory_parser.py deleted file mode 100644 index 64b78127a4c7a5675e5b2f71877754c541dde94f..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/memory_parser.py +++ /dev/null @@ -1,331 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. 
-# -------------------------------------------------------------------------- -from collections import defaultdict -from enum import IntEnum -from typing import Dict, Iterable, List, Optional, Tuple - -from .. import utils -from .node import OperatorNode, is_operator_node -from .op_agg import aggregate_ops -from .trace import DeviceType, MemoryEvent - -logger = utils.get_logger() - - -class MemoryMetrics(IntEnum): - SelfIncreaseSize = 0 - SelfAllocationSize = 1 - SelfAllocationCount = 2 - IncreaseSize = 3 - AllocationSize = 4 - AllocationCount = 5 - - -class MemoryRecord: - def __init__(self, scope: str, pid: int, tid: int, ts: int, - device_type: DeviceType, device_id: int, - address: int, record_bytes: int, total_allocated: float, total_reserved: float): - self.scope = scope - self.tid = tid - self.pid = pid - self.ts = ts - self.device_type = device_type - self.device_id = device_id - self.addr = address - self.bytes = record_bytes - self.total_allocated = total_allocated - self.total_reserved = total_reserved - self.op_name: Optional[str] = None - self.parent_op_name: Optional[str] = None - - @property - def device_name(self): - if self.device_type == DeviceType.CPU: - return 'CPU' - elif self.device_type == DeviceType.CUDA: - return 'GPU{}'.format(self.device_id) - else: - return None - - @property - def is_allocation(self): - return self.bytes > 0 - - @property - def op_name_or_unknown(self): - return self.op_name if self.op_name else '' - - @classmethod - def from_event(cls, event: MemoryEvent): - return cls(event.scope, event.pid, event.tid, event.ts, event.device_type, event.device_id, - event.addr, event.bytes, event.total_allocated, event.total_reserved) - - def __repr__(self) -> str: - return f"<{'+' if self.bytes>0 else ''}{self.bytes}B, addr: {self.addr}, ts: {self.ts}>" - - -class MemorySnapshot: - def __init__(self, memory_records: Iterable[MemoryRecord], - op_memory_table: Dict[OperatorNode, List[MemoryRecord]], - processed_nodes: Dict[OperatorNode, int]) -> None: - self.memory_records = memory_records - self.op_memory_table = op_memory_table - # the visited node times from parent to child - # troubleshooting issue purpose. - self.processed_node = processed_nodes - self.unreached_node = defaultdict(list) - - def get_peak_memory(self) -> Dict[Tuple[DeviceType, int], int]: - peaks = defaultdict(int) - for r in self.memory_records: - if r.total_allocated == r.total_allocated: # !isnan - peaks[(r.device_type, r.device_id)] = max(peaks[(r.device_type, r.device_id)], r.total_allocated) - return peaks - - def get_memory_statistics(self, - tid2tree: Dict[int, OperatorNode], - start_ts=None, end_ts=None) -> Dict[str, Dict[str, List[int]]]: - metric_length = len(MemoryMetrics) - self_metric_length = metric_length // 2 - - def dict_factory(): - return defaultdict(lambda: [0] * metric_length) - - # traverse outputs - op_list: List[OperatorNode] = [] - # two level keys dictionary - # first keyed by node, then keyed by device (CPU/GPU0/GPU1/etc.) 
- memory_metrics_keyed_by_node: Dict[OperatorNode, Dict[str, List[int]]] = defaultdict(dict_factory) - - def traverse_node_memory(node: OperatorNode): - if start_ts is not None and node.end_time < start_ts: - return - if end_ts is not None and node.start_time > end_ts: - return - - is_op = is_operator_node(node) - if is_op: - op_list.append(node) - - if node not in self.processed_node: - self.unreached_node[tid].append(node) - # since the node has not been visited to insert memory records, just ignore all of its children - return - elif is_op: - node_memory_metrics = self.get_memory_metrics(node, start_ts, end_ts) - for device, metrics in node_memory_metrics.items(): - # device is the name of the device, like: CPU/GPU0 - # metrics is an array [SelfIncreaseSize, SelfAllocationSize, SelfAllocationCount] - for i, value in enumerate(metrics): - memory_metrics_keyed_by_node[node][device][i] = value - memory_metrics_keyed_by_node[node][device][i + self_metric_length] += value - else: - logger.debug('node {}:{} is not an operator node, will skip its self metrics processing'.format( - node.name, node.start_time)) - - # recurse into the children nodes - for child in node.children: - traverse_node_memory(child) - # sum up the child metrics - for device, metrics in memory_metrics_keyed_by_node[child].items(): - for i in range(self_metric_length, metric_length): - memory_metrics_keyed_by_node[node][device][i] += metrics[i] - - for tid, root in tid2tree.items(): # tid is referenced inside traverse_node_memory - for child in root.children: - traverse_node_memory(child) - - # keyed first by device name like CPU/GPU0 etc, then keyed by operator name. - # the value is an array [items indexed by MemoryMetrics] - memory_metrics_keyed_by_nodename: Dict[str, Dict[str, List[int]]] = defaultdict(dict_factory) - # node: the instance, device_keyed_metrics: dictionary keyed by device name like CPU/GPU0 - for node, device_keyed_metrics in memory_metrics_keyed_by_node.items(): - if not is_operator_node(node): - # skip nodes like Optimizer.step, DataLoader, ProfilerStep#1 etc. - continue - - for device, metrics in device_keyed_metrics.items(): - for i, metric in enumerate(metrics): - memory_metrics_keyed_by_nodename[device][node.name][i] += metric - - # get the op_calls dictionary from module parser result.
- op_calls: Dict[str, int] = defaultdict(int) - agg_result = aggregate_ops(op_list, [lambda op: op.name]) - for op_name, op_agg in agg_result[0].items(): - op_calls[op_name] += op_agg.calls - - result: Dict[str, Dict[str, List[int]]] = defaultdict(defaultdict) - for device, node_metrics in memory_metrics_keyed_by_nodename.items(): - for node, values in node_metrics.items(): - if any(values): - result[device][node] = values + [op_calls[node]] - - return result - - def get_memory_metrics(self, op: OperatorNode, start_ts, end_ts): - metrics_count = len([e.name for e in MemoryMetrics if e.name.startswith('Self')]) - memory_metrics: Dict[str, List[int]] = defaultdict(lambda: [0] * metrics_count) - for record in self.op_memory_table[op]: - if start_ts is not None and record.ts < start_ts: - continue - if end_ts is not None and record.ts > end_ts: - continue - name = record.device_name - if name is None: - continue - - memory_metrics[name][MemoryMetrics.SelfIncreaseSize] += record.bytes - if record.bytes > 0: - memory_metrics[name][MemoryMetrics.SelfAllocationSize] += record.bytes - memory_metrics[name][MemoryMetrics.SelfAllocationCount] += 1 - - return memory_metrics - - -class MemoryParser: - def __init__(self, memory_events: Iterable[MemoryEvent]): - # for statistics purposes - self.staled_records: List[MemoryRecord] = [] - self.processed_records: List[MemoryRecord] = [] - self.memory_records: List[MemoryRecord] = [MemoryRecord.from_event(e) for e in memory_events] - - def find_memory_nodes(self, tid2tree: Dict[int, OperatorNode]) -> MemorySnapshot: - records_by_tid: Dict[int, List[MemoryRecord]] = defaultdict(list) - for r in self.memory_records: - records_by_tid[r.tid].append(r) - - op_memory_table: Dict[OperatorNode, List[MemoryRecord]] = defaultdict(list) - processed_node = defaultdict(int) - - tree_height = 0 - for tid, records in records_by_tid.items(): - if not records: - continue - - # each item is the (parent_node, child_index) pair currently being visited. - node_stack: List[Tuple[OperatorNode, int]] = [] - - record_index = 0 - current_node: OperatorNode = tid2tree.get(tid) - child_index = 0 - - if current_node: - processed_node[current_node] += 1 - - while record_index < len(records): - """Each pass of the loop processes one record. The basic logic is: - The search starts from the node visited last, since both the records and the tree are already ordered. - 1. If the current node contains the record, descend to the exact child that encloses it. - 2. Otherwise, pop back to the parent node and update child_index, so that the parent node can continue from - the previously visited child. # noqa: E501 - 3. If no node contains the record, all remaining records will be ignored. - """ - record = records[record_index] - - if len(node_stack) > tree_height: - tree_height = len(node_stack) - - if current_node is None or current_node.start_time is None or current_node.end_time is None: - # 3. Ignore all remaining records. - logger.debug( - 'could not find the node for tid %d, timestamp: %d, record index: %d, total records: %d' % ( - record.tid, record.ts, record_index, len(records))) - self.staled_records.append(records[record_index]) - record_index += 1 - continue - - if record.ts < current_node.start_time: - # this should only happen for the root node. - logger.debug('record timestamp %d is less than the start time of %s' % - (record.ts, current_node.name)) - # This record has no chance to be attached to any following tree node.
- self.staled_records.append(record) - record_index += 1 - continue - elif record.ts >= current_node.end_time: - # 2. pop parent node and update the child_index accordingly. - if len(node_stack) > 0: - current_node, child_index = node_stack.pop() - child_index += 1 - else: - # if there is not item in stack, set it to None - current_node = None - continue - - # 1. find the real node embrace the record. - # Find the node which contains the records from top to downmost. - while child_index < len(current_node.children): - if record.ts < current_node.children[child_index].start_time: - # if current record timestamp is less than the current child's startime, - # we will break the search and keep the child_index not change. So that next time - # we can continue from here. - # there is no any child contains the record.timestamp - # child_find is False at this case. - break - elif record.ts >= current_node.children[child_index].end_time: - # if the record timestamp is greater than the children end time, increment to next child - # until find one contains the record - child_index += 1 - else: - # current children contains the record - processed_node[current_node.children[child_index]] += 1 - - # push child index which will be visited, then continue the loop - node_stack.append((current_node, child_index)) - current_node = current_node.children[child_index] - child_index = 0 - - # the current_node is the one contains the record at this moment. - if is_operator_node(current_node): - op_memory_table[current_node].append(record) - # NOTE: only allocation record can be associated with op. Because deallocation happens at the end - # of a tensor's lifetime which is not deterministic. - if record.is_allocation: - record.op_name = current_node.name - if len(node_stack) > 0: - record.parent_op_name = node_stack[-1][0].name - self.processed_records.append(record) - else: - self.staled_records.append(record) - - # the record is processed - record_index += 1 - - # show summary information - if len(self.staled_records) > 0 and len(self.memory_records) > 0: - logger.debug('{} memory records are skipped in total {} memory records and only {} get processed'.format( - len(self.staled_records), len(self.memory_records), len(self.processed_records))) - if tree_height > 0: - logger.debug('max tree height is {}'.format(tree_height)) - - all_records = self.get_preprocessed_records() - return MemorySnapshot(all_records, op_memory_table, processed_node) - - def get_preprocessed_records(self): - memory_records = sorted(self.memory_records, key=lambda r: r.ts) - - alloc = {} # allocation events may or may not have paired free event - prev_ts = float('-inf') # ensure ordered memory records is ordered - for i, r in enumerate(memory_records): - if r.addr is None: - # profile json data prior to pytorch 1.10 do not have addr - # we should ignore them - continue - if prev_ts > r.ts: - logger.error(f'Invalid value, prev_ts {prev_ts} is greater than end_ts {r.ts}') - return [] - prev_ts = r.ts - addr = r.addr - size = r.bytes - if size > 0: - # Allocation event, to be matched with a Release event - alloc[addr] = i - else: - # Processing a Release event - if addr in alloc: - alloc_r = memory_records[alloc[addr]] - r.op_name = alloc_r.op_name - r.parent_op_name = alloc_r.parent_op_name - del alloc[addr] - return memory_records diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/module_op.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/module_op.py deleted file mode 100644 index 
15f1e4ef93a5234cdf6273f9830ac1a6f3aeaa41..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/module_op.py +++ /dev/null @@ -1,262 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -from collections import namedtuple -from typing import Dict, Generator, Iterable, List, Optional, Set, Tuple, Union - -from .node import (DataLoaderNode, ModuleNode, OperatorNode, OptimizerNode, - PLModuleNode, ProfilerStepNode, is_operator_node) -from .trace import BaseEvent, EventTypes, PLModuleEvent, PythonFunctionEvent - - -class Module: - def __init__(self, name: str, module_id: int, shape: str = ''): - self.name = name - self.module_id = module_id - self.children: List[Module] = [] - - def __hash__(self): - return hash((self.name, self.module_id, tuple(self.children))) - - def __eq__(self, obj) -> bool: - if not isinstance(obj, Module): - return False - - return (self.name == obj.name and - self.module_id == obj.module_id and - self.children == obj.children) - - -class ModuleStats: - def __init__(self, name: str, module_id: int): - self.name = name - self.module_id = module_id - self.occurences: int = 0 - self.operators: int = 0 - self.host_duration: int = 0 - self.device_duration: int = 0 - self.self_host_duration: int = 0 - self.self_device_duration: int = 0 - - @property - def avg_host_duration(self): - return self.host_duration / self.occurences if self.occurences != 0 else 0 - - @property - def avg_device_duration(self): - return self.device_duration / self.occurences if self.occurences != 0 else 0 - - -Stats = namedtuple('Stats', [ - 'name', - 'id', - 'occurences', - 'operators', - 'host_duration', - 'self_host_duration', - 'device_duration', - 'self_device_duration', - 'avg_duration', - 'children']) - - -def aggegate_module_view(tid2tree: Dict[int, OperatorNode], events: List[BaseEvent]) -> Optional[List[Stats]]: - roots = _build_module_hierarchy(events) - modules = _get_node_list(tid2tree, ModuleNode) - if modules and roots: - return _process_module_statistics(modules, roots) - else: - return None - - -def aggegate_pl_module_view(tid2tree: Dict[int, OperatorNode], events: List[BaseEvent]) -> Optional[List[Stats]]: - roots = _build_module_hierarchy_from_name(events) - modules = _get_node_list(tid2tree, PLModuleNode) - if modules and roots: - return _process_module_statistics(modules, roots) - else: - return None - - -def _build_module_hierarchy_from_name(events: List[PLModuleEvent]) -> List[Module]: - pl_module_events = [e for e in events if e.type == EventTypes.PL_MODULE] - name2module: Dict[str, Module] = {} - no_root: Set[str] = set() - - for event in pl_module_events: - if event.name not in name2module: - name2module[event.name] = Module(event.name, 0) - - for name, module in name2module.items(): - if name.find('.') == -1: - continue - parent_name = name[:name.rfind('.')] - if parent_name in name2module: - name2module[parent_name].children.append(module) - no_root.add(module.name) - - return [module for name, module in name2module.items() if name not in no_root] - - -def _build_module_hierarchy(events: List[PythonFunctionEvent]) -> List[Module]: - """Get the module hierarchy from the chome trace events - """ - python_events = [e for e in events if e.type in (EventTypes.PYTHON_FUNCTION, EventTypes.MODULE)] - id_to_event = {e.python_id: e for e in python_events} - - # 
Extract Python function topology. - children: Dict[int, List[int]] = {} - for e in python_events: - e_id = e.python_id - children.setdefault(e_id, []) - e_parent_id = e.python_parent_id - children.setdefault(e_parent_id, []) - children.get(e_parent_id).append(e_id) - function_leaves = [k for k, v in children.items() if not v] - - # Convert Python function topology to Module topology. - # This is a simple O(n) tree walking algorithm where we start from the leaves - # and walk up, discarding any nodes which are not Module nodes. - module_parent_map = {} - seen = set() - for i in function_leaves: - e = id_to_event[i] - current_module = None - while e is not None: - e_id = e.python_id - if e.type == EventTypes.MODULE: - if current_module is not None: - module_parent_map[current_module.python_id] = e_id - current_module = e - module_parent_map.setdefault(e_id, None) - - seen_key = (e_id, id(current_module)) - if seen_key in seen: - break - seen.add(seen_key) - - e = id_to_event.get(e.python_parent_id, None) - - module_roots = [k for k, v in module_parent_map.items() if v is None] - module_child_map: Dict[int, List[int]] = {} - for child_id, parent_id in module_parent_map.items(): - module_child_map.setdefault(child_id, []) - module_child_map.setdefault(parent_id, []) - module_child_map.get(parent_id).append(child_id) - - # The traverse order is well defined which guarantees that a given topology - # will produce a unique and unambiguous hierarchy. - def append_hierarchy(e_id) -> Module: - e = id_to_event[e_id] - module = Module(e.name, e.module_id) - for idx in module_child_map.get(e_id): - child = append_hierarchy(idx) - module.children.append(child) - return module - - unique_modules: Set[Module] = set() - for e_id in module_roots: - root = append_hierarchy(e_id) - unique_modules.add(root) - - return list(unique_modules) - - -def _aggregate_modules(modules: Iterable[Union[ModuleNode, PLModuleNode]]) -> Dict[Tuple[str, int], ModuleStats]: - """Aggregate the modules based on the name and module_id""" - module_aggs: Dict[Tuple(str, int), ModuleStats] = {} - for m in modules: - key = (m.name, m.module_id) - if key not in module_aggs: - module_aggs[key] = ModuleStats(m.name, m.module_id) - agg = module_aggs[key] - agg.occurences += 1 - - agg.operators += sum(is_operator_node(child) for child in m.children) - - agg.self_host_duration += m.self_host_duration - agg.host_duration += m.end_time - m.start_time - - agg.self_device_duration += m.self_device_duration - agg.device_duration += m.device_duration - - return module_aggs - - -def _get_node_list(tid2tree: Dict[int, OperatorNode], node_class) -> Generator[OperatorNode, None, None]: - """Get all node with node_class from the operator tree""" - def traverse_node(node): - # Check OptimizerNode here because in PytorchLightning PLModuleNode is under OptimizerNoder. 
- if type(node) not in (ProfilerStepNode, ModuleNode, OperatorNode, OptimizerNode, PLModuleNode, DataLoaderNode): - return - - if isinstance(node, node_class): - yield node - - for child in node.children: - yield from traverse_node(child) - - for _, root in tid2tree.items(): - for child in root.children: - yield from traverse_node(child) - - -def _process_module_statistics( - modules_nodes: Iterable[Union[ModuleNode, PLModuleNode]], - hierarchy: Iterable[Module]) -> List[Stats]: - """Get the module statistics from the ModuleNode(s) and the hierarchy - """ - module_aggs = _aggregate_modules(modules_nodes) - - def process_modules(h_modules: Iterable[Module]): - for m in h_modules: - name = m.name.replace('nn.Module: ', '') - stats = module_aggs.get((m.name, m.module_id)) - - child_stats = list(process_modules(m.children)) - yield Stats( - name, - m.module_id, - stats.occurences, - stats.operators, - stats.host_duration, - stats.self_host_duration, - stats.device_duration, - stats.self_device_duration, - stats.avg_device_duration if stats.avg_device_duration > 0 else stats.avg_host_duration, - child_stats) - - data = sorted(process_modules(hierarchy), key=lambda x: x.name) - return data - - -def get_module_tree(tid2tree: Dict[int, OperatorNode]): - """Get the module tree in timeline""" - from copy import copy - - modules = [] - - def traverse_node(node, parent: Optional[ModuleNode]): - if type(node) not in (ProfilerStepNode, ModuleNode): - return - - if isinstance(node, ModuleNode): - module = copy(node) - # remove the children after copy to keep the module only - module.children = [] - - if parent is None: - modules.append(module) - else: - parent.children.append(module) - parent = module - - for child in node.children: - traverse_node(child, parent) - - for _, root in tid2tree.items(): - for child in root.children: - # since the root node is CallTreeRoot, there is no parent ModuleNode - traverse_node(child, None) - - return modules diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/node.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/node.py deleted file mode 100644 index 0528491c28752b0358d79e27168d055546bd0310..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/node.py +++ /dev/null @@ -1,321 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -import sys -from abc import ABC -from typing import List, Optional, Tuple - -from .. import utils -from .tensor_core import TcAllowlist, TcOpAllowlist -from .trace import (DurationEvent, EventTypes, KernelEvent, ModuleEvent, - OperatorEvent, PLProfileEvent, NcclOpNameSet, GlooOpNameSet) - -logger = utils.get_logger() - -ExcludeOpName = ['DataParallel.forward', 'DistributedDataParallel.forward'] - - -class BaseNode(ABC): - def __init__(self, name: str, start_time: int, end_time: int, node_type: str, tid: int, - external_id: Optional[int] = None): - self.name = name - self.start_time = start_time - self.end_time = end_time - self.type = node_type - self.tid = tid - self.external_id = external_id # For consistency check. 
- - @staticmethod - def get_node_argument(event: DurationEvent): - kwargs = {} - kwargs['name'] = event.name - kwargs['start_time'] = event.ts - kwargs['end_time'] = event.ts + event.duration - kwargs['node_type'] = event.type - kwargs['tid'] = event.tid - - external_id = getattr(event, 'external_id', None) - if external_id is not None: - kwargs['external_id'] = external_id - - return kwargs - - @property - def duration(self) -> int: - if self.start_time is not None and self.end_time is not None: - return self.end_time - self.start_time - else: - return 0 - - -class CommunicationNode(BaseNode): - def __init__(self, input_shape: List[List[int]], input_type: List[str], **kwargs): - super().__init__(**kwargs) - self.input_shape = input_shape - self.input_type = input_type - self.kernel_ranges: List[Tuple[int, int]] = [] - self.real_time_ranges: List[Tuple[int, int]] = [] - self.total_time: int = 0 - self.real_time: int = 0 - self.step_name: str = None - - @classmethod - def create(cls, event: OperatorEvent): - kwargs = BaseNode.get_node_argument(event) - return cls(input_shape=event.input_shape, input_type=event.input_type, **kwargs) - - -class HostNode(BaseNode): - def __init__(self, device_duration: int = 0, **kwargs): - super().__init__(**kwargs) - self.device_duration = device_duration # Total time of Kernel, GPU Memcpy, GPU Memset. - - -class OperatorNode(HostNode): - # Don't use [] as default parameters - def __init__(self, children=None, runtimes=None, input_shape: Optional[List[List[int]]] = None, - input_type: Optional[List[str]] = None, callstack: Optional[str] = None, - self_host_duration: int = 0, self_device_duration: int = 0, **kwargs): - super().__init__(**kwargs) - self.children: List[OperatorNode] = [] if children is None else children # OperatorNode and ProfilerStepNode. - self.runtimes: List[RuntimeNode] = [] if runtimes is None else runtimes # RuntimeNode - self.input_shape = input_shape - self.input_type = input_type - self.callstack = callstack - self.self_host_duration = self_host_duration - self.self_device_duration = self_device_duration - self.tc_eligible = self.name in TcOpAllowlist - self.tc_self_duration = 0 # Time of TC kernels launched by this op excluding its children operators. - self.tc_total_duration = 0 # Time of TC kernels launched by this op including its children operators. - - def fill_stats(self): - def sort_key(x): - if x.start_time and x.end_time: - return x.start_time, -x.end_time - else: - return sys.maxsize, -sys.maxsize - 1 - self.children.sort(key=lambda x: (x.start_time, -x.end_time)) - self.runtimes.sort(key=sort_key) - - for child in self.children: - child.fill_stats() - for rt in self.runtimes: - rt.fill_stats(self) - - self.self_host_duration = self.end_time - self.start_time - for child in self.children: - self.device_duration += child.device_duration - self.self_host_duration -= (child.end_time - child.start_time) - self.tc_total_duration += child.tc_total_duration - # Mark TC eligible as True if any child operator is TC eligible. - if self.type == EventTypes.OPERATOR and not self.tc_eligible and child.tc_eligible: - self.tc_eligible = True - for rt in self.runtimes: - # From PyTorch 1.8 RC1, cpu_self_time does not include runtime's time. - # So here we keep consistent with it. 
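Put concretely (a hypothetical example, not taken from a real trace): the self host duration is what remains of an operator's wall time after subtracting its child operators and, from PyTorch 1.8 on, its runtime calls, which is what the loop below computes.
```python
# Hypothetical numbers in microseconds.
op_duration = 100            # end_time - start_time of the operator
child_durations = [40, 20]   # wall time of its child operators
runtime_durations = [10]     # runtime calls (e.g. kernel launches) under this operator
self_host_duration = op_duration - sum(child_durations) - sum(runtime_durations)
print(self_host_duration)    # 30
```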
- if rt.end_time is not None and rt.start_time is not None: - self.self_host_duration -= (rt.end_time - rt.start_time) - self.device_duration += rt.device_duration - self.self_device_duration += rt.device_duration - self.tc_self_duration += rt.tc_duration - self.tc_total_duration += rt.tc_duration - if self.type == EventTypes.OPERATOR and not self.tc_eligible and rt.tc_duration > 0: - logger.warning("New Tensor Cores eligible operator found: '{}'!".format(self.name)) - self.tc_eligible = True - - def get_operator_and_kernels(self): - ops: List[OperatorNode] = [] - kernels: List[DeviceNode] = [] - for child in self.children: - child_ops, child_kernels = child.get_operator_and_kernels() - ops.extend(child_ops) - kernels.extend(child_kernels) - for rt in self.runtimes: - kernels.extend(list(rt.get_kernels())) - - if is_operator_node(self): - ops.append(self) - - return ops, kernels - - @classmethod - def create(cls, event: OperatorEvent): - kwargs = BaseNode.get_node_argument(event) - return cls(input_shape=event.input_shape, input_type=event.input_type, callstack=event.callstack, **kwargs) - - -class ProfilerStepNode(OperatorNode): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - -class ModuleNode(OperatorNode): - def __init__(self, module_id: int, python_id: int, python_parent_id: int, **kwargs): - super().__init__(**kwargs) - self.module_id = module_id - self.python_id = python_id - self.python_parent_id = python_parent_id - - def fill_stats(self): - super().fill_stats() - self.self_device_duration += get_chilren_self_device_time(self) - - @classmethod - def create(cls, event: ModuleEvent): - kwargs = BaseNode.get_node_argument(event) - kwargs['module_id'] = event.module_id - kwargs['python_id'] = event.python_id - kwargs['python_parent_id'] = event.python_parent_id - # From the time being, the ModuleNode always have external_id to 0. - # As the result, we need reset the external_id to None to ignore adding the runtime nodes for ModuleNode - kwargs.pop('external_id', None) - return cls(**kwargs) - - -class BackwardNode(OperatorNode): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def fill_stats(self): - """Override the timestamps and duration for BackwardNode only - """ - self.children.sort(key=lambda x: (x.start_time, -x.end_time)) - self.start_time = self.children[0].start_time - self.end_time = self.children[-1].end_time - - self.self_host_duration = self.end_time - self.start_time - for child in self.children: - self.device_duration += child.device_duration - self.self_host_duration -= (child.end_time - child.start_time) - self.tc_total_duration += child.tc_total_duration - # Mark TC eligible as True if any child operator is TC eligible. 
- if not self.tc_eligible and child.tc_eligible: - self.tc_eligible = True - - -class PLProfileNode(OperatorNode): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - @classmethod - def create(cls, event: PLProfileEvent): - kwargs = BaseNode.get_node_argument(event) - return cls(**kwargs) - - -class PLModuleNode(OperatorNode): - def __init__(self, module_id: int, **kwargs): - super().__init__(**kwargs) - self.module_id = module_id - - def fill_stats(self): - super().fill_stats() - self.self_device_duration += get_chilren_self_device_time(self) - - @classmethod - def create(cls, event: PLProfileEvent): - kwargs = BaseNode.get_node_argument(event) - kwargs['module_id'] = event.module_id - return cls(**kwargs) - - -class DataLoaderNode(OperatorNode): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - -class OptimizerNode(OperatorNode): - def __init__(self, **kwargs): - super().__init__(**kwargs) - - -class RuntimeNode(HostNode): - def __init__(self, device_nodes: Optional[List['DeviceNode']] = None, **kwargs): - super().__init__(**kwargs) - # One runtime could trigger more than one kernel, such as cudaLaunchCooperativeKernelMultiDevice. - self.device_nodes = sorted(device_nodes, key=lambda x: (x.start_time, -x.end_time)) if device_nodes else None - self.tc_duration: int = 0 # Time summarization of all its launched kernels. - - def fill_stats(self, op_node: OperatorNode = None): - if self.device_nodes: - for device_node in self.device_nodes: - if op_node: - device_node.op_name = op_node.name - device_node.op_tc_eligible = op_node.tc_eligible - device_duration = device_node.end_time - device_node.start_time - self.device_duration += device_duration - self.tc_duration += device_duration if device_node.tc_used else 0 - - def get_kernels(self): - if self.device_nodes: - for d in self.device_nodes: - if d.type == EventTypes.KERNEL: - yield d - - @classmethod - def create(cls, event, device_nodes: Optional[List['DeviceNode']]): - kwargs = BaseNode.get_node_argument(event) - return cls(device_nodes=device_nodes, **kwargs) - - -class DeviceNode(BaseNode): - def __init__(self, - blocks_per_sm: Optional[float] = None, - occupancy: int = None, - grid: Optional[List[int]] = None, - block: Optional[List[int]] = None, - regs_per_thread: int = None, - shared_memory: int = None, - device_id: int = None, **kwargs): - super().__init__(**kwargs) - self.op_tc_eligible = False - self.op_name = None - self.blocks_per_sm = blocks_per_sm - self.occupancy = occupancy - self.grid = grid - self.block = block - self.regs_per_thread = regs_per_thread - self.shared_memory = shared_memory - self.tc_used = self.name in TcAllowlist - self.device_id = device_id - - @classmethod - def create(cls, event: KernelEvent): - kwargs = BaseNode.get_node_argument(event) - if event.type == EventTypes.KERNEL: - kwargs['blocks_per_sm'] = event.blocks_per_sm - kwargs['occupancy'] = event.occupancy - kwargs['grid'] = event.grid - kwargs['block'] = event.block - kwargs['regs_per_thread'] = event.regs_per_thread - kwargs['shared_memory'] = event.shared_memory - kwargs['device_id'] = event.device_id - return cls(**kwargs) - - -def create_operator_node(event: OperatorEvent): - if (event.name.startswith('enumerate(DataLoader)#') and event.name.endswith('.__next__') - or event.name.startswith('enumerate(DataPipe)#')): - return DataLoaderNode.create(event) - elif event.name.startswith('Optimizer.step'): - return OptimizerNode.create(event) - elif event.type == EventTypes.USER_ANNOTATION: - if event.name in GlooOpNameSet 
or event.name in NcclOpNameSet: - return OperatorNode.create(event) - else: - return None - else: - return OperatorNode.create(event) - - -def is_operator_node(node: BaseNode): - return bool(isinstance(node, OperatorNode) and node.type == EventTypes.OPERATOR and node.name not in ExcludeOpName - and not node.name.startswith("Optimizer.")) # exclude Optimizer.zero_grad - - -def get_chilren_self_device_time(node): - self_device_duration = 0 - for child in node.children: - if is_operator_node(child): - self_device_duration += child.device_duration - return self_device_duration diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_agg.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_agg.py deleted file mode 100644 index d6fdb5903d368e02c4ddb9fc3f29f536696e2a2e..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_agg.py +++ /dev/null @@ -1,164 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -------------------------------------------------------------------------- -import sys -from collections import defaultdict -from typing import Callable, Dict, List - -from .. import utils -from .node import DeviceNode, OperatorNode - -logger = utils.get_logger() - - -class OperatorAgg: - def __init__(self, op: OperatorNode): - self.name = op.name - self.input_shape = str(op.input_shape) # Optional - - self.callstacks = set() # Optional - self.calls: int = 0 - self.host_duration: int = 0 - self.device_duration: int = 0 - self.self_host_duration: int = 0 - self.self_device_duration: int = 0 - self.tc_eligible = op.tc_eligible - self.tc_self_duration: int = 0 - self.tc_total_duration: int = 0 - - @property - def tc_self_ratio(self) -> float: - return self.tc_self_duration / self.self_device_duration if self.self_device_duration > 0 else 0 - - @property - def tc_total_ratio(self) -> float: - return self.tc_total_duration / self.device_duration if self.device_duration > 0 else 0 - - -def aggregate_ops(op_list: List[OperatorNode], - keys_func: List[Callable[[OperatorNode], str]]) -> List[Dict[str, OperatorAgg]]: - def aggregate(key_to_agg: Dict[str, OperatorAgg], key: str, op: OperatorNode): - if key not in key_to_agg: - key_to_agg[key] = OperatorAgg(op) - agg = key_to_agg[key] - agg.callstacks.add(op.callstack) - agg.calls += 1 - agg.host_duration += op.duration - agg.device_duration += op.device_duration - agg.self_host_duration += op.self_host_duration - agg.self_device_duration += op.self_device_duration - agg.tc_self_duration += op.tc_self_duration - agg.tc_total_duration += op.tc_total_duration - - agg_dicts: List[Dict[str, OperatorAgg]] = [{} for _ in range(len(keys_func))] - for op in op_list: - for i, key_func in enumerate(keys_func): - key = key_func(op) - aggregate(agg_dicts[i], key, op) - - return agg_dicts - - -class KernelAggByNameOp: - def __init__(self, kernel: DeviceNode, op_name: str): - self.name = kernel.name - self.op_name = op_name - self.grid = kernel.grid - self.block = kernel.block - self.regs_per_thread = kernel.regs_per_thread - self.shared_memory = kernel.shared_memory - - self.calls: int = 0 - self.total_duration: int = 0 - self.min_duration: int = sys.maxsize - self.max_duration: int = 0 - self.blocks_per_sm = 0.0 - self.occupancy = 0.0 - self.tc_used = kernel.tc_used - self.op_tc_eligible = kernel.op_tc_eligible - - @property - def avg_duration(self): - return 
self.total_duration / self.calls if self.calls > 0 else 0 - - @property - def avg_blocks_per_sm(self) -> float: - return self.blocks_per_sm / self.total_duration if self.total_duration > 0 else 0 - - @property - def avg_occupancy(self) -> float: - return self.occupancy / self.total_duration if self.total_duration > 0 else 0 - - -def aggregate_kernels(kernel_list: List[DeviceNode]) -> List[KernelAggByNameOp]: - name_op_to_agg: Dict[str, KernelAggByNameOp] = {} - for kernel in kernel_list: - dur = kernel.end_time - kernel.start_time - op_name = 'N/A' if kernel.op_name is None else kernel.op_name - key = '###'.join((kernel.name, op_name, - str(kernel.grid), str(kernel.block), - str(kernel.regs_per_thread or '0'), str(kernel.shared_memory or '0'))) - if key not in name_op_to_agg: - name_op_to_agg[key] = KernelAggByNameOp(kernel, op_name) - agg = name_op_to_agg[key] - agg.calls += 1 - agg.total_duration += dur - agg.min_duration = min(agg.min_duration, dur) - agg.max_duration = max(agg.max_duration, dur) - agg.blocks_per_sm += float(kernel.blocks_per_sm or 0) * dur - agg.occupancy += float(kernel.occupancy or 0) * dur - - kernel_list_groupby_name_op = list(name_op_to_agg.values()) - return kernel_list_groupby_name_op - - -class ModuleAggregator: - - def __init__(self): - self.op_list_groupby_name: List[OperatorAgg] = None # For Operator-view. - self.op_list_groupby_name_input: List[OperatorAgg] = None # For Operator-view. - self.kernel_list_groupby_name_op: List[KernelAggByNameOp] = None # For Kernel-view. - self.stack_lists_group_by_name: Dict[str, List[OperatorAgg]] = None - self.stack_lists_group_by_name_input: Dict[str, List[OperatorAgg]] = None - self.ops: List[OperatorNode] = None - - def aggregate(self, tid2tree: Dict[int, OperatorNode]): - # get the operators and kernels recursively by traverse the node tree root. 
- ops: List[OperatorNode] = [] - kernels: List[DeviceNode] = [] - for root in tid2tree.values(): - root_ops, root_kernels = root.get_operator_and_kernels() - ops.extend(root_ops) - kernels.extend(root_kernels) - - # aggregate both kernels and operators - self.kernel_list_groupby_name_op = aggregate_kernels(kernels) - - keys: List[Callable[[OperatorNode], str]] = [ - lambda x: x.name, - lambda x: '###'.join((x.name, str(x.input_shape))), - lambda x: '###'.join((x.name, str(x.callstack))), - lambda x: '###'.join((x.name, str(x.input_shape), str(x.callstack))) - ] - agg_result = aggregate_ops(ops, keys) - stack_lists_group_by_name: Dict[str, List[OperatorAgg]] = defaultdict(list) - stack_lists_group_by_name_input: Dict[str, List[OperatorAgg]] = defaultdict(list) - for agg in agg_result[2].values(): - if len(agg.callstacks) != 1: - logger.error(f'Incorrect length of callstacks, expected 1 but got {len(agg.callstacks)}') - break - if list(agg.callstacks)[0]: - stack_lists_group_by_name[agg.name].append(agg) - for agg in agg_result[3].values(): - if len(agg.callstacks) != 1: - logger.error(f'Incorrect length of callstacks, expected 1 but got {len(agg.callstacks)}') - break - if list(agg.callstacks)[0]: - key = agg.name + '###' + str(agg.input_shape) - stack_lists_group_by_name_input[key].append(agg) - - self.op_list_groupby_name = list(agg_result[0].values()) - self.op_list_groupby_name_input = list(agg_result[1].values()) - self.stack_lists_group_by_name = stack_lists_group_by_name - self.stack_lists_group_by_name_input = stack_lists_group_by_name_input - self.ops = ops diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_tree.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_tree.py deleted file mode 100644 index fe919b29ced02efcea862f5e83ab52704f3f0d09..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/op_tree.py +++ /dev/null @@ -1,355 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -import math -import sys -from collections import defaultdict -from typing import Dict, Iterable, List, Optional, Tuple - -from .. 
import utils -from .node import (BackwardNode, DeviceNode, ModuleNode, OperatorNode, - ProfilerStepNode, RuntimeNode, is_operator_node) -from .trace import EventTypes - -logger = utils.get_logger() - - -class OpTreeBuilder: - BACKWARD_ROOT_PREFIX = 'autograd::engine::evaluate_function:' - BACKWARD_ACCUMULATE_GRAD = 'autograd::engine::evaluate_function: torch::autograd::AccumulateGrad' - - def __init__(self): - self.main_tid: int = None - self.tid2tree: Dict[int, OperatorNode] = None - - def build_tree(self, - tid2list: Dict[int, List[OperatorNode]], - tid2zero_rt_list: Dict[int, List[RuntimeNode]], - staled_device_nodes: List[DeviceNode], - fwd_bwd_map: Dict[int, int], - is_ascend=False): - """Construct the BackwardNode and replace the original backward nodes - """ - self.tid2tree = self._build_tree(tid2list, tid2zero_rt_list, staled_device_nodes, is_ascend) - - # if could not find any forward/backward association, skip the processing - if not fwd_bwd_map: - logger.debug('there is no any forwarwd backward association, skip processing backward correlation.') - return self.tid2tree - - self._set_main_tid() - - modules, backward_nodes = self._get_modules() - if not modules or not backward_nodes: - return self.tid2tree - - _, ts2parent = OpTreeBuilder._get_node_parents(backward_nodes) - agg_nodes = OpTreeBuilder._group_backward_nodes(backward_nodes) - fwd_bwd_root = self._get_backward_roots(fwd_bwd_map, ts2parent, agg_nodes) - if len(agg_nodes) > 0: - logger.warning('some nodes cannot find forward nodes') - - backward_modules: List[BackwardNode] = [] - for module in modules: - OpTreeBuilder._build_backward_module(module, None, fwd_bwd_root, backward_modules) - OpTreeBuilder._insert_backward_modules(self.tid2tree.get(self.main_tid), backward_modules) - self.tid2tree = {tid: root for tid, root in self.tid2tree.items() if len(root.children) > 0} - - return self.tid2tree - - def _build_tree(self, tid2list: Dict[int, List[OperatorNode]], tid2zero_rt_list, staled_device_nodes, is_ascend): - tid2tree = {} - - for tid, op_list in tid2list.items(): - zero_rt_list = tid2zero_rt_list[tid] if tid in tid2zero_rt_list else [] - # Note that when 2 start_time are equal, the one with bigger end_time should be ahead of the other. 
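A quick illustration of the sort key used below (toy `(start_time, end_time)` tuples, not the plugin's node objects): with equal start times, the larger end time sorts first, so an enclosing op is visited before the ops it wraps.
```python
ops = [(100, 120), (100, 200), (150, 160)]        # (start_time, end_time)
print(sorted(ops, key=lambda x: (x[0], -x[1])))   # [(100, 200), (100, 120), (150, 160)]
```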
- op_list.sort(key=lambda x: (x.start_time, -x.end_time)) - main_tid = any([op.name.startswith('ProfilerStep#') for op in op_list]) - if main_tid: - # only append the staled device nodes into main thread - self.main_tid = op_list[0].tid - root_node = OpTreeBuilder._build_tree_internal(op_list, zero_rt_list, tid, staled_device_nodes, - is_ascend) - else: - root_node = OpTreeBuilder._build_tree_internal(op_list, zero_rt_list, tid, [], is_ascend) - tid2tree[int(tid)] = root_node - - return tid2tree - - def _set_main_tid(self): - if self.main_tid is None and self.tid2tree: - if len(self.tid2tree) == 1: - self.main_tid = next(iter(self.tid2tree)) - else: - # there are multiple tids - backward_tid = self._find_backward_tid() - tid2len = { - tid: root.end_time - root.start_time - for tid, root in self.tid2tree.items() - if tid != backward_tid or backward_tid is None - } - # get the maximum length as the main thread - self.main_tid = max(tid2len, key=tid2len.get) - - def _find_backward_tid(self): - for root in self.tid2tree.values(): - for child in root.children: - if child.name.startswith(OpTreeBuilder.BACKWARD_ROOT_PREFIX): - return child.tid - - return None - - @staticmethod - def _build_tree_internal(host_node_list, zero_rt_list, tid, staled_device_nodes, is_ascend): - """host_node_list: list of OperatorNode and ProfilerStepNode. - zero_rt_list: list of RuntimeNode with external_id=0.""" - - def build_tree_relationship(host_node_list: Iterable[OperatorNode], zero_rt_list, staled_device_nodes): - dummpy_rt: List[RuntimeNode] = [] - if staled_device_nodes: - # Note: Although kernels of this dummy runtime is put under main thread's tree, - # we don't know which thread launches them. - dummpy_rt.append(RuntimeNode( - name='dummy', - start_time=None, - end_time=None, - node_type=EventTypes.RUNTIME, - tid=0, - device_nodes=staled_device_nodes)) - dummpy_rt[0].fill_stats() - node_stack: List[OperatorNode] = [] - root_node = OperatorNode( - name='CallTreeRoot', - start_time=-sys.maxsize - 1, - end_time=sys.maxsize, - node_type=EventTypes.PYTHON, - tid=tid, - runtimes=zero_rt_list + dummpy_rt) # Give the list of RuntimeNode with external_id=0 to root node. - node_stack.append(root_node) - for node in host_node_list: - while True: # break loop when the node is inserted. - tail_node = node_stack[-1] - if node.start_time < tail_node.end_time: - if node.end_time <= tail_node.end_time or ( - is_ascend and math.isclose(node.end_time, tail_node.end_time, rel_tol=1)): - tail_node.children.append(node) - node_stack.append(node) - else: - logger.error('Error in input data: ranges on the same thread should not intersect!' - 'Father:({},{},{}) Child:({},{},{})' - .format(tail_node.name, tail_node.start_time, tail_node.end_time, - node.name, node.start_time, node.end_time)) - break - else: - node_stack.pop() - return root_node - - # Merge the consecutive calls to same function into one. - # Just follow the same pattern in torch/autograd/profiler.py, - # EventList._remove_dup_nodes - def remove_dup_nodes(node: OperatorNode): - if node.type == EventTypes.RUNTIME: - return - if len(node.children) == 1: - child = node.children[0] - if node.name == child.name and node.type == EventTypes.OPERATOR and child.type == EventTypes.OPERATOR: - node.children = child.children - node.runtimes = child.runtimes # Keep consistent with autograd profiler. - remove_dup_nodes(node) # This node may have to merge with child's child. 
- return - - for child in node.children: - remove_dup_nodes(child) - - root_node = build_tree_relationship(host_node_list, zero_rt_list, staled_device_nodes) - remove_dup_nodes(root_node) - root_node.fill_stats() - - # replace the root_node start_time/end_time - root_node.start_time = next((child.start_time for child in root_node.children - if child.start_time is not None), None) - root_node.end_time = next((child.end_time for child in reversed(root_node.children) - if child.end_time is not None), None) - return root_node - - def _get_modules(self) -> Tuple[List[ModuleNode], List[OperatorNode]]: - """Get the ModuleNodes and backward root nodes - If there are any ModuleNodes, the backward roots will be removed from the tree - so that later a new BackwardNode will be replaced. - """ - modules: List[ModuleNode] = [] - backward_nodes: Dict[OperatorNode, List[OperatorNode]] = defaultdict(list) - - def traverse_node(parent, node: OperatorNode): - if isinstance(node, ModuleNode): - modules.append(node) - elif isinstance(node, ProfilerStepNode): - for child in node.children: - traverse_node(node, child) - else: - if node.name.startswith(OpTreeBuilder.BACKWARD_ROOT_PREFIX): - backward_nodes[parent].append(node) - else: - pass - - for root in self.tid2tree.values(): - for child in root.children: - traverse_node(root, child) - - if modules: - backward_nodes_flatten: List[OperatorNode] = [] - # only remove the backward nodes when the module information exist - for p, nodes in backward_nodes.items(): - p.children = [child for child in p.children if child not in nodes] - backward_nodes_flatten.extend(nodes) - - return modules, backward_nodes_flatten - else: - return None, None - - @staticmethod - def _get_node_parents(nodes: Iterable[OperatorNode]): - """Get the child->parent relationship for these nodes""" - ts_to_node: Dict[int, OperatorNode] = {} - ts_to_parent: Dict[int, OperatorNode] = {} - - def traverse_node(node: OperatorNode): - if node.start_time not in ts_to_node: - ts_to_node[node.start_time] = node - for child in node.children: - if child.start_time not in ts_to_parent: - ts_to_parent[child.start_time] = node - traverse_node(child) - - for node in nodes: - traverse_node(node) - return ts_to_node, ts_to_parent - - @staticmethod - def _group_backward_nodes(nodes: Iterable[OperatorNode]) -> Dict[OperatorNode, List[OperatorNode]]: - """All nodes are backward nodes startswith autograd::engine::evaluate_function. - If one node's name is autograd::engine::evaluate_function: torch::autograd::AccumulateGrad, - it should be grouped with previous normal backward node. Otherwise, a new backward node should be started - """ - grouped_bwd_nodes: List[List[OperatorNode]] = [] - for node in nodes: - if node.name == OpTreeBuilder.BACKWARD_ACCUMULATE_GRAD: - grouped_bwd_nodes[-1].append(node) - else: - grouped_bwd_nodes.append([node]) - - # return the root backward node -> aggregated backward nodes array - # if there is no any AccumulateGrad accompanied with it, then the key:value is itself. 
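A minimal sketch of the grouping rule described above (toy name strings only, not the plugin's node objects): each AccumulateGrad entry is folded into the backward group that precedes it.
```python
ACC = 'autograd::engine::evaluate_function: torch::autograd::AccumulateGrad'
names = [
    'autograd::engine::evaluate_function: MulBackward0', ACC,
    'autograd::engine::evaluate_function: AddBackward0',
]
groups = []
for name in names:
    if name == ACC and groups:
        groups[-1].append(name)   # attach to the previous backward group
    else:
        groups.append([name])     # start a new backward group
print([len(g) for g in groups])   # [2, 1] - AccumulateGrad joined the first group
```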
- return {nodes[0]: nodes for nodes in grouped_bwd_nodes} - - @staticmethod - def _get_backward_roots(fwd_bwd_map: Dict[int, int], - ts2parent: Dict[int, OperatorNode], - backward_nodes: Dict[OperatorNode, List[OperatorNode]]) -> Dict[int, List[OperatorNode]]: - if not fwd_bwd_map: - return None - - fwd_to_bwdroot: Dict[int, List[OperatorNode]] = {} - for fwd, bwd in fwd_bwd_map.items(): - parent = ts2parent.get(bwd) - while parent is not None and not parent.name.startswith(OpTreeBuilder.BACKWARD_ROOT_PREFIX): - parent = ts2parent.get(parent.start_time) - - if parent: - fwd_to_bwdroot[fwd] = backward_nodes.pop(parent) - else: - logger.warning('parent is None for', bwd) - - return fwd_to_bwdroot - - @staticmethod - def _build_backward_module(node: ModuleNode, - parent: Optional[BackwardNode], - fwd_bwd_map: Dict[int, List[OperatorNode]], - result: List[BackwardNode]): - """Construct the backward module from root (node argument) and - insert it into result array if there is no any parent associated with it. - """ - if not fwd_bwd_map: - logger.warning('The forward backward map is empty. The backward construction is skipped.') - return - - if isinstance(node, ModuleNode): - backward_node = BackwardNode(name=node.name + '.backward', start_time=None, end_time=None, - node_type='backward', tid=0) - if parent is None: - result.append(backward_node) - else: - parent.children.append(backward_node) - parent = backward_node - - for child in node.children: - if parent: - if is_operator_node(child): - bwd_ops = fwd_bwd_map.get(child.start_time) - if bwd_ops: - parent.children.extend(bwd_ops) - - OpTreeBuilder._build_backward_module(child, parent, fwd_bwd_map, result) - - if isinstance(node, ModuleNode) and parent and parent.children: - parent.fill_stats() - parent.tid = parent.children[0].tid - - @staticmethod - def _insert_backward_modules(root: OperatorNode, backward_modules: List[BackwardNode]): - backward_modules.sort(key=lambda x: (x.start_time, -x.end_time)) - - # each item is (parent_node, child_index) that it is visiting. - node_stack = [] - module_index = 0 - child_index = 0 - current_node = root - - staled_modules = [] - - while module_index < len(backward_modules): - module = backward_modules[module_index] - if current_node is None: - # ignore all remaining modules - staled_modules.append(module) - module_index += 1 - continue - - if module.end_time < current_node.start_time: - staled_modules.append(module) - module_index += 1 - continue - elif module.start_time > current_node.end_time: - if node_stack: - # pop parent node and update the child_index accordingly. - current_node, child_index = node_stack.pop() - child_index += 1 - else: - # if there is not item in stack, set it to None - current_node = None - continue - - while child_index < len(current_node.children): - if module.end_time < current_node.children[child_index].start_time: - # if current module is before next child, - # we will break the search and keep the child_index not change. - # As the result, the module will be treated as child of 'current_node' - # So that next time we can continue from here. - # there is no any child contains the record.timestamp - # child_find is False at this case. 
- break - elif module.start_time >= current_node.children[child_index].end_time: - child_index += 1 - else: - # current children contains the record - node_stack.append((current_node, child_index)) - current_node = current_node.children[child_index] - child_index = 0 - - # when code execute here, it means the current_node will be the parent of backward module - # Add the module into current_node - current_node.children.insert(child_index, module) - # since the children number is increased by 1, we need increment the child_index. - child_index += 1 - module_index += 1 diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/overall_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/overall_parser.py deleted file mode 100644 index c646a33b89a673e1738fd38704516df8bfdfaade..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/overall_parser.py +++ /dev/null @@ -1,114 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -------------------------------------------------------------------------- -from typing import List, Tuple - -from .. import utils -from .event_parser import ProfileRole -from .range_utils import (get_ranges_sum, intersection_ranges_lists, - merge_ranges, subtract_ranges_lists) - -logger = utils.get_logger() - - -class OverallParser(object): - class Costs: - def __init__(self, costs: List[float] = None): - # the cost length is len(ProfileRole) - if costs is None: - self.costs = [0.] * len(ProfileRole) - else: - self.costs = costs - - @classmethod - def create_from_statistics(cls, statistics: 'OverallParser.Statistics', total_duration: int): - costs = [0.] 
* len(ProfileRole) - for i, cost_range in enumerate(statistics.cost_ranges): - costs[i] = get_ranges_sum(cost_range) - costs[ProfileRole.Total] = total_duration - return cls(costs) - - class Statistics: - def __init__(self, cost_ranges: List[List[Tuple[int, int]]]): - if not cost_ranges: - raise ValueError('the cost ranges is None') - - self.cost_ranges = cost_ranges - - @classmethod - def create_from_range(cls, steps: List[Tuple[int, int]], role_ranges: List[List[Tuple[int, int]]]): - if len(role_ranges) != ProfileRole.Total - 1: - return cls([]) - - cost_ranges: List[List[Tuple[int, int]]] = [] - slots: List[Tuple[int, int]] = [] - for role in role_ranges: - if slots: - inter_range = intersection_ranges_lists(slots, role) - else: - inter_range = role - slots = merge_ranges(list(steps)) - cost_ranges.append(inter_range) - slots = subtract_ranges_lists(slots, inter_range) - # The last one is ProfileRole.Other - cost_ranges.append(slots) - - return cls(cost_ranges) - - def intersection_with_step(self, step: Tuple[int, int]): - cost_ranges: List[List[Tuple[int, int]]] = [] - step = [step] - for cost_range in self.cost_ranges: - cost_ranges.append(intersection_ranges_lists(step, cost_range)) - - return OverallParser.Statistics(cost_ranges) - - class StepCommunicationCosts: - def __init__(self): - self.computation: int = 0 - self.communication: int = 0 - self.overlap: int = 0 - self.other: int = 0 - - def __init__(self): - self.steps_costs: List[OverallParser.Costs] = [] - self.avg_costs = OverallParser.Costs() - self.communication_overlap: List[OverallParser.StepCommunicationCosts] = [] - - def aggregate(self, steps: List[Tuple[int, int]], role_ranges: List[List[Tuple[int, int]]]): - logger.debug('Overall, statistics') - if len(steps) <= 0: - logger.error('Invalid steps number of 0') - return - global_stats = OverallParser.Statistics.create_from_range(steps, role_ranges) - if role_ranges[ProfileRole.Kernel]: - comm_comp_overlap = intersection_ranges_lists( - role_ranges[ProfileRole.Kernel], role_ranges[ProfileRole.Communication]) - else: - comm_comp_overlap = intersection_ranges_lists( - role_ranges[ProfileRole.CpuOp], role_ranges[ProfileRole.Communication]) - - logger.debug('Overall, aggregation') - for i, step in enumerate(steps): - steps_stat = global_stats.intersection_with_step(step) - self.steps_costs.append(OverallParser.Costs.create_from_statistics(steps_stat, step[1] - step[0])) - for cost_index, _ in enumerate(self.avg_costs.costs): - self.avg_costs.costs[cost_index] += self.steps_costs[i].costs[cost_index] - - comm_costs = OverallParser.StepCommunicationCosts() - comm_costs.overlap = get_ranges_sum(intersection_ranges_lists([step], comm_comp_overlap)) - if role_ranges[ProfileRole.Kernel]: - comm_costs.computation = get_ranges_sum( - intersection_ranges_lists([step], role_ranges[ProfileRole.Kernel])) - else: - comm_costs.computation = get_ranges_sum( - intersection_ranges_lists([step], role_ranges[ProfileRole.CpuOp])) - comm_costs.communication = get_ranges_sum( - intersection_ranges_lists([step], role_ranges[ProfileRole.Communication])) - comm_costs.other = self.steps_costs[i].costs[ProfileRole.Total] +\ - comm_costs.overlap - comm_costs.computation - comm_costs.communication - self.communication_overlap.append(comm_costs) - - valid_steps = len(steps) - for i, _ in enumerate(self.avg_costs.costs): - self.avg_costs.costs[i] /= valid_steps diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/range_utils.py 
b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/range_utils.py deleted file mode 100644 index 7344762a6171c92b39dd771625f671d8df51cef4..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/range_utils.py +++ /dev/null @@ -1,190 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -from typing import List, Tuple - - -# src_ranges: item of (start_time, end_time, value) -def merge_ranges_with_value(src_ranges): - from collections import namedtuple - from enum import IntEnum - - class EndpointTypes(IntEnum): - START = 0 - END = 1 - - EndPoint = namedtuple('EndPoint', ['time', 'pt_type', 'value']) - - merged_ranges = [] - if len(src_ranges) > 0: - # Build tuple of (time, type, value) - endpoints: List[EndPoint] = [] - for r in src_ranges: - endpoints.append(EndPoint(r[0], EndpointTypes.START, r[2])) - endpoints.append(EndPoint(r[1], EndpointTypes.END, r[2])) - endpoints.sort(key=lambda x: [x.time, int(x.pt_type)]) # Make START in front of END if equal on time. - - last_endpoint_time = endpoints[0].time - last_value = endpoints[0].value - for i in range(1, len(endpoints)): - ep = endpoints[i] - if ep.time > last_endpoint_time and last_value > 0.0: - approximated_sm_efficiency = min(last_value, 1.0) - merged_ranges.append((last_endpoint_time, ep.time, approximated_sm_efficiency)) - last_endpoint_time = ep.time - if ep.pt_type == EndpointTypes.START: - last_value += ep.value - else: - last_value -= ep.value - - return merged_ranges - - -# range_list1 item is length 3. range_list2 item is length 2. -# Reture value's item is length 3. 
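For reference, a hypothetical usage sketch of the interval helpers defined further down in this module (assuming an environment where torch_tb_profiler with this file is still importable); each tuple is a `(start_ts, end_ts)` range:
```python
from torch_tb_profiler.profiler.range_utils import (
    intersection_ranges_lists, merge_ranges, subtract_ranges_lists)

kernel = [(0, 10), (20, 30)]
comm = [(5, 25)]
print(intersection_ranges_lists(kernel, comm))  # [(5, 10), (20, 25)]
print(subtract_ranges_lists(kernel, comm))      # [(0, 5), (25, 30)]
print(merge_ranges([(0, 10), (5, 25)]))         # [(0, 25)]
```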
-def intersection_ranges_lists_with_value(range_list1, range_list2) -> List[Tuple[int, int, int]]: - range_list_dst = [] - if len(range_list1) == 0 or len(range_list2) == 0: - return range_list_dst - r1 = range_list1[0] - r2 = range_list2[0] - i1 = i2 = 0 - while i1 < len(range_list1): - if i2 == len(range_list2): - break - elif r2[1] <= r1[0]: - r2, i2 = pop_list(range_list2, i2) - elif r2[0] <= r1[0] and r2[1] < r1[1]: - if r2[1] > r1[0]: - range_list_dst.append((r1[0], r2[1], r1[2])) - r1 = (r2[1], r1[1], r1[2]) - r2, i2 = pop_list(range_list2, i2) - elif r2[0] <= r1[0]: - if r2[1] >= r1[1]: - range_list_dst.append(r1) - r2 = (r1[1], r2[1]) - r1, i1 = pop_list(range_list1, i1) - elif r2[1] < r1[1]: - if r2[0] > r1[0]: - range_list_dst.append((r2[0], r2[1], r1[2])) - r1 = (r2[1], r1[1], r1[2]) - r2, i2 = pop_list(range_list2, i2) - elif r2[0] < r1[1]: - if r2[1] >= r1[1]: - range_list_dst.append((r2[0], r1[1], r1[2])) - r2 = (r1[1], r2[1]) - r1, i1 = pop_list(range_list1, i1) - else: - if r2[0] >= r1[1]: - r1, i1 = pop_list(range_list1, i1) - return range_list_dst - - -def subtract_ranges_lists(range_list1: List[Tuple[int, int]], - range_list2: List[Tuple[int, int]]) -> List[Tuple[int, int]]: - range_list_dst = [] - if len(range_list1) == 0: - return range_list_dst - if len(range_list2) == 0: - range_list_dst = list(range_list1) - return range_list_dst - r1 = range_list1[0] - r2 = range_list2[0] - i1 = i2 = 0 - while i1 < len(range_list1): - if i2 == len(range_list2): - range_list_dst.append(r1) - r1, i1 = pop_list(range_list1, i1) - elif r2[1] <= r1[0]: - r2, i2 = pop_list(range_list2, i2) - elif r2[0] <= r1[0] and r2[1] < r1[1]: - r1 = (r2[1], r1[1]) - r2, i2 = pop_list(range_list2, i2) - elif r2[0] <= r1[0]: - if r2[1] >= r1[1]: - r2 = (r1[1], r2[1]) - r1, i1 = pop_list(range_list1, i1) - elif r2[0] < r1[1]: - if r2[0] > r1[0]: - range_list_dst.append((r1[0], r2[0])) - r1 = (r2[0], r1[1]) - else: - if r2[0] >= r1[1]: - range_list_dst.append(r1) - r1, i1 = pop_list(range_list1, i1) - return range_list_dst - - -def intersection_ranges_lists(range_list1: List[Tuple[int, int]], - range_list2: List[Tuple[int, int]]) -> List[Tuple[int, int]]: - range_list_dst = [] - if len(range_list1) == 0 or len(range_list2) == 0: - return range_list_dst - r1 = range_list1[0] - r2 = range_list2[0] - i1 = i2 = 0 - while i1 < len(range_list1): - if i2 == len(range_list2): - break - elif r2[1] <= r1[0]: - r2, i2 = pop_list(range_list2, i2) - elif r2[0] <= r1[0] and r2[1] < r1[1]: - if r2[1] > r1[0]: - range_list_dst.append((r1[0], r2[1])) - r1 = (r2[1], r1[1]) - r2, i2 = pop_list(range_list2, i2) - elif r2[0] <= r1[0]: - if r2[1] >= r1[1]: - range_list_dst.append(r1) - r2 = (r1[1], r2[1]) - r1, i1 = pop_list(range_list1, i1) - elif r2[1] < r1[1]: - if r2[0] > r1[0]: - range_list_dst.append(r2) - r1 = (r2[1], r1[1]) - r2, i2 = pop_list(range_list2, i2) - elif r2[0] < r1[1]: - if r2[1] >= r1[1]: - range_list_dst.append((r2[0], r1[1])) - r2 = (r1[1], r2[1]) - r1, i1 = pop_list(range_list1, i1) - else: - if r2[0] >= r1[1]: - r1, i1 = pop_list(range_list1, i1) - return range_list_dst - - -def get_ranges_sum(ranges: List[Tuple[int, int]]) -> int: - total: int = 0 - for item in ranges: - total += (item[1] - item[0]) - return total - - -def pop_list(range_list, index): - next_index = index + 1 - if next_index >= len(range_list): - return None, len(range_list) - next_item = range_list[next_index] - return next_item, next_index - - -def merge_ranges(src_ranges, is_sorted=False) -> List[Tuple[int, int]]: - if not 
src_ranges: - # return empty list if src_ranges is None or its length is zero. - return [] - - if not is_sorted: - src_ranges.sort(key=lambda x: x[0]) - - merged_ranges = [] - merged_ranges.append(src_ranges[0]) - for src_id in range(1, len(src_ranges)): - src_range = src_ranges[src_id] - if src_range[1] > merged_ranges[-1][1]: - if src_range[0] <= merged_ranges[-1][1]: - merged_ranges[-1] = (merged_ranges[-1][0], src_range[1]) - else: - merged_ranges.append((src_range[0], src_range[1])) - - return merged_ranges diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py deleted file mode 100644 index 111dc34e81031a33ff9e0a2c03b0375522de24cf..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/run_generator.py +++ /dev/null @@ -1,1402 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -# Copyright(c) 2023 Huawei Technologies. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling. -# -------------------------------------------------------------------------- -import csv -import json -import re -import io as sysio -from collections import OrderedDict, defaultdict -from json import JSONDecodeError -from typing import Dict, Iterable, List -import numpy as np - -from .. 
import consts, utils, io -from ..run import DistributedRunProfile, RunProfile -from .data import DistributedRunProfileData, RunProfileData -from .module_op import aggegate_module_view, aggegate_pl_module_view -from .op_agg import KernelAggByNameOp, OperatorAgg -from .overall_parser import ProfileRole -from ..utils import Canonicalizer - -logger = utils.get_logger() - - -class RunGenerator(object): - def __init__(self, worker, span, profile_data: RunProfileData, device_target="GPU"): - self.worker = worker - self.span = span - self.profile_data = profile_data - self.statistic_data = {} - self.accelerator_data = {} - self.device_target = device_target - self.component_curve_data = {} - self.process_data = {} - - @staticmethod - def check_overlap_data(title): - # csv: step / compute time / communication_not_overlap / overlap / communication / free time - length = len(title) - if length < 5: - return [] - key = ["computing", "overlapped", "communication(not overlapped)", "free"] - get_key = list() - for j in key: - for i in range(length): - if j == title[i]: - get_key.append(i) - if len(get_key) < 4: - return [] - return get_key - - @staticmethod - def get_table_head(name: str, input_shape: str, call_stack: str, value: list): - if name is None: - return {} - temp = { - 'name': name, 'calls': 0, 'host_self_duration': 0, - 'host_total_duration': 0, 'device_self_duration': 0, 'device_total_duration': 0, - 'tc_self_ratio': 0, 'tc_total_ratio': 0, 'tc_eligible': 'Yes' - } - if input_shape is not None: - temp['input_shape'] = input_shape - if call_stack is not None: - temp['call_stack'] = call_stack - else: - temp['has_call_stack'] = False - else: - if call_stack is not None: - temp['call_stack'] = call_stack - else: - temp['has_call_stack'] = False - for vl in iter(value): - if 'has_call_stack' in temp and vl[2]: - temp['has_call_stack'] = True - temp['calls'] += 1 - temp['host_self_duration'] = round(temp['host_self_duration'] + vl[3], 2) - temp['host_total_duration'] = round(temp['host_total_duration'] + vl[4], 2) - temp['device_self_duration'] = round(temp['device_self_duration'] + vl[5], 2) - temp['device_total_duration'] = round(temp['device_total_duration'] + vl[6], 2) - temp['tc_self_ratio'] = round(temp['tc_self_ratio'] + vl[7], 2) - temp['tc_total_ratio'] = round(temp['tc_total_ratio'] + vl[8], 2) - temp['tc_eligible'] = 'Yes' if temp['tc_self_ratio'] > 0 or temp['tc_total_ratio'] > 0 else 'No' - temp['tc_self_ratio'] = 0 if temp['device_self_duration'] == 0 \ - else round(temp['tc_self_ratio'] / temp['device_self_duration'] * 100, 2) - temp['tc_total_ratio'] = 0 if temp['device_total_duration'] == 0 \ - else round(temp['tc_total_ratio'] / temp['device_total_duration'] * 100, 2) - return temp - - @staticmethod - def get_wait_table_by_ops(op, ops): - total_trans = 0 - total_synchronize = 0 - for key, data in op.items(): - if str(key) == "Total Op Info" and data.get("Communication Time Info"): - total_trans += float(data.get("Communication Time Info").get("Transit Time(ms)")) - total_synchronize += float(data.get("Communication Time Info").get("Synchronization Time(ms)")) - continue - k = re.sub(r'[0-9]+', ' ', key).split(" ")[0] - if k not in ops: - ops[k] = [0, 0, 0, 0] - ops[k][0] += 1 - for _, band in data.get("Communication Bandwidth Info").items(): - ops[k][1] += float(band.get("Transit Size(MB)")) - if data.get("Communication Time Info") is not None: - ops[k][2] += data.get("Communication Time Info").get("Elapse Time(ms)") - ops[k][3] += data.get("Communication Time 
Info").get("Transit Time(ms)") - return total_trans, total_synchronize - - @staticmethod - def trans_shape(shape: str): - result = list() - if ';' not in shape: - result.append('[' + shape.strip() + ']') - return '[' + ', '.join(result) + ']' - if len(shape.strip()) <= 1: - result.append('[]') - return '[' + ', '.join(result) + ']' - shape_spl = shape.split("\n") - for shape_div in iter(shape_spl): - result.append('[' + str(shape_div.replace(';', '')) + ']') - return '[' + ', '.join(result) + ']' - - @staticmethod - def get_process_peaks_and_devices_type(process_data: dict, memory_metric: str): - devices_type = [] - peaks = {} - for device in process_data: - devices_type.append(device) - reserved_list = process_data.get(device).get('Allocated') - if reserved_list is not None: - max_reserved = 0 - for array_value in reserved_list: - max_reserved = max(array_value[1], max_reserved) - peaks[device] = f'Peak Memory Usage: {max_reserved:.1f}{memory_metric}' - return devices_type, peaks - - @staticmethod - def get_pta_ge_peaks_and_devices_type(process_data: dict, memory_metric: str): - devices_type = [] - peaks = {} - for device in process_data: - devices_type.append(device) - peaks[device] = 'Reserved Peak Memory Usage:' - for component in process_data.get(device): - max_reserved = 0 - for array_value in process_data.get(device).get(component): - max_reserved = max(array_value[2], max_reserved) - peaks[device] += f' {component}-{max_reserved:.1f}{memory_metric} |' - return devices_type, peaks - - @staticmethod - def check_csv_columns(columns: list, column_idxs: dict): - column_exist_count = 0 - for idx, column in enumerate(columns): - if column in column_idxs: - column_idxs[column] = idx - column_exist_count += 1 - return column_idxs.values(), column_exist_count - - @staticmethod - def get_csv_data(path: str): - if path is None: - return [] - datas = [] - with open(path, encoding='utf-8-sig') as f: - for row in csv.reader(f, skipinitialspace=True): - datas.append(row) - return datas - - def generate_run_profile(self): - profile_run = RunProfile(self.worker, self.span) - profile_run.is_pytorch_lightning = self.profile_data.is_pytorch_lightning - profile_run.has_runtime = self.profile_data.has_runtime - profile_run.has_kernel = self.profile_data.has_kernel - profile_run.has_communication = self.profile_data.has_communication - profile_run.has_memcpy_or_memset = self.profile_data.has_memcpy_or_memset - profile_run.profiler_start_ts = self.profile_data.profiler_start_ts - profile_run.device_target = self.device_target - - if self.device_target != 'Ascend': - profile_run.views.append(consts.OVERALL_VIEW) - profile_run.overview = self._generate_overview() - - profile_run.views.append(consts.OP_VIEW) - profile_run.operation_pie_by_name = self._generate_op_pie() - profile_run.operation_table_by_name = self._generate_op_table(self.profile_data.op_list_groupby_name) - profile_run.operation_stack_by_name = self._generate_op_table_for_stack(False) - profile_run.operation_pie_by_name_input = self._generate_op_pie(True) - profile_run.operation_table_by_name_input = self._generate_op_table( - self.profile_data.op_list_groupby_name_input, True) - profile_run.operation_stack_by_name_input = self._generate_op_table_for_stack(True) - - if self.profile_data.has_kernel: - profile_run.views.append(consts.KERNEL_VIEW) - profile_run.kernel_table = self._generate_kernel_table_gpu() - profile_run.kernel_op_table = self._generate_kernel_op_table_gpu() - profile_run.kernel_pie = self._generate_kernel_pie_gpu() - 
profile_run.tc_pie = self._generate_tc_pie_gpu() - - if self.profile_data.memory_snapshot: - profile_run.views.append(consts.MEMORY_VIEW) - profile_run.memory_snapshot = self.profile_data.memory_snapshot - - profile_run.gpu_metrics = self.profile_data.gpu_metrics_parser.get_gpu_metrics() - - gpu_infos = {gpu_id: RunGenerator.get_gpu_info(self.profile_data.device_props, gpu_id) - for gpu_id in self.profile_data.gpu_metrics_parser.gpu_ids} - gpu_infos = {gpu_id: gpu_info for gpu_id, gpu_info in gpu_infos.items() if gpu_info is not None} - - profile_run.gpu_summary, profile_run.gpu_tooltip = \ - self.profile_data.gpu_metrics_parser.get_gpu_metrics_data_tooltip( - gpu_infos, self.profile_data.tc_ratio) - - profile_run.pl_tid2tree = self.profile_data.pl_tid2tree - - profile_run.module_stats = aggegate_module_view(self.profile_data.tid2tree, self.profile_data.events) - profile_run.pl_module_stats = aggegate_pl_module_view(self.profile_data.tid2tree, self.profile_data.events) - if profile_run.is_pytorch_lightning and profile_run.pl_module_stats: - profile_run.views.append(consts.LIGHTNING_VIEW) - elif profile_run.module_stats: - profile_run.views.append(consts.MODULE_VIEW) - else: - if self.profile_data.has_operator_view: - profile_run.views.append(consts.OP_VIEW) - profile_run.operation_pie_by_name = self._get_operator_pie() - profile_run.operation_table_by_name = self._get_operator_table_by_name() - profile_run.operation_stack_by_name = self._get_call_stack_by_name() - profile_run.operation_pie_by_name_input = self._get_operator_pie(True) - profile_run.operation_table_by_name_input = self._get_operator_table_by_name(True) - profile_run.operation_stack_by_name_input = self._get_call_stack_by_name_shapes(True) - - if self.profile_data.has_kernel: - profile_run.views.append(consts.KERNEL_VIEW) - profile_run.kernel_table = self._generate_kernel_table_npu() - profile_run.kernel_op_table = self._generate_kernel_op_table_npu() - profile_run.kernel_pie = self._generate_kernel_pie_npu() - profile_run.tc_pie = self._generate_tc_pie_npu() - - if self.profile_data.has_memory: - profile_run.views.append(consts.MEMORY_VIEW) - profile_run.memory_div_curve = None - self.process_data, self.component_curve_data = self._handle_memory_data() - profile_run.memory_all_curve = self._get_memory_all_curve() - peak_memory_events = self._handle_memory_component() - profile_run.memory_events = self._get_memory_event(peak_memory_events) - - if self.profile_data.has_communication: - profile_run.step_to_overlap = self._npu_get_overlap() - profile_run.step_to_wait, profile_run.comm_op = self._npu_get_wait_table() - - profile_run.tid2tree = self.profile_data.tid2tree - if self.profile_data.has_trace: - profile_run.views.append(consts.TRACE_VIEW) - profile_run.trace_file_path = self.profile_data.trace_file_path - - return profile_run - - def _npu_get_overlap(self): - path = self.profile_data.distributed_csv_path - overlap_by_steps: Dict[str, List[float]] = OrderedDict() - data = RunGenerator.get_csv_data(path) - if len(data) <= 1: - return overlap_by_steps - title = [x.lower() for x in data[0]] - title_name = RunGenerator.check_overlap_data(title) - if not title_name: - logger.error(f"Incomplete content of CSV file {path}.") - return overlap_by_steps - - for idx, step in enumerate(data[1:]): - try: - key = step[0] - if key == '': - key = 'all' - overlap = [ - float(step[int(title_name[0])]), float(step[int(title_name[1])]), - float(step[int(title_name[2])]), float(step[int(title_name[3])]) - ] - if key in overlap_by_steps: - 
overlap_by_steps[key] = list(np.add(overlap, overlap_by_steps[key])) - else: - overlap_by_steps[key] = list(overlap) - except (ValueError, IndexError): - logger.error(f'File "{path}" has wrong data format in row {idx + 2} and will skip it.') - return overlap_by_steps - - def _npu_get_wait_table(self): - path = self.profile_data.communication_json_path - if not io.exists(path): - raise FileNotFoundError(path) - data = io.read(path) - wait_by_step: Dict[str, Dict[str, float]] = OrderedDict() - table_ops: Dict[str, List[float]] = OrderedDict() - try: - communication_json = json.loads(data, strict=False) - except JSONDecodeError as e: - try: - communication_json = json.loads(data, strict=False) - except JSONDecodeError: - with sysio.StringIO() as fout: - str_data = data.decode('utf-8') - # only replace the N/A without surrounding double quote - fout.write(re.sub(r'(?', abs(float(ls[size_idx])), - round((float(ls[allocation_idx]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[ - allocation_idx] else None, - round((float(ls[release_idx]) - self.profile_data.profiler_start_ts) / 1000, 3) if ls[ - release_idx] else None, - round(float(ls[duration_idx]) / 1000, 3) if ls[duration_idx] else None - ] - display_datas[device_type].append(nums) - except ValueError: - logger.error(f'File "{path}" has wrong data format in row {idx + 2} and will skip it.') - table['rows'] = display_datas - for name in display_datas: - devices_type.append(name) - table['metadata'].update({'default_device': devices_type[0]}) - return { - 'operator': table, - 'component': peak_memory_events - } - - def _get_memory_all_curve(self): - time_metric: str = 'ms' - memory_metric: str = 'MB' - cano = Canonicalizer(time_metric, memory_metric) - process_devices_type, process_peaks = RunGenerator.get_process_peaks_and_devices_type(self.process_data, - memory_metric) - total_result = { - 'metadata': { - 'devices': process_devices_type, - 'default_device': process_devices_type[0] if len(process_devices_type) > 0 else '', - 'peaks': process_peaks, - 'totals': {}, - 'first_ts': 0, - 'time_metric': cano.time_metric, - 'memory_metric': cano.memory_metric, - 'time_factor': cano.time_factor, - 'memory_factor': cano.memory_factor, - }, - 'columns': defaultdict(list), - 'rows': self.process_data - } - for device in process_devices_type: - if self.process_data.get(device).get('Allocated') is not None and self.process_data.get(device).get( - 'Reserved') is not None: - total_result['columns'][device].append( - {'name': f'Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'PTA+GE memory in use.'}) - total_result['columns'][device].append( - {'name': f'Reserved ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'PTA+GE reserved memory by allocator, both used and unused.'}) - if len(total_result['columns'][device]) > 0: - total_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', 'type': 'number', - 'tooltip': 'Time since profiler starts.'}) - pta_ge_devices_type, pta_ge_peaks = RunGenerator.get_pta_ge_peaks_and_devices_type(self.component_curve_data, - memory_metric) - component_curve_result = { - 'metadata': { - 'devices': pta_ge_devices_type, - 'default_device': pta_ge_devices_type[0] if len(pta_ge_devices_type) > 0 else '', - 'peaks': pta_ge_peaks, - 'totals': {}, - 'first_ts': 0, - 'time_metric': cano.time_metric, - 'memory_metric': cano.memory_metric, - 'time_factor': cano.time_factor, - 'memory_factor': cano.memory_factor, - }, - 'columns': defaultdict(list), - 'rows': self.component_curve_data - } 
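In `_npu_get_overlap` above, rows that share a step key are summed element-wise with `np.add`. A small self-contained sketch with made-up numbers:

```python
from collections import OrderedDict

import numpy as np

# Made-up per-step rows: [computing, overlapped, communication(not overlapped), free]
parsed_rows = [('1', [4.0, 1.0, 2.0, 3.0]), ('1', [6.0, 1.0, 2.0, 1.0])]
overlap_by_steps = OrderedDict()
for key, overlap in parsed_rows:
    if key in overlap_by_steps:
        # Same accumulation pattern as _npu_get_overlap.
        overlap_by_steps[key] = list(np.add(overlap, overlap_by_steps[key]))
    else:
        overlap_by_steps[key] = list(overlap)
# overlap_by_steps == OrderedDict({'1': [10.0, 2.0, 4.0, 4.0]})
```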
- for device in pta_ge_devices_type: - if self.component_curve_data.get(device).get('PTA') is not None: - component_curve_result['columns'][device] += [ - {'name': f'PTA Allocated ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'PTA memory in use.'}, - {'name': f'PTA Reserved ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'PTA reserved memory by allocator, both used and unused.'}] - if self.component_curve_data.get(device).get('GE') is not None: - component_curve_result['columns'][device] += [ - {'name': f'GE Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'GE memory in use.'}, - {'name': f'GE Reserved ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'GE reserved memory by allocator, both used and unused.'}] - if self.component_curve_data.get(device).get('APP') is not None: - component_curve_result['columns'][device] += [ - {'name': f'APP Reserved ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'APP reserved memory by allocator, both used and unused.'}] - if len(component_curve_result['columns'][device]) > 0: - component_curve_result['columns'][device].insert(0, {'name': f'Time ({cano.time_metric})', - 'type': 'number', - 'tooltip': 'Time since profiler starts.'}) - device_types = list(set(process_devices_type + pta_ge_devices_type)) - return { - 'devices': device_types, - 'default_device': device_types[0], - 'total': total_result, - 'ptaGe': component_curve_result - } - - def _handle_memory_data(self): - process_data = defaultdict() - pta_or_ge_data = defaultdict() - path = self.profile_data.memory_curve_path - datas = RunGenerator.get_csv_data(path) - required_column_idxs = { - 'Component': -1, - 'Device Type': -1, - 'Timestamp(us)': -1, - 'Total Reserved(MB)': -1, - 'Total Allocated(MB)': -1 - } - (tag_type_idx, device_type_idx, time_idx, reserved_idx, allocated_idx), column_exist_count = \ - RunGenerator.check_csv_columns(datas[0], required_column_idxs) - if column_exist_count < len(required_column_idxs): - logger.error('Required column is missing in file "memory_record.csv"') - else: - for idx, ls in enumerate(datas[1:]): - try: - time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 3) - device_type = ls[device_type_idx] - if ls[tag_type_idx] == 'PTA+GE': - process_data.setdefault(device_type, {}).setdefault('Allocated', []).append( - [time_column, round(float(ls[allocated_idx]), 3)]) - process_data.setdefault(device_type, {}).setdefault('Reserved', []).append( - [time_column, round(float(ls[reserved_idx]), 3)]) - elif ls[tag_type_idx] == 'APP': - line_chart_data = [time_column, None, round(float(ls[reserved_idx]), 3)] - pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append( - line_chart_data) - elif ls[tag_type_idx] in ('PTA', 'GE'): - line_chart_data = [ - time_column, round(float(ls[allocated_idx]), 3), - round(float(ls[reserved_idx]), 3) - ] - pta_or_ge_data.setdefault(device_type, {}).setdefault(ls[tag_type_idx], []).append( - line_chart_data) - except ValueError: - logger.error(f'File "{path}" has wrong data format in row {idx + 2} and will skip it.') - - return process_data, pta_or_ge_data - - def _handle_memory_component(self): - peak_memory_events = { - 'metadata': { - 'title': 'Component Peak Memory', - 'default_device': '', - }, - 'columns': [{'name': 'Component', 'type': 'string'}, - {'name': 'Peak Memory Reserved(MB)', 'type': 'number'}, - {'name': 'Time(ms)', 'type': 'number'}] - } - peak_memory_rows = defaultdict(list) - path = 
self.profile_data.memory_component_path - component_datas = RunGenerator.get_csv_data(path) - if component_datas: - required_column_idxs = { - 'Component': -1, - 'Timestamp(us)': -1, - 'Total Reserved(MB)': -1, - 'Device': -1 - } - (tag_type_idx, time_idx, reserved_idx, device_type_idx), column_exist_count = \ - RunGenerator.check_csv_columns(component_datas[0], required_column_idxs) - if column_exist_count < len(required_column_idxs): - logger.error(f'Required column is missing in file "{path}"') - else: - for idx, ls in enumerate(component_datas[1:]): - memory_curve_id_dict = { - 'device_type_idx': device_type_idx, - 'reserved_idx': reserved_idx, - 'tag_type_idx': tag_type_idx, - 'time_idx': time_idx - } - try: - self._handle_peak_memory_rows(memory_curve_id_dict, ls, peak_memory_rows) - except (ValueError, TypeError): - logger.error(f'File "{path}" has wrong data format in row {idx + 2} and will skip it.') - peak_memory_events['rows'] = peak_memory_rows - return peak_memory_events - - def _handle_peak_memory_rows(self, memory_curve_id_dict, ls, peak_memory_rows): - # Record the peak memory usage of other components. - has_flag = False - device_type_idx = memory_curve_id_dict.get('device_type_idx') - reserved_idx = memory_curve_id_dict.get('reserved_idx') - tag_type_idx = memory_curve_id_dict.get('tag_type_idx') - time_idx = memory_curve_id_dict.get('time_idx') - time_column = round((float(ls[time_idx]) - self.profile_data.profiler_start_ts) / 1000, 3) - for item in peak_memory_rows[ls[device_type_idx]]: - if item[0] == ls[tag_type_idx]: - if item[1] < ls[reserved_idx]: - item[1] = ls[reserved_idx] - item[2] = time_column - elif item[1] == ls[reserved_idx]: - item[2] = min(item[2], time_column) - has_flag = True - break - if not has_flag: - peak_memory_rows[ls[device_type_idx]].append([ls[tag_type_idx], ls[reserved_idx], time_column]) - - def _generate_overview(self): - def build_part_time_str(part_cost: float, part_name: str): - format_str = ('
<div class="visualization-tooltip" style="white-space: nowrap;">' - 'Step {}<br>
' - 'Total: {}us<br>
' - '<b>{}: {}us</b><br>
' - 'Percentage: {}%' - '</div>
') - percentage = 0.0 if costs.costs[ProfileRole.Total] == 0 else round( - 100 * part_cost / costs.costs[ProfileRole.Total], 2) - return format_str.format(step_name, costs.costs[ProfileRole.Total], part_name, part_cost, percentage) - - def build_avg_cost_dict(part_name: str, part_cost: float): - profiler_total_cost = self.profile_data.avg_costs.costs[ProfileRole.Total] - cost_dict = {'name': part_name, - 'description': '', - 'value': round(part_cost), - 'extra': 0.0 if profiler_total_cost == 0 else round(100 * part_cost / profiler_total_cost, 2)} - return cost_dict - - show_gpu = (self.profile_data.has_runtime - or self.profile_data.has_kernel or self.profile_data.has_memcpy_or_memset) - - column_tootip = {'type': 'string', 'role': 'tooltip', 'p': {'html': 'true'}} - data = {} - data['steps'] = {} - data['steps']['columns'] = ['Step'] - if show_gpu: - data['steps']['columns'].extend(['Kernel', 'Memcpy', 'Memset']) - if self.profile_data.has_communication: - data['steps']['columns'].append('Communication') - if show_gpu: - data['steps']['columns'].append('Runtime') - data['steps']['columns'].extend(['DataLoader', 'CPU Exec', 'Other']) - - data['steps']['rows'] = [] - for i, costs in enumerate(self.profile_data.steps_costs): - step_name = self.profile_data.steps_names[i] - row = [{'value': step_name}] - if show_gpu: - row.extend([{'value': costs.costs[ProfileRole.Kernel], - 'tooltip': build_part_time_str(costs.costs[ProfileRole.Kernel], 'Kernel')}, - {'value': costs.costs[ProfileRole.Memcpy], - 'tooltip': build_part_time_str(costs.costs[ProfileRole.Memcpy], 'Memcpy')}, - {'value': costs.costs[ProfileRole.Memset], - 'tooltip': build_part_time_str(costs.costs[ProfileRole.Memset], 'Memset')}]) - if self.profile_data.has_communication: - row.append({'value': costs.costs[ProfileRole.Communication], - 'tooltip': build_part_time_str(costs.costs[ProfileRole.Communication], 'Communication')}) - if show_gpu: - row.append({'value': costs.costs[ProfileRole.Runtime], - 'tooltip': build_part_time_str(costs.costs[ProfileRole.Runtime], 'Runtime')}) - row.extend([{'value': costs.costs[ProfileRole.DataLoader], - 'tooltip': build_part_time_str(costs.costs[ProfileRole.DataLoader], 'DataLoader')}, - {'value': costs.costs[ProfileRole.CpuOp], - 'tooltip': build_part_time_str(costs.costs[ProfileRole.CpuOp], 'CPU Exec')}, - {'value': costs.costs[ProfileRole.Other], - 'tooltip': build_part_time_str(costs.costs[ProfileRole.Other], 'Other')}]) - data['steps']['rows'].append(row) - - avg_costs = [] - if show_gpu: - avg_costs.extend([ - build_avg_cost_dict('Kernel', self.profile_data.avg_costs.costs[ProfileRole.Kernel]), - build_avg_cost_dict('Memcpy', self.profile_data.avg_costs.costs[ProfileRole.Memcpy]), - build_avg_cost_dict('Memset', self.profile_data.avg_costs.costs[ProfileRole.Memset]) - ]) - if self.profile_data.has_communication: - avg_costs.extend([ - build_avg_cost_dict('Communication', self.profile_data.avg_costs.costs[ProfileRole.Communication]) - ]) - if show_gpu: - avg_costs.extend([ - build_avg_cost_dict('Runtime', self.profile_data.avg_costs.costs[ProfileRole.Runtime]) - ]) - avg_costs.extend([ - build_avg_cost_dict('DataLoader', self.profile_data.avg_costs.costs[ProfileRole.DataLoader]), - build_avg_cost_dict('CPU Exec', self.profile_data.avg_costs.costs[ProfileRole.CpuOp]), - build_avg_cost_dict('Other', self.profile_data.avg_costs.costs[ProfileRole.Other]) - ]) - - data['performance'] = [ - {'name': 'Average Step Time', 'description': '', - 'value': 
round(self.profile_data.avg_costs.costs[ProfileRole.Total]), - 'extra': 100, 'children': avg_costs} - ] - - if len(self.profile_data.recommendations) == 0: - html = '
<li>N/A</li>' - else: - html = '' - for recommendation in self.profile_data.recommendations: - html += '<li>{}</li>'.format(recommendation) - data['recommendations'] = '<ul>{}</ul>
    '.format(html) - - return data - - def _generate_op_pie(self, group_by_input_shape: bool = False): - op_device_total_time = [] - op_device_self_time = [] - op_host_total_time = [] - op_host_self_time = [] - - if group_by_input_shape: - op_list = self.profile_data.op_list_groupby_name_input - else: - op_list = self.profile_data.op_list_groupby_name - - for op_agg in op_list: - # Whether device_duration & self_device_duration are accurate or not depends on the input tracing data. - if op_agg.device_duration > 0: - op_device_total_time.append([op_agg.name, op_agg.device_duration]) - if op_agg.self_device_duration > 0: - op_device_self_time.append([op_agg.name, op_agg.self_device_duration]) - if op_agg.host_duration > 0: - op_host_total_time.append([op_agg.name, op_agg.host_duration]) - if op_agg.self_host_duration > 0: - op_host_self_time.append([op_agg.name, op_agg.self_host_duration]) - - op_device_total_time.sort(key=lambda x: x[1], reverse=True) - op_device_self_time.sort(key=lambda x: x[1], reverse=True) - op_host_total_time.sort(key=lambda x: x[1], reverse=True) - op_host_self_time.sort(key=lambda x: x[1], reverse=True) - - data = {} - device_total_time = {} - device_self_time = {} - host_total_time = {} - host_self_time = {} - - if len(op_device_total_time) > 0: - device_total_time['title'] = 'Device Total Time (us)' - device_total_time['columns'] = [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}] - device_total_time['rows'] = op_device_total_time - else: - device_total_time = None - - if len(op_device_self_time) > 0: - device_self_time['title'] = 'Device Self Time (us)' - device_self_time['columns'] = [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}] - device_self_time['rows'] = op_device_self_time - else: - device_self_time = None - - if len(op_host_total_time) > 0: - host_total_time['title'] = 'Host Total Time (us)' - host_total_time['columns'] = [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}] - host_total_time['rows'] = op_host_total_time - else: - host_total_time = None - - if len(op_host_self_time) > 0: - host_self_time['title'] = 'Host Self Time (us)' - host_self_time['columns'] = [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}] - host_self_time['rows'] = op_host_self_time - else: - host_self_time = None - - data['device_total_time'] = device_total_time - data['device_self_time'] = device_self_time - data['host_total_time'] = host_total_time - data['host_self_time'] = host_self_time - - return data - - def _generate_op_table(self, op_list: Iterable[OperatorAgg], group_by_input_shape=False, call_stack=False): - show_gpu = self.profile_data.has_kernel or self.profile_data.has_memcpy_or_memset - - if group_by_input_shape: - stack_list_dict = self.profile_data.stack_lists_group_by_name_input - else: - stack_list_dict = self.profile_data.stack_lists_group_by_name - - op_list = sorted(op_list, - key=lambda x: x.self_device_duration if show_gpu else x.self_host_duration, - reverse=True) - - data = list() - result = { - 'metadata': { - 'sort': 'device_self_duration' if show_gpu else 'host_self_duration', - 'tooltips': { - 'tc_eligible': consts.TOOLTIP_OP_TC_ELIGIBLE, - 'tc_self_ratio': consts.TOOLTIP_OP_TC_SELF, - 'tc_total_ratio': consts.TOOLTIP_OP_TC_TOTAL - } - }, - 'data': data - } - for op in op_list: - # Whether device_duration & self_device_duration are accurate or not depends on the input tracing data. 
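Each non-empty pie assembled by `_generate_op_pie` above shares the same column layout; a hypothetical payload (operator names and durations are invented) looks like this:

```python
# Hypothetical example of one pie payload produced by _generate_op_pie;
# rows are [name, duration in us] sorted in descending order of duration.
device_self_time = {
    'title': 'Device Self Time (us)',
    'columns': [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}],
    'rows': [['aten::mm', 1250], ['aten::add_', 310]],
}
```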
- row = dict() - row['name'] = op.name - if group_by_input_shape: - row['input_shape'] = op.input_shape - row['calls'] = op.calls - if show_gpu: - row['device_self_duration'] = round(op.self_device_duration) - row['device_total_duration'] = round(op.device_duration) - row['host_self_duration'] = round(op.self_host_duration) - row['host_total_duration'] = round(op.host_duration) - row['tc_eligible'] = 'Yes' if op.tc_eligible else 'No' - row['tc_self_ratio'] = round(100 * op.tc_self_ratio, 2) - row['tc_total_ratio'] = round(100 * op.tc_total_ratio, 2) - if call_stack: - row['call_stack'] = op.callstacks.pop() - else: - if group_by_input_shape: - key = op.name + '###' + str(op.input_shape) - else: - key = op.name - row['has_call_stack'] = key in stack_list_dict - data.append(row) - - return result - - def _generate_op_table_for_stack(self, group_by_input_shape: bool): - if group_by_input_shape: - stack_list_dict = self.profile_data.stack_lists_group_by_name_input - else: - stack_list_dict = self.profile_data.stack_lists_group_by_name - - result = dict() - for k, v in stack_list_dict.items(): - result[k] = self._generate_op_table(v, group_by_input_shape, True) - return result - - def _generate_kernel_op_table_gpu(self): - table = {} - result = { - 'metadata': { - 'sort': 'Total Duration (us)' - }, - 'data': table - } - table['columns'] = [ - {'type': 'string', 'name': 'Name'}, - {'type': 'string', 'name': 'Operator'}, - {'type': 'string', 'name': 'Grid'}, - {'type': 'string', 'name': 'Block'}, - {'type': 'number', 'name': 'Register Per Thread'}, - {'type': 'number', 'name': 'Shared Memory'}, - {'type': 'string', 'name': 'Kernel Uses Tensor Cores', - 'tooltip': consts.TOOLTIP_KERNEL_USES_TC}, - {'type': 'string', 'name': 'Op is Tensor Cores eligible', - 'tooltip': consts.TOOLTIP_KERNEL_OP_TC_ELIGIBLE} - ] - col_names = ['Calls', 'Total Duration (us)', 'Mean Duration (us)', 'Max Duration (us)', 'Min Duration (us)'] - for column in col_names: - table['columns'].append({'type': 'number', 'name': column}) - gpu_metrics_columns = self.profile_data.gpu_metrics_parser.get_gpu_metrics_columns() - table['columns'].extend(gpu_metrics_columns) - - table['rows'] = [] - kernel_list: List[KernelAggByNameOp] = sorted( - self.profile_data.kernel_list_groupby_name_op, key=lambda x: x.total_duration, reverse=True) - for agg_by_name_op in kernel_list: - kernel_op_row = [ - agg_by_name_op.name, agg_by_name_op.op_name, - str(agg_by_name_op.grid), str(agg_by_name_op.block), - str(agg_by_name_op.regs_per_thread or '0'), str(agg_by_name_op.shared_memory or '0'), - 'Yes' if agg_by_name_op.tc_used else 'No', - 'Yes' if agg_by_name_op.op_tc_eligible else 'No', - agg_by_name_op.calls, - agg_by_name_op.total_duration, round(agg_by_name_op.avg_duration), - agg_by_name_op.max_duration, agg_by_name_op.min_duration - ] - if self.profile_data.gpu_metrics_parser.has_blocks_per_sm: - kernel_op_row.append(round(agg_by_name_op.avg_blocks_per_sm, 2)) - if self.profile_data.gpu_metrics_parser.has_occupancy: - kernel_op_row.append(round(agg_by_name_op.avg_occupancy, 2)) - table['rows'].append(kernel_op_row) - return result - - def _generate_kernel_pie_gpu(self): - pie = {'columns': [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}], 'rows': []} - for _id, (name, row) in enumerate(self.profile_data.kernel_stat.iterrows()): - pie['rows'].append([name, row['sum']]) - data = {'total': pie, 'device_target': self.device_target} - return data - - def _generate_kernel_table_gpu(self): - table = {} - result = { - 
'metadata': { - 'sort': 'Total Duration (us)' - }, - 'data': table - } - table['columns'] = [ - {'type': 'string', 'name': 'Name'}, - {'type': 'string', 'name': 'Tensor Cores Used', - 'tooltip': consts.TOOLTIP_KERNEL_USES_TC} - ] - columns = ['count', 'sum', 'mean', 'max', 'min'] - round_digits = [0, 0, 0, 0, 0] - if self.profile_data.gpu_metrics_parser.has_blocks_per_sm: - columns.append('blocks_per_sm') - round_digits.append(2) - if self.profile_data.gpu_metrics_parser.has_occupancy: - columns.append('occupancy') - round_digits.append(2) - col_names = ['Calls', 'Total Duration (us)', 'Mean Duration (us)', 'Max Duration (us)', 'Min Duration (us)'] - for column in col_names: - table['columns'].append({'type': 'number', 'name': column}) - gpu_metrics_columns = self.profile_data.gpu_metrics_parser.get_gpu_metrics_columns() - table['columns'].extend(gpu_metrics_columns) - - table['rows'] = [] - for _id, (name, row) in enumerate(self.profile_data.kernel_stat.iterrows()): - kernel_row = [name, 'Yes' if row['tc_used'] else 'No'] - for i, column in enumerate(columns): - kernel_row.append(round(row[column]) if round_digits[i] == 0 - else round(row[column], round_digits[i])) - table['rows'].append(kernel_row) - return result - - def _generate_tc_pie_gpu(self): - pie = {'columns': [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}], 'rows': []} - pie['rows'].append(['Using Tensor Cores', self.profile_data.tc_used_ratio]) - pie['rows'].append(['Not Using Tensor Cores', 1.0 - self.profile_data.tc_used_ratio]) - data = {'total': pie} - return data - - def _generate_kernel_op_table_npu(self): - table = {} - result = { - 'metadata': { - 'sort': 'Calls' - }, - 'data': table - } - table['columns'] = [{'type': 'string', 'name': 'Name'}, - {'type': 'number', 'name': 'Calls'}, - {'type': 'number', 'name': 'Total Durations(us)'}, - {'type': 'number', 'name': 'Min Durations(us)'}, - {'type': 'number', 'name': 'Avg Durations(us)'}, - {'type': 'number', 'name': 'Max Durations(us)'} - ] - table['rows'] = [] - for key, value in self.statistic_data.items(): - temp = [key] - for val in value.values(): - temp.append(val) - table['rows'].append(temp) - return result - - def _generate_kernel_pie_npu(self): - pie = {'columns': [{'type': 'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}], 'rows': []} - for key, val in self.statistic_data.items(): - data = [key, float(val['Total'])] - pie['rows'].append(data) - datas = {'total': pie, 'device_target': self.device_target} - return datas - - def _generate_kernel_table_npu(self): - table = {'columns': [], 'rows': []} - result = { - 'metadata': { - 'sort': 'Duration (us)' - }, - 'data': table - } - path = self.profile_data.kernel_file_path - datas = RunGenerator.get_csv_data(path) - required_column_idxs = { - 'Name': -1, - 'Duration(us)': -1, - 'Accelerator Core': -1 - } - (name_idx, duration_idx, core_type_idx), column_exist_count = \ - RunGenerator.check_csv_columns(datas[0], required_column_idxs) - if column_exist_count < 3: - logger.error('Required column is missing in file "kernel_details.csv"') - else: - for column in datas[0]: - if column in ('Duration(us)', 'Start Time(us)', 'Wait Time(us)', 'Block Dim'): - table['columns'].append({'type': 'number', 'name': column}) - else: - table['columns'].append({'type': 'string', 'name': column}) - - self._handle_kernel_table_rows(name_idx, duration_idx, core_type_idx, datas[1:], path) - table['rows'] = datas[1:] - return result - - def _generate_tc_pie_npu(self): - pie = {'columns': [{'type': 
'string', 'name': 'name'}, {'type': 'number', 'name': 'value'}], 'rows': []} - for key, val in self.accelerator_data.items(): - pie['rows'].append(['Using ' + key.replace('_', ' '), val]) - data = {'total': pie} - return data - - @staticmethod - def get_gpu_info(device_props, gpu_id): - if (device_props is None) or (gpu_id >= len(device_props)) or (gpu_id < 0): - return None - - device_prop: Dict = device_props[gpu_id] - gpu_info = {} - name = device_prop.get('name') - if name is not None: - gpu_info['Name'] = name - - mem = device_prop.get('totalGlobalMem') - if mem is not None: - try: - gpu_info['Memory'] = '{} GB'.format(round(float(mem) / 1024 / 1024 / 1024, 2)) - gpu_info['Memory Raw'] = mem - except ValueError: - logger.warning('The value of "totalGlobalMem" must be number in the JSON, please check it.') - - major = device_prop.get('computeMajor') - minor = device_prop.get('computeMinor') - if major is not None and minor is not None: - gpu_info['Compute Capability'] = '{}.{}'.format(major, minor) - - return gpu_info - - def _handle_kernel_table_rows(self, name_idx, duration_idx, core_type_idx, rows, path): - for idx, row in enumerate(rows): - call_name = row[name_idx] - try: - call_duration = float(row[duration_idx]) - except ValueError: - logger.error( - f'File "{path}" has wrong data format in row {idx + 2} in Duration column and will skip the row.') - continue - call_type = row[core_type_idx] - if self.accelerator_data.get(call_type) is not None: - self.accelerator_data[call_type] += call_duration - else: - self.accelerator_data[call_type] = call_duration - - if self.statistic_data.get(call_name) is not None: - temp = self.statistic_data.get(call_name, {}) - temp['Max'] = max(temp.get('Max', 0), call_duration) - temp['Min'] = min(temp.get('Min', 0), call_duration) - temp['Total'] = round(temp.get('Total', 0) + call_duration, 2) - temp['Calls'] = temp.get('Calls', 0) + 1 - if temp['Calls'] == 0: - logger.error( - f'temp["Calls"] is zero which can not be divisor.') - temp['Average'] = 0 - else: - temp['Average'] = round(temp['Total'] / temp['Calls'], 2) - else: - self.statistic_data[call_name] = { - 'Calls': 1, - 'Total': call_duration, - 'Min': call_duration, - 'Average': call_duration, - 'Max': call_duration - } - - -class DistributedRunGenerator(object): - def __init__(self, all_profile_data: Iterable[DistributedRunProfileData], span, device_target): - self.all_profile_data = all_profile_data - self.span = span - self.device_target = device_target - - def generate_run_profile(self): - profile_run = DistributedRunProfile(self.span) - profile_run.views.append(consts.DISTRIBUTED_VIEW) - profile_run.gpu_info = self._generate_gpu_info() - profile_run.steps_to_overlap = self._generate_overlap_graph() - profile_run.steps_to_wait = self._generate_wait_graph() - profile_run.comm_ops = self._generate_ops_table() - return profile_run - - def _generate_gpu_info(self): - # first key is node name, the second key is process id, the third key is GPU0/, - # the value is the gpu info json - result: Dict[str, Dict[str, Dict[str, Dict]]] = OrderedDict() - index = 0 - for data in sorted(self.all_profile_data, key=lambda x: x.worker): - if not data.device_props: - continue - - match = consts.NODE_PROCESS_PATTERN.match(data.worker) - if match: - node = match.group(1) - process_id = match.group(2) - else: - logger.warning('cannot parse node name from worker name {}'.format(data.worker)) - node = data.worker - process_id = index - index += 1 - if node not in result: - result[node] = OrderedDict() - - 
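`_generate_gpu_info` above splits each worker name into a node name and a process id via `consts.NODE_PROCESS_PATTERN`, falling back to the raw worker name plus a running index when the pattern does not match. The sketch below uses a stand-in regex, since the real pattern lives in `consts.py` and is not shown here:

```python
import re

# Stand-in pattern for illustration only; the plugin's actual pattern is
# consts.NODE_PROCESS_PATTERN and may differ.
node_process_pattern = re.compile(r'^(.*)_(\d+)$')

def split_worker_name(worker, fallback_index):
    match = node_process_pattern.match(worker)
    if match:
        return match.group(1), match.group(2)
    # Mirror the fallback in _generate_gpu_info: keep the worker name and
    # use a running index as the process id.
    return worker, fallback_index

print(split_worker_name('node1_12345', 0))  # ('node1', '12345')
print(split_worker_name('worker0', 3))      # ('worker0', 3)
```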
process_id = 'Process ' + str(process_id) - result[node][process_id] = OrderedDict() - for used_device in data.used_devices: - gpu_info = RunGenerator.get_gpu_info(data.device_props, used_device) - if gpu_info is not None: - result[node][process_id]['GPU' + str(used_device)] = gpu_info - - if result: - for k, v in result.items(): - result[k] = OrderedDict(sorted(v.items())) - return { - 'metadata': {'title': 'Device Information'}, - 'data': result - } - else: - return None - - def _generate_overlap_graph(self): - result = dict() - result['metadata'] = { - 'title': 'Computation/Communication Overview', - 'legends': ['Computation', 'Overlapping', 'Communication', 'Other'], - 'units': 'us' - } - steps_to_overlap: Dict[str, Dict[str, List[int]]] = OrderedDict() - steps_to_overlap['all'] = OrderedDict() - for data in self.all_profile_data: - steps_to_overlap['all'][data.worker] = [0, 0, 0, 0] - step_number = len(data.steps_names) - if step_number <= 0: - return None - if self.device_target != 'Ascend': - DistributedRunGenerator._get_gpu_overlap_data(data, steps_to_overlap) - else: - DistributedRunGenerator._get_npu_overlap_data(data, steps_to_overlap) - - steps_to_overlap['all'][data.worker] = [int(x / step_number) for x in steps_to_overlap['all'][data.worker]] - for k, v in steps_to_overlap.items(): - steps_to_overlap[k] = OrderedDict(sorted(v.items())) - result['data'] = steps_to_overlap - return result - - @staticmethod - def _get_gpu_overlap_data(data, steps_to_overlap): - for i, step_name in enumerate(data.steps_names): - steps_to_overlap.setdefault(step_name, OrderedDict()) - costs = data.comm_overlap_costs[i] - steps_to_overlap[step_name][data.worker] = [ - round(costs.computation - costs.overlap, 3), - round(costs.overlap, 3), - round(costs.communication - costs.overlap, 3), - round(costs.other, 3) - ] - steps_to_overlap['all'][data.worker] = [ - sum(x) - for x in zip(steps_to_overlap['all'][data.worker], steps_to_overlap[step_name][data.worker]) - ] - - @staticmethod - def _get_npu_overlap_data(data, steps_to_overlap): - steps = data.step_to_overlap - for k, v in steps.items(): - steps_to_overlap.setdefault(k, OrderedDict()) - # v: computation / overlap / communication_not_overlap / free time - # steps_to_overlap: computation_not_overlap / overlap / communication_not_overlap / other - steps_to_overlap[k][data.worker] = list( - [round(v[0] - v[1], 3), round(v[1], 3), round(v[2], 3), round(v[3], 3)]) - steps_to_overlap['all'][data.worker] = [ - sum(x) - for x in zip(steps_to_overlap['all'][data.worker], steps_to_overlap[k][data.worker]) - ] - - @staticmethod - def _get_npu_wait_data(data, steps_to_wait): - step_number = len(data.step_to_wait) - if step_number <= 0: - return - steps = data.step_to_wait - for k, v in steps.items(): - steps_to_wait.setdefault(k, OrderedDict()) - - trans = round(v.get('trans') * 1000, 3) # 1ms = 1000us - wait = round(v.get('Synchronize') * 1000, 3) # 1ms = 1000us - steps_to_wait[k][data.worker] = list([trans, wait]) - steps_to_wait['all'][data.worker] = [ - sum(x) - for x in zip(steps_to_wait['all'][data.worker], steps_to_wait[k][data.worker]) - ] - steps_to_wait['all'][data.worker] = [x / step_number for x in steps_to_wait['all'][data.worker]] - - @staticmethod - def _get_gpu_wait_data(data, steps_to_wait): - step_number = len(data.step_comm_stats.values()) - if step_number <= 0: - return - for step, comm_stats in data.step_comm_stats.items(): - steps_to_wait.setdefault(step, OrderedDict())[data.worker] = [ - round(comm_stats[1], 3), - round(comm_stats[0] - 
comm_stats[1], 3) - ] - steps_to_wait['all'][data.worker] = [ - sum(x) - for x in zip(steps_to_wait['all'][data.worker], steps_to_wait[step][data.worker]) - ] - steps_to_wait['all'][data.worker] = [int(x / step_number) for x in steps_to_wait['all'][data.worker]] - - def _generate_wait_graph(self): - result = dict() - result['metadata'] = { - 'title': 'Synchronizing/Communication Overview', - 'legends': ['Data Transfer Time', 'Synchronizing Time'], - 'units': 'us' - } - steps_to_wait: Dict[str, Dict[str, List[int]]] = OrderedDict() - - steps_to_wait['all'] = OrderedDict() - for data in self.all_profile_data: - steps_to_wait['all'][data.worker] = [0, 0] - if self.device_target != 'Ascend': - DistributedRunGenerator._get_gpu_wait_data(data, steps_to_wait) - else: - DistributedRunGenerator._get_npu_wait_data(data, steps_to_wait) - for k, v in steps_to_wait.items(): - steps_to_wait[k] = OrderedDict(sorted(v.items())) - result['data'] = steps_to_wait - return result - - def _generate_ops_table(self): - result = dict() - result['metadata'] = {'title': 'Communication Operations Stats'} - workers_to_comm_ops = OrderedDict() - # Ignore the span for distributed view - for data in self.all_profile_data: - table = {} - if self.device_target != 'Ascend': - DistributedRunGenerator._get_gpu_table(data, table) - else: - DistributedRunGenerator._get_npu_table(data, table) - workers_to_comm_ops[data.worker] = table - result['data'] = OrderedDict(sorted(workers_to_comm_ops.items())) - return result - - @staticmethod - def _get_npu_table(data, table): - table['columns'] = [{'type': 'string', 'name': 'Name'}] - col_names = [ - 'Calls', - 'Total Transit Size (bytes)', - 'Avg Transit Size (bytes)', - 'Elapse Time (us)', - 'Avg Elapse Time (us)', - 'Transit Time (us)', - 'Avg Transit Time (us)' - ] - for column in col_names: - table['columns'].append({'type': 'number', 'name': column}) - table['rows'] = [] - ops = data.comm_op - for op, stats in ops.items(): - row = [ - op, - stats[0], - round(stats[1] * 1024 * 1024, 3), - round(stats[1] * 1024 * 1024 / stats[0] if stats != 0 else 0), # 1MB = 1024 * 1024 bytes - round(stats[2] * 1000, 3), - round(stats[2] * 1000 / stats[0] if stats != 0 else 0), # 1ms = 1000us - round(stats[3] * 1000, 3), - round(stats[3] * 1000 / stats[0] if stats != 0 else 0) # 1ms = 1000us - ] - table['rows'].append(row) - - @staticmethod - def _get_gpu_table(data, table): - table['columns'] = [{'type': 'string', 'name': 'Name'}] - col_names = [ - 'Calls', - 'Total Size (bytes)', - 'Avg Size (bytes)', - 'Total Latency (us)', - 'Avg Latency (us)', - 'Data Transfer Time (us)', - 'Avg Data Transfer Time (us)' - ] - for column in col_names: - table['columns'].append({'type': 'number', 'name': column}) - table['rows'] = [] - for op, stats in data.total_comm_stats.items(): - row = [ - op, - stats[0], - round(stats[1], 3), - - round(stats[1] / stats[0] if stats[0] != 0 else 0), - round(stats[2], 3), - round(stats[2] / stats[0] if stats[0] != 0 else 0), - round(stats[3], 3), - round(stats[3] / stats[0] if stats[0] != 0 else 0) - ] - table['rows'].append(row) diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/tensor_core.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/tensor_core.py deleted file mode 100644 index cc53ab217f0ee6f88817c51da6ba46da68df4e28..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/tensor_core.py +++ /dev/null @@ -1,50 +0,0 @@ -# 
------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -class TcAllowlistMeta(type): - # Enable grammar sugar as 'v in TcAllowlist'. - def __contains__(cls, item): - return cls.__contains__(item) - - -class TcAllowlist(metaclass=TcAllowlistMeta): - allowlist = ['h884', 's884', 'h1688', 's1688', 'hmma', 'i8816', '16816', - 'dgrad_1x1_stride_2x2', 'first_layer_wgrad_kernel', 'conv1x1', - 'conv2d_c1_k1', 'direct_group', 'xmma_implicit_gemm', - 'xmma_sparse_conv', 'xmma_warp_specialized_implicit_gemm', - 'xmma_gemm', 'xmma_sparse_gemm', 'c1688'] - - @classmethod - def __contains__(cls, item): - # If kernel name contains substring equal to any one in allowlist, then it uses tensor core. - for pattern in cls.allowlist: - if pattern in item: - return True - return False - - -class TcOpAllowlist(metaclass=TcAllowlistMeta): - allowlist = ['aten::_convolution', 'aten::conv1d', 'aten::conv2d', 'aten::conv3d', 'aten::conv_tbc', - 'aten::conv_transpose1d', 'aten::conv_transpose2d', 'aten::conv_transpose3d', - 'aten::convolution', 'aten::cudnn_convolution', 'aten::cudnn_convolution_transpose', - 'aten::prelu', 'aten::addmm', 'aten::addmv', 'aten::addr', - 'aten::matmul', 'aten::mm', 'aten::mv', - 'aten::linear', 'aten::addbmm', 'aten::baddbmm', 'aten::bmm', - 'aten::chain_matmul', 'aten::linalg_multi_dot', - 'aten::_thnn_fused_lstm_cell', 'aten::_thnn_fused_gru_cell', 'aten::lstm_cell', - 'aten::gru_cell', 'aten::rnn_tanh_cell', 'aten::rnn_relu_cell', - # The backward ops are got by running above ops' backward - # and recording whether it launched kernels. - 'CudnnConvolutionBackward', 'BmmBackward0', - 'aten::cudnn_convolution_transpose_backward', 'CudnnConvolutionTransposeBackward', - 'MmBackward', 'aten::cudnn_convolution_backward_weight', 'aten::addmm_', - 'AddmvBackward', 'MvBackward', - 'aten::cudnn_convolution_transpose_backward_weight', - 'aten::cudnn_convolution_transpose_backward_input', - 'AddmmBackward', 'aten::cudnn_convolution_backward_input', - 'AddbmmBackward', 'aten::cudnn_convolution_backward'] - - @classmethod - def __contains__(cls, item): - # If operator name equals to any one in allowlist, then it is tensor core eligible. - return item in cls.allowlist diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/tensor_cores_parser.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/tensor_cores_parser.py deleted file mode 100644 index e2372d9adc05e96f274300e4d91a23551ed555ec..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/tensor_cores_parser.py +++ /dev/null @@ -1,77 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# ------------------------------------------------------------------------- -from typing import Dict, Iterable, List - -from .. import consts -from .node import OperatorNode - - -class TensorCoresParser: - def __init__(self, tc_ratio: List[float], tc_eligible_ops_kernel_ratio: float): - # For calculating Tensor Cores time ratio per GPU. 
- self.tc_ratio = tc_ratio - self.tc_eligible_ops_kernel_ratio = tc_eligible_ops_kernel_ratio - - @classmethod - def parse_events(cls, tid2tree: Dict[str, OperatorNode], ops: Iterable[OperatorNode], gpu_ids: Iterable[int]): - tc_ratio = cls._calculate_tc_ratio(ops, gpu_ids) - tc_eligible_ops_kernel_ratio = cls._get_tc_eligible_ops_kernel_ratio(tid2tree, ops) - return cls(tc_ratio, tc_eligible_ops_kernel_ratio) - - @staticmethod - def _calculate_tc_ratio(ops: Iterable[OperatorNode], gpu_ids: Iterable[int]): - tc_ratio: List[float] = [None] * consts.MAX_GPU_PER_NODE - tc_time = [0] * consts.MAX_GPU_PER_NODE - total_time = [0] * consts.MAX_GPU_PER_NODE - has_kernel = False - for op in ops: - for rt in op.runtimes: - # 'CallTreeRoot' & 'dummy' kernels are launched out of profiler step, so don't count them. - if not (op.name == 'CallTreeRoot' and rt.name == 'dummy'): - for k in rt.get_kernels(): - has_kernel = True - dur = k.end_time - k.start_time - is_tc_used = k.tc_used - if is_tc_used: - tc_time[k.device_id] += dur - total_time[k.device_id] += dur - if has_kernel: # If no kernel, then keep all self.tc_ratio as None. - for gpu_id in gpu_ids: - if total_time[gpu_id] > 0: - tc_ratio[gpu_id] = tc_time[gpu_id] / total_time[gpu_id] - else: - tc_ratio[gpu_id] = 0.0 - return tc_ratio - - @staticmethod - def _get_bottom_tc_eligible_operators(op_tree_node: OperatorNode): - ops: List[OperatorNode] = [] - for child in op_tree_node.children: - child_ops = TensorCoresParser._get_bottom_tc_eligible_operators(child) - ops.extend(child_ops) - # TC-eligible ops which have children TC-eligible ops will not be regarded as 'bottom'. - if op_tree_node.tc_eligible and len(ops) == 0: - ops.append(op_tree_node) - return ops - - @staticmethod - def _get_tc_eligible_ops_kernel_ratio(tid2tree: Dict[int, OperatorNode], ops: Iterable[OperatorNode]): - def sum_self_kernel_time(ops: Iterable[OperatorNode]): - sum_time = 0 - for op in ops: - for rt in op.runtimes: - # 'CallTreeRoot' & 'dummy' kernels are launched out of profiler step, so don't count them. - if not (op.name == 'CallTreeRoot' and rt.name == 'dummy'): - for k in rt.get_kernels(): - sum_time += k.end_time - k.start_time - return sum_time - - ops_bottom_tc_eligible = [] - for root in tid2tree.values(): - ops_bottom_tc_eligible.extend(TensorCoresParser._get_bottom_tc_eligible_operators(root)) - ops_bottom_tc_eligible_kernel_sum = sum_self_kernel_time(ops_bottom_tc_eligible) - ops_kernel_sum = sum_self_kernel_time(ops) - tc_eligible_ops_kernel_ratio = ops_bottom_tc_eligible_kernel_sum / ops_kernel_sum \ - if ops_kernel_sum > 0 else 0.0 - return tc_eligible_ops_kernel_ratio diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/trace.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/trace.py deleted file mode 100644 index ea09f79666bd184956469f48fc7922854394940d..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/profiler/trace.py +++ /dev/null @@ -1,241 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -------------------------------------------------------------------------- -__all__ = ['EventTypes', 'create_event'] - -from enum import IntEnum -from typing import Dict, Optional - -from .. 
import utils - -logger = utils.get_logger() - -NcclOpNameSet = ['nccl:broadcast', 'nccl:reduce', 'nccl:all_reduce', 'nccl:all_gather', 'nccl:reduce_scatter'] -GlooOpNameSet = ['gloo:broadcast', 'gloo:reduce', 'gloo:all_reduce', 'gloo:all_gather', 'gloo:reduce_scatter'] - - -class DeviceType(IntEnum): - CPU = 0 - CUDA = 1 - - -class EventTypes(object): - TRACE = 'Trace' - OPERATOR = 'Operator' - PROFILER_STEP = 'ProfilerStep' - RUNTIME = 'Runtime' - KERNEL = 'Kernel' - MEMCPY = 'Memcpy' - MEMSET = 'Memset' - PYTHON = 'Python' - MEMORY = 'Memory' - PYTHON_FUNCTION = 'python_function' - MODULE = 'Module' - PL_PROFILE = 'pl_profile' - PL_MODULE = 'pl_module' - USER_ANNOTATION = 'user_annotation' - - -EventTypeMap = { - 'trace': EventTypes.TRACE, - 'cpu_op': EventTypes.OPERATOR, - 'operator': EventTypes.OPERATOR, - 'runtime': EventTypes.RUNTIME, - 'cuda_runtime': EventTypes.RUNTIME, - 'kernel': EventTypes.KERNEL, - 'memcpy': EventTypes.MEMCPY, - 'gpu_memcpy': EventTypes.MEMCPY, - 'memset': EventTypes.MEMSET, - 'gpu_memset': EventTypes.MEMSET, - 'python': EventTypes.PYTHON, - 'memory': EventTypes.MEMORY, - 'python_function': EventTypes.PYTHON_FUNCTION, - 'user_annotation': EventTypes.USER_ANNOTATION, - 'gpu_user_annotation': EventTypes.USER_ANNOTATION -} - - -class BaseEvent(object): - def __init__(self, event_type, data): - self.type: str = event_type - self.name: str = data.get('name') - self.ts: int = data.get('ts') - self.pid: int = data.get('pid') - self.tid: int = data.get('tid') - self.args: Dict = data.get('args', {}) - - -class DurationEvent(BaseEvent): - def __init__(self, event_type, data): - super().__init__(event_type, data) - self.category: str = data.get('cat', '') - self.duration: int = data.get('dur') - - extern_id: Optional[int] = self.args.get('external id') - if extern_id is None: - extern_id = self.args.get('External id') - self.external_id = extern_id - self.correlation_id: Optional[int] = self.args.get('correlation') - - -class KernelEvent(DurationEvent): - def __init__(self, event_type, data): - super().__init__(event_type, data) - self.occupancy = self.args.get('est. 
achieved occupancy %') - self.blocks_per_sm = self.args.get('blocks per SM') - self.grid = self.args.get('grid') - self.block = self.args.get('block') - self.regs_per_thread = self.args.get('registers per thread') - self.shared_memory = self.args.get('shared memory') - self.device_id = self.args.get('device') - - -class OperatorEvent(DurationEvent): - def __init__(self, event_type, data): - super().__init__(event_type, data) - self.callstack = self.args.get('Call stack') - self.input_type = self.args.get('Input type') - - shape = self.args.get('Input Dims') - if shape is None: - # Setting shape to '[]' other None is to align with autograd result - shape = self.args.get('Input dims', []) - self.input_shape = shape - - -class ProfilerStepEvent(OperatorEvent): - def __init__(self, data): - super().__init__(EventTypes.PROFILER_STEP, data) - # torch.profiler.profile.step will invoke record_function with name like 'ProfilerStep#5' - self.step: int = int(self.name.split('#')[1]) - - -class MemoryEvent(BaseEvent): - def __init__(self, event_type, data): - super().__init__(event_type, data) - self.scope: str = data.get('s', '') - self.device_id: int = self.args.get('Device Id') - dtype = self.args.get('Device Type') - if dtype is not None: - try: - dtype = DeviceType(dtype) - except ValueError: - dtype = None - - self.device_type: DeviceType = dtype - - @property - def addr(self): - return self.args.get('Addr') - - @property - def bytes(self): - return self.args.get('Bytes', 0) - - @property - def total_allocated(self): - return self.args.get('Total Allocated', float('nan')) - - @property - def total_reserved(self): - return self.args.get('Total Reserved', float('nan')) - - -class PythonFunctionEvent(DurationEvent): - def __init__(self, event_type, data): - super().__init__(event_type, data) - self.python_id: int = self.args.get('Python id') - self.python_parent_id: int = self.args.get('Python parent id') - - -class ModuleEvent(PythonFunctionEvent): - def __init__(self, data): - super().__init__(EventTypes.MODULE, data) - self.module_id: int = self.args.get('Python module id') - - -class PLProfileEvent(DurationEvent): - def __init__(self, data): - super().__init__(EventTypes.PL_PROFILE, data) - self.name = self.name.replace('[pl][profile]', '') - - -class PLModuleEvent(DurationEvent): - def __init__(self, data): - super().__init__(EventTypes.PL_MODULE, data) - self.module_id = 0 # just to be compatible with ModuleEvent processing - self.name = self.name.replace('[pl][module]', '') - self.module_type = self.name[:self.name.find(': ')] - self.name = self.name[self.name.find(': ') + 2:] - - -def create_event(event, is_pytorch_lightning) -> Optional[BaseEvent]: - try: - event_type = event.get('ph') - if event_type == 'X': - return create_trace_event(event, is_pytorch_lightning) - elif event_type == 'i' and event.get('name') == '[memory]': - return MemoryEvent(EventTypes.MEMORY, event) - else: - return None - except Exception as ex: - logger.warning('Failed to parse profile event. Exception=%s. 
Event=%s', ex, event, exc_info=True) - raise - - -def create_trace_event(event, is_pytorch_lightning) -> Optional[BaseEvent]: - category = event.get('cat') - event_type = EventTypeMap.get(category.lower()) if category else None - if event_type == EventTypes.USER_ANNOTATION: - name = event.get('name') - if name and name.startswith('ProfilerStep#'): - return ProfilerStepEvent(event) - if name in GlooOpNameSet or name in NcclOpNameSet: - return OperatorEvent(event_type, event) - elif event_type == EventTypes.OPERATOR: - name = event.get('name') - if name and name.startswith('ProfilerStep#'): - return ProfilerStepEvent(event) - if is_pytorch_lightning: - if name and name.startswith('[pl][profile]'): - return PLProfileEvent(event) - elif name and name.startswith('[pl][module]'): - return PLModuleEvent(event) - return OperatorEvent(event_type, event) - elif event_type == EventTypes.PYTHON: - return OperatorEvent(event_type, event) - elif event_type == EventTypes.KERNEL: - return KernelEvent(event_type, event) - elif event_type == EventTypes.PYTHON_FUNCTION: - if is_pytorch_lightning: - return None - args = event.get('args') - if args and args.get('Python module id') is not None: - return ModuleEvent(event) - else: - return PythonFunctionEvent(event_type, event) - elif event_type is not None: - return DurationEvent(event_type, event) - return None - - -def create_association_events(events) -> Dict[int, int]: - forward_map = {} - backward_map = {} - - result = {} - for e in events: - ph = e.get('ph') - e_id = e['id'] - ts = e['ts'] - if ph == 's': - forward_map[e_id] = ts - elif ph == 'f': - backward_map[e_id] = ts - - for e_id, ts in forward_map.items(): - backward_ts = backward_map.get(e_id) - if backward_ts is not None: - result[ts] = backward_ts - - return result diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py deleted file mode 100644 index 9e30f225244280df7acfd7d2deb95a40208cfa54..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/run.py +++ /dev/null @@ -1,525 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling. -# -------------------------------------------------------------------------- -from collections import defaultdict -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union - -from . import consts, utils -from .profiler.diffrun import compare_op_tree, diff_summary -from .profiler.memory_parser import MemoryMetrics, MemoryRecord, MemorySnapshot -from .profiler.module_op import Stats -from .profiler.node import OperatorNode -from .utils import Canonicalizer, DisplayRounder - -logger = utils.get_logger() - - -class Run(object): - """ A profiler run. 
For visualization purpose only. - May contain profiling results from multiple workers. E.g. distributed scenario. - """ - - def __init__(self, name, run_dir, device_target='GPU'): - self.name = name - self.run_dir = run_dir - self.profiles: Dict[Tuple[str, str], RunProfile] = {} - self.span_view = {} - self.device_target = device_target - - @property - def workers(self): - # get full worker list and remove the duplicated - worker_list, _ = zip(*self.profiles.keys()) - worker_list = sorted(list(dict.fromkeys(worker_list))) - return worker_list - - @property - def views(self) -> List[consts.View]: - view_set = set() - for profile in self.profiles.values(): - view_set.update(profile.views) - return sorted(list(view_set), key=lambda x: x.id) - - def get_workers(self, view): - worker_set = set() - temp_span_view = {} - for profile in self.profiles.values(): - for v in profile.views: - if v.display_name == view: - worker_set.add(profile.worker) - if not temp_span_view.get(profile.worker): - temp_span_view[profile.worker] = [str(profile.span) if profile.span is not None else 'default'] - else: - temp_span_view[profile.worker].append( - str(profile.span) if profile.span is not None else 'default') - break - self.span_view = temp_span_view - return sorted(list(worker_set)) - - def get_spans(self, worker=None): - if worker is not None: - if self.span_view.get(worker) is None: - return None - spans = self.span_view.get(worker, []) - else: - spans = [s for _, s in self.profiles.keys()] - - spans = list(set(spans)) - if len(spans) == 1 and spans[0] is None: - return None - else: - return sorted(spans) - - def add_profile(self, profile: Union['DistributedRunProfile', 'RunProfile']): - span = profile.span - if span is None: - span = 'default' - else: - span = str(span) - self.profiles[(profile.worker, span)] = profile - - def get_profile(self, worker, span) -> Union['DistributedRunProfile', 'RunProfile']: - if worker is None: - raise ValueError('the worker parameter is mandatory') - - if len(self.profiles) == 0: - return None - - return self.profiles.get((worker, span), None) - - def get_profiles(self, *, worker=None, span=None) \ - -> Optional[Union[List['RunProfile'], List['DistributedRunProfile']]]: - # Note: we could not use if span to check it is None or not - # since the span 0 will be skipped at this case. - if worker is not None and span is not None: - return self.profiles.get((worker, span), None) - elif worker is not None: - return [p for (w, s), p in self.profiles.items() if worker == w] - elif span is not None: - return [p for (w, s), p in self.profiles.items() if span == s] - else: - return self.profiles.values() - - -class RunProfile(object): - """ Cooked profiling result for a worker. For visualization purpose only. 
- """ - - def __init__(self, worker, span): - self.worker = worker - self.span = span - self.views: List[consts.View] = [] - self.is_pytorch_lightning = False - self.has_runtime = False - self.has_kernel = False - self.has_communication = False - self.has_memcpy_or_memset = False - self.profiler_start_ts = float('inf') - self.overview = None - self.operation_pie_by_name = None - self.operation_table_by_name = None - self.operation_stack_by_name: Dict = None - self.operation_pie_by_name_input = None - self.operation_table_by_name_input = None - self.operation_stack_by_name_input: Dict = None - self.kernel_op_table = None - self.kernel_pie = None - self.kernel_table = None - self.tc_pie = None - self.trace_file_path: str = None - - self.gpu_metrics = None - - self.gpu_summary = None - self.gpu_tooltip = None - - # for memory stats and curve - self.memory_snapshot: Optional[MemorySnapshot] = None - self.tid2tree: Dict[int, OperatorNode] = None - self.pl_tid2tree: Dict[int, OperatorNode] = None - - self.module_stats: Optional[List(Stats)] = None - self.pl_module_stats: Optional[List(Stats)] = None - self.device_target = None - - self.memory_all_curve = None - self.memory_div_curve = None - self.memory_events = None - - self.step_to_overlap = None - self.step_to_wait = None - self.comm_op = None - - def append_gpu_metrics(self, raw_data: bytes): - counter_json_str = ', {}'.format(', '.join(self.gpu_metrics)) - counter_json_bytes = bytes(counter_json_str, 'utf-8') - - raw_data_without_tail = raw_data[: raw_data.rfind(b']')] - raw_data = b''.join([raw_data_without_tail, counter_json_bytes, b']}']) - - import gzip - raw_data = gzip.compress(raw_data, 1) - return raw_data - - @staticmethod - def _filtered_by_ts(events: Iterable[MemoryRecord], start_ts, end_ts): - """Returns time-ordered events of memory allocation and free""" - if start_ts is not None and end_ts is not None: - events = [e for e in events if start_ts <= e.ts and e.ts <= end_ts] - elif start_ts is not None: - events = [e for e in events if start_ts <= e.ts] - elif end_ts is not None: - events = [e for e in events if e.ts <= end_ts] - - return events - - def get_memory_stats(self, start_ts=None, end_ts=None, memory_metric='K'): - cano = Canonicalizer(memory_metric=memory_metric) - rounder = DisplayRounder(ndigits=3) - - stats = self.memory_snapshot.get_memory_statistics(self.tid2tree, start_ts=start_ts, end_ts=end_ts) - - result = { - 'metadata': { - 'title': 'Memory View', - 'default_device': 'CPU', - 'search': 'Operator Name', - 'sort': f'Self Size Increase ({cano.memory_metric})' - }, - 'columns': [ - {'name': 'Operator Name', 'type': 'string'}, - {'name': 'Calls', 'type': 'number', 'tooltip': '# of calls of the operator.'}, - {'name': f'Size Increase ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'The memory increase size include all children operators.'}, - {'name': f'Self Size Increase ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'The memory increase size associated with the operator itself.'}, - {'name': 'Allocation Count', 'type': 'number', - 'tooltip': 'The allocation count including all chidren operators.'}, - {'name': 'Self Allocation Count', 'type': 'number', - 'tooltip': 'The allocation count belonging to the operator itself.'}, - {'name': f'Allocation Size ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'The allocation size including all children operators.'}, - {'name': f'Self Allocation Size ({cano.memory_metric})', 'type': 'number', - 'tooltip': ('The allocation size belonging to the 
operator itself.\n' - 'It will sum up all allocation bytes without considering the memory free.')}, - ], - 'rows': {} - } - - for name in stats: - these_rows = [] - result['rows'][name] = these_rows - - memory = stats[name] - for op_name, stat in sorted(memory.items()): - these_rows.append([ - op_name, - stat[6], - rounder(cano.convert_memory(stat[MemoryMetrics.IncreaseSize])), - rounder(cano.convert_memory(stat[MemoryMetrics.SelfIncreaseSize])), - stat[MemoryMetrics.AllocationCount], - stat[MemoryMetrics.SelfAllocationCount], - rounder(cano.convert_memory(stat[MemoryMetrics.AllocationSize])), - rounder(cano.convert_memory(stat[MemoryMetrics.SelfAllocationSize])), - ]) - - for dev_name in sorted(stats.keys()): - if dev_name.startswith('GPU'): - result['metadata']['default_device'] = dev_name - break - - return result - - def get_memory_curve( - self, - time_metric: str = 'ms', - memory_metric: str = 'K', - patch_for_step_plot=True): - def get_curves_and_peaks(records: List[MemoryRecord], cano: Canonicalizer): - """Inputs: - records: Sorted list of MemoryRecord - - For example: - ```py - { - 'CPU': [# Timestamp, Total Allocated, Total Reserved, Device Total Memory, operator - [1, 4, 4, 1000000, 'aten::add'], - [2, 16, 16, 1000000, "aten::empty], - [4, 4, 16, 1000000, '...'], - ], - 'GPU0': ... - } - ```""" - curves = defaultdict(list) - peaks = defaultdict(float) - for r in records: - if r.addr is None: - continue - dev = r.device_name - ts = r.ts - ta = r.total_allocated - tr = r.total_reserved - - if ta != ta or tr != tr: # isnan - continue - - curves[dev].append([ - round(cano.convert_time(ts - self.profiler_start_ts), 3), - round(cano.convert_memory(ta), 3), - round(cano.convert_memory(tr), 3), - ]) - peaks[dev] = max(peaks[dev], ta) - - for dev in curves: - if len(curves[dev]) == 0: - del curves[dev] - del peaks[dev] - - return curves, peaks - - # NOTE: this should have been occured in frontend - def patch_curves_for_step_plot(curves: Dict[str, List]): - # For example, if a curve is [(0, 0), (1, 1), (2,2)], the line plot - # is a stright line. Interpolating it as [(0, 0), (1, 0), (1, 1), - # (2,1) (2,2)], then the line plot will work as step plot. 
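            # Worked example with illustrative values ([time, allocated, reserved] triples):
            #   [[0, 0, 0], [1, 4, 4], [2, 16, 16]]
            # is interpolated below into
            #   [[0, 0, 0], [1, 0, 0], [1, 4, 4], [2, 4, 4], [2, 16, 16]]
            # i.e. before every point we insert its timestamp paired with the previous
            # point's memory values, so a straight line plot renders as a step plot.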
- new_curves = defaultdict(list) - for dev, curve in curves.items(): - new_curve = [] - for i, p in enumerate(curve): - if i != 0: - new_curve.append(p[:1] + new_curve[-1][1:]) - new_curve.append(p) - new_curves[dev] = new_curve - return new_curves - - cano = Canonicalizer(time_metric, memory_metric) - - curves, peaks = get_curves_and_peaks(self.memory_snapshot.memory_records, cano) - if patch_for_step_plot: - curves = patch_curves_for_step_plot(curves) - peaks_formatted = {} - totals = {} - for dev, value in peaks.items(): - peaks_formatted[dev] = 'Peak Memory Usage: {:.1f}{}'.format(cano.convert_memory(value), cano.memory_metric) - if dev != 'CPU': - try: - totals[dev] = cano.convert_memory(self.gpu_infos[int(dev[3:])]['Memory Raw']) - except BaseException: - pass - - devices: List[str] = sorted(list(curves.keys())) - default_device = 'CPU' - for dev in devices: - if dev.startswith('GPU'): - default_device = dev - break - - return { - 'metadata': { - 'default_device': default_device, - 'devices': devices, - 'peaks': peaks_formatted, - 'totals': totals, - 'first_ts': self.profiler_start_ts, - 'time_metric': cano.time_metric, - 'memory_metric': cano.memory_metric, - 'time_factor': cano.time_factor, - 'memory_factor': cano.memory_factor, - }, - 'columns': [ - {'name': f'Time ({cano.time_metric})', 'type': 'number', 'tooltip': 'Time since profiler starts.'}, - {'name': f'Allocated ({cano.memory_metric})', 'type': 'number', 'tooltip': 'Total memory in use.'}, - {'name': f'Reserved ({cano.memory_metric})', 'type': 'number', - 'tooltip': 'Total reserved memory by allocator, both used and unused.'}, - ], - 'rows': curves, - } - - def get_memory_events( - self, - start_ts=None, - end_ts=None, - time_metric: str = 'ms', - memory_metric: str = 'K'): - def get_op_name_or_ctx(record: MemoryRecord): - name = record.op_name_or_unknown - if name.startswith('aten::empty') and record.parent_op_name: - # aten::empty can be treated as the 'malloc' in pytorch - name = f'{record.parent_op_name} ({name})' - return name - - cano = Canonicalizer(time_metric=time_metric, memory_metric=memory_metric) - rounder = DisplayRounder(ndigits=3) - - profiler_start_ts = self.profiler_start_ts - memory_records = RunProfile._filtered_by_ts(self.memory_snapshot.memory_records, start_ts, end_ts) - - events = defaultdict(list) - alloc = {} # allocation events may or may not have paired free event - free = {} # free events that does not have paired alloc event - prev_ts = float('-inf') # ensure ordered memory records is ordered - for i, r in enumerate(memory_records): - if r.addr is None: - # profile json data prior to pytorch 1.10 do not have addr - # we should ignore them - continue - if prev_ts > r.ts: - logger.error(f'Invalid value, prev_ts {prev_ts} is greater than end_ts {r.ts}') - return {} - prev_ts = r.ts - addr = r.addr - size = r.bytes - if r.is_allocation: - # to be matched with a release event - alloc[addr] = i - else: - if addr in alloc: - alloc_r = memory_records[alloc[addr]] - alloc_ts = alloc_r.ts - free_ts = r.ts - events[alloc_r.device_name].append([ - get_op_name_or_ctx(alloc_r), - rounder(cano.convert_memory(-size)), - rounder(cano.convert_time(alloc_ts - profiler_start_ts)), - rounder(cano.convert_time(free_ts - profiler_start_ts)), - rounder(cano.convert_time(free_ts - alloc_ts)), - ]) - del alloc[addr] - else: - if addr in free: - logger.warning(f'Address {addr} is freed multiple times') - free[addr] = i - - for i in alloc.values(): - r = memory_records[i] - events[r.device_name].append([ - 
get_op_name_or_ctx(r), - rounder(cano.convert_memory(r.bytes)), - rounder(cano.convert_time(r.ts - profiler_start_ts)), - None, - None, - ]) - - for i in free.values(): - r = memory_records[i] - events[r.device_name].append([ - get_op_name_or_ctx(r), - rounder(cano.convert_memory(-r.bytes)), - None, - rounder(cano.convert_time(r.ts - profiler_start_ts)), - None, - ]) - - default_device = 'CPU' - for dev_name in sorted(events.keys()): - if dev_name.startswith('GPU'): - default_device = dev_name - break - - return { - 'metadata': { - 'title': 'Memory Events', - 'default_device': default_device, - }, - 'columns': [ - {'name': 'Operator', 'type': 'string', 'tooltip': ''}, - {'name': f'Size ({cano.memory_metric})', 'type': 'number', 'tooltip': ''}, - {'name': f'Allocation Time ({cano.time_metric})', 'type': 'number', 'tooltip': ''}, - {'name': f'Release Time ({cano.time_metric})', 'type': 'number', 'tooltip': ''}, - {'name': f'Duration ({cano.time_metric})', 'type': 'number', 'tooltip': ''}, - ], - 'rows': events, # in the form of { 'CPU': [...], 'GPU0': [...], ... } - } - - def get_module_view(self): - if self.is_pytorch_lightning and self.pl_module_stats: - module_stats = self.pl_module_stats - elif self.module_stats: - module_stats = self.module_stats - else: - return None - - result = { - 'columns': [ - {'name': 'Module Name', 'type': 'string', 'key': 'name'}, - {'name': 'Occurences', 'type': 'number', 'key': 'occurences'}, - {'name': 'Operators', 'type': 'number', 'key': 'operators'}, - {'name': 'Host Total Time', 'type': 'number', 'key': 'host_duration'}, - {'name': 'Host Self Time', 'type': 'number', 'key': 'self_host_duration'}, - {'name': 'Device Total Time', 'type': 'number', 'key': 'device_duration'}, - {'name': 'Device Self Time', 'type': 'number', 'key': 'self_device_duration'} - ], - 'data': [] - } - - def process_modules_stats(parent: List[Any], modules_stats: List[Stats]): - for stats in modules_stats: - d = stats._asdict() - d['children'] = [] - parent.append(d) - process_modules_stats(d['children'], stats.children) - - process_modules_stats(result['data'], module_stats) - return result - - def get_operator_tree(self): - if self.is_pytorch_lightning: - root = next(iter(self.pl_tid2tree.values())) - else: - root = next(iter(self.tid2tree.values())) - - result = [] - - def traverse_node(parent: List, node: OperatorNode): - d = { - 'name': node.name, - 'start_time': node.start_time, - 'end_time': node.end_time, - 'type': node.type, - 'tid': node.tid, - 'children': [] - } - parent.append(d) - for child in node.children: - traverse_node(d['children'], child) - - traverse_node(result, root) - return result[0] - - def compare_run(self, exp: 'RunProfile'): - base_root = next(iter(self.tid2tree.values())) - exp_root = next(iter(exp.tid2tree.values())) - diff_root = compare_op_tree(base_root, exp_root) - diff_stats = diff_summary(diff_root) - return diff_stats - - -class DistributedRunProfile(object): - """ Profiling all workers in a view. 
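    (A short sketch of consuming the nested dict returned by RunProfile.get_operator_tree
    above; the walk() helper and the `profile` name are illustrative and not part of this
    module. Each node carries 'name', 'start_time', 'end_time', 'type', 'tid' and a
    'children' list of nodes of the same shape.)

        def walk(node, depth=0):
            # print an indented outline of the operator tree
            print('  ' * depth + f"{node['name']} [{node['start_time']}, {node['end_time']}]")
            for child in node['children']:
                walk(child, depth + 1)

        walk(profile.get_operator_tree())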
- """ - - def __init__(self, span: str): - self.worker = 'All' - self.span = span - self.views = [] - self.gpu_info = None - self.steps_to_overlap = None - self.steps_to_wait = None - self.comm_ops = None diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/index.html b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/index.html deleted file mode 100644 index 66f6aca5f320396a436b341c587b14be6ef2612c..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/index.html +++ /dev/null @@ -1,2 +0,0 @@ -
    \ No newline at end of file diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/index.js b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/index.js deleted file mode 100644 index ddb69f41d2f149a7847fd267e4cdb8a576334cc4..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/index.js +++ /dev/null @@ -1,3 +0,0 @@ -export async function render() { - document.location.href = 'index.html'; -} diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_embedding.html b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_embedding.html deleted file mode 100644 index 462d2c395f81d932fbf0196ccc53f4b0ece6e93a..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_embedding.html +++ /dev/null @@ -1,103 +0,0 @@ - - - - - - - - - - - - - - - - diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_script.js b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_script.js deleted file mode 100644 index 039aef359e030dbace4161538dd1191cc6fa1ab9..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_script.js +++ /dev/null @@ -1,27 +0,0 @@ -/** - * @license - * Copyright (c) 2014 The Polymer Project Authors. All rights reserved. - * This code may only be used under the BSD style license found at http://polymer.github.io/LICENSE.txt - * The complete set of authors may be found at http://polymer.github.io/AUTHORS.txt - * The complete set of contributors may be found at http://polymer.github.io/CONTRIBUTORS.txt - * Code distributed by Google as part of the polymer project is also - * subject to an additional IP rights grant found at http://polymer.github.io/PATENTS.txt - */ -// @version 0.7.24 -!function () { window.WebComponents = window.WebComponents || { flags: {} }; var e = "webcomponents.js", t = document.querySelector('script[src*="' + e + '"]'), n = {}; if (!n.noOpts) { if (location.search.slice(1).split("&").forEach(function (e) { var t, r = e.split("="); r[0] && (t = r[0].match(/wc-(.+)/)) && (n[t[1]] = r[1] || !0) }), t) for (var r, o = 0; r = t.attributes[o]; o++)"src" !== r.name && (n[r.name] = r.value || !0); if (n.log && n.log.split) { var i = n.log.split(","); n.log = {}, i.forEach(function (e) { n.log[e] = !0 }) } else n.log = {} } n.shadow = n.shadow || n.shadowdom || n.polyfill, "native" === n.shadow ? n.shadow = !1 : n.shadow = n.shadow || !HTMLElement.prototype.createShadowRoot, n.register && (window.CustomElements = window.CustomElements || { flags: {} }, window.CustomElements.flags.register = n.register), WebComponents.flags = n }(), WebComponents.flags.shadow && ("undefined" == typeof WeakMap && !function () { var e = Object.defineProperty, t = Date.now() % 1e9, n = function () { this.name = "__st" + (1e9 * Math.random() >>> 0) + (t++ + "__") }; n.prototype = { set: function (t, n) { var r = t[this.name]; return r && r[0] === t ? r[1] = n : e(t, this.name, { value: [t, n], writable: !0 }), this }, get: function (e) { var t; return (t = e[this.name]) && t[0] === e ? 
t[1] : void 0 }, "delete": function (e) { var t = e[this.name]; return !(!t || t[0] !== e) && (t[0] = t[1] = void 0, !0) }, has: function (e) { var t = e[this.name]; return !!t && t[0] === e } }, window.WeakMap = n }(), window.ShadowDOMPolyfill = {}, function (e) { "use strict"; function t() { if ("undefined" != typeof chrome && chrome.app && chrome.app.runtime) return !1; if (navigator.getDeviceStorage) return !1; try { var e = new Function("return true;"); return e() } catch (t) { return !1 } } function n(e) { if (!e) throw new Error("Assertion failed") } function r(e, t) { for (var n = W(t), r = 0; r < n.length; r++) { var o = n[r]; A(e, o, F(t, o)) } return e } function o(e, t) { for (var n = W(t), r = 0; r < n.length; r++) { var o = n[r]; switch (o) { case "arguments": case "caller": case "length": case "name": case "prototype": case "toString": continue }A(e, o, F(t, o)) } return e } function i(e, t) { for (var n = 0; n < t.length; n++)if (t[n] in e) return t[n] } function a(e, t, n) { U.value = n, A(e, t, U) } function s(e, t) { var n = e.__proto__ || Object.getPrototypeOf(e); if (q) try { W(n) } catch (r) { n = n.__proto__ } var o = R.get(n); if (o) return o; var i = s(n), a = E(i); return g(n, a, t), a } function c(e, t) { w(e, t, !0) } function l(e, t) { w(t, e, !1) } function u(e) { return /^on[a-z]+$/.test(e) } function d(e) { return /^[a-zA-Z_$][a-zA-Z_$0-9]*$/.test(e) } function p(e) { return k && d(e) ? new Function("return this.__impl4cf1e782hg__." + e) : function () { return this.__impl4cf1e782hg__[e] } } function h(e) { return k && d(e) ? new Function("v", "this.__impl4cf1e782hg__." + e + " = v") : function (t) { this.__impl4cf1e782hg__[e] = t } } function f(e) { return k && d(e) ? new Function("return this.__impl4cf1e782hg__." + e + ".apply(this.__impl4cf1e782hg__, arguments)") : function () { return this.__impl4cf1e782hg__[e].apply(this.__impl4cf1e782hg__, arguments) } } function m(e, t) { try { return e === window && "showModalDialog" === t ? B : Object.getOwnPropertyDescriptor(e, t) } catch (n) { return B } } function w(t, n, r, o) { for (var i = W(t), a = 0; a < i.length; a++) { var s = i[a]; if ("polymerBlackList_" !== s && !(s in n || t.polymerBlackList_ && t.polymerBlackList_[s])) { q && t.__lookupGetter__(s); var c, l, d = m(t, s); if ("function" != typeof d.value) { var w = u(s); c = w ? e.getEventHandlerGetter(s) : p(s), (d.writable || d.set || V) && (l = w ? e.getEventHandlerSetter(s) : h(s)); var v = V || d.configurable; A(n, s, { get: c, set: l, configurable: v, enumerable: d.enumerable }) } else r && (n[s] = f(s)) } } } function v(e, t, n) { if (null != e) { var r = e.prototype; g(r, t, n), o(t, e) } } function g(e, t, r) { var o = t.prototype; n(void 0 === R.get(e)), R.set(e, t), I.set(o, e), c(e, o), r && l(o, r), a(o, "constructor", t), t.prototype = o } function b(e, t) { return R.get(t.prototype) === e } function y(e) { var t = Object.getPrototypeOf(e), n = s(t), r = E(n); return g(t, r, e), r } function E(e) { function t(t) { e.call(this, t) } var n = Object.create(e.prototype); return n.constructor = t, t.prototype = n, t } function _(e) { return e && e.__impl4cf1e782hg__ } function S(e) { return !_(e) } function T(e) { if (null === e) return null; n(S(e)); var t = e.__wrapper8e3dd93a60__; return null != t ? t : e.__wrapper8e3dd93a60__ = new (s(e, e))(e) } function M(e) { return null === e ? 
null : (n(_(e)), e.__impl4cf1e782hg__) } function O(e) { return e.__impl4cf1e782hg__ } function L(e, t) { t.__impl4cf1e782hg__ = e, e.__wrapper8e3dd93a60__ = t } function N(e) { return e && _(e) ? M(e) : e } function C(e) { return e && !_(e) ? T(e) : e } function j(e, t) { null !== t && (n(S(e)), n(void 0 === t || _(t)), e.__wrapper8e3dd93a60__ = t) } function D(e, t, n) { G.get = n, A(e.prototype, t, G) } function H(e, t) { D(e, t, function () { return T(this.__impl4cf1e782hg__[t]) }) } function x(e, t) { e.forEach(function (e) { t.forEach(function (t) { e.prototype[t] = function () { var e = C(this); return e[t].apply(e, arguments) } }) }) } var R = new WeakMap, I = new WeakMap, P = Object.create(null), k = t(), A = Object.defineProperty, W = Object.getOwnPropertyNames, F = Object.getOwnPropertyDescriptor, U = { value: void 0, configurable: !0, enumerable: !1, writable: !0 }; W(window); var q = /Firefox/.test(navigator.userAgent), B = { get: function () { }, set: function (e) { }, configurable: !0, enumerable: !0 }, V = function () { var e = Object.getOwnPropertyDescriptor(Node.prototype, "nodeType"); return e && !e.get && !e.set }(), G = { get: void 0, configurable: !0, enumerable: !0 }; e.addForwardingProperties = c, e.assert = n, e.constructorTable = R, e.defineGetter = D, e.defineWrapGetter = H, e.forwardMethodsToWrapper = x, e.isIdentifierName = d, e.isWrapper = _, e.isWrapperFor = b, e.mixin = r, e.nativePrototypeTable = I, e.oneOf = i, e.registerObject = y, e.registerWrapper = v, e.rewrap = j, e.setWrapper = L, e.unsafeUnwrap = O, e.unwrap = M, e.unwrapIfNeeded = N, e.wrap = T, e.wrapIfNeeded = C, e.wrappers = P }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e, t, n) { return { index: e, removed: t, addedCount: n } } function n() { } var r = 0, o = 1, i = 2, a = 3; n.prototype = { calcEditDistances: function (e, t, n, r, o, i) { for (var a = i - o + 1, s = n - t + 1, c = new Array(a), l = 0; l < a; l++)c[l] = new Array(s), c[l][0] = l; for (var u = 0; u < s; u++)c[0][u] = u; for (var l = 1; l < a; l++)for (var u = 1; u < s; u++)if (this.equals(e[t + u - 1], r[o + l - 1])) c[l][u] = c[l - 1][u - 1]; else { var d = c[l - 1][u] + 1, p = c[l][u - 1] + 1; c[l][u] = d < p ? d : p } return c }, spliceOperationsFromEditDistances: function (e) { for (var t = e.length - 1, n = e[0].length - 1, s = e[t][n], c = []; t > 0 || n > 0;)if (0 != t) if (0 != n) { var l, u = e[t - 1][n - 1], d = e[t - 1][n], p = e[t][n - 1]; l = d < p ? d < u ? d : u : p < u ? p : u, l == u ? (u == s ? c.push(r) : (c.push(o), s = u), t--, n--) : l == d ? 
(c.push(a), t--, s = d) : (c.push(i), n--, s = p) } else c.push(a), t--; else c.push(i), n--; return c.reverse(), c }, calcSplices: function (e, n, s, c, l, u) { var d = 0, p = 0, h = Math.min(s - n, u - l); if (0 == n && 0 == l && (d = this.sharedPrefix(e, c, h)), s == e.length && u == c.length && (p = this.sharedSuffix(e, c, h - d)), n += d, l += d, s -= p, u -= p, s - n == 0 && u - l == 0) return []; if (n == s) { for (var f = t(n, [], 0); l < u;)f.removed.push(c[l++]); return [f] } if (l == u) return [t(n, [], s - n)]; for (var m = this.spliceOperationsFromEditDistances(this.calcEditDistances(e, n, s, c, l, u)), f = void 0, w = [], v = n, g = l, b = 0; b < m.length; b++)switch (m[b]) { case r: f && (w.push(f), f = void 0), v++, g++; break; case o: f || (f = t(v, [], 0)), f.addedCount++, v++, f.removed.push(c[g]), g++; break; case i: f || (f = t(v, [], 0)), f.addedCount++, v++; break; case a: f || (f = t(v, [], 0)), f.removed.push(c[g]), g++ }return f && w.push(f), w }, sharedPrefix: function (e, t, n) { for (var r = 0; r < n; r++)if (!this.equals(e[r], t[r])) return r; return n }, sharedSuffix: function (e, t, n) { for (var r = e.length, o = t.length, i = 0; i < n && this.equals(e[--r], t[--o]);)i++; return i }, calculateSplices: function (e, t) { return this.calcSplices(e, 0, e.length, t, 0, t.length) }, equals: function (e, t) { return e === t } }, e.ArraySplice = n }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t() { a = !1; var e = i.slice(0); i = []; for (var t = 0; t < e.length; t++)(0, e[t])() } function n(e) { i.push(e), a || (a = !0, r(t, 0)) } var r, o = window.MutationObserver, i = [], a = !1; if (o) { var s = 1, c = new o(t), l = document.createTextNode(s); c.observe(l, { characterData: !0 }), r = function () { s = (s + 1) % 2, l.data = s } } else r = window.setTimeout; e.setEndOfMicrotask = n }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { e.scheduled_ || (e.scheduled_ = !0, f.push(e), m || (u(n), m = !0)) } function n() { for (m = !1; f.length;) { var e = f; f = [], e.sort(function (e, t) { return e.uid_ - t.uid_ }); for (var t = 0; t < e.length; t++) { var n = e[t]; n.scheduled_ = !1; var r = n.takeRecords(); i(n), r.length && n.callback_(r, n) } } } function r(e, t) { this.type = e, this.target = t, this.addedNodes = new p.NodeList, this.removedNodes = new p.NodeList, this.previousSibling = null, this.nextSibling = null, this.attributeName = null, this.attributeNamespace = null, this.oldValue = null } function o(e, t) { for (; e; e = e.parentNode) { var n = h.get(e); if (n) for (var r = 0; r < n.length; r++) { var o = n[r]; o.options.subtree && o.addTransientObserver(t) } } } function i(e) { for (var t = 0; t < e.nodes_.length; t++) { var n = e.nodes_[t], r = h.get(n); if (!r) return; for (var o = 0; o < r.length; o++) { var i = r[o]; i.observer === e && i.removeTransientObservers() } } } function a(e, n, o) { for (var i = Object.create(null), a = Object.create(null), s = e; s; s = s.parentNode) { var c = h.get(s); if (c) for (var l = 0; l < c.length; l++) { var u = c[l], d = u.options; if ((s === e || d.subtree) && ("attributes" !== n || d.attributes) && ("attributes" !== n || !d.attributeFilter || null === o.namespace && d.attributeFilter.indexOf(o.name) !== -1) && ("characterData" !== n || d.characterData) && ("childList" !== n || d.childList)) { var p = u.observer; i[p.uid_] = p, ("attributes" === n && d.attributeOldValue || "characterData" === n && d.characterDataOldValue) && (a[p.uid_] = o.oldValue) } } } for (var f in 
i) { var p = i[f], m = new r(n, e); "name" in o && "namespace" in o && (m.attributeName = o.name, m.attributeNamespace = o.namespace), o.addedNodes && (m.addedNodes = o.addedNodes), o.removedNodes && (m.removedNodes = o.removedNodes), o.previousSibling && (m.previousSibling = o.previousSibling), o.nextSibling && (m.nextSibling = o.nextSibling), void 0 !== a[f] && (m.oldValue = a[f]), t(p), p.records_.push(m) } } function s(e) { if (this.childList = !!e.childList, this.subtree = !!e.subtree, "attributes" in e || !("attributeOldValue" in e || "attributeFilter" in e) ? this.attributes = !!e.attributes : this.attributes = !0, "characterDataOldValue" in e && !("characterData" in e) ? this.characterData = !0 : this.characterData = !!e.characterData, !this.attributes && (e.attributeOldValue || "attributeFilter" in e) || !this.characterData && e.characterDataOldValue) throw new TypeError; if (this.characterData = !!e.characterData, this.attributeOldValue = !!e.attributeOldValue, this.characterDataOldValue = !!e.characterDataOldValue, "attributeFilter" in e) { if (null == e.attributeFilter || "object" != typeof e.attributeFilter) throw new TypeError; this.attributeFilter = w.call(e.attributeFilter) } else this.attributeFilter = null } function c(e) { this.callback_ = e, this.nodes_ = [], this.records_ = [], this.uid_ = ++v, this.scheduled_ = !1 } function l(e, t, n) { this.observer = e, this.target = t, this.options = n, this.transientObservedNodes = [] } var u = e.setEndOfMicrotask, d = e.wrapIfNeeded, p = e.wrappers, h = new WeakMap, f = [], m = !1, w = Array.prototype.slice, v = 0; c.prototype = { constructor: c, observe: function (e, t) { e = d(e); var n, r = new s(t), o = h.get(e); o || h.set(e, o = []); for (var i = 0; i < o.length; i++)o[i].observer === this && (n = o[i], n.removeTransientObservers(), n.options = r); n || (n = new l(this, e, r), o.push(n), this.nodes_.push(e)) }, disconnect: function () { this.nodes_.forEach(function (e) { for (var t = h.get(e), n = 0; n < t.length; n++) { var r = t[n]; if (r.observer === this) { t.splice(n, 1); break } } }, this), this.records_ = [] }, takeRecords: function () { var e = this.records_; return this.records_ = [], e } }, l.prototype = { addTransientObserver: function (e) { if (e !== this.target) { t(this.observer), this.transientObservedNodes.push(e); var n = h.get(e); n || h.set(e, n = []), n.push(this) } }, removeTransientObservers: function () { var e = this.transientObservedNodes; this.transientObservedNodes = []; for (var t = 0; t < e.length; t++)for (var n = e[t], r = h.get(n), o = 0; o < r.length; o++)if (r[o] === this) { r.splice(o, 1); break } } }, e.enqueueMutation = a, e.registerTransientObservers = o, e.wrappers.MutationObserver = c, e.wrappers.MutationRecord = r }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e, t) { this.root = e, this.parent = t } function n(e, t) { if (e.treeScope_ !== t) { e.treeScope_ = t; for (var r = e.shadowRoot; r; r = r.olderShadowRoot)r.treeScope_.parent = t; for (var o = e.firstChild; o; o = o.nextSibling)n(o, t) } } function r(n) { if (n instanceof e.wrappers.Window, n.treeScope_) return n.treeScope_; var o, i = n.parentNode; return o = i ? r(i) : new t(n, null), n.treeScope_ = o } t.prototype = { get renderer() { return this.root instanceof e.wrappers.ShadowRoot ? 
e.getRendererForHost(this.root.host) : null }, contains: function (e) { for (; e; e = e.parent)if (e === this) return !0; return !1 } }, e.TreeScope = t, e.getTreeScope = r, e.setTreeScope = n }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { return e instanceof G.ShadowRoot } function n(e) { return A(e).root } function r(e, r) { var s = [], c = e; for (s.push(c); c;) { var l = a(c); if (l && l.length > 0) { for (var u = 0; u < l.length; u++) { var p = l[u]; if (i(p)) { var h = n(p), f = h.olderShadowRoot; f && s.push(f) } s.push(p) } c = l[l.length - 1] } else if (t(c)) { if (d(e, c) && o(r)) break; c = c.host, s.push(c) } else c = c.parentNode, c && s.push(c) } return s } function o(e) { if (!e) return !1; switch (e.type) { case "abort": case "error": case "select": case "change": case "load": case "reset": case "resize": case "scroll": case "selectstart": return !0 }return !1 } function i(e) { return e instanceof HTMLShadowElement } function a(t) { return e.getDestinationInsertionPoints(t) } function s(e, t) { if (0 === e.length) return t; t instanceof G.Window && (t = t.document); for (var n = A(t), r = e[0], o = A(r), i = l(n, o), a = 0; a < e.length; a++) { var s = e[a]; if (A(s) === i) return s } return e[e.length - 1] } function c(e) { for (var t = []; e; e = e.parent)t.push(e); return t } function l(e, t) { for (var n = c(e), r = c(t), o = null; n.length > 0 && r.length > 0;) { var i = n.pop(), a = r.pop(); if (i !== a) break; o = i } return o } function u(e, t, n) { t instanceof G.Window && (t = t.document); var o, i = A(t), a = A(n), s = r(n, e), o = l(i, a); o || (o = a.root); for (var c = o; c; c = c.parent)for (var u = 0; u < s.length; u++) { var d = s[u]; if (A(d) === c) return d } return null } function d(e, t) { return A(e) === A(t) } function p(e) { if (!K.get(e) && (K.set(e, !0), f(V(e), V(e.target)), P)) { var t = P; throw P = null, t } } function h(e) { switch (e.type) { case "load": case "beforeunload": case "unload": return !0 }return !1 } function f(t, n) { if ($.get(t)) throw new Error("InvalidStateError"); $.set(t, !0), e.renderAllPending(); var o, i, a; if (h(t) && !t.bubbles) { var s = n; s instanceof G.Document && (a = s.defaultView) && (i = s, o = []) } if (!o) if (n instanceof G.Window) a = n, o = []; else if (o = r(n, t), !h(t)) { var s = o[o.length - 1]; s instanceof G.Document && (a = s.defaultView) } return ne.set(t, o), m(t, o, a, i) && w(t, o, a, i) && v(t, o, a, i), J.set(t, re), Y["delete"](t, null), $["delete"](t), t.defaultPrevented } function m(e, t, n, r) { var o = oe; if (n && !g(n, e, o, t, r)) return !1; for (var i = t.length - 1; i > 0; i--)if (!g(t[i], e, o, t, r)) return !1; return !0 } function w(e, t, n, r) { var o = ie, i = t[0] || n; return g(i, e, o, t, r) } function v(e, t, n, r) { for (var o = ae, i = 1; i < t.length; i++)if (!g(t[i], e, o, t, r)) return; n && t.length > 0 && g(n, e, o, t, r) } function g(e, t, n, r, o) { var i = z.get(e); if (!i) return !0; var a = o || s(r, e); if (a === e) { if (n === oe) return !0; n === ae && (n = ie) } else if (n === ae && !t.bubbles) return !0; if ("relatedTarget" in t) { var c = B(t), l = c.relatedTarget; if (l) { if (l instanceof Object && l.addEventListener) { var d = V(l), p = u(t, e, d); if (p === a) return !0 } else p = null; Z.set(t, p) } } J.set(t, n); var h = t.type, f = !1; X.set(t, a), Y.set(t, e), i.depth++; for (var m = 0, w = i.length; m < w; m++) { var v = i[m]; if (v.removed) f = !0; else if (!(v.type !== h || !v.capture && n === oe || v.capture && n === 
ae)) try { if ("function" == typeof v.handler ? v.handler.call(e, t) : v.handler.handleEvent(t), ee.get(t)) return !1 } catch (g) { P || (P = g) } } if (i.depth--, f && 0 === i.depth) { var b = i.slice(); i.length = 0; for (var m = 0; m < b.length; m++)b[m].removed || i.push(b[m]) } return !Q.get(t) } function b(e, t, n) { this.type = e, this.handler = t, this.capture = Boolean(n) } function y(e, t) { if (!(e instanceof se)) return V(T(se, "Event", e, t)); var n = e; return be || "beforeunload" !== n.type || this instanceof M ? void U(n, this) : new M(n) } function E(e) { return e && e.relatedTarget ? Object.create(e, { relatedTarget: { value: B(e.relatedTarget) } }) : e } function _(e, t, n) { var r = window[e], o = function (t, n) { return t instanceof r ? void U(t, this) : V(T(r, e, t, n)) }; if (o.prototype = Object.create(t.prototype), n && W(o.prototype, n), r) try { F(r, o, new r("temp")) } catch (i) { F(r, o, document.createEvent(e)) } return o } function S(e, t) { return function () { arguments[t] = B(arguments[t]); var n = B(this); n[e].apply(n, arguments) } } function T(e, t, n, r) { if (ve) return new e(n, E(r)); var o = B(document.createEvent(t)), i = we[t], a = [n]; return Object.keys(i).forEach(function (e) { var t = null != r && e in r ? r[e] : i[e]; "relatedTarget" === e && (t = B(t)), a.push(t) }), o["init" + t].apply(o, a), o } function M(e) { y.call(this, e) } function O(e) { return "function" == typeof e || e && e.handleEvent } function L(e) { switch (e) { case "DOMAttrModified": case "DOMAttributeNameChanged": case "DOMCharacterDataModified": case "DOMElementNameChanged": case "DOMNodeInserted": case "DOMNodeInsertedIntoDocument": case "DOMNodeRemoved": case "DOMNodeRemovedFromDocument": case "DOMSubtreeModified": return !0 }return !1 } function N(e) { U(e, this) } function C(e) { return e instanceof G.ShadowRoot && (e = e.host), B(e) } function j(e, t) { var n = z.get(e); if (n) for (var r = 0; r < n.length; r++)if (!n[r].removed && n[r].type === t) return !0; return !1 } function D(e, t) { for (var n = B(e); n; n = n.parentNode)if (j(V(n), t)) return !0; return !1 } function H(e) { k(e, Ee) } function x(t, n, o, i) { e.renderAllPending(); var a = V(_e.call(q(n), o, i)); if (!a) return null; var c = r(a, null), l = c.lastIndexOf(t); return l == -1 ? null : (c = c.slice(0, l), s(c, t)) } function R(e) { return function () { var t = te.get(this); return t && t[e] && t[e].value || null } } function I(e) { var t = e.slice(2); return function (n) { var r = te.get(this); r || (r = Object.create(null), te.set(this, r)); var o = r[e]; if (o && this.removeEventListener(t, o.wrapped, !1), "function" == typeof n) { var i = function (t) { var r = n.call(this, t); r === !1 ? 
t.preventDefault() : "onbeforeunload" === e && "string" == typeof r && (t.returnValue = r) }; this.addEventListener(t, i, !1), r[e] = { value: n, wrapped: i } } } } var P, k = e.forwardMethodsToWrapper, A = e.getTreeScope, W = e.mixin, F = e.registerWrapper, U = e.setWrapper, q = e.unsafeUnwrap, B = e.unwrap, V = e.wrap, G = e.wrappers, z = (new WeakMap, new WeakMap), K = new WeakMap, $ = new WeakMap, X = new WeakMap, Y = new WeakMap, Z = new WeakMap, J = new WeakMap, Q = new WeakMap, ee = new WeakMap, te = new WeakMap, ne = new WeakMap, re = 0, oe = 1, ie = 2, ae = 3; b.prototype = { equals: function (e) { return this.handler === e.handler && this.type === e.type && this.capture === e.capture }, get removed() { return null === this.handler }, remove: function () { this.handler = null } }; var se = window.Event; se.prototype.polymerBlackList_ = { returnValue: !0, keyLocation: !0 }, y.prototype = { get target() { return X.get(this) }, get currentTarget() { return Y.get(this) }, get eventPhase() { return J.get(this) }, get path() { var e = ne.get(this); return e ? e.slice() : [] }, stopPropagation: function () { Q.set(this, !0) }, stopImmediatePropagation: function () { Q.set(this, !0), ee.set(this, !0) } }; var ce = function () { var e = document.createEvent("Event"); return e.initEvent("test", !0, !0), e.preventDefault(), e.defaultPrevented }(); ce || (y.prototype.preventDefault = function () { this.cancelable && (q(this).preventDefault(), Object.defineProperty(this, "defaultPrevented", { get: function () { return !0 }, configurable: !0 })) }), F(se, y, document.createEvent("Event")); var le = _("UIEvent", y), ue = _("CustomEvent", y), de = { get relatedTarget() { var e = Z.get(this); return void 0 !== e ? e : V(B(this).relatedTarget) } }, pe = W({ initMouseEvent: S("initMouseEvent", 14) }, de), he = W({ initFocusEvent: S("initFocusEvent", 5) }, de), fe = _("MouseEvent", le, pe), me = _("FocusEvent", le, he), we = Object.create(null), ve = function () { try { new window.FocusEvent("focus") } catch (e) { return !1 } return !0 }(); if (!ve) { var ge = function (e, t, n) { if (n) { var r = we[n]; t = W(W({}, r), t) } we[e] = t }; ge("Event", { bubbles: !1, cancelable: !1 }), ge("CustomEvent", { detail: null }, "Event"), ge("UIEvent", { view: null, detail: 0 }, "Event"), ge("MouseEvent", { screenX: 0, screenY: 0, clientX: 0, clientY: 0, ctrlKey: !1, altKey: !1, shiftKey: !1, metaKey: !1, button: 0, relatedTarget: null }, "UIEvent"), ge("FocusEvent", { relatedTarget: null }, "UIEvent") } var be = window.BeforeUnloadEvent; M.prototype = Object.create(y.prototype), W(M.prototype, { get returnValue() { return q(this).returnValue }, set returnValue(e) { q(this).returnValue = e } }), be && F(be, M); var ye = window.EventTarget, Ee = ["addEventListener", "removeEventListener", "dispatchEvent"];[Node, Window].forEach(function (e) { var t = e.prototype; Ee.forEach(function (e) { Object.defineProperty(t, e + "_", { value: t[e] }) }) }), N.prototype = { addEventListener: function (e, t, n) { if (O(t) && !L(e)) { var r = new b(e, t, n), o = z.get(this); if (o) { for (var i = 0; i < o.length; i++)if (r.equals(o[i])) return } else o = [], o.depth = 0, z.set(this, o); o.push(r); var a = C(this); a.addEventListener_(e, p, !0) } }, removeEventListener: function (e, t, n) { n = Boolean(n); var r = z.get(this); if (r) { for (var o = 0, i = !1, a = 0; a < r.length; a++)r[a].type === e && r[a].capture === n && (o++, r[a].handler === t && (i = !0, r[a].remove())); if (i && 1 === o) { var s = C(this); 
s.removeEventListener_(e, p, !0) } } }, dispatchEvent: function (t) { var n = B(t), r = n.type; K.set(n, !1), e.renderAllPending(); var o; D(this, r) || (o = function () { }, this.addEventListener(r, o, !0)); try { return B(this).dispatchEvent_(n) } finally { o && this.removeEventListener(r, o, !0) } } }, ye && F(ye, N); var _e = document.elementFromPoint; e.elementFromPoint = x, e.getEventHandlerGetter = R, e.getEventHandlerSetter = I, e.wrapEventTargetMethods = H, e.wrappers.BeforeUnloadEvent = M, e.wrappers.CustomEvent = ue, e.wrappers.Event = y, e.wrappers.EventTarget = N, e.wrappers.FocusEvent = me, e.wrappers.MouseEvent = fe, e.wrappers.UIEvent = le }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e, t) { Object.defineProperty(e, t, m) } function n(e) { l(e, this) } function r() { this.length = 0, t(this, "length") } function o(e) { for (var t = new r, o = 0; o < e.length; o++)t[o] = new n(e[o]); return t.length = o, t } function i(e) { a.call(this, e) } var a = e.wrappers.UIEvent, s = e.mixin, c = e.registerWrapper, l = e.setWrapper, u = e.unsafeUnwrap, d = e.wrap, p = window.TouchEvent; if (p) { var h; try { h = document.createEvent("TouchEvent") } catch (f) { return } var m = { enumerable: !1 }; n.prototype = { get target() { return d(u(this).target) } }; var w = { configurable: !0, enumerable: !0, get: null };["clientX", "clientY", "screenX", "screenY", "pageX", "pageY", "identifier", "webkitRadiusX", "webkitRadiusY", "webkitRotationAngle", "webkitForce"].forEach(function (e) { w.get = function () { return u(this)[e] }, Object.defineProperty(n.prototype, e, w) }), r.prototype = { item: function (e) { return this[e] } }, i.prototype = Object.create(a.prototype), s(i.prototype, { get touches() { return o(u(this).touches) }, get targetTouches() { return o(u(this).targetTouches) }, get changedTouches() { return o(u(this).changedTouches) }, initTouchEvent: function () { throw new Error("Not implemented") } }), c(p, i, h), e.wrappers.Touch = n, e.wrappers.TouchEvent = i, e.wrappers.TouchList = r } }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e, t) { Object.defineProperty(e, t, s) } function n() { this.length = 0, t(this, "length") } function r(e) { if (null == e) return e; for (var t = new n, r = 0, o = e.length; r < o; r++)t[r] = a(e[r]); return t.length = o, t } function o(e, t) { e.prototype[t] = function () { return r(i(this)[t].apply(i(this), arguments)) } } var i = e.unsafeUnwrap, a = e.wrap, s = { enumerable: !1 }; n.prototype = { item: function (e) { return this[e] } }, t(n.prototype, "item"), e.wrappers.NodeList = n, e.addWrapNodeListMethod = o, e.wrapNodeList = r }(window.ShadowDOMPolyfill), function (e) { "use strict"; e.wrapHTMLCollection = e.wrapNodeList, e.wrappers.HTMLCollection = e.wrappers.NodeList }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { O(e instanceof _) } function n(e) { var t = new T; return t[0] = e, t.length = 1, t } function r(e, t, n) { N(t, "childList", { removedNodes: n, previousSibling: e.previousSibling, nextSibling: e.nextSibling }) } function o(e, t) { N(e, "childList", { removedNodes: t }) } function i(e, t, r, o) { if (e instanceof DocumentFragment) { var i = s(e); U = !0; for (var a = i.length - 1; a >= 0; a--)e.removeChild(i[a]), i[a].parentNode_ = t; U = !1; for (var a = 0; a < i.length; a++)i[a].previousSibling_ = i[a - 1] || r, i[a].nextSibling_ = i[a + 1] || o; return r && (r.nextSibling_ = i[0]), o && (o.previousSibling_ = i[i.length - 1]), i } var i = n(e), c = 
e.parentNode; return c && c.removeChild(e), e.parentNode_ = t, e.previousSibling_ = r, e.nextSibling_ = o, r && (r.nextSibling_ = e), o && (o.previousSibling_ = e), i } function a(e) { if (e instanceof DocumentFragment) return s(e); var t = n(e), o = e.parentNode; return o && r(e, o, t), t } function s(e) { for (var t = new T, n = 0, r = e.firstChild; r; r = r.nextSibling)t[n++] = r; return t.length = n, o(e, t), t } function c(e) { return e } function l(e, t) { R(e, t), e.nodeIsInserted_() } function u(e, t) { for (var n = C(t), r = 0; r < e.length; r++)l(e[r], n) } function d(e) { R(e, new M(e, null)) } function p(e) { for (var t = 0; t < e.length; t++)d(e[t]) } function h(e, t) { var n = e.nodeType === _.DOCUMENT_NODE ? e : e.ownerDocument; n !== t.ownerDocument && n.adoptNode(t) } function f(t, n) { if (n.length) { var r = t.ownerDocument; if (r !== n[0].ownerDocument) for (var o = 0; o < n.length; o++)e.adoptNodeNoRemove(n[o], r) } } function m(e, t) { f(e, t); var n = t.length; if (1 === n) return P(t[0]); for (var r = P(e.ownerDocument.createDocumentFragment()), o = 0; o < n; o++)r.appendChild(P(t[o])); return r } function w(e) { if (void 0 !== e.firstChild_) for (var t = e.firstChild_; t;) { var n = t; t = t.nextSibling_, n.parentNode_ = n.previousSibling_ = n.nextSibling_ = void 0 } e.firstChild_ = e.lastChild_ = void 0 } function v(e) { if (e.invalidateShadowRenderer()) { for (var t = e.firstChild; t;) { O(t.parentNode === e); var n = t.nextSibling, r = P(t), o = r.parentNode; o && X.call(o, r), t.previousSibling_ = t.nextSibling_ = t.parentNode_ = null, t = n } e.firstChild_ = e.lastChild_ = null } else for (var n, i = P(e), a = i.firstChild; a;)n = a.nextSibling, X.call(i, a), a = n } function g(e) { var t = e.parentNode; return t && t.invalidateShadowRenderer() } function b(e) { for (var t, n = 0; n < e.length; n++)t = e[n], t.parentNode.removeChild(t) } function y(e, t, n) { var r; if (r = A(n ? q.call(n, I(e), !1) : B.call(I(e), !1)), t) { for (var o = e.firstChild; o; o = o.nextSibling)r.appendChild(y(o, !0, n)); if (e instanceof F.HTMLTemplateElement) for (var i = r.content, o = e.content.firstChild; o; o = o.nextSibling)i.appendChild(y(o, !0, n)) } return r } function E(e, t) { if (!t || C(e) !== C(t)) return !1; for (var n = t; n; n = n.parentNode)if (n === e) return !0; return !1 } function _(e) { O(e instanceof V), S.call(this, e), this.parentNode_ = void 0, this.firstChild_ = void 0, this.lastChild_ = void 0, this.nextSibling_ = void 0, this.previousSibling_ = void 0, this.treeScope_ = void 0 } var S = e.wrappers.EventTarget, T = e.wrappers.NodeList, M = e.TreeScope, O = e.assert, L = e.defineWrapGetter, N = e.enqueueMutation, C = e.getTreeScope, j = e.isWrapper, D = e.mixin, H = e.registerTransientObservers, x = e.registerWrapper, R = e.setTreeScope, I = e.unsafeUnwrap, P = e.unwrap, k = e.unwrapIfNeeded, A = e.wrap, W = e.wrapIfNeeded, F = e.wrappers, U = !1, q = document.importNode, B = window.Node.prototype.cloneNode, V = window.Node, G = window.DocumentFragment, z = (V.prototype.appendChild, V.prototype.compareDocumentPosition), K = V.prototype.isEqualNode, $ = V.prototype.insertBefore, X = V.prototype.removeChild, Y = V.prototype.replaceChild, Z = /Trident|Edge/.test(navigator.userAgent), J = Z ? 
function (e, t) { try { X.call(e, t) } catch (n) { if (!(e instanceof G)) throw n } } : function (e, t) { X.call(e, t) }; _.prototype = Object.create(S.prototype), D(_.prototype, { appendChild: function (e) { return this.insertBefore(e, null) }, insertBefore: function (e, n) { t(e); var r; n ? j(n) ? r = P(n) : (r = n, n = A(r)) : (n = null, r = null), n && O(n.parentNode === this); var o, s = n ? n.previousSibling : this.lastChild, c = !this.invalidateShadowRenderer() && !g(e); if (o = c ? a(e) : i(e, this, s, n), c) h(this, e), w(this), $.call(I(this), P(e), r); else { s || (this.firstChild_ = o[0]), n || (this.lastChild_ = o[o.length - 1], void 0 === this.firstChild_ && (this.firstChild_ = this.firstChild)); var l = r ? r.parentNode : I(this); l ? $.call(l, m(this, o), r) : f(this, o) } return N(this, "childList", { addedNodes: o, nextSibling: n, previousSibling: s }), u(o, this), e }, removeChild: function (e) { if (t(e), e.parentNode !== this) { for (var r = !1, o = (this.childNodes, this.firstChild); o; o = o.nextSibling)if (o === e) { r = !0; break } if (!r) throw new Error("NotFoundError") } var i = P(e), a = e.nextSibling, s = e.previousSibling; if (this.invalidateShadowRenderer()) { var c = this.firstChild, l = this.lastChild, u = i.parentNode; u && J(u, i), c === e && (this.firstChild_ = a), l === e && (this.lastChild_ = s), s && (s.nextSibling_ = a), a && (a.previousSibling_ = s), e.previousSibling_ = e.nextSibling_ = e.parentNode_ = void 0 } else w(this), J(I(this), i); return U || N(this, "childList", { removedNodes: n(e), nextSibling: a, previousSibling: s }), H(this, e), e }, replaceChild: function (e, r) { t(e); var o; if (j(r) ? o = P(r) : (o = r, r = A(o)), r.parentNode !== this) throw new Error("NotFoundError"); var s, c = r.nextSibling, l = r.previousSibling, p = !this.invalidateShadowRenderer() && !g(e); return p ? s = a(e) : (c === e && (c = e.nextSibling), s = i(e, this, l, c)), p ? (h(this, e), w(this), Y.call(I(this), P(e), o)) : (this.firstChild === r && (this.firstChild_ = s[0]), this.lastChild === r && (this.lastChild_ = s[s.length - 1]), r.previousSibling_ = r.nextSibling_ = r.parentNode_ = void 0, o.parentNode && Y.call(o.parentNode, m(this, s), o)), N(this, "childList", { addedNodes: s, removedNodes: n(r), nextSibling: c, previousSibling: l }), d(r), u(s, this), r }, nodeIsInserted_: function () { for (var e = this.firstChild; e; e = e.nextSibling)e.nodeIsInserted_() }, hasChildNodes: function () { return null !== this.firstChild }, get parentNode() { return void 0 !== this.parentNode_ ? this.parentNode_ : A(I(this).parentNode) }, get firstChild() { return void 0 !== this.firstChild_ ? this.firstChild_ : A(I(this).firstChild) }, get lastChild() { return void 0 !== this.lastChild_ ? this.lastChild_ : A(I(this).lastChild) }, get nextSibling() { return void 0 !== this.nextSibling_ ? this.nextSibling_ : A(I(this).nextSibling) }, get previousSibling() { return void 0 !== this.previousSibling_ ? 
this.previousSibling_ : A(I(this).previousSibling) }, get parentElement() { for (var e = this.parentNode; e && e.nodeType !== _.ELEMENT_NODE;)e = e.parentNode; return e }, get textContent() { for (var e = "", t = this.firstChild; t; t = t.nextSibling)t.nodeType != _.COMMENT_NODE && (e += t.textContent); return e }, set textContent(e) { null == e && (e = ""); var t = c(this.childNodes); if (this.invalidateShadowRenderer()) { if (v(this), "" !== e) { var n = I(this).ownerDocument.createTextNode(e); this.appendChild(n) } } else w(this), I(this).textContent = e; var r = c(this.childNodes); N(this, "childList", { addedNodes: r, removedNodes: t }), p(t), u(r, this) }, get childNodes() { for (var e = new T, t = 0, n = this.firstChild; n; n = n.nextSibling)e[t++] = n; return e.length = t, e }, cloneNode: function (e) { return y(this, e) }, contains: function (e) { return E(this, W(e)) }, compareDocumentPosition: function (e) { return z.call(I(this), k(e)) }, isEqualNode: function (e) { return K.call(I(this), k(e)) }, normalize: function () { for (var e, t, n = c(this.childNodes), r = [], o = "", i = 0; i < n.length; i++)t = n[i], t.nodeType === _.TEXT_NODE ? e || t.data.length ? e ? (o += t.data, r.push(t)) : e = t : this.removeChild(t) : (e && r.length && (e.data += o, b(r)), r = [], o = "", e = null, t.childNodes.length && t.normalize()); e && r.length && (e.data += o, b(r)) } }), L(_, "ownerDocument"), x(V, _, document.createDocumentFragment()), delete _.prototype.querySelector, delete _.prototype.querySelectorAll, _.prototype = D(Object.create(S.prototype), _.prototype), e.cloneNode = y, e.nodeWasAdded = l, e.nodeWasRemoved = d, e.nodesWereAdded = u, e.nodesWereRemoved = p, e.originalInsertBefore = $, e.originalRemoveChild = X, e.snapshotNodeList = c, e.wrappers.Node = _ }(window.ShadowDOMPolyfill), function (e) { - "use strict"; function t(t, n, r, o) { for (var i = null, a = null, s = 0, c = t.length; s < c; s++)i = b(t[s]), !o && (a = v(i).root) && a instanceof e.wrappers.ShadowRoot || (r[n++] = i); return n } function n(e) { return String(e).replace(/\/deep\/|::shadow|>>>/g, " ") } function r(e) { return String(e).replace(/:host\(([^\s]+)\)/g, "$1").replace(/([^\s]):host/g, "$1").replace(":host", "*").replace(/\^|\/shadow\/|\/shadow-deep\/|::shadow|\/deep\/|::content|>>>/g, " ") } function o(e, t) { for (var n, r = e.firstElementChild; r;) { if (r.matches(t)) return r; if (n = o(r, t)) return n; r = r.nextElementSibling } return null } function i(e, t) { return e.matches(t) } function a(e, t, n) { var r = e.localName; return r === t || r === n && e.namespaceURI === j } function s() { return !0 } function c(e, t, n) { return e.localName === n } function l(e, t) { return e.namespaceURI === t } function u(e, t, n) { return e.namespaceURI === t && e.localName === n } function d(e, t, n, r, o, i) { for (var a = e.firstElementChild; a;)r(a, o, i) && (n[t++] = a), t = d(a, t, n, r, o, i), a = a.nextElementSibling; return t } function p(n, r, o, i, a) { var s, c = g(this), l = v(this).root; if (l instanceof e.wrappers.ShadowRoot) return d(this, r, o, n, i, null); if (c instanceof N) s = S.call(c, i); else { if (!(c instanceof C)) return d(this, r, o, n, i, null); s = _.call(c, i) } return t(s, r, o, a) } function h(n, r, o, i, a) { var s, c = g(this), l = v(this).root; if (l instanceof e.wrappers.ShadowRoot) return d(this, r, o, n, i, a); if (c instanceof N) s = M.call(c, i, a); else { if (!(c instanceof C)) return d(this, r, o, n, i, a); s = T.call(c, i, a) } return t(s, r, o, !1) } function f(n, 
r, o, i, a) { var s, c = g(this), l = v(this).root; if (l instanceof e.wrappers.ShadowRoot) return d(this, r, o, n, i, a); if (c instanceof N) s = L.call(c, i, a); else { if (!(c instanceof C)) return d(this, r, o, n, i, a); s = O.call(c, i, a) } return t(s, r, o, !1) } var m = e.wrappers.HTMLCollection, w = e.wrappers.NodeList, v = e.getTreeScope, g = e.unsafeUnwrap, b = e.wrap, y = document.querySelector, E = document.documentElement.querySelector, _ = document.querySelectorAll, S = document.documentElement.querySelectorAll, T = document.getElementsByTagName, M = document.documentElement.getElementsByTagName, O = document.getElementsByTagNameNS, L = document.documentElement.getElementsByTagNameNS, N = window.Element, C = window.HTMLDocument || window.Document, j = "http://www.w3.org/1999/xhtml", D = { - querySelector: function (t) { var r = n(t), i = r !== t; t = r; var a, s = g(this), c = v(this).root; if (c instanceof e.wrappers.ShadowRoot) return o(this, t); if (s instanceof N) a = b(E.call(s, t)); else { if (!(s instanceof C)) return o(this, t); a = b(y.call(s, t)) } return a && !i && (c = v(a).root) && c instanceof e.wrappers.ShadowRoot ? o(this, t) : a }, querySelectorAll: function (e) { var t = n(e), r = t !== e; e = t; var o = new w; return o.length = p.call(this, i, 0, o, e, r), o } - }, H = { matches: function (t) { return t = r(t), e.originalMatches.call(g(this), t) } }, x = { getElementsByTagName: function (e) { var t = new m, n = "*" === e ? s : a; return t.length = h.call(this, n, 0, t, e, e.toLowerCase()), t }, getElementsByClassName: function (e) { return this.querySelectorAll("." + e) }, getElementsByTagNameNS: function (e, t) { var n = new m, r = null; return r = "*" === e ? "*" === t ? s : c : "*" === t ? l : u, n.length = f.call(this, r, 0, n, e || null, t), n } }; e.GetElementsByInterface = x, e.SelectorsInterface = D, e.MatchesInterface = H -}(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { for (; e && e.nodeType !== Node.ELEMENT_NODE;)e = e.nextSibling; return e } function n(e) { for (; e && e.nodeType !== Node.ELEMENT_NODE;)e = e.previousSibling; return e } var r = e.wrappers.NodeList, o = { get firstElementChild() { return t(this.firstChild) }, get lastElementChild() { return n(this.lastChild) }, get childElementCount() { for (var e = 0, t = this.firstElementChild; t; t = t.nextElementSibling)e++; return e }, get children() { for (var e = new r, t = 0, n = this.firstElementChild; n; n = n.nextElementSibling)e[t++] = n; return e.length = t, e }, remove: function () { var e = this.parentNode; e && e.removeChild(this) } }, i = { get nextElementSibling() { return t(this.nextSibling) }, get previousElementSibling() { return n(this.previousSibling) } }, a = { getElementById: function (e) { return /[ \t\n\r\f]/.test(e) ? 
null : this.querySelector('[id="' + e + '"]') } }; e.ChildNodeInterface = i, e.NonElementParentNodeInterface = a, e.ParentNodeInterface = o }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { r.call(this, e) } var n = e.ChildNodeInterface, r = e.wrappers.Node, o = e.enqueueMutation, i = e.mixin, a = e.registerWrapper, s = e.unsafeUnwrap, c = window.CharacterData; t.prototype = Object.create(r.prototype), i(t.prototype, { get nodeValue() { return this.data }, set nodeValue(e) { this.data = e }, get textContent() { return this.data }, set textContent(e) { this.data = e }, get data() { return s(this).data }, set data(e) { var t = s(this).data; o(this, "characterData", { oldValue: t }), s(this).data = e } }), i(t.prototype, n), a(c, t, document.createTextNode("")), e.wrappers.CharacterData = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { return e >>> 0 } function n(e) { r.call(this, e) } var r = e.wrappers.CharacterData, o = (e.enqueueMutation, e.mixin), i = e.registerWrapper, a = window.Text; n.prototype = Object.create(r.prototype), o(n.prototype, { splitText: function (e) { e = t(e); var n = this.data; if (e > n.length) throw new Error("IndexSizeError"); var r = n.slice(0, e), o = n.slice(e); this.data = r; var i = this.ownerDocument.createTextNode(o); return this.parentNode && this.parentNode.insertBefore(i, this.nextSibling), i } }), i(a, n, document.createTextNode("")), e.wrappers.Text = n }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { return i(e).getAttribute("class") } function n(e, t) { a(e, "attributes", { name: "class", namespace: null, oldValue: t }) } function r(t) { e.invalidateRendererBasedOnAttribute(t, "class") } function o(e, o, i) { var a = e.ownerElement_; if (null == a) return o.apply(e, i); var s = t(a), c = o.apply(e, i); return t(a) !== s && (n(a, s), r(a)), c } if (!window.DOMTokenList) return void console.warn("Missing DOMTokenList prototype, please include a compatible classList polyfill such as http://goo.gl/uTcepH."); var i = e.unsafeUnwrap, a = e.enqueueMutation, s = DOMTokenList.prototype.add; DOMTokenList.prototype.add = function () { o(this, s, arguments) }; var c = DOMTokenList.prototype.remove; DOMTokenList.prototype.remove = function () { o(this, c, arguments) }; var l = DOMTokenList.prototype.toggle; DOMTokenList.prototype.toggle = function () { return o(this, l, arguments) } }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(t, n) { var r = t.parentNode; if (r && r.shadowRoot) { var o = e.getRendererForHost(r); o.dependsOnAttribute(n) && o.invalidate() } } function n(e, t, n) { u(e, "attributes", { name: t, namespace: null, oldValue: n }) } function r(e) { a.call(this, e) } var o = e.ChildNodeInterface, i = e.GetElementsByInterface, a = e.wrappers.Node, s = e.ParentNodeInterface, c = e.SelectorsInterface, l = e.MatchesInterface, u = (e.addWrapNodeListMethod, e.enqueueMutation), d = e.mixin, p = (e.oneOf, e.registerWrapper), h = e.unsafeUnwrap, f = e.wrappers, m = window.Element, w = ["matches", "mozMatchesSelector", "msMatchesSelector", "webkitMatchesSelector"].filter(function (e) { return m.prototype[e] }), v = w[0], g = m.prototype[v], b = new WeakMap; r.prototype = Object.create(a.prototype), d(r.prototype, { createShadowRoot: function () { var t = new f.ShadowRoot(this); h(this).polymerShadowRoot_ = t; var n = e.getRendererForHost(this); return n.invalidate(), t }, get shadowRoot() { return h(this).polymerShadowRoot_ || null }, setAttribute: function (e, r) 
{ var o = h(this).getAttribute(e); h(this).setAttribute(e, r), n(this, e, o), t(this, e) }, removeAttribute: function (e) { var r = h(this).getAttribute(e); h(this).removeAttribute(e), n(this, e, r), t(this, e) }, get classList() { var e = b.get(this); if (!e) { if (e = h(this).classList, !e) return; e.ownerElement_ = this, b.set(this, e) } return e }, get className() { return h(this).className }, set className(e) { this.setAttribute("class", e) }, get id() { return h(this).id }, set id(e) { this.setAttribute("id", e) } }), w.forEach(function (e) { "matches" !== e && (r.prototype[e] = function (e) { return this.matches(e) }) }), m.prototype.webkitCreateShadowRoot && (r.prototype.webkitCreateShadowRoot = r.prototype.createShadowRoot), d(r.prototype, o), d(r.prototype, i), d(r.prototype, s), d(r.prototype, c), d(r.prototype, l), p(m, r, document.createElementNS(null, "x")), e.invalidateRendererBasedOnAttribute = t, e.matchesNames = w, e.originalMatches = g, e.wrappers.Element = r }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { switch (e) { case "&": return "&"; case "<": return "<"; case ">": return ">"; case '"': return """; case " ": return " " } } function n(e) { return e.replace(L, t) } function r(e) { return e.replace(N, t) } function o(e) { for (var t = {}, n = 0; n < e.length; n++)t[e[n]] = !0; return t } function i(e) { if (e.namespaceURI !== D) return !0; var t = e.ownerDocument.doctype; return t && t.publicId && t.systemId } function a(e, t) { switch (e.nodeType) { case Node.ELEMENT_NODE: for (var o, a = e.tagName.toLowerCase(), c = "<" + a, l = e.attributes, u = 0; o = l[u]; u++)c += " " + o.name + '="' + n(o.value) + '"'; return C[a] ? (i(e) && (c += "/"), c + ">") : c + ">" + s(e) + ""; case Node.TEXT_NODE: var d = e.data; return t && j[t.localName] ? 
d : r(d); case Node.COMMENT_NODE: return ""; default: throw console.error(e), new Error("not implemented") } } function s(e) { e instanceof O.HTMLTemplateElement && (e = e.content); for (var t = "", n = e.firstChild; n; n = n.nextSibling)t += a(n, e); return t } function c(e, t, n) { var r = n || "div"; e.textContent = ""; var o = T(e.ownerDocument.createElement(r)); o.innerHTML = t; for (var i; i = o.firstChild;)e.appendChild(M(i)) } function l(e) { m.call(this, e) } function u(e, t) { var n = T(e.cloneNode(!1)); n.innerHTML = t; for (var r, o = T(document.createDocumentFragment()); r = n.firstChild;)o.appendChild(r); return M(o) } function d(t) { return function () { return e.renderAllPending(), S(this)[t] } } function p(e) { w(l, e, d(e)) } function h(t) { Object.defineProperty(l.prototype, t, { get: d(t), set: function (n) { e.renderAllPending(), S(this)[t] = n }, configurable: !0, enumerable: !0 }) } function f(t) { Object.defineProperty(l.prototype, t, { value: function () { return e.renderAllPending(), S(this)[t].apply(S(this), arguments) }, configurable: !0, enumerable: !0 }) } var m = e.wrappers.Element, w = e.defineGetter, v = e.enqueueMutation, g = e.mixin, b = e.nodesWereAdded, y = e.nodesWereRemoved, E = e.registerWrapper, _ = e.snapshotNodeList, S = e.unsafeUnwrap, T = e.unwrap, M = e.wrap, O = e.wrappers, L = /[&\u00A0"]/g, N = /[&\u00A0<>]/g, C = o(["area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"]), j = o(["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"]), D = "http://www.w3.org/1999/xhtml", H = /MSIE/.test(navigator.userAgent), x = window.HTMLElement, R = window.HTMLTemplateElement; l.prototype = Object.create(m.prototype), g(l.prototype, { get innerHTML() { return s(this) }, set innerHTML(e) { if (H && j[this.localName]) return void (this.textContent = e); var t = _(this.childNodes); this.invalidateShadowRenderer() ? this instanceof O.HTMLTemplateElement ? c(this.content, e) : c(this, e, this.tagName) : !R && this instanceof O.HTMLTemplateElement ? c(this.content, e) : S(this).innerHTML = e; var n = _(this.childNodes); v(this, "childList", { addedNodes: n, removedNodes: t }), y(t), b(n, this) }, get outerHTML() { return a(this, this.parentNode) }, set outerHTML(e) { var t = this.parentNode; if (t) { t.invalidateShadowRenderer(); var n = u(t, e); t.replaceChild(n, this) } }, insertAdjacentHTML: function (e, t) { var n, r; switch (String(e).toLowerCase()) { case "beforebegin": n = this.parentNode, r = this; break; case "afterend": n = this.parentNode, r = this.nextSibling; break; case "afterbegin": n = this, r = this.firstChild; break; case "beforeend": n = this, r = null; break; default: return }var o = u(n, t); n.insertBefore(o, r) }, get hidden() { return this.hasAttribute("hidden") }, set hidden(e) { e ? 
this.setAttribute("hidden", "") : this.removeAttribute("hidden") } }), ["clientHeight", "clientLeft", "clientTop", "clientWidth", "offsetHeight", "offsetLeft", "offsetTop", "offsetWidth", "scrollHeight", "scrollWidth"].forEach(p), ["scrollLeft", "scrollTop"].forEach(h), ["focus", "getBoundingClientRect", "getClientRects", "scrollIntoView"].forEach(f), E(x, l, document.createElement("b")), e.wrappers.HTMLElement = l, e.getInnerHTML = s, e.setInnerHTML = c }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = e.mixin, o = e.registerWrapper, i = e.unsafeUnwrap, a = e.wrap, s = window.HTMLCanvasElement; t.prototype = Object.create(n.prototype), r(t.prototype, { getContext: function () { var e = i(this).getContext.apply(i(this), arguments); return e && a(e) } }), o(s, t, document.createElement("canvas")), e.wrappers.HTMLCanvasElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = e.mixin, o = e.registerWrapper, i = window.HTMLContentElement; t.prototype = Object.create(n.prototype), r(t.prototype, { constructor: t, get select() { return this.getAttribute("select") }, set select(e) { this.setAttribute("select", e) }, setAttribute: function (e, t) { n.prototype.setAttribute.call(this, e, t), "select" === String(e).toLowerCase() && this.invalidateShadowRenderer(!0) } }), i && o(i, t), e.wrappers.HTMLContentElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = e.mixin, o = e.registerWrapper, i = e.wrapHTMLCollection, a = e.unwrap, s = window.HTMLFormElement; t.prototype = Object.create(n.prototype), r(t.prototype, { get elements() { return i(a(this).elements) } }), o(s, t, document.createElement("form")), e.wrappers.HTMLFormElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { r.call(this, e) } function n(e, t) { if (!(this instanceof n)) throw new TypeError("DOM object constructor cannot be called as a function."); var o = i(document.createElement("img")); r.call(this, o), a(o, this), void 0 !== e && (o.width = e), void 0 !== t && (o.height = t) } var r = e.wrappers.HTMLElement, o = e.registerWrapper, i = e.unwrap, a = e.rewrap, s = window.HTMLImageElement; t.prototype = Object.create(r.prototype), o(s, t, document.createElement("img")), n.prototype = t.prototype, e.wrappers.HTMLImageElement = t, e.wrappers.Image = n }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = (e.mixin, e.wrappers.NodeList, e.registerWrapper), o = window.HTMLShadowElement; t.prototype = Object.create(n.prototype), t.prototype.constructor = t, o && r(o, t), e.wrappers.HTMLShadowElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { if (!e.defaultView) return e; var t = d.get(e); if (!t) { for (t = e.implementation.createHTMLDocument(""); t.lastChild;)t.removeChild(t.lastChild); d.set(e, t) } return t } function n(e) { for (var n, r = t(e.ownerDocument), o = c(r.createDocumentFragment()); n = e.firstChild;)o.appendChild(n); return o } function r(e) { if (o.call(this, e), !p) { var t = n(e); u.set(this, l(t)) } } var o = e.wrappers.HTMLElement, i = e.mixin, a = e.registerWrapper, s = e.unsafeUnwrap, c = e.unwrap, l = e.wrap, u = new WeakMap, d = new WeakMap, p = window.HTMLTemplateElement; r.prototype = Object.create(o.prototype), i(r.prototype, { 
constructor: r, get content() { return p ? l(s(this).content) : u.get(this) } }), p && a(p, r), e.wrappers.HTMLTemplateElement = r }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = e.registerWrapper, o = window.HTMLMediaElement; o && (t.prototype = Object.create(n.prototype), r(o, t, document.createElement("audio")), e.wrappers.HTMLMediaElement = t) }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { r.call(this, e) } function n(e) { if (!(this instanceof n)) throw new TypeError("DOM object constructor cannot be called as a function."); var t = i(document.createElement("audio")); r.call(this, t), a(t, this), t.setAttribute("preload", "auto"), void 0 !== e && t.setAttribute("src", e) } var r = e.wrappers.HTMLMediaElement, o = e.registerWrapper, i = e.unwrap, a = e.rewrap, s = window.HTMLAudioElement; s && (t.prototype = Object.create(r.prototype), o(s, t, document.createElement("audio")), n.prototype = t.prototype, e.wrappers.HTMLAudioElement = t, e.wrappers.Audio = n) }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { return e.replace(/\s+/g, " ").trim() } function n(e) { o.call(this, e) } function r(e, t, n, i) { if (!(this instanceof r)) throw new TypeError("DOM object constructor cannot be called as a function."); var a = c(document.createElement("option")); o.call(this, a), s(a, this), void 0 !== e && (a.text = e), void 0 !== t && a.setAttribute("value", t), n === !0 && a.setAttribute("selected", ""), a.selected = i === !0 } var o = e.wrappers.HTMLElement, i = e.mixin, a = e.registerWrapper, s = e.rewrap, c = e.unwrap, l = e.wrap, u = window.HTMLOptionElement; n.prototype = Object.create(o.prototype), i(n.prototype, { get text() { return t(this.textContent) }, set text(e) { this.textContent = t(String(e)) }, get form() { return l(c(this).form) } }), a(u, n, document.createElement("option")), r.prototype = n.prototype, e.wrappers.HTMLOptionElement = n, e.wrappers.Option = r }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = e.mixin, o = e.registerWrapper, i = e.unwrap, a = e.wrap, s = window.HTMLSelectElement; t.prototype = Object.create(n.prototype), r(t.prototype, { add: function (e, t) { "object" == typeof t && (t = i(t)), i(this).add(i(e), t) }, remove: function (e) { return void 0 === e ? 
void n.prototype.remove.call(this) : ("object" == typeof e && (e = i(e)), void i(this).remove(e)) }, get form() { return a(i(this).form) } }), o(s, t, document.createElement("select")), e.wrappers.HTMLSelectElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = e.mixin, o = e.registerWrapper, i = e.unwrap, a = e.wrap, s = e.wrapHTMLCollection, c = window.HTMLTableElement; t.prototype = Object.create(n.prototype), r(t.prototype, { get caption() { return a(i(this).caption) }, createCaption: function () { return a(i(this).createCaption()) }, get tHead() { return a(i(this).tHead) }, createTHead: function () { return a(i(this).createTHead()) }, createTFoot: function () { return a(i(this).createTFoot()) }, get tFoot() { return a(i(this).tFoot) }, get tBodies() { return s(i(this).tBodies) }, createTBody: function () { return a(i(this).createTBody()) }, get rows() { return s(i(this).rows) }, insertRow: function (e) { return a(i(this).insertRow(e)) } }), o(c, t, document.createElement("table")), e.wrappers.HTMLTableElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = e.mixin, o = e.registerWrapper, i = e.wrapHTMLCollection, a = e.unwrap, s = e.wrap, c = window.HTMLTableSectionElement; t.prototype = Object.create(n.prototype), r(t.prototype, { constructor: t, get rows() { return i(a(this).rows) }, insertRow: function (e) { return s(a(this).insertRow(e)) } }), o(c, t, document.createElement("thead")), e.wrappers.HTMLTableSectionElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.HTMLElement, r = e.mixin, o = e.registerWrapper, i = e.wrapHTMLCollection, a = e.unwrap, s = e.wrap, c = window.HTMLTableRowElement; t.prototype = Object.create(n.prototype), r(t.prototype, { get cells() { return i(a(this).cells) }, insertCell: function (e) { return s(a(this).insertCell(e)) } }), o(c, t, document.createElement("tr")), e.wrappers.HTMLTableRowElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { switch (e.localName) { case "content": return new n(e); case "shadow": return new o(e); case "template": return new i(e) }r.call(this, e) } var n = e.wrappers.HTMLContentElement, r = e.wrappers.HTMLElement, o = e.wrappers.HTMLShadowElement, i = e.wrappers.HTMLTemplateElement, a = (e.mixin, e.registerWrapper), s = window.HTMLUnknownElement; t.prototype = Object.create(r.prototype), a(s, t), e.wrappers.HTMLUnknownElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.Element, r = e.wrappers.HTMLElement, o = e.registerWrapper, i = (e.defineWrapGetter, e.unsafeUnwrap), a = e.wrap, s = e.mixin, c = "http://www.w3.org/2000/svg", l = window.SVGElement, u = document.createElementNS(c, "title"); if (!("classList" in u)) { var d = Object.getOwnPropertyDescriptor(n.prototype, "classList"); Object.defineProperty(r.prototype, "classList", d), delete n.prototype.classList } t.prototype = Object.create(n.prototype), s(t.prototype, { get ownerSVGElement() { return a(i(this).ownerSVGElement) } }), o(l, t, document.createElementNS(c, "title")), e.wrappers.SVGElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { p.call(this, e) } var n = e.mixin, r = e.registerWrapper, o = e.unwrap, i = e.wrap, a = window.SVGUseElement, s = "http://www.w3.org/2000/svg", c = 
i(document.createElementNS(s, "g")), l = document.createElementNS(s, "use"), u = c.constructor, d = Object.getPrototypeOf(u.prototype), p = d.constructor; t.prototype = Object.create(d), "instanceRoot" in l && n(t.prototype, { get instanceRoot() { return i(o(this).instanceRoot) }, get animatedInstanceRoot() { return i(o(this).animatedInstanceRoot) } }), r(a, t, l), e.wrappers.SVGUseElement = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.EventTarget, r = e.mixin, o = e.registerWrapper, i = e.unsafeUnwrap, a = e.wrap, s = window.SVGElementInstance; s && (t.prototype = Object.create(n.prototype), r(t.prototype, { get correspondingElement() { return a(i(this).correspondingElement) }, get correspondingUseElement() { return a(i(this).correspondingUseElement) }, get parentNode() { return a(i(this).parentNode) }, get childNodes() { throw new Error("Not implemented") }, get firstChild() { return a(i(this).firstChild) }, get lastChild() { return a(i(this).lastChild) }, get previousSibling() { return a(i(this).previousSibling) }, get nextSibling() { return a(i(this).nextSibling) } }), o(s, t), e.wrappers.SVGElementInstance = t) }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { o(e, this) } var n = e.mixin, r = e.registerWrapper, o = e.setWrapper, i = e.unsafeUnwrap, a = e.unwrap, s = e.unwrapIfNeeded, c = e.wrap, l = window.CanvasRenderingContext2D; n(t.prototype, { get canvas() { return c(i(this).canvas) }, drawImage: function () { arguments[0] = s(arguments[0]), i(this).drawImage.apply(i(this), arguments) }, createPattern: function () { return arguments[0] = a(arguments[0]), i(this).createPattern.apply(i(this), arguments) } }), r(l, t, document.createElement("canvas").getContext("2d")), e.wrappers.CanvasRenderingContext2D = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { i(e, this) } var n = e.addForwardingProperties, r = e.mixin, o = e.registerWrapper, i = e.setWrapper, a = e.unsafeUnwrap, s = e.unwrapIfNeeded, c = e.wrap, l = window.WebGLRenderingContext; if (l) { r(t.prototype, { get canvas() { return c(a(this).canvas) }, texImage2D: function () { arguments[5] = s(arguments[5]), a(this).texImage2D.apply(a(this), arguments) }, texSubImage2D: function () { arguments[6] = s(arguments[6]), a(this).texSubImage2D.apply(a(this), arguments) } }); var u = Object.getPrototypeOf(l.prototype); u !== Object.prototype && n(u, t.prototype); var d = /WebKit/.test(navigator.userAgent) ? 
{ drawingBufferHeight: null, drawingBufferWidth: null } : {}; o(l, t, d), e.wrappers.WebGLRenderingContext = t } }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.Node, r = e.GetElementsByInterface, o = e.NonElementParentNodeInterface, i = e.ParentNodeInterface, a = e.SelectorsInterface, s = e.mixin, c = e.registerObject, l = e.registerWrapper, u = window.DocumentFragment; t.prototype = Object.create(n.prototype), s(t.prototype, i), s(t.prototype, a), s(t.prototype, r), s(t.prototype, o), l(u, t, document.createDocumentFragment()), e.wrappers.DocumentFragment = t; var d = c(document.createComment("")); e.wrappers.Comment = d }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { var t = d(u(e).ownerDocument.createDocumentFragment()); n.call(this, t), c(t, this); var o = e.shadowRoot; f.set(this, o), this.treeScope_ = new r(this, a(o || e)), h.set(this, e) } var n = e.wrappers.DocumentFragment, r = e.TreeScope, o = e.elementFromPoint, i = e.getInnerHTML, a = e.getTreeScope, s = e.mixin, c = e.rewrap, l = e.setInnerHTML, u = e.unsafeUnwrap, d = e.unwrap, p = e.wrap, h = new WeakMap, f = new WeakMap; t.prototype = Object.create(n.prototype), s(t.prototype, { constructor: t, get innerHTML() { return i(this) }, set innerHTML(e) { l(this, e), this.invalidateShadowRenderer() }, get olderShadowRoot() { return f.get(this) || null }, get host() { return h.get(this) || null }, invalidateShadowRenderer: function () { return h.get(this).invalidateShadowRenderer() }, elementFromPoint: function (e, t) { return o(this, this.ownerDocument, e, t) }, getSelection: function () { return document.getSelection() }, get activeElement() { var e = d(this).ownerDocument.activeElement; if (!e || !e.nodeType) return null; for (var t = p(e); !this.contains(t);) { for (; t.parentNode;)t = t.parentNode; if (!t.host) return null; t = t.host } return t } }), e.wrappers.ShadowRoot = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { var t = d(e).root; return t instanceof h ? 
t.host : null } function n(t, n) { if (t.shadowRoot) { n = Math.min(t.childNodes.length - 1, n); var r = t.childNodes[n]; if (r) { var o = e.getDestinationInsertionPoints(r); if (o.length > 0) { var i = o[0].parentNode; i.nodeType == Node.ELEMENT_NODE && (t = i) } } } return t } function r(e) { return e = u(e), t(e) || e } function o(e) { a(e, this) } var i = e.registerWrapper, a = e.setWrapper, s = e.unsafeUnwrap, c = e.unwrap, l = e.unwrapIfNeeded, u = e.wrap, d = e.getTreeScope, p = window.Range, h = e.wrappers.ShadowRoot; o.prototype = { get startContainer() { return r(s(this).startContainer) }, get endContainer() { return r(s(this).endContainer) }, get commonAncestorContainer() { return r(s(this).commonAncestorContainer) }, setStart: function (e, t) { e = n(e, t), s(this).setStart(l(e), t) }, setEnd: function (e, t) { e = n(e, t), s(this).setEnd(l(e), t) }, setStartBefore: function (e) { s(this).setStartBefore(l(e)) }, setStartAfter: function (e) { s(this).setStartAfter(l(e)) }, setEndBefore: function (e) { s(this).setEndBefore(l(e)) }, setEndAfter: function (e) { s(this).setEndAfter(l(e)) }, selectNode: function (e) { s(this).selectNode(l(e)) }, selectNodeContents: function (e) { s(this).selectNodeContents(l(e)) }, compareBoundaryPoints: function (e, t) { return s(this).compareBoundaryPoints(e, c(t)) }, extractContents: function () { return u(s(this).extractContents()) }, cloneContents: function () { return u(s(this).cloneContents()) }, insertNode: function (e) { s(this).insertNode(l(e)) }, surroundContents: function (e) { s(this).surroundContents(l(e)) }, cloneRange: function () { return u(s(this).cloneRange()) }, isPointInRange: function (e, t) { return s(this).isPointInRange(l(e), t) }, comparePoint: function (e, t) { return s(this).comparePoint(l(e), t) }, intersectsNode: function (e) { return s(this).intersectsNode(l(e)) }, toString: function () { return s(this).toString() } }, p.prototype.createContextualFragment && (o.prototype.createContextualFragment = function (e) { return u(s(this).createContextualFragment(e)) }), i(window.Range, o, document.createRange()), e.wrappers.Range = o }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { e.previousSibling_ = e.previousSibling, e.nextSibling_ = e.nextSibling, e.parentNode_ = e.parentNode } function n(n, o, i) { var a = x(n), s = x(o), c = i ? x(i) : null; if (r(o), t(o), i) n.firstChild === i && (n.firstChild_ = i), i.previousSibling_ = i.previousSibling; else { n.lastChild_ = n.lastChild, n.lastChild === n.firstChild && (n.firstChild_ = n.firstChild); var l = R(a.lastChild); l && (l.nextSibling_ = l.nextSibling) } e.originalInsertBefore.call(a, s, c) } function r(n) { var r = x(n), o = r.parentNode; if (o) { var i = R(o); t(n), n.previousSibling && (n.previousSibling.nextSibling_ = n), n.nextSibling && (n.nextSibling.previousSibling_ = n), i.lastChild === n && (i.lastChild_ = n), i.firstChild === n && (i.firstChild_ = n), e.originalRemoveChild.call(o, r) } } function o(e) { P.set(e, []) } function i(e) { var t = P.get(e); return t || P.set(e, t = []), t } function a(e) { for (var t = [], n = 0, r = e.firstChild; r; r = r.nextSibling)t[n++] = r; return t } function s() { for (var e = 0; e < F.length; e++) { var t = F[e], n = t.parentRenderer; n && n.dirty || t.render() } F = [] } function c() { T = null, s() } function l(e) { var t = A.get(e); return t || (t = new h(e), A.set(e, t)), t } function u(e) { var t = j(e).root; return t instanceof C ? 
t : null } function d(e) { return l(e.host) } function p(e) { this.skip = !1, this.node = e, this.childNodes = [] } function h(e) { this.host = e, this.dirty = !1, this.invalidateAttributes(), this.associateNode(e) } function f(e) { for (var t = [], n = e.firstChild; n; n = n.nextSibling)E(n) ? t.push.apply(t, i(n)) : t.push(n); return t } function m(e) { if (e instanceof L) return e; if (e instanceof O) return null; for (var t = e.firstChild; t; t = t.nextSibling) { var n = m(t); if (n) return n } return null } function w(e, t) { i(t).push(e); var n = k.get(e); n ? n.push(t) : k.set(e, [t]) } function v(e) { return k.get(e) } function g(e) { k.set(e, void 0) } function b(e, t) { var n = t.getAttribute("select"); if (!n) return !0; if (n = n.trim(), !n) return !0; if (!(e instanceof M)) return !1; if (!q.test(n)) return !1; try { return e.matches(n) } catch (r) { return !1 } } function y(e, t) { var n = v(t); return n && n[n.length - 1] === e } function E(e) { return e instanceof O || e instanceof L } function _(e) { return e.shadowRoot } function S(e) { for (var t = [], n = e.shadowRoot; n; n = n.olderShadowRoot)t.push(n); return t } var T, M = e.wrappers.Element, O = e.wrappers.HTMLContentElement, L = e.wrappers.HTMLShadowElement, N = e.wrappers.Node, C = e.wrappers.ShadowRoot, j = (e.assert, e.getTreeScope), D = (e.mixin, e.oneOf), H = e.unsafeUnwrap, x = e.unwrap, R = e.wrap, I = e.ArraySplice, P = new WeakMap, k = new WeakMap, A = new WeakMap, W = D(window, ["requestAnimationFrame", "mozRequestAnimationFrame", "webkitRequestAnimationFrame", "setTimeout"]), F = [], U = new I; U.equals = function (e, t) { return x(e.node) === t }, p.prototype = { append: function (e) { var t = new p(e); return this.childNodes.push(t), t }, sync: function (e) { if (!this.skip) { for (var t = this.node, o = this.childNodes, i = a(x(t)), s = e || new WeakMap, c = U.calculateSplices(o, i), l = 0, u = 0, d = 0, p = 0; p < c.length; p++) { for (var h = c[p]; d < h.index; d++)u++, o[l++].sync(s); for (var f = h.removed.length, m = 0; m < f; m++) { var w = R(i[u++]); s.get(w) || r(w) } for (var v = h.addedCount, g = i[u] && R(i[u]), m = 0; m < v; m++) { var b = o[l++], y = b.node; n(t, y, g), s.set(y, !0), b.sync(s) } d += v } for (var p = d; p < o.length; p++)o[p].sync(s) } } }, h.prototype = { render: function (e) { if (this.dirty) { this.invalidateAttributes(); var t = this.host; this.distribution(t); var n = e || new p(t); this.buildRenderTree(n, t); var r = !e; r && n.sync(), this.dirty = !1 } }, get parentRenderer() { return j(this.host).renderer }, invalidate: function () { if (!this.dirty) { this.dirty = !0; var e = this.parentRenderer; if (e && e.invalidate(), F.push(this), T) return; T = window[W](c, 0) } }, distribution: function (e) { this.resetAllSubtrees(e), this.distributionResolution(e) }, resetAll: function (e) { E(e) ? 
o(e) : g(e), this.resetAllSubtrees(e) }, resetAllSubtrees: function (e) { for (var t = e.firstChild; t; t = t.nextSibling)this.resetAll(t); e.shadowRoot && this.resetAll(e.shadowRoot), e.olderShadowRoot && this.resetAll(e.olderShadowRoot) }, distributionResolution: function (e) { if (_(e)) { for (var t = e, n = f(t), r = S(t), o = 0; o < r.length; o++)this.poolDistribution(r[o], n); for (var o = r.length - 1; o >= 0; o--) { var i = r[o], a = m(i); if (a) { var s = i.olderShadowRoot; s && (n = f(s)); for (var c = 0; c < n.length; c++)w(n[c], a) } this.distributionResolution(i) } } for (var l = e.firstChild; l; l = l.nextSibling)this.distributionResolution(l) }, poolDistribution: function (e, t) { if (!(e instanceof L)) if (e instanceof O) { var n = e; this.updateDependentAttributes(n.getAttribute("select")); for (var r = !1, o = 0; o < t.length; o++) { var e = t[o]; e && b(e, n) && (w(e, n), t[o] = void 0, r = !0) } if (!r) for (var i = n.firstChild; i; i = i.nextSibling)w(i, n) } else for (var i = e.firstChild; i; i = i.nextSibling)this.poolDistribution(i, t) }, buildRenderTree: function (e, t) { for (var n = this.compose(t), r = 0; r < n.length; r++) { var o = n[r], i = e.append(o); this.buildRenderTree(i, o) } if (_(t)) { var a = l(t); a.dirty = !1 } }, compose: function (e) { for (var t = [], n = e.shadowRoot || e, r = n.firstChild; r; r = r.nextSibling)if (E(r)) { this.associateNode(n); for (var o = i(r), a = 0; a < o.length; a++) { var s = o[a]; y(r, s) && t.push(s) } } else t.push(r); return t }, invalidateAttributes: function () { this.attributes = Object.create(null) }, updateDependentAttributes: function (e) { if (e) { var t = this.attributes; /\.\w+/.test(e) && (t["class"] = !0), /#\w+/.test(e) && (t.id = !0), e.replace(/\[\s*([^\s=\|~\]]+)/g, function (e, n) { t[n] = !0 }) } }, dependsOnAttribute: function (e) { return this.attributes[e] }, associateNode: function (e) { H(e).polymerShadowRenderer_ = this } }; var q = /^(:not\()?[*.#[a-zA-Z_|]/; N.prototype.invalidateShadowRenderer = function (e) { var t = H(this).polymerShadowRenderer_; return !!t && (t.invalidate(), !0) }, O.prototype.getDistributedNodes = L.prototype.getDistributedNodes = function () { return s(), i(this) }, M.prototype.getDestinationInsertionPoints = function () { return s(), v(this) || [] }, O.prototype.nodeIsInserted_ = L.prototype.nodeIsInserted_ = function () { this.invalidateShadowRenderer(); var e, t = u(this); t && (e = d(t)), H(this).polymerShadowRenderer_ = e, e && e.invalidate() }, e.getRendererForHost = l, e.getShadowTrees = S, e.renderAllPending = s, e.getDestinationInsertionPoints = v, e.visual = { insertBefore: n, remove: r } }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(t) { if (window[t]) { r(!e.wrappers[t]); var c = function (e) { n.call(this, e) }; c.prototype = Object.create(n.prototype), o(c.prototype, { get form() { return s(a(this).form) } }), i(window[t], c, document.createElement(t.slice(4, -7))), e.wrappers[t] = c } } var n = e.wrappers.HTMLElement, r = e.assert, o = e.mixin, i = e.registerWrapper, a = e.unwrap, s = e.wrap, c = ["HTMLButtonElement", "HTMLFieldSetElement", "HTMLInputElement", "HTMLKeygenElement", "HTMLLabelElement", "HTMLLegendElement", "HTMLObjectElement", "HTMLOutputElement", "HTMLTextAreaElement"]; c.forEach(t) }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { r(e, this) } var n = e.registerWrapper, r = e.setWrapper, o = e.unsafeUnwrap, i = e.unwrap, a = e.unwrapIfNeeded, s = e.wrap, c = window.Selection; t.prototype = 
{ get anchorNode() { return s(o(this).anchorNode) }, get focusNode() { return s(o(this).focusNode) }, addRange: function (e) { o(this).addRange(a(e)) }, collapse: function (e, t) { o(this).collapse(a(e), t) }, containsNode: function (e, t) { return o(this).containsNode(a(e), t) }, getRangeAt: function (e) { return s(o(this).getRangeAt(e)) }, removeRange: function (e) { o(this).removeRange(i(e)) }, selectAllChildren: function (e) { o(this).selectAllChildren(e instanceof ShadowRoot ? o(e.host) : a(e)) }, toString: function () { return o(this).toString() } }, c.prototype.extend && (t.prototype.extend = function (e, t) { o(this).extend(a(e), t) }), n(window.Selection, t, window.getSelection()), e.wrappers.Selection = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { r(e, this) } var n = e.registerWrapper, r = e.setWrapper, o = e.unsafeUnwrap, i = e.unwrapIfNeeded, a = e.wrap, s = window.TreeWalker; t.prototype = { get root() { return a(o(this).root) }, get currentNode() { return a(o(this).currentNode) }, set currentNode(e) { o(this).currentNode = i(e) }, get filter() { return o(this).filter }, parentNode: function () { return a(o(this).parentNode()) }, firstChild: function () { return a(o(this).firstChild()) }, lastChild: function () { return a(o(this).lastChild()) }, previousSibling: function () { return a(o(this).previousSibling()) }, previousNode: function () { return a(o(this).previousNode()) }, nextNode: function () { return a(o(this).nextNode()) } }, n(s, t), e.wrappers.TreeWalker = t }(window.ShadowDOMPolyfill), function (e) { - "use strict"; function t(e) { u.call(this, e), this.treeScope_ = new w(this, null) } function n(e) { var n = document[e]; t.prototype[e] = function () { return j(n.apply(N(this), arguments)) } } function r(e, t) { x.call(N(t), C(e)), o(e, t) } function o(e, t) { e.shadowRoot && t.adoptNode(e.shadowRoot), e instanceof m && i(e, t); for (var n = e.firstChild; n; n = n.nextSibling)o(n, t) } function i(e, t) { var n = e.olderShadowRoot; n && t.adoptNode(n) } function a(e) { L(e, this) } function s(e, t) { - var n = document.implementation[t]; e.prototype[t] = function () { - return j(n.apply(N(this), arguments)) - } - } function c(e, t) { var n = document.implementation[t]; e.prototype[t] = function () { return n.apply(N(this), arguments) } } var l = e.GetElementsByInterface, u = e.wrappers.Node, d = e.ParentNodeInterface, p = e.NonElementParentNodeInterface, h = e.wrappers.Selection, f = e.SelectorsInterface, m = e.wrappers.ShadowRoot, w = e.TreeScope, v = e.cloneNode, g = e.defineGetter, b = e.defineWrapGetter, y = e.elementFromPoint, E = e.forwardMethodsToWrapper, _ = e.matchesNames, S = e.mixin, T = e.registerWrapper, M = e.renderAllPending, O = e.rewrap, L = e.setWrapper, N = e.unsafeUnwrap, C = e.unwrap, j = e.wrap, D = e.wrapEventTargetMethods, H = (e.wrapNodeList, new WeakMap); t.prototype = Object.create(u.prototype), b(t, "documentElement"), b(t, "body"), b(t, "head"), g(t, "activeElement", function () { var e = C(this).activeElement; if (!e || !e.nodeType) return null; for (var t = j(e); !this.contains(t);) { for (; t.parentNode;)t = t.parentNode; if (!t.host) return null; t = t.host } return t }), ["createComment", "createDocumentFragment", "createElement", "createElementNS", "createEvent", "createEventNS", "createRange", "createTextNode"].forEach(n); var x = document.adoptNode, R = document.getSelection; S(t.prototype, { adoptNode: function (e) { return e.parentNode && e.parentNode.removeChild(e), r(e, this), e }, 
elementFromPoint: function (e, t) { return y(this, this, e, t) }, importNode: function (e, t) { return v(e, t, N(this)) }, getSelection: function () { return M(), new h(R.call(C(this))) }, getElementsByName: function (e) { return f.querySelectorAll.call(this, "[name=" + JSON.stringify(String(e)) + "]") } }); var I = document.createTreeWalker, P = e.wrappers.TreeWalker; if (t.prototype.createTreeWalker = function (e, t, n, r) { var o = null; return n && (n.acceptNode && "function" == typeof n.acceptNode ? o = { acceptNode: function (e) { return n.acceptNode(j(e)) } } : "function" == typeof n && (o = function (e) { return n(j(e)) })), new P(I.call(C(this), C(e), t, o, r)) }, document.registerElement) { var k = document.registerElement; t.prototype.registerElement = function (t, n) { function r(e) { return e ? void L(e, this) : i ? document.createElement(i, t) : document.createElement(t) } var o, i; if (void 0 !== n && (o = n.prototype, i = n["extends"]), o || (o = Object.create(HTMLElement.prototype)), e.nativePrototypeTable.get(o)) throw new Error("NotSupportedError"); for (var a, s = Object.getPrototypeOf(o), c = []; s && !(a = e.nativePrototypeTable.get(s));)c.push(s), s = Object.getPrototypeOf(s); if (!a) throw new Error("NotSupportedError"); for (var l = Object.create(a), u = c.length - 1; u >= 0; u--)l = Object.create(l);["createdCallback", "attachedCallback", "detachedCallback", "attributeChangedCallback"].forEach(function (e) { var t = o[e]; t && (l[e] = function () { j(this) instanceof r || O(this), t.apply(j(this), arguments) }) }); var d = { prototype: l }; i && (d["extends"] = i), r.prototype = o, r.prototype.constructor = r, e.constructorTable.set(l, r), e.nativePrototypeTable.set(o, l); k.call(C(this), t, d); return r }, E([window.HTMLDocument || window.Document], ["registerElement"]) } E([window.HTMLBodyElement, window.HTMLDocument || window.Document, window.HTMLHeadElement, window.HTMLHtmlElement], ["appendChild", "compareDocumentPosition", "contains", "getElementsByClassName", "getElementsByTagName", "getElementsByTagNameNS", "insertBefore", "querySelector", "querySelectorAll", "removeChild", "replaceChild"]), E([window.HTMLBodyElement, window.HTMLHeadElement, window.HTMLHtmlElement], _), E([window.HTMLDocument || window.Document], ["adoptNode", "importNode", "contains", "createComment", "createDocumentFragment", "createElement", "createElementNS", "createEvent", "createEventNS", "createRange", "createTextNode", "createTreeWalker", "elementFromPoint", "getElementById", "getElementsByName", "getSelection"]), S(t.prototype, l), S(t.prototype, d), S(t.prototype, f), S(t.prototype, p), S(t.prototype, { get implementation() { var e = H.get(this); return e ? 
e : (e = new a(C(this).implementation), H.set(this, e), e) }, get defaultView() { return j(C(this).defaultView) } }), T(window.Document, t, document.implementation.createHTMLDocument("")), window.HTMLDocument && T(window.HTMLDocument, t), D([window.HTMLBodyElement, window.HTMLDocument || window.Document, window.HTMLHeadElement]); var A = document.implementation.createDocument; a.prototype.createDocument = function () { return arguments[2] = C(arguments[2]), j(A.apply(N(this), arguments)) }, s(a, "createDocumentType"), s(a, "createHTMLDocument"), c(a, "hasFeature"), T(window.DOMImplementation, a), E([window.DOMImplementation], ["createDocument", "createDocumentType", "createHTMLDocument", "hasFeature"]), e.adoptNodeNoRemove = r, e.wrappers.DOMImplementation = a, e.wrappers.Document = t -}(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { n.call(this, e) } var n = e.wrappers.EventTarget, r = e.wrappers.Selection, o = e.mixin, i = e.registerWrapper, a = e.renderAllPending, s = e.unwrap, c = e.unwrapIfNeeded, l = e.wrap, u = window.Window, d = window.getComputedStyle, p = window.getDefaultComputedStyle, h = window.getSelection; t.prototype = Object.create(n.prototype), u.prototype.getComputedStyle = function (e, t) { return l(this || window).getComputedStyle(c(e), t) }, p && (u.prototype.getDefaultComputedStyle = function (e, t) { return l(this || window).getDefaultComputedStyle(c(e), t) }), u.prototype.getSelection = function () { return l(this || window).getSelection() }, delete window.getComputedStyle, delete window.getDefaultComputedStyle, delete window.getSelection, ["addEventListener", "removeEventListener", "dispatchEvent"].forEach(function (e) { u.prototype[e] = function () { var t = l(this || window); return t[e].apply(t, arguments) }, delete window[e] }), o(t.prototype, { getComputedStyle: function (e, t) { return a(), d.call(s(this), c(e), t) }, getSelection: function () { return a(), new r(h.call(s(this))) }, get document() { return l(s(this).document) } }), p && (t.prototype.getDefaultComputedStyle = function (e, t) { return a(), p.call(s(this), c(e), t) }), i(u, t, window), e.wrappers.Window = t }(window.ShadowDOMPolyfill), function (e) { "use strict"; var t = e.unwrap, n = window.DataTransfer || window.Clipboard, r = n.prototype.setDragImage; r && (n.prototype.setDragImage = function (e, n, o) { r.call(this, t(e), n, o) }) }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { var t; t = e instanceof i ? 
e : new i(e && o(e)), r(t, this) } var n = e.registerWrapper, r = e.setWrapper, o = e.unwrap, i = window.FormData; i && (n(i, t, new i), e.wrappers.FormData = t) }(window.ShadowDOMPolyfill), function (e) { "use strict"; var t = e.unwrapIfNeeded, n = XMLHttpRequest.prototype.send; XMLHttpRequest.prototype.send = function (e) { return n.call(this, t(e)) } }(window.ShadowDOMPolyfill), function (e) { "use strict"; function t(e) { var t = n[e], r = window[t]; if (r) { var o = document.createElement(e), i = o.constructor; window[t] = i } } var n = (e.isWrapperFor, { a: "HTMLAnchorElement", area: "HTMLAreaElement", audio: "HTMLAudioElement", base: "HTMLBaseElement", body: "HTMLBodyElement", br: "HTMLBRElement", button: "HTMLButtonElement", canvas: "HTMLCanvasElement", caption: "HTMLTableCaptionElement", col: "HTMLTableColElement", content: "HTMLContentElement", data: "HTMLDataElement", datalist: "HTMLDataListElement", del: "HTMLModElement", dir: "HTMLDirectoryElement", div: "HTMLDivElement", dl: "HTMLDListElement", embed: "HTMLEmbedElement", fieldset: "HTMLFieldSetElement", font: "HTMLFontElement", form: "HTMLFormElement", frame: "HTMLFrameElement", frameset: "HTMLFrameSetElement", h1: "HTMLHeadingElement", head: "HTMLHeadElement", hr: "HTMLHRElement", html: "HTMLHtmlElement", iframe: "HTMLIFrameElement", img: "HTMLImageElement", input: "HTMLInputElement", keygen: "HTMLKeygenElement", label: "HTMLLabelElement", legend: "HTMLLegendElement", li: "HTMLLIElement", link: "HTMLLinkElement", map: "HTMLMapElement", marquee: "HTMLMarqueeElement", menu: "HTMLMenuElement", menuitem: "HTMLMenuItemElement", meta: "HTMLMetaElement", meter: "HTMLMeterElement", object: "HTMLObjectElement", ol: "HTMLOListElement", optgroup: "HTMLOptGroupElement", option: "HTMLOptionElement", output: "HTMLOutputElement", p: "HTMLParagraphElement", param: "HTMLParamElement", pre: "HTMLPreElement", progress: "HTMLProgressElement", q: "HTMLQuoteElement", script: "HTMLScriptElement", select: "HTMLSelectElement", shadow: "HTMLShadowElement", source: "HTMLSourceElement", span: "HTMLSpanElement", style: "HTMLStyleElement", table: "HTMLTableElement", tbody: "HTMLTableSectionElement", template: "HTMLTemplateElement", textarea: "HTMLTextAreaElement", thead: "HTMLTableSectionElement", time: "HTMLTimeElement", title: "HTMLTitleElement", tr: "HTMLTableRowElement", track: "HTMLTrackElement", ul: "HTMLUListElement", video: "HTMLVideoElement" }); Object.keys(n).forEach(t), Object.getOwnPropertyNames(e.wrappers).forEach(function (t) { window[t] = e.wrappers[t] }) }(window.ShadowDOMPolyfill), function (e) { function t(e, t) { var n = ""; return Array.prototype.forEach.call(e, function (e) { n += e.textContent + "\n\n" }), t || (n = n.replace(d, "")), n } function n(e) { var t = document.createElement("style"); return t.textContent = e, t } function r(e) { var t = n(e); document.head.appendChild(t); var r = []; if (t.sheet) try { r = t.sheet.cssRules } catch (o) { } else console.warn("sheet not found", t); return t.parentNode.removeChild(t), r } function o() { C.initialized = !0, document.body.appendChild(C); var e = C.contentDocument, t = e.createElement("base"); t.href = document.baseURI, e.head.appendChild(t) } function i(e) { C.initialized || o(), document.body.appendChild(C), e(C.contentDocument), document.body.removeChild(C) } function a(e, t) { if (t) { var o; if (e.match("@import") && D) { var a = n(e); i(function (e) { e.head.appendChild(a.impl), o = Array.prototype.slice.call(a.sheet.cssRules, 0), t(o) }) } else o = r(e), t(o) } } function 
s(e) { e && l().appendChild(document.createTextNode(e)) } function c(e, t) { var r = n(e); r.setAttribute(t, ""), r.setAttribute(x, ""), document.head.appendChild(r) } function l() { return j || (j = document.createElement("style"), j.setAttribute(x, ""), j[x] = !0), j } var u = { strictStyling: !1, registry: {}, shimStyling: function (e, n, r) { var o = this.prepareRoot(e, n, r), i = this.isTypeExtension(r), a = this.makeScopeSelector(n, i), s = t(o, !0); s = this.scopeCssText(s, a), e && (e.shimmedStyle = s), this.addCssToDocument(s, n) }, shimStyle: function (e, t) { return this.shimCssText(e.textContent, t) }, shimCssText: function (e, t) { return e = this.insertDirectives(e), this.scopeCssText(e, t) }, makeScopeSelector: function (e, t) { return e ? t ? "[is=" + e + "]" : e : "" }, isTypeExtension: function (e) { return e && e.indexOf("-") < 0 }, prepareRoot: function (e, t, n) { var r = this.registerRoot(e, t, n); return this.replaceTextInStyles(r.rootStyles, this.insertDirectives), this.removeStyles(e, r.rootStyles), this.strictStyling && this.applyScopeToContent(e, t), r.scopeStyles }, removeStyles: function (e, t) { for (var n, r = 0, o = t.length; r < o && (n = t[r]); r++)n.parentNode.removeChild(n) }, registerRoot: function (e, t, n) { var r = this.registry[t] = { root: e, name: t, extendsName: n }, o = this.findStyles(e); r.rootStyles = o, r.scopeStyles = r.rootStyles; var i = this.registry[r.extendsName]; return i && (r.scopeStyles = i.scopeStyles.concat(r.scopeStyles)), r }, findStyles: function (e) { if (!e) return []; var t = e.querySelectorAll("style"); return Array.prototype.filter.call(t, function (e) { return !e.hasAttribute(R) }) }, applyScopeToContent: function (e, t) { e && (Array.prototype.forEach.call(e.querySelectorAll("*"), function (e) { e.setAttribute(t, "") }), Array.prototype.forEach.call(e.querySelectorAll("template"), function (e) { this.applyScopeToContent(e.content, t) }, this)) }, insertDirectives: function (e) { return e = this.insertPolyfillDirectivesInCssText(e), this.insertPolyfillRulesInCssText(e) }, insertPolyfillDirectivesInCssText: function (e) { return e = e.replace(p, function (e, t) { return t.slice(0, -2) + "{" }), e.replace(h, function (e, t) { return t + " {" }) }, insertPolyfillRulesInCssText: function (e) { return e = e.replace(f, function (e, t) { return t.slice(0, -1) }), e.replace(m, function (e, t, n, r) { var o = e.replace(t, "").replace(n, ""); return r + o }) }, scopeCssText: function (e, t) { var n = this.extractUnscopedRulesFromCssText(e); if (e = this.insertPolyfillHostInCssText(e), e = this.convertColonHost(e), e = this.convertColonHostContext(e), e = this.convertShadowDOMSelectors(e), t) { var e, r = this; a(e, function (n) { e = r.scopeRules(n, t) }) } return e = e + "\n" + n, e.trim() }, extractUnscopedRulesFromCssText: function (e) { for (var t, n = ""; t = w.exec(e);)n += t[1].slice(0, -1) + "\n\n"; for (; t = v.exec(e);)n += t[0].replace(t[2], "").replace(t[1], t[3]) + "\n\n"; return n }, convertColonHost: function (e) { return this.convertColonRule(e, E, this.colonHostPartReplacer) }, convertColonHostContext: function (e) { return this.convertColonRule(e, _, this.colonHostContextPartReplacer) }, convertColonRule: function (e, t, n) { return e.replace(t, function (e, t, r, o) { if (t = O, r) { for (var i, a = r.split(","), s = [], c = 0, l = a.length; c < l && (i = a[c]); c++)i = i.trim(), s.push(n(t, i, o)); return s.join(",") } return t + o }) }, colonHostContextPartReplacer: function (e, t, n) { return t.match(g) ? 
this.colonHostPartReplacer(e, t, n) : e + t + n + ", " + t + " " + e + n }, colonHostPartReplacer: function (e, t, n) { return e + t.replace(g, "") + n }, convertShadowDOMSelectors: function (e) { for (var t = 0; t < N.length; t++)e = e.replace(N[t], " "); return e }, scopeRules: function (e, t) { var n = ""; return e && Array.prototype.forEach.call(e, function (e) { if (e.selectorText && e.style && void 0 !== e.style.cssText) n += this.scopeSelector(e.selectorText, t, this.strictStyling) + " {\n\t", n += this.propertiesFromRule(e) + "\n}\n\n"; else if (e.type === CSSRule.MEDIA_RULE) n += "@media " + e.media.mediaText + " {\n", n += this.scopeRules(e.cssRules, t), n += "\n}\n\n"; else try { e.cssText && (n += e.cssText + "\n\n") } catch (r) { e.type === CSSRule.KEYFRAMES_RULE && e.cssRules && (n += this.ieSafeCssTextFromKeyFrameRule(e)) } }, this), n }, ieSafeCssTextFromKeyFrameRule: function (e) { var t = "@keyframes " + e.name + " {"; return Array.prototype.forEach.call(e.cssRules, function (e) { t += " " + e.keyText + " {" + e.style.cssText + "}" }), t += " }" }, scopeSelector: function (e, t, n) { var r = [], o = e.split(","); return o.forEach(function (e) { e = e.trim(), this.selectorNeedsScoping(e, t) && (e = n && !e.match(O) ? this.applyStrictSelectorScope(e, t) : this.applySelectorScope(e, t)), r.push(e) }, this), r.join(", ") }, selectorNeedsScoping: function (e, t) { if (Array.isArray(t)) return !0; var n = this.makeScopeMatcher(t); return !e.match(n) }, makeScopeMatcher: function (e) { return e = e.replace(/\[/g, "\\[").replace(/\]/g, "\\]"), new RegExp("^(" + e + ")" + S, "m") }, applySelectorScope: function (e, t) { return Array.isArray(t) ? this.applySelectorScopeList(e, t) : this.applySimpleSelectorScope(e, t) }, applySelectorScopeList: function (e, t) { for (var n, r = [], o = 0; n = t[o]; o++)r.push(this.applySimpleSelectorScope(e, n)); return r.join(", ") }, applySimpleSelectorScope: function (e, t) { return e.match(L) ? (e = e.replace(O, t), e.replace(L, t + " ")) : t + " " + e }, applyStrictSelectorScope: function (e, t) { t = t.replace(/\[is=([^\]]*)\]/g, "$1"); var n = [" ", ">", "+", "~"], r = e, o = "[" + t + "]"; return n.forEach(function (e) { var t = r.split(e); r = t.map(function (e) { var t = e.trim().replace(L, ""); return t && n.indexOf(t) < 0 && t.indexOf(o) < 0 && (e = t.replace(/([^:]*)(:*)(.*)/, "$1" + o + "$2$3")), e }).join(e) }), r }, insertPolyfillHostInCssText: function (e) { return e.replace(M, b).replace(T, g) }, propertiesFromRule: function (e) { var t = e.style.cssText; e.style.content && !e.style.content.match(/['"]+|attr/) && (t = t.replace(/content:[^;]*;/g, "content: '" + e.style.content + "';")); var n = e.style; for (var r in n) "initial" === n[r] && (t += r + ": initial; "); return t }, replaceTextInStyles: function (e, t) { e && t && (e instanceof Array || (e = [e]), Array.prototype.forEach.call(e, function (e) { e.textContent = t.call(this, e.textContent) }, this)) }, addCssToDocument: function (e, t) { e.match("@import") ? 
c(e, t) : s(e) } }, d = /\/\*[^*]*\*+([^\/*][^*]*\*+)*\//gim, p = /\/\*\s*@polyfill ([^*]*\*+([^\/*][^*]*\*+)*\/)([^{]*?){/gim, h = /polyfill-next-selector[^}]*content\:[\s]*?['"](.*?)['"][;\s]*}([^{]*?){/gim, f = /\/\*\s@polyfill-rule([^*]*\*+([^\/*][^*]*\*+)*)\//gim, m = /(polyfill-rule)[^}]*(content\:[\s]*['"](.*?)['"])[;\s]*[^}]*}/gim, w = /\/\*\s@polyfill-unscoped-rule([^*]*\*+([^\/*][^*]*\*+)*)\//gim, v = /(polyfill-unscoped-rule)[^}]*(content\:[\s]*['"](.*?)['"])[;\s]*[^}]*}/gim, g = "-shadowcsshost", b = "-shadowcsscontext", y = ")(?:\\(((?:\\([^)(]*\\)|[^)(]*)+?)\\))?([^,{]*)", E = new RegExp("(" + g + y, "gim"), _ = new RegExp("(" + b + y, "gim"), S = "([>\\s~+[.,{:][\\s\\S]*)?$", T = /\:host/gim, M = /\:host-context/gim, O = g + "-no-combinator", L = new RegExp(g, "gim"), N = (new RegExp(b, "gim"), [/>>>/g, /::shadow/g, /::content/g, /\/deep\//g, /\/shadow\//g, /\/shadow-deep\//g, /\^\^/g, /\^(?!=)/g]), C = document.createElement("iframe"); C.style.display = "none"; var j, D = navigator.userAgent.match("Chrome"), H = "shim-shadowdom", x = "shim-shadowdom-css", R = "no-shim"; if (window.ShadowDOMPolyfill) { s("style { display: none !important; }\n"); var I = ShadowDOMPolyfill.wrap(document), P = I.querySelector("head"); P.insertBefore(l(), P.childNodes[0]), document.addEventListener("DOMContentLoaded", function () { e.urlResolver; if (window.HTMLImports && !HTMLImports.useNative) { var t = "link[rel=stylesheet][" + H + "]", n = "style[" + H + "]"; HTMLImports.importer.documentPreloadSelectors += "," + t, HTMLImports.importer.importsPreloadSelectors += "," + t, HTMLImports.parser.documentSelectors = [HTMLImports.parser.documentSelectors, t, n].join(","); var r = HTMLImports.parser.parseGeneric; HTMLImports.parser.parseGeneric = function (e) { if (!e[x]) { var t = e.__importElement || e; if (!t.hasAttribute(H)) return void r.call(this, e); e.__resource && (t = e.ownerDocument.createElement("style"), t.textContent = e.__resource), HTMLImports.path.resolveUrlsInStyle(t, e.href), t.textContent = u.shimStyle(t), t.removeAttribute(H, ""), t.setAttribute(x, ""), t[x] = !0, t.parentNode !== P && (e.parentNode === P ? P.replaceChild(t, e) : this.addElementToDocument(t)), t.__importParsed = !0, this.markParsingComplete(e), this.parseNext() } }; var o = HTMLImports.parser.hasResource; HTMLImports.parser.hasResource = function (e) { return "link" === e.localName && "stylesheet" === e.rel && e.hasAttribute(H) ? e.__resource : o.call(this, e) } } }) } e.ShadowCSS = u }(window.WebComponents)), function (e) { window.ShadowDOMPolyfill ? (window.wrap = ShadowDOMPolyfill.wrapIfNeeded, window.unwrap = ShadowDOMPolyfill.unwrapIfNeeded) : window.wrap = window.unwrap = function (e) { return e } }(window.WebComponents), function (e) { "use strict"; function t(e) { return void 0 !== p[e] } function n() { s.call(this), this._isInvalid = !0 } function r(e) { return "" == e && n.call(this), e.toLowerCase() } function o(e) { var t = e.charCodeAt(0); return t > 32 && t < 127 && [34, 35, 60, 62, 63, 96].indexOf(t) == -1 ? e : encodeURIComponent(e) } function i(e) { var t = e.charCodeAt(0); return t > 32 && t < 127 && [34, 35, 60, 62, 96].indexOf(t) == -1 ? 
e : encodeURIComponent(e) } function a(e, a, s) { function c(e) { b.push(e) } var l = a || "scheme start", u = 0, d = "", v = !1, g = !1, b = []; e: for (; (e[u - 1] != f || 0 == u) && !this._isInvalid;) { var y = e[u]; switch (l) { case "scheme start": if (!y || !m.test(y)) { if (a) { c("Invalid scheme."); break e } d = "", l = "no scheme"; continue } d += y.toLowerCase(), l = "scheme"; break; case "scheme": if (y && w.test(y)) d += y.toLowerCase(); else { if (":" != y) { if (a) { if (f == y) break e; c("Code point not allowed in scheme: " + y); break e } d = "", u = 0, l = "no scheme"; continue } if (this._scheme = d, d = "", a) break e; t(this._scheme) && (this._isRelative = !0), l = "file" == this._scheme ? "relative" : this._isRelative && s && s._scheme == this._scheme ? "relative or authority" : this._isRelative ? "authority first slash" : "scheme data" } break; case "scheme data": "?" == y ? (this._query = "?", l = "query") : "#" == y ? (this._fragment = "#", l = "fragment") : f != y && "\t" != y && "\n" != y && "\r" != y && (this._schemeData += o(y)); break; case "no scheme": if (s && t(s._scheme)) { l = "relative"; continue } c("Missing scheme."), n.call(this); break; case "relative or authority": if ("/" != y || "/" != e[u + 1]) { c("Expected /, got: " + y), l = "relative"; continue } l = "authority ignore slashes"; break; case "relative": if (this._isRelative = !0, "file" != this._scheme && (this._scheme = s._scheme), f == y) { this._host = s._host, this._port = s._port, this._path = s._path.slice(), this._query = s._query, this._username = s._username, this._password = s._password; break e } if ("/" == y || "\\" == y) "\\" == y && c("\\ is an invalid code point."), l = "relative slash"; else if ("?" == y) this._host = s._host, this._port = s._port, this._path = s._path.slice(), this._query = "?", this._username = s._username, this._password = s._password, l = "query"; else { if ("#" != y) { var E = e[u + 1], _ = e[u + 2]; ("file" != this._scheme || !m.test(y) || ":" != E && "|" != E || f != _ && "/" != _ && "\\" != _ && "?" != _ && "#" != _) && (this._host = s._host, this._port = s._port, this._username = s._username, this._password = s._password, this._path = s._path.slice(), this._path.pop()), l = "relative path"; continue } this._host = s._host, this._port = s._port, this._path = s._path.slice(), this._query = s._query, this._fragment = "#", this._username = s._username, this._password = s._password, l = "fragment" } break; case "relative slash": if ("/" != y && "\\" != y) { "file" != this._scheme && (this._host = s._host, this._port = s._port, this._username = s._username, this._password = s._password), l = "relative path"; continue } "\\" == y && c("\\ is an invalid code point."), l = "file" == this._scheme ? "file host" : "authority ignore slashes"; break; case "authority first slash": if ("/" != y) { c("Expected '/', got: " + y), l = "authority ignore slashes"; continue } l = "authority second slash"; break; case "authority second slash": if (l = "authority ignore slashes", "/" != y) { c("Expected '/', got: " + y); continue } break; case "authority ignore slashes": if ("/" != y && "\\" != y) { l = "authority"; continue } c("Expected authority, got: " + y); break; case "authority": if ("@" == y) { v && (c("@ already seen."), d += "%40"), v = !0; for (var S = 0; S < d.length; S++) { var T = d[S]; if ("\t" != T && "\n" != T && "\r" != T) if (":" != T || null !== this._password) { var M = o(T); null !== this._password ? 
this._password += M : this._username += M } else this._password = ""; else c("Invalid whitespace in authority.") } d = "" } else { if (f == y || "/" == y || "\\" == y || "?" == y || "#" == y) { u -= d.length, d = "", l = "host"; continue } d += y } break; case "file host": if (f == y || "/" == y || "\\" == y || "?" == y || "#" == y) { 2 != d.length || !m.test(d[0]) || ":" != d[1] && "|" != d[1] ? 0 == d.length ? l = "relative path start" : (this._host = r.call(this, d), d = "", l = "relative path start") : l = "relative path"; continue } "\t" == y || "\n" == y || "\r" == y ? c("Invalid whitespace in file host.") : d += y; break; case "host": case "hostname": if (":" != y || g) { if (f == y || "/" == y || "\\" == y || "?" == y || "#" == y) { if (this._host = r.call(this, d), d = "", l = "relative path start", a) break e; continue } "\t" != y && "\n" != y && "\r" != y ? ("[" == y ? g = !0 : "]" == y && (g = !1), d += y) : c("Invalid code point in host/hostname: " + y) } else if (this._host = r.call(this, d), d = "", l = "port", "hostname" == a) break e; break; case "port": if (/[0-9]/.test(y)) d += y; else { if (f == y || "/" == y || "\\" == y || "?" == y || "#" == y || a) { if ("" != d) { var O = parseInt(d, 10); O != p[this._scheme] && (this._port = O + ""), d = "" } if (a) break e; l = "relative path start"; continue } "\t" == y || "\n" == y || "\r" == y ? c("Invalid code point in port: " + y) : n.call(this) } break; case "relative path start": if ("\\" == y && c("'\\' not allowed in path."), l = "relative path", "/" != y && "\\" != y) continue; break; case "relative path": if (f != y && "/" != y && "\\" != y && (a || "?" != y && "#" != y)) "\t" != y && "\n" != y && "\r" != y && (d += o(y)); else { "\\" == y && c("\\ not allowed in relative path."); var L; (L = h[d.toLowerCase()]) && (d = L), ".." == d ? (this._path.pop(), "/" != y && "\\" != y && this._path.push("")) : "." == d && "/" != y && "\\" != y ? this._path.push("") : "." != d && ("file" == this._scheme && 0 == this._path.length && 2 == d.length && m.test(d[0]) && "|" == d[1] && (d = d[0] + ":"), this._path.push(d)), d = "", "?" == y ? (this._query = "?", l = "query") : "#" == y && (this._fragment = "#", l = "fragment") } break; case "query": a || "#" != y ? 
f != y && "\t" != y && "\n" != y && "\r" != y && (this._query += i(y)) : (this._fragment = "#", l = "fragment"); break; case "fragment": f != y && "\t" != y && "\n" != y && "\r" != y && (this._fragment += y) }u++ } } function s() { this._scheme = "", this._schemeData = "", this._username = "", this._password = null, this._host = "", this._port = "", this._path = [], this._query = "", this._fragment = "", this._isInvalid = !1, this._isRelative = !1 } function c(e, t) { void 0 === t || t instanceof c || (t = new c(String(t))), this._url = e, s.call(this); var n = e.replace(/^[ \t\r\n\f]+|[ \t\r\n\f]+$/g, ""); a.call(this, n, null, t) } var l = !1; if (!e.forceJURL) try { var u = new URL("b", "http://a"); u.pathname = "c%20d", l = "http://a/c%20d" === u.href } catch (d) { } if (!l) { var p = Object.create(null); p.ftp = 21, p.file = 0, p.gopher = 70, p.http = 80, p.https = 443, p.ws = 80, p.wss = 443; var h = Object.create(null); h["%2e"] = ".", h[".%2e"] = "..", h["%2e."] = "..", h["%2e%2e"] = ".."; var f = void 0, m = /[a-zA-Z]/, w = /[a-zA-Z0-9\+\-\.]/; c.prototype = { toString: function () { return this.href }, get href() { if (this._isInvalid) return this._url; var e = ""; return "" == this._username && null == this._password || (e = this._username + (null != this._password ? ":" + this._password : "") + "@"), this.protocol + (this._isRelative ? "//" + e + this.host : "") + this.pathname + this._query + this._fragment }, set href(e) { s.call(this), a.call(this, e) }, get protocol() { return this._scheme + ":" }, set protocol(e) { this._isInvalid || a.call(this, e + ":", "scheme start") }, get host() { return this._isInvalid ? "" : this._port ? this._host + ":" + this._port : this._host }, set host(e) { !this._isInvalid && this._isRelative && a.call(this, e, "host") }, get hostname() { return this._host }, set hostname(e) { !this._isInvalid && this._isRelative && a.call(this, e, "hostname") }, get port() { return this._port }, set port(e) { !this._isInvalid && this._isRelative && a.call(this, e, "port") }, get pathname() { return this._isInvalid ? "" : this._isRelative ? "/" + this._path.join("/") : this._schemeData }, set pathname(e) { !this._isInvalid && this._isRelative && (this._path = [], a.call(this, e, "relative path start")) }, get search() { return this._isInvalid || !this._query || "?" == this._query ? "" : this._query }, set search(e) { !this._isInvalid && this._isRelative && (this._query = "?", "?" == e[0] && (e = e.slice(1)), a.call(this, e, "query")) }, get hash() { return this._isInvalid || !this._fragment || "#" == this._fragment ? "" : this._fragment }, set hash(e) { this._isInvalid || (this._fragment = "#", "#" == e[0] && (e = e.slice(1)), a.call(this, e, "fragment")) }, get origin() { var e; if (this._isInvalid || !this._scheme) return ""; switch (this._scheme) { case "data": case "file": case "javascript": case "mailto": return "null" }return e = this.host, e ? 
this._scheme + "://" + e : "" } }; var v = e.URL; v && (c.createObjectURL = function (e) { return v.createObjectURL.apply(v, arguments) }, c.revokeObjectURL = function (e) { v.revokeObjectURL(e) }), e.URL = c } }(self), function (e) { function t(e) { y.push(e), b || (b = !0, m(r)) } function n(e) { return window.ShadowDOMPolyfill && window.ShadowDOMPolyfill.wrapIfNeeded(e) || e } function r() { b = !1; var e = y; y = [], e.sort(function (e, t) { return e.uid_ - t.uid_ }); var t = !1; e.forEach(function (e) { var n = e.takeRecords(); o(e), n.length && (e.callback_(n, e), t = !0) }), t && r() } function o(e) { e.nodes_.forEach(function (t) { var n = w.get(t); n && n.forEach(function (t) { t.observer === e && t.removeTransientObservers() }) }) } function i(e, t) { for (var n = e; n; n = n.parentNode) { var r = w.get(n); if (r) for (var o = 0; o < r.length; o++) { var i = r[o], a = i.options; if (n === e || a.subtree) { var s = t(a); s && i.enqueue(s) } } } } function a(e) { this.callback_ = e, this.nodes_ = [], this.records_ = [], this.uid_ = ++E } function s(e, t) { this.type = e, this.target = t, this.addedNodes = [], this.removedNodes = [], this.previousSibling = null, this.nextSibling = null, this.attributeName = null, this.attributeNamespace = null, this.oldValue = null } function c(e) { var t = new s(e.type, e.target); return t.addedNodes = e.addedNodes.slice(), t.removedNodes = e.removedNodes.slice(), t.previousSibling = e.previousSibling, t.nextSibling = e.nextSibling, t.attributeName = e.attributeName, t.attributeNamespace = e.attributeNamespace, t.oldValue = e.oldValue, t } function l(e, t) { return _ = new s(e, t) } function u(e) { return S ? S : (S = c(_), S.oldValue = e, S) } function d() { _ = S = void 0 } function p(e) { return e === S || e === _ } function h(e, t) { return e === t ? e : S && p(e) ? 
S : null } function f(e, t, n) { this.observer = e, this.target = t, this.options = n, this.transientObservedNodes = [] } if (!e.JsMutationObserver) { var m, w = new WeakMap; if (/Trident|Edge/.test(navigator.userAgent)) m = setTimeout; else if (window.setImmediate) m = window.setImmediate; else { var v = [], g = String(Math.random()); window.addEventListener("message", function (e) { if (e.data === g) { var t = v; v = [], t.forEach(function (e) { e() }) } }), m = function (e) { v.push(e), window.postMessage(g, "*") } } var b = !1, y = [], E = 0; a.prototype = { observe: function (e, t) { if (e = n(e), !t.childList && !t.attributes && !t.characterData || t.attributeOldValue && !t.attributes || t.attributeFilter && t.attributeFilter.length && !t.attributes || t.characterDataOldValue && !t.characterData) throw new SyntaxError; var r = w.get(e); r || w.set(e, r = []); for (var o, i = 0; i < r.length; i++)if (r[i].observer === this) { o = r[i], o.removeListeners(), o.options = t; break } o || (o = new f(this, e, t), r.push(o), this.nodes_.push(e)), o.addListeners() }, disconnect: function () { this.nodes_.forEach(function (e) { for (var t = w.get(e), n = 0; n < t.length; n++) { var r = t[n]; if (r.observer === this) { r.removeListeners(), t.splice(n, 1); break } } }, this), this.records_ = [] }, takeRecords: function () { var e = this.records_; return this.records_ = [], e } }; var _, S; f.prototype = { enqueue: function (e) { var n = this.observer.records_, r = n.length; if (n.length > 0) { var o = n[r - 1], i = h(o, e); if (i) return void (n[r - 1] = i) } else t(this.observer); n[r] = e }, addListeners: function () { this.addListeners_(this.target) }, addListeners_: function (e) { var t = this.options; t.attributes && e.addEventListener("DOMAttrModified", this, !0), t.characterData && e.addEventListener("DOMCharacterDataModified", this, !0), t.childList && e.addEventListener("DOMNodeInserted", this, !0), (t.childList || t.subtree) && e.addEventListener("DOMNodeRemoved", this, !0) }, removeListeners: function () { this.removeListeners_(this.target) }, removeListeners_: function (e) { var t = this.options; t.attributes && e.removeEventListener("DOMAttrModified", this, !0), t.characterData && e.removeEventListener("DOMCharacterDataModified", this, !0), t.childList && e.removeEventListener("DOMNodeInserted", this, !0), (t.childList || t.subtree) && e.removeEventListener("DOMNodeRemoved", this, !0) }, addTransientObserver: function (e) { if (e !== this.target) { this.addListeners_(e), this.transientObservedNodes.push(e); var t = w.get(e); t || w.set(e, t = []), t.push(this) } }, removeTransientObservers: function () { var e = this.transientObservedNodes; this.transientObservedNodes = [], e.forEach(function (e) { this.removeListeners_(e); for (var t = w.get(e), n = 0; n < t.length; n++)if (t[n] === this) { t.splice(n, 1); break } }, this) }, handleEvent: function (e) { switch (e.stopImmediatePropagation(), e.type) { case "DOMAttrModified": var t = e.attrName, n = e.relatedNode.namespaceURI, r = e.target, o = new l("attributes", r); o.attributeName = t, o.attributeNamespace = n; var a = e.attrChange === MutationEvent.ADDITION ? null : e.prevValue; i(r, function (e) { if (e.attributes && (!e.attributeFilter || !e.attributeFilter.length || e.attributeFilter.indexOf(t) !== -1 || e.attributeFilter.indexOf(n) !== -1)) return e.attributeOldValue ? 
u(a) : o }); break; case "DOMCharacterDataModified": var r = e.target, o = l("characterData", r), a = e.prevValue; i(r, function (e) { if (e.characterData) return e.characterDataOldValue ? u(a) : o }); break; case "DOMNodeRemoved": this.addTransientObserver(e.target); case "DOMNodeInserted": var s, c, p = e.target; "DOMNodeInserted" === e.type ? (s = [p], c = []) : (s = [], c = [p]); var h = p.previousSibling, f = p.nextSibling, o = l("childList", e.target.parentNode); o.addedNodes = s, o.removedNodes = c, o.previousSibling = h, o.nextSibling = f, i(e.relatedNode, function (e) { if (e.childList) return o }) }d() } }, e.JsMutationObserver = a, e.MutationObserver || (e.MutationObserver = a, a._isPolyfilled = !0) } }(self), function (e) { "use strict"; if (!window.performance || !window.performance.now) { var t = Date.now(); window.performance = { now: function () { return Date.now() - t } } } window.requestAnimationFrame || (window.requestAnimationFrame = function () { var e = window.webkitRequestAnimationFrame || window.mozRequestAnimationFrame; return e ? function (t) { return e(function () { t(performance.now()) }) } : function (e) { return window.setTimeout(e, 1e3 / 60) } }()), window.cancelAnimationFrame || (window.cancelAnimationFrame = function () { return window.webkitCancelAnimationFrame || window.mozCancelAnimationFrame || function (e) { clearTimeout(e) } }()); var n = function () { var e = document.createEvent("Event"); return e.initEvent("foo", !0, !0), e.preventDefault(), e.defaultPrevented }(); if (!n) { var r = Event.prototype.preventDefault; Event.prototype.preventDefault = function () { this.cancelable && (r.call(this), Object.defineProperty(this, "defaultPrevented", { get: function () { return !0 }, configurable: !0 })) } } var o = /Trident/.test(navigator.userAgent); if ((!window.CustomEvent || o && "function" != typeof window.CustomEvent) && (window.CustomEvent = function (e, t) { t = t || {}; var n = document.createEvent("CustomEvent"); return n.initCustomEvent(e, Boolean(t.bubbles), Boolean(t.cancelable), t.detail), n }, window.CustomEvent.prototype = window.Event.prototype), !window.Event || o && "function" != typeof window.Event) { var i = window.Event; window.Event = function (e, t) { t = t || {}; var n = document.createEvent("Event"); return n.initEvent(e, Boolean(t.bubbles), Boolean(t.cancelable)), n }, window.Event.prototype = i.prototype } }(window.WebComponents), window.HTMLImports = window.HTMLImports || { flags: {} }, function (e) { - function t(e, t) { t = t || f, r(function () { i(e, t) }, t) } function n(e) { return "complete" === e.readyState || e.readyState === v } function r(e, t) { if (n(t)) e && e(); else { var o = function () { "complete" !== t.readyState && t.readyState !== v || (t.removeEventListener(g, o), r(e, t)) }; t.addEventListener(g, o) } } function o(e) { e.target.__loaded = !0 } function i(e, t) { - function n() { c == l && e && e({ allImports: s, loadedImports: u, errorImports: d }) } function r(e) { o(e), u.push(this), c++, n() } function i(e) { - d.push(this), c++, n() - } var s = t.querySelectorAll("link[rel=import]"), c = 0, l = s.length, u = [], d = []; if (l) for (var p, h = 0; h < l && (p = s[h]); h++)a(p) ? (u.push(this), c++, n()) : (p.addEventListener("load", r), p.addEventListener("error", i)); else n() - } function a(e) { return d ? 
e.__loaded || e["import"] && "loading" !== e["import"].readyState : e.__importParsed } function s(e) { for (var t, n = 0, r = e.length; n < r && (t = e[n]); n++)c(t) && l(t) } function c(e) { return "link" === e.localName && "import" === e.rel } function l(e) { var t = e["import"]; t ? o({ target: e }) : (e.addEventListener("load", o), e.addEventListener("error", o)) } var u = "import", d = Boolean(u in document.createElement("link")), p = Boolean(window.ShadowDOMPolyfill), h = function (e) { return p ? window.ShadowDOMPolyfill.wrapIfNeeded(e) : e }, f = h(document), m = { get: function () { var e = window.HTMLImports.currentScript || document.currentScript || ("complete" !== document.readyState ? document.scripts[document.scripts.length - 1] : null); return h(e) }, configurable: !0 }; Object.defineProperty(document, "_currentScript", m), Object.defineProperty(f, "_currentScript", m); var w = /Trident/.test(navigator.userAgent), v = w ? "complete" : "interactive", g = "readystatechange"; d && (new MutationObserver(function (e) { for (var t, n = 0, r = e.length; n < r && (t = e[n]); n++)t.addedNodes && s(t.addedNodes) }).observe(document.head, { childList: !0 }), function () { if ("loading" === document.readyState) for (var e, t = document.querySelectorAll("link[rel=import]"), n = 0, r = t.length; n < r && (e = t[n]); n++)l(e) }()), t(function (e) { window.HTMLImports.ready = !0, window.HTMLImports.readyTime = (new Date).getTime(); var t = f.createEvent("CustomEvent"); t.initCustomEvent("HTMLImportsLoaded", !0, !0, e), f.dispatchEvent(t) }), e.IMPORT_LINK_TYPE = u, e.useNative = d, e.rootDocument = f, e.whenReady = t, e.isIE = w -}(window.HTMLImports), function (e) { var t = [], n = function (e) { t.push(e) }, r = function () { t.forEach(function (t) { t(e) }) }; e.addModule = n, e.initializeModules = r }(window.HTMLImports), window.HTMLImports.addModule(function (e) { var t = /(url\()([^)]*)(\))/g, n = /(@import[\s]+(?!url\())([^;]*)(;)/g, r = { resolveUrlsInStyle: function (e, t) { var n = e.ownerDocument, r = n.createElement("a"); return e.textContent = this.resolveUrlsInCssText(e.textContent, t, r), e }, resolveUrlsInCssText: function (e, r, o) { var i = this.replaceUrls(e, o, r, t); return i = this.replaceUrls(i, o, r, n) }, replaceUrls: function (e, t, n, r) { return e.replace(r, function (e, r, o, i) { var a = o.replace(/["']/g, ""); return n && (a = new URL(a, n).href), t.href = a, a = t.href, r + "'" + a + "'" + i }) } }; e.path = r }), window.HTMLImports.addModule(function (e) { var t = { async: !0, ok: function (e) { return e.status >= 200 && e.status < 300 || 304 === e.status || 0 === e.status }, load: function (n, r, o) { var i = new XMLHttpRequest; return (e.flags.debug || e.flags.bust) && (n += "?" + Math.random()), i.open("GET", n, t.async), i.addEventListener("readystatechange", function (e) { if (4 === i.readyState) { var n = null; try { var a = i.getResponseHeader("Location"); a && (n = "/" === a.substr(0, 1) ? 
location.origin + a : a) } catch (e) { console.error(e.message) } r.call(o, !t.ok(i) && i, i.response || i.responseText, n) } }), i.send(), i }, loadDocument: function (e, t, n) { this.load(e, t, n).responseType = "document" } }; e.xhr = t }), window.HTMLImports.addModule(function (e) { var t = e.xhr, n = e.flags, r = function (e, t) { this.cache = {}, this.onload = e, this.oncomplete = t, this.inflight = 0, this.pending = {} }; r.prototype = { addNodes: function (e) { this.inflight += e.length; for (var t, n = 0, r = e.length; n < r && (t = e[n]); n++)this.require(t); this.checkDone() }, addNode: function (e) { this.inflight++, this.require(e), this.checkDone() }, require: function (e) { var t = e.src || e.href; e.__nodeUrl = t, this.dedupe(t, e) || this.fetch(t, e) }, dedupe: function (e, t) { if (this.pending[e]) return this.pending[e].push(t), !0; return this.cache[e] ? (this.onload(e, t, this.cache[e]), this.tail(), !0) : (this.pending[e] = [t], !1) }, fetch: function (e, r) { if (n.load && console.log("fetch", e, r), e) if (e.match(/^data:/)) { var o = e.split(","), i = o[0], a = o[1]; a = i.indexOf(";base64") > -1 ? atob(a) : decodeURIComponent(a), setTimeout(function () { this.receive(e, r, null, a) }.bind(this), 0) } else { var s = function (t, n, o) { this.receive(e, r, t, n, o) }.bind(this); t.load(e, s) } else setTimeout(function () { this.receive(e, r, { error: "href must be specified" }, null) }.bind(this), 0) }, receive: function (e, t, n, r, o) { this.cache[e] = r; for (var i, a = this.pending[e], s = 0, c = a.length; s < c && (i = a[s]); s++)this.onload(e, i, r, n, o), this.tail(); this.pending[e] = null }, tail: function () { --this.inflight, this.checkDone() }, checkDone: function () { this.inflight || this.oncomplete() } }, e.Loader = r }), window.HTMLImports.addModule(function (e) { var t = function (e) { this.addCallback = e, this.mo = new MutationObserver(this.handler.bind(this)) }; t.prototype = { handler: function (e) { for (var t, n = 0, r = e.length; n < r && (t = e[n]); n++)"childList" === t.type && t.addedNodes.length && this.addedNodes(t.addedNodes) }, addedNodes: function (e) { this.addCallback && this.addCallback(e); for (var t, n = 0, r = e.length; n < r && (t = e[n]); n++)t.children && t.children.length && this.addedNodes(t.children) }, observe: function (e) { this.mo.observe(e, { childList: !0, subtree: !0 }) } }, e.Observer = t }), window.HTMLImports.addModule(function (e) { function t(e) { return "link" === e.localName && e.rel === u } function n(e) { var t = r(e); return "data:text/javascript;charset=utf-8," + encodeURIComponent(t) } function r(e) { return e.textContent + o(e) } function o(e) { var t = e.ownerDocument; t.__importedScripts = t.__importedScripts || 0; var n = e.ownerDocument.baseURI, r = t.__importedScripts ? 
"-" + t.__importedScripts : ""; return t.__importedScripts++, "\n//# sourceURL=" + n + r + ".js\n" } function i(e) { var t = e.ownerDocument.createElement("style"); return t.textContent = e.textContent, a.resolveUrlsInStyle(t), t } var a = e.path, s = e.rootDocument, c = e.flags, l = e.isIE, u = e.IMPORT_LINK_TYPE, d = "link[rel=" + u + "]", p = { documentSelectors: d, importsSelectors: [d, "link[rel=stylesheet]:not([type])", "style:not([type])", "script:not([type])", 'script[type="application/javascript"]', 'script[type="text/javascript"]'].join(","), map: { link: "parseLink", script: "parseScript", style: "parseStyle" }, dynamicElements: [], parseNext: function () { var e = this.nextToParse(); e && this.parse(e) }, parse: function (e) { if (this.isParsed(e)) return void (c.parse && console.log("[%s] is already parsed", e.localName)); var t = this[this.map[e.localName]]; t && (this.markParsing(e), t.call(this, e)) }, parseDynamic: function (e, t) { this.dynamicElements.push(e), t || this.parseNext() }, markParsing: function (e) { c.parse && console.log("parsing", e), this.parsingElement = e }, markParsingComplete: function (e) { e.__importParsed = !0, this.markDynamicParsingComplete(e), e.__importElement && (e.__importElement.__importParsed = !0, this.markDynamicParsingComplete(e.__importElement)), this.parsingElement = null, c.parse && console.log("completed", e) }, markDynamicParsingComplete: function (e) { var t = this.dynamicElements.indexOf(e); t >= 0 && this.dynamicElements.splice(t, 1) }, parseImport: function (e) { if (e["import"] = e.__doc, window.HTMLImports.__importsParsingHook && window.HTMLImports.__importsParsingHook(e), e["import"] && (e["import"].__importParsed = !0), this.markParsingComplete(e), e.__resource && !e.__error ? e.dispatchEvent(new CustomEvent("load", { bubbles: !1 })) : e.dispatchEvent(new CustomEvent("error", { bubbles: !1 })), e.__pending) for (var t; e.__pending.length;)t = e.__pending.shift(), t && t({ target: e }); this.parseNext() }, parseLink: function (e) { t(e) ? this.parseImport(e) : (e.href = e.href, this.parseGeneric(e)) }, parseStyle: function (e) { var t = e; e = i(e), t.__appliedElement = e, e.__importElement = t, this.parseGeneric(e) }, parseGeneric: function (e) { this.trackElement(e), this.addElementToDocument(e) }, rootImportForElement: function (e) { for (var t = e; t.ownerDocument.__importLink;)t = t.ownerDocument.__importLink; return t }, addElementToDocument: function (e) { var t = this.rootImportForElement(e.__importElement || e); t.parentNode.insertBefore(e, t) }, trackElement: function (e, t) { var n = this, r = function (o) { e.removeEventListener("load", r), e.removeEventListener("error", r), t && t(o), n.markParsingComplete(e), n.parseNext() }; if (e.addEventListener("load", r), e.addEventListener("error", r), l && "style" === e.localName) { var o = !1; if (e.textContent.indexOf("@import") == -1) o = !0; else if (e.sheet) { o = !0; for (var i, a = e.sheet.cssRules, s = a ? a.length : 0, c = 0; c < s && (i = a[c]); c++)i.type === CSSRule.IMPORT_RULE && (o = o && Boolean(i.styleSheet)) } o && setTimeout(function () { e.dispatchEvent(new CustomEvent("load", { bubbles: !1 })) }) } }, parseScript: function (t) { var r = document.createElement("script"); r.__importElement = t, r.src = t.src ? 
t.src : n(t), e.currentScript = t, this.trackElement(r, function (t) { r.parentNode && r.parentNode.removeChild(r), e.currentScript = null }), this.addElementToDocument(r) }, nextToParse: function () { return this._mayParse = [], !this.parsingElement && (this.nextToParseInDoc(s) || this.nextToParseDynamic()) }, nextToParseInDoc: function (e, n) { if (e && this._mayParse.indexOf(e) < 0) { this._mayParse.push(e); for (var r, o = e.querySelectorAll(this.parseSelectorsForNode(e)), i = 0, a = o.length; i < a && (r = o[i]); i++)if (!this.isParsed(r)) return this.hasResource(r) ? t(r) ? this.nextToParseInDoc(r.__doc, r) : r : void 0 } return n }, nextToParseDynamic: function () { return this.dynamicElements[0] }, parseSelectorsForNode: function (e) { var t = e.ownerDocument || e; return t === s ? this.documentSelectors : this.importsSelectors }, isParsed: function (e) { return e.__importParsed }, needsDynamicParsing: function (e) { return this.dynamicElements.indexOf(e) >= 0 }, hasResource: function (e) { return !t(e) || void 0 !== e.__doc } }; e.parser = p, e.IMPORT_SELECTOR = d }), window.HTMLImports.addModule(function (e) { function t(e) { return n(e, a) } function n(e, t) { return "link" === e.localName && e.getAttribute("rel") === t } function r(e) { return !!Object.getOwnPropertyDescriptor(e, "baseURI") } function o(e, t) { var n = document.implementation.createHTMLDocument(a); n._URL = t; var o = n.createElement("base"); o.setAttribute("href", t), n.baseURI || r(n) || Object.defineProperty(n, "baseURI", { value: t }); var i = n.createElement("meta"); return i.setAttribute("charset", "utf-8"), n.head.appendChild(i), n.head.appendChild(o), n.body.innerHTML = e, window.HTMLTemplateElement && HTMLTemplateElement.bootstrap && HTMLTemplateElement.bootstrap(n), n } var i = e.flags, a = e.IMPORT_LINK_TYPE, s = e.IMPORT_SELECTOR, c = e.rootDocument, l = e.Loader, u = e.Observer, d = e.parser, p = { documents: {}, documentPreloadSelectors: s, importsPreloadSelectors: [s].join(","), loadNode: function (e) { h.addNode(e) }, loadSubtree: function (e) { var t = this.marshalNodes(e); h.addNodes(t) }, marshalNodes: function (e) { return e.querySelectorAll(this.loadSelectorsForNode(e)) }, loadSelectorsForNode: function (e) { var t = e.ownerDocument || e; return t === c ? this.documentPreloadSelectors : this.importsPreloadSelectors }, loaded: function (e, n, r, a, s) { if (i.load && console.log("loaded", e, n), n.__resource = r, n.__error = a, t(n)) { var c = this.documents[e]; void 0 === c && (c = a ? null : o(r, s || e), c && (c.__importLink = n, this.bootDocument(c)), this.documents[e] = c), n.__doc = c } d.parseNext() }, bootDocument: function (e) { this.loadSubtree(e), this.observer.observe(e), d.parseNext() }, loadedAll: function () { d.parseNext() } }, h = new l(p.loaded.bind(p), p.loadedAll.bind(p)); if (p.observer = new u, !document.baseURI) { var f = { get: function () { var e = document.querySelector("base"); return e ? 
e.href : window.location.href }, configurable: !0 }; Object.defineProperty(document, "baseURI", f), Object.defineProperty(c, "baseURI", f) } e.importer = p, e.importLoader = h }), window.HTMLImports.addModule(function (e) { var t = e.parser, n = e.importer, r = { added: function (e) { for (var r, o, i, a, s = 0, c = e.length; s < c && (a = e[s]); s++)r || (r = a.ownerDocument, o = t.isParsed(r)), i = this.shouldLoadNode(a), i && n.loadNode(a), this.shouldParseNode(a) && o && t.parseDynamic(a, i) }, shouldLoadNode: function (e) { return 1 === e.nodeType && o.call(e, n.loadSelectorsForNode(e)) }, shouldParseNode: function (e) { return 1 === e.nodeType && o.call(e, t.parseSelectorsForNode(e)) } }; n.observer.addCallback = r.added.bind(r); var o = HTMLElement.prototype.matches || HTMLElement.prototype.matchesSelector || HTMLElement.prototype.webkitMatchesSelector || HTMLElement.prototype.mozMatchesSelector || HTMLElement.prototype.msMatchesSelector }), function (e) { function t() { window.HTMLImports.importer.bootDocument(r) } var n = e.initializeModules; e.isIE; if (!e.useNative) { n(); var r = e.rootDocument; "complete" === document.readyState || "interactive" === document.readyState && !window.attachEvent ? t() : document.addEventListener("DOMContentLoaded", t) } }(window.HTMLImports), window.CustomElements = window.CustomElements || { flags: {} }, function (e) { var t = e.flags, n = [], r = function (e) { n.push(e) }, o = function () { n.forEach(function (t) { t(e) }) }; e.addModule = r, e.initializeModules = o, e.hasNative = Boolean(document.registerElement), e.isIE = /Trident/.test(navigator.userAgent), e.useNative = !t.register && e.hasNative && !window.ShadowDOMPolyfill && (!window.HTMLImports || window.HTMLImports.useNative) }(window.CustomElements), window.CustomElements.addModule(function (e) { function t(e, t) { n(e, function (e) { return !!t(e) || void r(e, t) }), r(e, t) } function n(e, t, r) { var o = e.firstElementChild; if (!o) for (o = e.firstChild; o && o.nodeType !== Node.ELEMENT_NODE;)o = o.nextSibling; for (; o;)t(o, r) !== !0 && n(o, t, r), o = o.nextElementSibling; return null } function r(e, n) { for (var r = e.shadowRoot; r;)t(r, n), r = r.olderShadowRoot } function o(e, t) { i(e, t, []) } function i(e, t, n) { if (e = window.wrap(e), !(n.indexOf(e) >= 0)) { n.push(e); for (var r, o = e.querySelectorAll("link[rel=" + a + "]"), s = 0, c = o.length; s < c && (r = o[s]); s++)r["import"] && i(r["import"], t, n); t(e) } } var a = window.HTMLImports ? window.HTMLImports.IMPORT_LINK_TYPE : "none"; e.forDocumentTree = o, e.forSubtree = t }), window.CustomElements.addModule(function (e) { function t(e, t) { return n(e, t) || r(e, t) } function n(t, n) { return !!e.upgrade(t, n) || void (n && a(t)) } function r(e, t) { b(e, function (e) { if (n(e, t)) return !0 }) } function o(e) { S.push(e), _ || (_ = !0, setTimeout(i)) } function i() { _ = !1; for (var e, t = S, n = 0, r = t.length; n < r && (e = t[n]); n++)e(); S = [] } function a(e) { E ? o(function () { s(e) }) : s(e) } function s(e) { e.__upgraded__ && !e.__attached && (e.__attached = !0, e.attachedCallback && e.attachedCallback()) } function c(e) { l(e), b(e, function (e) { l(e) }) } function l(e) { E ? 
o(function () { u(e) }) : u(e) } function u(e) { e.__upgraded__ && e.__attached && (e.__attached = !1, e.detachedCallback && e.detachedCallback()) } function d(e) { for (var t = e, n = window.wrap(document); t;) { if (t == n) return !0; t = t.parentNode || t.nodeType === Node.DOCUMENT_FRAGMENT_NODE && t.host } } function p(e) { if (e.shadowRoot && !e.shadowRoot.__watched) { g.dom && console.log("watching shadow-root for: ", e.localName); for (var t = e.shadowRoot; t;)m(t), t = t.olderShadowRoot } } function h(e, n) { if (g.dom) { var r = n[0]; if (r && "childList" === r.type && r.addedNodes && r.addedNodes) { for (var o = r.addedNodes[0]; o && o !== document && !o.host;)o = o.parentNode; var i = o && (o.URL || o._URL || o.host && o.host.localName) || ""; i = i.split("/?").shift().split("/").pop() } console.group("mutations (%d) [%s]", n.length, i || "") } var a = d(e); n.forEach(function (e) { "childList" === e.type && (T(e.addedNodes, function (e) { e.localName && t(e, a) }), T(e.removedNodes, function (e) { e.localName && c(e) })) }), g.dom && console.groupEnd() } function f(e) { for (e = window.wrap(e), e || (e = window.wrap(document)); e.parentNode;)e = e.parentNode; var t = e.__observer; t && (h(e, t.takeRecords()), i()) } function m(e) { if (!e.__observer) { var t = new MutationObserver(h.bind(this, e)); t.observe(e, { childList: !0, subtree: !0 }), e.__observer = t } } function w(e) { e = window.wrap(e), g.dom && console.group("upgradeDocument: ", e.baseURI.split("/").pop()); var n = e === window.wrap(document); t(e, n), m(e), g.dom && console.groupEnd() } function v(e) { y(e, w) } var g = e.flags, b = e.forSubtree, y = e.forDocumentTree, E = window.MutationObserver._isPolyfilled && g["throttle-attached"]; e.hasPolyfillMutations = E, e.hasThrottledAttached = E; var _ = !1, S = [], T = Array.prototype.forEach.call.bind(Array.prototype.forEach), M = Element.prototype.createShadowRoot; M && (Element.prototype.createShadowRoot = function () { var e = M.call(this); return window.CustomElements.watchShadow(this), e }), e.watchShadow = p, e.upgradeDocumentTree = v, e.upgradeDocument = w, e.upgradeSubtree = r, e.upgradeAll = t, e.attached = a, e.takeRecords = f }), window.CustomElements.addModule(function (e) { function t(t, r) { if ("template" === t.localName && window.HTMLTemplateElement && HTMLTemplateElement.decorate && HTMLTemplateElement.decorate(t), !t.__upgraded__ && t.nodeType === Node.ELEMENT_NODE) { var o = t.getAttribute("is"), i = e.getRegisteredDefinition(t.localName) || e.getRegisteredDefinition(o); if (i && (o && i.tag == t.localName || !o && !i["extends"])) return n(t, i, r) } } function n(t, n, o) { return a.upgrade && console.group("upgrade:", t.localName), n.is && t.setAttribute("is", n.is), r(t, n), t.__upgraded__ = !0, i(t), o && e.attached(t), e.upgradeSubtree(t, o), a.upgrade && console.groupEnd(), t } function r(e, t) { Object.__proto__ ? 
e.__proto__ = t.prototype : (o(e, t.prototype, t["native"]), e.__proto__ = t.prototype) } function o(e, t, n) { for (var r = {}, o = t; o !== n && o !== HTMLElement.prototype;) { for (var i, a = Object.getOwnPropertyNames(o), s = 0; i = a[s]; s++)r[i] || (Object.defineProperty(e, i, Object.getOwnPropertyDescriptor(o, i)), r[i] = 1); o = Object.getPrototypeOf(o) } } function i(e) { e.createdCallback && e.createdCallback() } var a = e.flags; e.upgrade = t, e.upgradeWithDefinition = n, e.implementPrototype = r }), window.CustomElements.addModule(function (e) { function t(t, r) { var c = r || {}; if (!t) throw new Error("document.registerElement: first argument `name` must not be empty"); if (t.indexOf("-") < 0) throw new Error("document.registerElement: first argument ('name') must contain a dash ('-'). Argument provided was '" + String(t) + "'."); if (o(t)) throw new Error("Failed to execute 'registerElement' on 'Document': Registration failed for type '" + String(t) + "'. The type name is invalid."); if (l(t)) throw new Error("DuplicateDefinitionError: a type with name '" + String(t) + "' is already registered"); return c.prototype || (c.prototype = Object.create(HTMLElement.prototype)), c.__name = t.toLowerCase(), c["extends"] && (c["extends"] = c["extends"].toLowerCase()), c.lifecycle = c.lifecycle || {}, c.ancestry = i(c["extends"]), a(c), s(c), n(c.prototype), u(c.__name, c), c.ctor = d(c), c.ctor.prototype = c.prototype, c.prototype.constructor = c.ctor, e.ready && w(document), c.ctor } function n(e) { if (!e.setAttribute._polyfilled) { var t = e.setAttribute; e.setAttribute = function (e, n) { r.call(this, e, n, t) }; var n = e.removeAttribute; e.removeAttribute = function (e) { r.call(this, e, null, n) }, e.setAttribute._polyfilled = !0 } } function r(e, t, n) { e = e.toLowerCase(); var r = this.getAttribute(e); n.apply(this, arguments); var o = this.getAttribute(e); this.attributeChangedCallback && o !== r && this.attributeChangedCallback(e, r, o) } function o(e) { for (var t = 0; t < E.length; t++)if (e === E[t]) return !0 } function i(e) { var t = l(e); return t ? i(t["extends"]).concat([t]) : [] } function a(e) { for (var t, n = e["extends"], r = 0; t = e.ancestry[r]; r++)n = t.is && t.tag; e.tag = n || e.__name, n && (e.is = e.__name) } function s(e) { if (!Object.__proto__) { var t = HTMLElement.prototype; if (e.is) { var n = document.createElement(e.tag); t = Object.getPrototypeOf(n) } for (var r, o = e.prototype, i = !1; o;)o == t && (i = !0), r = Object.getPrototypeOf(o), r && (o.__proto__ = r), o = r; i || console.warn(e.tag + " prototype not found in prototype chain for " + e.is), e["native"] = t } } function c(e) { return g(T(e.tag), e) } function l(e) { if (e) return _[e.toLowerCase()] } function u(e, t) { _[e] = t } function d(e) { return function () { return c(e) } } function p(e, t, n) { return e === S ? h(t, n) : M(e, t) } function h(e, t) { e && (e = e.toLowerCase()), t && (t = t.toLowerCase()); var n = l(t || e); if (n) { if (e == n.tag && t == n.is) return new n.ctor; if (!t && !n.is) return new n.ctor } var r; return t ? 
(r = h(e), r.setAttribute("is", t), r) : (r = T(e), e.indexOf("-") >= 0 && b(r, HTMLElement), r) } function f(e, t) { var n = e[t]; e[t] = function () { var e = n.apply(this, arguments); return v(e), e } } var m, w = (e.isIE, e.upgradeDocumentTree), v = e.upgradeAll, g = e.upgradeWithDefinition, b = e.implementPrototype, y = e.useNative, E = ["annotation-xml", "color-profile", "font-face", "font-face-src", "font-face-uri", "font-face-format", "font-face-name", "missing-glyph"], _ = {}, S = "http://www.w3.org/1999/xhtml", T = document.createElement.bind(document), M = document.createElementNS.bind(document); m = Object.__proto__ || y ? function (e, t) { return e instanceof t } : function (e, t) { if (e instanceof t) return !0; for (var n = e; n;) { if (n === t.prototype) return !0; n = n.__proto__ } return !1 }, f(Node.prototype, "cloneNode"), f(document, "importNode"), document.registerElement = t, document.createElement = h, document.createElementNS = p, e.registry = _, e["instanceof"] = m, e.reservedTagList = E, e.getRegisteredDefinition = l, document.register = document.registerElement }), function (e) { function t() { i(window.wrap(document)), window.CustomElements.ready = !0; var e = window.requestAnimationFrame || function (e) { setTimeout(e, 16) }; e(function () { setTimeout(function () { window.CustomElements.readyTime = Date.now(), window.HTMLImports && (window.CustomElements.elapsed = window.CustomElements.readyTime - window.HTMLImports.readyTime), document.dispatchEvent(new CustomEvent("WebComponentsReady", { bubbles: !0 })) }) }) } var n = e.useNative, r = e.initializeModules; e.isIE; if (n) { var o = function () { }; e.watchShadow = o, e.upgrade = o, e.upgradeAll = o, e.upgradeDocumentTree = o, e.upgradeSubtree = o, e.takeRecords = o, e["instanceof"] = function (e, t) { return e instanceof t } } else r(); var i = e.upgradeDocumentTree, a = e.upgradeDocument; if (window.wrap || (window.ShadowDOMPolyfill ? (window.wrap = window.ShadowDOMPolyfill.wrapIfNeeded, window.unwrap = window.ShadowDOMPolyfill.unwrapIfNeeded) : window.wrap = window.unwrap = function (e) { return e }), window.HTMLImports && (window.HTMLImports.__importsParsingHook = function (e) { e["import"] && a(wrap(e["import"])) }), "complete" === document.readyState || e.flags.eager) t(); else if ("interactive" !== document.readyState || window.attachEvent || window.HTMLImports && !window.HTMLImports.ready) { var s = window.HTMLImports && !window.HTMLImports.ready ? 
"HTMLImportsLoaded" : "DOMContentLoaded"; window.addEventListener(s, t) } else t() }(window.CustomElements), function (e) { Function.prototype.bind || (Function.prototype.bind = function (e) { var t = this, n = Array.prototype.slice.call(arguments, 1); return function () { var r = n.slice(); return r.push.apply(r, arguments), t.apply(e, r) } }) }(window.WebComponents), function (e) { var t = document.createElement("style"); t.textContent = "body {transition: opacity ease-in 0.2s; } \nbody[unresolved] {opacity: 0; display: block; overflow: hidden; position: relative; } \n"; var n = document.querySelector("head"); n.insertBefore(t, n.firstChild) }(window.WebComponents), function (e) { window.Platform = e }(window.WebComponents); \ No newline at end of file diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_viewer_full.html b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_viewer_full.html deleted file mode 100644 index 4aac8735da109e1fc17c532e767fad21f85527f0..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/static/trace_viewer_full.html +++ /dev/null @@ -1,10175 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py b/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py deleted file mode 100644 index 5991cf2b33d1e818e6876c8d7550fbb6c87cdaa3..0000000000000000000000000000000000000000 --- a/plugins/tensorboard-plugins/tb_plugin/torch_tb_profiler/utils.py +++ /dev/null @@ -1,147 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# -# Copyright(c) 2023 Huawei Technologies. -# All rights reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Modifications: Add visualization of PyTorch Ascend profiling. -# -------------------------------------------------------------------------- -import logging -import math -import os -import time -from contextlib import contextmanager - -from . 
import consts
-
-predefined_logging_level = ('CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET')
-
-
-def get_logging_level():
-    log_level = os.environ.get('TORCH_PROFILER_LOG_LEVEL', 'INFO').upper()
-    if log_level not in predefined_logging_level:
-        log_level = logging.getLevelName(logging.INFO)
-    return log_level
-
-
-logger = None
-
-
-def get_logger():
-    global logger
-    if logger is None:
-        logger = logging.getLogger(consts.PLUGIN_NAME)
-        logger.setLevel(get_logging_level())
-    return logger
-
-
-def is_gpu_chrome_trace_file(path):
-    return consts.WORKER_PATTERN.match(path)
-
-
-def is_worker_span_dir(path):
-    return consts.WORKER_SPAN_PATTERN.match(path)
-
-
-def is_npu_trace_path(path):
-    return consts.TRACE_PATTERN.match(path)
-
-
-def href(text, url):
-    """Return an HTML formatted hyperlink string.
-
-    Note:
-        target="_blank" causes this link to be opened in new tab if clicked.
-    """
-    return f'<a href="{url}" target="_blank">{text}</a>'
-
-
-class Canonicalizer:
-    def __init__(
-            self,
-            time_metric='us',
-            memory_metric='B',
-            *,
-            input_time_metric='us',
-            input_memory_metric='B'):
-        # raw timestamp is in microsecond
-        time_metric_to_factor = {
-            'us': 1,
-            'ms': 1e3,
-            's': 1e6,
-        }
-        # raw memory is in bytes
-        memory_metric_to_factor = {
-            'B': math.pow(1024, 0),
-            'KB': math.pow(1024, 1),
-            'MB': math.pow(1024, 2),
-            'GB': math.pow(1024, 3),
-        }
-
-        # canonicalize the time metric to a string
-        self.canonical_time_metrics = {
-            'micro': 'us', 'microsecond': 'us', 'us': 'us',
-            'milli': 'ms', 'millisecond': 'ms', 'ms': 'ms',
-            '': 's', 'second': 's', 's': 's',
-        }
-        # canonicalize the memory metric to a string
-        self.canonical_memory_metrics = {
-            '': 'B', 'B': 'B',
-            'K': 'KB', 'KB': 'KB',
-            'M': 'MB', 'MB': 'MB',
-            'G': 'GB', 'GB': 'GB',
-        }
-
-        self.time_metric = self.canonical_time_metrics.get(time_metric)
-        self.memory_metric = self.canonical_memory_metrics.get(memory_metric)
-
-        # scale factors that convert the input units to the output units
-        self.time_factor = time_metric_to_factor.get(self.canonical_time_metrics.get(input_time_metric)) /\
-            time_metric_to_factor.get(self.time_metric)
-        self.memory_factor = memory_metric_to_factor.get(self.canonical_memory_metrics.get(input_memory_metric)) /\
-            memory_metric_to_factor.get(self.memory_metric)
-
-    def convert_time(self, t):
-        return self.time_factor * t
-
-    def convert_memory(self, m):
-        return self.memory_factor * m
-
-
-class DisplayRounder:
-    """Round a value for display purpose."""
-
-    def __init__(self, ndigits):
-        self.ndigits = ndigits
-        self.precision = math.pow(10, -ndigits)
-
-    def __call__(self, v: float):
-        _v = abs(v)
-        if _v >= self.precision or v == 0:
-            return round(v, 3)
-        else:
-            ndigit = abs(math.floor(math.log10(_v)))
-            return round(v, ndigit)
-
-
-@contextmanager
-def timing(description: str, force: bool = False) -> None:
-    if force or os.environ.get('TORCH_PROFILER_BENCHMARK', '0') == '1':
-        start = time.time()
-        yield
-        elapsed_time = time.time() - start
-        logger.info(f'{description}: {elapsed_time}')
-    else:
-        yield
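
For readers skimming this deletion, the following is a minimal usage sketch of the removed `torch_tb_profiler/utils.py` helpers; it is illustrative only and not part of the diff. It assumes the plugin package is importable (for example after `pip install torch-tb-profiler-ascend`); the class names, function names, and environment variables are taken from the deleted code above.

```python
# Illustrative sketch only (not part of the diff): how the deleted helpers in
# torch_tb_profiler/utils.py were meant to be used.
import logging
import os

from torch_tb_profiler.utils import Canonicalizer, DisplayRounder, get_logger, timing

logging.basicConfig(level=logging.INFO)  # so timing()'s logger.info() output is visible

# Canonicalizer rescales raw profiler values (timestamps in microseconds,
# memory in bytes) into the requested display units.
canon = Canonicalizer(time_metric='ms', memory_metric='MB')
print(canon.convert_time(1500))               # 1.5  -> 1500 us shown in ms
print(canon.convert_memory(3 * 1024 * 1024))  # 3.0  -> bytes shown in MB

# DisplayRounder rounds normally above its precision threshold and keeps very
# small values at their first significant digit instead of collapsing them to 0.
fmt = DisplayRounder(ndigits=2)
print(fmt(12.3456))   # 12.346
print(fmt(0.00042))   # 0.0004

# timing() measures and logs a block only when TORCH_PROFILER_BENCHMARK=1
# (or force=True); get_logger() initializes the module-level logger it uses.
os.environ['TORCH_PROFILER_BENCHMARK'] = '1'
get_logger()
with timing('parse trace_view.json'):
    pass  # ... work being measured ...
```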