diff --git a/.jenkins/check/config/filter_pylint.txt b/.jenkins/check/config/filter_pylint.txt index 0a8c0b32df863318edbf30d92ffa131fd8cac3e6..da303dfe56c41c16fb422702bfa21feabbf88a72 100644 --- a/.jenkins/check/config/filter_pylint.txt +++ b/.jenkins/check/config/filter_pylint.txt @@ -253,6 +253,7 @@ "mindspore/tests/st/dump/dump_test_utils.py" "too-many-nested-blocks" "mindspore/tests/ut/python/parallel/test_graph_utils.py" "too-many-function-args" "mindspore/tests/st/ops/ascend/test_aclnn_ops/test_all_finite.py" "singleton-comparison" +"mindspore/tests/st/pynative/pyboost/test_pyboost_ops_abs.py" "redefined-builtin" #MindSpore Lite "mindspore/mindspore/ccsrc/plugin/device/cpu/kernel/nnacl/experimental/HPC-generator/generator.py" "redefined-builtin" @@ -276,4 +277,7 @@ "mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "bad-whitespace" "mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "bad-whitespace" "mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_dsl/sample/" "bad-continuation" -"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" "bad-continuation" +"mindspore/mindspore/lite/tools/kernel_builder/ascend/tbe_tik/sample/" + +#PIJit +"mindspore/tests/st/pi_jit" diff --git a/.jenkins/task/config/cann_version.txt b/.jenkins/task/config/cann_version.txt index 1c86cd975ee6b979e3968a08120eb17f0656a172..b459a083c2d12bc5b6007d0d1a09462c62e9ad07 100644 --- a/.jenkins/task/config/cann_version.txt +++ b/.jenkins/task/config/cann_version.txt @@ -1 +1 @@ -20240408 +20240414 \ No newline at end of file diff --git a/RELEASE.md b/RELEASE.md index ccdac2a9def58b8f5e749337a9c017c550b1209c..8646a9f9c3913ad6c68f840a4d2a1cf3202e359f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -77,13 +77,13 @@ - [BETA] mindspore.ops.TopK now supports the second input k as an int32 type tensor. -#### Bug fixes +### Bug Fixes - [#I92H93] Fixed the issue of 'Launch kernel failed' when using the Print operator to print string objects on the Ascend platform. - [#I8S6LY] Fixed RuntimeError: Attribute dyn_input_sizes of Default/AddN-op1 is [const vector]{}, of which size is less than 0 error of variable-length input operator, such as AddN or Concat, for dynamic shape process in graph mode on the Ascend platform. - [#I9ADZS] Fixed the data timeout issue in network training due to inefficient dataset recovery in the fault recovery scenario. -#### Contributors +### Contributors Thanks goes to these wonderful people: diff --git a/RELEASE_CN.md b/RELEASE_CN.md index 9de5de7c41f5d5f8b7a72dacc2362f79454aff83..3e2e64e3ecc9da734cfac2c765ff56a78719a5f8 100644 --- a/RELEASE_CN.md +++ b/RELEASE_CN.md @@ -68,7 +68,7 @@ - [BETA] 支持用户设置CANN的options配置项,配置项分为global和session二类,用户可以通过mindspore.set_context(ascend_config={"ge_options": {"global": {"global_option": "option_value"}, "session": {"session_option": "option_value"}}})进行配置。 -#### API Change +#### API变更 - 新增 mindspore.hal接口,开放流、事件以及设备管理能力。 - 新增 mindspore.multiprocessing 接口,提供了创建多进程的能力。 @@ -77,7 +77,7 @@ - [BETA] mindspore.ops.TopK当前支持第二个输入k为Int32类型的张量。 -#### Bug fixes +### 问题修复 - [#I92H93] 修复了昇腾平台下使用Print算子打印字符串对象时,Print算子报错Launch kernel failed的问题。 - [#I8S6LY] 修复了昇腾平台图模式动态shape流程下,变长输入算子(如 AddN、Concat)报错RuntimeError: Attribute dyn_input_sizes of Default/AddN-op1 is [const vector]{}, of which size is less than 0的问题。 diff --git a/cmake/external_libs/openssl.cmake b/cmake/external_libs/openssl.cmake index 5aea30896857ac10ba81e46459dcd47e26b84760..e96d5b1c7317a0f12ea57bde27d07dae488e7af1 100644 --- a/cmake/external_libs/openssl.cmake +++ b/cmake/external_libs/openssl.cmake @@ -46,6 +46,7 @@ if(BUILD_LITE) PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2023-4807.patch PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2023-5678.patch PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2024-0727.patch + PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2024-2511.patch ) elseif(PLATFORM_ARM32 AND ANDROID_NDK_TOOLCHAIN_INCLUDED) set(openssl_USE_STATIC_LIBS OFF) @@ -80,6 +81,7 @@ if(BUILD_LITE) PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2023-4807.patch PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2023-5678.patch PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2024-0727.patch + PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2024-2511.patch ) elseif(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR APPLE) set(openssl_CFLAGS -fvisibility=hidden) @@ -109,6 +111,7 @@ if(BUILD_LITE) PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2023-4807.patch PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2023-5678.patch PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2024-0727.patch + PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2024-2511.patch ) else() MESSAGE(FATAL_ERROR "openssl does not support compilation for the current environment.") @@ -145,6 +148,7 @@ else() PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2023-4807.patch PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2023-5678.patch PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2024-0727.patch + PATCHES ${OPENSSL_PATCH_ROOT}/CVE-2024-2511.patch ) include_directories(${openssl_INC}) add_library(mindspore::ssl ALIAS openssl::ssl) diff --git a/docs/api/api_python/ops/mindspore.ops.MatrixSetDiagV3.rst b/docs/api/api_python/ops/mindspore.ops.MatrixSetDiagV3.rst index 98281aec1d110793d8a8a0d4edb4a6d367487127..0ad15b822338bfd0c27eb31822f10c333ecc35d2 100644 --- a/docs/api/api_python/ops/mindspore.ops.MatrixSetDiagV3.rst +++ b/docs/api/api_python/ops/mindspore.ops.MatrixSetDiagV3.rst @@ -46,7 +46,7 @@ mindspore.ops.MatrixSetDiagV3 - **TypeError** - `k` 的数据类型不为int32。 - **ValueError** - `align` 取值不在合法值集合内。 - **ValueError** - `k` 的维度不为0或1。 - - **ValueError** - `x` 的维度不大于等于2。 + - **ValueError** - `x` 的维度小于2。 - **ValueError** - `k` 的大小不为1或2。 - **ValueError** - 当 `k` 的大小为2时, `k[1]` 小于 `k[0]` 。 - **ValueError** - 对角线 `diagonal` 的维度与输入 `x` 的维度不匹配。 diff --git a/docs/api/api_python/ops/mindspore.ops.extend.func_max.rst b/docs/api/api_python/ops/mindspore.ops.extend.func_max.rst index 479033f344297527c9c55a0694012531f96480d6..332bf3ee3e4da35cb77fd3848df084623c1a0081 100644 --- a/docs/api/api_python/ops/mindspore.ops.extend.func_max.rst +++ b/docs/api/api_python/ops/mindspore.ops.extend.func_max.rst @@ -7,7 +7,7 @@ mindspore.ops.extend.max 参数: - **input** (Tensor) - 输入任意维度的Tensor。不支持复数类型。 - - **dim** (int, 可选) - 指定计算维度。若要为 `dim` 参数赋值,请赋值int类型,不支持直接传入 ``None`` 。默认值: ``None`` 。 + - **dim** (int, 可选) - 指定计算维度。默认值: ``None`` 。 - **keepdim** (bool, 可选) - 表示是否减少维度,如果为 ``True`` ,输出将与输入保持相同的维度;如果为 ``False`` ,输出将减少维度。默认值: ``False`` 。 返回: diff --git a/docs/api/api_python/ops/mindspore.ops.extend.func_min.rst b/docs/api/api_python/ops/mindspore.ops.extend.func_min.rst index 3b6088bc35d241f3a1ce498a9ee57a3f0f6bffaf..208f6e0e9dcf9e036ed2fe59ca63e2c43c8ca98f 100644 --- a/docs/api/api_python/ops/mindspore.ops.extend.func_min.rst +++ b/docs/api/api_python/ops/mindspore.ops.extend.func_min.rst @@ -7,7 +7,7 @@ mindspore.ops.extend.min 参数: - **input** (Tensor) - 输入任意维度的Tensor。不支持复数类型。 - - **dim** (int, 可选) - 指定计算维度。若要为 `dim` 参数赋值,请赋值int类型,不支持直接传入 ``None`` 。默认值: ``None`` 。 + - **dim** (int, 可选) - 指定计算维度。默认值: ``None`` 。 - **keepdim** (bool, 可选) - 表示是否减少维度,如果为 ``True`` ,输出将与输入保持相同的维度;如果为 ``False`` ,输出将减少维度。默认值: ``False`` 。 返回: diff --git a/docs/api/api_python/ops/mindspore.ops.func_cholesky.rst b/docs/api/api_python/ops/mindspore.ops.func_cholesky.rst index 71265928731bf38e00b34a597491d31b45fb748d..d5616849104c99aa6d85580d4c9a8143dc8e2c08 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_cholesky.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_cholesky.rst @@ -15,6 +15,8 @@ mindspore.ops.cholesky .. math:: A = LL^T + 其中 `A` 是对称正定矩阵。 + 参数: - **input_x** (Tensor) - shape大小为 :math:`(*, N, N)` ,其中 :math:`*` 是零个或多个由对称正定矩阵组成的批处理维,数据类型为float32或float64。 - **upper** (bool) - 是否返回上三角矩阵还是下三角矩阵的标志。默认值:``False`` 。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_concat.rst b/docs/api/api_python/ops/mindspore.ops.func_concat.rst index 2febd8e6aea9108d7b2f0a9d54d7b96bf4fbcd72..c8d729f8f0d80789a4ad94e83e3c87ffbc490434 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_concat.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_concat.rst @@ -3,8 +3,6 @@ .. py:function:: mindspore.ops.concat(tensors, axis=0) - 在指定轴上拼接输入Tensor。 - :func:`mindspore.ops.cat()` 的别名。 教程样例: diff --git a/docs/api/api_python/ops/mindspore.ops.func_erf.rst b/docs/api/api_python/ops/mindspore.ops.func_erf.rst index d8e44fbc685aa47b1efd00eb45f7ed2fcfcfbad8..d2917c2ba1789abb9a3e7ea40c1dcb6f20a2c1b2 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_erf.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_erf.rst @@ -12,12 +12,16 @@ mindspore.ops.erf 参数: - **input** (Tensor) - 高斯误差函数的输入Tensor。上述公式中的 :math:`x` 。支持数据类型: - - Ascend: float16、float32。 + - Ascend: float16、float32、int64、bool。 - GPU/CPU: float16、float32、float64。 返回: - Tensor,具有与 `input` 相同的数据类型和shape。 + Tensor。当输入为 int64、bool 时,返回值类型为float32。 + 否则,返回值类型与输入类型相同。 异常: - **TypeError** - `input` 不是Tensor。 - - **TypeError** - `input` 的数据类型既不是float16、float32也不是float64。 + - **TypeError** - `input` 的数据类型不是如下类型: + + - Ascend: float16、float32、int64、bool。 + - GPU/CPU: float16、float32、float64。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_irfft.rst b/docs/api/api_python/ops/mindspore.ops.func_irfft.rst index 3a09a0435b41c27e3c979fa81ef5e4ce46751a26..3ebefa1dc092cdab0231d1d0c0dd777ca153ea30 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_irfft.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_irfft.rst @@ -28,7 +28,7 @@ mindspore.ops.irfft 异常: - **TypeError** - 如果 `input` 不是Tensor。 - - **TypeError** - 如果 `input` 数据类型不是int16,int32,int64,float32,float64。 + - **TypeError** - 如果 `input` 数据类型不是int16、int32、int64、float32、float64、complex64、complex128。 - **TypeError** - 如果 `n` 或 `dim` 不是int类型。 - **ValueError** - 如果 `dim` 中的值超出: :math:`[-input.ndim, -input.ndim)` 范围。 - **ValueError** - 如果 `n` 小于1。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_logdet.rst b/docs/api/api_python/ops/mindspore.ops.func_logdet.rst index 0a997fb67bc8f60b3df7bc2c77c6f27e68c52059..95db4df25e96eed79f0af39e9f0c7a4a8473d872 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_logdet.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_logdet.rst @@ -12,4 +12,4 @@ Tensor,`input` 的对数行列式。如果行列式小于0,则返回nan。如果行列式等于0,则返回-inf。 异常: - - **TypeError** - 如果 `input` 的dtype不是float32、float64、Complex64或Complex128。 + - **TypeError** - 如果 `input` 的dtype不是float32、float64、complex64或complex128。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_lu_solve.rst b/docs/api/api_python/ops/mindspore.ops.func_lu_solve.rst index 68d297f5aec9a3bbbea22079514fd44cfdb8c22c..18fb0cdefdb079350524b4cb9c5d5e17dc988192 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_lu_solve.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_lu_solve.rst @@ -20,8 +20,8 @@ mindspore.ops.lu_solve Tensor,与 `b` 和 `LU_data` 的数据类型相同。 异常: - - **TypeError** - `b` 或 `LU_data` 的 dtype 不属于以下类型: mstype.float16、mstype.float32。 - - **TypeError** - `LU_pivots` 的 dtype 不属于以下类型: mstype.int32。 + - **TypeError** - `b` 或 `LU_data` 的 dtype 不属于以下类型: float16、float32。 + - **TypeError** - `LU_pivots` 的 dtype 不属于以下类型: int32。 - **TypeError** - `b` , `LU_data` 或 `LU_pivots` 不为Tensor。 - **TypeError** - `b` 的 dtype 与 `LU_data` 的 dtype 不相同。 - **ValueError** - `LU_pivots` 的 batch 维度与 `LU_data` 的 batch 维度不相等。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_matrix_diag_part.rst b/docs/api/api_python/ops/mindspore.ops.func_matrix_diag_part.rst index 27e3e6c641bd6bac3fb8170fca091528a690e3e7..411d6e54c2ff2ab463150379cf58a94146b640aa 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_matrix_diag_part.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_matrix_diag_part.rst @@ -25,7 +25,7 @@ mindspore.ops.matrix_diag_part - **ValueError** - `align` 取值不在合法值集合内。 - **ValueError** - `k` 的维度不为0或1。 - **ValueError** - `padding_value` 的维度不为0。 - - **ValueError** - `x` 的维度不大于等于2。 + - **ValueError** - `x` 的维度小于2。 - **ValueError** - `k` 的大小不为1或2。 - **ValueError** - 当 `k` 的大小为2时,k[1]小于k[0]。 - **ValueError** - `k` 的取值不在 (-x.shape[-2], x.shape[-1]) 范围内。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_matrix_set_diag.rst b/docs/api/api_python/ops/mindspore.ops.func_matrix_set_diag.rst index 512e488dffff3e9b4fab3a2517f69dfc70f8d463..0b47355e6dc191248dc048fd6c0175d78149de23 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_matrix_set_diag.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_matrix_set_diag.rst @@ -27,7 +27,7 @@ mindspore.ops.matrix_set_diag - **TypeError** - `k` 的数据类型不为int32。 - **ValueError** - `align` 取值不在合法值集合内。 - **ValueError** - `k` 的维度不为0或1。 - - **ValueError** - `x` 的维度不大于等于2。 + - **ValueError** - `x` 的维度小于2。 - **ValueError** - `k` 的大小不为1或2。 - **ValueError** - 当 `k` 的大小为2时,k[1]小于k[0]。 - **ValueError** - 对角线 `diagonal` 的维度与输入 `x` 的维度不匹配。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_multinomial_with_replacement.rst b/docs/api/api_python/ops/mindspore.ops.func_multinomial_with_replacement.rst index c5f9d2541337a1c41f2202a08c306de65cde7936..f2fceb981fab6cb9f48f6def39b2cc99e80214c0 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_multinomial_with_replacement.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_multinomial_with_replacement.rst @@ -9,7 +9,7 @@ mindspore.ops.multinomial_with_replacement 输入的行不需要求和为1(在这种情况下,使用值作为权重),但必须是非负的、有限的,并且具有非零和。 参数: - - **x** (Tensor) - 包含概率的累积和的输入Tensor,必须为一维或二维。 + - **x** (Tensor) - 包含概率的累积和的输入Tensor,必须为一维或二维。数据类型必须是以下之一:float16、float32、float64。 - **seed** (int) - 如果将随机种子设置为-1,并将 `offset` 设置为0,则随机数生成器将使用随机种子进行种植。否则,将使用给定的随机数种子。支持的dtype:int64。 - **offset** (int) - 为避免种子冲突设置的偏移量。支持的dtype:int64。 - **numsamples** (int) - 抽取样本量,必须大于零。 @@ -21,7 +21,7 @@ mindspore.ops.multinomial_with_replacement 异常: - **TypeError** - 如果 `x` 不是1D或2DTensor。 - **TypeError** - 如果 `x` 数据类型不是float16、float32或float64。 - - **TypeError** - 如果 `num_sample` 不是int类型。 + - **TypeError** - 如果 `numsamples` 不是int类型。 - **TypeError** - 如果 `replacement` bool类型。 - **ValueError** - 如果 `replacement` 为False的时候, `numsamples` 的值不大于x_shape[-1]。 - **ValueError** - 如果 `x` 某一行元素的和小于零。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_outer.rst b/docs/api/api_python/ops/mindspore.ops.func_outer.rst index da8103ffb41f24b53cef0dcd0ab4ba8a4d022d1e..4fd9cae5d44c85ca0ece65c8393e6a4837443598 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_outer.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_outer.rst @@ -13,7 +13,7 @@ mindspore.ops.outer - **vec2** (Tensor) - 输入一维向量。 返回: - out (Tensor, optional),两个一维向量的外积,是一个二维矩阵。 + out (Tensor, 可选),两个一维向量的外积,是一个二维矩阵。 异常: - **TypeError** - 如果 `input` 或 `vec2` 不是Tensor。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_select.rst b/docs/api/api_python/ops/mindspore.ops.func_select.rst index 1eb0ab28841f5178c83aa36232a873fafead851d..d7f2149c28a18ac9ba60c42f52db972b794a5dfe 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_select.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_select.rst @@ -1,31 +1,31 @@ mindspore.ops.select ==================== -.. py:function:: mindspore.ops.select(cond, x, y) +.. py:function:: mindspore.ops.select(condition, input, other) - 根据条件判断Tensor中的元素的值,来决定输出中的相应元素是从 `x` (如果元素值为True)还是从 `y` (如果元素值为False)中选择。 + 根据条件判断Tensor中的元素的值,来决定输出中的相应元素是从 `input` (如果元素值为True)还是从 `other` (如果元素值为False)中选择。 该算法可以被定义为: .. math:: out_i = \begin{cases} - x_i, & \text{if } cond_i \\ - y_i, & \text{otherwise} + input_i, & \text{if } condition_i \\ + other_i, & \text{otherwise} \end{cases} 参数: - - **cond** (Tensor[bool]) - 条件Tensor,决定选择哪一个元素,shape是 :math:`(x_1, x_2, ..., x_N, ..., x_R)`。 - - **x** (Union[Tensor, int, float]) - 第一个被选择的Tensor或者数字。 - 如果x是一个Tensor,那么shape是或者可以被广播为 :math:`(x_1, x_2, ..., x_N, ..., x_R)`。 - 如果x是int或者float,那么将会被转化为int32或者float32类型,并且被广播为与y相同的shape。x和y中至少要有一个Tensor。 - - **y** (Union[Tensor, int, float]) - 第二个被选择的Tensor或者数字。 - 如果y是一个Tensor,那么shape是或者可以被广播为 :math:`(x_1, x_2, ..., x_N, ..., x_R)`。 - 如果y是int或者float,那么将会被转化为int32或者float32类型,并且被广播为与x相同的shape。x和y中至少要有一个Tensor。 + - **condition** (Tensor[bool]) - 条件Tensor,决定选择哪一个元素,shape是 :math:`(x_1, x_2, ..., x_N, ..., x_R)`。 + - **input** (Union[Tensor, int, float]) - 第一个被选择的Tensor或者数字。 + 如果input是一个Tensor,那么shape是或者可以被广播为 :math:`(x_1, x_2, ..., x_N, ..., x_R)`。 + 如果input是int或者float,那么将会被转化为int32或者float32类型,并且被广播为与y相同的shape。x和y中至少要有一个Tensor。 + - **other** (Union[Tensor, int, float]) - 第二个被选择的Tensor或者数字。 + 如果other是一个Tensor,那么shape是或者可以被广播为 :math:`(x_1, x_2, ..., x_N, ..., x_R)`。 + 如果other是int或者float,那么将会被转化为int32或者float32类型,并且被广播为与x相同的shape。x和y中至少要有一个Tensor。 返回: - Tensor,与 `cond` 的shape相同。 + Tensor,与 `condition` 的shape相同。 异常: - - **TypeError** - `x` 和 `y` 不是Tensor、int或者float。 + - **TypeError** - `input` 和 `other` 不是Tensor、int或者float。 - **ValueError** - 输入的shape不能被广播。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_space_to_batch_nd.rst b/docs/api/api_python/ops/mindspore.ops.func_space_to_batch_nd.rst index 95a092f4b86701a2cec4aff2f9c83a29bca25882..1837fc23bcf8cd5dd9127dfe452f901610c9e0bb 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_space_to_batch_nd.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_space_to_batch_nd.rst @@ -20,7 +20,7 @@ mindspore.ops.space_to_batch_nd 参数: - **input_x** (Tensor) - 输入张量,Ascend平台必须为四维。 - **block_size** (Union[list(int), tuple(int), int]) - 块形状描述空间维度为分割的个数。如果 `block_size` 为list或者tuple,其长度 `M` 为空间维度的长度。如果 `block_size` 为整数,那么所有空间维度分割的个数均为 `block_size` 。在Ascend平台 `M` 必须为2。 - - **paddings** (Union[tuple, list]) - 空间维度的填充大小。 + - **paddings** (Union[tuple, list]) - 空间维度的填充大小。包含 M 个subtraction列表。每个列表包含 2 个整数值。所有值都必须大于 0。`paddings[i]` 指定空间维度 i 的填充、与输入维度 i + 偏移量相对应。要求 input_shape[i+offset]+paddings[i][0]+paddings[i][1] 能被 block_size[i] 整除。在Ascend, M 必须为 2。 返回: Tensor,经过划分排列之后的结果。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_squeeze.rst b/docs/api/api_python/ops/mindspore.ops.func_squeeze.rst index 19497c1d4c763a7d76557fbfa77c169840e1f01e..73da448d188801c168ec571330f7310ebe26a5ff 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_squeeze.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_squeeze.rst @@ -11,6 +11,7 @@ mindspore.ops.squeeze 如果输入的shape为(A, 1, B), :math:`axis=0` 时不会改变输入的Tensor,但 :math:`axis=1` 时会使输入Tensor的shape变为(A, B)。 .. note:: + - squeeze不为1的维度会报错。 - 请注意,在动态图模式下,输出Tensor将与输入Tensor共享数据,并且没有Tensor数据复制过程。 - 维度索引从0开始,并且必须在 `[-input.ndim, input.ndim)` 范围内。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_tensor_scatter_mul.rst b/docs/api/api_python/ops/mindspore.ops.func_tensor_scatter_mul.rst index 99d7e107410a68ba24fbc6a5e2a21bb031ff9bf8..41b008b4e1603d9f0cb030ab47c574dd59dbac33 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_tensor_scatter_mul.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_tensor_scatter_mul.rst @@ -7,12 +7,12 @@ mindspore.ops.tensor_scatter_mul `indices` 的最后一个轴是每个索引向量的深度。对于每个索引向量, `updates` 中必须有相应的值。 `updates` 的shape应该等于 `input_x[indices]` 的shape。有关更多详细信息,请参见样例。 - .. note:: - - 如果 `indices` 的某些值超出 `input_x` 的维度范围,则相应的 `updates` 不会更新为 `input_x` ,而不是抛出索引错误。 - .. math:: output\left [indices \right ] = input\_x\times update + .. note:: + - 如果 `indices` 的某些值超出 `input_x` 的维度范围,则相应的 `updates` 不会更新为 `input_x` ,而不是抛出索引错误。 + 参数: - **input_x** (Tensor) - 输入Tensor。 `input_x` 的维度必须不小于 `indices.shape[-1]` 。 - **indices** (Tensor) - `input_x` 执行scatter操作的目标索引,数据类型为int32或int64,rank必须大于等于2。 diff --git a/docs/api/api_python/ops/mindspore.ops.func_where.rst b/docs/api/api_python/ops/mindspore.ops.func_where.rst index 4f4c6b9f99d421f3e6b9aac343893f3f2654e3f6..e72c94acd126610ed3da54a15e1a50e8491da159 100644 --- a/docs/api/api_python/ops/mindspore.ops.func_where.rst +++ b/docs/api/api_python/ops/mindspore.ops.func_where.rst @@ -1,23 +1,23 @@ mindspore.ops.where ==================== -.. py:function:: mindspore.ops.where(condition, x, y) +.. py:function:: mindspore.ops.where(condition, input, other) - 返回一个Tensor,Tensor的元素从 `x` 或 `y` 中根据 `condition` 选择。 + 返回一个Tensor,Tensor的元素从 `input` 或 `other` 中根据 `condition` 选择。 .. math:: - output_i = \begin{cases} x_i,\quad &if\ condition_i \\ y_i,\quad &otherwise \end{cases} + output_i = \begin{cases} input_i,\quad &if\ condition_i \\ other_i,\quad &otherwise \end{cases} 参数: - - **condition** (Tensor[bool]) - 如果是 ``True`` ,选取 `x` 中的元素,否则选取 `y` 中的元素。 - - **x** (Union[Tensor, Scalar]) - 在 `condition` 为 ``True`` 的索引处选择的值。 - - **y** (Union[Tensor, Scalar]) - 当 `condition` 为 ``False`` 的索引处选择的值。 + - **condition** (Tensor[bool]) - 如果是 ``True`` ,选取 `input` 中的元素,否则选取 `other` 中的元素。 + - **input** (Union[Tensor, Scalar]) - 在 `condition` 为 ``True`` 的索引处选择的值。 + - **other** (Union[Tensor, Scalar]) - 当 `condition` 为 ``False`` 的索引处选择的值。 返回: - Tensor,其中的元素从 `x` 和 `y` 中选取。 + Tensor,其中的元素从 `input` 和 `other` 中选取。 异常: - **TypeError** - 如果 `condition` 不是Tensor。 - - **TypeError** - 如果 `x` 和 `y` 都是常量。 - - **ValueError** - `condition` 、 `x` 和 `y` 不能互相广播。 + - **TypeError** - 如果 `input` 和 `other` 都是常量。 + - **ValueError** - `condition` 、 `input` 和 `other` 不能互相广播。 diff --git a/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Tensor.rst b/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Tensor.rst index 53011d7e3a3693c10a934b37b66b1c0de8439a0a..9c6690f908cba350b647f2cd320a325005ee5195 100644 --- a/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Tensor.rst +++ b/docs/api/lite_api_python/mindspore_lite/mindspore_lite.Tensor.rst @@ -9,7 +9,7 @@ mindspore_lite.Tensor - **tensor** (Tensor,可选) - 被存储在新Tensor中的数据,数据可以是来自其它Tensor。默认值: ``None`` 。 - **shape** (list,可选) - Tensor的shape信息。默认值: ``None`` 。 - **dtype** (DataType,可选) - Tensor的dtype信息。默认值: ``None`` 。 - - **device** (str,可选) - Tensor的device信息。默认值: ``None`` 。 + - **device** (str,可选) - Tensor的device信息。取值可以是 ``"ascend"`` 或者 ``"ascend:device_id"`` 或者 ``None`` ,其中 ``device_id`` 指的是卡号,可以是 ``0`` , ``1`` , ``2`` , ``3`` , ``4`` , ``5`` , ``6`` , ``7``。如果 ``device`` 的取值为 ``None``,则表示在CPU上初始化Tensor。默认值: ``None`` 。 异常: - **TypeError** - `tensor` 既不是Tensor类型也不是 ``None`` 。 diff --git a/graphengine b/graphengine index 004af6a1ddd7a507cb638501e9468ac124da8811..10fe59d72edd3d8f6b9fe2dc3cf84a274af08d65 160000 --- a/graphengine +++ b/graphengine @@ -1 +1 @@ -Subproject commit 004af6a1ddd7a507cb638501e9468ac124da8811 +Subproject commit 10fe59d72edd3d8f6b9fe2dc3cf84a274af08d65 diff --git a/mindspore/ccsrc/backend/common/expander/fallback/math_ops.cc b/mindspore/ccsrc/backend/common/expander/fallback/math_ops.cc index 38d9aefc29e990607cbd1cb99c405bb184171ab0..dde84d290f1616f972446b50e3d114e8aef04a95 100644 --- a/mindspore/ccsrc/backend/common/expander/fallback/math_ops.cc +++ b/mindspore/ccsrc/backend/common/expander/fallback/math_ops.cc @@ -19,6 +19,7 @@ #include "utils/shape_utils.h" #include "ops/ops_func_impl/matmul_ext.h" #include "ops/op_utils.h" +#include "ops/op_enum.h" namespace mindspore { namespace expander { @@ -288,5 +289,29 @@ NodePtr BuilderForMaxorMin(FallbackIRBuilder *ib, const std::string &emit_op) { REG_FALLBACK_BUILDER("Max").SetBody(BODYFUNC(ib) { return {BuilderForMaxorMin(ib, "ReduceMax")}; }); REG_FALLBACK_BUILDER("Min").SetBody(BODYFUNC(ib) { return {BuilderForMaxorMin(ib, "ReduceMin")}; }); + +REG_FALLBACK_BUILDER("DivMod").SetBody(BODYFUNC(ib) { + auto input_x = ib->GetInput(kIndex0); + auto input_y = ib->GetInput(kIndex1); + auto rounding_mode = ib->GetInput(kIndex2); + + auto mode_type = rounding_mode->abstract()->BuildType(); + MS_EXCEPTION_IF_NULL(mode_type); + if (mode_type->isa()) { + return {ib->Div(input_x, input_y)}; + } + + auto mode_value_ptr = rounding_mode->BuildValue(); + auto mode_opt = mindspore::ops::GetScalarValue(mode_value_ptr); + + if (mode_opt.value() == ops::RoundingMode::FLOOR) { + return {ib->Emit("FloorDiv", {input_x, input_y})}; + } else if (mode_opt.value() == ops::RoundingMode::TRUNC) { + auto div_out = ib->Cast(ib->Div(input_x, input_y), ib->GetDtype(input_x)->type_id()); + return {ib->Emit("Trunc", {div_out})}; + } else { + MS_LOG(EXCEPTION) << "DivMod abstract failed."; + } +}); } // namespace expander } // namespace mindspore diff --git a/mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc index 2e98851db71a9edd28be714a43f39f733ce35321..d4b327f55092fe5446aef45262983e5319b66dac 100644 --- a/mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc +++ b/mindspore/ccsrc/backend/common/mem_reuse/mem_dynamic_allocator.cc @@ -15,7 +15,9 @@ */ #include "include/backend/mem_reuse/mem_dynamic_allocator.h" + #include "include/backend/mem_reuse/mem_tracker.h" + #include #include #include @@ -104,7 +106,7 @@ DeviceMemPtr DynamicMemPoolBestFit::AllocTensorMem(size_t size, bool from_persis } if (device_addr == nullptr) { - MS_LOG(WARNING) << "Malloc failed and try to wait events to release more memory."; + MS_LOG(WARNING) << "Alloc tensor mem failed and try to wait events to release more memory."; // Since address may be duplicate, use set. std::set carry_event_addresses; for (const auto &stream_pair_address : stream_pair_addresses_) { @@ -572,10 +574,11 @@ void DynamicMemPoolBestFit::FreeTensorMemInner(const DeviceMemPtr &device_addr) bool DynamicMemPoolBestFit::PreCombineMemBuf(const DynamicMemBufPtr &mem_buf, const MemStatusManagerPtr &mem_mng) { auto device_addr = mem_buf->device_addr_; if (mem_buf->status_ == DynamicMemBufStatus::kMemBufUsed && !mem_buf->IsEventNotUsed()) { - MS_LOG(DEBUG) << "Combine mem buf exit since mem buf is used by event, device_addr : " << device_addr << "."; mem_buf->status_ = DynamicMemBufStatus::kMemBufUsedByEvent; mem_mng->mps_.total_used_mem_size_ -= mem_buf->size_; mem_mng->mps_.total_used_by_event_mem_size_ += mem_buf->size_; + MS_LOG(DEBUG) << "Combine mem buf exit since mem buf is used by event, device_addr : " << device_addr + << ", used by event mem size : " << mem_mng->mps_.total_used_by_event_mem_size_ << "."; return false; } @@ -621,6 +624,8 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, << " is less than the size of membuf : " << mem_buf->size_ << "."; } mem_mng->mps_.total_used_by_event_mem_size_ -= mem_buf->size_; + MS_LOG(DEBUG) << "Combime mem buf for addr : " << mem_buf->device_addr_ + << ", used by event mem size : " << mem_mng->mps_.total_used_by_event_mem_size_ << "."; } else if (origin_status == DynamicMemBufStatus::kMemBufIdle) { if (mem_mng->mps_.total_idle_mem_size_ < mem_buf->size_) { DumpDynamicMemPoolDebugInfo(); @@ -865,7 +870,8 @@ void DynamicMemPoolBestFit::DumpDynamicMemPoolStateInfo() { total_used_size_list[static_cast(mb->second->allocator_type_)] += mb->second->size_; } } - buf << ", block[" << i << "] block size:" << mem_mng->mem_block_list_[i]->mem_block_size_ / kMBToByte + buf << ", block[" << i << "] stream id:" << mem_mng->mem_block_list_[i]->stream_id_ + << "block size:" << mem_mng->mem_block_list_[i]->mem_block_size_ / kMBToByte << "M idle size:" << (mem_mng->mem_block_list_[i]->mem_block_size_ - mem_block_used_size) / kMBToByte << "M"; } @@ -917,7 +923,8 @@ void DynamicMemPoolBestFit::DumpDynamicMemPoolDebugInfo() { MS_EXCEPTION_IF_NULL(mem_buf); total_idle_mem_in_mem_mng += mem_buf->size_; MS_LOG(INFO) << " Idle mem_buf info: size[" << mem_buf->size_ << "] address[" << mem_buf->device_addr_ - << "] status[" << kBufStatusString.at(mem_buf->status_) << "]."; + << "] status[" << kBufStatusString.at(mem_buf->status_) << "] stream id[" << mem_buf->stream_id_ + << "]."; } } // Dump all the eager free memory buf info. @@ -937,7 +944,8 @@ void DynamicMemPoolBestFit::DumpDynamicMemPoolDebugInfo() { MS_EXCEPTION_IF_NULL(mem_buf); total_eager_free_mem_in_mem_mng += mem_buf->size_; MS_LOG(INFO) << " Eager free mem_buf info: size[" << mem_buf->size_ << "] address[" << mem_buf->device_addr_ - << "] status[" << kBufStatusString.at(mem_buf->status_) << "]."; + << "] status[" << kBufStatusString.at(mem_buf->status_) << "] stream id[" << mem_buf->stream_id_ + << "]."; } } // Dump the memory statistical info. @@ -1249,7 +1257,7 @@ const DeviceState MemStatusManager::DumpMemBlockDebugInfo(const std::string &mem auto mem_buf_map = (*iter)->block_all_mem_buf_map_; MS_LOG(WARNING) << " MemBlock info: number[" << iter - mem_block_list_.begin() << "] mem_buf_counts[" << mem_buf_map.size() << "] base_address[" << (*iter)->device_addr() << "] block_size[" - << (*iter)->size() << "]."; + << (*iter)->size() << "] stream id[" << (*iter)->stream_id_ << "]."; for (auto iter_mem_buf = mem_buf_map.begin(); iter_mem_buf != mem_buf_map.end(); ++iter_mem_buf) { auto mem_buf = iter_mem_buf->second; MS_EXCEPTION_IF_NULL(mem_buf); @@ -1267,7 +1275,8 @@ const DeviceState MemStatusManager::DumpMemBlockDebugInfo(const std::string &mem MS_LOG(INFO) << " MemBuf info: address[" << mem_buf->device_addr_ << "] size[" << mem_buf->size_ << "] status[" << kBufStatusString.at(mem_buf->status_) << "] name[" << (mem_buf->allocator_name_.empty() ? "Unknown" : mem_buf->allocator_name_) << "] type[" - << kAllocatorTypeString.at(mem_buf->allocator_type_) << "]."; + << kAllocatorTypeString.at(mem_buf->allocator_type_) << "] stream id[" << mem_buf->stream_id_ + << "]."; } } return device_state; diff --git a/mindspore/ccsrc/backend/common/pass/replace_addn_fusion.cc b/mindspore/ccsrc/backend/common/pass/replace_addn_fusion.cc index b141dab8cbccfe42d17140a2b732cb9860903158..47bb612f8b255868db4e8b716e75e085253ca16f 100644 --- a/mindspore/ccsrc/backend/common/pass/replace_addn_fusion.cc +++ b/mindspore/ccsrc/backend/common/pass/replace_addn_fusion.cc @@ -38,6 +38,9 @@ bool ReplaceAddNFusion::CheckMatchedDAG(const PatternMap &, const FuncGraphPtr & if (LongToSize(num_input) != kAddNInputNum) { return false; } + if (common::AnfAlgo::GetOutputInferDataType(node, 0) == kNumberTypeUInt32) { + return false; + } return true; } diff --git a/mindspore/ccsrc/backend/graph_compiler/backend.cc b/mindspore/ccsrc/backend/graph_compiler/backend.cc index 627ff60b38db197ee0f3af6d6603136ca364ddae..ba5ec902d53d0425d0165abb8a5aa4ad553ad74c 100644 --- a/mindspore/ccsrc/backend/graph_compiler/backend.cc +++ b/mindspore/ccsrc/backend/graph_compiler/backend.cc @@ -475,7 +475,7 @@ void UpdateOutputAbstract(const VectorRef &outputs, const session::BackendOpRunI << op_run_info->base_op_run_info.abstract->ToString(); } -tensor::TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index) { +tensor::BaseTensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index) { MS_EXCEPTION_IF_NULL(output_node); const auto &device_tensor = AnfAlgo::GetMutableOutputAddr(output_node, output_index, false); MS_EXCEPTION_IF_NULL(device_tensor); @@ -496,7 +496,7 @@ tensor::TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t outpu // Create host tensor, the output tensor should use the infer type, it will be handed correctly by tensor data sync // when infer type is not equal to device type. - auto tensor = std::make_shared(kernel_tensor->dtype_id(), kernel_tensor->GetShapeVector()); + auto tensor = std::make_shared(kernel_tensor->dtype_id(), kernel_tensor->GetShapeVector()); // Put device tensor into host tensor. tensor->set_device_address(device_tensor); @@ -514,10 +514,10 @@ tensor::TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t outpu return tensor; } -tensor::TensorPtr CreateOutputTensorDynamicImpl(const OpCompilerInfoPtr &op_compiler_info, - const AnfNodePtr &output_node, size_t output_index, - const std::shared_ptr &address, - size_t idx_in_graph_outputs) { +tensor::BaseTensorPtr CreateOutputTensorDynamicImpl(const OpCompilerInfoPtr &op_compiler_info, + const AnfNodePtr &output_node, size_t output_index, + const std::shared_ptr &address, + size_t idx_in_graph_outputs) { MS_EXCEPTION_IF_NULL(output_node); MS_EXCEPTION_IF_NULL(address); MS_EXCEPTION_IF_NULL(op_compiler_info); @@ -531,7 +531,7 @@ tensor::TensorPtr CreateOutputTensorDynamicImpl(const OpCompilerInfoPtr &op_comp // Create host tensor, the output tensor should use the infer type, it will be handed correctly by tensor data sync // when infer type is not equal to device type. - auto tensor = std::make_shared(address->type_id(), address->host_shape()); + auto tensor = std::make_shared(address->type_id(), address->host_shape()); // Put device tensor into host tensor. address->SetNodeIndex(output_node, output_index); diff --git a/mindspore/ccsrc/backend/graph_compiler/backend_base.cc b/mindspore/ccsrc/backend/graph_compiler/backend_base.cc index 843581b409d61386a8668f48ef3e08e26d7c2f61..c93cc3b0c61a0a3835032e849c5004bd27a46700 100644 --- a/mindspore/ccsrc/backend/graph_compiler/backend_base.cc +++ b/mindspore/ccsrc/backend/graph_compiler/backend_base.cc @@ -38,6 +38,7 @@ #include "ops/sparse_tensor_ops.h" #include "ops/nn_ops.h" #include "runtime/device/device_address_utils.h" +#include "runtime/device/multi_stream_controller.h" #include "runtime/graph_scheduler/graph_compiler.h" #include "runtime/pynative/graph_adapter.h" #include "pybind_api/gil_scoped_long_running.h" @@ -522,6 +523,11 @@ const ActorInfo &MindRTBackendBase::CompileGraphs(const FuncGraphPtr &func_graph (void)actor_to_graph_compiler_info_.emplace(graph_compiler_info->name_, std::move(graph_compiler_info)); PROF_END(compile_backend_graph); + for (const auto &graph_id_to_context : graph_id_to_device_context_) { + auto context = graph_id_to_context.second; + device::MultiStreamController::GetInstance()->Refresh(context); + } + (void)profiler::CollectHostInfo(kModelNameRuntime, kEventCompileGraph, kStageCompileGraphs, 1, 0, 1); MS_LOG(INFO) << "Status record: end compile function graph: " << func_graph->ToString() << ", produce actor: " << actor_info; @@ -550,7 +556,7 @@ void DoUnifyMindIRPass(const FuncGraphPtr &graph, const std::shared_ptr kProfilerEventString = { {ProfilerEvent::kPyNativeFrontendTask, "FrontendTask"}, {ProfilerEvent::kPyNativeBackendTask, "BackendTask"}, {ProfilerEvent::kPyNativeDeviceTask, "DeviceTask"}, + {ProfilerEvent::kPyNativeLaunchTask, "LaunchTask"}, {ProfilerEvent::kPyNativeBpropTask, "BpropTask"}, {ProfilerEvent::kPyNativeGilAcquire, "AcquireGil"}, {ProfilerEvent::kPyNativeCast, "PyNativeCast"}, diff --git a/mindspore/ccsrc/common/symbol_engine/symbol_engine_impl.cc b/mindspore/ccsrc/common/symbol_engine/symbol_engine_impl.cc index 30ba5984b9c3e932d94c2d8d61533f1013267523..e706e1cb4913b68c70856c5405933f9c326b0814 100644 --- a/mindspore/ccsrc/common/symbol_engine/symbol_engine_impl.cc +++ b/mindspore/ccsrc/common/symbol_engine/symbol_engine_impl.cc @@ -522,7 +522,9 @@ void SymbolEngineImpl::BuildCNodeSymbol(const CNodePtr &cnode) { } } else { prim = GetCNodePrimitive(cnode); - MS_EXCEPTION_IF_NULL(prim); + if (prim == nullptr) { + prim = std::make_shared("_UnsupportedCNode"); + } inputs = ExtractInputsAbstract(cnode); } auto builder = OperationBuilderInfoRegistry::GetBuilder(prim->name(), emitter_.get()); diff --git a/mindspore/ccsrc/cxx_api/acl_utils.h b/mindspore/ccsrc/cxx_api/acl_utils.h index dac60cfbf8f85397a6ee4710a0b28f45d061fa68..fe7cd29ca286bbd62ac8c1c1da066e605455269f 100644 --- a/mindspore/ccsrc/cxx_api/acl_utils.h +++ b/mindspore/ccsrc/cxx_api/acl_utils.h @@ -19,7 +19,7 @@ #include #include -#include "transform/symbol/acl_base_symbol.h" +#include "transform/symbol/acl_rt_symbol.h" #include "transform/symbol/symbol_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/cxx_api/model/acl/acl_model_options.cc b/mindspore/ccsrc/cxx_api/model/acl/acl_model_options.cc index 61a5a7aee2f02646ceea0b15a586565f02a095ab..aa3f81835051af0f82dcfc9eb7d1ea221a067567 100644 --- a/mindspore/ccsrc/cxx_api/model/acl/acl_model_options.cc +++ b/mindspore/ccsrc/cxx_api/model/acl/acl_model_options.cc @@ -19,7 +19,7 @@ #include "utils/log_adapter.h" #include "ge/ge_api_types.h" #include "cxx_api/acl_utils.h" -#include "transform/symbol/acl_base_symbol.h" +#include "transform/symbol/acl_rt_symbol.h" #include "transform/symbol/symbol_utils.h" namespace mindspore { diff --git a/mindspore/ccsrc/debug/CMakeLists.txt b/mindspore/ccsrc/debug/CMakeLists.txt index 7f5cb96ebd825f8ca507269e6e42c0ed8430eb6c..b720cc368770f4adb2ee3522652f61ea9ebc37d2 100644 --- a/mindspore/ccsrc/debug/CMakeLists.txt +++ b/mindspore/ccsrc/debug/CMakeLists.txt @@ -46,6 +46,7 @@ if(NOT ENABLE_SECURITY) "${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_json_parser.cc" "${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_utils.cc" "${CMAKE_CURRENT_SOURCE_DIR}/data_dump/data_dumper.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/data_dump/dump_graph_boundary.cc" "${CMAKE_CURRENT_SOURCE_DIR}/data_dump/npy_header.cc" "${CMAKE_CURRENT_SOURCE_DIR}/utils.cc" "${CMAKE_CURRENT_SOURCE_DIR}/common/csv_writer.cc" diff --git a/mindspore/ccsrc/debug/data_dump/acl_dump_json_writer.cc b/mindspore/ccsrc/debug/data_dump/acl_dump_json_writer.cc index 7757c55d22c4d247350f8705a0d567a10d9b45e7..e8b1d3133f48d6ff3adb0a946b0f5c16593a81c3 100644 --- a/mindspore/ccsrc/debug/data_dump/acl_dump_json_writer.cc +++ b/mindspore/ccsrc/debug/data_dump/acl_dump_json_writer.cc @@ -52,11 +52,13 @@ void AclDumpJsonWriter::Parse() { break; } auto kernels = dump_parser.GetKernelsJson(); + auto model_name = dump_parser.GetModelJson(); MS_LOG(INFO) << "Dump kernels is as follows: "; for (const auto &iter : kernels) { MS_LOG(INFO) << iter.dump(); } layer_ = kernels; + model_name_ = model_name; auto op_debug_mode = dump_parser.op_debug_mode(); MS_LOG(INFO) << "Op_debug_mode is: " << op_debug_mode; switch (op_debug_mode) { @@ -77,12 +79,14 @@ void AclDumpJsonWriter::Parse() { bool AclDumpJsonWriter::WriteToFile(uint32_t device_id, uint32_t step_id, bool is_init) { nlohmann::json dump_list; - if (!layer_.empty()) { - dump_list.push_back({{"layer", layer_}}); + if (!layer_.empty() && !model_name_.empty()) { + dump_list.push_back({{"model_name", model_name_}, {"layer", layer_}}); } std::string dump_path = dump_base_path_ + "/" + std::to_string(step_id); nlohmann::json dump; - if (dump_scene_ == "overflow") { + if (dump_scene_ == "lite_exception") { + dump = {{"dump_scene", "lite_exception"}}; + } else if (dump_scene_ == "overflow") { dump = {{"dump_path", dump_path}, {"dump_debug", "on"}}; } else { if (is_init == True) { @@ -90,7 +94,7 @@ bool AclDumpJsonWriter::WriteToFile(uint32_t device_id, uint32_t step_id, bool i } else { dump = {{"dump_path", dump_path}, {"dump_mode", dump_mode_}}; } - if (!dump_list.empty()) { + if (!dump_list.empty() && !model_name_.empty()) { dump["dump_list"] = dump_list; } else { dump["dump_list"] = nlohmann::json::array(); @@ -111,7 +115,7 @@ bool AclDumpJsonWriter::WriteToFile(uint32_t device_id, uint32_t step_id, bool i ChangeFileMode(realpath.value(), S_IWUSR); std::ofstream json_file(realpath.value()); if (!json_file.is_open()) { - MS_LOG(EXCEPTION) << "Write file:" << realpath.value() << " open failed." + MS_LOG(EXCEPTION) << "Write json file:" << realpath.value() << " open failed." << " Errno:" << errno; } try { diff --git a/mindspore/ccsrc/debug/data_dump/dump_graph_boundary.cc b/mindspore/ccsrc/debug/data_dump/dump_graph_boundary.cc new file mode 100644 index 0000000000000000000000000000000000000000..535705aa77c4f5c6864974ac9a9f5c263a60f3f4 --- /dev/null +++ b/mindspore/ccsrc/debug/data_dump/dump_graph_boundary.cc @@ -0,0 +1,124 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "debug/data_dump/dump_graph_boundary.h" + +#include +#include +#include +#include "utils/ms_utils.h" +#include "utils/file_utils.h" +#include "utils/convert_utils_base.h" + +namespace mindspore::datadump { +DumpGraphBoundary &DumpGraphBoundary::GetInstance() { + static DumpGraphBoundary inst{}; + return inst; +} + +void ReplaceSlashesWithUnderscores(std::string *str) { + size_t pos = 0; + while ((pos = str->find('/', pos)) != std::string::npos) { + str->replace(pos, 1, "_"); + pos += 1; + } +} + +void DumpGraphBoundary::HookDumpTask(const KernelGraphPtr &kernel_graph, + const std::vector &device_addr, + const std::vector> &nodes, void *stream, + bool is_input) { + if (!enable_) { + return; + } + if (!spec_kernel_graph_.empty() && spec_kernel_graph_ != kernel_graph->ToString()) { + return; + } + MS_LOG(INFO) << "entry hook ======="; + MS_EXCEPTION_IF_NULL(kernel_graph); + MS_EXCEPTION_IF_NULL(stream); + auto kernel_graph_name = kernel_graph->ToString(); + std::vector names; + std::vector sizes; + std::vector host_item; + std::string mid_name = is_input ? "_input_" : "_output_"; + for (const auto &i : nodes) { + auto node = i.first.lock(); + MS_EXCEPTION_IF_NULL(node); + auto idx = i.second; + auto file_name = kernel_graph_name; + file_name.append("_" + node->fullname_with_scope() + mid_name + std::to_string(idx)); + ReplaceSlashesWithUnderscores(&file_name); + (void)names.emplace_back(file_name); + auto addr = device_addr[idx]; + MS_EXCEPTION_IF_NULL(addr); + auto host_data = new (std::nothrow) uint8_t[addr->GetSize()]; + if (!addr->AsyncDeviceToHost(host_data, addr->GetSize(), stream)) { + MS_LOG(ERROR) << "Call acl copy failed, name: " << names[idx] << ", size: " << addr->GetSize(); + delete[] host_data; + return; + } + sizes.push_back(addr->GetSize()); + (void)host_item.emplace_back(host_data); + MS_LOG(INFO) << "name: " << file_name << ", host addr: " << host_data << ", host size: " << addr->GetSize(); + } + auto dc = DataContainer(names, sizes, host_item); + (void)d_container_.emplace_back(dc); +} + +void DumpGraphBoundary::DataDrop(device::DeviceContext *device_ctx) { + if (!enable_) { + return; + } + MS_LOG(INFO) << "Entry drop ======="; + device_ctx->device_res_manager_->SyncAllStreams(); + auto dir_path = FileUtils::CreateNotExistDirs("./dump_graph_boundary"); + if (!dir_path.has_value()) { + MS_LOG(WARNING) << "Create dump graph boundary path failed."; + d_container_.clear(); + return; + } + auto dir_path_pre = dir_path.value(); + for (auto &dc : d_container_) { + for (size_t i = 0; i < dc.name_.size(); ++i) { + auto name = dc.name_[i]; + auto size = dc.size_[i]; + auto data = dc.data_[i]; + std::string file_name = std::string(dir_path_pre) + "/" + name; + MS_LOG(INFO) << "name: " << file_name << ", host addr: " << data << ", host size: " << size; + std::ofstream outFile(file_name, std::ios::out | std::ios::trunc | std::ios::binary); + if (!outFile.is_open()) { + MS_LOG(ERROR) << "Failed to open file for writing." << file_name; + d_container_.clear(); + return; + } + outFile.write(reinterpret_cast(data), SizeToLong(size)); + outFile.close(); + } + dc.Clear(); + } +} + +void DumpGraphBoundary::InitEnableFlag() { + auto dgb_flag = common::GetEnv("MS_MEMORY_STATISTIC"); + if (dgb_flag.find("kernel") != std::string::npos) { + spec_kernel_graph_ = dgb_flag; + enable_ = true; + } else { + enable_ = dgb_flag == "3"; + } +} + +} // namespace mindspore::datadump diff --git a/mindspore/ccsrc/debug/data_dump/dump_graph_boundary.h b/mindspore/ccsrc/debug/data_dump/dump_graph_boundary.h new file mode 100644 index 0000000000000000000000000000000000000000..717428b01313a25ffef3b32f929904f71d4f3718 --- /dev/null +++ b/mindspore/ccsrc/debug/data_dump/dump_graph_boundary.h @@ -0,0 +1,71 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_DUMP_GRAPH_BOUNDARY_H +#define MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_DUMP_GRAPH_BOUNDARY_H + +#include +#include +#include + +#include "include/backend/device_address.h" +#include "include/backend/kernel_graph.h" +#include "runtime/hardware/device_context.h" + +namespace mindspore::datadump { +class BACKEND_EXPORT DumpGraphBoundary { + public: + static DumpGraphBoundary &GetInstance(); + void HookDumpTask(const KernelGraphPtr &kernel_graph, const std::vector &device_addr, + const std::vector> &nodes, void *stream, bool is_input = False); + void DataDrop(device::DeviceContext *device_ctx); + void InitEnableFlag(); + + class DataContainer { + public: + DataContainer(std::vector name, std::vector size, std::vector data) + : name_(std::move(name)), size_(std::move(size)), data_(std::move(data)) {} + ~DataContainer() = default; + void Clear() { + name_.clear(); + size_.clear(); + for (auto &data : data_) { + if (data != nullptr) { + delete[] data; + data = nullptr; + } + } + data_.clear(); + } + + friend class DumpGraphBoundary; + + private: + std::vector name_{}; + std::vector size_{}; + std::vector data_{}; + }; + + private: + DumpGraphBoundary() = default; + ~DumpGraphBoundary() = default; + bool enable_{false}; + std::string spec_kernel_graph_{""}; + std::vector d_container_{}; +}; +} // namespace mindspore::datadump + +#endif // MINDSPORE_MINDSPORE_CCSRC_DEBUG_DATA_DUMP_DUMP_GRAPH_BOUNDARY_H diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc index 5257d896401b8854c4d66ee1823c0fa1e5600859..5ae26bc876e6474a1b00bcd462a1e3c3e4dcff1f 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc +++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc @@ -51,6 +51,7 @@ constexpr auto kDumpInputAndOutput = 0; constexpr auto kDumpInputOnly = 1; constexpr auto kDumpOutputOnly = 2; constexpr auto kMindsporeDumpConfig = "MINDSPORE_DUMP_CONFIG"; +constexpr auto kModel = "model_name"; } // namespace namespace mindspore { @@ -378,6 +379,10 @@ void DumpJsonParser::ParseCommonDumpSetting(const nlohmann::json &content) { } ParseDumpMode(*dump_mode); + if (IsAclDump() && *dump_mode == 1) { + auto model = CheckJsonKeyExist(*common_dump_settings, kModel); + ParseModel(*model); + } ParseDumpPath(*common_dump_settings); // Pass in the whole json string to parse because the path field is optional. ParseNetName(*net_name); ParseIteration(*iteration); @@ -638,6 +643,11 @@ void DumpJsonParser::ParseKernels(const nlohmann::json &content) { } } +void DumpJsonParser::ParseModel(const nlohmann::json &content) { + CheckJsonStringType(content, kModel); + model_json_ = content; +} + void DumpJsonParser::ParseSupportDevice(const nlohmann::json &content) { CheckJsonArrayType(content, kSupportDevice); for (const auto &device : content) { diff --git a/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_array_ops.cc b/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_array_ops.cc index 92d47434722edc218a80cd298cff2c36c6bbabd7..88f230a48308dcbf367fa85a0dfb5e22f6cca1f2 100644 --- a/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_array_ops.cc +++ b/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_array_ops.cc @@ -185,7 +185,7 @@ NodePtrList SegmentMinOrMaxGrad(BpropBuilder *ib) { const int64_t max_len = 1000000; auto num_selected = ib->Emit("SegmentSum", {ib->Cast(is_selected, kFloat32), segment_ids}, {{"max_length", MakeValue(max_len)}}); - auto weighted_grads = ib->Div(dout, num_selected); + auto weighted_grads = ib->Cast(ib->Div(dout, num_selected), ib->GetDtype(dout)); auto gathered_grads = ib->Gather(weighted_grads, segment_ids, zero_value); auto dx = ib->Select(is_selected, gathered_grads, ib->ZerosLike(input_x)); if (input_x_type->type_id() != kNumberTypeFloat32) { @@ -925,7 +925,10 @@ REG_BPROP_BUILDER("Select").SetUnusedInputs({i3}).SetBody(BODYFUNC(ib) { auto dout = ib->GetInput(kIndex4); auto dx = x->need_compute_grad_out() ? ib->Select(cond, dout, ib->ZerosLike(x)) : ib->OutZeros(x); auto dy = x->need_compute_grad_out() ? ib->Select(cond, ib->ZerosLike(y), dout) : ib->OutZeros(y); - return {ib->OutZeros(cond), dx, dy}; + auto bc_x = BinopGradCommon(ib, cond, x, dout, dx); + auto bc_y = BinopGradCommon(ib, cond, y, dout, dy); + auto ret = BinopGradCommon(ib, x, y, bc_x[kIndex1], bc_y[kIndex1]); + return {ib->OutZeros(cond), ret[kIndex0], ret[kIndex1]}; }); REG_BPROP_BUILDER("OnesLike").SetUnusedInputs({i0, i1, i2}).SetBody(ReturnZeros); @@ -1554,6 +1557,61 @@ REG_BPROP_BUILDER("Split").SetUnusedInputs({i0, i3}).SetBody(BODYFUNC(ib) { return {dx, ib->OutZeros(axis), ib->OutZeros(output_num)}; }); +DEF_PURE_SHAPE_CALC(g_slice_ext) + .SetCalc([](const ShapeArray &inputs) -> ShapeArray { + auto x_shape = inputs.at(0); + auto axis = inputs.at(1); + auto begin = inputs.at(2); + auto end = inputs.at(3); + + MS_EXCEPTION_IF_CHECK_FAIL(axis.size() == 1, "axis should be a scalar."); + auto axis_value = axis[0]; + MS_EXCEPTION_IF_CHECK_FAIL(begin.size() == 1, "begin should be a scalar."); + auto begin_value = begin[0]; + MS_EXCEPTION_IF_CHECK_FAIL(end.size() == 1, "end should be a scalar."); + auto end_value = end[0]; + + axis_value = axis_value < 0 ? axis_value + x_shape.size() : axis_value; + auto length_value = end_value - begin_value; + begin_value = begin_value < 0 ? begin_value + x_shape[axis_value] : begin_value; + end_value = begin_value + length_value; + + auto begin_shape = x_shape; + begin_shape[axis_value] = begin_value; + auto end_shape = x_shape; + end_shape[axis_value] = end_shape[axis_value] - end_value; + + return {begin_shape, end_shape}; + }) + .SetInfer([](const ShapeArray &inputs, const HashSet &unknown_inputs) -> std::vector { + auto x = inputs.at(0); + auto axis = inputs.at(1); + auto begin = inputs.at(2); + auto end = inputs.at(3); + if (!unknown_inputs.empty() || IsDynamicRank(x) || IsDynamicRank(axis) || IsDynamicRank(begin) || + IsDynamicRank(end)) { + return {-1, -1}; + } + auto size = SizeToLong(inputs.at(0).size()); + return {size, size}; + }); + +REG_BPROP_BUILDER("SliceExt").SetUnusedInputs({i5}).SetBody(BODYFUNC(ib) { + auto x = ib->GetInput(kIndex0); + auto axis = ib->GetInput(kIndex1); + auto begin = ib->GetInput(kIndex2); + auto end = ib->GetInput(kIndex3); + auto step = ib->GetInput(kIndex4); + auto dout = ib->GetInput(kIndex6); + auto res = ib->ShapeCalc(g_slice_ext, {x, axis, begin, end}, {1, 2, 3}); + auto dx = + ib->Emit(kConcatOpName, {ib->MakeTuple({ib->Emit("Zeros", {res[0], ib->Value(ib->GetDtypeId(dout))}), dout, + ib->Emit("Zeros", {res[1], ib->Value(ib->GetDtypeId(dout))})}), + axis}); + + return {dx, ib->OutZeros(axis), ib->OutZeros(begin), ib->OutZeros(end), ib->OutZeros(step)}; +}); + DEF_PURE_SHAPE_CALC(g_tile) .SetCalc([](const ShapeArray &inputs) -> ShapeArray { // {x_shape, dims} diff --git a/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_math_ops.cc b/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_math_ops.cc index f4b3fccd6ef370099dfad9fb6318b3d6471ab699..5b74e1fbb5af350e74029e54b5d04c42d5994a0c 100644 --- a/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_math_ops.cc +++ b/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_math_ops.cc @@ -104,7 +104,7 @@ NodePtrList MinimumMaximumGrad(BpropBuilder *ib, const NodePtr &x, const NodePtr if (!x->need_compute_grad_out() && !y->need_compute_grad_out()) { return {grad_x, grad_y}; } - auto half_dout = ib->Div(dout, ib->Tensor(2, ib->GetDtype(dout))); + auto half_dout = ib->Cast(ib->Div(dout, ib->Tensor(2, ib->GetDtype(dout))), ib->GetDtype(x)); auto equal_mask = ib->Equal(x, y); auto zeros = ib->Tensor(0, ib->GetDtype(dout)); auto is_less = ib->Less(x, y); @@ -163,11 +163,11 @@ NodePtrList BpropAddcCommon(BpropBuilder *ib, const std::string &op_name, const NodePtr dvalue = nullptr; if (op_name == "Addcdiv") { constexpr int64_t const_val = -2; - inner_out = ib->Add((ib->Mul(value, ib->Div(x1, x2))), input_data); + inner_out = ib->Add((ib->Mul(value, ib->Cast(ib->Div(x1, x2), ib->GetDtype(x1)))), input_data); dx2 = ib->Neg(ib->Mul(ib->Mul(ib->Mul(x1, value), ib->Pow(x2, ib->Tensor(const_val, ib->GetDtype(x2)))), dinput_data)); - dx1 = ib->Mul(dinput_data, ib->Div(value, x2)); - dvalue = ib->Mul(dinput_data, ib->Div(x1, x2)); + dx1 = ib->Mul(dinput_data, ib->Cast(ib->Div(value, x2), ib->GetDtype(value))); + dvalue = ib->Mul(dinput_data, ib->Cast(ib->Div(x1, x2), ib->GetDtype(x1))); } else { dx1 = ib->Mul(dout, ib->Mul(value, x2)); dx2 = ib->Mul(dout, ib->Mul(value, x1)); @@ -798,6 +798,42 @@ REG_BPROP_BUILDER("Div").SetUnusedInputs({i0}).SetBody(BODYFUNC(ib) { return result; }); +REG_BPROP_BUILDER("DivMod").SetUnusedInputs({i0}).SetBody(BODYFUNC(ib) { + auto x = ib->GetInput(kIndex0); + auto y = ib->GetInput(kIndex1); + auto rounding_mode = ib->GetInput(kIndex2); + + auto mode_value_ptr = rounding_mode->BuildValue(); + auto mode_opt = mindspore::ops::GetScalarValue(mode_value_ptr); + if (mode_opt.has_value()) { + return {ib->OutZeros(x), ib->OutZeros(y), ib->OutZeros(rounding_mode)}; + } + + auto mode_type = rounding_mode->abstract()->BuildType(); + MS_EXCEPTION_IF_NULL(mode_type); + if (mode_type->isa()) { + auto out = ib->GetInput(kIndex3); + auto dout = ib->GetInput(kIndex4); + NodePtr bc_dx = nullptr; + NodePtr bc_dy = nullptr; + auto x_dtype_id = ib->GetDtypeId(x); + bc_dx = ib->Div(dout, y); + if (y->need_compute_grad_out()) { + bc_dy = -(bc_dx * out); + } + std::vector result = BinopGradCommon(ib, x, y, bc_dx, bc_dy); + bool is_complex = (x_dtype_id == kNumberTypeComplex64 || x_dtype_id == kNumberTypeComplex128); + if (is_complex) { + result[kIndex0] = ib->Conj(result[kIndex0]); + result[kIndex1] = y->need_compute_grad_out() ? ib->Conj(result[kIndex1]) : ib->OutZeros(y); + } + result.emplace_back(ib->OutZeros(rounding_mode)); + return result; + } else { + MS_LOG(EXCEPTION) << "DivMod abstract failed."; + } +}); + REG_BPROP_BUILDER("BitwiseAnd").SetUnusedInputs({i0, i1, i2, i3}).SetBody(ReturnZeros); REG_BPROP_BUILDER("BitwiseOr").SetUnusedInputs({i0, i1, i2, i3}).SetBody(ReturnZeros); REG_BPROP_BUILDER("BitwiseXor").SetUnusedInputs({i0, i1, i2, i3}).SetBody(ReturnZeros); @@ -1280,6 +1316,7 @@ REG_BPROP_BUILDER("Inv").SetUnusedInputs({i0}).SetBody(BODYFUNC(ib) { }); REG_BPROP_BUILDER("LinSpace").SetUnusedInputs({i0, i1, i2, i3, i4}).SetBody(ReturnZeros); +REG_BPROP_BUILDER("LinSpaceExt").SetUnusedInputs({i0, i1, i2, i3, i4, i5}).SetBody(ReturnZeros); REG_BPROP_BUILDER("IndexAdd").SetUnusedInputs({i0, i2, i3}).SetBody(BODYFUNC(ib) { auto indices = ib->GetInput(kIndex1); @@ -1848,7 +1885,7 @@ REG_BPROP_BUILDER("ReduceMean").SetUnusedInputs({i0, i3}).SetBody(BODYFUNC(ib) { return {dx, ib->OutZeros(axis), ib->OutZeros(keep_dims)}; }); -REG_BPROP_BUILDER("ArgMaxWithValue").SetBody(BODYFUNC(ib) { +REG_BPROP_BUILDER("ArgMaxWithValue").SetUnusedInputs({i0}).SetBody(BODYFUNC(ib) { auto x = ib->GetInput(kIndex0); auto axis = ib->GetInput(kIndex1); auto keep_dims = ib->GetInput(kIndex2); @@ -1858,7 +1895,7 @@ REG_BPROP_BUILDER("ArgMaxWithValue").SetBody(BODYFUNC(ib) { return {dx, ib->OutZeros(axis), ib->OutZeros(keep_dims)}; }); -REG_BPROP_BUILDER("ArgMinWithValue").SetBody(BODYFUNC(ib) { +REG_BPROP_BUILDER("ArgMinWithValue").SetUnusedInputs({i0}).SetBody(BODYFUNC(ib) { auto x = ib->GetInput(kIndex0); auto axis = ib->GetInput(kIndex1); auto keep_dims = ib->GetInput(kIndex2); @@ -2477,27 +2514,28 @@ REG_BPROP_BUILDER("ReduceStd").SetBody(BODYFUNC(ib) { auto dx = ib->Sub(x, mean); dx = ib->Mul(dx, std_d); - dx = ib->Div(dx, std); + auto dx_type = ib->GetDtype(dx); + dx = ib->Cast(ib->Div(dx, std), dx_type); auto unbiased_value = unbiased->BuildValue(); auto unbiased_opt = ops::GetScalarValue(unbiased_value); if (unbiased_opt.has_value()) { if (unbiased_opt.value()) { - dx = ib->Div(dx, ib->Cast(res[1], ib->GetDtype(dx))); + dx = ib->Cast(ib->Div(dx, ib->Cast(res[1], ib->GetDtype(dx))), dx_type); } else { - dx = ib->Div(dx, ib->Cast(res[2], ib->GetDtype(dx))); + dx = ib->Cast(ib->Div(dx, ib->Cast(res[2], ib->GetDtype(dx))), dx_type); } } else { auto unbiased_true_branch = [&dx, &res](Emitter *e) -> NodePtrList { - return {e->Div(dx, e->Cast(res[1], dx->dtype()))}; + return {e->Cast(e->Div(dx, e->Cast(res[1], dx->dtype())), dx->dtype())}; }; auto unbiased_false_branch = [&dx, &res](Emitter *e) -> NodePtrList { - return {e->Div(dx, e->Cast(res[2], dx->dtype()))}; + return {e->Cast(e->Div(dx, e->Cast(res[2], dx->dtype())), dx->dtype())}; }; auto unbiased_cond = ib->Equal(unbiased, ib->Value(true)); dx = ib->Conditional(unbiased_cond, unbiased_true_branch, unbiased_false_branch); } - auto temp = ib->Div(mean_d, ib->Cast(res[2], ib->GetDtype(mean_d))); + auto temp = ib->Cast(ib->Div(mean_d, ib->Cast(res[2], ib->GetDtype(mean_d))), ib->GetDtype(mean_d)); dx = ib->Add(dx, temp); return {dx, ib->OutZeros(axis), ib->OutZeros(unbiased), ib->OutZeros(keep_dims)}; }); diff --git a/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_nn_ops.cc b/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_nn_ops.cc index 8fb0687424c6fe50589df5889c04ecf0ca3990cb..ddced9baabb44d6af98920639038eafcf625d429 100644 --- a/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_nn_ops.cc +++ b/mindspore/ccsrc/frontend/expander/bprop/grad_ops/grad_nn_ops.cc @@ -733,6 +733,48 @@ REG_BPROP_BUILDER("MaxPoolWithArgmaxV2").SetBody(BODYFUNC(ib) { return {dx}; }); +REG_BPROP_BUILDER("MaxPoolWithMask").SetBody(BODYFUNC(ib) { + auto x = ib->GetInput(kIndex0); + auto kernel_size = ib->GetInput(kIndex1); + auto strides = ib->GetInput(kIndex2); + auto pads = ib->GetInput(kIndex3); + auto dilation = ib->GetInput(kIndex4); + auto ceil_mode = ib->GetInput(kIndex5); + auto argmax_type = ib->GetInput(kIndex6); + auto out = ib->GetInput(kIndex7); + auto dout = ib->GetInput(kIndex8); + auto dx = ib->Emit("MaxPoolGradWithMask", {x, ib->TupleGetItem(dout, i0), ib->TupleGetItem(out, i1), kernel_size, + strides, pads, dilation, ceil_mode, argmax_type}); + auto g_kernel_size = ib->OutZeros(kernel_size); + auto g_strides = ib->OutZeros(strides); + auto g_pads = ib->OutZeros(pads); + auto g_dilation = ib->OutZeros(dilation); + auto g_ceil_mode = ib->OutZeros(ceil_mode); + auto g_argmax_type = ib->OutZeros(argmax_type); + return {dx, g_kernel_size, g_strides, g_pads, g_dilation, g_ceil_mode, g_argmax_type}; +}); + +REG_BPROP_BUILDER("MaxPoolWithIndices").SetBody(BODYFUNC(ib) { + auto x = ib->GetInput(kIndex0); + auto kernel_size = ib->GetInput(kIndex1); + auto strides = ib->GetInput(kIndex2); + auto pads = ib->GetInput(kIndex3); + auto dilation = ib->GetInput(kIndex4); + auto ceil_mode = ib->GetInput(kIndex5); + auto argmax_type = ib->GetInput(kIndex6); + auto out = ib->GetInput(kIndex7); + auto dout = ib->GetInput(kIndex8); + auto dx = ib->Emit("MaxPoolGradWithIndices", {x, ib->TupleGetItem(dout, i0), ib->TupleGetItem(out, i1), kernel_size, + strides, pads, dilation, ceil_mode, argmax_type}); + auto g_kernel_size = ib->OutZeros(kernel_size); + auto g_strides = ib->OutZeros(strides); + auto g_pads = ib->OutZeros(pads); + auto g_dilation = ib->OutZeros(dilation); + auto g_ceil_mode = ib->OutZeros(ceil_mode); + auto g_argmax_type = ib->OutZeros(argmax_type); + return {dx, g_kernel_size, g_strides, g_pads, g_dilation, g_ceil_mode, g_argmax_type}; +}); + REG_BPROP_BUILDER("GroupNorm").SetUnusedInputs({i4}).SetBody(BODYFUNC(ib) { auto x = ib->GetInput(kIndex0); auto num_groups = ib->GetInput(kIndex1); @@ -2365,5 +2407,24 @@ REG_BPROP_BUILDER("RmsNorm").SetBody((BODYFUNC(ib) { return {dx, dgamma}; })); +REG_BPROP_BUILDER("MultiScaleDeformableAttnFunctionV2").SetBody((BODYFUNC(ib) { + auto value = ib->GetInput(kIndex0); + auto value_spatial_shapes = ib->GetInput(kIndex1); + auto value_level_start_index = ib->GetInput(kIndex2); + auto sampling_locations = ib->GetInput(kIndex3); + auto attention_weights = ib->GetInput(kIndex4); + auto dout = ib->GetInput(kIndex6); + sampling_locations = ib->Transpose(sampling_locations, {0, 1, 2, 3, 5, 4}); + auto grad = ib->Emit("MultiScaleDeformableAttentionV2Grad", {value, value_spatial_shapes, value_level_start_index, + sampling_locations, attention_weights, dout}); + auto grad_value = ib->TupleGetItem(grad, kIndex0); + auto grad_spatial_shapes = ib->ZerosLike(value_spatial_shapes); + auto grad_level_start_index = ib->ZerosLike(value_level_start_index); + auto grad_sampling_loc = ib->TupleGetItem(grad, kIndex1); + auto grad_attn_weight = ib->TupleGetItem(grad, kIndex2); + grad_sampling_loc = ib->Transpose(grad_sampling_loc, {0, 1, 2, 3, 5, 4}); + return {grad_value, grad_spatial_shapes, grad_level_start_index, grad_sampling_loc, grad_attn_weight}; +})); + REG_BPROP_BUILDERS_END } // namespace mindspore::expander::bprop diff --git a/mindspore/ccsrc/frontend/operator/composite/composite.cc b/mindspore/ccsrc/frontend/operator/composite/composite.cc index db96e220e9fee02ecc28b94125ba4ebda41aa100..e59852d16604838a905bc1ffae36026ad54ee1c5 100644 --- a/mindspore/ccsrc/frontend/operator/composite/composite.cc +++ b/mindspore/ccsrc/frontend/operator/composite/composite.cc @@ -1425,7 +1425,9 @@ DebugInfoPtr CheckVmapFunc(const AbstractBasePtr &fn_arg, int *nparam, size_t *c } else { AbstractFunctionPtr fn = dyn_cast(fn_arg); if (fn == nullptr) { - MS_LOG(EXCEPTION) << "'VmapOperation' arg0 must be a 'Function' or 'Cell', but got " << fn_arg->ToString() << "."; + MS_LOG(EXCEPTION) << "'VmapOperation' arg0 must be a 'Function' or 'Cell', but got " << fn_arg->ToString() + << ".\nIf you are using a user-defined package, assuming the module name is demo, please try " + << "setting 'export MS_JIT_MODULES=demo'."; } auto partial_fn = dyn_cast(fn); if (partial_fn != nullptr) { diff --git a/mindspore/ccsrc/frontend/operator/composite/unpack_call.cc b/mindspore/ccsrc/frontend/operator/composite/unpack_call.cc index 14c6dd6db463c376c85d4177bb98996cc122a476..7a26744a3230b6a4b81d32bfea7a7b304033db65 100644 --- a/mindspore/ccsrc/frontend/operator/composite/unpack_call.cc +++ b/mindspore/ccsrc/frontend/operator/composite/unpack_call.cc @@ -20,16 +20,19 @@ #include "mindspore/core/ops/structure_ops.h" #include "mindspore/core/ops/sequence_ops.h" +#include "mindspore/core/ops/framework_ops.h" #include "abstract/abstract_value.h" #include "abstract/dshape.h" #include "frontend/operator/cc_implementations.h" #include "ir/anf.h" #include "frontend/optimizer/opt.h" #include "include/common/pybind_api/api_register.h" +#include "pipeline/jit/ps/fallback.h" namespace mindspore { // namespace to support composite operators definition namespace prim { +using mindspore::abstract::AbstractAny; using mindspore::abstract::AbstractBase; using mindspore::abstract::AbstractDictionary; using mindspore::abstract::AbstractDictionaryPtr; @@ -41,6 +44,75 @@ using mindspore::abstract::AbstractListPtr; using mindspore::abstract::AbstractTuple; using mindspore::abstract::AbstractTuplePtr; +FuncGraphPtr ConvertUnpackToPyInterpretFuncGraph(const AbstractBasePtrList &args_abs_list) { + // No need to check, check will be done in infer. + auto res_graph = std::make_shared(); + res_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); + res_graph->debug_info()->set_name("UnpackCallToPyInterpret"); + + // Generate pyinterpret node's inputs + AnfNodePtrList local_key_inputs = {NewValueNode(prim::kPrimMakeTuple)}; + AnfNodePtrList local_value_inputs = {NewValueNode(prim::kPrimMakeTuple)}; + + // Get function + std::stringstream script_buffer; + const std::string call_func_str = "__call_func_str__"; + script_buffer << call_func_str << "("; + (void)local_key_inputs.emplace_back(NewValueNode(call_func_str)); + (void)local_value_inputs.emplace_back(res_graph->add_parameter()); + + // Get input parameters: + // UnpackCall(__call_func_str__, (a, b), args(AbstractAny), {kwargs}) + // -> PyInterpret(__call_func_str__, a, b, args, kwargs) + // -> eval(__call_func_str__(a, b, *args, **kwargs)) + // 1. Process stable parameters, must be a tuple + size_t index = 1; + if (args_abs_list[index]->isa()) { + auto arg_tuple = args_abs_list[index++]->cast(); + AnfNodePtr para_tuple = res_graph->add_parameter(); + for (size_t i = 0; i < arg_tuple->size(); ++i) { + const auto param_str = "__input__" + std::to_string(i) + "__"; + script_buffer << param_str << ","; + (void)local_key_inputs.emplace_back(NewValueNode(param_str)); + (void)local_value_inputs.emplace_back( + res_graph->NewCNode({NewValueNode(prim::kPrimTupleGetItem), para_tuple, NewValueNode(SizeToLong(i))})); + } + } + + // 2. Process *args(AbstractAny) + if (index < args_abs_list.size() && args_abs_list[index]->isa()) { + const auto param_str = "args"; + script_buffer << "*" << param_str << ","; + AnfNodePtrList abstract_any_inputs = {NewValueNode(prim::kPrimMakeTuple)}; + while (index < args_abs_list.size() && args_abs_list[index]->isa()) { + (void)abstract_any_inputs.emplace_back(res_graph->add_parameter()); + index++; + } + (void)local_key_inputs.emplace_back(NewValueNode(param_str)); + (void)local_value_inputs.emplace_back(res_graph->NewCNode(abstract_any_inputs)); + } + + // 3. Process **kwargs, must be a dictionary + if (index < args_abs_list.size() && args_abs_list[index]->isa()) { + const auto param_str = "kwargs"; + script_buffer << "**" << param_str; + (void)local_key_inputs.emplace_back(NewValueNode(param_str)); + (void)local_value_inputs.emplace_back(res_graph->add_parameter()); + } + script_buffer << ")"; + + // Set func_graph output as generated pyinterpret node + const auto &script = script_buffer.str(); + const auto key_tuple = res_graph->NewCNode(local_key_inputs); + const auto value_tuple = res_graph->NewCNode(local_value_inputs); + auto local_dict_node = res_graph->NewCNode({NewValueNode(prim::kPrimMakeDict), key_tuple, value_tuple}); + auto res = fallback::CreatePyInterpretCNode(res_graph, script, py::dict(), local_dict_node); + res_graph->set_output(res); + + MS_LOG(DEBUG) << "Convert UnpackCall funcgraph as PyInterpret: " << res->DebugString(); + return res_graph; +} + FuncGraphPtr UnpackCall::GenerateFuncGraph(const AbstractBasePtrList &args_abs_list) { size_t arg_length = args_abs_list.size(); const size_t min_args_size = 2; @@ -48,6 +120,23 @@ FuncGraphPtr UnpackCall::GenerateFuncGraph(const AbstractBasePtrList &args_abs_l MS_LOG(INTERNAL_EXCEPTION) << "The UnpackCall operator requires arguments >=2, but got " << arg_length << "."; } + bool exist_any = false; + std::for_each(args_abs_list.begin() + 1, args_abs_list.end(), [&exist_any](const AbstractBasePtr &abs) { + MS_EXCEPTION_IF_NULL(abs); + if (abs->isa()) { + exist_any = true; + return; + } + if (!abs->isa() && !abs->isa() && !abs->isa()) { + MS_LOG(INTERNAL_EXCEPTION) << "The arguments of UnpackCall operator should be tuple, list or dict, but got " + << abs->ToString(); + } + }); + if (exist_any) { + MS_LOG(DEBUG) << "The arguments of UnpackCall operator should not be AbstractAny, convert to PyInterpret"; + return ConvertUnpackToPyInterpretFuncGraph(args_abs_list); + } + // No need to check, check will be done in infer. auto res_graph = std::make_shared(); res_graph->set_flag(FUNC_GRAPH_FLAG_CORE, true); @@ -56,7 +145,7 @@ FuncGraphPtr UnpackCall::GenerateFuncGraph(const AbstractBasePtrList &args_abs_l AnfNodePtr fn_node = res_graph->add_parameter(); std::vector elems; elems.push_back(fn_node); - for (size_t index = 1; index < arg_length; index++) { + for (size_t index = 1; index < arg_length; ++index) { MS_EXCEPTION_IF_NULL(args_abs_list[index]); if (args_abs_list[index]->isa()) { auto arg_tuple = args_abs_list[index]->cast(); @@ -72,7 +161,7 @@ FuncGraphPtr UnpackCall::GenerateFuncGraph(const AbstractBasePtrList &args_abs_l elems.push_back( res_graph->NewCNode({NewValueNode(prim::kPrimListGetItem), para_list, NewValueNode(SizeToLong(i))})); } - } else if (args_abs_list[index]->isa()) { + } else { AbstractDictionaryPtr arg_dict = args_abs_list[index]->cast(); AnfNodePtr para_dict = res_graph->add_parameter(); auto dict_elems = arg_dict->elements(); @@ -85,9 +174,6 @@ FuncGraphPtr UnpackCall::GenerateFuncGraph(const AbstractBasePtrList &args_abs_l res_graph->NewCNode({NewValueNode(prim::kPrimDictGetItem), para_dict, NewValueNode(key_value)}); return res_graph->NewCNode({NewValueNode(prim::kPrimMakeKeywordArg), NewValueNode(key_value), dict_get_item}); }); - } else { - MS_LOG(INTERNAL_EXCEPTION) << "The arguments of UnpackCall operator should be tuple, list or dict, but got " - << args_abs_list[index]->ToString(); } } // Add to order list to trace if fn_node had side effect. diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h index 62e5f193ca19e776ecf5b4391f4bf3b8aab70bd7..62eae7b56ec4850a83c26bfe24f22598cf40173b 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/reshape_eliminate.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2024 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -47,13 +47,13 @@ class ReshapeSameShapeEliminater : public AnfVisitor { } auto src_shape_abs = x_->abstract(); - if (src_shape_abs == nullptr) { + if (src_shape_abs == nullptr || src_shape_abs->isa()) { return nullptr; } auto src_shape = src_shape_abs->GetShapeTrack(); auto tgt_shape_abs = node->abstract(); - if (tgt_shape_abs == nullptr) { + if (tgt_shape_abs == nullptr || tgt_shape_abs->isa()) { return nullptr; } auto tgt_shape = tgt_shape_abs->GetShapeTrack(); diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc b/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc index 178c85d708b1d47e23f2043beee1fb8236c3fc8b..40687aa44eca80232370361a7e03e68df29e7eef 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/graph_utils.cc @@ -1014,16 +1014,6 @@ Status MergeEntireShapeForDynamic(const FuncGraphPtr &root) { UpdateShapeNode(cnode, *func_graph); } } - for (auto const &node : all_nodes) { - if (!node->isa()) { - continue; - } - auto cnode = node->cast(); - if (!IsShapeOp(cnode)) { - continue; - } - UpdateShapeNode(cnode, root); - } } return Status::SUCCESS; } diff --git a/mindspore/ccsrc/include/backend/debug/data_dump/acl_dump_json_writer.h b/mindspore/ccsrc/include/backend/debug/data_dump/acl_dump_json_writer.h index effdc0e5ebadd4a53bef7a46d898f2baef9f6895..f284cdf55b007244720ebb6265aa4682ee52279e 100644 --- a/mindspore/ccsrc/include/backend/debug/data_dump/acl_dump_json_writer.h +++ b/mindspore/ccsrc/include/backend/debug/data_dump/acl_dump_json_writer.h @@ -55,6 +55,7 @@ class BACKEND_EXPORT AclDumpJsonWriter { std::string dump_base_path_ = ""; std::string dump_mode_ = "all"; nlohmann::json layer_ = nlohmann::json::array(); + nlohmann::json model_name_ = nlohmann::json::array(); std::string dump_scene_ = "normal"; std::string dump_debug_ = "off"; }; // class AclDumpJsonWriter diff --git a/mindspore/ccsrc/include/backend/debug/data_dump/dump_json_parser.h b/mindspore/ccsrc/include/backend/debug/data_dump/dump_json_parser.h index 3bb3f88e3786f978694b32b68682d1e123956a85..d1a6699efd9d0a950a95d525e030456d3b696f08 100644 --- a/mindspore/ccsrc/include/backend/debug/data_dump/dump_json_parser.h +++ b/mindspore/ccsrc/include/backend/debug/data_dump/dump_json_parser.h @@ -98,6 +98,7 @@ class BACKEND_EXPORT DumpJsonParser { }; static bool IsAclDump(); nlohmann::json GetKernelsJson() { return kernels_json_; } + nlohmann::json GetModelJson() { return model_json_; } private: DumpJsonParser() = default; @@ -127,6 +128,7 @@ class BACKEND_EXPORT DumpJsonParser { bool already_parsed_{false}; std::string dump_layer_{""}; nlohmann::json kernels_json_ = nlohmann::json::array(); + nlohmann::json model_json_ = nlohmann::json::array(); // Save graphs for dump. std::vector graphs_; @@ -143,6 +145,7 @@ class BACKEND_EXPORT DumpJsonParser { void ParseIteration(const nlohmann::json &content); void ParseInputOutput(const nlohmann::json &content); void ParseKernels(const nlohmann::json &content); + void ParseModel(const nlohmann::json &content); void ParseSupportDevice(const nlohmann::json &content); bool ParseEnable(const nlohmann::json &content) const; void ParseOpDebugMode(const nlohmann::json &content); diff --git a/mindspore/ccsrc/include/backend/device_address.h b/mindspore/ccsrc/include/backend/device_address.h index b3a3afd943c2a73c8089c4da67f0c7f0df1fadc1..352ab2d352ec490595f941913ec759d750b8b24f 100644 --- a/mindspore/ccsrc/include/backend/device_address.h +++ b/mindspore/ccsrc/include/backend/device_address.h @@ -166,6 +166,8 @@ class DeviceAddress : public mindspore::DeviceSync { virtual bool AsyncHostToDevice(const ShapeVector &, size_t, TypeId, const void *, size_t) const { return true; } // Asynchronously copy device memory to host side. virtual bool AsyncDeviceToHost(const ShapeVector &, size_t, TypeId, void *, size_t) const { return true; } + // Asynchronously copy device memory to host side. + virtual bool AsyncDeviceToHost(void *host_ptr, size_t size, void *stream) const { return true; } // Synchronously copy device memory to device side. virtual bool SyncDeviceToDevice(const DeviceSync *) const { return true; } virtual bool SyncDeviceToDevice(const ShapeVector &, size_t, TypeId, const void *, const std::string &) const { diff --git a/mindspore/ccsrc/include/common/profiler.h b/mindspore/ccsrc/include/common/profiler.h index dc2496106886ea097b627fb21cd2ec287648d951..71d954aff923bb5e95952a97178b258926976634 100644 --- a/mindspore/ccsrc/include/common/profiler.h +++ b/mindspore/ccsrc/include/common/profiler.h @@ -87,6 +87,7 @@ enum class ProfilerEvent { kPyNativeFrontendTask, kPyNativeBackendTask, kPyNativeDeviceTask, + kPyNativeLaunchTask, kPyNativeBpropTask, // PyNative inner Event kPyNativeGilAcquire, diff --git a/mindspore/ccsrc/include/common/utils/utils.h b/mindspore/ccsrc/include/common/utils/utils.h index cf446332c66b181aeb4d45b2a3c27ae47e059d9a..2d44fb8a824a3e9319ce6476c84ac96e9a3417e6 100644 --- a/mindspore/ccsrc/include/common/utils/utils.h +++ b/mindspore/ccsrc/include/common/utils/utils.h @@ -169,7 +169,7 @@ constexpr auto kAttrRecordEvent = "record_event"; constexpr auto kAttrWaitEvent = "wait_event"; constexpr auto kAttrRecordEventStream = "record_event_stream"; constexpr auto kAttrWaitEventStream = "wait_event_stream"; -constexpr auto kAttrRecrodEventStreamPair = "record_wait_stream_pair"; +constexpr auto kAttrRecordWaitEventStreamPairId = "record_wait_event_stream_pair_id"; constexpr auto kAttrInputMultiStreamSafe = "input_multi_thread_safe"; constexpr auto kAttrStream = "stream"; constexpr auto kAttrIndex = "index"; diff --git a/mindspore/ccsrc/kernel/kernel.cc b/mindspore/ccsrc/kernel/kernel.cc index 1347244f544a5d6dc6a73addbcce990f4ba20b35..aa4cdd17a4436f50fb1e305c88495eb754b63f7e 100644 --- a/mindspore/ccsrc/kernel/kernel.cc +++ b/mindspore/ccsrc/kernel/kernel.cc @@ -618,7 +618,7 @@ int KernelMod::Resize(const std::vector &inputs, const std::vect const auto &shape = output->GetShapeVector(); if (!IsValidShape(shape)) { - MS_LOG(ERROR) << "Invalid shape:" << mindspore::ToString(shape) << ", kernel name:" << kernel_name(); + MS_LOG(WARNING) << "Invalid shape:" << mindspore::ToString(shape) << ", kernel name:" << kernel_name(); // Note: // If output shape is unknown, the op is a compute-depended op, and the output_size_list_ can be set by default // size: type_size. diff --git a/mindspore/ccsrc/kernel/pyboost/customize/divmod.cc b/mindspore/ccsrc/kernel/pyboost/customize/divmod.cc new file mode 100644 index 0000000000000000000000000000000000000000..c6023bdd55792126ed951b5f6f7db0db3f7119bb --- /dev/null +++ b/mindspore/ccsrc/kernel/pyboost/customize/divmod.cc @@ -0,0 +1,105 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "kernel/pyboost/pyboost_utils.h" +#include "mindspore/core/ops/framework_ops.h" +#include "mindspore/core/ops/math_ops.h" +#include "mindspore/ccsrc/kernel/pyboost/customize/divmod.h" +#include "kernel/pyboost/auto_generate/div.h" +#include "ops/op_enum.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +namespace { +void FloorDivCall(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, const BaseTensorPtr &y_tensor, + void *stream) { + MS_EXCEPTION_IF_NULL(op); + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor, y_tensor); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + + PyBoostUtils::DispatchRun(std::make_shared([op, x_tensor, y_tensor, stream]() { + MS_LOG(DEBUG) << "Run device task DivMod-FloorDiv' start"; + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + + PyBoostUtils::MallocOpInputs(device_context, x_tensor, y_tensor); + PyBoostUtils::MallocOpOutputs(device_context, outputs); + + std::vector input_abs{x_tensor->ToAbstract(), y_tensor->ToAbstract()}; + const auto &input_address_info = + PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), input_abs, x_tensor, y_tensor); + const auto &output_address_info = + PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs); + + const auto primitive = std::make_shared(prim::kPrimFloorDiv->name()); + PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, op->stream_id()); + MS_LOG(DEBUG) << "Run device task DivMod-FloorDiv end"; + })); +} + +void TruncCall(const std::shared_ptr &op, const BaseTensorPtr &input_tensor, void *stream) { + MS_EXCEPTION_IF_NULL(op); + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + + PyBoostUtils::DispatchRun(std::make_shared([op, input_tensor, stream]() { + MS_LOG(DEBUG) << "For 'DivMod', the gpu task 'Trunc' start"; + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + + PyBoostUtils::MallocOpInputs(device_context, input_tensor); + PyBoostUtils::MallocOpOutputs(device_context, outputs); + + std::vector input_abs{input_tensor->ToAbstract()}; + const auto &input_address_info = + PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), input_abs, input_tensor); + const auto &output_address_info = + PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs); + + const auto primitive = std::make_shared(prim::kPrimTrunc->name()); + PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, op->stream_id()); + MS_LOG(DEBUG) << "Run device task DivMod-Trunc end"; + })); +} +} // namespace +tensor::BaseTensorPtr DivModCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const BaseTensorPtr &y_tensor, const std::optional &rounding_mode, + void *stream) { + OpRunner::InferOpOutput(op, x_tensor, y_tensor, rounding_mode); + + auto mode = 0; + if (rounding_mode.has_value()) mode = GetValue(rounding_mode.value()); + + if (mode == ops::RoundingMode::FLOOR) { + FloorDivCall(op, x_tensor, y_tensor, stream); + } else { + const auto &div_op = CREATE_PYBOOST_OP(Div, op->device_context()->device_context_key_.device_name_); + div_op->Call(x_tensor, y_tensor); + + if (mode == ops::RoundingMode::TRUNC) { + TruncCall(op, div_op->outputs()[0], stream); + } else { + op->set_input_abs({x_tensor->ToAbstract()}); + op->set_output_abs(div_op->output_abs()); + op->set_outputs(div_op->outputs()); + } + } + + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/kernel/pyboost/customize/divmod.h b/mindspore/ccsrc/kernel/pyboost/customize/divmod.h new file mode 100644 index 0000000000000000000000000000000000000000..17721c5c8815b1d2d6ad0f5af08299d3352c0e41 --- /dev/null +++ b/mindspore/ccsrc/kernel/pyboost/customize/divmod.h @@ -0,0 +1,36 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ +#define MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr BACKEND_EXPORT DivModCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const BaseTensorPtr &y_tensor, + const std::optional &rounding_mode, void *stream); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ diff --git a/mindspore/ccsrc/kernel/pyboost/customize/identity.cc b/mindspore/ccsrc/kernel/pyboost/customize/identity.cc index 68e889044cd11473c3c44179ec39ed1105d41dc1..447a47c43ea0139966326ecb22f0cce968604a6a 100644 --- a/mindspore/ccsrc/kernel/pyboost/customize/identity.cc +++ b/mindspore/ccsrc/kernel/pyboost/customize/identity.cc @@ -22,10 +22,9 @@ namespace mindspore { namespace kernel { namespace pyboost { -void IdentityCustomizeCallWithoutContigous(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, - void *stream) { +void IdentityCustomizeCallWithoutContigous(const std::shared_ptr &op, const BaseTensorPtr &x_tensor) { // Async - PyBoostUtils::DispatchRun(std::make_shared([op, x_tensor, stream]() { + PyBoostUtils::DispatchRun(std::make_shared([op, x_tensor]() { MS_LOG(DEBUG) << "Run device task Identity start"; auto device_context = op->device_context(); const auto &outputs = op->outputs(); @@ -50,7 +49,8 @@ void IdentityCustomizeCallWithoutContigous(const std::shared_ptr &op, device::DeviceAddressPtrList output_device_address_list{launch_device_address}; const auto &output_address_info = std::make_pair(output_kernel_tensor_list, output_device_address_list); - PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, stream); + PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, + op->stream_id()); auto output_address = std::dynamic_pointer_cast(outputs[0]->device_address()); output_address->SetStorageInfo(input_x_address->GetStorageInfo()); output_address->set_ptr(launch_device_address->GetMutablePtr()); @@ -58,9 +58,9 @@ void IdentityCustomizeCallWithoutContigous(const std::shared_ptr &op, })); } -void IdentityCustomizeCall(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, void *stream) { +void IdentityCustomizeCall(const std::shared_ptr &op, const BaseTensorPtr &x_tensor) { // Async - PyBoostUtils::DispatchRun(std::make_shared([op, x_tensor, stream]() { + PyBoostUtils::DispatchRun(std::make_shared([op, x_tensor]() { MS_LOG(DEBUG) << "Run device task Identity start"; auto device_context = op->device_context(); const auto &outputs = op->outputs(); @@ -78,13 +78,13 @@ void IdentityCustomizeCall(const std::shared_ptr &op, const BaseTensor const auto &output_address_info = PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs); - PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, stream); + PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, + op->stream_id()); MS_LOG(DEBUG) << "Run device task Identity end"; })); } -tensor::BaseTensorPtr IdentityCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, - void *stream) { +tensor::BaseTensorPtr IdentityCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor) { OpRunner::InferOpOutput(op, x_tensor); PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor); @@ -92,10 +92,10 @@ tensor::BaseTensorPtr IdentityCustomize(const std::shared_ptr &op, con if (x_tensor->is_contiguous()) { MS_LOG(DEBUG) << "Run Identity input contiguous"; - IdentityCustomizeCall(op, x_tensor, stream); + IdentityCustomizeCall(op, x_tensor); } else { MS_LOG(DEBUG) << "Run Identity input without contiguous"; - IdentityCustomizeCallWithoutContigous(op, x_tensor, stream); + IdentityCustomizeCallWithoutContigous(op, x_tensor); } return op->output(0); } diff --git a/mindspore/ccsrc/kernel/pyboost/customize/identity.h b/mindspore/ccsrc/kernel/pyboost/customize/identity.h index 790f0340c5ca1ce831d65a4369df1719b851e2e1..2894432ee525d30698a281156a000751b9395d3f 100644 --- a/mindspore/ccsrc/kernel/pyboost/customize/identity.h +++ b/mindspore/ccsrc/kernel/pyboost/customize/identity.h @@ -27,7 +27,7 @@ namespace mindspore { namespace kernel { namespace pyboost { tensor::BaseTensorPtr BACKEND_EXPORT IdentityCustomize(const std::shared_ptr &op, - const BaseTensorPtr &x_tensor, void *stream = nullptr); + const BaseTensorPtr &x_tensor); } // namespace pyboost } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/kernel/pyboost/customize/op_common.cc b/mindspore/ccsrc/kernel/pyboost/customize/op_common.cc index 3a7eaf699edaaba40d9462bf096dcd8e4ad16117..233c351aee049c404a12163111aa5590fd09b7e5 100644 --- a/mindspore/ccsrc/kernel/pyboost/customize/op_common.cc +++ b/mindspore/ccsrc/kernel/pyboost/customize/op_common.cc @@ -24,8 +24,7 @@ namespace mindspore { namespace kernel { namespace pyboost { -tensor::BaseTensorPtr CopyCustomizeCall(const std::shared_ptr &op, const BaseTensorPtr &input_tensor, - void *stream) { +tensor::BaseTensorPtr CopyCustomizeCall(const std::shared_ptr &op, const BaseTensorPtr &input_tensor) { MS_LOG(DEBUG) << "Call start"; MS_EXCEPTION_IF_NULL(input_tensor); @@ -44,46 +43,44 @@ tensor::BaseTensorPtr CopyCustomizeCall(const std::shared_ptr &op, con // Create device address for output tensors PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); - // Async - PyBoostUtils::DispatchRun(std::make_shared([op, input_tensor, stream]() { - auto device_context = op->device_context(); - const auto &outputs = op->outputs(); - - // Malloc for input tensors - PyBoostUtils::MallocOpInputs(device_context, input_tensor); - // Malloc for output tensors - PyBoostUtils::MallocOpOutputs(device_context, outputs); - - const auto &input_device_sync = input_tensor->device_address(); - MS_EXCEPTION_IF_NULL(input_device_sync); - if (input_device_sync->GetTensorStorageInfo() == nullptr) { - op->set_primitive(prim::kPrimTensorMove); - // Get inputs kernel tensors, the not-tensor value will malloc here - const auto &input_address_info = - PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), op->input_abs(), input_tensor); - // Get outputs kernel tensors - const auto &output_address_info = - PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs); - - const auto &output_device_address = - std::dynamic_pointer_cast(op->output(0)->device_address()); - MS_EXCEPTION_IF_NULL(output_device_address); - if (output_device_address->GetSize() != 0) { - // Call kPrimTensorMove if input device address size if not 0. - PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, - stream); - } - } else { - const auto &input_address = std::dynamic_pointer_cast(input_tensor->device_address()); - const auto &output_address = std::dynamic_pointer_cast(op->output(0)->device_address()); - if (!device_context->GetKernelExecutor(false)->ExecuteKernelTask( - runtime::KernelTaskType::kCONTIGUOUS_TASK, {input_address}, {output_address}, op->stream_id())) { - MS_LOG(EXCEPTION) << "ExecuteKernelTask failed, task_type:" << runtime::KernelTaskType::kCONTIGUOUS_TASK; - } + runtime::OpExecutor::GetInstance().WaitAll(); + auto device_context = op->device_context(); + const auto &op_outputs = op->outputs(); + + // Malloc for input tensors + PyBoostUtils::MallocOpInputs(device_context, input_tensor); + // Malloc for output tensors + PyBoostUtils::MallocOpOutputs(device_context, op_outputs); + + const auto &input_device_sync = input_tensor->device_address(); + MS_EXCEPTION_IF_NULL(input_device_sync); + if (input_device_sync->GetTensorStorageInfo() == nullptr) { + op->set_primitive(prim::kPrimTensorMove); + // Get inputs kernel tensors, the not-tensor value will malloc here + const auto &input_address_info = + PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), op->input_abs(), input_tensor); + // Get outputs kernel tensors + const auto &output_address_info = + PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, op_outputs); + + const auto &output_device_address = + std::dynamic_pointer_cast(op->output(0)->device_address()); + MS_EXCEPTION_IF_NULL(output_device_address); + if (output_device_address->GetSize() != 0) { + // Call kPrimTensorMove if input device address size if not 0. + PyBoostUtils::LaunchKernel(op->primitive(), op->device_context(), input_address_info, output_address_info, + op->stream_id()); + } + } else { + const auto &input_address = std::dynamic_pointer_cast(input_tensor->device_address()); + const auto &output_address = std::dynamic_pointer_cast(op->output(0)->device_address()); + if (!device_context->GetKernelExecutor(false)->ExecuteKernelTask( + runtime::KernelTaskType::kCONTIGUOUS_TASK, {input_address}, {output_address}, op->stream_id())) { + MS_LOG(EXCEPTION) << "ExecuteKernelTask failed, task_type:" << runtime::KernelTaskType::kCONTIGUOUS_TASK; } + } - MS_LOG(DEBUG) << "Launch end"; - })); + MS_LOG(DEBUG) << "Launch end"; return op->output(0); } diff --git a/mindspore/ccsrc/kernel/pyboost/customize/op_common.h b/mindspore/ccsrc/kernel/pyboost/customize/op_common.h index a8d0ed217077e77b0c3469243430ae671590d0c0..4bada63cf704c7889c9efbe733a80393cb79f3f0 100644 --- a/mindspore/ccsrc/kernel/pyboost/customize/op_common.h +++ b/mindspore/ccsrc/kernel/pyboost/customize/op_common.h @@ -30,7 +30,7 @@ namespace kernel { namespace pyboost { // Common call for copy op in cpu and gpu. tensor::BaseTensorPtr BACKEND_EXPORT CopyCustomizeCall(const std::shared_ptr &op, - const BaseTensorPtr &input_tensor, void *stream); + const BaseTensorPtr &input_tensor); // If the tensor is continuous, return the cloned tensor and set the op info. If the tensor is not continuous, // return nullptr and do nothing. tensor::BaseTensorPtr BACKEND_EXPORT ContiguousTensorOpProcess(const std::shared_ptr &op, diff --git a/mindspore/ccsrc/kernel/pyboost/pyboost_utils.cc b/mindspore/ccsrc/kernel/pyboost/pyboost_utils.cc index 104babb916840868aba9bfc083688204c6089fb1..8942ebe44212a26806b4568406cfbf5ea446c0c5 100644 --- a/mindspore/ccsrc/kernel/pyboost/pyboost_utils.cc +++ b/mindspore/ccsrc/kernel/pyboost/pyboost_utils.cc @@ -197,6 +197,14 @@ DeviceSyncPtr PyBoostUtils::ContiguousByDeviceAddress(const DeviceSyncPtr &devic return new_device_address; } +void PyBoostUtils::CreateOutputTensor(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, + const TensorStorageInfoPtrList &storage_info_list, + std::vector *outputs) { + for (auto &storage_info : storage_info_list) { + CreateOutputTensor(device_context, input, storage_info, outputs); + } +} + void PyBoostUtils::CreateOutputTensor(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, const TensorStorageInfoPtr &storage_info, std::vector *outputs) { @@ -360,7 +368,7 @@ PyboostKernelExtraFuncFactory &PyboostKernelExtraFuncFactory::GetInstance() { void PyBoostUtils::LaunchKernel(const PrimitivePtr &primitive, const DeviceContext *device_context, const AddressInfoPair &input_address_info, const AddressInfoPair &output_address_info, - void *stream_ptr) { + size_t stream_id) { const auto &real_name = primitive->name(); // KernelMod init auto kernel_mod = PyBoostUtils::CreateKernelMod(primitive, real_name, device_context, input_address_info.first, @@ -376,6 +384,7 @@ void PyBoostUtils::LaunchKernel(const PrimitivePtr &primitive, const DeviceConte const auto &workspace_kernel_tensors = PyBoostUtils::GetKernelTensorFromAddress(workspace_device_address); const auto &device_name = device_context->device_context_key().device_name_; + void *stream_ptr = device_context->device_res_manager_->GetStream(stream_id); if (!PyboostKernelExtraFuncFactory::GetInstance().IsEnableProfiler(device_name)) { if (!kernel_mod->Launch(input_address_info.first, workspace_kernel_tensors, output_address_info.first, stream_ptr)) { @@ -399,6 +408,8 @@ void PyBoostUtils::LaunchKernel(const PrimitivePtr &primitive, const DeviceConte if (kernel_mod->IsNeedUpdateOutputShapeAndSize()) { kernel_mod->UpdateOutputShapeAndSize(input_address_info.first, output_address_info.first); } + runtime::DeviceAddressUtils::ProcessCrossStreamAddress(real_name, device_context, stream_id, input_address_info.first, + output_address_info.first); MS_LOG(DEBUG) << real_name << " Launch end"; } diff --git a/mindspore/ccsrc/kernel/pyboost/pyboost_utils.h b/mindspore/ccsrc/kernel/pyboost/pyboost_utils.h index a20dc4b88ea2a34d901772abf2140bcbc9ec5433..3cf61edbff935d44a52758f42582976727da3cad 100644 --- a/mindspore/ccsrc/kernel/pyboost/pyboost_utils.h +++ b/mindspore/ccsrc/kernel/pyboost/pyboost_utils.h @@ -68,6 +68,9 @@ class BACKEND_EXPORT PyBoostUtils { // Create output tensors static void CreateOutputTensor(const AbstractBasePtr &abstract, std::vector *outputs); + static void CreateOutputTensor(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, + const TensorStorageInfoPtrList &storage_info_list, + std::vector *outputs); static void CreateOutputTensor(const DeviceContext *device_context, const tensor::BaseTensorPtr &input, const TensorStorageInfoPtr &storage_info, std::vector *outputs); static void CreateOutputTensor(const ValueSimpleInfoPtr &output_value_simple_info, @@ -117,7 +120,7 @@ class BACKEND_EXPORT PyBoostUtils { static void LaunchKernel(const PrimitivePtr &primitive, const device::DeviceContext *device_context, const AddressInfoPair &input_address_info, const AddressInfoPair &output_address_info, - void *stream_ptr = nullptr); + size_t stream_id = kDefaultStreamIndex); static void GetKernelTensor(const DeviceContext *device_context, size_t stream_id, size_t index, std::vector *kernel_tensor_list, diff --git a/mindspore/ccsrc/kernel/pyboost/template/pyboost_view_template.tpl b/mindspore/ccsrc/kernel/pyboost/template/pyboost_view_template.tpl index 64dd64a818b77400860863d7c95b9628bd7a329d..a57a31d2df4de2f8601282b30e53fc25bdab8797 100644 --- a/mindspore/ccsrc/kernel/pyboost/template/pyboost_view_template.tpl +++ b/mindspore/ccsrc/kernel/pyboost/template/pyboost_view_template.tpl @@ -4,7 +4,7 @@ auto op = get_op(); // Create device address for input tensors PyBoostUtils::PrepareOpInputs(device_context_, op->stream_id(), ${call_tensors}); - PyBoostUtils::CreateOutputTensor(device_context_, ${input}, storage_info_list[0], &outputs_); + PyBoostUtils::CreateOutputTensor(device_context_, ${input}, storage_info_list, &outputs_); // Async PyBoostUtils::DispatchRun( diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/dvpp_image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/dvpp_image_utils.cc index 70a5713ecf351b9f7760baa5fb763216fd6b4b37..e9c967a21fab680b55e11d80a8cf63b3d798d0a8 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/dvpp_image_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/dvpp_image_utils.cc @@ -1632,11 +1632,10 @@ APP_ERROR DvppVerticalFlip(const std::shared_ptr &input, // acl APP_ERROR GetSocName(std::string *soc_name) { - const char *soc_name_c = aclrtGetSocName(); - if (soc_name_c == nullptr) { + *soc_name = MsContext::GetInstance()->ascend_soc_name(); + if (soc_name->empty()) { *soc_name = ""; } - *soc_name = std::string(soc_name_c); return APP_ERR_OK; } diff --git a/mindspore/ccsrc/pipeline/jit/pi/common.cc b/mindspore/ccsrc/pipeline/jit/pi/common.cc index 2e9422c22260fff0e41d4e5944f6be580ae9a645..4d6418a917f7bb7ce90497b23bbbf92852a274fb 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/common.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/common.cc @@ -1590,4 +1590,37 @@ py::object get_code_extra(const py::object &func) { return result; } +size_t FunctionId(const py::object &callable) { + PyObject *op = callable.ptr(); + if (PyMethod_Check(op)) { + op = PyMethod_GET_FUNCTION(op); + } + if (PyInstanceMethod_Check(op)) { + op = PyInstanceMethod_GET_FUNCTION(op); + } + void *result = op; + if (PyCFunction_Check(op)) { + // types.BuiltinFunctionType = type(len) same as types.BuiltinMethodType = type(list().append) + PyCFunction func = PyCFunction_GET_FUNCTION(op); + result = reinterpret_cast(func); + } else if (Py_IS_TYPE(op, &PyMethodDescr_Type)) { + // types.MethodDescriptorType = type(list.append) + PyCFunction func = reinterpret_cast(op)->d_method->ml_meth; + result = reinterpret_cast(func); + } else if (Py_IS_TYPE(op, &PyWrapperDescr_Type)) { + // types.WrapperDescriptorType = type(object.__init__) + result = reinterpret_cast(op)->d_wrapped; + } else if (Py_IS_TYPE(op, &_PyMethodWrapper_Type)) { + // types.WrapperDescriptorType = type(object().__str__) + PyObject *self = PyObject_GetAttrString(op, "__self__"); + PyObject *attr = PyObject_GetAttrString(op, "__name__"); + PyObject *descr = PyObject_GetAttr(reinterpret_cast(Py_TYPE(self)), attr); + result = reinterpret_cast(descr)->d_wrapped; + Py_DECREF(self); + Py_DECREF(attr); + Py_DECREF(descr); + } + return reinterpret_cast(result); +} + } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/jit/pi/external.h b/mindspore/ccsrc/pipeline/jit/pi/external.h index 7ada194ea62d64233a620b655e395cf0c2c70745..520bb493ae3956c5b039ebee1cf34a080d751b1a 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/external.h +++ b/mindspore/ccsrc/pipeline/jit/pi/external.h @@ -26,6 +26,7 @@ py::bool_ pi_jit_disable(); py::bool_ pi_jit_should_compile(const py::object &func, const py::object &tag); py::object get_code_extra(const py::object &); void update_pijit_default_config(const py::kwargs &conf); +size_t FunctionId(const py::object &callable); #if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION < 9) MS_API PyObject *EvalFrame(PyFrameObject *f, int exc); diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/abstract_object.cc b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/abstract_object.cc index a2f29c38c7f83e210f470e6b3705fb2d84b66947..f182e236b5fd675b973d0d7d41ba42a4e4290e34 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/abstract_object.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/abstract_object.cc @@ -223,10 +223,6 @@ AbstractObjectBase::Type AbstractObjectBase::GetPyType(PyObject *o) { if (o == nullptr) { return kTypeAnyValue; } - py::object obj = py::cast(o); - if (py::hasattr(obj, PYTHON_PRIMITIVE_FUNCTION_FLAG)) { - return kTypePrimitiveFunction; - } FIND_MAP_CACHE(const_object_type_map, o); if (PyLong_Check(o)) { return (Py_ABS(Py_SIZE(o)) > 2) ? kTypeAnyValue : kTypeInt; @@ -239,7 +235,7 @@ AbstractObjectBase::Type AbstractObjectBase::GetMsType(PyTypeObject *tp) { {IsStubTensorType, kTypeStubTensor}, {IsTensorType, kTypeTensor}, {IsCellListType, kTypeNNCellList}, {IsCellType, kTypeCell}, {IsPrimitiveType, kTypePrimitive}, {IsMetaFuncGraphType, kTypeMetaFuncGraph}, - {IsMSDTypeType, kTypeMSDType}, + {IsMSDTypeType, kTypeMSDType}, {IsPrimitiveFunctionType, kTypePrimitiveFunction}, }; if (tp == nullptr) { return kTypeAnyValue; diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_analyzer.cc b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_analyzer.cc index 898548376c7ba5603d6dc26a7f4cf051ceffa9b6..50864ad5edcff1699905c0174d804c7914ef5b40 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_analyzer.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_analyzer.cc @@ -29,8 +29,6 @@ namespace mindspore { namespace pijit { -extern bool CheckMSConstexpr(const py::object &func); -extern bool CheckJitConstexpr(const py::object &func); extern TracePtr GetTrace(ValueNode *node, bool strict, bool print, int depth, int max_depth); const int kMsFlagSet = AObject::kMsFlagGradFunc | AObject::kMsFlagStandardFunc | AObject::kMsFlagShardFunc | diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.cc b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.cc index 32bdfbe1331feb8928309dee04c22a756e6d5605..5a516fcc5604afe1409f9614b0ead558c2d111f8 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.cc @@ -210,10 +210,14 @@ bool GraphBuilder::IsByteCodeImplemented(int bytecode) { } bool GraphBuilder::ReplaceAll(ValueNode *old_node, ValueNode *new_node) { + static const std::set ref_op = { + BUILD_TUPLE, BUILD_LIST, BUILD_SET, BUILD_MAP, BUILD_CONST_KEY_MAP, + }; + // check reference relationship const auto &nodes = graph_->GetTracedNodes(); bool find = std::any_of(nodes.begin(), nodes.end(), [&old_node](ValueNode *node) { - if (Utils::IsGeneralNoSideEffectOp(node->GetOpcode())) { + if (Utils::IsGeneralNoSideEffectOp(node->GetOpcode()) && ref_op.find(node->GetOpcode()) == ref_op.end()) { return false; } const auto &args = node->getInputs(); @@ -1555,7 +1559,6 @@ py::object GraphBuilder::GetFuncInfo(ValueNode *func_node) { bool GraphBuilder::WhiteListFuncCheckAndInfer(CallNode *call_node, const py::object &callable) { const auto &conf = call_node->GetGraph()->Config(); - bool cell_inline = conf.GetBoolConfig(GraphJitConfig::kReplaceNNCellByConstruct); AObject::Type vobj_type = call_node->input(0)->GetVobj()->GetType(); if (vobj_type == AObject::kTypeCell) { current_block_->SetTrackResult(Block::kTrackHasOpsPrimitive); @@ -1565,7 +1568,6 @@ bool GraphBuilder::WhiteListFuncCheckAndInfer(CallNode *call_node, const py::obj } } - // handle special function, not inline bool infer_primitive = conf.GetBoolConfig(GraphJitConfig::kInferPrimitive); int max_infer = conf.getIntConfig(GraphJitConfig::kInferPrimitiveMax); if (max_infer != 0 && infer_func_count >= max_infer) { @@ -1574,31 +1576,38 @@ bool GraphBuilder::WhiteListFuncCheckAndInfer(CallNode *call_node, const py::obj infer_func_count++; } infer_primitive &= (conf.getIntConfig(GraphJitConfig::kInferPrimitiveMask) & infer_primitive_func) != 0; - std::string special_func_key; - if (IsFuncInWhiteList(callable, &special_func_key, infer_primitive)) { - call_node->SetSubGraph(NewGraph(nullptr, nullptr)); - call_node->GetSubGraph()->SetGuard(root_->GetGraph()->GetGuard()); - if (!HandleFuncInWhiteList(special_func_key, call_node)) { - return false; - } - if (call_node->GetSubGraph() == nullptr) { - call_node->SetInlineReason(InlineReason::kInlineFuncSpecialize); - } else { - MS_EXCEPTION_IF_NULL(call_node->GetSubGraph()->GetRetVal()); - call_node->SetInlineReason(InlineReason::kInline); - seek(0) = call_node->GetSubGraph()->GetRetVal(); - } - return true; - } - - // set node info before return - if (vobj_type == AObject::kTypePrimitive || (vobj_type == AObject::kTypeCell && !cell_inline)) { + if (!infer_primitive && vobj_type == AObject::kTypePrimitive) { call_node->SetVobj(AObject::MakeAObject(AObject::kTypeTensor)); call_node->SetInlineReason(InlineReason::kInlineGraphSupportedByMS); current_block_->SetTrackResult(Block::kTrackHasOpsPrimitive); return true; } - return false; + + InferFunc infer_func = FindInferFunc(callable); + if (infer_func == nullptr) { + return false; + } + + call_node->SetInlineReason(InlineReason::kInlineUnknown); + call_node->SetSubGraph(NewGraph(nullptr, nullptr)); + call_node->GetSubGraph()->SetGuard(root_->GetGraph()->GetGuard()); + infer_func(call_node); + + if (!HandleSideEffectOfFuncInWhiteList(call_node, infer_func)) { + return false; + } + InlineReason r; + if (call_node->GetSubGraph() == nullptr) { + r = InlineReason::kInlineFuncSpecialize; + } else { + MS_EXCEPTION_IF_NULL(call_node->GetSubGraph()->GetRetVal()); + r = InlineReason::kInline; + seek(0) = call_node->GetSubGraph()->GetRetVal(); + } + if (call_node->GetInlineReason() == InlineReason::kInlineUnknown) { + call_node->SetInlineReason(r); + } + return true; } bool UnsupportedCodeTypeCheck(PyCodeObject *co) { @@ -2598,6 +2607,9 @@ py::object GraphBuilder::ResolveCallable(CallNode *call_node, StopTraceReason *s } if (WhiteListFuncCheckAndInfer(call_node, callable_info)) { + if (call_node->GetInlineReason() == InlineReason::kInlineFunc_Type_Unsupported) { + *stop_reason = StopTraceReason::kStopTraceFunc_Type_Unsupported; + } return py::object(); } @@ -3219,34 +3231,7 @@ static void SetGradFuncInfo(CallNode *call_node) { void GraphBuilder::DumpDFG() { GRAPH_JIT_LOG_F("%s", graph_->ToString().c_str()); } -bool GraphBuilder::IsFuncInWhiteList(const py::object &f, std::string *special_func_key, bool bInferPrimitive) { - if (f.ptr() == nullptr) { - return false; - } - *special_func_key = GetFuncName(f); - auto FuncWhiteListMap = GetFuncWhiteListMap(); - auto iter = FuncWhiteListMap.find(*special_func_key); - if (iter != FuncWhiteListMap.end() && iter->second.check(f)) { - return true; - } - auto fuzzmatcher = GetFuncWhiteListFuzzyMatcher(); - auto tar = std::find_if(fuzzmatcher.begin(), fuzzmatcher.end(), - [&f](const std::pair &i) { return i.first(f); }); - if (tar != fuzzmatcher.end()) { - *special_func_key = tar->second; - return true; - } - if (bInferPrimitive && CheckPrimitive(f)) { - *special_func_key = GetMindsporeNamePrimitive(); - return true; - } - return false; -} - -bool GraphBuilder::HandleFuncInWhiteList(const std::string &key, CallNode *call_node) { - const auto &infer_func = GetFuncWhiteListMap().find(key)->second.infer; - infer_func(call_node); - +bool GraphBuilder::HandleSideEffectOfFuncInWhiteList(CallNode *call_node, InferFunc infer_func) { // handle white list side-effects ValueNode *old_node = nullptr; ValueNode *new_node = nullptr; @@ -3265,31 +3250,6 @@ bool GraphBuilder::HandleFuncInWhiteList(const std::string &key, CallNode *call_ return true; } -bool MindGraphBuilder::IsFuncInWhiteList(const py::object &f, std::string *special_func_key) { - if (f.ptr() == nullptr) { - return false; - } - *special_func_key = GetFuncName(f); - auto MindFuncWhiteListMap = GetFuncWhiteListMap(true); - auto iter = MindFuncWhiteListMap.find(*special_func_key); - if (iter != MindFuncWhiteListMap.end() && iter->second.check(f)) { - return true; - } - auto fuzzmatcher = GetFuncWhiteListFuzzyMatcher(true); - auto tar = std::find_if(fuzzmatcher.begin(), fuzzmatcher.end(), - [&f](const std::pair &i) { return i.first(f); }); - if (tar != fuzzmatcher.end()) { - *special_func_key = tar->second; - return true; - } - return false; -} - -bool MindGraphBuilder::HandleFuncInWhiteList(const std::string &key, CallNode *n) { - MS_LOG(INFO) << "specialize for " << key; - return GetFuncWhiteListMap(true).find(key)->second.infer(n); -} - LocationPtr MindGraphBuilder::GetLocation(CallNode *call_node) const { auto file_name = py::cast(graph_->GetCodeObj()->co_filename); auto line_no = call_node->GetLineNo(); @@ -3298,11 +3258,11 @@ LocationPtr MindGraphBuilder::GetLocation(CallNode *call_node) const { } bool MindGraphBuilder::WhiteListFuncCheckAndInfer(CallNode *call_node, const py::object &callable) { - std::string special_func_key; - if (IsFuncInWhiteList(callable, &special_func_key)) { + InferFunc infer_func = FindInferFunc(callable, trace_flag()); + if (infer_func != nullptr) { call_node->SetSubGraph(NewGraph(nullptr, nullptr)); call_node->GetSubGraph()->SetGuard(root_->GetGraph()->GetGuard()); - bool has_sub_graph = HandleFuncInWhiteList(special_func_key, call_node); + bool has_sub_graph = infer_func(call_node); if (!has_sub_graph) { call_node->SetInlineReason(InlineReason::kInlineFuncSpecialize); MS_ASSERT(!call_node->GetSubGraph()); // check infer function diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.h b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.h index 7151bbe692955a90b8cd179bc2d1938b1de9f1f0..2eadb24a039342dd256705fe6797af04fce72b2a 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.h +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/graph_build.h @@ -23,6 +23,7 @@ #include #include "pipeline/jit/pi/graph_capture/graph.h" #include "pipeline/jit/pi/graph_build/func_graph_builder.h" +#include "pipeline/jit/pi/graph_capture/special_func_infer.h" #include "utils/convert_utils_base.h" namespace mindspore { @@ -283,14 +284,7 @@ class GraphBuilder { bool NotImplementBytecode(const Instr &instr); static const std::unordered_map bytecode_meth_map_; - // check the function is special function that mindspore support and not inline, - // the return values or type can be infer - // set key for handler - bool IsFuncInWhiteList(const py::object &f, std::string *special_func_key, bool bInferPrimitive); - - // infer the return value of special function and generate subgraph, or clear subgraph - // return true if special function has subgraph - virtual bool HandleFuncInWhiteList(const std::string &key, CallNode *n); + bool HandleSideEffectOfFuncInWhiteList(CallNode *call_node, InferFunc); }; class MindGraphBuilder : public GraphBuilder { @@ -328,8 +322,6 @@ class MindGraphBuilder : public GraphBuilder { private: std::vector GetNewArgs(CallNode *call_node, AObject *vobj = nullptr); - bool IsFuncInWhiteList(const py::object &f, std::string *special_func_key); - bool HandleFuncInWhiteList(const std::string &key, CallNode *n) override; bool AllConstantArgs(const std::vector &args, const py::object &callable_info, CallNode *call_node); private: diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/special_func_infer.cc b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/special_func_infer.cc index 2bf883e1b587360ddb4bed9fff6a510277f62c12..1b9b5a06b2060537585212afed25f71f11209538 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/special_func_infer.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/special_func_infer.cc @@ -38,88 +38,14 @@ extern AObject *InferFuncResult(const py::object &func, const std::vector tensor_module = {"mindspore.common.tensor", "mindtorch.torch.tensor"}; -static const std::vector bypass_function_whilelist = {kMindsporeNameTensorInitCheck, - kMindsporeNameTensorContiguous}; - -static py::object GetGradClass() { return Utils::GetModuleAttr("mindspore._c_expression", "GradOperation_"); } - -const char *GetFuncName(const py::object &f) { - PyObject *func = f.ptr(); - if (func == nullptr) { - return ""; - } - if (PyMethod_Check(func)) { - func = PyMethod_GET_FUNCTION(func); - } - if (PyCFunction_Check(func)) { - return reinterpret_cast(func)->m_ml->ml_name; - } - PyCodeObject *co = nullptr; - if (PyFunction_Check(func)) { - co = reinterpret_cast(PyFunction_GET_CODE(func)); - } - if (co) { - return PyUnicode_AsUTF8(co->co_name); - } - PyTypeObject *tp = PyType_Check(func) ? reinterpret_cast(func) : Py_TYPE(func); - const char *res = strrchr(tp->tp_name, '.'); - return res ? res + 1 : tp->tp_name; -} +static bool CheckConstexpr(const py::object &func); template -bool SetCallResType(CallNode *call_node) { +static bool SetCallResType(CallNode *call_node) { call_node->SetVobj(AObject::MakeAObject(type)); call_node->SetSubGraph(nullptr); return false; @@ -139,19 +65,22 @@ bool JustCallAndSetRes(CallNode *call_node) { return SetCallResType(call_node); } + pi_jit_disable(); PyObject *value = PyObject_Call(func.ptr(), pair.first.ptr(), pair.second.ptr()); if (PyErr_Occurred()) { MS_LOG(ERROR) << "got an error " << py::error_already_set().what() << " at call the " << std::string(py::str(func.ptr())); PyErr_Clear(); } + pi_jit_enable(); + call_node->SetVobj(AObject::Convert(value)); call_node->SetSubGraph(nullptr); Py_XDECREF(value); return false; } -bool CallNodeReturnConst(CallNode *call_node, Graph *sub_graph, AObject *value) { +static bool CallNodeReturnConst(CallNode *call_node, Graph *sub_graph, AObject *value) { PyObject *cnst = value->GetPyObject().ptr(); MS_EXCEPTION_IF_NULL(cnst); @@ -204,19 +133,6 @@ bool GuardConstCallNodeParam(CallNode *call_node, Graph *sub_graph, int max_guar return true; } -static bool CheckConvertMap(const py::object &func) { - if (func.ptr() == nullptr || !PyFunction_Check(func.ptr())) { - return false; - } - py::object tmp = Utils::GetModuleAttr("mindspore._extends.parse.resources", "convert_object_map"); - auto dict_obj = py::cast(tmp); - if (dict_obj.contains(func)) { - return true; - } else { - return false; - } -} - static bool InferConvertMap(CallNode *call_node) { AObject *func_info = call_node->input(0)->GetVobj(); func_info->SetMsFlag(AObject::kMsFlagStandardFunc); @@ -266,17 +182,7 @@ static bool InferConvertMap(CallNode *call_node) { return false; } -bool CheckGetCachePrim_(const py::object &f) { - if (!PyFunction_Check(f.ptr())) { - return false; - } - auto func_ptr = reinterpret_cast(f.ptr()); - std::string name = PyUnicode_AsUTF8(func_ptr->func_module); - bool is_func = name == "mindspore.ops._primitive_cache"; - return is_func; -} - -bool InferGetCachePrim_(CallNode *n) { +static bool InferGetCachePrim(CallNode *n) { // just return the first parameter of _get_cache_prim Graph *g = n->GetSubGraph(); n->SetVobj(n->input(1)->GetVobj()); @@ -284,52 +190,6 @@ bool InferGetCachePrim_(CallNode *n) { return true; } -bool IsTensorModule(const std::string &name) { - return std::any_of(tensor_module.begin(), tensor_module.end(), [name](const auto &item) { return item == name; }); -} - -bool IsFuncInByPassWhiteList(const std::string &name) { - return std::any_of(bypass_function_whilelist.begin(), bypass_function_whilelist.end(), - [name](const auto &item) { return item == name; }); -} - -bool CheckTensorBypass(const py::object &f) { - if (!PyMethod_Check(f.ptr())) { - return false; - } - auto func_ptr = reinterpret_cast(PyMethod_Function(f.ptr())); - std::string module = PyUnicode_AsUTF8(func_ptr->func_module); - if (IsTensorModule(module)) { - std::string func_name = GetFuncName(f); - return IsFuncInByPassWhiteList(func_name); - } - return false; -} - -bool InferTensorBypass(CallNode *n) { - if (n->input(0)->GetOpcode() != LOAD_ATTR) { - n->SetSubGraph(nullptr); - return false; - } - Graph *g = n->GetSubGraph(); - n->SetVobj(AObject::Convert(PyMethod_Self(n->input(0)->GetVobj()->GetPyObject().ptr()))); - g->SetRetVal(n->input(0)->input(0)); - return true; -} - -static bool CheckRegistryGet(const py::object &func) { - PyObject *f = func.ptr(); - if (PyMethod_Check(f)) { - f = PyMethod_GET_FUNCTION(f); - } - if (!PyFunction_Check(f)) { - return false; - } - std::string name = PyUnicode_AsUTF8(reinterpret_cast(f)->func_module); - bool is_tensor = name == "mindspore.common._register_for_tensor"; - return is_tensor; -} - static bool InferRegistryGet(CallNode *call_node) { Graph *g = call_node->GetSubGraph(); JustCallAndSetRes(call_node); @@ -341,13 +201,7 @@ static bool InferRegistryGet(CallNode *call_node) { return false; } -bool CheckPrimitive(const py::object &func) { - bool isPrimitiveType = AObject::GetPyType(func.ptr()) == AObject::kTypePrimitive; - bool isPrimitiveFunction = py::hasattr(func, PYTHON_PRIMITIVE_FUNCTION_FLAG); - return isPrimitiveType || isPrimitiveFunction; -} - -bool InferPrimitive(CallNode *call_node) { +static bool InferPrimitive(CallNode *call_node) { static const std::unordered_map not_ret_tensor_prim = { {"Prim[_get_grad_op]", AObject::kTypeMetaFuncGraph}, {"Prim[DType]", AObject::kTypeAnyValue}, @@ -359,7 +213,8 @@ bool InferPrimitive(CallNode *call_node) { PyObject *prim = call_node->input(0)->GetVobj()->GetPyObject().ptr(); std::string prim_key = std::string(py::str(prim)); if (prim_key == "Prim[_get_grad_op]") { - AbstractType *type = static_cast(AObject::Convert(GetGradClass())); + py::object grad_class = Utils::GetModuleAttr("mindspore._c_expression", "GradOperation_"); + AbstractType *type = static_cast(AObject::Convert(grad_class)); AObject *res = type != nullptr ? type->BuildAbstractInstance({}, CALL_FUNCTION) : AObject::MakeAObject(AObject::kTypeMetaFuncGraph); call_node->SetVobj(res); @@ -420,7 +275,7 @@ bool InferPrimitive(CallNode *call_node) { return false; } -bool InferGradOperation(CallNode *call_node, AObject::MindsporeFlag f) { +static bool InferGradOperation(CallNode *call_node, AObject::MindsporeFlag f) { call_node->SetSubGraph(nullptr); AObject *grad_func = AObject::MakeAObject(AObject::kTypeFunction); grad_func->SetMsFlag(f); @@ -435,12 +290,7 @@ bool InferGradOperation(CallNode *call_node, AObject::MindsporeFlag f) { return false; } -static bool CheckMetaFunc_(const py::object &o) { - PyTypeObject *tp = PyType_Check(o.ptr()) ? reinterpret_cast(o.ptr()) : Py_TYPE(o.ptr()); - return IsMetaFuncGraphType(tp); -} - -static bool InferMetaFunc_(CallNode *call_node) { +static bool InferMetaFunc(CallNode *call_node) { call_node->SetSubGraph(nullptr); const auto &vo = call_node->input(0)->GetVobj(); MS_EXCEPTION_IF_CHECK_FAIL(vo->GetType() != AObject::kTypeType, "class call is before "); @@ -607,15 +457,6 @@ static void HandleGradFunc(CallNode *call_node, const py::object &after_grad, Tr HandleGradFuncCall(call_node, AObject::Convert(decorated_func), sens_param); } -static bool CheckGradFunc(const py::object &f) { - if (!PyFunction_Check(f.ptr())) { - return false; - } - std::string decorated_name = PyUnicode_AsUTF8(reinterpret_cast(f.ptr())->func_qualname); - return decorated_name == "_Grad.__call__..after_grad" || - decorated_name == "GradOperation.__call__..after_grad"; -} - static bool InferGradFunc(CallNode *call_node) { AObject *vo = call_node->input(0)->GetVobj(); vo->SetMsFlag(AObject::kMsFlagGradFunc); @@ -630,110 +471,6 @@ static bool InferGradFunc(CallNode *call_node) { return false; } -static bool CheckJitFunc(const py::object &o) { - static const char except_file[] = "mindspore/common/api.py"; - static const size_t except_size = sizeof(except_file) - 1; - PyObject *func = o.ptr(); - if (PyMethod_Check(func)) { - func = PyMethod_GET_FUNCTION(func); - } - if (!PyFunction_Check(func)) { - return false; - } - PyCodeObject *co = reinterpret_cast(PyFunction_GET_CODE(func)); - const char *file = PyUnicode_AsUTF8(co->co_filename); - const size_t size = strlen(file); - return size > except_size && !strncmp(file + (size - except_size), except_file, except_size); -} - -static bool CheckCell(const py::object &callable_info) { - PyTypeObject *cell_type = PyType_Check(callable_info.ptr()) ? reinterpret_cast(callable_info.ptr()) - : Py_TYPE(callable_info.ptr()); - if (!IsCellType(cell_type)) { - return false; - } - py::object tp = py::cast(reinterpret_cast(cell_type)); - std::string type_str = py::str(tp.ptr()); - const auto &sets = *kPIJitConfigDefault.getSetConfig(GraphJitConfig::kPSJitStrictCells); - if (sets.find(type_str) != sets.end()) { - return true; - } - - // mindspore cells - std::string m = tp.attr("__module__").cast(); - constexpr const char except1[] = "mindspore."; - constexpr int except1_size = sizeof(except1) - 1; - if (!m.compare(0, except1_size, except1)) { - kPIJitConfigDefault.AddPSJitStrictCells(type_str); - return true; - } - return false; -} - -static bool InferCell(CallNode *call_node) { - PyTypeObject *cell_type = call_node->input(0)->GetVobj()->GetTypeObject(); - py::object tp = py::cast(reinterpret_cast(cell_type)); - - const auto &conf = call_node->GetGraph()->Config(); - py::object func = tp.attr("construct"); - - std::vector args; - std::transform(call_node->getInputs().begin(), call_node->getInputs().end(), std::back_inserter(args), - [](ValueNode *n) { return n->GetVobj(); }); - AObject *res = InferFuncResult(func, args, call_node->GetOpcode(), conf, true); - if (res == nullptr || res->GetType() == AObject::kTypeAnyValue) { - res = AObject::MakeAObject(AObject::kTypeTensor); - } - - call_node->SetVobj(res); - call_node->SetSubGraph(nullptr); - return false; -} - -static bool CheckJitForbidden(const py::object &func) { - if (func.ptr() == nullptr || PyCFunction_Check(func.ptr())) { - return false; - } - std::string m = GetTopModule(func); - const auto &l = *kPIJitConfigDefault.getSetConfig(GraphJitConfig::kAllowedInlineModules); - bool allow_inline = l.find(m) != l.end(); - bool forbidden = !allow_inline || kPIJitConfigDefault.CheckJitForbidden(func); - - PyObject *func_info = func.ptr(); - if (PyMethod_Check(func_info)) { - func_info = PyMethod_GET_FUNCTION(func_info); - } - if (!PyFunction_Check(func_info) && !PyCFunction_Check(func_info) && !PyType_Check(func_info)) { - func_info = reinterpret_cast(Py_TYPE(func_info)); - } - MS_LOG(DEBUG) << "func " << std::string(py::str(func_info)) << (forbidden ? " is forbidden to" : " will ") - << " Analyze, module is " << m; - return forbidden; -} - -bool CheckJitConstexpr(const py::object &func) { - PyObject *op = func.ptr(); - if (op == nullptr) { - return false; - } - if (PyMethod_Check(op)) { - op = PyMethod_GET_FUNCTION(op); - } - return kPIJitConfigDefault.CheckJitConstexpr(py::cast(op)); -} - -bool CheckMSConstexpr(const py::object &func) { - std::string tp_name = py::str(reinterpret_cast(Py_TYPE(func.ptr()))); - constexpr const char name[] = "..decorator..ProxyOp'>"; - constexpr const int size = sizeof(name) - 1; - if (tp_name.size() > size && !tp_name.compare(tp_name.size() - size, size, name)) { - return true; - } - constexpr const char name2[] = "..deco..CompileOp'>"; - constexpr const int size2 = sizeof(name2) - 1; - return tp_name.size() > size ? !tp_name.compare(tp_name.size() - size2, size2, name2) : false; -} - static bool InferMSConstexpr(CallNode *call_node) { Graph *g = call_node->GetSubGraph(); JustCallAndSetRes(call_node); @@ -742,15 +479,11 @@ static bool InferMSConstexpr(CallNode *call_node) { if (cnst.ptr() == nullptr) { return false; } - if (!GuardConstCallNodeParam(call_node, g, 2)) { - return false; - } - if (!CheckConstPyObject(cnst.ptr())) { - MS_LOG(DEBUG) << std::string(py::str(cnst.ptr())) << " as const is unsupported"; - return false; + bool is_constexpr = CheckConstexpr(call_node->input(0)->GetVobj()->GetPyObject()); + if (is_constexpr || GuardConstCallNodeParam(call_node, g, 2)) { + return CallNodeReturnConst(call_node, g, call_node->GetVobj()); } - - return CallNodeReturnConst(call_node, g, call_node->GetVobj()); + return false; } static bool GuardBuiltinFunc(CallNode *call_node) { @@ -777,81 +510,7 @@ static bool GuardIsInstance(CallNode *call_node) { return graph->GuardValueNode(call_node); } -#define DECLARE_BUILTIN_CFUNCTION(func_name) \ - p = PyDict_GetItemString(PyEval_GetBuiltins(), func_name); \ - MS_ASSERT(p &&PyCFunction_Check(p)); \ - c_function_obj = PyCFunction_GET_FUNCTION(p); \ - kBuiltinFuncOrMethodWhileList.emplace(c_function_obj); - -static const std::set &GenCFunctionMap() { - static std::set kBuiltinFuncOrMethodWhileList = {}; - if (!kBuiltinFuncOrMethodWhileList.empty()) { - return kBuiltinFuncOrMethodWhileList; - } - PyCFunction c_function_obj = nullptr; - PyObject *p = nullptr; - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameIsinstance); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameIssubclass); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameLen); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameAbs); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameMax); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameAll); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameAny); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameHash); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameId); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameOrd); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameCallable); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameGetattr); - DECLARE_BUILTIN_CFUNCTION(kBuiltinNameHasattr); - - // math.log - py::object math_builtin = Utils::GetModuleAttr("math", kBuiltinNameLog, false, false); - c_function_obj = PyCFunction_GET_FUNCTION(math_builtin.ptr()); - kBuiltinFuncOrMethodWhileList.emplace(c_function_obj); - - // python object cfunction without sideeffect - std::map> obj_cfunc_name = { - {py::dict().inc_ref().ptr(), - {"__contains__", "__getitem__", "__sizeof__", "get", "keys", "items", "values", "fromkeys", "copy", "pop"}}, - {py::list().inc_ref().ptr(), {"__getitem__", "__sizeof__", "copy", "index", "count"}}, - {py::tuple().inc_ref().ptr(), {"index", "count"}}, - {py::set().inc_ref().ptr(), {"__contains__", "copy", "issubset", "__sizeof__"}}, - {py::str().inc_ref().ptr(), - {"find", "count", "index", "rfind", "rindex", "startswith", "endswith", "isascii", - "islower", "isupper", "istitle", "isspace", "isdecimal", "isdigit", "isnumeric", "isalpha", - "isalnum", "isidentifier", "isprintable", "format", "format_map", "__format__", "__sizeof__"}}, - }; - for (auto item : obj_cfunc_name) { - for (auto meth : item.second) { - py::object builtin = py::cast(item.first).attr(meth.c_str()); - c_function_obj = PyCFunction_GET_FUNCTION(builtin.ptr()); - kBuiltinFuncOrMethodWhileList.emplace(c_function_obj); - } - } - for (auto item : obj_cfunc_name) { - Py_XDECREF(item.first); - } - return kBuiltinFuncOrMethodWhileList; -} - -#undef DECLARE_BUILTIN_CFUNCTION - -bool CheckBuiltinFuncOrMethod(const py::object &f) { - PyObject *func = f.ptr(); - if (PyMethod_Check(func)) { - func = PyMethod_GET_FUNCTION(func); - } - if (!PyCFunction_Check(func)) { - return false; - } - auto c_function_obj = PyCFunction_GET_FUNCTION(func); - if (GenCFunctionMap().find(c_function_obj) == GenCFunctionMap().end()) { - return false; - } - return true; -} - -static bool InferBuiltinFuncOrMethod(CallNode *call_node) { +bool InferBuiltinFuncOrMethod(CallNode *call_node) { Graph *sub_graph = call_node->GetSubGraph(); (void)JustCallAndSetRes(call_node); ConstantInfo::CollectBuiltinFuncConstantInfo(call_node); @@ -864,7 +523,7 @@ static bool InferBuiltinFuncOrMethod(CallNode *call_node) { bool guard_success = false; std::string name = GetFuncName(call_node->input(0)->GetVobj()->GetPyObject()); - if (name == kBuiltinNameIsinstance) { + if (name == "isinstance") { guard_success = GuardIsInstance(call_node); } else { guard_success = GuardBuiltinFunc(call_node); @@ -875,23 +534,6 @@ static bool InferBuiltinFuncOrMethod(CallNode *call_node) { return false; } -static bool CheckTensorAsType(const py::object &func) { - PyObject *op = func.ptr(); - if (op == nullptr) { - return false; - } - if (PyMethod_Check(op)) { - op = PyMethod_GET_FUNCTION(op); - } - if (!PyFunction_Check(op)) { - return false; - } - auto func_ptr = reinterpret_cast(op); - std::string name = PyUnicode_AsUTF8(func_ptr->func_module); - bool is_func = name == "mindspore.common.tensor"; - return is_func; -} - static bool InferTensorAsType(CallNode *call_node) { ValueNode *self_node = GetBoundSelf(call_node); bool is_not_method = call_node->input(0)->GetVobj()->GetType() != AObject::kTypeBoundMethod; @@ -922,25 +564,6 @@ static bool InferTensorAsType(CallNode *call_node) { return true; } -bool CheckListAppend(const py::object &func) { - static PyCFunction append = nullptr; - if (append == nullptr) { - append = PyCFunction_GET_FUNCTION(py::list().attr(kBuiltinNameAppend).ptr()); - } - PyObject *op = func.ptr(); - if (PyMethod_Check(op)) { - op = PyMethod_GET_FUNCTION(op); - } - /** - * this expression "list.append" will get type "method_descriptor" - * this expression "[].append" will get type "built-in function" - */ - if (!PyCFunction_Check(op)) { - return false; - } - return PyCFunction_GET_FUNCTION(op) == append; -} - bool InferListAppend(CallNode *call_node) { Graph *sub_graph = call_node->GetSubGraph(); call_node->SetSubGraph(nullptr); @@ -992,78 +615,251 @@ static bool InferPopAsGet(CallNode *call_node) { return false; } -// special function list -// special function that mindspore support and not inline, -// the return values or type can be infer -static const std::unordered_map kFuncWhiteListMap = { - // fuzzy match - {kMindsporeNamePrimitive, {CheckPrimitive, InferPrimitive}}, - {kMindsporeNameMetaFuncGraph, {CheckMetaFunc_, InferMetaFunc_}}, - {kMindsporeNameGradFunc, {CheckGradFunc, InferGradFunc}}, - {kMindsporeNameMsCell, {CheckCell, InferCell}}, - // name match - {kMindsporeNameJitFunc, {CheckJitFunc, SetCallResType}}, - {kMindsporeNameGetCachePrim, {CheckGetCachePrim_, InferGetCachePrim_}}, - {kMindsporeNameRegistryGet, {CheckRegistryGet, InferRegistryGet}}, - {kMindsporeNameTensorInitCheck, {CheckTensorBypass, InferTensorBypass}}, - {kMindsporeNameTensorContiguous, {CheckTensorBypass, InferTensorBypass}}, - // builtin_function_or_method - {kBuiltinNameFunctionOrMethod, {CheckBuiltinFuncOrMethod, InferBuiltinFuncOrMethod}}, - // object convert map - {kMindsporeNameConvertMap, {CheckConvertMap, InferConvertMap}}, - {kJitForbidden, {CheckJitForbidden, SetCallResType}}, - {kJitConstexpr, {CheckJitConstexpr, JustCallAndSetRes}}, - {kMindsporeNameConstexpr, {CheckMSConstexpr, InferMSConstexpr}}, - {kMindsporeNamePrimexpr, {CheckMSConstexpr, InferMSConstexpr}}, - {kMindsporeNameTensorAsType, {CheckTensorAsType, InferTensorAsType}}, - {kBuiltinNameAppend, {CheckListAppend, InferListAppend}}, - {kBuiltinNamePop, {CheckBuiltinFuncOrMethod, InferPopAsGet}}, -}; +static bool SetForbiddenFuncInfo(CallNode *call_node) { + SetCallResType(call_node); + call_node->SetInlineReason(InlineReason::kInlineFunc_Type_Unsupported); + return false; +} -static const std::vector> kFuncWhiteListFuzzyMatcher = { - {CheckJitConstexpr, kJitConstexpr}, - {CheckMetaFunc_, kMindsporeNameMetaFuncGraph}, - {CheckGradFunc, kMindsporeNameGradFunc}, - // guard these call by short traces - {CheckCell, kMindsporeNameMsCell}, - {CheckConvertMap, kMindsporeNameConvertMap}, - // builtin_function_or_method - {CheckBuiltinFuncOrMethod, kBuiltinNameFunctionOrMethod}, - {CheckJitForbidden, kJitForbidden}, -}; +bool InferMsApiFunc(CallNode *call_node) { + Graph *sub_graph = call_node->GetSubGraph(); + SetCallResType(call_node); + if (call_node->input(0)->GetVobj() == nullptr || call_node->input(0)->GetVobj()->GetPyObject().ptr() == nullptr) { + return false; + } + + py::object callable_object = call_node->input(0)->GetVobj()->GetPyObject(); + std::vector args; + std::transform(call_node->getInputs().begin() + 1, call_node->getInputs().end(), std::back_inserter(args), + [](ValueNode *n) { return n->GetVobj() ? n->GetVobj()->GetPyObject() : py::object(); }); + auto pair = Utils::PackCallStackArgs(args, call_node->GetOpcode()); + if (pair.first.ptr() == nullptr) { + return false; + } + PyTypeObject *callable_type = Py_TYPE(callable_object.ptr()); + + AObject *info; + + bool enable_func_graph_eval = kPIJitConfigDefault.GetBoolConfig(GraphJitConfig::kEnableMsApiInfer); + if (enable_func_graph_eval) { + py::object res = EvalMSAPIValue(callable_object, pair.first, pair.second); + info = AObject::Convert(res); + } else if (IsPrimitiveType(callable_type) || IsPrimitiveFunctionType(callable_type)) { + call_node->SetSubGraph(sub_graph); + return InferPrimitive(call_node); + } else { + info = InferFuncResult(callable_object, pair.first, pair.second, call_node->GetGraph()->Config(), true); + } + + call_node->SetVobj(info); + if (info->GetPyObject().ptr() != nullptr) { + ConstantInfo::CollectBuiltinFuncConstantInfo(call_node); + call_node->input(0)->GetVobj()->SetMsFlag(AObject::kMsFlagStandardFunc); + } + if (call_node->IsConstantValue()) { + return CallNodeReturnConst(call_node, sub_graph, call_node->GetVobj()); + } + return false; +} -static const std::unordered_map kMindFuncWhiteListMap = { - {kMindsporeNameJitFunc, {CheckJitFunc, SetCallResType}}, - {kMindsporeNameGetCachePrim, {CheckGetCachePrim_, InferGetCachePrim_}}, - {kMindsporeNameRegistryGet, {CheckRegistryGet, InferRegistryGet}}, - {kMindsporeNameTensorInitCheck, {CheckTensorBypass, InferTensorBypass}}, - {kMindsporeNameTensorContiguous, {CheckTensorBypass, InferTensorBypass}}, - {kBuiltinNameFunctionOrMethod, {CheckBuiltinFuncOrMethod, InferBuiltinFuncOrMethod}}, - {kJitForbidden, {CheckJitForbidden, SetCallResType}}, - {kJitConstexpr, {CheckJitConstexpr, JustCallAndSetRes}}, +enum FuncKey { + FUNC_KEY_EMPTY = 0, // "" + FUNC_KEY_PIJIT_CONSTEXPR, // "pijit.constexpr" + FUNC_KEY_PIJIT_FORBIDDEN, // "pijit.forbidden" + FUNC_KEY_BUILTIN_FUNC, // "builtin.func" + FUNC_KEY_LIST_APPEND, // "list.append" + FUNC_KEY_DICT_POP, // "dict.pop" + FUNC_KEY_PRIMITIVE, // "mindspore._c_expression.Primitive_" + FUNC_KEY_META_FUNCG_RAPH, // "mindspore._c_expression.MetaFuncGraph_" + FUNC_KEY_PSJIT_CODE, // "mindspore.common.api.jit..staging_specialize" + FUNC_KEY_CONSTEXPR, // "mindspore.ops.primitive.constexpr" + FUNC_KEY_PRIMEXPR, // "mindspore.ops.primitive._primexpr" + FUNC_KEY_GET_CACHE_PRIM, // "mindspore.ops._primitive_cache._get_cache_prim" + FUNC_KEY_REGISTRY_GET, // "mindspore.common._register_for_tensor.Registry.get" + FUNC_KEY_TENSOR_ASTYPE, // "mindspore.common.tensor.Tensor.astype" + FUNC_KEY_GRAD_OPERATIONS_CODE, // "mindspore.ops.composite.base._Grad.__call__..after_grad" + FUNC_KEY_PSJIT_CONVERTMAP, // "mindspore._extends.parse.resources.convert_object_map" + FUNC_KEY_GRAPH_CELL, // "mindspore.nn.cell.GraphCell" + FUNC_KEY_MS_API, // mindspore api + FUNC_KEY_COUNT, +}; +static FuncKey FindFuncKey(const py::object &callable); + +static const std::unordered_map infer_func_map = { + {FUNC_KEY_PIJIT_CONSTEXPR, JustCallAndSetRes}, + {FUNC_KEY_PIJIT_FORBIDDEN, SetForbiddenFuncInfo}, + {FUNC_KEY_BUILTIN_FUNC, InferBuiltinFuncOrMethod}, + {FUNC_KEY_LIST_APPEND, InferListAppend}, + {FUNC_KEY_DICT_POP, InferPopAsGet}, + {FUNC_KEY_PRIMITIVE, InferPrimitive}, + {FUNC_KEY_META_FUNCG_RAPH, InferMetaFunc}, + {FUNC_KEY_PSJIT_CODE, SetCallResType}, + {FUNC_KEY_CONSTEXPR, InferMSConstexpr}, + {FUNC_KEY_PRIMEXPR, InferMSConstexpr}, + {FUNC_KEY_GET_CACHE_PRIM, InferGetCachePrim}, + {FUNC_KEY_REGISTRY_GET, InferRegistryGet}, + {FUNC_KEY_TENSOR_ASTYPE, InferTensorAsType}, + {FUNC_KEY_GRAD_OPERATIONS_CODE, InferGradFunc}, + {FUNC_KEY_PSJIT_CONVERTMAP, InferConvertMap}, + {FUNC_KEY_GRAPH_CELL, SetCallResType}, + {FUNC_KEY_MS_API, InferMsApiFunc}, }; -static const std::vector> kMindFuncWhiteListFuzzyMatcher = { - {CheckJitConstexpr, kJitConstexpr}, - {CheckBuiltinFuncOrMethod, kBuiltinNameFunctionOrMethod}, - {CheckJitForbidden, kJitForbidden}, +static const std::unordered_map mind_infer_func_map = { + {FUNC_KEY_PIJIT_CONSTEXPR, JustCallAndSetRes}, {FUNC_KEY_PIJIT_FORBIDDEN, SetForbiddenFuncInfo}, + {FUNC_KEY_BUILTIN_FUNC, InferBuiltinFuncOrMethod}, {FUNC_KEY_PSJIT_CODE, SetCallResType}, + {FUNC_KEY_GET_CACHE_PRIM, InferGetCachePrim}, {FUNC_KEY_REGISTRY_GET, InferRegistryGet}, }; -const std::string GetMindsporeNamePrimitive() { return kMindsporeNamePrimitive; } +InferFunc FindInferFunc(const py::object &callable, bool trace_flag) { + FuncKey k = FindFuncKey(callable); + const auto &map = trace_flag ? mind_infer_func_map : infer_func_map; + auto iter = map.find(k); + if (iter != map.end()) { + return iter->second; + } + return nullptr; +} -const std::unordered_map &GetFuncWhiteListMap(bool trace_flag) { - if (trace_flag) { - return kMindFuncWhiteListMap; - } else { - return kFuncWhiteListMap; +static const std::unordered_map &GetFuncKeyMap() { + static std::unordered_map map = {}; + if (!map.empty()) { + return map; + } + py::object func_map = Utils::GetModuleAttr(kModuleName, kFuncMapName, true, true); + MS_EXCEPTION_IF_CHECK_FAIL(PyDict_CheckExact(func_map.ptr()), "white list func map must be 'dict[int, str]'"); + PyObject *key; + PyObject *value; + Py_ssize_t pos = 0; + while (PyDict_Next(func_map.ptr(), &pos, &key, &value)) { + MS_EXCEPTION_IF_CHECK_FAIL(PyLong_CheckExact(key), "white list func map key must be 'int'"); + MS_EXCEPTION_IF_CHECK_FAIL(PyLong_CheckExact(value), "white list func map value must be 'int'"); + size_t k = (PyLong_AsSize_t(value)); + MS_EXCEPTION_IF_CHECK_FAIL(k < FUNC_KEY_COUNT, "white list func map got error FuncKey " + std::to_string(k)); + map[PyLong_AsSize_t(key)] = static_cast(k); + } + return map; +} + +static FuncKey KeyFinderFuncId(const py::object &callable) { + auto iter = GetFuncKeyMap().find(FunctionId(callable)); + return iter != GetFuncKeyMap().end() ? iter->second : FUNC_KEY_EMPTY; +} + +static FuncKey KeyFinderFuncCodeId(const py::object &callable) { + PyObject *func = callable.ptr(); + if (PyMethod_Check(func)) { + func = PyMethod_GET_FUNCTION(func); + } + if (PyFunction_Check(func)) { + func = PyFunction_GET_CODE(func); + } + if (!PyCode_Check(func)) { + return FUNC_KEY_EMPTY; } + auto iter = GetFuncKeyMap().find(reinterpret_cast(func)); + return iter != GetFuncKeyMap().end() ? iter->second : FUNC_KEY_EMPTY; } -const std::vector> &GetFuncWhiteListFuzzyMatcher(bool trace_flag) { - if (trace_flag) { - return kMindFuncWhiteListFuzzyMatcher; + +static FuncKey KeyFinderPrimitive(const py::object &callable) { + PyTypeObject *type_object = Py_TYPE(callable.ptr()); + bool convert_to_prim = IsPrimitiveType(type_object) || IsPrimitiveFunctionType(type_object); + if (!convert_to_prim) { + return FUNC_KEY_EMPTY; + } + py::object func = py::getattr(reinterpret_cast(type_object), kSlotCallName, nullptr); + size_t id; + if (func.ptr() == nullptr) { + // primitive not defined slot __call__, use it self as id + id = reinterpret_cast(callable.ptr()); + } else if (PyFunction_Check(func.ptr())) { + // primitive defined python function __call__ + id = reinterpret_cast(PyFunction_GET_CODE(func.ptr())); } else { - return kFuncWhiteListFuzzyMatcher; + // primitive defined cpp function __call__ + id = FunctionId(func); + } + // first, find map to check special primitive. + auto iter = GetFuncKeyMap().find(id); + return iter != GetFuncKeyMap().end() ? iter->second : FUNC_KEY_PRIMITIVE; +} + +static FuncKey KeyFinderMetaFunc(const py::object &callable) { + PyTypeObject *type_object = reinterpret_cast(callable.ptr()); + type_object = PyType_CheckExact(type_object) ? type_object : Py_TYPE(type_object); + return IsMetaFuncGraphType(type_object) ? FUNC_KEY_META_FUNCG_RAPH : FUNC_KEY_EMPTY; +} + +static FuncKey KeyFinderGraphCell(const py::object &callable) { + static size_t id = 0; + if (id == 0) { + py::object type = Utils::GetModuleAttr("mindspore.nn.cell", "GraphCell", false, true); + id = reinterpret_cast(type.ptr()); + } + PyTypeObject *type_object = reinterpret_cast(callable.ptr()); + type_object = PyType_CheckExact(type_object) ? type_object : Py_TYPE(type_object); + size_t cur_id = reinterpret_cast(type_object); + return cur_id == id ? FUNC_KEY_GRAPH_CELL : FUNC_KEY_EMPTY; +} + +static FuncKey KeyFinderSkipModule(const py::object &callable) { + const auto &modules = kPIJitConfigDefault.allowed_inline_modules(); + std::string mod = GetTopModule(callable); + if (modules.find(mod) != modules.end()) { + return FUNC_KEY_EMPTY; + } + + PyObject *func_info = callable.ptr(); + if (PyMethod_Check(func_info)) { + func_info = PyMethod_GET_FUNCTION(func_info); + } + if (!PyFunction_Check(func_info) && !PyCFunction_Check(func_info) && !PyType_Check(func_info)) { + func_info = reinterpret_cast(Py_TYPE(func_info)); + } + MS_LOG(DEBUG) << "func " << std::string(py::str(func_info)) << " is forbidden to analyze, module is " << mod; + return FUNC_KEY_PIJIT_FORBIDDEN; +} + +static FuncKey FindFuncKey(const py::object &callable) { + if (callable.ptr() == nullptr || !PyCallable_Check(callable.ptr())) { + return FUNC_KEY_EMPTY; + } + std::vector finders = { + KeyFinderFuncId, KeyFinderFuncCodeId, KeyFinderPrimitive, + KeyFinderMetaFunc, KeyFinderGraphCell, KeyFinderSkipModule, // must be last for check modules + }; + FuncKey res = FUNC_KEY_EMPTY; + for (auto iter = finders.begin(), end = finders.end(); iter != end && res == FUNC_KEY_EMPTY; ++iter) { + res = (*iter)(callable); + } + return res; +} + +bool CheckJitConstexpr(const py::object &func) { + if (func.ptr() == nullptr) { + return false; + } + FuncKey k = KeyFinderFuncId(func); + return k == FUNC_KEY_PIJIT_CONSTEXPR; +} + +static bool CheckConstexpr(const py::object &func) { return KeyFinderPrimitive(func) == FUNC_KEY_CONSTEXPR; } + +bool CheckMSConstexpr(const py::object &func) { + if (func.ptr() == nullptr) { + return false; } + FuncKey k = KeyFinderPrimitive(func); + return k == FUNC_KEY_CONSTEXPR || k == FUNC_KEY_PRIMEXPR; } + +bool CheckBuiltinFuncOrMethod(const py::object &func) { + if (func.ptr() == nullptr) { + return false; + } + FuncKey k = KeyFinderFuncId(func); + return k == FUNC_KEY_BUILTIN_FUNC; +} + } // namespace pijit } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/special_func_infer.h b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/special_func_infer.h index 55749dfaaeedc12c237fe6b53bb6b9c02720b988..d147fbd4047ab3037e4f9d992dc0e0a3dae67c51 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_capture/special_func_infer.h +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_capture/special_func_infer.h @@ -25,23 +25,20 @@ namespace mindspore { namespace pijit { -using CheckFunc = bool (*)(const py::object &); + using InferFunc = bool (*)(CallNode *); -struct SpecialAction { - CheckFunc check; - InferFunc infer; -}; +InferFunc FindInferFunc(const py::object &callable, bool trace_flag = false); -const char *GetFuncName(const py::object &f); -bool CheckPrimitive(const py::object &func); void HandleGradFuncCall(CallNode *call_node, AObject *decorated, bool sens_param); bool GuardConstCallNodeParam(CallNode *call_node, Graph *sub_graph, int max_guard_depth); bool JustCallAndSetRes(CallNode *call_node); -const std::unordered_map &GetFuncWhiteListMap(bool trace_flag = false); -const std::vector> &GetFuncWhiteListFuzzyMatcher(bool trace_flag = false); -const std::string GetMindsporeNamePrimitive(); +bool CheckJitConstexpr(const py::object &func); +bool CheckMSConstexpr(const py::object &func); +bool CheckBuiltinFuncOrMethod(const py::object &func); +bool InferBuiltinFuncOrMethod(CallNode *call_node); bool InferListAppend(CallNode *call_node); + } // namespace pijit } // namespace mindspore diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.cc b/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.cc index 6c4b3df4e0001e094822f9d9e203d568b344208e..a08cf1622acacafcb66d327f9fa9b63d571e0b08 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include "base/base.h" #include "abstract/ops/primitive_infer_map.h" #include "ops/auto_generate/gen_ops_primitive.h" @@ -34,6 +35,7 @@ #include "pipeline/jit/pi/pydef.h" #include "pipeline/jit/pi/graph_guard/guard_utils.h" #include "pipeline/jit/ps/parse/data_converter.h" +#include "pipeline/jit/ps/action.h" #include "pipeline/jit/pi/graph_build/func_graph_builder.h" namespace mindspore { @@ -53,6 +55,11 @@ namespace pijit { static InferEnginePtr g_pInferEngine = nullptr; +template <> +bool IsPrimitiveFunctionType(PyTypeObject *tp) { + return IsPybindType(tp); +} + InferEnginePtr InferEngine::GetInstance() { if (g_pInferEngine == nullptr) { g_pInferEngine = std::shared_ptr(new InferEngine()); @@ -817,7 +824,6 @@ bool CheckTensorDataInitialized(const py::object &py_tensor) { return false; } -extern bool IsFuncInByPassWhiteList(const std::string &name); bool FindTensorName(const std::string &name) { const auto &meth = pipeline::GetMethodMap().find(kObjectTypeTensorType)->second; if (meth.find(name) != meth.end()) { @@ -830,7 +836,83 @@ bool FindTensorName(const std::string &name) { if (name == "device") { return true; } - return IsFuncInByPassWhiteList(name); + return false; +} + +static AbstractBasePtr PyToAbs(py::handle handle) { + py::object input = py::cast(handle); + ValuePtr value_ptr; + if (!parse::ConvertStubData(input, &value_ptr) || value_ptr == nullptr) { + MS_LOG(ERROR) << "can't convert argument to value ptr [" << std::string(py::str(input)) << "]"; + return nullptr; + } + return value_ptr->ToAbstract(); +} + +static std::unique_ptr MakeArgumentsAbstract(const py::object &callable_object, py::object args, + py::object key_words) { + py::object signature = py::module::import("inspect").attr("signature")(callable_object).attr("bind"); + py::object bind_args = py::reinterpret_steal(PyObject_Call(signature.ptr(), args.ptr(), key_words.ptr())); + (void)bind_args.attr("apply_defaults")(); + args = py::tuple(bind_args.attr("args")); + key_words = py::dict(bind_args.attr("kwargs")); + + AbstractBasePtrList list; + for (auto value : args) { + auto abs = PyToAbs(value); + if (abs == nullptr) { + return nullptr; + } + list.push_back(abs); + } + if (key_words.ptr() == nullptr) { + return std::make_unique(std::move(list)); + } + + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(key_words.ptr(), &pos, &key, &value)) { + auto abs = PyToAbs(value); + if (abs == nullptr) { + return nullptr; + } + list.push_back(std::make_shared(PyUnicode_AsUTF8(key), abs)); + } + return std::make_unique(std::move(list)); +} + +py::object EvalMSAPIValue(const py::object &ms_api, const py::object &args, const py::object &key_words) { + py::object callable_object = ms_api; + ValuePtr func_graph; + if (!parse::ConvertData(callable_object, &func_graph) || func_graph == nullptr) { + MS_LOG(ERROR) << "can't convert callable object to value ptr [" << std::string(py::str(callable_object)) << "]"; + return py::object(); + } + + auto inputs_ptr = MakeArgumentsAbstract(callable_object, args, key_words); + if (inputs_ptr == nullptr) { + return py::object(); + } + + AbstractBasePtrList inputs_abs_list = std::move(*inputs_ptr); + AbstractBasePtr eval_result; + if (func_graph->isa()) { + auto eval_res = abstract::EvalOnePrim(func_graph->cast(), inputs_abs_list); + eval_result = eval_res == nullptr ? nullptr : eval_res->abstract(); + } else if (func_graph->ToAbstract()->isa()) { + auto analyze_res = pipeline::AbstractAnalyze(func_graph, inputs_abs_list); + eval_result = analyze_res.eval_result == nullptr ? nullptr : analyze_res.eval_result->abstract(); + } + if (eval_result == nullptr) { + MS_LOG(ERROR) << "eval callable object failed [" << std::string(py::str(callable_object)) << "]"; + return py::object(); + } + py::object res = FuncGraphBuilder::ConvertToPyObj(eval_result); + if (res.ptr() == nullptr) { + MS_LOG(ERROR) << "can't convert AbstractBasePtr to PyObject [" << eval_result->ToString() << "]"; + return py::object(); + } + return ConvertCppTensor(res); } } // namespace pijit diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.h b/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.h index 19feb61f8689386b07cdde869be75c7b209c18ca..57153574d408de3373874d8a6f267d0da87c42ca 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.h +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_guard/infer.h @@ -75,6 +75,8 @@ bool IsCellType(PyTypeObject *tp); template bool IsPrimitiveType(PyTypeObject *tp); template +bool IsPrimitiveFunctionType(PyTypeObject *tp); +template bool IsMetaFuncGraphType(PyTypeObject *tp); template bool IsMSDTypeType(PyTypeObject *tp); @@ -82,6 +84,7 @@ bool IsMSDTypeType(PyTypeObject *tp); bool FindTensorName(const std::string &name); bool CheckTensorDataInitialized(const py::object &tensor); +py::object EvalMSAPIValue(const py::object &ms_api, const py::object &args, const py::object &key_words); using SpecialPrimitiveInferFuncMap = std::unordered_map &)>; diff --git a/mindspore/ccsrc/pipeline/jit/pi/graph_guard/trace.cc b/mindspore/ccsrc/pipeline/jit/pi/graph_guard/trace.cc index ed72caae17262de3ea812f68cd6852c07a2e4994..69e89d94413e6f1dd9198c3503da071abad044a3 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/graph_guard/trace.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/graph_guard/trace.cc @@ -32,6 +32,7 @@ #include "include/common/utils/python_adapter.h" #include "pipeline/jit/pi/graph_capture/abstract_object.h" #include "pipeline/jit/pi/pi_jit_config.h" +#include "pipeline/jit/pi/external.h" namespace mindspore { namespace pijit { @@ -65,7 +66,10 @@ static const char kMindTorchFlag[] = "mindtorch"; static const char kTrainingFlag[] = "training"; static const char kMindSporePackPrefix[] = "mindspore."; static const char kMindtorchPackPrefix[] = "mindtorch."; -extern bool check_builtin_cfunc(const py::object &func); + +constexpr const char *kFuncWhiteListModuleName = "mindspore._extends.pijit.pijit_func_white_list"; +constexpr const char *kGuardFuncMapName = "_guard_func_map"; + static PyObject *RichCompare(PyObject *left, PyObject *right, int oparg); static bool IsCastFunc(std::string name) { @@ -283,8 +287,7 @@ RootTrace::RootTrace(PyObject *pObj, TraceType tt, int index, std::string name, depth_ = 1; originType_ = tt; curType_ = tt; - const auto &k = *kPIJitConfigDefault.getSetConfig(GraphJitConfig::kAllowedInlineModules); - for (auto n : k) { + for (auto n : kPIJitConfigDefault.allowed_inline_modules()) { if (module_name.find(n) == 0) { is_const_ = true; break; @@ -2290,11 +2293,42 @@ void OpTrace::JudgeSubScrRandPass() { } } +static const std::unordered_map &GetGuardFuncKeyMap() { + static std::unordered_map map = {}; + static bool init = false; + if (init) { + return map; + } + init = true; + py::object func_map = Utils::GetModuleAttr(kFuncWhiteListModuleName, kGuardFuncMapName, true, true); + MS_EXCEPTION_IF_CHECK_FAIL(PyDict_CheckExact(func_map.ptr()), "white list func map must be 'dict[int, int]'"); + PyObject *key; + PyObject *value; + Py_ssize_t pos = 0; + while (PyDict_Next(func_map.ptr(), &pos, &key, &value)) { + MS_EXCEPTION_IF_CHECK_FAIL(PyLong_CheckExact(key), "white list func map key must be 'int'"); + MS_EXCEPTION_IF_CHECK_FAIL(PyLong_CheckExact(value), "white list func map value must be 'int'"); + map[PyLong_AsSize_t(key)] = PyLong_AsSize_t(value); + } + return map; +} + +static bool CheckRelaxGuardFunc(const py::object &callable) { + static size_t guard_key_relax_func = 0; + if (guard_key_relax_func == 0) { + py::object key_object = Utils::GetModuleAttr(kFuncWhiteListModuleName, "GUARD_KEY_RELAX_FUNC", true, true); + guard_key_relax_func = py::cast(key_object); + } + + auto iter = GetGuardFuncKeyMap().find(FunctionId(callable)); + return iter != GetGuardFuncKeyMap().end() && iter->second == guard_key_relax_func; +} + void OpTrace::JudgeRelaxGuardFuncPass() { if (opcode_ != CALL_FUNCTION || params_.size() < kParamCountOne) { return; } - if (kPIJitConfigDefault.CheckJitRelaxGuard(py::cast(params_[kParamIndexOne]->GetObject()))) { + if (CheckRelaxGuardFunc(py::cast(params_[0]->GetObject()))) { EnableRelax(); } } diff --git a/mindspore/ccsrc/pipeline/jit/pi/init.cc b/mindspore/ccsrc/pipeline/jit/pi/init.cc index 9d9194c697a697f7a736b66c218e67c3994737bd..2c953f0473849c25f6913d1816848e3c7597b4ef 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/init.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/init.cc @@ -31,6 +31,9 @@ void RegPIJitInterface(py::module *m) { (void)m->def("get_code_extra", &mindspore::get_code_extra, "get copy of code extra which is the pijit compile result"); + (void)m->def("function_id", &mindspore::FunctionId, + "Get cpp function pointer, or python function pointer, or object pointer"); + (void)py::class_(*m, "FunctionNode_") .def_static("record_primitive", &FunctionNode::RecordPrimitive, py::arg("prim"), py::arg("out"), py::arg("inputs"), "Record the executed primitive during forward execution.") diff --git a/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.cc b/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.cc index f6009e83933905804e1f122854d2ca7d5ed34b2c..38cf07f99d2a7d121fa9da68761de7384176ae51 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.cc @@ -28,12 +28,14 @@ GraphJitConfig kPIJitConfigDefault; constexpr int kDefaultMaxTraceDepth = 16; +constexpr const char *kModuleName = "mindspore._extends.pijit.pijit_func_white_list"; +constexpr const char *kFuncMapName = "_func_map"; +constexpr const char *kGuardFuncMapName = "guard_func_map"; + static const std::unordered_map key_map = { {"auto_jit_func_filter", &GraphJitConfig::SetAutoJitFilter}, {"auto_jit_cell", &GraphJitConfig::SetBool}, {"auto_grad", &GraphJitConfig::SetBool}, - // remove this config if 'strict_mode_cells' works well, and default inline all construct - {"replace_nncell_by_construct", &GraphJitConfig::SetBool}, {"compile_by_trace", &GraphJitConfig::SetBool}, {"print_after_all", &GraphJitConfig::SetBool}, {"print_tb", &GraphJitConfig::SetBool}, @@ -78,7 +80,6 @@ static const std::unordered_map}, {"relax_guard_count", &GraphJitConfig::SetInt}, {"allowed_inline_modules", &GraphJitConfig::AddAllowedInlineModules}, - {"strict_mode_cells", &GraphJitConfig::AddPSJitStrictCells}, {"pijit_forbidden", &GraphJitConfig::AddJitForbidden}, {"pijit_constexpr", &GraphJitConfig::AddJitConstexpr}, {"relax_guard_func", &GraphJitConfig::AddJitRelaxGuard}, @@ -87,7 +88,6 @@ static const std::unordered_map(registry); } +static bool AddToFuncMap(PyObject *list, const std::string &map_name, const std::string &key) { + py::object func_map = Utils::GetModuleAttr(kModuleName, map_name, true, true); + py::object key_object = Utils::GetModuleAttr(kModuleName, key, true, true); + for (const py::handle &i : py::iter(list)) { + if (!PyCallable_Check(i.ptr())) { + return false; + } + py::int_ id = FunctionId(py::reinterpret_borrow(i)); + PyDict_SetItem(func_map.ptr(), id.ptr(), key_object.ptr()); + } + return true; +} + +bool GraphJitConfig::AddJitForbidden(PyObject *list) { + return AddToFuncMap(list, kFuncMapName, "FUNC_KEY_PIJIT_FORBIDDEN"); +} + +bool GraphJitConfig::AddJitConstexpr(PyObject *list) { + return AddToFuncMap(list, kFuncMapName, "FUNC_KEY_PIJIT_CONSTEXPR"); +} + +bool GraphJitConfig::AddJitRelaxGuard(PyObject *list) { + return AddToFuncMap(list, kGuardFuncMapName, "GUARD_KEY_RELAX_FUNC"); +} + bool GraphJitConfig::AddAllowedInlineModules(PyObject *list) { py::object l = py::reinterpret_borrow(list); for (const auto &i : py::iter(l)) { @@ -189,29 +214,7 @@ bool GraphJitConfig::AddAllowedInlineModules(PyObject *list) { } void GraphJitConfig::AddAllowedInlineModules(const std::string &module_name) { - set_conf[kAllowedInlineModules - kStrListConf].insert(module_name); -} - -void GraphJitConfig::AddPSJitStrictCells(const std::string &type_str) { - set_conf[kPSJitStrictCells - kStrListConf].insert(type_str); -} - -bool GraphJitConfig::AddPSJitStrictCells(PyObject *list) { - py::object l = py::reinterpret_borrow(list); - py::object func = Utils::GetModuleAttr("mindspore.nn", "Cell", false, false); - for (const auto &i : py::iter(l)) { - if (py::isinstance(i, func)) { - AddPSJitStrictCells(std::string(py::str(reinterpret_cast(Py_TYPE(i.ptr()))))); - continue; - } - if (PyObject_IsSubclass(i.ptr(), func.ptr()) == true) { - AddPSJitStrictCells(std::string(py::str(i.ptr()))); - continue; - } - MS_LOG(WARNING) << "for config option 'strict_mode_cells' all elements must be subclass of mindspore.nn.Cell"; - return false; - } - return true; + allowed_inline_modules_.insert(module_name); } bool GraphJitConfig::SetAutoJitFilter(PyObject *callable) { @@ -256,117 +259,6 @@ bool GraphJitConfig::ShouldAutoJit(PyFrameObject *f) { return res == Py_True; } -static std::string GetCodeKey(PyCodeObject *co) { - std::stringstream s; - s << co << PyUnicode_AsUTF8(co->co_name); - return s.str(); -} - -bool GraphJitConfig::AddJitForbidden(PyObject *list) { - for (const py::handle &i : py::iter(list)) { - py::object code = GetPyCodeObject(py::cast(i)); - PyCodeObject *co = reinterpret_cast(code.ptr()); - if (co == nullptr) { - MS_LOG(WARNING) << "config options 'jit_forbidden', can't find the code of " << std::string(py::str(i)); - return false; - } - set_conf[kJitForbidden - kStrListConf].insert(GetCodeKey(co)); - } - return true; -} - -bool GraphJitConfig::CheckJitForbidden(const py::object &code) { - py::object h = GetPyCodeObject(code); - PyCodeObject *co = reinterpret_cast(h.ptr()); - if (co == nullptr) { - return false; - } - const auto &s = set_conf[kJitForbidden - kStrListConf]; - return s.find(GetCodeKey(co)) != s.end(); -} - -bool GraphJitConfig::AddJitConstexpr(PyObject *list) { - py::set constexpr_callable; - for (const py::handle &i : py::iter(list)) { - if (!PyCallable_Check(i.ptr())) { - MS_LOG(WARNING) << "config pijit_constexpr, all values must be function"; - return false; - } - constexpr_callable.add(i); - } - py::object map = GetObjectsMap(); - if (map.ptr() == nullptr) { - return false; - } - PyDict_SetItemString(map.ptr(), "", constexpr_callable.ptr()); - return true; -} - -bool GraphJitConfig::CheckJitConstexpr(const py::object &code) { - if (code.ptr() == nullptr || !PyCallable_Check(code.ptr())) { - return false; - } - PyTypeObject *tp = Py_TYPE(code.ptr()); - if (tp->tp_hash == nullptr || tp->tp_hash == PyObject_HashNotImplemented) { - return false; - } - py::object map = GetObjectsMap(); - if (map.ptr() == nullptr) { - return false; - } - PyObject *set = PyDict_GetItemString(map.ptr(), ""); - if (set == nullptr) { - return false; - } - int res = PySet_Contains(set, code.ptr()); - if (res < 0) { - PyErr_Clear(); - return false; - } - return res; -} - -bool GraphJitConfig::AddJitRelaxGuard(PyObject *list) { - py::set relax_guard_callable; - for (const py::handle &i : py::iter(list)) { - if (!PyCallable_Check(i.ptr())) { - MS_LOG(WARNING) << "config pijit_constexpr, all values must be function"; - return false; - } - relax_guard_callable.add(i); - } - py::object map = GetObjectsMap(); - if (map.ptr() == nullptr) { - return false; - } - PyDict_SetItemString(map.ptr(), "", relax_guard_callable.ptr()); - return true; -} - -bool GraphJitConfig::CheckJitRelaxGuard(const py::object &code) { - if (code.ptr() == nullptr || !PyCallable_Check(code.ptr())) { - return false; - } - PyTypeObject *tp = Py_TYPE(code.ptr()); - if (tp->tp_hash == nullptr || tp->tp_hash == PyObject_HashNotImplemented) { - return false; - } - py::object map = GetObjectsMap(); - if (map.ptr() == nullptr) { - return false; - } - PyObject *set = PyDict_GetItemString(map.ptr(), ""); - if (set == nullptr) { - return false; - } - int res = PySet_Contains(set, code.ptr()); - if (res < 0) { - PyErr_Clear(); - return false; - } - return res; -} - GraphJitConfig::GraphJitConfig(const py::object &c) { *this = kPIJitConfigDefault; (void)c.cast(); diff --git a/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.h b/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.h index 04f0cd500d8a368db66db47c07ec749cd034a788..1fe130b6976f3d3a78efdb3f89fd4114a56a078e 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.h +++ b/mindspore/ccsrc/pipeline/jit/pi/pi_jit_config.h @@ -31,7 +31,6 @@ class GraphJitConfig { kAutoJitCell, kAutoGrad, kAutoJit, - kReplaceNNCellByConstruct, kPrintAfterAll, kPrintTraceback, kPrintBB, @@ -62,6 +61,7 @@ class GraphJitConfig { kEnableGeneratorExpressionToTuple, kFeatureBreakAtInlinedFunction, kEnableDynamicShape, + kEnableMsApiInfer, kTraceFlag, kSkipException, /* ------------------------------ */ @@ -79,34 +79,23 @@ class GraphJitConfig { kLimitGraphCount, kGuardRelaxCount, /* ------------------------------ */ - kStrListConf, - kAllowedInlineModules, - kPSJitStrictCells, - kJitForbidden, kOptionsCount }; GraphJitConfig(); explicit GraphJitConfig(const py::object &c); bool GetBoolConfig(Options o) const { return o > kBoolConf && o < kIntConf ? bool_conf[o - kBoolConf] : false; } - int getIntConfig(Options o) const { return o > kIntConf && o < kStrListConf ? int_conf[o - kIntConf] : 0; } - const auto *getSetConfig(Options o) const { - return o > kStrListConf && o < kOptionsCount ? &set_conf[o - kStrListConf] : nullptr; - } + int getIntConfig(Options o) const { return o > kIntConf && o < kOptionsCount ? int_conf[o - kIntConf] : 0; } + const auto &allowed_inline_modules() const { return allowed_inline_modules_; } bool ShouldAutoJit(PyFrameObject *f); - bool CheckJitForbidden(const py::object &callable); - bool CheckJitConstexpr(const py::object &code); - bool CheckJitRelaxGuard(const py::object &code); void AddAllowedInlineModules(const std::string &module_name); - void AddPSJitStrictCells(const std::string &type_str); - bool AddJitConstexpr(PyObject *list); - bool AddJitForbidden(PyObject *callable_list); - bool AddAllowedInlineModules(PyObject *list); - bool AddPSJitStrictCells(PyObject *list); bool SetAutoJitFilter(PyObject *callable); bool AddJitRelaxGuard(PyObject *list); + bool AddJitConstexpr(PyObject *callable_list); + bool AddJitForbidden(PyObject *callable_list); + bool AddAllowedInlineModules(PyObject *str_list); template bool SetBool(PyObject *value) { @@ -117,7 +106,7 @@ class GraphJitConfig { template bool SetInt(PyObject *value) { - static_assert(o > kIntConf && o < kStrListConf); + static_assert(o > kIntConf && o < kOptionsCount); int res = PyLong_AsLong(value); if (PyErr_Occurred()) { PyErr_Clear(); @@ -130,9 +119,9 @@ class GraphJitConfig { static void ApplyAutoJitCell(); private: + std::set allowed_inline_modules_; + int int_conf[kOptionsCount - kIntConf]; bool bool_conf[kIntConf - kBoolConf]; - int int_conf[kStrListConf - kIntConf]; - std::set set_conf[kOptionsCount - kStrListConf]; }; extern GraphJitConfig kPIJitConfigDefault; diff --git a/mindspore/ccsrc/pipeline/jit/pi/utils/utils.cc b/mindspore/ccsrc/pipeline/jit/pi/utils/utils.cc index c99efa4b9784894f5b9d5c044f3286954cf50edd..4883504f05e0dc56cb83c3fefce159b44f8da588 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/utils/utils.cc +++ b/mindspore/ccsrc/pipeline/jit/pi/utils/utils.cc @@ -149,14 +149,25 @@ py::object Utils::GetModuleAttr(const std::string &mod_name, const std::string & attr = PyObject_GetAttrString(mod, attr_name.c_str()); Py_DECREF(mod); } - if (attr == nullptr) { - if (_throw) { - throw py::error_already_set(); - } - Utils::ReportPythonException(); + if (attr != nullptr) { + return py::reinterpret_steal(attr); + } + if (!_throw) { PyErr_Clear(); + return py::object(); + } + if (!PyErr_Occurred()) { + if (mod == nullptr) { + if (_import) { + PyErr_Format(PyExc_ModuleNotFoundError, "No module named %s", mod_name.c_str()); + } else { + PyErr_Format(PyExc_KeyError, "sys.modules[%s]", mod_name.c_str()); + } + } else if (attr == nullptr) { + PyErr_Format(PyExc_AttributeError, "%S no attribute %s", mod, attr_name.c_str()); + } } - return py::reinterpret_steal(attr); + throw py::error_already_set(); } std::string Utils::ReportPythonException() { @@ -202,9 +213,9 @@ static std::pair PackExArgs(const std::vector(new_args); - kwargs = py::reinterpret_steal(keys); Py_DECREF(vals); + pargs = py::reinterpret_steal(new_args); + kwargs = py::reinterpret_steal(keys); } } while (0); return {pargs, kwargs}; @@ -401,6 +412,29 @@ py::object GetPyCodeObject(const py::object &any, bool exact_func) { return GetPyCodeObject(py::reinterpret_steal(call), true); } +const char *GetFuncName(const py::object &f) { + PyObject *func = f.ptr(); + if (func == nullptr) { + return ""; + } + if (PyMethod_Check(func)) { + func = PyMethod_GET_FUNCTION(func); + } + if (PyCFunction_Check(func)) { + return reinterpret_cast(func)->m_ml->ml_name; + } + PyCodeObject *co = nullptr; + if (PyFunction_Check(func)) { + co = reinterpret_cast(PyFunction_GET_CODE(func)); + } + if (co) { + return PyUnicode_AsUTF8(co->co_name); + } + PyTypeObject *tp = PyType_Check(func) ? reinterpret_cast(func) : Py_TYPE(func); + const char *res = strrchr(tp->tp_name, '.'); + return res ? res + 1 : tp->tp_name; +} + bool CheckConstPyObject(PyObject *cnst) { static const std::unordered_set cnst_types = { Py_TYPE(Py_None), Py_TYPE(Py_Ellipsis), Py_TYPE(Py_True), &PyCode_Type, &PyFloat_Type, diff --git a/mindspore/ccsrc/pipeline/jit/pi/utils/utils.h b/mindspore/ccsrc/pipeline/jit/pi/utils/utils.h index a0913ee6ed17d898bac53f05116d91b903947a72..7a046d27c8b57e50280700f344a97f46ae1960b7 100644 --- a/mindspore/ccsrc/pipeline/jit/pi/utils/utils.h +++ b/mindspore/ccsrc/pipeline/jit/pi/utils/utils.h @@ -170,6 +170,7 @@ bool CheckContainer(PyObject *obj); bool IsTensorPyObject(PyObject *obj); bool IsMsClass(PyObject *obj); bool IsNumpyObject(PyObject *obj); +const char *GetFuncName(const py::object &handle); std::string GetTopModule(const py::object &o); py::object GetPyCodeObject(const py::object &any, bool exact_func = false); diff --git a/mindspore/ccsrc/pipeline/jit/ps/action.cc b/mindspore/ccsrc/pipeline/jit/ps/action.cc index b4d19f9d887dc18e88ebfc7afd8fe0db907221bc..57a37023c57a8f8d8d5bddb0445401e309c99288 100644 --- a/mindspore/ccsrc/pipeline/jit/ps/action.cc +++ b/mindspore/ccsrc/pipeline/jit/ps/action.cc @@ -335,7 +335,7 @@ abstract::AnalysisResult AbstractAnalyze(const abstract::AnalysisEnginePtr &engi abstract::AnalysisResult AbstractAnalyze(const ValuePtr &func, const abstract::AbstractBasePtrList &args_abs, bool clear) { - auto infer_graph = ConstructGraphForEval(func, args_abs); + auto infer_graph = func->isa() ? func->cast() : ConstructGraphForEval(func, args_abs); auto manager = Manage(infer_graph, true); auto engine = std::make_shared(abstract::GetPrimEvaluatorConstructors(), manager); return AbstractAnalyze(engine, infer_graph, args_abs, false, clear); diff --git a/mindspore/ccsrc/pipeline/jit/ps/parse/data_converter.cc b/mindspore/ccsrc/pipeline/jit/ps/parse/data_converter.cc index 8350149189f1097686ed2d32efafadcc1dc1c909..f0764afcd11499fdec0524182fa9f6adcd93af2a 100644 --- a/mindspore/ccsrc/pipeline/jit/ps/parse/data_converter.cc +++ b/mindspore/ccsrc/pipeline/jit/ps/parse/data_converter.cc @@ -856,6 +856,7 @@ FuncGraphPtr MakeCellFuncGraph(const py::object &obj, const std::string &obj_id, PyObjectWrapperPtr python_obj = std::make_shared(obj, "graph python obj"); func_graph->set_python_obj(python_obj); func_graph->set_flag(FUNC_GRAPH_FLAG_PROXY_GRAPH, true); + func_graph->set_flag(FUNC_GRAPH_FLAG_NO_CHILD_GRAPH, true); std::vector new_node_inputs; new_node_inputs.push_back(NewValueNode(reusing_graph)); for (const auto &origin_param : reusing_graph->parameters()) { @@ -1175,7 +1176,15 @@ TensorPtr ConvertTensorValue(const py::object &obj) { return py::getattr(obj, stub::PY_ATTR_TENSOR).cast(); } auto value = stub->WaitValue(); - return value->cast(); + auto tensor = value->cast(); + if (tensor == nullptr) { + // BaseTensor should convert to Tensor for Graph mode + auto base_tensor = value->cast(); + auto real_tensor = std::make_shared(*base_tensor); + stub->SetValue(real_tensor); + return real_tensor; + } + return tensor; } if (!py::isinstance(obj)) { return nullptr; diff --git a/mindspore/ccsrc/pipeline/jit/ps/parse/parse.cc b/mindspore/ccsrc/pipeline/jit/ps/parse/parse.cc index 1c4f2aecb5bca8e95eaee3892afdaedd95661040..d64725f5481953abc0f96f379d9e114036a5b2a3 100644 --- a/mindspore/ccsrc/pipeline/jit/ps/parse/parse.cc +++ b/mindspore/ccsrc/pipeline/jit/ps/parse/parse.cc @@ -555,7 +555,9 @@ FuncGraphPtr Parser::ParseFuncGraph() { << " expression to make sure it is defined on a separate line.\n For example, " << "the code 'func = nn.ReLU() if y < 1 else lambda x: x + 1' rewritten as\n" << "'if y < 1:\n func = nn.ReLU()\nelse:\n func = lambda x: x + 1\n'" - << "will solve the problem."; + << "will solve the problem.\nIn addition, if you are using a user-defined " + << "package, assuming the module name is demo, please try " + << "setting 'export MS_JIT_MODULES=demo'."; } fn_block = ParseLambdaFunction(lambda_node); } diff --git a/mindspore/ccsrc/pipeline/pynative/forward/forward.cc b/mindspore/ccsrc/pipeline/pynative/forward/forward.cc index 2141825b44f6bf47ed1d09004a51ff2b8429068f..0058187e3fad036dabb9eaf5df9ed2d26d0cf8f7 100644 --- a/mindspore/ccsrc/pipeline/pynative/forward/forward.cc +++ b/mindspore/ccsrc/pipeline/pynative/forward/forward.cc @@ -441,8 +441,8 @@ void ForwardExecutor::CreateViewOpOutputs(const FrontendOpRunInfoPtr &op_run_inf CreateInputAddressForViewOp(view_input_tensor, op_run_info); for (size_t i = 0; i < storage_infos.size(); i++) { - MS_LOG(INFO) << "View op " << op_run_info->base_op_run_info.op_name << ", i:" << i - << ", storage_info:" << storage_infos[i]->ToString(); + MS_LOG(DEBUG) << "View op " << op_run_info->base_op_run_info.op_name << ", i:" << i + << ", storage_info:" << storage_infos[i]->ToString(); CreateViewOutputTensor(op_run_info, view_input_tensor, storage_infos[i], task_type); } diff --git a/mindspore/ccsrc/pipeline/pynative/grad/grad.cc b/mindspore/ccsrc/pipeline/pynative/grad/grad.cc index aa6d105613a42b7453ac8ca47106084042f1ad9d..0d6d15a7ce3cbb0c0253d7bdf297291bc4447876 100644 --- a/mindspore/ccsrc/pipeline/pynative/grad/grad.cc +++ b/mindspore/ccsrc/pipeline/pynative/grad/grad.cc @@ -417,6 +417,31 @@ KernelGraphPtr CloneKernelGraph(const FuncGraphPtr &func_graph) { PyNativeAlgo::Common::FreeFuncGraphForwardNodes(func_graph); return new_graph; } + +void ClearInputGradInfo(const ValuePtr &value) { + MS_EXCEPTION_IF_NULL(value); + if (value->isa()) { + auto tensor_value = value->cast(); + tensor_value->set_auto_grad_meta_data(nullptr); + } else if (value->isa()) { + const auto &value_seq = value->cast(); + for (auto elem : value_seq->value()) { + ClearInputGradInfo(elem); + } + } else if (value->isa()) { + auto stub_node = value->cast(); + MS_EXCEPTION_IF_NULL(stub_node); + ClearInputGradInfo(stub_node->WaitValue()); + } +} + +void ClearInputsGradInfo(const InputArgsInfoPtr &input_args_info) { + MS_EXCEPTION_IF_NULL(input_args_info); + for (size_t i = 0; i < input_args_info->input_size; ++i) { + const auto &v = input_args_info->input_arg_value_vec[i]; + ClearInputGradInfo(v); + } +} } // namespace ForwardExecutorPtr GradExecutor::forward() const { @@ -640,10 +665,16 @@ void GradExecutor::MakeNewTopGraph(const InputArgsInfoPtr &input_args_info) { auto resource = std::make_shared(); MS_EXCEPTION_IF_NULL(input_args_info); const auto &obj_id_with_grad_order = GetAlreadyRunCellId(input_args_info->obj_id); - // To fix scene that user calls twice forward network with grad flag, and then call grad() interface. + // To fix the scene that user calls twice forward network with grad flag, and then call grad() interface. // We need to clear last top cell's parameters grad info to avoid influencing construct bprop graph of current top // cell. ClearParamGradInfo(top_cell_); + // To fix the scene like 1. net(x1) 2. x2 = deepcopy(x1), 3. net(x2) 3. grad_net(x2). 4. grad_net(x1) + // x1's auto_grad_meta_data will be copy to x2, x2 grad will use the same auto_grad_meta_data and clear x1's variable + // and set x2's variable. + // When execute grad_net(x1), x1's variable will not found, so we need clear input's auto_grad_meta_data before + // execute. + ClearInputsGradInfo(input_args_info); top_cell_ = std::make_shared(input_args_info->is_high_order_top_cell, input_args_info->grad_order, obj_id_with_grad_order, input_args_info->cell_id, input_args_info->already_run_cell_id, resource, fg, diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_utils.cc b/mindspore/ccsrc/pipeline/pynative/pynative_utils.cc index 250aac63d9493d3ffb0137e914cf5c8c92ccabb0..f03fc70c63f28354df4b3c3068dd819421056f6f 100644 --- a/mindspore/ccsrc/pipeline/pynative/pynative_utils.cc +++ b/mindspore/ccsrc/pipeline/pynative/pynative_utils.cc @@ -1590,16 +1590,18 @@ FrontendOpRunInfoPtr PyBoost::Init(const PrimitivePtr &prim, const py::list &arg return op_run_info; } -void PyBoost::MakeOutputValue(const FrontendOpRunInfoPtr &op_run_info, - const std::vector &outputs) { - size_t size = outputs.size(); +void PyBoost::MakeOutputValue(const FrontendOpRunInfoPtr &op_run_info, const kernel::pyboost::OpPtr &op) { + size_t size = op->outputs().size(); if (size == kSizeOne) { - op_run_info->real_out = outputs[0]; - return; + if ((op->output_abs() != nullptr && !op->output_abs()->isa()) || + (op->output_value_simple_info() != nullptr && op->output_value_simple_info()->size == kSizeOne)) { + op_run_info->real_out = op->outputs()[0]; + return; + } } std::vector output_values(size); for (size_t i = 0; i < size; ++i) { - const auto &output_tensor = outputs[i]; + const auto &output_tensor = op->outputs()[i]; MS_EXCEPTION_IF_NULL(output_tensor); output_values[i] = output_tensor; } @@ -1633,7 +1635,7 @@ void PyBoost::UpdateOpRunInfo(const kernel::pyboost::OpPtr &op, const FrontendOp MS_EXCEPTION_IF_NULL(op); MS_EXCEPTION_IF_NULL(op_run_info); // Create output value - MakeOutputValue(op_run_info, op->outputs()); + MakeOutputValue(op_run_info, op); // Set output value to python UpdateStubOutput(op_run_info, op->output_abs(), op); diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_utils.h b/mindspore/ccsrc/pipeline/pynative/pynative_utils.h index 0124fa696ed928018d7510908b1cd2a8815b1d3d..0ec6afed00e2a95e7e838c4b98df3112ad76b1e5 100644 --- a/mindspore/ccsrc/pipeline/pynative/pynative_utils.h +++ b/mindspore/ccsrc/pipeline/pynative/pynative_utils.h @@ -166,8 +166,7 @@ struct PyBoost { static FrontendOpRunInfoPtr Init(const PrimitivePtr &prim, const py::list &args); static void DoGrad(const kernel::pyboost::OpPtr &op, const FrontendOpRunInfoPtr &op_run_info, ValuePtrList &&op_inputs); - static void MakeOutputValue(const FrontendOpRunInfoPtr &op_run_info, - const std::vector &outputs); + static void MakeOutputValue(const FrontendOpRunInfoPtr &op_run_info, const kernel::pyboost::OpPtr &op); static void UpdateStubOutput(const FrontendOpRunInfoPtr &op_run_info, const AbstractBasePtr &abstract, const kernel::pyboost::OpPtr &op); static void UpdateOpRunInfo(const kernel::pyboost::OpPtr &op, const FrontendOpRunInfoPtr &op_run_info); diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.cc index bd50cd7f226c7ecb50e2712280a7c5decdccefdb..28e16e042a8d0f128db01c6a7e638610d148cac1 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.cc @@ -107,7 +107,7 @@ void AscendDeviceAddress::SyncHostMemoryToDeviceWithCopySrc(void *dst, const voi }; auto device_context = GetDeviceContext(); MS_EXCEPTION_IF_NULL(device_context); - auto callback_ret = device_context->GetKernelExecutor(false)->LaunchCallback(callback_func, 0); + auto callback_ret = device_context->GetKernelExecutor(false)->LaunchCallback(callback_func, this->stream_id()); if (!callback_ret) { MS_LOG(EXCEPTION) << "LaunchCallback failed"; } @@ -153,7 +153,7 @@ void AscendDeviceAddress::SyncHostMemoryToDeviceWithTensorData(void *dst, const }; auto device_context = GetDeviceContext(); MS_EXCEPTION_IF_NULL(device_context); - auto callback_ret = device_context->GetKernelExecutor(false)->LaunchCallback(callback_func, 0); + auto callback_ret = device_context->GetKernelExecutor(false)->LaunchCallback(callback_func, this->stream_id()); if (!callback_ret) { MS_LOG(EXCEPTION) << "LaunchCallback failed"; } @@ -470,15 +470,12 @@ ShapeVector AscendDeviceAddress::GetDeviceShape(ShapeVector *host_shape) const { std::shared_ptr AscendDeviceAddress::CreateLaunchTransData(const ShapeVector &host_shape, const std::string &ori_format, const std::string &dst_format) const { - auto runtime_instance = device::KernelRuntimeManager::Instance().GetCurrentKernelRuntime(); - MS_EXCEPTION_IF_NULL(runtime_instance); - auto stream = runtime_instance->compute_stream(); int64_t groups = 1; if (format() == kOpFormat_FRAC_Z) { groups = GetGroupsWithCache(); } - auto launch_trans_data = - std::make_shared(stream, type_id(), GetSize(), ori_format, dst_format, host_shape, groups); + auto launch_trans_data = std::make_shared(this->stream_id(), type_id(), GetSize(), ori_format, + dst_format, host_shape, groups); MS_EXCEPTION_IF_NULL(launch_trans_data); return launch_trans_data; } @@ -809,6 +806,18 @@ bool AscendDeviceAddress::AsyncDeviceToHost(const ShapeVector & /* shape */, siz return true; } +// Asynchronously copy device memory to host side. +bool AscendDeviceAddress::AsyncDeviceToHost(void *host_ptr, size_t size, void *stream) const { + MS_ERROR_IF_NULL(host_ptr); + MS_ERROR_IF_NULL(stream); + auto ret = CALL_ASCEND_API(aclrtMemcpyAsync, host_ptr, size, GetDevicePtr(), size, ACL_MEMCPY_DEVICE_TO_HOST, stream); + if (ret != ACL_ERROR_NONE) { + MS_LOG(ERROR) << "Call aclrtMemcpyAsync device to host failed, the error num[" << ret << "]"; + return false; + } + return true; +} + bool AscendDeviceAddress::ConvertFormatAndSyncHostToDevice(const ShapeVector &shape, size_t size, mindspore::TypeId type, const void *host_ptr, const tensor::TensorDataPtr &tensor_data) const { diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.h b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.h index 4af9993cde7b72e2e0fe7345b4d11e9a94297b89..963aeb01a9b7534f81ce34bf65150a43302ff6e3 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_device_address.h @@ -88,11 +88,13 @@ class AscendDeviceAddress : public LoadableDeviceAddress { // Asynchronously copy host memory to device side. bool AsyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr, - size_t stream_id) const; + size_t stream_id) const override; // Asynchronously copy device memory to host side. - bool AsyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr, size_t stream_id) const; + bool AsyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr, + size_t stream_id) const override; + bool AsyncDeviceToHost(void *host_ptr, size_t size, void *stream) const override; void set_communication_ptr(uint8_t *communication_ptr) override { communication_ptr_ = communication_ptr; // The communication_ptr_ should free to memory pool instead of GetDevicePtr(), so must update device pointer diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_event.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_event.cc index 52834b6f84f5ae28d64d0fc519db7b7858b8c65a..78f93f0299398a43629027c9e8084371416e8b1f 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_event.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_event.cc @@ -36,7 +36,7 @@ AscendEvent::AscendEvent(uint32_t flag) { MS_LOG(ERROR) << "aclrtCreateEventWithFlag failed, ret:" << ret; event_ = nullptr; } - MS_LOG(DEBUG) << "Create ascend event success, flat : " << flag << "."; + MS_LOG(DEBUG) << "Create ascend event success, flag : " << flag << "."; } AscendTimeEvent::AscendTimeEvent() { diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc index 571d1df97552ac4efb1823eea65d4b47af8bf194..f57c716ba0714fb348dda0c1abe14fef17377be2 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/ascend_stream_manager.cc @@ -300,9 +300,7 @@ bool AscendStreamMng::SyncExceptStreamsInList(const std::set &excep return res; } -size_t AscendStreamMng::QueryStreamSize() const { - return std::count_if(streams_.begin(), streams_.end(), [](void *stream) { return stream != nullptr; }); -} +size_t AscendStreamMng::QueryStreamSize() const { return streams_.size(); } bool AscendStreamMng::QueryStream(size_t stream_id) { if (stream_id >= streams_.size()) { diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/launch_transdata.cc b/mindspore/ccsrc/plugin/device/ascend/hal/device/launch_transdata.cc index f0be96a14e7b07e9848f972c3d97f58902644dc6..1df2a4b8ae36bda79e453f4660a3392a132b0b0a 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/launch_transdata.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/launch_transdata.cc @@ -24,6 +24,7 @@ #include "include/common/utils/anfalgo.h" #include "runtime/device/memory_manager.h" #include "plugin/device/ascend/hal/device/ascend_memory_pool.h" +#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" #include "plugin/device/ascend/kernel/acl/acl_kernel_build.h" #include "acl/acl_rt.h" #include "ops/array_op_name.h" @@ -90,7 +91,7 @@ void LaunchTransData::ConstructKernelGraph() { } uint8_t *LaunchTransData::AllocDeviceMem(size_t size) { - auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size); + auto device_memory = AscendMemoryPool::GetInstance().AllocTensorMem(size, false, stream_id_); if (device_memory == nullptr) { MS_LOG(EXCEPTION) << "Fail to alloc memory, size: " << size << "B."; } @@ -145,9 +146,10 @@ void LaunchTransData::LaunchOpKernel() { // workspaces std::vector kernel_workspace; + const auto stream = AscendStreamMng::GetInstance().GetStream(stream_id_); // launch - auto ret_status = kernel_mod_->Launch(kernel_inputs, kernel_workspace, kernel_outputs, stream_); + auto ret_status = kernel_mod_->Launch(kernel_inputs, kernel_workspace, kernel_outputs, stream); if (!ret_status) { MS_LOG(EXCEPTION) << "Launch transdata single kernel failed"; } diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/device/launch_transdata.h b/mindspore/ccsrc/plugin/device/ascend/hal/device/launch_transdata.h index fcdedefd2313f1ade79f42b91df04aa06319f00a..d1cc205393e15ae7c6b578cdd0a3af2efbe80e06 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/device/launch_transdata.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/device/launch_transdata.h @@ -26,9 +26,9 @@ namespace mindspore::device::ascend { class LaunchTransData { public: - LaunchTransData(void *stream, TypeId dtype, size_t total_size, std::string src_format, std::string dst_format, + LaunchTransData(uint32_t stream_id, TypeId dtype, size_t total_size, std::string src_format, std::string dst_format, ShapeVector host_shape, int64_t groups) - : stream_(stream), + : stream_id_(stream_id), dtype_(dtype), total_size_(total_size), src_format_(std::move(src_format)), @@ -48,7 +48,7 @@ class LaunchTransData { void SetKernelBuildInfo(); uint8_t *AllocDeviceMem(size_t size); void CreateOutputAddr(const std::vector &outputs_list, std::vector *kernel_tensors); - void *stream_; + uint32_t stream_id_; TypeId dtype_; size_t total_size_; std::string src_format_; diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/CMakeLists.txt b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/CMakeLists.txt index 98763f0125412e7f762df6041fd6883ca9760047..9106ff32c2d393e612cbd0fd200f7ce4a31ac209 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/CMakeLists.txt +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/CMakeLists.txt @@ -23,6 +23,7 @@ file(GLOB_RECURSE MS_HARDWARE_910B RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "ge_graph_optimization.cc" "acl_somas.cc" "acl_stream_assign.cc" + "gpto.cc" ) set_property(SOURCE ${MS_HARDWARE_910B} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc index 635a3169183792b91bd755a6b0a82622ca26ec4f..8b756ef5eabcfd6738219b160a1bb22c97ca98aa 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.cc @@ -30,7 +30,7 @@ namespace mindspore { namespace device { namespace ascend { -void AclStreamAssign::AssignStream(const NotNull &kernel_graph) { +void AclStreamAssign::AssignStream(const NotNull &kernel_graph, const std::vector> &sched_events) { auto kernels = kernel_graph->execution_order(); if (kernels.empty()) { return; @@ -77,7 +77,7 @@ void AclStreamAssign::AssignStream(const NotNull &kernel_graph) common::AnfAlgo::SetNodeAttr(kAttrStreamId, MakeValue(stream_id), kernels[i - 1]); } } - InsertEventForNonTaskSink(kernel_graph); + InsertEventForNonTaskSink(kernel_graph, sched_events); } void AclStreamAssign::GenKernelIoExecInfoMap( @@ -262,7 +262,7 @@ CNodePtr AclStreamAssign::CreateSendApplyKernel(const NotNull &g auto send_node_ptr = graph_ptr->NewCNode({send_apply}); MS_EXCEPTION_IF_NULL(send_node_ptr); common::AnfAlgo::SetNodeAttr(kAttrEventId, MakeValue(event_id), send_node_ptr); - common::AnfAlgo::SetNodeAttr(kAttrRecrodEventStreamPair, MakeValue(event_generate_id), send_node_ptr); + common::AnfAlgo::SetNodeAttr(kAttrRecordWaitEventStreamPairId, MakeValue(event_generate_id), send_node_ptr); AnfAlgo::SetStreamId(stream_id, send_node_ptr.get()); return send_node_ptr; } @@ -278,7 +278,7 @@ CNodePtr AclStreamAssign::CreateRecvApplyKernel(const NotNull &g MS_EXCEPTION_IF_NULL(recv_node_ptr); common::AnfAlgo::SetNodeAttr(kAttrEventId, MakeValue(event_id), recv_node_ptr); common::AnfAlgo::SetNodeAttr(kAttrRecordEventStream, MakeValue(record_stream_id), recv_node_ptr); - common::AnfAlgo::SetNodeAttr(kAttrRecrodEventStreamPair, MakeValue(event_generate_id), recv_node_ptr); + common::AnfAlgo::SetNodeAttr(kAttrRecordWaitEventStreamPairId, MakeValue(event_generate_id), recv_node_ptr); AnfAlgo::SetStreamId(stream_id, recv_node_ptr.get()); return recv_node_ptr; } @@ -367,11 +367,22 @@ void AclStreamAssign::GenEventsForParallelOp(const NotNull &kern MS_LOG(DEBUG) << "Finish GenEventsForParallelOp."; } -void AclStreamAssign::InsertEventForNonTaskSink(const NotNull &kernel_graph) { +void AclStreamAssign::InsertEventForNonTaskSink(const NotNull &kernel_graph, const std::vector> &sched_events) { mindspore::HashMap> kernel_send; mindspore::HashMap> kernel_recv; AnfAlgo::SetStreamId(kDefaultStreamIndex, kernel_graph->output().get()); - GenEventsForParallelOp(kernel_graph, &kernel_send, &kernel_recv); + + if (common::GetEnv("MS_ENABLE_GPTO") != "1") { + GenEventsForParallelOp(kernel_graph, &kernel_send, &kernel_recv); + } else { + // Ioannis: simple logic should be this, but there seem to be many exceptions tackled in function GenEventsForParallelOp() + for (auto event : sched_events){ + const auto &send = event.first; + const auto &recv = event.second; + InsertEvents(kernel_graph, send, send, &kernel_send, &kernel_recv, recv); + } + } + UpdateEventsToExecutionOrder(kernel_graph, kernel_send, kernel_recv); } } // namespace ascend diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.h b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.h index 11a8aed01ef105b9309120a20fdf14e9318e803d..487bd097b58f0531ad07afb2906c40c9e063213b 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/acl_stream_assign.h @@ -57,7 +57,7 @@ class AclStreamAssign { AclStreamAssign(const AclStreamAssign &) = delete; AclStreamAssign &operator=(const AclStreamAssign &) = delete; - void AssignStream(const NotNull &kernel_graph); + void AssignStream(const NotNull &kernel_graph, const std::vector> &sched_events); private: AclStreamAssign() = default; @@ -74,7 +74,7 @@ class AclStreamAssign { mindspore::HashMap> *kernel_send, mindspore::HashMap> *kernel_recv); - void InsertEventForNonTaskSink(const NotNull &kernel_graph); + void InsertEventForNonTaskSink(const NotNull &kernel_graph, const std::vector> &sched_events); void InsertEventsForInputs(const NotNull &kernel_graph, const CNodePtr &kernel, const NodeIoExecInfoPtr &io_exec_info, diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc index 4646b880ea96e44bb4adeb4f0634ca4fad4493d6..409605b01e96520f8d6ca8c89f8ebc87f7edd0b1 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ascend_deprecated_interface.cc @@ -37,7 +37,6 @@ #include "plugin/device/ascend/hal/device/tensorsummary_utils.h" #include "plugin/device/ascend/hal/device/tensordump_utils.h" #include "plugin/device/ascend/hal/device/mbuf_receive_manager.h" -#include "transform/symbol/acl_base_symbol.h" #include "transform/symbol/acl_rt_symbol.h" #include "transform/symbol/symbol_utils.h" diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc index 3b91bb6b1882963c52c54516cce611046cbc3c00..45858da37a9d9435354e17ce71fd44a292d70d80 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_device_context.cc @@ -39,7 +39,6 @@ #include "mindspore/core/utils/file_utils.h" #include "plugin/device/ascend/hal/device/dump/ascend_dump.h" #include "plugin/device/ascend/optimizer/ge_backend_optimization.h" -#include "transform/symbol/acl_base_symbol.h" #include "transform/symbol/acl_rt_symbol.h" #include "transform/symbol/symbol_utils.h" diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.cc index 202379bb95eaa35766a1142b369f3f1d666975aa..d395599fd1ded3f093fe6dfef44b152dfaabd94a 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.cc @@ -52,6 +52,7 @@ #include "ops/array_ops.h" #include "pybind_api/gil_scoped_long_running.h" #include "include/common/utils/compile_cache_context.h" +#include "debug/data_dump/dump_graph_boundary.h" using InputNameAndType = std::vector>; using Data = ::ge::op::Data; using RefData = ::ge::op::RefData; @@ -711,7 +712,9 @@ void GeGraphExecutor::BuildInputDataGeTensor(const KernelGraphPtr &kernel_graph) } if (input_names.empty()) { MS_LOG(INFO) << "Kernel graph: " << kernel_graph->graph_id() << " input data list is nullptr"; - input_datas_[kernel_graph.get()] = {ge_inputs, need_update_input}; + std::vector device_addr; + device_addr.resize(ge_inputs.size()); + input_datas_[kernel_graph.get()] = {ge_inputs, device_addr, need_update_input}; return; } auto parameters = FilterAllParameters(kernel_graph); @@ -777,7 +780,9 @@ void GeGraphExecutor::BuildInputDataGeTensor(const KernelGraphPtr &kernel_graph) MS_LOG(WARNING) << "Not use all cur inputs, cur_inputs_index: " << cur_inputs_index << ", cur_inputs.size(): " << cur_inputs.size() << ", kernel graph: " << kernel_graph->graph_id(); } - input_datas_[kernel_graph.get()] = {ge_inputs, need_update_input}; + std::vector device_addr; + device_addr.resize(ge_inputs.size()); + input_datas_[kernel_graph.get()] = {ge_inputs, device_addr, need_update_input}; MS_LOG(INFO) << "BuildInputDataGeTensor finish."; } @@ -811,7 +816,9 @@ void GeGraphExecutor::BuildOutputDataGeTensor(const KernelGraphPtr &kernel_graph MS_EXCEPTION_IF_CHECK_FAIL( ge_outputs.size() == graph_outputs.size(), "The size of ge_outputs and graph_outputs check error, kernel graph: " + kernel_graph->ToString()); - output_datas_[kernel_graph.get()] = {ge_outputs, graph_outputs}; + std::vector device_addr; + device_addr.resize(ge_outputs.size()); + output_datas_[kernel_graph.get()] = {ge_outputs, device_addr, graph_outputs}; MS_LOG(INFO) << "BuildOutputDataGeTensor finish."; } @@ -1308,6 +1315,11 @@ bool GeGraphExecutor::RunGraphRefMode(const FuncGraphPtr &graph, const std::vect } } + auto iter_i = input_datas_.find(kg.get()); + if (iter_i != input_datas_.end()) { + datadump::DumpGraphBoundary::GetInstance().HookDumpTask( + kg, iter_i->second.ms_input_addrs, iter_i->second.need_update_input, ResManager()->GetStream(), True); + } { // Release GIL before calling into (potentially long-running) C++ code GilReleaseWithCheck gil_release; @@ -1319,6 +1331,12 @@ bool GeGraphExecutor::RunGraphRefMode(const FuncGraphPtr &graph, const std::vect } } + auto iter_o = output_datas_.find(kg.get()); + if (iter_o != output_datas_.end()) { + datadump::DumpGraphBoundary::GetInstance().HookDumpTask( + kg, iter_o->second.ms_output_addrs, iter_o->second.graph_outputs, ResManager()->GetStream(), False); + } + if (is_dynamic_shape) { auto graph_outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output()); SetDynamicOutputs(graph_outputs, &ge_outputs, ResManager()); @@ -1451,7 +1469,7 @@ FuncGraphPtr GeGraphExecutor::BuildDFGraph(const FuncGraphPtr &anf_graph, return anf_graph; } -std::vector GeGraphExecutor::GenerateInputGeTensor(const KernelGraphPtr &kernel_graph) const { +std::vector GeGraphExecutor::GenerateInputGeTensor(const KernelGraphPtr &kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); std::vector ge_inputs; auto iter = input_datas_.find(kernel_graph.get()); @@ -1504,12 +1522,13 @@ std::vector GeGraphExecutor::GenerateInputGeTensor(const KernelGraphPt output_addr->GetSize() != ge_inputs[kv.second].GetSize()) { (void)ge_inputs[kv.second].SetData(static_cast(output_addr->GetMutablePtr()), output_addr->GetSize(), [](void *) {}); + iter->second.ms_input_addrs[kv.second] = output_addr.get(); } } return ge_inputs; } -std::vector GeGraphExecutor::GenerateOutputGeTensor(const KernelGraphPtr &kernel_graph) const { +std::vector GeGraphExecutor::GenerateOutputGeTensor(const KernelGraphPtr &kernel_graph) { MS_EXCEPTION_IF_NULL(kernel_graph); std::vector ge_outputs; auto iter = output_datas_.find(kernel_graph.get()); @@ -1554,6 +1573,7 @@ std::vector GeGraphExecutor::GenerateOutputGeTensor(const KernelGraphP output_device_addr->GetSize() != ge_outputs[idx].GetSize()) { ge_outputs[idx].SetData(reinterpret_cast(output_device_addr->GetMutablePtr()), output_device_addr->GetSize(), [](void *) {}); + iter->second.ms_output_addrs[idx] = output_device_addr.get(); } idx++; } diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.h b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.h index f8cb80df2648753e34a2b631efa270ea657316a4..c15a490f1386dceafa0f588044336a9898092f14 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.h @@ -35,11 +35,13 @@ namespace device { namespace ascend { struct GeInputData { std::vector ge_inputs; + std::vector ms_input_addrs; std::vector> need_update_input; }; struct GeOutputData { std::vector ge_outputs; + std::vector ms_output_addrs; std::vector> graph_outputs; }; @@ -67,8 +69,8 @@ class GeGraphExecutor : public GraphExecutor { void AllocOutputMemory(const KernelGraphPtr &kernel_graph) const; bool CompileGraph(const KernelGraphPtr &graph, const std::map &compile_options); int64_t CurGraphSinkSize(std::string graph_name); - std::vector GenerateInputGeTensor(const KernelGraphPtr &kernel_graph) const; - std::vector GenerateOutputGeTensor(const KernelGraphPtr &kernel_graph) const; + std::vector GenerateInputGeTensor(const KernelGraphPtr &kernel_graph); + std::vector GenerateOutputGeTensor(const KernelGraphPtr &kernel_graph); GeDeviceResManager *ResManager() const; void RunInitGraph(const std::string &graph_name); void AddRefCorrespondPairs(const KernelGraphPtr &graph, diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc index 7e18ab23368d6b17e8cb8f5337073d885bd99db4..11c1eed8de7022e68bc84919084998f9c93bbaf5 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc @@ -27,6 +27,7 @@ #include "plugin/device/ascend/hal/common/ascend_utils.h" #include "plugin/device/ascend/hal/hardware/acl_somas.h" #include "plugin/device/ascend/hal/hardware/acl_stream_assign.h" +#include "plugin/device/ascend/hal/hardware/gpto.h" #include "plugin/device/ascend/kernel/rts/rt_kernel_build.h" #include "plugin/device/ascend/kernel/hccl/hccl_kernel_metadata.h" #include "plugin/device/ascend/kernel/hccl/hccl_kernel_build.h" @@ -853,13 +854,13 @@ void CreateEventKernelMod(const KernelGraphPtr &kernel_graph) { } } // namespace -void GeKernelExecutor::DoStreamAssign(const KernelGraphPtr &kernel_graph) { +void GeKernelExecutor::DoStreamAssign(const KernelGraphPtr &kernel_graph, const std::vector> &sched_events) { MS_LOG(DEBUG) << "Status record: start stream assign."; auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); MS_EXCEPTION_IF_NULL(kernel_graph); // stream assign - AclStreamAssign::GetInstance().AssignStream(NOT_NULL(kernel_graph)); + AclStreamAssign::GetInstance().AssignStream(NOT_NULL(kernel_graph), sched_events); CreateEventKernelMod(kernel_graph); #ifdef ENABLE_DUMP_IR auto context_ptr = MsContext::GetInstance(); @@ -874,7 +875,7 @@ void GeKernelExecutor::DoStreamAssign(const KernelGraphPtr &kernel_graph) { MS_LOG(DEBUG) << "Status record: end stream assign."; } -void GeKernelExecutor::DoSomas(const FuncGraphPtr &graph) { +void GeKernelExecutor::DoSomas(const FuncGraphPtr &graph, const std::vector> &sched_events) { auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); MS_EXCEPTION_IF_NULL(graph); @@ -883,7 +884,7 @@ void GeKernelExecutor::DoSomas(const FuncGraphPtr &graph) { static const char kAscendEnableInternalKernels[] = "MS_ENABLE_INTERNAL_KERNELS"; static bool enable_runtime_pipeline = common::GetEnv(kAscendEnableInternalKernels) == "on"; if (!enable_runtime_pipeline) { - DoStreamAssign(kernel_graph); + DoStreamAssign(kernel_graph, sched_events); } // somas MS_LOG(DEBUG) << "Status record: start do somas."; @@ -950,7 +951,8 @@ void GeKernelExecutor::PreprocessBeforeRun(const FuncGraphPtr &graph) const { } } - DoSomas(NOT_NULL(graph)); + auto sched_events = opt::GPTO(graph); + DoSomas(NOT_NULL(graph), sched_events); profiler::CollectHostInfo("Ascend", "PreprocessBeforeRun", "GePreprocess", 1, 0, 1); } diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.h b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.h index 7014ed9b62094ccf696d66ed5cd980a222e7d08a..5e8532650cf6736826455ec1fa412df1286ccf65 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.h @@ -70,8 +70,8 @@ class GeKernelExecutor : public KernelExecutor { const device::DeviceAddressPtrList &output_addr_list, const size_t &stream_id) const override; private: - static void DoSomas(const FuncGraphPtr &graph); - static void DoStreamAssign(const KernelGraphPtr &kernel_graph); + static void DoSomas(const FuncGraphPtr &graph, const std::vector> &sched_events); + static void DoStreamAssign(const KernelGraphPtr &kernel_graph, const std::vector> &sched_events); // launch bool MemoryCopyAsync(const CNodePtr &node, const vector &inputs, const vector &outputs) const; diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/gpto.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/gpto.cc new file mode 100644 index 0000000000000000000000000000000000000000..e62f4d74bcf6751dacc21ec797564738e060f5c7 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/gpto.cc @@ -0,0 +1,2099 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mindspore/core/ops/math_op_name.h" +#include "mindspore/core/ops/conv_pool_op_name.h" +#include "mindspore/core/ops/ascend_op_name.h" +#include "mindspore/core/utils/anf_utils.h" +#include "mindspore/ccsrc/include/common/utils/utils.h" +#include "mindspore/ccsrc/frontend/parallel/step_parallel.h" +#include "mindspore/core/utils/misc.h" +#include "include/backend/optimizer/helper.h" +#include "plugin/device/ascend/hal/hardware/gpto.h" +#include "plugin/device/ascend/hal/device/ascend_memory_adapter.h" + +static mindspore::opt::Memory SOFT_MEMORY_LIMIT; +static mindspore::opt::Memory HARD_MEMORY_LIMIT; // preset some value to capture max size of 910B +constexpr size_t kGBToByte = 1073741824; // 1GB + +namespace mindspore { +namespace opt { +// Subroutines Implementing "Scheduling to Dependencies" +struct SortByStart { + bool operator()(const Interval &interval1, const Interval &interval2) const { + const auto &id1 = interval1.id; + const auto &start1 = interval1.start; + const auto &end1 = interval1.end; + const auto &id2 = interval2.id; + const auto &start2 = interval2.start; + const auto &end2 = interval2.end; + return start1 < start2 || (start1 == start2 && end1 < end2) || (start1 == start2 && end1 == end2 && id1 < id2); + } +}; + +struct SortByEnd { + bool operator()(const Interval &interval1, const Interval &interval2) const { + const auto &id1 = interval1.id; + const auto &start1 = interval1.start; + const auto &end1 = interval1.end; + const auto &id2 = interval2.id; + const auto &start2 = interval2.start; + const auto &end2 = interval2.end; + return end1 < end2 || (end1 == end2 && start1 < start2) || (end1 == end2 && start1 == start2 && id1 < id2); + } +}; + +bool Overlap(const Time &start1, const Time &end1, const Time &start2, const Time &end2) { + return (start1 >= start2 && start1 < end2) || + (start2 >= start1 && start2 < end1); // if equal start and end for two intervals, then no overlap +} + +std::vector> gpto::ScheduleToDependencies(const SchedulingOutput &schedule) { + std::vector> dependencies; // to return + MS_LOG(INFO) << "Started Preprocessing of Intervals"; + // Distinguish types and sort + std::unordered_map> tasks_start; + std::unordered_map> tasks_end; + for (const auto &task_time : schedule.task_times) { + tasks_start[task_time.gpto_type].insert(task_time); + tasks_end[task_time.gpto_type].insert(task_time); + } + MS_LOG(INFO) << "Finished Preprocessing of Intervals"; + MS_LOG(INFO) << "Started Main Loop"; + // Main loop: check each task for potential dependencies in its right neighborhood + for (const auto &type_to_set : tasks_start) { + const auto &type = type_to_set.first; + for (auto it = tasks_start[type].begin(); it != tasks_start[type].end(); ++it) { + tasks_end[type].erase(*it); + // Dismiss overlapping tasks: save min end value of non-overlapping task to the right + std::unordered_map dismissed; + auto it1 = std::next(it); + for (; Overlap(it->start, it->end, it1->start, it1->end) && it1 != tasks_start[type].end(); ++it1) { + dismissed[it1->id] = true; + } + Time min_end_value = 0; + for (auto it2 = tasks_end[type].begin(); it2 != tasks_end[type].end(); ++it2) { + if (!dismissed[it2->id]) { + min_end_value = it2->end; + break; + } + } + // Add dependencies to immediate right neighborhood + for (; it1->start < min_end_value && it1 != tasks_start[type].end(); ++it1) { + dependencies.emplace_back(it->id, it1->id); + } + } + } + MS_LOG(INFO) << "Finished Main Loop"; + MS_LOG(INFO) << "Generated " << dependencies.size() << " dependencies"; + return dependencies; +} + +std::vector> gpto::ScheduleToDependenciesDifferentTypes(const SchedulingOutput &schedule) { + std::vector> dependencies; // to return + MS_LOG(INFO) << "Started Preprocessing of Intervals"; + // Distinguish types and sort + std::set tasks_start; + std::set tasks_end; + for (const auto &task_time : schedule.task_times) { + tasks_start.insert(task_time); + tasks_end.insert(task_time); + } + MS_LOG(INFO) << "Finished Preprocessing of Intervals"; + MS_LOG(INFO) << "Started Main Loop"; + // Main loop: check each task for potential dependencies in its right neighborhood + //for (const auto &type_to_set : tasks_start) { + //const auto &type = type_to_set.first; + for (auto it = tasks_start.begin(); it != tasks_start.end(); ++it) { + tasks_end.erase(*it); + // Dismiss overlapping tasks: save min end value of non-overlapping task to the right + std::unordered_map dismissed; + auto it1 = std::next(it); + for (; Overlap(it->start, it->end, it1->start, it1->end) && it1 != tasks_start.end(); ++it1) { + dismissed[it1->id] = true; + } + Time min_end_value = 0; + for (auto it2 = tasks_end.begin(); it2 != tasks_end.end(); ++it2) { + if (!dismissed[it2->id]) { + min_end_value = it2->end; + break; + } + } + // Add dependencies to immediate right neighborhood + for (; it1->start < min_end_value && it1 != tasks_start.end(); ++it1) { + if (it->gpto_type != it1->gpto_type){ + dependencies.emplace_back(it->id, it1->id); + } + } + } + //} + MS_LOG(INFO) << "Finished Main Loop"; + MS_LOG(INFO) << "Generated " << dependencies.size() << " dependencies"; + return dependencies; +} + +// Sorting for tasks +bool SortByWeightMax(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || + (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->weight() > task2->weight() || (task1->weight() == task2->weight() && task1->id() < task2->id()))) + // || (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()); + ; +} + +bool SortByWeightMin(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || + (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->weight() < task2->weight() || (task1->weight() == task2->weight() && task1->id() < task2->id()))) + //||(task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()); + ; +} + +bool SortBySuccDiff(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->succ_diff_type() > task2->succ_diff_type() || + (task1->succ_diff_type() == task2->succ_diff_type() && task1->weight() > task2->weight()) || + (task1->succ_diff_type() == task2->succ_diff_type() && task1->weight() == task2->weight() && + task1->id() < task2->id()))) + // ||(task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()); + ; +} + +bool SortByBottomLevelMax(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->bottom_level() > task2->bottom_level() || + (task1->bottom_level() == task2->bottom_level() && task1->weight() > task2->weight()) || + (task1->bottom_level() == task2->bottom_level() && task1->weight() == task2->weight() && + task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +bool SortByBottomLevelMin(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->bottom_level() < task2->bottom_level() || + (task1->bottom_level() == task2->bottom_level() && task1->weight() > task2->weight()) || + (task1->bottom_level() == task2->bottom_level() && task1->weight() == task2->weight() && + task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()); + ; +} + +bool SortByTopLevelMax(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->top_level() > task2->top_level() || + (task1->top_level() == task2->top_level() && task1->weight() > task2->weight()) || + (task1->top_level() == task2->top_level() && task1->weight() == task2->weight() && task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +bool SortByTopLevelMin(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->top_level() < task2->top_level() || + (task1->top_level() == task2->top_level() && task1->weight() > task2->weight()) || + (task1->top_level() == task2->top_level() && task1->weight() == task2->weight() && task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +bool SortByBottomTopLevelMaxSum(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->top_level() + task1->bottom_level() > task2->top_level() + task2->bottom_level() || + (task1->top_level() + task1->bottom_level() == task2->top_level() + task2->bottom_level() && + task1->weight() > task2->weight()) || + (task1->top_level() + task1->bottom_level() == task2->top_level() + task2->bottom_level() && + task1->weight() == task2->weight() && task1->id() < task2->id()))) + //||(task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +bool SortByBottomTopLevelMinSum(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->top_level() + task1->bottom_level() < task2->top_level() + task2->bottom_level() || + (task1->top_level() + task1->bottom_level() == task2->top_level() + task2->bottom_level() && + task1->weight() > task2->weight()) || + (task1->top_level() + task1->bottom_level() == task2->top_level() + task2->bottom_level() && + task1->weight() == task2->weight() && task1->id() < task2->id()))) + //||(task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// Ishfaq Ahmad, Yu-Kwong Kwok, and Min-You Wu. +// Analysis, evaluation, and comparison of algorithms for scheduling task graphs on parallel processors. +// Second International Symposium on Parallel Architectures, Algorithms, and Networks (I-SPAN'96), +// pages 207-213. IEEE, 1996. +bool SortByBottomTopLevelComposite(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->bottom_level() - task1->top_level() > task2->bottom_level() - task2->top_level() || + (task1->bottom_level() - task1->top_level() == task2->bottom_level() - task2->top_level() && + task1->weight() > task2->weight()) || + (task1->bottom_level() - task1->top_level() == task2->bottom_level() - task2->top_level() && + task1->weight() == task2->weight() && task1->id() < task2->id()))) + //||(task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// Behrooz Shirazi, Mingfang Wang, and Girish Pathak. +// Analysis and evaluation of heuristic methods for static task scheduling. +// Journal of Parallel and Distributed Computing, 10(3):222-232, 1990. +bool SortByWeightedLength(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->weighted_length() > task2->weighted_length() || + (task1->weighted_length() == task2->weighted_length() && task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// DFS with weights for tie breaking +bool SortByDepthMax(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->depth() > task2->depth() || (task1->depth() == task2->depth() && task1->weight() > task2->weight()) || + (task1->depth() == task2->depth() && task1->weight() == task2->weight() && task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// BFS with weights for tie breaking +bool SortByDepthMin(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->depth() < task2->depth() || (task1->depth() == task2->depth() && task1->weight() > task2->weight()) || + (task1->depth() == task2->depth() && task1->weight() == task2->weight() && task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// Sort by predecessor to comm +bool SortByPredComm(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->pred_comm() < task2->pred_comm() || + (task1->pred_comm() == task2->pred_comm() && task1->bottom_level() > task2->bottom_level()) || + (task1->pred_comm() == task2->pred_comm() && task1->bottom_level() == task2->bottom_level() && + task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// Sort by predecessor to comm + DFS +bool SortByPredCommDepth(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->pred_comm() < task2->pred_comm() || + (task1->pred_comm() == task2->pred_comm() && task1->depth() > task2->depth()) || + (task1->pred_comm() == task2->pred_comm() && task1->depth() == task2->depth() && task1->id() < task2->id()))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// Sort by predecessor to cube + bottom level +bool SortByPredCube(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->pred_cube() < task2->pred_cube() || + (task1->pred_cube() == task2->pred_cube() && task1->bottom_level() > task2->bottom_level()) || + (task1->pred_cube() == task2->pred_cube() && task1->bottom_level() == task2->bottom_level() && + task1->id() < task2->id()))) + //||(task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// Sort by greedy height of memory (maintained dynamically) +bool SortByGreedyHeight(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->subgraph_id() < task2->subgraph_id() || (task1->subgraph_id() == task2->subgraph_id() + //&& task1->subgraph_id() == SIZE_MAX + && (task1->mem_impact() < task2->mem_impact() || ( + task1->mem_impact() == task2->mem_impact() && SortByBottomLevelMax(task1, task2)))) + //|| (task1->subgraph_id() == task2->subgraph_id() && task1->subgraph_id() < SIZE_MAX && task1->original_order() < task2->original_order()) + ; +} + +// Sorting by load for processing elements +struct SortByLoad { + bool operator()(const ProcessingElement &pe1, const ProcessingElement &pe2) const { + return pe1.load < pe2.load || (pe1.load == pe2.load && pe1.id < pe2.id); + } +}; + +// Get PEs description +std::unordered_map GetTestPEs() { + std::unordered_map new_pem; + new_pem[kComm] = 1; + if (common::GetEnv("MS_ENABLE_GPTO_SINGLESTREAM") == "1") { + return new_pem; + } + new_pem[kComp] = 1; + if (common::GetEnv("MS_ENABLE_GPTO_MULTISTREAM") == "0") { + return new_pem; + } + new_pem[kCube] = 1; + return new_pem; +} + +// Auxiliary subroutines and lower bounds +void gpto::ComputeDepthAndTopLevel(std::vector> &tasks) { + MS_LOG(INFO) << "Top Level: Start Initialization"; + std::unordered_map unprocessed_parents; + std::queue> tasks_to_visit; + // Initialization loop + for (size_t j = 0; j < tasks.size(); ++j) { + const auto &id = tasks[j]->id(); + unprocessed_parents[id] = tasks[j]->parents().size(); + if (unprocessed_parents[id] == 0) { + tasks[j]->set_top_level(tasks[j]->weight()); + tasks_to_visit.push(tasks[j]); + } + } + MS_LOG(INFO) << "Top Level: End Initialization"; + MS_LOG(INFO) << "Top Level: Start Traversal Loop"; + while (!tasks_to_visit.empty()) { + const auto &selected_task = tasks_to_visit.front(); + // Update candidate tasks + for (auto &successor : selected_task->children()) { + const auto &succ_id = successor->id(); + successor->set_depth(std::max(successor->depth(), selected_task->depth() + 1)); + successor->set_top_level( + std::max(successor->top_level(), selected_task->top_level() + successor->weight())); + unprocessed_parents[succ_id] -= 1; + if (unprocessed_parents[succ_id] == 0) { + tasks_to_visit.push(successor); + } + } + tasks_to_visit.pop(); + } + MS_LOG(INFO) << "Top Level: End Traversal Loop"; +} + +void gpto::ComputeBottomLevelAndWeightedLength(std::vector> &tasks) { + MS_LOG(INFO) << "Bottom Level: Start Initialization"; + std::unordered_map unprocessed_children; + std::unordered_map children_sum; + std::unordered_map children_max; + std::queue> tasks_to_visit; + // Initialization loop + for (auto &task : tasks) { + const auto &id = task->id(); + task->set_bottom_level(task->weight()); + task->set_weighted_length(task->weight()); + unprocessed_children[id] = task->children().size(); + if (unprocessed_children[id] == 0) { + tasks_to_visit.push(task); + } + } + MS_LOG(INFO) << "Bottom Level: End Initialization"; + MS_LOG(INFO) << "Bottom Level: Start Traversal Loop"; + while (!tasks_to_visit.empty()) { + const auto &selected_task = tasks_to_visit.front(); + // Update candidate tasks + for (auto &predecessor : selected_task->parents()) { + const auto &pred_id = predecessor.lock()->id(); + predecessor.lock()->set_bottom_level(std::max( + predecessor.lock()->bottom_level(), selected_task->bottom_level() + predecessor.lock()->weight())); + children_sum[pred_id] += selected_task->weighted_length(); + children_max[pred_id] = std::max(children_max[pred_id], selected_task->weighted_length()); + unprocessed_children[pred_id] -= 1; + if (unprocessed_children[pred_id] == 0) { + if (children_max[pred_id] == 0) { + MS_LOG(EXCEPTION) << "divisor children_max[pred_id] cannot be 0!"; + } + predecessor.lock()->set_weighted_length(predecessor.lock()->weight() + children_max[pred_id] + + children_sum[pred_id] / children_max[pred_id]); + tasks_to_visit.push(predecessor.lock()); + } + } + tasks_to_visit.pop(); + } + MS_LOG(INFO) << "Bottom Level: End Traversal Loop"; +} + +void gpto::ComputePredComm(std::vector> &tasks) { + for (auto &task : tasks) { + task->set_pred_comm(0); + for (auto &predecessor : task->parents()) { + if (predecessor.lock()->gpto_type() == kComm) { + task->set_pred_comm(task->pred_comm() + 1); + } + } + } +} + +void gpto::ComputePredCube(std::vector> &tasks) { + for (auto &task : tasks) { + task->set_pred_cube(0); + for (auto &predecessor : task->parents()) { + if (predecessor.lock()->gpto_type() == kCube) { + task->set_pred_cube(task->pred_cube() + 1); + } + } + } +} + +void gpto::InitializeMemoryImpact(std::vector> &tasks){ + for (auto &task : tasks) { + Memory out_weight = 0, workspace_weight = 0; + for (auto &tensor : task->out_tensors()){ + if (tensor->type() == kWorkspace){ //TODO: Ioannis(later make them into lifelong end (new logic)??? + workspace_weight += tensor->weight(); + } else { + out_weight += tensor->weight(); + } + } + for (auto &tensor : task->workspace_tensors()){ + workspace_weight += tensor->weight(); + } + task->set_workspace_memory(workspace_weight); + //task->set_mem_impact(out_weight); + task->set_mem_impact(out_weight + workspace_weight); + } +} + +Time gpto::LowerBoundBottomLevel(std::vector> &tasks) { + Time max_bottom_level = 0; + for (const auto &task : tasks) { + max_bottom_level = std::max(max_bottom_level, task->bottom_level()); + } + return max_bottom_level; +} + +Time gpto::LowerBoundPEs(std::vector> &tasks, + std::unordered_map &type_to_num_cores_map) { + double lower_bound = 0; + + std::unordered_map type_task_sum; + for (const auto &task : tasks) { + type_task_sum[task->gpto_type()] += task->weight(); + } + for (const auto &type_to_num : type_to_num_cores_map) { + const auto &type = type_to_num.first; + const auto &num_cores = type_to_num.second; + if (num_cores == 0) { + MS_LOG(EXCEPTION) << "divisor num_cores cannot be 0!"; + } + lower_bound = std::max(lower_bound, type_task_sum[type] / (1.0 * num_cores)); + } + return std::ceil(lower_bound); +} + +// Main algorithms/subroutines +std::pair SelectPEandTime(const Task &task, Time can_start, + std::set *PEs_ptr) { + auto &PEs = *PEs_ptr; + std::pair return_pair = std::make_pair(0, 0); + for (auto it = PEs.begin(); it != PEs.end(); ++it) { + // unsafe use of const_cast, but we modify only idle list and not key sorting parameters like load, id, etc. + // cf: https://stackoverflow.com/questions/43340050/modification-of-elements-of-stdset-defined-behavior + auto &mut_pe = const_cast(*it); + // Put in first idle that fits it + for (auto idle_it = mut_pe.idle.begin(); idle_it != mut_pe.idle.end(); ++idle_it) { + Time start_time; + bool case_flag = false; + // Distinguish cases based on can_start constraint + if (can_start <= idle_it->first) { + start_time = idle_it->first; + } else if (can_start <= idle_it->second) { + start_time = can_start; + case_flag = true; + } else { // can_start > idle_it->second means we are not allowed to schedule the task here + continue; + } + // If the task fits, then place it here + if (idle_it->second - start_time >= task.weight()) { + // Save info to return: start task at time idle_it->first + return_pair.first = (*it).id; + return_pair.second = start_time; + // Update idle list + if (!case_flag) { + if (idle_it->second - idle_it->first == task.weight()) { // whole idle interval is filled in, erase it + mut_pe.idle.erase(idle_it); + } else { // idle_it->second - idle_it->first > task.weight() + idle_it->first += task.weight(); + } + } else { // case_flag = true, idle interval is broken into two sub-blocks [idle_it->first, can_start] and + // (maybe empty) [can_start + weight, idle_it->second] + Time upper = idle_it->second; + idle_it->second = can_start; + if (upper - can_start - task.weight() > 0) { + std::pair new_idle = std::make_pair(can_start + task.weight(), upper); + mut_pe.idle.emplace(std::next(idle_it), new_idle); + } + } + // Update load and PEs set + auto updated_PE = PEs.extract(it); + updated_PE.value().load += task.weight(); + PEs.insert(std::move(updated_PE)); + return return_pair; + } + } + } + return return_pair; +} + +std::pair SelectPEandTimeAvailableStart(const Task &task, Time can_start, + std::vector *PEs_ptr) { + auto &PEs = *PEs_ptr; + // Precompute min first available start for task + Time min_start = SIZE_MAX; + bool min_case = false; + std::vector::iterator min_it; + std::list>::iterator min_idle_it; + for (auto it = PEs.begin(); it != PEs.end(); ++it) { + for (auto idle_it = it->idle.begin(); idle_it != it->idle.end(); ++idle_it) { + Time start_time; + bool case_flag = false; + // Distinguish cases based on can_start constraint + if (can_start <= idle_it->first) { + start_time = idle_it->first; + } else if (can_start <= idle_it->second) { + start_time = can_start; + case_flag = true; + } else { // can_start > idle_it->second means we are not allowed to schedule the task here + continue; + } + if (idle_it->second - start_time >= task.weight()) { + if (min_start > start_time) { + min_start = start_time; + min_case = case_flag; + min_it = it; + min_idle_it = idle_it; + break; + } + } + } + } + // Assign task to min PE + std::pair return_pair = std::make_pair(0, 0); + // Save info to return: start task at time idle_it->first + return_pair.first = (*min_it).id; + return_pair.second = min_start; + // Update idle list + if (!min_case) { + if (min_idle_it->second - min_idle_it->first == task.weight()) { // whole idle interval is filled in, erase it + min_it->idle.erase(min_idle_it); + } else { // idle_it->second - idle_it->first > task.weight() + min_idle_it->first += task.weight(); + } + } else { // min_case = true, idle interval is broken into two sub-blocks [idle_it->first, can_start] and + // (maybe empty)[can_start + task.weight(), idle_it->second] + Time upper = min_idle_it->second; + min_idle_it->second = can_start; + if (upper - can_start - task.weight() > 0) { + std::pair new_idle = std::make_pair(can_start + task.weight(), upper); + min_it->idle.emplace(std::next(min_idle_it), new_idle); + } + } + // Update load + min_it->load += task.weight(); + return return_pair; +} + +constexpr TaskSortFunction TASK_SORT[] = {SortByWeightMax, + SortByWeightMin, + SortBySuccDiff, + SortByBottomLevelMax, + SortByBottomLevelMin, + SortByTopLevelMax, + SortByTopLevelMin, + SortByBottomTopLevelMaxSum, + SortByBottomTopLevelMinSum, + SortByBottomTopLevelComposite, + SortByWeightedLength, + SortByDepthMax, + SortByDepthMin, + SortByPredComm, + SortByPredCommDepth, + SortByPredCube, + SortByGreedyHeight}; + +constexpr std::string_view TASK_SORT_NAMES[] = {"SortByWeightMax", + "SortByWeightMin", + "SortBySuccDiff", + "SortByBottomLevelMax", + "SortByBottomLevelMin", + "SortByTopLevelMax", + "SortByTopLevelMin", + "SortByBottomTopLevelMaxSum", + "SortByBottomTopLevelMinSum", + "SortByBottomTopLevelComposite", + "SortByWeightedLength", + "SortByDepthMax", + "SortByDepthMin", + "SortByPredComm", + "SortByPredCommDepth", + "SortByPredCube", + "SortByGreedyHeight"}; + +enum class PEsSort { kSortByLoad = 0, kSortByValidStart, kNumPEsSort }; + +constexpr std::string_view PE_NAME_SORT[] = {"SortByLoad", "SortByValidStart"}; + +SchedulingOutput gpto::Process(SchedulingInput &input, const size_t graph_id, const FuncGraphPtr &graph, const std::set> &tensors) { + std::vector> *tasks = &(input.tasks); + auto type_to_num_cores_map = GetTestPEs(); + SchedulingOutput output{{}, SIZE_MAX, HARD_MEMORY_LIMIT}; + + // Optional: verify input task graph is a DAG + if (VerifyDAG(*tasks)) { + MS_LOG(INFO) << "Verification of DAG: SUCCESS"; + } else { + MS_LOG(INFO) << "Verification of DAG: FAILURE"; + } + + // Preprocessing: values computation for necessary sorting + ComputeBottomLevelAndWeightedLength(*tasks); + //ComputeDepthAndTopLevel(*tasks); + ComputePredComm(*tasks); + if (common::GetEnv("MS_ENABLE_GPTO_MULTISTREAM") == "1") { + ComputePredCube(*tasks); + } + + // Loop over all sorting combinations + std::unordered_map, Time> best_start, best_end; // to use in verify dependencies only + std::string best_solution = ""; + MS_LOG(INFO) << "Start loop multiple scheduling functions"; + for (size_t task_sort = 0; task_sort < static_cast(kNumTaskSort); ++task_sort) { + for (size_t pes_sort = 0; pes_sort < static_cast(PEsSort::kNumPEsSort); ++pes_sort) { + // Etienne: Add variable here to force algo + if (common::GetEnv("MS_ENABLE_GPTO_ALGO") != ""){ + if (common::GetEnv("MS_ENABLE_GPTO_ALGO") != TASK_SORT_NAMES[task_sort]) { + continue; + } + } + MS_LOG(INFO) << TASK_SORT_NAMES[task_sort] << " and " << PE_NAME_SORT[pes_sort]; + SchedulingOutput solution = ProcessCore(*tasks, type_to_num_cores_map, TASK_SORT[task_sort], + (pes_sort == static_cast(PEsSort::kSortByLoad))); + if ((solution.makespan < output.makespan || (solution.makespan == output.makespan && solution.memory_peak < output.memory_peak)) + && solution.memory_peak <= HARD_MEMORY_LIMIT) { + output = solution; + best_solution = TASK_SORT_NAMES[task_sort]; + for (const auto &task : *tasks) { // to use in verify dependencies only + best_start[task] = task->start(); + best_end[task] = task->end(); + } + } + for (const auto &task : *tasks) { + task->ResetStartEnd(); + } + } + } + MS_LOG(INFO) << "End loop multiple scheduling functions"; + + if (best_solution == "") { + MS_LOG(EXCEPTION) << "Hard memory limit is not satisfied by any scheduling memory estimate, exiting..."; + } + + // Print stats about best solution + MS_LOG(INFO) << "Memory-aware heuristics with soft memory limit " << SOFT_MEMORY_LIMIT << " and hard memory limit " << HARD_MEMORY_LIMIT; + MS_LOG(INFO) << "Best solution is: " << best_solution; + MS_LOG(INFO) << "Makespan of best solution is " << output.makespan; + MS_LOG(INFO) << "Bottom level lower bound is " << LowerBoundBottomLevel(*tasks); + MS_LOG(INFO) << "Max type lower bound is " << LowerBoundPEs(*tasks, type_to_num_cores_map); + MS_LOG(INFO) << "Solution relative error is " << std::setprecision(5) + << ((output.makespan / + (1.0 * std::max(LowerBoundBottomLevel(*tasks), LowerBoundPEs(*tasks, type_to_num_cores_map))) - + 1) * + 100) + << "%"; + MS_LOG(INFO) << "Peak memory estimate of best solution is " << output.memory_peak; + + // Create and (optionally) verify dependencies (here only for testing) + //MS_LOG(INFO) << "Start Schedule to Dependencies"; + //auto dependencies = ScheduleToDependencies(output); + //MS_LOG(INFO) << "End Schedule to Dependencies"; + + // Save best solution (intervals) + for (const auto &task : *tasks) { + task->set_start(best_start[task]); + task->set_end(best_end[task]); + } + + // Output log files + //MS_LOG(INFO) << "Start printing output log file"; + //PrintLog(output, dependencies, graph_id, tensors); + //MS_LOG(INFO) << "End printing output log file"; + + // auto lower = std::max(LowerBoundBottomLevel(*tasks), LowerBoundPEs(*tasks, type_to_num_cores_map)); + // PrintLogForILP(input, output, graph_id, graph, lower, tensors); + return output; +} + +SchedulingOutput gpto::ProcessCore(std::vector> &tasks, + std::unordered_map &type_to_num_cores_map, + const TaskSortFunction &sortPtr, bool pe_load_sort) { + SchedulingOutput output{{}, 0, 0}; + // Initializations for tasks + MS_LOG(INFO) << "Started Task Initialization"; + std::set, TaskSortFunction> candidate_tasks(sortPtr); + std::unordered_map can_start; + std::unordered_map unprocessed_parents; + for (auto &task : tasks) { + const auto &id = task->id(); + can_start[id] = 0; + unprocessed_parents[id] = task->parents().size(); + if (unprocessed_parents[id] == 0) { + candidate_tasks.insert(task); + } + } + + // Initialization for memory impact handling + InitializeMemoryImpact(tasks); + std::unordered_map>> left_consumers; + for (auto &task : tasks) { + for (auto &in_tensor : task->in_tensors()) { + left_consumers[in_tensor->id()].insert(in_tensor->consumers().begin(), in_tensor->consumers().end()); + } + } + + MS_LOG(INFO) << "Finished Task Initialization"; + + // Initializations for processing elements + // Pick a sorting for processing elements + // Implemented: SortByLoad, SortByAvailableStart + // Only one structure to be used depending on argument; we define both here + std::unordered_map> PEs_load; + std::unordered_map> PEs_start; + MS_LOG(INFO) << "Started Processing Element Initialization"; + size_t count = 0; + for (const auto &type_to_num : type_to_num_cores_map) { + const auto &type = type_to_num.first; + const auto &num_cores = type_to_num.second; + for (int i = 0; i < num_cores; ++i) { + ProcessingElement new_pe; + new_pe.id = count + i; + new_pe.gpto_type = type; + new_pe.load = 0; + new_pe.idle.emplace_back(0, SIZE_MAX); + if (pe_load_sort) { + PEs_load[type].insert(new_pe); + } else { + PEs_start[type].push_back(new_pe); + } + } + count += num_cores; + } + MS_LOG(INFO) << "Finished Processing Element Initialization"; + + // Task graph scheduling loop + MS_LOG(INFO) << "Started Scheduling Main Loop"; + output.memory_peak = 0; + Memory cur_mem_peak = 0; + std::unordered_map last_workspace_memory; // comp/comm for now -> originally 0 by definition here +// size_t last_subgraph_id = SIZE_MAX; // +// bool last_candidate_gather = false; // + Time last_comm_end = 0; + while (!candidate_tasks.empty()) { + // Select task and schedule it (memory-aware), save info for output + bool flag = false; + TaskPtr selected_task; + for (auto it = candidate_tasks.begin(); it != candidate_tasks.end(); ++it){ + selected_task = *it; + // if (!last_candidate_gather && last_subgraph_id < SIZE_MAX && selected_task->subgraph_id() != last_subgraph_id) continue; + if ((cur_mem_peak + selected_task->mem_impact() <= SOFT_MEMORY_LIMIT) || (selected_task->subgraph_id() < SIZE_MAX)){ // memory violated -> ignore for now if inside a ConditionSwitch/Gather branch + flag = true; + break; + } + } + if (flag == false){ + selected_task = *(candidate_tasks.begin()); + } + const auto &selected_id = selected_task->id(); +// last_candidate_gather = (selected_task->name().find("ConditionGather") != std::string::npos); // +// last_subgraph_id = selected_task->subgraph_id(); // + // Maintain memory peak information + cur_mem_peak += selected_task->mem_impact() - last_workspace_memory[selected_task->gpto_type()]; + last_workspace_memory[selected_task->gpto_type()] = selected_task->workspace_memory(); + output.memory_peak = std::max(output.memory_peak, cur_mem_peak); + + // Selected PE and start time + std::pair PE_and_time; + if (pe_load_sort) { + PE_and_time = SelectPEandTime(*selected_task, can_start[selected_id], &PEs_load[selected_task->gpto_type()]); + } else { + PE_and_time = + SelectPEandTimeAvailableStart(*selected_task, can_start[selected_id], &PEs_start[selected_task->gpto_type()]); + } + const auto &sigma = PE_and_time.second; + + // Maintenance of task interval + selected_task->set_start(sigma); + selected_task->set_end(sigma + selected_task->weight()); + // New interval for task in output + Interval new_interval{selected_id, selected_task->name(), selected_task->gpto_type(), selected_task->start(), selected_task->end()}; + output.task_times.push_back(new_interval); + // Update makespan + output.makespan = std::max(output.makespan, selected_task->end()); + + // Update memory impact values (no need for workspace memory removal here; only using as first estimate) + for (auto &in_tensor : selected_task->in_tensors()) { + const auto &tid = in_tensor->id(); + left_consumers[tid].erase(selected_task); + if (left_consumers[tid].size() == 1) { + auto last_consumer = *(left_consumers[tid].begin()); + auto it = candidate_tasks.find(last_consumer); + if (it != candidate_tasks.end()) { + auto updated_candidate = candidate_tasks.extract(it); + updated_candidate.value()->set_mem_impact(updated_candidate.value()->mem_impact() - in_tensor->weight()); + candidate_tasks.insert(std::move(updated_candidate)); + } else { + last_consumer->set_mem_impact(last_consumer->mem_impact() - in_tensor->weight()); + } + } + } + // Update out-tensors of selected node + for (auto &out_tensor : selected_task->out_tensors()) { + if (out_tensor->consumers().size() == 1) { + auto last_consumer = *(out_tensor->consumers().begin()); + last_consumer->set_mem_impact(last_consumer->mem_impact() - out_tensor->weight()); + } + } + + // Update candidate tasks + candidate_tasks.erase(selected_task); + + // Update can_start for ConditionalSwitch/Gather case !!!!! IOANNIS -> ok? (or move later to full block contraction option) + if (selected_task->condition_gather()){ + for (const auto &candidate : candidate_tasks){ + can_start[candidate->id()] = std::max(can_start[candidate->id()], selected_task->end()); + } + } + + if (common::AnfAlgo::IsCommunicationOp(selected_task->cnode())){ + last_comm_end = selected_task->end(); + } + + for (const auto &successor : selected_task->children()) { + const auto &succ_id = successor->id(); + can_start[succ_id] = std::max(can_start[succ_id], selected_task->end()); + if (successor->condition_switch()){ + can_start[succ_id] = std::max(can_start[succ_id], last_comm_end); + } + unprocessed_parents[succ_id] -= 1; + if (unprocessed_parents[succ_id] == 0) { + candidate_tasks.insert(successor); + } + } + } + MS_LOG(INFO) << "Finished Scheduling Main Loop"; + MS_LOG(INFO) << "Makespan is " << output.makespan; + MS_LOG(INFO) << "Peak mem is " << output.memory_peak; + // Verification of scheduling solution (optional) + if (VerifyScheduling(tasks)) { + MS_LOG(INFO) << "Verification of Scheduling: SUCCESS"; + } else { + MS_LOG(INFO) << "Verification of Scheduling: FAILURE"; + } + + return output; +} + +bool gpto::VerifyScheduling(std::vector> &tasks) { + bool flag = true; + MS_LOG(INFO) << "Start Verification of Scheduling"; + for (auto &task : tasks) { + // Check if task is scheduled before its children + for (auto child = task->children().begin(); child != task->children().end(); ++child) { + if (!(task->start() < task->end() && task->end() <= (*child)->start() && + (*child)->start() < (*child)->end())) { // assume open-rightpoint intervals and non-zero size + MS_LOG(INFO) << "Verification violation: task " << task->id() << " [" << task->start() << "," << task->end() + << "] and task " << (*child)->id() << " [" << (*child)->start() << "," << (*child)->end() << "]"; + flag = false; + } + } + } + MS_LOG(INFO) << "End Verification of Scheduling"; + return flag; +} + +bool BFSsort(const std::shared_ptr &task1, const std::shared_ptr &task2) { + return task1->depth() < task2->depth() || (task1->depth() == task2->depth() && task1->id() < task2->id()); +} + +bool gpto::VerifyDependencies(std::vector> &tasks, + std::vector> &dependencies) { + bool flag = true; + + MS_LOG(INFO) << "Start Verification of Dependencies"; + // Traverse graph by depth to maintain ancestor info + auto tasks_sorted = tasks; + std::sort(tasks_sorted.begin(), tasks_sorted.end(), BFSsort); + std::map> exists_path; + std::map> id_to_ptr; + for (auto current = tasks_sorted.begin(); current != tasks_sorted.end(); ++current) { + id_to_ptr[(*current)->id()] = *current; + for (auto parent = (*current)->parents().begin(); parent != (*current)->parents().end(); ++parent) { + exists_path[(*parent).lock()->id()][(*current)->id()] = true; + for (auto &it : tasks_sorted) { + if (exists_path[it->id()][(*parent).lock()->id()]) { + exists_path[it->id()][(*current)->id()] = true; + } + } + } + } + // For each dependency, check if redundant it forms a directed cycle and if corresponding tasks are scheduled + // correctly + size_t redundant_count = 0; + for (auto it = dependencies.begin(); it != dependencies.end(); ++it) { + const auto &source = id_to_ptr[it->first]; + const auto &dst = id_to_ptr[it->second]; + if (exists_path[it->first][it->second]) { + redundant_count++; + } + if (exists_path[it->second][it->first]) { + MS_LOG(INFO) << "Dependency cycle formation: task " << source->id() << " [" << source->start() << "," + << source->end() << "] and task " << dst->id() << " [" << dst->start() << "," << dst->end() << "]"; + } + if (!(source->start() < source->end() && source->end() <= dst->start() && dst->start() < dst->end())) { + // allow weights of size 0 + MS_LOG(INFO) << "Dependency scheduling violation: task " << source->id() << " [" << source->start() << "," + << source->end() << "] and task " << dst->id() << " [" << dst->start() << "," << dst->end() << "]"; + } + } + MS_LOG(INFO) << "End Verification of Dependencies"; + MS_LOG(INFO) << redundant_count << " dependencies are redundant, " << dependencies.size() - redundant_count + << " are real"; + + return flag; +} + +bool gpto::VerifyDAG(std::vector> &tasks) { + // simple verifier that no directed cycle exists + std::unordered_map visited; + std::unordered_map unprocessed_parents; + std::deque> to_visit; + MS_LOG(INFO) << "Start Verification of DAG"; + for (auto &task : tasks) { + const auto &id = task->id(); + visited[id] = false; + unprocessed_parents[id] = task->parents().size(); + if (unprocessed_parents[id] == 0) { + to_visit.push_back(task); + } + } + while (!to_visit.empty()) { + const auto selected_task = *(to_visit.begin()); + const auto &selected_id = selected_task->id(); + if (visited[selected_id]) { + MS_LOG(INFO) << "Cycle including task " << selected_id; + return false; + } else { + visited[selected_id] = true; + } + to_visit.pop_front(); + for (const auto &successor : selected_task->children()) { + const auto &succ_id = successor->id(); + unprocessed_parents[succ_id] -= 1; + if (unprocessed_parents[succ_id] == 0) { + to_visit.push_back(successor); + } + } + } + MS_LOG(INFO) << "End Verification of DAG"; + + return true; +} + +void gpto::PrintLog(const SchedulingOutput &output, + const std::vector> &dependencies, const FuncGraphPtr &graph, + const size_t graph_id, std::set> &tensors) { + std::stringstream ss; + ss << graph; + std::ofstream out_file("gpto_out_" + std::to_string(graph_id) + "_" + ss.str() + ".log", std::ios::out | std::ios::trunc); + if (!out_file.is_open()) { + MS_LOG(ERROR) << "Could not open comp_comm_scheduling_out.log"; + return; + } + + // Print info for tasks + const auto &tasks = output.task_times; + for (const auto &task : tasks) { + out_file << "TASK id=" << std::to_string(task.id) << ", name=" << task.name << ", type=" << std::to_string(task.gpto_type) + << ", start=" << std::to_string(task.start) << ", end=" << std::to_string(task.end) << "\n"; + } + // Print dependencies (or events depending on function used) + for (const auto &dependency : dependencies) { + const auto &source = dependency.first; + const auto &dst = dependency.second; + out_file << "DEPENDENCY " << std::to_string(source) << " " << std::to_string(dst) << "\n"; + } + // Print tensor info + // Change set of TensorPtr to vector of TensorPtr to be able to sort the list + std::vector tensors_vec; + std::copy(tensors.begin(), tensors.end(), back_inserter(tensors_vec)); + std::sort(tensors_vec.begin(), tensors_vec.end(), [](const TensorPtr lhs, const TensorPtr rhs) { return lhs->id() < rhs->id(); }); + for (const auto &tensor : tensors_vec) { + std::string consumers = ""; + for (const auto &consumer: tensor->consumers()){ + consumers += std::to_string(consumer->id()) + ";"; + } + out_file << "TENSOR id=" << std::to_string(tensor->id()) << ", weight=" << std::to_string(tensor->weight()) << ", source=" << std::to_string(tensor->source()->id()) << ", consumers=" << consumers << "\n"; + } + + out_file.close(); +} + +void gpto::PrintLogForILP(const SchedulingInput &input, const SchedulingOutput &output, + const size_t graph_id, const FuncGraphPtr &graph, const Time lower, + const std::set &tensors) { + + std::stringstream ss; + ss << graph; + std::ofstream out_file("gpto_out_ilp_" + std::to_string(graph_id) + "_" + ss.str() + ".log", std::ios::out | std::ios::trunc); + if (!out_file.is_open()) { + MS_LOG(ERROR) << "Could not open gpto_out file"; + return; + } + // Print info for tasks + const auto &tasks = input.tasks; + for (const auto &task : tasks) { + out_file << "TASK id=" << std::to_string(task->id()) << ", name=" << task->name() << ", type=" << std::to_string(task->gpto_type()) + << ", cost=" << std::to_string(task->weight()) << ", top=" << std::to_string(task->top_level()-task->weight()) + << ", bottom=" << std::to_string(task->bottom_level()-task->weight()) + << "\n"; + } + + // Print makespan and memory bounds + out_file << "UPPER " << output.makespan << "\n"; + out_file << "LOWER " << lower << "\n"; + out_file << "SOFT_MEMORY_LIMIT " << SOFT_MEMORY_LIMIT << "\n"; + out_file << "HARD_MEMORY_LIMIT " << HARD_MEMORY_LIMIT << "\n"; + + // Print edges + for (const auto &task : tasks) { + for (const auto &child : task->children()){ + out_file << "EDGE " << std::to_string(task->id()) << " " << std::to_string(child->id()) << "\n"; + } + } + + // Print same-type task pairs which can be executed in parallel + std::vector nodes_dependency; + gpto::ComputeAncestorsDescendants(tasks, nodes_dependency); + for (size_t i = 0; i < tasks.size(); i++){ + for (size_t j = i+1; j < tasks.size(); j++){ + const auto &task1 = tasks[i]; + const auto &task2 = tasks[j]; + if (task1->gpto_type() != task2->gpto_type()) continue; + if (nodes_dependency[task2->id()].IsBitTrue(task1->id()) || nodes_dependency[task1->id()].IsBitTrue(task2->id())) continue; + out_file << "NO_OVERLAP " << std::to_string(task1->id()) << " " << std::to_string(task2->id()) << "\n"; + } + } + // Print tensor info + for (const auto &tensor : tensors) { + std::string consumers = ""; + for (const auto &consumer: tensor->consumers()){ + consumers += std::to_string(consumer->id()) + ";"; + } + out_file << "TENSOR id=" << std::to_string(tensor->id()) << ", weight=" << std::to_string(tensor->weight()) + << ", source=" << std::to_string(tensor->source()->id()) << ", consumers=" << consumers << "\n"; + } + + out_file.close(); +} + +void gpto::ComputeAncestorsDescendants(const std::vector> &tasks, std::vector &nodes_dependency) { + // Ioannis: Assume tasks are sorted by id (ie in BFS order); if not, sort them + // Ioannis: Do we need each node to be ancestor/descendant of itself? No (for now) + size_t count = tasks.back()->id() + 1; + for (size_t i = 0; i < count; i++) { + (void)nodes_dependency.emplace_back(count); + } + for (const auto &task : tasks) { + for (const auto &parent : task->parents()){ + nodes_dependency[task->id()].SetBitTrue(parent.lock()->id()); + Union(&nodes_dependency[task->id()], &nodes_dependency[parent.lock()->id()]); + } + // Ioannis: log message just for debugging + MS_LOG(DEBUG) << "Task " << task->id() << " has " << nodes_dependency[task->id()].CountOnesNum() << "ancestors"; + } +} + +void InsertEdges(const std::vector &cnode_vec, + std::unordered_map *cnode_to_task_map_ptr) { + for (size_t i = 0; i < cnode_vec.size(); ++i) { + for (size_t j = 0; j < cnode_vec[i]->size(); ++j) { + const auto &input_node = cnode_vec[i]->input(j)->cast(); + if ((*cnode_to_task_map_ptr).count(input_node) == 0) continue; + + ((*cnode_to_task_map_ptr)[cnode_vec[i]])->AddParent((*cnode_to_task_map_ptr)[input_node]); + ((*cnode_to_task_map_ptr)[input_node])->AddChild((*cnode_to_task_map_ptr)[cnode_vec[i]]); + MS_LOG(INFO) << "Edge " << (*cnode_to_task_map_ptr)[input_node]->id() << " " + << (*cnode_to_task_map_ptr)[cnode_vec[i]]->id(); + MS_LOG(INFO) << "Edge (UniqueName) " << input_node->UniqueName() << " " << cnode_vec[i]->UniqueName(); + } + } +} + +bool IsCubeKernel(const CNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + static const std::unordered_set kCubeKernelSet = { + // matmul + kMatMulOpName, kMatMulV2OpName, kBatchMatMulOpName, kBatchMatMulV2OpName, + // conv + kConv2DOpName, kConv3DOpName, + // conv dx + kConv2DBackpropInputOpName, kConv2DBackpropInputDOpName, kConv2DTransposeOpName, kConv2DTransposeDOpName, + kDepthwiseConv2DBackpropInputOpName, kDepthwiseConv2DBackpropInputDOpName, kConv3DBackpropInputOpName, + kConv3DBackpropInputDOpName, kConv3DTransposeOpName, kConv3DTransposeDOpName, + // conv dw + kConv2DBackpropFilterOpName, kConv2DBackpropFilterDOpName, kDepthwiseConv2DBackpropFilterOpName, + kDepthwiseConv2DBackpropFilterDOpName, kConv3DBackpropFilterOpName, kConv3DBackpropFilterDOpName}; + + auto op_name = common::AnfAlgo::GetCNodeName(node); + return kCubeKernelSet.find(op_name) != kCubeKernelSet.end(); +} + +// To-do: rename the function +TaskType GetGPTOTaskTypeFromCNode(const CNodePtr cnode){ + if(common::AnfAlgo::IsCommunicationOp(cnode)){ + return kComm; + } + if (common::GetEnv("MS_ENABLE_GPTO_SINGLESTREAM") == "1") { + return kComm; + } + if (common::GetEnv("MS_ENABLE_GPTO_MULTISTREAM") == "0") { + return kComp; + } + if(IsCubeKernel(cnode)){ + return kCube; + } else { + return kComp; + } +} + +TaskType GetRealTaskTypeFromCNode(const CNodePtr cnode){ + if(common::AnfAlgo::IsCommunicationOp(cnode)){ + return kComm; + } else if(IsCubeKernel(cnode)){ + return kCube; + } else { + return kComp; + } +} + +size_t GetAlignSize(size_t original_size) { + constexpr size_t alignment = 512; + constexpr size_t alignment_complement = 31; + size_t aligned_size = + (original_size > 0) ? ((original_size + alignment + alignment_complement) / alignment) * alignment : 0; + return aligned_size; +} + +void ContractUnrealTasks(std::unordered_map *cnode_to_task_map_ptr) { + MS_LOG(INFO) << "Start ContractUnrealTasks"; + for (auto it = (*cnode_to_task_map_ptr).begin(); it != (*cnode_to_task_map_ptr).end(); /* no increment */){ + + if (AnfUtils::IsRealKernel(it->first)){ + ++it; + continue; + } + + auto &task_to_remove = it->second; + auto &task_parents = task_to_remove->parents(); + auto &task_children = task_to_remove->children(); + + if (task_parents.empty()){ // Case no parents: void --> Load --> Add ==> void --> Add + for (auto &task_child : task_children) { + task_child->RemoveParent(task_to_remove); + } + task_to_remove->ClearChildren(); + } else if (task_children.empty()){ // Case no children: Add --> MakeTuple --> Return --> void ==> Add --> void + for (auto &task_parent: task_parents) { + task_parent.lock()->RemoveChild(task_to_remove); + } + task_to_remove->ClearParents(); + } else { + for (auto &task_parent : task_parents){ + task_parent.lock()->RemoveChild(task_to_remove); + for (auto &task_child : task_children) { + task_parent.lock()->AddChild(task_child); + task_child->AddParent(task_parent); + task_child->RemoveParent(static_cast>(task_to_remove)); + } + } + task_to_remove->ClearParents(); + task_to_remove->ClearChildren(); + } + it = (*cnode_to_task_map_ptr).erase(it); + } + MS_LOG(INFO) << "End ContractUnrealTasks"; +} + +/* +void ContractUnrealTasks(std::unordered_map *cnode_to_task_map_ptr) { + MS_LOG(INFO) << "Start ContractUnrealTasks"; + for (auto it = (*cnode_to_task_map_ptr).begin(); it != (*cnode_to_task_map_ptr).end(); ){ + if (!AnfUtils::IsRealKernel(it->first)){ + auto &task_to_remove = it->second; // (*cnode_to_task_map_ptr)[cnode]; + auto &task_parents = task_to_remove->parents(); + auto &task_children = task_to_remove->children(); + + if (task_parents.empty()){ // Case no parents: void --> Load --> Add ==> void --> Add + for (auto &task_child : task_children) { + task_child->RemoveParent(task_to_remove); + } + task_to_remove->ClearChildren(); + } else if (task_children.empty()){ // Case no children: Add --> MakeTuple --> Return --> void ==> Add --> void + for (auto &task_parent: task_parents) { + task_parent.lock()->RemoveChild(task_to_remove); + } + task_to_remove->ClearParents(); + } else { + for (auto &task_parent : task_parents){ + task_parent.lock()->RemoveChild(task_to_remove); + for (auto &task_child : task_children) { + task_parent.lock()->AddChild(task_child); + task_child->AddParent(task_parent); + task_child->RemoveParent(static_cast>(task_to_remove)); + } + } + task_to_remove->ClearParents(); + task_to_remove->ClearChildren(); + } + it = (*cnode_to_task_map_ptr).erase(it); + } else { + ++it; + } + } + MS_LOG(INFO) << "End ContractUnrealTasks"; +} +*/ + +KernelWithIndex GetVisitKernelWithReturnType(const AnfNodePtr &ori_node, size_t ori_index, std::unordered_map *cnode_to_task_map_ptr) { + auto prenode = common::AnfAlgo::VisitKernelWithReturnType(ori_node, ori_index, false); + //auto xx = prenode.first->cast(); + while (prenode.first->isa() && cnode_to_task_map_ptr->find(prenode.first->cast()) == cnode_to_task_map_ptr->end()) { + auto cnode = prenode.first->cast(); +// if (!common::AnfAlgo::IsNopNode(cnode)) { +// MS_LOG(INTERNAL_EXCEPTION) << "Node[" << ori_node->fullname_with_scope() << "] find input node[" +// << cnode->fullname_with_scope() +// << "] doesn't exist in nodes_map and is not a nop node!!!!"; +// } + prenode = common::AnfAlgo::VisitKernelWithReturnType(cnode->input(1), 0, false); + } + return prenode; +} + +void ExtractRealTensors(const SchedulingInput &scheduling_input, std::unordered_map *cnode_to_task_map_ptr, std::set> &tensors) { + size_t tensor_count = 0; + const auto &tasks = scheduling_input.tasks; + + // Looping over tasks to obtain output and workspace tensors (somas style) + for (auto &task : tasks) { + const auto &kernel_mod = AnfAlgo::GetKernelMod(task->cnode()); + MS_EXCEPTION_IF_NULL(kernel_mod); + + // Extract each node's output tensors + for (const auto &size : kernel_mod->GetOutputSizeList()) { + Time weight = GetAlignSize(size); + if (weight == 0){ + weight = GetAlignSize(1); + } + std::shared_ptr new_tensor = std::make_shared(tensor_count, weight, task, kWorkspace); // initially kWorkspace, since no consumers + task->out_tensors().push_back(new_tensor); + MS_LOG(INFO) << "New output tensor " << tensor_count << " source id " << task->id() << " weight " << weight; + tensor_count++; + tensors.insert(new_tensor); + } + + // Extract each node's workspace tensor + for (const auto &size : kernel_mod->GetWorkspaceSizeList()) { + Time weight = GetAlignSize(size); + if (weight == 0){ + weight = GetAlignSize(1); + } + std::shared_ptr new_tensor = std::make_shared(tensor_count, weight, task, kWorkspace); + task->workspace_tensors().push_back(new_tensor); + MS_LOG(INFO) << "New workspace tensor " << tensor_count << " source id " << task->id() << " weight " << weight; + tensor_count++; + tensors.insert(new_tensor); + } + } + + // Looping over tasks to obtain input tensors after all outputs have been saved (somas style) + for (auto &task : tasks) { + const auto &kernel = task->cnode(); + const auto &kernel_mod = AnfAlgo::GetKernelMod(kernel); + MS_EXCEPTION_IF_NULL(kernel_mod); + + if (common::AnfAlgo::GetCNodeName(kernel) != kMemSetOpName) { // standard input case + auto input_tensor_num = common::AnfAlgo::GetInputTensorNum(kernel); + for (size_t i = 0; i < input_tensor_num; i++) { + auto input_node = kernel->input(i+1); + MS_EXCEPTION_IF_NULL(input_node); + KernelWithIndex prenode_index = GetVisitKernelWithReturnType(input_node, 0, cnode_to_task_map_ptr); + MS_EXCEPTION_IF_NULL(prenode_index.first); + if (common::AnfAlgo::CheckPrimitiveType(prenode_index.first, prim::kPrimMakeTuple)) { + MS_LOG(INTERNAL_EXCEPTION) << "Node " << kernel->fullname_with_scope() << "'s input node [" << input_node->DebugString() + << "]'s input " << i << " is MakeTuple"; + } + + if (!AnfUtils::IsRealCNodeKernel(prenode_index.first)) { // somas input parameter case, ignore for now + MS_LOG(INFO) << "Input [" << prenode_index.first->fullname_with_scope() << "] is not a real cnode kernel."; + continue; + } + auto iter = cnode_to_task_map_ptr->find(prenode_index.first->cast()); + if (iter == cnode_to_task_map_ptr->end()){ + MS_LOG(INFO) << "Kernel[" << kernel->fullname_with_scope() << "]'s input " << i << " [" + << prenode_index.first->fullname_with_scope() << "] not found in tasks"; + continue; + } + auto pre_task = iter->second; + if (prenode_index.second > pre_task->out_tensors().size()){ + MS_LOG(INFO) << "Output index " << prenode_index.second << " exceeds input node [" + << prenode_index.first->fullname_with_scope() << "]'s outputs size " + << pre_task->out_tensors().size(); + continue; + } + auto input_tensor = pre_task->out_tensors()[prenode_index.second]; + MS_EXCEPTION_IF_NULL(input_tensor); + input_tensor->consumers().insert(task); + task->in_tensors().push_back(input_tensor); + MS_LOG(INFO) << "Tensor " << input_tensor->id() << " has new consumer " << task->id(); + if(input_tensor->type() == TensorType::kWorkspace){ + input_tensor->set_type(TensorType::kOutput); + } + tensors.insert(input_tensor); // TODO: remove eventually + } + } else { // atomic clean input case + auto input_tensor_num = common::AnfAlgo::GetInputTensorNum(kernel); + for (size_t i = 0; i < input_tensor_num; i++) { + auto pre_node = kernel->input(i+1)->cast(); + MS_EXCEPTION_IF_NULL(pre_node); + + auto iter = cnode_to_task_map_ptr->find(pre_node); + if (iter == cnode_to_task_map_ptr->end()){ + MS_LOG(INFO) << "Kernel[" << kernel->fullname_with_scope() << "]'s input " << i << " [" + << pre_node->fullname_with_scope() << "] not found in tasks"; + continue; + } + auto pre_task = iter->second; + + if (common::AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, pre_node)) { // clean output + auto clean_output_indexs = common::AnfAlgo::GetNodeAttr>(pre_node, kAttrAtomicOutputIndexs); + for (auto index : clean_output_indexs) { + if (index > pre_task->out_tensors().size()) { + MS_LOG(INFO) << "Output index " << index << " exceed input node [" + << pre_node->fullname_with_scope() << "]'s outputs size " + << pre_task->out_tensors().size(); + continue; // TODO: replace above INFO by INTERNAL_EXCEPTION and remove continue (everywhere) + } + auto input_tensor = pre_task->out_tensors()[index]; + MS_EXCEPTION_IF_NULL(input_tensor); + task->in_tensors().push_back(input_tensor); + // + if(input_tensor->type() == TensorType::kWorkspace){ + input_tensor->set_type(TensorType::kOutput); + } + input_tensor->consumers().insert(task); + tensors.insert(input_tensor); // TODO: remove eventually + // + } + } + + if (common::AnfAlgo::HasNodeAttr(kAttrAtomicWorkspaceIndexs, pre_node)) { // clean workspace + auto clean_workspace_indexs = common::AnfAlgo::GetNodeAttr>(pre_node, kAttrAtomicWorkspaceIndexs); + for (const auto &index : clean_workspace_indexs) { + if (index > pre_task->out_tensors().size()) { + MS_LOG(INFO) << "Workspace index " << index << " exceed input node [" + << pre_node->fullname_with_scope() << "]'s Workspace size " + << pre_task->workspace_tensors().size(); + continue; + } + auto input_tensor = pre_task->workspace_tensors()[index]; + MS_EXCEPTION_IF_NULL(input_tensor); + task->in_tensors().push_back(input_tensor); + // + if(input_tensor->type() == TensorType::kWorkspace){ + input_tensor->set_type(TensorType::kOutput); + } + input_tensor->consumers().insert(task); + tensors.insert(input_tensor); // TODO: remove eventually (not use extra storage) + // + } + } + } + } + } +} + +size_t CalculateVectorCost(CNodePtr cnode){ + Time weight = 0; + if (common::AnfAlgo::GetInputTensorNum(cnode) == 0) { + return weight; + } + KernelWithIndex kernel_with_index_1 = common::AnfAlgo::GetPrevNodeOutput(cnode, 0); + ShapeVector shape_1 = common::AnfAlgo::GetOutputInferShape(kernel_with_index_1.first, kernel_with_index_1.second); + const TypeId type_1 = common::AnfAlgo::GetOutputInferDataType(kernel_with_index_1.first, 0); + size_t type_size_1 = GetDataTypeSize(type_1); + size_t compute_count = std::accumulate(shape_1.cbegin(), shape_1.cend(), 1, std::multiplies{}); + weight = 0.5 + (compute_count*type_size_1/128); + return weight; +} + + +size_t CalculateCubeCost(CNodePtr cnode){ + Time weight = 0; + // Get info of input 1 + size_t batch = 1; + KernelWithIndex kernel_with_index_1 = common::AnfAlgo::GetPrevNodeOutput(cnode, 0); + ShapeVector shape_1 = common::AnfAlgo::GetOutputInferShape(kernel_with_index_1.first, kernel_with_index_1.second); + + // Get info of input 2 + KernelWithIndex kernel_with_index_2 = common::AnfAlgo::GetPrevNodeOutput(cnode, 1); + ShapeVector shape_2 = common::AnfAlgo::GetOutputInferShape(kernel_with_index_2.first, kernel_with_index_2.second); + + // Get info of output + ShapeVector shape_out = common::AnfAlgo::GetOutputInferShape(cnode, 0); + + // Remove batch if operator is batchmatmul + if (IsPrimitiveCNode(cnode, prim::kPrimBatchMatMul) || IsPrimitiveCNode(cnode, prim::kPrimBatchMatMulV2)){ + batch = shape_1.front(); + if (shape_1.size() == 4) { + shape_1.erase(shape_1.begin()); + shape_1.erase(shape_1.begin()); + shape_out.erase(shape_out.begin()); + shape_out.erase(shape_out.begin()); + } else { + shape_1.erase(shape_1.begin()); + shape_2.erase(shape_2.begin()); + shape_out.erase(shape_out.begin()); + } + } + + // Find MKN + size_t k = 0; + size_t m = 0; + size_t n = 0; + std::vector tmp; + for(auto dim: shape_1){ tmp.push_back(dim); } + for(auto dim: shape_2){ + bool found_in_input = std::find(tmp.begin(), tmp.end(), dim) != tmp.end(); + bool found_in_output = std::find(shape_out.begin(), shape_out.end(), dim) != shape_out.end(); + if (found_in_input && k == 0 && !found_in_output) { + k = dim; + tmp.erase(std::remove(tmp.begin(), tmp.end(), dim), tmp.end()); + } else if (found_in_input && k == 0 && found_in_output && n != 0) { + k = dim; + } else { + n = dim; + } + } + m = tmp[0]; + + // Get info of dtype + const TypeId type_1 = common::AnfAlgo::GetOutputInferDataType(kernel_with_index_1.first, 0); + size_t type_size_1 = GetDataTypeSize(type_1); + + weight = 21 + batch*m*k*n*type_size_1/8192; + return weight; +} + +size_t CalculateCommCost(CNodePtr cnode){ + Time weight = 0; + size_t output_num = AnfUtils::GetOutputTensorNum(cnode); + size_t input_num = common::AnfAlgo::GetInputTensorNum(cnode); + + // For each operator we get the inputs and outputs + // For each inputs, we multiply the shape to have the total size and we multiply the size by the data type + // We then sum all inputs + // If there is more than 1 output, we do the same for the outputs + // If output == 1 then cost is 0. We then sum all outputs + // We sum inputs cost + outputs cost + for (size_t j = 0; j < input_num; j++) { + KernelWithIndex kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(cnode, j); + if (dyn_cast(kernel_with_index.first->Shape()) == nullptr || + dyn_cast(kernel_with_index.first->Type()) == nullptr) { + MS_LOG(INFO) << "shape or type is nullptr, ignore"; + continue; + } + ShapeVector shape = common::AnfAlgo::GetOutputInferShape(kernel_with_index.first, kernel_with_index.second); + if (shape.size() <= 0) continue; + + const TypeId type = common::AnfAlgo::GetOutputInferDataType(kernel_with_index.first, 0); + if (type == kObjectTypeUMonad || type == kObjectTypeMonad || type == kObjectTypeFunction) continue; + + size_t type_size = GetDataTypeSize(type); + weight += std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()) * type_size; + } + + if (output_num > 1) { + for (size_t j = 0; j < output_num; j++) { + ShapeVector shape = common::AnfAlgo::GetOutputInferShape(cnode, j); + if (shape.size() <= 0) continue; + + const TypeId type = common::AnfAlgo::GetOutputInferDataType(cnode, j); + if (type == kObjectTypeUMonad || type == kObjectTypeMonad || type == kObjectTypeFunction) continue; + + size_t type_size = GetDataTypeSize(type); + weight += std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()) * type_size; + } + } + + return weight; +} + +size_t CalculateProfilingCost(const CNodePtr &cnode){ + Time weight = 0; + std::ifstream file; + file.open(common::GetEnv("MS_ENABLE_GPTO_PROF_FILE")); + + std::string line; + while(std::getline(file, line)) { + std::istringstream s(line); + std::string field; + std::vector fields; + while(getline(s, field,',')) { + fields.push_back(field); + } + if (cnode->fullname_with_scope() == fields[0]){ + weight = stoi(fields[3]); + break; + } + } + return weight; +} + +void gpto::PrintLogBaseline(const SchedulingInput &input, + const std::vector &execution_order, + std::unordered_map *cnode_to_task_gpto_map_ptr, const FuncGraphPtr &graph, + const size_t graph_id) { + std::stringstream ss; + ss << graph; + std::ofstream out_file("comp_comm_scheduling_baseline_" + std::to_string(graph_id) + "_" + ss.str() + ".log", std::ios::out | std::ios::trunc); + if (!out_file.is_open()) { + MS_LOG(ERROR) << "Could not open comp_comm_scheduling_baseline_" << graph_id << ".log"; + return; + } + + std::unordered_map taskid_to_end_value; + std::unordered_map taskid_to_start_value; + size_t makespan = 0; + + for(size_t i=0; iUniqueName(); + for(int j=i-1; j>=0; j--){ + MS_LOG(INFO) << "Current value loop j: " << j; + TaskPtr tmp_task = (*cnode_to_task_gpto_map_ptr)[execution_order[j]]; + MS_LOG(INFO) << "With node: " << tmp_task->name(); + if(tmp_task->gpto_type() == current_task->gpto_type()){ + MS_LOG(INFO) << "Found node same type"; + last_task = tmp_task; + break; + } + } + + // Find the latest parent of the current task + for(const auto &parent: (*cnode_to_task_gpto_map_ptr)[cnode]->parents()){ + if(last_task == nullptr || taskid_to_end_value[parent.lock()->id()] >= taskid_to_end_value[last_task->id()]){ + last_task = parent.lock(); + MS_LOG(INFO) << "Find parent " << last_task->name(); + } + } + + if (last_task == nullptr) { + last_task = current_task; + taskid_to_start_value[current_task->id()] = 0; + taskid_to_end_value[current_task->id()] = 0 + current_task->weight(); + } else { + taskid_to_start_value[current_task->id()] = taskid_to_end_value[last_task->id()]; + taskid_to_end_value[current_task->id()] = taskid_to_start_value[current_task->id()] + current_task->weight(); + } + + size_t current_task_end = taskid_to_end_value[current_task->id()]; + + if(current_task_end > makespan){ + makespan = taskid_to_end_value[current_task->id()]; + } + + out_file << "TASK id=" << std::to_string(current_task->id()) << ", name=" << current_task->name() << ", type=" << std::to_string(current_task->gpto_type()) + << ", start=" << std::to_string(taskid_to_start_value[current_task->id()]) << ", end=" << std::to_string(current_task_end) << "\n"; + + } + + MS_LOG(INFO) << "Makespan of baseline is " + std::to_string(makespan); + + out_file.close(); +} + +SchedulingInput gpto::ExtractSchedulingInput(const std::vector &cnode_vec, std::unordered_map *cnode_to_task_map_ptr, + std::set> &tensors) { + SchedulingInput scheduling_input; // to fill in and return + std::unordered_map> switch_attribute_ids, gather_attribute_ids; + + // Create a task per node + for (size_t i = 0; i < cnode_vec.size(); ++i) { + const auto &cnode = cnode_vec[i]; + + std::shared_ptr task = std::make_shared(i, GetRealTaskTypeFromCNode(cnode), GetGPTOTaskTypeFromCNode(cnode), cnode->fullname_with_scope()); + Time weight = 0; + if (common::GetEnv("MS_ENABLE_GPTO_PROF_FILE") != ""){ + weight = CalculateProfilingCost(cnode); + } else { + if (!AnfUtils::IsRealKernel(cnode)){ // CASE 1: not real kernel node + weight = 1; + } else if(task->real_type() == kComp){ // CASE 2: comp node of type Vector + weight = CalculateVectorCost(cnode); + } else if (task->real_type() == kCube) { // CASE 3: comp node of type Cube + weight = CalculateCubeCost(cnode); + } else { // CASE 4: comm node + weight = CalculateCommCost(cnode); + } + } + + task->AssignWeight(weight); + task->set_cnode(cnode); + + // Start Step 1 ConditionSwitch/Gather for inline: save attributes + task->set_original_order(i); + if (cnode->HasAttr(kInlineSubGraphName)){ // ConditionSwitch + task->set_condition_switch(true); + std::string s = cnode->GetAttr(kInlineSubGraphName)->ToString(); + std::string s1 = s.substr(s.find('(') + 1, s.find(',') - 1); + std::string s2 = s.substr(s.find(',') + 1, s.find(')') - 1); + switch_attribute_ids[task] = std::make_pair(std::stoll(s1.substr(s1.find("kernel_graph") + 12)), std::stoll(s2.substr(s2.find("kernel_graph") + 12))); + MS_LOG(INFO) << "Task ConditionSwitch " << task->id() << " with attribute kInlineSubGraphName" << s; + } else if (cnode->HasAttr(kAttrBranchGraphName)){ // ConditionGather + task->set_condition_gather(true); + std::string s = cnode->GetAttr(kAttrBranchGraphName)->ToString(); + std::string s1 = s.substr(s.find('(') + 1, s.find(',') - 1); + std::string s2 = s.substr(s.find(',') + 1, s.find(')') - 1); + gather_attribute_ids[task] = std::make_pair(std::stoll(s2.substr(s2.find("kernel_graph") + 12)), std::stoll(s1.substr(s1.find("kernel_graph") + 12))); + MS_LOG(INFO) << "Task ConditionGather " << task->id() << " with attribute kAttrBranchGraphName" << s; + } + // End Step 1 ConditionSwitch/Gather for inline + + (*cnode_to_task_map_ptr)[cnode] = task; + + MS_LOG(INFO) << "Task " << task->id() << " with name " << cnode->UniqueName() << " and CNodePtr " << cnode + << " with weight " << task->weight() << " and type " << GetGPTOTaskTypeFromCNode(cnode); + + if (AnfUtils::IsRealKernel(cnode)){ // only maintain real kernels in vector of tasks, the rest will be contracted later + scheduling_input.tasks.push_back(task); + } + } + + InsertEdges(cnode_vec, cnode_to_task_map_ptr); + ContractUnrealTasks(cnode_to_task_map_ptr); + ExtractRealTensors(scheduling_input, cnode_to_task_map_ptr, tensors); + + // IOANNIS: for conditional inline (nested if-else supported) + // Start Step 2 ConditionSwitch/Gather for inline: identify matching switch/gather pairs + + ComputeDepthAndTopLevel(scheduling_input.tasks); // if we keep here, don't call again later + struct Comp { + bool operator() (const TaskPtr t1, const TaskPtr t2) const { + return t1->depth() < t2->depth() || (t1->depth() == t2->depth() && t1->id() < t2->id()); + // return t1->id() < t2->id(); + // return t1 < t2; + } + }; + std::map switch_gather; + + //std::unordered_map switch_gather; + for (auto &switch_it : switch_attribute_ids){ + const auto &switch_task = switch_it.first; + auto switch_pair = switch_it.second; + + std::unordered_map>::iterator gather_it; + for (gather_it = gather_attribute_ids.begin(); gather_it != gather_attribute_ids.end(); ++gather_it){ + if (gather_it->second == switch_pair){ + break; + } + } + if (gather_it == gather_attribute_ids.end()){ + MS_LOG(INTERNAL_EXCEPTION) << "Could not find matching ConditionGather for a given ConditionSwitch " << switch_pair; + } + const auto &gather_task = gather_it->first; + switch_gather[switch_task] = gather_task; + MS_LOG(INFO) << "Mapped ConditionSwitch task " << switch_task->id() << " to ConditionGather task " << gather_task->id(); + } + // End Step 2 ConditionSwitch/Gather for inline + + // Start Step 3 ConditionSwitch/Gather for inline: traverse each Condition/Switch gather block to assign proper ids for scheduling + // Assumption 1: switch and nodes before gather have no predecessors/descendants outside the block + // Assumption 2: conditional switch does not have conditional gather as a child + size_t count_condition = SIZE_MAX - 1; + std::unordered_map unprocessed_parents; + std::queue tasks_to_visit; + + for (const auto &key_val : *cnode_to_task_map_ptr) { + auto &task = key_val.second; + unprocessed_parents[task->id()] = task->parents().size(); + } + + for (auto &it : switch_gather){ + const auto &switch_task = it.first; + const auto &gather_task = it.second; + MS_LOG(INFO) << "Assign subgraph id " << count_condition << " to tasks under ConditionSwitch task " << switch_task->id() << " name " << switch_task->name() << " up to (and including) ConditionGather task " << gather_task->id() << " name " << gather_task->name(); + + for (auto child : switch_task->children()) { + if (child == gather_task) { + child->set_subgraph_id(count_condition); + MS_LOG(INFO) << "Assign subgraph id " << count_condition << " to task " << gather_task->id() << " name " << gather_task->name(); + } else { + tasks_to_visit.push(child); + } + } + + while (!tasks_to_visit.empty()) { + const auto &selected_task = tasks_to_visit.front(); + selected_task->set_subgraph_id(count_condition); + MS_LOG(INFO) << "Assign subgraph id " << count_condition << " to task " << selected_task->id() << " name " << selected_task->name(); + if (selected_task->name().find("ConditionSwitch") != std::string::npos){ + for (auto gather_child : switch_gather[selected_task]->children()){ + unprocessed_parents[gather_child->id()] -= 1; + if (unprocessed_parents[gather_child->id()] == 0){ + if (gather_child != gather_task) { + tasks_to_visit.push(gather_child); + } else { + if (gather_task->subgraph_id() != count_condition){ + gather_task->set_subgraph_id(count_condition); + MS_LOG(INFO) << "Assign subgraph id " << count_condition << " to task " << gather_task->id() << " name " << gather_task->name(); + } + } + } + } + } else { + for (auto &child : selected_task->children()) { + unprocessed_parents[child->id()] -= 1; + if (unprocessed_parents[child->id()] == 0){ + if (child != gather_task) { + tasks_to_visit.push(child); + } else { + if (gather_task->subgraph_id() != count_condition){ + gather_task->set_subgraph_id(count_condition); + MS_LOG(INFO) << "Assign subgraph id " << count_condition << " to task " << gather_task->id() << " name " << gather_task->name(); + } + } + } + } + } + tasks_to_visit.pop(); + } + count_condition--; + } + // End Step 3 ConditionSwitch/Gather for inline + + return scheduling_input; +} + +Memory MemoryLowerBound(std::vector> &tasks, std::vector &nodes_dependency, std::set> &tensors){ + Memory max_lb = 0; + + for (const auto &task: tasks){ + Memory task_lb = 0; + for (const auto &tensor : tensors){ + //if (tensor->type() == 1) continue; // ignore workspace for now + const auto &source = tensor->source(); + const auto &consumers = tensor->consumers(); + + if (task == source || consumers.count(task) > 0) { + task_lb += tensor->weight(); + } else { + if (nodes_dependency[task->id()].IsBitTrue(source->id())){ + for (const auto &consumer : consumers){ + if (nodes_dependency[consumer->id()].IsBitTrue(task->id())){ + task_lb += tensor->weight(); + break; + } + } + } + } + } + task->set_lower_bound(task_lb); + max_lb = std::max(max_lb, task_lb); + } + return max_lb; +} + +void gpto::AddRealDependencies(const FuncGraphManagerPtr &manager, const std::vector &cnode_vec, const std::vector> &dependencies, + std::unordered_map *cnode_to_task) { + size_t count = 0, redundant_count = 0; + for (const auto &dependency : dependencies) { + + if(count > (size_t)(stoi(common::GetEnv("MS_ENABLE_GPTO_COUNT")))){break;} + MS_LOG(INFO) << "Checking dependency " << dependency.first << " " << dependency.second; + const auto &source = cnode_vec[dependency.first]; + const auto &dest = cnode_vec[dependency.second]; + + // Ignore dependencies already there + if ((*cnode_to_task)[source]->HasChild((*cnode_to_task)[dest])) { + MS_LOG(INFO) << "Dependency " << dependency.first << " " << dependency.second << " is redundant (already parent and child)"; + redundant_count++; + continue; + } + + // At least two inputs in destination node (input 0 is the node primitive) + if (dest->size() < 2) { + MS_LOG(INFO) << "Destination inputs size < 2: ignore"; + continue; + } + + // If destination node (comp) has comm inputs, make dependency involving one of them + for (size_t j = 1; j < dest->size(); ++j) { // input 0 is node primitive: ignore + if (!utils::isa(dest->input(j))) { + MS_LOG(INFO) << "Not a cnodeptr at input " << j; + continue; + } + + //bool is_same_input = false; + for (size_t k = 1; k < source->size(); ++k) { + if (!utils::isa(source->input(k))) { + MS_LOG(INFO) << "Not a cnodeptr at input " << j; + continue; + } + } + + // Add real dependency logic here + const auto &input_node = dest->input(j)->cast(); + std::vector depend_inputs{NewValueNode(prim::kPrimDepend), input_node, source}; + auto depend_node = dest->func_graph()->NewCNode(depend_inputs); + depend_node->set_abstract(input_node->abstract()->Clone()); + depend_node->AddAttr("multistream_scheduling_depend", MakeValue(true)); + MS_EXCEPTION_IF_NULL(depend_node); + auto &nodes = manager->node_users()[input_node]; + auto it = std::find_if(nodes.begin(), nodes.end(), [dest](const auto &user) { return user.first == dest; }); + if (it != nodes.end()) { + int idx = (*it).second; + manager->SetEdge(dest, idx, depend_node); + MS_LOG(INFO) << "Added dependency from " << dependency.first << ", unique name " << source->UniqueName() + << ", to " << dependency.second << ", unique name " << dest->UniqueName(); + count++; + break; // add dependency involving only one destination node input + } else { + MS_LOG(INFO) << "User index not found: Ignore dependency and continue"; + continue; + } + } + } + MS_LOG(INFO) << "Num of real dependencies added is " << count; + MS_LOG(INFO) << "Num of redundant dependencies (HasChild) is " << redundant_count; +} + +std::vector> GPTO(const FuncGraphPtr &graph) { + std::vector> events; + if (common::GetEnv("MS_ENABLE_GPTO") != "1") { + return events; + } + + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + + if (common::GetEnv("MS_ENABLE_GPTO_MEMORY_LIMIT") != "") { + SOFT_MEMORY_LIMIT = static_cast(stoll(common::GetEnv("MS_ENABLE_GPTO_MEMORY_LIMIT"))); + } else { + SOFT_MEMORY_LIMIT = static_cast(device::ascend::AscendMemAdapter::GetInstance().FreeDevMemSize()); + } + HARD_MEMORY_LIMIT = static_cast(context->get_param(MS_CTX_MAX_DEVICE_MEMORY)*kGBToByte); + + MS_LOG(INFO) << "Soft Memory value: " << SOFT_MEMORY_LIMIT; + MS_LOG(INFO) << "Hard Memory value: " << HARD_MEMORY_LIMIT; + + MS_EXCEPTION_IF_NULL(graph); + auto manager = graph->manager(); + MS_LOG(INFO) << "Graph pointer: " << graph; + MS_EXCEPTION_IF_NULL(manager); + + KernelGraphPtr kernel_graph = graph->cast(); + const size_t graph_id = kernel_graph->graph_id(); + MS_LOG(INFO) << "Start Scheduling Subgraph " << graph << " with id " << graph_id << " and Execution order size= " << kernel_graph->execution_order().size(); + + std::list cnode_list = graph->GetOrderedCnodes(); + std::vector cnode_vec(cnode_list.cbegin(), cnode_list.cend()); + + MS_LOG(INFO) << "Start ExtractSchedulingInput"; + std::unordered_map cnode_to_task; + std::set> tensors; // TODO: remove this data structure eventually, and only use out_tensors/in_tensors within tasks + SchedulingInput scheduling_input = gpto::ExtractSchedulingInput(cnode_vec, &cnode_to_task, tensors); + // size_t num_original_tasks = scheduling_input.tasks.size(); + MS_LOG(INFO) << "End ExtractSchedulingInput"; + if (scheduling_input.tasks.size() == 0){ + MS_LOG(WARNING) << "Scheduling input doesn't have tasks to continue... skip"; + MS_LOG(WARNING) << "Etienne PrintGraphExecuteOrder start"; + kernel_graph->PrintGraphExecuteOrder(); + MS_LOG(WARNING) << "Etienne PrintGraphExecuteOrder end"; + return events; + } + + // + // IOANNIS: START GENERALIZING FOR RESTRICTED SWITCH-GATHER ORDER + /* + std::unordered_map sub_input; + std::unordered_map sub_output; + std::unordered_map>> sub_deps; + std::unordered_map>> sub_events; + std::unordered_map> sub_order; + std::unordered_map> sub_switch, sub_gather; + + // Step 1: retrieve subgraphs based on graph attributes + std::unordered_map unprocessed_parents; + std::queue> tasks_to_visit; + const auto &tasks = scheduling_input.tasks; + // Initialization loop + std::vector>::iterator it = tasks.begin(); + while (it != tasks.end()) { + const auto &task = *it; + const auto &id = task->id(); + const auto &cnode = task->cnode(); + + if (cnode->HasAttr(kInlineSubGraphName)){ // ConditionSwitch + std::string s = cnode->GetAttr(kInlineSubGraphName)->ToString(); + MS_LOG(INFO) << "Assign task ConditionSwitch " << task->id() << " with attribute kInlineSubGraphName" << s << " parsing " << s.substr(s.find(',') + 1, s.find(')') - 1); + std::shared_ptr new_condition_switch = std::make_shared(*task); + new_condition_switch->ClearParents(); + new_condition_switch->in_tensors().clear(); + sub_input[s.substr(s.find(',') + 1, s.find(')') - 1)].tasks.push_back(new_condition_switch); + sub_switch[s.substr(s.find(',') + 1, s.find(')') - 1)] = task; + it = tasks.erase(it); + } else if (cnode->HasAttr(kAttrBranchGraphName)){ // ConditionGather + std::string s = cnode->GetAttr(kAttrBranchGraphName)->ToString(); + MS_LOG(INFO) << "Assign task ConditionGather " << task->id() << " with attribute kAttrBranchGraphName" << s << " parsing " << s.substr(s.find('(') + 1, s.find(',') - 1); + std::shared_ptr new_condition_gather = std::make_shared(*task); + new_condition_gather->ClearChildren(); + new_condition_gather->out_tensors().clear(); + sub_input[s.substr(s.find('(') + 1, s.find(',') - 1)].tasks.push_back(new_condition_gather); + sub_gather[s.substr(s.find('(') + 1, s.find(',') - 1)] = task; + it = tasks.erase(it); + } else if (cnode->HasAttr(kAttrPreKernelGraph)){ // Between ConditionSwitch and ConditionGather + MS_LOG(INFO) << "Assign task " << task->id() << " to subgraph " << cnode->GetAttr(kAttrPreKernelGraph)->ToString(); + sub_input[cnode->GetAttr(kAttrPreKernelGraph)->ToString()].tasks.push_back(task); + it = tasks.erase(it); + } else { + ++it; + } + } + + // Step 2: schedule each subgraph + for (auto &[name, sched_input] : sub_input){ + MS_LOG(INFO) << "Scheduling conditional branch " << name; + sub_output[name] = gpto::Process(sched_input, stoll(name.substr(name.find("kernel_graph") + 12)), nullptr, tensors); + sub_deps[name] = gpto::ScheduleToDependenciesDifferentTypes(sub_output[name]); + + std::vector task_times = sub_output[name].task_times; + std::sort(task_times.begin(), task_times.end(), [](Interval x, Interval y) { + return x.start < y.start || (x.start == y.start && x.end < y.end); + }); + for (auto interval : task_times){ + sub_order[name].push_back(cnode_vec[interval.id]); + } + for (auto dep : sub_deps[name]){ + sub_events[name].push_back(std::make_pair(cnode_vec[dep.first], cnode_vec[dep.second])); + } + } + */ + // IOANNIS: END GENERALIZING FOR RESTRICTED SWITCH-GATHER ORDER + // + + MS_LOG(INFO) << "Start Baseline Greedy Scheduling"; + gpto::PrintLogBaseline(scheduling_input, kernel_graph->execution_order(), &cnode_to_task, graph, graph_id); + MS_LOG(INFO) << "End Baseline Greedy Scheduling"; + + // + // IOANNIS: START CONTRACTING IF-ELSE BLOCKS + /* + size_t count_tasks = num_original_tasks; + size_t count_tensors = tensors.size(); + for (auto &[name, cond_switch] : sub_switch){ + std::shared_ptr task = std::make_shared(count_tasks++, kComp, kComp, name); // + task->AssignWeight(sub_output[name].makespan); + std::shared_ptr new_tensor = std::make_shared(count_tensors++, sub_output[name].memory_peak, task, kWorkspace); // + task->workspace_tensors().push_back(new_tensor); + scheduling_input.tasks.push_back(task); + // copies of tensors + rearrange edges correctly + for (auto t : cond_switch->in_tensors()){ + task->in_tensors.push_back(t); + } + } + */ + // IOANNIS: END CONTRACTING IF-ELSE BLOCKS + // + + auto scheduling_output = gpto::Process(scheduling_input, graph_id, graph, tensors); + + // Memory lower bound (for comparison only) + std::vector nodes_dependency; + + MS_LOG(INFO) << "Start Compute Ancestors Descendants"; + gpto::ComputeAncestorsDescendants(scheduling_input.tasks, nodes_dependency); + MS_LOG(INFO) << "End Compute Ancestors Descendants"; + + MS_LOG(INFO) << "Start Memory Lower Bound"; + Time memory_lb = MemoryLowerBound(scheduling_input.tasks, nodes_dependency, tensors); + MS_LOG(INFO) << "Memory Lower Bound value: " << memory_lb; + MS_LOG(INFO) << "End Memory Lower Bound"; + + std::vector> dependencies; + if (common::GetEnv("MS_ENABLE_GPTO") != "1") { + dependencies = gpto::ScheduleToDependencies(scheduling_output); + gpto::AddRealDependencies(manager, cnode_vec, dependencies, &cnode_to_task); + graph->cast()->SetExecOrderByDefault(); + } else { + dependencies = gpto::ScheduleToDependenciesDifferentTypes(scheduling_output); + std::vector new_order; + std::vector task_times = scheduling_output.task_times; + std::sort(task_times.begin(), task_times.end(), [](Interval x, Interval y) { + return x.start < y.start || (x.start == y.start && x.end < y.end); + }); + for (auto interval : task_times){ + new_order.push_back(cnode_vec[interval.id]); + } + MS_LOG(WARNING) << "Etienne PrintGraphExecuteOrder start"; + graph->cast()->set_execution_order(new_order); + kernel_graph->PrintGraphExecuteOrder(); + MS_LOG(WARNING) << "Etienne PrintGraphExecuteOrder end"; + for (auto dep : dependencies){ + events.push_back(std::make_pair(cnode_vec[dep.first], cnode_vec[dep.second])); + } + } + // Output log file with all info (scheduling and dependencies) + MS_LOG(INFO) << "Start printing output log file"; + gpto::PrintLog(scheduling_output, dependencies, graph, graph_id, tensors); + MS_LOG(INFO) << "End printing output log file"; + return events; +} +} // namespace opt +} // namespace mindspore + diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/gpto.h b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/gpto.h new file mode 100644 index 0000000000000000000000000000000000000000..164f24de3888d5fe6e514eed4462cf468ad243e7 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/gpto.h @@ -0,0 +1,430 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_HARDWARE_GPTO_H_ +#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_HARDWARE_GPTO_H_ + +#include +#include +#include +#include +#include +#include + +#include "mindspore/core/ir/anf.h" +#include "mindspore/core/ir/manager.h" +#include "mindspore/core/mindapi/base/shape_vector.h" + +namespace mindspore { +namespace opt { +// DynamicBitSet data structure definition: copied from somas - only used for optional memory lower bound calculation +constexpr auto kHalfByteSize = 4; +class DynamicBitSet { + const size_t bit_width_ = 64; + + inline size_t GetIndex(size_t index) const { return index / bit_width_; } + + inline uint64_t GetBitMask(size_t index) const { + return ((static_cast(0x1)) << ((bit_width_ - 1) - (index % bit_width_))); + } + + inline void Reset(uint64_t val) { + bit_.clear(); + for (size_t i = 0; i < bit_size_; i++) { + bit_.push_back(val); + } + } + + public: + size_t bit_size_; + std::vector bit_; + explicit DynamicBitSet(size_t count) : bit_size_((count + bit_width_ - 1) / bit_width_) { Reset(0x0); } + ~DynamicBitSet() = default; + + void SetBitTrue(size_t index, bool log = false) { + if (log) { + MS_LOG(INFO) << GetIndex(index) << " " << GetBitMask(index); + } + bit_[GetIndex(index)] |= GetBitMask(index); + } + + void SetBitFalse(size_t index) { bit_[GetIndex(index)] &= (~GetBitMask(index)); } + bool IsBitTrue(size_t index) const { return (bit_[GetIndex(index)] & GetBitMask(index)) != 0x0; } + bool IsBitFalse(size_t index) const { return !IsBitTrue(index); } + + size_t CountOnesNum() const { + size_t ret = 0; + static unsigned char ones_num_in_hex[] = "\0\1\1\2\1\2\2\3\1\2\2\3\2\3\3\4"; + for (size_t i = 0; i < bit_size_; i++) { + auto value = bit_[i]; + if (value == 0) { + continue; + } + auto *char_value = reinterpret_cast(&value); + for (size_t j = 0; j < bit_width_ / CHAR_BIT; j++) { + ret += ones_num_in_hex[static_cast(char_value[j] & 0xF)]; + char_value[j] >>= kHalfByteSize; + ret += ones_num_in_hex[static_cast(char_value[j] & 0xF)]; + } + } + return ret; + } + + void Log() { + std::cout << "Start Print Bitset "; + for (size_t i = 0; i < bit_size_; i++) { + std::cout << " bit [" << std::dec << i << "] = " << std::hex << bit_[i] << std::dec; + } + std::cout << std::endl; + } + + friend void Union(DynamicBitSet *a, DynamicBitSet *b) { + for (size_t i = 0; i < (*a).bit_size_; i++) { + (*a).bit_[i] |= (*b).bit_[i]; + } + } +}; + +// Preliminary definitions +using Time = uint64_t; // size_t; +using Memory = int64_t; // maintain memory as signed integer, since memory impact of some operators may be negative +using TaskId = size_t; +using PeId = size_t; +enum TaskType { kNone = 0, kComp, kComm, kCube }; +enum TensorType { kOutput = 0, kWorkspace }; // kOutput: from one task to another, kWorkspace: workspace or to other subgraphs + +struct ProcessingElement { + PeId id; + TaskType gpto_type; + Time load; + std::list> idle; +}; + +struct Interval { // Information extracted by scheduling + TaskId id; + std::string name; + TaskType gpto_type; + Time start; + Time end; +}; + +enum TaskSort { + kSortByWeightMax = 0, + kSortByWeightMin, + kSortBySuccDiff, + kSortByBottomLevelMax, + kSortByBottomLevelMin, + kSortByTopLevelMax, + kSortByTopLevelMin, + kSortByBottomTopLevelMaxSum, + kSortByBottomTopLevelMinSum, + kSortByBottomTopLevelComposite, + kSortByWeightedLength, + kSortByDepthMax, + kSortByDepthMin, + kSortByPredComm, + kSortByPredCommDepth, + kSortByPredCube, + kSortByGreedyHeight, + kNumTaskSort +}; + +class Task; + +// GPTO Tensor definitions +class Tensor { + private: + size_t id_; + Memory weight_; + std::shared_ptr source_; + TensorType type_; + std::set> consumers_; + + public: + Tensor(const size_t id, const Memory weight, const std::shared_ptr source, const TensorType type){ + id_ = id; + weight_ = weight; + source_ = source; + type_ = type; + } + + Tensor(const Tensor &t){ + id_ = t.id_; + weight_ = t.weight_; + source_ = t.source_; + type_ = t.type_; + consumers_ = t.consumers_; + } + + ~Tensor() { consumers_.clear(); }; + + const size_t& id() const { return id_; } + const Memory& weight() const { return weight_; } + const std::shared_ptr& source() { return source_; } + const TensorType& type() const { return type_; } + std::set>& consumers() { return consumers_; } + + void set_type(TensorType type) { type_ = type; } +}; +using TensorPtr = std::shared_ptr; + +// GPTO Task definitions +class Task { + public: + struct SortByIdWeak { + bool operator()(const std::weak_ptr &task1, const std::weak_ptr &task2) const { + return task1.lock()->id() < task2.lock()->id(); + } + }; + + struct SortByIdShared { + bool operator()(const std::shared_ptr &task1, const std::shared_ptr &task2) const { + return task1->id() < task2->id(); + } + }; + + Task(const TaskId &id, const TaskType &real_type, const TaskType &gpto_type, const std::string &name) { + id_ = id; + real_type_ = real_type; + gpto_type_ = gpto_type; + cnode_ = nullptr; + weight_ = 1; + bottom_level_ = 0; + top_level_ = 0; + depth_ = 0; + succ_diff_type_ = 0; + weighted_length_ = 0.0; + start_ = SIZE_MAX; + end_ = 0; + pred_comm_ = 0; + pred_cube_ = 0; + name_ = name; + mem_impact_ = 0; + workspace_memory_ = 0; + lower_bound_ = 0; + subgraph_id_ = SIZE_MAX; + condition_switch_ = false; + condition_gather_ = false; + } + + Task(const Task &t){ + id_ = t.id_; + real_type_ = t.real_type_; + gpto_type_ = t.gpto_type_; + cnode_ = t.cnode_; + weight_ = t.weight_; + bottom_level_ = t.bottom_level_; + top_level_ = t.top_level_; + depth_ = t.depth_; + succ_diff_type_ = t.succ_diff_type_; + weighted_length_ = t.weighted_length_; + start_ = t.start_; + end_ = t.end_; + pred_comm_ = t.pred_comm_; + pred_cube_ = t.pred_cube_; + name_ = t.name_; + mem_impact_ = t.mem_impact_; + workspace_memory_ = t.workspace_memory_; + lower_bound_ = t.lower_bound_; + subgraph_id_ = t.subgraph_id_; + condition_switch_ = t.condition_switch_; + condition_gather_ = t.condition_gather_; + + parents_ = t.parents_; + children_ = t.children_; + in_tensors_ = t.in_tensors_; + out_tensors_ = t.out_tensors_; + workspace_tensors_ = t.workspace_tensors_; + } + + TaskId id() const { return id_; } + TaskType real_type() const { return real_type_; } + TaskType gpto_type() const { return gpto_type_; } + CNodePtr cnode() const { return cnode_; } + Time weight() const { return weight_; } + Time bottom_level() const { return bottom_level_; } + Time top_level() const { return top_level_; } + size_t depth() const { return depth_; } + size_t succ_diff_type() const { return succ_diff_type_; } + double weighted_length() const { return weighted_length_; } + Time start() const { return start_; } + Time end() const { return end_; } + size_t pred_comm() const { return pred_comm_; } + size_t pred_cube() const { return pred_cube_; } + std::string name() const { return name_; } + Memory mem_impact() const { return mem_impact_; } + Memory workspace_memory() const { return workspace_memory_; } + Time lower_bound() const { return lower_bound_; } + size_t subgraph_id() const { return subgraph_id_; } + bool condition_switch() const { return condition_switch_; } + bool condition_gather() const { return condition_gather_; } + size_t original_order() const { return original_order_; } + + std::set,SortByIdWeak>& parents() { return parents_; } + std::set,SortByIdShared>& children() { return children_; } + std::vector& in_tensors() { return in_tensors_; } + std::vector& out_tensors() { return out_tensors_; } + std::vector& workspace_tensors() { return workspace_tensors_; } + + void set_id(TaskId id) { id_ = id; } + void set_real_type(TaskType real_type) { real_type_ = real_type; } + void set_gpto_type(TaskType gpto_type) { gpto_type_ = gpto_type; } + void set_cnode(CNodePtr cnode) { cnode_ = cnode; } + void set_weight(Time weight) { weight_ = weight; } + void set_bottom_level(Time bottom_level) { bottom_level_ = bottom_level; } + void set_top_level(Time top_level) { top_level_ = top_level; } + void set_depth(size_t depth) { depth_ = depth; } + void set_succ_diff_type(size_t succ_diff_type) { succ_diff_type_ = succ_diff_type; } + void set_weighted_length(double weighted_length) { weighted_length_ = weighted_length; } + void set_start(Time start) { start_ = start; } + void set_end(Time end) { end_ = end; } + void set_pred_comm(size_t pred_comm) { pred_comm_ = pred_comm; } + void set_pred_cube(size_t pred_cube) { pred_cube_ = pred_cube; } + void set_name(std::string name) { name_ = name; } + void set_mem_impact(Memory mem_add) { mem_impact_ = mem_add; } + void set_workspace_memory(Memory workspace_memory) { workspace_memory_ = workspace_memory; } + void set_lower_bound(Time lb) { lower_bound_ = lb; } + void set_subgraph_id(size_t id) { subgraph_id_ = id; } + void set_condition_switch(bool cond) { condition_switch_ = cond; } + void set_condition_gather(bool cond) { condition_gather_ = cond; } + void set_original_order(size_t order) { original_order_ = order; } + + void AddParent(std::weak_ptr parent) { + parents_.insert(parent); + } + void RemoveParent(std::weak_ptr parent) { + parents_.erase(parent); + } + void ClearParents() { + parents_.clear(); + } + + void AddChild(std::shared_ptr child) { + children_.insert(child); + } + + void RemoveChild(std::shared_ptr child){ + children_.erase(child); + } + void ClearChildren() { + children_.clear(); + } + + bool HasChild(std::shared_ptr child) { + return std::find(children_.begin(), children_.end(), child) != children_.end(); + } + + void AssignWeight(Time weight) { + if (weight == 0) { + weight_ = 1; + } else { + weight_ = weight; + } + } + + void ResetStartEnd() { + start_ = SIZE_MAX; + end_ = 0; + } + + private: + TaskId id_; + TaskType real_type_; + TaskType gpto_type_; + CNodePtr cnode_; + + Time weight_; + Time bottom_level_; + Time top_level_; + size_t depth_; + size_t succ_diff_type_; + double weighted_length_; + Time start_; + Time end_; + size_t pred_comm_; + size_t pred_cube_; + std::string name_; + Memory mem_impact_; + Memory workspace_memory_; + Time lower_bound_; + + size_t subgraph_id_; + bool condition_switch_; + bool condition_gather_; + size_t original_order_; + + std::set,SortByIdWeak> parents_; + std::set, SortByIdShared> children_; + std::vector> in_tensors_; + std::vector> out_tensors_; + std::vector> workspace_tensors_; +}; +using TaskPtr = std::shared_ptr; +using TaskSortFunction = bool (*)(std::shared_ptr const &, std::shared_ptr const &); + +// GPTO Scheduling definitions +struct SchedulingInput { + std::vector> tasks; +}; + +struct SchedulingOutput { + std::vector task_times; + Time makespan; + Memory memory_peak; +}; + +namespace gpto { // Graph Parallel Topology Optimizer +// Main functionality +SchedulingInput ExtractSchedulingInput(const std::vector &, std::unordered_map *, std::set> &); +SchedulingOutput Process(SchedulingInput &, const size_t, const FuncGraphPtr &, const std::set &); +SchedulingOutput ProcessCore(std::vector> &, std::unordered_map &, + const TaskSortFunction &, bool); + +// Compute auxiliary values for task sorting criteria +void ComputeBottomLevelAndWeightedLength(std::vector> &); +void ComputeDepthAndTopLevel(std::vector> &); +void ComputePredComm(std::vector> &); +void ComputePredCube(std::vector> &); + +// Functions for memory-aware scheduling +void InitializeMemoryImpact(std::vector> &); +void ComputeAncestorsDescendants(const std::vector>&, std::vector&); // only needed for memory lower bound (optional) + +// Makespan lower bounds +Time LowerBoundBottomLevel(std::vector> &); +Time LowerBoundPEs(std::vector> &, std::unordered_map &); + +// Dependency generation +std::vector> ScheduleToDependencies(const SchedulingOutput &); // guide for real dependency generation +void AddRealDependencies(const FuncGraphManagerPtr &, const std::vector &, const std::vector> &, std::unordered_map *); +std::vector> ScheduleToDependenciesDifferentTypes(const SchedulingOutput &); // kbk event generation + +// Verification functions +bool VerifyDAG(std::vector> &); +bool VerifyScheduling(std::vector> &); +bool VerifyDependencies(std::vector> &, std::vector> &); + +// Printing log files +void PrintLog(const SchedulingOutput &, const std::vector> &, const FuncGraphPtr &, size_t, std::set> &tensors); +void PrintLogBaseline(const SchedulingInput &, const std::vector> &, std::unordered_map, std::shared_ptr >*, const FuncGraphPtr &, size_t); +void PrintLogForILP(const SchedulingInput &, const SchedulingOutput &, size_t, const FuncGraphPtr &, const Time, const std::set &); +} // namespace GPTO +// Integration function +std::vector> GPTO(const FuncGraphPtr &); +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_HAL_HARDWARE_GPTO_H_ + diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fft_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fft_proto.cc new file mode 100644 index 0000000000000000000000000000000000000000..27ca185a236d70db6f174e2a509a7335b6ce1f66 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fft_proto.cc @@ -0,0 +1,257 @@ +/** + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "custom_op_proto/cust_math_ops.h" +#include "register/op_impl_registry.h" +#include "utils/util.h" +#include "utils/common_shape_fns.h" +#include "utils/op_common_util.h" +#include "utils/op_const.h" + +namespace ge { +const std::string op_prefix = "Cust"; +const std::string fft_prefix = "FFT"; +std::string GetOpName(std::string op_name) { + if (!op_name.compare(0, op_prefix.size(), op_prefix) && op_name.find(fft_prefix) != std::string::npos) { + op_name.erase(op_name.begin(), op_name.begin() + op_prefix.size()); + } + return op_name; +} + +DataType FFTGetType(std::string op_name, DataType x_dtype) { + static const std::vector double_type = {DT_DOUBLE, DT_COMPLEX128}; + static const std::vector float_prim = {"HFFT", "HFFT2", "HFFTN", "IRFFT", "IRFFT2", "IRFFTN"}; + bool is_double_type = std::any_of(double_type.begin(), double_type.end(), + [&x_dtype](const DataType &type_id) { return x_dtype == type_id; }); + bool is_float_prim = std::find(float_prim.begin(), float_prim.end(), op_name) != float_prim.end(); + DataType y_dtype; + if (is_double_type && is_float_prim) { + y_dtype = DT_DOUBLE; + } + if (is_double_type && !is_float_prim) { + y_dtype = DT_COMPLEX128; + } + if (!is_double_type && is_float_prim) { + y_dtype = DT_FLOAT; + } + if (!is_double_type && !is_float_prim) { + y_dtype = DT_COMPLEX64; + } + return y_dtype; +} + +void FFTNGetAttr(const std::vector input_shape, size_t x_rank, std::vector *s_vec, + std::vector *dim_vec) { + std::vector s = *s_vec; + std::vector dim = *dim_vec; + if (dim.empty() && !s.empty()) { + for (size_t i = 0; i < s.size(); i++) { + (void)dim.emplace_back(x_rank - s.size() + i); + } + } + if (s.empty() && !dim.empty()) { + for (size_t i = 0; i < dim.size(); i++) { + (void)s.emplace_back(input_shape[dim[i]]); + } + } + if (s.empty() && dim.empty()) { + for (size_t i = 0; i < x_rank; i++) { + (void)dim.emplace_back(i); + (void)s.emplace_back(input_shape[i]); + } + } +} + +IMPLEMT_COMMON_INFERFUNC(FFTBaseInferShape) { + auto input_desc = op.GetInputDescByName("input"); + auto out_desc = op.GetOutputDescByName("y"); + auto op_name = GetOpName(op.GetOpType()); + + DataType x_dtype = input_desc.GetDataType(); + DataType y_dtype = FFTGetType(op_name, x_dtype); + out_desc.SetDataType(y_dtype); + + bool unknown_rank_shape = IsUnknownRankShape(input_desc.GetShape()); + if (unknown_rank_shape) { + out_desc.SetShape(ge::Shape(UNKNOWN_RANK)); + OP_LOGD(TbeGetName(op).c_str(), "output shape:%s", to_string(out_desc.GetShape()).c_str()); + op.UpdateOutputDesc("y", out_desc); + return GRAPH_SUCCESS; + } + + size_t x_rank = input_desc.GetShape().GetDimNum(); + auto input_shape = input_desc.GetShape().GetDims(); + vector output_shape(input_shape.begin(), input_shape.end()); + const vector depend_names = {"n", "dim"}; + PREPARE_DYNAMIC_SHAPE(depend_names); + + // infer output shape based on 'n' and 'dim' + Tensor dim_tensor; + std::vector dim_vec; + if (op.GetInputConstData("dim", dim_tensor) == GRAPH_SUCCESS) { + DataType dim_dtype = op.GetInputDescByName("dim").GetDataType(); + GetConstValue(op, dim_tensor, dim_dtype, dim_vec); + for (size_t i = 0; i < dim_vec.size(); i++) { + dim_vec[i] = dim_vec[i] < 0 ? static_cast(x_rank) + dim_vec[i] : dim_vec[i]; + } + } + + Tensor s_tensor; + std::vector s_vec; + bool s_is_none{true}; + if (op.GetInputConstData("n", s_tensor) == GRAPH_SUCCESS) { + DataType dtype = op.GetInputDescByName("n").GetDataType(); + GetConstValue(op, s_tensor, dtype, s_vec); + s_is_none = false; + } + + FFTNGetAttr(output_shape, x_rank, &s_vec, &dim_vec); + int64_t dim = dim_vec[0]; + if (!s_is_none) { + int64_t n = s_vec[0]; + output_shape[dim] = n; + if (op_name == "IHFFT" || op_name == "RFFT") { + output_shape[dim] = n / 2 + 1; + } + } else { + if (op_name == "HFFT") { + output_shape[dim] = (output_shape[dim] - 1) * 2; + } else if (op_name == "IHFFT" || op_name == "RFFT") { + output_shape[dim] = output_shape[dim] / 2 + 1; + } + } + + out_desc.SetShape(ge::Shape(output_shape)); + op.UpdateOutputDesc("y", out_desc); + return GRAPH_SUCCESS; +} + +IMPLEMT_COMMON_INFERFUNC(FFTNBaseInferShape) { + auto input_desc = op.GetInputDescByName("input"); + auto out_desc = op.GetOutputDescByName("y"); + auto op_name = GetOpName(op.GetOpType()); + DataType input_dtype = input_desc.GetDataType(); + DataType output_dtype = FFTGetType(op_name, input_dtype); + out_desc.SetDataType(output_dtype); + + bool unknown_rank_shape = IsUnknownRankShape(input_desc.GetShape()); + if (unknown_rank_shape) { + out_desc.SetShape(ge::Shape(UNKNOWN_RANK)); + OP_LOGD(TbeGetName(op).c_str(), "output shape:%s", to_string(out_desc.GetShape()).c_str()); + op.UpdateOutputDesc("y", out_desc); + return GRAPH_SUCCESS; + } + const vector depend_names = {"s", "dim"}; + PREPARE_DYNAMIC_SHAPE(depend_names); + + std::vector s_vec; + std::vector dim_vec; + size_t x_rank = input_desc.GetShape().GetDimNum(); + auto input_shape = input_desc.GetShape().GetDims(); + vector output_shape(input_shape.begin(), input_shape.end()); + + // infer output shape based on 's' and 'dim' + Tensor s_tensor; + bool s_is_none{true}; + if (op.GetInputConstData("s", s_tensor) == GRAPH_SUCCESS) { + DataType dtype = op.GetInputDescByName("s").GetDataType(); + GetConstValue(op, s_tensor, dtype, s_vec); + s_is_none = false; + } + + Tensor dim_tensor; + if (op.GetInputConstData("dim", dim_tensor) == GRAPH_SUCCESS) { + DataType dim_dtype = op.GetInputDescByName("dim").GetDataType(); + GetConstValue(op, dim_tensor, dim_dtype, dim_vec); + for (size_t i = 0; i < dim_vec.size(); i++) { + dim_vec[i] = dim_vec[i] < 0 ? static_cast(x_rank) + dim_vec[i] : dim_vec[i]; + } + } + + FFTNGetAttr(output_shape, x_rank, &s_vec, &dim_vec); + + static const std::vector half_shape_prim = {"IHFFT", "IHFFT2", "IHFFTN", "RFFT", "RFFT2", "RFFTN"}; + static const std::vector double_shape_prim = {"HFFT", "HFFT2", "HFFTN", "IRFFT", "IRFFT2", "IRFFTN"}; + bool is_half_shape_prim = std::find(half_shape_prim.begin(), half_shape_prim.end(), op_name) != half_shape_prim.end(); + bool is_double_shape_prim = + std::find(double_shape_prim.begin(), double_shape_prim.end(), op_name) != double_shape_prim.end(); + + for (size_t i = 0; i < s_vec.size(); i++) { + output_shape[dim_vec[i]] = s_vec[i]; + } + + if (is_double_shape_prim && s_is_none) { + output_shape[dim_vec.back()] = (output_shape[dim_vec.back()] - 1) * 2; + } + if (is_half_shape_prim && s_is_none) { + output_shape[dim_vec.back()] = output_shape[dim_vec.back()] / 2 + 1; + } + if (is_half_shape_prim && !s_is_none) { + output_shape[dim_vec.back()] = s_vec.back() / 2 + 1; + } + + out_desc.SetShape(ge::Shape(output_shape)); + op.UpdateOutputDesc("y", out_desc); + return GRAPH_SUCCESS; +} + +IMPLEMT_COMMON_INFERFUNC(FFTShiftInferShape) { + TensorDesc out_desc = op.GetOutputDescByName("input"); + out_desc.SetDataType(op.GetInputDescByName("input").GetDataType()); + out_desc.SetShape(op.GetInputDescByName("input").GetShape()); + if (op.UpdateOutputDesc("y", out_desc) != GRAPH_SUCCESS) { + OP_LOGE(TbeGetName(op).c_str(), "Failed to update output desc."); + return GRAPH_FAILED; + } + return GRAPH_SUCCESS; +} + +IMPLEMT_COMMON_INFERFUNC(FFTShapeCopyInferShape) { + TensorDesc input_desc = op.GetInputDescByName("input"); + TensorDesc out_desc = op.GetOutputDescByName("y"); + + Tensor shape_tensor; + Shape output_shape; + if (op.GetInputConstData("shape", shape_tensor) == GRAPH_SUCCESS) { + MakeShapeFromShapeTensor(shape_tensor, output_shape, op); + } else { + output_shape = Shape({UNKNOWN_RANK}); + } + out_desc.SetDataType(input_desc.GetDataType()); + out_desc.SetShape(output_shape); + if (op.UpdateOutputDesc("y", out_desc) != GRAPH_SUCCESS) { + OP_LOGE(TbeGetName(op).c_str(), "Failed to update output desc."); + return GRAPH_FAILED; + } + return GRAPH_SUCCESS; +} + +CUST_COMMON_INFER_FUNC_REG(FFTShapeCopy, FFTShapeCopyInferShape); + +CUST_COMMON_INFER_FUNC_REG(FFTShift, FFTShiftInferShape); +CUST_COMMON_INFER_FUNC_REG(IFFTShift, FFTShiftInferShape); + +CUST_COMMON_INFER_FUNC_REG(FFT, FFTBaseInferShape); +CUST_COMMON_INFER_FUNC_REG(IFFT, FFTBaseInferShape); + +CUST_COMMON_INFER_FUNC_REG(RFFT, FFTBaseInferShape); +CUST_COMMON_INFER_FUNC_REG(IRFFT, FFTBaseInferShape); + +CUST_COMMON_INFER_FUNC_REG(FFT2, FFTNBaseInferShape); +CUST_COMMON_INFER_FUNC_REG(FFTN, FFTNBaseInferShape); +CUST_COMMON_INFER_FUNC_REG(IFFT2, FFTNBaseInferShape); +CUST_COMMON_INFER_FUNC_REG(IFFTN, FFTNBaseInferShape); +} // namespace ge diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fft_shapecopy_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fft_shapecopy_proto.cc deleted file mode 100644 index 9b9ab98a9ef6a5ceac8a382b2247c8ccaafd2a1a..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fft_shapecopy_proto.cc +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "custom_op_proto/cust_math_ops.h" -#include "register/op_impl_registry.h" -#include "utils/util.h" -#include "utils/common_shape_fns.h" - -namespace ge { -IMPLEMT_COMMON_INFERFUNC(FFTShapeCopyInferShape) { - TensorDesc input_desc = op.GetInputDescByName("input"); - TensorDesc out_desc = op.GetOutputDescByName("y"); - - Tensor shape_tensor; - Shape output_shape; - if (op.GetInputConstData("shape", shape_tensor) == GRAPH_SUCCESS) { - MakeShapeFromShapeTensor(shape_tensor, output_shape, op); - } else { - output_shape = Shape({UNKNOWN_RANK}); - } - out_desc.SetDataType(input_desc.GetDataType()); - out_desc.SetShape(output_shape); - if (op.UpdateOutputDesc("y", out_desc) != GRAPH_SUCCESS) { - OP_LOGE(TbeGetName(op).c_str(), "Failed to update output desc."); - return GRAPH_FAILED; - } - return GRAPH_SUCCESS; -} - -CUST_COMMON_INFER_FUNC_REG(FFTShapeCopy, FFTShapeCopyInferShape); -} // namespace ge \ No newline at end of file diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftbase_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftbase_proto.cc deleted file mode 100644 index 0d1c2de329c5733e2672cff0c234ea579b1303a0..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftbase_proto.cc +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "custom_op_proto/cust_math_ops.h" -#include "register/op_impl_registry.h" -#include "utils/util.h" -#include "utils/common_shape_fns.h" -#include "utils/op_common_util.h" -#include "utils/op_const.h" - -namespace ge { -IMPLEMT_COMMON_INFERFUNC(FFTBaseInferShape) { - auto input_desc = op.GetInputDescByName("input"); - auto out_desc = op.GetOutputDescByName("y"); - - DataType x_dtype = input_desc.GetDataType(); - DataType y_dtype; - if (x_dtype == DT_DOUBLE || x_dtype == DT_COMPLEX128) { - y_dtype = DT_COMPLEX128; - } else { - y_dtype = DT_COMPLEX64; - } - out_desc.SetDataType(y_dtype); - - bool unknown_rank_shape = IsUnknownRankShape(input_desc.GetShape()); - if (unknown_rank_shape) { - out_desc.SetShape(ge::Shape(UNKNOWN_RANK)); - OP_LOGD(TbeGetName(op).c_str(), "output shape:%s", to_string(out_desc.GetShape()).c_str()); - op.UpdateOutputDesc("y", out_desc); - return GRAPH_SUCCESS; - } - - size_t x_rank = input_desc.GetShape().GetDimNum(); - auto input_shape_dims = input_desc.GetShape().GetDims(); - vector output_shape_dims(input_shape_dims.begin(), input_shape_dims.end()); - const vector depend_names = {"n", "dim"}; - PREPARE_DYNAMIC_SHAPE(depend_names); - - // infer output shape based on 'n' and 'dim' - Tensor n_data; - if (op.GetInputConstData("n", n_data) == GRAPH_SUCCESS) { - DataType dtype = op.GetInputDescByName("n").GetDataType(); - std::vector const_vec; - GetConstValue(op, n_data, dtype, const_vec); - int64_t n = const_vec[0]; - Tensor dim_data; - op.GetInputConstData("dim", dim_data); - - DataType dim_dtype = op.GetInputDescByName("dim").GetDataType(); - std::vector const_vec_dim; - GetConstValue(op, dim_data, dim_dtype, const_vec_dim); - int64_t dim = const_vec_dim[0]; - dim = dim < 0 ? static_cast(x_rank) + dim : dim; - output_shape_dims[dim] = n; - } - - out_desc.SetShape(ge::Shape(output_shape_dims)); - op.UpdateOutputDesc("y", out_desc); - return GRAPH_SUCCESS; -} - -CUST_COMMON_INFER_FUNC_REG(FFT, FFTBaseInferShape); -CUST_COMMON_INFER_FUNC_REG(IFFT, FFTBaseInferShape); -} // namespace ge diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftnbase_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftnbase_proto.cc deleted file mode 100644 index 19ef7b348b6aea492ef558ed05cd1933d93aabca..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftnbase_proto.cc +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "custom_op_proto/cust_math_ops.h" -#include "register/op_impl_registry.h" -#include "utils/util.h" - -namespace ge { -IMPLEMT_COMMON_INFERFUNC(FFTNBaseInferShape) { - auto input_desc = op.GetInputDescByName("input"); - auto out_desc = op.GetOutputDescByName("y"); - - DataType x_dtype = input_desc.GetDataType(); - DataType y_dtype; - if (x_dtype == DT_DOUBLE || x_dtype == DT_COMPLEX128) { - y_dtype = DT_COMPLEX128; - } else { - y_dtype = DT_COMPLEX64; - } - out_desc.SetDataType(y_dtype); - - bool unknown_rank_shape = IsUnknownRankShape(input_desc.GetShape()); - if (unknown_rank_shape) { - out_desc.SetShape(ge::Shape(UNKNOWN_RANK)); - OP_LOGD(TbeGetName(op).c_str(), "output shape:%s", to_string(out_desc.GetShape()).c_str()); - op.UpdateOutputDesc("y", out_desc); - return GRAPH_SUCCESS; - } - - size_t x_rank = input_desc.GetShape().GetDimNum(); - auto input_shape_dims = input_desc.GetShape().GetDims(); - vector output_shape_dims(input_shape_dims.begin(), input_shape_dims.end()); - const vector depend_names = {"n", "dim"}; - PREPARE_DYNAMIC_SHAPE(depend_names); - - // infer output shape based on 'n' and 'dim' - Tensor s_tensor; - if (op.GetInputConstData("s", s_tensor) == GRAPH_SUCCESS) { - DataType dtype = op.GetInputDescByName("s").GetDataType(); - std::vector s_vec; - GetConstValue(op, s_tensor, dtype, s_vec); - - Tensor dim_tensor; - std::vector dim_vec; - if (op.GetInputConstData("dim", dim_tensor) == GRAPH_SUCCESS) { - DataType dim_dtype = op.GetInputDescByName("dim").GetDataType(); - GetConstValue(op, dim_tensor, dim_dtype, dim_vec); - for (size_t i = 0; i < dim_vec.size(); i++) { - dim_vec[i] = dim_vec[i] < 0 ? static_cast(x_rank) + dim_vec[i] : dim_vec[i]; - } - } else { - for (size_t i = 0; i < s_vec.size(); i++) { - (void)dim_vec.emplace_back(x_rank - s_vec.size() + i); - } - } - for (size_t i = 0; i < s_vec.size(); i++) { - output_shape_dims[dim_vec[i]] = s_vec[i]; - } - } - - out_desc.SetShape(ge::Shape(output_shape_dims)); - op.UpdateOutputDesc("y", out_desc); - return GRAPH_SUCCESS; -} -CUST_COMMON_INFER_FUNC_REG(FFT2, FFTNBaseInferShape); -CUST_COMMON_INFER_FUNC_REG(FFTN, FFTNBaseInferShape); -CUST_COMMON_INFER_FUNC_REG(IFFT2, FFTNBaseInferShape); -CUST_COMMON_INFER_FUNC_REG(IFFTN, FFTNBaseInferShape); -} // namespace ge diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftshift_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftshift_proto.cc deleted file mode 100644 index 736f2429566cf63bc25ea35f387562733c54c123..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fftshift_proto.cc +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "custom_op_proto/cust_math_ops.h" -#include "register/op_impl_registry.h" -#include "utils/util.h" - -namespace ge { -IMPLEMT_COMMON_INFERFUNC(FFTShiftInferShape) { - TensorDesc out_desc = op.GetOutputDescByName("input"); - out_desc.SetDataType(op.GetInputDescByName("input").GetDataType()); - out_desc.SetShape(op.GetInputDescByName("input").GetShape()); - if (op.UpdateOutputDesc("y", out_desc) != GRAPH_SUCCESS) { - OP_LOGE(TbeGetName(op).c_str(), "Failed to update output desc."); - return GRAPH_FAILED; - } - return GRAPH_SUCCESS; -} - -CUST_COMMON_INFER_FUNC_REG(FFTShift, FFTShiftInferShape); -CUST_COMMON_INFER_FUNC_REG(IFFTShift, FFTShiftInferShape); -} // namespace ge \ No newline at end of file diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/irfft_grad_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/irfft_grad_proto.cc deleted file mode 100644 index 0458c393402ceab9db32aaf191cb27dbc43b4cc1..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/irfft_grad_proto.cc +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "custom_op_proto/cust_math_ops.h" -#include "register/op_impl_registry.h" -#include "utils/util.h" - -namespace ge { -IMPLEMT_COMMON_INFERFUNC(IRFFTGradInferShape) { - DataType x_dtype = op.GetInputDescByName("input1").GetDataType(); - DataType y_dtype; - if (x_dtype == DT_DOUBLE || x_dtype == DT_COMPLEX128) { - y_dtype = DT_COMPLEX128; - } else { - y_dtype = DT_COMPLEX64; - } - TensorDesc out_desc = op.GetOutputDescByName("y"); - out_desc.SetDataType(y_dtype); - out_desc.SetShape(op.GetInputDescByName("input2").GetShape()); - - if (op.UpdateOutputDesc("y", out_desc) != GRAPH_SUCCESS) { - OP_LOGE(TbeGetName(op).c_str(), "Failed to update output desc."); - return GRAPH_FAILED; - } - return GRAPH_SUCCESS; -} -CUST_COMMON_INFER_FUNC_REG(IRFFTGrad, IRFFTGradInferShape); -} // namespace ge \ No newline at end of file diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/irfft_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/irfft_proto.cc deleted file mode 100644 index 57a63963356850dab854e03f6aa2f2956a12ccce..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/irfft_proto.cc +++ /dev/null @@ -1,110 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "custom_op_proto/cust_math_ops.h" -#include "register/op_impl_registry.h" -#include "utils/util.h" - -namespace ge { -static graphStatus IRFFTInferShapeCommon(Operator &op, int64_t n, int64_t dim, bool unknown_n) { - if (!unknown_n && n <= 0) { - std::string err_msg = GetAttrValueErrMsg("irfft n", std::to_string(n), ConcatString("n > 0")); - VECTOR_INFER_SHAPE_INNER_ERR_REPORT(TbeGetName(op), err_msg); - return GRAPH_FAILED; - } - const int kRealFFTSideNum = 2; - auto input_desc = op.GetInputDescByName("input"); - auto out_desc = op.GetOutputDescByName("y"); - - DataType x_dtype = input_desc.GetDataType(); - DataType y_dtype; - if (x_dtype == DT_DOUBLE || x_dtype == DT_COMPLEX128) { - y_dtype = DT_DOUBLE; - } else { - y_dtype = DT_FLOAT; - } - out_desc.SetDataType(y_dtype); - - bool unknown_rank_shape = IsUnknownRankShape(input_desc.GetShape()); - if (unknown_rank_shape) { - out_desc.SetShape(ge::Shape(UNKNOWN_RANK)); - OP_LOGD(TbeGetName(op).c_str(), "output shape:%s", to_string(out_desc.GetShape()).c_str()); - op.UpdateOutputDesc("y", out_desc); - return GRAPH_SUCCESS; - } - - size_t x_rank = input_desc.GetShape().GetDimNum(); - auto input_shape_dims = input_desc.GetShape().GetDims(); - dim = dim < 0 ? static_cast(x_rank) + dim : dim; - vector output_shape_dims(input_shape_dims.begin(), input_shape_dims.end()); - if (unknown_n) { - if (input_shape_dims[dim] != UNKNOWN_DIM) { - output_shape_dims[dim] = kRealFFTSideNum * (output_shape_dims[dim] - 1); - } - } else { - output_shape_dims[dim] = n; - } - - out_desc.SetShape(ge::Shape(output_shape_dims)); - OP_LOGD(TbeGetName(op).c_str(), "output shape:%s", to_string(out_desc.GetShape()).c_str()); - op.UpdateOutputDesc("y", out_desc); - - return GRAPH_SUCCESS; -} - -IMPLEMT_COMMON_INFERFUNC(IRFFTInferShape) { - const vector depend_names = {"n", "dim"}; - PREPARE_DYNAMIC_SHAPE(depend_names); - - // infer output shape based on 'n' and 'dim' - Tensor n_data; - bool is_unknown_n{true}; - if (op.GetInputConstData("n", n_data) == GRAPH_SUCCESS) { - is_unknown_n = false; - } - OP_LOGD(TbeGetName(op), "irfft n is unknown[%s].", is_unknown_n ? "true" : "false"); - int64_t n = 0; - if (!is_unknown_n) { - DataType dtype = op.GetInputDescByName("n").GetDataType(); - std::vector const_vec; - if (!GetConstValue(op, n_data, dtype, const_vec)) { - is_unknown_n = true; - OP_LOGW(TbeGetName(op), "Get irfft n value failed."); - } else { - n = const_vec[0]; - } - } - Tensor dim_data; - bool is_unknown_axis{true}; - if (op.GetInputConstData("dim", dim_data) == GRAPH_SUCCESS) { - is_unknown_axis = false; - } - OP_LOGD(TbeGetName(op), "irfft axis is unknown[%s].", is_unknown_axis ? "true" : "false"); - int64_t dim = -1; - if (!is_unknown_axis) { - DataType dim_dtype = op.GetInputDescByName("dim").GetDataType(); - std::vector const_vec_dim; - if (!GetConstValue(op, dim_data, dim_dtype, const_vec_dim)) { - OP_LOGW(TbeGetName(op), "Get rfft dim value failed."); - } else { - dim = const_vec_dim[0]; - } - } - - return IRFFTInferShapeCommon(op, n, dim, is_unknown_n); -} -CUST_COMMON_INFER_FUNC_REG(IRFFT, IRFFTInferShape); -} // namespace ge \ No newline at end of file diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/rfft_grad_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/rfft_grad_proto.cc deleted file mode 100644 index 405484e7cda6e43b7c1aae26fc7778f1334248a0..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/rfft_grad_proto.cc +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "custom_op_proto/cust_math_ops.h" -#include "register/op_impl_registry.h" -#include "utils/util.h" - -namespace ge { -IMPLEMT_COMMON_INFERFUNC(RFFTGradInferShape) { - DataType x_dtype = op.GetInputDescByName("input1").GetDataType(); - DataType y_dtype; - if (x_dtype == DT_DOUBLE || x_dtype == DT_COMPLEX128) { - y_dtype = DT_COMPLEX128; - } else { - y_dtype = DT_COMPLEX64; - } - TensorDesc out_desc = op.GetOutputDescByName("y"); - out_desc.SetDataType(y_dtype); - out_desc.SetShape(op.GetInputDescByName("input2").GetShape()); - - if (op.UpdateOutputDesc("y", out_desc) != GRAPH_SUCCESS) { - OP_LOGE(TbeGetName(op).c_str(), "Failed to update output desc."); - return GRAPH_FAILED; - } - return GRAPH_SUCCESS; -} -CUST_COMMON_INFER_FUNC_REG(RFFTGrad, RFFTGradInferShape); -} // namespace ge \ No newline at end of file diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/rfft_proto.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/rfft_proto.cc deleted file mode 100644 index 4fc909f0261cd031a32b2352ce1390584503e238..0000000000000000000000000000000000000000 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/rfft_proto.cc +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "custom_op_proto/cust_math_ops.h" -#include "register/op_impl_registry.h" -#include "utils/util.h" -#include "utils/common_shape_fns.h" -#include "utils/op_common_util.h" -#include "utils/op_const.h" - -namespace ge { -static graphStatus RFFTInferShapeCommon(Operator &op, int64_t n, int64_t dim, bool unknown_n) { - if (!unknown_n && n <= 0) { - std::string err_msg = GetAttrValueErrMsg("rfft n", std::to_string(n), ConcatString("n > 0")); - VECTOR_INFER_SHAPE_INNER_ERR_REPORT(TbeGetName(op), err_msg); - return GRAPH_FAILED; - } - const int kRealFFTSideNum = 2; - auto input_desc = op.GetInputDescByName("input"); - auto out_desc = op.GetOutputDescByName("y"); - - DataType x_dtype = input_desc.GetDataType(); - DataType y_dtype; - if (x_dtype == DT_DOUBLE) { - y_dtype = DT_COMPLEX128; - } else { - y_dtype = DT_COMPLEX64; - } - out_desc.SetDataType(y_dtype); - - bool unknown_rank_shape = IsUnknownRankShape(input_desc.GetShape()); - if (unknown_rank_shape) { - out_desc.SetShape(ge::Shape(UNKNOWN_RANK)); - OP_LOGD(TbeGetName(op).c_str(), "output shape:%s", to_string(out_desc.GetShape()).c_str()); - op.UpdateOutputDesc("y", out_desc); - return GRAPH_SUCCESS; - } - - size_t x_rank = input_desc.GetShape().GetDimNum(); - auto input_shape_dims = input_desc.GetShape().GetDims(); - dim = dim < 0 ? static_cast(x_rank) + dim : dim; - vector output_shape_dims(input_shape_dims.begin(), input_shape_dims.end()); - if (unknown_n) { - if (input_shape_dims[dim] != UNKNOWN_DIM) { - output_shape_dims[dim] = output_shape_dims[dim] / kRealFFTSideNum + 1; - } - } else { - output_shape_dims[dim] = n / kRealFFTSideNum + 1; - } - - out_desc.SetShape(ge::Shape(output_shape_dims)); - OP_LOGD(TbeGetName(op).c_str(), "output shape:%s", to_string(out_desc.GetShape()).c_str()); - op.UpdateOutputDesc("y", out_desc); - - return GRAPH_SUCCESS; -} - -IMPLEMT_COMMON_INFERFUNC(RFFTInferShape) { - const vector depend_names = {"n", "dim"}; - PREPARE_DYNAMIC_SHAPE(depend_names); - - // infer output shape based on 'n' and 'dim' - Tensor n_data; - bool is_unknown_n{true}; - if (op.GetInputConstData("n", n_data) == GRAPH_SUCCESS) { - is_unknown_n = false; - } - OP_LOGD(TbeGetName(op), "rfft n is unknown[%s].", is_unknown_n ? "true" : "false"); - int64_t n = 0; - if (!is_unknown_n) { - DataType dtype = op.GetInputDescByName("n").GetDataType(); - std::vector const_vec; - if (!GetConstValue(op, n_data, dtype, const_vec)) { - is_unknown_n = true; - OP_LOGW(TbeGetName(op), "Get rfft n value failed."); - } else { - n = const_vec[0]; - } - } - Tensor dim_data; - bool is_unknown_axis{true}; - if (op.GetInputConstData("dim", dim_data) == GRAPH_SUCCESS) { - is_unknown_axis = false; - } - OP_LOGD(TbeGetName(op), "rfft axis is unknown[%s].", is_unknown_axis ? "true" : "false"); - int64_t dim = -1; - if (!is_unknown_axis) { - DataType dim_dtype = op.GetInputDescByName("dim").GetDataType(); - std::vector const_vec_dim; - if (!GetConstValue(op, dim_data, dim_dtype, const_vec_dim)) { - OP_LOGW(TbeGetName(op), "Get rfft dim value failed."); - } else { - dim = const_vec_dim[0]; - } - } - - return RFFTInferShapeCommon(op, n, dim, is_unknown_n); -} -CUST_COMMON_INFER_FUNC_REG(RFFT, RFFTInferShape); -} // namespace ge \ No newline at end of file diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/ascendc/op_kernel/all_finite.cpp b/mindspore/ccsrc/plugin/device/ascend/kernel/ascendc/op_kernel/all_finite.cpp index 000e1b15ab7c4c55a842d2d117a099e82554e5fd..61d186e3c80c3a5d195c7d091b27049f4b96797a 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/ascendc/op_kernel/all_finite.cpp +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/ascendc/op_kernel/all_finite.cpp @@ -227,9 +227,7 @@ class KernelAllFinite { float result = half_comp_t.GetValue(0); if (result != 0) { ui16_t.SetValue(0, 1); - AscendC::SetAtomicAdd(); DataCopy(yGm[0], half_comp_t, OUT_MIN_LEN); - AscendC::SetAtomicNone(); *loop = count; } } diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/internal/reshape.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/internal/reshape.cc index c504c8c39e8af211db0d6cc654b2be0d3712fec5..58f8d54ef966dc8c19aee43c8b663486525fea1d 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/internal/reshape.cc +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/internal/reshape.cc @@ -21,6 +21,8 @@ #include "kernel/framework_utils.h" #include "plugin/device/ascend/kernel/internal/internal_kernel_utils.h" +#include "transform/symbol/acl_rt_symbol.h" +#include "transform/symbol/symbol_utils.h" namespace mindspore { namespace kernel { @@ -68,8 +70,8 @@ bool InternalReshape::Launch(const std::vector &inputs, const st MS_EXCEPTION_IF_NULL(stream_ptr); auto status = - aclrtMemcpyAsync(outputs[kIndex0]->device_ptr(), outputs[kIndex0]->size(), inputs[kIndex0]->device_ptr(), - inputs[kIndex0]->size(), ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); + CALL_ASCEND_API(aclrtMemcpyAsync, outputs[kIndex0]->device_ptr(), outputs[kIndex0]->size(), + inputs[kIndex0]->device_ptr(), inputs[kIndex0]->size(), ACL_MEMCPY_DEVICE_TO_DEVICE, stream_ptr); if (status != ACL_ERROR_NONE) { MS_LOG(ERROR) << "ReshapeKernelMod Launch failed. kernel: " << kernel_name_ << ", call rtMemcpyAsync failed, ret = 0x" << status; diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/internal/tiling_cache.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/internal/tiling_cache.cc index 5f17df570d3e02156b0a8e870649b161c2956dde..6bb7d639de5c556c27ca6b624233bea919dc83bc 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/internal/tiling_cache.cc +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/internal/tiling_cache.cc @@ -19,6 +19,8 @@ #include "ops/op_utils.h" #include "transform/acl_ir/op_api_cache.h" +#include "transform/symbol/acl_rt_symbol.h" +#include "transform/symbol/symbol_utils.h" namespace mindspore::kernel { @@ -74,8 +76,8 @@ TilingInfo TilingCacheMgr::GetOrCreateTilingInfo( // Bind device to current thread. device_context_->device_res_manager_->BindDeviceToCurrentThread(false); - ret = aclrtMemcpy(tiling_cache_elem.device_buf_.addr_, tiling_cache_elem.device_buf_.size_, host_tiling_buf_.addr_, - host_tiling_buf_.size_, ACL_MEMCPY_HOST_TO_DEVICE); + ret = CALL_ASCEND_API(aclrtMemcpy, tiling_cache_elem.device_buf_.addr_, tiling_cache_elem.device_buf_.size_, + host_tiling_buf_.addr_, host_tiling_buf_.size_, ACL_MEMCPY_HOST_TO_DEVICE); if (ret != 0) { MS_LOG(EXCEPTION) << "ACL_MEMCPY_HOST_TO_DEVICE failed!"; } diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/convolution_grad_aclnn_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/convolution_grad_aclnn_kernel.cc index 1f005c2953c9689581e8ad5feb854ec5e976f524..c4480ca6890fd205233fd13e8f85a8af799d1678 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/convolution_grad_aclnn_kernel.cc +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/convolution_grad_aclnn_kernel.cc @@ -35,6 +35,7 @@ void ConvolutionGradAscend::GetWorkSpaceInfo(const std::vector & output_padding_ = transform::ConvertKernelTensor>(inputs[kIndex8]); groups_ = transform::ConvertKernelTensor(inputs[kIndex9]); const auto &output_mask_vec = transform::ConvertKernelTensor>(inputs[kIndex10]); + output_mask_.clear(); std::transform(output_mask_vec.begin(), output_mask_vec.end(), std::back_inserter(output_mask_), [](const int64_t &value) { return static_cast(value); }); diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/divmod_aclnn_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/divmod_aclnn_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..13b9310645faa332fb2362e4718477d92fadaf7d --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/divmod_aclnn_kernel.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/ascend/kernel/opapi/aclnn/divmod_aclnn_kernel.h" +#include "ir/tensor.h" +#include "runtime/device/kernel_runtime.h" + +namespace mindspore { +namespace kernel { + +void DivModAscend::GetWorkSpaceInfo(const std::vector &inputs, + const std::vector &outputs) { + mode_ = 0; + auto mode_opt = inputs[kIndex2]->GetOptionalValueWithCheck(); + if (mode_opt.has_value()) { + mode_ = mode_opt.value(); + } + GetWorkspaceForResize(inputs[kIndex0], inputs[kIndex1], mode_, outputs[kIndex0]); +} + +bool DivModAscend::Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { + MS_EXCEPTION_IF_NULL(stream_ptr); + ParseGenExecutor(GEN_EXECUTOR_BOOST(op_type_, hash_id_, inputs[kIndex0], inputs[kIndex1], mode_, outputs[kIndex0])); + + RunOp(stream_ptr, workspace); + return true; +} + +MS_ACLNN_KERNEL_FACTORY_REG(DivMod, DivModAscend); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/divmod_aclnn_kernel.h b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/divmod_aclnn_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..039a5ec0e32f13c7800db0e7a545434bf2792799 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/divmod_aclnn_kernel.h @@ -0,0 +1,42 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_DIVMOD_ACLNN_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_DIVMOD_ACLNN_KERNEL_MOD_H_ +#include +#include +#include "ops/base_operator.h" +#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h" +#include "transform/acl_ir/acl_convert.h" + +namespace mindspore { +namespace kernel { + +class DivModAscend : public AclnnKernelMod { + public: + DivModAscend() : AclnnKernelMod(std::move("aclnnDivMod")) {} + ~DivModAscend() = default; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + void GetWorkSpaceInfo(const std::vector &inputs, const std::vector &outputs) override; + + private: + DEFINE_GET_WORKSPACE_FOR_RESIZE() + int64_t mode_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_DIVMOD_ACLNN_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/lin_space_ext_aclnn_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/lin_space_ext_aclnn_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..ba679100e06ecaa280de817729de6985c13e4c38 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/lin_space_ext_aclnn_kernel.cc @@ -0,0 +1,49 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/ascend/kernel/opapi/aclnn/lin_space_ext_aclnn_kernel.h" +#include +#include +#include +#include +#include +#include "ir/tensor.h" +#include "transform/acl_ir/acl_helper.h" +#include "transform/acl_ir/op_api_convert.h" +#include "abstract/ops/primitive_infer_map.h" + +namespace mindspore { +namespace kernel { +void LinSpaceExtAscend::GetWorkSpaceInfo(const std::vector &inputs, + const std::vector &outputs) { + auto start = transform::ConvertKernelTensor(inputs[kIndex0]); + auto end = transform::ConvertKernelTensor(inputs[kIndex1]); + steps_ = transform::ConvertKernelTensor(inputs[kIndex2]); + GetWorkspaceForResize(start, end, steps_, outputs[kIndex0]); +} + +bool LinSpaceExtAscend::Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { + auto start = transform::ConvertKernelTensor(inputs[kIndex0]); + auto end = transform::ConvertKernelTensor(inputs[kIndex1]); + MS_EXCEPTION_IF_NULL(stream_ptr); + ParseGenExecutor(GEN_EXECUTOR_BOOST(op_type_, hash_id_, start, end, steps_, outputs[kIndex0])); + RunOp(stream_ptr, workspace); + return true; +} + +MS_ACLNN_KERNEL_FACTORY_REG(LinSpaceExt, LinSpaceExtAscend); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/lin_space_ext_aclnn_kernel.h b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/lin_space_ext_aclnn_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..a36f74b0d8976c5d02020f7c6b7a22376ab51e98 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/lin_space_ext_aclnn_kernel.h @@ -0,0 +1,45 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_SUM_EXT_ACLNN_KERNEL_H_ +#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_SUM_EXT_ACLNN_KERNEL_H_ + +#include +#include +#include "ops/base_operator.h" +#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h" +#include "transform/acl_ir/acl_convert.h" + +namespace mindspore { +namespace kernel { + +class LinSpaceExtAscend : public AclnnKernelMod { + public: + LinSpaceExtAscend() : AclnnKernelMod(std::move("aclnnLinspace")) {} + ~LinSpaceExtAscend() = default; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + + void GetWorkSpaceInfo(const std::vector &inputs, const std::vector &outputs) override; + + private: + DEFINE_GET_WORKSPACE_FOR_RESIZE() + int64_t steps_; + TypeId dtype_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_OPAPI_ACLNN_SUM_EXT_ACLNN_KERNEL_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_indices_aclnn_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_indices_aclnn_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..a85d9e13784f831140df0d2ec465bd2008fb6c21 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_indices_aclnn_kernel.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_indices_aclnn_kernel.h" +#include +#include +#include +#include +#include +#include "ir/tensor.h" +#include "runtime/device/kernel_runtime.h" +#include "transform/acl_ir/acl_helper.h" +#include "abstract/ops/primitive_infer_map.h" + +namespace mindspore { +namespace kernel { + +void MaxPoolGradWithIndicesAscend::GetWorkSpaceInfo(const std::vector &inputs, + const std::vector &outputs) { + auto kernel_size = inputs[kIndex3]->GetValueWithCheck>(); + std::vector strides = kernel_size; + if (inputs[kIndex2]->type_id() != kMetaTypeNone) { + strides = inputs[kIndex4]->GetValueWithCheck>(); + } + auto pads = inputs[kIndex5]->GetValueWithCheck>(); + auto dilation = inputs[kIndex6]->GetValueWithCheck>(); + auto ceil_mode = inputs[kIndex7]->GetValueWithCheck(); + GetWorkspaceForResize(inputs[kIndex1], inputs[kIndex0], inputs[kIndex2], kernel_size, strides, pads, dilation, + ceil_mode, outputs[kIndex0]); +} + +bool MaxPoolGradWithIndicesAscend::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { + MS_EXCEPTION_IF_NULL(stream_ptr); + auto kernel_size = inputs[kIndex3]->GetValueWithCheck>(); + std::vector strides = kernel_size; + if (inputs[kIndex2]->type_id() != kMetaTypeNone) { + strides = inputs[kIndex4]->GetValueWithCheck>(); + } + auto pads = inputs[kIndex5]->GetValueWithCheck>(); + auto dilation = inputs[kIndex6]->GetValueWithCheck>(); + auto ceil_mode = inputs[kIndex7]->GetValueWithCheck(); + ParseGenExecutor(GEN_EXECUTOR_BOOST(op_type_, hash_id_, inputs[kIndex1], inputs[kIndex0], inputs[kIndex2], + kernel_size, strides, pads, dilation, ceil_mode, outputs[kIndex0])); + RunOp(stream_ptr, workspace); + return true; +} + +MS_ACLNN_KERNEL_FACTORY_REG(MaxPoolGradWithIndices, MaxPoolGradWithIndicesAscend); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_indices_aclnn_kernel.h b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_indices_aclnn_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..beaba6cc80175f32d93cf5a62229bcc29e9c00cc --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_indices_aclnn_kernel.h @@ -0,0 +1,41 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_GRAD_WITH_INDICES_ACLNN_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_GRAD_WITH_INDICES_ACLNN_KERNEL_MOD_H_ +#include +#include +#include "ops/base_operator.h" +#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h" +#include "transform/acl_ir/acl_convert.h" + +namespace mindspore { +namespace kernel { + +class MaxPoolGradWithIndicesAscend : public AclnnKernelMod { + public: + MaxPoolGradWithIndicesAscend() : AclnnKernelMod("aclnnMaxPool2dWithIndicesBackward") {} + ~MaxPoolGradWithIndicesAscend() = default; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + void GetWorkSpaceInfo(const std::vector &inputs, const std::vector &outputs) override; + + private: + DEFINE_GET_WORKSPACE_FOR_RESIZE() +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_GRAD_WITH_INDICES_ACLNN_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_mask_aclnn_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_mask_aclnn_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..26038b874c7f2cc32836d37c7ba28c1bca3c6862 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_mask_aclnn_kernel.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_mask_aclnn_kernel.h" +#include +#include +#include +#include +#include +#include "ir/tensor.h" +#include "runtime/device/kernel_runtime.h" +#include "transform/acl_ir/acl_helper.h" +#include "abstract/ops/primitive_infer_map.h" + +namespace mindspore { +namespace kernel { + +void MaxPoolGradWithMaskAscend::GetWorkSpaceInfo(const std::vector &inputs, + const std::vector &outputs) { + auto kernel_size = inputs[kIndex3]->GetValueWithCheck>(); + std::vector strides = kernel_size; + if (inputs[kIndex2]->type_id() != kMetaTypeNone) { + strides = inputs[kIndex4]->GetValueWithCheck>(); + } + auto pads = inputs[kIndex5]->GetValueWithCheck>(); + auto dilation = inputs[kIndex6]->GetValueWithCheck>(); + auto ceil_mode = inputs[kIndex7]->GetValueWithCheck(); + GetWorkspaceForResize(inputs[kIndex1], inputs[kIndex0], inputs[kIndex2], kernel_size, strides, pads, dilation, + ceil_mode, outputs[kIndex0]); +} + +bool MaxPoolGradWithMaskAscend::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { + MS_EXCEPTION_IF_NULL(stream_ptr); + auto kernel_size = inputs[kIndex3]->GetValueWithCheck>(); + std::vector strides = kernel_size; + if (inputs[kIndex2]->type_id() != kMetaTypeNone) { + strides = inputs[kIndex4]->GetValueWithCheck>(); + } + auto pads = inputs[kIndex5]->GetValueWithCheck>(); + auto dilation = inputs[kIndex6]->GetValueWithCheck>(); + auto ceil_mode = inputs[kIndex7]->GetValueWithCheck(); + ParseGenExecutor(GEN_EXECUTOR_BOOST(op_type_, hash_id_, inputs[kIndex1], inputs[kIndex0], inputs[kIndex2], + kernel_size, strides, pads, dilation, ceil_mode, outputs[kIndex0])); + RunOp(stream_ptr, workspace); + return true; +} + +MS_ACLNN_KERNEL_FACTORY_REG(MaxPoolGradWithMask, MaxPoolGradWithMaskAscend); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_mask_aclnn_kernel.h b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_mask_aclnn_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..d8da0da57657018310057d0bbd826e800329a8fb --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_grad_with_mask_aclnn_kernel.h @@ -0,0 +1,41 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_GRAD_WITH_MASK_ACLNN_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_GRAD_WITH_MASK_ACLNN_KERNEL_MOD_H_ +#include +#include +#include "ops/base_operator.h" +#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h" +#include "transform/acl_ir/acl_convert.h" + +namespace mindspore { +namespace kernel { + +class MaxPoolGradWithMaskAscend : public AclnnKernelMod { + public: + MaxPoolGradWithMaskAscend() : AclnnKernelMod("aclnnMaxPool2dWithMaskBackward") {} + ~MaxPoolGradWithMaskAscend() = default; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + void GetWorkSpaceInfo(const std::vector &inputs, const std::vector &outputs) override; + + private: + DEFINE_GET_WORKSPACE_FOR_RESIZE() +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_GRAD_WITH_MASK_ACLNN_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_indices_aclnn_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_indices_aclnn_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..a09772a7030962fafa7f14da97aa24e74c894fea --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_indices_aclnn_kernel.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_indices_aclnn_kernel.h" +#include +#include +#include +#include +#include +#include "ir/tensor.h" +#include "runtime/device/kernel_runtime.h" +#include "transform/acl_ir/acl_helper.h" +#include "abstract/ops/primitive_infer_map.h" + +namespace mindspore { +namespace kernel { + +void MaxPoolWithIndicesAscend::GetWorkSpaceInfo(const std::vector &inputs, + const std::vector &outputs) { + auto kernel_size = inputs[kIndex1]->GetValueWithCheck>(); + std::vector strides = kernel_size; + if (inputs[kIndex2]->type_id() != kMetaTypeNone) { + strides = inputs[kIndex2]->GetValueWithCheck>(); + } + auto pads = inputs[kIndex3]->GetValueWithCheck>(); + auto dilation = inputs[kIndex4]->GetValueWithCheck>(); + auto ceil_mode = inputs[kIndex5]->GetValueWithCheck(); + GetWorkspaceForResize(inputs[kIndex0], kernel_size, strides, pads, dilation, ceil_mode, outputs[kIndex0], + outputs[kIndex1]); +} + +bool MaxPoolWithIndicesAscend::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { + MS_EXCEPTION_IF_NULL(stream_ptr); + auto kernel_size = inputs[kIndex1]->GetValueWithCheck>(); + std::vector strides = kernel_size; + if (inputs[kIndex2]->type_id() != kMetaTypeNone) { + strides = inputs[kIndex2]->GetValueWithCheck>(); + } + auto pads = inputs[kIndex3]->GetValueWithCheck>(); + auto dilation = inputs[kIndex4]->GetValueWithCheck>(); + auto ceil_mode = inputs[kIndex5]->GetValueWithCheck(); + ParseGenExecutor(GEN_EXECUTOR_BOOST(op_type_, hash_id_, inputs[kIndex0], kernel_size, strides, pads, dilation, + ceil_mode, outputs[kIndex0], outputs[kIndex1])); + RunOp(stream_ptr, workspace); + return true; +} + +MS_ACLNN_KERNEL_FACTORY_REG(MaxPoolWithIndices, MaxPoolWithIndicesAscend); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_indices_aclnn_kernel.h b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_indices_aclnn_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..e8401a42bee879e11ceaac42ec5023bdf0afbb9c --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_indices_aclnn_kernel.h @@ -0,0 +1,41 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_WITH_INDICES_ACLNN_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_WITH_INDICES_ACLNN_KERNEL_MOD_H_ +#include +#include +#include "ops/base_operator.h" +#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h" +#include "transform/acl_ir/acl_convert.h" + +namespace mindspore { +namespace kernel { + +class MaxPoolWithIndicesAscend : public AclnnKernelMod { + public: + MaxPoolWithIndicesAscend() : AclnnKernelMod("aclnnMaxPool2dWithIndices") {} + ~MaxPoolWithIndicesAscend() = default; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + void GetWorkSpaceInfo(const std::vector &inputs, const std::vector &outputs) override; + + private: + DEFINE_GET_WORKSPACE_FOR_RESIZE() +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_WITH_INDICES_ACLNN_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_mask_aclnn_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_mask_aclnn_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..98181916af4a3d456ea4a48517de67d593afd366 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_mask_aclnn_kernel.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_mask_aclnn_kernel.h" +#include +#include +#include +#include +#include +#include "ir/tensor.h" +#include "runtime/device/kernel_runtime.h" +#include "transform/acl_ir/acl_helper.h" +#include "abstract/ops/primitive_infer_map.h" + +namespace mindspore { +namespace kernel { + +void MaxPoolWithMaskAscend::GetWorkSpaceInfo(const std::vector &inputs, + const std::vector &outputs) { + auto kernel_size = inputs[kIndex1]->GetValueWithCheck>(); + std::vector strides = kernel_size; + if (inputs[kIndex2]->type_id() != kMetaTypeNone) { + strides = inputs[kIndex2]->GetValueWithCheck>(); + } + auto pads = inputs[kIndex3]->GetValueWithCheck>(); + auto dilation = inputs[kIndex4]->GetValueWithCheck>(); + auto ceil_mode = inputs[kIndex5]->GetValueWithCheck(); + GetWorkspaceForResize(inputs[kIndex0], kernel_size, strides, pads, dilation, ceil_mode, outputs[kIndex0], + outputs[kIndex1]); +} + +bool MaxPoolWithMaskAscend::Launch(const std::vector &inputs, + const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { + MS_EXCEPTION_IF_NULL(stream_ptr); + auto kernel_size = inputs[kIndex1]->GetValueWithCheck>(); + std::vector strides = kernel_size; + if (inputs[kIndex2]->type_id() != kMetaTypeNone) { + strides = inputs[kIndex2]->GetValueWithCheck>(); + } + auto pads = inputs[kIndex3]->GetValueWithCheck>(); + auto dilation = inputs[kIndex4]->GetValueWithCheck>(); + auto ceil_mode = inputs[kIndex5]->GetValueWithCheck(); + ParseGenExecutor(GEN_EXECUTOR_BOOST(op_type_, hash_id_, inputs[kIndex0], kernel_size, strides, pads, dilation, + ceil_mode, outputs[kIndex0], outputs[kIndex1])); + RunOp(stream_ptr, workspace); + return true; +} + +MS_ACLNN_KERNEL_FACTORY_REG(MaxPoolWithMask, MaxPoolWithMaskAscend); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_mask_aclnn_kernel.h b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_mask_aclnn_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..6a4e969a137ceab7af32db1adaeebb72189662e0 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/max_pool_with_mask_aclnn_kernel.h @@ -0,0 +1,41 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_WITH_MASK_ACLNN_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_WITH_MASK_ACLNN_KERNEL_MOD_H_ +#include +#include +#include "ops/base_operator.h" +#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h" +#include "transform/acl_ir/acl_convert.h" + +namespace mindspore { +namespace kernel { + +class MaxPoolWithMaskAscend : public AclnnKernelMod { + public: + MaxPoolWithMaskAscend() : AclnnKernelMod("aclnnMaxPool2dWithMask") {} + ~MaxPoolWithMaskAscend() = default; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + void GetWorkSpaceInfo(const std::vector &inputs, const std::vector &outputs) override; + + private: + DEFINE_GET_WORKSPACE_FOR_RESIZE() +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_MAX_POOL_WITH_MASK_ACLNN_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/slice_ext_aclnn_kernel.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/slice_ext_aclnn_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..631d397f0053c7afe81cddda9eac7c23aa0f052b --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/slice_ext_aclnn_kernel.cc @@ -0,0 +1,64 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/ascend/kernel/opapi/aclnn/slice_ext_aclnn_kernel.h" +#include +#include +#include +#include +#include +#include "ir/tensor.h" +#include "transform/acl_ir/acl_helper.h" +#include "transform/acl_ir/op_api_convert.h" +#include "abstract/ops/primitive_infer_map.h" + +namespace mindspore { +namespace kernel { + +void SliceExtAscend::GetWorkSpaceInfo(const std::vector &inputs, + const std::vector &outputs) { + auto dim = transform::ConvertKernelTensor(inputs[kIndex1]); + auto start = transform::ConvertKernelTensor(inputs[kIndex2]); + auto end = transform::ConvertKernelTensor(inputs[kIndex3]); + auto step = transform::ConvertKernelTensor(inputs[kIndex4]); + + shape_ = inputs[0]->GetShapeVector(); + auto length_value = end - start; + start = start < 0 ? start + shape_[dim] : start; + end = start + length_value; + + GetWorkspaceForResize(inputs[kIndex0], dim, start, end, step, outputs[kIndex0]); +} + +bool SliceExtAscend::Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) { + MS_EXCEPTION_IF_NULL(stream_ptr); + auto dim = transform::ConvertKernelTensor(inputs[kIndex1]); + auto start = transform::ConvertKernelTensor(inputs[kIndex2]); + auto end = transform::ConvertKernelTensor(inputs[kIndex3]); + auto step = transform::ConvertKernelTensor(inputs[kIndex4]); + + auto length_value = end - start; + start = start < 0 ? start + shape_[dim] : start; + end = start + length_value; + + ParseGenExecutor(GEN_EXECUTOR_BOOST(op_type_, hash_id_, inputs[kIndex0], dim, start, end, step, outputs[kIndex0])); + RunOp(stream_ptr, workspace); + return true; +} + +MS_ACLNN_KERNEL_FACTORY_REG(SliceExt, SliceExtAscend); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/slice_ext_aclnn_kernel.h b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/slice_ext_aclnn_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..8df36ab9df01fd3e579c868f4aea8e6dd4e45587 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/opapi/aclnn/slice_ext_aclnn_kernel.h @@ -0,0 +1,44 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_SLICE_EXT_ACLNN_KERNEL_MOD_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_SLICE_EXT_ACLNN_KERNEL_MOD_H_ + +#include +#include +#include "ops/base_operator.h" +#include "plugin/device/ascend/kernel/opapi/aclnn_kernel_mod.h" +#include "transform/acl_ir/acl_convert.h" + +namespace mindspore { +namespace kernel { + +class SliceExtAscend : public AclnnKernelMod { + public: + SliceExtAscend() : AclnnKernelMod(std::move("aclnnSlice")) {} + ~SliceExtAscend() = default; + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs, void *stream_ptr) override; + + void GetWorkSpaceInfo(const std::vector &inputs, const std::vector &outputs) override; + + private: + DEFINE_GET_WORKSPACE_FOR_RESIZE() + std::vector shape_; +}; +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_SLICE_EXT_ACLNN_KERNEL_MOD_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/aclnn_utils.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/aclnn_utils.cc index c0e67abe2ddd642c951876638c9121e3c8332de3..add814a47292749ba903e30ae1b9754f5c760026 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/aclnn_utils.cc +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/aclnn_utils.cc @@ -15,10 +15,28 @@ */ #include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h" #include "transform/acl_ir/op_api_util.h" +#include "runtime/pipeline/pipeline.h" +#include "runtime/pipeline/task/device_task.h" +#include "runtime/pynative/op_executor.h" namespace mindspore { namespace kernel { namespace pyboost { int8_t GetCubeMathType() { return transform::OpApiUtil::GetCubeMathType(); } + +void DispatchLaunchKernel(const DeviceContext *device_context, const std::string &aclnn_name, void *ws_ptr, + size_t ws_size, transform::aclOpExecutor *executor, void *stream, + const std::function &release_func) { + runtime::OpExecutor::DispatchLaunchTask([=]() { + runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyNativeLaunchTask, + aclnn_name, false); + MS_LOG(DEBUG) << "launch task start, " << aclnn_name; + + device_context->device_res_manager_->BindDeviceToCurrentThread(false); + RUN_OP_API_ASYNC(aclnn_name, ws_ptr, ws_size, executor, stream, release_func); + + MS_LOG(DEBUG) << "launch task end, " << aclnn_name; + }); +} } // namespace pyboost } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/aclnn_utils.h b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/aclnn_utils.h index 891fdfcb7ae1bea90e51d68e790aca11e86ae814..d432f22101547e6ca7d86e424484236da2406a92 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/aclnn_utils.h +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/aclnn_utils.h @@ -23,27 +23,29 @@ #include "transform/acl_ir/op_api_exec.h" #include "runtime/device/device_address_utils.h" -#define LAUNCH_ACLNN(aclnn_api, device_context, stream_id, ...) \ - do { \ - static const std::string aclnn_name = #aclnn_api; \ - runtime::ProfilerRecorder aclnn_profiler(runtime::ProfilerModule::kPynative, \ - runtime::ProfilerEvent::kPyBoostLaunchAclnn, aclnn_name, false); \ - auto stream_ptr = device_context->device_res_manager_->GetStream(stream_id); \ - auto [ws_size, executor_handle, release_function] = GEN_EXECUTOR(aclnn_name, __VA_ARGS__); \ - if (ws_size == 0) { \ - RUN_OP_API_ASYNC(aclnn_name, nullptr, 0, executor_handle, stream_ptr, release_function); \ - } else { \ - auto workspace_device_address = \ - runtime::DeviceAddressUtils::CreateWorkspaceAddress(device_context, stream_id, ws_size); \ - RUN_OP_API_ASYNC(aclnn_name, workspace_device_address->GetMutablePtr(), ws_size, executor_handle, stream_ptr, \ - release_function); \ - } \ - static auto sync = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); \ - if (sync) { \ - if (!device::ascend::AscendStreamMng::GetInstance().SyncAllStreams()) { \ - MS_LOG(EXCEPTION) << "SyncStream failed for op " << aclnn_name; \ - } \ - } \ +#define LAUNCH_ACLNN(aclnn_api, device_context, stream_id, ...) \ + do { \ + static const std::string aclnn_name = #aclnn_api; \ + runtime::ProfilerRecorder aclnn_profiler(runtime::ProfilerModule::kPynative, \ + runtime::ProfilerEvent::kPyBoostLaunchAclnn, aclnn_name, false); \ + auto stream_ptr = device_context->device_res_manager_->GetStream(stream_id); \ + auto [ws_size, executor_handle, release_function] = GEN_EXECUTOR(aclnn_name, __VA_ARGS__); \ + if (ws_size == 0) { \ + DispatchLaunchKernel(device_context, aclnn_name, nullptr, 0, executor_handle, stream_ptr, release_function); \ + } else { \ + auto workspace_device_address = \ + runtime::DeviceAddressUtils::CreateWorkspaceAddress(device_context, stream_id, ws_size); \ + DispatchLaunchKernel(device_context, aclnn_name, workspace_device_address->GetMutablePtr(), ws_size, \ + executor_handle, stream_ptr, release_function); \ + } \ + static auto sync = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); \ + if (sync) { \ + if (!device::ascend::AscendStreamMng::GetInstance().SyncAllStreams()) { \ + MS_LOG(EXCEPTION) << "SyncStream failed for op " << aclnn_name; \ + } \ + } else { \ + runtime::DeviceAddressUtils::ProcessCrossStreamAddress(aclnn_name, device_context, stream_id, __VA_ARGS__); \ + } \ } while (false) #define LAUNCH_ACLNN_SYNC(aclnn_api, device_context, stream_id, ...) \ @@ -68,6 +70,8 @@ if (!device::ascend::AscendStreamMng::GetInstance().SyncAllStreams()) { \ MS_LOG(EXCEPTION) << "SyncStream failed for op " << aclnn_name; \ } \ + } else { \ + runtime::DeviceAddressUtils::ProcessCrossStreamAddress(aclnn_name, device_context, stream_id, __VA_ARGS__); \ } \ return &all_acl_tensor; \ } \ @@ -76,6 +80,9 @@ namespace mindspore { namespace kernel { namespace pyboost { int8_t GetCubeMathType(); +void DispatchLaunchKernel(const DeviceContext *device_context, const std::string &aclnn_name, void *ws_ptr, + size_t ws_size, transform::aclOpExecutor *executor, void *stream, + const std::function &release_func); } // namespace pyboost } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/customize_copy.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/customize_copy.cc index 2474c9b231dd279d77dda7c8fc13e19c95007832..9dcdaf6ad715ce70de8877900c504b786ce8756b 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/customize_copy.cc +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/customize_copy.cc @@ -28,42 +28,40 @@ void CustomizeCopyAscend(device::DeviceContext *device_context, const device::De MS_EXCEPTION_IF_NULL(input_addr); MS_EXCEPTION_IF_NULL(output_addr); - // Async - PyBoostUtils::DispatchRun(std::make_shared([device_context, input_addr, output_addr, - stream_id]() { - // The input_addr_list address is malloc before - // Malloc for output tensors - if (output_addr->GetPtr() == nullptr) { - if (!device_context->device_res_manager_->AllocateMemory(output_addr.get())) { - MS_LOG(EXCEPTION) << "Allocate memory failed"; - } + runtime::OpExecutor::GetInstance().WaitAll(); + + // The input_addr_list address is malloc before + // Malloc for output tensors + if (output_addr->GetPtr() == nullptr) { + if (!device_context->device_res_manager_->AllocateMemory(output_addr.get())) { + MS_LOG(EXCEPTION) << "Allocate memory failed"; } + } - const auto &input_kernel_tensor = input_addr->kernel_tensor(); - const auto &output_kernel_tensor = output_addr->kernel_tensor(); + const auto &input_kernel_tensor = input_addr->kernel_tensor(); + const auto &output_kernel_tensor = output_addr->kernel_tensor(); - auto fill_kernel_info = [](const KernelTensorPtr &kernel_tensor) { - MS_EXCEPTION_IF_NULL(kernel_tensor); + auto fill_kernel_info = [](const KernelTensorPtr &kernel_tensor) { + MS_EXCEPTION_IF_NULL(kernel_tensor); - if (!kernel_tensor->host_info_exist()) { - kernel_tensor->SetType(std::make_shared(TypeIdToType(kernel_tensor->dtype_id()))); - kernel_tensor->SetShape(std::make_shared(kernel_tensor->host_shape())); - } - }; + if (!kernel_tensor->host_info_exist()) { + kernel_tensor->SetType(std::make_shared(TypeIdToType(kernel_tensor->dtype_id()))); + kernel_tensor->SetShape(std::make_shared(kernel_tensor->host_shape())); + } + }; - fill_kernel_info(input_kernel_tensor); - fill_kernel_info(output_kernel_tensor); - const auto &input_storage_info = input_kernel_tensor->tensor_storage_info(); - const auto &output_storage_info = output_kernel_tensor->tensor_storage_info(); - MS_LOG(DEBUG) << "Input_storage_info:" << (input_storage_info == nullptr ? "" : input_storage_info->ToString()) - << ", output_storage_info:" << (output_storage_info == nullptr ? "" : output_storage_info->ToString()) - << ", input address size:" << input_kernel_tensor->size() - << ", output address size:" << output_kernel_tensor->size(); + fill_kernel_info(input_kernel_tensor); + fill_kernel_info(output_kernel_tensor); + const auto &input_storage_info = input_kernel_tensor->tensor_storage_info(); + const auto &output_storage_info = output_kernel_tensor->tensor_storage_info(); + MS_LOG(DEBUG) << "Input_storage_info:" << (input_storage_info == nullptr ? "" : input_storage_info->ToString()) + << ", output_storage_info:" << (output_storage_info == nullptr ? "" : output_storage_info->ToString()) + << ", input address size:" << input_kernel_tensor->size() + << ", output address size:" << output_kernel_tensor->size(); - // Inplace output need be front - LAUNCH_ACLNN(aclnnInplaceCopy, device_context, stream_id, output_kernel_tensor.get(), input_kernel_tensor.get()); - MS_LOG(DEBUG) << "Launch end"; - })); + // Inplace output need be front + LAUNCH_ACLNN(aclnnInplaceCopy, device_context, stream_id, output_kernel_tensor.get(), input_kernel_tensor.get()); + MS_LOG(DEBUG) << "Launch end"; } } // namespace pyboost } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/divmod.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/divmod.cc new file mode 100644 index 0000000000000000000000000000000000000000..3c2a4685b1d93883cb9c0c70e0646bc53054c332 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/divmod.cc @@ -0,0 +1,53 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugin/device/ascend/kernel/pyboost/customize/divmod.h" +#include +#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" +#include "kernel/pyboost/op_register.h" +#include "kernel/pyboost/pyboost_utils.h" +#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr DivModAscendCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const BaseTensorPtr &y_tensor, + const std::optional &rounding_mode) { + OpRunner::InferOpOutput(op, x_tensor, y_tensor, rounding_mode); + + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor, y_tensor); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + + // Async + PyBoostUtils::DispatchRun(std::make_shared([op, x_tensor, y_tensor, rounding_mode]() { + MS_LOG(DEBUG) << "Run device task DivMod start"; + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + // Malloc for input tensors + PyBoostUtils::MallocOpInputs(device_context, x_tensor, y_tensor); + // Malloc for output tensors + PyBoostUtils::MallocOpOutputs(device_context, outputs); + + auto mode = GetValue(rounding_mode.value()); + LAUNCH_ACLNN(aclnnDivMod, device_context, op->stream_id(), x_tensor, y_tensor, mode, outputs[0]); + MS_LOG(DEBUG) << "Run device task DivMod end"; + })); + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/divmod.h b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/divmod.h new file mode 100644 index 0000000000000000000000000000000000000000..e8efe2e75e34c22d8b68d0a7780b973d89a706b1 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/divmod.h @@ -0,0 +1,36 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr DivModAscendCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const BaseTensorPtr &y_tensor, + const std::optional &rounding_mode); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/identity.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/identity.cc index bc092c1897404a0506b8a046331557da3375fe22..bb8b10dce35f6187a5dd8cc95303ee2f6df81147 100644 --- a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/identity.cc +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/identity.cc @@ -85,6 +85,8 @@ void IdentityCustomizeCallWithoutContigous(const std::shared_ptr &op, if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) { MS_LOG(EXCEPTION) << "Launch kernel identity failed"; } + runtime::DeviceAddressUtils::ProcessCrossStreamAddress(prim::kPrimIdentity->name(), device_context, op->stream_id(), + input_kernel_tensors, output_kernel_tensors); auto output_address = std::dynamic_pointer_cast(outputs[0]->device_address()); output_address->SetStorageInfo(input_x_address->GetStorageInfo()); output_address->set_ptr(launch_device_address->GetMutablePtr()); @@ -131,6 +133,8 @@ void IdentityCustomizeCall(const std::shared_ptr &op, const BaseTensor if (!identity_kernel->Launch(input_kernel_tensors, workspaces, output_kernel_tensors, stream_ptr)) { MS_LOG(EXCEPTION) << "Launch kernel identity failed"; } + runtime::DeviceAddressUtils::ProcessCrossStreamAddress(prim::kPrimIdentity->name(), device_context, op->stream_id(), + input_kernel_tensors, output_kernel_tensors); MS_LOG(DEBUG) << "Run device task Identity end"; })); } diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/lin_space_ext.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/lin_space_ext.cc new file mode 100644 index 0000000000000000000000000000000000000000..d20483a67bcd0b5109791ed7c498245f326431cc --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/lin_space_ext.cc @@ -0,0 +1,45 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugin/device/ascend/kernel/pyboost/customize/lin_space_ext.h" +#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" +#include "kernel/pyboost/pyboost_utils.h" +#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h" +#include "runtime/device/device_address_utils.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr LinSpaceExtAscendCustomize(const std::shared_ptr &op, const ScalarPtr &start, + const ScalarPtr &end, const Int64ImmPtr &steps, + const std::optional &dtype) { + OpRunner::InferOpOutput(op, start, end, steps, dtype); + auto steps_value = GetValue(steps); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + PyBoostUtils::DispatchRun(std::make_shared([op, start, end, steps_value]() { + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + + PyBoostUtils::MallocOpOutputs(device_context, outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Call start"; + LAUNCH_ACLNN(aclnnLinspace, device_context, op->stream_id(), start, end, steps_value, outputs[0]); + MS_LOG(DEBUG) << op->primitive()->name() << " Launch end"; + })); + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/lin_space_ext.h b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/lin_space_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..7ede0be78c3c24b514a76361a28c505f5c1340fd --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/lin_space_ext.h @@ -0,0 +1,35 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_LIN_SPACE_EXT_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_LIN_SPACE_EXT_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr LinSpaceExtAscendCustomize(const std::shared_ptr &op, const ScalarPtr &start, + const ScalarPtr &end, const Int64ImmPtr &steps, + const std::optional &dtype); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_LIN_SPACE_EXT_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_indices.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_indices.cc new file mode 100644 index 0000000000000000000000000000000000000000..0bb52bf8388a08dc2cf61c55dc15c07b73102d67 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_indices.cc @@ -0,0 +1,76 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_indices.h" +#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" +#include "kernel/pyboost/pyboost_utils.h" +#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h" +#include "runtime/device/device_address_utils.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +namespace { +void MaxPoolGradWithIndicesAscendCall(const std::shared_ptr &op, const device::DeviceContext *device_context, + const BaseTensorPtr &x_tensor, const BaseTensorPtr &grad, + const BaseTensorPtr mask, const ValueTuplePtr &kernel_size, + const std::optional &strides, const ValueTuplePtr &pads, + const ValueTuplePtr &dilation, const BoolImmPtr &ceil_mode, + const std::vector &outputs) { + std::vector strides_array; + if (strides.has_value()) { + strides_array = ConvertValueTupleToVector(strides.value()); + } + auto kernel_size_array = ConvertValueTupleToVector(kernel_size); + auto pads_array = ConvertValueTupleToVector(pads); + auto dilation_array = ConvertValueTupleToVector(dilation); + auto ceil_mode_scalar = GetValue(ceil_mode); + LAUNCH_ACLNN(aclnnMaxPool2dWithIndicesBackward, device_context, op->stream_id(), grad, x_tensor, mask, + kernel_size_array, strides_array, pads_array, dilation_array, ceil_mode_scalar, outputs[0]); +} +} // namespace + +tensor::BaseTensorPtr MaxPoolGradWithIndicesAscendCustomize(const std::shared_ptr &op, + const BaseTensorPtr &x_tensor, const BaseTensorPtr &grad, + const BaseTensorPtr mask, const ValueTuplePtr &kernel_size, + const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, + const Int64ImmPtr &argmax_type) { + OpRunner::InferOpOutput(op, x_tensor, grad, mask, kernel_size, strides, pads, dilation, ceil_mode, argmax_type); + // Create device address for input/output tensors + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor, grad, mask); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + + // Async + PyBoostUtils::DispatchRun(std::make_shared( + [op, x_tensor, grad, mask, kernel_size, strides, pads, dilation, ceil_mode]() { + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + // Malloc for input tensors + PyBoostUtils::MallocOpInputs(device_context, x_tensor, grad, mask); + // Malloc for output tensors + PyBoostUtils::MallocOpOutputs(device_context, outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Call start"; + MaxPoolGradWithIndicesAscendCall(op, device_context, x_tensor, grad, mask, kernel_size, strides, pads, dilation, + ceil_mode, outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Launch end"; + })); + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_indices.h b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..6faebd71f5e82362dc7977392f011f2eb3525fba --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_indices.h @@ -0,0 +1,40 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_GRAD_WITH_INDICES_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_GRAD_WITH_INDICES_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr MaxPoolGradWithIndicesAscendCustomize(const std::shared_ptr &op, + const BaseTensorPtr &x_tensor, const BaseTensorPtr &grad, + const BaseTensorPtr mask, const ValueTuplePtr &kernel_size, + const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, + const Int64ImmPtr &argmax_type); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_GRAD_WITH_INDICES_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_mask.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_mask.cc new file mode 100644 index 0000000000000000000000000000000000000000..7b0abd9245b3a6e6050fa75cb184f4731488bd59 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_mask.cc @@ -0,0 +1,74 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_mask.h" +#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" +#include "kernel/pyboost/pyboost_utils.h" +#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h" +#include "runtime/device/device_address_utils.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +namespace { +void MaxPoolGradWithMaskAscendCall(const std::shared_ptr &op, const device::DeviceContext *device_context, + const BaseTensorPtr &x_tensor, const BaseTensorPtr &grad, const BaseTensorPtr mask, + const ValueTuplePtr &kernel_size, const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, const std::vector &outputs) { + std::vector strides_array; + if (strides.has_value()) { + strides_array = ConvertValueTupleToVector(strides.value()); + } + auto kernel_size_array = ConvertValueTupleToVector(kernel_size); + auto pads_array = ConvertValueTupleToVector(pads); + auto dilation_array = ConvertValueTupleToVector(dilation); + auto ceil_mode_scalar = GetValue(ceil_mode); + LAUNCH_ACLNN(aclnnMaxPool2dWithMaskBackward, device_context, op->stream_id(), grad, x_tensor, mask, kernel_size_array, + strides_array, pads_array, dilation_array, ceil_mode_scalar, outputs[0]); +} +} // namespace + +tensor::BaseTensorPtr MaxPoolGradWithMaskAscendCustomize(const std::shared_ptr &op, + const BaseTensorPtr &x_tensor, const BaseTensorPtr &grad, + const BaseTensorPtr mask, const ValueTuplePtr &kernel_size, + const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, const Int64ImmPtr &argmax_type) { + OpRunner::InferOpOutput(op, x_tensor, grad, mask, kernel_size, strides, pads, dilation, ceil_mode, argmax_type); + // Create device address for input/output tensors + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor, grad, mask); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + + // Async + PyBoostUtils::DispatchRun(std::make_shared( + [op, x_tensor, grad, mask, kernel_size, strides, pads, dilation, ceil_mode]() { + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + // Malloc for input tensors + PyBoostUtils::MallocOpInputs(device_context, x_tensor, grad, mask); + // Malloc for output tensors + PyBoostUtils::MallocOpOutputs(device_context, outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Call start"; + MaxPoolGradWithMaskAscendCall(op, device_context, x_tensor, grad, mask, kernel_size, strides, pads, dilation, + ceil_mode, outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Launch end"; + })); + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_mask.h b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_mask.h new file mode 100644 index 0000000000000000000000000000000000000000..375fc7f42022951a2773532941358fd33b23f9c4 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_grad_with_mask.h @@ -0,0 +1,39 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_GRAD_WITH_MASK_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_GRAD_WITH_MASK_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr MaxPoolGradWithMaskAscendCustomize(const std::shared_ptr &op, + const BaseTensorPtr &x_tensor, const BaseTensorPtr &grad, + const BaseTensorPtr mask, const ValueTuplePtr &kernel_size, + const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, const Int64ImmPtr &argmax_type); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_GRAD_WITH_MASK_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_indices.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_indices.cc new file mode 100644 index 0000000000000000000000000000000000000000..b681386ad66036e9b4bbf30b5dfbf70d998a3bee --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_indices.cc @@ -0,0 +1,73 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugin/device/ascend/kernel/pyboost/customize/max_pool_with_indices.h" +#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" +#include "kernel/pyboost/pyboost_utils.h" +#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h" +#include "runtime/device/device_address_utils.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +namespace { +void MaxPoolWithIndicesAscendCall(const std::shared_ptr &op, const device::DeviceContext *device_context, + const BaseTensorPtr &x_tensor, const ValueTuplePtr &kernel_size, + const std::optional &strides, const ValueTuplePtr &pads, + const ValueTuplePtr &dilation, const BoolImmPtr &ceil_mode, + const std::vector &outputs) { + std::vector strides_array; + if (strides.has_value()) { + strides_array = ConvertValueTupleToVector(strides.value()); + } + auto kernel_size_array = ConvertValueTupleToVector(kernel_size); + auto pads_array = ConvertValueTupleToVector(pads); + auto dilation_array = ConvertValueTupleToVector(dilation); + auto ceil_mode_scalar = GetValue(ceil_mode); + LAUNCH_ACLNN(aclnnMaxPool2dWithIndices, device_context, op->stream_id(), x_tensor, kernel_size_array, strides_array, + pads_array, dilation_array, ceil_mode_scalar, outputs[0], outputs[1]); +} +} // namespace + +tensor::BaseTensorPtr MaxPoolWithIndicesAscendCustomize(const std::shared_ptr &op, + const BaseTensorPtr &x_tensor, const ValueTuplePtr &kernel_size, + const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, const Int64ImmPtr &argmax_type) { + OpRunner::InferOpOutput(op, x_tensor, kernel_size, strides, pads, dilation, ceil_mode, argmax_type); + // Create device address for input/output tensors + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + + // Async + PyBoostUtils::DispatchRun( + std::make_shared([op, x_tensor, kernel_size, strides, pads, dilation, ceil_mode]() { + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + // Malloc for input tensors + PyBoostUtils::MallocOpInputs(device_context, x_tensor); + // Malloc for output tensors + PyBoostUtils::MallocOpOutputs(device_context, outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Call start"; + MaxPoolWithIndicesAscendCall(op, device_context, x_tensor, kernel_size, strides, pads, dilation, ceil_mode, + outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Launch end"; + })); + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_indices.h b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..33f5a1f015b1a819419621df1e1286251da554cc --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_indices.h @@ -0,0 +1,38 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_WITH_INDICES_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_WITH_INDICES_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr MaxPoolWithIndicesAscendCustomize(const std::shared_ptr &op, + const BaseTensorPtr &x_tensor, const ValueTuplePtr &kernel_size, + const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, const Int64ImmPtr &argmax_type); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_WITH_INDICES_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_mask.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_mask.cc new file mode 100644 index 0000000000000000000000000000000000000000..7f6ad1ff82641ba39eb9763a08075902d8b13a80 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_mask.cc @@ -0,0 +1,72 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugin/device/ascend/kernel/pyboost/customize/max_pool_with_mask.h" +#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" +#include "kernel/pyboost/pyboost_utils.h" +#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h" +#include "runtime/device/device_address_utils.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +namespace { +void MaxPoolWithMaskAscendCall(const std::shared_ptr &op, const device::DeviceContext *device_context, + const BaseTensorPtr &x_tensor, const ValueTuplePtr &kernel_size, + const std::optional &strides, const ValueTuplePtr &pads, + const ValueTuplePtr &dilation, const BoolImmPtr &ceil_mode, + const std::vector &outputs) { + std::vector strides_array; + if (strides.has_value()) { + strides_array = ConvertValueTupleToVector(strides.value()); + } + auto kernel_size_array = ConvertValueTupleToVector(kernel_size); + auto pads_array = ConvertValueTupleToVector(pads); + auto dilation_array = ConvertValueTupleToVector(dilation); + auto ceil_mode_scalar = GetValue(ceil_mode); + LAUNCH_ACLNN(aclnnMaxPool2dWithMask, device_context, op->stream_id(), x_tensor, kernel_size_array, strides_array, + pads_array, dilation_array, ceil_mode_scalar, outputs[0], outputs[1]); +} +} // namespace + +tensor::BaseTensorPtr MaxPoolWithMaskAscendCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const ValueTuplePtr &kernel_size, + const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, const Int64ImmPtr &argmax_type) { + OpRunner::InferOpOutput(op, x_tensor, kernel_size, strides, pads, dilation, ceil_mode, argmax_type); + // Create device address for input/output tensors + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + + // Async + PyBoostUtils::DispatchRun( + std::make_shared([op, x_tensor, kernel_size, strides, pads, dilation, ceil_mode]() { + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + // Malloc for input tensors + PyBoostUtils::MallocOpInputs(device_context, x_tensor); + // Malloc for output tensors + PyBoostUtils::MallocOpOutputs(device_context, outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Call start"; + MaxPoolWithMaskAscendCall(op, device_context, x_tensor, kernel_size, strides, pads, dilation, ceil_mode, outputs); + MS_LOG(DEBUG) << op->primitive()->name() << " Launch end"; + })); + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_mask.h b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_mask.h new file mode 100644 index 0000000000000000000000000000000000000000..229409ec49b979a39f6b84cee2f6c340a741493f --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/max_pool_with_mask.h @@ -0,0 +1,38 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_WITH_MASK_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_WITH_MASK_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr MaxPoolWithMaskAscendCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const ValueTuplePtr &kernel_size, + const std::optional &strides, + const ValueTuplePtr &pads, const ValueTuplePtr &dilation, + const BoolImmPtr &ceil_mode, const Int64ImmPtr &argmax_type); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_MAX_POOL_WITH_MASK_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/slice_ext.cc b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/slice_ext.cc new file mode 100644 index 0000000000000000000000000000000000000000..becb3482b49160eb0952224351a67275f7049342 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/slice_ext.cc @@ -0,0 +1,61 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "plugin/device/ascend/kernel/pyboost/customize/slice_ext.h" +#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" +#include "kernel/pyboost/pyboost_utils.h" +#include "plugin/device/ascend/kernel/pyboost/aclnn_utils.h" +#include "runtime/device/device_address_utils.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { + +tensor::BaseTensorPtr SliceExtAscendCustomize(const std::shared_ptr &op, const BaseTensorPtr &input_tensor, + const Int64ImmPtr &dim, const Int64ImmPtr &start, const Int64ImmPtr &end, + const Int64ImmPtr &step) { + OpRunner::InferOpOutput(op, input_tensor, dim, start, end, step); + // Create device address for input/output tensors + PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor); + PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs()); + + // Async + PyBoostUtils::DispatchRun(std::make_shared([op, input_tensor, dim, start, end, step]() { + auto device_context = op->device_context(); + const auto &outputs = op->outputs(); + // Malloc for input tensors + PyBoostUtils::MallocOpInputs(device_context, input_tensor); + // Malloc for output tensors + PyBoostUtils::MallocOpOutputs(device_context, outputs); + + auto dim_imm = GetValue(dim); + auto start_imm = GetValue(start); + auto end_imm = GetValue(end); + auto step_imm = GetValue(step); + auto length = end_imm - start_imm; + start_imm = start_imm < 0 ? start_imm + input_tensor->shape()[dim_imm] : start_imm; + end_imm = start_imm + length; + + MS_LOG(DEBUG) << op->primitive()->name() << " Call start"; + LAUNCH_ACLNN(aclnnSlice, device_context, op->stream_id(), input_tensor, dim_imm, start_imm, end_imm, step_imm, + outputs[0]); + MS_LOG(DEBUG) << op->primitive()->name() << " Launch end"; + })); + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/slice_ext.h b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/slice_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..c3d4465b650b8472182c287904901ffadea7072b --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/kernel/pyboost/customize/slice_ext.h @@ -0,0 +1,36 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SLICE_EXT_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SLICE_EXT_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr SliceExtAscendCustomize(const std::shared_ptr &op, const BaseTensorPtr &input_tensor, + const Int64ImmPtr &dim, const Int64ImmPtr &start, const Int64ImmPtr &end, + const Int64ImmPtr &step); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SLICE_EXT_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/add_cast_for_ge.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/add_cast_for_ge.cc index 88a22bea492f050fd8b7c0959bf6658e99785c72..e0ecf0d1a73f6eea60ac058bec4af8a71758b3f2 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/add_cast_for_ge.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/add_cast_for_ge.cc @@ -47,6 +47,7 @@ const std::unordered_map, std::vect {ops::kNameSqrt, {{{0, int_type_with_bool, kNumberTypeFloat32}}, {}}}, {ops::kNameRsqrt, {{{0, int_type_with_bool, kNumberTypeFloat32}}, {}}}, {ops::kNameErfinv, {{{0, int_type_with_bool, kNumberTypeFloat32}}, {}}}, + {ops::kNameErf, {{{0, int_type_with_bool, kNumberTypeFloat32}}, {}}}, {ops::kNameReduceAny, {{{0, {}, kNumberTypeBool}}, {}}}, {ops::kNameLogicalAnd, {{{0, {}, kNumberTypeBool}, {1, {}, kNumberTypeBool}}, {}}}, {ops::kNameLogicalOr, {{{0, {}, kNumberTypeBool}, {1, {}, kNumberTypeBool}}, {}}}, diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/broadcast_for_select.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/broadcast_for_select.cc new file mode 100644 index 0000000000000000000000000000000000000000..a694870b693ec1c9775107bef6cb6f0a3dea38e0 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/broadcast_for_select.cc @@ -0,0 +1,127 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/ascend/optimizer/ge/broadcast_for_select.h" +#include +#include +#include +#include "mindspore/core/ops/array_ops.h" +#include "include/common/utils/anfalgo.h" +#include "include/backend/anf_runtime_algorithm.h" + +namespace mindspore { +namespace opt { +namespace { +ShapeVector GetSelectInputShape(const AnfNodePtr &input) { + MS_EXCEPTION_IF_NULL(input); + auto input_base_shape = input->Shape(); + MS_EXCEPTION_IF_NULL(input_base_shape); + auto input_shape = input_base_shape->cast(); + MS_EXCEPTION_IF_NULL(input_shape); + return input_shape->shape(); +} + +ShapeVector CalcBroadcastShape(AnfNodePtr cond, AnfNodePtr x, AnfNodePtr y) { + auto cond_shape = GetSelectInputShape(cond); + auto x_shape = GetSelectInputShape(x); + auto y_shape = GetSelectInputShape(y); + auto cond_size = cond_shape.size(); + auto x_size = x_shape.size(); + auto y_size = y_shape.size(); + ShapeVector broadcast_shape = + cond_size > x_size ? cond_size > y_size ? cond_shape : y_shape : x_size > y_size ? x_shape : y_shape; + auto n = broadcast_shape.size(); + for (size_t i = n; i > 0; --i) { + auto cond_i = cond_size < i ? 1 : cond_shape[cond_size - i]; + auto x_i = x_size < i ? 1 : x_shape[x_size - i]; + auto y_i = y_size < i ? 1 : y_shape[y_size - i]; + auto broadcost_i = std::max(cond_i, std::max(x_i, y_i)); + if (cond_i != broadcost_i && cond_i != 1) { + MS_EXCEPTION(ValueError) << "For select, condition input can not broadcast at index " << i; + } + if (x_i != broadcost_i && x_i != 1) { + MS_EXCEPTION(ValueError) << "For select, x input can not broadcast at index " << i; + } + if (y_i != broadcost_i && y_i != 1) { + MS_EXCEPTION(ValueError) << "For select, y input can not broadcast at index " << i; + } + broadcast_shape[n - i] = broadcost_i; + } + return broadcast_shape; +} + +CNodePtr AddBroadCastToNode(const FuncGraphPtr &func_graph, const AnfNodePtr &input_node, + const std::vector &broad_shape) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(input_node); + auto input_type = common::AnfAlgo::GetOutputInferDataType(input_node, 0); + auto shape_node = opt::CreateValueNodeWithKernelInfo(func_graph, MakeValue(broad_shape)); + + std::vector broadcastto_inputs = { + NewValueNode(std::make_shared(prim::kPrimBroadcastTo->name())), input_node, shape_node}; + CNodePtr broadcastto_node = NewCNode(broadcastto_inputs, func_graph); + MS_EXCEPTION_IF_NULL(broadcastto_node); + broadcastto_node->set_scope(input_node->scope()); + broadcastto_node->set_abstract(input_node->abstract()); + common::AnfAlgo::SetOutputInferTypeAndShape({input_type}, {broad_shape}, broadcastto_node.get()); + return broadcastto_node; +} + +CNodePtr AddSelectNode(const FuncGraphPtr &func_graph, const CNodePtr &cond_node, const CNodePtr &x_node, + const CNodePtr &y_node, const CNodePtr &select_node, const std::vector &broad_shape) { + MS_EXCEPTION_IF_NULL(func_graph); + MS_EXCEPTION_IF_NULL(cond_node); + MS_EXCEPTION_IF_NULL(x_node); + MS_EXCEPTION_IF_NULL(y_node); + MS_EXCEPTION_IF_NULL(select_node); + auto input_type = common::AnfAlgo::GetOutputInferDataType(select_node, 0); + + std::vector select_inputs = {NewValueNode(std::make_shared(prim::kPrimSelect->name())), + cond_node, x_node, y_node}; + CNodePtr out_node = NewCNode(select_inputs, func_graph); + MS_EXCEPTION_IF_NULL(out_node); + out_node->set_scope(select_node->scope()); + out_node->set_abstract(select_node->abstract()); + common::AnfAlgo::SetOutputInferTypeAndShape({input_type}, {broad_shape}, out_node.get()); + return out_node; +} +} // namespace + +const BaseRef BroadCastForSelect::DefinePattern() const { + VarPtr inputs = std::make_shared(); + return VectorRef({prim::kPrimSelect, inputs}); +} + +const AnfNodePtr BroadCastForSelect::Process(const FuncGraphPtr &graph, const AnfNodePtr &node, + const EquivPtr &) const { + // Select(...) ===> inputs -> CalcBroadcastShape -> BroadCastTo -> Select(...) + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(node); + auto select_node = node->cast(); + MS_EXCEPTION_IF_NULL(select_node); + // get broadcast shape + auto cond = select_node->input(kIndex1); + auto x = select_node->input(kIndex2); + auto y = select_node->input(kIndex3); + auto output_shape = CalcBroadcastShape(cond, x, y); + // do BroadCast + auto new_cond = AddBroadCastToNode(graph, cond, output_shape); + auto new_x = AddBroadCastToNode(graph, x, output_shape); + auto new_y = AddBroadCastToNode(graph, y, output_shape); + auto out_node = AddSelectNode(graph, new_cond, new_x, new_y, select_node, output_shape); + return out_node; +} +} // namespace opt +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/broadcast_for_select.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/broadcast_for_select.h new file mode 100644 index 0000000000000000000000000000000000000000..ef562562c7fa85136a86bf51897cfa2201439d76 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ge/broadcast_for_select.h @@ -0,0 +1,37 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_OPTIMIZER_GE_BROADCAST_FOR_SELECT_H_ +#define MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_OPTIMIZER_GE_BROADCAST_FOR_SELECT_H_ + +#include +#include +#include +#include +#include "include/backend/optimizer/optimizer.h" +#include "ops/auto_generate/gen_ops_primitive.h" + +namespace mindspore { +namespace opt { +class BroadCastForSelect : public PatternProcessPass { + public: + explicit BroadCastForSelect(bool multi_graph = true) : PatternProcessPass("broadcast_for_select", multi_graph) {} + ~BroadCastForSelect() override = default; + const BaseRef DefinePattern() const override; + const AnfNodePtr Process(const FuncGraphPtr &graph, const AnfNodePtr &node, const EquivPtr &) const override; +}; +} // namespace opt +} // namespace mindspore +#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_OPTIMIZER_GE_BROADCAST_FOR_SELECT_H_ diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc index 1c9301f30e2ecd856bea4434a0a5218457ea9853..e9e6e736f369160bc977754c6295bc90695f35cf 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/ge_backend_optimization.cc @@ -62,6 +62,7 @@ #include "backend/common/pass/insert_tensor_move_for_communication.h" #include "plugin/device/ascend/optimizer/enhancer/eliminate_maketuple_getitem.h" #include "plugin/device/ascend/optimizer/ge/convert_pad_v3_paddings.h" +#include "plugin/device/ascend/optimizer/ge/broadcast_for_select.h" namespace mindspore { namespace opt { @@ -97,6 +98,7 @@ void GEBackendOptimization(const KernelGraphPtr &kernel_graph) { opt_ge_pm->AddPass(std::make_shared(true, true)); opt_ge_pm->AddPass(std::make_shared("unfold_nested_output")); opt_ge_pm->AddPass(std::make_shared("unfold_nested_maketuple")); + opt_ge_pm->AddPass(std::make_shared()); optimizer->AddPassManager(opt_ge_pm); (void)optimizer->Optimize(kernel_graph); kernel_graph->SetExecOrderByDefault(); diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/adam_weight_decay_unify_mindir.cc b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/adam_weight_decay_unify_mindir.cc index 6f62a8336c4420c57221fe2d24bbc5ffa1ba1e25..07cd6acb7ba3dd0f11b56c7dea8113e96ed4c2bb 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/adam_weight_decay_unify_mindir.cc +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/adam_weight_decay_unify_mindir.cc @@ -1,5 +1,5 @@ /** - * Copyright 2023 Huawei Technologies Co., Ltd + * Copyright 2023-2024 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,6 +35,11 @@ ValueNodePtr CreateValueNode(const FuncGraphPtr &graph, double value) { return value_node; } +bool IsFloatParameter(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + return common::AnfAlgo::GetOutputInferDataType(node, 0) == kNumberTypeFloat32; +} + AnfNodePtr CreateCastNode(const FuncGraphPtr &graph, const AnfNodePtr &input, const TypeId dst_type) { MS_EXCEPTION_IF_NULL(graph); MS_EXCEPTION_IF_NULL(input); @@ -89,10 +94,6 @@ const AnfNodePtr AdamWeightDecayUnifyMindIR::Process(const FuncGraphPtr &func_gr << input_list.size(); } - // Create New node - PrimitivePtr prim = std::make_shared(kAdamApplyOneWithDecayOpName); - std::vector new_node_inputs = {NewValueNode(prim)}; - auto num_one = CreateValueNode(func_graph, 1.0); // 1 - beta1 auto beta1_sub = CreateSubCNode(func_graph, num_one, input_list[kIndex5]); @@ -103,10 +104,25 @@ const AnfNodePtr AdamWeightDecayUnifyMindIR::Process(const FuncGraphPtr &func_gr beta2_sub->set_scope(node->scope()); input_list.push_back(beta2_sub); // Cast - auto ori_param = input_list[kIndex1]; - input_list[kIndex1] = CreateCastNode(func_graph, input_list[kIndex1], kNumberTypeFloat32); - input_list[kIndex2] = CreateCastNode(func_graph, input_list[kIndex2], kNumberTypeFloat32); - input_list[kIndex3] = CreateCastNode(func_graph, input_list[kIndex3], kNumberTypeFloat32); + bool all_fp32 = false; + if (IsFloatParameter(input_list[kIndex1]) && IsFloatParameter(input_list[kIndex2]) && + IsFloatParameter(input_list[kIndex3])) { + all_fp32 = true; + } + + // Create New node + PrimitivePtr prim = nullptr; + AnfNodePtr ori_param = nullptr; + if (!all_fp32) { + ori_param = input_list[kIndex1]; + input_list[kIndex1] = CreateCastNode(func_graph, input_list[kIndex1], kNumberTypeFloat32); + input_list[kIndex2] = CreateCastNode(func_graph, input_list[kIndex2], kNumberTypeFloat32); + input_list[kIndex3] = CreateCastNode(func_graph, input_list[kIndex3], kNumberTypeFloat32); + prim = std::make_shared(kAdamApplyOneWithDecayOpName); + } else { + prim = std::make_shared(kAdamApplyOneWithDecayAssignOpName); + } + std::vector new_node_inputs = {NewValueNode(prim)}; input_list[kIndex9] = CreateCastNode(func_graph, input_list[kIndex9], kNumberTypeFloat32); // Mapping ms index to ge index. @@ -115,6 +131,10 @@ const AnfNodePtr AdamWeightDecayUnifyMindIR::Process(const FuncGraphPtr &func_gr (void)new_node_inputs.emplace_back(cur_node); } + if (all_fp32) { + return CreateAdamApplyOneWithDecayAssign(func_graph, node, input_list, &new_node_inputs); + } + // Create New AdamApplyOneWithDecay with three outputs. return CreateAdamApplyOneWithDecay(func_graph, node, ori_param, input_list, new_node_inputs); } @@ -150,5 +170,34 @@ const AnfNodePtr AdamWeightDecayUnifyMindIR::CreateAdamApplyOneWithDecay(const F make_tuple->set_scope(node->scope()); return make_tuple; } + +const AnfNodePtr AdamWeightDecayUnifyMindIR::CreateAdamApplyOneWithDecayAssign(const FuncGraphPtr &func_graph, + const AnfNodePtr &node, + const AnfNodePtrList &input_list, + AnfNodePtrList *new_node_inputs) const { + if (input_list[kIndex11] != nullptr) { + (void)new_node_inputs->emplace_back(input_list[kIndex11]); + } + auto new_cnode = NewCNode(*new_node_inputs, func_graph); + MS_EXCEPTION_IF_NULL(new_cnode); + new_cnode->set_scope(node->scope()); + AbstractBasePtrList new_node_abstract_list; + new_node_abstract_list.push_back(input_list[kIndex3]->abstract()); + new_node_abstract_list.push_back(input_list[kIndex2]->abstract()); + new_node_abstract_list.push_back(input_list[kIndex1]->abstract()); + auto abstract_tuple = std::make_shared(new_node_abstract_list); + new_cnode->set_abstract(abstract_tuple); + std::vector new_cnode_outputs; + CreateMultipleOutputsOfAnfNode(func_graph, new_cnode, kAdamApplyOneOutputNum, &new_cnode_outputs); + if (new_cnode_outputs.size() != kAdamApplyOneOutputNum) { + MS_LOG(INTERNAL_EXCEPTION) << "The output size of node " << new_cnode->DebugString() << " should be " + << kAdamApplyOneOutputNum << trace::DumpSourceLines(node); + } + auto make_tuple = CreateMakeTupleNode( + func_graph, + std::vector{new_cnode_outputs[kIndex2], new_cnode_outputs[kIndex1], new_cnode_outputs[kIndex0]}); + make_tuple->set_scope(node->scope()); + return make_tuple; +} } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/adam_weight_decay_unify_mindir.h b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/adam_weight_decay_unify_mindir.h index c5bec5ee7c726a7500161895c8e30bfd6a597d9d..14e0eeb568205100ab35ab4bc645fb85705d7290 100644 --- a/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/adam_weight_decay_unify_mindir.h +++ b/mindspore/ccsrc/plugin/device/ascend/optimizer/mindir/adam_weight_decay_unify_mindir.h @@ -39,6 +39,9 @@ class AdamWeightDecayUnifyMindIR : public PatternProcessPass { const AnfNodePtr CreateAdamApplyOneWithDecay(const FuncGraphPtr &func_graph, const AnfNodePtr &node, const AnfNodePtr &ori_param, const AnfNodePtrList &input_list, const AnfNodePtrList &new_node_inputs) const; + const AnfNodePtr CreateAdamApplyOneWithDecayAssign(const FuncGraphPtr &func_graph, const AnfNodePtr &node, + const AnfNodePtrList &input_list, + AnfNodePtrList *new_node_inputs) const; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_cpu_kernel.h index 822fdd89c9b5c1c97c2d7685ac106e4e2fd71cc6..735fa542ba0da2444694dbae16e4e1ad1fccd101 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/maximum_cpu_kernel.h @@ -62,6 +62,12 @@ class MaximumCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper< const T *input_y, T *output); template T MaximumFunc(const T &lhs, const T &rhs) const { + if (std::isnan(static_cast(lhs))) { + return lhs; + } + if (std::isnan(static_cast(rhs))) { + return rhs; + } return lhs > rhs ? lhs : rhs; } template diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.h index bef35e49abf5f9839fa021cf912968a5df94e0e3..4d242c8ad473cb53844ddd58300b7ffeba9b6da8 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/minimum_cpu_kernel.h @@ -59,6 +59,12 @@ class MinimumCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper< const int64_t d6, const T *input_x, const T *input_y, T *output); template T MinimumFunc(const T &lhs, const T &rhs) const { + if (std::isnan(static_cast(lhs))) { + return lhs; + } + if (std::isnan(static_cast(rhs))) { + return rhs; + } return lhs < rhs ? lhs : rhs; } template diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/contiguous.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/contiguous.cc index 04748558a82f4ca53e805159e66ca49ff80699de..3bb269e413a46250f17c06efe8bbd1ca6152bf7c 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/contiguous.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/contiguous.cc @@ -28,7 +28,7 @@ tensor::BaseTensorPtr ContiguousCPUCustomize(const std::shared_ptr &op return output_tensor; } - return CopyCustomizeCall(op, input_tensor, nullptr); + return CopyCustomizeCall(op, input_tensor); } } // namespace pyboost } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/copy.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/copy.cc index 70903d9bef87eb1a8973aa3ad416fb3998400804..75bd99df3afce63816842dec90bbf01b18e2a5b0 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/copy.cc +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/copy.cc @@ -23,7 +23,7 @@ namespace pyboost { tensor::BaseTensorPtr CopyCPUCustomize(const std::shared_ptr &op, const BaseTensorPtr &input_tensor) { MS_LOG(DEBUG) << "Call start"; - return CopyCustomizeCall(op, input_tensor, nullptr); + return CopyCustomizeCall(op, input_tensor); } } // namespace pyboost } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/divmod.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/divmod.cc new file mode 100644 index 0000000000000000000000000000000000000000..784b9f804315fa4fea88bf8e8ee903081ae1fdcc --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/divmod.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/cpu/kernel/pyboost/customize/divmod.h" +#include +#include +#include "mindspore/ccsrc/kernel/pyboost/customize/divmod.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr DivModCPUCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const BaseTensorPtr &y_tensor, + const std::optional &rounding_mode) { + DivModCustomize(op, x_tensor, y_tensor, rounding_mode, nullptr); + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/divmod.h b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/divmod.h new file mode 100644 index 0000000000000000000000000000000000000000..2075acfb4fcc3f66edbedcc495ec80c7c35127bb --- /dev/null +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/divmod.h @@ -0,0 +1,36 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr DivModCPUCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const BaseTensorPtr &y_tensor, + const std::optional &rounding_mode); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/silu.h b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/silu.h index 76f73c67f3965a87828360de1140805fc0f3ed9e..a2db0ceb295cda2905917078fb71b9d75a37f2c4 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/silu.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/silu.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SILU_H_ -#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SILU_H_ +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SILU_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SILU_H_ #include #include @@ -31,4 +31,4 @@ void SiLUCPUCustomize(const std::shared_ptr &op, const BaseTensorPtr & } // namespace pyboost } // namespace kernel } // namespace mindspore -#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SILU_H_ +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SILU_H_ diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/silu_grad.h b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/silu_grad.h index d8fd73d6bfd25ef156fccad75871e2a9966bf255..331a0a670eaadefb4c3e99e642aa91a8bbb0a75a 100644 --- a/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/silu_grad.h +++ b/mindspore/ccsrc/plugin/device/cpu/kernel/pyboost/customize/silu_grad.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SILU_GRAD_H_ -#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SILU_GRAD_H_ +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SILU_GRAD_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SILU_GRAD_H_ #include #include @@ -32,4 +32,4 @@ void SiLUGradCPUCustomize(const std::shared_ptr &op, const BaseTensorP } // namespace pyboost } // namespace kernel } // namespace mindspore -#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_SILU_GRAD_H_ +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_PYBOOST_CUSTOMIZE_SILU_GRAD_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.cc b/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.cc index 6c3a2e8d307f2c9db0aef87731edf67eb2e499f7..63e990a526a88fcb24459416ff44d12b0b3a10ab 100644 --- a/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.cc +++ b/mindspore/ccsrc/plugin/device/gpu/hal/hardware/gpu_device_context.cc @@ -70,6 +70,7 @@ #include "plugin/device/gpu/hal/device/gpu_device_synchronizer.h" #include "include/common/profiler.h" #include "ops/ascend_op_name.h" +#include "runtime/device/device_address_utils.h" #include "runtime/pipeline/task/kernel_task.h" namespace mindspore { @@ -1005,11 +1006,9 @@ bool GPUKernelExecutor::ExecuteKernelTask(const runtime::KernelTaskType &task_ty auto task = GetTaskByTaskType(task_type, task_context); MS_EXCEPTION_IF_NULL(task); - // 需要补充PROFILER_END - // PROFILER_END(start_time, runtime::ProfilerModule::kKernel, runtime::ProfilerEvent::kKernelLaunch, - // kernel->fullname_with_scope(), false); + uint64_t start_time = 0; + PROFILER_START(start_time); auto lock = LockLaunchKernel(stream); - auto ret = task->RunWithRet(); if (!ret) { MS_LOG(EXCEPTION) << "Exec task failed, task_type:" << task_type; @@ -1023,6 +1022,11 @@ bool GPUKernelExecutor::ExecuteKernelTask(const runtime::KernelTaskType &task_ty return false; } + runtime::DeviceAddressUtils::ProcessCrossStreamAddress("Contiguous", device_context_, stream_id, input_addr_list, + output_addr_list); + PROFILER_END(start_time, runtime::ProfilerModule::kKernel, runtime::ProfilerEvent::kKernelLaunch, "Contiguous", + false); + return true; } diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/contiguous.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/contiguous.cc index de5807fe83087c0545862d2f3948e4c440104911..7bf0e09f815d1f8c3c82bc7baf095743bb9dd3bb 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/contiguous.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/contiguous.cc @@ -28,8 +28,7 @@ tensor::BaseTensorPtr ContiguousGPUCustomize(const std::shared_ptr &op return output_tensor; } - auto stream = op->device_context()->device_res_manager_->GetStream(op->stream_id()); - return CopyCustomizeCall(op, input_tensor, stream); + return CopyCustomizeCall(op, input_tensor); } } // namespace pyboost } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/copy.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/copy.cc index bbe00b375a54e0c050099758222cd419c53ba11a..0321ae685bb4f87e79067a5780049b3622004e43 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/copy.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/copy.cc @@ -23,8 +23,7 @@ namespace pyboost { tensor::BaseTensorPtr CopyGPUCustomize(const std::shared_ptr &op, const BaseTensorPtr &input_tensor) { MS_LOG(DEBUG) << "Call start"; - auto stream = op->device_context()->device_res_manager_->GetStream(op->stream_id()); - return CopyCustomizeCall(op, input_tensor, stream); + return CopyCustomizeCall(op, input_tensor); } } // namespace pyboost } // namespace kernel diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/divmod.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/divmod.cc new file mode 100644 index 0000000000000000000000000000000000000000..dc477c09afd8d083efaaf8170c39dadd0e9696f8 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/divmod.cc @@ -0,0 +1,38 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "plugin/device/gpu/kernel/pyboost/customize/divmod.h" +#include +#include +#include "mindspore/ccsrc/kernel/pyboost/customize/divmod.h" +#include "mindspore/ccsrc/plugin/device/gpu/hal/device/gpu_device_manager.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr DivModGPUCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const BaseTensorPtr &y_tensor, + const std::optional &rounding_mode) { + auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id()); + DivModCustomize(op, x_tensor, y_tensor, rounding_mode, stream); + static auto sync = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); + if (sync && !op->device_context()->device_res_manager_->SyncAllStreams()) { + MS_LOG(EXCEPTION) << "SyncStream failed for op DivMod."; + } + return op->output(0); +} +} // namespace pyboost +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/divmod.h b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/divmod.h new file mode 100644 index 0000000000000000000000000000000000000000..18a66288ba4f98aeca89ab23b8752577c1a01774 --- /dev/null +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/divmod.h @@ -0,0 +1,36 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ +#define MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ + +#include +#include +#include "ir/tensor.h" +#include "ir/value.h" +#include "runtime/hardware/device_context_manager.h" +#include "kernel/pyboost/op_runner.h" + +namespace mindspore { +namespace kernel { +namespace pyboost { +tensor::BaseTensorPtr DivModGPUCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor, + const BaseTensorPtr &y_tensor, + const std::optional &rounding_mode); +} // namespace pyboost +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_PYBOOST_CUSTOMIZE_DIVMOD_H_ diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.cc index eb69677dd94c01d8e0e01ca151348926f6548cc4..18482912b80c502449fe98c78752da56a0623506 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/identity.cc @@ -25,8 +25,7 @@ namespace kernel { namespace pyboost { tensor::BaseTensorPtr IdentityGPUCustomize(const std::shared_ptr &op, const BaseTensorPtr &x_tensor) { MS_LOG(DEBUG) << "Identity call start"; - auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id()); - IdentityCustomize(op, x_tensor, stream); + IdentityCustomize(op, x_tensor); static auto sync = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); if (sync && !op->device_context()->device_res_manager_->SyncAllStreams()) { MS_LOG(EXCEPTION) << "SyncStream failed for op Identity."; diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/max_min.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/max_min.cc index f7cb7631fcde882c100a6a7d8c7ecd3f8b752989..d607817998383f45a7c4d18329b4c4ac3abe8054 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/max_min.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/max_min.cc @@ -49,8 +49,7 @@ void MinOrMaxGPUCall(const std::shared_ptr &op, const BaseTensorPtr &i PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), input_abs, input_tensor, axis, keep_dims); const auto &output_address_info = PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs); - auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id()); - PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, stream); + PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, op->stream_id()); static auto sync = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); if (sync && !device_context->device_res_manager_->SyncAllStreams()) { diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/mean_ext.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/mean_ext.cc index c20484b30427fb4f4bd5baa27310837fc6d7b3d7..e2f7636b8ef5b9bdacec38402d23bf844bdde79b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/mean_ext.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/mean_ext.cc @@ -45,9 +45,8 @@ void MeanExtGPUCall(const std::shared_ptr &op, const BaseTensorPtr &in PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), input_abs, input_tensor, axis, keep_dims); const auto &output_address_info = PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs); - auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id()); - PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, stream); + PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, op->stream_id()); static auto sync = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); if (sync && !device_context->device_res_manager_->SyncAllStreams()) { MS_LOG(EXCEPTION) << "SyncStream failed for op " << primitive->name(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/sum_ext.cc b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/sum_ext.cc index 5e7953031b54b365a80d970780748b3a04a65ac7..3c17d3576227361ce36879c016463913d2dd07b3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/sum_ext.cc +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/customize/sum_ext.cc @@ -47,9 +47,8 @@ void SumExtGPUCall(const std::shared_ptr &op, const BaseTensorPtr &inp input_tensor, axis, keep_dims, skip_mode); const auto &output_address_info = PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs); - auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id()); - PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, stream); + PyBoostUtils::LaunchKernel(primitive, device_context, input_address_info, output_address_info, op->stream_id()); static auto sync = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); if (sync && !device_context->device_res_manager_->SyncAllStreams()) { MS_LOG(EXCEPTION) << "SyncStream failed for op " << primitive->name(); diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/template/pyboost_gpu_call_template.tpl b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/template/pyboost_gpu_call_template.tpl index 65b569b732916bef1c218fb757642f73e09ecd70..e0f089876640bb156339c876d769bc392e65827b 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/template/pyboost_gpu_call_template.tpl +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/pyboost/template/pyboost_gpu_call_template.tpl @@ -29,8 +29,7 @@ std::make_shared([this, op, ${call_args_with_tensor} const auto &output_address_info = PyBoostUtils::GetAddressInfo(device_context, op->stream_id(), {op->output_abs()}, outputs); // Launch kernel - auto stream = device::gpu::GPUDeviceManager::GetInstance().GetStream(op->stream_id()); - PyBoostUtils::LaunchKernel(primitive(), op->device_context(), input_address_info, output_address_info, stream); + PyBoostUtils::LaunchKernel(primitive(), op->device_context(), input_address_info, output_address_info, op->stream_id()); // Data sync static auto sync = MsContext::GetInstance()->get_param(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE); diff --git a/mindspore/ccsrc/pybind_api/hal/OWNERS b/mindspore/ccsrc/pybind_api/hal/OWNERS new file mode 100644 index 0000000000000000000000000000000000000000..a6d834a816be6962592141ef09dd2442a99f3094 --- /dev/null +++ b/mindspore/ccsrc/pybind_api/hal/OWNERS @@ -0,0 +1,21 @@ +approvers: +- zh_qh # +- ginfung +- chenfei52 +- chujinjin +- hwcaifubi +- zjun3021 + +reviewers: +- lanzhineng +- lianliguang +- Margaret_wangrui +- irmo +- huangbingjian +- liangzhibo +- Liangcan-Li +- ligan15 +- wangch1009 + +options: + no_parent_owners: true diff --git a/mindspore/ccsrc/pybind_api/hal/event_py.cc b/mindspore/ccsrc/pybind_api/hal/event_py.cc index 1468fb73b768f3aadc1ca12af27d5a3344f27a6d..d0a6a1f8ab37905a750706409ac0f5eb7ab3d4b9 100644 --- a/mindspore/ccsrc/pybind_api/hal/event_py.cc +++ b/mindspore/ccsrc/pybind_api/hal/event_py.cc @@ -22,6 +22,7 @@ #include "include/common/pybind_api/api_register.h" #include "pipeline/pynative/forward/forward_task.h" #include "pipeline/pynative/pynative_utils.h" +#include "runtime/device/multi_stream_controller.h" namespace mindspore { namespace hal { @@ -66,12 +67,16 @@ void EventPy::DispatchRecordEventTask(const StreamPyPtr &stream) { EventCnt::IncreaseUnrecordedCnt(event_); // Record event async. - pynative::DispatchOp(std::make_shared([stream, event = event_]() { - auto record_fn = [stream, event]() { + pynative::DispatchOp(std::make_shared([this, stream, event = event_]() { + auto record_fn = [this, stream, event]() { + device::MultiStreamController::GetInstance()->Refresh(stream->device_ctx()); + task_id_on_stream_ = + device::MultiStreamController::GetInstance()->LaunchTaskIdOnStream(stream->device_ctx(), record_stream_id_); auto stream_ptr = stream->stream(); - MS_LOG(DEBUG) << "RecordEvent stream_ptr:" << stream_ptr << ", event:" << event; event->set_record_stream(stream_ptr); event->RecordEvent(); + MS_LOG(DEBUG) << "RecordEvent record_stream_id:" << record_stream_id_ << ", event:" << event << ", stream_ptr" + << stream_ptr << ", task_id_on_stream:" << task_id_on_stream_; EventCnt::DecreaseUnrecordedCnt(event); }; if (!runtime::OpExecutor::NeedSync()) { @@ -87,8 +92,10 @@ void EventPy::Record(const StreamPyPtr &stream) { MS_EXCEPTION_IF_NULL(stream); if (!is_created_) { CreateEvent(stream); + device_ctx_ = stream->device_ctx(); } if (event_ != nullptr) { + record_stream_id_ = stream->stream_id(); // event_ is nullptr in cpu DispatchRecordEventTask(stream); } @@ -96,12 +103,16 @@ void EventPy::Record(const StreamPyPtr &stream) { void EventPy::DispatchWaitEventTask(const StreamPyPtr &stream) { // Wait event async. - pynative::DispatchOp(std::make_shared([stream, event = event_]() { - auto wait_fn = [stream, event]() { + pynative::DispatchOp(std::make_shared([this, stream, event = event_]() { + auto wait_fn = [this, stream, event]() { auto stream_ptr = stream->stream(); MS_LOG(DEBUG) << "WaitEvent stream_ptr:" << stream_ptr << ", event:" << event; event->set_wait_stream(stream_ptr); event->WaitEventWithoutReset(); + + // Release cross stream memory event, mark record_stream_id is use stream id, wait stream id is memory stream id. + (void)device::MultiStreamController::GetInstance()->WaitEvent(stream->device_ctx(), task_id_on_stream_, + record_stream_id_, stream->stream_id()); }; if (!runtime::OpExecutor::NeedSync()) { runtime::OpExecutor::GetInstance().PushSimpleOpRunTask(std::make_shared(wait_fn)); @@ -140,6 +151,9 @@ void EventPy::Synchronize() { runtime::OpExecutor::GetInstance().WaitAll(); event_->SyncEvent(); + MS_EXCEPTION_IF_NULL(device_ctx_); + // Clear cross stream memory event which task id less than task_id_on_stream. + (void)device::MultiStreamController::GetInstance()->WaitEvent(device_ctx_, task_id_on_stream_, record_stream_id_); } float EventPy::ElapsedTime(const EventPyPtr &other_event) { diff --git a/mindspore/ccsrc/pybind_api/hal/event_py.h b/mindspore/ccsrc/pybind_api/hal/event_py.h index f086304a69adb202d475903af0a6d4747dc33e31..573c13515b7a51ce2231a0ebfd44ee69be1532e0 100644 --- a/mindspore/ccsrc/pybind_api/hal/event_py.h +++ b/mindspore/ccsrc/pybind_api/hal/event_py.h @@ -71,6 +71,9 @@ class EventPy { std::shared_ptr event_{nullptr}; // The stream object that helps create event_. We can use this to access device_res_manager_; StreamPyPtr creator_stream_{nullptr}; + int64_t task_id_on_stream_{0}; + size_t record_stream_id_{0}; + device::DeviceContext *device_ctx_; }; class EventCnt { diff --git a/mindspore/ccsrc/pybind_api/hal/stream_py.cc b/mindspore/ccsrc/pybind_api/hal/stream_py.cc index 1355a71310191cf4efe67002588bc1f0c8c60ff1..067053169b793b883a703c1ad11e26d4321a1929 100644 --- a/mindspore/ccsrc/pybind_api/hal/stream_py.cc +++ b/mindspore/ccsrc/pybind_api/hal/stream_py.cc @@ -19,6 +19,7 @@ #include "runtime/hardware/device_context_manager.h" #include "utils/ms_context.h" #include "include/common/pybind_api/api_register.h" +#include "runtime/device/multi_stream_controller.h" namespace mindspore { namespace hal { @@ -61,7 +62,8 @@ bool StreamPy::Query() { void StreamPy::Synchronize() { MS_LOG(DEBUG) << "stream_id:" << stream_id_; runtime::OpExecutor::GetInstance().WaitAll(); - device_ctx_->device_res_manager_->SyncStream(stream_id_); + device::MultiStreamController::GetInstance()->Refresh(device_ctx_); + (void)device::MultiStreamController::GetInstance()->SyncStream(device_ctx_, stream_id_); } std::string StreamPy::ToStringRepr() const { @@ -90,21 +92,20 @@ bool StreamPy::StreamEqual(const std::shared_ptr other_stream) { void SetCurStream(const StreamPyPtr &cur_stream) { MS_EXCEPTION_IF_NULL(cur_stream); - runtime::OpExecutor::GetInstance().WaitAll(); + MS_LOG(DEBUG) << "current_stream_id:" << cur_stream->stream_id(); cur_stream->device_ctx()->device_res_manager_->SetCurrentStreamId(cur_stream->stream_id()); } void Synchronize() { auto device_ctx = GetDeviceCtx(); runtime::OpExecutor::GetInstance().WaitAll(); - MS_EXCEPTION_IF_NULL(device_ctx); - device_ctx->device_res_manager_->SyncAllStreams(); + device::MultiStreamController::GetInstance()->Refresh(device_ctx); + (void)device::MultiStreamController::GetInstance()->SyncAllStreams(device_ctx); } StreamPyPtr CurrentStream() { auto device_ctx = GetDeviceCtx(); - runtime::OpExecutor::GetInstance().WaitAll(); - const auto ¤t_stream_id = device_ctx->device_res_manager_->GetCurrentStreamId(); + auto current_stream_id = device_ctx->device_res_manager_->GetCurrentStreamId(); MS_LOG(DEBUG) << "current_stream_id:" << current_stream_id; return std::make_shared(device_ctx, current_stream_id); } diff --git a/mindspore/ccsrc/runtime/device/CMakeLists.txt b/mindspore/ccsrc/runtime/device/CMakeLists.txt index 991f1247bceca1bd96a1e918a190524e0fa55d11..3be222f6188303e1fdff9c451b51b1dcfe842925 100644 --- a/mindspore/ccsrc/runtime/device/CMakeLists.txt +++ b/mindspore/ccsrc/runtime/device/CMakeLists.txt @@ -2,8 +2,9 @@ file(GLOB_RECURSE DEVICE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "common/* "kernel_info.cc" "executor/dynamic_kernel.cc" "executor/executor_callback.cc" "kernel_runtime.cc" "memory_manager.cc" "kernel_runtime_manager.cc" "convert_tensor_utils.cc" "memory_scheduler.cc" "memory_offload_strategy.cc" "launch_kernel.cc" "launch_mul.cc" "tensor_array.cc" - "ms_device_shape_transfer.cc" "context_extends.cc" "stream_synchronizer.cc" "tensors_queue.cc" "auto_mem_offload.cc" - "common_somas_allocator.cc" "device_address_utils.cc" "loadable_device_address.cc" + "ms_device_shape_transfer.cc" "context_extends.cc" "multi_stream_controller.cc" "stream_synchronizer.cc" + "tensors_queue.cc" "auto_mem_offload.cc" "common_somas_allocator.cc" "device_address_utils.cc" + "loadable_device_address.cc" ) list(REMOVE_ITEM DEVICE_SRC_LIST "gsm/aio_plugin.cc") diff --git a/mindspore/ccsrc/runtime/device/device_address_utils.cc b/mindspore/ccsrc/runtime/device/device_address_utils.cc index cf4b246e8f605d804cf21f3d8fb85f9dd4080d49..d0165a626f86abf705d88d1759d38f8df90d10e4 100644 --- a/mindspore/ccsrc/runtime/device/device_address_utils.cc +++ b/mindspore/ccsrc/runtime/device/device_address_utils.cc @@ -913,9 +913,18 @@ void DeviceAddressUtils::MallocForInput(const DeviceContext *device_context, con MS_LOG(EXCEPTION) << "Allocate memory failed"; } auto tensor_size = LongToSize(tensor->data().nbytes()); - if (!device_address->SyncHostToDevice(tensor->shape(), tensor_size, tensor->data_type(), device_address->format(), - tensor->data_ptr())) { - MS_LOG(EXCEPTION) << "SyncHostToDevice failed"; + if (device_address->GetDeviceType() == device::DeviceType::kAscend) { + OpExecutor::DispatchLaunchTask([=]() { + if (!device_address->SyncHostToDevice(tensor->shape(), tensor_size, tensor->data_type(), device_address->format(), + tensor->data_ptr())) { + MS_LOG(EXCEPTION) << "SyncHostToDevice failed"; + } + }); + } else { + if (!device_address->SyncHostToDevice(tensor->shape(), tensor_size, tensor->data_type(), device_address->format(), + tensor->data_ptr())) { + MS_LOG(EXCEPTION) << "SyncHostToDevice failed"; + } } } @@ -1234,5 +1243,38 @@ device::DeviceAddressPtr DeviceAddressUtils::ConvertContiguousDeviceAddress( return new_device_address; } +void DeviceAddressUtils::GetCrossStreamAddressInfoFromInput( + size_t op_stream_id, std::vector> *cross_stream_addresses, + const tensor::TensorPtr &tensor) { + MS_EXCEPTION_IF_NULL(tensor); + if (tensor->device_address() == nullptr) { + return; + } + + auto device_address = std::dynamic_pointer_cast(tensor->device_address()); + MS_EXCEPTION_IF_NULL(device_address); + if (op_stream_id != device_address->stream_id()) { + // Device address is cross stream. + (void)cross_stream_addresses->emplace_back(device_address->stream_id(), device_address->GetMutablePtr()); + } +} + +void DeviceAddressUtils::GetCrossStreamAddressInfoFromInput( + size_t op_stream_id, std::vector> *cross_stream_addresses, + const mindspore::kernel::KernelTensor *tensor) { + MS_EXCEPTION_IF_NULL(tensor); + if (op_stream_id != tensor->stream_id()) { + (void)cross_stream_addresses->emplace_back(tensor->stream_id(), tensor->device_ptr()); + } +} + +void DeviceAddressUtils::GetCrossStreamAddressInfoFromInput( + size_t op_stream_id, std::vector> *cross_stream_addresses, + const device::DeviceAddressPtr &device_address) { + MS_EXCEPTION_IF_NULL(device_address); + if (op_stream_id != device_address->stream_id()) { + (void)cross_stream_addresses->emplace_back(device_address->stream_id(), device_address->GetMutablePtr()); + } +} } // namespace runtime } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/device_address_utils.h b/mindspore/ccsrc/runtime/device/device_address_utils.h index bf48824e87bce924687eaa23ef074bd886cfae6d..3d12eeefaa21ee6801c0047298c8d5d19f2bceb1 100644 --- a/mindspore/ccsrc/runtime/device/device_address_utils.h +++ b/mindspore/ccsrc/runtime/device/device_address_utils.h @@ -20,9 +20,17 @@ #include #include #include +#include #include "runtime/hardware/device_context.h" #include "runtime/pynative/op_compiler.h" +#include "runtime/device/multi_stream_controller.h" #include "kernel/kernel.h" +#include "mindapi/base/type_traits.h" + +template +struct is_optional : public std::false_type {}; +template +struct is_optional> : public std::true_type {}; namespace mindspore { using device::DeviceContext; @@ -136,6 +144,25 @@ class BACKEND_EXPORT DeviceAddressUtils { const device::DeviceAddressPtr &old_device_address, bool is_sync); + template + static void ProcessCrossStreamAddress(const std::string &op_name, const DeviceContext *device_context, + size_t op_stream_id, const T &... args) { + // memory_stream_addresses pair : memory_stream_id, address. + std::vector> cross_stream_addresses; + (GetCrossStreamAddressInfo(op_stream_id, &cross_stream_addresses, args), ...); + if (cross_stream_addresses.empty()) { + return; + } + + device::MultiStreamController::GetInstance()->Refresh(device_context); + auto task_id_on_stream = + device::MultiStreamController::GetInstance()->LaunchTaskIdOnStream(device_context, op_stream_id); + MS_LOG(DEBUG) << "Launch stream_id:" << op_stream_id << ", task id:" << task_id_on_stream << ", op_name:" << op_name + << ", cross_stream_addresses size:" << cross_stream_addresses.size(); + device::MultiStreamController::GetInstance()->RecordEvent(device_context, task_id_on_stream, op_stream_id, + cross_stream_addresses); + } + private: static void UpdateKernelTensorHostInfoByNode(const kernel::KernelTensorPtr &kernel_tensor, const AnfNodePtr &node, size_t output_idx); @@ -144,6 +171,50 @@ class BACKEND_EXPORT DeviceAddressUtils { // is consistent with device type, for example, device address type // DeviceType::kGPU should be used on GPU device static bool NodeDeviceAddressExist(const DeviceContext *device_context, const AnfNodePtr &node, size_t index); + + static void GetCrossStreamAddressInfoFromInput(size_t op_stream_id, + std::vector> *cross_stream_addresses, + const tensor::TensorPtr &tensor); + + static void GetCrossStreamAddressInfoFromInput(size_t op_stream_id, + std::vector> *cross_stream_addresses, + const mindspore::kernel::KernelTensor *tensor); + + static void GetCrossStreamAddressInfoFromInput(size_t op_stream_id, + std::vector> *cross_stream_addresses, + const device::DeviceAddressPtr &device_address); + + template + static void GetCrossStreamAddressInfo(size_t op_stream_id, + std::vector> *cross_stream_addresses, + const std::optional &opt) { + if (opt.has_value()) { + return GetCrossStreamAddressInfo(op_stream_id, cross_stream_addresses, opt.value()); + } + } + + template + static void GetCrossStreamAddressInfo(size_t op_stream_id, + std::vector> *cross_stream_addresses, + const std::vector &inputs) { + if constexpr (!std::is_same_v && !std::is_same_v && + !std::is_same_v) { + return; + } + for_each(inputs.begin(), inputs.end(), [op_stream_id, cross_stream_addresses](auto item) { + GetCrossStreamAddressInfo(op_stream_id, cross_stream_addresses, item); + }); + } + + template ::value && !is_optional::value, T>> + static void GetCrossStreamAddressInfo(size_t op_stream_id, + std::vector> *cross_stream_addresses, + const T &input) { + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + GetCrossStreamAddressInfoFromInput(op_stream_id, cross_stream_addresses, input); + } + } }; } // namespace runtime } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/multi_stream_controller.cc b/mindspore/ccsrc/runtime/device/multi_stream_controller.cc new file mode 100644 index 0000000000000000000000000000000000000000..36851d029eb6615d970799dd82bd989acc5494e5 --- /dev/null +++ b/mindspore/ccsrc/runtime/device/multi_stream_controller.cc @@ -0,0 +1,264 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "runtime/device/multi_stream_controller.h" + +#include + +namespace mindspore { +namespace device { +MultiStreamControllerPtr &MultiStreamController::GetInstance() { + static std::once_flag init_flag = {}; + static MultiStreamControllerPtr multi_stream_controller = nullptr; + std::call_once(init_flag, [&]() { + if (multi_stream_controller == nullptr) { + MS_LOG(INFO) << "Create MultiStreamController."; + multi_stream_controller = std::make_shared(); + } + }); + + return multi_stream_controller; +} + +void MultiStreamController::Refresh(const DeviceContext *device_context) { + auto stream_size = device_context->device_res_manager_->QueryStreamSize(); + MS_LOG(INFO) << "Stream manager initialize, device_context : " << device_context << ", stream_size : " << stream_size + << "."; + if (stream_size == 0) { + // CPU has no concept of stream, stream size must be zero. + MS_LOG(INFO) << "Stream size is 0, will initialize with 2 streams."; + stream_size = 2; + } + task_id_on_stream_manager_[device_context].Resize(stream_size); + if (event_pools_.count(device_context) == 0) { + (void)event_pools_.emplace(device_context, std::make_shared([device_context]() { + // Event in pool need to do synchronization between streams, need to enable blocking. + return device_context->device_res_manager_->CreateRuntimeEvent(true, false); + })); + } +} + +bool MultiStreamController::UpdateTaskIdOnStream(const DeviceContext *device_context, int64_t task_id_on_stream, + uint32_t user_stream_id, uint32_t memory_stream_id) { + return task_id_on_stream_manager_[device_context].Update(task_id_on_stream, user_stream_id, memory_stream_id); +} + +int64_t MultiStreamController::QueryTaskIdOnStream(const DeviceContext *device_context, uint32_t user_stream_id, + uint32_t memory_stream_id) { + return task_id_on_stream_manager_[device_context].Query(user_stream_id, memory_stream_id); +} + +int64_t MultiStreamController::LaunchTaskIdOnStream(const DeviceContext *device_context, uint32_t stream_id) { + return task_id_on_stream_manager_[device_context].Launch(stream_id); +} + +int64_t MultiStreamController::GetTaskIdOnStream(const DeviceContext *device_context, uint32_t stream_id) { + return task_id_on_stream_manager_[device_context].Get(stream_id); +} + +std::mutex &MultiStreamController::GetStreamMutex(const DeviceContext *device_context, size_t stream_id) { + return stream_mutexes_[device_context][stream_id]; +} + +bool MultiStreamController::RecordEvent(const DeviceContext *device_context, int64_t task_id_on_stream, + uint32_t user_stream_id, + const std::vector> &memory_stream_addresses) { + auto mem_manager = device_context->device_res_manager_->mem_manager(); + if (mem_manager == nullptr) { + MS_LOG(WARNING) << "mem_manager_ is nullptr."; + return false; + } + + auto event = device_context->device_res_manager_->CreateRuntimeEvent(false, true); + if (event == nullptr) { + return true; + } + event->RecordEvent(user_stream_id); + // Record event on mem buf. + return mem_manager->RecordEvent(task_id_on_stream, user_stream_id, memory_stream_addresses, event); +} + +bool MultiStreamController::WaitEvent(const DeviceContext *device_context, int64_t task_id_on_stream, + uint32_t user_stream_id, uint32_t memory_stream_id) { + auto mem_manager = device_context->device_res_manager_->mem_manager(); + if (mem_manager == nullptr) { + MS_LOG(WARNING) << "mem_manager_ is nullptr."; + return false; + } + // If update task id on stream failed, means task id on stream is elder one, no need to wait event on mem manager. + if (!UpdateTaskIdOnStream(device_context, task_id_on_stream, user_stream_id, memory_stream_id)) { + return false; + } + return mem_manager->WaitEvent(task_id_on_stream, user_stream_id, memory_stream_id); +} + +bool MultiStreamController::WaitEvent(const DeviceContext *device_context, int64_t task_id_on_stream, + uint32_t user_stream_id) { + auto mem_manager = device_context->device_res_manager_->mem_manager(); + if (mem_manager == nullptr) { + MS_LOG(WARNING) << "mem_manager_ is nullptr."; + return false; + } + + return mem_manager->WaitEvent(task_id_on_stream, user_stream_id); +} + +bool MultiStreamController::DispatchRecordWaitEvent(const DeviceContext *device_context, uint32_t user_stream_id, + uint32_t memory_stream_id) { + if (event_pools_.count(device_context) == 0) { + MS_LOG(INTERNAL_EXCEPTION) << "device context has not initialized."; + } + auto &event_pool = event_pools_[device_context]; + auto event = event_pool->Get(); + // Note : record event on memory stream id and wait event on user stream id to make sure memory is safe. + event->RecordEvent(memory_stream_id); + event->WaitEvent(user_stream_id); + return true; +} + +bool MultiStreamController::SyncStream(const DeviceContext *device_context, size_t stream_id) { + auto &device_res_manager = device_context->device_res_manager_; + bool ret = device_res_manager->SyncStream(stream_id); + auto mem_manager = device_res_manager->mem_manager(); + if (mem_manager != nullptr) { + auto task_id_on_stream = GetTaskIdOnStream(device_context, stream_id); + mem_manager->WaitEvent(task_id_on_stream, stream_id); + } + return ret; +} + +bool MultiStreamController::SyncAllStreams(const DeviceContext *device_context) { + auto &device_res_manager = device_context->device_res_manager_; + bool ret = device_res_manager->SyncAllStreams(); + auto mem_manager = device_res_manager->mem_manager(); + if (mem_manager != nullptr) { + mem_manager->WaitAllEvents(); + } + return ret; +} + +bool MultiStreamController::SyncNotDefaultStreams(const DeviceContext *device_context) { + auto &device_res_manager = device_context->device_res_manager_; + bool ret = device_res_manager->SyncNotDefaultStreams(); + auto mem_manager = device_res_manager->mem_manager(); + if (mem_manager != nullptr) { + auto stream_ids = device_res_manager->GetStreamIds(); + for (auto stream_id : stream_ids) { + auto task_id_on_stream = GetTaskIdOnStream(device_context, stream_id); + mem_manager->WaitEvent(task_id_on_stream, stream_id); + } + } + return ret; +} + +void TaskIdOnStreamManager::Resize(uint32_t stream_size) { + std::lock_guard lock(mutex_); + MS_LOG(INFO) << "Task id on stream manager initialize : " << initialized_ << ", stream_size : " << stream_size << "."; + if (initialized_ && stream_size <= initialize_size_) { + MS_LOG(INFO) << "Task id on stream manager has already initialized, current size : " << initialize_size_ << "."; + return; + } + uint32_t min_stream_size = 2; + initialize_size_ = std::max(stream_size, min_stream_size); + generator_.resize(initialize_size_); + status_.resize(initialize_size_); + for (auto &vec : status_) { + vec.resize(initialize_size_); + } + initialized_ = true; +} + +int64_t TaskIdOnStreamManager::Query(uint32_t user_stream_id, uint32_t memory_stream_id) { + std::lock_guard lock(mutex_); + return status_[user_stream_id][memory_stream_id]; +} + +bool TaskIdOnStreamManager::Update(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id) { + std::lock_guard lock(mutex_); + if (status_[user_stream_id][memory_stream_id] >= task_id_on_stream) { + return false; + } + status_[user_stream_id][memory_stream_id] = task_id_on_stream; + return true; +} + +int64_t TaskIdOnStreamManager::Launch(uint32_t stream_id) { return ++generator_[stream_id].value_; } + +int64_t TaskIdOnStreamManager::Get(uint32_t stream_id) { return generator_[stream_id].value_; } + +EventPool::EventPool(std::function event_creator) : event_creator_(std::move(event_creator)) {} + +EventPool::~EventPool() { + std::lock_guard lock(mutex_); + expired_ = true; + events_.clear(); + cached_events_.clear(); +} + +DeviceEventPtr EventPool::Get() { + MS_LOG(DEBUG) << "Event pool get start."; + std::lock_guard lock(mutex_); + DeviceEvent *event = nullptr; + // Try to create event firstly before reached core size. + if (size_ < core_size_) { + auto created_event = event_creator_(); + if (created_event->IsReady()) { + cached_events_.push_back(created_event); + size_++; + event = created_event.get(); + } + } + // Try to reuse event. + if (event == nullptr) { + auto iter = events_.begin(); + while (iter != events_.end()) { + auto event_in_list = *iter; + if (event_in_list == nullptr) { + MS_LOG(INTERNAL_EXCEPTION) << "exception : event in list is nullptr, events_ size : " << events_.size() << "."; + } + if (event_in_list->QueryEvent()) { + event = event_in_list; + events_.erase(iter); + break; + } + iter++; + } + } + // Reuse failed, try to create more event. + if (event == nullptr) { + auto created_event = event_creator_(); + if (created_event->IsReady()) { + cached_events_.push_back(created_event); + event = created_event.get(); + size_++; + } else { + MS_LOG(INTERNAL_EXCEPTION) << "Get event failed."; + } + } + MS_LOG(DEBUG) << "Get event, events_ size : " << events_.size() << ", event : " << event << "."; + + auto event_ptr = std::shared_ptr(event, [&](DeviceEvent *e) { + std::lock_guard lock(mutex_); + if (!expired_) { + MS_LOG(DEBUG) << "Return event : " << e << "."; + events_.push_back(e); + } else { + MS_LOG(DEBUG) << "Return event : " << e << "failed."; + } + }); + return event_ptr; +} +} // namespace device +} // namespace mindspore diff --git a/mindspore/ccsrc/runtime/device/multi_stream_controller.h b/mindspore/ccsrc/runtime/device/multi_stream_controller.h new file mode 100644 index 0000000000000000000000000000000000000000..b75eba0ca5e5d7f89d6be472de900d09924fc3cf --- /dev/null +++ b/mindspore/ccsrc/runtime/device/multi_stream_controller.h @@ -0,0 +1,134 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_MULTI_STREAM_CONTROLLER_H_ +#define MINDSPORE_CCSRC_RUNTIME_DEVICE_MULTI_STREAM_CONTROLLER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils/log_adapter.h" +#include "include/backend/mem_reuse/mem_dynamic_allocator.h" +#include "include/backend/visible.h" +#include "runtime/hardware/device_context.h" + +namespace mindspore { +namespace device { +template +struct AtomicWrapper { + AtomicWrapper() : value_(0L) {} + explicit AtomicWrapper(const std::atomic &value) : value_(value.load()) {} + AtomicWrapper(const AtomicWrapper &other) : value_(other.value_.load()) {} + AtomicWrapper &operator=(const AtomicWrapper &other) { value_.store(other.value_.load()); } + + std::atomic value_; +}; + +class BACKEND_EXPORT TaskIdOnStreamManager { + public: + TaskIdOnStreamManager() = default; + + void Resize(uint32_t stream_size); + + int64_t Query(uint32_t user_stream_id, uint32_t memory_stream_id); + + bool Update(int64_t task_id_on_stream, uint32_t user_stream_id, uint32_t memory_stream_id); + + int64_t Launch(uint32_t stream_id); + + int64_t Get(uint32_t stream_id); + + private: + std::mutex mutex_; + bool initialized_{false}; + uint32_t initialize_size_{0}; + std::vector> generator_; + std::vector> status_; +}; + +// Event pool recycled with ref count, pool will reuse event when cannot create more events. +class BACKEND_EXPORT EventPool { + public: + explicit EventPool(std::function event_creator); + ~EventPool(); + + EventPool() = delete; + EventPool(const EventPool &) = delete; + EventPool &operator=(const EventPool &) = delete; + + // Get event from pool, event was wrapper by shared_ptr. + DeviceEventPtr Get(); + + private: + std::mutex mutex_; + bool expired_{false}; + // Pool will just create event before reach core size, use half of size limits as core size. + size_t core_size_{32768}; + size_t size_{0}; + std::function event_creator_; + std::list events_; + // cached_events_ hold shared ptr of event, since device res manager return a smart pointer. + std::list cached_events_; +}; +using EventPoolPtr = std::shared_ptr; + +class MultiStreamController; +using MultiStreamControllerPtr = std::shared_ptr; + +class BACKEND_EXPORT MultiStreamController { + public: + MultiStreamController() = default; + MultiStreamController(const MultiStreamController &) = delete; + MultiStreamController &operator=(const MultiStreamController &) = delete; + ~MultiStreamController() = default; + + static MultiStreamControllerPtr &GetInstance(); + + void Refresh(const DeviceContext *device_context); + bool UpdateTaskIdOnStream(const DeviceContext *device_context, int64_t task_id_on_stream, uint32_t user_stream_id, + uint32_t memory_stream_id); + int64_t QueryTaskIdOnStream(const DeviceContext *device_context, uint32_t user_stream_id, uint32_t memory_stream_id); + int64_t LaunchTaskIdOnStream(const DeviceContext *device_context, uint32_t stream_id); + int64_t GetTaskIdOnStream(const DeviceContext *device_context, uint32_t stream_id); + + std::mutex &GetStreamMutex(const DeviceContext *device_context, size_t stream_id); + + // memory_stream_addresses pair : memory_stream_id, address. + bool RecordEvent(const DeviceContext *device_context, int64_t task_id_on_stream, uint32_t user_stream_id, + const std::vector> &memory_stream_addresses); + bool WaitEvent(const DeviceContext *device_context, int64_t task_id_on_stream, uint32_t user_stream_id, + uint32_t memory_stream_id); + bool WaitEvent(const DeviceContext *device_context, int64_t task_id_on_stream, uint32_t user_stream_id); + bool DispatchRecordWaitEvent(const DeviceContext *device_context, uint32_t user_stream_id, uint32_t memory_stream_id); + + bool SyncStream(const DeviceContext *device_context, size_t stream_id); + bool SyncAllStreams(const DeviceContext *device_context); + bool SyncNotDefaultStreams(const DeviceContext *device_context); + + private: + std::unordered_map task_id_on_stream_manager_; + std::unordered_map> stream_mutexes_; + std::unordered_map event_pools_; +}; +} // namespace device +} // namespace mindspore +#endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_MULTI_STREAM_CONTROLLER_H_ diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc index 2d704414a5d3f8e7e04a3bdafb10398633b01833..aba7456ade9f0d092a14b66275af3366371e47f0 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/abstract_actor.cc @@ -291,7 +291,7 @@ bool AbstractActor::IsOutputAddressPersisted(const DeviceTensor *output_device_t // Ref node need check the origin node. const auto &graph = AnfAlgo::FetchKernelGraph(output_node.first.get()); if ((graph != nullptr) && graph->IsInRefOutputMap(output_node)) { - const auto &origin_node = graph->GetRefCorrespondOutput(output_node).first; + const auto &origin_node = graph->GetRefNodeRecursive(output_node).first; MS_EXCEPTION_IF_NULL(origin_node); if (origin_node->isa() || origin_node->isa()) { return true; diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_dump.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_dump.cc index 7faee336009fb95c90f7fef23e3ff47e82ed9759..935abd6860020ae25c06712f7bb8d86d1ca65239 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_dump.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_dump.cc @@ -822,6 +822,8 @@ std::string GetActorSubName(AbstractActor *actor) { MS_EXCEPTION_IF_NULL(actor); if (actor->type() == KernelTransformType::kCopyActor) { return std::string("CopyActor"); + } else if (actor->type() == KernelTransformType::kEntranceActor) { + return std::string("EntranceActor"); } const auto &name = actor->GetAID().Name(); std::string kernel_graph_name; @@ -1108,6 +1110,10 @@ void DumpActorInfo(AbstractActor *actor, std::ofstream &ofs) { } } } + +bool IsTopActorType(AbstractActor *actor) { + return actor->type() != KernelTransformType::kStackActor && actor->type() != KernelTransformType::kEntranceActor; +} } // namespace std::vector TopoSortForActor(AbstractActor *root) { @@ -1122,6 +1128,7 @@ std::vector TopoSortForActor(AbstractActor *root) { extra_seen_map[root] = 0; while (!todo.empty()) { AbstractActor *actor = todo.back(); + if (extra_seen_map[actor] == seen) { todo.pop_back(); continue; @@ -1134,22 +1141,25 @@ std::vector TopoSortForActor(AbstractActor *root) { } seen_map[actor] = seen; std::vector input_aids; - std::for_each( - actor->input_data_arrow_aids().begin(), actor->input_data_arrow_aids().end(), - [&input_aids, actor](const auto &pair) { - input_aids.emplace_back((actor->type() != KernelTransformType::kFusionActor && pair.second != nullptr && - pair.second->to_op_id_.Name().find(kFusionActorNameSuffix) != std::string::npos) - ? pair.second->to_op_id_.Name() - : pair.first.Name()); - }); - std::for_each( - actor->input_control_arrow_aids().begin(), actor->input_control_arrow_aids().end(), - [&input_aids, actor](const auto &pair) { - input_aids.emplace_back((actor->type() != KernelTransformType::kFusionActor && pair.second != nullptr && - pair.second->to_op_id_.Name().find(kFusionActorNameSuffix) != std::string::npos) - ? pair.second->to_op_id_.Name() - : pair.first.Name()); - }); + + if (IsTopActorType(actor)) { + std::for_each( + actor->input_data_arrow_aids().begin(), actor->input_data_arrow_aids().end(), + [&input_aids, actor](const auto &pair) { + input_aids.emplace_back((actor->type() != KernelTransformType::kFusionActor && pair.second != nullptr && + pair.second->to_op_id_.Name().find(kFusionActorNameSuffix) != std::string::npos) + ? pair.second->to_op_id_.Name() + : pair.first.Name()); + }); + std::for_each( + actor->input_control_arrow_aids().begin(), actor->input_control_arrow_aids().end(), + [&input_aids, actor](const auto &pair) { + input_aids.emplace_back((actor->type() != KernelTransformType::kFusionActor && pair.second != nullptr && + pair.second->to_op_id_.Name().find(kFusionActorNameSuffix) != std::string::npos) + ? pair.second->to_op_id_.Name() + : pair.first.Name()); + }); + } for (auto aid : input_aids) { const auto &input_actor = FetchActor(aid); if (input_actor == nullptr) { diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_set.h b/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_set.h index 7b29fba67489148ba2db9d81d96b1585b5c1343a..6d9cae2554087d9d04849df6d3953bd69ed18688 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_set.h +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/actor_set.h @@ -104,22 +104,7 @@ using RpcActorSetWeakPtr = std::weak_ptr; // The output actor is used to receive the output result of actor which represents the graph output. struct ActorSet { explicit ActorSet(const ActorInfo &name) : name_(name) {} - ~ActorSet() { callback_counter_->set_expired(true); } - - void InitCallbackCounter() { - if (loop_count_actor_ != nullptr) { - loop_count_actor_->set_callback_counter(callback_counter_); - } - for (auto &kernel_actor : kernel_actors_) { - kernel_actor->set_callback_counter(callback_counter_); - } - if (control_actors_ != nullptr) { - auto &exit_actors = control_actors_->exit_actors_; - for (auto &exit_actor : exit_actors) { - exit_actor->set_callback_counter(callback_counter_); - } - } - } + ~ActorSet() = default; DataPrepareActorPtr data_prepare_actor_{nullptr}; std::vector data_source_actors_; @@ -150,8 +135,6 @@ struct ActorSet { double single_thread_execution_time_{0}; // Record the execution state. bool is_execution_failed_{false}; - // Control variable for callback. - CallbackCounterPtr callback_counter_ = std::make_shared(); bool has_dynamic_shape_{false}; bool has_kernel_need_user_data_{false}; bool enable_multi_stream_{false}; diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.cc index f632f0856e3931738a786aa3050467ca1c27f2ea..75bdbd8639ebfe7867ba76ee0e1c48a226fc9ded 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.cc @@ -28,6 +28,11 @@ ConditionGatherActor::ConditionGatherActor(const std::string &name, const CNodeP : KernelActor(name, kernel, device_context, memory_manager_aid, debug_aid, recorder_aid, strategy, modifiable_ref_input_indexes, modifiable_ref_output_indexes, type) {} +ConditionGatherActor::~ConditionGatherActor() { + for_each(need_clean_ptr_device_addresses_.begin(), need_clean_ptr_device_addresses_.end(), + [](const device::DeviceAddressPtr &device_address) { device_address->set_ptr(nullptr); }); +} + void ConditionGatherActor::RunBranchName(const std::string &branch_name, OpContext *const context) { MS_LOG(DEBUG) << "Condition gather actor:" << GetAID() << " branch name:" << branch_name; current_branch_name_ = branch_name; @@ -88,6 +93,7 @@ void ConditionGatherActor::Init() { MS_EXCEPTION_IF_NULL(somas_info_); (void)somas_info_->InsertGraphOutputInfo(output_address.get(), somas_outputs[i].first, somas_outputs[i].second); output_address->set_from_mem_pool(true); + need_clean_ptr_device_addresses_.emplace_back(output_address); } else { UpdateRefCount(output_address.get(), true); } diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.h b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.h index 6df834574baf9f19aefb8cdbeb1ca009809dd48a..a94aa79598aa974b71e9502a61d9ca16c4352725 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.h +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/condition_gather_actor.h @@ -37,7 +37,7 @@ class ConditionGatherActor : public KernelActor { GraphExecutionStrategy strategy, const std::set &modifiable_ref_input_indexes, const std::set &modifiable_ref_output_indexes, const KernelTransformType &type = KernelTransformType::kConditionGatherActor); - ~ConditionGatherActor() override = default; + ~ConditionGatherActor() override; // Receive the branch name from condition switch actor. void RunBranchName(const std::string &branch_name, OpContext *const context); @@ -58,6 +58,7 @@ class ConditionGatherActor : public KernelActor { mindspore::HashMap branch_name_to_id_; mindspore::HashMap branch_name_to_input_data_num_; mindspore::HashMap branch_name_to_input_control_num_; + std::vector need_clean_ptr_device_addresses_; }; using ConditionGatherActorPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc index 53dbf8fa94f1938129fe02bd1dee762c3c6ef1f0..8f32faa79be34cc8f2a586450bf5681ec77f4234 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.cc @@ -49,9 +49,6 @@ void ExitActor::Init() { void ExitActor::FetchInput(OpContext *const context) { MS_EXCEPTION_IF_NULL(context); - auto counter = callback_counter(); - MS_EXCEPTION_IF_NULL(counter); - counter->Wait(); if (!WaitRuntimePipelineFinish(context)) { MS_LOG(INFO) << "Run failed and early stop."; return; @@ -266,10 +263,20 @@ bool ExitActor::IsNeedCopyDeviceAddress(DeviceTensor *const input_device_tensor, return false; } const auto &node = input_device_tensor->GetNodeIndex().first; - if (node != nullptr && (!node->isa())) { - MS_LOG(DEBUG) << "Input device address:" << input_device_tensor << " ptr:" << input_device_tensor->GetPtr() - << " for node:" << node->DebugString() << " is not need replace ptr for actor:" << GetAID(); - return false; + if (node != nullptr) { + if (!node->isa()) { + MS_LOG(DEBUG) << "Input device address:" << input_device_tensor << " ptr:" << input_device_tensor->GetPtr() + << " for node:" << node->DebugString() << " is not need replace ptr for actor:" << GetAID(); + return false; + } + const auto &iter = ref_out_in_map_.find(input_device_tensor->GetNodeIndex()); + if (iter != ref_out_in_map_.end() && iter->second.first != nullptr && (!iter->second.first->isa())) { + MS_LOG(DEBUG) << "Input device address:" << input_device_tensor << " ptr:" << input_device_tensor->GetPtr() + << " for node:" << node->DebugString() + << " is a ref node of:" << iter->second.first->DebugString() + << " not need replace ptr for actor:" << GetAID(); + return false; + } } } return true; diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.h b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.h index 5f397add0de987d0a03cf964c19b9b37c2c8b507..a528f901157d8997710eb630024586ea89f7842d 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.h +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/control_flow/exit_actor.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include "utils/hash_map.h" @@ -55,9 +56,6 @@ class ExitActor : public ControlActor { } void OnMemoryAllocFinish(OpContext *const context) override; - CallbackCounterPtr callback_counter() const { return callback_counter_; } - void set_callback_counter(const CallbackCounterPtr &callback_counter) { callback_counter_ = callback_counter; } - protected: void Init() override; void FetchInput(OpContext *const context) override; @@ -87,6 +85,7 @@ class ExitActor : public ControlActor { // needed. This mark is used to record whether it need to be copied. std::vector is_need_copy_device_tensors_; std::vector is_need_dynamic_checks_; + std::map ref_out_in_map_; // Cache the dynamic shape flag to optimize the running performance. std::vector is_dynamic_shapes_; // Output data. @@ -94,8 +93,6 @@ class ExitActor : public ControlActor { mindspore::HashMap>>> output_branch_data_; // The value of haspmap indicates the output data flag. See constant prefixed with kOutputDataFalg for details. mindspore::HashMap> output_branch_data_flag_; - - CallbackCounterPtr callback_counter_; }; using ExitActorPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/debug_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/debug_actor.cc index 44ae891c9d7d9ad1a94a2828d84e241fff6245f6..3c4b6e5e032705f9b052ac23afca0cb3770ef93f 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/debug_actor.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/debug_actor.cc @@ -31,6 +31,7 @@ #include "debug/debugger/debugger_utils.h" #endif #include "debug/data_dump/data_dumper.h" +#include "debug/data_dump/dump_graph_boundary.h" #include "include/common/debug/common.h" #include "utils/file_utils.h" #include "include/backend/debug/profiler/profiling.h" @@ -72,6 +73,7 @@ void DebugActor::ACLDump(uint32_t device_id, const std::vector & } } } + /* * Feature group: Dump, Online debugger. * Target device group: GPU. @@ -133,50 +135,6 @@ void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchAddr *launch_in } } -/* - * Feature group: ascend step start timestamp - * Target device group: Ascend. - * Description: Add step start timestamp when profiler is started. - */ -void DebugActor::AscendStepStart(const std::vector &graphs, - std::vector device_contexts) { - auto profiler = profiler::Profiler::GetInstance(kAscendDevice); - if (profiler == nullptr || !profiler->IsInitialized() || graphs.empty()) { - return; - } - if (profiler->GetEnableFlag() && !graphs[0]->IsDatasetGraph()) { - profile_started_ = false; - for (size_t i = 0; i < graphs.size(); ++i) { - MS_EXCEPTION_IF_NULL(graphs[i]); - MS_EXCEPTION_IF_NULL(device_contexts[i]); - if (device_contexts[i]->GetDeviceType() == device::DeviceType::kAscend && !profile_started_) { - device_ctx_ = device_contexts[i]; - device_ctx_->device_res_manager_->BindDeviceToCurrentThread(false); - MS_LOG(INFO) << "Dot step start timestamp."; - profiler->StepStart(current_step++, device_contexts[i]->device_res_manager_->GetStream()); - profile_started_ = true; - } - } - } -} - -/* - * Feature group: ascend step end timestamp - * Target device group: Ascend. - * Description: Add step end timestamp when profiler is end. - */ -void DebugActor::AscendStepEnd() { - auto profiler = profiler::Profiler::GetInstance(kAscendDevice); - if (profile_started_ && profiler != nullptr && profiler->GetEnableFlag()) { - MS_EXCEPTION_IF_NULL(device_ctx_); - device_ctx_->device_res_manager_->BindDeviceToCurrentThread(false); - device_ctx_->device_res_manager_->SyncAllStreams(); - MS_LOG(INFO) << "Dot step end timestamp."; - profiler->StepStop(); - profile_started_ = false; - } -} - /* * Feature group: Dump, Online debugger. * Target device group: Ascend, GPU. @@ -202,7 +160,8 @@ void DebugActor::DebugOnStepBegin(const std::vector &graphs, } } if (backend == "ge") { - AscendStepStart(graphs, device_contexts); + MS_LOG(INFO) << "On GE backend, debug_actor is not supported except for acl dump."; + datadump::DumpGraphBoundary::GetInstance().InitEnableFlag(); return; } MS_EXCEPTION_IF_NULL(op_context); @@ -253,7 +212,7 @@ void DebugActor::DebugOnStepEnd(OpContext *const op_context, const MS_EXCEPTION_IF_NULL(context); std::string backend = context->backend_policy(); step_count = total_running_count_; - if (dump_flag == true) { + if (dump_flag) { auto registered_dumper = datadump::DataDumperRegister::Instance().GetDumperForBackend(device::DeviceType::kAscend); if (registered_dumper != nullptr) { device_ctx_->device_res_manager_->SyncAllStreams(); @@ -262,14 +221,9 @@ void DebugActor::DebugOnStepEnd(OpContext *const op_context, const dump_flag = false; } if (backend == "ge") { - AscendStepEnd(); -#ifdef ENABLE_DEBUGGER - auto debugger = Debugger::GetInstance(); - if (debugger != nullptr && !(debugger->GetAscendKernelByKernelFlag())) { - MS_LOG(INFO) << "On GE backend, debug_actor is not supported for graph mode."; - return; - } -#endif + MS_LOG(INFO) << "On GE backend, debug_actor is not supported except for acl dump."; + datadump::DumpGraphBoundary::GetInstance().DataDrop(device_ctx_); + return; } MS_EXCEPTION_IF_NULL(op_context); std::lock_guard locker(debug_mutex_); diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc index bd9159449bf60b86de244e7db379aa661131699d..abc22aae1fda760a4296cf5d0c8a060e1ffe8f70 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.cc @@ -15,6 +15,10 @@ */ #include "runtime/graph_scheduler/actor/kernel_actor.h" + +#include + +#include "runtime/device/multi_stream_controller.h" #include "runtime/graph_scheduler/actor/memory_manager_actor.h" #include "runtime/graph_scheduler/actor/output_actor.h" #include "runtime/graph_scheduler/actor/recorder_actor.h" @@ -90,6 +94,27 @@ void KernelActor::Init() { data->data_ = output_device_tensors_[IntToSize(data_arrow->from_output_index_)]; ++output_data_index; } + + // Share pointer of task id on stream with output kernel tensor. + for (auto &output_kernel_tensor : output_kernel_tensors_) { + output_kernel_tensor->set_task_id_on_stream(task_id_on_stream_); + } + is_stream_recv_actor_ = IsPrimitiveCNode(kernel_, prim::kPrimStreamRecv); + // kernel_ may be ValueNode, skip exception situation. + auto cnode = kernel_->cast(); + if (cnode == nullptr) { + return; + } + auto input0 = cnode->input(kAnfPrimitiveIndex); + if (IsValueNode(input0)) { + MS_LOG(WARNING) << "cnode is not a func graph value node : " << kernel_->DebugString() << "."; + return; + } + auto multi_stream_safe_value = cnode->GetAttr(kAttrInputMultiStreamSafe); + if (multi_stream_safe_value != nullptr) { + is_multi_stream_safe_ = GetValue(multi_stream_safe_value); + MS_LOG(DEBUG) << "cnode : " << cnode->DebugString() << " is thread safe."; + } } void KernelActor::InitInputInfo() { @@ -728,11 +753,6 @@ void KernelActor::ExecuteLaunchKernelTask(OpContext *const context MS_LOG(EXCEPTION) << "#umsg#Kernel error:#umsg#Launch kernel failed: " + kernel_->fullname_with_scope() << trace::DumpSourceLines(kernel_); } - - if (ActorDispatcher::enable_multi_stream()) { - LaunchCallback(context); - } - if (is_dynamic_shape_ && kernel_mod_->IsNeedUpdateOutputShapeAndSize()) { kernel_mod_->UpdateOutputShapeAndSize(input_kernel_tensors_, output_kernel_tensors_); } @@ -823,7 +843,7 @@ void KernelActor::ResizeKernelMod() { } } -bool KernelActor::LaunchKernel(OpContext *const) { +bool KernelActor::LaunchKernel(OpContext *const context) { // Check the skipped launch condition. if (is_launch_skipped_) { MS_EXCEPTION_IF_CHECK_FAIL((input_device_tensors_.size() >= 1), "The inputs size is wrong."); @@ -841,6 +861,20 @@ bool KernelActor::LaunchKernel(OpContext *const) { } } + // Cpu not support stream lock with LaunchKernel. + if (!ActorDispatcher::enable_multi_stream()) { + MS_LOG(DEBUG) << "Begin launch kernel: " << kernel_->fullname_with_scope(); + auto ret = device_contexts_[0]->GetKernelExecutor(false)->LaunchKernel( + kernel_, input_kernel_tensors_, workspace_kernel_tensors_, output_kernel_tensors_, kernel_mod_, stream_); + MS_LOG(DEBUG) << "End launch kernel: " << kernel_->fullname_with_scope(); + return ret; + } + + auto multi_stream_controller = device::MultiStreamController::GetInstance(); + std::lock_guard lock( + multi_stream_controller->GetStreamMutex(device_contexts_[0], kernel_info_->stream_id())); + // Here should process multi stream first to make inputs is memory safe. + ProcessMultiStream(context); MS_LOG(DEBUG) << "Begin launch kernel: " << kernel_->fullname_with_scope(); auto ret = device_contexts_[0]->GetKernelExecutor(false)->LaunchKernel( kernel_, input_kernel_tensors_, workspace_kernel_tensors_, output_kernel_tensors_, kernel_mod_, stream_); @@ -848,66 +882,101 @@ bool KernelActor::LaunchKernel(OpContext *const) { return ret; } -void KernelActor::LaunchCallback(OpContext *const context) { - if (input_device_tensors_.empty()) { +void KernelActor::ProcessMultiStream(OpContext *const context) { + ProfilerRecorder profiler(ProfilerModule::kKernel, ProfilerEvent::kProcessMultiStream, GetAID().Name()); + auto device_context = device_contexts_[0]; + auto stream_id = kernel_info_->stream_id(); + // Update output_kernel_tensors_ with task id on stream. + auto multi_stream_controller = device::MultiStreamController::GetInstance(); + auto task_id_on_stream = multi_stream_controller->LaunchTaskIdOnStream(device_context, stream_id); + MS_LOG(DEBUG) << "Launch stream_id : " << stream_id << ", task id : " << task_id_on_stream + << ", kernel : " << GetAID().Name() << "."; + *task_id_on_stream_ = task_id_on_stream; + + // Process wait stream. + if (is_stream_recv_actor_) { + // Note: wait node start to launch. Event was record on send node, so, we can releases events on send node stream. + // Release events on send node means memory stream id is recv node stream id and user stream id is send node stream + // id. + auto user_stream_id = kernel_mod_->record_stream_id(); + auto memory_stream_id = stream_id; + if (stream_send_actor_ == nullptr) { + MS_LOG(WARNING) << "stream_send_actor_ is nullptr."; + return; + } + MS_LOG(DEBUG) << "Process wait stream start, memory_stream_id : " << memory_stream_id + << ", send task id on stream : " << *(stream_send_actor_->task_id_on_stream_) << "."; + // Here, need get task id on stream from send node. + (void)multi_stream_controller->WaitEvent(device_context, *(stream_send_actor_->task_id_on_stream_), user_stream_id, + memory_stream_id); return; } - auto stream_id = kernel_info_->stream_id(); - std::vector callback_funcs; - for (const auto &device_tensor_ptr : input_device_tensors_) { - if (stream_id == device_tensor_ptr->stream_id()) { + + // Process inputs. + if (input_kernel_tensors_.empty()) { + MS_LOG(DEBUG) << "Exit process multi stream as inputs is empty."; + return; + } + + std::vector> cross_stream_addresses; + std::vector cross_stream_kernel_tensors; + for (const auto &input_kernel_tensor : input_kernel_tensors_) { + if (stream_id == input_kernel_tensor->stream_id()) { continue; } - size_t ref_count = device_tensor_ptr->IncreaseCounter(); - if (ref_count == SIZE_MAX) { + if (input_kernel_tensor->pointer_ref_count()->ref_count() == SIZE_MAX && + input_kernel_tensor->pointer_ref_count()->dynamic_ref_count() == INT32_MAX) { continue; } - auto now_count = callback_counter_->Increase(); - MS_LOG(DEBUG) << "Callback counter : " << now_count << "."; - auto release_ref_callback = [device_tensor_ptr, device_context_ptr = device_contexts_[0], context, &aid = GetAID(), - callback_counter = callback_counter_]() { - // We need check parameters before execution, since main thread may exit before callback thread. - if (callback_counter == nullptr || callback_counter->expired()) { - MS_LOG(INFO) - << "Exit callback since callback_counter is nullptr or expired, which indicates that main thread is expired."; - return; + (void)cross_stream_addresses.emplace_back(input_kernel_tensor->stream_id(), input_kernel_tensor->device_ptr()); + (void)cross_stream_kernel_tensors.emplace_back(input_kernel_tensor); + } + + // Dispatch record/wait. + if (!is_multi_stream_safe_) { + for (const auto &cross_stream_kernel_tensor : cross_stream_kernel_tensors) { + // Input kernel tensor is memory stream id, this is important. + auto user_stream_id = stream_id; + auto memory_stream_id = cross_stream_kernel_tensor->stream_id(); + if (cross_stream_kernel_tensor->task_id_on_stream() == nullptr) { + MS_LOG(INTERNAL_EXCEPTION) << "Cross_stream_kernel_tensor : " << cross_stream_kernel_tensor + << " task id on stream is nullptr."; } + auto memory_task_id_on_stream = *cross_stream_kernel_tensor->task_id_on_stream(); + auto safe_task_id_on_stream = + multi_stream_controller->QueryTaskIdOnStream(device_context, user_stream_id, memory_stream_id); - std::vector free_list{device_tensor_ptr}; - MemoryManagerActor::GetInstance()->FreeMemory(&free_list, device_context_ptr, context, aid); - auto ref_counter = callback_counter->Decrease(); - callback_counter->Notify(); - MS_LOG(DEBUG) << "Callback is called, device tensor : " << device_tensor_ptr - << ", device_tensor_ptr ptr : " << device_tensor_ptr->GetMutablePtr() - << ", device_tensor_ptr ref count : " << device_tensor_ptr->ref_count() - << ", device_tensor_ptr dynamic ref count : " << device_tensor_ptr->dynamic_ref_count() - << ", device tensor ptr : " << device_tensor_ptr->GetMutablePtr() - << ", callback counter : " << ref_counter << ", stream id : " << device_tensor_ptr->stream_id() - << "."; - }; - (void)callback_funcs.emplace_back(release_ref_callback); - } - - if (!callback_funcs.empty()) { - MS_EXCEPTION_IF_NULL(device_contexts_[0]); - device::CallbackFunc callback_func = [callback_funcs = std::move(callback_funcs)]() { - for (const auto &callback_func : callback_funcs) { - callback_func(); + if (safe_task_id_on_stream >= memory_task_id_on_stream) { + MS_LOG(DEBUG) << "safe_task_id_on_stream : " << safe_task_id_on_stream + << " is bigger than memory_task_id_on_stream : " << memory_task_id_on_stream; + continue; } - }; - MS_LOG(DEBUG) << "Begin launch callback of actor : " << GetAID().Name() << ", id : " << actor_id() << "."; - auto ret = device_contexts_[0]->GetKernelExecutor(false)->LaunchCallback(callback_func, kernel_info_->stream_id()); - MS_LOG(DEBUG) << "End launch callback of actor: " << GetAID().Name() << ", id : " << actor_id() << ", ret : " << ret - << "."; + MS_LOG(DEBUG) << "Dispatch record/wait safe_task_id_on_stream : " << safe_task_id_on_stream + << ", memory_task_id_on_stream : " << memory_task_id_on_stream; + multi_stream_controller->DispatchRecordWaitEvent(device_context, user_stream_id, memory_stream_id); + // Add recv process. + user_stream_id = memory_stream_id; + memory_stream_id = stream_id; + auto last_task_id_on_stream = multi_stream_controller->GetTaskIdOnStream(device_context, user_stream_id); + MS_LOG(DEBUG) << "Dispatch wait stream start, usert_stream_id : " << user_stream_id + << ", memory_stream_id : " << memory_stream_id + << ", last_task_id_on_stream : " << last_task_id_on_stream << "."; + // Here, need get task id on stream from send node. + (void)multi_stream_controller->WaitEvent(device_context, last_task_id_on_stream, user_stream_id, + memory_stream_id); + } } -} -void KernelActor::PostLaunchKernel(OpContext *const context) { - // Execute kernel actor callbacks. - if (ActorDispatcher::enable_multi_stream()) { - LaunchCallback(context); + // Record event. + if (!cross_stream_addresses.empty()) { + MS_LOG(DEBUG) << "Record event for kernel : " << kernel_->fullname_with_scope() + << ", addresses size : " << cross_stream_addresses.size() << "."; + // Record event on stream. + multi_stream_controller->RecordEvent(device_context, task_id_on_stream, stream_id, cross_stream_addresses); } +} +void KernelActor::PostLaunchKernel(OpContext *const context) { if (is_dynamic_shape_ && kernel_mod_->IsNeedUpdateOutputShapeAndSize()) { kernel_mod_->UpdateOutputShapeAndSize(input_kernel_tensors_, output_kernel_tensors_); } diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.h b/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.h index 4cc877f0a2ac85dc18ff136943c41e3c8ae2c368..056fa7dfbe51a24ad765c5cfe5f68adb919d3631 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.h +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/kernel_actor.h @@ -104,8 +104,6 @@ class KernelActor : public DebugAwareActor { bool inputs_continuous_memory() const { return inputs_continuous_memory_; } SomasInfo *somas_info() const { return somas_info_; } const std::set &somas_graph_output_indexes() const { return somas_graph_output_indexes_; } - CallbackCounterPtr callback_counter() const { return callback_counter_; } - void set_callback_counter(const CallbackCounterPtr &callback_counter) { callback_counter_ = callback_counter; } void set_enable_async_infer(bool enable_async_infer) { enable_async_infer_ = enable_async_infer; } @@ -116,6 +114,8 @@ class KernelActor : public DebugAwareActor { // Really do launch kernel with memory allocate and free. void ExecuteLaunchKernelTask(OpContext *const context); + void set_stream_send_actor(KernelActor *stream_send_actor) { stream_send_actor_ = stream_send_actor; } + protected: void Init() override; void Run(OpContext *const context) override; @@ -123,8 +123,8 @@ class KernelActor : public DebugAwareActor { // Do kernel launching in this method after 'PreLaunchKernel' and 'PostLaunchKernel'. virtual bool LaunchKernel(OpContext *const context); - - virtual void LaunchCallback(OpContext *const context); + // Execute kernel actor multi stream produre to make sure safety of memory. + virtual void ProcessMultiStream(OpContext *const context); // Execute infer shape, resize and launch kernel by runtime pipeline which executes by KernelAsyncInferActor, // KernelAsyncResizeActor and KernelAsyncLaunchActor. @@ -196,6 +196,14 @@ class KernelActor : public DebugAwareActor { SomasInfo *somas_info_; // The graph output node and index use somas info. std::set somas_graph_output_indexes_; + // Task id on stream, use for events. + std::shared_ptr task_id_on_stream_ = std::make_shared(0L); + // Send actor ref, point to the send actor when current actor is recv actor. + KernelActor *stream_send_actor_{nullptr}; + // Flag for stream recv actor. + bool is_stream_recv_actor_{false}; + // Flag for indicating if current actor is multi-thread safe, which was generate at compile time. + bool is_multi_stream_safe_{false}; private: friend class GraphScheduler; @@ -251,8 +259,6 @@ class KernelActor : public DebugAwareActor { // Whether the inputs need continuous memory, used to check the inputs legitimacy. bool inputs_continuous_memory_; - CallbackCounterPtr callback_counter_; - // The stream resource of the KernelActor to launch kernel. void *stream_{nullptr}; }; diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc index 6ce79b540b7975e36b58057b6a311c9834038959..073db3e902e529cc62361fbdb58b12aa0340ba12 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.cc @@ -55,9 +55,6 @@ void LoopCountActor::IncreaseLoopCount(OpContext *const context) { current_count_++; MS_LOG(INFO) << "Loop count actor(" << GetAID().Name() << ") running, loop count: " << loop_count_ << ", current count: " << current_count_ << ", total running count: " << total_running_count_; - auto counter = callback_counter(); - MS_EXCEPTION_IF_NULL(counter); - counter->Wait(); if (!WaitRuntimePipelineFinish(context)) { MS_LOG(INFO) << "Run graph failed and please check error log."; } diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.h b/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.h index ada56f7b9cd65899d5036bb81c95b73a435ae556..41c211e2866de793c21c724334c15f9908fcede7 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.h +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/loop_count_actor.h @@ -64,9 +64,6 @@ class LoopCountActor : public DebugAwareActor { const AID &data_prepare_aid() const { return data_prepare_aid_; } const std::vector &entrance_aids() const { return entrance_aids_; } - CallbackCounterPtr callback_counter() const { return callback_counter_; } - void set_callback_counter(const CallbackCounterPtr &callback_counter) { callback_counter_ = callback_counter; } - protected: void Run(OpContext *const context) override; void SendOutput(OpContext *const context) override; @@ -96,8 +93,6 @@ class LoopCountActor : public DebugAwareActor { // Only need sync stream in DR scenarios. bool is_need_sync_stream_{true}; - - CallbackCounterPtr callback_counter_; }; using LoopCountActorPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.cc index 6a19d5b207671ae705e3df98f65acf66bcc124b1..9869177b19d532327c39ac8fe01b48f9e9e13155 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.cc @@ -16,7 +16,6 @@ #include "runtime/graph_scheduler/actor/memory/memory_free_actor.h" -#include #include "runtime/graph_scheduler/actor/memory_manager_actor.h" namespace mindspore { @@ -34,13 +33,5 @@ void MemoryFreeActor::SendMemoryFreeReq(OpContext *const context) context, GetAID()); } } - -void MemoryFreeActor::ProcessSomasCrossStreamMemorySynchronization(OpContext *const /*context*/) { - if (ActorDispatcher::enable_multi_stream()) { - ProfilerRecorder profiler(ProfilerModule::kKernel, ProfilerEvent::kStreamSync, GetAID().Name()); - device_contexts_[0]->device_res_manager_->SyncAllStreams(); - MS_LOG(INFO) << "Somas cross stream memory synchronize, sync all streams."; - } -} } // namespace runtime } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.h b/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.h index 1a8584ae8e038b1fc92b9d209bb1a2eb6111a1e1..4443e6c5d74213df0b8357383da7dcc6834b846a 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.h +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/memory/memory_free_actor.h @@ -43,15 +43,11 @@ class MemoryFreeActor : public MemoryAwareActor { // Get the member. SomasInfo *somas_info() const { return somas_info_; } - // Process somas cross streams memory synchronize. - void ProcessSomasCrossStreamMemorySynchronization(OpContext *const context); - protected: void Run(OpContext *const context) override { if (!WaitRuntimePipelineFinish(context)) { MS_LOG(INFO) << "Run graph failed and please check error log."; } - ProcessSomasCrossStreamMemorySynchronization(context); PostRun(context); } diff --git a/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc b/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc index 7f055b67b9dd34493676e0eaa41c1d314e90cf41..eeab1eb5383da11d5c6b9f6174e5cd73ffbb6ae0 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/actor/output_actor.cc @@ -228,7 +228,11 @@ void OutputActor::RunOpData(OpData *const input_data, OpContextdata_->ref_count() << " origin ref count:" << input_data->data_->original_ref_count() << " dynamic ref count:" << input_data->data_->dynamic_ref_count() - << " from memory pool:" << input_data->data_->from_mem_pool(); + << " from memory pool:" << input_data->data_->from_mem_pool() << " output node:" + << (input_data->data_->GetNodeIndex().first == nullptr + ? "null" + : input_data->data_->GetNodeIndex().first->DebugString()) + << " index:" << input_data->data_->GetNodeIndex().second; auto output_position = IntToSize(input_data->index_); if (output_position >= outputs_.size()) { SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*context), "The input index is of range."); diff --git a/mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc b/mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc index 682ca752e3e96a73733f2869c7c3e49654e444a2..4ad878335490d55dd291083a70ce934f235d0fb6 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/control_node_parser.cc @@ -2805,6 +2805,7 @@ bool ControlNodeParser::IsInputInSameLevel(const AnfNodePtr &node) { } auto iter = node_to_level_.find(input_node); if (iter == node_to_level_.end()) { + PrintGraphGroupInfo(kernel_graph_group_infos_); MS_LOG(EXCEPTION) << "Failed to find input:" << input_node->DebugString() << " for node:" << node->DebugString() << " in graph output map."; } diff --git a/mindspore/ccsrc/runtime/graph_scheduler/control_node_scheduler.cc b/mindspore/ccsrc/runtime/graph_scheduler/control_node_scheduler.cc index adfee1079e334f7aed511e0dcd0dec8308002b67..6fd4c89e468ba8ce6cdd5ee8829340a8932bd152 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/control_node_scheduler.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/control_node_scheduler.cc @@ -424,7 +424,6 @@ std::vector ControlNodeScheduler::BuildExitActor(const GraphCompil (void)is_dynamic_shapes.emplace_back(is_dynamic_shape); (void)device_contexts.emplace_back(node_with_context.second.second); } - const auto &actor_name = kernel_graph_group_info->group_name_ + kExitActorNameSuffix; const auto &exit_actor = std::make_shared(actor_name, memory_manager_aid_, formal_parameters, nullptr); MS_EXCEPTION_IF_NULL(exit_actor); @@ -432,6 +431,13 @@ std::vector ControlNodeScheduler::BuildExitActor(const GraphCompil exit_actor->is_need_dynamic_checks_.swap(is_need_dynamic_checks); exit_actor->is_dynamic_shapes_.swap(is_dynamic_shapes); exit_actor->device_contexts_.swap(device_contexts); + for (const auto &graph : kernel_graph_group_info->graphs_) { + MS_EXCEPTION_IF_NULL(graph); + std::for_each(graph->GetRefMap().begin(), graph->GetRefMap().end(), + [&exit_actor, &graph](const std::pair &pair) { + exit_actor->ref_out_in_map_[pair.first] = graph->GetRefNodeRecursive(pair.first); + }); + } (void)exit_actors.emplace_back(exit_actor); InsertActor(exit_actor.get()); } diff --git a/mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc b/mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc index 75ff337d55ef4ebe675a1847bacce5da9335a09f..3f6acdab47c57ec156c3e8a6fbaa265e82a8ef28 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/graph_scheduler.cc @@ -442,7 +442,10 @@ void GraphScheduler::Initialize() { MS_LOG(INFO) << "The actor thread number: " << actor_thread_num << ", the kernel thread number: " << (actor_and_kernel_thread_num - actor_thread_num); - if (default_actor_thread_num_ <= kAsyncLaunchThreadNum) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (default_actor_thread_num_ <= kAsyncLaunchThreadNum && EnableRuntimePipeline() && + context_ptr->get_param(MS_CTX_RUNTIME_NUM_THREADS) == static_cast(1)) { MS_LOG(WARNING) << "The number of actor threads is only: " << default_actor_thread_num_ << ", and pipelined runtime optimization is not enabled, the performance may not reach the optimal level. Please " @@ -502,8 +505,7 @@ void GraphScheduler::BuildAndScheduleGlobalActor() { #ifdef ENABLE_DEBUGGER auto debugger = Debugger::GetInstance(); MS_EXCEPTION_IF_NULL(debugger); - auto profiler = profiler::Profiler::GetInstance(kAscendDevice); - if ((profiler != nullptr && profiler->IsInitialized()) || debugger->DebuggerBackendEnabled()) { + if (debugger->DebuggerBackendEnabled()) { debugger_actor_need = true; } #endif @@ -567,9 +569,7 @@ ActorSet *GraphScheduler::Transform(const GraphCompilerInfo &graph_compiler_info (void)profiler::CollectHostInfo(kModelNameRuntime, kEventCompileGraph, kStageOptimize, 1, 0, 0); Optimize(actor_set); (void)profiler::CollectHostInfo(kModelNameRuntime, kEventCompileGraph, kStageOptimize, 1, 0, 1); - if (graph_compiler_info.control_node_parser_ != nullptr && (!graph_compiler_info.control_node_parser_->IsInited())) { - DumpFinalActor(actor_set.get(), graph_compiler_info); - } + DumpFinalActor(actor_set.get(), graph_compiler_info); MS_LOG(INFO) << "Graph(" << graph_compiler_info.name_ << ") transforms actor end."; #if defined(__linux__) && defined(WITH_BACKEND) @@ -954,7 +954,6 @@ ActorSetPtr GraphScheduler::Build(const GraphCompilerInfo &graph_compiler_info) MS_EXCEPTION_IF_NULL(rpc_node_scheduler_); actor_set->rpc_actors_ = rpc_node_scheduler_->Build(actor_set.get()); #endif - actor_set->InitCallbackCounter(); return actor_set; } @@ -1284,6 +1283,30 @@ std::vector GraphScheduler::BuildCustomActor(const GraphCompiler return custom_actors; } +namespace { +void ProcessStreamSendRecvEventPair( + mindspore::HashMap> *send_recv_nodes, const CNodePtr &kernel, + const KernelActorPtr &kernel_actor, bool is_send_node) { + auto primitive = common::AnfAlgo::GetCNodePrimitive(kernel); + MS_EXCEPTION_IF_NULL(primitive); + auto record_event_stream_pair_attr = primitive->GetAttr(kAttrRecordWaitEventStreamPairId); + if (record_event_stream_pair_attr != nullptr) { + auto event_pair_id = GetValue(record_event_stream_pair_attr); + MS_LOG(DEBUG) << "Process event pair id : " << event_pair_id << "."; + auto &send_recv_actor = (*send_recv_nodes)[event_pair_id]; + if (is_send_node) { + MS_EXCEPTION_IF_CHECK_FAIL(send_recv_actor.first == nullptr, "Stream send pair id is already set."); + send_recv_actor.first = kernel_actor; + } else { + MS_EXCEPTION_IF_CHECK_FAIL(send_recv_actor.second == nullptr, "Stream recv pair id is already set."); + send_recv_actor.second = kernel_actor; + } + } else { + MS_LOG(INFO) << "Stream send/recv kernel : " << kernel->DebugString() << " has no event stream pair id."; + } +} +} // namespace + std::vector GraphScheduler::BuildKernelActor(const GraphCompilerInfo &graph_compiler_info) { std::vector kernel_actors; @@ -1303,6 +1326,8 @@ std::vector GraphScheduler::BuildKernelActor(const GraphCompiler strategy = (is_single_op_graph ? strategy : GraphExecutionStrategy::kPipeline); } + // Stream recv node need task id on stream from send node. Here pass stream send actor to stream recv actor. + mindspore::HashMap> send_recv_nodes; for (auto &kernel : execution_order) { MS_EXCEPTION_IF_NULL(kernel); if (IsKernelActor(kernel, graph_compiler_info.strategy_) && (!IsSkippedKernelActor(kernel))) { @@ -1328,10 +1353,22 @@ std::vector GraphScheduler::BuildKernelActor(const GraphCompiler kernel_actor->inputs_continuous_memory_ = common::AnfAlgo::IsCommunicationOp(kernel) && (common::AnfAlgo::GetInputTensorNum(kernel) > 1); + if (IsPrimitiveCNode(kernel, prim::kPrimStreamSend)) { + ProcessStreamSendRecvEventPair(&send_recv_nodes, kernel, kernel_actor, true); + } else if (IsPrimitiveCNode(kernel, prim::kPrimStreamRecv)) { + ProcessStreamSendRecvEventPair(&send_recv_nodes, kernel, kernel_actor, false); + } + InsertActor(kernel_actor.get()); (void)kernel_actors.emplace_back(kernel_actor); } } + for (auto &[event_pair_id, send_recv_actor] : send_recv_nodes) { + auto [send_actor, recv_actor] = send_recv_actor; + MS_LOG(DEBUG) << "Stream send/recv pair : " << event_pair_id << ", send_actor : " << send_actor + << ", recv_actor : " << recv_actor << "."; + recv_actor->set_stream_send_actor(send_actor.get()); + } } return kernel_actors; } @@ -2222,7 +2259,11 @@ void GraphScheduler::LinkGlobalControlArrow(ActorSet *const actor_set, // Link the control arrow by the execution order. if (execution_order_running_) { for (const auto &graph : graph_compiler_info.graphs_) { - LinkControlArrowByExecutionOrder(graph, graph_compiler_info); + if (graph->inline_sub_graph_kernels().empty()) { + LinkControlArrowByExecutionOrder(graph, graph_compiler_info); + } else { + inline_control_flow_scheduler_.LinkControlArrowByExecutionOrder(graph, graph_compiler_info); + } } } @@ -2498,7 +2539,11 @@ void GraphScheduler::LinkControlArrowByCommunicationNode(const std::vectorinline_sub_graph_kernels().empty()) { + LinkControlArrowByExecutionOrder(graph, graph_compiler_info); + } else { + inline_control_flow_scheduler_.LinkControlArrowByExecutionOrder(graph, graph_compiler_info); + } } } } diff --git a/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.cc b/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.cc index ec6155799efa9bd6d88d736e4e953d39364e521e..9baf17e854feb28af9121095afadef874c86d3b9 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.cc @@ -96,14 +96,18 @@ void GetBranchNameToCondtionActor(const KernelGraphPtr &graph, } } // namespace -void InlineControlFlowScheduler::LinkControlArrowByExecutionOrder( - const KernelGraphPtr &graph, const GraphCompilerInfo &graph_compiler_info, - const mindspore::HashMap &branch_name_to_gather_actor) { +void InlineControlFlowScheduler::LinkControlArrowByExecutionOrder(const KernelGraphPtr &graph, + const GraphCompilerInfo &graph_compiler_info) const { MS_EXCEPTION_IF_NULL(graph); const auto &inline_sub_graph_kernels = graph->inline_sub_graph_kernels(); if (graph->is_graph_run_mode() || graph->is_any_type_input() || inline_sub_graph_kernels.empty()) { return; } + + mindspore::HashMap branch_name_to_switch_actor; + mindspore::HashMap branch_name_to_gather_actor; + GetBranchNameToCondtionActor(graph, &branch_name_to_switch_actor, &branch_name_to_gather_actor); + MS_LOG(DEBUG) << "Link control arrow for graph:" << graph->ToString(); // Only link control arrow between kernels in the same graph. mindspore::HashMap branch_last_actor; @@ -404,7 +408,7 @@ void InlineControlFlowScheduler::FixRefCountForRefNode(const KernelWithIndex &in MS_EXCEPTION_IF_NULL(input_with_index.first); auto new_branch_name = branch_name; if (common::AnfAlgo::CheckPrimitiveType(input_with_index.first, prim::kPrimConditionSwitch)) { - MS_LOG(DEBUG) << "Check switch node:" << input_with_index.first->DebugString() + MS_LOG(DEBUG) << "Check switch node:" << input_with_index.first->fullname_with_scope() << " index:" << input_with_index.second << " ref count:" << ref_count << " branch name:" << branch_name; const auto &actor = FetchActor(input_with_index.first->fullname_with_scope()); @@ -438,8 +442,11 @@ void InlineControlFlowScheduler::FixRefCountForRefNode(const KernelWithIndex &in return; } - const auto &ref_value = kernel_graph->GetRefCorrespondOutput(input_with_index); - if (ref_value.first != nullptr && kernel_graph->IsInRefOutputMap(ref_value)) { + if (kernel_graph->IsInRefOutputMap(input_with_index)) { + const auto &ref_value = kernel_graph->GetRefCorrespondOutput(input_with_index); + if (ref_value.first == nullptr) { + return; + } MS_LOG(DEBUG) << "Check input node:" << ref_value.first->fullname_with_scope() << " index:" << ref_value.second << " output node:" << input_with_index.first->fullname_with_scope() << " index:" << input_with_index.second; @@ -735,9 +742,6 @@ void InlineControlFlowScheduler::Link(ActorSet *actor_set, const GraphCompilerIn for (const auto &graph : graph_compiler_info.graphs_) { MS_EXCEPTION_IF_NULL(graph); GetBranchNameToCondtionActor(graph, &branch_name_to_switch_actor, &branch_name_to_gather_actor); - if (execution_order_running) { - LinkControlArrowByExecutionOrder(graph, graph_compiler_info, branch_name_to_gather_actor); - } } LinkControlArrowForNoInputOrOutputActor(actor_set, branch_name_to_switch_actor, branch_name_to_gather_actor); for (const auto &kernel_actor : actor_set->kernel_actors_) { diff --git a/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.h b/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.h index d88cee2096c0a3c3d7a312f64757eb7908cde569..d2e5181162d1e30d0b6edcbdc9a60f71bbdcc279 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.h +++ b/mindspore/ccsrc/runtime/graph_scheduler/inline_control_flow_scheduler.h @@ -32,11 +32,10 @@ class InlineControlFlowScheduler { // Link control arrows and fix the member variables for condition actors. void Link(ActorSet *actor_set, const GraphCompilerInfo &graph_compiler_info, bool execution_order_running); + void LinkControlArrowByExecutionOrder(const KernelGraphPtr &graph, + const GraphCompilerInfo &graph_compiler_info) const; private: - void LinkControlArrowByExecutionOrder( - const KernelGraphPtr &graph, const GraphCompilerInfo &graph_compiler_info, - const mindspore::HashMap &branch_name_to_gather_actor); // Fix the member variables for condition actors. void HandleConditionSwitchActor(const KernelActorPtr &kernel_actor); void HandleConditionGatherActor(const KernelActorPtr &kernel_actor); diff --git a/mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc b/mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc index 0dffb809bc233bcdb9a03e1b6eae4e4ee0016bb8..37110b8d26b8a3b30547613af299b714c91366cc 100644 --- a/mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc +++ b/mindspore/ccsrc/runtime/graph_scheduler/scheduler_helper.cc @@ -1052,6 +1052,22 @@ void SchedulerHelper::DumpFormatActorSet(const ActorSet *actor_set, std::ofstrea MS_EXCEPTION_IF_NULL(actor_set); try { MS_LOG(DEBUG) << "Start dump format actor set:" << actor_set->name_; + if (actor_set->control_actors_ != nullptr) { + for (const auto &exit_actor : actor_set->control_actors_->exit_actors_) { + if (exit_actor->node() != nullptr) { + continue; + } + auto actors = TopoSortForActor(exit_actor.get()); + ActorInfoMap actor_info; + ofs << "\n\nBase Block : " + << exit_actor->GetAID().Name().substr(0, exit_actor->GetAID().Name().find(kExitActorNameSuffix)) << "\n\n"; + for (size_t i = 0; i < actors.size(); ++i) { + DumpActorInfo(actors[i], i, &actor_info, ofs); + } + } + return; + } + auto actors = TopoSortForActor(actor_set->output_actor_.get()); ActorInfoMap actor_info; for (size_t i = 0; i < actors.size(); ++i) { diff --git a/mindspore/ccsrc/runtime/hardware/device_context.h b/mindspore/ccsrc/runtime/hardware/device_context.h index 02741ca10331c218c0c46eed8aec168709591c87..7bdbf1a9255998cf98318dfb58c00d82507be6bc 100644 --- a/mindspore/ccsrc/runtime/hardware/device_context.h +++ b/mindspore/ccsrc/runtime/hardware/device_context.h @@ -251,7 +251,9 @@ class BACKEND_EXPORT DeviceResManager { // Since the current entry for creating streams is not unified, the implementation of the 'SyncStream' and // "SyncAllStreams" interfaces are implemented by subclasses. virtual bool SyncStream(size_t stream_id) const { return true; } + virtual bool SyncAllStreams() const { return true; } + virtual bool SyncNotDefaultStreams() const { return true; } // Return default stream id. Normally it's 0. diff --git a/mindspore/ccsrc/runtime/pipeline/async_rqueue.cc b/mindspore/ccsrc/runtime/pipeline/async_rqueue.cc index 02b3316a6ee0ecf6627439805c96b9b4df2a4a95..c0edb6183175340286ff474c698a5ae711aebcbf 100644 --- a/mindspore/ccsrc/runtime/pipeline/async_rqueue.cc +++ b/mindspore/ccsrc/runtime/pipeline/async_rqueue.cc @@ -103,6 +103,16 @@ void AsyncRQueue::Push(const AsyncTaskPtr &task) { if (worker_ == nullptr) { worker_ = std::make_unique(&AsyncRQueue::WorkerLoop, this); } + + if (current_level_ == kThreadWaitLevel::kLevelUnknown) { + // cppcheck-suppress unreadVariable + std::unique_lock lock(level_mutex_); + current_level_ = thread_id_to_wait_level_[std::this_thread::get_id()]; + } + + if (current_level_ >= wait_level_) { + MS_LOG(EXCEPTION) << "Cannot push task from thread " << current_level_ << " to queue " << wait_level_; + } tasks_queue_.Enqueue(task); } diff --git a/mindspore/ccsrc/runtime/pipeline/pipeline.cc b/mindspore/ccsrc/runtime/pipeline/pipeline.cc index 36c8338ded33453b5f68fb825e99fabd48d35af9..40f5238379c928a07fac520654aa2ff5427f9276 100644 --- a/mindspore/ccsrc/runtime/pipeline/pipeline.cc +++ b/mindspore/ccsrc/runtime/pipeline/pipeline.cc @@ -27,6 +27,7 @@ Pipeline &Pipeline::Get() { Pipeline::Pipeline() : frontend_stage_( std::make_shared("frontend_queue", runtime::kThreadWaitLevel::kLevelFrontend)), - backend_stage_(std::make_shared("backend_device", kThreadWaitLevel::kLevelDevice)) {} + backend_stage_(std::make_shared("backend_queue", kThreadWaitLevel::kLevelBackend)), + launch_stage_(std::make_shared("launch_queue", kThreadWaitLevel::kLevelDevice)) {} } // namespace runtime } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/pipeline/pipeline.h b/mindspore/ccsrc/runtime/pipeline/pipeline.h index 03723c22bd0648c755cff5f557099f8e3976896b..5715ac98f8ab10645eca0f135799a57ec6be0d58 100644 --- a/mindspore/ccsrc/runtime/pipeline/pipeline.h +++ b/mindspore/ccsrc/runtime/pipeline/pipeline.h @@ -28,6 +28,7 @@ class BACKEND_EXPORT Pipeline { const AsyncRQueuePtr &frontend_stage() const { return frontend_stage_; } const AsyncRQueuePtr &backend_stage() const { return backend_stage_; } + const AsyncRQueuePtr &launch_stage() const { return launch_stage_; } private: Pipeline(); @@ -36,8 +37,10 @@ class BACKEND_EXPORT Pipeline { // Infer and create output tensor. AsyncRQueuePtr frontend_stage_; - // Malloc and launch kernels. + // Malloc and free. AsyncRQueuePtr backend_stage_; + // Launch kernel. + AsyncRQueuePtr launch_stage_; }; } // namespace runtime } // namespace mindspore diff --git a/mindspore/ccsrc/runtime/pipeline/task/device_task.cc b/mindspore/ccsrc/runtime/pipeline/task/device_task.cc index 51a0eba3346d625532e958f385048cea3a5ee65f..ecba3f9bfc08202206d0f5694351b663811d74fc 100644 --- a/mindspore/ccsrc/runtime/pipeline/task/device_task.cc +++ b/mindspore/ccsrc/runtime/pipeline/task/device_task.cc @@ -17,6 +17,7 @@ #include #include "include/common/profiler.h" #include "runtime/pipeline/task/device_task.h" +#include "runtime/pipeline/pipeline.h" namespace mindspore { namespace runtime { @@ -35,11 +36,21 @@ DeviceOpRunTask::~DeviceOpRunTask() { context_->op_compiler_info()->UpdateStatus void DeviceOpRunTask::Run() { runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyNativeDeviceTask, context_->op_run_info()->base_op_run_info.op_name, false); + Pipeline::Get().launch_stage()->Wait(); MS_EXCEPTION_IF_NULL(run_func_); run_func_(context_); run_func_ = nullptr; } +void DeviceLaunchTask::Run() { + if (run_func_) { + run_func_(); + } else { + MS_LOG(EXCEPTION) << "No run function!"; + } + run_func_ = nullptr; +} + void PyBoostDeviceTask::Run() { runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyNativeDeviceTask, kProfilerNamePyboost, false); @@ -54,6 +65,7 @@ void PyBoostDeviceTask::Run() { void PassthroughDeviceTask::Run() { runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyNativeDeviceTask, runtime::ProfilerRecorder::kNoName, false); + Pipeline::Get().launch_stage()->Wait(); run_func_(); } } // namespace runtime diff --git a/mindspore/ccsrc/runtime/pipeline/task/device_task.h b/mindspore/ccsrc/runtime/pipeline/task/device_task.h index ea7153a423a08bb0f4d9b15a4f74e1ab8fbba908..f027f7c2eb1aa5eb87407544837fe93ad56259b1 100644 --- a/mindspore/ccsrc/runtime/pipeline/task/device_task.h +++ b/mindspore/ccsrc/runtime/pipeline/task/device_task.h @@ -90,6 +90,17 @@ class BACKEND_EXPORT PyBoostDeviceTask : public AsyncTask { std::function run_func_; }; +class BACKEND_EXPORT DeviceLaunchTask : public AsyncTask { + public: + explicit DeviceLaunchTask(std::function run_func) : AsyncTask(kKernelTask), run_func_(std::move(run_func)) {} + ~DeviceLaunchTask() = default; + + void Run() override; + + private: + std::function run_func_; +}; + class BACKEND_EXPORT PassthroughDeviceTask : public AsyncTask { public: explicit PassthroughDeviceTask(std::function run_func) diff --git a/mindspore/ccsrc/runtime/pynative/op_executor.cc b/mindspore/ccsrc/runtime/pynative/op_executor.cc index 9bb8f0576926d0ea1a7bbc906232ec614bbf9616..a57a959e4dcfec1becc7d4eb06c4f48794ed8966 100644 --- a/mindspore/ccsrc/runtime/pynative/op_executor.cc +++ b/mindspore/ccsrc/runtime/pynative/op_executor.cc @@ -33,11 +33,15 @@ void OpExecutor::RegisterForwardCallback(const std::function &callback) tensor::Tensor::RegisterLazyCallback([]() { OpExecutor::GetInstance().WaitAll(); }); } -void OpExecutor::Reset() { runtime::Pipeline::Get().backend_stage()->Reset(); } +void OpExecutor::Reset() { + runtime::Pipeline::Get().backend_stage()->Reset(); + runtime::Pipeline::Get().launch_stage()->Reset(); +} void OpExecutor::WaitForRun() { MS_LOG(DEBUG) << "Start"; runtime::Pipeline::Get().backend_stage()->Wait(); + runtime::Pipeline::Get().launch_stage()->Wait(); MS_LOG(DEBUG) << "All task finish"; } @@ -74,6 +78,17 @@ bool OpExecutor::RunQueueEmpty() { return runtime::Pipeline::Get().backend_stage void OpExecutor::WorkerJoin() { GilReleaseWithCheck release_gil; runtime::Pipeline::Get().backend_stage()->WorkerJoin(); + runtime::Pipeline::Get().launch_stage()->WorkerJoin(); +} + +void OpExecutor::DispatchLaunchTask(const std::function &func) { + if (NeedSync()) { + runtime::OpExecutor::GetInstance().WaitAll(); + func(); + } else { + auto task = std::make_shared([=]() { func(); }); + runtime::Pipeline::Get().launch_stage()->Push(task); + } } bool OpExecutor::NeedSync() { @@ -87,6 +102,7 @@ void OpExecutor::ChildAfterFork() { MS_LOG(DEBUG) << "OpExecutor reinitialize after fork"; MS_LOG(DEBUG) << "Reinitialize async_queue_."; runtime::Pipeline::Get().backend_stage()->ChildAfterFork(); + runtime::Pipeline::Get().launch_stage()->ChildAfterFork(); // Refresh the lazy callback in Tensor. tensor::Tensor::RegisterLazyCallback([]() { OpExecutor::GetInstance().WaitAll(); }); MS_LOG(DEBUG) << "OpExecutor reinitialize after fork done."; diff --git a/mindspore/ccsrc/runtime/pynative/op_executor.h b/mindspore/ccsrc/runtime/pynative/op_executor.h index 7d09ad14396b04dc8b4348ad4ada1a879be4ac64..8907c26b0c4665d8a3c3b0cc52c1cbf40c362e25 100644 --- a/mindspore/ccsrc/runtime/pynative/op_executor.h +++ b/mindspore/ccsrc/runtime/pynative/op_executor.h @@ -65,6 +65,8 @@ class BACKEND_EXPORT OpExecutor { static bool NeedSync(); + static void DispatchLaunchTask(const std::function &func); + private: OpExecutor(); ~OpExecutor(); diff --git a/mindspore/ccsrc/runtime/pynative/op_runner.cc b/mindspore/ccsrc/runtime/pynative/op_runner.cc index 35e366161fc636cf06eb952a546c50c6fc9e707f..6aed9654b730579a4e8b92e429f3259146bd23f3 100644 --- a/mindspore/ccsrc/runtime/pynative/op_runner.cc +++ b/mindspore/ccsrc/runtime/pynative/op_runner.cc @@ -631,6 +631,8 @@ void LaunchKernels(const KernelGraphPtr &graph, const device::DeviceContext *dev stream)) { MS_LOG(EXCEPTION) << "Launch kernel failed, name:" << node->fullname_with_scope(); } + runtime::DeviceAddressUtils::ProcessCrossStreamAddress(op_run_info->base_op_run_info.op_name, device_context, + stream_id, inputs, outputs); } MS_LOG(DEBUG) << "End"; } @@ -928,6 +930,8 @@ void DynamicOpRunner::RunSingleOpGraph(const session::BackendOpRunInfoPtr &op_ru UpdateOutputShape(output_edges); } } + runtime::DeviceAddressUtils::ProcessCrossStreamAddress(op_run_info->base_op_run_info.op_name, device_context, + stream_id, input_kernel_tensors, output_kernel_tensors); } } @@ -966,6 +970,7 @@ void DynamicOpRunner::UpdateInputDeviceAddress(const OpCompilerInfoPtr &op_compi auto new_device_address = DeviceAddressUtils::ConvertContiguousDeviceAddress(device_context, device_address, is_sync); input_edge->address_ = new_device_address; + input_tensor->set_device_address(new_device_address); } else { // Always use tensor address as kernel address. input_edge->address_ = device_address; diff --git a/mindspore/ccsrc/transform/acl_ir/acl_declare/optimizer.cc b/mindspore/ccsrc/transform/acl_ir/acl_declare/optimizer.cc index 33a54b11f2ec4f5566049d3c826be86721ebae63..d5f4c08a72462c5a30d8d316600760638c7573a6 100644 --- a/mindspore/ccsrc/transform/acl_ir/acl_declare/optimizer.cc +++ b/mindspore/ccsrc/transform/acl_ir/acl_declare/optimizer.cc @@ -19,6 +19,7 @@ namespace mindspore { namespace transform { REGISTER_ACL_OP(AdamApplyOneWithDecay).set_run_mode(false); +REGISTER_ACL_OP(AdamApplyOneWithDecayAssign).set_run_mode(false); REGISTER_ACL_OP(ApplyAdaMaxD).set_run_mode(false); REGISTER_ACL_OP(ApplyMomentum).set_run_mode(false); REGISTER_ACL_OP(ApplyMomentumD).set_run_mode(false); diff --git a/mindspore/ccsrc/transform/acl_ir/op_api_util.cc b/mindspore/ccsrc/transform/acl_ir/op_api_util.cc index 437b1167cbdde10613de3ab82a5e66a6c9870a26..ec938726a975afae9495f49d7072cc7807b6f254 100644 --- a/mindspore/ccsrc/transform/acl_ir/op_api_util.cc +++ b/mindspore/ccsrc/transform/acl_ir/op_api_util.cc @@ -24,7 +24,7 @@ #include "include/common/utils/utils.h" #include "ops/math_op_name.h" #include "utils/ms_context.h" -#include "transform/symbol/acl_base_symbol.h" +#include "transform/symbol/acl_rt_symbol.h" #include "transform/symbol/acl_compiler_symbol.h" #include "transform/symbol/symbol_utils.h" diff --git a/mindspore/ccsrc/transform/graph_ir/convert.cc b/mindspore/ccsrc/transform/graph_ir/convert.cc index 84a134614d43a57b79737922ebc7d8ae76b7573d..289b658f3a97ccdc918ac1b5e8ea6bc7b0a06167 100644 --- a/mindspore/ccsrc/transform/graph_ir/convert.cc +++ b/mindspore/ccsrc/transform/graph_ir/convert.cc @@ -742,9 +742,15 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) { // Momentum's accum parameter at last auto cmp = std::bind(ParamCompare, std::placeholders::_1, std::placeholders::_2, std::cref(params_), graph_manager_->node_users()); - std::map ordered_tensors(tensors.begin(), tensors.end(), cmp); - for (const auto &it : ordered_tensors) { - std::string name = it.first; + std::map, decltype(cmp)> ordered_tensors(cmp); + // NOTE: the sequence of parameters of init DfGraph is calculated by TensorOrderMap, see method `GetInputTensors` + // defined in `mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_graph_executor.cc` + for (auto &it : tensors) { + ordered_tensors.insert({it.first, {index++, it.second}}); + } + for (const auto &itor : ordered_tensors) { + std::string name = itor.first; + auto &it = itor.second; auto node_itor = params_.find(name); // if name not in params_, create a node in graph if (node_itor == params_.end()) { @@ -815,8 +821,7 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) { } auto param_op = adpt->generate(name + "_data"); if (it.second->is_init() == 0) { - SetXDataIndex(param_op, index); - index++; + SetXDataIndex(param_op, it.first); ProcessInputData(&init_input, &infer_need_update_parameter_names, param_op, name, desc); } diff --git a/mindspore/ccsrc/transform/graph_ir/custom_op_proto/msda_ops.h b/mindspore/ccsrc/transform/graph_ir/custom_op_proto/msda_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..04e7ec3ac763d7fdfcfb0dcbbd59bbe40aee9908 --- /dev/null +++ b/mindspore/ccsrc/transform/graph_ir/custom_op_proto/msda_ops.h @@ -0,0 +1,47 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_GRAPH_IR_CUSTOM_OP_PROTO_MSDA_OPS_H_ +#define MINDSPORE_CCSRC_GRAPH_IR_CUSTOM_OP_PROTO_MSDA_OPS_H_ + +#include "graph/operator_reg.h" +#include "graph/operator.h" + +/* clang-format off */ + +namespace ge { +REG_OP(MultiScaleDeformableAttentionV2Grad) + .INPUT(value, ge::TensorType::ALL()) + .INPUT(spatial_shapes, ge::TensorType::ALL()) + .INPUT(level_start_index, ge::TensorType::ALL()) + .INPUT(sampling_loc, ge::TensorType::ALL()) + .INPUT(attn_weight, ge::TensorType::ALL()) + .INPUT(grad_output, ge::TensorType::ALL()) + .OUTPUT(grad_value, ge::TensorType::ALL()) + .OUTPUT(grad_sampling_loc, ge::TensorType::ALL()) + .OUTPUT(grad_attn_weight, ge::TensorType::ALL()) + .OP_END_FACTORY_REG(MultiScaleDeformableAttentionV2Grad); + +REG_OP(MultiScaleDeformableAttnFunctionV2) + .INPUT(value, ge::TensorType::ALL()) + .INPUT(value_spatial_shapes, ge::TensorType::ALL()) + .INPUT(value_level_start_index, ge::TensorType::ALL()) + .INPUT(sampling_locations, ge::TensorType::ALL()) + .INPUT(attention_weights, ge::TensorType::ALL()) + .OUTPUT(output, ge::TensorType::ALL()) + .OP_END_FACTORY_REG(MultiScaleDeformableAttnFunctionV2); +} // namespace ge +#endif // MINDSPORE_CCSRC_GRAPH_IR_CUSTOM_OP_PROTO_MSDA_OPS_H_ diff --git a/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h b/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h index 020a9f2bedcc3db03a94db87aa8238dac896b0e5..9a822ee6595cfbb81eff3a654ccec95add47862f 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h +++ b/mindspore/ccsrc/transform/graph_ir/op_adapter_map.h @@ -531,6 +531,8 @@ constexpr const char kNameAllGatherMatmul[] = "AllGatherMatmul"; constexpr const char kSilentCheck[] = "SilentCheck"; constexpr const char kNameUniformCandidateSampler[] = "UniformCandidateSampler"; constexpr const char kNameAllFinite[] = "AllFinite"; +constexpr const char kNameMultiScaleDeformableAttnFunctionV2[] = "MultiScaleDeformableAttnFunctionV2"; +constexpr const char kNameMultiScaleDeformableAttentionV2Grad[] = "MultiScaleDeformableAttentionV2Grad"; class OpAdapterDesc; diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/nn_calculation_ops_declare.cc b/mindspore/ccsrc/transform/graph_ir/op_declare/nn_calculation_ops_declare.cc index 4e23c334f4de4ac71eea63ed1c1d622f8a19d822..cc0e832df3a24b42d2f491bdbaa412be71c60aca 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_declare/nn_calculation_ops_declare.cc +++ b/mindspore/ccsrc/transform/graph_ir/op_declare/nn_calculation_ops_declare.cc @@ -317,4 +317,25 @@ INPUT_ATTR_MAP(EmbeddingDenseGrad) = {{kIndex3, ATTR_DESC(num_weights, AnyTraits ATTR_MAP(EmbeddingDenseGrad) = EMPTY_ATTR_MAP; OUTPUT_MAP(EmbeddingDenseGrad) = {{0, OUTPUT_DESC(y)}}; REG_ADPT_DESC(EmbeddingDenseBackward, ops::kNameEmbeddingDenseBackward, ADPT_DESC(EmbeddingDenseGrad)) + +// MultiScaleDeformableAttnFunctionV2 +INPUT_MAP(MultiScaleDeformableAttnFunctionV2) = {{1, INPUT_DESC(value)}, + {2, INPUT_DESC(value_spatial_shapes)}, + {3, INPUT_DESC(value_level_start_index)}, + {4, INPUT_DESC(sampling_locations)}, + {5, INPUT_DESC(attention_weights)}}; +ATTR_MAP(MultiScaleDeformableAttnFunctionV2) = EMPTY_ATTR_MAP; +OUTPUT_MAP(MultiScaleDeformableAttnFunctionV2) = {{0, OUTPUT_DESC(output)}}; +REG_ADPT_DESC(MultiScaleDeformableAttnFunctionV2, kNameMultiScaleDeformableAttnFunctionV2, + ADPT_DESC(MultiScaleDeformableAttnFunctionV2)) + +// MultiScaleDeformableAttentionV2Grad +INPUT_MAP(MultiScaleDeformableAttentionV2Grad) = { + {1, INPUT_DESC(value)}, {2, INPUT_DESC(spatial_shapes)}, {3, INPUT_DESC(level_start_index)}, + {4, INPUT_DESC(sampling_loc)}, {5, INPUT_DESC(attn_weight)}, {6, INPUT_DESC(grad_output)}}; +ATTR_MAP(MultiScaleDeformableAttentionV2Grad) = EMPTY_ATTR_MAP; +OUTPUT_MAP(MultiScaleDeformableAttentionV2Grad) = { + {0, OUTPUT_DESC(grad_value)}, {1, OUTPUT_DESC(grad_sampling_loc)}, {2, OUTPUT_DESC(grad_attn_weight)}}; +REG_ADPT_DESC(MultiScaleDeformableAttentionV2Grad, kNameMultiScaleDeformableAttentionV2Grad, + ADPT_DESC(MultiScaleDeformableAttentionV2Grad)) } // namespace mindspore::transform diff --git a/mindspore/ccsrc/transform/graph_ir/op_declare/nn_calculation_ops_declare.h b/mindspore/ccsrc/transform/graph_ir/op_declare/nn_calculation_ops_declare.h index 3cb554f68defa725a0720a7f3fc06b19778f3a52..c52e446cfb8187b97bd44a9ea5f1f22d4d174b2c 100644 --- a/mindspore/ccsrc/transform/graph_ir/op_declare/nn_calculation_ops_declare.h +++ b/mindspore/ccsrc/transform/graph_ir/op_declare/nn_calculation_ops_declare.h @@ -20,6 +20,7 @@ #include "op_proto/inc/rnn.h" #include "transform/graph_ir/op_declare/op_declare_macro.h" #include "transform/graph_ir/custom_op_proto/wkv_ops.h" +#include "transform/graph_ir/custom_op_proto/msda_ops.h" #include "utils/hash_map.h" DECLARE_OP_ADAPTER(BiasAddGrad) @@ -94,4 +95,10 @@ DECLARE_OP_USE_OUTPUT(Conv2DTranspose) DECLARE_OP_ADAPTER(EmbeddingDenseGrad) DECLARE_OP_USE_OUTPUT(EmbeddingDenseGrad) + +DECLARE_OP_ADAPTER(MultiScaleDeformableAttnFunctionV2) +DECLARE_OP_USE_OUTPUT(MultiScaleDeformableAttnFunctionV2) + +DECLARE_OP_ADAPTER(MultiScaleDeformableAttentionV2Grad) +DECLARE_OP_USE_OUTPUT(MultiScaleDeformableAttentionV2Grad) #endif // MINDSPORE_CCSRC_TRANSFORM_GRAPH_IR_OP_DECLARE_NN_CALCULATION_OPS_DECLARE_H_ diff --git a/mindspore/ccsrc/transform/symbol/acl_base_symbol.cc b/mindspore/ccsrc/transform/symbol/acl_base_symbol.cc index 459018004cc179359bcb10d7af1bd23f9648afd2..92fd1faca74c6f20cd6aca0a2805b3d138b04b12 100644 --- a/mindspore/ccsrc/transform/symbol/acl_base_symbol.cc +++ b/mindspore/ccsrc/transform/symbol/acl_base_symbol.cc @@ -31,7 +31,6 @@ aclSetTensorDescNameFunObj aclSetTensorDescName_ = nullptr; aclSetTensorFormatFunObj aclSetTensorFormat_ = nullptr; aclSetTensorPlaceMentFunObj aclSetTensorPlaceMent_ = nullptr; aclSetTensorShapeFunObj aclSetTensorShape_ = nullptr; -aclrtGetSocNameFunObj aclrtGetSocName_ = nullptr; void LoadAclBaseApiSymbol(const std::string &ascend_path) { std::string aclbase_plugin_path = "lib64/libascendcl.so"; @@ -52,7 +51,6 @@ void LoadAclBaseApiSymbol(const std::string &ascend_path) { aclSetTensorFormat_ = DlsymAscendFuncObj(aclSetTensorFormat, base_handler); aclSetTensorPlaceMent_ = DlsymAscendFuncObj(aclSetTensorPlaceMent, base_handler); aclSetTensorShape_ = DlsymAscendFuncObj(aclSetTensorShape, base_handler); - aclrtGetSocName_ = DlsymAscendFuncObj(aclrtGetSocName, base_handler); MS_LOG(INFO) << "Load acl base api success!"; } diff --git a/mindspore/ccsrc/transform/symbol/acl_base_symbol.h b/mindspore/ccsrc/transform/symbol/acl_base_symbol.h index e59111df3048f96b375fe61ab3e94d385cf8ebe4..7774e81cc0942c1a5ebb4bfbe2f84345abb6a1c4 100644 --- a/mindspore/ccsrc/transform/symbol/acl_base_symbol.h +++ b/mindspore/ccsrc/transform/symbol/acl_base_symbol.h @@ -34,7 +34,6 @@ ORIGIN_METHOD(aclSetTensorDescName, void, aclTensorDesc *, const char *) ORIGIN_METHOD(aclSetTensorFormat, aclError, aclTensorDesc *, aclFormat) ORIGIN_METHOD(aclSetTensorPlaceMent, aclError, aclTensorDesc *, aclMemType) ORIGIN_METHOD(aclSetTensorShape, aclError, aclTensorDesc *, int, const int64_t *) -ORIGIN_METHOD(aclrtGetSocName, const char *) extern aclCreateDataBufferFunObj aclCreateDataBuffer_; extern aclCreateTensorDescFunObj aclCreateTensorDesc_; @@ -48,7 +47,6 @@ extern aclSetTensorDescNameFunObj aclSetTensorDescName_; extern aclSetTensorFormatFunObj aclSetTensorFormat_; extern aclSetTensorPlaceMentFunObj aclSetTensorPlaceMent_; extern aclSetTensorShapeFunObj aclSetTensorShape_; -extern aclrtGetSocNameFunObj aclrtGetSocName_; void LoadAclBaseApiSymbol(const std::string &ascend_path); } // namespace transform diff --git a/mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc b/mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc index 69491ec3fa4c896fba6cbd664e166e7525c34128..b859ab08329b9130559ee07c0cd3a7e3bceb68a0 100644 --- a/mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc +++ b/mindspore/ccsrc/transform/symbol/acl_rt_symbol.cc @@ -63,6 +63,7 @@ aclrtSubscribeReportFunObj aclrtSubscribeReport_ = nullptr; aclrtSynchronizeEventFunObj aclrtSynchronizeEvent_ = nullptr; aclrtSynchronizeStreamFunObj aclrtSynchronizeStream_ = nullptr; aclrtSynchronizeStreamWithTimeoutFunObj aclrtSynchronizeStreamWithTimeout_ = nullptr; +aclrtGetSocNameFunObj aclrtGetSocName_ = nullptr; void LoadAclRtApiSymbol(const std::string &ascend_path) { std::string aclrt_plugin_path = ascend_path + "lib64/libascendcl.so"; @@ -115,6 +116,7 @@ void LoadAclRtApiSymbol(const std::string &ascend_path) { aclrtSynchronizeEvent_ = DlsymAscendFuncObj(aclrtSynchronizeEvent, handler); aclrtSynchronizeStream_ = DlsymAscendFuncObj(aclrtSynchronizeStream, handler); aclrtSynchronizeStreamWithTimeout_ = DlsymAscendFuncObj(aclrtSynchronizeStreamWithTimeout, handler); + aclrtGetSocName_ = DlsymAscendFuncObj(aclrtGetSocName, handler); MS_LOG(INFO) << "Load acl rt api success!"; } diff --git a/mindspore/ccsrc/transform/symbol/acl_rt_symbol.h b/mindspore/ccsrc/transform/symbol/acl_rt_symbol.h index f6234fa258e91b52f3c5218a6a97d2ed60bc4972..d88111c13ef463dd2ade1975bafdf29fa016f407 100644 --- a/mindspore/ccsrc/transform/symbol/acl_rt_symbol.h +++ b/mindspore/ccsrc/transform/symbol/acl_rt_symbol.h @@ -66,6 +66,7 @@ ORIGIN_METHOD(aclrtSubscribeReport, aclError, uint64_t, aclrtStream) ORIGIN_METHOD(aclrtSynchronizeEvent, aclError, aclrtEvent) ORIGIN_METHOD(aclrtSynchronizeStream, aclError, aclrtStream) ORIGIN_METHOD(aclrtSynchronizeStreamWithTimeout, aclError, aclrtStream, int32_t) +ORIGIN_METHOD(aclrtGetSocName, const char *) extern aclrtCreateContextFunObj aclrtCreateContext_; extern aclrtCreateEventFunObj aclrtCreateEvent_; @@ -111,6 +112,7 @@ extern aclrtSubscribeReportFunObj aclrtSubscribeReport_; extern aclrtSynchronizeEventFunObj aclrtSynchronizeEvent_; extern aclrtSynchronizeStreamFunObj aclrtSynchronizeStream_; extern aclrtSynchronizeStreamWithTimeoutFunObj aclrtSynchronizeStreamWithTimeout_; +extern aclrtGetSocNameFunObj aclrtGetSocName_; void LoadAclRtApiSymbol(const std::string &ascend_path); } // namespace transform diff --git a/mindspore/core/ir/func_graph.cc b/mindspore/core/ir/func_graph.cc index 1a68b462bc41cf66b888ac7d8bb67bf0dcac7c4c..c297c1bc0a9432c83ba3557b180e3e7bd2bd98c3 100644 --- a/mindspore/core/ir/func_graph.cc +++ b/mindspore/core/ir/func_graph.cc @@ -659,7 +659,7 @@ void FuncGraph::SetDefaultValues(const std::vector &name_list, cons void FuncGraph::ClearDefaultValues() { parameter_default_value_.clear(); } -size_t FuncGraph::GetDefaultValueCount() { +size_t FuncGraph::GetDefaultValueCount() const { int64_t null_count = std::count_if(parameter_default_value_.begin(), parameter_default_value_.end(), [](const std::pair &pair) { return IsValueNode(pair.second); }); diff --git a/mindspore/core/ir/func_graph.h b/mindspore/core/ir/func_graph.h index 7091b0472fda8bc117656ba2d26ec5d294174736..76763a1d6d2bdab3d9696177cc001f997fe79295 100644 --- a/mindspore/core/ir/func_graph.h +++ b/mindspore/core/ir/func_graph.h @@ -103,6 +103,7 @@ const char FUNC_GRAPH_FLAG_DYNAMIC_SHAPE[] = "dynamic_shape"; const char FUNC_GRAPH_FLAG_NO_RECURSIVE[] = "no_recursive"; const char FUNC_GRAPH_FLAG_ARGS_NO_EXPAND[] = "args_no_expand"; const char FUNC_GRAPH_FLAG_PROXY_GRAPH[] = "proxy_graph"; +const char FUNC_GRAPH_FLAG_NO_CHILD_GRAPH[] = "no_child_graph"; const char kFuncGraphFlagUndetermined[] = "undeterminate"; const char kFuncGraphFlagBackPropEntry[] = "back_prop_entry"; @@ -180,7 +181,7 @@ class MS_CORE_API FuncGraph : public FuncGraphBase, public EffectInfoHolder { } void SetDefaultValues(const std::vector &name_list, const AnfNodePtrList &value_list); void ClearDefaultValues(); - size_t GetDefaultValueCount(); + size_t GetDefaultValueCount() const; std::map ¶meter_default_value() { return parameter_default_value_; } void set_has_vararg(bool has_) { has_vararg_ = has_; } bool has_vararg() const { return has_vararg_; } diff --git a/mindspore/core/ir/func_graph_cloner.cc b/mindspore/core/ir/func_graph_cloner.cc index 85567844c214a3e7f822b2091757166e22dff684..f487267ede7905ffdf24ea2bbf90ee53e6baafcd 100644 --- a/mindspore/core/ir/func_graph_cloner.cc +++ b/mindspore/core/ir/func_graph_cloner.cc @@ -200,6 +200,10 @@ void Cloner::AddChildGraphs(const FuncGraphPtr &func_graph) { if (!clone_all_child_graphs_) { return; } + // The graph marked 'no_child_graph' has no child graph. + if (func_graph->has_flag(FUNC_GRAPH_FLAG_NO_CHILD_GRAPH)) { + return; + } auto &scopes = manager_->scopes(func_graph); std::set memo; for (auto &graph : scopes) { diff --git a/mindspore/core/ir/func_graph_extends.cc b/mindspore/core/ir/func_graph_extends.cc index 0ed7cfa584f4833e429c78425efe6dd5c9e8971c..e5098a6f42e5e3ddf477c72891647d6f86fab820 100644 --- a/mindspore/core/ir/func_graph_extends.cc +++ b/mindspore/core/ir/func_graph_extends.cc @@ -91,12 +91,13 @@ void FuncGraph::GenerateVarParams(const FuncGraphPtr &specialized_graph, int var } // If there is variable argument. - if (variable_args_count < 0) { + if (variable_args_count + GetDefaultValueCount() < 0) { MS_LOG(EXCEPTION) << "For function:" << this->ToString() << ", its argument size: " << pos_args_input_count - << " is less or equal to parameter size: " << GetPositionalArgsCount(); + << " is less than parameter size: " << GetPositionalArgsCount(); } + int count_num = variable_args_count < 0 ? pos_args_input_count : GetPositionalArgsCount(); // Copy other parameters than vararg's firstly. - for (size_t i = 0; i < IntToSize(GetPositionalArgsCount()); ++i) { + for (size_t i = 0; i < IntToSize(count_num); ++i) { specialized_parameter_list->push_back(specialized_graph->parameters()[i]); } MS_EXCEPTION_IF_NULL(specialized_graph->GetVariableArgParameter()); diff --git a/mindspore/core/ops/adam_apply_one_with_decay.cc b/mindspore/core/ops/adam_apply_one_with_decay.cc index 2d9a74f907eae1f6981c5867c0974c65a7716724..9332b6fced2a1e7716d324136aa168dec824a79f 100644 --- a/mindspore/core/ops/adam_apply_one_with_decay.cc +++ b/mindspore/core/ops/adam_apply_one_with_decay.cc @@ -129,7 +129,16 @@ class MIND_API AdamApplyOneWithDecay : public BaseOperator { AdamApplyOneWithDecay() : BaseOperator("AdamApplyOneWithDecay") {} }; +class MIND_API AdamApplyOneWithDecayAssign : public BaseOperator { + public: + MIND_API_BASE_MEMBER(AdamApplyOneWithDecayAssign); + /// \brief Constructor. + AdamApplyOneWithDecayAssign() : BaseOperator("AdamApplyOneWithDecayAssign") {} +}; + REGISTER_PRIMITIVE_OP_INFER_IMPL(AdamApplyOneWithDecay, prim::kPrimAdamApplyOneWithDecay, AdamApplyOneWithDecayInfer, false); +REGISTER_PRIMITIVE_OP_INFER_IMPL(AdamApplyOneWithDecayAssign, prim::kPrimAdamApplyOneWithDecayAssign, + AdamApplyOneWithDecayInfer, false); } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/fusion/avg_pool_fusion.h b/mindspore/core/ops/fusion/avg_pool_fusion.h index 8c8518f7471ad034d70eca7d4285c7e39d9879cc..400138eb46fa7cd0bf529b12b627fa6fed4d87cd 100644 --- a/mindspore/core/ops/fusion/avg_pool_fusion.h +++ b/mindspore/core/ops/fusion/avg_pool_fusion.h @@ -45,7 +45,7 @@ class MIND_API AvgPoolFusion : public BaseOperator { /// \param[in] activation_type Define the activation type. void Init(const std::vector &kernel_size = {1}, const std::vector &stride = {1}, const PadMode &pad_mode = VALID, const Format &format = NCHW, - const std::vector &pad = {0, 0, 0, 0}, const RoundMode &round_mode = FLOOR, + const std::vector &pad = {0, 0, 0, 0}, const RoundMode &round_mode = RoundMode::FLOOR, const bool global = false, const ActivationType activation_type = NO_ACTIVATION); /// \brief Set pad_mode. diff --git a/mindspore/core/ops/fusion/max_pool_fusion.h b/mindspore/core/ops/fusion/max_pool_fusion.h index c7c3d05261ffd7bf840f5ae10e623da50c914a56..1d5e252aa05cd58cda90a8fadfe7ab8a31f86a0d 100644 --- a/mindspore/core/ops/fusion/max_pool_fusion.h +++ b/mindspore/core/ops/fusion/max_pool_fusion.h @@ -45,7 +45,7 @@ class MIND_API MaxPoolFusion : public MaxPool { /// \param[in] activation_type Define the activation type. void Init(const std::vector &kernel_size = {1}, const std::vector &stride = {1}, const PadMode &pad_mode = VALID, const Format &format = NCHW, - const std::vector &pad = {0, 0, 0, 0}, const RoundMode &round_mode = FLOOR, + const std::vector &pad = {0, 0, 0, 0}, const RoundMode &round_mode = RoundMode::FLOOR, const bool global = false, const ActivationType activation_type = NO_ACTIVATION); /// \brief Method to set global attribute. diff --git a/mindspore/core/ops/math_op_name.h b/mindspore/core/ops/math_op_name.h index 6b5f41f0fc8bf17c2a47370d5b7850c26ada5f9e..412abaf4515f2c5fa368791bb7851991cc1de351 100644 --- a/mindspore/core/ops/math_op_name.h +++ b/mindspore/core/ops/math_op_name.h @@ -114,6 +114,7 @@ constexpr auto kCumsumDOpName = "CumsumD"; constexpr auto kCumSumOpName = "CumSum"; constexpr auto kDigammaOpName = "Digamma"; constexpr auto kDivOpName = "Div"; +constexpr auto kDivModOpName = "DivMod"; constexpr auto kEigOpName = "Eig"; constexpr auto kEuclideanNormDOpName = "EuclideanNormD"; constexpr auto kExpm1OpName = "Expm1"; diff --git a/mindspore/core/ops/max_pool.h b/mindspore/core/ops/max_pool.h index 5e6ea2fbf7b085333c55fbcefb977638a7a641e8..eb06674d7310989a3562874867db94fa5b15e6fd 100644 --- a/mindspore/core/ops/max_pool.h +++ b/mindspore/core/ops/max_pool.h @@ -39,7 +39,7 @@ class MIND_API MaxPool : public BaseOperator { /// \brief Init. Refer to the parameters of Python API @ref mindspore.ops.MaxPool for the inputs. void Init(const std::vector &kernel_size = {1}, const std::vector &stride = {1}, const PadMode &pad_mode = VALID, const Format &format = NCHW, - const std::vector &pad = {0, 0, 0, 0}, const RoundMode &round_mode = FLOOR); + const std::vector &pad = {0, 0, 0, 0}, const RoundMode &round_mode = RoundMode::FLOOR); /// \brief Set pad_mode. void set_pad_mode(const PadMode &pad_mode); /// \brief Set kernel_size. diff --git a/mindspore/core/ops/op_enum.cc b/mindspore/core/ops/op_enum.cc index d955ab17489bdf52457df2cf77412a3bccfd4943..f201883727b92938816208945ac39f53373dd792 100644 --- a/mindspore/core/ops/op_enum.cc +++ b/mindspore/core/ops/op_enum.cc @@ -71,6 +71,10 @@ inline std::unordered_map GetStringToFormatMap() { } REG_STRING_TO_ENUM(format, GetStringToFormatMap()) +// RoundingMode +StrToEnumMap StrToRoundingModeMap = {{"FLOOR", RoundingMode::FLOOR}, {"TRUNC", RoundingMode::TRUNC}}; +REG_STRING_TO_ENUM(rounding_mode, StrToRoundingModeMap) + // PadMode StrToEnumMap StrToPadModeMap = { {"PAD", PadMode::PAD}, {"SAME", PadMode::SAME}, {"VALID", PadMode::VALID}, {"FULL", PadMode::FULL}}; diff --git a/mindspore/core/ops/op_enum.h b/mindspore/core/ops/op_enum.h index fb2751494c1132a8e0b688326124262cf8e9130d..6e5d34896113c9438f8ad5b7e780eb616f8f8d05 100644 --- a/mindspore/core/ops/op_enum.h +++ b/mindspore/core/ops/op_enum.h @@ -40,6 +40,8 @@ enum Group : int64_t { SYNC_BN_GROUP0 = 0 }; enum InterpolationMode : int64_t { BILINEAR = 0, NEAREST = 1 }; +enum RoundingMode : int64_t { TRUNC = 1, FLOOR = 2 }; + enum NormMode : int64_t { BACKWARD = 0, FORWARD = 1, ORTHO = 2 }; enum GridSamplerPaddingMode : int64_t { ZEROS = 0, BORDER = 1, REFLECTION = 2 }; diff --git a/mindspore/core/ops/op_name.h b/mindspore/core/ops/op_name.h index f7d2eee77986d03e383c46ff180c03feb91ac0bc..a957a0b386298282895d37d23ee280e2a0d7452d 100644 --- a/mindspore/core/ops/op_name.h +++ b/mindspore/core/ops/op_name.h @@ -222,6 +222,7 @@ constexpr auto kReduction = "reduction"; constexpr auto kRho = "rho"; constexpr auto kRootRank = "root_rank"; constexpr auto kRoundMode = "round_mode"; +constexpr auto kRoundingMode = "rounding_mode"; constexpr auto kRtol = "rtol"; constexpr auto kSame = "same"; constexpr auto kScale = "scale"; diff --git a/mindspore/core/ops/ops_def/abs_op.yaml b/mindspore/core/ops/ops_def/abs_op.yaml index 564156049825c190941d92fdee22e3126439cbeb..27769583bda71680948a16b941d116b2f457c346 100644 --- a/mindspore/core/ops/ops_def/abs_op.yaml +++ b/mindspore/core/ops/ops_def/abs_op.yaml @@ -6,3 +6,7 @@ abs: returns: output: dtype: tensor + class: + name: Abs + dispatch: + enable: True diff --git a/mindspore/core/ops/ops_def/convolution_grad_op.yaml b/mindspore/core/ops/ops_def/convolution_grad_op.yaml index 631f1bf13a115666fd94f01b050e97322543f183..c265ef68305d45bf931e0c7691abc44a7b017029 100644 --- a/mindspore/core/ops/ops_def/convolution_grad_op.yaml +++ b/mindspore/core/ops/ops_def/convolution_grad_op.yaml @@ -19,7 +19,7 @@ convolution_grad: dtype: tuple[int] default: 0 prim_init: True - arg_handler: to_paddings + arg_handler: to_2d_paddings dilation: dtype: tuple[int] default: 1 diff --git a/mindspore/core/ops/ops_def/convolution_op.yaml b/mindspore/core/ops/ops_def/convolution_op.yaml index 02dd06297e4eac0e5d1391d5f8f24463330a4deb..4304be074454c7566f45e530cfd750f0ae8b4d73 100644 --- a/mindspore/core/ops/ops_def/convolution_op.yaml +++ b/mindspore/core/ops/ops_def/convolution_op.yaml @@ -17,8 +17,7 @@ convolution: dtype: tuple[int] default: 0 prim_init: True - arg_handler: to_paddings - type_cast: list[int] + arg_handler: to_2d_paddings dilation: dtype: tuple[int] default: 1 diff --git a/mindspore/core/ops/ops_def/divmod_op.yaml b/mindspore/core/ops/ops_def/divmod_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e00fe4ce504d45166cef729b445470f4a8109d5 --- /dev/null +++ b/mindspore/core/ops/ops_def/divmod_op.yaml @@ -0,0 +1,27 @@ +#operator divmod +divmod: + args: + x: + dtype: tensor + type_cast: number + y: + dtype: tensor + type_cast: number + rounding_mode: + dtype: int + default: None + arg_handler: str_to_enum + args_signature: + dtype_group: (x, y), (rounding_mode) + returns: + output: + dtype: tensor + class: + name: DivMod + function: + disable: True + dispatch: + enable: True + Ascend: DivModAscend + GPU: DivModGPU + CPU: DivModCPU diff --git a/mindspore/core/ops/ops_def/doc/broadcast_to_doc.yaml b/mindspore/core/ops/ops_def/doc/broadcast_to_doc.yaml index 0502430b14ff2dc41e2a89e5ccb242fef24a97ca..cf35ed37b8bb96ef201a0b8029fec5de68ff56cd 100644 --- a/mindspore/core/ops/ops_def/doc/broadcast_to_doc.yaml +++ b/mindspore/core/ops/ops_def/doc/broadcast_to_doc.yaml @@ -9,36 +9,36 @@ broadcast_to: :math:`x_1` and :math:`y_1` consecutively and decide whether these shapes are broadcastable and what the broadcast result is. - If the value pairs at a specific dim are equal, then that value goes right into that dim of output shape. - With an input shape :math:`(2, 3)`, target shape :math:`(2, 3)` , the inferred output shape is :math:`(2, 3)`. + - If the value pairs at a specific dim are equal, then that value goes right into that dim of output shape. + With an input shape :math:`(2, 3)`, target shape :math:`(2, 3)` , the inferred output shape is :math:`(2, 3)`. - If the value pairs are unequal, there are three cases: + - If the value pairs are unequal, there are three cases: - Case 1: If the value of the target shape in the dimension is -1, the value of the - output shape in the dimension is the value of the corresponding input shape in the dimension. - With an input shape :math:`(3, 3)`, target - shape :math:`(-1, 3)`, the output shape is :math:`(3, 3)`. + - Case 1: If the value of the target shape in the dimension is -1, the value of the + output shape in the dimension is the value of the corresponding input shape in the dimension. + With an input shape :math:`(3, 3)`, target + shape :math:`(-1, 3)`, the output shape is :math:`(3, 3)`. - Case 2: If the value of target shape in the dimension is not -1, but the corresponding - value in the input shape is 1, then the corresponding value of the output shape - is that of the target shape. With an input shape :math:`(1, 3)`, target - shape :math:`(8, 3)`, the output shape is :math:`(8, 3)`. + - Case 2: If the value of target shape in the dimension is not -1, but the corresponding + value in the input shape is 1, then the corresponding value of the output shape + is that of the target shape. With an input shape :math:`(1, 3)`, target + shape :math:`(8, 3)`, the output shape is :math:`(8, 3)`. - Case 3: If the corresponding values of the two shapes do not satisfy the above cases, - it means that broadcasting from the input shape to the target shape is not supported. + - Case 3: If the corresponding values of the two shapes do not satisfy the above cases, + it means that broadcasting from the input shape to the target shape is not supported. So far we got the last m dims of the outshape, now focus on the first :math:`*` dims, there are two cases: - If the first :math:`*` dims of output shape does not have -1 in it, then fill the input - shape with ones until their length are the same, and then refer to - Case 2 mentioned above to calculate the output shape. With target shape :math:`(3, 1, 4, 1, 5, 9)`, - input shape :math:`(1, 5, 9)`, the filled input shape will be :math:`(1, 1, 1, 1, 5, 9)` and thus the - output shape is :math:`(3, 1, 4, 1, 5, 9)`. + - If the first :math:`*` dims of output shape does not have -1 in it, then fill the input + shape with ones until their length are the same, and then refer to + Case 2 mentioned above to calculate the output shape. With target shape :math:`(3, 1, 4, 1, 5, 9)`, + input shape :math:`(1, 5, 9)`, the filled input shape will be :math:`(1, 1, 1, 1, 5, 9)` and thus the + output shape is :math:`(3, 1, 4, 1, 5, 9)`. - If the first :math:`*` dims of output shape have -1 in it, it implies this -1 is corresponding to - a non-existing dim so they're not broadcastable. With target shape :math:`(3, -1, 4, 1, 5, 9)`, - input shape :math:`(1, 5, 9)`, instead of operating the dim-filling process first, it raises errors directly. + - If the first :math:`*` dims of output shape have -1 in it, it implies this -1 is corresponding to + a non-existing dim so they're not broadcastable. With target shape :math:`(3, -1, 4, 1, 5, 9)`, + input shape :math:`(1, 5, 9)`, instead of operating the dim-filling process first, it raises errors directly. Args: input (Tensor): The input Tensor. diff --git a/mindspore/core/ops/ops_def/doc/erf_doc.yaml b/mindspore/core/ops/ops_def/doc/erf_doc.yaml index f175509a908c6f4794e77f3560cd5846c87198fe..734085171b1a4879020d0f9192225d2997ba86d8 100644 --- a/mindspore/core/ops/ops_def/doc/erf_doc.yaml +++ b/mindspore/core/ops/ops_def/doc/erf_doc.yaml @@ -10,15 +10,20 @@ erf: input (Tensor): The input tensor of Gaussian error function. :math:`x` in the following formula. Supported dtypes: - - Ascend: float16, float32. + - Ascend: float16, float32, int64, bool. - GPU/CPU: float16, float32, float64. Returns: - Tensor, has the same shape and dtype as the `input`. + Tensor. If the input is int64 or bool, the return value type is float32. + Otherwise, the return value type is the same as the input type. + Raises: TypeError: If `input` is not a Tensor. - TypeError: If dtype of `input` is not float16, float32 or float64. + TypeError: If dtype of `input` is not as follows + + - Ascend: float16, float32, int64, bool. + - GPU/CPU: float16, float32, float64. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` diff --git a/mindspore/core/ops/ops_def/doc/max_pool_grad_with_indices_doc.yaml b/mindspore/core/ops/ops_def/doc/max_pool_grad_with_indices_doc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8f6fd18ba7e23a3f02b53949bae7b4a356f21019 --- /dev/null +++ b/mindspore/core/ops/ops_def/doc/max_pool_grad_with_indices_doc.yaml @@ -0,0 +1,3 @@ +max_pool_grad_with_indices: + description: | + Gradients of the MaxPoolWithIndices operation. diff --git a/mindspore/core/ops/ops_def/doc/max_pool_grad_with_mask_doc.yaml b/mindspore/core/ops/ops_def/doc/max_pool_grad_with_mask_doc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ce043f4d2f0d88b9bcc77c57eaae5b007553d17 --- /dev/null +++ b/mindspore/core/ops/ops_def/doc/max_pool_grad_with_mask_doc.yaml @@ -0,0 +1,3 @@ +max_pool_grad_with_mask: + description: | + Gradients of the MaxPoolWithMask operation. diff --git a/mindspore/core/ops/ops_def/doc/max_pool_with_indices_doc.yaml b/mindspore/core/ops/ops_def/doc/max_pool_with_indices_doc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c3d4a294486acbf69b97e22fb09b3f8b403d985f --- /dev/null +++ b/mindspore/core/ops/ops_def/doc/max_pool_with_indices_doc.yaml @@ -0,0 +1,62 @@ +max_pool_with_indices: + description: | + Performs max pooling on the input Tensor and returns both max values and indices. + + Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs + regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size + :math:`(h_{ker}, w_{ker})` and stride :math:`(s_0, s_1)`, the operation is as follows: + + .. math:: + \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} + \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n) + + .. warning:: + This is an experimental API that is subject to change or deletion. + + Args: + kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and argmax + value, is an int number that represents height and width of the kernel, or a tuple of + two int numbers that represent height and width respectively. + strides (Union[int, tuple[int]], optional): The distance of kernel moving, an int number that represents + not only the height of movement but also the width of movement, or a tuple of two int numbers that + represent height and width of movement respectively. Default: ``None`` , meaning that + `strides = kernel_size`. + pads (Union[int, tuple[int]], optional): An int number that represents the depth, + height and width of movement are both strides, or a tuple of two int numbers that represent + depth, height and width of movement respectively. + Default: 0. + dilation (Union[int, tuple[int]], optional): Control the stride of elements in the kernel. Default: ``(1, 1)`` . + ceil_mode (bool, optional): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` . + argmax_type (mindspore.dtype, optional) : The dtype for argmax. + Default: ``mstype.int64`` . [Disabled in Ascend.] + + Inputs: + - **x** (Tensor) - Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of float32 in Ascend. + + Outputs: + Tuple of 2 Tensors, representing the maxpool result and where the max values are generated. + + - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`. + It has the same data type as `x`. + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{pads[0]} - \text{dilation[0]} + \times (\text{kernel_size[0]} - 1) - 1}{\text{strides[0]}} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{pads[1]} - \text{dilation[1]} + \times (\text{kernel_size[1]} - 1) - 1}{\text{strides[1]}} + 1\right\rfloor + + - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int32 in Ascend. + + Raises: + TypeError: If `x` is not a Tensor. + ValueError: If length of shape of `x` is not equal to 4. + TypeError: If `kernel_size` , `strides` , `pads` or `dilation` is not int or tuple. + ValueError: If `kernel_size`, `strides` or `dilation` is less than 1. + ValueError: If `pads` is less than 0. + ValueError: If `pads` is more than half of `kernel_size`. + TypeError: If `ceil_mode` is not bool. + + Supported Platforms: + ``Ascend910B`` diff --git a/mindspore/core/ops/ops_def/doc/max_pool_with_mask_doc.yaml b/mindspore/core/ops/ops_def/doc/max_pool_with_mask_doc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..68a096f8abfe9b4cd821991c7b0141519741b247 --- /dev/null +++ b/mindspore/core/ops/ops_def/doc/max_pool_with_mask_doc.yaml @@ -0,0 +1,64 @@ +max_pool_with_mask: + description: | + Performs max pooling on the input Tensor and returns both max values and mask. + + Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool outputs + regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size + :math:`(h_{ker}, w_{ker})` and stride :math:`(s_0, s_1)`, the operation is as follows: + + .. math:: + \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} + \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n) + + .. warning:: + This is an experimental API that is subject to change or deletion. + + Args: + kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and argmax + value, is an int number that represents height and width of the kernel, or a tuple of + two int numbers that represent height and width respectively. + strides (Union[int, tuple[int]], optional): The distance of kernel moving, an int number that represents + not only the height of movement but also the width of movement, or a tuple of two int numbers that + represent height and width of movement respectively. Default: ``1``. + pads (Union[int, tuple[int]], optional): An int number that represents the depth, + height and width of movement are both strides, or a tuple of two int numbers that represent + depth, height and width of movement respectively. + Default: 0. + dilation (Union[int, tuple[int]], optional): Control the stride of elements in the kernel. + Default: ``(1, 1)`` . + ceil_mode (bool, optional): Whether to use ceil instead of floor to calculate output shape. + Default: ``False`` . + argmax_type (mindspore.dtype, optional) : The dtype for argmax. + Default: ``mstype.int64`` . [Disabled in Ascend.] + + Inputs: + - **x** (Tensor) - Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of float16 + and float32 in Ascend. + + Outputs: + Tuple of 2 Tensors, representing the maxpool result and mask are generated. + + - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`. + It has the same data type as `x`. + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{pads[0]} - \text{dilation[0]} + \times (\text{kernel_size[0]} - 1) - 1}{\text{strides[0]}} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{pads[1]} - \text{dilation[1]} + \times (\text{kernel_size[1]} - 1) - 1}{\text{strides[1]}} + 1\right\rfloor + + - **mask** (Tensor) - Maxpooling mask. Data type is int8 in Ascend. + + Raises: + TypeError: If `x` is not a Tensor. + ValueError: If length of shape of `x` is not equal to 4. + TypeError: If `kernel_size` , `strides` , `pads` or `dilation` is not int or tuple. + ValueError: If `kernel_size`, `strides` or `dilation` is less than 1. + ValueError: If `pads` is less than 0. + ValueError: If `pads` is more than half of `kernel_size`. + TypeError: If `ceil_mode` is not bool. + + Supported Platforms: + ``Ascend910B`` diff --git a/mindspore/core/ops/ops_def/doc/multi_scale_deformable_attention_v2_grad_doc.yaml b/mindspore/core/ops/ops_def/doc/multi_scale_deformable_attention_v2_grad_doc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..49c5d4c62fce7116ea1cda8741427f200db0d12d --- /dev/null +++ b/mindspore/core/ops/ops_def/doc/multi_scale_deformable_attention_v2_grad_doc.yaml @@ -0,0 +1,21 @@ +multi_scale_deformable_attn_grad: + description: | + Multi Scale Deformable Attention Grad function. + Args: + value (Tensor): The input tensor. + spatial_shapes (Tensor): The input tensor. + level_start_index (Tensor): The input tensor. + sampling_loc (Tensor): The input tensor. + attn_weight (Tensor): The input tensor. + grad_output (Tensor): The input tensor. + + Returns: + grad_value (Tensor): The output tensor. + grad_sampling_loc (Tensor): The output tensor. + grad_attn_weight (Tensor): The output tensor. + + Raises: + TypeError: If input is not a Tensor. + + Supported Platforms: + ``Ascend`` diff --git a/mindspore/core/ops/ops_def/doc/multi_scale_deformable_attn_doc.yaml b/mindspore/core/ops/ops_def/doc/multi_scale_deformable_attn_doc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7b5fd2630b58d5a427f4d8c4e5bf9a0c5fd266c4 --- /dev/null +++ b/mindspore/core/ops/ops_def/doc/multi_scale_deformable_attn_doc.yaml @@ -0,0 +1,18 @@ +multi_scale_deformable_attn: + description: | + Multi Scale Deformable Attention function. + Args: + value (Tensor): The input tensor. + value_spatial_shapes (Tensor): The input tensor. + value_level_start_index (Tensor): The input tensor. + sampling_locations (Tensor): The input tensor. + attention_weights (Tensor): The input tensor. + + Returns: + output (Tensor): The output tensor. + + Raises: + TypeError: If input is not a Tensor. + + Supported Platforms: + ``Ascend`` diff --git a/mindspore/core/ops/ops_def/doc/randperm_v2_doc.yaml b/mindspore/core/ops/ops_def/doc/randperm_v2_doc.yaml index 73a40bae1d416b457e92e263eb871a0949ef8308..21848bf789f5389a7f46cbd775fb74bb3a027c55 100644 --- a/mindspore/core/ops/ops_def/doc/randperm_v2_doc.yaml +++ b/mindspore/core/ops/ops_def/doc/randperm_v2_doc.yaml @@ -17,7 +17,7 @@ randperm_v2: Default: ``0`` . It must be non-negative. dtype (mindspore.dtype, optional): The type of output. Its value must be one of the following types: int32, int16, int8, - uint8, int64, float64, float32, float16. Default: mstype.int64. + uint8, int64, float64, float32, float16. Default: ``mstype.int64``. Returns: Tensor. Its shape is specified by the required args `n`. Its type is specified by `dtype`. diff --git a/mindspore/core/ops/ops_def/doc/reshape_doc.yaml b/mindspore/core/ops/ops_def/doc/reshape_doc.yaml index 28c2e41b659b6a26663051424459268489c0ba35..ecaf918c3632ab32fd862a40f657d10dfbcc70bb 100644 --- a/mindspore/core/ops/ops_def/doc/reshape_doc.yaml +++ b/mindspore/core/ops/ops_def/doc/reshape_doc.yaml @@ -2,7 +2,7 @@ reshape: description: | Rearranges the input Tensor based on the given shape. - The 'shape' can only have one -1 at most, in which case it's inferred from the remaining dimensions and + The `shape` can only have one -1 at most, in which case it's inferred from the remaining dimensions and the number of elements in the input. Args: @@ -17,13 +17,13 @@ reshape: \frac{\prod_{i=1}^{R}x_{i}}{y_1\times ...\times y_{k-1}\times y_{k+1}\times...\times y_S} , y_{k+1}, ..., y_S)` Raises: - ValueError: The given 'shape' contains more than one -1. - ValueError: The given 'shape' contains elements less than -1. - ValueError: For scenarios where the given 'shape' does not contain -1, the product of elements of the given - 'shape' is not equal to the product of the input's 'shape', + ValueError: The given `shape` contains more than one -1. + ValueError: The given `shape` contains elements less than -1. + ValueError: For scenarios where the given `shape` does not contain -1, the product of elements of the given + `shape` is not equal to the product of the input's `shape`, :math:`\prod_{i=1}^{R}x_{i} \ne \prod_{i=1}^{S}y_{i}`, (Namely, it does not match the input's array size). - And for scenarios where the given 'shape' contains -1, the product of elements other than -1 of the given - `shape` is an aliquant part of the product of the input's 'shape' :math:`\prod_{i=1}^{R}x_{i}`. + And for scenarios where the given `shape` contains -1, the product of elements other than -1 of the given + `shape` is an aliquant part of the product of the input's `shape` :math:`\prod_{i=1}^{R}x_{i}`. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` diff --git a/mindspore/core/ops/ops_def/doc/rfft_doc.yaml b/mindspore/core/ops/ops_def/doc/rfft_doc.yaml index 3731ffe9d218f91e62676562da69eb6e74093d54..3c784539ec4e472687a5de89d1ffec242b146741 100644 --- a/mindspore/core/ops/ops_def/doc/rfft_doc.yaml +++ b/mindspore/core/ops/ops_def/doc/rfft_doc.yaml @@ -29,7 +29,7 @@ rfft: TypeError: If the `input` type is not Tensor. TypeError: If the `input` data type is not one of: int16, int32, int64, float32, float64. TypeError: If `n` or `dim` type is not int. - ValueError: If `dim` is not in the range of "[ `-input.ndim` , `input.ndim` )". + ValueError: If `dim` is not in the range of :math:`[-input.ndim, -input.ndim)`. ValueError: If `n` is less than 1. ValueError: If `norm` is none of ``"backward"`` , ``"forward"`` or ``"ortho"``. diff --git a/mindspore/core/ops/ops_def/doc/select_doc.yaml b/mindspore/core/ops/ops_def/doc/select_doc.yaml index 3f0de7d4835e6ff922a6fa0dcc5a055734af94c6..a23e687b4f319e19681cb964f78369f543d91118 100644 --- a/mindspore/core/ops/ops_def/doc/select_doc.yaml +++ b/mindspore/core/ops/ops_def/doc/select_doc.yaml @@ -1,30 +1,30 @@ select: description: | The conditional tensor determines whether the corresponding element in the output must be - selected from `x` (if True) or `y` (if False) based on the value of each + selected from `input` (if True) or `other` (if False) based on the value of each element. It can be defined as: .. math:: out_i = \begin{cases} - x_i, & \text{if } cond_i \\ - y_i, & \text{otherwise} + input_i, & \text{if } condition_i \\ + other_i, & \text{otherwise} \end{cases} Inputs: - - **cond** (Tensor[bool]): The condition tensor, decides which element is chosen. + - **condition** (Tensor[bool]): The condition tensor, decides which element is chosen. The shape is :math:`(x_1, x_2, ..., x_N, ..., x_R)`. - - **x** (Tensor): The first Tensor to be selected. + - **input** (Tensor): The first Tensor to be selected. The shape is :math:`(x_1, x_2, ..., x_N, ..., x_R)`. - - **y** (Tensor): The second Tensor to be selected. + - **other** (Tensor): The second Tensor to be selected. The shape is :math:`(x_1, x_2, ..., x_N, ..., x_R)`. Outputs: - Tensor, has the same shape as `cond`. + Tensor, has the same shape as `condition`. Raises: - TypeError: If x or y is not a Tensor. + TypeError: If input or other is not a Tensor. ValueError: The shape of inputs are different. Supported Platforms: diff --git a/mindspore/core/ops/ops_def/erf_op.yaml b/mindspore/core/ops/ops_def/erf_op.yaml index 066e737e4ff864f2e0cf6c7b24d143ca5583b2a6..4b16a79b09274f0de035a375625a9f5a4eadba7a 100644 --- a/mindspore/core/ops/ops_def/erf_op.yaml +++ b/mindspore/core/ops/ops_def/erf_op.yaml @@ -7,4 +7,4 @@ erf: output: dtype: tensor dispatch: - enable: False \ No newline at end of file + enable: True \ No newline at end of file diff --git a/mindspore/core/ops/ops_def/lin_space_ext_op.yaml b/mindspore/core/ops/ops_def/lin_space_ext_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3de7582600d524b2dd75e7db37d970f860ffa58f --- /dev/null +++ b/mindspore/core/ops/ops_def/lin_space_ext_op.yaml @@ -0,0 +1,24 @@ +#operator lin_space_ext +lin_space_ext: + args: + start: + dtype: number + type_cast: tensor + end: + dtype: number + type_cast: tensor + steps: + dtype: int + type_cast: tensor + dtype: + dtype: TypeId + arg_handler: dtype_to_type_id + default: None + returns: + output: + dtype: tensor + function: + disable: True + dispatch: + enable: True + Ascend: LinSpaceExtAscend diff --git a/mindspore/core/ops/ops_def/max_pool_grad_with_indices_op.yaml b/mindspore/core/ops/ops_def/max_pool_grad_with_indices_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a73be362d0bdc1b34b019ac196b6dd32c362897f --- /dev/null +++ b/mindspore/core/ops/ops_def/max_pool_grad_with_indices_op.yaml @@ -0,0 +1,45 @@ +#operator max_pool_grad_with_indices +max_pool_grad_with_indices: + args: + x: + dtype: tensor + grad: + dtype: tensor + argmax: + dtype: tensor + kernel_size: + dtype: tuple[int] + prim_init: True + arg_handler: to_kernel_size + strides: + dtype: tuple[int] + default: None + prim_init: True + arg_handler: to_strides + pads: + dtype: tuple[int] + default: 0 + prim_init: True + arg_handler: to_output_padding + dilation: + dtype: tuple[int] + default: (1, 1) + prim_init: True + arg_handler: to_dilations + ceil_mode: + dtype: bool + default: False + prim_init: True + argmax_type: + dtype: TypeId + default: mstype.int64 + prim_init: True + arg_handler: dtype_to_type_id + returns: + y: + dtype: tensor + function: + disable: True + dispatch: + enable: True + Ascend: MaxPoolGradWithIndicesAscend diff --git a/mindspore/core/ops/ops_def/max_pool_grad_with_mask_op.yaml b/mindspore/core/ops/ops_def/max_pool_grad_with_mask_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4827f867ad9a4f9a59e6361fdcca3d787376ed27 --- /dev/null +++ b/mindspore/core/ops/ops_def/max_pool_grad_with_mask_op.yaml @@ -0,0 +1,45 @@ +#operator max_pool_grad_with_mask +max_pool_grad_with_mask: + args: + x: + dtype: tensor + grad: + dtype: tensor + mask: + dtype: tensor + kernel_size: + dtype: tuple[int] + prim_init: True + arg_handler: to_kernel_size + strides: + dtype: tuple[int] + default: None + prim_init: True + arg_handler: to_strides + pads: + dtype: tuple[int] + default: 0 + prim_init: True + arg_handler: to_output_padding + dilation: + dtype: tuple[int] + default: (1, 1) + prim_init: True + arg_handler: to_dilations + ceil_mode: + dtype: bool + default: False + prim_init: True + argmax_type: + dtype: TypeId + default: mstype.int64 + prim_init: True + arg_handler: dtype_to_type_id + returns: + y: + dtype: tensor + function: + disable: True + dispatch: + enable: True + Ascend: MaxPoolGradWithMaskAscend diff --git a/mindspore/core/ops/ops_def/max_pool_with_indices_op.yaml b/mindspore/core/ops/ops_def/max_pool_with_indices_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9b7b7f36b5e4ccccfe8afa1d758109f8affdfa3 --- /dev/null +++ b/mindspore/core/ops/ops_def/max_pool_with_indices_op.yaml @@ -0,0 +1,43 @@ +#operator max_pool_with_indices +max_pool_with_indices: + args: + x: + dtype: tensor + kernel_size: + dtype: tuple[int] + prim_init: True + arg_handler: to_kernel_size + strides: + dtype: tuple[int] + default: None + prim_init: True + arg_handler: to_strides + pads: + dtype: tuple[int] + default: 0 + prim_init: True + arg_handler: to_output_padding + dilation: + dtype: tuple[int] + default: (1, 1) + prim_init: True + arg_handler: to_dilations + ceil_mode: + dtype: bool + default: False + prim_init: True + argmax_type: + dtype: TypeId + default: mstype.int64 + prim_init: True + arg_handler: dtype_to_type_id + returns: + output: + dtype: tensor + argmax: + dtype: tensor + function: + disable: True + dispatch: + enable: True + Ascend: MaxPoolWithIndicesAscend diff --git a/mindspore/core/ops/ops_def/max_pool_with_mask_op.yaml b/mindspore/core/ops/ops_def/max_pool_with_mask_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c12028e9edb686efc073705ec64f82e85a9b2abe --- /dev/null +++ b/mindspore/core/ops/ops_def/max_pool_with_mask_op.yaml @@ -0,0 +1,43 @@ +#operator max_pool_with_mask +max_pool_with_mask: + args: + x: + dtype: tensor + kernel_size: + dtype: tuple[int] + prim_init: True + arg_handler: to_kernel_size + strides: + dtype: tuple[int] + default: None + prim_init: True + arg_handler: to_strides + pads: + dtype: tuple[int] + default: 0 + prim_init: True + arg_handler: to_output_padding + dilation: + dtype: tuple[int] + default: (1, 1) + prim_init: True + arg_handler: to_dilations + ceil_mode: + dtype: bool + default: False + prim_init: True + argmax_type: + dtype: TypeId + default: mstype.int64 + prim_init: True + arg_handler: dtype_to_type_id + returns: + output: + dtype: tensor + mask: + dtype: tensor + function: + disable: True + dispatch: + enable: True + Ascend: MaxPoolWithMaskAscend diff --git a/mindspore/core/ops/ops_def/multi_scale_deformable_attn_grad_op.yaml b/mindspore/core/ops/ops_def/multi_scale_deformable_attn_grad_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5ea8dc21b8154b6ccb668fc8332fbc270780101 --- /dev/null +++ b/mindspore/core/ops/ops_def/multi_scale_deformable_attn_grad_op.yaml @@ -0,0 +1,28 @@ +#operator multi_scale_deformable_attn_grad +multi_scale_deformable_attn_grad: + args: + value: + dtype: tensor + spatial_shapes: + dtype: tensor + level_start_index: + dtype: tensor + sampling_loc: + dtype: tensor + attn_weight: + dtype: tensor + grad_output: + dtype: tensor + returns: + grad_value: + dtype: tensor + grad_sampling_loc: + dtype: tensor + grad_attn_weight: + dtype: tensor + class: + name: MultiScaleDeformableAttentionV2Grad + function: + disable: True + dispatch: + enable: True diff --git a/mindspore/core/ops/ops_def/multi_scale_deformable_attn_op.yaml b/mindspore/core/ops/ops_def/multi_scale_deformable_attn_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..170990137bb2e0cf010d8c87985b0361d4a85920 --- /dev/null +++ b/mindspore/core/ops/ops_def/multi_scale_deformable_attn_op.yaml @@ -0,0 +1,22 @@ +#operator multi_scale_deformable_attn +multi_scale_deformable_attn: + args: + value: + dtype: tensor + value_spatial_shapes: + dtype: tensor + value_level_start_index: + dtype: tensor + sampling_locations: + dtype: tensor + attention_weights: + dtype: tensor + returns: + output: + dtype: tensor + class: + name: MultiScaleDeformableAttnFunctionV2 + function: + disable: True + dispatch: + enable: True diff --git a/mindspore/core/ops/ops_def/select_op.yaml b/mindspore/core/ops/ops_def/select_op.yaml index 976169269cc01f1a3789149e0911daddc0c3ce88..4322fc41ed5e6f8a66b6bdade659e6d35eddf9de 100644 --- a/mindspore/core/ops/ops_def/select_op.yaml +++ b/mindspore/core/ops/ops_def/select_op.yaml @@ -1,14 +1,18 @@ -#operator select +#operator select/where select: args: - cond: + condition: dtype: tensor - x: + input: dtype: tensor - y: + type_cast: number + other: dtype: tensor + type_cast: number + args_signature: + dtype_group: (condition), (input, other) returns: output: dtype: tensor - function: - disable: True + dispatch: + enable: True diff --git a/mindspore/core/ops/ops_def/slice_ext_op.yaml b/mindspore/core/ops/ops_def/slice_ext_op.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f7c84f6190052792c58ca626619d4165b634904d --- /dev/null +++ b/mindspore/core/ops/ops_def/slice_ext_op.yaml @@ -0,0 +1,23 @@ +#operator slice_ext +slice_ext: + args: + input: + dtype: tensor + dim: + dtype: int + start: + dtype: int + end: + dtype: int + step: + dtype: int + returns: + output: + dtype: tensor + function: + disable: True + class: + name: SliceExt + dispatch: + enable: True + Ascend: SliceExtAscend diff --git a/mindspore/core/ops/ops_func_impl/argmax_with_value.cc b/mindspore/core/ops/ops_func_impl/argmax_with_value.cc index 8014dac0a772bb8f8b479c6c72c6347ee4a98424..f24f39476b31ab7d99f5dda4c7a09ead8bd8d704 100644 --- a/mindspore/core/ops/ops_func_impl/argmax_with_value.cc +++ b/mindspore/core/ops/ops_func_impl/argmax_with_value.cc @@ -162,5 +162,9 @@ TypePtrList ArgMaxWithValueFuncImpl::InferType(const PrimitivePtr &primitive, co TypePtrList type_ptr_list{kInt64, input_x_type}; return type_ptr_list; } + +REGISTER_SIMPLE_INFER(kNameArgMaxWithValue, ArgMaxWithValueFuncImpl) + +REGISTER_SIMPLE_INFER(kNameArgMinWithValue, ArgMaxWithValueFuncImpl) } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/convolution.cc b/mindspore/core/ops/ops_func_impl/convolution.cc index 0136ac0f53e4839e24a3638a71d0ab58808067d2..aab3560a222cac8598d8b9407b2f78545668e3bd 100644 --- a/mindspore/core/ops/ops_func_impl/convolution.cc +++ b/mindspore/core/ops/ops_func_impl/convolution.cc @@ -31,7 +31,32 @@ constexpr size_t kWightIdx = 1; constexpr size_t kStrideIdx = 3; constexpr size_t kPaddingIdx = 4; constexpr size_t kDilationIdx = 5; +constexpr size_t kTransposedIdx = 6; +constexpr size_t kOutputPaddingIdx = 7; +constexpr size_t kGroupsIdx = 8; + +int64_t GetOutputHW(const ShapeVector &input_shape, const ShapeVector &weight_shape, size_t shape_pos, size_t i, + const ArrayValue &stride, const ArrayValue &padding, + const ArrayValue &dilation, bool transposed, const ArrayValue &output_padding) { + if (input_shape[shape_pos] == abstract::Shape::kShapeDimAny || + weight_shape[shape_pos] == abstract::Shape::kShapeDimAny || padding.IsValueUnknown(i) || + dilation.IsValueUnknown(i) || stride.IsValueUnknown(i)) { + return abstract::Shape::kShapeDimAny; + } + + if (!transposed) { + return (input_shape[shape_pos] + 2 * padding[i] - dilation[i] * (weight_shape[shape_pos] - 1) - 1) / stride[i] + 1; + } else { + if (output_padding.IsValueUnknown(i)) { + return abstract::Shape::kShapeDimAny; + } + + return (input_shape[shape_pos] - 1) * stride[i] - 2 * padding[i] + dilation[i] * (weight_shape[shape_pos] - 1) + + output_padding[i] + 1; + } +} } // namespace + BaseShapePtr ConvolutionFuncImpl::InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const { MS_EXCEPTION_IF_NULL(primitive); @@ -59,42 +84,51 @@ BaseShapePtr ConvolutionFuncImpl::InferShape(const PrimitivePtr &primitive, } int64_t N = input_shape[0]; - int64_t Co = weight_shape[0]; + int64_t Co = abstract::Shape::kShapeDimAny; int64_t Ho = abstract::Shape::kShapeDimAny; int64_t Wo = abstract::Shape::kShapeDimAny; - auto stride_value_opt = GetArrayValue(input_args[kStrideIdx]); - auto padding_value_opt = GetArrayValue(input_args[kPaddingIdx]); - auto dilation_value_opt = GetArrayValue(input_args[kDilationIdx]); - - if (!stride_value_opt.has_value() || !padding_value_opt.has_value() || !dilation_value_opt.has_value()) { - MS_LOG(DEBUG) << "stride_value_opt.has_value():" << stride_value_opt.has_value() - << ", padding_value_opt.has_value():" << padding_value_opt.has_value() - << ", dilation_value_opt.has_value():" << dilation_value_opt.has_value(); + auto transposed_opt = GetScalarValue(input_args[kTransposedIdx]->BuildValue()); + if (!transposed_opt.has_value()) { + // 'Co/Ho/Wo' is unknown, if transposed is any value auto output_shape = {N, Co, Ho, Wo}; + MS_LOG(DEBUG) << "transposed_opt has no value, output_shape:" << output_shape; return std::make_shared(output_shape); } - const auto &stride = stride_value_opt.value(); - const auto &padding = padding_value_opt.value(); - const auto &dilation = dilation_value_opt.value(); - - // 'NCHW', the pos of 'H' is 2, the pos of 'W' is 2 - const size_t h_begin_pos = 2; - auto get_out_shape = [&](size_t i) { - if (input_shape[h_begin_pos + i] == abstract::Shape::kShapeDimAny || - weight_shape[h_begin_pos + i] == abstract::Shape::kShapeDimAny || padding.IsValueUnknown(i) || - dilation.IsValueUnknown(i) || stride.IsValueUnknown(i)) { - return abstract::Shape::kShapeDimAny; + auto transposed = transposed_opt.value(); + if (transposed) { + auto groups_opt = GetScalarValue(input_args[kGroupsIdx]->BuildValue()); + if (groups_opt.has_value() && weight_shape[1] != abstract::Shape::kShapeDimAny) { + Co = weight_shape[1] * groups_opt.value(); } + } else { + Co = weight_shape[0]; + } + + auto stride_opt = GetArrayValue(input_args[kStrideIdx]); + auto padding_opt = GetArrayValue(input_args[kPaddingIdx]); + auto dilation_opt = GetArrayValue(input_args[kDilationIdx]); + auto output_padding_opt = GetArrayValue(input_args[kOutputPaddingIdx]); + if (!stride_opt.has_value() || !padding_opt.has_value() || !dilation_opt.has_value() || + (transposed && !output_padding_opt.has_value())) { + auto output_shape = {N, Co, Ho, Wo}; + MS_LOG(DEBUG) << "stride has_value:" << stride_opt.has_value() << ", paddind has_value:" << padding_opt.has_value() + << ", dilation has_value:" << dilation_opt.has_value() + << ", output_padding has_value:" << output_padding_opt.has_value() + << ", output_shape:" << output_shape; + return std::make_shared(output_shape); + } - return (input_shape[h_begin_pos + i] + 2 * padding[i] - dilation[i] * (weight_shape[h_begin_pos + i] - 1) - 1) / - stride[i] + - 1; - }; + const auto &stride = stride_opt.value(); + const auto &padding = padding_opt.value(); + const auto &dilation = dilation_opt.value(); + const auto &output_padding = output_padding_opt.value(); - Ho = get_out_shape(0); - Wo = get_out_shape(1); + constexpr size_t h_begin_pos = 2; // 'NCHW', the pos of 'H' is 2 + constexpr size_t w_begin_pos = 3; // 'NCHW', the pos of 'W' is 3 + Ho = GetOutputHW(input_shape, weight_shape, h_begin_pos, 0, stride, padding, dilation, transposed, output_padding); + Wo = GetOutputHW(input_shape, weight_shape, w_begin_pos, 1, stride, padding, dilation, transposed, output_padding); auto output_shape = {N, Co, Ho, Wo}; return std::make_shared(output_shape); } diff --git a/mindspore/core/ops/ops_func_impl/div.cc b/mindspore/core/ops/ops_func_impl/div.cc index e2f10ac905e9ef4653e718f812e46b867a8d31de..6e88c379ffbef1aded0ee361e44de0cc1f394cee 100644 --- a/mindspore/core/ops/ops_func_impl/div.cc +++ b/mindspore/core/ops/ops_func_impl/div.cc @@ -40,15 +40,18 @@ TypePtr DivFuncImpl::InferType(const PrimitivePtr &primitive, const std::vector< auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); if (context->get_param(MS_CTX_DEVICE_TARGET) == kAscendDevice) { - static std::set intergral_set = {kNumberTypeBool, kNumberTypeUInt8, kNumberTypeInt8, - kNumberTypeInt16, kNumberTypeInt32, kNumberTypeInt64}; + static std::set x_set = {kNumberTypeUInt8, kNumberTypeInt8, kNumberTypeInt16, kNumberTypeInt32, + kNumberTypeInt64}; + static std::set integral_set = {kNumberTypeBool, kNumberTypeUInt8, kNumberTypeInt8, + kNumberTypeInt16, kNumberTypeInt32, kNumberTypeInt64}; auto x_tensor_type = x_dtype->cast(); auto y_tensor_type = y_dtype->cast(); MS_EXCEPTION_IF_NULL(x_tensor_type); MS_EXCEPTION_IF_NULL(y_tensor_type); auto x_type_id = x_tensor_type->element()->type_id(); auto y_type_id = y_tensor_type->element()->type_id(); - if (x_type_id == kNumberTypeFloat32 && intergral_set.find(y_type_id) != intergral_set.end()) { + if ((x_type_id == kNumberTypeFloat32 && integral_set.find(y_type_id) != integral_set.end()) || + (x_set.find(x_type_id) != x_set.end() && integral_set.find(y_type_id) != integral_set.end())) { return kFloat32; } } diff --git a/mindspore/core/ops/ops_func_impl/divmod.cc b/mindspore/core/ops/ops_func_impl/divmod.cc new file mode 100644 index 0000000000000000000000000000000000000000..f71045a9d0eb508c92f59ee61e45ee908898a191 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/divmod.cc @@ -0,0 +1,68 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ops/ops_func_impl/divmod.h" +#include +#include +#include +#include +#include +#include "utils/check_convert_utils.h" +#include "ops/op_enum.h" +#include "abstract/dshape.h" +#include "ops/op_utils.h" +#include "utils/ms_context.h" + +namespace mindspore { +namespace ops { +BaseShapePtr DivModFuncImpl::InferShape(const PrimitivePtr &primitive, + const std::vector &input_args) const { + return BroadCastInferShape(primitive->name(), input_args); +} + +TypePtr DivModFuncImpl::InferType(const PrimitivePtr &primitive, const std::vector &input_args) const { + auto prim_name = primitive->name(); + auto x_dtype = input_args[kIndex0]->GetType(); + auto y_dtype = input_args[kIndex1]->GetType(); + + auto mode = input_args[kIndex2]->GetValue(); + auto rounding_mode = GetScalarValue(mode); + + if (rounding_mode == RoundingMode::TRUNC || rounding_mode == RoundingMode::FLOOR) { + return input_args[0]->GetType()->Clone(); + } else { + static std::set x_set = {kNumberTypeUInt8, kNumberTypeInt8, kNumberTypeInt16, kNumberTypeInt32, + kNumberTypeInt64}; + static std::set integral_set = {kNumberTypeUInt8, kNumberTypeInt8, kNumberTypeInt16, kNumberTypeInt32, + kNumberTypeInt64}; + auto x_tensor_type = x_dtype->cast(); + auto y_tensor_type = y_dtype->cast(); + MS_EXCEPTION_IF_NULL(x_tensor_type); + MS_EXCEPTION_IF_NULL(y_tensor_type); + auto x_type_id = x_tensor_type->element()->type_id(); + auto y_type_id = y_tensor_type->element()->type_id(); + if ((x_type_id == kNumberTypeFloat32 && integral_set.find(y_type_id) != integral_set.end()) || + (integral_set.find(x_type_id) != integral_set.end() && integral_set.find(y_type_id) != integral_set.end())) { + return kFloat32; + } + std::map types; + (void)types.emplace("x", x_dtype); + (void)types.emplace("y", y_dtype); + return CheckAndConvertUtils::CheckMathBinaryOpTensorType(types, common_valid_types_with_complex, prim_name); + } +} +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/divmod.h b/mindspore/core/ops/ops_func_impl/divmod.h new file mode 100644 index 0000000000000000000000000000000000000000..088a30590752e83280fd7ed6a44871926467521a --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/divmod.h @@ -0,0 +1,35 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_DIVMOD_H_ +#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_DIVMOD_H_ + +#include +#include +#include "ops/op_name.h" +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore { +namespace ops { +class MIND_API DivModFuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMP_DivMod_H_ diff --git a/mindspore/core/ops/ops_func_impl/erf.cc b/mindspore/core/ops/ops_func_impl/erf.cc index 191677ec8859b3abc8e3c0c2e446a93c19aecf07..5ae22a94fbfbdcd934c55cce69a66c448e84b7fc 100644 --- a/mindspore/core/ops/ops_func_impl/erf.cc +++ b/mindspore/core/ops/ops_func_impl/erf.cc @@ -24,7 +24,16 @@ BaseShapePtr ErfFuncImpl::InferShape(const PrimitivePtr &primitive, } TypePtr ErfFuncImpl::InferType(const PrimitivePtr &primitive, const std::vector &input_args) const { - return input_args[kIndex0]->GetType()->Clone(); + auto input_type = input_args[kIndex0]->GetType(); + auto input_type_id = input_type->cast()->element()->type_id(); + static const std::vector int_or_bool = {kNumberTypeInt64, kNumberTypeBool}; + bool is_int_or_bool = std::any_of(int_or_bool.begin(), int_or_bool.end(), + [&input_type_id](const TypeId &type_id) { return input_type_id == type_id; }); + if (is_int_or_bool) { + return std::make_shared(kFloat32); + } else { + return input_type->Clone(); + } } } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/lin_space_ext.cc b/mindspore/core/ops/ops_func_impl/lin_space_ext.cc new file mode 100644 index 0000000000000000000000000000000000000000..058a6276672ce2ae0a64e0bf04982046d67fd27a --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/lin_space_ext.cc @@ -0,0 +1,118 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "ops/ops_func_impl/lin_space_ext.h" +#include "utils/check_convert_utils.h" +#include "ops/op_utils.h" + +namespace mindspore { +namespace ops { +BaseShapePtr LinSpaceExtFuncImpl::InferShape(const PrimitivePtr &primitive, + const std::vector &input_args) const { + auto steps_opt = GetScalarValue(input_args[kInputIndex2]->GetValue()); + if (!(CheckAndConvertUtils::IsTensor(input_args[kInputIndex0]) && + CheckAndConvertUtils::IsTensor(input_args[kInputIndex1]))) { + if (!MS_LIKELY(steps_opt.has_value())) { + ShapeVector infered_shape{abstract::Shape::kShapeDimAny}; + return std::make_shared(infered_shape); + } else { + int64_t steps = steps_opt.value(); + MS_CHECK_VALUE(steps > 0, + CheckAndConvertUtils::FormatCheckIntegerMsg("steps", steps, kGreaterThan, 0, primitive)); + ShapeVector infered_shape{steps}; + return std::make_shared(infered_shape); + } + } + + const auto &start_shape_ptr = input_args[kInputIndex0]->GetShape(); + const auto &start_shape = start_shape_ptr->GetShapeVector(); + const auto &end_shape_ptr = input_args[kInputIndex1]->GetShape(); + const auto &end_shape = end_shape_ptr->GetShapeVector(); + const auto &steps_value_ptr = input_args[kInputIndex2]->GetValue(); + const auto &steps_value = GetScalarValue(steps_value_ptr); + if (MS_UNLIKELY(IsDynamic(start_shape) || IsDynamic(end_shape))) { + ShapeVector infered_shape{abstract::Shape::kShapeDimAny}; + return std::make_shared(infered_shape); + } + // 0-D tensor input. + if (start_shape.empty() && end_shape.empty()) { + // Output is dynamic shape. + if (!steps_value.has_value()) { + ShapeVector infered_shape{abstract::Shape::kShapeDimAny}; + return std::make_shared(infered_shape); + } else { + int64_t steps = steps_value.value(); + MS_CHECK_VALUE(steps > 0, + CheckAndConvertUtils::FormatCheckIntegerMsg("steps", steps, kGreaterThan, 0, primitive)); + ShapeVector infered_shape{steps}; + return std::make_shared(infered_shape); + } + } + // Support vmap. + size_t batch_rank = 0; + if (primitive->HasAttr(kBatchRank)) { + auto value_ptr = primitive->GetAttr(kBatchRank); + batch_rank = LongToSize(GetValue(value_ptr)); + } + + MS_CHECK_VALUE( + start_shape.size() == batch_rank, + CheckAndConvertUtils::FormatCheckIntegerMsg("rank of 'start'", start_shape.size(), kEqual, batch_rank, primitive)); + MS_CHECK_VALUE(end_shape.size() == batch_rank, CheckAndConvertUtils::FormatCheckIntegerMsg( + "rank of 'end'", end_shape.size(), kEqual, batch_rank, primitive)); + MS_CHECK_VALUE(start_shape == end_shape, + CheckAndConvertUtils::FormatCheckMsg("shape of 'start'", start_shape, kEqual, end_shape, primitive)); + + ShapeVector out_shape(start_shape.begin(), start_shape.end()); + if (!steps_value.has_value()) { + out_shape.push_back(abstract::Shape::kShapeDimAny); + } else { + int64_t steps = steps_value.value(); + MS_CHECK_VALUE(steps > 0, CheckAndConvertUtils::FormatCheckIntegerMsg("steps", steps, kGreaterThan, 0, primitive)); + out_shape.push_back(steps); + } + return std::make_shared(out_shape); +} + +TypePtr LinSpaceExtFuncImpl::InferType(const PrimitivePtr &primitive, + const std::vector &input_args) const { + MS_EXCEPTION_IF_NULL(input_args[kInputIndex0]); + MS_EXCEPTION_IF_NULL(input_args[kInputIndex1]); + + auto start_dtype = input_args[kInputIndex0]->GetType(); + auto end_dtype = input_args[kInputIndex1]->GetType(); + if (CheckAndConvertUtils::IsTensor(input_args[kInputIndex0]) || + CheckAndConvertUtils::IsTensor(input_args[kInputIndex1])) { + std::map type_dict = { + {"start type", start_dtype}, + {"end type", end_dtype}, + }; + (void)CheckAndConvertUtils::CheckTensorTypeSame(type_dict, common_valid_types_with_bool, primitive->name()); + } + TypeId type_id; + if (input_args[kInputIndex3]->GetType()->isa()) { + type_id = kFloat32->type_id(); + } else { + auto dtype_opt = GetScalarValue(input_args[kInputIndex3]->GetValue()); + MS_CHECK_VALUE(dtype_opt.has_value(), primitive->name() + " error: dtype input should have valid value."); + type_id = static_cast(dtype_opt.value()); + } + return std::make_shared(TypeIdToType(type_id)); +} +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/lin_space_ext.h b/mindspore/core/ops/ops_func_impl/lin_space_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..ec9153057828b6809d5abac4706528569fef0519 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/lin_space_ext.h @@ -0,0 +1,35 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_LIN_SPACE_EXT_H_ +#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_LIN_SPACE_EXT_H_ + +#include +#include +#include "mindapi/base/types.h" +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore { +namespace ops { +class MIND_API LinSpaceExtFuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_LIN_SPACE_EXT_H_ diff --git a/mindspore/core/ops/ops_func_impl/max_pool_grad_with_indices.cc b/mindspore/core/ops/ops_func_impl/max_pool_grad_with_indices.cc new file mode 100644 index 0000000000000000000000000000000000000000..5b11427f67864e87fc1334c3cf5d47a43a4cfee4 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/max_pool_grad_with_indices.cc @@ -0,0 +1,43 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ops/ops_func_impl/max_pool_grad_with_indices.h" +#include +#include +#include "ops/op_utils.h" +#include "utils/ms_context.h" + +namespace mindspore { +namespace ops { +TypePtr MaxPoolGradWithIndicesFuncImpl::InferType(const PrimitivePtr &primitive, + const std::vector &input_args) const { + auto x_type = input_args[kIndex0]->GetType(); + return x_type->Clone(); +} + +BaseShapePtr MaxPoolGradWithIndicesFuncImpl::InferShape( + const PrimitivePtr &primitive, const std::vector &input_args) const { + auto x_shape = input_args[kIndex0]->GetShape()->GetShapeVector(); + if (IsDynamicRank(x_shape)) { + return std::make_shared( + std::vector{abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny, + abstract::Shape::kShapeDimAny}); + } + return std::make_shared(x_shape); +} + +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/max_pool_grad_with_indices.h b/mindspore/core/ops/ops_func_impl/max_pool_grad_with_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..00a4cab2da472aac1e87c1e98499163511a3beda --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/max_pool_grad_with_indices.h @@ -0,0 +1,34 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_GRAD_WITH_INDICES_H_ +#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_GRAD_WITH_INDICES_H_ + +#include +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore { +namespace ops { +class MIND_API MaxPoolGradWithIndicesFuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_GRAD_WITH_INDICES_H_ diff --git a/mindspore/core/ops/ops_func_impl/max_pool_grad_with_mask.cc b/mindspore/core/ops/ops_func_impl/max_pool_grad_with_mask.cc new file mode 100644 index 0000000000000000000000000000000000000000..89f51dcd7120ada2372fd9a9837e109c3a73498f --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/max_pool_grad_with_mask.cc @@ -0,0 +1,43 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ops/ops_func_impl/max_pool_grad_with_mask.h" +#include +#include +#include "ops/op_utils.h" +#include "utils/ms_context.h" + +namespace mindspore { +namespace ops { +TypePtr MaxPoolGradWithMaskFuncImpl::InferType(const PrimitivePtr &primitive, + const std::vector &input_args) const { + auto x_type = input_args[kIndex0]->GetType(); + return x_type->Clone(); +} + +BaseShapePtr MaxPoolGradWithMaskFuncImpl::InferShape(const PrimitivePtr &primitive, + const std::vector &input_args) const { + auto x_shape = input_args[kIndex0]->GetShape()->GetShapeVector(); + if (IsDynamicRank(x_shape)) { + return std::make_shared( + std::vector{abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny, + abstract::Shape::kShapeDimAny}); + } + return std::make_shared(x_shape); +} + +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/max_pool_grad_with_mask.h b/mindspore/core/ops/ops_func_impl/max_pool_grad_with_mask.h new file mode 100644 index 0000000000000000000000000000000000000000..d330295e28b9cda415fcb922f6ff7bdc87c83d05 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/max_pool_grad_with_mask.h @@ -0,0 +1,34 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_GRAD_WITH_MASK_H_ +#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_GRAD_WITH_MASK_H_ + +#include +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore { +namespace ops { +class MIND_API MaxPoolGradWithMaskFuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_GRAD_WITH_MASK_H_ diff --git a/mindspore/core/ops/ops_func_impl/max_pool_with_indices.cc b/mindspore/core/ops/ops_func_impl/max_pool_with_indices.cc new file mode 100644 index 0000000000000000000000000000000000000000..4e1c103660cb197455b0fbb0a5efb003c7fba055 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/max_pool_with_indices.cc @@ -0,0 +1,243 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ops/ops_func_impl/max_pool_with_indices.h" +#include +#include +#include +#include +#include "include/common/utils/utils.h" +#include "ops/op_utils.h" +#include "utils/check_convert_utils.h" +#include "utils/ms_context.h" + +namespace mindspore { +namespace ops { +TypePtr MaxPoolWithIndicesFuncImpl::InferType(const PrimitivePtr &primitive, + const std::vector &input_args) const { + const std::set valid_types = {kInt8, kInt16, kInt32, kInt64, kUInt8, kUInt16, + kUInt32, kUInt64, kFloat16, kFloat32, kFloat64}; + (void)CheckAndConvertUtils::CheckTensorTypeValid("input", input_args[kIndex0]->GetType(), valid_types, + primitive->name()); + auto output_dtype = input_args[kIndex0]->GetType(); + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + TypePtr argmax_dtype; + auto number_type = input_args[kIndex6]->GetValue(); + auto number_type_opt = GetScalarValue(number_type); + MS_CHECK_VALUE(number_type_opt.has_value(), primitive->name() + " error: argmax dtype should be valid."); + auto target_type = TypeIdToType(static_cast(number_type_opt.value())); + if (context->get_param(MS_CTX_DEVICE_TARGET) == kAscendDevice) { + (void)CheckAndConvertUtils::CheckTensorTypeValid("input", input_args[kIndex0]->GetType(), {kFloat32}, + primitive->name()); + if (target_type != kInt64) { + MS_LOG(WARNING) << "While running in Ascend, the attribute `argmax_type` of " << primitive->name() + << " is disabled, DO NOT set it."; + } + argmax_dtype = std::make_shared(kInt32); + } else { + if (target_type == kInt32) { + argmax_dtype = std::make_shared(kInt32); + } else if (target_type == kInt64) { + argmax_dtype = std::make_shared(kInt64); + } else { + MS_EXCEPTION(TypeError) << "For " << primitive->name() << ", the type of argmax should be int32 or int64."; + } + } + std::vector type_list = {output_dtype, argmax_dtype}; + return std::make_shared(type_list); +} + +inline int64_t IndicesComputeSize(int64_t in_value, const ArrayValue &kernel_size, + const ArrayValue &strides, const ArrayValue &pads, + const ArrayValue &dilation, size_t index, bool ceil_mode) { + int64_t out_value = 0; + const int64_t factor = 2; + if (in_value == abstract::Shape::kShapeDimAny) { + out_value = abstract::Shape::kShapeDimAny; + } else if (kernel_size.IsValueUnknown(index) || strides.IsValueUnknown(index) || pads.IsValueUnknown(index) || + dilation.IsValueUnknown(index)) { + out_value = abstract::Shape::kShapeDimAny; + } else { + auto out_d = + (static_cast(in_value + factor * pads[index] - dilation[index] * (kernel_size[index] - 1) - 1) / + static_cast(strides[index])) + + 1; + if (ceil_mode) { + out_value = static_cast(ceil(out_d)); + if ((out_value - 1) * strides[index] >= in_value + pads[index]) { + --out_value; + } + } else { + out_value = static_cast(floor(out_d)); + } + if (out_value <= 0) { + MS_EXCEPTION(ValueError) << "The index[" << index + kIndex2 << "] of input is [" << out_value + << "], which is invalid shape of MaxPoolWithIndices."; + } + } + return out_value; +} + +inline void IndicesCheckPositiveVector(const string &arg_name, const ArrayValue &array, + const string &prim_name, bool exclude_zeros) { + for (size_t i = 0; i < array.size(); ++i) { + if (exclude_zeros) { + if (MS_UNLIKELY(array[i] <= 0)) { + MS_LOG(EXCEPTION) << "For " << prim_name << ", '" << arg_name << "' must be positive, but it's " + << array.ToString() << "."; + } + } else { + if (MS_UNLIKELY(array[i] < 0)) { + MS_LOG(EXCEPTION) << "For " << prim_name << ", '" << arg_name << "' must be not negetive, but it's " + << array.ToString() << "."; + } + } + } +} + +BaseShapePtr MaxPoolWithIndicesFuncImpl::InferShape(const PrimitivePtr &primitive, + const std::vector &input_args) const { + const size_t kAttrH = 0; + const size_t kAttrW = 1; + const int64_t kInputShapeSize = 4; + const int64_t kAttrsSize = 2; + auto x_shape = input_args[kIndex0]->GetShape()->GetShapeVector(); + if (IsDynamicRank(x_shape)) { + std::vector shape_list = {std::make_shared(std::vector{ + abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny, + abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny}), + std::make_shared(std::vector{ + abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny, + abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny})}; + return std::make_shared(shape_list); + } + (void)CheckAndConvertUtils::CheckInteger("input x rank", SizeToLong(x_shape.size()), kEqual, kInputShapeSize, + primitive->name()); + auto batch = x_shape[kIndex0]; + auto channel = x_shape[kIndex1]; + + auto kernel_size = input_args[kIndex1]->GetValue(); + auto kernel_size_array_opt = GetArrayValue(kernel_size); + ValuePtr strides; + if (input_args[kIndex2]->GetType()->type_id() == kMetaTypeNone) { + strides = kernel_size; + } else { + strides = input_args[kIndex2]->GetValue(); + } + auto strides_array_opt = GetArrayValue(strides); + auto pads = input_args[kIndex3]->GetValue(); + auto pads_array_opt = GetArrayValue(pads); + auto dilation = input_args[kIndex4]->GetValue(); + auto dilation_array_opt = GetArrayValue(dilation); + auto ceil_mode = input_args[kIndex5]->GetValue(); + auto ceil_mode_scalar_opt = GetScalarValue(ceil_mode); + if (!kernel_size_array_opt.has_value() || !strides_array_opt.has_value() || !pads_array_opt.has_value() || + !dilation_array_opt.has_value() || !ceil_mode_scalar_opt.has_value()) { + ShapeVector dyn_output{batch, channel, abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny}; + std::vector shape_list = {std::make_shared(dyn_output), + std::make_shared(dyn_output)}; + return std::make_shared(shape_list); + } + const auto &kernel_size_array = kernel_size_array_opt.value(); + const auto &strides_array = strides_array_opt.value(); + const auto &pads_array = pads_array_opt.value(); + const auto &dilation_array = dilation_array_opt.value(); + auto ceil_mode_scalar = ceil_mode_scalar_opt.value(); + + (void)CheckAndConvertUtils::CheckInteger("kernel_size rank", SizeToLong(kernel_size_array.size()), kEqual, kAttrsSize, + primitive->name()); + (void)CheckAndConvertUtils::CheckInteger("strides rank", SizeToLong(strides_array.size()), kEqual, kAttrsSize, + primitive->name()); + (void)CheckAndConvertUtils::CheckInteger("pads rank", SizeToLong(pads_array.size()), kEqual, kAttrsSize, + primitive->name()); + (void)CheckAndConvertUtils::CheckInteger("dilation rank", SizeToLong(dilation_array.size()), kEqual, kAttrsSize, + primitive->name()); + auto H_in = x_shape[kIndex2]; + auto W_in = x_shape[kIndex3]; + auto H_out = + IndicesComputeSize(H_in, kernel_size_array, strides_array, pads_array, dilation_array, kAttrH, ceil_mode_scalar); + auto W_out = + IndicesComputeSize(W_in, kernel_size_array, strides_array, pads_array, dilation_array, kAttrW, ceil_mode_scalar); + ShapeVector output_shape = {x_shape[kIndex0], x_shape[kIndex1], H_out, W_out}; + ShapeVector argmax_shape = output_shape; + std::vector shape_list = {std::make_shared(output_shape), + std::make_shared(argmax_shape)}; + return std::make_shared(shape_list); +} + +int32_t MaxPoolWithIndicesFuncImpl::CheckValidation(const PrimitivePtr &primitive, + const std::vector &input_args) const { + int32_t check_status = OP_CHECK_SUCCESS; + + const size_t kAttrH = 0; + const size_t kAttrW = 1; + auto kernel_size = input_args[kIndex1]->GetValue(); + auto kernel_size_array_opt = GetArrayValue(kernel_size); + ValuePtr strides; + if (input_args[kIndex2]->GetType()->type_id() == kMetaTypeNone) { + strides = kernel_size; + } else { + strides = input_args[kIndex2]->GetValue(); + } + auto strides_array_opt = GetArrayValue(strides); + auto pads = input_args[kIndex3]->GetValue(); + auto pads_array_opt = GetArrayValue(pads); + auto dilation = input_args[kIndex4]->GetValue(); + auto dilation_array_opt = GetArrayValue(dilation); + + if (MS_UNLIKELY(!kernel_size_array_opt.has_value() || !strides_array_opt.has_value() || !pads_array_opt.has_value() || + !dilation_array_opt.has_value())) { + check_status = OP_CHECK_RETRY; + } else { + const auto &kernel_size_array = kernel_size_array_opt.value(); + const auto &strides_array = strides_array_opt.value(); + const auto &pads_array = pads_array_opt.value(); + const auto &dilation_array = dilation_array_opt.value(); + if (MS_UNLIKELY(kernel_size_array.HasUnknownValue() || strides_array.HasUnknownValue() || + pads_array.HasUnknownValue() || dilation_array.HasUnknownValue())) { + check_status = OP_CHECK_RETRY; + } else { + IndicesCheckPositiveVector(kKernelSize, kernel_size_array, primitive->name(), true); + IndicesCheckPositiveVector(kStrides, strides_array, primitive->name(), true); + IndicesCheckPositiveVector(kPads, pads_array, primitive->name(), false); + IndicesCheckPositiveVector(kDilation, dilation_array, primitive->name(), true); + + double half_factor = 0.5; + if ((pads_array[kAttrH] > static_cast(static_cast(kernel_size_array[kAttrH]) * half_factor)) || + (pads_array[kAttrW] > static_cast(static_cast(kernel_size_array[kAttrW]) * half_factor))) { + MS_EXCEPTION(ValueError) + << "It is required that the `pads` is no more than half of the `kernel_size`, but gets pads(" + << pads_array[kAttrH] << ", " << pads_array[kAttrW] << ") and kernel_size(" << kernel_size_array[kAttrH] + << ", " << kernel_size_array[kAttrW] << ")."; + } + + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + const auto &dilation_vector = dilation_array.ToVector(); + if (context->get_param(MS_CTX_DEVICE_TARGET) == kAscendDevice && + std::any_of(dilation_vector.begin(), dilation_vector.end(), + [](const int64_t &value) { return value != 1; })) { + MS_EXCEPTION(ValueError) << "While running in Ascend, the attribute of `dilation` of '" << primitive->name() + << "' is required to be all one, but got (" << dilation_vector[kAttrH] << ", " + << dilation_vector[kAttrW] << ")."; + } + } + } + return check_status; +} +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/max_pool_with_indices.h b/mindspore/core/ops/ops_func_impl/max_pool_with_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..999c1dc220e720a06a123b54274cc60ce0feb6b1 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/max_pool_with_indices.h @@ -0,0 +1,36 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_WITH_INDICES_H_ +#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_WITH_INDICES_H_ + +#include +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore { +namespace ops { +class MIND_API MaxPoolWithIndicesFuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; + + int32_t CheckValidation(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_WITH_INDICES_H_ diff --git a/mindspore/core/ops/ops_func_impl/max_pool_with_mask.cc b/mindspore/core/ops/ops_func_impl/max_pool_with_mask.cc new file mode 100644 index 0000000000000000000000000000000000000000..ba576663fd78884a4dffd625c39d651b43683eb5 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/max_pool_with_mask.cc @@ -0,0 +1,218 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ops/ops_func_impl/max_pool_with_mask.h" +#include +#include +#include +#include +#include "include/common/utils/utils.h" +#include "ops/op_utils.h" +#include "utils/check_convert_utils.h" +#include "utils/ms_context.h" + +namespace mindspore { +namespace ops { +TypePtr MaxPoolWithMaskFuncImpl::InferType(const PrimitivePtr &primitive, + const std::vector &input_args) const { + auto output_dtype = input_args[kIndex0]->GetType(); + (void)CheckAndConvertUtils::CheckTensorTypeValid("input", input_args[kIndex0]->GetType(), {kFloat16, kFloat32}, + primitive->name()); + std::vector type_list = {output_dtype, kInt8}; + return std::make_shared(type_list); +} + +inline int64_t MaskComputeSize(int64_t in_value, const ArrayValue &kernel_size, + const ArrayValue &strides, const ArrayValue &pads, + const ArrayValue &dilation, size_t index, bool ceil_mode) { + int64_t out_value = 0; + const int64_t factor = 2; + if (in_value == abstract::Shape::kShapeDimAny) { + out_value = abstract::Shape::kShapeDimAny; + } else if (kernel_size.IsValueUnknown(index) || strides.IsValueUnknown(index) || pads.IsValueUnknown(index) || + dilation.IsValueUnknown(index)) { + out_value = abstract::Shape::kShapeDimAny; + } else { + auto out_d = + (static_cast(in_value + factor * pads[index] - dilation[index] * (kernel_size[index] - 1) - 1) / + static_cast(strides[index])) + + 1; + if (ceil_mode) { + out_value = static_cast(ceil(out_d)); + if ((out_value - 1) * strides[index] >= in_value + pads[index]) { + --out_value; + } + } else { + out_value = static_cast(floor(out_d)); + } + if (out_value <= 0) { + MS_EXCEPTION(ValueError) << "The index[" << index + kIndex2 << "] of input is [" << out_value + << "], which is invalid shape of MaxPoolWithMask."; + } + } + return out_value; +} + +inline void MaskCheckPositiveVector(const string &arg_name, const ArrayValue &array, const string &prim_name, + bool exclude_zeros) { + for (size_t i = 0; i < array.size(); ++i) { + if (exclude_zeros) { + if (MS_UNLIKELY(array[i] <= 0)) { + MS_LOG(EXCEPTION) << "For " << prim_name << ", '" << arg_name << "' must be positive, but it's " + << array.ToString() << "."; + } + } else { + if (MS_UNLIKELY(array[i] < 0)) { + MS_LOG(EXCEPTION) << "For " << prim_name << ", '" << arg_name << "' must be not negetive, but it's " + << array.ToString() << "."; + } + } + } +} + +BaseShapePtr MaxPoolWithMaskFuncImpl::InferShape(const PrimitivePtr &primitive, + const std::vector &input_args) const { + const size_t kAttrH = 0; + const size_t kAttrW = 1; + const int64_t kInputShapeSize = 4; + const int64_t kAttrsSize = 2; + auto x_shape = input_args[kIndex0]->GetShape()->GetShapeVector(); + if (IsDynamicRank(x_shape)) { + std::vector shape_list = {std::make_shared(std::vector{ + abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny, + abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny}), + std::make_shared(std::vector{ + abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny, + abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny})}; + return std::make_shared(shape_list); + } + (void)CheckAndConvertUtils::CheckInteger("input x rank", SizeToLong(x_shape.size()), kEqual, kInputShapeSize, + primitive->name()); + auto batch = x_shape[kIndex0]; + auto channel = x_shape[kIndex1]; + + auto kernel_size = input_args[kIndex1]->GetValue(); + auto kernel_size_array_opt = GetArrayValue(kernel_size); + ValuePtr strides; + if (input_args[kIndex2]->GetType()->type_id() == kMetaTypeNone) { + strides = kernel_size; + } else { + strides = input_args[kIndex2]->GetValue(); + } + auto strides_array_opt = GetArrayValue(strides); + auto pads = input_args[kIndex3]->GetValue(); + auto pads_array_opt = GetArrayValue(pads); + auto dilation = input_args[kIndex4]->GetValue(); + auto dilation_array_opt = GetArrayValue(dilation); + auto ceil_mode = input_args[kIndex5]->GetValue(); + auto ceil_mode_scalar_opt = GetScalarValue(ceil_mode); + if (!kernel_size_array_opt.has_value() || !strides_array_opt.has_value() || !pads_array_opt.has_value() || + !dilation_array_opt.has_value() || !ceil_mode_scalar_opt.has_value()) { + ShapeVector dyn_output{batch, channel, abstract::Shape::kShapeDimAny, abstract::Shape::kShapeDimAny}; + std::vector shape_list = {std::make_shared(dyn_output), + std::make_shared(dyn_output)}; + return std::make_shared(shape_list); + } + const auto &kernel_size_array = kernel_size_array_opt.value(); + const auto &strides_array = strides_array_opt.value(); + const auto &pads_array = pads_array_opt.value(); + const auto &dilation_array = dilation_array_opt.value(); + auto ceil_mode_scalar = ceil_mode_scalar_opt.value(); + + (void)CheckAndConvertUtils::CheckInteger("kernel_size rank", SizeToLong(kernel_size_array.size()), kEqual, kAttrsSize, + primitive->name()); + (void)CheckAndConvertUtils::CheckInteger("strides rank", SizeToLong(strides_array.size()), kEqual, kAttrsSize, + primitive->name()); + (void)CheckAndConvertUtils::CheckInteger("pads rank", SizeToLong(pads_array.size()), kEqual, kAttrsSize, + primitive->name()); + (void)CheckAndConvertUtils::CheckInteger("dilation rank", SizeToLong(dilation_array.size()), kEqual, kAttrsSize, + primitive->name()); + auto H_in = x_shape[kIndex2]; + auto W_in = x_shape[kIndex3]; + auto H_out = + MaskComputeSize(H_in, kernel_size_array, strides_array, pads_array, dilation_array, kAttrH, ceil_mode_scalar); + auto W_out = + MaskComputeSize(W_in, kernel_size_array, strides_array, pads_array, dilation_array, kAttrW, ceil_mode_scalar); + ShapeVector output_shape = {x_shape[kIndex0], x_shape[kIndex1], H_out, W_out}; + ShapeVector argmax_shape = {x_shape[kIndex0], x_shape[kIndex1], kernel_size_array[kAttrH] * kernel_size_array[kAttrW], + (static_cast(ceil(static_cast(H_out * W_out) / 16)) + 1) * 2 * 16}; + + std::vector shape_list = {std::make_shared(output_shape), + std::make_shared(argmax_shape)}; + return std::make_shared(shape_list); +} +int32_t MaxPoolWithMaskFuncImpl::CheckValidation(const PrimitivePtr &primitive, + const std::vector &input_args) const { + int32_t check_status = OP_CHECK_SUCCESS; + + const size_t kAttrH = 0; + const size_t kAttrW = 1; + auto kernel_size = input_args[kIndex1]->GetValue(); + auto kernel_size_array_opt = GetArrayValue(kernel_size); + ValuePtr strides; + if (input_args[kIndex2]->GetType()->type_id() == kMetaTypeNone) { + strides = kernel_size; + } else { + strides = input_args[kIndex2]->GetValue(); + } + auto strides_array_opt = GetArrayValue(strides); + auto pads = input_args[kIndex3]->GetValue(); + auto pads_array_opt = GetArrayValue(pads); + auto dilation = input_args[kIndex4]->GetValue(); + auto dilation_array_opt = GetArrayValue(dilation); + + if (MS_UNLIKELY(!kernel_size_array_opt.has_value() || !strides_array_opt.has_value() || !pads_array_opt.has_value() || + !dilation_array_opt.has_value())) { + check_status = OP_CHECK_RETRY; + } else { + const auto &kernel_size_array = kernel_size_array_opt.value(); + const auto &strides_array = strides_array_opt.value(); + const auto &pads_array = pads_array_opt.value(); + const auto &dilation_array = dilation_array_opt.value(); + if (MS_UNLIKELY(kernel_size_array.HasUnknownValue() || strides_array.HasUnknownValue() || + pads_array.HasUnknownValue() || dilation_array.HasUnknownValue())) { + check_status = OP_CHECK_RETRY; + } else { + MaskCheckPositiveVector(kKernelSize, kernel_size_array, primitive->name(), true); + MaskCheckPositiveVector(kStrides, strides_array, primitive->name(), true); + MaskCheckPositiveVector(kPads, pads_array, primitive->name(), false); + MaskCheckPositiveVector(kDilation, dilation_array, primitive->name(), true); + + double half_factor = 0.5; + if ((pads_array[kAttrH] > static_cast(static_cast(kernel_size_array[kAttrH]) * half_factor)) || + (pads_array[kAttrW] > static_cast(static_cast(kernel_size_array[kAttrW]) * half_factor))) { + MS_EXCEPTION(ValueError) + << "It is required that the `pads` is no more than half of the `kernel_size`, but gets pads(" + << pads_array[kAttrH] << ", " << pads_array[kAttrW] << ") and kernel_size(" << kernel_size_array[kAttrH] + << ", " << kernel_size_array[kAttrW] << ")."; + } + + auto context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context); + const auto &dilation_vector = dilation_array.ToVector(); + if (context->get_param(MS_CTX_DEVICE_TARGET) == kAscendDevice && + std::any_of(dilation_vector.begin(), dilation_vector.end(), + [](const int64_t &value) { return value != 1; })) { + MS_EXCEPTION(ValueError) << "While running in Ascend, the attribute of `dilation` of '" << primitive->name() + << "' is required to be all one, but got (" << dilation_vector[kAttrH] << ", " + << dilation_vector[kAttrW] << ")."; + } + } + } + return check_status; +} +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/max_pool_with_mask.h b/mindspore/core/ops/ops_func_impl/max_pool_with_mask.h new file mode 100644 index 0000000000000000000000000000000000000000..d7d662058f0f3427e3e5fb48965c48609edd97ab --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/max_pool_with_mask.h @@ -0,0 +1,36 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_WITH_MASK_H_ +#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_WITH_MASK_H_ + +#include +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore { +namespace ops { +class MIND_API MaxPoolWithMaskFuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; + + int32_t CheckValidation(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_MAX_POOL_WITH_MASK_H_ diff --git a/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn.cc b/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn.cc new file mode 100644 index 0000000000000000000000000000000000000000..2bf5a519747407d3db38082fac095b4bb82c4624 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn.cc @@ -0,0 +1,85 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include "abstract/ops/primitive_infer_map.h" +#include "ops/nn_ops.h" +#include "utils/check_convert_utils.h" +#include "ops/primitive_c.h" +#include "mindapi/src/helper.h" +#include "ops/ops_func_impl/multi_scale_deformable_attn.h" +#include "ops/auto_generate/gen_lite_ops.h" + +namespace mindspore { +namespace ops { + +enum MultiScaleDeformableAttnInputIndex : size_t { + kMultiScaleDeformableAttnInputValueIndex = 0, + kMultiScaleDeformableAttnInputValueSpatialShapesIndex, + kMultiScaleDeformableAttnInputValueLevelStartIndex, + kMultiScaleDeformableAttnInputSamplingLocationsIndex, + kMultiScaleDeformableAttnInputAttentionWeightsIndex, + kMultiScaleDeformableAttnInputsNum, +}; + +enum MultiScaleDeformableAttnOutputIndex : size_t { + kMultiScaleDeformableAttnOutputAttentionOutIndex = 0, + kMultiScaleDeformableAttnOutputsNum, +}; + +abstract::ShapePtr MultiScaleDeformableAttnInferShape(const PrimitivePtr &prim, + const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(prim); + auto value_shape = input_args[kMultiScaleDeformableAttnInputValueIndex]->GetShape()->GetShapeVector(); + auto sp_loc_shape = input_args[kMultiScaleDeformableAttnInputSamplingLocationsIndex]->GetShape()->GetShapeVector(); + ShapeVector attention_out_shape(3, abstract::Shape::kShapeDimAny); + attention_out_shape[0] = value_shape[0]; + attention_out_shape[1] = sp_loc_shape[1]; + attention_out_shape[2] = value_shape[1] * value_shape[3]; + return std::make_shared(attention_out_shape); +} + +TypePtr MultiScaleDeformableAttnInferType(const PrimitivePtr &prim, const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(prim); + auto op_name = prim->name(); + std::map out_types; + const std::set out_valid_types = {kFloat16, kFloat32}; + (void)out_types.emplace("value", input_args[kMultiScaleDeformableAttnInputValueIndex]->BuildType()); + auto type = CheckAndConvertUtils::CheckTensorTypeSame(out_types, out_valid_types, op_name); + return type; +} + +AbstractBasePtr MultiScaleDeformableAttnInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, + const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(primitive); + CheckAndConvertUtils::CheckInputArgs(input_args, kLessEqual, kMultiScaleDeformableAttnInputsNum, primitive->name()); + auto infer_shape = MultiScaleDeformableAttnInferShape(primitive, input_args); + auto infer_type = MultiScaleDeformableAttnInferType(primitive, input_args); + return abstract::MakeAbstract(infer_shape, infer_type); +} + +BaseShapePtr MultiScaleDeformableAttnFunctionV2FuncImpl::InferShape( + const PrimitivePtr &primitive, const std::vector &input_args) const { + return MultiScaleDeformableAttnInferShape(primitive, input_args); +} + +TypePtr MultiScaleDeformableAttnFunctionV2FuncImpl::InferType(const PrimitivePtr &primitive, + const std::vector &input_args) const { + return MultiScaleDeformableAttnInferType(primitive, input_args); +} + +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn.h b/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn.h new file mode 100644 index 0000000000000000000000000000000000000000..79d09add10fee9ac89827e002eaf393a0bc1c016 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn.h @@ -0,0 +1,40 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CORE_OPS_MULTI_SCALE_DEFORMABLE_ATTN_H_ +#define MINDSPORE_CORE_OPS_MULTI_SCALE_DEFORMABLE_ATTN_H_ +#include +#include +#include +#include "ops/base_operator.h" +#include "mindapi/base/types.h" +#include "ops/primitive_c.h" +#include "abstract/abstract_value.h" +#include "mindspore/core/ops/op_name.h" +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore { +namespace ops { + +class MIND_API MultiScaleDeformableAttnFunctionV2FuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; + +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_MULTI_SCALE_DEFORMABLE_ATTN_H_ diff --git a/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn_grad.cc b/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn_grad.cc new file mode 100644 index 0000000000000000000000000000000000000000..f273685423b54a7c6266fa2df6f9f03eee865c32 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn_grad.cc @@ -0,0 +1,94 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include "abstract/ops/primitive_infer_map.h" +#include "ops/nn_ops.h" +#include "utils/check_convert_utils.h" +#include "ops/primitive_c.h" +#include "mindapi/src/helper.h" +#include "ops/ops_func_impl/multi_scale_deformable_attn_grad.h" +#include "ops/auto_generate/gen_lite_ops.h" + +namespace mindspore { +namespace ops { +enum MultiScaleDeformableAttnGradInputIndex : size_t { + kMultiScaleDeformableAttnGradInputValueIndex = 0, + kMultiScaleDeformableAttnGradInputSpatialShapesIndex, + kMultiScaleDeformableAttnGradInputLevelStartIndex, + kMultiScaleDeformableAttnGradInputSamplingLocIndex, + kMultiScaleDeformableAttnGradInputAttnWeightIndex, + kMultiScaleDeformableAttnGradInputGradOutputIndex, + kMultiScaleDeformableAttnGradInputsNum, +}; + +enum MultiScaleDeformableAttnGradOutputIndex : size_t { + kMultiScaleDeformableAttnGradOutputGradValueIndex = 0, + kMultiScaleDeformableAttnGradOutputGradSamplingLocIndex, + kMultiScaleDeformableAttnGradOutputGradAttnWeightIndex, + kMultiScaleDeformableAttnGradOutputsNum, +}; + +abstract::TupleShapePtr MultiScaleDeformableAttnGradInferShape(const PrimitivePtr &prim, + const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(prim); + auto value_shape = input_args[kMultiScaleDeformableAttnGradInputValueIndex]->GetShape()->GetShapeVector(); + auto sp_loc_shape = input_args[kMultiScaleDeformableAttnGradInputSamplingLocIndex]->GetShape()->GetShapeVector(); + + auto out_one_shape = {value_shape[0], value_shape[1], value_shape[2], value_shape[3]}; + auto out_two_shape = {sp_loc_shape[0], sp_loc_shape[1], sp_loc_shape[2], + sp_loc_shape[3], sp_loc_shape[4], sp_loc_shape[5]}; + auto out_three_shape = {sp_loc_shape[0], sp_loc_shape[1], sp_loc_shape[2], sp_loc_shape[3], sp_loc_shape[5]}; + + abstract::BaseShapePtrList out_shape = std::vector{ + std::make_shared(out_one_shape), std::make_shared(out_two_shape), + std::make_shared(out_three_shape)}; + return std::make_shared(out_shape); +} + +TuplePtr MultiScaleDeformableAttnGradInferType(const PrimitivePtr &prim, + const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(prim); + auto op_name = prim->name(); + std::map out_types; + const std::set out_valid_types = {kFloat16, kFloat32}; + (void)out_types.emplace("value", input_args[kMultiScaleDeformableAttnGradInputValueIndex]->BuildType()); + auto type = CheckAndConvertUtils::CheckTensorTypeSame(out_types, out_valid_types, op_name); + return std::make_shared(std::vector{type, type, type}); +} + +AbstractBasePtr MultiScaleDeformableAttnGradInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, + const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(primitive); + CheckAndConvertUtils::CheckInputArgs(input_args, kLessEqual, kMultiScaleDeformableAttnGradInputsNum, + primitive->name()); + auto infer_shape = MultiScaleDeformableAttnGradInferShape(primitive, input_args); + auto infer_type = MultiScaleDeformableAttnGradInferType(primitive, input_args); + return abstract::MakeAbstract(infer_shape, infer_type); +} + +BaseShapePtr MultiScaleDeformableAttentionV2GradFuncImpl::InferShape( + const PrimitivePtr &primitive, const std::vector &input_args) const { + return MultiScaleDeformableAttnGradInferShape(primitive, input_args); +} + +TypePtr MultiScaleDeformableAttentionV2GradFuncImpl::InferType(const PrimitivePtr &primitive, + const std::vector &input_args) const { + return MultiScaleDeformableAttnGradInferType(primitive, input_args); +} + +} // namespace ops +} // namespace mindspore diff --git a/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn_grad.h b/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..8a5eebb5db9427c25ac81d3bebf8fdca3910a948 --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/multi_scale_deformable_attn_grad.h @@ -0,0 +1,40 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CORE_OPS_MULTI_SCALE_DEFORMABLE_ATTN_GRAD_H_ +#define MINDSPORE_CORE_OPS_MULTI_SCALE_DEFORMABLE_ATTN_GRAD_H_ +#include +#include +#include +#include "ops/base_operator.h" +#include "mindapi/base/types.h" +#include "ops/primitive_c.h" +#include "abstract/abstract_value.h" +#include "mindspore/core/ops/op_name.h" +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore { +namespace ops { + +class MIND_API MultiScaleDeformableAttentionV2GradFuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; + +} // namespace ops +} // namespace mindspore + +#endif // MINDSPORE_CORE_OPS_MULTI_SCALE_DEFORMABLE_ATTN_GRAD_H_ diff --git a/mindspore/core/ops/ops_func_impl/rsqrt.cc b/mindspore/core/ops/ops_func_impl/rsqrt.cc index f99fdcb3d5b193bdf1531fab3df618b9241e1bc7..0c61e54511fd6f90ee71ea191b5c22adfff955fa 100644 --- a/mindspore/core/ops/ops_func_impl/rsqrt.cc +++ b/mindspore/core/ops/ops_func_impl/rsqrt.cc @@ -29,9 +29,9 @@ TypePtr RsqrtFuncImpl::InferType(const PrimitivePtr &primitive, const std::vecto MS_EXCEPTION_IF_NULL(input_args[0]->GetType()); auto input_type = input_args[kIndex0]->GetType(); auto input_type_id = input_type->cast()->element()->type_id(); - static const std::vector int_or_bool = {kNumberTypeUInt8, kNumberTypeInt8, kNumberTypeUInt16, - kNumberTypeInt16, kNumberTypeUInt32, kNumberTypeInt32, - kNumberTypeUInt64, kNumberTypeInt64, kNumberTypeBool}; + static const std::set int_or_bool = {kNumberTypeUInt8, kNumberTypeInt8, kNumberTypeUInt16, + kNumberTypeInt16, kNumberTypeUInt32, kNumberTypeInt32, + kNumberTypeUInt64, kNumberTypeInt64, kNumberTypeBool}; bool is_int_or_bool = std::any_of(int_or_bool.begin(), int_or_bool.end(), [&input_type_id](const TypeId &type_id) { return input_type_id == type_id; }); if (is_int_or_bool) { diff --git a/mindspore/core/ops/ops_func_impl/select.cc b/mindspore/core/ops/ops_func_impl/select.cc index cbe7a16129b05046ddd6b4c4a46c1ffa33670f5b..3fb375763dd0ede9b17b850754ba14ec62f1828e 100644 --- a/mindspore/core/ops/ops_func_impl/select.cc +++ b/mindspore/core/ops/ops_func_impl/select.cc @@ -46,18 +46,6 @@ namespace ops { using float_complex = std::complex; using double_complex = std::complex; -void SelectInferShapeCheck(const std::vector &x_shape, const std::vector &y_shape, - const std::vector &cond_shape, size_t shape_size) { - for (size_t i = 0; i < shape_size; i++) { - if ((x_shape[i] > 0 && cond_shape[i] > 0 && x_shape[i] != cond_shape[i]) || - (x_shape[i] > 0 && y_shape[i] > 0 && x_shape[i] != y_shape[i])) { - MS_EXCEPTION(ValueError) - << "For 'Select', the shape of 'condition', 'x' and 'y' must be the same. But got 'condition' shape: " - << cond_shape << ", 'x' shape: " << x_shape << ", 'y' shape: " << y_shape << "."; - } - } -} - abstract::BaseShapePtr SelectFuncImpl::InferShape(const PrimitivePtr &prim, const std::vector &input_args) const { auto cond_shape = input_args[kSelectCondIndex]->GetShape()->GetShapeVector(); @@ -66,16 +54,9 @@ abstract::BaseShapePtr SelectFuncImpl::InferShape(const PrimitivePtr &prim, if (IsDynamicRank(cond_shape) || IsDynamicRank(x_shape) || IsDynamicRank(y_shape)) { return std::make_shared(ShapeVector{abstract::TensorShape::kShapeRankAny}); } - auto cond_shape_size = cond_shape.size(); - auto x_shape_size = x_shape.size(); - auto y_shape_size = y_shape.size(); - if (cond_shape_size != x_shape_size || y_shape_size != x_shape_size) { - MS_EXCEPTION(ValueError) - << "For 'Select', the shape of 'condition', 'x' and 'y' must be the same. But got 'condition' shape: " - << cond_shape << ", 'x' shape: " << x_shape << ", 'y' shape: " << y_shape << "."; - } - SelectInferShapeCheck(x_shape, y_shape, cond_shape, x_shape_size); - return input_args[kSelectCondIndex]->GetShape()->Clone(); + auto broadcast_output_size = CalBroadCastShape(x_shape, y_shape, prim->name(), "input", "other"); + auto output_size = CalBroadCastShape(cond_shape, broadcast_output_size, prim->name(), "condition", "input"); + return std::make_shared(output_size); } TypePtr SelectFuncImpl::InferType(const PrimitivePtr &prim, const std::vector &input_args) const { @@ -94,11 +75,6 @@ TypePtr SelectFuncImpl::InferType(const PrimitivePtr &prim, const std::vectorToString() - << " and y_type: " << y_type->ToString() << "."; - } return x_type->Clone(); } } // namespace ops diff --git a/mindspore/core/ops/ops_func_impl/slice_ext.cc b/mindspore/core/ops/ops_func_impl/slice_ext.cc new file mode 100644 index 0000000000000000000000000000000000000000..f421351d2c46a900c26ff564698667d88a04e33a --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/slice_ext.cc @@ -0,0 +1,84 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "utils/check_convert_utils.h" +#include "ops/op_utils.h" +#include "ops/ops_func_impl/slice_ext.h" + +namespace mindspore::ops { +BaseShapePtr SliceExtFuncImpl::InferShape(const PrimitivePtr &primitive, + const std::vector &input_args) const { + auto prim_name = primitive->name(); + auto input_x_shape = input_args[0]->GetShape()->GetShapeVector(); + (void)CheckAndConvertUtils::CheckInteger("rank of input_x", SizeToLong(input_x_shape.size()), kGreaterThan, 0, + prim_name); + + if (IsDynamicRank(input_x_shape)) { + return std::make_shared(ShapeVector{abstract::TensorShape::kShapeRankAny}); + } + + auto axis_value_opt = GetScalarValue(input_args[kInputIndex1]->GetValue()); + auto input_begin_value_opt = GetScalarValue(input_args[kInputIndex2]->GetValue()); + auto input_end_value_opt = GetScalarValue(input_args[kInputIndex3]->GetValue()); + auto input_step_value_opt = GetScalarValue(input_args[kInputIndex4]->GetValue()); + + if (!axis_value_opt.has_value() || !input_begin_value_opt.has_value() || !input_end_value_opt.has_value() || + !input_step_value_opt.has_value()) { + return std::make_shared(ShapeVector{abstract::TensorShape::kShapeRankAny}); + } + + auto axis_value = axis_value_opt.value(); + auto input_begin_value = input_begin_value_opt.value(); + auto input_end_value = input_end_value_opt.value(); + auto x_rank = SizeToLong(input_x_shape.size()); + auto x_axis_size = input_x_shape[axis_value]; + + if (input_begin_value > input_end_value) { + MS_EXCEPTION(ValueError) << "For Slice, the end must be no greater than start."; + } + + MS_CHECK_VALUE( + axis_value >= -x_rank && axis_value < x_rank, + CheckAndConvertUtils::FormatCheckInRangeMsg("axis", axis_value, kIncludeLeft, {-x_rank, x_rank}, primitive)); + axis_value = axis_value < 0 ? axis_value + x_rank : axis_value; + + if (input_x_shape[axis_value] == abstract::Shape::kShapeDimAny) { + return std::make_shared(input_x_shape); + } + + MS_CHECK_VALUE(input_begin_value >= -x_axis_size && input_begin_value <= x_axis_size, + CheckAndConvertUtils::FormatCheckInRangeMsg("start", input_begin_value, kIncludeBoth, + {-x_axis_size, x_axis_size}, primitive)); + auto input_length = input_end_value - input_begin_value; + input_begin_value = input_begin_value < 0 ? input_begin_value + x_axis_size : input_begin_value; + input_end_value = input_begin_value + input_length; + MS_CHECK_VALUE(input_end_value >= -x_axis_size && input_end_value <= x_axis_size, + CheckAndConvertUtils::FormatCheckInRangeMsg("start", input_end_value, kIncludeBoth, + {-x_axis_size, x_axis_size}, primitive)); + auto out_shape = input_x_shape; + out_shape[axis_value] = input_end_value - input_begin_value; + + return std::make_shared(out_shape); +} + +TypePtr SliceExtFuncImpl::InferType(const PrimitivePtr &primitive, + const std::vector &input_args) const { + auto input_type = input_args[kIndex0]->GetType(); + return input_type->Clone(); +} +} // namespace mindspore::ops diff --git a/mindspore/core/ops/ops_func_impl/slice_ext.h b/mindspore/core/ops/ops_func_impl/slice_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..04d7ae7ac9bfd64b010da7d7c277eaf54fd1e25f --- /dev/null +++ b/mindspore/core/ops/ops_func_impl/slice_ext.h @@ -0,0 +1,32 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_SLICE_EXT_H_ +#define MINDSPORE_CORE_OPS_OPS_FUNC_IMPL_SLICE_EXT_H_ + +#include +#include "ops/ops_func_impl/op_func_impl.h" + +namespace mindspore::ops { +/// \brief Implementation of InferShape and InferType functions for operator 'SliceExt' +class MIND_API SliceExtFuncImpl : public OpFuncImpl { + public: + BaseShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) const override; + TypePtr InferType(const PrimitivePtr &primitive, const std::vector &input_args) const override; +}; +} // namespace mindspore::ops + +#endif // MINDSPORE_CORE_OPS_SLICE_EXT_H_ diff --git a/mindspore/core/ops/symbol_ops_impl/conv2d.cc b/mindspore/core/ops/symbol_ops_impl/conv2d.cc new file mode 100644 index 0000000000000000000000000000000000000000..a86762b1482d815af0825684ee2b118b7e487aad --- /dev/null +++ b/mindspore/core/ops/symbol_ops_impl/conv2d.cc @@ -0,0 +1,98 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "mindspore/core/ops/symbol_ops_impl/common.h" +#include "mindspore/core/ops/conv2d.h" +#include "mindspore/core/ops/symbol_ops_impl/scalar_div.h" + +namespace mindspore { +namespace symshape { +namespace ops { +namespace { +constexpr size_t kNum2 = 2; +} +class MS_CORE_API Conv2D : public InferShapeOp { + public: + using InferShapeOp::InferShapeOp; + Conv2D(const SymbolPtr &x, const SymbolPtr &out_channel, const SymbolPtr &kernel_size, const SymbolPtr &pad_mode, + const SymbolPtr &padding, const SymbolPtr &stride, const SymbolPtr &dilation, const SymbolPtr &format) + : InferShapeOp({x, out_channel, kernel_size, pad_mode, padding, stride, dilation, format}) {} + + ~Conv2D() override = default; + MS_DECLARE_PARENT(Conv2D, InferShapeOp) + + protected: + SymbolPtr Eval() override; + SymbolPtr GenOutput(const SymbolPtr &n, const SymbolPtr &h, const SymbolPtr &w) const { + auto out_channel = input(kIndex1); + auto format = input_as(kIndex7)->value(); + return format == "NCHW" ? ListSymbol::Make({n, out_channel, h, w}) : ListSymbol::Make({n, h, w, out_channel}); + } + SymbolPtr CalcForPadSame(const SymbolPtr &x, const SymbolPtr &stride) { + return Emit(std::make_shared(x, stride)); + } + + ListSymbolPtr ProcessAttr(const SymbolPtr &attr, size_t begin_idx, size_t num) { + if (attr->is()) { + auto list = attr->as_sptr(); + if (list->size() == num) { + return list; + } + SymbolPtrList res(list->symbols().begin() + begin_idx, list->symbols().begin() + begin_idx + num); + return ListSymbol::Make(std::move(res)); + } + SymbolPtrList res(num, attr); + return ListSymbol::Make(std::move(res)); + } +}; + +SymbolPtr Conv2D::Eval() { + auto x = input_as(kIndex0); + auto pad_mode = input_as(kIndex3)->value(); + auto stride = ProcessAttr(input(kIndex5), kIndex2, kNum2); + auto format = input_as(kIndex7)->value(); + if (pad_mode != PadMode::SAME) { + // only support SAME pad now. + return nullptr; + } + if (!x->HasData()) { + return GenOutput(GenVInt(), GenVInt(), GenVInt()); + } + size_t h_axis = kIndex2; + size_t w_axis = kIndex3; + if (format == "NHWC") { + h_axis = kIndex1; + w_axis = kIndex2; + } + auto out_n = x->item(kIndex0); + auto out_h = CalcForPadSame(x->item(h_axis), stride->item(kIndex0)); + auto out_w = CalcForPadSame(x->item(w_axis), stride->item(kIndex1)); + return GenOutput(out_n, out_h, out_w); +} + +REG_SYMBOL_OP_BUILDER("Conv2D").SetShapeFunc([](OperationBuilder *b) -> SymbolPtr { + auto x = b->GetInputShape(kIndex0); + auto out_channel = b->GetInputOrAttr(kIndex3, "out_channel"); + auto kernel_size = b->GetInputOrAttr(kIndex4, "kernel_size"); + auto pad_mode = b->GetInputOrAttr(kIndex6, "pad_mode"); + auto padding = b->GetInputOrAttr(kIndex7, "pad"); + auto stride = b->GetInputOrAttr(kIndex8, "stride"); + auto dilation = b->GetInputOrAttr(kIndex9, "dilation"); + auto format = b->GetInputOrAttr(kIndex11, "format"); + return b->Emit(std::make_shared(x, out_channel, kernel_size, pad_mode, padding, stride, dilation, format)); +}); +} // namespace ops +} // namespace symshape +} // namespace mindspore diff --git a/mindspore/core/ops/symbol_ops_impl/rms_norm.cc b/mindspore/core/ops/symbol_ops_impl/rms_norm.cc new file mode 100644 index 0000000000000000000000000000000000000000..a3f2d87feda097e1586f937c801b43409524557b --- /dev/null +++ b/mindspore/core/ops/symbol_ops_impl/rms_norm.cc @@ -0,0 +1,33 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "mindspore/core/symbolic_shape/operation_builder.h" + +namespace mindspore { +namespace symshape { +namespace ops { +REG_SYMBOL_OP_BUILDER("RmsNorm").SetShapeFunc([](OperationBuilder *b) -> SymbolPtr { + auto inp = b->GetInputShape(kIndex0)->as_sptr(); + MS_EXCEPTION_IF_NULL(inp); + if (inp->is_dyn_len()) { + return nullptr; + } + auto rstd_shape = inp->symbols(); + rstd_shape.back() = IntSymbol::Make(1LL); + return ListSymbol::Make(SymbolPtrList{inp, ListSymbol::Make(std::move(rstd_shape))}); +}); +} // namespace ops +} // namespace symshape +} // namespace mindspore diff --git a/mindspore/core/ops/symbol_ops_impl/scalar_div.cc b/mindspore/core/ops/symbol_ops_impl/scalar_div.cc index da432db6de6e7b364d1ed32e2c83c146b6c0fb2a..0959a98c1fb78ec009646d756f1a21471c002cbf 100644 --- a/mindspore/core/ops/symbol_ops_impl/scalar_div.cc +++ b/mindspore/core/ops/symbol_ops_impl/scalar_div.cc @@ -16,6 +16,7 @@ #include "mindspore/core/ops/symbol_ops_impl/scalar_div.h" #include #include +#include namespace mindspore { namespace symshape { @@ -25,7 +26,7 @@ SymbolPtr ScalarDiv::Eval() { auto lhs = input_as(0); auto rhs = input_as(1); if (lhs->HasData() && rhs->HasData()) { - return GenInt(lhs->value() / rhs->value()); + return GenInt(DivWithCheck(lhs->value(), rhs->value())); } if (lhs->HasData() && lhs->value() == 0) { return GenInt(0); @@ -81,8 +82,41 @@ void ScalarDiv::UpdateMathInfo() { } } +SymbolPtr ScalarFloorDiv::Eval() { + // only eval on Building + auto lhs = input_as_sptr(0); + auto rhs = input_as_sptr(1); + if (lhs->HasData() && rhs->HasData()) { + return GenInt(FloorDiv(lhs->value(), rhs->value())); + } + if (lhs->is_divisible_by(rhs)) { + DoNotEvalOnRun(); + return Emit(std::make_shared(lhs, rhs)); + } + return GenVInt(); +} + +SymbolPtr ScalarCeilDiv::Eval() { + // only eval on Building + auto lhs = input_as_sptr(0); + auto rhs = input_as_sptr(1); + if (lhs->HasData() && rhs->HasData()) { + return GenInt(CeilDiv(lhs->value(), rhs->value())); + } + if (lhs->is_divisible_by(rhs)) { + DoNotEvalOnRun(); + return Emit(std::make_shared(lhs, rhs)); + } + // the CeilDiv has not math info, assume the lhs can be divisible by rhs if the env is set. + if (common::GetEnv("MS_DEV_USE_SYMBOL_CEIL_DIV") == "off") { + MS_LOG(WARNING) << "Assume the " << lhs->ToString() << " can be divide by " << rhs->ToString() << "."; + return Emit(std::make_shared(lhs, rhs)); + } + return GenVInt(); +} + REG_SYMBOL_OP_BUILDER("ScalarDiv").SetValueFunc(DefaultBuilder); -REG_SYMBOL_OP_BUILDER("ScalarFloorDiv").SetValueFunc(DefaultBuilder); +REG_SYMBOL_OP_BUILDER("ScalarFloorDiv").SetValueFunc(DefaultBuilder); } // namespace ops } // namespace symshape } // namespace mindspore diff --git a/mindspore/core/ops/symbol_ops_impl/scalar_div.h b/mindspore/core/ops/symbol_ops_impl/scalar_div.h index 676babefdb2e1010aa48ef4a5a39db930e62b84a..a1fbeb7024f899e85737629fe1f188000eeb4bba 100644 --- a/mindspore/core/ops/symbol_ops_impl/scalar_div.h +++ b/mindspore/core/ops/symbol_ops_impl/scalar_div.h @@ -16,6 +16,7 @@ #ifndef MINDSPORE_CORE_OPS_SYMBOL_OPS_IMPL_SCALAR_DIV_H_ #define MINDSPORE_CORE_OPS_SYMBOL_OPS_IMPL_SCALAR_DIV_H_ +#include #include "mindspore/core/ops/symbol_ops_impl/common.h" namespace mindspore { @@ -29,9 +30,43 @@ class MS_CORE_API ScalarDiv : public ScalarOp { protected: SymbolPtr Eval() override; - void EvalOnRun() override { output_as()->SetValue(AsInt(input(0)) / AsInt(input(1))); } + void EvalOnRun() override { output_as()->SetValue(DivWithCheck(AsInt(input(0)), AsInt(input(1)))); } + inline int64_t DivWithCheck(int64_t x, int64_t y) const { + if (x % y != 0) { + MS_LOG(EXCEPTION) << "For operation 'ScalarDiv', the 'x' should be divisible by 'y', but got " << x << "/" << y; + } + return x / y; + } void UpdateMathInfo() override; }; + +class MS_CORE_API ScalarFloorDiv : public ScalarOp { + public: + using ScalarOp::ScalarOp; + ScalarFloorDiv(const SymbolPtr &lhs, const SymbolPtr &rhs) : ScalarOp({lhs, rhs}) {} + MS_DECLARE_PARENT(ScalarFloorDiv, ScalarOp) + + protected: + SymbolPtr Eval() override; + void EvalOnRun() override { output_as()->SetValue(FloorDiv(AsInt(input(0)), AsInt(input(1)))); } + inline int64_t FloorDiv(int64_t x, int64_t y) const { + return DoubleToLong(std::floor(LongToDouble(x) / LongToDouble(y))); + } +}; + +class MS_CORE_API ScalarCeilDiv : public ScalarOp { + public: + using ScalarOp::ScalarOp; + ScalarCeilDiv(const SymbolPtr &lhs, const SymbolPtr &rhs) : ScalarOp({lhs, rhs}) {} + MS_DECLARE_PARENT(ScalarCeilDiv, ScalarOp) + + protected: + SymbolPtr Eval() override; + void EvalOnRun() override { output_as()->SetValue(CeilDiv(AsInt(input(0)), AsInt(input(1)))); } + inline int64_t CeilDiv(int64_t x, int64_t y) const { + return DoubleToLong(std::ceil(LongToDouble(x) / LongToDouble(y))); + } +}; } // namespace ops } // namespace symshape } // namespace mindspore diff --git a/mindspore/core/ops/symbol_ops_impl/strided_slice.cc b/mindspore/core/ops/symbol_ops_impl/strided_slice.cc index b3bd59499229d02043596019df4a3d26e0494c4e..e8a95588dd85d5a8cc5c2fc550187b620e36c533 100644 --- a/mindspore/core/ops/symbol_ops_impl/strided_slice.cc +++ b/mindspore/core/ops/symbol_ops_impl/strided_slice.cc @@ -17,6 +17,7 @@ #include "mindspore/core/ops/symbol_ops_impl/scalar_add.h" #include "mindspore/core/ops/symbol_ops_impl/scalar_sub.h" #include "mindspore/core/ops/symbol_ops_impl/scalar_div.h" +#include "mindspore/core/ops/symbol_ops_impl/scalar_min.h" namespace mindspore { namespace symshape { @@ -89,13 +90,9 @@ SymbolPtr StridedSlice::GetSlicingLengthForPositiveStrides(IntSymbolPtr start, I return GenInt(0); } if ((*start) <= (*end)) { - // length = (end - 1 - start) / strides + 1. (to floor) - if (strides->is_const() && strides->value() == 1) { - return Emit(std::make_shared(end, start)); - } - auto t1 = Emit(std::make_shared(Emit(std::make_shared(end, GenInt(1))), start)); - auto t2 = Emit(std::make_shared(t1, strides)); - return Emit(std::make_shared(t2, GenInt(1))); + // slice length = (end - start) / strides. (to ceil) + auto len = Emit(std::make_shared(end, start)); + return Emit(std::make_shared(len, strides)); } return GenVInt(); } @@ -139,6 +136,8 @@ SymbolPtr StridedSlice::ComputeInferShape(const ListSymbol *x_shape, const ListS } if (end_mask(j)) { finish = x_dim_size; + } else { + finish = Emit(std::make_shared(finish, x_dim_size))->as_sptr(); } auto slicing_len = GetSlicingLengthForPositiveStrides(start, finish, strides, x_dim_size); (void)res_shape.emplace_back(std::move(slicing_len)); diff --git a/mindspore/core/ops/symbol_ops_impl/transparent_op.cc b/mindspore/core/ops/symbol_ops_impl/transparent_op.cc index f50ffa31284061561d450c7403f7737c66e5fe6e..f8de9d7a9bad97badfa28ce1155a8963c6ab5f7c 100644 --- a/mindspore/core/ops/symbol_ops_impl/transparent_op.cc +++ b/mindspore/core/ops/symbol_ops_impl/transparent_op.cc @@ -18,6 +18,7 @@ namespace mindspore { namespace symshape { namespace ops { +// infer symbolic shape. please add ops in lexicographical order. REG_SYMBOL_OP_BUILDER("Abs").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("Assign").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("BiasAdd").SetShapeDepend({DependOn::kShape, DependOn::kNone}); @@ -39,16 +40,18 @@ REG_SYMBOL_OP_BUILDER("ReLU").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("Rsqrt").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("RsqrtGrad").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("ReshapeAndCache").SetShapeDepend({DependOn::kShape}); +REG_SYMBOL_OP_BUILDER("Sigmoid").SetShapeDepend({DependOn::kShape}); +REG_SYMBOL_OP_BUILDER("SigmoidGrad").SetShapeDepend({DependOn::kShape}); +REG_SYMBOL_OP_BUILDER("SiLU").SetShapeDepend({DependOn::kShape}); +REG_SYMBOL_OP_BUILDER("Softmax").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("SoftmaxBackward").SetShapeDepend({DependOn::kNone, DependOn::kShape}); REG_SYMBOL_OP_BUILDER("SoftmaxGrad").SetShapeDepend({DependOn::kNone, DependOn::kShape}); -REG_SYMBOL_OP_BUILDER("Softmax").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("Sqrt").SetShapeDepend({DependOn::kShape}); +REG_SYMBOL_OP_BUILDER("Square").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("StopGradient").SetShapeDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("Tril").SetShapeDepend({DependOn::kShape}); -REG_SYMBOL_OP_BUILDER("Sigmoid").SetShapeDepend({DependOn::kShape}); -REG_SYMBOL_OP_BUILDER("SigmoidGrad").SetShapeDepend({DependOn::kShape}); -REG_SYMBOL_OP_BUILDER("Square").SetShapeDepend({DependOn::kShape}); +// infer symbolic value. REG_SYMBOL_OP_BUILDER("Shape").SetValueDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("TensorShape").SetValueDepend({DependOn::kShape}); REG_SYMBOL_OP_BUILDER("ScalarToTensor").SetValueDepend({DependOn::kValue}); diff --git a/mindspore/core/symbolic_shape/int_symbol.h b/mindspore/core/symbolic_shape/int_symbol.h index 37f9254a224fc92f849bc04a824b931d217b8e43..66c730fbec30b03deb6851b723bf6ffde97b98bd 100644 --- a/mindspore/core/symbolic_shape/int_symbol.h +++ b/mindspore/core/symbolic_shape/int_symbol.h @@ -81,7 +81,8 @@ class MS_CORE_API IntSymbol : public ScalarSymbol { int64_t remainder() const { return math_info_.remainder(); } /// \brief Check the symbol is divisible by 'd' - bool is_divisible_by(int64_t d) const { return remainder() == 0 && divisor() % d == 0; } + bool is_divisible_by(int64_t d) const; + bool is_divisible_by(const IntSymbolPtr &d) const; /// \brief Check the symbol is ALWAYS greater than x bool is_greater_than(int64_t x) const { return range_min() > x; } /// \brief Check the symbol is ALWAYS less than x diff --git a/mindspore/core/symbolic_shape/symbol.cc b/mindspore/core/symbolic_shape/symbol.cc index b0697c02c19f83f5001a4485a9b2670a4d4fc5a7..58f1805dfdf50f0275bb18cd0f406ad42907cf3c 100644 --- a/mindspore/core/symbolic_shape/symbol.cc +++ b/mindspore/core/symbolic_shape/symbol.cc @@ -135,6 +135,17 @@ bool IntSymbol::operator<=(const IntSymbol &s) const { return math_info_.MathLessEqual(s.math_info_); } +bool IntSymbol::is_divisible_by(int64_t d) const { + if (has_data_) { + return value_ % d == 0; + } + return (divisor() % d == 0) && (remainder() % d == 0); +} + +bool IntSymbol::is_divisible_by(const IntSymbolPtr &d) const { + return (d->HasData() && is_divisible_by(d->value())) || (this->HasData() && value() == 0) || this->EqualsTo(d); +} + bool ListSymbol::operator==(const Symbol &s) const { if (this == &s) { return true; diff --git a/mindspore/lite/cmake/pocketfft.cmake b/mindspore/lite/cmake/pocketfft.cmake new file mode 100644 index 0000000000000000000000000000000000000000..2d5083ab66090fd07e8f3e1edc6021769045facb --- /dev/null +++ b/mindspore/lite/cmake/pocketfft.cmake @@ -0,0 +1,14 @@ +set(Pocketfft_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") +set(Pocketfft_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") + + +set(REQ_URL "https://github.com/malfet/pocketfft/archive/refs/heads/cpp.zip") +set(SHA256 "7c475524c264c450b78e221046d90b859316e105d3d6a69d5892baeafad95493") +set(INCLUDE "./") + +mindspore_add_pkg(pocketfft + HEAD_ONLY ./ + URL ${REQ_URL} + SHA256 ${SHA256} + ) +include_directories(${pocketfft_INC}) diff --git a/mindspore/lite/python/api/_parse_update_weights_name.py b/mindspore/lite/python/api/_parse_update_weights_name.py index 4219bb98c99dc96539f21e5fcae4367ef35c6788..bbdc5773907a7946a5cf32e90a1db05d138bd951 100644 --- a/mindspore/lite/python/api/_parse_update_weights_name.py +++ b/mindspore/lite/python/api/_parse_update_weights_name.py @@ -18,37 +18,173 @@ Parse Update Weights Name. import re import os +def _maybe_map_sgm_blocks_to_diffusers(name, layers_per_block=2, delimiter="_", block_slice_pos=5): + ''' + convert name like input_blocks.1.1_xxx to input_blocks.1.resnets_xxx + ''' + # 1. get all state_dict_keys + sgm_patterns = ["input_blocks", "middle_block", "output_blocks"] + inner_block_map = ["resnets", "attentions", "upsamplers"] + + if not any([pattern in name for pattern in sgm_patterns]): + return name + + layer_id = int(name.split(delimiter)[:block_slice_pos][-1]) + + # Rename keys accordingly + if sgm_patterns[0] in name: # 0:input_blocks + block_id = (layer_id - 1) // (layers_per_block + 1) + layer_in_block_id = (layer_id - 1) % (layers_per_block + 1) + + inner_block_id = int(name.split(delimiter)[block_slice_pos]) + inner_block_key = inner_block_map[inner_block_id] if "op" not in name else "downsamplers" + inner_layers_in_block = str(layer_in_block_id) if "op" not in name else "0" + new_name = delimiter.join( + name.split(delimiter)[: block_slice_pos - 1] + + [str(block_id), inner_block_key, inner_layers_in_block] + + name.split(delimiter)[block_slice_pos + 1 :] + ) + return new_name + + if sgm_patterns[1] in name: # 1:middle_block + key_part = None + if layer_id == 0: + key_part = [inner_block_map[0], "0"] + elif layer_id == 1: + key_part = [inner_block_map[1], "0"] + elif layer_id == 2: + key_part = [inner_block_map[0], "1"] + else: + raise ValueError(f"Invalid middle block id {layer_id}.") + + new_name = delimiter.join( + name.split(delimiter)[: block_slice_pos - 1] + key_part + name.split(delimiter)[block_slice_pos:] + ) + return new_name + + if sgm_patterns[2] in name: # 2:output_blocks + block_id = layer_id // (layers_per_block + 1) + layer_in_block_id = layer_id % (layers_per_block + 1) + name_splites = name.split(delimiter) + if len(name_splites) <= block_slice_pos: + raise ValueError("Invalid name") + + inner_block_id = int(name_splites[block_slice_pos]) + inner_block_key = inner_block_map[inner_block_id] + inner_layers_in_block = str(layer_in_block_id) if inner_block_id < 2 else "0" + new_name = delimiter.join( + name.split(delimiter)[: block_slice_pos - 1] + + [str(block_id), inner_block_key, inner_layers_in_block] + + name.split(delimiter)[block_slice_pos + 1 :] + ) + return new_name + + return name + +def _convert_kohya_name(name): + ''' + convert name like input_blocks_xxxx to down_blocks_xxxx + ''' + diffusers_name = name + lora_name = name.split(".")[0] + + if not lora_name.startswith("lora_unet_"): + return diffusers_name + + diffusers_name = name.replace("lora_unet_", "").replace("_", ".") + + if "input.blocks" in diffusers_name: + diffusers_name = diffusers_name.replace("input.blocks", "down_blocks") + else: + diffusers_name = diffusers_name.replace("down.blocks", "down_blocks") + + if "middle.block" in diffusers_name: + diffusers_name = diffusers_name.replace("middle.block", "mid_block") + else: + diffusers_name = diffusers_name.replace("mid.block", "mid_block") + if "output.blocks" in diffusers_name: + diffusers_name = diffusers_name.replace("output.blocks", "up_blocks") + else: + diffusers_name = diffusers_name.replace("up.blocks", "up_blocks") + + diffusers_name = diffusers_name.replace("transformer.blocks", "transformer_blocks") + diffusers_name = diffusers_name.replace("to.q", "to_q") + diffusers_name = diffusers_name.replace("to.k", "to_k") + diffusers_name = diffusers_name.replace("to.v", "to_v") + diffusers_name = diffusers_name.replace("to.out.0", "to_out") + diffusers_name = diffusers_name.replace("proj.in", "proj_in") + diffusers_name = diffusers_name.replace("proj.out", "proj_out") + diffusers_name = diffusers_name.replace("emb.layers", "time_emb_proj") + + # SDXL specificity. + if "emb" in diffusers_name and "time.emb.proj" not in diffusers_name: + pattern = r"\.\d+(?=\D*$)" + diffusers_name = re.sub(pattern, "", diffusers_name, count=1) + if ".in." in diffusers_name: + diffusers_name = diffusers_name.replace("in.layers.2", "conv1") + if ".out." in diffusers_name: + diffusers_name = diffusers_name.replace("out.layers.3", "conv2") + if "downsamplers" in diffusers_name or "upsamplers" in diffusers_name: + diffusers_name = diffusers_name.replace("op", "conv") + if "skip" in diffusers_name: + diffusers_name = diffusers_name.replace("skip.connection", "conv_shortcut") + + # LyCORIS specificity. + if "time.emb.proj" in diffusers_name: + diffusers_name = diffusers_name.replace("time.emb.proj", "time_emb_proj") + if "conv.shortcut" in diffusers_name: + diffusers_name = diffusers_name.replace("conv.shortcut", "conv_shortcut") + + # General coverage. + if "transformer_blocks" in diffusers_name: + if "attn1" in diffusers_name or "attn2" in diffusers_name: + diffusers_name = diffusers_name.replace("attn1", "attn1.processor") + diffusers_name = diffusers_name.replace("attn2", "attn2.processor") + return diffusers_name def _rename_variable_weight(name): """Rename variable weight""" - if not name.endswith("weight"): + if not name.endswith("weight") and not name.endswith("alpha"): raise RuntimeError("variable is not norm name, now only support **weight") - if "up_blocks" not in name and "down_blocks" not in name and "mid_block" not in name: - raise RuntimeError("variable is not norm name, must include one of up_blocks, up_blocks or mid_block") - if "attentions" not in name or "transformer_blocks" not in name or "attn" not in name: - raise RuntimeError("variable is not norm name, must include attentions, transformer_blocks or attn") + custom_prefix = None + if name.startswith("model.diffusion"): + name_parts = name.split('.') + custom_prefix_parts = [name_parts[2] + '.' + name_parts[3], name_parts[2] + + '.' + name_parts[3] + '.' + name_parts[4]] + custom_prefix = '/'.join(custom_prefix_parts) + '/' + name = '.'.join(name_parts[5:]) + name = name.replace("lora_up.", '') + name = name.replace("lora_down.", '') + name = name.replace("net.0", "net.net.0") + + name = _maybe_map_sgm_blocks_to_diffusers(name) + + name = _convert_kohya_name(name) name = name.replace("out_0", "out").replace("out.0", "out") - nums = re.findall(r"\d+", name) - if len(nums) < 3 or len(nums) > 4: - raise RuntimeError("only support norm tensor name") - new_name = "" - if "down_blocks" in name: - new_name = "/down_blocks." + nums[0] - elif "mid_block" in name: - new_name = "/mid_block" - elif "up_blocks" in name: - new_name = "/up_blocks." + nums[0] - new_name += "/attentions." + nums[-3] + "/transformer_blocks." + nums[-2] + "/attn" + nums[-1] - if "to_q" in name: - new_name += "/to_q/MatMul" - elif "to_v" in name: - new_name += "/to_v/MatMul" - elif "to_k" in name: - new_name += "/to_k/MatMul" - elif "to_out" in name: - new_name += "/to_out.0/MatMul" - return new_name + name = name.replace(".down.", ".").replace(".up.", ".") + name = name.replace('_lora', '') + name = name.replace('lora.', '') + name = name.replace('unet.', '') + name = name.replace('processor.', '') + name_split = name.split('.') + name_split.pop() + name_split.append('MatMul') + merged_name = [] + index = len(name_split) - 1 + while index >= 0: + if name_split[index].isdigit(): + merged_name.append(name_split[index-1] + '.' + name_split[index]) + index -= 2 + else: + merged_name.append(name_split[index]) + index -= 1 + + merged_name.reverse() + new_name = '/'.join(merged_name) + new_name = new_name.replace('to_out', 'to_out.0') + new_name = new_name.replace('to_out.0', 'to_out/to_out.0') if custom_prefix is not None else new_name + return "/" + new_name if custom_prefix is None else "/" + custom_prefix + new_name def _get_variable_weights_name(name_list_file): """Get variable weights name""" @@ -67,7 +203,6 @@ def _get_variable_weights_name(name_list_file): new_name_str += ',' + new_name return new_name_str[1:] - def _parse_update_weight_config_name(name_list_file): """Parse update weight config name""" with open(name_list_file, 'r') as f: diff --git a/mindspore/lite/python/api/tensor.py b/mindspore/lite/python/api/tensor.py index 66a14f8c7defc8aa7cc0b1935f197d47b88dfcff..72799d0206b07125b79576bf264131d742b2de20 100644 --- a/mindspore/lite/python/api/tensor.py +++ b/mindspore/lite/python/api/tensor.py @@ -290,8 +290,11 @@ class Tensor: Default: ``None``. dtype(DataType, optional): The dtype of the Tensor. Default: ``None``. - device(str, optional): The device type of the Tensor. - Default: ``None``. + device(str, optional): The device type of the Tensor. It can be ``"ascend"`` or + ``"ascend:device_id"`` or ``None``. ``device_id`` indicates the device number, which can be ``0`` , + ``1`` , ``2`` , ``3`` , ``4`` , ``5`` , ``6`` , or ``7``. If ``device`` is ``None``, the tensor will be + initialized at CPU. Default: ``None``. + Raises: TypeError: `tensor` is neither a Tensor nor ``None``. diff --git a/mindspore/lite/src/common/ops/populate/adder_populate.cc b/mindspore/lite/src/common/ops/populate/adder_populate.cc index a09e00643afde248a99407150618ea8ee406bcb7..305d90ff10d660309d5d0fab770c876c0aa1ae6d 100644 --- a/mindspore/lite/src/common/ops/populate/adder_populate.cc +++ b/mindspore/lite/src/common/ops/populate/adder_populate.cc @@ -22,7 +22,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateAdderParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Adder primitive is nullptr!"); auto value = primitive->value_as_AdderFusion(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/all_gather.cc b/mindspore/lite/src/common/ops/populate/all_gather.cc index ea8c1f1e8b119687e8648d6b559a961fad9f09fa..63774f0a55e5519477de35d3c6f007a8f8447f41 100644 --- a/mindspore/lite/src/common/ops/populate/all_gather.cc +++ b/mindspore/lite/src/common/ops/populate/all_gather.cc @@ -24,7 +24,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateAllGatherParameter(const void *prim) { auto *primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "AllGather primitive is nullptr!"); auto value = primitive->value_as_AllGather(); if (value == nullptr) { MS_LOG(ERROR) << "cast all_gather_primitive to value failed"; diff --git a/mindspore/lite/src/common/ops/populate/assign_add_populate.cc b/mindspore/lite/src/common/ops/populate/assign_add_populate.cc index 488fc68d6be55d052f61825a55a968f94aba6f7c..cc1824d4c0362e026606bf32377e5619a590f530 100644 --- a/mindspore/lite/src/common/ops/populate/assign_add_populate.cc +++ b/mindspore/lite/src/common/ops/populate/assign_add_populate.cc @@ -20,7 +20,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateAssignAddParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "AssignAdd primitive is nullptr!"); auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); if (param == nullptr) { diff --git a/mindspore/lite/src/common/ops/populate/broadcast_to_populate.cc b/mindspore/lite/src/common/ops/populate/broadcast_to_populate.cc index a8c51d1c0a758a05f933d6442b388751caa9c78f..ff5001a3faec4c9d46bb357dc6d5a7c63b5044bf 100644 --- a/mindspore/lite/src/common/ops/populate/broadcast_to_populate.cc +++ b/mindspore/lite/src/common/ops/populate/broadcast_to_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateBroadcastToParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "BroadcastTo primitive is nullptr!"); auto value = primitive->value_as_BroadcastTo(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/clip_populate.cc b/mindspore/lite/src/common/ops/populate/clip_populate.cc index df1da96a65c5f85b1a0ce14a9263d7aeeb0fee0e..f0e9e1d5d8fb7d64d6ec8b1e3de2b10194ca9b9f 100644 --- a/mindspore/lite/src/common/ops/populate/clip_populate.cc +++ b/mindspore/lite/src/common/ops/populate/clip_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateClipParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Clip primitive is nullptr!"); auto value = primitive->value_as_Clip(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/common_populate.cc b/mindspore/lite/src/common/ops/populate/common_populate.cc index c0244a90239f71eb1efd841f63dc01d986a7a7b8..83bc353a08cca3da23bd46c63df14f6f3d3c01d4 100644 --- a/mindspore/lite/src/common/ops/populate/common_populate.cc +++ b/mindspore/lite/src/common/ops/populate/common_populate.cc @@ -24,9 +24,8 @@ using mindspore::schema::PrimitiveType_ZerosLike; namespace mindspore { namespace lite { OpParameter *PopulateCommonParameter(const void *prim) { - MS_CHECK_TRUE_RET(prim != nullptr, nullptr); + MS_CHECK_TRUE_MSG(prim != nullptr, nullptr, "Primitive is nullptr!"); auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); if (param == nullptr) { diff --git a/mindspore/lite/src/common/ops/populate/crop_populate.cc b/mindspore/lite/src/common/ops/populate/crop_populate.cc index 7db5c4b5d63f14aaeb8b3a607d1d49c35d6ece1a..b4931a9649499def07c0dd429e1348bfd2566f93 100644 --- a/mindspore/lite/src/common/ops/populate/crop_populate.cc +++ b/mindspore/lite/src/common/ops/populate/crop_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateCropParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Crop primitive is nullptr!"); auto value = primitive->value_as_Crop(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; @@ -30,7 +30,7 @@ OpParameter *PopulateCropParameter(const void *prim) { auto *param = reinterpret_cast(malloc(sizeof(CropParameter))); if (param == nullptr) { - MS_LOG(ERROR) << "malloc CropParameter failed."; + MS_LOG(ERROR) << "malloc CropParameter failed!"; return nullptr; } memset(param, 0, sizeof(CropParameter)); diff --git a/mindspore/lite/src/common/ops/populate/cumsum_populate.cc b/mindspore/lite/src/common/ops/populate/cumsum_populate.cc index 76fc45a3658d3691911ae1aee63a9f39a946ff2a..43c1bce9e4901ae75a64f1986cc54948616e3b8a 100644 --- a/mindspore/lite/src/common/ops/populate/cumsum_populate.cc +++ b/mindspore/lite/src/common/ops/populate/cumsum_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateCumSumParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Cumsum primitive is nullptr!"); auto value = primitive->value_as_CumSum(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/default_populate.cc b/mindspore/lite/src/common/ops/populate/default_populate.cc index a5a7cbd3f46633c1b36c78daf84dbe82f0bc328f..ebb0493447f570cf220f1beb0dba9b7d498499df 100644 --- a/mindspore/lite/src/common/ops/populate/default_populate.cc +++ b/mindspore/lite/src/common/ops/populate/default_populate.cc @@ -23,8 +23,7 @@ namespace mindspore { namespace lite { OpParameter *DefaultPopulateParameter(const void *prim) { auto *primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); - + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Primitive is nullptr!"); auto *param = static_cast(malloc(sizeof(OpParameter))); if (param == nullptr) { MS_LOG(ERROR) << "Malloc OpParameter failed."; diff --git a/mindspore/lite/src/common/ops/populate/depth_to_space_populate.cc b/mindspore/lite/src/common/ops/populate/depth_to_space_populate.cc index 0c7f6a6bed3d4b96470b80eb18bc484dc30a637c..812cd116d427fec4fa06ccac52e5fa152a9fdf2f 100644 --- a/mindspore/lite/src/common/ops/populate/depth_to_space_populate.cc +++ b/mindspore/lite/src/common/ops/populate/depth_to_space_populate.cc @@ -22,7 +22,7 @@ namespace lite { namespace { OpParameter *PopulateDepthToSpaceParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "DepthToSpace primitive is nullptr!"); auto value = primitive->value_as_DepthToSpace(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/detection_post_process_populate.cc b/mindspore/lite/src/common/ops/populate/detection_post_process_populate.cc index 6cfdb35cd56a85c0a9e9cbbadc2973a930f7fed7..aa0b675983de94b569601ce334a8fc9cbd64d1bf 100644 --- a/mindspore/lite/src/common/ops/populate/detection_post_process_populate.cc +++ b/mindspore/lite/src/common/ops/populate/detection_post_process_populate.cc @@ -22,6 +22,7 @@ namespace lite { OpParameter *PopulateDetectionPostProcessParameter(const void *prim) { auto primitive = static_cast(prim); MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "DetectionPostProcess primitive is nullptr!"); auto value = primitive->value_as_DetectionPostProcess(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc b/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc index 8e3933209d3bef9408dd70c5e5fe9a5e59208f00..10864214abb56c311eb6ac6762ecf578dddd6ded 100644 --- a/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc +++ b/mindspore/lite/src/common/ops/populate/dynamic_quant_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateDynamicQuantParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "DynamicQuant primitive is nullptr!"); auto value = primitive->value_as_DynamicQuant(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/eltwise_populate.cc b/mindspore/lite/src/common/ops/populate/eltwise_populate.cc index 9136dc2c9c28350f93c654795ad0a4bc8e70d343..4ed7f58fe6b1c82f38ed3e5b17b0d6bcfcdf0f1f 100644 --- a/mindspore/lite/src/common/ops/populate/eltwise_populate.cc +++ b/mindspore/lite/src/common/ops/populate/eltwise_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateEltwiseParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Eltwise primitive is nullptr!"); auto value = primitive->value_as_Eltwise(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/embedding_lookup_populate.cc b/mindspore/lite/src/common/ops/populate/embedding_lookup_populate.cc index 87b56c026bc64a5b0015d17f0f255cbc71b3b532..74cda9628c9d013b623b73b3567e0c36ea10319b 100644 --- a/mindspore/lite/src/common/ops/populate/embedding_lookup_populate.cc +++ b/mindspore/lite/src/common/ops/populate/embedding_lookup_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateEmbeddingLookupParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "EmbeddingLookup primitive is nullptr!"); auto value = primitive->value_as_EmbeddingLookupFusion(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/fill_populate.cc b/mindspore/lite/src/common/ops/populate/fill_populate.cc index 0ce42b3187b4947756907d11d2f886e63d1fc7d2..1934e1c1580123b96de103898ee54d9439fb87bd 100644 --- a/mindspore/lite/src/common/ops/populate/fill_populate.cc +++ b/mindspore/lite/src/common/ops/populate/fill_populate.cc @@ -21,8 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateFillParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); - + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Fill primitive is nullptr!"); auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); if (param == nullptr) { MS_LOG(ERROR) << "malloc FillParameter failed."; diff --git a/mindspore/lite/src/common/ops/populate/full_connection_populate.cc b/mindspore/lite/src/common/ops/populate/full_connection_populate.cc index 30106e647877b27f11dae1f449562ed5e811db9a..708ec6becaea6386b6c744a6c87de6fdd1a65519 100644 --- a/mindspore/lite/src/common/ops/populate/full_connection_populate.cc +++ b/mindspore/lite/src/common/ops/populate/full_connection_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateFullconnectionParameter(const void *prim) { auto *primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Fullconnection primitive is nullptr!"); auto value = primitive->value_as_FullConnection(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/fused_batchnorm_populate.cc b/mindspore/lite/src/common/ops/populate/fused_batchnorm_populate.cc index a23fb7077993305d945f4a48416d1c857e832ebd..1dad598cec206501ee939beacacf5e7e87786006 100644 --- a/mindspore/lite/src/common/ops/populate/fused_batchnorm_populate.cc +++ b/mindspore/lite/src/common/ops/populate/fused_batchnorm_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateFusedBatchNorm(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "FusedBatchNorm primitive is nullptr!"); auto value = primitive->value_as_FusedBatchNorm(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/gather_d_populate.cc b/mindspore/lite/src/common/ops/populate/gather_d_populate.cc index b05fcfeecd94fd25827225e3576bcd86eae0d519..ce039cb4dddc4bd7903fac73be24114db1fed1f8 100644 --- a/mindspore/lite/src/common/ops/populate/gather_d_populate.cc +++ b/mindspore/lite/src/common/ops/populate/gather_d_populate.cc @@ -22,8 +22,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateGatherDParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); - + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "GatherD primitive is nullptr!"); auto *param = reinterpret_cast(malloc(sizeof(GatherParameter))); if (param == nullptr) { MS_LOG(ERROR) << "malloc GatherParameter failed."; diff --git a/mindspore/lite/src/common/ops/populate/gather_nd_populate.cc b/mindspore/lite/src/common/ops/populate/gather_nd_populate.cc index 980a1adfa91cb813d3e9b09504e8d9d271e26a9f..ed41175fd8ded4e49eff83ff69b16ad9895dca25 100644 --- a/mindspore/lite/src/common/ops/populate/gather_nd_populate.cc +++ b/mindspore/lite/src/common/ops/populate/gather_nd_populate.cc @@ -21,8 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateGatherNdParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); - + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "GatherND primitive is nullptr!"); auto *param = reinterpret_cast(malloc(sizeof(GatherNdParameter))); if (param == nullptr) { MS_LOG(ERROR) << "malloc GatherNdParameter failed."; diff --git a/mindspore/lite/src/common/ops/populate/gather_populate.cc b/mindspore/lite/src/common/ops/populate/gather_populate.cc index 7e19ccd904287767550f064a1c4c5fe18e920fd5..6efcdfb817ebd0ed5f97c6638075e257da518cd0 100644 --- a/mindspore/lite/src/common/ops/populate/gather_populate.cc +++ b/mindspore/lite/src/common/ops/populate/gather_populate.cc @@ -21,8 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateGatherParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); - + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Gather primitive is nullptr!"); auto *param = reinterpret_cast(malloc(sizeof(GatherParameter))); if (param == nullptr) { MS_LOG(ERROR) << "malloc GatherParameter failed."; diff --git a/mindspore/lite/src/common/ops/populate/group_norm_populate.cc b/mindspore/lite/src/common/ops/populate/group_norm_populate.cc index c832e705fe8d3174917bf9cc7b9a0def505580f0..08b13533586846fd697bedc50cd3697e5402c484 100644 --- a/mindspore/lite/src/common/ops/populate/group_norm_populate.cc +++ b/mindspore/lite/src/common/ops/populate/group_norm_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateIGroupNormParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "GroupNorm primitive is nullptr!"); auto value = primitive->value_as_GroupNormFusion(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/gru_populate.cc b/mindspore/lite/src/common/ops/populate/gru_populate.cc index ed157b6b8d193a1694641ab1ee872883803018ad..70f94dcf56c91eb265df508ed476ceb4a6ffed87 100644 --- a/mindspore/lite/src/common/ops/populate/gru_populate.cc +++ b/mindspore/lite/src/common/ops/populate/gru_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateGruParameter(const void *prim) { auto *primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Gru primitive is nullptr!"); auto value = primitive->value_as_GRU(); if (value == nullptr) { MS_LOG(ERROR) << "param is nullptr."; diff --git a/mindspore/lite/src/common/ops/populate/instance_norm_populate.cc b/mindspore/lite/src/common/ops/populate/instance_norm_populate.cc index 71acd6e378f8094db64a013bd15cfd10dc1a4210..5e40ea3e7cbe7672c20bd99c062a0642aa4b136f 100644 --- a/mindspore/lite/src/common/ops/populate/instance_norm_populate.cc +++ b/mindspore/lite/src/common/ops/populate/instance_norm_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateInstanceNormParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "InstanceNorm primitive is nullptr!"); auto value = primitive->value_as_InstanceNorm(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/l2_norm_populate.cc b/mindspore/lite/src/common/ops/populate/l2_norm_populate.cc index c1dc48da0f457ebe53a12cc0ce50f6bbb8dbcffc..601b47597090c5900be20d944bda505197761905 100644 --- a/mindspore/lite/src/common/ops/populate/l2_norm_populate.cc +++ b/mindspore/lite/src/common/ops/populate/l2_norm_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateL2NormParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "L2Norm primitive is nullptr!"); auto value = primitive->value_as_L2NormalizeFusion(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/layer_norm_populate.cc b/mindspore/lite/src/common/ops/populate/layer_norm_populate.cc index 9da07bfc69a9d2225561800aa65ad4515dfe473f..b7916f4b76a530c8217319a0637f1e0fa1cf2144 100644 --- a/mindspore/lite/src/common/ops/populate/layer_norm_populate.cc +++ b/mindspore/lite/src/common/ops/populate/layer_norm_populate.cc @@ -22,7 +22,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateLayerNormParameter(const void *prim) { auto *primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "LayerNorm primitive is nullptr!"); auto value = primitive->value_as_LayerNormFusion(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/log_softmax_populate.cc b/mindspore/lite/src/common/ops/populate/log_softmax_populate.cc index 40ae66b368b226e1a5ca35a588746696f8216aba..37df191dfff85827ecce2ac0ea5001d2dd38c3fb 100644 --- a/mindspore/lite/src/common/ops/populate/log_softmax_populate.cc +++ b/mindspore/lite/src/common/ops/populate/log_softmax_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateLogSoftmaxParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "LogSoftmax primitive is nullptr!"); auto value = primitive->value_as_LogSoftmax(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/lstm_populate.cc b/mindspore/lite/src/common/ops/populate/lstm_populate.cc index b3a85b64b57dfa11498420c72729954aa7294b11..0a0ef25ec6503053db81f4da4bdf975e81b7f860 100644 --- a/mindspore/lite/src/common/ops/populate/lstm_populate.cc +++ b/mindspore/lite/src/common/ops/populate/lstm_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateLstmParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "LSTM primitive is nullptr!"); auto value = primitive->value_as_LSTM(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr."; diff --git a/mindspore/lite/src/common/ops/populate/mfcc_populate.cc b/mindspore/lite/src/common/ops/populate/mfcc_populate.cc index 3b7fc3d8860300af2b387ca30e1ef249e5a1bdcc..0b904ec0a25bd88777f29e374e511cd9995770ca 100644 --- a/mindspore/lite/src/common/ops/populate/mfcc_populate.cc +++ b/mindspore/lite/src/common/ops/populate/mfcc_populate.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateMfccParameter(const void *prim) { auto *primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Mfcc primitive is nullptr!"); auto value = primitive->value_as_Mfcc(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/mul_populate.cc b/mindspore/lite/src/common/ops/populate/mul_populate.cc index 3b3c5df038c7b54f38d4da731f70d0effb7cd980..a2e23faec1e8452715be07426ceeff3b4e8ddca9 100644 --- a/mindspore/lite/src/common/ops/populate/mul_populate.cc +++ b/mindspore/lite/src/common/ops/populate/mul_populate.cc @@ -22,7 +22,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateMulParameter(const void *prim) { auto *primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Mul primitive is nullptr!"); auto mul_param = primitive->value_as_MulFusion(); if (mul_param == nullptr) { MS_LOG(ERROR) << "MulFusion param is nullptr!"; diff --git a/mindspore/lite/src/common/ops/populate/nllloss_populate.cc b/mindspore/lite/src/common/ops/populate/nllloss_populate.cc index 9a3c9f44e1c6012e96f7c9c5302cdfc5f4dcd681..6b8896a705bc49c18cb25b01121121b606240db7 100644 --- a/mindspore/lite/src/common/ops/populate/nllloss_populate.cc +++ b/mindspore/lite/src/common/ops/populate/nllloss_populate.cc @@ -26,7 +26,7 @@ namespace mindspore { namespace lite { OpParameter *PopulateNLLLossParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "NLLLoss primitive is nullptr!"); Reduction reduction; if (primitive->value_type() == PrimitiveType_NLLLoss) { auto value = primitive->value_as_NLLLoss(); diff --git a/mindspore/lite/src/common/ops/populate/one_hot_populate.cc b/mindspore/lite/src/common/ops/populate/one_hot_populate.cc index 18caaa3d688d4559eb7ac72f1bf42ff99be0ea91..67551deb8a36d98ae5b0b430226697a891095317 100644 --- a/mindspore/lite/src/common/ops/populate/one_hot_populate.cc +++ b/mindspore/lite/src/common/ops/populate/one_hot_populate.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2021 Huawei Technologies Co., Ltd + * Copyright 2019-2024 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,10 +21,10 @@ namespace mindspore { namespace lite { OpParameter *PopulateOneHotParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "OneHot primitive is nullptr!"); auto value = primitive->value_as_OneHot(); if (value == nullptr) { - MS_LOG(ERROR) << "value is nullptr"; + MS_LOG(ERROR) << "value is nullptr!"; return nullptr; } diff --git a/mindspore/lite/src/common/ops/populate/oneslike_populate.cc b/mindspore/lite/src/common/ops/populate/oneslike_populate.cc index 15c8ac6278a2d3dde065ee42b20dbcad112f1ec2..ee2a1bfbafbdb7843a4c579977e0653608c5332b 100644 --- a/mindspore/lite/src/common/ops/populate/oneslike_populate.cc +++ b/mindspore/lite/src/common/ops/populate/oneslike_populate.cc @@ -20,11 +20,10 @@ namespace mindspore { namespace lite { OpParameter *PopulateOnesLikeParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); - + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "OnesLike primitive is nullptr!"); auto *param = reinterpret_cast(malloc(sizeof(OpParameter))); if (param == nullptr) { - MS_LOG(ERROR) << "malloc OpParameter failed."; + MS_LOG(ERROR) << "malloc OpParameter failed!"; return nullptr; } memset(param, 0, sizeof(OpParameter)); diff --git a/mindspore/lite/src/common/ops/populate/p_relu_populate.cc b/mindspore/lite/src/common/ops/populate/p_relu_populate.cc index cda27de985a530b9a9b1ee6964164adc034f2a51..dfdd6d20cefdb66cbf99addaad1c298c18f6ad1a 100644 --- a/mindspore/lite/src/common/ops/populate/p_relu_populate.cc +++ b/mindspore/lite/src/common/ops/populate/p_relu_populate.cc @@ -20,11 +20,11 @@ using mindspore::schema::PrimitiveType_PReLUFusion; namespace mindspore { namespace lite { OpParameter *PopulatePReLUParameter(const void *prim) { - MS_ASSERT(prim != nullptr); auto primitive = static_cast(prim); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "PReLU primitive is nullptr!"); auto value = primitive->value_as_PReLUFusion(); if (value == nullptr) { - MS_LOG(ERROR) << "value is nullptr"; + MS_LOG(ERROR) << "value is nullptr!"; return nullptr; } diff --git a/mindspore/lite/src/common/ops/populate/pooling_populate.cc b/mindspore/lite/src/common/ops/populate/pooling_populate.cc index dd9fd519fb38e8a3bbbb76404fa288ccaaab4a80..dfda3825838d024c5c81e92cd495605031ce80e0 100644 --- a/mindspore/lite/src/common/ops/populate/pooling_populate.cc +++ b/mindspore/lite/src/common/ops/populate/pooling_populate.cc @@ -127,7 +127,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) { OpParameter *PopulateMaxPoolParameter(const void *primitive) { auto pooling_prim = static_cast(primitive); - MS_ASSERT(pooling_prim != nullptr); + MS_CHECK_TRUE_MSG(pooling_prim != nullptr, nullptr, "MaxPool primitive is nullptr!"); auto value = pooling_prim->value_as_MaxPoolFusion(); if (value == nullptr) { MS_LOG(ERROR) << "value is nullptr"; diff --git a/mindspore/lite/src/common/ops/populate/power_populate.cc b/mindspore/lite/src/common/ops/populate/power_populate.cc index 2559626bc6fbed9f32aa7e555973ad0d0a7e5e85..eb9f402b2d6df841c6f9ce87e8572a6a41f8bcca 100644 --- a/mindspore/lite/src/common/ops/populate/power_populate.cc +++ b/mindspore/lite/src/common/ops/populate/power_populate.cc @@ -21,16 +21,16 @@ namespace mindspore { namespace lite { OpParameter *PopulatePowerParameter(const void *prim) { auto primitive = static_cast(prim); - MS_ASSERT(primitive != nullptr); + MS_CHECK_TRUE_MSG(primitive != nullptr, nullptr, "Power primitive is nullptr!"); auto value = primitive->value_as_PowFusion(); if (value == nullptr) { - MS_LOG(ERROR) << "value is nullptr"; + MS_LOG(ERROR) << "value is nullptr!"; return nullptr; } auto *param = reinterpret_cast(malloc(sizeof(PowParameter))); if (param == nullptr) { - MS_LOG(ERROR) << "malloc PowParameter failed."; + MS_LOG(ERROR) << "malloc PowParameter failed!"; return nullptr; } memset(param, 0, sizeof(PowParameter)); diff --git a/mindspore/lite/src/extendrt/CMakeLists.txt b/mindspore/lite/src/extendrt/CMakeLists.txt index fee69247285cc886a048be968385a5f794697fdc..e458948de36fd30a1a2ba7c9f81551870253a017 100644 --- a/mindspore/lite/src/extendrt/CMakeLists.txt +++ b/mindspore/lite/src/extendrt/CMakeLists.txt @@ -125,6 +125,7 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE OR MSLITE_ENABLE_CLOUD_INFERENCE) endif() include(${LITE_DIR}/cmake/ccsrc_extendrt.cmake) + include(${LITE_DIR}/cmake/pocketfft.cmake) add_library(mindspore-extendrt SHARED ${MSLITE_EXTEND_RUNTIME_SRC} ${MSLITE_EXTEND_CPU_RUNTIME_SRC}) add_dependencies(mindspore-extendrt lite_src_common_mid) diff --git a/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc b/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc index aad58f4901e0c3dfc3ef138dc430bafeac268d73..d9e5e6510041a4227c1c6773016e193a1e8cce82 100644 --- a/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc +++ b/mindspore/lite/src/extendrt/cxx_api/model/model_impl.cc @@ -132,15 +132,14 @@ std::unordered_map kStr2FormatMap{{"DEFAULT_FORM {"NC8HW8", mindspore::Format::NC8HW8}}; Status PrimitivePyToC(const FuncGraphPtr &func_graph) { - MS_ASSERT(func_graph != nullptr); auto node_list = TopoSort(func_graph->get_return()); for (auto &node : node_list) { - MS_ASSERT(node != nullptr); + MS_EXCEPTION_IF_NULL(node); if (!utils::isa(node)) { continue; } auto cnode = node->cast(); - MS_ASSERT(cnode != nullptr); + MS_EXCEPTION_IF_NULL(cnode); // judge if primitive is PrimitivePy auto primpy_ptr = GetValueNode(cnode->input(0)); @@ -215,7 +214,10 @@ ConverterPlugin::ConverterFunc ConverterPlugin::GetConverterFunc() { } ConverterPlugin::ConverterFunc ConverterPlugin::GetConverterFuncInner() { -#ifndef _WIN32 +#ifdef _WIN32 + MS_LOG(ERROR) << "Not support libruntime_convert_plugin.so in Windows"; + return nullptr; +#else if (converter_func_ == nullptr) { std::string plugin_path; auto ret = DLSoPath({"libmindspore-lite.so", "_c_lite"}, "libruntime_convert_plugin.so", &plugin_path); @@ -232,9 +234,6 @@ ConverterPlugin::ConverterFunc ConverterPlugin::GetConverterFuncInner() { converter_func_ = reinterpret_cast(function); } return converter_func_; -#else - MS_LOG(ERROR) << "Not support libruntime_convert_plugin.so in Windows"; - return nullptr; #endif } @@ -244,7 +243,7 @@ FuncGraphPtr ModelImpl::LoadGraphByBufferImpl(const void *model_buff, size_t mod const std::shared_ptr &model_context, const std::string &model_path) { if (model_type != kMindIR) { - MS_LOG(ERROR) << "Invalid model type"; + MS_LOG(ERROR) << "Invalid model type " << model_type; return nullptr; } MS_CHECK_TRUE_MSG(model_context != nullptr, nullptr, "Invalid context pointers."); @@ -313,7 +312,7 @@ Status ModelImpl::UpdateSharingWorkspaceConfig(const void *model_buff, size_t mo MS_LOG(INFO) << "model_sharing_flag: " << model_sharing_flag; auto ret = UpdateConfig("inner_common", std::make_pair("inner_sharing_workspace", "true")); if (ret != kSuccess) { - MS_LOG(ERROR) << "UpdateConfig failed."; + MS_LOG(ERROR) << "UpdateConfig failed!ret=" << ret; return ret; } } @@ -345,8 +344,8 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M return kLiteError; } std::lock_guard lock(mutex_); - if (session_) { - MS_LOG(ERROR) << "Model has been called Build"; + if (session_ != nullptr) { + MS_LOG(ERROR) << "Model has been built already!"; return kLiteModelRebuild; } if (model_context == nullptr) { @@ -362,7 +361,7 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M UpdateProvider(); auto status = UpdateSharingWorkspaceConfig(model_buff, model_size, model_path); if (status != kSuccess) { - MS_LOG(ERROR) << "UpdateSharingWorkspaceConfig failed."; + MS_LOG(ERROR) << "UpdateSharingWorkspaceConfig failed!ret=" << status; return kLiteError; } auto mindir_path = GetConfig(lite::kConfigModelFileSection, lite::kConfigMindIRPathKey); @@ -372,14 +371,14 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M } session_ = InferSession::CreateSession(model_context, config_info_); if (session_ == nullptr) { - MS_LOG(ERROR) << "Create session failed."; + MS_LOG(ERROR) << "Create session failed!"; return kLiteError; } Status ret; if (model_type == kMindIR_Lite) { ret = session_->CompileGraph(model_buff, model_size, &graph_id_); if (ret != kSuccess) { - MS_LOG(ERROR) << "compile graph failed."; + MS_LOG(ERROR) << "compile graph failed!ret=" << ret; return ret; } return kSuccess; @@ -401,20 +400,20 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M // convert and optimize func graph to infer ret = ConvertGraphOnline(func_graph, model_context); if (ret != kSuccess) { - MS_LOG(ERROR) << "convert graph failed."; + MS_LOG(ERROR) << "convert graph failed!ret=" << ret; return ret; } } else { // new a func graph contains a custom node, which is the data-flow graph. func_graph = CreateFuncGraphFromDataFlow(model_buff, model_size); if (func_graph == nullptr) { - MS_LOG(ERROR) << "Create func graph failed from data flow graph."; + MS_LOG(ERROR) << "Create func graph failed from data flow graph!"; return kLiteError; } } ret = session_->CompileGraph(func_graph, nullptr, 0, &graph_id_); if (ret != kSuccess) { - MS_LOG(ERROR) << "compile graph failed."; + MS_LOG(ERROR) << "compile graph failed!"; return ret; } std::shared_lock build_lock(g_model_converter_lock); @@ -423,8 +422,8 @@ Status ModelImpl::BuildByBufferImpl(const void *model_buff, size_t model_size, M Status ModelImpl::Build(const FuncGraphPtr &func_graph, const std::shared_ptr &model_context) { std::lock_guard lock(mutex_); - if (session_) { - MS_LOG(ERROR) << "Model has been called Build"; + if (session_ != nullptr) { + MS_LOG(ERROR) << "Model has been built already!"; return kLiteModelRebuild; } if (model_context == nullptr) { @@ -451,13 +450,13 @@ Status ModelImpl::Build(const FuncGraphPtr &func_graph, const std::shared_ptr &model_context) { - MS_ASSERT(func_graph != nullptr); auto device_list = model_context->MutableDeviceInfo(); for (const auto &device_info : device_list) { if (device_info == nullptr) { diff --git a/mindspore/lite/src/extendrt/delegate/ascend_ge/ge_utils.cc b/mindspore/lite/src/extendrt/delegate/ascend_ge/ge_utils.cc index aceabf0e65d165e7b8dc279b9beece9affc9cd0c..13552bf53c50f6d2b8289389593b62381f70b155 100644 --- a/mindspore/lite/src/extendrt/delegate/ascend_ge/ge_utils.cc +++ b/mindspore/lite/src/extendrt/delegate/ascend_ge/ge_utils.cc @@ -23,7 +23,7 @@ #include "tools/converter/adapter/acl/mapper/primitive_mapper_register.h" #include "mindspore/core/ops/op_name.h" #include "src/common/common.h" -#include "transform/symbol/acl_base_symbol.h" +#include "transform/symbol/acl_rt_symbol.h" #include "transform/symbol/symbol_utils.h" namespace mindspore { diff --git a/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.cc b/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.cc index 7f40ba8f4352d7ed2f228258dbe98411e354d90c..5150506feafd1122b768ff859a2d906afcce82d3 100644 --- a/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.cc +++ b/mindspore/lite/src/extendrt/kernel/ascend/model/model_process.cc @@ -558,7 +558,11 @@ bool ModelProcess::Load(const void *om_data, size_t om_data_size) { return false; } MS_LOG(INFO) << "work_size: " << work_size << " weight_size: " << weight_size; - AclMemManager::GetInstance().UpdateWorkspace(work_size, weight_size); + auto ret = AclMemManager::GetInstance().UpdateWorkspace(work_size, weight_size, device_id_); + if (ret != lite::RET_OK) { + MS_LOG(ERROR) << "update workspace failed, ret = " << ret; + return false; + } return true; } else if (options_->multi_model_sharing_mem) { MS_LOG(INFO) << "using sharing mem by model group."; @@ -568,7 +572,7 @@ bool ModelProcess::Load(const void *om_data, size_t om_data_size) { return false; } AclModelMemInfo acl_work_mem_info; - auto ret = AclMemManager::GetInstance().GetModelWorkMem(&acl_work_mem_info); + auto ret = AclMemManager::GetInstance().GetModelWorkMem(&acl_work_mem_info, device_id_); if (ret != lite::RET_OK) { MS_LOG(ERROR) << "Get work mem failed."; return ret; diff --git a/mindspore/lite/src/litert/kernel/ascend/src/acl_mem_manager.cc b/mindspore/lite/src/litert/kernel/ascend/src/acl_mem_manager.cc index cfcab982fd0842b95cd64101ef8774dabcc46504..f5e6317ab3d8d85683a2cbaf3808019112fa3202 100644 --- a/mindspore/lite/src/litert/kernel/ascend/src/acl_mem_manager.cc +++ b/mindspore/lite/src/litert/kernel/ascend/src/acl_mem_manager.cc @@ -15,6 +15,7 @@ */ #include "src/litert/kernel/ascend/src/acl_mem_manager.h" +#include #include #include #include @@ -25,33 +26,57 @@ namespace mindspore::kernel { namespace acl { -void AclMemManager::UpdateWorkspace(size_t work_size, size_t weight_size) { - if (work_size > work_mem_info_.mem_size) { - work_mem_info_.mem_size = work_size; - MS_LOG(DEBUG) << "Update work_size = " << work_size << " successful."; +STATUS AclMemManager::UpdateWorkspace(size_t work_size, size_t weight_size, int32_t device_id) { + auto it = work_mem_info_map_.find(device_id); + if (it == work_mem_info_map_.end()) { + AclModelMemInfo new_work_mem = {nullptr, 0}; + work_mem_info_map_.insert(std::make_pair(device_id, std::make_pair(new_work_mem, false))); + } else if (it->second.second == true) { + MS_LOG(ERROR) << "Device " << device_id << " has alloc memory!"; + return lite::RET_ERROR; + } + + it = work_mem_info_map_.find(device_id); + if (it == work_mem_info_map_.end()) { + MS_LOG(ERROR) << "Get mem failed!"; + return lite::RET_ERROR; + } + + if (work_size > it->second.first.mem_size) { + it->second.first.mem_size = work_size; + MS_LOG(DEBUG) << "Update work_size = " << it->second.first.mem_size << " successful."; } if (weight_size > weight_mem_info_.mem_size) { weight_mem_info_.mem_size = weight_size; MS_LOG(DEBUG) << "Update weight_size = " << weight_size << " successful."; } + return lite::RET_OK; } -STATUS AclMemManager::GetModelWorkMem(AclModelMemInfo *acl_work_mem_info) { +STATUS AclMemManager::GetModelWorkMem(AclModelMemInfo *acl_work_mem_info, int32_t device_id) { std::unique_lock acl_mtx(acl_mem_alloc_mutex_); - if (work_mem_info_.mem_addr == nullptr) { - if (work_mem_info_.mem_size == 0) { + + auto it = work_mem_info_map_.find(device_id); + if (it == work_mem_info_map_.end()) { + MS_LOG(ERROR) << "Get work mem failed!"; + return lite::RET_ERROR; + } + it->second.second = true; + + if (it->second.first.mem_addr == nullptr) { + if (it->second.first.mem_size == 0) { return lite::RET_ERROR; } auto acl_ret = - CALL_ASCEND_API(aclrtMalloc, &work_mem_info_.mem_addr, work_mem_info_.mem_size, ACL_MEM_MALLOC_HUGE_FIRST); + CALL_ASCEND_API(aclrtMalloc, &(it->second.first.mem_addr), it->second.first.mem_size, ACL_MEM_MALLOC_HUGE_FIRST); if (acl_ret != ACL_ERROR_NONE) { MS_LOG(ERROR) << "Call aclrtMalloc failed, err_code = " << acl_ret; return lite::RET_ERROR; } - MS_LOG(DEBUG) << "Malloc max work size is " << work_mem_info_.mem_size; + MS_LOG(DEBUG) << "Malloc max work size is " << it->second.first.mem_size; } - *acl_work_mem_info = work_mem_info_; + *acl_work_mem_info = it->second.first; return lite::RET_OK; } @@ -74,10 +99,12 @@ STATUS AclMemManager::GetModelWeightMem(AclModelMemInfo *acl_weight_mem_info) { } AclMemManager::~AclMemManager() { - if (work_mem_info_.mem_addr != nullptr) { - (void)CALL_ASCEND_API(aclrtFree, work_mem_info_.mem_addr); - work_mem_info_.mem_addr = nullptr; - work_mem_info_.mem_size = 0; + for (auto &mem_info_pair : work_mem_info_map_) { + if (mem_info_pair.second.first.mem_addr != nullptr) { + (void)CALL_ASCEND_API(aclrtFree, mem_info_pair.second.first.mem_addr); + mem_info_pair.second.first.mem_addr = nullptr; + mem_info_pair.second.first.mem_size = 0; + } } if (weight_mem_info_.mem_addr != nullptr) { (void)CALL_ASCEND_API(aclrtFree, weight_mem_info_.mem_addr); diff --git a/mindspore/lite/src/litert/kernel/ascend/src/acl_mem_manager.h b/mindspore/lite/src/litert/kernel/ascend/src/acl_mem_manager.h index b3ec0cf1cb36d7e3db3117fad00bcc54e118fa47..91e908fbe70cac802e8b878117bb94464733b535 100644 --- a/mindspore/lite/src/litert/kernel/ascend/src/acl_mem_manager.h +++ b/mindspore/lite/src/litert/kernel/ascend/src/acl_mem_manager.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "include/errorcode.h" namespace mindspore::kernel { @@ -45,8 +46,8 @@ class AclMemManager { static AclMemManager instance; return instance; } - void UpdateWorkspace(size_t work_size, size_t weight_size); - STATUS GetModelWorkMem(AclModelMemInfo *acl_work_mem_info); + STATUS UpdateWorkspace(size_t work_size, size_t weight_size, int32_t device_id); + STATUS GetModelWorkMem(AclModelMemInfo *acl_work_mem_info, int32_t device_id); STATUS GetModelWeightMem(AclModelMemInfo *acl_weight_mem_info); void Lock() { return acl_execute_mutex_.lock(); } void Unlock() { return acl_execute_mutex_.unlock(); } @@ -54,7 +55,7 @@ class AclMemManager { private: std::mutex acl_mem_alloc_mutex_; std::mutex acl_execute_mutex_; - AclModelMemInfo work_mem_info_ = {nullptr, 0}; + std::map> work_mem_info_map_; AclModelMemInfo weight_mem_info_ = {nullptr, 0}; }; } // namespace acl diff --git a/mindspore/lite/src/litert/kernel/ascend/src/model_infer.cc b/mindspore/lite/src/litert/kernel/ascend/src/model_infer.cc index 2c86119e6632f67d85fb690ffc81e379bb0e9c79..2f59f88ebc21670b8f6a57edd3a0032cbe768e02 100644 --- a/mindspore/lite/src/litert/kernel/ascend/src/model_infer.cc +++ b/mindspore/lite/src/litert/kernel/ascend/src/model_infer.cc @@ -51,15 +51,15 @@ STATUS ModelInfer::Init() { MS_LOG(ERROR) << "Acl init failed."; return lite::RET_ERROR; } - int32_t device_id = options_.device_id; - aclError ret = CALL_ASCEND_API(aclrtSetDevice, device_id); + device_id_ = options_.device_id; + aclError ret = CALL_ASCEND_API(aclrtSetDevice, device_id_); if (ret != ACL_ERROR_NONE) { - MS_LOG(ERROR) << "Acl open device " << device_id << " failed, ret " << ret; + MS_LOG(ERROR) << "Acl open device " << device_id_ << " failed, ret " << ret; return lite::RET_ERROR; } - MS_LOG(INFO) << "Open device " << device_id << " success."; + MS_LOG(INFO) << "Open device " << device_id_ << " success."; - ret = CALL_ASCEND_API(aclrtCreateContext, &context_, device_id); + ret = CALL_ASCEND_API(aclrtCreateContext, &context_, device_id_); if (ret != ACL_ERROR_NONE) { MS_LOG(ERROR) << "Acl create context failed, ret " << ret; return lite::RET_ERROR; @@ -76,7 +76,7 @@ STATUS ModelInfer::Init() { model_process_.SetIsDevice(is_device); MS_LOG(INFO) << "Get run mode success is device input/output " << is_device; - MS_LOG(INFO) << "Init acl success, device id " << device_id; + MS_LOG(INFO) << "Init acl success, device id " << device_id_; init_flag_ = true; return lite::RET_OK; } @@ -164,12 +164,12 @@ STATUS ModelInfer::LoadAclModel(const Buffer &om_data) { MS_LOG(ERROR) << "Call aclmdlQuerySizeFromMem failed, ret = " << acl_ret; return lite::RET_ERROR; } - AclMemManager::GetInstance().UpdateWorkspace(work_size, weight_size); + AclMemManager::GetInstance().UpdateWorkspace(work_size, weight_size, device_id_); return lite::RET_OK; } else if (IsEnableMultiModelSharingMem()) { AclModelMemInfo acl_work_mem_info; AclModelMemInfo acl_weight_mem_info; - auto ret = AclMemManager::GetInstance().GetModelWorkMem(&acl_work_mem_info); + auto ret = AclMemManager::GetInstance().GetModelWorkMem(&acl_work_mem_info, device_id_); if (ret != lite::RET_OK) { MS_LOG(ERROR) << "Get work mem failed."; return ret; diff --git a/mindspore/lite/src/litert/kernel/ascend/src/model_infer.h b/mindspore/lite/src/litert/kernel/ascend/src/model_infer.h index 47fd43a553be0948811d43cc6f8f81dbf5ac9e9f..4224af9d316475992d7c6e8779455ca883764331 100644 --- a/mindspore/lite/src/litert/kernel/ascend/src/model_infer.h +++ b/mindspore/lite/src/litert/kernel/ascend/src/model_infer.h @@ -55,6 +55,7 @@ class ModelInfer { bool init_flag_; bool load_flag_; + int32_t device_id_; std::string device_type_; aclrtContext context_; Buffer om_data_; diff --git a/mindspore/lite/src/litert/kernel/cpu/int8/activation_int8.cc b/mindspore/lite/src/litert/kernel/cpu/int8/activation_int8.cc index 9bc410e7308591f73736f7f527b674b3856a7f5b..10b6cd5a3b844f659ee79656681669adb2cba172 100644 --- a/mindspore/lite/src/litert/kernel/cpu/int8/activation_int8.cc +++ b/mindspore/lite/src/litert/kernel/cpu/int8/activation_int8.cc @@ -16,6 +16,7 @@ #include "src/litert/kernel/cpu/int8/relux_int8.h" #include "src/litert/kernel/cpu/int8/hswish_int8.h" +#include "src/litert/kernel/cpu/int8/swish_int8.h" #include "src/litert/kernel/cpu/int8/sigmoid_int8.h" #include "src/litert/kernel/cpu/int8/tanh_int8.h" #include "src/litert/kernel/cpu/int8/leaky_relu_int8.h" @@ -50,6 +51,9 @@ kernel::LiteKernel *CpuActivationInt8KernelCreator(const std::vectordata_type() != mindspore::kNumberTypeInt8 || - out_tensors_[0]->data_type() != mindspore::kNumberTypeInt8) { - MS_LOG(ERROR) << "Datatype error, input0 data_type is " << in_tensors_[0]->data_type() << ", output data_type is " - << out_tensors_[0]->data_type(); - return RET_ERROR; - } in_quant_arg_ = reinterpret_cast(malloc(sizeof(QuantArg))); if (in_quant_arg_ == nullptr) { MS_LOG(ERROR) << "Malloc QuantArg for argmin or argmax int8 op failed!"; @@ -64,18 +58,7 @@ int ArgMinMaxInt8CPUKernel::Prepare() { in_quant_arg_->scale_ = in_quant_args.front().scale; in_quant_arg_->zp_ = in_quant_args.front().zeroPoint; - auto *out_tensor = out_tensors_.at(kOutputIndex); - auto out_quant_args = out_tensor->quant_params(); - CHECK_LESS_RETURN(out_quant_args.size(), 1); - out_quant_arg_ = reinterpret_cast(malloc(sizeof(QuantArg))); - out_quant_arg_->scale_ = out_quant_args.front().scale; - out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; - if (out_quant_arg_ == nullptr) { - MS_LOG(ERROR) << "Malloc QuantArg for argmin or argmax int8 op failed!"; - return RET_ERROR; - } - - compute_param_ = reinterpret_cast(sizeof(ArgMinMaxComputeParam)); + compute_param_ = reinterpret_cast(malloc(sizeof(ArgMinMaxComputeParam))); if (compute_param_ == nullptr) { MS_LOG(ERROR) << "Malloc ArgMinMaxComputeParam for argmin or argmax int8 op failed!"; return RET_ERROR; @@ -87,6 +70,28 @@ int ArgMinMaxInt8CPUKernel::Prepare() { compute_param_->out_value_ = param->out_value_; compute_param_->keep_dims_ = param->keep_dims_; + out_quant_arg_ = reinterpret_cast(malloc(sizeof(QuantArg))); + if (out_quant_arg_ == nullptr) { + MS_LOG(ERROR) << "Malloc QuantArg for argmin or argmax int8 op failed!"; + return RET_ERROR; + } + if (out_tensors_.size() == Num2 || compute_param_->out_value_) { + auto *out_tensor = out_tensors_.at(kOutputIndex); + auto out_quant_args = out_tensor->quant_params(); + if (out_quant_args.size() != C1NUM) { + MS_LOG(ERROR) + << "argmin/argmax int8 kernel only supports per-tensor quantization, but now out_quant_args.size() is " + << out_quant_args.size(); + return RET_ERROR; + } + CHECK_LESS_RETURN(out_quant_args.size(), 1); + out_quant_arg_->scale_ = out_quant_args.front().scale; + out_quant_arg_->zp_ = out_quant_args.front().zeroPoint; + } else { // set default quant value + out_quant_arg_->scale_ = 1.0f; + out_quant_arg_->zp_ = 0; + } + if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/litert/kernel/cpu/int8/sigmoid_int8.cc b/mindspore/lite/src/litert/kernel/cpu/int8/sigmoid_int8.cc index 00a3212a2098cc43ca20f28a4e752238b73543f6..698468a4b77e8387833ed405ca74952346086703 100644 --- a/mindspore/lite/src/litert/kernel/cpu/int8/sigmoid_int8.cc +++ b/mindspore/lite/src/litert/kernel/cpu/int8/sigmoid_int8.cc @@ -58,7 +58,12 @@ int SigmoidInt8CPUKernel::Prepare() { } lite::Tensor *input = in_tensors_.at(0); lite::Tensor *output = out_tensors_.at(0); - MS_CHECK_TRUE_RET(!input->quant_params().empty() && !output->quant_params().empty(), RET_ERROR); + if (input->quant_params().size() != C1NUM || output->quant_params().size() != C1NUM) { + MS_LOG(ERROR) + << "sigmoid int8 kernel only supports per-tensor quantization, but now input->quant_params().size() is " + << input->quant_params().size() << ", output->quant_params().size() is " << output->quant_params().size(); + return RET_ERROR; + } const float input_scale = input->quant_params().front().scale; const int32_t input_zp = input->quant_params().front().zeroPoint; const float output_scale = output->quant_params().front().scale; diff --git a/mindspore/lite/src/litert/kernel/cpu/int8/sigmoid_int8.h b/mindspore/lite/src/litert/kernel/cpu/int8/sigmoid_int8.h index 1f383ae6f3938e5a77775e82d105a52a711f48fe..9080852fc18b04b56840ac7337ce1c510269b3d5 100644 --- a/mindspore/lite/src/litert/kernel/cpu/int8/sigmoid_int8.h +++ b/mindspore/lite/src/litert/kernel/cpu/int8/sigmoid_int8.h @@ -34,7 +34,7 @@ class SigmoidInt8CPUKernel : public LiteKernel { int Run() override; int DoActivation(int task_id); - private: + protected: int8_t table_list_[256]{0}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/litert/kernel/cpu/int8/swish_int8.cc b/mindspore/lite/src/litert/kernel/cpu/int8/swish_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..464e673a9fc8ec948bf1351f8979d6ebafb29f03 --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/int8/swish_int8.cc @@ -0,0 +1,72 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/litert/kernel/cpu/int8/swish_int8.h" +#include +#include +#include "nnacl/int8/quantize.h" +#include "src/litert/kernel_registry.h" +#include "include/errorcode.h" + +using mindspore::kernel::KERNEL_ARCH; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::ActivationType_SIGMOID; + +namespace mindspore::kernel { +// Calculate the quantization results of 0-255 in advance +void CalculateSwishTableList(int8_t *table, const float input_scale, const int32_t input_zp, const float output_scale, + const int32_t output_zp) { + int32_t min_value = std::numeric_limits::min(); + int32_t max_value = std::numeric_limits::max(); + for (int i = min_value; i < max_value; ++i) { + const float real_input_value = input_scale * (i - input_zp); + const float sigmoid_value = 1.0f / (1.0f + std::exp(-real_input_value)); + const int32_t quantized = (std::round(real_input_value * sigmoid_value / output_scale) + output_zp); + int8_t out_value = static_cast(std::max(std::min(quantized, max_value), min_value)); + uint8_t index = static_cast(i); + table[index] = out_value; + } +} + +int SwishInt8CPUKernel::Prepare() { + CHECK_LESS_RETURN(in_tensors_.size(), C1NUM); + CHECK_LESS_RETURN(out_tensors_.size(), C1NUM); + CHECK_NULL_RETURN(in_tensors_[0]); + CHECK_NULL_RETURN(out_tensors_[0]); + if (in_tensors_[0]->data_type() != mindspore::kNumberTypeInt8 || + out_tensors_[0]->data_type() != mindspore::kNumberTypeInt8) { + MS_LOG(ERROR) << "Datatype error, input0 data_type is " << in_tensors_[0]->data_type() << ", output data_type is " + << out_tensors_[0]->data_type(); + return RET_ERROR; + } + lite::Tensor *input = in_tensors_.at(0); + lite::Tensor *output = out_tensors_.at(0); + if (input->quant_params().size() != C1NUM || output->quant_params().size() != C1NUM) { + MS_LOG(ERROR) << "swish int8 kernel only supports per-tensor quantization, but now input->quant_params().size() is " + << input->quant_params().size() << ", output->quant_params().size() is " + << output->quant_params().size(); + return RET_ERROR; + } + const float input_scale = input->quant_params().front().scale; + const int32_t input_zp = input->quant_params().front().zeroPoint; + const float output_scale = output->quant_params().front().scale; + const int32_t output_zp = output->quant_params().front().zeroPoint; + CalculateSwishTableList(table_list_, input_scale, input_zp, output_scale, output_zp); + return RET_OK; +} +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/litert/kernel/cpu/int8/swish_int8.h b/mindspore/lite/src/litert/kernel/cpu/int8/swish_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..7b8ef9ca470df4664bccb9b866aef99a81309415 --- /dev/null +++ b/mindspore/lite/src/litert/kernel/cpu/int8/swish_int8.h @@ -0,0 +1,38 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_INT8_SWISH_INT8_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_INT8_SWISH_INT8_H_ + +#include +#include "src/litert/lite_kernel.h" +#include "src/litert/kernel/cpu/int8/sigmoid_int8.h" +#include "nnacl/int8/softmax_int8.h" +#include "nnacl/int8/quantize.h" + +namespace mindspore::kernel { +class SwishInt8CPUKernel : public SigmoidInt8CPUKernel { + public: + SwishInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx) + : SigmoidInt8CPUKernel(parameter, inputs, outputs, ctx) {} + ~SwishInt8CPUKernel() override = default; + + int Prepare() override; +}; +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_CPU_INT8_SWISH_INT8_H_ diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_concat_adjust.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_concat_adjust.cc new file mode 100644 index 0000000000000000000000000000000000000000..b675dd72a6ef468e138b4d646685b7603d37dbd2 --- /dev/null +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_concat_adjust.cc @@ -0,0 +1,42 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "tools/converter/parser/onnx/onnx_concat_adjust.h" +#include +#include +#include +#include "tools/optimizer/common/gllo_utils.h" + +namespace mindspore::lite { +namespace { +constexpr uint32_t kTwoNum = 2; +} // namespace + +bool OnnxConcatAdjust::Adjust(const FuncGraphPtr &func_graph) { + MS_CHECK_TRUE_RET(func_graph != nullptr, false); + auto cnodes = func_graph->GetOrderedCnodes(); + for (auto &cnode : cnodes) { + if (!opt::CheckPrimitiveType(cnode, prim::kPrimConcat) || cnode->size() != kTwoNum) { + continue; + } + MS_LOG(INFO) << "Del Concat node, node name: " << cnode->cast()->fullname_with_scope() + << ", node size: " << cnode->size(); + auto manager = Manage(func_graph); + MS_CHECK_TRUE_RET(manager != nullptr, false); + (void)manager->Replace(cnode, cnode->cast()->input(1)); + } + return true; +} +} // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_concat_adjust.h b/mindspore/lite/tools/converter/parser/onnx/onnx_concat_adjust.h new file mode 100644 index 0000000000000000000000000000000000000000..1ef0394ab040ad0e48a08e53b6df48ff8a87128b --- /dev/null +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_concat_adjust.h @@ -0,0 +1,28 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_ONNX_ONNX_CONCAT_ADJUST_H_ +#define MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_ONNX_ONNX_CONCAT_ADJUST_H_ +#include +#include + +namespace mindspore::lite { +class OnnxConcatAdjust { + public: + static bool Adjust(const FuncGraphPtr &func_graph); +}; +} // namespace mindspore::lite +#endif // MINDSPORE_LITE_TOOLS_CONVERTER_PARSER_ONNX_ONNX_CONCAT_ADJUST_H_ diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc index b75429f10a934f6e60e67d84fa0b722c2db00fc3..09d9077998c35f9d84b6e424cef266945d10fbc8 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc @@ -42,6 +42,7 @@ #include "tools/converter/parser/onnx/onnx_megatron_op_adjust.h" #include "tools/converter/parser/onnx/onnx_nonzero_adjust.h" #include "tools/converter/parser/onnx/onnx_pad_adjust.h" +#include "tools/converter/parser/onnx/onnx_concat_adjust.h" #include "tools/converter/parser/onnx/onnx_quantize_linear_adjust.h" #include "tools/converter/parser/onnx/onnx_deform_conv2d_adjust.h" #include "tools/converter/parser/onnx/onnx_custom_op_adjust.h" @@ -77,6 +78,11 @@ int Onnx2AnfAdjust(const std::set &all_func_graphs, const converte ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR); return RET_ERROR; } + if (!OnnxConcatAdjust::Adjust(func_graph)) { + MS_LOG(ERROR) << "onnx OnnxConcatOp adjust failed."; + ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR); + return RET_ERROR; + } if (!OnnxNonZeroAdjust::Adjust(func_graph)) { MS_LOG(ERROR) << "onnx nonzero adjust failed."; ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_ERROR); diff --git a/mindspore/python/mindspore/_extends/pijit/__init__.py b/mindspore/python/mindspore/_extends/pijit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..291e5d10cfbfe6b995c28fcb7345cd57b3756878 --- /dev/null +++ b/mindspore/python/mindspore/_extends/pijit/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Helper module for pijit analyze +""" + + +from .pijit_func_white_list import _func_map as pijit_func_white_list_map + + +__all__ = ['pijit_func_white_list_map'] diff --git a/mindspore/python/mindspore/_extends/pijit/pijit_func_white_list.py b/mindspore/python/mindspore/_extends/pijit/pijit_func_white_list.py new file mode 100644 index 0000000000000000000000000000000000000000..762f202bdfa0aa36dc5aa4cb25de9f66c4497df0 --- /dev/null +++ b/mindspore/python/mindspore/_extends/pijit/pijit_func_white_list.py @@ -0,0 +1,264 @@ +# This is the Python adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/). +# +# Copyright 2020-2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""The module of parser python object, called by c++.""" + +import types +import math +import numpy +from mindspore.nn import GraphCell, Cell +from mindspore.ops.primitive import Primitive, constexpr, _primexpr +from mindspore.ops.composite.base import GradOperation, _Grad +from mindspore.ops._primitive_cache import _get_cache_prim +from mindspore.common.api import jit +from mindspore.common.tensor import Tensor +from mindspore.common._register_for_tensor import Registry, tensor_operator_registry +from mindspore._c_expression import MetaFuncGraph_, function_id, Primitive_, PrimitiveFunction_ +from mindspore._c_expression import Tensor as Tensor_ +from mindspore._extends.parse.resources import convert_object_map + + +def _get_after_grad_code(): + """Get the code object of 'after_grad'""" + name = "after_grad" + codes = [] + for cnst in GradOperation.__call__.__code__.co_consts: + if isinstance(cnst, types.CodeType) and cnst.co_name == name: + codes.append(cnst) + for cnst in _Grad.__call__.__code__.co_consts: + if isinstance(cnst, types.CodeType) and cnst.co_name == name: + codes.append(cnst) + assert codes, "check GradOperation, can't find 'after_grad'" + return codes + + +def _get_psjit_code(): + """Get the code object of 'staging_specialize'""" + code = jit.__code__ + for cnst in code.co_consts: + if isinstance(cnst, types.CodeType) and cnst.co_name == "wrap_mindspore": + code = cnst + break + for cnst in code.co_consts: + if isinstance(cnst, types.CodeType) and cnst.co_name == "staging_specialize": + code = cnst + break + assert code is not jit.__code__, "check mindspore.api.jit, can't find 'staging_specialize'" + return code + + +def _get_constexpr_code(): + """Get the code object of '@constexpr'""" + @constexpr + def inner(): + pass + code = inner.__call__.__code__ + # check it before c++ use it + assert isinstance(inner, Primitive) + assert code is not Primitive.__call__.__code__ + return code + + +def _get_primexpr_code(): + """Get the code object of '@_primexpr'""" + @_primexpr + def inner(): + pass + code = inner.__call__.__code__ + # check it before c++ use it + assert isinstance(inner, Primitive) + assert code is not Primitive.__call__.__code__ + return code + + +def _pijit_constexpr(): + """Placeholder for uniqure id""" + + +def _get_ms_api(): + """Get ms api""" + target_types = Cell, types.FunctionType, Primitive_, PrimitiveFunction_ + results = [] + from mindspore.ops import operations as P + from mindspore.ops import functional as F + from mindspore.ops import composite as C + mods = P, F, C + for mod in mods: + for i in mod.__all__: + f = getattr(mod, i) + if isinstance(f, target_types): + results.append(f) + for f in tensor_operator_registry.values(): + if isinstance(f, target_types): + results.append(f) + return results + + +psjit_code = _get_psjit_code() +constexpr_code = _get_constexpr_code() +primexpr_code = _get_primexpr_code() + +primitive_key = id(Primitive.__call__) +constexpr_key = id(constexpr_code) +primexpr_key = id(primexpr_code) +meta_func_graph_key = id(MetaFuncGraph_) +pijit_forbidden_key = id(NotImplemented) +pijit_constexpr_key = id(_pijit_constexpr) + +assert function_id(tuple.__getitem__) == function_id(tuple().__getitem__), "check WrapperDescriptor failed" +assert function_id(list.__getitem__) == function_id(list().__getitem__), "check MethodDescriptor failed" +assert function_id(Tensor_.from_numpy) == function_id(Tensor_(1).from_numpy), "check instancemethod failed" +assert function_id(Tensor.astype) == function_id(Tensor(1).astype) == id(Tensor.astype), "check function id failed" +assert function_id(Primitive) == function_id(Primitive) == id(Primitive), "check user defined object id failed" + +FUNC_KEY_EMPTY = 0 # "" +FUNC_KEY_PIJIT_CONSTEXPR = 1 # "pijit.constexpr" +FUNC_KEY_PIJIT_FORBIDDEN = 2 # "pijit.forbidden" +FUNC_KEY_BUILTIN_FUNC = 3 # "builtin.func" +FUNC_KEY_LIST_APPEND = 4 # "list.append" +FUNC_KEY_DICT_POP = 5 # "dict.pop" +FUNC_KEY_PRIMITIVE = 6 # "mindspore._c_expression.Primitive_" +FUNC_KEY_META_FUNCG_RAPH = 7 # "mindspore._c_expression.MetaFuncGraph_" +FUNC_KEY_PSJIT_CODE = 8 # "mindspore.common.api.jit..staging_specialize" +FUNC_KEY_CONSTEXPR = 9 # "mindspore.ops.primitive.constexpr" +FUNC_KEY_PRIMEXPR = 10 # "mindspore.ops.primitive._primexpr" +FUNC_KEY_GET_CACHE_PRIM = 11 # "mindspore.ops._primitive_cache._get_cache_prim" +FUNC_KEY_REGISTRY_GET = 12 # "mindspore.common._register_for_tensor.Registry.get" +FUNC_KEY_TENSOR_ASTYPE = 13 # "mindspore.common.tensor.Tensor.astype" +FUNC_KEY_GRAD_OPERATIONS_CODE = 14 # "mindspore.ops.composite.base._Grad.__call__..after_grad" +FUNC_KEY_PSJIT_CONVERTMAP = 15 # "mindspore._extends.parse.resources.convert_object_map" +FUNC_KEY_GRAPH_CELL = 16 # "mindspore.nn.cell.GraphCell" +FUNC_KEY_MS_API = 17 # mindspore common api + +# Initialized only once. This map will initialize by c++ when start pijit. +# key is customer if fuzzy match. (Primitive, constexpr, primexpr, MetaFuncGraph) +# key is id of code for nest object. (jit..staging_specialize, GradOperation.__call__..after_grad) +# key is id of object for callalbe object. +# key is cfunction pointer for builtin_function or method. (isinstance, tuple.__getitem__, Tensor_.asnumpy) +_func_map = { + # special function + pijit_constexpr_key: FUNC_KEY_PIJIT_CONSTEXPR, + pijit_forbidden_key: FUNC_KEY_PIJIT_FORBIDDEN, + primitive_key: FUNC_KEY_PRIMITIVE, + constexpr_key: FUNC_KEY_CONSTEXPR, + primexpr_key: FUNC_KEY_PRIMEXPR, + meta_func_graph_key: FUNC_KEY_META_FUNCG_RAPH, + id(GraphCell.__call__): FUNC_KEY_GRAPH_CELL, + id(psjit_code): FUNC_KEY_PSJIT_CODE, + id(_get_cache_prim): FUNC_KEY_GET_CACHE_PRIM, + id(Registry.get): FUNC_KEY_REGISTRY_GET, + + # Tensor method + id(Tensor.astype): FUNC_KEY_TENSOR_ASTYPE, + + # types.BuiltinFunctionType + function_id(isinstance): FUNC_KEY_BUILTIN_FUNC, + function_id(issubclass): FUNC_KEY_BUILTIN_FUNC, + function_id(len): FUNC_KEY_BUILTIN_FUNC, + function_id(abs): FUNC_KEY_BUILTIN_FUNC, + function_id(max): FUNC_KEY_BUILTIN_FUNC, + function_id(all): FUNC_KEY_BUILTIN_FUNC, + function_id(any): FUNC_KEY_BUILTIN_FUNC, + function_id(hash): FUNC_KEY_BUILTIN_FUNC, + function_id(id): FUNC_KEY_BUILTIN_FUNC, + function_id(ord): FUNC_KEY_BUILTIN_FUNC, + function_id(callable): FUNC_KEY_BUILTIN_FUNC, + function_id(getattr): FUNC_KEY_BUILTIN_FUNC, + function_id(hasattr): FUNC_KEY_BUILTIN_FUNC, + + # types.MethodDescriptorType, types.WrapperDescriptorType + function_id(tuple.__getitem__): FUNC_KEY_BUILTIN_FUNC, + function_id(tuple.count): FUNC_KEY_BUILTIN_FUNC, + function_id(tuple.index): FUNC_KEY_BUILTIN_FUNC, + function_id(list.__getitem__): FUNC_KEY_BUILTIN_FUNC, + function_id(list.copy): FUNC_KEY_BUILTIN_FUNC, + function_id(list.index): FUNC_KEY_BUILTIN_FUNC, + function_id(list.count): FUNC_KEY_BUILTIN_FUNC, + function_id(dict.__contains__): FUNC_KEY_BUILTIN_FUNC, + function_id(dict.__getitem__): FUNC_KEY_BUILTIN_FUNC, + function_id(dict.get): FUNC_KEY_BUILTIN_FUNC, + function_id(dict.keys): FUNC_KEY_BUILTIN_FUNC, + function_id(dict.values): FUNC_KEY_BUILTIN_FUNC, + function_id(dict.items): FUNC_KEY_BUILTIN_FUNC, + function_id(dict.fromkeys): FUNC_KEY_BUILTIN_FUNC, + function_id(dict.copy): FUNC_KEY_BUILTIN_FUNC, + function_id(set.__contains__): FUNC_KEY_BUILTIN_FUNC, + function_id(set.copy): FUNC_KEY_BUILTIN_FUNC, + function_id(set.issubset): FUNC_KEY_BUILTIN_FUNC, + function_id(str.find): FUNC_KEY_BUILTIN_FUNC, + function_id(str.count): FUNC_KEY_BUILTIN_FUNC, + function_id(str.index): FUNC_KEY_BUILTIN_FUNC, + function_id(str.rfind): FUNC_KEY_BUILTIN_FUNC, + function_id(str.rindex): FUNC_KEY_BUILTIN_FUNC, + function_id(str.startswith): FUNC_KEY_BUILTIN_FUNC, + function_id(str.endswith): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isascii): FUNC_KEY_BUILTIN_FUNC, + function_id(str.islower): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isupper): FUNC_KEY_BUILTIN_FUNC, + function_id(str.istitle): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isspace): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isdecimal): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isdigit): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isnumeric): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isalpha): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isalnum): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isidentifier): FUNC_KEY_BUILTIN_FUNC, + function_id(str.isprintable): FUNC_KEY_BUILTIN_FUNC, + function_id(str.format): FUNC_KEY_BUILTIN_FUNC, + function_id(str.format_map): FUNC_KEY_BUILTIN_FUNC, + function_id(str.__format__): FUNC_KEY_BUILTIN_FUNC, + function_id(list.append): FUNC_KEY_LIST_APPEND, + function_id(dict.pop): FUNC_KEY_DICT_POP, + + # instancemethod + function_id(Tensor_._flatten_tensors): FUNC_KEY_BUILTIN_FUNC, # pylint: disable=protected-access + function_id(Tensor_._is_flattened): FUNC_KEY_BUILTIN_FUNC, # pylint: disable=protected-access + function_id(Tensor_._get_flattened_tensors): FUNC_KEY_BUILTIN_FUNC, # pylint: disable=protected-access + function_id(Tensor_._get_fusion_size): FUNC_KEY_BUILTIN_FUNC, # pylint: disable=protected-access + function_id(Tensor_._is_test_stub): FUNC_KEY_BUILTIN_FUNC, # pylint: disable=protected-access + function_id(Tensor_.__str__): FUNC_KEY_BUILTIN_FUNC, # pylint: disable=protected-access + function_id(Tensor_.__repr__): FUNC_KEY_BUILTIN_FUNC, # pylint: disable=protected-access + function_id(Tensor_.convert_bytes_to_tensor): FUNC_KEY_BUILTIN_FUNC, + function_id(Tensor_.dim): FUNC_KEY_BUILTIN_FUNC, + function_id(Tensor_.from_numpy): FUNC_KEY_BUILTIN_FUNC, + function_id(Tensor_.getitem_index_info): FUNC_KEY_BUILTIN_FUNC, + function_id(Tensor_.get_bytes): FUNC_KEY_BUILTIN_FUNC, + function_id(Tensor_.is_init): FUNC_KEY_BUILTIN_FUNC, + function_id(Tensor_.is_contiguous): FUNC_KEY_BUILTIN_FUNC, + function_id(Tensor_.stride): FUNC_KEY_BUILTIN_FUNC, + function_id(Tensor_.asnumpy): FUNC_KEY_BUILTIN_FUNC, + + # other builtin function + function_id(math.log): FUNC_KEY_BUILTIN_FUNC, + + function_id(numpy.isinf): FUNC_KEY_BUILTIN_FUNC, + function_id(numpy.isnan): FUNC_KEY_BUILTIN_FUNC, + function_id(numpy.abs): FUNC_KEY_BUILTIN_FUNC, + function_id(numpy.log): FUNC_KEY_BUILTIN_FUNC, +} + +for after_grad in _get_after_grad_code(): + _func_map[id(after_grad)] = FUNC_KEY_GRAD_OPERATIONS_CODE + +for k, v in convert_object_map.items(): + key = id(k) + if key not in _func_map and isinstance(v, Primitive): + if key is print: + continue + _func_map[key] = FUNC_KEY_PSJIT_CONVERTMAP + +GUARD_KEY_RELAX_FUNC = 1 +_guard_func_map = dict() diff --git a/mindspore/python/mindspore/common/symbol.py b/mindspore/python/mindspore/common/symbol.py index 66f014fc1b4d6620dbe0d917411197a1a36d799a..db5be9ca5403749aaf541a4f44f0818b49ff1273 100644 --- a/mindspore/python/mindspore/common/symbol.py +++ b/mindspore/python/mindspore/common/symbol.py @@ -22,7 +22,7 @@ class Symbol: Symbol is a data structure to indicate the symbolic info of shape. For dynamic shape networks, compared with only setting the unknown dimensions ( ``None`` ) in `Tensor` , providing - more symbolic shape info can help the framework better optimize the computation graph, to improve the performce of + more symbolic shape info can help the framework better optimize the computation graph, to improve the performance of network execution. Args: diff --git a/mindspore/python/mindspore/communication/management.py b/mindspore/python/mindspore/communication/management.py index e2537627d22588eb4f2591da3290f0edc8c60479..ea4528a8c426bb506ff5d76166c39ea252dbac3d 100755 --- a/mindspore/python/mindspore/communication/management.py +++ b/mindspore/python/mindspore/communication/management.py @@ -23,6 +23,7 @@ from mindspore.communication._comm_helper import Backend, _get_rank_helper, _get MCCL_WORLD_COMM_GROUP, DEVICE_TO_BACKEND, _get_local_rank_helper, _get_local_size_helper, GlobalComm, \ _check_mpi_envs, _set_elegant_exit_handle from mindspore._c_expression import init_hccl, finalize_hccl, init_cluster, MSContext, ms_ctx_param +from mindspore.hal.device import is_initialized __all__ = ["init", "release", "get_rank", "get_local_rank", "get_group_size", "get_local_rank_size", "get_world_rank_from_group_rank", @@ -182,6 +183,10 @@ def init(backend_name=None): if device_target != "Ascend": raise RuntimeError("For 'init', the argument 'backend_name' should be '{}' to init '{}', " "but got 'hccl'.".format(DEVICE_TO_BACKEND[device_target], device_target)) + if is_initialized(device_target): + logger.warning(f"For 'init' in Ascend backend, the backend is already initialized, please set it before " + "the definition of any Tensor and Parameter, and the instantiation and execution of any " + "operation and net, otherwise the 'init' may not take effect.") if not host_init: _check_parallel_envs() GlobalComm.BACKEND = Backend("hccl") diff --git a/mindspore/python/mindspore/context.py b/mindspore/python/mindspore/context.py index f2882b07aa68d7c4bc6143d88eb29053c0e7dc09..6bfcb4533b822880b9d868ed078563b1361c08d8 100644 --- a/mindspore/python/mindspore/context.py +++ b/mindspore/python/mindspore/context.py @@ -34,6 +34,7 @@ from mindspore.parallel._auto_parallel_context import _set_auto_parallel_context from mindspore.parallel._ps_context import _set_ps_context, _get_ps_context, _reset_ps_context, \ _need_reset_device_target_for_ps from mindspore.parallel._offload_context import _set_offload_context, _get_offload_context +from mindspore.hal.device import is_initialized __all__ = ['GRAPH_MODE', 'PYNATIVE_MODE', 'STRICT', 'COMPATIBLE', 'LAX', 'set_context', 'get_context', 'set_auto_parallel_context', 'get_auto_parallel_context', 'reset_auto_parallel_context', 'ParallelMode', @@ -1092,6 +1093,13 @@ def _check_target_specific_cfgs(device, arg_key): return False +def _check_ascend_device_context_initialized(device_target): + if device_target == 'Ascend' and is_initialized(device_target): + logger.warning(f"For 'context.set_context' in Ascend backend, the backend is already initialized, please set " + "it before the definition of any Tensor and Parameter, and the instantiation and execution of " + "any operation and net, otherwise the settings may not take effect.") + + @args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=(bool, int), save_graphs_path=str, enable_dump=bool, aoe_tune_mode=str, aoe_config=dict, save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str, @@ -1599,6 +1607,8 @@ def set_context(**kwargs): if 'device_target' in kwargs: ctx.set_device_target(kwargs['device_target']) device = ctx.get_param(ms_ctx_param.device_target) + _check_ascend_device_context_initialized(device) + for key, value in kwargs.items(): if key in ('enable_sparse', 'auto_tune_mode'): logger.warning(f"For 'context.set_context', '{key}' parameter is deprecated, " diff --git a/mindspore/python/mindspore/dataset/engine/datasets_vision.py b/mindspore/python/mindspore/dataset/engine/datasets_vision.py index ac5586f21b5a90d9c57bb350703bd48f7fd471be..5ed60ac49cb6a3332ccf646ec4515fbad02ebeaf 100644 --- a/mindspore/python/mindspore/dataset/engine/datasets_vision.py +++ b/mindspore/python/mindspore/dataset/engine/datasets_vision.py @@ -4503,10 +4503,10 @@ class VOCDataset(MappableDataset, VisionBaseDataset): The generated dataset with different `task` setting has different output columns: - `task` = :py:obj:`Detection` , output columns: :py:obj:`[image, dtype=uint8]` , - :py:obj:`[bbox, dtype=float32]` , :py:obj:`[label, dtype=uint32]` , - :py:obj:`[difficult, dtype=uint32]` , :py:obj:`[truncate, dtype=uint32]` . + :py:obj:`[bbox, dtype=float32]` , :py:obj:`[label, dtype=uint32]` , + :py:obj:`[difficult, dtype=uint32]` , :py:obj:`[truncate, dtype=uint32]` . - `task` = :py:obj:`Segmentation` , output columns: :py:obj:`[image, dtype=uint8]` , - :py:obj:`[target,dtype=uint8]` . + :py:obj:`[target,dtype=uint8]` . Args: dataset_dir (str): Path to the root directory that contains the dataset. diff --git a/mindspore/python/mindspore/dataset/vision/transforms.py b/mindspore/python/mindspore/dataset/vision/transforms.py index 587509dad0be09f7abacf829f7cb25251175b513..5ec5622f99a66b0aeb767214dd7a0b1608699449 100644 --- a/mindspore/python/mindspore/dataset/vision/transforms.py +++ b/mindspore/python/mindspore/dataset/vision/transforms.py @@ -1846,8 +1846,10 @@ class FiveCrop(PyTensorOperation): >>> img.save("./2.jpg") >>> data = Image.open("./2.jpg") >>> output = vision.FiveCrop(size=20)(data) - >>> print(np.array(output).shape, np.array(output).dtype) - (5,) object + >>> for cropped_img in output: + ... print(cropped_img.size) + ... break + (20, 20) >>> os.remove("./2.jpg") diff --git a/mindspore/python/mindspore/mint/__init__.py b/mindspore/python/mindspore/mint/__init__.py index 7a0b2ddfb1fb4c812026e9156d708f762d1d0f41..8c532803c6a22960e2feffee08ca425f2ed73e01 100644 --- a/mindspore/python/mindspore/mint/__init__.py +++ b/mindspore/python/mindspore/mint/__init__.py @@ -16,8 +16,13 @@ from __future__ import absolute_import from mindspore.ops.extend import * from mindspore.ops.extend import array_func, math_func, nn_func +from mindspore.mint.nn.functional import * +from mindspore.mint.nn import functional +from mindspore.ops import erf, where +from mindspore.ops.function.math_func import linspace_ext as linspace -__all__ = [] +__all__ = ['erf', 'where', 'linspace'] __all__.extend(array_func.__all__) __all__.extend(math_func.__all__) __all__.extend(nn_func.__all__) +__all__.extend(functional.__all__) diff --git a/mindspore/python/mindspore/mint/nn/__init__.py b/mindspore/python/mindspore/mint/nn/__init__.py index ea4e82b9f3878d2a53a7b6e3bcc927ef91ac68d2..96c43388fde0eca789d09b826b3ea8269c1b3bd6 100644 --- a/mindspore/python/mindspore/mint/nn/__init__.py +++ b/mindspore/python/mindspore/mint/nn/__init__.py @@ -20,7 +20,9 @@ Predefined building blocks or computing units to construct neural networks. from __future__ import absolute_import from mindspore.nn.extend import * from mindspore.nn.extend import basic, embedding +from mindspore.nn.extend import MaxPool2dExt as MaxPool2d + +__all__ = ['MaxPool2d'] -__all__ = [] __all__.extend(basic.__all__) __all__.extend(embedding.__all__) diff --git a/mindspore/python/mindspore/mint/nn/functional.py b/mindspore/python/mindspore/mint/nn/functional.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..8360c991f2491977779efa9c56e3f348981e6dcf 100644 --- a/mindspore/python/mindspore/mint/nn/functional.py +++ b/mindspore/python/mindspore/mint/nn/functional.py @@ -0,0 +1,22 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""mint nn functional.""" +from __future__ import absolute_import +from mindspore.ops.extend import max_pool2d_ext as max_pool2d +from mindspore.ops.functional import ( + conv_transpose2d +) + +__all__ = ['conv_transpose2d', 'max_pool2d'] diff --git a/mindspore/python/mindspore/multiprocessing/__init__.py b/mindspore/python/mindspore/multiprocessing/__init__.py index c04e24fb7808efd79d4c2d1d5e4e439ff5749ecd..6c3371257538c440c36b90ed5319dea5ccdefa67 100644 --- a/mindspore/python/mindspore/multiprocessing/__init__.py +++ b/mindspore/python/mindspore/multiprocessing/__init__.py @@ -16,6 +16,7 @@ mindspore.multiprocessing is a wrapper around the native `multiprocessing` module. Some methods are overrode to support fork-based multiprocess. """ +import types import signal import multiprocessing as mp from multiprocessing import * @@ -64,5 +65,8 @@ class Pool(mp.pool.Pool): # pylint: disable=function-redefined, abstract-method """ def Process(self, *args, **kwds): if self._ctx.get_start_method() == "fork": + # Process() becomes a staticmethod function of Pool with first argument 'ctx' in python 3.8.0 and later + if isinstance(super().Process, types.FunctionType): + args = args[1:] return _MsProcess(*args, **kwds) return super().Process(*args, **kwds) diff --git a/mindspore/python/mindspore/nn/extend/__init__.py b/mindspore/python/mindspore/nn/extend/__init__.py index d149f8b76ef54291c077df515c14a99492e830ae..0834dfc5d8cb256d80d0ffa64589e97a0abb04c6 100644 --- a/mindspore/python/mindspore/nn/extend/__init__.py +++ b/mindspore/python/mindspore/nn/extend/__init__.py @@ -19,5 +19,6 @@ from __future__ import absolute_import from mindspore.nn.extend.embedding import Embedding from mindspore.nn.extend.basic import Linear +from mindspore.nn.extend.pooling import MaxPool2dExt -__all__ = ['Embedding', 'Linear'] +__all__ = ['Embedding', 'Linear', 'MaxPool2dExt'] diff --git a/mindspore/python/mindspore/nn/extend/pooling.py b/mindspore/python/mindspore/nn/extend/pooling.py new file mode 100644 index 0000000000000000000000000000000000000000..2a0f62919eacc29a85d89678337ea53bb3ba0b9a --- /dev/null +++ b/mindspore/python/mindspore/nn/extend/pooling.py @@ -0,0 +1,114 @@ +#Copyright 2020-2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""pooling""" +from __future__ import absolute_import + +from mindspore.ops.auto_generate.gen_ops_prim import MaxPoolWithIndices, MaxPoolWithMask +from mindspore.nn.cell import Cell + +__all__ = ['MaxPool2dExt'] + + +class MaxPool2dExt(Cell): + r""" + Applies a 2D max pooling over an input Tensor which can be regarded as a composition of 2D planes. + + Typically the input is of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, MaxPool2d outputs + regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given kernel size + :math:`(h_{ker}, w_{ker})` and stride :math:`(s_0, s_1)`, the operation is as follows. + + .. math:: + \text{output}(N_i, C_j, h, w) = \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} + \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n) + + Args: + kernel_size (Union[int, tuple[int]]): The size of kernel used to take the max value, + is an int number or a single element tuple that represents height and width are both kernel_size, + or a tuple of two int numbers that represent height and width respectively. + Default: ``1`` . + stride (Union[int, tuple[int], None]): The distance of kernel moving, an int number or a single element tuple + that represents the height and width of movement are both stride, or a tuple of two int numbers that + represent height and width of movement respectively. + Default: ``None`` , which indicates the moving step is `kernel_size` . + padding (Union(int, tuple[int], list[int])): Specifies the padding value of the pooling operation. + Default: ``0`` . `padding` can only be an integer or a tuple/list containing one or two integers. If + `padding` is an integer or a tuple/list containing one integer, it will be padded `padding` times in the + four directions of the input. If `padding` is a tuple/list containing two integers, it will be padded + `padding[0]` times in the up-down direction of the input and `padding[1]` times in the left-right direction + of the input. + dilation (Union(int, tuple[int])): The spacing between the elements of the kernel in convolution, + used to increase the receptive field of the pooling operation. If it is a tuple, it must contain one or two + integers. Default: ``1`` . + return_indices (bool): If ``True`` , the function will return both the result of max pooling and the indices of + the max elements. Default: ``False`` . + ceil_mode (bool): If ``True`` , use ceil to compute the output shape instead of floor. Default: ``False`` . + + Inputs: + - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. + + Outputs: + If `return_indices` is ``False`` , return a Tensor `output`, else return a tuple (`output`, `argmax`). + + - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`. It has the + same data type as `input`. + - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int32. + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]} + \times (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]} + \times (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor + + Raises: + TypeError: If `input` is not a Tensor. + ValueError: If length of shape of `input` is not equal to 4. + TypeError: If `kernel_size` , `stride` , `padding` or `dilation` is not int or tuple. + ValueError: If `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `dilation` is not all 1. + ValueError: If `padding` is less than 0. + ValueError: If `padding` is more than half of `kernel_size`. + TypeError: If `ceil_mode` is not bool. + + Supported Platforms: + ``Ascend910B`` + + Examples: + >>> import mindspore as ms + >>> import numpy as np + >>> pool = ms.nn.MaxPool2d(kernel_size=3, stride=1) + >>> input = ms.Tensor(np.random.randint(0, 10, [1, 2, 4, 4]), ms.float32) + >>> output = pool(input) + >>> print(output.shape) + (1, 2, 2, 2) + """ + + def __init__(self, kernel_size=1, stride=None, padding=0, dilation=1, return_indices=False, + ceil_mode=False): + """Initialize MaxPool2d.""" + super(MaxPool2dExt, self).__init__() + self.return_indices = return_indices + strides = stride if (stride is not None) else kernel_size + if return_indices: + self.max_pool_func_ = MaxPoolWithIndices(kernel_size, strides, padding, dilation, ceil_mode) + else: + self.max_pool_func_ = MaxPoolWithMask(kernel_size, strides, padding, dilation, ceil_mode) + + def construct(self, input): + out, indices = self.max_pool_func_(input) + if self.return_indices: + return out, indices + return out diff --git a/mindspore/python/mindspore/numpy/fft.py b/mindspore/python/mindspore/numpy/fft.py index d00ca0ae961174a83f79bce64157557e5af21167..c1792daf7a5f3520fe3b6334573684ad7d0370ab 100644 --- a/mindspore/python/mindspore/numpy/fft.py +++ b/mindspore/python/mindspore/numpy/fft.py @@ -14,7 +14,8 @@ # ============================================================================ """Fast Fourier Transform operations, the function docs are adapted from Numpy API.""" from __future__ import absolute_import -__all__ = ['fftshift', 'ifftshift', 'fft', 'ifft', 'fft2', 'ifft2', 'fftn', 'ifftn'] +__all__ = ['fftshift', 'ifftshift', 'rfft', 'irfft', + 'fft', 'ifft', 'fft2', 'ifft2', 'fftn', 'ifftn'] from mindspore import ops diff --git a/mindspore/python/mindspore/ops/extend/__init__.py b/mindspore/python/mindspore/ops/extend/__init__.py index d05c2caa1608b86dc32ccca45bab8ec46f03d28f..46fccf3827dc31f21c2dcf90471bf3066b88179b 100644 --- a/mindspore/python/mindspore/ops/extend/__init__.py +++ b/mindspore/python/mindspore/ops/extend/__init__.py @@ -33,7 +33,7 @@ from . import ( nn_func, ) -from .array_func import gather, max, min, one_hot +from .array_func import gather, max, min, one_hot, narrow from .math_func import ( baddbmm, bmm, @@ -42,7 +42,8 @@ from .math_func import ( ) from .nn_func import ( - conv2d + conv2d, + max_pool2d_ext ) __all__ = [] diff --git a/mindspore/python/mindspore/ops/extend/array_func.py b/mindspore/python/mindspore/ops/extend/array_func.py index f0fe0f6ffc38e08f730be351f1d9adee17afc3e1..7217f587fe1f3a3ef4ed0dad2b9eecfd28433b12 100644 --- a/mindspore/python/mindspore/ops/extend/array_func.py +++ b/mindspore/python/mindspore/ops/extend/array_func.py @@ -21,11 +21,51 @@ Array Operators from mindspore.common import Tensor from mindspore.ops.operations.array_ops import ArgMaxWithValue, ArgMinWithValue from mindspore.ops._primitive_cache import _get_cache_prim -from mindspore.ops.auto_generate.gen_ops_prim import gather_d_op +from mindspore.ops.auto_generate.gen_ops_prim import gather_d_op, slice_ext_op from mindspore.ops.auto_generate.gen_ops_def import max_, min_ +from mindspore import _checkparam as validator from ..auto_generate import OneHotExt # define Primitive global variables +def narrow(input, dim, start, length): + """ + Returns a narrowed tensor from input tensor, and + the dimension axis is input from start to start + length. + + Args: + input (Tensor): the tensor to narrow. + dim (int): dimension along which to narrow. + start (int): the starting dimension. + length (int): the distance to the ending dimension. + + Returns: + Tensor. + + - output (Tensors) - The narrowed tensor. + + Raises: + TypeError: If the input is not a tensor or tuple or list of tensors. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> import mindspore + >>> from mindspore import ops + >>> from mindspore import Tensor + >>> x = Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], mindspore.int32) + >>> output = ops.narrow(x, 0, 0, 2) + >>> print(output) + [[ 1 2 3] + [ 4 5 6]] + >>> output = ops.narrow(x, 1, 1, 2) + >>> print(output) + [[ 2 3] + [ 5 6] + [ 8 9]] + """ + validator.check_value_type("input", input, Tensor, "narrow") + return slice_ext_op(input, dim, start, start+length, 1) def gather(input, dim, index): @@ -83,8 +123,7 @@ def max(input, dim=None, keepdim=False): Args: input (Tensor): The input tensor, can be any dimension. Complex tensor is not supported for now. - dim (int, optional): The dimension to reduce. When assigning a value to the `dim` parameter, please - assign the int type, and it does not support assignment to ``None`` . Default: ``None`` . + dim (int, optional): The dimension to reduce. Default: ``None`` . keepdim (bool, optional): Whether to reduce dimension, if true, the output will keep same dimension with the input, the output will reduce dimension if false. Default: ``False`` . @@ -133,8 +172,7 @@ def min(input, dim=None, keepdim=False): Args: input (Tensor): The input tensor, can be any dimension. Complex tensor is not supported for now. - dim (int, optional): The dimension to reduce. When assigning a value to the `dim` parameter, please - assign the int type, and it does not support assignment to ``None`` . Default: ``None`` . + dim (int, optional): The dimension to reduce. Default: ``None`` . keepdim (bool, optional): Whether to reduce dimension, if true, the output will keep same dimension with the input, the output will reduce dimension if false. Default: ``False`` . diff --git a/mindspore/python/mindspore/ops/extend/nn_func.py b/mindspore/python/mindspore/ops/extend/nn_func.py index 51fdb643ca501a6fa387f532655169937a6fa938..9abdd5850ad3afe914d588e1808dce17d3eadd40 100644 --- a/mindspore/python/mindspore/ops/extend/nn_func.py +++ b/mindspore/python/mindspore/ops/extend/nn_func.py @@ -19,7 +19,7 @@ NN Operators with better performance """ from mindspore.ops._primitive_cache import _get_cache_prim -from mindspore.ops.auto_generate.gen_ops_prim import Convolution, ConstantPadNd +from mindspore.ops.auto_generate.gen_ops_prim import Convolution, ConstantPadNd, MaxPoolWithIndices, MaxPoolWithMask from mindspore import _checkparam as validator @@ -163,7 +163,6 @@ def conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1): ValueError: If `stride` or `dilation` is less than 1. ValueError: If `pad_mode` is not one of 'same', 'valid' or 'pad'. ValueError: If `padding` is a tuple/list whose length is not equal to 2. - ValueError: If `pad_mode` is not equal to 'pad' and `padding` is greater than 0. Supported Platforms: ``Ascend`` @@ -218,4 +217,87 @@ def conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1): f"or a string, but got {type(padding)}") -__all__ = ['conv2d'] +def max_pool2d_ext(input, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): + r""" + Performs a 2D max pooling on the input Tensor. + + Typically, the input is a Tensor with shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})`, outputs + regional maximum in the :math:`(H_{in}, W_{in})`-dimension. Given `kernel_size` + :math:`ks = (h_{ker}, w_{ker})` and `stride` :math:`s = (s_0, s_1)`, the operation is as follows: + + .. math:: + \text{output}(N_i, C_j, h, w) = + \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1} + \text{input}(N_i, C_j, s_0 \times h + m, s_1 \times w + n) + + Args: + input (Tensor): Tensor of shape :math:`(N_{in}, C_{in}, H_{in}, W_{in})` with data type of float32 + in Ascend. + kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value and arg + value, is an int number that represents height and width of the kernel, or a tuple of + two int numbers that represent height and width respectively. + stride (Union[int, tuple[int], None]): The distance of kernel moving, an int number that represents + the height and width of movement are both stride, or a tuple of two int numbers that + represent height and width of movement respectively. + Default: ``None`` , which indicates the moving step is `kernel_size` . + padding (Union[int, tuple[int]]): An int number that represents the height and width of movement are both + strides, or a tuple of two int numbers that represent height and width of movement respectively. + Default: ``0`` . + dilation (Union[int, tuple[int]]): Control the stride of elements in the kernel. Default: ``1`` . + return_indices (bool): Whether to output the indices of max value. Default: ``False`` . + ceil_mode (bool): Whether to use ceil instead of floor to calculate output shape. Default: ``False`` . + + Returns: + If `return_indices` is ``False`` , return a Tensor `output`, else return a tuple (`output`, `argmax`). + + - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, H_{out}, W_{out})`. + It has the same data type as `input`. + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]} + \times (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]} + \times (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor + + - **argmax** (Tensor) - Index corresponding to the maximum value. In Ascend, data type is int32. + It will be return only when `return_indices` is True. + + Raises: + TypeError: If `input` is not a Tensor. + ValueError: If length of shape of `input` is not equal to 4. + TypeError: If `kernel_size` , `stride` , `padding` or `dilation` is not int or tuple. + ValueError: If `kernel_size`, `stride` or `dilation` is less than 1. + ValueError: If `dilation` is not all 1. + ValueError: If `padding` is less than 0. + ValueError: If `padding` is more than half of `kernel_size`. + TypeError: If `ceil_mode` is not bool. + + Supported Platforms: + ``Ascend910B`` + + Examples: + >>> import mindspore + >>> import numpy as np + >>> from mindspore import Tensor, ops + >>> input = Tensor(np.arange(20 * 16 * 50 * 32).reshape((20, 16, 50, 32)), mindspore.float32) + >>> output_tensor, argmax = ops.max_pool2d_ext(input, kernel_size=(3, 2), stride=(2, 1), return_indices=True) + >>> print(output_tensor.shape) + (20, 16, 24, 31) + >>> print(argmax.shape) + (20, 16, 24, 31) + """ + strides = stride if (stride is not None) else kernel_size + if return_indices: + max_pool_func_ = _get_cache_prim(MaxPoolWithIndices)(kernel_size, strides, padding, dilation, ceil_mode) + out, indices = max_pool_func_(input) + else: + max_pool_func_ = _get_cache_prim(MaxPoolWithMask)(kernel_size, strides, padding, dilation, ceil_mode) + out, indices = max_pool_func_(input) + if return_indices: + return out, indices + return out + + +__all__ = ['conv2d', 'max_pool2d_ext'] diff --git a/mindspore/python/mindspore/ops/function/__init__.py b/mindspore/python/mindspore/ops/function/__init__.py index d03f041296bc1e100cf25212c9122ba2fd925c84..5ec20ba008f766485a94dc889b6befbf7505bf66 100644 --- a/mindspore/python/mindspore/ops/function/__init__.py +++ b/mindspore/python/mindspore/ops/function/__init__.py @@ -261,6 +261,7 @@ from .math_func import ( matrix_determinant, det, linspace, + linspace_ext, lu_solve, matrix_solve, maximum, @@ -515,6 +516,7 @@ from .nn_func import ( conv3d_transpose, conv1d, conv2d, + conv_transpose2d, sigmoid, logsigmoid, relu, diff --git a/mindspore/python/mindspore/ops/function/array_func.py b/mindspore/python/mindspore/ops/function/array_func.py index 1f541b0cd859ebe985664b4e03339d7d1a79fbd7..a2870106f97d5b9c9148886701bede3f4c8fe226 100644 --- a/mindspore/python/mindspore/ops/function/array_func.py +++ b/mindspore/python/mindspore/ops/function/array_func.py @@ -60,7 +60,7 @@ from mindspore.ops._utils.utils import ms_arrange from mindspore.ops.auto_generate import cat, range, scatter_nd, deepcopy, masked_fill, diagonal, expand_dims, \ nonzero, flip, transpose, unsorted_segment_sum, diag, gather, gather_d, gather_nd, reshape, broadcast_to, \ - strided_slice, ones, zeros, max_, min_ + strided_slice, ones, zeros, max_, min_, select from mindspore.ops.operations.manually_defined import tile, rank, scalar_cast arg_max_with_value_ = ArgMaxWithValue() @@ -387,25 +387,25 @@ def hamming_window(window_length, periodic=True, alpha=0.54, beta=0.46, *, dtype return out -def where(condition, x, y): +def where(condition, input, other): r""" - Selects elements from `x` or `y` based on `condition` and returns a tensor. + Selects elements from `input` or `other` based on `condition` and returns a tensor. .. math:: - output_i = \begin{cases} x_i,\quad &if\ condition_i \\ y_i,\quad &otherwise \end{cases} + output_i = \begin{cases} input_i,\quad &if\ condition_i \\ other_i,\quad &otherwise \end{cases} Args: - condition (Tensor[bool]): If True, yield `x`, otherwise yield `y`. - x (Union[Tensor, Scalar]): When `condition` is True, values to select from. - y (Union[Tensor, Scalar]): When `condition` is False, values to select from. + condition (Tensor[bool]): If True, yield `input`, otherwise yield `other`. + input (Union[Tensor, Scalar]): When `condition` is True, values to select from. + other (Union[Tensor, Scalar]): When `condition` is False, values to select from. Returns: - Tensor, elements are selected from `x` and `y`. + Tensor, elements are selected from `input` and `other`. Raises: TypeError: If `condition` is not a Tensor. - TypeError: If both `x` and `y` are scalars. - ValueError: If `condition`, `x` and `y` can not broadcast to each other. + TypeError: If both `input` and `other` are scalars. + ValueError: If `condition`, `input` and `other` can not broadcast to each other. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -422,25 +422,7 @@ def where(condition, x, y): [[0. 1.] [2. 1.]] """ - if not isinstance(condition, Tensor): - raise TypeError(f"For 'where', 'condition' must be a Tensor, but got {type(condition)}.") - if isinstance(x, (int, float)): - if not isinstance(y, Tensor): - raise TypeError( - f"For 'where', at least one of 'x' and 'y' should be Tensor, but got x:{type(x)}, y:{type(y)}." - ) - x = cast_(x, y.dtype) - elif isinstance(y, (int, float)): - if not isinstance(x, Tensor): - raise TypeError( - f"For 'where', at least one of 'x' and 'y' should be Tensor, but got x:{type(x)}, y:{type(y)}." - ) - y = cast_(y, x.dtype) - output_shape = _calc_broadcast_shape(x.shape, y.shape, condition.shape) - condition = broadcast_to(condition, output_shape) - x = broadcast_to(x, output_shape) - y = broadcast_to(y, output_shape) - return tensor_select_(condition, x, y) + return tensor_select_(condition, input, other) def reverse(x, axis): @@ -612,14 +594,14 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1): Returns: Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`, - and it has the same data type as `on_value`. + and it has the same data type as `on_value`. Raises: TypeError: If `axis` or `depth` is not an int. TypeError: If dtype of `indices` is not int32 or int64. TypeError: If dtype of `on_value` is not int32, int64, float16 or float32. TypeError: If `indices`, `on_value` or `off_value` is not a Tensor. - ValueError: If `axis` is not in range [-1, ndim]. + ValueError: If `axis` is not in range [-1, ndim]. ndim is the dimension of `indices` . ValueError: If `depth` is less than 0. Supported Platforms: @@ -1048,12 +1030,16 @@ def unique_consecutive(input, return_idx=False, return_counts=False, axis=None): returned. If specified, it must be int32 or int64. Default: ``None`` . Returns: - A tensor or a tuple of tensors containing tensor objects (`output`, `idx`, `counts`). `output` has the - same type as `input` and is used to represent the output list of unique scalar elements. If `return_idx` is - True, there will be an additional returned tensor, `idx`, which has the same shape as `input` and represents - the index of where the element in the original input maps to the position in the output. If `return_counts` - is True, there will be an additional returned tensor, `counts`, which represents the number of occurrences - for each unique value or tensor. + A tensor or a tuple of tensors containing tensor objects (`output`, `idx`, `counts`). + + - `output` has the + same type as `input` and is used to represent the output list of unique scalar elements. + - If `return_idx` is + True, there will be an additional returned tensor, `idx`, which has the same shape as `input` and represents + the index of where the element in the original input maps to the position in the output. + - If `return_counts` + is True, there will be an additional returned tensor, `counts`, which represents the number of occurrences + for each unique value or tensor. Raises: TypeError: If `input` is not a Tensor. @@ -1431,165 +1417,6 @@ def flatten(input, order='C', *, start_dim=1, end_dim=-1): return reshape_(input, new_shape) -def _check_select_type_match(scalar, tensor_type, scalar_name, tensor_name): - if isinstance(scalar, int) and tensor_type != mstype.int32: - raise TypeError(f"For functional operator[select], the input[{scalar_name}] is int, " - f"then the input[{tensor_name}] must be a Tensor of int32.") - if isinstance(scalar, float) and tensor_type != mstype.float32: - raise TypeError(f"For functional operator[select], the input[{scalar_name}] is float, " - f"then the input[{tensor_name}] must be a Tensor of float32.") - - -def _check_select_shape_match(input_shape, cond_shape, tensor_name): - if input_shape != cond_shape: - raise ValueError(f"For functional operator[select], the cond shape must be same as {tensor_name} shape.") - - -def _check_select_type(is_cond_tensor, is_x_scalar, is_y_scalar, is_x_tensor, is_y_tensor): - if not is_cond_tensor: - raise TypeError(f"For functional operator[select], the input[cond] must be a Tensor.") - if is_x_scalar and not is_y_tensor: - raise TypeError(f"For functional operator[select], the input[x] is int or float, " - f"then the input[y] must be a Tensor.") - if is_y_scalar and not is_x_tensor: - raise TypeError(f"For functional operator[select], the input[y] is int or float, " - f"then the input[x] must be a Tensor.") - - -def _check_select_shape_same(cond_shape, x_shape, y_shape): - """Check if input of select has same shape.""" - return cond_shape == x_shape and x_shape == y_shape and cond_shape == y_shape - - -def get_max_value(x, y, z): - """Get the maximum value of x, y and z.""" - if x >= y and x >= z: - return x - if y >= x and y >= z: - return y - return z - - -def _calc_broadcast_shape(cond_shape, x_shape, y_shape): - """Calculate broadcast shape for select""" - converted_shape = [] - cond_reverse = cond_shape[::-1] - x_reverse = x_shape[::-1] - y_reverse = y_shape[::-1] - max_len = get_max_value(len(cond_reverse), len(x_reverse), len(y_reverse)) - i = 0 - while i < max_len: - cond_element = 1 if i >= len(cond_reverse) else cond_reverse[i] - x_element = 1 if i >= len(x_reverse) else x_reverse[i] - y_element = 1 if i >= len(y_reverse) else y_reverse[i] - broadcast_element = get_max_value(cond_element, x_element, y_element) - if cond_element not in (1, broadcast_element): - raise ValueError(f"For select, condition input can not broadcast at index {i}") - if x_element not in (1, broadcast_element): - raise ValueError(f"For select, x input can not broadcast at index {i}") - if y_element not in (1, broadcast_element): - raise ValueError(f"For select, y input can not broadcast at index {i}") - converted_shape.append(broadcast_element) - i = i + 1 - converted_shape.reverse() - return tuple(converted_shape) - - -def select(cond, x, y): - r""" - The conditional tensor determines whether the corresponding element in the output must be - selected from `x` (if true) or `y` (if false) based on the value of each element. - - It can be defined as: - - .. math:: - out_i = \begin{cases} - x_i, & \text{if } cond_i \\ - y_i, & \text{otherwise} - \end{cases} - - Args: - cond (Tensor[bool]): The condition tensor, decides which element is chosen. - The shape is :math:`(x_1, x_2, ..., x_N, ..., x_R)`. - x (Union[Tensor, int, float]): The first Tensor or number to be selected. - If x is a Tensor, the shape is or can be broadcadt to :math:`(x_1, x_2, ..., x_N, ..., x_R)`. - If x is an int or a float, it will be cast to the type of int32 or float32, - and broadcast to the same shape as y. One of x and y must be a Tensor. - y (Union[Tensor, int, float]): The second Tensor or number to be selected. - If y is a Tensor, The shape is or can be broadcadt to :math:`(x_1, x_2, ..., x_N, ..., x_R)`. - If y is an int or a float, it will be cast to the type of int32 or float32, - and broadcast to the same shape as x. One of x and y must be a Tensor. - - Returns: - Tensor, has the same shape as `cond`. - - Raises: - TypeError: If `x` or `y` is not a Tensor, int or float. - ValueError: The shapes of inputs can not be broadcast. - - Supported Platforms: - ``Ascend`` ``GPU`` ``CPU`` - - Examples: - >>> import mindspore - >>> from mindspore import Tensor, ops - >>> # 1) Both inputs are Tensor - >>> - >>> cond = Tensor([True, False]) - >>> x = Tensor([2,3], mindspore.float32) - >>> y = Tensor([1,2], mindspore.float32) - >>> output = ops.select(cond, x, y) - >>> print(output) - [2. 2.] - >>> # 2) y is a float - >>> cond = Tensor([True, False]) - >>> x = Tensor([2,3], mindspore.float32) - >>> y = 2.0 - >>> output = ops.select(cond, x, y) - >>> print(output) - [2. 2.] - """ - is_x_scalar = isinstance(x, (int, float)) - is_y_scalar = isinstance(y, (int, float)) - is_x_tensor = isinstance(x, Tensor) - is_y_tensor = isinstance(y, Tensor) - is_cond_tensor = isinstance(cond, Tensor) - _check_select_type(is_cond_tensor, is_x_scalar, is_y_scalar, is_x_tensor, is_y_tensor) - input_x = x - input_y = y - if is_x_scalar: - _check_select_shape_match(y.shape, cond.shape, "y") - _check_select_type_match(x, y.dtype, "x", "y") - input_x = zeros_like_(y) + x - if isinstance(x, int): - input_x = cast_(input_x, mstype.int32) - else: - input_x = cast_(input_x, mstype.float32) - - if is_y_scalar: - _check_select_shape_match(x.shape, cond.shape, "x") - _check_select_type_match(y, x.dtype, "y", "x") - input_y = zeros_like_(x) + y - if isinstance(y, int): - input_y = cast_(input_y, mstype.int32) - else: - input_y = cast_(input_y, mstype.float32) - - if is_x_tensor and is_y_tensor and is_cond_tensor: - x_shape = ops.shape(x) - y_shape = ops.shape(y) - cond_shape = ops.shape(cond) - all_constant = ops.isconstant(cond_shape) and ops.isconstant(x_shape) and ops.isconstant(y_shape) - if all_constant and not _check_select_shape_same(cond_shape, x_shape, y_shape): - broadcast_shape = _calc_broadcast_shape(cond_shape, x_shape, y_shape) - new_cond = ops.broadcast_to(cond, broadcast_shape) - new_x = ops.broadcast_to(x, broadcast_shape) - new_y = ops.broadcast_to(y, broadcast_shape) - return tensor_select_(new_cond, new_x, new_y) - - return tensor_select_(cond, input_x, input_y) - - def slice(input_x, begin, size): r""" Slices a tensor in the specified shape. @@ -2767,11 +2594,11 @@ def gather_elements(input, dim, index): .. code-block:: - output[i][j][k] = x[index[i][j][k]][j][k] # if dim == 0 + output[i][j][k] = input[index[i][j][k]][j][k] # if dim == 0 - output[i][j][k] = x[i][index[i][j][k]][k] # if dim == 1 + output[i][j][k] = input[i][index[i][j][k]][k] # if dim == 1 - output[i][j][k] = x[i][j][index[i][j][k]] # if dim == 2 + output[i][j][k] = input[i][j][index[i][j][k]] # if dim == 2 `input` and `index` have the same length of dimensions, and `index.shape[axis] <= input.shape[axis]` where axis goes through all dimensions of `input` except `dim`. @@ -2832,10 +2659,12 @@ def tensor_scatter_add(input_x, indices, updates): output\left [indices \right ] = input\_x + update Note: - - On GPU, if some values of the `indices` are out of bound, instead of raising an index error, + If some values of the `indices` are out of `input_x` bound: + + - On GPU, instead of raising an index error, the corresponding `updates` will not be updated to self tensor. - - On CPU, if some values of the `indices` are out of bound, raising an index error. - - On Ascend, out of bound checking is not supported, if some values of the `indices` are out of bound, + - On CPU, raising an index error. + - On Ascend, out of bound checking is not supported, unknown errors may be caused. Args: @@ -2890,10 +2719,13 @@ def tensor_scatter_sub(input_x, indices, updates): output[indices] = input\_x - update Note: - On GPU, if some values of the `indices` are out of bound, instead of raising an index error, - the corresponding `updates` will not be updated to self tensor. On CPU, if some values of - the `indices` are out of bound, raising an index error. On Ascend, out of bound checking is - not supported, if some values of the `indices` are out of bound, unknown errors may be caused. + If some values of the `indices` are out of `input_x` bound: + + - On GPU, instead of raising an index error, + the corresponding `updates` will not be updated to self tensor. + - On CPU, raising an index error. + - On Ascend, out of bound checking is + not supported, unknown errors may be caused. Args: input_x (Tensor): The input tensor. The dimension of input_x must be no less than indices.shape[-1]. @@ -2943,10 +2775,12 @@ def tensor_scatter_max(input_x, indices, updates): output\left [indices \right ] = \max(input\_x, update) Note: - - On GPU, if some values of the `indices` are out of bound, instead of raising an index error, + If some values of the `indices` are out of `input_x` bound: + + - On GPU, instead of raising an index error, the corresponding `updates` will not be updated to self tensor. - - On CPU, if some values of the `indices` are out of bound, raising an index error. - - On Ascend, out of bound checking is not supported, if some values of the `indices` are out of bound, + - On CPU, raising an index error. + - On Ascend, out of bound checking is not supported, unknown errors may be caused. Args: @@ -3004,10 +2838,12 @@ def tensor_scatter_min(input_x, indices, updates): output\left [indices \right ] = \min(input\_x, update) Note: - - On GPU, if some values of the `indices` are out of bound, instead of raising an index error, + If some values of the `indices` are out of `input_x` bound: + + - On GPU, instead of raising an index error, the corresponding `updates` will not be updated to self tensor. - - On CPU, if some values of the `indices` are out of bound, raising an index error. - - On Ascend, out of bound checking is not supported, if some values of the `indices` are out of bound, + - On CPU, raising an index error. + - On Ascend, out of bound checking is not supported, unknown errors may be caused. Args: @@ -3497,7 +3333,7 @@ def matrix_diag(x, k=0, num_rows=-1, num_cols=-1, padding_value=0, align="RIGHT_ ValueError: If rank of `num_rows`, `num_cols` or `padding_value` is not equal to 0. ValueError: If size of `k` is not equal to 1 or 2. ValueError: If the value of `k` is not in (-num_rows, num_cols). - ValueError: If k[1] is not greater equal to k[0] when k[0] != k[1]. + ValueError: If k[1] is less than k[0] when k[0] != k[1]. ValueError: If rank of `x` is not greater than or is equal to 1 when k is an integer or k[0] == k[1]. ValueError: If rank of `x` is not greater than or is equal to 2 when k[0] != k[1]. ValueError: If x.shape[-2] is not equal to k[1] - k[0] + 1 when k[0] != k[1]. @@ -3561,11 +3397,13 @@ def matrix_diag_part(x, k, padding_value, align="RIGHT_LEFT"): Returns: A Tensor. Has the same type as `x`. - Assume `x` has r dimensions :math:`(I, J, ..., M, N)` . Let `max_diag_len` be the maximum length among all - diagonals to be extracted, :math:`max\_diag\_len = min(M + min(k[1], 0), N + min(-k[0], 0))` - Let `num_diags` be the number of diagonals to extract, :math:`num\_diags = k[1] - k[0] + 1`. - If :math:`num\_diags == 1`, the output tensor is of rank r - 1 with shape :math:`(I, J, ..., L, max\_diag\_len)` - Otherwise, the output tensor has rank r with dimensions :math:`(I, J, ..., L, num\_diags, max\_diag\_len)` . + + - Assume `x` has r dimensions :math:`(I, J, ..., M, N)` . Let `max_diag_len` be the maximum length among all + diagonals to be extracted, :math:`max\_diag\_len = min(M + min(k[1], 0), N + min(-k[0], 0))` + - Let `num_diags` be the number of diagonals to extract, :math:`num\_diags = k[1] - k[0] + 1`. + If :math:`num\_diags == 1`, the output tensor is of rank r - 1 + with shape :math:`(I, J, ..., L, max\_diag\_len)` + Otherwise, the output tensor has rank r with dimensions :math:`(I, J, ..., L, num\_diags, max\_diag\_len)` . Raises: TypeError: If `x` is not Tensor. @@ -3574,9 +3412,9 @@ def matrix_diag_part(x, k, padding_value, align="RIGHT_LEFT"): ValueError: If `align` is not a string or not in the valid range. ValueError: If rank of `k` is not equal to 0 or 1. ValueError: If rank of `padding_value` is not equal to 0. - ValueError: If rank of `x` is not greater equal to 2. + ValueError: If rank of `x` is less than 2. ValueError: If size of `k` is not equal to 1 or 2. - ValueError: If k[1] is not greater equal to k[0] in case the size of `k` is 2. + ValueError: If k[1] is less than k[0] in case the size of `k` is 2. ValueError: If the value of `k` is not in (-x.shape[-2], x.shape[-1]). Supported Platforms: @@ -3643,9 +3481,9 @@ def matrix_set_diag(x, diagonal, k=0, align="RIGHT_LEFT"): # pylint: disable=re TypeError: If `k` is not int32 dtype. ValueError: If `align` is not a string or not in the valid range. ValueError: If rank of `k` is not equal to 0 or 1. - ValueError: If rank of `x` is not greater equal to 2. + ValueError: If rank of `x` is less than 2. ValueError: If size of `k` is not equal to 1 or 2. - ValueError: If k[1] is not greater equal to k[0] in case the size of `k` is 2. + ValueError: If k[1] is less than k[0] in case the size of `k` is 2. ValueError: If the `diagonal` rank size don't match with input `x` rank size. ValueError: If the `diagonal` shape value don't match with input `x` shape value. ValueError: If the diagonal :math:`shape[-2]` is not equal to num_diags calculated by :math:`k[1]-k[0]+1`. @@ -4112,7 +3950,7 @@ def is_tensor(obj): obj (Object): input object. Returns: - Bool. Return True if `obj` is a Tensor, otherwise, return False. + Bool. Return ``True`` if `obj` is a Tensor, otherwise, return ``False``. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -4238,10 +4076,12 @@ def tensor_scatter_div(input_x, indices, updates): output\left [indices \right ] = input\_x \div update Note: - - On GPU, if some values of the `indices` are out of bound, instead of raising an index error, + If some values of the `indices` are out of `input_x` bound: + + - On GPU, instead of raising an index error, the corresponding `updates` will not be updated to self tensor. - - On CPU, if some values of the `indices` are out of bound, raising an index error. - - On Ascend, out of bound checking is not supported, if some values of the `indices` are out of bound, + - On CPU, raising an index error. + - On Ascend, out of bound checking is not supported, unknown errors may be caused. - The operator can't handle division by 0 exceptions, so the user needs to make sure there is no 0 value in `updates`. @@ -4669,7 +4509,7 @@ def triu(input, diagonal=0): # pylint: disable=redefined-outer-name Args: input (Tensor): The input tensor with shape :math:`(M, N, *)` where * means any number of additional dimensions. - diagonal (int, optional): An optional attribute indicates the diagonal to consider, default: 0, + diagonal (int, optional): An optional attribute indicates the diagonal to consider, default: ``0``, indicating the main diagonal. Returns: @@ -4867,7 +4707,7 @@ def tensor_split(input, indices_or_sections, axis=0): TypeError: If argument `input` is not Tensor. TypeError: If argument `axis` is not int. ValueError: If argument `axis` is out of range of :math:`[-input.ndim, input.ndim)` . - TypeError: If each element in 'indices_or_sections' is not integer. + TypeError: If each element in `indices_or_sections` is not integer. TypeError: If argument `indices_or_sections` is not int, tuple(int) or list(int). Supported Platforms: diff --git a/mindspore/python/mindspore/ops/function/clip_func.py b/mindspore/python/mindspore/ops/function/clip_func.py index 45bcefe876d0860dbf814123b309e9e5cf8d4486..57d9e1b81f434a4751bf20a462a38b58246deab6 100644 --- a/mindspore/python/mindspore/ops/function/clip_func.py +++ b/mindspore/python/mindspore/ops/function/clip_func.py @@ -74,7 +74,7 @@ def clip_by_norm(x, max_norm, norm_type=2.0, error_if_nonfinite=False): max_norm (Union(float, int)): The upper limit of the norm for this group of network parameters. norm_type (Union(float, int)): Norm type. Default: ``2.0``. error_if_nonfinite (bool): If it is ``True``, an exception is thrown if the total norm from the input - is nan, inf or -inf. If it is ``False``, no exception will be thrown.Default: ``False`` . + is nan, inf or -inf. If it is ``False``, no exception will be thrown. Default: ``False`` . Returns: Tensors, a list or tuple of Tensors, representing clipped Tensors. diff --git a/mindspore/python/mindspore/ops/function/linalg_func.py b/mindspore/python/mindspore/ops/function/linalg_func.py index 6f61567fa86065fb7e812bd0b339d4fae2c54954..c846f15c1b3ffc41a888d5f84b7577e85d54f8c0 100644 --- a/mindspore/python/mindspore/ops/function/linalg_func.py +++ b/mindspore/python/mindspore/ops/function/linalg_func.py @@ -72,7 +72,7 @@ def cond(A, p=None): Raises: TypeError: If `A` is a vector and `p` is a str. - ValueError: If `A` is a matrices and `p` is not in valid mode. + ValueError: If `A` is a matrix and `p` is not in valid mode. ValueError: If `A` is a matrix and `p` is an integer that is not in [1, -1, 2, -2]. Supported Platforms: @@ -182,7 +182,7 @@ def svd(input, full_matrices=False, compute_uv=True): Args: input (Tensor): Tensor of the matrices to be decomposed. The shape should be :math:`(*, M, N)`, - the supported dtype are float32 and float64. + the supported dtypes are float32 and float64. full_matrices (bool, optional): If true, compute full-sized :math:`U` and :math:`V`. If false, compute only the leading P singular vectors, with P is the minimum of M and N. Default: ``False`` . diff --git a/mindspore/python/mindspore/ops/function/math_func.py b/mindspore/python/mindspore/ops/function/math_func.py index b08b762f70058cb2cdf8a0a59b9804674a472f94..43d0010f1e7cdd2d943409643142ba37a7697e4b 100644 --- a/mindspore/python/mindspore/ops/function/math_func.py +++ b/mindspore/python/mindspore/ops/function/math_func.py @@ -31,12 +31,13 @@ from mindspore.ops import composite as C from mindspore.ops.composite.multitype_ops import _constexpr_utils as const_utils from mindspore.ops.primitive import _primexpr from mindspore.ops.operations._inner_ops import TileSize -from mindspore.ops.auto_generate import Cummin, BatchMatMul +from mindspore.ops.auto_generate import Cummin, BatchMatMul, LinSpaceExt from mindspore.ops import auto_generate from mindspore.ops.operations.math_ops import STFT from mindspore.ops.operations.math_ops import LuUnpack from mindspore.ops.operations.math_ops import Roll from mindspore.ops.operations.math_ops import Ormqr +from mindspore.ops.operations.math_ops import DivMod from mindspore.ops.operations.array_ops import MatrixSetDiagV3, Transpose from mindspore.ops.auto_generate import (minimum, maximum, mul, sin, sinc, sinh, cummax, real, conj, add, sub, cos, cosh, matrix_exp, sqrt, rsqrt, square, trace, nextafter, abs, acos, acosh, angle, @@ -110,7 +111,7 @@ absolute_ = P.Abs() cast_ = P.Cast() tensor_add = P.Add() tensor_ceil = P.Ceil() -tensor_div = P.RealDiv() +tensor_div = P.Div() tensor_exp = P.Exp() tensor_expm1 = P.Expm1() tensor_floordiv = P.FloorDiv() @@ -164,7 +165,6 @@ cumprod_ = P.CumProd() cumsum_ = P.CumSum() cumulative_logsumexp_ = CumulativeLogsumexp() digamma_ = P.Digamma() -div_ = P.Div() dtype_ = P.DType() eps_ = P.Eps() erf_ = P.Erf() @@ -691,16 +691,6 @@ def subtract(input, other, *, alpha=1): return tensor_sub(input, alpha * other) -def true_divide(dividend, divisor): - r""" - Alias for :func:`mindspore.ops.div` with :math:`rounding\_mode=None`. - - Supported Platforms: - ``Ascend`` ``GPU`` ``CPU`` - """ - return div(dividend, divisor, rounding_mode=None) - - def multiply(input, other): r""" Alias for :func:`mindspore.ops.asinh`. @@ -766,14 +756,21 @@ def div(input, other, *, rounding_mode=None): """ if rounding_mode is not None and rounding_mode not in ['floor', 'trunc']: raise ValueError("For ops.div, rounding_mode value should be None, 'floor' or 'trunc'.") - - if rounding_mode == 'floor': - return tensor_floordiv(input, other) - output = div_(input, other) - if rounding_mode == 'trunc': - output = trunc_(output) + if rounding_mode: + output = DivMod()(input, other, rounding_mode) + else: + output = P.Div()(input, other) return output +def true_divide(dividend, divisor): + r""" + Alias for :func:`mindspore.ops.div` with :math:`rounding\_mode=None`. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + """ + return div(dividend, divisor) + def divide(input, other, *, rounding_mode=None): """ @@ -899,7 +896,7 @@ def logdet(input): the matrix determinant is 0, -inf will be returned. Raises: - TypeError: If dtype of `input` is not float32, float64, Complex64 or Complex128. + TypeError: If dtype of `input` is not float32, float64, complex64 or complex128. Supported Platforms: ``CPU`` @@ -2540,7 +2537,7 @@ def linspace(start, end, steps): end (Union[Tensor, int, float]): Last value of interval. The tensor data type must be float32 or float64 and with shape of 0-D. steps (Union[Tensor, int]): Number of ticks in the interval, inclusive of start and end. - Must be positive int number or 0D int32/int64 Tensor. + Must be positive int number or 0-D int32/int64 Tensor. Returns: Tensor, has the same dtype as `start`, and the shape of :math:`(steps)`. @@ -2549,7 +2546,7 @@ def linspace(start, end, steps): TypeError: If `start` or `end` is not a Tensor. TypeError: If dtype of `start` or dtype of `end` is not float32 or float64. ValueError: If shape of `start` or shape of `end` is not 0-D. - TypeError: If `steps` is not int or 0D int32/int64 Tensor. + TypeError: If `steps` is not int or 0-D int32/int64 Tensor. ValueError: If `steps` is not positive int number. Supported Platforms: @@ -2572,6 +2569,52 @@ def linspace(start, end, steps): return linspace_(start, end, steps) +def linspace_ext(start, end, steps, *, dtype=None): + r""" + Returns a Tensor whose value is `steps` evenly spaced in the interval `start` and `end` (including `start` and + `end`), and the length of the output Tensor is `steps`. + + .. math:: + \begin{aligned} + &step = (end - start)/(steps - 1)\\ + &output = [start, start+step, start+2*step, ... , end] + \end{aligned} + + Args: + start (Union[Tensor, Number]): Start value of interval. + If `start` is Tensor, data type must be float32 or float64 and with shape of 0-D. + end (Union[Tensor, Number]): Last value of interval. + If `end` is Tensor, data type must be float32 or float64 and with shape of 0-D. + steps (Union[Tensor, int]): Number of ticks in the interval, inclusive of start and end. + Must be positive int number or 0D int32/int64 Tensor. + + Keyword Args: + dtype (mindspore.dtype, optional): The output Tensor data type. Default: ``None`` , the data type of output + Tensor is float32. + + Returns: + Tensor, has the shape of :math:`(steps,)`. + + Raises: + TypeError: If dtype of `start` or dtype of `end` is not supported. + ValueError: If shape of `start` or shape of `end` is not 0-D. + TypeError: If `steps` is not int or 0D int32/int64 Tensor. + ValueError: If `steps` is not positive int number. + + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + + Examples: + >>> start = Tensor(1, mindspore.float32) + >>> end = Tensor(10, mindspore.float32) + >>> steps = 5 + >>> output = ops.linspace_ext(start, end, steps, dtype=mindspore.float32) + >>> print(output) + [ 1. 3.25 5.5 7.75 10. ] + """ + return _get_cache_prim(LinSpaceExt)()(start, end, steps, dtype) + + def det(input): r""" Computes the determinant of one or more square matrices. @@ -3450,7 +3493,7 @@ def nan_to_num(input, nan=0.0, posinf=None, neginf=None): Args: input (Tensor): The shape of tensor is :math:`(input_1, input_2, ..., input_R)`. With float32 or float16 data type. - nan (float): The replace value of 'NaN'. Default value is 0.0. + nan (float): The replace value of 'NaN'. Default value is ``0.0``. posinf (float): the value to replace positive infinity values with. Default: ``None``, replacing positive infinity with the maximum value supported by the data type of `input`. neginf (float): the value to replace negative infinity values with. Default: ``None``, @@ -3641,7 +3684,7 @@ def nanmedian(input, axis=-1, keepdims=False): .. warning:: `indices` does not necessarily contain the first occurrence of each median value found in the `input`, - unless it is unique. + unless it is unique. Args: input (Tensor): The input tensor to calculate the median and indices. @@ -4706,9 +4749,11 @@ def addmv(input, mat, vec, *, beta=1, alpha=1): raise TypeError("For Addmv, inputs must be all tensors.") if dtype_(mat) != dtype_(vec): raise TypeError("For Addmv, the mat and vec should be the same dtype.") - _check_input_dtype("input", input_dtype, - [mstype.float16, mstype.float32, mstype.float64, - mstype.int16, mstype.int32, mstype.int64], "Addmv") + valid_types = [mstype.float16, mstype.float32, mstype.float64, mstype.int16, mstype.int32, mstype.int64] + if input_dtype not in valid_types: + names = [t.__name__ if hasattr(t, "__name__") else t for t in valid_types] + input_dtype = input_dtype.__name__ if hasattr(input_dtype, '__name__') else repr(input_dtype) + raise TypeError(f"For 'Addmv', the 'input' should be one of '{names}', but got type '{input_dtype}'") _check_attr_dtype("alpha", alpha, [int, float, bool], "Addmv") _check_attr_dtype("beta", beta, [int, float, bool], "Addmv") if input_dtype in (mstype.int16, mstype.int32, mstype.int64): @@ -5430,8 +5475,8 @@ def sparse_segment_mean(x, indices, segment_ids): TypeError: If the dtype of `x` is not one of the following dtype: float16, float32, float64. TypeError: If the dtype of `indices` and `segment_ids` are not one of the following dtype: int32, int64. TypeError: If the dtype of `indices` and `segment_ids` are not the same. - ValueError: If the shape of `x`, 'indices' or `segment_ids` don't meet the parameter description. - ValueError: If the size of 'indices' and `segment_ids` are not the same. + ValueError: If the shape of `x`, `indices` or `segment_ids` don't meet the parameter description. + ValueError: If the size of `indices` and `segment_ids` are not the same. Supported Platforms: ``GPU`` ``CPU`` @@ -7760,7 +7805,7 @@ def matmul(input, other): Returns: Tensor or scalar, the matrix product of the inputs. This is a scalar only - when both `input`, `other` are 1-d vectors. + when both `input`, `other` are 1-d vectors. Raises: TypeError: If the dtype of `input` and the dtype of `other` are not the same. @@ -10373,7 +10418,7 @@ def fft2(input, s=None, dim=(-2, -1), norm=None): # pylint: disable=redefined-o TypeError: If the `s` or `dim` is not tuple(int). ValueError: If `input` dimension is less than 2. ValueError: If the length of `s` and `dim` are not the same. - ValueError: If the value in `dim` is not in the range of "[ `-input_dim` , `input_dim-1` ]". + ValueError: If the value in `dim` is not in the range of :math:`[-input.ndim, input.ndim)`. ValueError: If norm is none of "backward", "forward" or "ortho". Supported Platforms: @@ -10416,7 +10461,7 @@ def fftn(input, s=None, dim=None, norm=None): # pylint: disable=redefined-outer TypeError: If the `s` or `dim` is not tuple(int). ValueError: If the length of `s` and `dim` are not the same. ValueError: If `input` dimension is less than 1. - ValueError: If the value in `dim` is not in the range of "[ `-input_dim` , `input_dim-1` )". + ValueError: If the value in `dim` is not in the range of :math:`[-input.ndim, input.ndim)`. ValueError: If norm is none of "backward", "forward" or "ortho". Supported Platforms: @@ -10457,7 +10502,7 @@ def ifft(input, n=None, dim=-1, norm=None): # pylint: disable=redefined-outer-n Default: ``None`` that means ``"backward"``. Returns: - Tensor, The result of `ifft()` function. + Tensor, the result of `ifft()` function. Raises: TypeError: If the `input` type is not Tensor. @@ -10465,7 +10510,7 @@ def ifft(input, n=None, dim=-1, norm=None): # pylint: disable=redefined-outer-n TypeError: If `n` or `dim` type is not int32. ValueError: If `input` dimension is less than 1. ValueError: If `n` is less than 1. - ValueError: If `dim` is not in the range of "[ `-input_dim` , `input_dim-1` ]". + ValueError: If `dim` is not in the range of :math:`[-input.ndim, input.ndim)`. ValueError: If norm is none of "backward", "forward" or "ortho". Supported Platforms: @@ -10545,7 +10590,7 @@ def ifft2(input, s=None, dim=(-2, -1), norm=None): # pylint: disable=redefined- TypeError: If the `s` or `dim` is not tuple(int). ValueError: If the length of `s` and `dim` are not the same. ValueError: If `input` dimension is less than 2. - ValueError: If the value in `dim` is not in the range of "[ `-input_dim` , `input_dim-1` )". + ValueError: If the value in `dim` is not in the range of :math:`[-input.ndim, input.ndim)`. ValueError: If norm is none of "backward", "forward" or "ortho". Supported Platforms: @@ -10942,12 +10987,12 @@ def vecdot(x, y, *, axis=-1): TypeError: If type of `axis` is not int. ValueError: If `axis` is out of range. - Supported Platforms: - ``Ascend`` ``GPU`` ``CPU`` - .. note:: Currently, complex numbers are not supported on GPU. + Supported Platforms: + ``Ascend`` ``GPU`` ``CPU`` + Examples: >>> import mindspore as ms >>> from mindspore import ops @@ -11013,7 +11058,7 @@ def dot(input, other): Raises: TypeError: If type of input and other are not the same. TypeError: If dtype of input or other is not float16 or float32. - ValueError: If rank of input or other less than 2. + ValueError: If rank of input or other is less than 2. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -11432,6 +11477,7 @@ __all__ = [ 'matrix_determinant', 'det', 'linspace', + 'linspace_ext', 'logspace', 'lu_solve', 'matrix_solve', diff --git a/mindspore/python/mindspore/ops/function/nn_func.py b/mindspore/python/mindspore/ops/function/nn_func.py index e0af4467361883b0da6dbc3be6fa40d8347b152e..64c953464b65ea016cc9dce2ed0cf023cce7042b 100644 --- a/mindspore/python/mindspore/ops/function/nn_func.py +++ b/mindspore/python/mindspore/ops/function/nn_func.py @@ -42,7 +42,7 @@ from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, from mindspore.common.api import _function_forbid_reuse from mindspore.ops.auto_generate import log_softmax, dense, prelu, celu, relu, fast_gelu, silu, elu, sigmoid, relu6 from mindspore.ops.auto_generate.gen_ops_prim import GroupNorm -from mindspore.ops.auto_generate.gen_ops_prim import embedding_op +from mindspore.ops.auto_generate.gen_ops_prim import embedding_op, Convolution abs_ = P.Abs() add_ = P.Add() @@ -5125,6 +5125,75 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila return output +def conv_transpose2d(input, weight, bias=None, stride=1, padding=0, output_padding=0, groups=1, dilation=1): + r""" + Calculates a 2D transposed convolution, which can be regarded as Conv2d for the gradient of the input, + also called deconvolution (although it is not an actual deconvolution). + + The input is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`, + where :math:`N` is batch size, :math:`C_{in}` is space dimension, + :math:`H_{in}, W_{in}` are the height and width of the feature layer respectively. + + When Conv2d and Conv2dTranspose are initialized with the same parameters, and `pad_mode` is set to 'pad', + :math:`dilation * (kernel\_size - 1) - padding` amount of zero will be paded to the height and width + directions of the input, they are inverses of each other in regard to the input and output shapes in this case. + However, when `stride` > 1, Conv2d maps multiple input shapes to the same output shape. Deconvolutional network + can refer to `Deconvolutional Networks `_. + + Args: + input (Tensor): Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`. + weight (Tensor): Tensor of shape + :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel + is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`. + bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`. + When bias is ``None`` , zeros will be used. Default: ``None`` . + stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents + the height and width of movement are both strides, or a tuple of two int numbers that + represent height and width of movement respectively. Default: ``1`` . + padding (Union(int, tuple[int], list[int]), optional): Implicit paddings on both sides of the input `x`. + Can be an integer or a tuple/list with 2 integers. + output_padding (Union[int, tuple[int]]): The number of padding on the height and width directions of the output. + The data type is an integer or a tuple of two integers. If `output_padding` is an integer, + then the bottom and right padding are all equal to `output_padding`. If `output_padding` is a tuple of + 2 integers, then the bottom and right padding is equal to `output_padding[0]`, `output_padding[1]` + respectively. + groups (int, optional): Splits `input` into groups. Default: ``1`` . + dilation (Union(int, tuple[int]), optional): Gaps between kernel elements.The data type is int or a tuple of + 2 integers. Specifies the dilation rate to use for dilated convolution. If set to be :math:`k > 1`, + there will be :math:`k - 1` pixels skipped for each sampling location. Its value must + be greater than or equal to 1 and bounded by the height and width of the input `x`. Default: ``1`` . + + Returns: + Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`. + To see how different pad modes affect the output shape, please refer to + :class:`mindspore.nn.Conv2dTranspose` for more details. + + + Raises: + TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple. + TypeError: `groups` is not an int. + TypeError: If `bias` is not a Tensor. + ValueError: If the shape of `bias` is not :math:`(C_{out})` . + ValueError: If `stride` or `dilation` is less than 1. + ValueError: If `padding` is a tuple/list whose length is not equal to 2. + + Supported Platforms: + ``Ascend`` + + Examples: + >>> import mindspore + >>> import numpy as np + >>> from mindspore import Tensor, ops + >>> x = Tensor(np.ones([1, 6, 32, 32]), mindspore.float32) + >>> weight = Tensor(np.ones([6, 3, 5, 5]), mindspore.float32) + >>> output = ops.conv_transpose2d(x, weight) + >>> print(output.shape) + (1, 3, 36, 36) + """ + conv = _get_cache_prim(Convolution)(stride, padding, dilation, True, output_padding, groups) + return conv(input, weight, bias) + + def hardsigmoid(input): r""" Hard sigmoid activation function. @@ -7288,6 +7357,7 @@ __all__ = [ 'conv3d_transpose', 'conv1d', 'conv2d', + 'conv_transpose2d', 'sigmoid', 'logsigmoid', 'relu', diff --git a/mindspore/python/mindspore/ops/function/random_func.py b/mindspore/python/mindspore/ops/function/random_func.py index a148fb38269204677e51d3730d20a20b0ead6412..307b16fc37dd403ba3787600149ff512ff6cdf92 100755 --- a/mindspore/python/mindspore/ops/function/random_func.py +++ b/mindspore/python/mindspore/ops/function/random_func.py @@ -52,7 +52,7 @@ def random_gamma(shape, alpha, seed=None): Args: shape (Tensor): The shape of random tensor to be generated. - Must be one of the following types: int32, int64. 1-D integer tensor. + 1-D integer tensor. alpha (Tensor): The :math:`\alpha` distribution parameter. A Tensor. Must be one of the following types: half, float32, float64. seed (int, optional): Seed is used as entropy source for Random number engines generating pseudo-random numbers. @@ -190,8 +190,8 @@ def multinomial_with_replacement(x, seed, offset, numsamples, replacement=False) x (Tensor): the input tensor containing the cumsum of probabilities, must be 1 or 2 dimensions. Must be one of the following types: float16, float32, float64. seed (int): If seed is set to be -1, and offset is set to be 0, the random number - generator is seeded by a random seed. Otherwise, it is seeded by the given seed. - offset (int): Offset used to avoid seed collision. + generator is seeded by a random seed. Otherwise, it is seeded by the given seed. The supported dtype: int64. + offset (int): Offset used to avoid seed collision. The supported dtype: int64. numsamples (int): the number of samples to draw. replacement (bool, optional): Whether to draw with replacement or not. Default: ``False`` . @@ -1205,13 +1205,13 @@ def multinomial(input, num_samples, replacement=True, seed=None): and the resulting sequence is the calculation result of the polynomial distribution, with a length equal to the number of samplings. - In case 1 of the sample code, we perform two non-replacement samplings (`replacement` is `False`). + In case 1 of the sample code, we perform two non-replacement samplings (`replacement` is ``False``). The calculation result is most likely `[0, 1]`, and less likely `[1, 0]`. Since the probability of selecting index 0 is 90% for each sampling, the first result is most likely to be index 0. Since the probability of selecting index 2 is 0, index 2 cannot appear in the sampling result. Therefore, the second result must be index 1, and the resulting sequence is `[0, 1]`. - In case 2 of the sample code, we perform 10 replacement samplings (`replacement` is `True`). + In case 2 of the sample code, we perform 10 replacement samplings (`replacement` is ``True``). As expected, about 90% of the sampling results are index 0. In case 3 of the sample code, we extend the input to 2 dimensions, and the sampling results diff --git a/mindspore/python/mindspore/ops/operations/array_ops.py b/mindspore/python/mindspore/ops/operations/array_ops.py index dfd8946f5acc73561c1972f8d73da6e267f68a2f..54dbd0268829ce0b2f94e3db2c1d68cf6a064fcb 100755 --- a/mindspore/python/mindspore/ops/operations/array_ops.py +++ b/mindspore/python/mindspore/ops/operations/array_ops.py @@ -1067,9 +1067,9 @@ class MatrixSetDiagV3(Primitive): TypeError: If `k` is not int32 dtype. ValueError: If `align` is not a string or not in the valid range. ValueError: If rank of `k` is not equal to 0 or 1. - ValueError: If rank of `x` is not greater equal to 2. + ValueError: If rank of `x` is less than 2. ValueError: If size of `k` is not equal to 1 or 2. - ValueError: If `k[1]` is not greater equal to `k[0]` in case the size of `k` is 2. + ValueError: If `k[1]` is less than `k[0]` in case the size of `k` is 2. ValueError: If the `diagonal` rank size don't match with input `x` rank size. ValueError: If the `diagonal` shape value don't match with input `x` shape value. ValueError: If the diagonal :math:`shape[-2]` is not equal to num_diags calculated by diff --git a/mindspore/python/mindspore/ops/operations/math_ops.py b/mindspore/python/mindspore/ops/operations/math_ops.py index cd973d22add95b3064b6d2aeec048ba14577141f..7a7202df2dbfc26f5ea580992b6cd97de1f50ee9 100644 --- a/mindspore/python/mindspore/ops/operations/math_ops.py +++ b/mindspore/python/mindspore/ops/operations/math_ops.py @@ -39,7 +39,7 @@ from ..auto_generate import (Add, Addcdiv, Addcmul, ReduceMean, ReduceSum, Reduc LogicalXor, Cos, ACos, Sin, Asin, Abs, Round, Atan, Atanh, Atan2, LinSpace, MatrixDeterminant, LogMatrixDeterminant, Erfinv, Conj, Real, Complex, Angle, MatrixExp, CholeskyInverse, Trace, Cholesky, - FFTWithSize, NextAfter, NanToNum, Eig, Qr, Roll, Maximum, Div, CumProd, + FFTWithSize, NextAfter, NanToNum, Eig, Qr, Roll, Maximum, Div, DivMod, CumProd, CumSum, Less, LessEqual, AssignAdd, IsFinite) diff --git a/mindspore/python/mindspore/ops_generate/aclnn_config.yaml b/mindspore/python/mindspore/ops_generate/aclnn_config.yaml index 8cd7e5e35a9c9ecfd8192c42b7e78ecbd8cc4129..babdfb31951fea4b24ac77f52ce7cfd19f281d70 100644 --- a/mindspore/python/mindspore/ops_generate/aclnn_config.yaml +++ b/mindspore/python/mindspore/ops_generate/aclnn_config.yaml @@ -22,6 +22,7 @@ GatherDGradV2: 'aclnnScatterAdd' GatherD: 'aclnnGather' ReLU: 'aclnnRelu' ReLUGrad: 'aclnnThresholdBackward' +LinSpaceExt: 'aclnnLinspace' Tile: 'aclnnRepeat' Transpose: 'aclnnPermute' ArgMaxExt: 'aclnnArgMax' @@ -34,3 +35,8 @@ GroupNormGrad: 'aclnnGroupNormBackward' NotEqual: 'aclnnNeTensor' ClampScalar: 'aclnnClamp' OneHotExt: 'aclnnOneHot' +Select: 'aclnnSWhere' +MaxPoolWithIndices: 'aclnnMaxPool2dWithIndices' +MaxpoolGradWithIndices: 'aclnnMaxPool2dWithIndicesBackward' +MaxPoolWithMask: 'aclnnMaxPool2dWithMask' +MaxPoolGradWithMask: 'aclnnMaxPool2dWithMaskBackward' diff --git a/mindspore/python/mindspore/ops_generate/gen_ops_inner_prim.py b/mindspore/python/mindspore/ops_generate/gen_ops_inner_prim.py index 50a3f00de44a37b0bead47a02a01623ec7e3408b..6a27f19da5e7fa425695c514f5b65dcd00df463c 100644 --- a/mindspore/python/mindspore/ops_generate/gen_ops_inner_prim.py +++ b/mindspore/python/mindspore/ops_generate/gen_ops_inner_prim.py @@ -68,6 +68,8 @@ class StringToEnum(Primitive): def __call__(self, op_name, arg_name, enum_str): """Run in PyNative mode""" + if enum_str is None: + return None if not isinstance(enum_str, str): raise TypeError(f"For '{op_name}', the input '{arg_name}' should be a str, but got {type(enum_str)}.") return op_enum.str_to_enum(op_name, arg_name, enum_str) diff --git a/mindspore/python/mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py b/mindspore/python/mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py index 52f3f4a52f3ab25c1801b3ee56290238136e97c9..ed6f8439f673c26845d4ca92c99f0eae49bf119a 100644 --- a/mindspore/python/mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +++ b/mindspore/python/mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py @@ -52,7 +52,7 @@ class ProfilerInfoParser: def get_local_time(cls, syscnt: int) -> Decimal: """Convert syscnt to local time.""" if not cls._loaded_frequency: - localtime_stamp = c_expression.get_clock_time() + localtime_stamp = int(c_expression.get_clock_time() * 1e3) # us cast to ns syscnt_stamp = c_expression.get_clock_syscnt() outs, _ = cls.__run_cmd(['which', cls._msprof_cmd]) if not outs: diff --git a/mindspore/python/mindspore/rewrite/ast_helpers/ast_flattener.py b/mindspore/python/mindspore/rewrite/ast_helpers/ast_flattener.py index cc9e6cc8b7fee8c6e5547da00a3fc9d9b834681c..cd9f63f3dfb1a6bcdc44ccf2beb91acc6e209d77 100644 --- a/mindspore/python/mindspore/rewrite/ast_helpers/ast_flattener.py +++ b/mindspore/python/mindspore/rewrite/ast_helpers/ast_flattener.py @@ -178,16 +178,11 @@ class AstFlattener(ast.NodeTransformer): todos = getattr(node, todo_name) if isinstance(todos, list): new_list = [] - for idx, todo in enumerate(todos): + for todo in todos: # Starred expression(e.g. *args) cannot be flatten. if isinstance(todo, ast.Starred): new_list.append(todo) continue - # For codes like 'xxx and yyy and zzz', only 'xxx' can be flatten and parsed, - # otherwise executing 'yyy' may raise an exception when 'xxx' is False - if isinstance(node, ast.BoolOp) and isinstance(node.op, ast.And) and idx > 0: - new_list.append(todo) - continue # ast.keywords are processed individually: # y = func(key=value) => new_target_name = value & y = func(key=new_target_name) if isinstance(todo, ast.keyword): @@ -199,8 +194,18 @@ class AstFlattener(ast.NodeTransformer): continue new_node, new_assign = self._create_new_assign_node(todo, target_names, node) if id(new_node) != id(todo): + # For codes like 'xxx and yyy and zzz', and codes are flatten to 'x = xxx; y = yyy; z = zzz', + # executing 'y = yyy' may raise an exception when 'xxx' is False. + # convert 'y = yyy' to 'if xxx: y = yyy', and convert 'z = zzz' to 'if x and y: z = zzz'. + if isinstance(node, ast.BoolOp) and isinstance(node.op, ast.And) and new_list: + if_test = ast.BoolOp(ast.And(), new_list[:]) if len(new_list) > 1 else new_list[0] + else_assign = ast.Assign(targets=new_assign.targets, + value=ast.Constant(value=False, kind=None)) + new_if_assign = ast.If(test=if_test, body=[new_assign], orelse=[else_assign]) + results.insert(0, new_if_assign) + else: + results.append(new_assign) new_list.append(new_node) - results.append(new_assign) else: new_list.append(todo) setattr(node, todo_name, new_list) diff --git a/mindspore/python/mindspore/train/model.py b/mindspore/python/mindspore/train/model.py index 257e066b129b193d3f18ead28ff8632bd922a512..29ab21caaaabcfff81fdff951592c18e29fb0a62 100644 --- a/mindspore/python/mindspore/train/model.py +++ b/mindspore/python/mindspore/train/model.py @@ -1695,7 +1695,7 @@ class Model: "execution_plan" : {"op_name3" : "data_type:float16", "op_name4" : "data_type:float32"}} Note that both the "configPath" is configured in the config_dict and the config_item, - in this case, the path_b in the config_dict takes precedence. + in this case, the path_b in the config_dict takes precedence. Returns: Tensor, array(s) of predictions. diff --git a/tests/st/backend_opt_pass/test_backend_common_unify.py b/tests/st/backend_opt_pass/test_backend_common_unify.py index 52419fc2d9f2199f5d806e15854f93faf7f8278d..cc725eabf67e73fef5572288302a7e523202f479 100644 --- a/tests/st/backend_opt_pass/test_backend_common_unify.py +++ b/tests/st/backend_opt_pass/test_backend_common_unify.py @@ -198,8 +198,8 @@ def test_adam_weightdecay(): super(Net, self).__init__() self.adam_weight_decay = ops.AdamWeightDecay() self.var = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="var") - self.m = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="m") - self.v = Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name="v") + self.m = Parameter(Tensor(np.ones([2, 2]).astype(np.float16)), name="m") + self.v = Parameter(Tensor(np.ones([2, 2]).astype(np.float16)), name="v") def construct(self, lr, beta1, beta2, epsilon, decay, grad): out = self.adam_weight_decay(self.var, self.m, self.v, lr, beta1, beta2, epsilon, decay, grad) diff --git a/tests/st/dataset/test_ascend_lenet.py b/tests/st/dataset/test_ascend_lenet.py new file mode 100644 index 0000000000000000000000000000000000000000..e9b808ac9c7d54ccbf69c65b5eae8c22a247c4e7 --- /dev/null +++ b/tests/st/dataset/test_ascend_lenet.py @@ -0,0 +1,95 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import os +import pytest + +import mindspore as ms +import mindspore.dataset as ds +import mindspore.nn as nn +from mindspore.common.initializer import Normal + + +class LeNet5(nn.Cell): + def __init__(self, num_class=10, num_channel=1): + super(LeNet5, self).__init__() + self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid') + self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid') + self.relu = nn.ReLU() + self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) + self.flatten = nn.Flatten() + self.fc1 = nn.Dense(16 * 5 * 5, 120, weight_init=Normal(0.02)) + self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02)) + self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02)) + + def construct(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.conv2(x) + x = self.relu(x) + x = self.max_pool2d(x) + x = self.flatten(x) + x = self.relu(self.fc1(x)) + x = self.relu(self.fc2(x)) + x = self.fc3(x) + return x + + +def proc_dataset(data_path, batch_size=32): + mnist_ds = ds.MnistDataset(data_path, shuffle=True) + + # define map operations + image_transforms = [ + ds.vision.Resize(32), + ds.vision.Rescale(1.0 / 255.0, 0), + ds.vision.Normalize(mean=(0.1307,), std=(0.3081,)), + ds.vision.HWC2CHW() + ] + label_transforms = ds.transforms.transforms.TypeCast(ms.int32) + + mnist_ds = mnist_ds.map(operations=label_transforms, input_columns="label") + mnist_ds = mnist_ds.map(operations=image_transforms, input_columns="image") + mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) + + return mnist_ds + + +def create_model(): + model = LeNet5() + net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") + net_opt = nn.Momentum(model.trainable_params(), learning_rate=0.01, momentum=0.9) + trainer = ms.Model(model, loss_fn=net_loss, optimizer=net_opt, metrics={"Accuracy": nn.Accuracy()}) + return trainer + + +@pytest.mark.level1 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_net_build_then_train_sink_size_1(): + """ + Feature: Test model.build and model.train in graph mode under Ascend platform + Description: Test sink_size is equal to 1 and epoch is equal to 130, execute model.build first and then model.train + Expectation: Training completes successfully + """ + ms.set_context(mode=ms.GRAPH_MODE, op_timeout=60) + trainer = create_model() + train_dataset = proc_dataset(os.path.join("/home/workspace/mindspore_dataset/mnist", "train")) + trainer.build(train_dataset, epoch=130, sink_size=1) + trainer.train(130, train_dataset, dataset_sink_mode=True, sink_size=1) + + +if __name__ == '__main__': + test_net_build_then_train_sink_size_1() diff --git a/tests/st/dataset/test_gpu_lenet.py b/tests/st/dataset/test_gpu_lenet.py index fa84a5f53aca3907a367874fea0525ae91869eb6..151a2c8b64248bf72c2ce462e1070528bab54ca9 100644 --- a/tests/st/dataset/test_gpu_lenet.py +++ b/tests/st/dataset/test_gpu_lenet.py @@ -1,4 +1,4 @@ -# Copyright 2023-2024 Huawei Technologies Co., Ltd +# Copyright 2023 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,12 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ -import pytest - import mindspore as ms import mindspore.dataset as ds import mindspore.nn as nn -from mindspore.common.initializer import Normal from mindspore.communication.management import init, get_rank, get_group_size from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.ops import operations as P @@ -57,32 +54,6 @@ class LeNet(nn.Cell): return output -class LeNet5(nn.Cell): - def __init__(self, num_class=10, num_channel=1): - super(LeNet5, self).__init__() - self.conv1 = nn.Conv2d(num_channel, 6, 5, pad_mode='valid') - self.conv2 = nn.Conv2d(6, 16, 5, pad_mode='valid') - self.relu = nn.ReLU() - self.max_pool2d = nn.MaxPool2d(kernel_size=2, stride=2) - self.flatten = nn.Flatten() - self.fc1 = nn.Dense(16 * 5 * 5, 120, weight_init=Normal(0.02)) - self.fc2 = nn.Dense(120, 84, weight_init=Normal(0.02)) - self.fc3 = nn.Dense(84, num_class, weight_init=Normal(0.02)) - - def construct(self, x): - x = self.conv1(x) - x = self.relu(x) - x = self.max_pool2d(x) - x = self.conv2(x) - x = self.relu(x) - x = self.max_pool2d(x) - x = self.flatten(x) - x = self.relu(self.fc1(x)) - x = self.relu(self.fc2(x)) - x = self.fc3(x) - return x - - class Config: def __init__(self): self.data_path = "/home/workspace/mindspore_dataset/imagenet/imagenet_original/train" @@ -152,33 +123,6 @@ def create_dataset(dataset_path, do_train, batch_size=32, train_image_size=28, t return data_set -def proc_dataset(data_path, batch_size=32): - mnist_ds = ds.MnistDataset(data_path, shuffle=True) - - # define map operations - image_transforms = [ - ds.vision.Resize(32), - ds.vision.Rescale(1.0 / 255.0, 0), - ds.vision.Normalize(mean=(0.1307,), std=(0.3081,)), - ds.vision.HWC2CHW() - ] - label_transforms = ds.transforms.transforms.TypeCast(ms.int32) - - mnist_ds = mnist_ds.map(operations=label_transforms, input_columns="label") - mnist_ds = mnist_ds.map(operations=image_transforms, input_columns="image") - mnist_ds = mnist_ds.batch(batch_size, drop_remainder=True) - - return mnist_ds - - -def create_model(): - model = LeNet5() - net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") - net_opt = nn.Momentum(model.trainable_params(), learning_rate=0.01, momentum=0.9) - trainer = ms.Model(model, loss_fn=net_loss, optimizer=net_opt, metrics={"Accuracy": nn.Accuracy()}) - return trainer - - def set_parameter(): """set_parameter""" ms.set_context(mode=ms.PYNATIVE_MODE, device_target=config.device_target, save_graphs=False) @@ -253,23 +197,5 @@ def train_ft_fade(net): sink_size=dataset.get_dataset_size(), dataset_sink_mode=True) -@pytest.mark.level1 -@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -def test_net_build_then_train_sink_size_1(): - """ - Feature: Test model.build and model.train in graph mode under Ascend platform - Description: Test sink_size is equal to 1 and epoch is equal to 130, execute model.build first and then model.train - Expectation: Training completes successfully - """ - ms.set_context(mode=ms.GRAPH_MODE, op_timeout=60) - trainer = create_model() - train_dataset = proc_dataset("../../ut/data/dataset/testMnistData") - trainer.build(train_dataset, epoch=130, sink_size=1) - trainer.train(130, train_dataset, dataset_sink_mode=True, sink_size=1) - - if __name__ == '__main__': test_train_net_fade_then_sink() - test_net_build_then_train_sink_size_1() diff --git a/tests/st/dump/dump_test_utils.py b/tests/st/dump/dump_test_utils.py index 96db8d2c7b5f315c5f70f4ab65cd88ff7d8f543b..47ad40d4f66011f842fd4eea741c896b619d2b99 100644 --- a/tests/st/dump/dump_test_utils.py +++ b/tests/st/dump/dump_test_utils.py @@ -110,12 +110,27 @@ async_dump_dict_acl = { "net_name": "Net", "iteration": "0", "input_output": 0, + "model_name": [], "kernels": [], "support_device": [0, 1, 2, 3, 4, 5, 6, 7], "op_debug_mode": 0 } } +async_dump_dict_acl_assign_ops = { + "common_dump_settings": { + "dump_mode": 1, + "path": "", + "net_name": "Net", + "iteration": "0", + "input_output": 0, + "model_name": "kernel_graph1_2", + "kernels": ["Default/Add-op0"], + "support_device": [0, 1, 2, 3, 4, 5, 6, 7], + "op_debug_mode": 0 + } +} + def generate_dump_json(dump_path, json_file_name, test_key, net_name='Net'): """ Util function to generate dump configuration json file. @@ -155,6 +170,9 @@ def generate_dump_json(dump_path, json_file_name, test_key, net_name='Net'): elif test_key == "test_acl_dump": data = async_dump_dict_acl data["common_dump_settings"]["path"] = dump_path + elif test_key == "test_acl_dump_assign_ops": + data = async_dump_dict_acl_assign_ops + data["common_dump_settings"]["path"] = dump_path else: raise ValueError( "Failed to generate dump json file. The test name value " + test_key + " is invalid.") diff --git a/tests/st/dump/test_ge_dump.py b/tests/st/dump/test_ge_dump.py index 4a37a2519c4fd2ae2a9ffd87f4c21302145c1d95..fa319bf95f96158a8e5524582742afb0fdf0ad4d 100644 --- a/tests/st/dump/test_ge_dump.py +++ b/tests/st/dump/test_ge_dump.py @@ -171,6 +171,19 @@ def test_ge_dump_acl(): """ run_ge_dump_acl("test_acl_dump") +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +@security_off_wrap +def test_ge_dump_acl_assign_ops(): + """ + Feature: async dump on Ascend on GE backend. + Description: test async dump with default file_format value ("bin") + Expectation: dump data are generated as protobuf file format (suffix with timestamp) + """ + run_ge_dump_acl("test_acl_dump_assign_ops") + class ReluReduceMeanDenseRelu(Cell): def __init__(self, kernel, bias, in_channel, num_class): super().__init__() diff --git a/tests/st/fallback/test_graph_fallback_unsupport.py b/tests/st/fallback/test_graph_fallback_unsupport.py index 4674da787d6ed1a3e694a09b3d37d20b6da200e6..642e5f3bb81eda7250ddce2781ee6e8b44e5fc97 100644 --- a/tests/st/fallback/test_graph_fallback_unsupport.py +++ b/tests/st/fallback/test_graph_fallback_unsupport.py @@ -311,8 +311,7 @@ def test_call_third_party_class(): assert out == deque([4, 3, 2, 1]) -@pytest.mark.skip(reason="kwargs with AbstractAny, fix later") -@pytest.mark.level1 +@pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training diff --git a/tests/st/hal/test_hal_event.py b/tests/st/hal/test_hal_event.py index 050127d2bf9b061c1fc0a406cbdc224a3cfa235d..e240a5259f44c8e012929e46941832b3ff287ea6 100644 --- a/tests/st/hal/test_hal_event.py +++ b/tests/st/hal/test_hal_event.py @@ -88,11 +88,12 @@ def test_hal_event_wait(): with ms.hal.StreamCtx(s2): ev1.wait() c = ops.matmul(b, b) + ev2.record() ev2.wait() ev2.synchronize() assert ev1.query() is True - assert ev1.query() is True + assert ev2.query() is True assert np.allclose(ops.matmul(a, a).asnumpy(), b.asnumpy()) assert np.allclose(ops.matmul(b, b).asnumpy(), c.asnumpy()) diff --git a/tests/st/networks/models/fasterrcnn/test_fasterrcnn_overfit.py b/tests/st/networks/models/fasterrcnn/test_fasterrcnn_overfit.py index 444a486f9a376cc26dc87edaa98c19f9d9f5f994..337cbb4fd1c3163f157f28493925e849dfb6bf36 100644 --- a/tests/st/networks/models/fasterrcnn/test_fasterrcnn_overfit.py +++ b/tests/st/networks/models/fasterrcnn/test_fasterrcnn_overfit.py @@ -105,7 +105,7 @@ def get_optimizer(cfg, params, lr): raise ValueError(f"Not support {cfg.type}") -@pytest.mark.level0 +@pytest.mark.level1 @pytest.mark.platform_x86_ascend_training @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend910b_training diff --git a/tests/st/networks/test_mindcv_overfit.py b/tests/st/networks/test_mindcv_overfit.py index 70c8b82b9d8a39aabb3485b1acc5d129da0545f2..c634808a19fb982af12d0ab46ddfcfe205ea3bf4 100644 --- a/tests/st/networks/test_mindcv_overfit.py +++ b/tests/st/networks/test_mindcv_overfit.py @@ -247,6 +247,8 @@ def compute_process(q, device_id, device_num, args): q.put(loss_end) +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard def test_resnet_50_1p(): @@ -266,6 +268,8 @@ def test_resnet_50_1p(): @pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_single def test_resnet_50_8p(): @@ -294,6 +298,8 @@ def test_resnet_50_8p(): assert 0.97 <= res0 <= 1.07, f"Loss start should in [7.25, 7.35], but got {res0}" @pytest.mark.level1 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard def test_mobilenetv3_small_1p(): @@ -313,6 +319,8 @@ def test_mobilenetv3_small_1p(): @pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @test_utils.run_test_with_On @@ -333,6 +341,8 @@ def test_inception_v3_1p(): @pytest.mark.level0 +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @test_utils.run_test_with_On diff --git a/tests/st/ops/ascend/test_adam_weight_decay.py b/tests/st/ops/ascend/test_adam_weight_decay.py index 465cce3ba902f68d70643a393cb9e1e5cf8e8eba..f2977d9b6db4a99875524a18e4a970222d22066a 100644 --- a/tests/st/ops/ascend/test_adam_weight_decay.py +++ b/tests/st/ops/ascend/test_adam_weight_decay.py @@ -122,7 +122,7 @@ def test_adam_weight_decay_pass_without_same_type(): """ Feature: AdamWeightDecay op Description: test the rightness of AdamWeightDecay kernel, decay_flag is true - Expectation: the output is wrong + Expectation: the output is same """ decay_flag = True # equivalent to weight_decay is not zero weight_decay = Parameter(Tensor(np.array([0.9]).astype(np.float32)), name="weight_decay") @@ -148,3 +148,39 @@ def test_adam_weight_decay_pass_without_same_type(): fission_net = FissionNet() output2 = fission_net(param2, m2, v2, lr, beta1, beta2, eps, weight_decay, gradient) assert (output1.asnumpy() == output2[0].asnumpy()).all() + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_adam_weight_decay_pass_with_same_type_to_assign(): + """ + Feature: AdamWeightDecay op + Description: test the rightness of AdamWeightDecay kernel, decay_flag is true + Expectation: the output is same + """ + decay_flag = True # equivalent to weight_decay is not zero + weight_decay = Parameter(Tensor(np.array([0.9]).astype(np.float32)), name="weight_decay") + beta1 = Parameter(Tensor(np.array([0.9]).astype(np.float32)), name="beta1") + beta2 = Parameter(Tensor(np.array([0.999]).astype(np.float32)), name="beta2") + eps = Parameter(Tensor(np.array([1e-8]).astype(np.float32)), name="eps") + lr = Parameter(Tensor(np.array([0.001]).astype(np.float32)), name="lr") + gradient = Parameter(Tensor(np.array([[2, 3], [1, 5]]).astype(np.float16)), name="gradient") + + # The inputs: param, m and v will be modified in-place by P.AdamWeightDecay() or _update_run_op(), + # so here defines two copied of them: (param1, m1, v1) and (param2, m2, v2) + param1 = Parameter(Tensor(np.array([[1, 2], [3, 4]]).astype(np.float32)), name="param1") + m1 = Parameter(Tensor(np.array([[5, 6], [7, 8]]).astype(np.float32)), name="m1") + v1 = Parameter(Tensor(np.array([[3, 1], [7, 4]]).astype(np.float32)), name="v1") + + param2 = copy.deepcopy(param1) + m2 = copy.deepcopy(m1) + v2 = copy.deepcopy(v1) + + context.set_context(mode=context.GRAPH_MODE, device_target='Ascend') + origin_net = OriNet(decay_flag) + output1 = origin_net(param1, m1, v1, lr, beta1, beta2, eps, weight_decay, gradient) + fission_net = FissionNet() + output2 = fission_net(param2, m2, v2, lr, beta1, beta2, eps, weight_decay, gradient) + assert (output1.asnumpy() == output2[0].asnumpy()).all() diff --git a/tests/st/ops/ascend/test_addmv_op.py b/tests/st/ops/ascend/test_addmv_op.py index 9466247a651574fd52bbf5ff5c212d80cf694826..cc20929b3ef4e4a12b00e758c2147a720d6ea4f0 100644 --- a/tests/st/ops/ascend/test_addmv_op.py +++ b/tests/st/ops/ascend/test_addmv_op.py @@ -1,4 +1,4 @@ -# Copyright 2022 Huawei Technologies Co., Ltd +# Copyright 2022-2024 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -49,3 +49,20 @@ def test_addmv_forward_float32_tensor_api(): addmv_forward_tensor_api(np.float32) context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") addmv_forward_tensor_api(np.float32) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_addmv_invalid_dtypes(): + """ + Feature: test addmv invalid dtypes. + Description: test invalid dtypes inputs. + Expectation: the result match to the expect value. + """ + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + with pytest.raises(TypeError): + addmv_forward_tensor_api(np.uint16) + with pytest.raises(TypeError): + addmv_forward_tensor_api(np.int8) diff --git a/tests/st/ops/gpu/test_reshape_op.py b/tests/st/ops/gpu/test_reshape_op.py index 681145b1d9a912a7d9fabae89be8cc840da15627..a765212d71ad5dc2f1d3b637f0f2a47ddcf2368b 100644 --- a/tests/st/ops/gpu/test_reshape_op.py +++ b/tests/st/ops/gpu/test_reshape_op.py @@ -1,4 +1,4 @@ -# Copyright 2019 Huawei Technologies Co., Ltd +# Copyright 2019-2024 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,6 +19,8 @@ import pytest import mindspore.context as context from mindspore import Tensor from mindspore.ops import operations as P +import mindspore.nn as nn +import mindspore as ms def reshape(nptype): context.set_context(mode=context.GRAPH_MODE, device_target="GPU") @@ -124,3 +126,29 @@ def test_reshape_uint8(): @pytest.mark.env_onecard def test_reshape_bool(): reshape_bool() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_reshape_fallback(): + """ + Feature: test Reshape in Fallback. + Description: test Reshape in Fallback. + Expectation: no exception. + """ + class Network(nn.Cell): + def __init__(self): + super().__init__() + self.fc = nn.Dense(1024, 512) + self.bn = nn.BatchNorm1d(512) + + def construct(self, x): + x = ms.ops.expand_dims(Tensor(np.max(x.asnumpy(), axis=2)), -1) + return ms.ops.reshape(x, (-1, 1024)) + + context.set_context(mode=context.GRAPH_MODE) + x = Tensor(np.ones((32, 1024, 128)), dtype=ms.float32) + net = Network() + out_shape = net(x) + assert out_shape.shape == (32, 1024) diff --git a/tests/st/ops/graph_kernel/test_dvm.py b/tests/st/ops/graph_kernel/test_dvm.py index 10f3d75433629b77e3243eef1439ad01d9ee915b..f0ef7e7d9673616a3130eb144df722d184080cd9 100644 --- a/tests/st/ops/graph_kernel/test_dvm.py +++ b/tests/st/ops/graph_kernel/test_dvm.py @@ -85,7 +85,7 @@ def fuse(shape1, shape2, dtype): np.testing.assert_allclose(expects[2], outputs[2], 0, 0) -@pytest.mark.level0 +@pytest.mark.level1 @pytest.mark.platform_arm_ascend910b_training @pytest.mark.env_onecard @pytest.mark.parametrize("shape1, shape2", [((32, 1024), (32, 1024)), ((44, 1, 47, 1), (1, 34, 1, 91))]) diff --git a/tests/st/ops/test_divide.py b/tests/st/ops/test_divide.py index 9aed6aee0debdb48f4a418e516be2fd1157b5757..a1572e082b1e698c7edee04ccf9eb50d31599968 100644 --- a/tests/st/ops/test_divide.py +++ b/tests/st/ops/test_divide.py @@ -1,91 +1,135 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ import numpy as np import pytest -import mindspore.common.dtype as mstype -import mindspore.nn as nn -from mindspore import Tensor -from mindspore import context -from mindspore import ops +import mindspore as ms +from tests.st.utils import test_utils +from tests.st.ops.dynamic_shape.test_op_utils import TEST_OP -class NetNone(nn.Cell): - def construct(self, x, other): - return ops.divide(x, other) +def generate_random_input(shape, dtype): + return np.random.randn(*shape).astype(dtype), np.random.randn(*shape).astype(dtype) -class NetFloor(nn.Cell): - def construct(self, x, other): - return ops.divide(x, other, rounding_mode="floor") +def generate_expect_forward_output(x, y, rounding_mode): + if rounding_mode == 'floor': + return np.floor_divide(x, y) + if rounding_mode == 'trunc': + return np.trunc(np.divide(x, y)) + return np.divide(x, y) -class NetTrunc(nn.Cell): - def construct(self, x, other): - return ops.divide(x, other, rounding_mode="trunc") +class NetNone(ms.nn.Cell): + def __init__(self): + super().__init__() + self.div = ms.ops.div + def construct(self, x, y): + return self.div(x, y) -@pytest.mark.level2 + +class NetFloor(ms.nn.Cell): + def __init__(self): + super().__init__() + self.div = ms.ops.div + + def construct(self, x, y): + return self.div(x, y, rounding_mode="floor") + + +class NetTrunc(ms.nn.Cell): + def __init__(self): + super().__init__() + self.div = ms.ops.div + + def construct(self, x, y): + return self.div(x, y, rounding_mode="trunc") + + +@pytest.mark.level1 +@pytest.mark.env_onecard @pytest.mark.platform_x86_cpu @pytest.mark.platform_arm_cpu @pytest.mark.platform_x86_gpu_training @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) -def test_divide_none(mode): +@pytest.mark.parametrize('mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +def test_div_vmap(mode): """ - Feature: tensor.divide() - Description: Verify the result of tensor.divide - Expectation: success + Feature: pyboost function. + Description: test function div vmap feature. + Expectation: expect correct result. """ - context.set_context(mode=mode) - net = NetNone() - x = Tensor(np.array([1.0, 5.0, 7.5]), mstype.float32) - y = Tensor(np.array([4.0, 2.0, 3.0]), mstype.float32) - output = net(x, y) - expected = np.array([0.25, 2.5, 2.5], dtype=np.float32) - assert np.allclose(output.asnumpy(), expected) + ms.context.set_context(mode=mode) + x = np.array([7, 8, 9], dtype=np.float32) + y = np.array([14, 6, 12], dtype=np.float32) + output = ms.ops.vmap(ms.ops.div, in_axes=-1, out_axes=0)(ms.Tensor(x), ms.Tensor(y)) + expect = generate_expect_forward_output(x, y, None) + np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3) -@pytest.mark.level2 +@pytest.mark.level0 +@pytest.mark.env_onecard @pytest.mark.platform_x86_cpu -@pytest.mark.platform_arm_cpu @pytest.mark.platform_x86_gpu_training @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) -def test_divide_floor(mode): +@pytest.mark.parametrize('mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +@pytest.mark.parametrize('rounding_mode', [None, 'floor', 'trunc']) +def test_ops_div_std(mode, rounding_mode): """ - Feature: tensor.divide() - Description: Verify the result of tensor.divide - Expectation: success + Feature: pyboost function. + Description: test function div forward/backward. + Expectation: expect correct result. """ - context.set_context(mode=mode) - net = NetFloor() - x = Tensor(np.array([1.0, 5.0, 9.5]), mstype.float32) - y = Tensor(np.array([4.0, 2.0, 3.0]), mstype.float32) - output = net(x, y) - expected = np.array([0.0, 2.0, 3.0], dtype=np.float32) - assert np.allclose(output.asnumpy(), expected) + # forward test + ms.context.set_context(mode=mode) + x, y = generate_random_input((4, 5, 6), np.float32) + if rounding_mode == 'floor': + net = NetFloor() + elif rounding_mode == 'trunc': + net = NetTrunc() + else: + net = NetNone() + output = net(ms.Tensor(x, dtype=ms.float32), ms.Tensor(y, dtype=ms.float32)) + expect = generate_expect_forward_output(x, y, rounding_mode) + np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3) + # backward test + x, y = np.array([1.0, 5.0, 7.5]), np.array([4.0, 2.0, 3.0]) + net = NetNone() + output = ms.ops.grad(net, (0,))(ms.Tensor(x, dtype=ms.float32), ms.Tensor(y, dtype=ms.float32)) + expect = [0.25, 0.5, 0.33333333] + np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3) + +@test_utils.run_with_cell +def div_forward_dyn(x, y): + return ms.ops.div(x, y) -@pytest.mark.level2 + +@pytest.mark.level0 +@pytest.mark.env_onecard @pytest.mark.platform_x86_cpu -@pytest.mark.platform_arm_cpu @pytest.mark.platform_x86_gpu_training @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training -@pytest.mark.env_onecard -@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) -def test_divide_trunc(mode): +@pytest.mark.parametrize('mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +def test_div_dynamic_shape(mode): """ - Feature: tensor.divide() - Description: Verify the result of tensor.divide - Expectation: success + Feature: Test dynamic shape. + Description: test function div dynamic feature. + Expectation: expect correct result. """ - context.set_context(mode=mode) - net = NetTrunc() - x = Tensor(np.array([1.0, 5.0, 9.5]), mstype.float32) - y = Tensor(np.array([4.0, 2.0, 3.0]), mstype.float32) - output = net(x, y) - expected = np.array([0.0, 2.0, 3.0], dtype=np.float32) - assert np.allclose(output.asnumpy(), expected) + ms_x0, ms_y0 = ms.Tensor(np.array([[1, 2, 3, 4], [5, 6, 7, 8]]), ms.float32), ms.Tensor(np.array([[1, 2, 3, 4]]), + ms.float32) + ms_x1, ms_y1 = ms.Tensor(np.array([[1, 2, 3], [5, 6, 7]]), ms.float32), ms.Tensor(np.array([[1, 2, 3]]), ms.float32) + TEST_OP(div_forward_dyn, [[ms_x0, ms_y0], [ms_x1, ms_y1]], grad=True, mode=mode) diff --git a/tests/st/ops/test_mint_max_pool2d.py b/tests/st/ops/test_mint_max_pool2d.py new file mode 100644 index 0000000000000000000000000000000000000000..517e7b3193a2ed03095828694b964ecd1dc98bd9 --- /dev/null +++ b/tests/st/ops/test_mint_max_pool2d.py @@ -0,0 +1,171 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import pytest +import os +import numpy as np +import mindspore as ms +from mindspore import ops +from mindspore.mint.nn.functional import max_pool2d +from mindspore import dtype as mstype +from tests.st.utils import test_utils +from tests.st.ops.dynamic_shape.test_op_utils import TEST_OP + + +@test_utils.run_with_cell +def max_pool2d_forward_func(x, kernel_size, stride, padding, dilation, ceil_mode=False, return_indices=False): + return max_pool2d(x, kernel_size, stride, padding, dilation, return_indices, ceil_mode) + +@test_utils.run_with_cell +def max_pool2d_backward_func(x, kernel_size, stride, padding, dilation, ceil_mode, return_indices): + return ops.grad(max_pool2d_forward_func, (0,))(x, kernel_size, stride, padding, dilation, + ceil_mode, return_indices) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.parametrize('mode', [ms.context.GRAPH_MODE, ms.context.PYNATIVE_MODE]) +def test_ops_max_pool2d_forward_return_indices(mode): + """ + Feature: Pyboost function. + Description: Test function max_pool2d forward with return indices. + Expectation: Correct result. + """ + os.environ["GRAPH_OP_RUN"] = "1" + ms.context.set_context(mode=mode) + x = np.array([[[[1, 2, 3], [1, 2, 3]]]]).astype(np.float32) + kernel_size = 2 + stride = None + padding = 0 + dilation = (1, 1) + return_indices = True + ceil_mode = False + output, indices = max_pool2d_forward_func(ms.Tensor(x), kernel_size, stride, padding, + dilation, ceil_mode, return_indices) + expect_out1 = np.array([[[[2.]]]]) + expect_out2 = np.array([[[[1]]]]) + np.testing.assert_allclose(output.asnumpy(), expect_out1, rtol=1e-6) + np.testing.assert_allclose(indices.asnumpy(), expect_out2, rtol=1e-6) + del os.environ["GRAPH_OP_RUN"] + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.parametrize('mode', [ms.context.GRAPH_MODE, ms.context.PYNATIVE_MODE]) +def test_ops_max_pool2d_forward_without_return_indices(mode): + """ + Feature: Pyboost function. + Description: Test function max_pool2d forward without return indices. + Expectation: Correct result. + """ + os.environ["GRAPH_OP_RUN"] = "1" + ms.context.set_context(mode=mode) + x = np.array([[[[1, 2, 3], [1, 2, 3]]]]).astype(np.float32) + kernel_size = 2 + stride = None + padding = 0 + dilation = (1, 1) + return_indices = False + ceil_mode = False + output = max_pool2d_forward_func(ms.Tensor(x), kernel_size, stride, padding, + dilation, ceil_mode, return_indices) + expect_out = np.array([[[[2.]]]]) + np.testing.assert_allclose(output.asnumpy(), expect_out, rtol=1e-6) + del os.environ["GRAPH_OP_RUN"] + + +@pytest.mark.level1 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.parametrize('mode', [ms.context.GRAPH_MODE, ms.context.PYNATIVE_MODE]) +def test_ops_max_pool2d_backward_return_indices(mode): + """ + Feature: Pyboost function. + Description: Test function max_pool2d backward with return indices. + Expectation: Correct result. + """ + os.environ["GRAPH_OP_RUN"] = "1" + ms.context.set_context(mode=mode) + x = np.array([[[[1, 2, 3], [1, 2, 3]]]]).astype(np.float32) + kernel_size = 2 + stride = None + padding = 0 + dilation = (1, 1) + return_indices = True + ceil_mode = False + output = max_pool2d_backward_func(ms.Tensor(x), kernel_size, stride, padding, dilation, + ceil_mode, return_indices) + expect = np.array([[[[0., 1., 0.], [0., 0., 0.]]]]) + np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-6) + del os.environ["GRAPH_OP_RUN"] + + +@pytest.mark.level1 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.parametrize('mode', [ms.context.GRAPH_MODE, ms.context.PYNATIVE_MODE]) +def test_ops_max_pool2d_backward_without_return_indices(mode): + """ + Feature: Pyboost function. + Description: Test function max_pool2d backward without return indices. + Expectation: Correct result. + """ + os.environ["GRAPH_OP_RUN"] = "1" + ms.context.set_context(mode=mode) + x = np.array([[[[1, 2, 3], [1, 2, 3]]]]).astype(np.float32) + kernel_size = 2 + stride = None + padding = 0 + dilation = (1, 1) + return_indices = False + ceil_mode = False + output = max_pool2d_backward_func(ms.Tensor(x), kernel_size, stride, padding, dilation, + ceil_mode, return_indices) + expect = np.array([[[[0., 1., 0.], [0., 0., 0.]]]]) + np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-6) + del os.environ["GRAPH_OP_RUN"] + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.parametrize('mode', [ms.GRAPH_MODE, ms.context.PYNATIVE_MODE]) +def test_ops_max_pool2d_dynamic(mode): + """ + Feature: Pyboost function. + Description: Test function max_pool2d forward and backward with dynamic shape and rank. + Expectation: Correct result. + """ + os.environ["GRAPH_OP_RUN"] = "1" + x1 = ms.Tensor(np.arange(2 * 3 * 10 * 20).reshape((2, 3, 10, 20)), mstype.float32) + kernel_size1 = 2 + stride1 = 2 + padding1 = 0 + dilation1 = 1 + ceil_mode1 = True + + x2 = ms.Tensor(np.arange(10 * 1 * 20 * 10).reshape((10, 1, 20, 10)), mstype.float32) + kernel_size2 = 4 + stride2 = 2 + padding2 = 2 + dilation2 = 1 + ceil_mode2 = True + + TEST_OP(max_pool2d_forward_func, + [[x1, kernel_size1, stride1, padding1, dilation1, ceil_mode1], + [x2, kernel_size2, stride2, padding2, dilation2, ceil_mode2]], + mode=mode, jit_level="O0") + del os.environ["GRAPH_OP_RUN"] diff --git a/tests/st/ops/test_ops_erf.py b/tests/st/ops/test_ops_erf.py new file mode 100644 index 0000000000000000000000000000000000000000..74962bb8e4f436c4cdc384f6ca6f414de1dfcbbe --- /dev/null +++ b/tests/st/ops/test_ops_erf.py @@ -0,0 +1,151 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import pytest +import numpy as np +from scipy import special +import mindspore as ms +from mindspore import ops +from mindspore.mint import erf +from tests.st.utils import test_utils +from tests.st.ops.dynamic_shape.test_op_utils import TEST_OP + + +def generate_random_input(shape, dtype): + return np.random.randn(*shape).astype(dtype) + + +def generate_expect_forward_output(x): + return special.erf(x) + + +@test_utils.run_with_cell +def erf_forward_func(x): + return erf(x) + + +@test_utils.run_with_cell +def erf_backward_func(x): + return ops.grad(erf_forward_func, (0))(x) + + +@test_utils.run_with_cell +def erfinv_vmap_func(x): + return ops.vmap(erf_forward_func)(x) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.parametrize('context_mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +@pytest.mark.parametrize('dtype, tol', [(np.float16, 1.0e-3), (np.float32, 1.0e-4)]) +@pytest.mark.parametrize('shape', [(2, 3, 4, 5), (1, 256, 2048), (1, 256, 5120)]) +@test_utils.run_test_with_On +def test_ops_erf_forward(context_mode, shape, dtype, tol): + """ + Feature: pyboost function. + Description: test function erf forward. + Expectation: expect correct result. + """ + ms.context.set_context(mode=context_mode) + x = generate_random_input(shape, dtype) + output = erf_forward_func(ms.Tensor(x)) + expect = generate_expect_forward_output(x) + diff = output.asnumpy() - expect + error = np.ones(shape=expect.shape) * tol + assert np.all(np.abs(diff) < error) + + +@pytest.mark.level1 +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.env_onecard +@pytest.mark.parametrize('context_mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +def test_ops_erf_bf16(context_mode): + """ + Feature: pyboost function. + Description: test function erf forward(bf16). + Expectation: expect correct result. + """ + ms.context.set_context(mode=context_mode) + x_tensor = ms.Tensor([0, -1., 10.], dtype=ms.bfloat16) + output = erf_forward_func(x_tensor) + expect = np.array([0.000, -0.8427, 1.0000]) + np.testing.assert_allclose(output.float().asnumpy(), expect, rtol=5e-3, atol=5e-3) + + +@pytest.mark.level1 +@pytest.mark.env_onecard +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.parametrize('context_mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +@pytest.mark.parametrize('dtype, tol', [(np.float16, 1.0e-3), (np.float32, 1.0e-4)]) +@test_utils.run_test_with_On +def test_ops_erf_backward(context_mode, dtype, tol): + """ + Feature: pyboost function. + Description: test function erf backward. + Expectation: expect correct result. + """ + ms.context.set_context(mode=context_mode) + x = np.array([0.1, 0.2, 0.3, 1, 2]).astype(dtype) + output = erf_backward_func(ms.Tensor(x)) + expect = np.array([1.1171516, 1.0841347, 1.0312609, 0.4151074, 0.02066698]).astype(dtype) + np.testing.assert_allclose(output.asnumpy(), expect, rtol=tol) + + + +@pytest.mark.level1 +@pytest.mark.env_onecard +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.parametrize('context_mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +@pytest.mark.parametrize('dtype, tol', [(np.float16, 1.0e-3), (np.float32, 1.0e-4)]) +@pytest.mark.parametrize('shape', [(2, 3, 4, 5), (1, 256, 2048), (1, 256, 5120)]) +@test_utils.run_test_with_On +def test_ops_erfinv_vmap(context_mode, shape, dtype, tol): + """ + Feature: pyboost function. + Description: test function erfinv vmap feature. + Expectation: expect correct result. + """ + ms.context.set_context(mode=context_mode) + x = generate_random_input(shape, dtype) + output = erfinv_vmap_func(ms.Tensor(x)) + expect = generate_expect_forward_output(x) + diff = output.asnumpy() - expect + error = np.ones(shape=expect.shape) * tol + assert np.all(np.abs(diff) < error) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.parametrize('context_mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +def test_erf_dynamic_shape(context_mode): + """ + Feature: Test dynamic shape. + Description: test function erf dynamic feature. + Expectation: expect correct result. + """ + ms_data1 = generate_random_input((2, 3, 4, 5), np.float32) + ms_data2 = generate_random_input((3, 4, 5, 6), np.float32) + TEST_OP(erf_forward_func + , [[ms.Tensor(ms_data1)], [ms.Tensor(ms_data2)]], grad=True, mode=context_mode) diff --git a/tests/st/ops/test_ops_fft2.py b/tests/st/ops/test_ops_fft2.py index 2e1a4dbf9463823066ca7d05e7c08ecc3ff9ae61..aad1a1b8cb75abe4e3cf6ed6ec3e7980cb58590f 100644 --- a/tests/st/ops/test_ops_fft2.py +++ b/tests/st/ops/test_ops_fft2.py @@ -91,8 +91,6 @@ def test_ops_fft2_backward(mode): dim = (0,) x = generate_random_input((2, 3, 4, 5), np.float32) dout = generate_random_input((2, 3, 4, 5), np.complex64) - x = np.arange(1, 17).reshape(2, 8) - dout = np.ones_like(x).astype(np.complex64) net = FFT2Net() grad_net = FFT2GradNet(net, ms.Tensor(dout)) grad_net.set_train() diff --git a/tests/st/ops/test_ops_fftn.py b/tests/st/ops/test_ops_fftn.py index 540b71fe7af924b966fa5879655bff7d3f51a40b..442a3b03e93af9d64261340ef63d38135ebab012 100644 --- a/tests/st/ops/test_ops_fftn.py +++ b/tests/st/ops/test_ops_fftn.py @@ -91,8 +91,6 @@ def test_ops_fftn_backward(mode): dim = (0,) x = generate_random_input((2, 3, 4, 5), np.float32) dout = generate_random_input((2, 3, 4, 5), np.complex64) - x = np.arange(1, 17).reshape(2, 8) - dout = np.ones_like(x).astype(np.complex64) net = FFTNNet() grad_net = FFTNGradNet(net, ms.Tensor(dout)) grad_net.set_train() diff --git a/tests/st/ops/test_ops_ifft2.py b/tests/st/ops/test_ops_ifft2.py index 645f610ccd1d32dae0d7d68ca313173d5a50e79c..8d145f31489801acce6009d08e72f2bc493b954e 100644 --- a/tests/st/ops/test_ops_ifft2.py +++ b/tests/st/ops/test_ops_ifft2.py @@ -91,8 +91,6 @@ def test_ops_ifft2_backward(mode): dim = (0,) x = generate_random_input((2, 3, 4, 5), np.float32) dout = generate_random_input((2, 3, 4, 5), np.complex64) - x = np.arange(1, 17).reshape(2, 8) - dout = np.ones_like(x).astype(np.complex64) net = IFFT2Net() grad_net = IFFT2GradNet(net, ms.Tensor(dout)) grad_net.set_train() diff --git a/tests/st/ops/test_ops_ifftn.py b/tests/st/ops/test_ops_ifftn.py index 0c70a5841754be20c99b89f0e389d0161ce052a9..e10ad74f7ba22fbaacaaad105870c1b83b8c40d8 100644 --- a/tests/st/ops/test_ops_ifftn.py +++ b/tests/st/ops/test_ops_ifftn.py @@ -91,8 +91,6 @@ def test_ops_ifftn_backward(mode): dim = (0,) x = generate_random_input((2, 3, 4, 5), np.float32) dout = generate_random_input((2, 3, 4, 5), np.complex64) - x = np.arange(1, 17).reshape(2, 8) - dout = np.ones_like(x).astype(np.complex64) net = IFFTNNet() grad_net = IFFTNGradNet(net, ms.Tensor(dout)) grad_net.set_train() diff --git a/tests/st/ops/test_ops_irfft.py b/tests/st/ops/test_ops_irfft.py index db4aa6bb44b66d8d728aafed3871b5bf43561c52..f498d745d77f769b8236e8bc042c0b27c90048dd 100644 --- a/tests/st/ops/test_ops_irfft.py +++ b/tests/st/ops/test_ops_irfft.py @@ -28,9 +28,9 @@ class IRFFTNet(nn.Cell): return self.irfft(x, n, dim) -class RFFTGradNet(nn.Cell): +class IRFFTGradNet(nn.Cell): def __init__(self, net, dout): - super(RFFTGradNet, self).__init__() + super(IRFFTGradNet, self).__init__() self.net = net self.dout = dout self.grad = ops.GradOperation(sens_param=True) @@ -100,7 +100,7 @@ def test_ops_irfft_backward(mode): x = generate_random_input((2, 3), np.float32) dout = np.ones((2, 3)).astype(np.float32) net = IRFFTNet() - grad_net = RFFTGradNet(net, ms.Tensor(dout)) + grad_net = IRFFTGradNet(net, ms.Tensor(dout)) grad_net.set_train() grad = grad_net(ms.Tensor(x), n, dim) expect = generate_expect_backward_output_2_3(dout, n, dim) @@ -197,7 +197,7 @@ def test_ops_irfft_backward_dynamic_shape(mode): x1 = generate_random_input((2, 3), np.float32) dout1 = np.ones((2, 3)).astype(np.float32) - grad_net = RFFTGradNet(net, ms.Tensor(dout1)) + grad_net = IRFFTGradNet(net, ms.Tensor(dout1)) grad_net.set_train() grad_net.set_inputs(x_dyn, n_dyn, dim_dyn) output = grad_net(ms.Tensor(x1), n_dyn, dim_dyn) @@ -206,7 +206,7 @@ def test_ops_irfft_backward_dynamic_shape(mode): x2 = generate_random_input((2, 4), np.float32) dout2 = np.ones((2, 4)).astype(np.float32) - grad_net = RFFTGradNet(net, ms.Tensor(dout2)) + grad_net = IRFFTGradNet(net, ms.Tensor(dout2)) grad_net.set_train() grad_net.set_inputs(x_dyn, n_dyn, dim_dyn) output = grad_net(ms.Tensor(x2), n_dyn, dim_dyn) @@ -237,7 +237,7 @@ def test_ops_irfft_backward_dynamic_rank(mode): x1 = generate_random_input((2, 3), np.float32) dout1 = np.ones((2, 3)).astype(np.float32) - grad_net = RFFTGradNet(net, ms.Tensor(dout1)) + grad_net = IRFFTGradNet(net, ms.Tensor(dout1)) grad_net.set_train() grad_net.set_inputs(x_dyn, n_dyn, dim_dyn) output = grad_net(ms.Tensor(x1), n_dyn, dim_dyn) @@ -246,7 +246,7 @@ def test_ops_irfft_backward_dynamic_rank(mode): x2 = generate_random_input((2, 4), np.float32) dout2 = np.ones((2, 4)).astype(np.float32) - grad_net = RFFTGradNet(net, ms.Tensor(dout2)) + grad_net = IRFFTGradNet(net, ms.Tensor(dout2)) grad_net.set_train() grad_net.set_inputs(x_dyn, n_dyn, dim_dyn) output = grad_net(ms.Tensor(x2), n_dyn, dim_dyn) diff --git a/tests/st/ops/test_ops_lin_space_ext.py b/tests/st/ops/test_ops_lin_space_ext.py new file mode 100644 index 0000000000000000000000000000000000000000..ee26ca6e9b853ef0607fdd2b91828a639b7039ac --- /dev/null +++ b/tests/st/ops/test_ops_lin_space_ext.py @@ -0,0 +1,156 @@ +# Copyright 2020 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest +from tests.st.utils import test_utils +from tests.st.ops.dynamic_shape.test_op_utils import TEST_OP +from mindspore import ops, Tensor +from mindspore.ops.function.math_func import linspace_ext +import mindspore as ms +from mindspore.common import mutable +import os + +def generate_random_input(shape, dtype): + return np.random.randn(*shape).astype(dtype) + +@test_utils.run_with_cell +def lin_space_ext_forward_func(start, end, steps, dtype=None): + return linspace_ext(start, end, steps, dtype=dtype) + +@test_utils.run_with_cell +def lin_space_ext_backward_func(start, end, steps, dtype=None): + return ops.grad(lin_space_ext_forward_func, (0, 1))(start, end, steps, dtype) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.parametrize('mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +@pytest.mark.parametrize('dtype', [ms.float32]) +def test_lin_space_ext_normal(mode, dtype): + """ + Feature: Ops. + Description: test op LinSpaceExt forward and backward. + Expectation: expect correct result. + """ + ms.context.set_context(mode=mode) + os.environ["GRAPH_OP_RUN"] = '1' + ## forward + start_scalar, end_scalar, steps_scalar = 5, 25, 5 + start_tensor, end_tensor, steps_tensor = ms.Tensor(start_scalar), ms.Tensor(end_scalar), ms.Tensor(steps_scalar) + output1 = lin_space_ext_forward_func(start_scalar, end_scalar, steps_scalar, dtype) + expect1 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output1.asnumpy(), expect1) + output2 = lin_space_ext_forward_func(start_tensor, end_tensor, steps_tensor, dtype) + expect2 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output2.asnumpy(), expect2) + + start_scalar, end_scalar, steps_scalar = 1.0, 25.0, 20 + start_tensor, end_tensor, steps_tensor = ms.Tensor(start_scalar), ms.Tensor(end_scalar), ms.Tensor(steps_scalar) + dtype = ms.float32 + output3 = lin_space_ext_forward_func(start_scalar, end_scalar, steps_scalar, dtype) + expect3 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output3.asnumpy(), expect3) + output4 = lin_space_ext_forward_func(start_tensor, end_tensor, steps_tensor, dtype) + expect4 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output4.asnumpy(), expect4) + + start_scalar, end_scalar, steps_scalar = 5.0, 250, 14 + start_tensor, end_tensor, steps_tensor = ms.Tensor(start_scalar), ms.Tensor(end_scalar), ms.Tensor(steps_scalar) + dtype = ms.float32 + output5 = lin_space_ext_forward_func(start_scalar, end_scalar, steps_scalar, dtype) + expect5 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output5.asnumpy(), expect5) + output6 = lin_space_ext_forward_func(start_tensor, end_tensor, steps_tensor, dtype) + expect6 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output6.asnumpy(), expect6) + + ## backward + start, end, steps = -115, 251, 101 + dtype = ms.float32 + grads = lin_space_ext_backward_func(ms.Tensor(start, ms.float32), ms.Tensor(end, ms.float32), steps, dtype) + grads_ = [out.asnumpy() for out in grads] + expect = [0, 0] + assert np.allclose(grads_, expect) + del os.environ["GRAPH_OP_RUN"] + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend910b_training +@pytest.mark.parametrize('mode', [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +@pytest.mark.parametrize('dtype', [ms.bfloat16]) +def test_lin_space_ext_bfloat16(mode, dtype): + """ + Feature: Ops. + Description: test op LinSpaceExt. + Expectation: expect correct result. + """ + ms.context.set_context(mode=mode) + os.environ["GRAPH_OP_RUN"] = '1' + + start_scalar, end_scalar, steps_scalar = 5, 25, 5 + start_tensor, end_tensor, steps_tensor = ms.Tensor(start_scalar), ms.Tensor(end_scalar), ms.Tensor(steps_scalar) + output1 = lin_space_ext_forward_func(start_scalar, end_scalar, steps_scalar, dtype) + expect1 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output1.float().asnumpy(), expect1) + output2 = lin_space_ext_forward_func(start_tensor, end_tensor, steps_tensor, dtype) + expect2 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output2.float().asnumpy(), expect2) + + start_scalar, end_scalar, steps_scalar = 1.0, 25.0, 20 + start_tensor, end_tensor, steps_tensor = ms.Tensor(start_scalar), ms.Tensor(end_scalar), ms.Tensor(steps_scalar) + dtype = ms.float32 + output3 = lin_space_ext_forward_func(start_scalar, end_scalar, steps_scalar, dtype) + expect3 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output3.float().asnumpy(), expect3) + output4 = lin_space_ext_forward_func(start_tensor, end_tensor, steps_tensor, dtype) + expect4 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output4.float().asnumpy(), expect4) + + start_scalar, end_scalar, steps_scalar = 5.0, 250, 14 + start_tensor, end_tensor, steps_tensor = ms.Tensor(start_scalar), ms.Tensor(end_scalar), ms.Tensor(steps_scalar) + dtype = ms.float32 + output5 = lin_space_ext_forward_func(start_scalar, end_scalar, steps_scalar, dtype) + expect5 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output5.float().asnumpy(), expect5) + output6 = lin_space_ext_forward_func(start_tensor, end_tensor, steps_tensor, dtype) + expect6 = np.linspace(start_scalar, end_scalar, steps_scalar, axis=-1) + assert np.allclose(output6.float().asnumpy(), expect6) + del os.environ["GRAPH_OP_RUN"] + + +@pytest.mark.level1 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.parametrize('mode', [ms.context.GRAPH_MODE, ms.context.PYNATIVE_MODE]) +def test_lin_space_ext_dynamic(mode): + """ + Feature: test dynamic by TEST_OP. + Description: test op concat. + Expectation: expect tile result. + """ + os.environ["GRAPH_OP_RUN"] = '1' + input_case1 = (Tensor([5]), Tensor([23]), Tensor([5])) + input_case2 = (Tensor([-4]), Tensor([40]), Tensor([6])) + TEST_OP(lin_space_ext_forward_func, [[*input_case1], [*input_case2]], nontensor_dynamic_type='None', + mode=mode, grad=True) + + input_case3 = (5, 50.23, mutable(5), ms.int32) + input_case4 = (-5, 43.97, mutable(13), ms.float32) + TEST_OP(lin_space_ext_forward_func, [[*input_case3], [*input_case4]], nontensor_dynamic_type='None', + mode=mode, grad=True, test_resize=False) + del os.environ["GRAPH_OP_RUN"] diff --git a/tests/st/ops/test_ops_narrow.py b/tests/st/ops/test_ops_narrow.py new file mode 100644 index 0000000000000000000000000000000000000000..73f39034ce1c7974ecea6c273a503485a37104ef --- /dev/null +++ b/tests/st/ops/test_ops_narrow.py @@ -0,0 +1,96 @@ +# Copyright 2024 Huawei Technonarrowies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import pytest +import os +import numpy as np +import mindspore as ms +from mindspore import ops +from mindspore.ops.extend import narrow +import tests.st.utils.test_utils as test_utils +from tests.st.ops.dynamic_shape.test_op_utils import TEST_OP + + +def generate_random_input(shape, dtype): + return np.random.randn(*shape).astype(dtype) + + +def generate_expect_forward_output(x, dim, start, length): + condition = np.zeros(x.shape[dim]) + if start < 0: + start += x.shape[dim] + condition[start:start+length] = 1 + return np.compress(condition, x, axis=dim) + + +@test_utils.run_with_cell +def narrow_forward_func(x, dim, start, length): + return narrow(x, dim, start, length) + + +@test_utils.run_with_cell +def narrow_backward_func(x, dim, start, length): + return ops.grad(narrow_forward_func, (0))(x, dim, start, length) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +def test_ops_narrow_forward(context_mode): + """ + Feature: pyboost function. + Description: test function narrow forward. + Expectation: expect correct result. + """ + os.environ['GRAPH_OP_RUN'] = '1' + ms.context.set_context(mode=context_mode) + x = generate_random_input((2, 3, 4, 5), np.float32) + dim = 2 + start = 1 + length = 2 + output = narrow_forward_func(ms.Tensor(x), dim, start, length) + expect_out = generate_expect_forward_output(x, dim, start, length) + np.testing.assert_allclose(output.asnumpy(), expect_out, rtol=1e-3) + + output = narrow_backward_func(ms.Tensor(x), dim, start, length) + expect = np.zeros_like(x) + expect[:, :, start:start+length, :] = 1 + np.testing.assert_allclose(output.asnumpy(), expect, rtol=1e-3) + del os.environ['GRAPH_OP_RUN'] + + +@pytest.mark.level1 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.parametrize("context_mode", [ms.GRAPH_MODE, ms.PYNATIVE_MODE]) +def test_ops_narrow_backward_dynamic_shape(context_mode): + """ + Feature: pyboost function. + Description: test function narrow backward with dynamic shape. + Expectation: expect correct result. + """ + os.environ['GRAPH_OP_RUN'] = '1' + x1 = generate_random_input((2, 3, 4, 5), np.float32) + dim1 = 1 + start1 = 0 + length1 = 2 + + dim2 = 3 + start2 = 0 + length2 = 2 + + TEST_OP(narrow_forward_func, [[ms.Tensor(x1), dim1, start1, length1], [ms.Tensor(x1), dim2, start2, length2]], + grad=True, mode=context_mode) + del os.environ['GRAPH_OP_RUN'] diff --git a/tests/st/ops/test_ops_relu.py b/tests/st/ops/test_ops_relu.py index 68b6e924aac144451c39fb5399580f0e00da6dfe..c608d4ae4a35eb6ae72f2d3a37b2e77b22fd14ae 100644 --- a/tests/st/ops/test_ops_relu.py +++ b/tests/st/ops/test_ops_relu.py @@ -231,4 +231,4 @@ def test_relu_bfloat16(mode): x = Tensor(np_array, ms.bfloat16) output = relu_forward_func(x) expect = generate_expect_forward_output(np_array, np.float32) - assert np.allclose(output.float().asnumpy(), expect) + assert np.allclose(output.float().asnumpy(), expect, rtol=2e-3, atol=2e-3) diff --git a/tests/st/ops/test_ops_select.py b/tests/st/ops/test_ops_select.py new file mode 100644 index 0000000000000000000000000000000000000000..d04b5e6551000f06731b7d958ba3130166767732 --- /dev/null +++ b/tests/st/ops/test_ops_select.py @@ -0,0 +1,310 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +"""test select""" +import numpy as np +import pytest +import os +import mindspore.common.dtype as mstype + +from mindspore.ops import select +from mindspore import ops, Tensor, jit, JitConfig, context +from tests.st.ops.dynamic_shape.test_op_utils import TEST_OP +from tests.st.utils import test_utils + + +def generate_random_input(shape, dtype): + return Tensor(np.random.randn(*shape).astype(dtype)) + + +def generate_expect_forward_output(condition, x, y): + return np.where(condition, x, y) + + +def generate_expect_backward_output(condition): + return np.zeros(np.shape(condition), dtype=np.bool_),\ + np.where(condition, 1, 0), np.where(condition, 0, 1) + + +@test_utils.run_with_cell +def select_forward_func(condition, x, y): + return select(condition, x, y) + + +@test_utils.run_with_cell +def select_backward_func(condition, x, y): + return ops.grad(select_forward_func, (0, 1, 2))(condition, x, y) + + +@test_utils.run_with_cell +def select_vmap_func(condition, x, y, in_axes=0): + return ops.vmap(select_forward_func, in_axes, out_axes=0)(condition, x, y) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_arm_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) +def test_select_float32(mode): + """ + Feature: Test functional select operator. Support x or y is a float32 Tensor. + Description: Operator select's inputs `x` and `y` are Tensor with float32 type. + Expectation: Assert result. + """ + context.set_context(mode=mode) + cond = np.array([[True, False], [True, False]]).astype(np.bool) + x = np.array([[1.2, 1], [1, 0]]).astype(np.float32) + y = np.array([[1, 2], [3, 4.0]]).astype(np.float32) + output = select_forward_func(Tensor(cond), Tensor(x), Tensor(y)) + print(output.asnumpy()) + expect = [[1.2, 2], [1, 4.0]] + error = np.ones(shape=[2, 2]) * 1.0e-6 + diff = output.asnumpy() - expect + assert np.all(diff < error) + assert np.all(-diff < error) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_arm_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) +def test_select_float16(mode): + """ + Feature: Test functional select operator. Support x or y is a float16 Tensor. + Description: Operator select's inputs `x` and `y` are Tensor with float16 type. + Expectation: Assert result. + """ + context.set_context(mode=mode) + cond = np.array([[True, False], [True, False]]).astype(np.bool) + x = np.array([[1.2, 1], [1, 0]]).astype(np.float16) + y = np.array([[1, 2], [3, 4.0]]).astype(np.float16) + output = select_forward_func(Tensor(cond), Tensor(x), Tensor(y)) + print(output.asnumpy()) + expect = [[1.2, 2], [1, 4.0]] + error = np.ones(shape=[2, 2]) * 1.0e-3 + diff = output.asnumpy() - expect + assert np.all(diff < error) + assert np.all(-diff < error) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_arm_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) +def test_select_int32(mode): + """ + Feature: Test functional select operator. Support x or y is a int32 Tensor. + Description: Operator select's inputs `x` and `y` are Tensor with int32 type. + Expectation: Assert result. + """ + context.set_context(mode=mode) + cond = np.array([[True, False], [True, False]]).astype(np.bool) + x = np.array([[12, 1], [1, 0]]).astype(np.int32) + y = np.array([[1, 2], [3, 4]]).astype(np.int32) + output = select_forward_func(Tensor(cond), Tensor(x), Tensor(y)) + print(output.asnumpy()) + expect = [[12, 2], [1, 4]] + error = np.ones(shape=[2, 2]) * 1.0e-6 + diff = output.asnumpy() - expect + assert np.all(diff < error) + assert np.all(-diff < error) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_arm_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) +def test_functional_select_scalar(mode): + """ + Feature: Test functional select operator. Support x or y is a int/float. + Description: Operator select's input `x` is a Tensor with int32 type, input `y` is a int. + Expectation: Assert result. + """ + context.set_context(mode=mode) + cond = np.array([[True, False], [True, False]]).astype(np.bool) + x = np.array([[12, 1], [1, 0]]).astype(np.int32) + y = 2 + output = select_forward_func(Tensor(cond), Tensor(x), y) + print(output.asnumpy()) + expect = [[12, 2], [1, 2]] + error = np.ones(shape=[2, 2]) * 1.0e-6 + diff = output.asnumpy() - expect + assert np.all(diff < error) + assert np.all(-diff < error) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_arm_cpu +@pytest.mark.platform_x86_gpu_training +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +@pytest.mark.parametrize('mode', [context.GRAPH_MODE, context.PYNATIVE_MODE]) +def test_functional_select_broadcast(mode): + """ + Feature: Test functional select operator support broadcast input. + Description: Operator select's support broadcast input. + Expectation: Assert result. + """ + context.set_context(mode=mode) + cond = Tensor(np.random.rand(1, 65, 54, 12, 5, 2), dtype=mstype.bool_) + x = Tensor(np.random.rand(5, 5, 65, 1, 12, 5, 2).astype(np.float32)) + y = Tensor(np.random.rand(65, 54, 1, 5, 2).astype(np.float32)) + ret = select_forward_func(cond, x, y) + assert ret.shape == (5, 5, 65, 54, 12, 5, 2) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_cpu_training +@pytest.mark.platform_x86_gpu_training +@pytest.mark.parametrize('mode', ['pynative', 'KBK', 'GE']) +def test_select_ext_static_shape(mode): + """ + Feature: Test select with static shape in graph and pynative mode. + Description: call ops.select with valid input and index. + Expectation: return the correct value. + """ + x = generate_random_input((2, 3, 4, 5), np.float32) + y = generate_random_input((2, 3, 4, 5), np.float32) + cond = x > 0 + + if mode == 'pynative': + ms_out = select_forward_func(cond, x, y) + elif mode == 'KBK': + ms_out = (jit(select_forward_func, jit_config=JitConfig(jit_level="O0")))(cond, x, y) + else: + ms_out = (jit(select_forward_func, jit_config=JitConfig(jit_level="O2")))(cond, x, y) + + expect = generate_expect_forward_output(cond.asnumpy(), x.asnumpy(), y.asnumpy()) + assert np.allclose(ms_out.asnumpy(), expect, rtol=1e-4) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.parametrize('jit_level', ["O0", "O2"]) +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_cpu_training +@pytest.mark.platform_x86_gpu_training +def test_select_ext_dynamic_shape(jit_level): + """ + Feature: Test select with dynamic shape in graph mode. + Description: call ops.select with valid input and index. + Expectation: return the correct value. + """ + x1 = generate_random_input((2, 3, 4, 5), np.float32) + y1 = generate_random_input((2, 3, 4, 5), np.float32) + cond1 = x1 > 0 + + x2 = generate_random_input((6, 7, 8), np.float32) + y2 = generate_random_input((6, 7, 8), np.float32) + cond2 = x2 > 0 + TEST_OP(select_forward_func, [[cond1, x1, y1], [cond2, x2, y2]], grad=True, jit_level=jit_level) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_cpu_training +@pytest.mark.platform_x86_gpu_training +@pytest.mark.parametrize('graph_level', ["0", "1"]) +def test_select_vmap(graph_level): + """ + Feature: Test select with vmap. + Description: call ops.select with valid input and index. + Expectation: return the correct value. + """ + def _foreach_run(condition, x, y, batch): + out = [] + for i in range(condition.shape[batch]): + if batch == -1: + cond_inner = condition[..., i] + x_inner = x[..., i] + y_inner = y[..., i] + else: + cond_inner = condition[i, ...] + x_inner = x[i, ...] + y_inner = y[i, ...] + out.append(select_forward_func(cond_inner, x_inner, y_inner)) + out = ops.Stack()(out) + return out + + os.environ['GRAPH_OP_RUN'] = graph_level + x = generate_random_input((2, 3, 4, 5), np.float32) + y = generate_random_input((2, 3, 4, 5), np.float32) + cond = x > 0 + + batch_axis = -1 + output = select_vmap_func(cond, x, y, batch_axis) + expect = _foreach_run(cond, x, y, batch_axis) + assert np.allclose(output.asnumpy(), expect.asnumpy(), rtol=1e-4) + + batch_axis = 0 + output = select_vmap_func(cond, x, y, batch_axis) + expect = _foreach_run(cond, x, y, batch_axis) + assert np.allclose(output.asnumpy(), expect.asnumpy(), rtol=1e-4) + + del os.environ['GRAPH_OP_RUN'] + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_cpu_training +@pytest.mark.platform_x86_gpu_training +@pytest.mark.parametrize("mode", ['pynative', 'GE', 'KBK']) +def test_select_ext_grad(mode): + """ + Feature: Test select with backward. + Description: call ops.select with valid input and index. + Expectation: return the correct value. + """ + x = generate_random_input((2, 3, 4, 5), np.float32) + y = generate_random_input((2, 3, 4, 5), np.float32) + cond = x > 0 + + if mode == 'pynative': + ms_cond, ms_x, ms_y = select_backward_func(cond, x, y) + elif mode == 'KBK': + ms_cond, ms_x, ms_y = (jit(select_backward_func, jit_config=JitConfig(jit_level="O0")))(cond, x, y) + else: + ms_cond, ms_x, ms_y = (jit(select_backward_func, jit_config=JitConfig(jit_level="O2")))(cond, x, y) + expect_cond, expect_x, expect_y = generate_expect_backward_output(cond.asnumpy()) + assert np.allclose(ms_cond.asnumpy(), expect_cond, rtol=1e-4) + assert np.allclose(ms_x.asnumpy(), expect_x, rtol=1e-4) + assert np.allclose(ms_y.asnumpy(), expect_y, rtol=1e-4) diff --git a/tests/st/ops/test_ops_where.py b/tests/st/ops/test_ops_where.py index 5d60033b1c92c5e26d9b558d758b818163ea874b..706114330a3c14540a86941fa23cd0909ca27d66 100644 --- a/tests/st/ops/test_ops_where.py +++ b/tests/st/ops/test_ops_where.py @@ -1,15 +1,57 @@ +# Copyright 2024 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +"""test where""" import numpy as np import pytest +import os import mindspore.common.dtype as mstype -import mindspore.nn as nn -import mindspore.ops as ops -from mindspore import Tensor -from mindspore import context + +from mindspore.ops import where +from mindspore import ops, Tensor, jit, JitConfig, context +from tests.st.ops.dynamic_shape.test_op_utils import TEST_OP +from tests.st.utils import test_utils + + +def generate_random_input(shape, dtype): + return Tensor(np.random.randn(*shape).astype(dtype)) + + +def generate_expect_forward_output(condition, x, y): + return np.where(condition, x, y) -class Net(nn.Cell): - def construct(self, condition, x, y): - return ops.where(condition, x, y) +def generate_expect_backward_output(condition): + return np.zeros(np.shape(condition), dtype=np.bool_),\ + np.where(condition, 1, 0), np.where(condition, 0, 1) + + +@test_utils.run_with_cell +def where_forward_func(condition, x, y): + return where(condition, x, y) + + +@test_utils.run_with_cell +def where_backward_func(condition, x, y): + return ops.grad(where_forward_func, (0, 1, 2))(condition, x, y) + + +@test_utils.run_with_cell +def where_vmap_func(condition, x, y, in_axes=0): + return ops.vmap(where_forward_func, in_axes, out_axes=0)(condition, x, y) @pytest.mark.level2 @@ -27,10 +69,135 @@ def test_ops_where(mode): Expectation: success """ context.set_context(mode=mode) - net = Net() x = Tensor(np.arange(4).reshape((2, 2)), mstype.float32) y = Tensor(np.ones((2, 2)), mstype.float32) condition = x < 3 - output = net(condition, x, y) + output = where_forward_func(condition, x, y) expected = np.array([[0, 1], [2, 1]], dtype=np.float32) assert np.allclose(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_cpu_training +@pytest.mark.platform_x86_gpu_training +@pytest.mark.parametrize('mode', ['pynative', 'KBK', 'GE']) +def test_where_ext_static_shape(mode): + """ + Feature: Test where with static shape in graph and pynative mode. + Description: call ops.where with valid input and index. + Expectation: return the correct value. + """ + x = generate_random_input((2, 3, 4, 5), np.float32) + y = generate_random_input((2, 3, 4, 5), np.float32) + cond = x > 0 + + if mode == 'pynative': + ms_out = where_forward_func(cond, x, y) + elif mode == 'KBK': + ms_out = (jit(where_forward_func, jit_config=JitConfig(jit_level="O0")))(cond, x, y) + else: + ms_out = (jit(where_forward_func, jit_config=JitConfig(jit_level="O2")))(cond, x, y) + + expect = generate_expect_forward_output(cond.asnumpy(), x.asnumpy(), y.asnumpy()) + assert np.allclose(ms_out.asnumpy(), expect, rtol=1e-4) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.parametrize('jit_level', ["O0", "O2"]) +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_cpu_training +@pytest.mark.platform_x86_gpu_training +def test_where_ext_dynamic_shape(jit_level): + """ + Feature: Test where with dynamic shape in graph mode. + Description: call ops.where with valid input and index. + Expectation: return the correct value. + """ + x1 = generate_random_input((2, 3, 4, 5), np.float32) + y1 = generate_random_input((2, 3, 4, 5), np.float32) + cond1 = x1 > 0 + + x2 = generate_random_input((6, 7, 8), np.float32) + y2 = generate_random_input((6, 7, 8), np.float32) + cond2 = x2 > 0 + TEST_OP(where_forward_func, [[cond1, x1, y1], [cond2, x2, y2]], grad=True, jit_level=jit_level) + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_cpu_training +@pytest.mark.platform_x86_gpu_training +@pytest.mark.parametrize('graph_level', ["0", "1"]) +def test_where_vmap(graph_level): + """ + Feature: Test where with vmap. + Description: call ops.where with valid input and index. + Expectation: return the correct value. + """ + def _foreach_run(condition, x, y, batch): + out = [] + for i in range(condition.shape[batch]): + if batch == -1: + cond_inner = condition[..., i] + x_inner = x[..., i] + y_inner = y[..., i] + else: + cond_inner = condition[i, ...] + x_inner = x[i, ...] + y_inner = y[i, ...] + out.append(where_forward_func(cond_inner, x_inner, y_inner)) + out = ops.Stack()(out) + return out + + os.environ['GRAPH_OP_RUN'] = graph_level + x = generate_random_input((2, 3, 4, 5), np.float32) + y = generate_random_input((2, 3, 4, 5), np.float32) + cond = x > 0 + + batch_axis = -1 + output = where_vmap_func(cond, x, y, batch_axis) + expect = _foreach_run(cond, x, y, batch_axis) + assert np.allclose(output.asnumpy(), expect.asnumpy(), rtol=1e-4) + + batch_axis = 0 + output = where_vmap_func(cond, x, y, batch_axis) + expect = _foreach_run(cond, x, y, batch_axis) + assert np.allclose(output.asnumpy(), expect.asnumpy(), rtol=1e-4) + + del os.environ['GRAPH_OP_RUN'] + + +@pytest.mark.level0 +@pytest.mark.env_onecard +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_cpu_training +@pytest.mark.platform_x86_gpu_training +@pytest.mark.parametrize("mode", ['pynative', 'GE', 'KBK']) +def test_where_ext_grad(mode): + """ + Feature: Test where with backward. + Description: call ops.where with valid input and index. + Expectation: return the correct value. + """ + x = generate_random_input((2, 3, 4, 5), np.float32) + y = generate_random_input((2, 3, 4, 5), np.float32) + cond = x > 0 + + if mode == 'pynative': + ms_cond, ms_x, ms_y = where_backward_func(cond, x, y) + elif mode == 'KBK': + ms_cond, ms_x, ms_y = (jit(where_backward_func, jit_config=JitConfig(jit_level="O0")))(cond, x, y) + else: + ms_cond, ms_x, ms_y = (jit(where_backward_func, jit_config=JitConfig(jit_level="O2")))(cond, x, y) + expect_cond, expect_x, expect_y = generate_expect_backward_output(cond.asnumpy()) + assert np.allclose(ms_cond.asnumpy(), expect_cond, rtol=1e-4) + assert np.allclose(ms_x.asnumpy(), expect_x, rtol=1e-4) + assert np.allclose(ms_y.asnumpy(), expect_y, rtol=1e-4) diff --git a/tests/st/optimizer_ex/test_asgd_cmp.py b/tests/st/optimizer_ex/test_asgd_cmp.py index 3315b535dfe37e9d5184f9f97febeb489c962a89..0fd89fd9634742c877141309e7067a77f63c79a4 100644 --- a/tests/st/optimizer_ex/test_asgd_cmp.py +++ b/tests/st/optimizer_ex/test_asgd_cmp.py @@ -168,7 +168,7 @@ def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True): assert np.array(data_expected).shape == np.array(data_me).shape -@pytest.mark.level0 +@pytest.mark.level1 @pytest.mark.platform_x86_gpu_training @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training diff --git a/tests/st/pi_jit/control_flow/ctrl_factory.py b/tests/st/pi_jit/control_flow/ctrl_factory.py new file mode 100644 index 0000000000000000000000000000000000000000..9a918bf2eb2d7e08d482f58e6aa626f9baca0760 --- /dev/null +++ b/tests/st/pi_jit/control_flow/ctrl_factory.py @@ -0,0 +1,44 @@ +from mindspore import jit, context +from mindspore.common import dtype +from mindspore.common import Tensor +from mindspore.nn import ForwardValueAndGrad +from ..share.utils import allclose_nparray + + +class CtrlFactory(): + def __init__(self, *inputs): + super().__init__() + self.ms_input = [Tensor(x, dtype.float32) for x in inputs] + + self.count = 0 + self.dyn = [] + for x in self.ms_input: + xshp = x.shape + if xshp: + dshp = [None for _ in x.shape] + dynt = Tensor(shape=dshp, dtype=x.dtype) + self.dyn.append(dynt) + else: + self.dyn.append(x) + + def compare(self, ps_net, pi_net, dyn=False): + self.count += 1 + if self.count == 2: + for x in self.tc_input: + if x.grad is not None: + x.grad.data.zero_() + if dyn: + ps_net.set_inputs(*self.dyn) + pi_net.set_inputs(*self.dyn) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ps_net.construct, mode="PSJit") + grad_net = ForwardValueAndGrad(ps_net, get_all=True) + ps_out, ps_grad = grad_net(*self.ms_input) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=pi_net.construct, mode="PIJit") + grad_net = ForwardValueAndGrad(pi_net, get_all=True) + pi_out, pi_grad = grad_net(*self.ms_input) + + allclose_nparray(pi_out.asnumpy(), ps_out.asnumpy(), 0.001, 0.001) + for s, i in zip(ps_grad, pi_grad): + allclose_nparray(s.asnumpy(), i.asnumpy(), 0.001, 0.001) diff --git a/tests/st/pi_jit/control_flow/test_break_continue.py b/tests/st/pi_jit/control_flow/test_break_continue.py index 298de5e0f9eb9a5ea401b69b292e1a1b31796467..38dd35c58b4b045f9765d8d08a6e3c181fd8c3a8 100644 --- a/tests/st/pi_jit/control_flow/test_break_continue.py +++ b/tests/st/pi_jit/control_flow/test_break_continue.py @@ -1,11 +1,13 @@ import numpy as np -from mindspore.common import dtype as mstype +from mindspore.nn import Cell +from mindspore.common import dtype as ms from mindspore import nn from mindspore import Tensor from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore import context, jit from mindspore.common.parameter import Parameter +from ..share.utils import match_array import pytest grad_all = C.GradOperation(get_all=True) @@ -20,6 +22,7 @@ class Grad(nn.Cell): grads = self.grad(self.forward_net)(*inputs) return grads + @pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard @@ -34,7 +37,7 @@ def test_while_true_break(): super(WhileTrueBreakNet, self).__init__() self.add = P.Add() self.mul = P.Mul() - self.para = Parameter(Tensor(t, mstype.int32), name="a") + self.para = Parameter(Tensor(t, ms.int32), name="a") @jit(mode="PIJit") def construct(self, x, y): @@ -50,10 +53,652 @@ def test_while_true_break(): context.set_context(mode=context.PYNATIVE_MODE) t = np.array([1]).astype(np.int32) - y = Tensor([1], mstype.int32) - x = Tensor([5], mstype.int32) + y = Tensor([1], ms.int32) + x = Tensor([5], ms.int32) net = WhileTrueBreakNet(t) grad_net = Grad(net) grad_out = grad_net(x, y) - expect = (Tensor([0], mstype.int32), Tensor([1], mstype.int32)) + expect = (Tensor([0], ms.int32), Tensor([1], ms.int32)) assert expect == grad_out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_concatenation_10_layer(): + """ + TEST_SUMMARY: + Description: create a net, with ten serial while loop + Expectation: result match + """ + class Net2(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(10): + while x < y: + out = self.add(out, out) + x = x + 1 + x = x - 2 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([4], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net2.construct, mode="PSJit") + ps_net = Net2() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net2.construct, mode="PIJit") + pi_net = Net2() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_break(): + """ + TEST_SUMMARY: + Description: create a net, with ten serial while loop + Expectation: result match + """ + class Net3(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + out = self.add(z, z) + x = x + 1 + if x == y: + break + return out + x = Tensor([2], ms.float32) + y = Tensor([4], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net3.construct, mode="PSJit") + ps_net = Net3() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net3.construct, mode="PIJit") + pi_net = Net3() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_nested_break(): + """ + TEST_SUMMARY: + Description: create a net, with break in while in while + Expectation: result match + """ + class Net4(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + while x + 1 < y: + out = self.add(z, z) + x = x + 1 + if x == y - 1: + break + x = x + 1 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([8], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net4.construct, mode="PSJit") + ps_net = Net4() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net4.construct, mode="PIJit") + pi_net = Net4() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_alone(): + """ + TEST_SUMMARY: + Description: create a net, with while independent of output + Expectation: result match + """ + class Net5(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + a = z + while x < y: + a = self.add(a, a) + x = x + 1 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([4], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net5.construct, mode="PSJit") + ps_net = Net5() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net5.construct, mode="PIJit") + pi_net = Net5() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_if_single_break_in_true(): + """ + TEST_SUMMARY: + Description: create a net, with break in if(True) in while + Expectation: result match + """ + class Net6(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + x = x + 1 + if x == y: + out = self.add(out, out) + break + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([4], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net6.construct, mode="PSJit") + ps_net = Net6() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net6.construct, mode="PIJit") + pi_net = Net6() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_if_single_break_in_false(): + """ + TEST_SUMMARY: + Description: create a net, with break in if(True) in while + Expectation: result match + """ + class Net7(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + x = x + 1 + if x < y: + pass + else: + out = self.add(out, out) + if 2 * x == y: + break + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([4], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net7.construct, mode="PSJit") + ps_net = Net7() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net7.construct, mode="PIJit") + pi_net = Net7() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_multi_if_break_nested_if_001(): + """ + TEST_SUMMARY: + Description: create a net, with break in if(True) in while + Expectation: result match + """ + class Net8(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + x = x + 1 + if x < y: + if x + 2 < y: + x = x + 2 + break + else: + pass + if y > 2 * x: + if y > 2 * x + 1: + if y > 3 * x: + out = self.add(out, out) + break + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([8], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net8.construct, mode="PSJit") + ps_net = Net8() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net8.construct, mode="PIJit") + pi_net = Net8() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_multi_if_break_nested_if_002(): + """ + TEST_SUMMARY: + Description: create a net, with break in if in if in while + Expectation: result match + """ + class CtrlWhileMultiIf(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + x = x + 1 + if x < y: + x = x + 2 + out = self.add(out, out) + if x + 2 < y: + x = x + 1 + else: + pass + if x == y - 2: + break + + if y > 2 * x: + if y > 2 * x + 1: + out = self.add(out, out) + if y > 3 * x: + y = y - 1 + if 3 * x == y: + break + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([20], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhileMultiIf.construct, mode="PSJit") + ps_net = CtrlWhileMultiIf() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhileMultiIf.construct, mode="PIJit") + pi_net = CtrlWhileMultiIf() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_multi_if_break_concatenation_if(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 if in while + Expectation: result match + """ + class Net10(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + x = x + 1 + out = self.relu(out) + if x + 2 == y: + x = x + 2 + out = self.add(out, out) + break + + if x + 4 == y: + y = y - 2 + out = self.relu(out) + break + + if x == y: + out = self.relu(out) + break + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net10.construct, mode="PSJit") + ps_net = Net10() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net10.construct, mode="PIJit") + pi_net = Net10() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_multi_while_nested_if_break_001(): + """ + TEST_SUMMARY: + Description: create a net, with break in if in while in while + Expectation: result match + """ + class Net11(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + while 3 * x < y: + if 2 * x == y: + out = self.add(out, out) + break + out = self.relu(out) + y = y - 1 + x = x + 1 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net11.construct, mode="PSJit") + ps_net = Net11() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net11.construct, mode="PIJit") + pi_net = Net11() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_multi_while_nested_if_break_002(): + """ + TEST_SUMMARY: + Description: create a net, with break in second if in while in while + Expectation: result match + """ + class Net12(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + while 3 * x < y: + out = self.relu(out) + if 2 * x == y: + out = self.add(out, out) + if x + 6 == y: + break + y = y - 1 + x = x + 1 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net12.construct, mode="PSJit") + ps_net = Net12() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net12.construct, mode="PIJit") + pi_net = Net12() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_multi_while_nested_if_break_003(): + """ + TEST_SUMMARY: + Description: create a net, with break in both if in while in while + Expectation: result match + """ + class Net13(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + while 3 * x < y: + if 2 * x == y: + out = self.add(out, out) + break + x = x + 1 + if x + 6 == y: + break + x = x + 1 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net13.construct, mode="PSJit") + ps_net = Net13() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net13.construct, mode="PIJit") + pi_net = Net13() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_multi_while_concatenation_if_break(): + """ + TEST_SUMMARY: + Description: create a net, with break in all 3 if in while + Expectation: result match + """ + class Net14(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + if 2 * x < y: + out = self.add(out, out) + break + + if 3 * x < y: + out = self.relu(out) + break + + if x == y: + out = self.relu(out) + break + x = x + 1 + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net14.construct, mode="PSJit") + ps_net = Net14() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net14.construct, mode="PIJit") + pi_net = Net14() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_if_break_in_true(): + """ + TEST_SUMMARY: + Description: create a net, with break in if(True) in for + Expectation: result match + """ + class Net15(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(5): + if 2 * x < y: + out = self.add(out, out) + if x + 6 == y: + break + else: + out = self.relu(out) + x = x + 1 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([8], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net15.construct, mode="PSJit") + ps_net = Net15() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net15.construct, mode="PIJit") + pi_net = Net15() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_if_break_in_false(): + """ + TEST_SUMMARY: + Description: create a net, with break in if(False) in for + Expectation: result match + """ + class Net16(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(5): + if 3 * x < y: + out = self.add(out, out) + else: + out = self.relu(out) + if x + 6 == y: + break + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([8], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net16.construct, mode="PSJit") + ps_net = Net16() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net16.construct, mode="PIJit") + pi_net = Net16() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_multi_if_break_nested_001(): + """ + TEST_SUMMARY: + Description: create a net, with break in if(third) in while + Expectation: result match + """ + class Net17(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(5): + if 2 * x < y: + out = self.relu(out) + if 3 * x < y: + out = self.add(out, out) + if 3 * x + 1 == y: + break + x = x + 1 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net17.construct, mode="PSJit") + ps_net = Net17() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net17.construct, mode="PIJit") + pi_net = Net17() + match_array(ps_net(x, y, z), pi_net(x, y, z)) diff --git a/tests/st/pi_jit/control_flow/test_break_continue2.py b/tests/st/pi_jit/control_flow/test_break_continue2.py new file mode 100644 index 0000000000000000000000000000000000000000..d5bc93fbf764aa47d79712c24ef9ac0380dd5776 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_break_continue2.py @@ -0,0 +1,349 @@ +from mindspore import context, jit +from mindspore.nn import Cell +import numpy as np +import pytest +from mindspore.common import Tensor +from mindspore.common import dtype as ms +from mindspore.common import Parameter +import mindspore.ops.operations as P +from ..share.utils import match_array + + +class CtrlWhileBC(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(Tensor(t, ms.float32), name="a") + + def construct(self, x, y): + out = self.add(y, y) + while x > 2: + out = self.add(out, y) + x -= 1 + if x < 4: + break + elif x < 8: + continue + self.para = self.mul(self.para, y) + out = self.mul(self.para, y) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_break_continue(): + """ + TEST_SUMMARY: + Description: create a net, with while break continue + Expectation: result match + """ + x = Tensor([10], ms.float32) + y = Tensor(np.random.randn(2, 3), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhileBC.construct, mode="PSJit") + ps_net = CtrlWhileBC(y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhileBC.construct, mode="PIJit") + pi_net = CtrlWhileBC(y) + match_array(ps_net(x, y), pi_net(x, y)) + + +class CtrlWhileBR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.mul(y, y) + while x < 10: + x += 2 + if x > 7: + break + if x > 8: + return out + out = self.add(out, y) + out = self.mul(out, self.para) + return y + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_break_return(): + """ + TEST_SUMMARY: + Description: create a net, with while break return + Expectation: result match + """ + x = Tensor([1], ms.float32) + y = Tensor(np.random.randn(2, 3), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhileBR.construct, mode="PSJit") + ps_net = CtrlWhileBR(y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhileBR.construct, mode="PIJit") + pi_net = CtrlWhileBR(y) + match_array(ps_net(x, y), pi_net(x, y)) + + +class CtrlWhileCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.mul(y, y) + while x < 10: + x += 2 + if x > 7: + continue + if x > 8: + return out + out = self.add(out, y) + out = self.mul(out, self.para) + return y + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_continue_return(): + """ + TEST_SUMMARY: + Description: create a net, with while continue return + Expectation: result match + """ + x = Tensor([1], ms.float32) + y = Tensor(np.random.randn(2, 3), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhileCR.construct, mode="PSJit") + ps_net = CtrlWhileCR(y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhileCR.construct, mode="PIJit") + pi_net = CtrlWhileCR(y) + match_array(ps_net(x, y), pi_net(x, y)) + + +class CtrlWhileBCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.mul(y, y) + while x < 10: + x += 1 + if x > 3: + continue + elif x > 5: + return out + elif x > 8: + break + out = self.add(out, y) + out = self.mul(out, self.para) + return y + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_continue_return_break(): + """ + TEST_SUMMARY: + Description: create a net, with while continue return break + Expectation: result match + """ + x = Tensor([1], ms.float32) + y = Tensor(np.random.randn(2, 3), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhileBCR.construct, mode="PSJit") + ps_net = CtrlWhileBCR(y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhileBCR.construct, mode="PIJit") + pi_net = CtrlWhileBCR(y) + match_array(ps_net(x, y), pi_net(x, y)) + + +class CtrlForBC(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.mul(y, y) + for _ in range(5): + out = self.add(out, y) + x += 1 + if x > 2: + out = self.add(out, y) + break + else: + continue + out = self.mul(self.para, y) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_break_continue(): + """ + TEST_SUMMARY: + Description: create a net, with for break continue + Expectation: result match + """ + x = Tensor([-1], ms.float32) + y = Tensor(np.random.randn(2, 3), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBC.construct, mode="PSJit") + ps_net = CtrlForBC(y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForBC.construct, mode="PIJit") + pi_net = CtrlForBC(y) + match_array(ps_net(x, y), pi_net(x, y)) + + +class CtrlForBR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assignadd = P.AssignAdd() + self.para = Parameter(Tensor(t), name="a") + + def construct(self, y): + out = y + for i in range(-1, -9, -2): + self.assignadd(self.para, y) + y = self.add(y, y) + if i == -7: + self.para *= 2 + break + elif i > -7: + out = self.add(out, y) + else: + y += 1 + return y + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_break_return(): + """ + Feature: control flow for with break and return. + Description: use assignadd resolve parameter + and test for with if, break and return + Expectation: result match + """ + y = Tensor(np.random.randn(2, 3), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBR.construct, mode="PSJit") + ps_net = CtrlForBR(y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForBR.construct, mode="PIJit") + pi_net = CtrlForBR(y) + match_array(ps_net(y), pi_net(y)) + + +class CtrlForCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assignadd = P.AssignAdd() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + out = out * y + for _ in range(-6, 8, 2): + x -= 1 + if x > 3: + out = self.add(out, self.para) + continue + elif x > 1: + out = out * y + else: + out = self.add(out, y) + return out + out = self.mul(out, out) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_continue_return(): + """ + TEST_SUMMARY: + Description: create a net, with for continue return + Expectation: result match + """ + x = Tensor([5], ms.float32) + y = Tensor(np.random.randn(2, 3), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForCR.construct, mode="PSJit") + ps_net = CtrlForCR(y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForCR.construct, mode="PIJit") + pi_net = CtrlForCR(y) + match_array(ps_net(x, y), pi_net(x, y)) + + +class CtrlForBCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assignadd = P.AssignAdd() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + for i in range(1, 10, 3): + x += i + if x < 3: + x += 1 + out = self.add(out, y) + self.assignadd(self.para, y) + continue + out = self.add(out, self.para) + if x < 10: + x += 3 + break + elif x < 12: + return out + out = self.mul(out, y) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_continue_break_return(): + """ + TEST_SUMMARY: + Description: create a net, with for continue break return + Expectation: result match + """ + x = Tensor([5], ms.float32) + y = Tensor(np.random.randn(2, 3), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBCR.construct, mode="PSJit") + ps_net = CtrlForBCR(y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForBCR.construct, mode="PIJit") + pi_net = CtrlForBCR(y) + match_array(ps_net(x, y), pi_net(x, y)) diff --git a/tests/st/pi_jit/control_flow/test_break_continue3.py b/tests/st/pi_jit/control_flow/test_break_continue3.py new file mode 100644 index 0000000000000000000000000000000000000000..1a02bca5059d81b6287c1043347c237e4d5ac6a1 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_break_continue3.py @@ -0,0 +1,637 @@ +from mindspore import context, jit +from mindspore.nn import Cell +import numpy as np +import pytest +from mindspore.common import Tensor +from mindspore.common import dtype as ms +from mindspore.common import Parameter +import mindspore.ops.operations as P +from ..share.utils import match_array + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_multi_if_break_nested_002(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net18(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(5): + if 2 * x < y: + if 3 * x < y: + out = self.add(out, out) + x = x + 1 + out = self.relu(out) + if x + 6 == y: + break + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net18.construct, mode="PSJit") + ps_net = Net18() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net18.construct, mode="PIJit") + pi_net = Net18() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_multi_if_break_nested_003(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net19(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(5): + if 2 * x < y: + if 3 * x < y: + out = self.add(out, out) + x = x + 1 + if 2 * x + 1 == y: + break + out = self.relu(out) + if x + 6 == y: + break + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net19.construct, mode="PSJit") + ps_net = Net19() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net19.construct, mode="PIJit") + pi_net = Net19() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_multi_if_break_concatenation(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net20(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(2): + for _ in range(3): + if 2 * x < y: + out = self.add(out, out) + x = x + 1 + if x + 6 == y: + break + + for _ in range(2): + if 2 * x < y: + out = self.relu(out) + y = y - 1 + if x + 5 == y: + break + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net20.construct, mode="PSJit") + ps_net = Net20() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net20.construct, mode="PIJit") + pi_net = Net20() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_multi_if_continue_concatenation(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net21(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(2): + for _ in range(3): + if 2 * x < y: + out = self.add(out, out) + x = x + 1 + else: + continue + + for _ in range(2): + if 3 * x < y: + out = self.relu(out) + y = y - 1 + else: + continue + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net21.construct, mode="PSJit") + ps_net = Net21() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net21.construct, mode="PIJit") + pi_net = Net21() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_combine_break_continue_001(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net22(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(2): + while 2 * x < y: + if 2 * x < y: + out = self.add(out, out) + + if 3 * x < y: + x = x + 2 + else: + break + x = x + 1 + + for _ in range(2): + if x + 5 < y: + out = self.relu(out) + else: + continue + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([16], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net22.construct, mode="PSJit") + ps_net = Net22() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net22.construct, mode="PIJit") + pi_net = Net22() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_combine_break_continue_002(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net23(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(2): + for _ in range(2): + if 4 * x < y: + out = self.relu(out) + else: + continue + + while x < y: + if 2 * x < y: + out = self.add(out, out) + x = x + 2 + if 3 * x < y: + x = x + 1 + else: + break + x = x + 2 + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([20], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net23.construct, mode="PSJit") + ps_net = Net23() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net23.construct, mode="PIJit") + pi_net = Net23() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_combine_break_continue_003(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net24(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + for _ in range(2): + for _ in range(2): + if 3 * x < y: + break + else: + y = y - 1 + + while x < y: + if 2 * x < y: + out = self.add(out, out) + x = x + 2 + if 3 * x < y: + x = x + 1 + else: + break + x = x + 2 + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net24.construct, mode="PSJit") + ps_net = Net24() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net24.construct, mode="PIJit") + pi_net = Net24() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_combine_break_continue_004(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net25(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + if x < y: + while 2 * x < y: + for _ in range(2): + if 3 * x < y: + out = self.add(out, out) + else: + continue + x = x + 2 + if 2 * x == y: + break + + while x + 2 < y: + if x + 5 < y: + out = self.relu(out) + x = x + 1 + x = x + 1 + if x + 2 == y: + break + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([20], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net25.construct, mode="PSJit") + ps_net = Net25() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net25.construct, mode="PIJit") + pi_net = Net25() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_combine_break_continue_005(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net26(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + for _ in range(2): + if 2 * x < y: + out = self.add(out, out) + if 2 * x + 10 == y: + break + + if 3 * x < y: + for _ in range(2): + if 2 * x < y: + out = self.relu(out) + else: + continue + else: + while 2 * x < y: + for _ in range(2): + out = self.relu(out) + if x + 9 == y: + break + y = y - 1 + continue + x = x + 2 + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([20], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net26.construct, mode="PSJit") + ps_net = Net26() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net26.construct, mode="PIJit") + pi_net = Net26() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_break_return_001(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net27(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + if 2 * x < y: + out = self.relu(out) + x = x + 1 + elif 3 * x < y: + out = self.add(out, out) + x = x - 1 + else: + out = self.relu(out) + if 2 * x == y: + break + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([20], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net27.construct, mode="PSJit") + ps_net = Net27() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net27.construct, mode="PIJit") + pi_net = Net27() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_break_return_002(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net28(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + if 2 * x == y: + continue + elif 3 * x < y: + out = self.add(out, out) + x = x + 1 + else: + out = self.relu(out) + x = x - 1 + if 3 * x - 1 == y: + break + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([20], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net28.construct, mode="PSJit") + ps_net = Net28() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net28.construct, mode="PIJit") + pi_net = Net28() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_condition_define_in_init(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net29(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + self.x = 2 + self.y = 20 + + def construct(self, z): + out = z + while self.x < self.y: + if 2 * self.x < self.y: + out = self.add(out, out) + if self.x + 18 == self.y: + break + out = self.relu(out) + return out + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net29.construct, mode="PSJit") + ps_net = Net29() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net29.construct, mode="PIJit") + pi_net = Net29() + match_array(ps_net(z), pi_net(z)) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_break_parameter(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net30(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + add_np = np.full((4, 4, 4), 0.5, dtype=np.float32) + self.add_weight = Parameter(Tensor(add_np), name="add_weight") + + def construct(self, x, y, z): + out = z + while x < y: + if 2 * x < y: + out = self.add(out, self.add_weight) + elif 3 * x < y: + out = self.relu(out) + x = x + 1 + else: + break + x = x + 1 + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([20], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net30.construct, mode="PSJit") + ps_net = Net30() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net30.construct, mode="PIJit") + pi_net = Net30() + match_array(ps_net(x, y, z), pi_net(x, y, z)) + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_for_if_break_plus_continue(): + """ + TEST_SUMMARY: + Description: create a net, with break in 3 nested if in for + Expectation: result match + """ + class Net31(Cell): + def __init__(self): + super().__init__() + self.relu = P.ReLU() + self.add = P.Add() + + def construct(self, x, y, z): + out = z + while x < y: + if 3 * x < y: + out = self.add(out, out) + if 3 * x == y: + break + if x + 20 == y: + continue + elif 2 * x < y: + out = self.relu(out) + x = x + 1 + else: + break + x = x + 1 + + out = self.relu(out) + return out + x = Tensor([2], ms.float32) + y = Tensor([10], ms.float32) + z = Tensor(np.random.randn(4, 4, 4), ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=Net31.construct, mode="PSJit") + ps_net = Net31() + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net31.construct, mode="PIJit") + pi_net = Net31() + match_array(ps_net(x, y, z), pi_net(x, y, z)) diff --git a/tests/st/pi_jit/control_flow/test_control_flow_bool_tensor.py b/tests/st/pi_jit/control_flow/test_control_flow_bool_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..f922ece8b804577b03d9f8a911a0bd13e22164a7 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_flow_bool_tensor.py @@ -0,0 +1,78 @@ +from mindspore import context, jit +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.functional as F +import numpy as np +from ..share.grad import GradOfFirstInput +import pytest + + +class Net1(Cell): + def __init__(self): + super().__init__() + self.a = Tensor([True], dtype.bool_) + self.b = Tensor([False], dtype.bool_) + self.flag = True + + def construct(self, x): + out = x + if self.a: + out = out * x + while self.b: + out = out + x + if self.a and self.b: + out = 2 * out + elif self.a or self.b: + out = out - x + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_tensor_bool(): + """ + TEST_SUMMARY: + Description: create a net use bool tensor as condition + Expectation: result match + """ + npx = np.random.rand(3, 4).astype(np.float32) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net1.construct, mode="PIJit", jit_config={"loop_unrolling":True}) + pi_net = Net1() + grad_net = F.grad(pi_net) + pi_net(Tensor(npx)) + grad_net(Tensor(npx)) + + +class Net2(Cell): + def __init__(self): + super().__init__() + self.a = Tensor([True], dtype.bool_) + + def construct(self, x): + out = x + if self.a and x > 1: + out = out + x + else: + out = out + 2 * x + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_tensor_bool_with_x(): + """ + TEST_SUMMARY: + Description: create a net use bool tensor as condition + Expectation: result match + """ + x = Tensor([0], dtype.float32) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net2.construct, mode="PIJit") + pi_net = Net2() + grad_net = GradOfFirstInput(pi_net, sens_param=False) + pi_net(x) + grad_net(x) diff --git a/tests/st/pi_jit/control_flow/test_control_for.py b/tests/st/pi_jit/control_flow/test_control_for.py new file mode 100644 index 0000000000000000000000000000000000000000..ebc3b03dc2d1200a1830fcc66e670bec33e1c496 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_for.py @@ -0,0 +1,222 @@ +from mindspore import context, jit +from mindspore.nn import Cell +from mindspore.common import Tensor +import numpy as np +from ..share.utils import match_array +from ..share.grad import GradOfFirstInput +import mindspore.ops.operations as op +import pytest + + +class ControlOneForAddn(Cell): + def __init__(self, start, stop, step): + super().__init__() + self.addn = op.AddN() + self.start = start + self.stop = stop + self.step = step + + def construct(self, input_x): + out = input_x + for _ in range(self.start, self.stop, self.step): + out = self.addn([out, input_x, input_x]) + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_range_addn(): + """ + TEST_SUMMARY: + Description: create a net, with break continue in while + Expectation: result match + """ + input_shape = (214, 214, 7, 7) + start, stop, step = 10, 25, 3 + input_np = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneForAddn.construct, mode="PSJit") + ps_net = ControlOneForAddn(start, stop, step) + out_ps = ps_net(Tensor(input_np)) + grad_net = GradOfFirstInput(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(input_np)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneForAddn.construct, mode="PIJit") + pi_net = ControlOneForAddn(start, stop, step) + out_pi = pi_net(Tensor(input_np)) + grad_net = GradOfFirstInput(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(input_np)) + match_array(out_ps, out_pi, error=4) + match_array(ps_grad, pi_grad, error=4) + + +class ControlOneForSplit(Cell): + def __init__(self): + super().__init__() + self.split = op.Split(1, 4) + self.addn = op.AddN() + + def construct(self, input_x): + x = self.addn([input_x, input_x]) + sub_tensors = self.split(x) + out = sub_tensors[0] + for s in sub_tensors: + out = self.addn([out, s]) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_split(): + """ + TEST_SUMMARY: + Description: create a net, with break continue in while + Expectation: result match + """ + input_shape = (4, 4) + input_np = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneForSplit.construct, mode="PSJit") + ps_net = ControlOneForSplit() + out_ps = ps_net(Tensor(input_np)) + grad_net = GradOfFirstInput(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(input_np)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneForSplit.construct, mode="PIJit") + pi_net = ControlOneForSplit() + out_pi = pi_net(Tensor(input_np)) + grad_net = GradOfFirstInput(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(input_np)) + match_array(out_ps, out_pi, error=4) + match_array(ps_grad, pi_grad, error=4) + + +class ControlOneForOneIf(Cell): + def __init__(self): + super().__init__() + self.addn = op.AddN() + + def construct(self, input_x, x, y, z): + out = input_x + for i in [x, y]: + if i > z: + out = self.addn([out, out]) + else: + out = self.addn([out, input_x]) + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_if(): + """ + TEST_SUMMARY: + Description: create a net, with for in list of input + Expectation: result match + """ + input_shape = (4, 3, 4) + x = np.array(1, np.float32) + y = np.array(-1, np.float32) + z = np.array(0, np.float32) + input_np = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneForSplit.construct, mode="PSJit") + ps_net = ControlOneForOneIf() + out_ps = ps_net(Tensor(input_np), Tensor(x), Tensor(y), Tensor(z)) + grad_net = GradOfFirstInput(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(input_np), Tensor(x), Tensor(y), Tensor(z)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneForSplit.construct, mode="PIJit") + pi_net = ControlOneForOneIf() + out_pi = pi_net(Tensor(input_np), Tensor(x), Tensor(y), Tensor(z)) + grad_net = GradOfFirstInput(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(input_np), Tensor(x), Tensor(y), Tensor(z)) + match_array(out_ps, out_pi, error=4) + match_array(ps_grad, pi_grad, error=4) + + +class ControlOneForOneFor(Cell): + def __init__(self): + super().__init__() + self.addn = op.AddN() + + def construct(self, input_x): + out = input_x + for _ in range(5): + for _ in range(4): + out = self.addn([out, input_x]) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_for_in_for(): + """ + TEST_SUMMARY: + Description: create a net, with for in for + Expectation: result match + """ + input_shape = (4, 3, 4) + input_np = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneForOneFor.construct, mode="PSJit") + ps_net = ControlOneForOneFor() + out_ps = ps_net(Tensor(input_np)) + grad_net = GradOfFirstInput(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(input_np)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneForOneFor.construct, mode="PIJit") + pi_net = ControlOneForOneFor() + out_pi = pi_net(Tensor(input_np)) + grad_net = GradOfFirstInput(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(input_np)) + match_array(out_ps, out_pi, error=4) + match_array(ps_grad, pi_grad, error=4) + + +class ControlOneWhileInFor(Cell): + def __init__(self): + super().__init__() + self.addn = op.AddN() + + def construct(self, input_x, x, y): + out = input_x + for _ in range(3): + y = y + 1 + while x < y: + out = self.addn([out, input_x]) + x = x + 1 + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_while_in_for(): + """ + TEST_SUMMARY: + Description: create a net, with while in for + Expectation: result match + """ + input_shape = (4, 3, 4) + x = np.array(1, np.float32) + y = np.array(4, np.float32) + input_np = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneWhileInFor.construct, mode="PSJit") + ps_net = ControlOneWhileInFor() + out_ps = ps_net(Tensor(input_np), Tensor(x), Tensor(y)) + grad_net = GradOfFirstInput(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(input_np), Tensor(x), Tensor(y)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneWhileInFor.construct, mode="PIJit") + pi_net = ControlOneWhileInFor() + out_pi = pi_net(Tensor(input_np), Tensor(x), Tensor(y)) + grad_net = GradOfFirstInput(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(input_np), Tensor(x), Tensor(y)) + match_array(out_ps, out_pi, error=4) + match_array(ps_grad, pi_grad, error=4) diff --git a/tests/st/pi_jit/control_flow/test_control_for_break.py b/tests/st/pi_jit/control_flow/test_control_for_break.py new file mode 100644 index 0000000000000000000000000000000000000000..ef239c3f2d79828c3458c988c436c20e89ca6b7a --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_for_break.py @@ -0,0 +1,236 @@ +from mindspore.nn import Cell +from mindspore.common import Tensor +from mindspore.common import Parameter +from mindspore.common import dtype as ms +from mindspore import nn +from mindspore import context, jit +from ..share.utils import match_array +import mindspore.ops.operations as P +import pytest + + +class CtrlForBreakRange1(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(1, 10, 3): + if i >= 7: + break + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_1_10_3_break(): + """ + Feature: PIJit + Description: create a net, with if break in for range(1, 10, 3) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBreakRange1.construct, mode="PSJit") + ps_net = CtrlForBreakRange1() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForBreakRange1.construct, mode="PIJit") + pi_net = CtrlForBreakRange1() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForBreakRange2(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(4, -8, -4): + if i < 0: + break + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_4_n8_n4_break(): + """ + Feature: PIJit + Description: create a net, with if break in for range(4, -8, -4) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBreakRange2.construct, mode="PSJit") + ps_net = CtrlForBreakRange2() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForBreakRange2.construct, mode="PIJit") + pi_net = CtrlForBreakRange2() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForBreakRange3(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(-5, 5, 2): + if i == 3: + break + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_n5_5_2_break(): + """ + Feature: PIJit + Description: create a net, with if break in for range(-5, 5, 2) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBreakRange3.construct, mode="PSJit") + ps_net = CtrlForBreakRange3() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForBreakRange3.construct, mode="PIJit") + pi_net = CtrlForBreakRange3() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForBreakRange4(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(-2, -8, -2): + if i <= -4: + break + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_n2_n8_n2_break(): + """ + Feature: PIJit + Description: create a net, with if break in for range(-2, -8, -2) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBreakRange4.construct, mode="PSJit") + ps_net = CtrlForBreakRange4() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForBreakRange4.construct, mode="PIJit") + pi_net = CtrlForBreakRange4() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForEnumerateIfBreak(Cell): + def __init__(self, t1, t2, t3): + super().__init__() + self.p1 = Parameter(Tensor(t1, ms.float32), name="a") + self.p2 = Parameter(Tensor(t2, ms.float32), name="b") + self.p3 = Parameter(Tensor(t3, ms.float32), name="c") + self.assignadd = P.AssignAdd() + self.add = P.Add() + + def construct(self, x): + plist = [self.p1, self.p2, self.p3] + out = x + for i, t in enumerate(plist): + if t > 2: + break + out = self.add(out, i * x) + return out + + +@pytest.mark.level7 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_enumerate_if_break(): + """ + Feature: PIJit + Description: create a net, with if break in for enumerate list + Expectation: No exception. + """ + t1 = 1 + t2 = 2 + t3 = 3 + x = Tensor([4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBreakRange4.construct, mode="PSJit") + ps_net = CtrlForEnumerateIfBreak(t1, t2, t3) + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForEnumerateIfBreak.construct, mode="PIJit") + pi_net = CtrlForEnumerateIfBreak(t1, t2, t3) + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForBreakElifElse(Cell): + def __init__(self): + super().__init__() + self.cell_list = nn.CellList() + self.cell_list.append(nn.ReLU()) + self.cell_list.append(nn.Tanh()) + self.cell_list.append(nn.Sigmoid()) + + def construct(self, x): + out = x + for activate in self.cell_list: + add = activate(x) + out = out + add + if add > 1: + out += x + elif add < 1: + break + else: + break + x += add + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_break_in_elif_else(): + """ + Feature: PIJit + Description: create a net, with if break in for in cell list + Expectation: No exception. + """ + x = Tensor([0.5], ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForBreakElifElse.construct, mode="PSJit") + ps_net = CtrlForBreakElifElse() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForBreakElifElse.construct, mode="PIJit") + pi_net = CtrlForBreakElifElse() + pi_out = pi_net(x) + match_array(ps_out, pi_out) diff --git a/tests/st/pi_jit/control_flow/test_control_for_by_while_continue.py b/tests/st/pi_jit/control_flow/test_control_for_by_while_continue.py new file mode 100644 index 0000000000000000000000000000000000000000..774a411c10b7d7b96287b54fda3f76029df9a73d --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_for_by_while_continue.py @@ -0,0 +1,318 @@ +from mindspore.nn import Cell +from mindspore.common import dtype as ms +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore import context, jit +from mindspore.common.parameter import Parameter +from ..share.utils import match_array +import pytest + + +class CtrlForContinueWhileX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + x -= 1 + if x < 5: + continue + out = self.add(out, x) + while x > 1: + out = self.add(out, x) + x -= 1 + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_continue_in_for_x(): + """ + Feature: PIJit + Description: create a net, with break in while + Expectation: No exception. + """ + x = Tensor([7], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForContinueWhileX.construct, mode="PSJit") + ps_net = CtrlForContinueWhileX() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForContinueWhileX.construct, mode="PIJit") + pi_net = CtrlForContinueWhileX() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForContinueWhile(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(5): + out = self.add(out, x) + if i > 2: + continue + while x > 1: + x -= 1 + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_continue_in_for(): + """ + Feature: PIJit + Description: create a net, with continue in for, for by while + Expectation: No exception. + """ + x = Tensor([3], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForContinueWhile.construct, mode="PSJit") + ps_net = CtrlForContinueWhile() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForContinueWhile.construct, mode="PIJit") + pi_net = CtrlForContinueWhile() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileContinueOne(Cell): + def __init__(self, tensor): + super().__init__() + self.param = Parameter(tensor, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + x += 1 + if x > 1: + continue + while x < 5: + self.param += 1 + x = x + 1 + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_param_continue_in_for(): + """ + Feature: PIJit + Description: create a net, with continue in for, for by while + Expectation: No exception. + """ + t = 2 + x = Tensor([-2], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileContinueOne.construct, mode="PSJit") + ps_net = CtrlForWhileContinueOne(t) + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileContinueOne.construct, mode="PIJit") + pi_net = CtrlForWhileContinueOne(t) + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileContinueAdd(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + x += 1 + if x > 1: + continue + while x < 5: + x += 1 + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_by_while_continue_no_param(): + """ + Feature: PIJit + Description: create a net, with continue in for, for by while + Expectation: No exception. + """ + x = Tensor([-2], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileContinueAdd.construct, mode="PSJit") + ps_net = CtrlForWhileContinueAdd() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileContinueAdd.construct, mode="PIJit") + pi_net = CtrlForWhileContinueAdd() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileContinueX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + x -= 1 + out = self.add(out, x) + while x > 1: + x -= 1 + if x < 0: + continue + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_by_while_continue_in_while_x(): + """ + Feature: PIJit + Description: create a net, with continue in while, for by while + Expectation: No exception. + """ + x = Tensor([3], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileContinueX.construct, mode="PSJit") + ps_net = CtrlForWhileContinueX() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileContinueX.construct, mode="PIJit") + pi_net = CtrlForWhileContinueX() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileContinue(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(5): + out = self.add(out, x) + while x > 1: + x -= 1 + out = self.add(out, x) + if x < 3: + continue + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_continue_in_while(): + """ + Feature: PIJit + Description: create a net, with continue in while, for by while + Expectation: No exception. + """ + x = Tensor([5], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileContinue.construct, mode="PSJit") + ps_net = CtrlForWhileContinue() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileContinue.construct, mode="PIJit") + pi_net = CtrlForWhileContinue() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileContinueP(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.param = Parameter(t, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + while x < 5: + self.param += 1 + x += 1 + if self.param > 2: + continue + x = self.add(x, self.param) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_continue_in_while_param(): + """ + Feature: PIJit + Description: create a net, with continue in while, for by while + Expectation: No exception. + """ + x = Tensor([1], ms.int32) + t = -4 + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileContinueP.construct, mode="PSJit") + ps_net = CtrlForWhileContinueP(t) + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileContinueP.construct, mode="PIJit") + pi_net = CtrlForWhileContinueP(t) + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileContinueN(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + while x < 5: + x += 1 + if x > 1: + continue + out = self.add(out, x) + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_continue_in_while_no(): + """ + Feature: PIJit + Description: create a net, with continue in while, for by while + Expectation: No exception. + """ + x = Tensor([-3], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileContinueN.construct, mode="PSJit") + ps_net = CtrlForWhileContinueN() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileContinueN.construct, mode="PIJit") + pi_net = CtrlForWhileContinueN() + pi_out = pi_net(x) + match_array(ps_out, pi_out) diff --git a/tests/st/pi_jit/control_flow/test_control_for_by_while_return.py b/tests/st/pi_jit/control_flow/test_control_for_by_while_return.py new file mode 100644 index 0000000000000000000000000000000000000000..26aed686d5b7ef6e33faa37eb75ae9a87f608a74 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_for_by_while_return.py @@ -0,0 +1,318 @@ +from mindspore.nn import Cell +from mindspore.common import dtype as ms +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore import context, jit +from mindspore.common.parameter import Parameter +from ..share.utils import match_array +import pytest + + +class CtrlForReturnWhileX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + x -= 1 + if x < 5: + return out + out = self.add(out, x) + while x > 1: + out = self.add(out, x) + x -= 1 + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_return_in_for_x(): + """ + Feature: PIJit + Description: create a net, return in for, for by while + Expectation: No exception. + """ + x = Tensor([7], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForReturnWhileX.construct, mode="PSJit") + ps_net = CtrlForReturnWhileX() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForReturnWhileX.construct, mode="PIJit") + pi_net = CtrlForReturnWhileX() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForReturnWhile(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(5): + out = self.add(out, x) + if i > 2: + return out + while x > 1: + x -= 1 + out = self.add(out, x) + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_return_in_for(): + """ + Feature: PIJit + Description: create a net, return in for, for by while + Expectation: No exception. + """ + x = Tensor([3], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForReturnWhile.construct, mode="PSJit") + ps_net = CtrlForReturnWhile() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForReturnWhile.construct, mode="PIJit") + pi_net = CtrlForReturnWhile() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileReturnOne(Cell): + def __init__(self, tensor): + super().__init__() + self.param = Parameter(tensor, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + x += 1 + if x > 1: + return x + while x < 5: + x = x + 1 + self.param += 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_param_return_in_for(): + """ + Feature: PIJit + Description: create a net, return in for, for by while + Expectation: No exception. + """ + t = 2 + x = Tensor([-2], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileReturnOne.construct, mode="PSJit") + ps_net = CtrlForWhileReturnOne(t) + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileReturnOne.construct, mode="PIJit") + pi_net = CtrlForWhileReturnOne(t) + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileReturnAdd(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + x += 1 + if x > 1: + return out + while x < 5: + x += 1 + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_by_while_return_no_param(): + """ + Feature: PIJit + Description: create a net, return in for, for by while + Expectation: No exception. + """ + x = Tensor([-2], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileReturnAdd.construct, mode="PSJit") + ps_net = CtrlForWhileReturnAdd() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileReturnAdd.construct, mode="PIJit") + pi_net = CtrlForWhileReturnAdd() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileReturnX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + x -= 1 + out = self.add(out, x) + while x > 1: + x -= 1 + if x < 0: + return out + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_by_while_return_in_while_x(): + """ + Feature: PIJit + Description: create a net, return in while, for by while + Expectation: No exception. + """ + x = Tensor([3], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileReturnX.construct, mode="PSJit") + ps_net = CtrlForWhileReturnX() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileReturnX.construct, mode="PIJit") + pi_net = CtrlForWhileReturnX() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileReturn(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(5): + out = self.add(out, x) + while x > 1: + x -= 1 + out = self.add(out, x) + if x < 3: + return out + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_return_in_while(): + """ + Feature: PIJit + Description: create a net, return in while, for by while + Expectation: No exception. + """ + x = Tensor([5], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileReturn.construct, mode="PSJit") + ps_net = CtrlForWhileReturn() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileReturn.construct, mode="PIJit") + pi_net = CtrlForWhileReturn() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileReturnP(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.param = Parameter(t, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + while x < 5: + self.param += 1 + x += 1 + if self.param > 2: + return x + x = self.add(x, self.param) + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_return_in_while_param(): + """ + Feature: PIJit + Description: create a net, return in while, for by while + Expectation: No exception. + """ + x = Tensor([1], ms.int32) + t = -4 + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileReturnP.construct, mode="PSJit") + ps_net = CtrlForWhileReturnP(t) + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileReturnP.construct, mode="PIJit") + pi_net = CtrlForWhileReturnP(t) + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForWhileReturnN(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + while x < 5: + out = self.add(out, x) + if x > 1: + return x + x += 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_while_return_in_while_no(): + """ + Feature: PIJit + Description: create a net, return in while, for by while + Expectation: No exception. + """ + x = Tensor([-3], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForWhileReturnN.construct, mode="PSJit") + ps_net = CtrlForWhileReturnN() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForWhileReturnN.construct, mode="PIJit") + pi_net = CtrlForWhileReturnN() + pi_out = pi_net(x) + match_array(ps_out, pi_out) diff --git a/tests/st/pi_jit/control_flow/test_control_for_continue.py b/tests/st/pi_jit/control_flow/test_control_for_continue.py new file mode 100644 index 0000000000000000000000000000000000000000..62468662fbc95565193fe3b9307d61693cc80ec0 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_for_continue.py @@ -0,0 +1,237 @@ +from mindspore.nn import Cell +from mindspore.common import dtype as ms +from mindspore import nn +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore import context, jit +from mindspore.common.parameter import Parameter +from ..share.utils import match_array +import pytest + + +class CtrlForContinueRange1(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(1, 10, 3): + if i >= 7: + continue + out = self.add(out, x) + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_1_10_3_continue(): + """ + Feature: PIJit + Description: create a net, with continue in for range(1, 10, 3) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForContinueRange1.construct, mode="PSJit") + ps_net = CtrlForContinueRange1() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForContinueRange1.construct, mode="PIJit") + pi_net = CtrlForContinueRange1() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForContinueRange2(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(4, -8, -4): + if i < 0: + continue + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_4_n8_n4_continue(): + """ + Feature: PIJit + Description: create a net, with continue in for range(4, -8, -4) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForContinueRange2.construct, mode="PSJit") + ps_net = CtrlForContinueRange2() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForContinueRange2.construct, mode="PIJit") + pi_net = CtrlForContinueRange2() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForContinueRange3(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(-5, 5, 2): + if i == 3: + continue + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_n5_5_2_continue(): + """ + Feature: PIJit + Description: create a net, with continue in for range(-5, 5, 2) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForContinueRange3.construct, mode="PSJit") + ps_net = CtrlForContinueRange3() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForContinueRange3.construct, mode="PIJit") + pi_net = CtrlForContinueRange3() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForContinueRange4(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(-2, -8, -2): + if i <= -4: + continue + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_n2_n8_n2_continue(): + """ + Feature: PIJit + Description: create a net, with continue in for range(-2, -8, -2) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForContinueRange4.construct, mode="PSJit") + ps_net = CtrlForContinueRange4() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForContinueRange4.construct, mode="PIJit") + pi_net = CtrlForContinueRange4() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForEnumerateIfContinue(Cell): + def __init__(self, t1, t2, t3): + super().__init__() + self.p1 = Parameter(Tensor(t1, ms.float32), name="a") + self.p2 = Parameter(Tensor(t2, ms.float32), name="b") + self.p3 = Parameter(Tensor(t3, ms.float32), name="c") + self.assignadd = P.AssignAdd() + self.add = P.Add() + + def construct(self, x): + plist = [self.p1, self.p2, self.p3] + out = x + for i, t in enumerate(plist): + if t > 2: + continue + self.assignadd(t, x) + out = self.add(out, i * x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_enumerate_if_continue(): + """ + Feature: PIJit + Description: create a net, with continue in for enumerate + Expectation: No exception. + """ + t1 = 1 + t2 = 2 + t3 = 3 + x = Tensor([4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForEnumerateIfContinue.construct, mode="PSJit") + ps_net = CtrlForEnumerateIfContinue(t1, t2, t3) + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForEnumerateIfContinue.construct, mode="PIJit") + pi_net = CtrlForEnumerateIfContinue(t1, t2, t3) + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForContinueElifElse(Cell): + def __init__(self): + super().__init__() + self.cell_list = nn.CellList() + self.cell_list.append(nn.ReLU()) + self.cell_list.append(nn.Tanh()) + self.cell_list.append(nn.Sigmoid()) + + def construct(self, x): + out = x + for activate in self.cell_list: + add = activate(x) + out = out + add + if add > 1: + out += x + elif add < 1: + continue + else: + continue + x += add + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_continue_in_elif_else(): + """ + Feature: PIJit + Description: create a net, with continue in for cell list + Expectation: No exception. + """ + x = Tensor([0.5], ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForContinueElifElse.construct, mode="PSJit") + ps_net = CtrlForContinueElifElse() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForContinueElifElse.construct, mode="PIJit") + pi_net = CtrlForContinueElifElse() + pi_out = pi_net(x) + match_array(ps_out, pi_out) diff --git a/tests/st/pi_jit/control_flow/test_control_for_return.py b/tests/st/pi_jit/control_flow/test_control_for_return.py new file mode 100644 index 0000000000000000000000000000000000000000..1bc535afd32becc86315c733b2de0f449dd66f47 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_for_return.py @@ -0,0 +1,270 @@ +from mindspore.nn import Cell +from mindspore.common import dtype as ms +from mindspore import nn +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore import context, jit +from mindspore.common.parameter import Parameter +from ..share.utils import match_array +import pytest + + +class CtrlForReturnRange1(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(1, 10, 3): + if i >= 7: + return out + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_1_10_3_return(): + """ + Feature: PIJit + Description: create a net, with return in for, for range(1, 10, 3) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForReturnRange1.construct, mode="PSJit") + ps_net = CtrlForReturnRange1() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForReturnRange1.construct, mode="PIJit") + pi_net = CtrlForReturnRange1() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForReturnRange2(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(4, -8, -4): + if i < 0: + return out + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_4_n8_n4_return(): + """ + Feature: PIJit + Description: create a net, with return in for, for range(4, -8, -4) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForReturnRange2.construct, mode="PSJit") + ps_net = CtrlForReturnRange2() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForReturnRange2.construct, mode="PIJit") + pi_net = CtrlForReturnRange2() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForReturnRange3(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(-5, 5, 2): + if i == 3: + return out + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_n5_5_2_return(): + """ + Feature: PIJit + Description: create a net, with return in for, for range(-5, 5, 2) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForReturnRange3.construct, mode="PSJit") + ps_net = CtrlForReturnRange3() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForReturnRange3.construct, mode="PIJit") + pi_net = CtrlForReturnRange3() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForReturnRange4(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(-2, -8, -2): + if i <= -4: + return out + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_range_n2_n8_n2_return(): + """ + Feature: PIJit + Description: create a net, with return in for, for range(-2, -8, -2) + Expectation: No exception. + """ + x = Tensor([2, 3, 4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForReturnRange4.construct, mode="PSJit") + ps_net = CtrlForReturnRange4() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForReturnRange4.construct, mode="PIJit") + pi_net = CtrlForReturnRange4() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlForReturnElifElse(Cell): + def __init__(self): + super().__init__() + self.cell_list = nn.CellList() + self.cell_list.append(nn.ReLU()) + self.cell_list.append(nn.Tanh()) + self.cell_list.append(nn.Sigmoid()) + + def construct(self, x): + out = x + for activate in self.cell_list: + add = activate(x) + out = out + add + if add > 1: + out += x + elif add < 1: + return out + else: + return out + x += add + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_return_in_elif_else(): + """ + Feature: PIJit + Description: create a net, with return in for, for cell list + Expectation: No exception. + """ + x = Tensor([0.5], ms.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForReturnElifElse.construct, mode="PSJit") + ps_net = CtrlForReturnElifElse() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForReturnElifElse.construct, mode="PIJit") + pi_net = CtrlForReturnElifElse() + pi_out = pi_net(x) + match_array(ps_out, pi_out) + + +class CtrlFor2ElifReturnInIf(Cell): + def __init__(self, t1, t2): + super().__init__() + self.p1 = Parameter(Tensor(t1, ms.float32), name="a") + self.p2 = Parameter(Tensor(t2, ms.float32), name="b") + + def construct(self, x): + out = x + dictionary = {"a": self.p2, + "b": self.p1} + for value in dictionary.values(): + x += value + if x > 2: + break + elif x > 1: + x -= 1 + elif x > 0: + x += 1 + out += x + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_2elif_return_in_if(): + """ + Feature: PIJit + Description: create a net, with return in for, for dict + Expectation: No exception. + """ + t1 = 1 + t2 = 2 + x = Tensor([-3], ms.int32) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlFor2ElifReturnInIf.construct, mode="PIJit") + pi_net = CtrlFor2ElifReturnInIf(t1, t2) + pi_net(x) + + +class CtrlForReturnAll(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + self.add = P.Add() + + def construct(self, x): + if x > 2: + res = self.mul(x, x) + elif x == 1: + res = self.add(x, x) + else: + res = x + return res + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_return_fib(): + """ + Feature: PIJit + Description: create a net, with return in for, in all branches + Expectation: No exception. + """ + x = Tensor([4], ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForReturnAll.construct, mode="PSJit") + ps_net = CtrlForReturnAll() + ps_out = ps_net(x) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForReturnAll.construct, mode="PIJit") + pi_net = CtrlForReturnAll() + pi_out = pi_net(x) + match_array(ps_out, pi_out) diff --git a/tests/st/pi_jit/control_flow/test_control_for_while_by_if.py b/tests/st/pi_jit/control_flow/test_control_for_while_by_if.py new file mode 100644 index 0000000000000000000000000000000000000000..37f34198a229eb714597b2806ea1c3b196b45ea2 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_for_while_by_if.py @@ -0,0 +1,102 @@ +import numpy as np +from mindspore.nn import Cell +from mindspore.common import dtype as ms +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore import context, jit +from mindspore.common.parameter import Parameter +from ..share.utils import match_array +import pytest + + +class CtrlWhilebyIfBR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + while x > -4: + x -= 1 + if x < 0: + out = self.mul(out, out) + break + out = self.add(out, y) + if x < -1: + return out + if x > -4: + out = self.add(out, self.para) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_by_if_break_return(): + """ + Feature: PIJit + Description: create a net, with while by if, break return in while + Expectation: No exception. + """ + input_np = np.random.randn(3, 2).astype(np.float32) + x = Tensor([5], ms.int32) + t = Tensor(input_np, ms.int32) + y = Tensor(input_np, ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhilebyIfBR.construct, mode="PSJit") + ps_net = CtrlWhilebyIfBR(t) + ps_out = ps_net(x, y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhilebyIfBR.construct, mode="PIJit") + pi_net = CtrlWhilebyIfBR(t) + pi_out = pi_net(x, y) + match_array(ps_out, pi_out) + + +class CtrlWhilebyIfCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assign = P.Assign() + self.para = Parameter(Tensor(t, ms.float32), name="a") + + def construct(self, x, y): + out = self.mul(y, y) + while x > 5: + self.para -= 1 + x += 1 + if x > 3: + self.assign(self.para, x) + continue + out = self.add(out, y) + if x != 3: + return out + out = self.mul(out, y) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_by_if_continue_return(): + """ + Feature: PIJit + Description: create a net, with while by if, continue in while, return in if + Expectation: No exception. + """ + input_np = np.random.randn(3, 2).astype(np.float32) + x = Tensor([2], ms.int32) + t = Tensor([8], ms.int32) + y = Tensor(input_np, ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhilebyIfCR.construct, mode="PSJit") + ps_net = CtrlWhilebyIfCR(t) + ps_out = ps_net(x, y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhilebyIfCR.construct, mode="PIJit") + pi_net = CtrlWhilebyIfCR(t) + pi_out = pi_net(x, y) + match_array(ps_out, pi_out) diff --git a/tests/st/pi_jit/control_flow/test_control_for_while_in_if_bcr.py b/tests/st/pi_jit/control_flow/test_control_for_while_in_if_bcr.py new file mode 100644 index 0000000000000000000000000000000000000000..e1d3ac04a468931f2f7b278c386bada6e7b5564d --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_for_while_in_if_bcr.py @@ -0,0 +1,246 @@ +import numpy as np +from mindspore.nn import Cell +from mindspore.common import dtype as ms +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore import context, jit +from mindspore.common.parameter import Parameter +from ..share.utils import match_array +import pytest + + +class CtrlForInIfBC(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + if x > 2: + x -= 2 + for _ in range(1, 10): + x += 1 + if x < 2: + out = self.add(out, y) + elif x < 5: + y = self.mul(y, y) + continue + else: + break + out = self.add(out, self.para) + return out + + +@pytest.mark.level7 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_in_if_continue_break(): + """ + Feature: PIJit + Description: create a net, with for in if, if in for, continue break in for + Expectation: No exception. + """ + input_np = np.random.randn(3, 4, 5).astype(np.float32) + x = Tensor([3], ms.int32) + t = Tensor(input_np, ms.int32) + y = Tensor(input_np, ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForInIfBC.construct, mode="PSJit") + ps_net = CtrlForInIfBC(t) + ps_out = ps_net(x, y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForInIfBC.construct, mode="PIJit") + pi_net = CtrlForInIfBC(t) + pi_out = pi_net(x, y) + match_array(ps_out, pi_out) + + +class CtrlForInIfBR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + if x > 2: + res = out + else: + for _ in range(0, -5, -1): + x -= 1 + if x > 0: + out = self.mul(out, y) + else: + break + res = self.add(out, self.para) + return res + + +@pytest.mark.level7 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_in_if_return_break(): + """ + Feature: PIJit + Description: create a net, with return in if, break in for + Expectation: No exception. + """ + input_np = np.random.randn(3, 4, 5).astype(np.float32) + x = Tensor([1], ms.int32) + t = Tensor(input_np, ms.int32) + y = Tensor(input_np, ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForInIfBR.construct, mode="PSJit") + ps_net = CtrlForInIfBR(t) + ps_out = ps_net(x, y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForInIfBR.construct, mode="PIJit") + pi_net = CtrlForInIfBR(t) + pi_out = pi_net(x, y) + match_array(ps_out, pi_out) + + +class CtrlForInIfBCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assignadd = P.AssignAdd() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + if y[1] > 2: + for i in range(3): + if i == 0: + out = self.mul(y, out) + if i == 1: + x += 2 + continue + if x > 2: + break + return out + out = self.add(out, self.para) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_in_if_break_continue_return(): + """ + Feature: PIJit + Description: create a net, with for in if, return out, break, continue in + Expectation: No exception. + """ + input_np = np.random.randn(3,).astype(np.float32) + x = Tensor([1], ms.int32) + t = Tensor(input_np, ms.int32) + y = Tensor(input_np, ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlForInIfBCR.construct, mode="PSJit") + ps_net = CtrlForInIfBCR(t) + ps_out = ps_net(x, y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlForInIfBCR.construct, mode="PIJit") + pi_net = CtrlForInIfBCR(t) + pi_out = pi_net(x, y) + match_array(ps_out, pi_out) + + +class CtrlWhileInIfCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(Tensor(t, ms.float32), name="a") + + def construct(self, x, y): + out = self.mul(y, y) + if x != 3: + while x > 5: + self.para -= 1 + x += 1 + if x > 3: + continue + out = self.add(out, y) + return out + out = self.mul(out, y) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_if_continue_return(): + """ + Feature: PIJit + Description: create a net, with while in if, break, return out, continue in + Expectation: No exception. + """ + input_np = np.random.randn(3, 2).astype(np.float32) + x = Tensor([2], ms.int32) + t = Tensor([8], ms.int32) + y = Tensor(input_np, ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhileInIfCR.construct, mode="PSJit") + ps_net = CtrlWhileInIfCR(t) + ps_out = ps_net(x, y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhileInIfCR.construct, mode="PIJit") + pi_net = CtrlWhileInIfCR(t) + pi_out = pi_net(x, y) + match_array(ps_out, pi_out) + + +class CtrlWhileInIfBCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assign = P.Assign() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.mul(y, self.para) + if x < 4: # 1 + while True: + if x == 3: + out = self.add(out, y) + x = x + 2 + if x == 5: + self.assign(self.para, out) + x = x - 3 + continue + if x == 2: + break + return out + out = self.add(out, out) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_if_break_continue_return(): + """ + Feature: PIJit + Description: create a net, with while in if, break, return out, continue break in + Expectation: No exception. + """ + input_np = np.random.randn(3, 2).astype(np.float32) + x = Tensor([3], ms.int32) + t = Tensor(input_np, ms.int32) + y = Tensor(input_np, ms.int32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhileInIfBCR.construct, mode="PSJit") + ps_net = CtrlWhileInIfBCR(t) + ps_out = ps_net(x, y) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhileInIfBCR.construct, mode="PIJit") + pi_net = CtrlWhileInIfBCR(t) + pi_out = pi_net(x, y) + match_array(ps_out, pi_out) diff --git a/tests/st/pi_jit/control_flow/test_control_if.py b/tests/st/pi_jit/control_flow/test_control_if.py new file mode 100644 index 0000000000000000000000000000000000000000..a1a9d912d7af1309809ae623b10ed0c1c1bacec4 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_if.py @@ -0,0 +1,160 @@ +import numpy as np +from mindspore.nn import Cell +from mindspore.common import dtype as ms +from mindspore import Tensor +from mindspore import context, jit +import mindspore.ops.operations as op +from ..share.utils import match_array +from ..share.grad import GradOfAllInputs +import pytest + + +class ControlOneIfOneAddnOneAddn(Cell): + def __init__(self): + super().__init__() + self.addn = op.AddN() + + def construct(self, x, y, input1, input2): + if x > y: + out = self.addn([input1, input1, input1]) + else: + out = self.addn([input2, input2, input2]) + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_addn_addn_true(): + """ + Feature: PIJit + Description: create a net, with if, True AddN input1 + Expectation: No exception. + """ + x = Tensor(1, ms.float32) + y = Tensor(0, ms.float32) + input_shape = (1024, 512, 7, 7) + input1 = np.random.randn(*input_shape).astype(np.float32) + input2 = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneIfOneAddnOneAddn.construct, mode="PSJit") + ps_net = ControlOneIfOneAddnOneAddn() + ps_out = ps_net(x, y, Tensor(input1), Tensor(input2)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + ps_grad = grad_net(x, y, Tensor(input1), Tensor(input2)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneIfOneAddnOneAddn.construct, mode="PIJit") + pi_net = ControlOneIfOneAddnOneAddn() + pi_out = pi_net(x, y, Tensor(input1), Tensor(input2)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + pi_grad = grad_net(x, y, Tensor(input1), Tensor(input2)) + match_array(ps_out, pi_out) + match_array(ps_grad[2], pi_grad[2]) + match_array(ps_grad[3], pi_grad[3]) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_addn_addn_false(): + """ + Feature: PIJit + Description: create a net, with if, False AddN input2 + Expectation: No exception. + """ + x = Tensor(0, ms.float32) + y = Tensor(1, ms.float32) + input_shape = (1024, 512, 7, 7) + input1 = np.random.randn(*input_shape).astype(np.float32) + input2 = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneIfOneAddnOneAddn.construct, mode="PSJit") + ps_net = ControlOneIfOneAddnOneAddn() + ps_out = ps_net(x, y, Tensor(input1), Tensor(input2)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + ps_grad = grad_net(x, y, Tensor(input1), Tensor(input2)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneIfOneAddnOneAddn.construct, mode="PIJit") + pi_net = ControlOneIfOneAddnOneAddn() + pi_out = pi_net(x, y, Tensor(input1), Tensor(input2)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + pi_grad = grad_net(x, y, Tensor(input1), Tensor(input2)) + match_array(ps_out, pi_out) + match_array(ps_grad[2], pi_grad[2]) + match_array(ps_grad[3], pi_grad[3]) + + +class ControlOneIfOneAddnOneAddnOneAddn(Cell): + def __init__(self): + super().__init__() + self.addn = op.AddN() + + def construct(self, x, y, input1, input2): + if x > y: + out = self.addn([input1, input1, input1]) + else: + out = self.addn([input2, input2, input2]) + out_me = self.addn([out, input1]) + return out_me + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_addn_addn_addn_true(): + """ + Feature: PIJit + Description: create a net, with if, True AddN input1, then Addn + Expectation: No exception. + """ + x = Tensor(1, ms.float32) + y = Tensor(0, ms.float32) + input_shape = (1024, 512, 7, 7) + input1 = np.random.randn(*input_shape).astype(np.float32) + input2 = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneIfOneAddnOneAddnOneAddn.construct, mode="PSJit") + ps_net = ControlOneIfOneAddnOneAddnOneAddn() + ps_out = ps_net(x, y, Tensor(input1), Tensor(input2)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + ps_grad = grad_net(x, y, Tensor(input1), Tensor(input2)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneIfOneAddnOneAddnOneAddn.construct, mode="PIJit") + pi_net = ControlOneIfOneAddnOneAddnOneAddn() + pi_out = pi_net(x, y, Tensor(input1), Tensor(input2)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + pi_grad = grad_net(x, y, Tensor(input1), Tensor(input2)) + match_array(ps_out, pi_out) + match_array(ps_grad[2], pi_grad[2]) + match_array(ps_grad[3], pi_grad[3]) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_addn_addn_addn_false(): + """ + Feature: PIJit + Description: create a net, with if, False AddN input2, then Addn + Expectation: No exception. + """ + x = Tensor(0, ms.float32) + y = Tensor(1, ms.float32) + input_shape = (1024, 512, 7, 7) + input1 = np.random.randn(*input_shape).astype(np.float32) + input2 = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneIfOneAddnOneAddnOneAddn.construct, mode="PSJit") + ps_net = ControlOneIfOneAddnOneAddnOneAddn() + ps_out = ps_net(x, y, Tensor(input1), Tensor(input2)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + ps_grad = grad_net(x, y, Tensor(input1), Tensor(input2)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneIfOneAddnOneAddnOneAddn.construct, mode="PIJit") + pi_net = ControlOneIfOneAddnOneAddnOneAddn() + pi_out = pi_net(x, y, Tensor(input1), Tensor(input2)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + pi_grad = grad_net(x, y, Tensor(input1), Tensor(input2)) + match_array(ps_out, pi_out) + match_array(ps_grad[2], pi_grad[2]) + match_array(ps_grad[3], pi_grad[3]) diff --git a/tests/st/pi_jit/control_flow/test_control_if_by_if.py b/tests/st/pi_jit/control_flow/test_control_if_by_if.py new file mode 100644 index 0000000000000000000000000000000000000000..652261ea969fe5975bf8bc682e84f618cabc4572 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_if_by_if.py @@ -0,0 +1,487 @@ +import numpy as np +from mindspore.nn import Cell +from mindspore import Tensor +from mindspore.common.parameter import Parameter +import mindspore.ops.operations as op +from ..parse.parser_factory import ParserFactory +import pytest + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_basic(): + """ + Feature: PIJit + Description: create a net, with if by if + Expectation: No exception. + """ + class Net41(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.tanh = op.Tanh() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def construct(self, x): + if self.a > self.b: + if self.a < self.c: + out = self.relu(x) + else: + out = x + 1 + else: + out = x + 2 + + if self.b > self.c: + out = x + 3 + else: + pass + return out + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net41() + pi_net = Net41() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_with_for(): + """ + Feature: PIJit + Description: create a net, with for in if + Expectation: No exception. + """ + class Net42(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.tanh = op.Tanh() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def construct(self, x): + if self.a > self.b: + for _ in range(0, 2): + x = self.relu(x) + out = x + else: + out = x + 2 + + if self.b > self.c: + out = x + 3 + else: + pass + return out + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net42() + pi_net = Net42() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_second_if_match_the_false_branch_of_first_if(): + """ + Feature: PIJit + Description: create a net, with if by if + Expectation: No exception. + """ + class Net44(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.tanh = op.Tanh() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + + def construct(self, x): + if self.a > self.b: + x = self.relu(x) + if self.a <= self.b: + x = self.tanh(x) + return x + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net44() + pi_net = Net44() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_combine_with_elif_else(): + """ + Feature: PIJit + Description: create a net, with if by if and elif + Expectation: No exception. + """ + class Net45(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.tanh = op.Tanh() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def construct(self, x): + out = x + if self.a > self.b: + if self.a < self.c: + out = self.relu(x) + elif self.b == self.c: + out = self.tanh(x) + else: + out = self.sigmoid(x) + + if self.c <= self.b: + out = self.add(out, out) + + return out + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net45() + pi_net = Net45() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_call_func(): + """ + Feature: PIJit + Description: create a net, with if by if + Expectation: No exception. + """ + class Net49(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.tanh = op.Tanh() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def func1(self, x): + x = self.relu(x) + return x + + def func2(self, x): + x = self.add(x, x) + return x + + def construct(self, x): + if self.a > self.b: + if self.a < self.c: + out = self.func1(x) + else: + out = self.func2(x) + else: + out = x + 2 + if self.b > self.c: + out = x + 3 + else: + pass + return out + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net49() + pi_net = Net49() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_call_func_which_include_ctrl_flow(): + """ + Feature: PIJit + Description: create a net, with for in if + Expectation: No exception. + """ + class Net50(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.tanh = op.Tanh() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def func1(self, x): + if self.a > self.b: + x = self.relu(x) + else: + x = x * 2 + return x + + def func2(self, x): + while self.c < 10: + if self.a > 3: + x = self.relu(x) + self.a -= 1 + self.c += 1 + return x + + def construct(self, x): + if self.a > self.b: + if self.a < self.c: + out = self.func1(x) + else: + out = self.func2(x) + else: + out = x + 2 + if self.b > self.c: + out = x + 3 + else: + pass + return out + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net50() + pi_net = Net50() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_call_subnet(): + """ + Feature: PIJit + Description: create a net, with for in if + Expectation: No exception. + """ + class SubNet(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + + def construct(self, x): + x = self.relu(x) + return x + + class Net51(Cell): + def __init__(self): + super().__init__() + self.net_inside = SubNet() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def construct(self, x): + if self.a > self.b: + x = self.net_inside(x) + else: + x = self.sigmoid(x) + + if self.a < self.c: + x = self.add(x, 0) + + return x + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net51() + pi_net = Net51() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_call_subnet_which_include_ctrl_flow(): + """ + Feature: PIJit + Description: create a net, with for in if + Expectation: No exception. + """ + class SubNet(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + + def construct(self, x): + if self.a > self.b: + x = self.relu(x) + while self.b < 6: + x = self.add(x, 0) + self.b += 1 + return x + + class Net52(Cell): + def __init__(self): + super().__init__() + self.net_inside = SubNet() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def construct(self, x): + if self.a > self.b: + x = self.net_inside(x) + else: + x = self.sigmoid(x) + + if self.a > self.c: + x = self.add(x, 0) + else: + x = self.relu(x) + return x + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net52() + pi_net = Net52() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_combine_with_not_or_and(): + """ + Feature: PIJit + Description: create a net, with for in if + Expectation: No exception. + """ + class Net53(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.add = op.TensorAdd() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def construct(self, x): + if self.a > self.b and self.a < self.c: + x = self.relu(x) + if self.b > self.c or self.a < self.b: + x = self.add(x, x) + if not self.a < self.c: + x = self.sigmoid(x) + return x + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net53() + pi_net = Net53() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() + + +@pytest.mark.level6 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_if_by_if_combine_with_dynamic_shape(): + """ + Feature: PIJit + Description: create a net, with for in if + Expectation: No exception. + """ + class Net54(Cell): + def __init__(self): + super().__init__() + self.relu = op.ReLU() + self.sigmoid = op.Sigmoid() + self.add = op.TensorAdd() + self.expanddims1 = op.ExpandDims() + self.expanddims2 = op.ExpandDims() + a = np.full((1,), 5, dtype=np.float32) + self.a = Parameter(Tensor(a), name="a") + b = np.full((1,), 4, dtype=np.float32) + self.b = Parameter(Tensor(b), name="b") + c = np.full((1,), 7, dtype=np.float32) + self.c = Parameter(Tensor(c), name="c") + + def construct(self, x): + if self.a > self.b: + out = 1 + else: + out = 2 + if self.b < self.c: + out = self.expanddims1(x, out) + else: + out = self.expanddims2(x, out) + return out + + input_np_a = np.random.randn(2, 3, 4, 5).astype(np.float32) + ps_net = Net54() + pi_net = Net54() + fact = ParserFactory(ps_net, pi_net, input_np_a) + fact.forward_cmp() + fact.backward_cmp() diff --git a/tests/st/pi_jit/control_flow/test_control_while.py b/tests/st/pi_jit/control_flow/test_control_while.py new file mode 100644 index 0000000000000000000000000000000000000000..f8cf16f1404201dffb002f0a8af1a11949cec3fa --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while.py @@ -0,0 +1,260 @@ +import numpy as np +from mindspore.nn import Cell +from mindspore.common import dtype as ms +from mindspore import nn +from mindspore import Tensor +from mindspore.ops import composite as C +from mindspore import context, jit +from mindspore.common.parameter import Parameter +from mindspore.common.initializer import initializer +import mindspore.ops.operations as op +from ..share.utils import match_array +from ..share.grad import GradOfAllInputs +import pytest + + +class ControlOneWhileOneAddn(Cell): + def __init__(self): + super().__init__() + self.addn = op.AddN() + + def construct(self, x, y, input_param): + out = input_param + while x < y: + out = self.addn([out, input_param, input_param]) + x = x + 1 + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_while_addn_true(): + """ + Feature: PIJit + Description: create a net, test while, addn + Expectation: No exception. + """ + x = np.array(0).astype(np.float32) + y = np.array(2).astype(np.float32) + input_shape = (512, 512, 7, 7) + input_param = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneWhileOneAddn.construct, mode="PSJit") + ps_net = ControlOneWhileOneAddn() + out_ps = ps_net(Tensor(x), Tensor(y), Tensor(input_param)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(x), Tensor(y), Tensor(input_param)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneWhileOneAddn.construct, mode="PIJit") + pi_net = ControlOneWhileOneAddn() + out_pi = pi_net(Tensor(x), Tensor(y), Tensor(input_param)) + grad_net = GradOfAllInputs(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(x), Tensor(y), Tensor(input_param)) + match_array(out_pi.asnumpy(), out_ps.asnumpy()) + match_array(ps_grad[1], pi_grad[1]) + match_array(ps_grad[2], pi_grad[2]) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_while_addn_false(): + """ + Feature: PIJit + Description: create a net, test while, addn False + Expectation: No exception. + """ + x = np.array(3).astype(np.float32) + y = np.array(2).astype(np.float32) + input_shape = (512, 512, 7, 7) + input_param = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneWhileOneAddn.construct, mode="PSJit") + ps_net = ControlOneWhileOneAddn() + out_ps = ps_net(Tensor(x), Tensor(y), Tensor(input_param)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(x), Tensor(y), Tensor(input_param)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneWhileOneAddn.construct, mode="PIJit") + pi_net = ControlOneWhileOneAddn() + out_pi = pi_net(Tensor(x), Tensor(y), Tensor(input_param)) + grad_net = GradOfAllInputs(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(x), Tensor(y), Tensor(input_param)) + match_array(out_pi.asnumpy(), out_ps.asnumpy()) + match_array(ps_grad[1], pi_grad[1]) + match_array(ps_grad[2], pi_grad[2]) + + +class ControlOneWhileOneAddnOneAddn(Cell): + def __init__(self): + super().__init__() + self.addn = op.AddN() + + def construct(self, x, y, input_param): + out = input_param + while x < y: + out = self.addn([out, input_param, input_param]) + x = x + 1 + out_me = self.addn([out, input_param]) + return out_me + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_while_addn_addn_true(): + """ + Feature: PIJit + Description: create a net, test while, True, then addn + Expectation: No exception. + """ + x = np.array(1).astype(np.float32) + y = np.array(2).astype(np.float32) + input_shape = (512, 512, 7, 7) + input_param = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneWhileOneAddnOneAddn.construct, mode="PSJit") + ps_net = ControlOneWhileOneAddnOneAddn() + out_ps = ps_net(Tensor(x), Tensor(y), Tensor(input_param)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(x), Tensor(y), Tensor(input_param)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneWhileOneAddnOneAddn.construct, mode="PIJit") + pi_net = ControlOneWhileOneAddnOneAddn() + out_pi = pi_net(Tensor(x), Tensor(y), Tensor(input_param)) + grad_net = GradOfAllInputs(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(x), Tensor(y), Tensor(input_param)) + match_array(out_pi.asnumpy(), out_ps.asnumpy()) + match_array(ps_grad[1], pi_grad[1]) + match_array(ps_grad[2], pi_grad[2]) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_while_addn_addn_false(): + """ + Feature: PIJit + Description: create a net, test while, False, then addn + Expectation: No exception. + """ + x = np.array(3).astype(np.float32) + y = np.array(2).astype(np.float32) + input_shape = (512, 512, 7, 7) + input_param = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneWhileOneAddnOneAddn.construct, mode="PSJit") + ps_net = ControlOneWhileOneAddnOneAddn() + out_ps = ps_net(Tensor(x), Tensor(y), Tensor(input_param)) + grad_net = GradOfAllInputs(ps_net, sens_param=False) + ps_grad = grad_net(Tensor(x), Tensor(y), Tensor(input_param)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneWhileOneAddnOneAddn.construct, mode="PIJit") + pi_net = ControlOneWhileOneAddnOneAddn() + out_pi = pi_net(Tensor(x), Tensor(y), Tensor(input_param)) + grad_net = GradOfAllInputs(pi_net, sens_param=False) + pi_grad = grad_net(Tensor(x), Tensor(y), Tensor(input_param)) + match_array(out_pi.asnumpy(), out_ps.asnumpy()) + match_array(ps_grad[1], pi_grad[1]) + match_array(ps_grad[2], pi_grad[2]) + + +class ControlOneWhileOnePara(Cell): + def __init__(self, input_shape): + super().__init__() + self.assign = op.Assign() + self.inputdata = Parameter(initializer(1, input_shape, ms.float32), name="global_step") + + def construct(self, x, y, input_param): + out = input_param + while x < y: + inputdata = self.inputdata + x = x + 1 + out = self.assign(inputdata, input_param) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_while_para_true(): + """ + Feature: PIJit + Description: create a net, test while, assign, True + Expectation: No exception. + """ + x = np.array(1).astype(np.float32) + y = np.array(0).astype(np.float32) + input_shape = (512, 512, 7, 7) + input_param = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneWhileOnePara.construct, mode="PSJit") + ps_net = ControlOneWhileOnePara(input_shape) + out_ps = ps_net(Tensor(x), Tensor(y), Tensor(input_param)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneWhileOnePara.construct, mode="PIJit") + pi_net = ControlOneWhileOnePara(input_shape) + out_pi = pi_net(Tensor(x), Tensor(y), Tensor(input_param)) + match_array(out_pi.asnumpy(), out_ps.asnumpy()) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_while_para_false(): + """ + Feature: PIJit + Description: create a net, test while, assign, False + Expectation: No exception. + """ + x = np.array(3).astype(np.float32) + y = np.array(1).astype(np.float32) + input_shape = (512, 512, 7, 7) + input_param = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneWhileOnePara.construct, mode="PSJit") + ps_net = ControlOneWhileOnePara(input_shape) + out_ps = ps_net(Tensor(x), Tensor(y), Tensor(input_param)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneWhileOnePara.construct, mode="PIJit") + pi_net = ControlOneWhileOnePara(input_shape) + out_pi = pi_net(Tensor(x), Tensor(y), Tensor(input_param)) + match_array(out_pi.asnumpy(), out_ps.asnumpy()) + + +class ControlOneBoolWhileOneAddn(Cell): + def __init__(self): + super().__init__() + self.addn = op.AddN() + + def construct(self, x, y, input_param): + out = input_param + while x: + out = self.addn([input_param, input_param, input_param]) + x = y + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_ctrl_bool_while_addn_true(): + """ + Feature: PIJit + Description: create a net, test while, condition bool + Expectation: No exception. + """ + x = np.array(True).astype(np.bool) + y = np.array(False).astype(np.bool) + input_shape = (512, 512, 7, 7) + input_param = np.random.randn(*input_shape).astype(np.float32) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=ControlOneBoolWhileOneAddn.construct, mode="PSJit") + ps_net = ControlOneBoolWhileOneAddn() + out_ps = ps_net(Tensor(x), Tensor(y), Tensor(input_param)) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=ControlOneBoolWhileOneAddn.construct, mode="PIJit") + pi_net = ControlOneBoolWhileOneAddn() + out_pi = pi_net(Tensor(x), Tensor(y), Tensor(input_param)) + match_array(out_pi.asnumpy(), out_ps.asnumpy()) diff --git a/tests/st/pi_jit/control_flow/test_control_while_break.py b/tests/st/pi_jit/control_flow/test_control_while_break.py new file mode 100644 index 0000000000000000000000000000000000000000..a5b377c60271661c644b3711b14b349c6b7c1117 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_break.py @@ -0,0 +1,575 @@ +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +import mindspore.ops.functional as F +from mindspore.common.parameter import Parameter +from .ctrl_factory import CtrlFactory +import numpy as np +import pytest + + +class CtrlWhileIfBreak(Cell): + def __init__(self): + super().__init__() + self.loop = Parameter(Tensor(1, dtype.float32), name="loop") + + def construct(self, x): + while self.loop < 5: + self.loop += 1 + if x > 1: + x += 1 + break + x += 1 + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_if_break_not_relevant_gt(): + ''' + Description: test control flow, loop is parameter in init + if-break variable is x, different from loop, use cmp operator > + Expectation: No exception. + ''' + fact = CtrlFactory(-2) + ps_net = CtrlWhileIfBreak() + pi_net = CtrlWhileIfBreak() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakIn(Cell): + def __init__(self): + super().__init__() + self.addn = P.AddN() + + def construct(self, x): + s = x + t = x + 1 + tensor_list = [x, x] + while len(tensor_list) < 4: + tensor_list.append(x) + a = self.addn(tensor_list) + x += 1 + if t in tensor_list: + break + s += a + return s + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_break(): + ''' + Description: test control flow while break, use member operator in + Expectation: No exception. + ''' + fact = CtrlFactory(-2) + ps_net = CtrlWhileBreakIn() + pi_net = CtrlWhileBreakIn() + fact.compare(ps_net, pi_net) + + +class CtrlWhileCast(Cell): + def __init__(self): + super().__init__() + self.cast = P.Cast() + + def construct(self, x, loop): + while loop >= 3: + loop -= 2 + if self.cast(x, dtype.bool_): + break + return loop + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_break_cast(): + ''' + Description: test control flow, use op cast + Expectation: No exception. + ''' + fact = CtrlFactory(1, 7) + ps_net = CtrlWhileCast() + pi_net = CtrlWhileCast() + fact.compare(ps_net, pi_net) + + +class CtrlOnceBreak(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + self.add(x, x) + while x > 2: + if x > 1: + pass + x = x + 1 + break + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_once_break(): + ''' + Description: test control flow, while once break + Expectation: No exception. + ''' + fact = CtrlFactory(-2) + ps_net = CtrlOnceBreak() + pi_net = CtrlOnceBreak() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakInIf(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + while x < 2: + x += 1 + if x >= 2: + break + elif x == 1: + x = self.mul(x, x) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_break_in_if(): + ''' + Description: test control flow, while, if-elif, break in if + Expectation: No exception. + ''' + fact = CtrlFactory(-3) + ps_net = CtrlWhileBreakInIf() + pi_net = CtrlWhileBreakInIf() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakInElif(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + out = self.mul(x, x) + while x < 2: + x += 2 + if x <= 0: + out += x + elif x != 1: + break + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_break_in_elif(): + ''' + Description: test control flow, if-elif in while, break in elif + Expectation: No exception. + ''' + fact = CtrlFactory(-3) + ps_net = CtrlWhileBreakInElif() + pi_net = CtrlWhileBreakInElif() + fact.compare(ps_net, pi_net) + + +class CtrlElifTwoBreak(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, t): + out = t + while x > 0: + x -= 1 + if x < 2: + break + elif x < 1: + break + out = self.mul(t, out) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_elif_two_break(): + ''' + Description: test control flow, if-elif in while, both break + Expectation: No exception. + ''' + fact = CtrlFactory(3, [1, 2, 3]) + ps_net = CtrlElifTwoBreak() + pi_net = CtrlElifTwoBreak() + fact.compare(ps_net, pi_net) + + +class CtrlElifBreakOnce(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, t): + out = t + while x < 3: + x -= 2 + if x > 4: + x -= 1 + elif x > 6: + x += 1 + out = self.mul(out, t) + break + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_once_elif_break(): + ''' + Description: test control flow, if-elif in while, both break + Expectation: No exception. + ''' + fact = CtrlFactory(8, [2, 3, 4]) + ps_net = CtrlElifBreakOnce() + pi_net = CtrlElifBreakOnce() + fact.compare(ps_net, pi_net) + + +class CtrlIfBreakElse(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, y, t): + out = t + while x + y > 4: + if x > 1 and y > 1: + break + elif x > 4 or y > 2: + out += t + else: + out = self.mul(out, t) + x -= 2 + y += 1 + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_else_break_in_if(): + ''' + Description: test control flow, if-elif-else in while + Expectation: No exception. + ''' + x = 9 + y = -2 + t = np.random.rand(3, 4) + fact = CtrlFactory(x, y, t) + ps_net = CtrlIfBreakElse() + pi_net = CtrlIfBreakElse() + fact.compare(ps_net, pi_net) + + +class CtrlWhileElseBreakInElif(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, t): + out = t + while x < 4: + x += 1 + if not x > 1: + out += t + elif 1 <= x < 2: + break + else: + out = self.mul(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_else_elif_break(): + ''' + Description: test control flow, if-elif-else in while, break in elif + Expectation: No exception. + ''' + x = -1 + t = np.random.rand(3, 4) + fact = CtrlFactory(x, t) + ps_net = CtrlWhileElseBreakInElif() + pi_net = CtrlWhileElseBreakInElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakInIfElif(Cell): + def __init__(self): + super().__init__() + self.square = P.Square() + self.add = P.Add() + + def construct(self, x): + while x < 5: + x += 2 + if self.double(x) < 3: + break + elif self.sqr(x) < 5: + break + else: + x -= 1 + return x + + def double(self, x): + return self.add(x, x) + + def sqr(self, x): + return self.square(x) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_break_func(): + ''' + Description: test control flow, condition func(x), if-elif break + Expectation: No exception. + ''' + fact = CtrlFactory(3) + ps_net = CtrlWhileBreakInIfElif() + pi_net = CtrlWhileBreakInIfElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifBreakInElif(Cell): + def __init__(self): + super().__init__() + self.reduce = P.ReduceSum() + self.max = P.ReduceMax() + + def construct(self, x, y): + while y < 4: + y += 1 + if self.reduce(x) > 2: + x[1] -= 2 + elif self.reduce(x) > 1: + break + elif self.max(x) > 2: + y += 1 + else: + x[0] += 1 + x = x * y + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_break_func2(): + ''' + Description: test control flow, condition func(x), if-elif break + Expectation: No exception. + ''' + x = [-2, -3, 4] + y = 2 + fact = CtrlFactory(x, y) + ps_net = CtrlWhile2ElifBreakInElif() + pi_net = CtrlWhile2ElifBreakInElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifBreakInElse(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, t, x): + self.add(t, t) + while t < 20: + t += 1 + if x.all(): + t += 4 + elif x.any(): + t += 3 + elif not x.all(): + t += 2 + else: + break + return t + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_break_in_else(): + ''' + Description: test control flow, if-elif-elif-else in while + break in else, use tensor.any(), tensor.all() + Expectation: No exception. + ''' + t = 0 + x = [True, False, False] + fact = CtrlFactory(t) + fact.ms_input.append(Tensor(x, dtype.bool_)) + ps_net = CtrlWhile2ElifBreakInElse() + pi_net = CtrlWhile2ElifBreakInElse() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifBInIfElif(Cell): + def __init__(self): + super().__init__() + self.cast = P.Cast() + + def construct(self, x): + while self.cast(x, dtype.bool_): + x -= 1 + if x < -1: + break + elif x < 3: + break + elif x < 9: + x -= 1 + else: + x -= 2 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_break_in_ifelif(): + ''' + Description: test control flow, if-elif-elif-else in while + break in if and elif + Expectation: No exception. + ''' + x = 12 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifBInIfElif() + pi_net = CtrlWhile2ElifBInIfElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifBreakIfElif(Cell): + def __init__(self): + super().__init__() + self.sqrt = F.sqrt + self.square = F.square + + def construct(self, x): + while x < 20: + if self.sqrt(x) > 4: + break + elif x > 10: + break + elif self.square(x) > 4: + x += 3 + else: + x += 2 + x += 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_break_in_if_elif_usef(): + ''' + Description: test control flow, if-elif-elif-else in while + break in if and elif, use F.sqrt + Expectation: No exception. + ''' + x = 1 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifBreakIfElif() + pi_net = CtrlWhile2ElifBreakIfElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifBreakInIfElse(Cell): + def __init__(self, t): + super().__init__() + self.assign = P.Assign() + self.weight = Parameter(Tensor(t, dtype.float32), name="w") + + def construct(self, x): + while x < 2: + x += 1 + if x < -4: + break + elif x < -3: + self.assign(self.weight, x) + elif x < 0: + x += 2 + else: + break + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_break_in_if_else(): + ''' + Description: test control flow, if-elif-elif-else in while + break in if and else, assign parameter + Expectation: No exception. + ''' + x = -4 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifBreakInIfElse() + pi_net = CtrlWhile2ElifBreakInIfElse() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifBreakInElifElse(Cell): + def __init__(self): + super().__init__() + self.print = P.Print() + + def construct(self, x): + while x < 20: + if x > 4: + self.print(x) + elif x >= 3: + x += 1 + elif x * 2 > 4: + break + else: + break + x += 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_break_in_if_else2(): + ''' + Description: test control flow, if-elif-elif-else in while + break in elif2 and else, print in if + Expectation: No exception. + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifBreakInElifElse() + pi_net = CtrlWhile2ElifBreakInElifElse() + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/control_flow/test_control_while_break_2.py b/tests/st/pi_jit/control_flow/test_control_while_break_2.py new file mode 100644 index 0000000000000000000000000000000000000000..bcee11dc706400cd5cffca3f4046cfa80e91ca83 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_break_2.py @@ -0,0 +1,46 @@ +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +from mindspore import context, jit +from ..share.utils import allclose_nparray +import pytest + + +class CtrlWhile2ElifBreakInIf(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + while x[2] < 4: + x[2] -= 1 + if x[0] > 2: + break + elif x[1] > 2: + x[2] += 1 + elif x[2] > 2: + x[1] += 1 + else: + x = self.mul(x, x) + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_break_in_if(): + ''' + Description: test control flow, 2elif in while, break in if + use tensor get_item, set_item as condition, torch not supports grad + graph mode set item change inputs, cause load mindir endless loop + Expectation: no expectation + ''' + x = [1, 2, 3] + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhile2ElifBreakInIf.construct, mode="PSJit") + ps_net = CtrlWhile2ElifBreakInIf() + ps_out = ps_net(Tensor(x, dtype.float32)) + pi_net = CtrlWhile2ElifBreakInIf() + pi_out = pi_net(Tensor(x, dtype.float32)) + allclose_nparray(ps_out.asnumpy(), pi_out.asnumpy(), 0.001, 0.001) diff --git a/tests/st/pi_jit/control_flow/test_control_while_by_for_break.py b/tests/st/pi_jit/control_flow/test_control_while_by_for_break.py new file mode 100644 index 0000000000000000000000000000000000000000..33449f02046d6973d16684a33bfdb9caa2bb3c00 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_by_for_break.py @@ -0,0 +1,208 @@ +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +from mindspore.common.parameter import Parameter +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileForBreakOne(Cell): + def __init__(self, t): + super().__init__() + self.param = Parameter(Tensor(t, dtype.float32), name="p") + + def construct(self, x): + while x < 5: + self.param += 1 + x += 1 + if x > 1: + break + for _ in range(3): + self.param += 2 + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_param_break_in_while(): + ''' + Description: test control flow, while by for, break in while + change parameter + Expectation: no expectation + ''' + t = 2 + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileForBreakOne(t) + pi_net = CtrlWhileForBreakOne(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileForBreakAdd(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x < 5: + out = self.add(out, x) + x += 1 + if x > 1: + break + for _ in range(3): + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_break_no_param(): + ''' + Description: test control flow, while by for, break in while + no parameter + Expectation: no expectation + ''' + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileForBreakAdd() + pi_net = CtrlWhileForBreakAdd() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakForX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x > 1: + out = self.add(out, x) + x -= 1 + for _ in range(3): + x -= 1 + if x < 0: + break + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_break_in_for_x(): + ''' + Description: test control flow, while by for, break in for + no parameter, block while change condition of for + Expectation: no expectation + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileBreakForX() + pi_net = CtrlWhileBreakForX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakFor(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x > 1: + x -= 1 + out = self.add(out, x) + for i in range(5): + out = self.add(out, x) + if i > 2: + break + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_break_in_for(): + ''' + Description: test control flow, while by for, break in for + no parameter + Expectation: no expectation + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileBreakFor() + pi_net = CtrlWhileBreakFor() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakForP(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.param = Parameter(t, name="p") + + def construct(self, x): + while x < 5: + self.param += 1 + x += 1 + for _ in range(3): + self.param += 2 + if self.param > 2: + break + x = self.add(x, self.param) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_break_in_for_param(): + ''' + Description: test control flow, while by for, break in for + with parameter + Expectation: no expectation + ''' + x = 1 + t = -4 + fact = CtrlFactory(x) + ps_net = CtrlWhileBreakForP(t) + pi_net = CtrlWhileBreakForP(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakForN(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x < 5: + out = self.add(out, x) + if x > 1: + break + x += 1 + for _ in range(3): + out = self.add(out, x) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_break_in_for_no(): + ''' + Description: test control flow, while by for, break in while + no parameter + Expectation: no expectation + ''' + x = -3 + fact = CtrlFactory(x) + ps_net = CtrlWhileBreakForN() + pi_net = CtrlWhileBreakForN() + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/control_flow/test_control_while_by_for_continue.py b/tests/st/pi_jit/control_flow/test_control_while_by_for_continue.py new file mode 100644 index 0000000000000000000000000000000000000000..319b722fdc81223f1eea958ba3853f55265ab2dd --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_by_for_continue.py @@ -0,0 +1,210 @@ +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +from mindspore.common import Parameter +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileForContinueOne(Cell): + def __init__(self, t): + super().__init__() + self.param = Parameter(Tensor(t, dtype.float32), name="p") + + def construct(self, x): + while x < 5: + self.param += 1 + x += 1 + if x > 1: + continue + for _ in range(3): + self.param += 2 + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_param_continue_in_while(): + ''' + Description: test control flow, while by for + continue in while, change parameter + Expectation: no expectation + ''' + t = 2 + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileForContinueOne(t) + pi_net = CtrlWhileForContinueOne(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileForContinueAdd(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x < 5: + out = self.add(out, x) + x += 1 + if x > 1: + continue + for _ in range(3): + out = self.add(out, x) + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_continue_no_param(): + ''' + Description: test control flow, while by for + continue in while, without parameter + Expectation: no expectation + ''' + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileForContinueAdd() + pi_net = CtrlWhileForContinueAdd() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueForX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x > 1: + out = self.add(out, x) + x -= 1 + for _ in range(3): + x -= 1 + if x < 0: + continue + out = self.add(out, x) + return out + + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_continue_in_for_x(): + ''' + Description: test control flow, while for continue + continue in while, change parameter + Expectation: no expectation + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileContinueForX() + pi_net = CtrlWhileContinueForX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueFor(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x > 1: + x -= 1 + out = self.add(out, x) + for i in range(5): + out = self.add(out, x) + if i > 2: + continue + return out + + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_continue_in_for(): + ''' + Description: test control flow, while by for + continue in for + Expectation: no expectation + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileContinueFor() + pi_net = CtrlWhileContinueFor() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueForP(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.param = Parameter(t, name="p") + + def construct(self, x): + while x < 5: + self.param += 1 + x += 1 + for _ in range(3): + self.param += 2 + if self.param > 2: + continue + x = self.add(x, self.param) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_continue_in_for_param(): + ''' + Description: test control flow, while by for + continue in for, change parameter + Expectation: no expectation + ''' + x = 1 + t = -4 + fact = CtrlFactory(x) + ps_net = CtrlWhileContinueForP(t) + pi_net = CtrlWhileContinueForP(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueForN(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x < 5: + x += 1 + if x > 1: + continue + out = self.add(out, x) + for _ in range(3): + out = self.add(out, x) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_continue_in_for_no(): + ''' + Description: test control flow, while by for + continue in while, without parameter + Expectation: no expectation + ''' + x = -3 + fact = CtrlFactory(x) + ps_net = CtrlWhileContinueForN() + pi_net = CtrlWhileContinueForN() + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/control_flow/test_control_while_by_return.py b/tests/st/pi_jit/control_flow/test_control_while_by_return.py new file mode 100644 index 0000000000000000000000000000000000000000..05826a78456fce5ef882c15a8a579a343a1eeb6b --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_by_return.py @@ -0,0 +1,208 @@ +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +from mindspore.common import Parameter +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileForReturnOne(Cell): + def __init__(self, t): + super().__init__() + self.param = Parameter(Tensor(t, dtype.float32), name="p") + + def construct(self, x): + while x < 5: + self.param += 1 + x += 1 + if x > 1: + return x + for _ in range(3): + self.param += 2 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_param_return_in_while(): + ''' + Description: test control flow, while by for + return in while, with parameter + Expectation: no expectation + ''' + t = 2 + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileForReturnOne(t) + pi_net = CtrlWhileForReturnOne(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileForReturnAdd(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x < 5: + out = self.add(out, x) + x += 1 + if x > 1: + return out + for _ in range(3): + out = self.add(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_return_no_param(): + ''' + Description: test control flow, while by for + return in while, without parameter + Expectation: no expectation + ''' + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileForReturnAdd() + pi_net = CtrlWhileForReturnAdd() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnForX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x > 1: + out = self.add(out, x) + x -= 1 + for _ in range(3): + x -= 1 + if x < 0: + return out + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_return_in_for_x(): + ''' + Description: test control flow, while by for + return in for, change x + Expectation: no expectation + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileReturnForX() + pi_net = CtrlWhileReturnForX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnFor(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x > 1: + x -= 1 + out = self.add(out, x) + for i in range(5): + out = self.add(out, x) + if i > 2: + return out + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_return_in_for(): + ''' + Description: test control flow, while by for + return in for, not change x + Expectation: no expectation + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileReturnFor() + pi_net = CtrlWhileReturnFor() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnForP(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.param = Parameter(t, name="p") + + def construct(self, x): + while x < 5: + self.param += 1 + x += 1 + for _ in range(3): + self.param += 2 + if self.param > 2: + return x + x = self.add(x, self.param) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_return_in_for_param(): + ''' + Description: test control flow, while by for + return in for, with parameter + Expectation: no expectation + ''' + x = 1 + t = -4 + fact = CtrlFactory(x) + ps_net = CtrlWhileReturnForP(t) + pi_net = CtrlWhileReturnForP(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnForN(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + while x < 5: + out = self.add(out, x) + if x > 1: + return out + x += 1 + for _ in range(3): + out = self.add(out, x) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_for_return_in_for_no(): + ''' + Description: test control flow, while by for + return in while, without parameter + Expectation: no expectation + ''' + x = -3 + fact = CtrlFactory(x) + ps_net = CtrlWhileReturnForN() + pi_net = CtrlWhileReturnForN() + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/control_flow/test_control_while_continue_2.py b/tests/st/pi_jit/control_flow/test_control_while_continue_2.py new file mode 100644 index 0000000000000000000000000000000000000000..fa02cfa69e5fd66dc98fb727637c39fe812c1531 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_continue_2.py @@ -0,0 +1,67 @@ +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +from mindspore import context, jit +from ..share.utils import allclose_nparray +import pytest + + +class CtrlWhileContinueInElse(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, t, x, y): + self.mul(t, t) + while t > 2: + t -= 1 + if (x and y) or not x: + t -= 1 + elif x or y: + x = not x + t -= 2 + else: + continue + return t + + +class CtrlWhile2ElifContinueInIf(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + while x[2] < 4: + x[2] -= 1 + if x[0] > 2: + continue + elif x[1] > 2: + x[2] += 1 + elif x[2] > 2: + x[1] += 1 + else: + x = self.mul(x, x) + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_continue_in_if(): + ''' + Description: test control flow, 2elif in while, continue in if + use tensor get_item, set_item as condition + Expectation: no expectation + ''' + x = [1, 2, 3] + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhile2ElifContinueInIf.construct, mode="PSJit") + ps_net = CtrlWhile2ElifContinueInIf() + ps_out = ps_net(Tensor(x, dtype.float32)) + + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhile2ElifContinueInIf.construct, mode="PIJit") + pi_net = CtrlWhile2ElifContinueInIf() + pi_out = pi_net(Tensor(x, dtype.float32)) + allclose_nparray(ps_out.asnumpy(), pi_out.asnumpy(), 0.001, 0.001) diff --git a/tests/st/pi_jit/control_flow/test_control_while_in_for_break.py b/tests/st/pi_jit/control_flow/test_control_while_in_for_break.py new file mode 100644 index 0000000000000000000000000000000000000000..fba664f6b11d5b3e9dfd08ceef074fa24837cb91 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_in_for_break.py @@ -0,0 +1,307 @@ +from mindspore.nn import Cell +import mindspore.ops.operations as P +from mindspore.common.parameter import Parameter +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileInForBreakX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x, t): + out = t + for _ in range(4): + out = self.add(out, t) + x += 1 + while x > 4: + x -= 1 + out = self.add(out, t) + if x < 2: + break + return out + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_break_in_for_x(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, break in for + 2. change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 6 + t = [1, 2, 3] + fact = CtrlFactory(x, t) + ps_net = CtrlWhileInForBreakX() + pi_net = CtrlWhileInForBreakX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForBreak(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(10): + out = self.add(out, x) + if i > 5: + break + while x > 3: + out = self.add(out, x) + x -= 1 + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_break_in_for(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, break in for + 2. not change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 9 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForBreak() + pi_net = CtrlWhileInForBreak() + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForBreakOne(Cell): + def __init__(self, tensor): + super().__init__() + self.param = Parameter(tensor, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + while x < 5: + self.param += 1 + x += 1 + if x > 1: + break + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_in_while_param_break_in_for(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, break in for + 2. change parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = -2 + t = 2 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForBreakOne(t) + pi_net = CtrlWhileInForBreakOne(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForBreakAdd(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + while x < 5: + out = self.add(out, x) + x += 1 + if x > 1: + break + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_break_in_while_no_param(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, break in for + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForBreakAdd() + pi_net = CtrlWhileInForBreakAdd() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakInForX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(3): + x -= i + while x > 1: + out = self.add(out, x) + x -= 1 + if x < 0: + break + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_break_in_while_x(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, break in while + 2. change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileBreakInForX() + pi_net = CtrlWhileBreakInForX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakInFor(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + tmp = x + for _ in range(5): + out = self.add(out, x) + while x > 1: + x -= 1 + out = self.add(out, x) + if x > 2: + break + x = tmp + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_break_in_while(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, break in while + 2. not change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileBreakInFor() + pi_net = CtrlWhileBreakInFor() + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakInForP(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.param = Parameter(t, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + while x < 5: + self.param += 1 + x += 1 + if self.param > 2: + break + x = self.add(x, self.param) + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_break_in_while_param(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, break in while + 2. change parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 1 + t = -4 + fact = CtrlFactory(x) + ps_net = CtrlWhileBreakInForP(t) + pi_net = CtrlWhileBreakInForP(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileBreakInForN(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + while x < 5: + out = self.add(out, x) + if x > 1: + break + x += 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_break_in_while_no(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, break in while + 2. change parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = -3 + fact = CtrlFactory(x) + ps_net = CtrlWhileBreakInForN() + pi_net = CtrlWhileBreakInForN() + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/control_flow/test_control_while_in_for_continue.py b/tests/st/pi_jit/control_flow/test_control_while_in_for_continue.py new file mode 100644 index 0000000000000000000000000000000000000000..87bcf08f47780d74bef588c2e0ceebd751a9b06a --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_in_for_continue.py @@ -0,0 +1,250 @@ +from mindspore.nn import Cell +import mindspore.ops.operations as P +from mindspore.common.parameter import Parameter +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileInForContinueX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x, t): + out = x + for _ in range(4): + out = self.add(out, t) + x += 1 + while x > 4: + x -= 1 + out = self.add(out, t) + if x < 2: + continue + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_continue_in_for_x(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, continue in for + 2. change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 6 + t = [1, 2, 3] + fact = CtrlFactory(x, t) + ps_net = CtrlWhileInForContinueX() + pi_net = CtrlWhileInForContinueX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForContinue(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(10): + out = self.add(out, x) + if i > 5: + continue + while x > 3: + out = self.add(out, x) + x -= 1 + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_continue_in_for(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, continue in for + 2. not change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 9 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForContinue() + pi_net = CtrlWhileInForContinue() + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForContinueOne(Cell): + def __init__(self, tensor): + super().__init__() + self.param = Parameter(tensor, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + while x < 5: + self.param += 1 + x += 1 + if x > 1: + continue + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_in_while_param_continue_in_for(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, continue in for + 2. change parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = -2 + t = 2 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForContinueOne(t) + pi_net = CtrlWhileInForContinueOne(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForContinueAdd(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + while x < 5: + out = self.add(out, x) + x += 1 + if x > 1: + continue + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_continue_in_while_no_param(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, continue in for + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForContinueAdd() + pi_net = CtrlWhileInForContinueAdd() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueInForX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(3): + x -= i + while x > 1: + out = self.add(out, x) + x -= 1 + if x < 0: + continue + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_continue_in_while_x(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, continue in while + 2. change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileContinueInForX() + pi_net = CtrlWhileContinueInForX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueInFor(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + tmp = x + for _ in range(5): + out = self.add(out, x) + while x > 1: + x -= 1 + out = self.add(out, x) + if x > 2: + continue + x = tmp + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_continue_in_while(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, continue in while + 2. change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileContinueInFor() + pi_net = CtrlWhileContinueInFor() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueInForP(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.param = Parameter(t, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + while x < 5: + self.param += 1 + x += 1 + if self.param > 2: + continue + x = self.add(x, self.param) + return x diff --git a/tests/st/pi_jit/control_flow/test_control_while_in_for_return.py b/tests/st/pi_jit/control_flow/test_control_while_in_for_return.py new file mode 100644 index 0000000000000000000000000000000000000000..50168f1704b6c427f3e99f5d2fbe387ba26d05cf --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_in_for_return.py @@ -0,0 +1,269 @@ +from mindspore.nn import Cell +import mindspore.ops.operations as P +from mindspore.common.parameter import Parameter +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileInForReturnX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x, t): + out = t + for _ in range(4): + out = self.add(out, t) + x += 1 + while x > 4: + x -= 1 + out = self.add(out, t) + if x < 2: + return out + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_return_in_for_x(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, return in for + 2. change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 6 + t = [1, 2, 3] + fact = CtrlFactory(x, t) + ps_net = CtrlWhileInForReturnX() + pi_net = CtrlWhileInForReturnX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForReturn(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(10): + out = self.add(out, x) + if i > 5: + return out + while x > 3: + out = self.add(out, x) + x -= 1 + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_return_in_for(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, return in for + 2. not change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 9 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForReturn() + pi_net = CtrlWhileInForReturn() + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForReturnOne(Cell): + def __init__(self, tensor): + super().__init__() + self.param = Parameter(tensor, name="p") + + def construct(self, x): + for _ in range(3): + self.param += 2 + while x < 5: + self.param += 1 + x += 1 + if x > 1: + return x + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_for_in_while_param_return_in_for(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, return in for + 2. change parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = -2 + t = 2 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForReturnOne(t) + pi_net = CtrlWhileInForReturnOne(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileInForReturnAdd(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + while x < 5: + out = self.add(out, x) + x += 1 + if x > 1: + return out + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_return_in_while_no_param(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, return in for + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = -2 + fact = CtrlFactory(x) + ps_net = CtrlWhileInForReturnAdd() + pi_net = CtrlWhileInForReturnAdd() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnInForX(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for i in range(3): + x -= i + while x > 1: + out = self.add(out, x) + x -= 1 + if x < 0: + return out + out = self.add(out, x) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_return_in_while_x(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, return in while + 2. change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileReturnInForX() + pi_net = CtrlWhileReturnInForX() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnInFor(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + tmp = x + for _ in range(5): + out = self.add(out, x) + while x > 1: + x -= 1 + out = self.add(out, x) + if x > 2: + return out + x = tmp + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_return_in_for_nochange(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, return in while + 2. not change x + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhileReturnInFor() + pi_net = CtrlWhileReturnInFor() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnInForN(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + out = x + for _ in range(3): + out = self.add(out, x) + while x < 5: + out = self.add(out, x) + if x > 1: + return x + x += 1 + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_for_return_in_while_no(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in for, return in while + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = -3 + fact = CtrlFactory(x) + ps_net = CtrlWhileReturnInForN() + pi_net = CtrlWhileReturnInForN() + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/control_flow/test_control_while_in_while_bcr.py b/tests/st/pi_jit/control_flow/test_control_while_in_while_bcr.py new file mode 100644 index 0000000000000000000000000000000000000000..b428365b93c4b2642ace236bc1b291051e4c3807 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_in_while_bcr.py @@ -0,0 +1,243 @@ +from mindspore.nn import Cell +from mindspore.common.parameter import Parameter +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +import numpy as np +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileInWhileBC(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assignadd = P.AssignAdd() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + while x < 2: + self.assignadd(self.para, y) + x += 1 + if x < 4: + out = self.add(out, out) + break + while x + 1 > 1: + x -= 1 + if x < 7: + out = self.mul(out, self.para) + continue + out = self.add(out, y) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_while_in_if_break_continue(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in while, break out, continue in + 2. run the net + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + input_np = np.random.randn(3, 2).astype(np.float32) + x = 1 + t = input_np + y = input_np + fact = CtrlFactory(x, y) + ps_net = CtrlWhileInWhileBC(t) + pi_net = CtrlWhileInWhileBC(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileInWhileCB(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assignadd = P.AssignAdd() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + while x < 2: + self.assignadd(self.para, y) + x += 1 + if x < 4: + out = self.add(out, out) + continue + while x + 1 > 1: + x -= 1 + if x < 7: + out = self.mul(out, self.para) + break + out = self.add(out, y) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_if_continue_break(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in while, continue out, break in + 2. run the net + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + input_np = np.random.randn(3, 2).astype(np.float32) + x = 1 + t = input_np + y = input_np + fact = CtrlFactory(x, y) + ps_net = CtrlWhileInWhileCB(t) + pi_net = CtrlWhileInWhileCB(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileInWhileBR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assignadd = P.AssignAdd() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + while x > -4: + x -= 3 + self.assignadd(self.para, y) + if x < 0: + out = self.mul(out, out) + break + while x > -4: + x -= 1 + out = self.add(out, y) + if x < -1: + return out + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_while_break_return(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in while, break out, return in + 2. run the net + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + input_np = np.random.randn(3, 2).astype(np.float32) + x = 5 + t = input_np + y = input_np + fact = CtrlFactory(x, y) + ps_net = CtrlWhileInWhileBR(t) + pi_net = CtrlWhileInWhileBR(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileInWhileRB(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.assignadd = P.AssignAdd() + self.para = Parameter(t, name="a") + + def construct(self, x, y): + out = self.add(y, y) + while x > -4: + x -= 3 + self.assignadd(self.para, y) + if x < 0: + out = self.mul(out, out) + return out + while x > -4: + x -= 1 + out = self.add(out, y) + if x < -1: + break + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_while_return_break(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in while, return out, break in + 2. run the net + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + input_np = np.random.randn(3, 2).astype(np.float32) + x = 5 + t = input_np + y = input_np + fact = CtrlFactory(x, y) + ps_net = CtrlWhileInWhileRB(t) + pi_net = CtrlWhileInWhileRB(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhileInWhileCR(Cell): + def __init__(self, t): + super().__init__() + self.add = P.Add() + self.mul = P.Mul() + self.para = Parameter(Tensor(t, dtype.float32), name="a") + + def construct(self, x, y): + out = self.mul(y, y) + while x != 3: + while x > 5: + x += 1 + if x > 3: + x = x - 1 + return out + out = self.add(out, self.para) + x = x + 1 + continue + out = self.mul(out, y) + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_while_continue_return(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with while in while, return in, continue out + 2. run the net + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + input_np = np.random.randn(3, 2).astype(np.float32) + x = 2 + t = 8 + y = input_np + fact = CtrlFactory(x, y) + ps_net = CtrlWhileInWhileCR(t) + pi_net = CtrlWhileInWhileCR(t) + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/control_flow/test_control_while_return.py b/tests/st/pi_jit/control_flow/test_control_while_return.py new file mode 100644 index 0000000000000000000000000000000000000000..ad59987419548a25c14388f7ba1f3211009f9737 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_return.py @@ -0,0 +1,708 @@ +from mindspore.nn import Cell +from mindspore.common.parameter import Parameter +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +import mindspore.ops.functional as F +import numpy as np +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileIfReturn(Cell): + def __init__(self): + super().__init__() + self.loop = Parameter(Tensor(1, dtype.float32), name="loop") + + def construct(self, x): + while self.loop < 5: + self.loop += 1 + if x > 1: + x += 1 + return x + x += 1 + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_if_return_not_relevant_gt(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with return in while, condition is parameter + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + fact = CtrlFactory(-2) + ps_net = CtrlWhileIfReturn() + pi_net = CtrlWhileIfReturn() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnIn(Cell): + def __init__(self): + super().__init__() + self.addn = P.AddN() + + def construct(self, x): + s = x + t = x + 1 + tensor_list = [x, x] + while len(tensor_list) < 4: + tensor_list.append(x) + a = self.addn(tensor_list) + x += 1 + if t in tensor_list: + return s + s += a + return s + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_return(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with return in while, use member op in + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + fact = CtrlFactory(2) + ps_net = CtrlWhileReturnIn() + pi_net = CtrlWhileReturnIn() + fact.compare(ps_net, pi_net) + + +class CtrlWhileCast(Cell): + def __init__(self): + super().__init__() + self.cast = P.Cast() + + def construct(self, x, loop): + while loop >= 3: + loop -= 2 + if self.cast(x, dtype.bool_): + return loop + return loop + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_return_cast(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with return in while, use op cast + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + fact = CtrlFactory(1, 7) + ps_net = CtrlWhileCast() + pi_net = CtrlWhileCast() + fact.compare(ps_net, pi_net) + + +class CtrlOnceReturn(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, x): + self.add(x, x) + while x > 2: + if x > 1: + pass + x = x + 1 + return x + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_once_return(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with return in while, once out + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + fact = CtrlFactory(-2) + ps_net = CtrlOnceReturn() + pi_net = CtrlOnceReturn() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnInIf(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + while x < 2: + x += 1 + if x >= 2: + res = x + break + elif x == 1: + x = self.mul(x, x) + return res + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_return_in_if(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-elif in while, return in if + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + fact = CtrlFactory(-3) + ps_net = CtrlWhileReturnInIf() + pi_net = CtrlWhileReturnInIf() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnInElif(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + out = self.mul(x, x) + while x < 2: + x += 2 + if x <= 0: + out += x + elif x != 1: + return out + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_return_in_elif(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-elif in while, return in elif + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + fact = CtrlFactory(-3) + ps_net = CtrlWhileReturnInElif() + pi_net = CtrlWhileReturnInElif() + fact.compare(ps_net, pi_net) + + +class CtrlElifReturnOnce(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, t): + out = t + while x < 3: + x -= 2 + if x > 4: + x -= 1 + elif x > 6: + x += 1 + out = self.mul(out, t) + return out + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_once_elif_return(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-elif in while, return at last + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + fact = CtrlFactory(8, [2, 3, 4]) + ps_net = CtrlElifReturnOnce() + pi_net = CtrlElifReturnOnce() + fact.compare(ps_net, pi_net) + + +class CtrlIfReturnElse(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, y, t): + out = t + while x + y > 4: + if x > 1 and y > 1: + res = out + break + elif x > 4 or y > 2: + out += t + else: + out = self.mul(out, t) + x -= 2 + y += 1 + return res + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_else_return_in_if(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-elif-else in while, return in if + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + x = 9 + y = -2 + t = np.random.rand(3, 4) + fact = CtrlFactory(x, y, t) + ps_net = CtrlIfReturnElse() + pi_net = CtrlIfReturnElse() + fact.compare(ps_net, pi_net) + + +class CtrlWhileElseReturnInElif(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, t): + out = t + while x < 4: + x += 1 + if not x > 1: + out += t + elif x >= 1 and x < 2: + res = out + break + else: + out = self.mul(out, x) + return res + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_else_elif_return(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-elif-else in while, return in elif, use and not + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + use and not + ''' + x = -1 + t = np.random.rand(3, 4) + fact = CtrlFactory(x, t) + ps_net = CtrlWhileElseReturnInElif() + pi_net = CtrlWhileElseReturnInElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnInIfElif(Cell): + def __init__(self): + super().__init__() + self.square = P.Square() + self.add = P.Add() + + def construct(self, x): + while x < 5: + x += 2 + if self.double(x) < 3: + res = x + break + elif self.sqr(x) < 5: + res = x + break + else: + x -= 1 + return res + + def double(self, x): + return self.add(x, x) + + def sqr(self, x): + return self.square(x) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_return_func(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-elif-else in while, return in if elif, condition of func + 2. no parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as psjit + ''' + fact = CtrlFactory(3) + ps_net = CtrlWhileReturnInIfElif() + pi_net = CtrlWhileReturnInIfElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnInIfElse(Cell): + def __init__(self, a): + super().__init__() + self.param = Parameter(Tensor(a, dtype.float32), name="a") + self.add = P.Add() + + def construct(self, x): + out = x + while self.param > -5 and x > -5: + if self.param > 0: + res = out + break + elif self.param > -3: + out = self.add(out, x) + else: + res = out + break + self.param -= 1 + x -= 1 + return res + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_return_in_if_else(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-elif-else in while, return in if else + 2. parameter as condition + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + a = -7 + x = -7 + fact = CtrlFactory(x) + ps_net = CtrlWhileReturnInIfElse(a) + pi_net = CtrlWhileReturnInIfElse(a) + fact.compare(ps_net, pi_net) + + +class CtrlWhileReturnInElifElse(Cell): + def __init__(self, tensor): + super().__init__() + self.a = Parameter(tensor, name="t") + self.mul = P.Mul() + + def construct(self, x): + while x > 5: + if x > self.a: + x -= 2 + elif x == self.a: + return x + else: + return x + x -= 1 + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_return_in_elif_else(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-elif-else in while, return in elif else + 2. parameter as condition + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + t = Tensor(3, dtype.float32) + fact = CtrlFactory(7) + ps_net = CtrlWhileReturnInElifElse(t) + pi_net = CtrlWhileReturnInElifElse(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifReturnInElif(Cell): + def __init__(self): + super().__init__() + self.reduce = P.ReduceSum() + self.max = P.ReduceMax() + + def construct(self, x, y): + while y < 4: + y += 1 + if self.reduce(x) > 2: + x[1] -= 2 + elif self.reduce(x) > 1: + return x + elif self.max(x) > 2: + y += 1 + else: + x[0] += 1 + x = x * y + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_return_in_elif(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-2elif-else in while, return in elif + 2. use sum + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + x = [-2, -3, 4] + y = 2 + fact = CtrlFactory(x, y) + ps_net = CtrlWhile2ElifReturnInElif() + pi_net = CtrlWhile2ElifReturnInElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifReturnInElse(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, t, x): + self.add(t, t) + while t < 20: + t += 1 + if x.all(): + t += 4 + elif x.any(): + t += 3 + elif not x.all(): + t += 2 + else: + return t + return t + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_return_in_else(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with iwhile 2elif, return in else + 2. use sum + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + t = 0 + x = [True, False, False] + fact = CtrlFactory(t) + fact.ms_input.append(Tensor(x, dtype.bool_)) + ps_net = CtrlWhile2ElifReturnInElse() + pi_net = CtrlWhile2ElifReturnInElse() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifBInIfElif(Cell): + def __init__(self): + super().__init__() + self.cast = P.Cast() + + def construct(self, x): + while self.cast(x, dtype.bool_): + x -= 1 + if x < -1: + return x + elif x < 3: + return x + elif x < 9: + x -= 1 + else: + x -= 2 + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_return_in_ifelif(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-2elif-else in while, return in if elif + 2. parameter as condition + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + x = 12 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifBInIfElif() + pi_net = CtrlWhile2ElifBInIfElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifReturnIfElif(Cell): + def __init__(self): + super().__init__() + self.sqrt = F.sqrt + self.square = F.square + + def construct(self, x): + while x < 20: + if self.sqrt(x) > 4: + x = x + 1 + return x + elif x > 10: + x = x + 4 + return x + elif self.square(x) > 4: + x += 3 + else: + x += 2 + x += 1 + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_return_in_if_elif_usef(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-2elif-else in while, return in if elif + 2. use F + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + x = 1 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifReturnIfElif() + pi_net = CtrlWhile2ElifReturnIfElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifReturnInIfElse(Cell): + def __init__(self, t): + super().__init__() + self.assign = P.Assign() + self.weight = Parameter(Tensor(t, dtype.float32), name="w") + + def construct(self, x): + while x < 2: + x += 1 + if x < -4: + return x + elif x < -3: + self.assign(self.weight, x) + elif x < 0: + x += 2 + else: + return x + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_return_in_if_else(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-2elif-else in while, return in if else + 2. assign parameter + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + t = 4 + x = -4 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifReturnInIfElse(t) + pi_net = CtrlWhile2ElifReturnInIfElse(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifReturnInElifElse(Cell): + def __init__(self): + super().__init__() + self.print = P.Print() + + def construct(self, x): + while x < 20: + if x > 4: + self.print(x) + elif x >= 3: + x += 1 + elif x * 2 > 4: + return x + else: + return x + x += 1 + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_return_in_elif_else(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with if-2elif-else in while, return in elif else + 2. use print + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifReturnInElifElse() + pi_net = CtrlWhile2ElifReturnInElifElse() + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/control_flow/test_control_while_return_2.py b/tests/st/pi_jit/control_flow/test_control_while_return_2.py new file mode 100644 index 0000000000000000000000000000000000000000..e61d3af3adee353ae82a7c2183b898bdd59e929f --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_control_while_return_2.py @@ -0,0 +1,52 @@ +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +from mindspore import context, jit +from ..share.utils import allclose_nparray +import pytest + + +class CtrlWhile2ElifReturnInIf(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + while x[2] < 4: + x[2] -= 1 + if x[0] > 2: + return x + elif x[1] > 2: + x[2] += 1 + elif x[2] > 2: + x[1] += 1 + else: + x = self.mul(x, x) + return x + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_return_in_if(): + ''' + TEST_SUMMARY: + Description: + 1. create a net, with return in if, use get_item + 2. run the net + Expectation: + 1. the network run ok + 2. the network forward and backward result is the same as torch + ''' + x = [1, 2, 3] + context.set_context(mode=context.GRAPH_MODE) + jit(fn=CtrlWhile2ElifReturnInIf.construct, mode="PSJit") + ps_net = CtrlWhile2ElifReturnInIf() + ps_out = ps_net(Tensor(x, dtype.float32)) + + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=CtrlWhile2ElifReturnInIf.construct, mode="PIJit") + pi_net = CtrlWhile2ElifReturnInIf() + pi_out = pi_net(Tensor(x, dtype.float32)) + allclose_nparray(ps_out.asnumpy(), pi_out.asnumpy(), 0.001, 0.001) diff --git a/tests/st/pi_jit/control_flow/test_while_continue.py b/tests/st/pi_jit/control_flow/test_while_continue.py new file mode 100644 index 0000000000000000000000000000000000000000..b95cbf01318ceb72ac64f0943c2a476fc0b9b6b3 --- /dev/null +++ b/tests/st/pi_jit/control_flow/test_while_continue.py @@ -0,0 +1,593 @@ +import mindspore.nn as nn +from mindspore.nn import Cell +from mindspore.common import dtype +from mindspore.common import Tensor +import mindspore.ops.operations as P +import mindspore.ops.functional as F +from mindspore.common import Parameter +import numpy as np +from .ctrl_factory import CtrlFactory +import pytest + + +class CtrlWhileIfContinue(Cell): + def __init__(self): + super().__init__() + self.loop = Parameter(Tensor(1, dtype.float32), name="loop") + + def construct(self, x): + while self.loop < 5: + self.loop += 1 + if x > 1: + x += 1 + continue + x += 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_if_continue_not_relevant_gt(): + ''' + Description: test control flow, loop is parameter in init + if-continue variable is x, different from loop, use cmp operator > + Expectation: No exception. + ''' + fact = CtrlFactory(-2) + ps_net = CtrlWhileIfContinue() + pi_net = CtrlWhileIfContinue() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueIn(Cell): + def __init__(self): + super().__init__() + self.addn = P.AddN() + + def construct(self, x): + s = x + t = x + 1 + tensor_list = [x, x] + while len(tensor_list) < 4: + tensor_list.append(x) + a = self.addn(tensor_list) + x += 1 + if t in tensor_list: + continue + s += a + return s + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_in_continue(): + ''' + Description: test control flow while continue, use member operator in + Expectation: No exception. + ''' + fact = CtrlFactory(2) + ps_net = CtrlWhileContinueIn() + pi_net = CtrlWhileContinueIn() + fact.compare(ps_net, pi_net) + + +class CtrlWhileCast(Cell): + def __init__(self): + super().__init__() + self.cast = P.Cast() + + def construct(self, x, loop): + while loop >= 3: + loop -= 2 + if self.cast(x, dtype.bool_): + continue + return loop + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_continue_cast(): + ''' + Description: test control flow, use op cast + Expectation: No exception. + ''' + fact = CtrlFactory(1, 7) + ps_net = CtrlWhileCast() + pi_net = CtrlWhileCast() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueInIf(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + while x < 2: + x += 1 + if x >= 2: + continue + elif x == 1: + x = self.mul(x, x) + return x + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_continue_in_if(): + ''' + Description: test control flow, while once continue + Expectation: No exception. + ''' + fact = CtrlFactory(-3) + ps_net = CtrlWhileContinueInIf() + pi_net = CtrlWhileContinueInIf() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueInElif(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x): + out = self.mul(x, x) + while x < 2: + x += 2 + if x <= 0: + out += x + elif x != 1: + continue + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_continue_in_elif(): + """ + Description: + Test Steps: + 1. create a net which contains while and if, elif in while + 2. run net forward and backward + Expectation: + 1. the network train return ok + 2. the network forward and backward is the same as psjit + """ + fact = CtrlFactory(-3) + ps_net = CtrlWhileContinueInElif() + pi_net = CtrlWhileContinueInElif() + fact.compare(ps_net, pi_net) + + +class CtrlElifTwoContinue(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, t): + out = t + while x > 0: + x -= 1 + if x < 2: + continue + elif x < 1: + continue + out = self.mul(t, out) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_elif_two_continue(): + ''' + Description: test control flow, if-elif in while, both continue + Expectation: No exception. + ''' + fact = CtrlFactory(3, [1, 2, 3]) + ps_net = CtrlElifTwoContinue() + pi_net = CtrlElifTwoContinue() + fact.compare(ps_net, pi_net) + + +class CtrlElifContinueOnce(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, t): + out = t + while x < 3: + x -= 2 + if x > 4: + x -= 1 + elif x > 6: + x += 1 + out = self.mul(out, t) + continue + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_once_elif_continue(): + ''' + Description: test control flow, if-elif in while, continue at last + Expectation: No exception. + ''' + fact = CtrlFactory(8, [2, 3, 4]) + ps_net = CtrlElifContinueOnce() + pi_net = CtrlElifContinueOnce() + fact.compare(ps_net, pi_net) + + +class CtrlIfContinueElse(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, y, t): + out = t + while x + y > 4: + if x > 1 and y > 1: + continue + elif x > 4 or y > 2: + out += t + else: + out = self.mul(out, t) + x -= 2 + y += 1 + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_else_continue_in_if(): + ''' + Description: test control flow, if-elif-else in while + Expectation: No exception. + ''' + x = 9 + y = -2 + t = np.random.rand(3, 4) + fact = CtrlFactory(x, y, t) + ps_net = CtrlIfContinueElse() + pi_net = CtrlIfContinueElse() + fact.compare(ps_net, pi_net) + + +class CtrlWhileElseContinueInElif(Cell): + def __init__(self): + super().__init__() + self.mul = P.Mul() + + def construct(self, x, t): + out = t + while x < 4: + x += 1 + if not x > 1: + out += t + elif 1 <= x < 2: + continue + else: + out = self.mul(out, x) + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_else_elif_continue(): + ''' + Description: test control flow, if-elif-else in while, continue in elif + use and, not + Expectation: No exception. + ''' + x = -1 + t = np.random.rand(3, 4) + fact = CtrlFactory(x, t) + ps_net = CtrlWhileElseContinueInElif() + pi_net = CtrlWhileElseContinueInElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueInIfElse(Cell): + def __init__(self, a): + super().__init__() + self.param = Parameter(Tensor(a, dtype.float32), name="a") + self.add = P.Add() + + def construct(self, x): + out = x + while self.param > -5 and x > -5: + if self.param > 0: + continue + elif self.param > -3: + out = self.add(out, x) + else: + continue + self.param -= 1 + x -= 1 + return out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_continue_in_if_else(): + ''' + Description: test control flow, if-elif-else in while + continue in if else, param as condition + Expectation: No exception. + ''' + a = -7 + x = -7 + fact = CtrlFactory(x) + ps_net = CtrlWhileContinueInIfElse(a) + pi_net = CtrlWhileContinueInIfElse(a) + fact.compare(ps_net, pi_net) + + +class CtrlWhileContinueInElifElse(Cell): + def __init__(self, t): + super().__init__() + self.a = Parameter(Tensor(t, dtype.float32), name="t") + self.mul = P.Mul() + + def construct(self, x): + while x > 5: + if x > self.a: + x -= 2 + elif x == self.a: + continue + else: + continue + x -= 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_continue_in_elif_else(): + ''' + Description: test control flow, if-elif-else in while + continue in elif and else, compare with param + Expectation: No exception. + ''' + t = 3 + fact = CtrlFactory(7) + ps_net = CtrlWhileContinueInElifElse(t) + pi_net = CtrlWhileContinueInElifElse(t) + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifContinueInElif(Cell): + def __init__(self): + super().__init__() + self.reduce = P.ReduceSum() + self.max = P.ReduceMax() + + def construct(self, x, y): + while y < 4: + y += 1 + if self.reduce(x) > 2: + x[1] -= 2 + elif self.reduce(x) > 1: + continue + elif self.max(x) > 2: + y += 1 + else: + x[0] += 1 + x = x * y + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_continue_in_elif_else2(): + ''' + Description: test control flow, if-elif-else in while + continue in elif and else, compare with param + Expectation: No exception. + ''' + x = [-2, -3, 4] + y = 2 + fact = CtrlFactory(x, y) + ps_net = CtrlWhile2ElifContinueInElif() + pi_net = CtrlWhile2ElifContinueInElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifContinueInElse(Cell): + def __init__(self): + super().__init__() + self.add = P.Add() + + def construct(self, t, x): + self.add(t, t) + while t < 20: + t += 1 + if x.all(): + t += 4 + elif x.any(): + t += 3 + elif not x.all(): + t += 2 + else: + continue + return t + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_continue_in_else(): + ''' + Description: test control flow, if-2elif-else in while + use tensor.any, tensor.all + Expectation: No exception. + ''' + t = 0 + x = [True, False, False] + fact = CtrlFactory(t) + fact.ms_input.append(Tensor(x, dtype.bool_)) + ps_net = CtrlWhile2ElifContinueInElse() + pi_net = CtrlWhile2ElifContinueInElse() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifBInIfElif(Cell): + def __init__(self): + super().__init__() + self.cast = P.Cast() + + def construct(self, x): + while self.cast(x, dtype.bool_): + x -= 1 + if x < -1: + continue + elif x < 3: + continue + elif x < 9: + x -= 1 + else: + x -= 2 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_continue_in_ifelif(): + ''' + Description: test control flow, if-2elif-else in while + continue in if elif, use cast to bool + Expectation: No exception. + ''' + x = 12 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifBInIfElif() + pi_net = CtrlWhile2ElifBInIfElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifContinueIfElif(Cell): + def __init__(self): + super().__init__() + self.sqrt = F.sqrt + self.square = F.square + + def construct(self, x): + while x < 20: + if self.sqrt(x) > 4: + x = x + 1 + continue + elif x > 10: + x = x + 4 + continue + elif self.square(x) > 4: + x += 3 + else: + x += 2 + x += 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_continue_in_if_elif_usef(): + ''' + Description: test control flow, if-2elif-else in while + continue in if elif, use F.sqrt, F.square + Expectation: No exception. + ''' + x = 1 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifContinueIfElif() + pi_net = CtrlWhile2ElifContinueIfElif() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2ElifContinueInElifElse(Cell): + def __init__(self): + super().__init__() + self.print = P.Print() + + def construct(self, x): + while x < 20: + if x > 4: + self.print(x) + elif x >= 3: + x += 1 + elif x * 2 > 4: + continue + else: + continue + x += 1 + return x + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2elif_continue_in_elif_else(): + ''' + Description: test control flow, if-2elif-else in while + continue in elif, else, use P.Print + Expectation: No exception. + ''' + x = 3 + fact = CtrlFactory(x) + ps_net = CtrlWhile2ElifContinueInElifElse() + pi_net = CtrlWhile2ElifContinueInElifElse() + fact.compare(ps_net, pi_net) + + +class CtrlWhile2IfContinueTwo(Cell): + def __init__(self): + super().__init__() + self.cell_list = nn.CellList() + self.cell_list.append(nn.ReLU()) + self.cell_list.append(nn.Tanh()) + self.cell_list.append(nn.Sigmoid()) + + def construct(self, t, x): + out = t + while x < 3: + add = self.cell_list[x](t) + out = out + add + x += 1 + if add > 1: + x += 1 + if add < 1: + continue + return out + + +@pytest.mark.skip(reason="ata_expected = array(4, data_me = array(2.6165862), result match error") +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_control_flow_while_2if_continue_second(): + ''' + Description: test control flow, 2if in while + continue in second if, use cell list + Expectation: No exception. + ''' + x = 0 + t = 1 + fact = CtrlFactory(t) + fact.ms_input.append(x) + ps_net = CtrlWhile2IfContinueTwo() + pi_net = CtrlWhile2IfContinueTwo() + fact.compare(ps_net, pi_net) diff --git a/tests/st/pi_jit/dynamic_shape/__init__.py b/tests/st/pi_jit/dynamic_shape/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/st/pi_jit/dynamic_shape/test_dynamic.py b/tests/st/pi_jit/dynamic_shape/test_dynamic.py new file mode 100644 index 0000000000000000000000000000000000000000..5d690789b1e7aa10f02aac8a36bf82667518d264 --- /dev/null +++ b/tests/st/pi_jit/dynamic_shape/test_dynamic.py @@ -0,0 +1,53 @@ +from mindspore._c_expression import update_pijit_default_config +from mindspore.nn import Cell +from mindspore import ops +from mindspore import context, jit +from mindspore.common import dtype +from mindspore.common import Tensor +import numpy as np +import pytest + +update_pijit_default_config(print_after_all=True) +class DynamicFactory: + def __init__(self, ps_net): + self.ps_net = ps_net + + def forward_cmp(self, inputs): + context.set_context(mode=context.PYNATIVE_MODE, save_graphs=True, save_graphs_path="./ir") + jit(fn=self.ps_net.construct, mode="PIJit") + self.ps_net(inputs) + +class Net7(Cell): + def __init__(self): + super().__init__() + self.pow_op = ops.Pow() + + def construct(self, x): + a = self.pow_op(x, 0.0) + #print(type(a),"hejianheng") + b = ops.rrelu(a) + return b + + +@pytest.mark.skip +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_frontend_optimize(): + ''' + TEST_SUMMARY: + Description: + 1. create a net with pow rrelu + 2. run twice for Resize + 3. set inputs for pow frontend pass + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net7() + + #x = np.random.randn(3, 4, 5).astype(np.float32) + #s = np.random.randn(3, 4, 5).astype(np.float32) + d = Tensor(np.random.randn(3, 4, 5), dtype=dtype.float32) + fact = DynamicFactory(ps_net) + fact.forward_cmp(d) diff --git a/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_net.py b/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_net.py new file mode 100644 index 0000000000000000000000000000000000000000..81bdc971c616274f53297af049c8607fed3e4a25 --- /dev/null +++ b/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_net.py @@ -0,0 +1,269 @@ +from mindspore.nn import Cell +from mindspore import ops +from mindspore import context, jit +from mindspore.common import dtype +from mindspore.common import Tensor +import numpy as np +from ..share.grad import GradOfAllInputs +from ..share.compare_base import comparebase +import pytest + + +class DynamicFactory: + def __init__(self, ps_net, pi_net): + self.ps_net = ps_net + self.pi_net = pi_net + + def forward_cmp(self, *inputs): + ms_inputs = [] + for i in inputs: + msx = Tensor(i) + ms_inputs.append(msx) + context.set_context(mode=context.GRAPH_MODE) + jit(fn=self.ps_net.construct, mode="PSJit") + ps_out = self.ps_net(*ms_inputs) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=self.pi_net.construct, mode="PIJit") + pi_out = self.pi_net(*ms_inputs) + comparebase.compare_nparray(pi_out.asnumpy(), ps_out.asnumpy(), 0.001, 0.001) + + def grad_cmp(self, *inputs, sens): + ms_inputs = [] + ms_sens = Tensor(sens) + for i in inputs: + msx = Tensor(i) + ms_inputs.append(msx) + + context.set_context(mode=context.GRAPH_MODE) + jit(fn=self.ps_net.construct, mode="PSJit") + grad_net = GradOfAllInputs(self.ps_net) + ps_grad = grad_net(*ms_inputs, ms_sens) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=self.pi_net.construct, mode="PIJit") + grad_net = GradOfAllInputs(self.pi_net) + pi_grad = grad_net(*ms_inputs, ms_sens) + for s, i in zip(ps_grad, pi_grad): + comparebase.compare_nparray(i.asnumpy(), s.asnumpy(), 0.0001, 0.0001) + + +class Net1(Cell): + def __init__(self): + super().__init__() + self.flatten = ops.Flatten() + + def construct(self, x, y): + a = x + y + b = self.flatten(a) + out = ops.square(b) + return out + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_set_inputs(): + ''' + TEST_SUMMARY: + Description: + 1. create a net use maximum + 2. set_inputs + 3. change rank, run twice + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net1() + d3 = Tensor(shape=[None, None, None], dtype=dtype.float32) + ps_net.set_inputs(d3, d3) + pi_net = Net1() + pi_net.set_inputs(d3, d3) + fact = DynamicFactory(ps_net, pi_net) + x = np.random.randn(3, 4, 5).astype(np.float32) + y = np.random.randn(3, 4, 5).astype(np.float32) + s = np.random.randn(3, 20).astype(np.float32) + fact.forward_cmp(x, y) + fact.grad_cmp(x, y, sens=s) + + # run twice + x = np.random.randn(3, 4, 5, 2).astype(np.float32) + y = np.random.randn(3, 4, 5, 2).astype(np.float32) + s = np.random.randn(3, 40).astype(np.float32) + d4 = Tensor(shape=[None, None, None, None], dtype=dtype.float32) + ps_net.set_inputs(d4, d4) + pi_net.set_inputs(d4, d4) + fact.forward_cmp(x, y) + fact.grad_cmp(x, y, sens=s) + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_not_set_inputs(): + ''' + TEST_SUMMARY: + Description: + 1. create a net use flatten + 2. not set_inputs + 3. change rank, run twice + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net1() + pi_net = Net1() + + fact = DynamicFactory(ps_net, pi_net) + x = np.random.randn(3, 4, 5).astype(np.float32) + y = np.random.randn(3, 4, 5).astype(np.float32) + s = np.random.randn(3, 20).astype(np.float32) + fact.forward_cmp(x, y) + fact.grad_cmp(x, y, sens=s) + + # run twice + x = np.random.randn(3, 4, 5, 2).astype(np.float32) + y = np.random.randn(3, 4, 5, 2).astype(np.float32) + s = np.random.randn(3, 40).astype(np.float32) + fact.forward_cmp(x, y) + fact.grad_cmp(x, y, sens=s) + + +class Net4(Cell): + def __init__(self, new_dtype): + super().__init__() + self.red = ops.ReduceSum(keep_dims=False) + self.dtype = new_dtype + + def construct(self, x, axis): + s1 = x.shape + if self.dtype == dtype.bool_: + x = x.astype(dtype.float32) + dyrank = self.red(x, axis) + if self.dtype == dtype.bool_: + dyrank = dyrank.astype(self.dtype) + r = ops.rank(dyrank) + s2 = ops.shape(dyrank) + return r, s1, s2 + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_op_all_dtypes(): + ''' + TEST_SUMMARY: + Description: + 1. create a net with reduce, get dynamic rank + 2. use rank, shape, tensor.shape + 3. run with all dtypes + Expectation: + 1. the net run ok + 2. the result is correct + ''' + di = Tensor(shape=[None], dtype=dtype.int32) + y = Tensor([1,], dtype=dtype.int32) + all_types = [dtype.float16, dtype.float32, dtype.float64,\ + dtype.int8, dtype.int16, dtype.int32, dtype.int64,\ + dtype.complex64, dtype.complex128] + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=Net4.construct, mode="PIJit") + for dt in all_types: + d1 = Tensor(shape=[None, None], dtype=dt) + x = Tensor([[1, 1], [1, 1]], dtype=dt) + net = Net4(dt) + net.set_inputs(d1, di) + out = net(x, y) + assert out[0] == 1 + assert out[1] == (2, 2) + assert out[2] == (2,) + + +class Net5(Cell): + def __init__(self): + super().__init__() + self.addn = ops.AddN() + + def construct(self, x, y): + z = self.addn((x, y)) + out = self.addn((x, y, z)) + return out + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_same_prim_twice(): + ''' + TEST_SUMMARY: + Description: + 1. create a net with addn, set_inputs + 2. call the same primitive twice + 3. run the net also twice + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net5() + pi_net = Net5() + d = Tensor(shape=[None, None], dtype=dtype.float32) + x = np.random.rand(3, 4).astype(np.float32) + y = np.random.rand(3, 4).astype(np.float32) + s = np.random.rand(3, 4).astype(np.float32) + ps_net.set_inputs(d, d) + pi_net.set_inputs(d, d) + fact = DynamicFactory(ps_net, pi_net) + fact.forward_cmp(x, y) + fact.grad_cmp(x, y, sens=s) + # run twice + x = np.random.rand(3, 4, 3).astype(np.float32) + y = np.random.rand(3, 4, 3).astype(np.float32) + s = np.random.rand(3, 4, 3).astype(np.float32) + d = Tensor(shape=[None, None, None], dtype=dtype.float32) + ps_net.set_inputs(d, d) + pi_net.set_inputs(d, d) + fact.forward_cmp(x, y) + fact.grad_cmp(x, y, sens=s) + + +class Net7(Cell): + def __init__(self): + super().__init__() + self.pow_op = ops.Pow() + + def construct(self, x): + a = self.pow_op(x, 0.0) + b = ops.rrelu(a) + return b + + +@pytest.mark.skip(reason="mindspore/ccsrc/pipeline/jit/ps/validator.cc:216 CheckDeadNodeInOutputRecursively") +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_frontend_optimize(): + ''' + TEST_SUMMARY: + Description: + 1. create a net with pow rrelu + 2. run twice for Resize + 3. set inputs for pow frontend pass + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net7() + pi_net = Net7() + + x = np.random.randn(3, 4, 5).astype(np.float32) + s = np.random.randn(3, 4, 5).astype(np.float32) + d = Tensor(shape=[None, None, None],\ + dtype=dtype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = DynamicFactory(ps_net, ps_net) + fact.forward_cmp(x) + fact.grad_cmp(x, sens=x) + + x = np.random.rand(6, 5, 5).astype(np.float32) + s = np.random.rand(6, 5, 5).astype(np.float32) + fact.forward_cmp(x) + fact.grad_cmp(x, sens=s) diff --git a/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_tensor_getitem.py b/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_tensor_getitem.py new file mode 100644 index 0000000000000000000000000000000000000000..7259537a1fc4e624ce3f5162ea565ad5eac66550 --- /dev/null +++ b/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_tensor_getitem.py @@ -0,0 +1,589 @@ +from mindspore.nn import Cell +from mindspore import context, jit +from mindspore.common import dtype as mstype +from mindspore.common import Tensor +from mindspore.common import mutable +import numpy as np +from ..share.compare_base import comparebase +from ..share.grad import GradOfAllInputs +import pytest + + +class IndexFactory: + def __init__(self, ps_net, pi_net): + self.ps_net = ps_net + self.pi_net = pi_net + + def compare_forward(self, *inputs): + context.set_context(mode=context.GRAPH_MODE) + jit(fn=self.ps_net.construct, mode="PSJit") + ps_out = self.ps_net(*inputs) + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=self.pi_net.construct, mode="PIJit") + pi_out = self.pi_net(*inputs) + + # compare + comparebase.compare_nparray(pi_out.asnumpy(), ps_out.asnumpy(), 0.0001, 0.0001) + + + grad_net = GradOfAllInputs(self.ps_net, False) + grad_net(*inputs) + + + def compare_forward_grad(self, *inputs): + context.set_context(mode=context.GRAPH_MODE) + jit(fn=self.ps_net.construct, mode="PSJit") + ps_out = self.ps_net(*inputs) + grad_net = GradOfAllInputs(self.ps_net, False) + ps_grads = grad_net(*inputs) + + context.set_context(mode=context.PYNATIVE_MODE) + jit(fn=self.pi_net.construct, mode="PIJit") + pi_out = self.pi_net(*inputs) + grad_net = GradOfAllInputs(self.pi_net, False) + pi_grads = grad_net(*inputs) + + # compare + comparebase.compare_nparray(pi_out.asnumpy(), ps_out.asnumpy(), 0.0001, 0.0001) + + for s, i in zip(ps_grads, pi_grads): + if i is None: + continue + comparebase.compare_nparray(i.asnumpy(), s.asnumpy(), 0.0001, 0.0001) + + +class Net1(Cell): + def __init__(self): + super().__init__() + self.n = 2 + + def construct(self, x): + out = x[...] * self.n + return out + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_ellipsis(): + ''' + Description: + 1. dynamic rank getitem ellipsis + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net1() + pi_net = Net1() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net4(Cell): + def __init__(self): + super().__init__() + self.n = None + + def construct(self, x): + out = x[self.n] + return out + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_none(): + ''' + Description: + 1. dynamic rank getitem bool + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net4() + pi_net = Net4() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net6(Cell): + def __init__(self): + super().__init__() + self.idx = -1 + + def construct(self, x): + out = x[self.idx] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_int(): + ''' + Description: + 1. dynamic rank getitem -1 + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net6() + pi_net = Net6() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net7(Cell): + def __init__(self): + super().__init__() + self.n = 2 + + def construct(self, x, y): + idx = y.shape[0] - y.shape[1] + out = x[idx] + return out * self.n + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_shape(): + ''' + Description: + 1. dynamic rank getitem shape[0] - shape[1] + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net7() + pi_net = Net7() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + y = Tensor([[1, 2]], dtype=mstype.int32) + d = Tensor(None, dtype=mstype.float32) + dy = Tensor(shape=[None, None], dtype=mstype.int32) + ps_net.set_inputs(d, dy) + pi_net.set_inputs(d, dy) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x, y) + + +class Net8(Cell): + def __init__(self): + super().__init__() + self.n = 2 + + def construct(self, x, y): + out = x[y] * self.n + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_tensor_int(): + ''' + Description: + 1. dynamic rank getitem Tensor[int] + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net8() + pi_net = Net8() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + y = Tensor([0, 1], dtype=mstype.int32) + d = Tensor(None, dtype=mstype.float32) + dy = Tensor(shape=[None], dtype=mstype.int32) + ps_net.set_inputs(d, dy) + pi_net.set_inputs(d, dy) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x, y) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_tensor_bool(): + ''' + Description: + 1. dynamic rank getitem Tensor[bool] + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net8() + pi_net = Net8() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + y = Tensor([False, True], dtype=mstype.bool_) + d = Tensor(None, dtype=mstype.float32) + dy = Tensor(shape=[None], dtype=mstype.bool_) + ps_net.set_inputs(d, dy) + pi_net.set_inputs(d, dy) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x, y) + + +class Net9(Cell): + def __init__(self): + super().__init__() + self.a = -4 + self.b = -1 + + def construct(self, x): + out = x[self.a:self.b] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_slice_int(): + ''' + Description: + 1. dynamic rank getitem -4:-1 + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net9() + pi_net = Net9() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net10(Cell): + def __init__(self): + super().__init__() + self.a = 0 + self.b = 1 + + def construct(self, x, y): + out = x[y.shape[self.a]:y.shape[self.b]] + return out + + +@pytest.mark.skip(reason="AssertionError, result not match") +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_slice_shape(): + ''' + Description: + 1. dynamic rank getitem shape[0]:shape[1] + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net10() + pi_net = Net10() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + y = Tensor(np.random.rand(2, 4), dtype=mstype.int32) + d = Tensor(None, dtype=mstype.float32) + dy = Tensor(shape=[None, None], dtype=mstype.int32) + ps_net.set_inputs(d, dy) + pi_net.set_inputs(d, dy) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x, y) + + +class Net11(Cell): + def __init__(self): + super().__init__() + self.n = 1 + + def construct(self, x, y): + out = x[self.n:y] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_slice_tensor(): + ''' + Description: + 1. dynamic rank getitem 1:Tensor(2) + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net11() + pi_net = Net11() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + y = Tensor(2, dtype=mstype.int64) + d = Tensor(None, dtype=mstype.float32) + dy = Tensor(None, dtype=mstype.int64) + ps_net.set_inputs(d, dy) + pi_net.set_inputs(d, dy) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x, y) + + +class Net12(Cell): + def __init__(self): + super().__init__() + self.n = 1 + + def construct(self, x): + out = x[self.n:None] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_slice_none(): + ''' + Description: + 1. dynamic rank getitem 1:none + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net12() + pi_net = Net12() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net13(Cell): + def __init__(self): + super().__init__() + self.idx = [1, 0] + + def construct(self, x): + out = x[self.idx] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_list_int(): + ''' + Description: + 1. dynamic rank getitem 1:none + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net13() + pi_net = Net13() + x = Tensor(np.random.rand(4, 3, 2), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net14(Cell): + def __init__(self): + super().__init__() + self.idx = [True, False, True, False] + + def construct(self, x): + out = x[self.idx] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_list_bool(): + ''' + Description: + 1. dynamic rank getitem list[bool] + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net14() + pi_net = Net14() + x = Tensor(np.random.rand(4, 3, 2), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net15(Cell): + def __init__(self): + super().__init__() + self.idx = mutable([2, 1, 0]) + + def construct(self, x): + out = x[self.idx] + return out + + +@pytest.mark.skip(reason="runtime error in mstorch-infer-r2.3") +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_list_mutable(): + ''' + Description: + 1. dynamic rank getitem mutable(list) + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net15() + pi_net = Net15() + x = Tensor(np.random.rand(3, 3, 2), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net16(Cell): + def __init__(self): + super().__init__() + self.idx = () + + def construct(self, x): + out = x[self.idx] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_empty_tuple(): + ''' + Description: + 1. dynamic rank getitem empty tuple + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net16() + pi_net = Net16() + x = Tensor(np.random.rand(3, 3, 2), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net17(Cell): + def __init__(self): + super().__init__() + self.n = None + + def construct(self, x): + out = x[..., True, self.n] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_tuple_basic(): + ''' + Description: + 1. dynamic rank getitem (..., True, None) + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net17() + pi_net = Net17() + x = Tensor(np.random.rand(3, 3, 2), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) + + +class Net19(Cell): + def __init__(self): + super().__init__() + self.idx3 = [2] + + def construct(self, x, y): + out = x[y.shape[0], 1:2, self.idx3] + return out + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_tuple_complex(): + ''' + Description: + 1. dynamic rank getitem shape[0], 1:2, [1, 2] + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net19() + pi_net = Net19() + x = Tensor(np.random.rand(6, 5, 6), dtype=mstype.float32) + y = Tensor(np.random.rand(3,), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + dy = Tensor(shape=[None], dtype=mstype.float32) + ps_net.set_inputs(d, dy) + pi_net.set_inputs(d, dy) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x, y) + + +class Net20(Cell): + def __init__(self): + super().__init__() + self.n = 2 + + def construct(self, x, y): + out = x[y, 1:2] + return out * self.n + + +@pytest.mark.skip(reason="result not match in mstorch-infer-r2.3") +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_getitem_tuple_tensor(): + ''' + Description: + 1. dynamic rank getitem, Tensor(3), 1:2 + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net20() + pi_net = Net20() + x = Tensor(np.random.rand(6, 5, 6), dtype=mstype.float32) + y = Tensor(3, dtype=mstype.int64) + d = Tensor(None, dtype=mstype.float32) + dy = Tensor(None, dtype=mstype.int64) + ps_net.set_inputs(d, dy) + pi_net.set_inputs(d, dy) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x, y) diff --git a/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_tensor_setitem.py b/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_tensor_setitem.py new file mode 100644 index 0000000000000000000000000000000000000000..465ce1ba7a173611476c7182228ddea897d15cd6 --- /dev/null +++ b/tests/st/pi_jit/dynamic_shape/test_dynamic_rank_tensor_setitem.py @@ -0,0 +1,38 @@ +from mindspore.nn import Cell +from mindspore.common import dtype as mstype +from mindspore.common import Tensor +import numpy as np +from .test_dynamic_rank_tensor_getitem import IndexFactory +import pytest + + +class Net1(Cell): + def __init__(self): + super().__init__() + self.n = 2 + + def construct(self, x): + x[...] = 1 + out = x + return out * self.n + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_rank_setitem_ellipsis(): + ''' + Description: + 1. dynamic rank setitem ellipsis + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + ps_net = Net1() + pi_net = Net1() + x = Tensor(np.random.rand(2, 3, 4), dtype=mstype.float32) + d = Tensor(None, dtype=mstype.float32) + ps_net.set_inputs(d) + pi_net.set_inputs(d) + fact = IndexFactory(ps_net, pi_net) + fact.compare_forward_grad(x) diff --git a/tests/st/pi_jit/dynamic_shape/test_dynamic_tensor.py b/tests/st/pi_jit/dynamic_shape/test_dynamic_tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..1d4e401b96c737b611fdce35eddd6e296a699ad3 --- /dev/null +++ b/tests/st/pi_jit/dynamic_shape/test_dynamic_tensor.py @@ -0,0 +1,33 @@ +from mindspore.common import Tensor +from mindspore.common import dtype as mstype +import pytest + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_tensor_shape_not_none(): + ''' + Description: + 1. create a tensor, all args are int + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + Tensor(input_data=None, dtype=mstype.float32, shape=[2, 4], init=1) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_tensor_shape(): + ''' + Description: + 1. create a tensor, all args are None + Expectation: + 1. the net run ok + 2. the result is the same as psjit + ''' + x = Tensor(dtype=mstype.float32, shape=[None, 4]) + s = x.shape + assert s == (-1, 4) diff --git a/tests/st/pi_jit/operation/test_abs.py b/tests/st/pi_jit/operation/test_abs.py new file mode 100644 index 0000000000000000000000000000000000000000..e5c137c778cc99a3fcd00553f84909d5b9a150bc --- /dev/null +++ b/tests/st/pi_jit/operation/test_abs.py @@ -0,0 +1,208 @@ +import numpy as np +import pytest +from ..share.ops.primitive.abs_ops import AbsFactory +from ..dynamic_shape_operations.abs import AbsDynamicShapeMock + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_abs_input_1(): + """ + Description: + 1.abs算子正反向测试,input_shape=(1,), dtype:fp32 + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1,) + fact = AbsFactory(input_shape, dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_abs_input_1x1(): + """ + Description: + 1.abs算子正反向测试,input_shape=(1,1), dtype=uint8. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 1) + dtype = np.uint8 + fact = AbsFactory(input_shape, dtype=dtype) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_abs_input_256x256x256(): + """ + Description: + 1.abs算子正反向测试,input_shape=(256,256,256), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (256, 256, 256) + dtype = np.float32 + fact = AbsFactory(input_shape, dtype=dtype) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_abs_input_1x1x1x1(): + """ + Description: + 1.abs算子正反向测试,input_shape=(1,1,1,1), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 1, 1, 1) + dtype = np.float32 + fact = AbsFactory(input_shape, dtype=dtype) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_abs_input_32x2x16x8(): + """ + Description: + 1.abs算子正反向测试,input_shape=(32, 2, 16, 8), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (32, 2, 16, 8) + dtype = np.float32 + fact = AbsFactory(input_shape, dtype=dtype) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_abs_input_1x1x1x1x1(): + """ + Description: + 1.abs算子正反向测试,input_shape=(1, 1, 1, 1, 1), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 1, 1, 1, 1) + dtype = np.float32 + fact = AbsFactory(input_shape, dtype=dtype) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_abs_input_32x8x16x8x32(): + """ + Description: + 1.abs算子正反向测试,input_shape=(32,8,16,8,32), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (32, 8, 16, 8, 32) + dtype = np.float32 + fact = AbsFactory(input_shape, dtype=dtype) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_abs_input_32x8_dtype_fp16(): + """ + Description: + 1.abs算子正反向测试,input_shape=(32,8), dtype=fp16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (32, 8) + dtype = np.float16 + fact = AbsFactory(input_shape, dtype=dtype) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_abs_4d_float32(): + """ + Description: + 1.test abs with dynamic shape input, dtype=float32, 4d. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_np = np.random.randn(8, 8, 8, 8).astype(np.float32) + indices_np = np.unique(np.random.randint(0, 3, size=6).astype(np.int32)) + fact = AbsDynamicShapeMock(input_np, indices_np) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_abs_3d_float32(): + """ + Description: + 1.test abs with dynamic shape input, dtype=float32, 3d. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_np = np.random.randn(128, 128, 32).astype(np.float32) + indices_np = np.unique(np.random.randint(0, 1, size=5).astype(np.int32)) + fact = AbsDynamicShapeMock(input_np, indices_np) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_abs_6d_float16(): + """ + Description: + 1.test abs with dynamic shape input, dtype=float32, 6d. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_np = np.random.randn(3, 6, 6, 6, 4, 4).astype(np.float32) + indices_np = np.unique(np.random.randint(1, 3, size=2).astype(np.int32)) + fact = AbsDynamicShapeMock(input_np, indices_np) + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_cos.py b/tests/st/pi_jit/operation/test_cos.py new file mode 100644 index 0000000000000000000000000000000000000000..e51bcd4d3198072f1b448a7cf5e188ebd40d2faf --- /dev/null +++ b/tests/st/pi_jit/operation/test_cos.py @@ -0,0 +1,124 @@ +import numpy as np +import pytest +from mindspore import Tensor +from ..share.ops.primitive.cos_ops import CosMock +from ..dynamic_shape_operations.cos import CosDynamicShapeFactory + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_cos_input_64x3125(): + ''' + Description: cos算子测试,inputa_shape=(64, 3125) + + Expectation: + 1. output return ok and the accuracy is consistent with the benchmark. + ''' + input_x = Tensor(np.random.randn(64, 3125).astype(np.float32)) + fact = CosMock(inputs=[input_x]) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_cos_input_shape(): + ''' + Description: cos算子测试,inputa_shape=1D--->6D + + Expectation: + 1. output return ok and the accuracy is consistent with the benchmark. + ''' + input_x = Tensor(np.random.randn(7,).astype(np.float32)) + fact = CosMock(inputs=[input_x]) + fact.forward_cmp() + + input_x = Tensor(np.random.randn(2, 3, 4).astype(np.float32)) + fact = CosMock(inputs=[input_x]) + fact.forward_cmp() + + input_x = Tensor(np.random.randn(2, 3, 4, 5).astype(np.float32)) + fact = CosMock(inputs=[input_x]) + fact.forward_cmp() + + input_x = Tensor(np.random.randn(6, 2, 3, 4, 5).astype(np.float32)) + fact = CosMock(inputs=[input_x]) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_cos_input_shape_6d(): + ''' + Description: cos算子测试,inputa_shape 6D + + Expectation: + 1. output return ok and the accuracy is consistent with the benchmark. + ''' + input_x = Tensor(np.random.randn(2, 3, 7, 8, 4, 5).astype(np.float32)) + fact = CosMock(inputs=[input_x]) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_cos_input_float32(): + ''' + Description: Cos算子正反向dynamic shape测试,input_shape=(3, 5, 8, 10, 5), dtype=np.float32 + + Expectation: + 1. output return ok and the accuracy is consistent with the benchmark. + ''' + input_x = Tensor(np.random.randn(3, 5, 8, 10, 5).astype(np.float32)) + fact = CosMock(inputs=[input_x]) + fact.forward_dynamic_shape_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_cos_input_float16(): + ''' + Description: Cos算子正反向dynamic shape测试,input_shape=(3, 4, 5), dtype=np.float16 + + Expectation: + 1. output return ok and the accuracy is consistent with the benchmark. + ''' + input_x = Tensor(np.random.randn(3, 4, 5).astype(np.float64)) + fact = CosMock(inputs=[input_x]) + fact.forward_dynamic_shape_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_cos_float32(): + ''' + Description: test cos with dynamic shape input, dtype=float32 + + Expectation: + 1. output return ok and the accuracy is consistent with the benchmark. + ''' + input_x = Tensor(np.random.rand(2, 10, 5, 10).astype(np.float32)) + indices = Tensor(np.random.choice(3, 2, replace=False).astype(np.int32)) + fact = CosDynamicShapeFactory([input_x, indices], dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_cos_float16(): + ''' + Description: test cos with dynamic shape input, dtype=float16 + + Expectation: + 1. output return ok and the accuracy is consistent with the benchmark. + ''' + input_x = Tensor(np.random.rand(1, 1, 2, 4, 10).astype(np.float16)) + indices = Tensor(np.random.choice(3, 1, replace=False).astype(np.int32)) + fact = CosDynamicShapeFactory([input_x, indices], dtype=np.float16) + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_dense.py b/tests/st/pi_jit/operation/test_dense.py new file mode 100644 index 0000000000000000000000000000000000000000..974634b2239acb7e385ac332be513161149e64b0 --- /dev/null +++ b/tests/st/pi_jit/operation/test_dense.py @@ -0,0 +1,250 @@ +import numpy as np +import pytest +from ..share.ops.primitive.dense_ops import DenseFactory + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_102x44_in_44_out_32_2d_fp32(): + ''' + Description: + test operator dense input_shape is 2d , dtype is np.float32 + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(102, 44), in_channel=44, out_channel=32, dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_1_in_248_out_100_1d_fp16(): + ''' + Description: + test operator dense input_shapeis 1d, dtype is np.float16 + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(248,), in_channel=248, out_channel=100, dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_128_496_out_124_6d_int32(): + ''' + Description: + test operator dense input_shape 6d + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(1, 2, 4, 5, 28, 496), in_channel=496, out_channel=124, + dtype=np.int32) + fact.b_np = None + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_out_2_7d(): + ''' + Description: + test operator dense input_shape=7d, dtype is np.int64 + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(1, 3, 5, 6, 5, 10, 102), in_channel=102, out_channel=2, + dtype=np.int64) + fact.b_np = np.random.randint(-10, 10) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_3d_in_3_out_4_uint8(): + ''' + Description: + test operator dense input_shape 3d, uint8 + + Expectation: + pijit result match psjit + + ''' + input_shape = (5, 2, 3) + in_channel = 3 + out_channel = 4 + fact = DenseFactory(input_shape=input_shape, in_channel=in_channel, out_channel=out_channel, + dtype=np.uint8) + fact.x_np = np.random.randint(0, 100, input_shape).astype(np.uint8) + fact.w_np = np.random.randint(0, 100, (out_channel, in_channel)).astype(np.uint8) + fact.b_np = np.random.randint(0, 100, out_channel).astype(np.uint8) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_inf(): + ''' + Description: + test operator dense input_shape 3d, uint8 + + Expectation: + pijit result match psjit + + ''' + input_shape = (2, 2) + in_channel = 2 + out_channel = 2 + fact = DenseFactory(input_shape=input_shape, in_channel=in_channel, out_channel=out_channel, + dtype=np.float32) + fact.x_np = np.array([[2, 4], [np.inf, 2]]).astype(np.float32) + fact.w_np = np.array([[2, 4], [np.inf, 2]]).astype(np.float32) + fact.b_np = np.array([2, 4]).astype(np.float32) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_nan(): + ''' + Description: + test operator dense with nan + + Expectation: + pijit result match psjit + + ''' + input_shape = (2, 2) + in_channel = 2 + out_channel = 2 + fact = DenseFactory(input_shape=input_shape, in_channel=in_channel, out_channel=out_channel, + dtype=np.float32) + fact.x_np = np.array([[2, np.nan], [2, 2]]).astype(np.float32) + fact.w_np = np.array([[2, 4], [np.inf, 2]]).astype(np.float32) + fact.b_np = np.array([2, 4]).astype(np.float32) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_n_np_2d(): + ''' + Description: + test operator dense abnormal shape,b_np=2d + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(1, 1000), in_channel=1000, out_channel=1000, dtype=np.float32) + fact.b_np = np.random.randn(1000, 1).astype(np.float32) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_x_1d_w_2d(): + ''' + Description: + test operator dense abnormal shape, input_shape is (10),w shape is 2d + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(10,), in_channel=10, out_channel=10, dtype=np.float32) + fact.w_np = np.random.randn(10, 10).astype(np.float32) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_2d_w_3d(): + ''' + Description: + test operator dense abnormal shape,w_np is 3d + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(1, 1000), in_channel=1000, out_channel=1024, dtype=np.float32) + fact.w_np = np.random.randn(1000, 1000, 1).astype(np.float32) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_1_1000_in_1000_out_1024_bias_1000_1_abnormal(): + ''' + Description: + test operator dense abnormal shape + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(1, 1000), in_channel=1000, out_channel=1024, dtype=np.float32) + fact.b_np = np.ones((1000, 1)).astype(np.float32) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_forward_input_type_not_same(): + ''' + Description: + test operator dense abnormal dtypes. + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(28, 3), in_channel=3, out_channel=64, dtype=np.float32) + fact.w_np = np.random.randn(64, 3).astype(np.float32) + fact.b_np = np.random.randn(64).astype(np.float16) + with pytest.raises(TypeError): + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dense_input_56x28x16x28x3_in_4_out_3_abnormal(): + ''' + Description: + test operator dense input_shape=(56, 28, 16, 28, 3), in_channel=4, out_channel=3 + + Expectation: + pijit result match psjit + + ''' + fact = DenseFactory(input_shape=(56, 28, 16, 28, 3), in_channel=4, out_channel=3) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_div.py b/tests/st/pi_jit/operation/test_div.py new file mode 100644 index 0000000000000000000000000000000000000000..0e2a979e139855a150468401af41446d27a69ad7 --- /dev/null +++ b/tests/st/pi_jit/operation/test_div.py @@ -0,0 +1,373 @@ +from ..share.ops.primitive.div_ops import DivFactory +from ..share.ops.primitive.div_ops import Div +from mindspore import jit, context +import numpy as np +import pytest + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_245520_245520(): + """ + Description: + 1. div算子测试,input (245520, ), (245520, ). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((245520,), (245520,), dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_512_256(): + """ + Description: + 1. div算子测试,input (512, 256), (512, 256). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((512, 256), (512, 256)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_1024x81x4_1024x81x4(): + """ + Description: + 1. div算子测试,input (1024, 81, 4), (1024, 81, 4). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((1024, 81, 4), (1024, 81, 4)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_32x256x14x14_32x256x14x14(): + """ + Description: + 1. div算子测试,input (1024, 81, 4), (1024, 81, 4). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((32, 256, 14, 14), (32, 256, 14, 14)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_5d_7d(): + """ + Description: + 1. div算子测试,input 5d-7d. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((2, 4, 8, 16, 8), (2, 4, 8, 16, 8), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + fact = DivFactory((2, 4, 8, 16, 8, 4), (2, 4, 8, 16, 8, 4), dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + fact = DivFactory((2, 4, 8, 16, 8, 4, 2), (2, 4, 8, 16, 8, 4, 2), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_scalar_scalar(): + """ + Description: + 1. div算子测试,input scalar, scalar. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((1,), (1,)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_1_128x64(): + """ + Description: + 1. div算子测试,input (1), (128, 64), dtype=np.float16,反向的时候inputx的精度有误差. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((1,), (128, 64), dtype=np.float16) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_256x256_256x1(): + """ + Description: + 1. div算子测试,input (256, 256), (256, 1). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((256, 256), (256, 1)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_32x32x7x7_32x32x1x1(): + """ + Description: + 1. div算子测试,input (32, 32, 7, 7), (32, 32, 1, 1). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((32, 32, 7, 7), (32, 32, 1, 1)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_4x4x1_3(): + """ + Description: + 1. div算子测试,input (4, 4, 1), (3). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((4, 4, 1), (3,)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_4x4x4x1_3(): + """ + Description: + 1. div算子测试,input (4, 4, 1), (3). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((4, 4, 4, 1), (3,)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_4x4x2_4x2(): + """ + Description: + 1. div算子测试,input (4, 4, 2), (4, 2). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((4, 4, 2), (4, 2)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_4x4x4x2_4x2(): + """ + Description: + 1. div算子测试,input (4, 4, 2), (4, 2). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((4, 4, 4, 2), (4, 2)) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_32x12x128x128_1(): + """ + Description: + 1. div算子测试,input (4, 4, 2), (4, 2). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((32, 12, 128, 128), (1,)) + fact.loss = 0.005 + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_forward_input_256x256_int32(): + """ + Description: + 1. div算子正向测试,input (8), (1). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((256, 256), (256, 256), dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_forward_input_256x256_int64(): + """ + Description: + 1. div算子测试,input (1), (1024, 4096). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((256, 256), (256, 256), dtype=np.int64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_input_1_1024x4096(): + """ + Description: + 1. div算子测试,input (1), (1024, 4096). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((1,), (1024, 4096)) + fact.loss = 0.005 + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_forward_input_2x2_3x2(): + """ + Description: + 1. div算子异常测试,input (2, 2), (3, 2). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((2, 2), (3, 2)) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_abnormal_input_2x2_str_2x2_str32(): + """ + Description: + 1. div算子异常测试,input str(2, 2), str(2, 2). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + with pytest.raises(TypeError): + fact = DivFactory((2, 2), (2, 2), dtype=np.str) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_normal_input_1_32x64(): + """ + Description: + 1. div算子测试,input (1), (32, 64). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((1,), (32, 64)) + pi_net = Div() + jit(pi_net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + out = fact.forward_mindspore_impl(pi_net) + assert out.shape == (32, 64), out.dtype == np.float32 + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_div_abnormal_inputy_zero(): + """ + Description: + 1. div算子测试,inputy 0, 除数为0,输出结果为inf. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = DivFactory((5,), (5,)) + fact.inputy = np.array([0, 2, 0, 2, 0], dtype=np.float32) + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_dtype.py b/tests/st/pi_jit/operation/test_dtype.py new file mode 100644 index 0000000000000000000000000000000000000000..9f9987bb58c2e21d81377335d286722206d4d151 --- /dev/null +++ b/tests/st/pi_jit/operation/test_dtype.py @@ -0,0 +1,888 @@ +from ..share.ops.primitive.dtype_ops import DTypeFactory +from ..share.ops.primitive.dtype_ops import DType +import numpy as np +import pytest +from mindspore import jit, context + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_fp32(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype:fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + fact = DTypeFactory(input_shape, dtype=np.float32) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_bool(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype:bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.bool_ + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1_dtype_fp32(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12_dtype_fp32(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_12_dtype_fp32(): + """ + Description: + 1. DType算子正向测试,input_shape=(12,), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (12,) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_5x1x2x5x1x2x8_dtype_fp32(): + """ + Description: + 1. DType算子正向测试,input_shape=(5,1,2,5,1,2,8), dtype=fp32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (5, 1, 2, 5, 1, 2, 8) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1x2x3_dtype_fp16(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1,2,3), dtype=fp16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1, 2, 3) + dtype = np.float16 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1x2_dtype_fp64(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1,2), dtype=fp64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1, 2) + dtype = np.float64 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_int8(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype=int8. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.int8 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_forward_input_1x12x1x1_dtype_int16(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype=int16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.int16 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_int32(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype=int32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.int32 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_int64(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype=int64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.int64 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_uint8(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype=uint8. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.uint8 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_uint16(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype=uint16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.uint16 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_uint32(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype=uint32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.uint32 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_dtype_uint64(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1), dtype=uint64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = (1, 12, 1, 1) + dtype = np.uint64 + fact = DTypeFactory(input_shape, dtype=dtype) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_scalar(): + """ + Description: + 1. DType算子正向测试,input_dtype=np, dtype=fp64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = np.float(8.88) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_int(): + """ + Description: + 1. DType算子正向测试,input_dtype=int. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = 888 + dtype = np.int64 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_float(): + """ + Description: + 1. DType算子正向测试,input_dtype=float. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = 8.88 + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_contains_none(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = np.random.randn(1, 12, 1, 1).astype(np.float32) + input_np[0, 0, 0, 0] = None + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_contains_nan(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = np.random.randn(1, 12, 1, 1).astype(np.float32) + input_np[0, 0, 0, 0] = np.nan + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_1x12x1x1_contains_inf(): + """ + Description: + 1. DType算子正向测试,input_shape=(1,12,1,1). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = np.random.randn(1, 12, 1, 1).astype(np.float32) + input_np[0, 0, 0, 0] = np.inf + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_tuple_int(): + """ + Description: + 1. DType算子正向测试,input_dtype=tuple, dtype=int. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = (666, 888, 999) + dtype = np.int64 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_tuple_bool(): + """ + Description: + 1. DType算子正向测试,input_dtype=tuple, dtype=bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = (True, False, True) + dtype = np.bool_ + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_tuple_float(): + """ + Description: + 1. DType算子正向测试,input_dtype=tuple, dtype=float. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = (6.66, 8.88, 9.99) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_tuple_int_float(): + """ + Description: + 1. DType算子正向测试,input_dtype=tuple, dtype=int & float. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = (6.66, 888, 999) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_tuple_int_bool(): + """ + Description: + 1. DType算子正向测试,input_dtype=tuple, dtype=int & bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = (False, 666, 888, 999, True) + dtype = np.int64 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_tuple_float_bool(): + """ + Description: + 1. DType算子正向测试,input_dtype=tuple, dtype=float & bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = (False, 6.66, 8.88, 9.99, True) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_tuple_int_nan(): + """ + Description: + 1. DType算子正向测试,input_dtype=tuple, dtype=int & nan. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = (np.nan, 666, 888, 999) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_tuple_int_inf(): + """ + Description: + 1. DType算子正向测试,input_dtype=tuple, dtype=int & inf. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = (np.inf, 666, 888, 999) + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_list_int(): + """ + Description: + 1. DType算子正向测试,input_dtype=list, dtype=int. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = [666, 888, 999] + dtype = np.int64 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_list_bool(): + """ + Description: + 1. DType算子正向测试,input_dtype=list, dtype=bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = [True, False, True] + dtype = np.bool_ + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_list_float(): + """ + Description: + 1. DType算子正向测试,input_dtype=list, dtype=float. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = [6.66, 8.88, 9.99] + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_list_int_float(): + """ + Description: + 1. DType算子正向测试,input_dtype=list, dtype=int & float. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = [6.66, 888, 999] + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_list_int_bool(): + """ + Description: + 1. DType算子正向测试,input_dtype=list, dtype=int & bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = [False, 666, 888, 999, True] + dtype = np.int64 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_list_float_bool(): + """ + Description: + 1. DType算子正向测试,input_dtype=list, dtype=float & bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = [False, 6.66, 8.88, 9.99, True] + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_list_int_nan(): + """ + Description: + 1. DType算子正向测试,input_dtype=list, dtype=int & nan. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = [np.nan, 666, 888, 999] + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_list_int_inf(): + """ + Description: + 1. DType算子正向测试,input_dtype=list, dtype=int & inf. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = [np.inf, 666, 888, 999] + dtype = np.float32 + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_dtype_input_bool(): + """ + Description: + 1. DType算子正向测试,input_dtype=bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_shape = () + input_np = True + dtype = np.bool_ + fact = DTypeFactory(input_shape, dtype=dtype, input_x=input_np) + net = DType() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + fact.forward_cmp(net) + fact.grad_cmp(net) diff --git a/tests/st/pi_jit/operation/test_equal.py b/tests/st/pi_jit/operation/test_equal.py new file mode 100644 index 0000000000000000000000000000000000000000..9c0216723eaf7bd7d9a19a46dee4b63a50a4c642 --- /dev/null +++ b/tests/st/pi_jit/operation/test_equal.py @@ -0,0 +1,409 @@ +from ..share.ops.primitive.equal_ops import EqualFactory +from ..share.ops.primitive.equal_ops import EqualMock +from ..share.ops.primitive.equal_ops import Equal +from ..share.utils import allclose_nparray +import mindspore as ms +from mindspore.common import dtype as mstype +from mindspore import Tensor, jit, context +import mindspore.ops.operations as op +import numpy as np +import pytest + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_input_245520(): + """ + Description: + 1. Equal算子正向测试,input_shape=(245520,). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(245520,), dtype=np.float16) + fact.forward_cmp() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_input_n(): + """ + Description: + 1. Equal算子正向测试,input_shape=(n,w),n、m in (64, 96, 128). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + for n in (64, 96, 128): + for w in (64, 96, 128): + fact = EqualFactory(input_shape=(n, w), dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_input(): + """ + Description: + 1. Equal算子测试,dtype不一致. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_1 = np.random.randn(1, 1).astype(np.float32) + input_2 = np.random.randn(1, 2).astype(np.float16) + fact = EqualFactory(input_shape=(1, 2)) + fact.left_input_np = input_1 + fact.right_input_np = input_2 + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_normal_outshape_sameas_first_input(): + """ + Description: + 1. Equal算子测试,验证输出的shape与第一个输入相等. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(256, 1), dtype=np.float32) + net = Equal() + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + out = fact.forward_mindspore_impl(net) + assert out.shape == (256, 1), out.dtype == ms.bool_ + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_float16_0d(): + """ + Description: + 1. Equal算子正向测试,input_shape=0d,dtype=float16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_list = [] + x0 = Tensor(np.random.randn(70).astype(np.float16)) + input_list.append(x0) + x1 = Tensor(np.random.randn(70).astype(np.float16)) + input_list.append(x1) + fact = EqualMock(inputs=input_list) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_float64_1d(): + """ + Description: + 1. Equal算子正向测试,input_shape=1d,dtype=float64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(64,), dtype=np.float64) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_float16_2d(): + """ + Description: + 1. Equal算子正向测试,input_shape=2d,dtype=float16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8), dtype=np.float16) + fact.forward_cmp() + + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_float32_3d(): + """ + Description: + 1. Equal算子正向测试,input_shape=3d,dtype=float32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8, 16), dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_int8_4d(): + """ + Description: + 1. Equal算子正向测试,input_shape=4d,dtype=int16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8, 16), dtype=np.int8) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_int16_4d(): + """ + Description: + 1. Equal算子正向测试,input_shape=4d,dtype=int16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8, 16, 8), dtype=np.int16) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_int32_5d(): + """ + Description: + 1. Equal算子正向测试,input_shape=4d,dtype=int32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8, 16, 8, 4), dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_int64_6d(): + """ + Description: + 1. Equal算子正向测试,input_shape=6d,dtype=int64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8, 16, 8, 4, 9), dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_uint8_7d(): + """ + Description: + 1. Equal算子正向测试,input_shape=7d,dtype=uint8. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8, 16, 8, 4, 2, 2), dtype=np.uint8) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_int64(): + """ + Description: + 1. Equal算子正向测试,input_shape=3d,dtype=int64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8, 16), dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_input_num(): + """ + Description: + 1. Equal算子正向测试,input num. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_1 = Tensor(np.array([1]), ms.float32) + input_2 = 1.0 + net = op.Equal() + jit(net, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + out = net(input_1, input_2) + assert out + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_input_uint32(): + """ + Description: + 1. Equal算子正向测试,input shape (4, 8) dtype np.uint32 for cpu. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8, 16), dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_input_bool_for_gpu(): + """ + Description: + 1. Equal算子正向测试,input bool for gpu. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_1 = np.array([1]).astype(np.bool) + input_2 = True + ps_net = op.Equal() + jit(ps_net, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + out1 = ps_net(Tensor(input_1), input_2) + + pi_net = op.Equal() + jit(pi_net, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + out2 = pi_net(Tensor(input_1), input_2) + + allclose_nparray(out2[0].numpy(), out1[0].asnumpy(), 0.001, 0.001) + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_input_type_float64_for_gpu(): + """ + Description: + 1. Equal算子正向测试,input float64 for gpu. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8), dtype=np.float64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_forward_dtype_bool_for_gpu(): + """ + Description: + 1. Equal算子正向测试,input_shape=(4,8,16),dtype bool for gpu. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = EqualFactory(input_shape=(4, 8), dtype=np.float64) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_input_dtype_string(): + """ + Description: + 1. test Equal with input shape from 3d, dtype string. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_list = [] + x0 = Tensor(np.random.randn(8, 7, 1), dtype=mstype.string) + input_list.append(x0) + x1 = Tensor(np.random.randn(8, 7, 1), dtype=mstype.string) + input_list.append(x1) + fact = EqualMock(inputs=input_list) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_input_dtype_bool(): + """ + Description: + 1. test Equal with input shape from 3d, dtype bool. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_list = [] + x0 = Tensor(np.random.randn(7, 6, 13).astype(np.bool)) + input_list.append(x0) + x1 = Tensor(np.random.randn(7, 6, 13).astype(np.bool)) + input_list.append(x1) + fact = EqualMock(inputs=input_list) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_equal_input_dtype_bool2(): + """ + Description: + 1. test Equal with input shape from 3d, dtype complex64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_list = [] + x0 = Tensor(np.random.randn(7, 6, 13).astype(np.bool)) + input_list.append(x0) + x1 = Tensor(np.random.randn(7, 6, 13).astype(np.bool)) + input_list.append(x1) + fact = EqualMock(inputs=input_list) + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_floor.py b/tests/st/pi_jit/operation/test_floor.py new file mode 100644 index 0000000000000000000000000000000000000000..c0c96dbe1648dd73f6cd81c1579012010095d6b7 --- /dev/null +++ b/tests/st/pi_jit/operation/test_floor.py @@ -0,0 +1,141 @@ +from ..share.ops.primitive.floor_ops import FloorFactory +from mindspore import Tensor +import mindspore.ops.operations as op +import numpy as np +import pytest + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floor_input_1d_fp16(): + """ + Description: + 1. test faster_rcnn floor with input shape (512,) forward grad. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorFactory(input_shape=(512,), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floor_input_2d_fp32(): + """ + Description: + 1. test faster_rcnn floor with input shape=2d forward grad. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorFactory(input_shape=(512, 7), dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floor_input_3d_fp16(): + """ + Description: + 1. test faster_rcnn floor with input shape=2d forward grad. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorFactory(input_shape=(256, 7, 2), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floor_input_4d_fp32(): + """ + Description: + 1. test faster_rcnn floor with input shape=2d forward grad. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorFactory(input_shape=(20, 4, 2, 1), dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floor_input_5d_fp16(): + """ + Description: + 1. test faster_rcnn floor with input shape=2d forward grad. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorFactory(input_shape=(10, 5, 3, 4, 2), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floor_input_6d_fp32(): + """ + Description: + 1. test faster_rcnn floor with input shape=6d forward grad. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorFactory(input_shape=(5, 7, 8, 4, 5, 8), dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floor_input_7d_fp16(): + """ + Description: + 1. test faster_rcnn floor with input shape=6d forward grad. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorFactory(input_shape=(2, 6, 4, 2, 1, 4, 3), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floor_input_512_512_fp16(): + """ + Description: + 1. test floor with two input shape (512,) forward. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_np = np.random.randn(5,).astype(np.float16) + with pytest.raises(TypeError): + op.Floor(Tensor(input_np), Tensor(input_np)) diff --git a/tests/st/pi_jit/operation/test_floordiv.py b/tests/st/pi_jit/operation/test_floordiv.py new file mode 100644 index 0000000000000000000000000000000000000000..0fa40b573da898e5b24a9e82d073bcff7da6819c --- /dev/null +++ b/tests/st/pi_jit/operation/test_floordiv.py @@ -0,0 +1,319 @@ +import pytest +import numpy as np +import mindspore.ops.operations as op +from mindspore import Tensor +from mindspore.common import dtype +from ..share.ops.primitive.floordiv_ops import FloorDivFactory +from ..share.ops.primitive.floordiv_ops import FloorDivMock +from ..share.utils import get_empty_tensor +from ..dynamic_shape_operations.floordiv import FloorDivDynamicShapeFactory + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_128x1024_fp32(): + """ + Description: + 1. test reid floordiv with input shape (128 * 1024, 1),float32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(128 * 1024, 1), dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_3d_fp16(): + """ + Description: + 1. test reid floordiv with input =3D, dtype=float16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(302, 110, 10), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_shape_dtype_int8(): + """ + Description: + 1. test reid floordiv with input=1d, dtype=int8. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(7,), dtype=np.int8) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_shape_dtype_int16(): + """ + Description: + 1. test reid floordiv with input=2d, dtype=int16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(3, 3), dtype=np.int16) + fact.right_input_np = np.random.uniform((1,)).astype(np.int8) + fact.input_x2 = Tensor(fact.right_input_np) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_shape_dtype_int64(): + """ + Description: + 1. test reid floordiv with input=3d, dtype=int64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(7, 8, 10), dtype=np.int64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_shape_dtype_fp64(): + """ + Description: + 1. test reid floordiv with input=4d, dtype=float64. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(7, 8, 9, 10), dtype=np.float64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_shape_dtype_int32(): + """ + Description: + 1. test reid floordiv with input=5d, dtype=int32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(7, 8, 9, 10, 11), dtype=np.int32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_shape_dtype_uint16(): + """ + Description: + 1. test reid floordiv with input=6d, dtype=uint16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(2, 4, 3, 6, 3, 5), dtype=np.uint16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_input_shape_dtype_uint8(): + """ + Description: + 1. test reid floordiv with input=7d, dtype=uint8. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + fact = FloorDivFactory(input_shape=(1, 4, 6, 2, 3, 5, 7), dtype=np.uint8) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_right_input_int(): + """ + Description: + 1. test floordiv with input1 shape (13, 8), dtype =float32 ,input2 = 5. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + left_input_np = np.random.randn(13, 8).astype(np.float32) + right_input_np = 5 + net = op.FloorDiv() + out = net(Tensor(left_input_np), right_input_np) + assert "float32" in str(out.dtype).lower() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_forward_left_input_bool(): + """ + Description: + 1. test floordiv with input1 = True, dtype . + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + right_input_np = np.random.randn(5, 3).astype(np.float16) + net = op.FloorDiv() + out = net(True, Tensor(right_input_np)) + assert "float16" in str(out.dtype).lower() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_empty_tensor(): + """ + Description: + 1. test floordiv with get_empty_tensor(). + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_x1 = get_empty_tensor() + input_x2 = get_empty_tensor() + fact = FloorDivMock(inputs=[input_x1, input_x2]) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_floordiv_input_3d_int16(): + """ + Description: + 1. test floor_div with input shape 3D, type=int16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + x1 = np.random.randint(1, 512, size=(4, 4, 4)).astype(np.int16) + x2 = np.random.randint(1, 512, size=(4, 4, 4)).astype(np.int16) + input_x1 = Tensor(x1) + input_x2 = Tensor(x2) + fact = FloorDivMock(inputs=[input_x1, input_x2]) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_floordiv_input_2d_fp16(): + """ + Description: + 1. test floor_div dynamic shape with input shape 2D, type=float16. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + x = np.random.randn(2, 3) + y = np.random.randn(2, 3) + input_x = Tensor(x, dtype=dtype.float16) + input_y = Tensor(y, dtype=dtype.float16) + fact = FloorDivMock(inputs=[input_x, input_y]) + fact.forward_dynamic_shape_cmp() + fact.grad_dynamic_shape_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_floordiv_input_1d_fp32(): + """ + Description: + 1. test floor_div dynamic shape with input shape 1D, type=float32. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + x1 = np.random.randn(2).astype(np.float32) + x2 = np.random.randn(2).astype(np.float32) + input_x1 = Tensor(x1) + input_x2 = Tensor(x2) + fact = FloorDivMock(inputs=[input_x1, input_x2]) + fact.forward_dynamic_shape_cmp() + fact.grad_dynamic_shape_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_floordiv_params_double_2d(): + """ + Description: + 1. test floordiv with dynamic shape input, dtype=double, 2d. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_x = Tensor(np.random.rand(8, 5, 8, 5).astype(np.float64)) + input_y = Tensor(np.random.rand(8, 5, 8, 5).astype(np.float64)) + indices = Tensor(np.random.choice(4, 2, replace=False).astype(np.int32)) + fact = FloorDivDynamicShapeFactory([input_x, input_y, indices]) + fact.forward_cmp() + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_floordiv_params_float32_2d(): + """ + Description: + 1. test floordiv with dynamic shape input, dtype=float32, 2d. + + Expectation: + 1. the network run ok + 2. the result is the same as psjit + """ + input_x = Tensor(np.random.rand(100, 10, 10).astype(np.float32)) + input_y = Tensor(np.random.rand(100, 10, 10).astype(np.float32)) + indices = Tensor(np.random.choice(3, 1, replace=False).astype(np.int32)) + fact = FloorDivDynamicShapeFactory([input_x, input_y, indices]) + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_greater.py b/tests/st/pi_jit/operation/test_greater.py new file mode 100644 index 0000000000000000000000000000000000000000..4d1c97d22050ea2035802b7652e0d0e5c0d9c22a --- /dev/null +++ b/tests/st/pi_jit/operation/test_greater.py @@ -0,0 +1,47 @@ +import numpy as np +import pytest +from mindspore import ops, jit, context +import mindspore as ms + + +@jit(mode="PIJit") +def greater_forward_func(x, y): + return ops.greater(x, y) + +@jit(mode="PIJit") +def greater_backward_func(x, y): + return ops.grad(greater_forward_func, (0,))(x, y) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_greater_forward(): + """ + Feature: Ops. + Description: test op greater. + Expectation: expect correct result. + """ + context.set_context(mode=context.PYNATIVE_MODE) + x = ms.Tensor(np.array([1, 2, 3]), ms.int32) + y = ms.Tensor(np.array([1, 1, 4]), ms.int32) + expect_out = np.array([False, True, False]) + out = greater_forward_func(x, y) + assert np.allclose(out.asnumpy(), expect_out) + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_greater_backward(): + """ + Feature: Auto grad. + Description: test auto grad of op greater. + Expectation: expect correct result. + """ + context.set_context(mode=context.PYNATIVE_MODE) + x = ms.Tensor(np.array([1, 2, 3]), ms.int32) + y = ms.Tensor(np.array([1, 1, 4]), ms.int32) + expect_out = np.array([0, 0, 0]) + out = greater_backward_func(x, y) + assert np.allclose(out.asnumpy(), expect_out) diff --git a/tests/st/pi_jit/operation/test_greaterequal.py b/tests/st/pi_jit/operation/test_greaterequal.py new file mode 100644 index 0000000000000000000000000000000000000000..26e8858cb8eca145a97b273c1b13a322953fc0ab --- /dev/null +++ b/tests/st/pi_jit/operation/test_greaterequal.py @@ -0,0 +1,46 @@ +import numpy as np +import pytest +import mindspore as ms +from mindspore import ops + + +def greater_equal_forward_func(x, y): + return ops.greater_equal(x, y) + + +def greater_equal_backward_func(x, y): + return ops.grad(greater_equal_forward_func, (0,))(x, y) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_greater_equal_forward(): + """ + Feature: Ops. + Description: test op greater_equal. + Expectation: expect correct result. + """ + ms.context.set_context(mode=ms.PYNATIVE_MODE) + x = ms.Tensor(np.array([1, 2, 3]), ms.int32) + y = ms.Tensor(np.array([1, 1, 4]), ms.int32) + expect_out = np.array([True, True, False]) + out = greater_equal_forward_func(x, y) + assert np.allclose(out.asnumpy(), expect_out) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_greater_equal_backward(): + """ + Feature: Auto grad. + Description: test op greater_equal. + Expectation: expect correct result. + """ + ms.context.set_context(mode=ms.PYNATIVE_MODE) + x = ms.Tensor(np.array([1, 2, 3]), ms.int32) + y = ms.Tensor(np.array([1, 1, 4]), ms.int32) + expect_out = np.array([0, 0, 0]) + grads = greater_equal_backward_func(x, y) + assert np.allclose(grads.asnumpy(), expect_out) diff --git a/tests/st/pi_jit/operation/test_inplaceadd.py b/tests/st/pi_jit/operation/test_inplaceadd.py new file mode 100644 index 0000000000000000000000000000000000000000..f2a59d863e83661011fc5ed97e08263c4e2e38fd --- /dev/null +++ b/tests/st/pi_jit/operation/test_inplaceadd.py @@ -0,0 +1,97 @@ +import numpy as np +import pytest +from ..share.ops.primitive.inplaceadd_ops import InplaceAddFactory + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplaceadd_input_3d_all(): + """ + Feature: Ops. + Description: test operator InplaceAdd, given (inputx_shape=(128, 32),inputv_shape=(2, 32),dtype=np.float32). + Expectation: expect correct result. + """ + fact = InplaceAddFactory(inputx_shape=(8, 128, 64), inputv_shape=(8, 128, 64), + indices=(0, 1, 2, 3, 4, 5, 6, 7), dtype1=np.float32, + dtype2=np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplaceadd_input_3d_indices_int(): + """ + Feature: Ops. + Description: test operator InplaceAdd, given (inputx_shape=(128, 32),inputv_shape=(2, 32),dtype=np.float32). + Expectation: expect correct result. + """ + fact = InplaceAddFactory(inputx_shape=(32, 128, 64), inputv_shape=(1, 128, 64), indices=18, + dtype1=np.float32, dtype2=np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplaceadd_input_4d_indices_all(): + """ + Feature: Ops. + Description: test operator InplaceAdd, given (inputx_shape=(128, 32),inputv_shape=(2, 32),dtype=np.float32). + Expectation: expect correct result. + """ + fact = InplaceAddFactory(inputx_shape=(8, 128, 64, 2), inputv_shape=(8, 128, 64, 2), + indices=(0, 1, 2, 3, 4, 5, 6, 7), dtype1=np.float32, + dtype2=np.float32) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplaceadd_input_5d_7d(): + """ + Feature: Ops. + Description: test operator InplaceAdd, given (inputx_shape=5d-7d). + Expectation: expect correct result. + """ + fact = InplaceAddFactory(inputx_shape=(16, 8, 8, 4, 4), inputv_shape=(4, 8, 8, 4, 4), + indices=(0, 1, 2, 3), dtype1=np.float32, dtype2=np.float32) + fact.forward_cmp() + + fact = InplaceAddFactory(inputx_shape=(16, 8, 8, 4, 4, 2), inputv_shape=(4, 8, 8, 4, 4, 2), + indices=(0, 1, 14, 15), dtype1=np.float16, dtype2=np.float16) + fact.forward_cmp() + + fact = InplaceAddFactory(inputx_shape=(16, 8, 8, 4, 4, 2, 2), + inputv_shape=(4, 8, 8, 4, 4, 2, 2), indices=(12, 13, 14, 15), + dtype1=np.float64, dtype2=np.float64) + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplaceadd_input_1d(): + """ + Feature: Ops. + Description: test operator InplaceAdd, given (inputx_shape=1d). + Expectation: expect correct result. + """ + fact = InplaceAddFactory(inputx_shape=(16,), inputv_shape=(4,), indices=(0, 1, 2, 3), + dtype1=np.float32, dtype2=np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplaceadd_indices_repeat(): + """ + Feature: Ops. + Description: test operator InplaceAdd, given v repeat. + Expectation: expect correct result. + """ + fact = InplaceAddFactory(inputx_shape=(16, 8, 8, 4, 4), inputv_shape=(2, 8, 8, 4, 4), + indices=(1, 1), dtype1=np.float32, dtype2=np.float32) + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_inplacesub.py b/tests/st/pi_jit/operation/test_inplacesub.py new file mode 100644 index 0000000000000000000000000000000000000000..70adafddd5a7fed44829323d33e09347c30403e3 --- /dev/null +++ b/tests/st/pi_jit/operation/test_inplacesub.py @@ -0,0 +1,90 @@ +import numpy as np +import pytest +from ..share.ops.primitive.inplacesub_ops import InplaceSubFactory + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplacesub_input_1d(): + """ + Feature: Ops. + Description: test operator InplaceSub, given (inputx_shape=1d). + Expectation: expect correct result. + """ + fact = InplaceSubFactory(input_shape=(16,), target_shape=(4,), + indices=(0, 1, 2, 3), dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplacesub_indices_repeat(): + """ + Feature: Ops. + Description: test operator InplaceSub, given v repeat. + Expectation: expect correct result. + """ + fact = InplaceSubFactory(input_shape=(16, 8, 8, 4, 4), + target_shape=(2, 8, 8, 4, 4), indices=(1, 1), + dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_inplacesub_input_32_8_128_ind_28_float32(): + """ + Feature: Ops. + Description: test operator InplaceSub, given (input_shape=(32,8,128), + indices= 28,target_shape=(3,8,128),dtype=np.float32). + Expectation: expect correct result. + """ + fact = InplaceSubFactory(input_shape=(32, 8, 128), indices=28, + target_shape=(1, 8, 128), dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_inplacesub_input_1d_float16(): + """ + Feature: Ops. + Description: test InplaceSub with 1D input, x_dtype=Float32. + Expectation: expect correct result. + """ + fact = InplaceSubFactory(input_shape=(3,), indices=(2, 1, 0), + target_shape=(3,), + dtype=np.float16) + fact.forward_dynamic_shape_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_inplacesub_input_2d_float32(): + """ + Feature: Ops. + Description: test InplaceSub with 2D input, x_dtype=Float32. + Expectation: expect correct result. + """ + fact = InplaceSubFactory(input_shape=(8, 16), target_shape=(2, 16), + indices=(2, 1), dtype=np.float32) + fact.forward_dynamic_shape_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_inplacesub_input_3d_float64(): + """ + Feature: Ops. + Description: test InplaceSub with 3D input, x_dtype=Float64. + Expectation: expect correct result. + """ + fact = InplaceSubFactory(input_shape=(6, 200, 200), target_shape=(1, 200, 200), + indices=(4,), dtype=np.float64) + fact.forward_dynamic_shape_cmp() diff --git a/tests/st/pi_jit/operation/test_invert.py b/tests/st/pi_jit/operation/test_invert.py new file mode 100644 index 0000000000000000000000000000000000000000..8b939b95789d80fda5eb80e22061852b416cc80a --- /dev/null +++ b/tests/st/pi_jit/operation/test_invert.py @@ -0,0 +1,185 @@ +from ..share.ops.primitive.invert_ops import InvertFactory +import numpy as np +import pytest + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_256(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(1,), dtype=int16. + Expectation: expect correct result. + """ + input_shape = (256,) + fact = InvertFactory(input_shape, dtype=np.int16) + fact.forward_cmp() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_256x256(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(256, 256), dtype=uint16. + Expectation: expect correct result. + """ + input_shape = (256, 256) + fact = InvertFactory(input_shape, dtype=np.uint16) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_128x8x1(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(128,8,1), dtype=int16. + Expectation: expect correct result. + """ + input_shape = (128, 8, 1) + fact = InvertFactory(input_shape, dtype=np.int16) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_32x16x8x4(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(32, 16, 8, 4), dtype=int16. + Expectation: expect correct result. + """ + input_shape = (32, 26, 8, 4) + fact = InvertFactory(input_shape, dtype=np.uint16) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_32x8x16x8x32(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(32, 8, 16, 8, 32), dtype=int16. + Expectation: expect correct result. + """ + input_shape = (32, 8, 16, 8, 32) + fact = InvertFactory(input_shape, dtype=np.int16) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_8x8x16x32x8x16(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(8,8,16,32,8,16), dtype=int16. + Expectation: expect correct result. + """ + input_shape = (8, 8, 16, 32, 8, 16) + fact = InvertFactory(input_shape, dtype=np.int16) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_8x2x4x128x1x16x7(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(32, 8, 16, 8, 32), dtype=int16. + Expectation: expect correct result. + """ + input_shape = (8, 2, 4, 128, 1, 16, 7) + fact = InvertFactory(input_shape, dtype=np.int16) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_8x2x4x128x1x16x7_int8(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(32, 8, 16, 8, 32), dtype=int8. + Expectation: expect correct result. + """ + input_shape = (8, 2, 4, 128, 1, 16, 7) + fact = InvertFactory(input_shape, dtype=np.int8) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_8x8x16x32x8x16_uint8(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(32, 8, 16, 8, 32), dtype=int8. + Expectation: expect correct result. + """ + input_shape = (8, 8, 16, 32, 8, 16) + fact = InvertFactory(input_shape, dtype=np.uint8) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_8x2x4x128x1x16x7_int64(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(32, 8, 16, 8, 32), dtype=int8. + Expectation: expect correct result. + """ + input_shape = (8, 2, 4, 128, 1, 16, 7) + fact = InvertFactory(input_shape, dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_8x8x16x32x8x16_uint64(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(8,8,16,32,8,16), dtype=uint64. + Expectation: expect correct result. + """ + input_shape = (8, 8, 16, 32, 8, 16) + fact = InvertFactory(input_shape, dtype=np.uint64) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_2x2_int32(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(2, 2), dtype=int32. + Expectation: expect correct result. + """ + input_shape = (2, 2) + fact = InvertFactory(input_shape, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_invert_input_2x2_uint32(): + """ + Feature: Ops. + Description: test operator Invert, input_shape=(2, 2), dtype=uint32. + Expectation: expect correct result. + """ + input_shape = (2, 2) + fact = InvertFactory(input_shape, dtype=np.uint32) + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_less.py b/tests/st/pi_jit/operation/test_less.py new file mode 100644 index 0000000000000000000000000000000000000000..7e76a8ce9bb286c4c81ef65f76578d71473b544b --- /dev/null +++ b/tests/st/pi_jit/operation/test_less.py @@ -0,0 +1,247 @@ +import numpy as np +import pytest +from ..share.ops.primitive.less_ops import LessFactory + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_1d_all_float16(): + """ + Feature: Ops. + Description: less正向用例:input=1d,left_type=float16,right_type=float16. + Expectation: expect correct result. + """ + left_input = np.random.randn(*(2455,)).astype(np.float16) + right_input = np.random.randn(*(2455,)).astype(np.float16) + fact = LessFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_2d_all_float32(): + """ + Feature: Ops. + Description: less正向用例:input=2d,left_type=float32,right_type=float32. + Expectation: expect correct result. + """ + left_input = np.random.randn(*(128, 8)).astype(np.float32) + right_input = np.random.randn(*(128, 1)).astype(np.float32) + fact = LessFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_3d_all_int32(): + """ + Feature: Ops. + Description: less正向用例:input=3d,left_type=int32,right_type=int32. + Expectation: expect correct result. + """ + left_input = np.random.randint(-2147483648, 2147483647, (32, 16, 128)).astype(np.int32) + right_input = np.random.randint(-2147483648, 2147483647, (16, 128)).astype(np.int32) + fact = LessFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_4d_all_int8(): + """ + Feature: Ops. + Description: input=4d,left_type=int8,right_type=int8. + Expectation: expect correct result. + """ + left_input = np.random.randint(-128, 127, (2, 16, 8, 16)).astype(np.int8) + right_input = np.random.randint(-128, 127, (2, 16, 8, 16)).astype(np.int8) + fact = LessFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_5d_uint8(): + """ + Feature: Ops. + Description: input=5d,left_type=uint8,right_type=uint8. + Expectation: expect correct result. + """ + left_input = np.random.randint(-256, 255, (2, 16, 8, 16, 12)).astype(np.uint8) + right_input = np.random.randint(-256, 255, (1, 1, 8, 16, 12)).astype(np.uint8) + fact = LessFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_6d_float32_float16(): + """ + Feature: Ops. + Description: less正向用例:input=6d,left_type=float32,right_type=float64. + Expectation: expect correct result. + """ + left_input = np.random.randn(*(11, 10, 8, 4, 16, 32)).astype(np.float64) + right_input = np.random.randn(*(11, 10, 8, 4, 16, 32)).astype(np.float64) + fact = LessFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_7d_float16_int32(): + """ + Feature: Ops. + Description: less正向用例:input=7d,left_type=float16,right_type=int16. + Expectation: expect correct result. + """ + left_input = np.random.randn(*(8, 16, 4, 2, 1, 32, 9)).astype(np.int16) + right_input = np.random.randint(-128, 128, (8, 16, 4, 2, 1, 1, 1)).astype(np.int16) + fact = LessFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_1d_left_float32_right_bool(): + """ + Feature: Ops. + Description: less正向用例:input=1d,left_type=float32,right_type=bool,测试隐式转换. + Expectation: expect correct result. + """ + left_input = np.random.randn(*(128,)).astype(np.float32) + right_input = np.random.randn(*(128,)).astype(np.bool) + fact = LessFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_right_float(): + """ + Feature: Ops. + Description: less正向用例:input=3d,left_type=int32,right_type=float,测试一个参数是float. + Expectation: expect correct result. + """ + left_input = np.random.randint(-1024, 1024, (128, 4, 1)).astype(np.int32) + right_input = 0.56 + fact = LessFactory(left_input, right_input, rightistensor=False) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_left_int(): + """ + Feature: Ops. + Description: less正向用例:input=4d,left_type=5,right_type=float32,测试一个参数是int. + Expectation: expect correct result. + """ + left_input = 5 + right_input = np.random.randint(-128, 128, (8, 4, 2, 16)).astype(np.float32) + fact = LessFactory(left_input, right_input, leftistensor=False) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_left_bool(): + """ + Feature: Ops. + Description: less正向用例:input=2d,left_type=bool,right_type=float16,测试一个参数是bool. + Expectation: expect correct result. + """ + left_input = True + right_input = np.random.randint(-128, 128, (8, 16)).astype(np.float16) + fact = LessFactory(left_input, right_input, leftistensor=False) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_forward_input_right_bool(): + """ + Feature: Ops. + Description: less正向用例:input=5d,left_type=float32,right_type=bool,测试right_input=bool. + Expectation: expect correct result. + """ + left_input = np.random.randn(*(16, 8, 1, 1, 2)).astype(np.float32) + right_input = False + fact = LessFactory(left_input, right_input, rightistensor=False) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_input_list(): + """ + Feature: Ops. + Description: less异常用例:参数为列表. + Expectation: expect correct result. + """ + left_input = [1, 2, 3, 4] + right_input = np.random.randn(*(4,)).astype(np.float32) + fact = LessFactory(left_input, right_input, leftistensor=False) + with pytest.raises(TypeError): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_input_trulp(): + """ + Feature: Ops. + Description: less异常用例:参数为元组. + Expectation: expect correct result. + """ + left_input = np.random.randn(*(4,)).astype(np.float16) + right_input = (1, 2, 3, 4) + fact = LessFactory(left_input, right_input, rightistensor=False) + with pytest.raises(TypeError): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_input_all_number(): + """ + Feature: Ops. + Description: less异常用例:参数都是数字. + Expectation: expect correct result. + """ + left_input = 8 + right_input = 6 + fact = LessFactory(left_input, right_input, leftistensor=False, rightistensor=False) + with pytest.raises(TypeError): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_less_input_all_bool(): + """ + Feature: Ops. + Description: less异常用例:参数都是bool. + Expectation: expect correct result. + """ + left_input = True + right_input = False + fact = LessFactory(left_input, right_input, leftistensor=False, rightistensor=False) + with pytest.raises(TypeError): + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_maximum.py b/tests/st/pi_jit/operation/test_maximum.py new file mode 100644 index 0000000000000000000000000000000000000000..0b71e69dbf2a23912eb13c249d34c84014e956e8 --- /dev/null +++ b/tests/st/pi_jit/operation/test_maximum.py @@ -0,0 +1,208 @@ +import numpy as np +import pytest +import mindspore +from ..share.ops.primitive.maximum_ops import MaximumFactory +from ..share.ops.primitive.maximum_ops import Maximum +from mindspore import Tensor + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_512x1_512x1(): + """ + Feature: Ops. + Description: maximum算子测试,input_shape (512, 1), (512, 1). + Expectation: expect correct result. + """ + left_input = np.random.randn(512, 1).astype(np.float16) + right_input = np.random.randn(512, 1).astype(np.float16) + fact = MaximumFactory(left_input, right_input, dtype=np.float16) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_2x2_2x2(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape (2, 2), (2, 2). + Expectation: expect correct result. + """ + left_input = np.random.randn(2, 2).astype(np.float32) + right_input = np.random.randn(2, 2).astype(np.float32) + fact = MaximumFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_3x3x3x3_3x3x3x3(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape (3, 3, 3, 3), (3, 3, 3, 3). + Expectation: expect correct result. + """ + left_input = np.random.randn(3, 3, 3, 3).astype(np.int8) + right_input = np.random.randn(3, 3, 3, 3).astype(np.int8) + fact = MaximumFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_5d(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape 5D & 隐式类型转换. + Expectation: expect correct result. + """ + left_input = np.random.randn(3, 3, 4, 5, 4).astype(np.float16) + right_input = np.random.randn(3, 3, 4, 5, 4).astype(np.float32) + fact = MaximumFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_6d(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape 6D. + Expectation: expect correct result. + """ + left_input = np.random.randn(3, 3, 4, 5, 4, 3).astype(np.uint8) + right_input = np.random.randn(3, 3, 4, 5, 4, 3).astype(np.uint8) + fact = MaximumFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_3dtensor_scalar_scalar(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape (128, 128, 64), array(3.2). + Expectation: expect correct result. + """ + left_input = np.random.randn(128, 128, 64).astype(np.float32) + right_input = np.array(3.2).astype(np.float32) + fact = MaximumFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_scalar_3dtensor_scalar(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape array(3.2), (128, 128, 64). + Expectation: expect correct result. + """ + left_input = np.array(3.2).astype(np.float32) + right_input = np.random.randn(128, 128, 64).astype(np.float32) + fact = MaximumFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_forward_input_1dtensor_2dtensor(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape (2), (2, 3). + Expectation: expect correct result. + """ + left_input = np.random.randn(2).astype(np.float32) + right_input = np.random.randn(2, 3).astype(np.float32) + fact = MaximumFactory(left_input, right_input) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_forward_input_32x128x1024_1(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape (32, 128, 1024), (1). + Expectation: expect correct result. + """ + left_input = np.random.randn(32, 128, 1024).astype(np.float32) + right_input = np.random.randn(1).astype(np.float32) + fact = MaximumFactory(left_input, right_input) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_tensor_bool_tensor_int32(): + """ + Feature: Ops. + Description:maximum, Tensor(bool) & Tensor(int32). + Expectation: expect correct result. + """ + left_input_np = np.array([False, False, False]) + right_input_np = np.array([-1, 0, 1]) + net = Maximum() + out_me = net(Tensor(left_input_np), Tensor(right_input_np, mindspore.int32)) + out_np = np.maximum(left_input_np, right_input_np) + assert out_me.asnumpy().all() == out_np.all() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_randn_512x1_512x1_int32(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape (512, 1), (512, 1), dtype=int32. + Expectation: expect correct result. + """ + left_input = np.random.randint(0, 2560, size=(512, 1)).astype(np.int32) + right_input = np.random.randint(0, 2560, size=(512, 1)).astype(np.int32) + fact = MaximumFactory(left_input, right_input, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_input_randint_8x1_8x1_int32(): + """ + Feature: Ops. + Description:maximum算子测试,input_shape (8, 1), (8, 1), dtype=int32. + Expectation: expect correct result. + """ + left_input = np.random.randint(0, 256, size=(8, 1)).astype(np.int32) + right_input = np.random.randint(0, 256, size=(8, 1)).astype(np.int32) + fact = MaximumFactory(left_input, right_input, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maximum_performance_improve(): + """ + Feature: Ops. + Description:test maximum performance,input >1w. + Expectation: expect correct result. + """ + input_x = np.random.random((8, 8, 64, 64)).astype(np.float32) + input_y = np.random.random((8, 8, 64, 64)).astype(np.float32) + fact = MaximumFactory(input_x, input_y) + + net = Maximum() + inputs = [Tensor(fact.left_input), Tensor(fact.right_input)] + for _ in range(50): + net(*inputs) diff --git a/tests/st/pi_jit/operation/test_maxpool.py b/tests/st/pi_jit/operation/test_maxpool.py new file mode 100644 index 0000000000000000000000000000000000000000..e27a8630b0c6a16920b11978028d41a4bc724bed --- /dev/null +++ b/tests/st/pi_jit/operation/test_maxpool.py @@ -0,0 +1,232 @@ +import numpy as np +import pytest +from mindspore import Tensor +from ..share.ops.primitive.maxpool_ops import MaxPoolMock + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_1x3x224x224_float16_strides_2_valid(): + """ + Feature: Ops. + Description: create a net which contains MaxPool for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randint(1, 10, (1, 3, 224, 224)).astype(np.float16))) + attributes = {"pad_mode": "VALID", + "kernel_size": 2, + "strides": 2} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_2x2x2x2_float32_strides_2_valid(): + """ + Feature: Ops. + Description: create a net with input shape 2x2x2x2 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randint(-10, 10, (2, 2, 2, 2)).astype(np.float32))) + attributes = {"pad_mode": "same", + "kernel_size": 1, + "strides": 1} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.loss = 1e-3 + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_16x1x2x8_float16_strides_2_valid(): + """ + Feature: Ops. + Description: create a net with input shape 16x1x2x8 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randint(1, 10, (16, 1, 2, 8)).astype(np.float16))) + attributes = {"pad_mode": "VALID", + "kernel_size": 1, + "strides": 2} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_2x32x16x16_float32_strides_2_valid(): + """ + Feature: Ops. + Description: create a net with input shape 2x32x16x16 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randn(2, 32, 16, 16).astype(np.float32))) + attributes = {"pad_mode": "SAME", + "kernel_size": 8, + "strides": 1, + "data_format": "NHWC"} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.loss = 1e-3 + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_16x32x8x12_float16_strides_2_valid(): + """ + Feature: Ops. + Description: create a net with input shape 16x32x8x12 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randn(16, 32, 8, 12).astype(np.float16))) + attributes = {"pad_mode": "valid", + "kernel_size": 3, + "strides": 2} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_2x12x12x12_float32_strides_2_same(): + """ + Feature: Ops. + Description: create a net with input shape 2x12x12x12 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randn(2, 12, 12, 12).astype(np.float32))) + attributes = {"pad_mode": "SAMe", + "kernel_size": 2, + "strides": 5, + "data_format": "NHWC"} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.loss = 1e-3 + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_2x256x3x3_float16_strides_2_same(): + """ + Feature: Ops. + Description: create a net with input shape 2x256x3x3 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randn(2, 256, 3, 3).astype(np.float16))) + attributes = {"pad_mode": "same", + "kernel_size": 8, + "strides": 6, + "data_format": "NHWC"} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_32x32x32x32_float32_strides_2_same(): + """ + Feature: Ops. + Description: create a net with input shape 32x32x32x32 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randn(32, 32, 32, 32).astype(np.float32))) + attributes = {"pad_mode": "same", + "kernel_size": 8, + "strides": (16, 2)} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.loss = 1e-3 + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_1x7x32x16_float16_strides_2d_same(): + """ + Feature: Ops. + Description: create a net with input shape 1x7x32x16 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randn(1, 7, 32, 16).astype(np.float16))) + attributes = {"pad_mode": "Valid", + "kernel_size": (2, 2), + "strides": 2} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_1x8x256x256_float32_strides_2d_valid(): + """ + Feature: Ops. + Description: create a net with input shape 1x8x256x256 for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randn(1, 8, 256, 256).astype(np.float32))) + attributes = {"pad_mode": "same", + "kernel_size": (7, 7), + "strides": (7, 7)} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.loss = 1e-3 + fact.forward_cmp() + fact.grad_cmp() + fact.highgrad_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_maxpool_input_1x8x3x3_k_4_valid(): + """ + Feature: Ops. + Description: create a net with input shape error for mindspore and pijit, compare their results. + Expectation: expect correct result. + """ + inputs = [] + inputs.append(Tensor(np.random.randn(1, 8, 3, 3).astype(np.float32))) + attributes = {"pad_mode": "valid", + "kernel_size": 4, + "strides": 1} + fact = MaxPoolMock(attributes=attributes, inputs=[inputs]) + fact.loss = 1e-3 + with pytest.raises((ValueError, RuntimeError)): + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_median.py b/tests/st/pi_jit/operation/test_median.py new file mode 100644 index 0000000000000000000000000000000000000000..ef6c1bd4a2a33f4229792fff731f4f01908458bc --- /dev/null +++ b/tests/st/pi_jit/operation/test_median.py @@ -0,0 +1,194 @@ +import pytest +import numpy as np +from mindspore import Tensor, jit, context +from ..share.ops.primitive.median_ops import MedianFactory +from ..share.ops.primitive.median_ops import Median + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_input_1d_fp32(): + """ + Feature: Ops. + Description: median算子正向测试 input_shape=(10, ), dtype=fp32. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(10,), global_median=False, axis=0, keep_dims=True, + dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_input_4d_int16(): + """ + Feature: Ops. + Description: median算子正向测试 input_shape=(10, 8, 3, 2), dtype=int16. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(10, 8, 3, 2), global_median=False, axis=0, + keep_dims=True, dtype=np.int16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_input_3d_int32(): + """ + Feature: Ops. + Description: median算子正向测试 input_shape=(10, 9, 3), dtype=int32. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(10, 9, 3), global_median=False, axis=1, + keep_dims=True, dtype=np.int32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_input_5d_int64(): + """ + Feature: Ops. + Description: median算子正向测试 input_shape=(12, 2, 3, 4, 2), dtype=int64. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(12, 2, 3, 4, 2), global_median=False, axis=0, + keep_dims=True, dtype=np.int64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_input_6d_fp32(): + """ + Feature: Ops. + Description: median算子正向测试 input_shape=(10, 9, 1, 2, 3, 4), dtype=fp32. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(10, 9, 1, 2, 3, 4), global_median=False, + axis=0, keep_dims=True) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_input_7d_fp64(): + """ + Feature: Ops. + Description: median算子正向测试 input_shape=(10, 1, 2, 3, 9, 8, 7), dtype=fp64. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(10, 1, 2, 3, 9, 8, 7), global_median=False, + axis=3, keep_dims=True, dtype=np.float64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_abnormal_axis_left_out_bound(): + """ + Feature: Ops. + Description: median算子测试 异常场景,axis out left bound. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(2, 1, 6, 32), global_median=False, axis=-5, keep_dims=False) + with pytest.raises(ValueError, + match=r"For primitive\[Median\], the axis must be in \[-4,4\), but got -5."): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_abnormal_axis_right_out_bound(): + """ + Feature: Ops. + Description: median算子测试 异常场景,axis out right bound. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(2, 1, 6, 32, 1, 2), global_median=False, + axis=6, keep_dims=True) + with pytest.raises(ValueError, + match=r"For primitive\[Median\], the axis must be in \[-6,6\), but got 6."): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_abnormal_axis_float(): + """ + Feature: Ops. + Description: median算子测试 异常场景,axis is float. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(2, 1, 6, 32), global_median=False, axis=1.2, keep_dims=True) + with pytest.raises(TypeError, + match=r"For 'Median', the type of 'axis' should be 'int', " + r"but got type 'float'."): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_abnormal_keepdims_not_bool(): + """ + Feature: Ops. + Description: median算子测试 异常场景,keep_dims is not bool. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(4, 5), global_median=False, axis=-1, keep_dims="False") + with pytest.raises(TypeError, + match=r"For 'Median', the type of 'keep_dims' should be 'bool', " + r"but got type 'str'."): + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_globalmedian_true_axis_default(): + """ + Feature: Ops. + Description: median算子测试,global_median=True, axis为默认值. + Expectation: expect correct result. + """ + fact = MedianFactory(input_shape=(3, 5), global_median=True, axis=0, + keep_dims=False, dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_median_input_same_value(): + """ + Feature: Ops. + Description: median算子测试,input含有多个相同中值. + Expectation: expect correct result. + """ + x = np.array([[2, 2, 2, 2], [2, 2, 2, 2]]).astype(np.float32) + ps_net = Median(global_median=False, axis=1, keep_dims=True) + jit(ps_net.construct, mode="PSJit") + context.set_context(mode=context.GRAPH_MODE) + y_psjit, _ = ps_net(Tensor(x)) + pi_net = Median(global_median=False, axis=1, keep_dims=True) + jit(ps_net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + y_pijit, _ = pi_net(Tensor(x)) + assert np.allclose(y_psjit.asnumpy(), y_pijit.asnumpy(), 0.0001, 0.0001) diff --git a/tests/st/pi_jit/operation/test_minimum.py b/tests/st/pi_jit/operation/test_minimum.py new file mode 100644 index 0000000000000000000000000000000000000000..9d637449e2af98716efee84ed9e84e2198b0f0c0 --- /dev/null +++ b/tests/st/pi_jit/operation/test_minimum.py @@ -0,0 +1,249 @@ +import pytest +import numpy as np +import mindspore +from mindspore import Tensor +from ..share.ops.primitive.minimum_ops import MinimumFactory +from ..share.ops.primitive.minimum_ops import Minimum + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_512x1_512x1(): + """ + Feature: Ops. + Description: Minimum算子测试,input_shape (512, 1), (512, 1). + Expectation: expect correct result. + """ + left_input = np.random.randn(512, 1).astype(np.float16) + right_input = np.random.randn(512, 1).astype(np.float16) + fact = MinimumFactory(left_input, right_input, np.float16) + fact.forward_cmp() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_2x2_2x2(): + """ + Feature: Ops. + Description: Minimum算子测试,input_shape (2, 2), (2, 2), dtype=np.float32. + Expectation: expect correct result. + """ + left_input = np.random.randn(2, 2).astype(np.float32) + right_input = np.random.randn(2, 2).astype(np.float32) + MinimumFactory(left_input, right_input, np.float32) + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_3x3x3x3_3x3x3x3(): + """ + Feature: Ops. + Description: Minimum算子测试,input_shape (3, 3, 3, 3), (3, 3, 3, 3), dtype=np.float32. + Expectation: expect correct result. + """ + left_input = np.random.randn(3, 3, 3, 3).astype(np.float32) + right_input = np.random.randn(3, 3, 3, 3).astype(np.float32) + fact = MinimumFactory(left_input, right_input, np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_5d(): + """ + Feature: Ops. + Description: Minimum算子测试,input_shape 5D &隐式类型转换. + Expectation: expect correct result. + """ + left_input = np.random.randn(3, 3, 3, 3, 5).astype(np.float16) + right_input = np.random.randn(3, 3, 3, 3, 5).astype(np.float32) + fact = MinimumFactory(left_input, right_input, np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_6d(): + """ + Feature: Ops. + Description: Minimum算子测试,input_shape 6D. + Expectation: expect correct result. + """ + left_input = np.random.randn(3, 3, 3, 3, 5, 4).astype(np.float32) + right_input = np.random.randn(3, 3, 3, 3, 5, 4).astype(np.float32) + fact = MinimumFactory(left_input, right_input, np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_7d(): + """ + Feature: Ops. + Description: Minimum算子测试,input_shape 7D. + Expectation: expect correct result. + """ + left_input = np.random.randn(3, 3, 3, 3, 5, 4, 3).astype(np.float32) + right_input = np.random.randn(3, 3, 3, 3, 5, 4, 3).astype(np.float32) + fact = MinimumFactory(left_input, right_input, np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_3dtensor_scalar(): + """ + Feature: Ops. + Description: Minimum算子测试,left_input 3dtensor, right_input scalar, dtype=np.float32. + Expectation: expect correct result. + """ + left_input = np.random.randn(128, 128, 64).astype(np.float32) + right_input = np.array(3.2).astype(np.float32) + fact = MinimumFactory(left_input, right_input, np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_scalar_3dtensor(): + """ + Feature: Ops. + Description: Minimum算子测试,left_input scalar, right_input 3dtensor, dtype=np.float32. + Expectation: expect correct result. + """ + left_input = np.array(3.2).astype(np.float32) + right_input = np.random.randn(128, 128, 64).astype(np.float32) + fact = MinimumFactory(left_input, right_input, np.float32) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_forward_input_2dtensor_3dtensor_int32(): + """ + Feature: Ops. + Description: Minimum算子异常场景测试,left_input 2dtensor, right_input 3dtensor, dtype=np.int32. + Expectation: expect correct result. + """ + left_input = np.random.randn(3, 3).astype(np.int32) + right_input = np.random.randn(1, 3, 2).astype(np.int32) + fact = MinimumFactory(left_input, right_input, grad=right_input) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_dtype_int64(): + """ + Feature: Ops. + Description: minimum, dtype=int64. + Expectation: expect correct result. + """ + left_input = np.random.randint(0, 25, size=(3, 3)).astype(np.int64) + right_input = np.random.randint(0, 25, size=(3, 3)).astype(np.int64) + fact = MinimumFactory(left_input, right_input, dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_input_bool_tensor_int32(): + """ + Feature: Ops. + Description: minimum, bool & Tensor(int32). + Expectation: expect correct result. + """ + left_input_np = False + right_input_np = np.array([-1, 0, 1]) + net = Minimum() + out_me = net(left_input_np, Tensor(right_input_np, mindspore.int32)) + out_np = np.minimum(left_input_np, right_input_np) + assert out_me.asnumpy().all() == out_np.all() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_dtype_int8(): + """ + Feature: Ops. + Description: minimum, dtype=int8. + Expectation: expect correct result. + """ + left_input = np.random.randint(0, 25, size=(3, 3)).astype(np.int8) + right_input = np.random.randint(0, 25, size=(3, 3)).astype(np.int8) + fact = MinimumFactory(left_input, right_input, dtype=np.int8) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_dtype_int16(): + """ + Feature: Ops. + Description: minimum, dtype=int16. + Expectation: expect correct result. + """ + left_input = np.random.randint(0, 25, size=(3, 3)).astype(np.int16) + right_input = np.random.randint(0, 25, size=(3, 3)).astype(np.int16) + fact = MinimumFactory(left_input, right_input, dtype=np.int16) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_dtype_uint16(): + """ + Feature: Ops. + Description: minimum, dtype=uint16. + Expectation: expect correct result. + """ + left_input = np.random.randint(0, 25, size=(3, 3)).astype(np.uint16) + right_input = np.random.randint(0, 25, size=(3, 3)).astype(np.uint16) + fact = MinimumFactory(left_input, right_input, dtype=np.uint16) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_dtype_uint8(): + """ + Feature: Ops. + Description: minimum, dtype=uint8. + Expectation: expect correct result. + """ + left_input = np.random.randint(0, 25, size=(3, 3)).astype(np.uint8) + right_input = np.random.randint(0, 25, size=(3, 3)).astype(np.uint8) + fact = MinimumFactory(left_input, right_input, dtype=np.uint8) + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_minimum_dtype_int32_tensor_interface(): + """ + Feature: Ops. + Description: minimum, tensor interface, int32. + Expectation: expect correct result. + """ + left_input = np.random.randint(0, 25, size=(3, 3)).astype(np.int32) + right_input = np.random.randint(0, 25, size=(3, 3)).astype(np.int32) + out_np = np.minimum(left_input, right_input).astype(np.int32) + output = Tensor(left_input).minimum(Tensor(right_input)) + assert output.asnumpy().all() == out_np.all() diff --git a/tests/st/pi_jit/operation/test_pow.py b/tests/st/pi_jit/operation/test_pow.py new file mode 100644 index 0000000000000000000000000000000000000000..9c263ec654c9479c4c5f12b6f6153b7399466078 --- /dev/null +++ b/tests/st/pi_jit/operation/test_pow.py @@ -0,0 +1,372 @@ +import pytest +import numpy as np +from mindspore import context, jit +import mindspore as ms +from mindspore.nn import Cell +import mindspore.ops.operations as op +from mindspore.common.tensor import Tensor +from ..share.ops.primitive.pow_ops import PowFactory +from ..share.utils import allclose_nparray + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_25x51(): + """ + Feature: Ops. + Description: pow算子测试, input 25x51. + Expectation: expect correct result. + """ + exp_np = 2.000000 + fact = PowFactory(input_shape=(25, 51), exp=exp_np, dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_nx512(): + """ + Feature: Ops. + Description: pow算子测试,input (64, 96, 128)x512. + Expectation: expect correct result. + """ + for n in (64, 96, 128): + exp_np = 2.000000 + fact = PowFactory(input_shape=(n, 512), exp=exp_np, dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_256_512(): + """ + Feature: Ops. + Description: pow算子测试,input (256, 512). + Expectation: expect correct result. + """ + exp_np = 2 + fact = PowFactory(input_shape=(256, 512), exp=exp_np, dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_512_256(): + """ + Feature: Ops. + Description: pow算子测试,input (512, 256). + Expectation: expect correct result. + """ + exp_np = np.absolute(np.random.randn()) + fact = PowFactory(input_shape=(512, 256), exp=exp_np, dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_scalar_exp_scalar_invalid(): + """ + Feature: Ops. + Description: pow算子测试,input=-1.35, exp=2.35. + Expectation: expect correct result. + """ + fact = PowFactory(input_shape=(1, 1), exp=2.35, dtype=np.float32) + fact.input = -1.35 + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_3x5x2x2_exp_tensor(): + """ + Feature: Ops. + Description: pow算子测试,input (3, 5, 2, 2), exp (). + Expectation: expect correct result. + """ + exp_np = np.absolute(np.random.randn()) + fact = PowFactory(input_shape=(3, 5, 2, 2), exp=exp_np, dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_3x5x2x2x12_exp_2(): + """ + Feature: Ops. + Description: pow算子测试,input (3, 5, 2, 2, 12). + Expectation: expect correct result. + """ + fact = PowFactory(input_shape=(3, 5, 2, 2, 12), exp=2.00000, dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_3x5x2x2x1x1_exp_bool(): + """ + Feature: Ops. + Description: pow算子测试,input (3, 5, 2, 2, 12, 2). + Expectation: expect correct result. + """ + fact = PowFactory(input_shape=(3, 5, 2, 2, 12, 2), exp=True, dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_3x5x2x2x12x2x32_exp_tensor(): + """ + Feature: Ops. + Description: pow算子测试,input (3, 5, 2, 2, 12, 2, 32). + Expectation: expect correct result. + """ + exp_np = np.absolute(np.random.randn(), dtype=np.float16) + fact = PowFactory(input_shape=(3, 5, 2, 2, 12, 2, 32), exp=Tensor(exp_np), dtype=np.float16) + fact.forward_cmp() + fact.exp = exp_np.astype(np.float16) + fact.exp = Tensor(exp_np.astype(np.float16)) + fact.grad_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_exp_not_broadcastable(): + """ + Feature: Ops. + Description: pow算子测试,input (2, 2), exp (3, 2). + Expectation: expect correct result. + """ + exp_np = np.random.randn(3, 2).astype(np.float32) + fact = PowFactory(input_shape=(2, 2), exp=Tensor(exp_np, ms.float32)) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_1_scalar(): + """ + Feature: Ops. + Description: pow算子测试,input (2, 2), exp (3, 2). + Expectation: expect correct result. + """ + exp_np = np.absolute(np.random.randn(), dtype=np.float16) + fact = PowFactory(input_shape=(1,), exp=Tensor(exp_np), dtype=np.float32) + fact.forward_cmp() + fact.exp = exp_np.astype(np.float32) + fact.exp = Tensor(fact.exp) + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_1_1(): + """ + Feature: Ops. + Description: pow算子测试,input (2, 2), exp (3, 2). + Expectation: expect correct result. + """ + exp_np = np.abs(np.random.randn(1)) + fact = PowFactory(input_shape=(1,), exp=Tensor(exp_np, ms.int32)) + fact.forward_cmp() + fact.exp = exp_np.astype(np.float32) + fact.exp = Tensor(fact.exp) + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_scalar_negative_exp_scalar_positive_2(): + """ + Feature: Ops. + Description: pow算子测试,input (1), exp 2.0. + Expectation: expect correct result. + """ + exp = 2.0 + fact = PowFactory(input_shape=(1,), exp=exp, dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_scalar_negative_exp_scalar_positive(): + """ + Feature: Ops. + Description: pow算子测试,input (1), exp (2.5). + Expectation: expect correct result. + """ + exp = 2.5 + fact = PowFactory(input_shape=(1,), exp=exp, dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_exp_broadcastable_2d(): + """ + Feature: Ops. + Description: pow算子测试,input (2, 2), exp (3, 2). + Expectation: expect correct result. + """ + exp_np = np.random.randn(1, 2).astype(np.float32) + fact = PowFactory(input_shape=(2, 2), exp=Tensor(exp_np, ms.float32)) + fact.forward_cmp() + fact.exp = exp_np.astype(np.float32) + fact.exp = Tensor(fact.exp) + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_num_exp_tensor(): + """ + Feature: Ops. + Description: pow算子测试,input 3.0, exp tensor. + Expectation: expect correct result. + """ + class Net(Cell): + def __init__(self, input_np): + super(Net, self).__init__() + self.pow = op.Pow() + self.input_np = input_np + + @jit(mode="PIJit") + def construct(self, exp): + return self.pow(input_np, exp) + + input_np = 3.0 + exp = Tensor(2, dtype=ms.float32) + pow_net = Net(input_np) + jit(pow_net.construct, mode="PSJit") + context.set_context(mode=context.GRAPH_MODE) + psjit_out = pow_net(exp) + + pow_net = Net(input_np) + jit(pow_net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + pijit_out = pow_net(exp) + + allclose_nparray(pijit_out.asnumpy(), psjit_out.asnumpy(), 0.001, 0.001) + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_float_exp_tensor(): + """ + Feature: Ops. + Description: pow算子测试,input 3.0, exp tensor. + Expectation: expect correct result. + """ + class Net(Cell): + def __init__(self, input_np): + super(Net, self).__init__() + self.pow = op.Pow() + self.input_np = input_np + + def construct(self, exp): + return self.pow(input_np, exp) + + input_np = True + exp = Tensor(2, dtype=ms.float32) + net = Net(input_np) + jit(net.construct, mode="PSJit") + context.set_context(mode=context.GRAPH_MODE) + psjit_out = net(exp) + + net = Net(input_np) + jit(net.construct, mode="PSJit") + context.set_context(mode=context.GRAPH_MODE) + pijit_out = net(exp) + + allclose_nparray(pijit_out.asnumpy(), psjit_out.asnumpy(), 0.001, 0.001) + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_bool_exp_tensor(): + """ + Feature: Ops. + Description: pow算子测试,input bool, exp tensor. + Expectation: expect correct result. + """ + class Net(Cell): + def __init__(self, input_np): + super(Net, self).__init__() + self.pow = op.Pow() + self.input_np = input_np + + def construct(self, exp): + return self.pow(input_np, exp) + + input_np = True + exp = Tensor(2, dtype=ms.float32) + net = Net(input_np) + jit(net.construct, mode="PSJit") + context.set_context(mode=context.GRAPH_MODE) + psjit_out = net(exp) + net = Net(input_np) + jit(net.construct, mode="PIJit") + context.set_context(mode=context.PYNATIVE_MODE) + pijit_out = net(exp) + + allclose_nparray(pijit_out.asnumpy(), psjit_out.asnumpy(), 0.001, 0.001) + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_exp_tensor_bool(): + """ + Feature: Ops. + Description: pow算子测试,input (2, 2), exp Tensor(True). + Expectation: expect correct result. + """ + exp = Tensor(True, ms.bool_) + fact = PowFactory(input_shape=(2, 2), exp=exp) + fact.forward_cmp() + + fact.exp = fact.exp.asnumpy().astype(np.float32) + fact.exp = exp + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_pow_input_exp_bool(): + """ + Feature: Ops. + Description: pow算子测试,input (2, 2), exp True. + Expectation: expect correct result. + """ + fact = PowFactory(input_shape=(2, 2), exp=False) + fact.forward_cmp() + fact.grad_cmp() diff --git a/tests/st/pi_jit/operation/test_range.py b/tests/st/pi_jit/operation/test_range.py new file mode 100644 index 0000000000000000000000000000000000000000..f69a2619892f4ccfc82fd1fe442e90f6a1943348 --- /dev/null +++ b/tests/st/pi_jit/operation/test_range.py @@ -0,0 +1,224 @@ +from ..share.ops.primitive.p_range_ops import OpsRangeFactory +import pytest +import numpy as np + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_start_1_limit_5_delta_2_max_50_int32(): + """ + Feature: Ops. + Description: range算子正向测试,start=1, limit=5, delta=2,maxlen=50, int32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=1, limit=5, delta=2, maxlen=50, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_limit_100_delta_2_max_200_fp32(): + """ + Feature: Ops. + Description: range算子正向测试,start=0.2, limit=100, delta=2, maxlen=200, float32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=0.2, limit=100, delta=2, maxlen=200, dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_start_320_limit_1000_delta_032_max_2500_fp32(): + """ + Feature: Ops. + Description: range算子正向测试,start=320, limit=1000.8, delta=0.32,maxlen=2500, float32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=320, limit=1000.8, delta=0.32, maxlen=2500, dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_start_neg_int32(): + """ + Feature: Ops. + Description: range算子正向测试,start=-1, int32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=-1, limit=1000, delta=1, maxlen=3500, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_start_neg_fp32(): + """ + Feature: Ops. + Description: range算子正向测试,start=-1, int32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=-0.5, limit=1000.8, delta=0.32, maxlen=3500, dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_limit_neg_int32(): + """ + Feature: Ops. + Description: range算子正向测试,limit=-1, float32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=-100, limit=-1, delta=1, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_maxlen_10000000_int32(): + """ + Feature: Ops. + Description: range算子正向测试,maxlen=1千万, int32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=1, limit=9000000, delta=1, maxlen=10000000, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_maxlen_10000000_float32(): + """ + Feature: Ops. + Description: range算子正向测试,maxlen=1千万, float32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=1, limit=8000, delta=0.01, maxlen=10000000, dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_limit_0_int32(): + """ + Feature: Ops. + Description: range算子正向测试,limit=0 int32. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=0, limit=0, delta=1, maxlen=3500, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_start_1_limit_5_delta_2_max_50_int64(): + """ + Feature: Ops. + Description: range算子正向测试,start=1, limit=5, delta=2,maxlen=50, int64. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=1, limit=5, delta=2, maxlen=50) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_limit_100_delta_2_max_200_fp64(): + """ + Feature: Ops. + Description: range算子正向测试,start=0.2, limit=100, delta=2, maxlen=200, float64. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=0.2, limit=100, delta=2, maxlen=200, dtype=np.float64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_start_320_limit_1000_delta_032_max_2500_fp64(): + """ + Feature: Ops. + Description: range算子正向测试,start=320, limit=1000.8, delta=0.32,maxlen=2500, float64. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=320, limit=1000.8, delta=0.32, maxlen=2500, dtype=np.float64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_start_neg_int64(): + """ + Feature: Ops. + Description: range算子正向测试,start=-1, int64. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=-1, limit=1000, delta=1, maxlen=3500, dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_start_neg_fp64(): + """ + Feature: Ops. + Description: range算子正向测试,start=-0.5, float64. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=-0.5, limit=1000.8, delta=0.32, maxlen=3500, dtype=np.float64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_limit_neg_int64(): + """ + Feature: Ops. + Description: range算子正向测试,limit=-1, float64. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=-100, limit=-1, delta=1, dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_maxlen_10000000_int64(): + """ + Feature: Ops. + Description: range算子正向测试,maxlen=1千万, int64. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=1, limit=9000000, delta=1, maxlen=10000000, dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_range_input_maxlen_10000000_float64(): + """ + Feature: Ops. + Description: range算子正向测试,maxlen=1千万, float64. + Expectation: expect correct result. + """ + fact = OpsRangeFactory(start=1, limit=8000, delta=0.01, maxlen=10000000, dtype=np.float64) + fact.forward_cmp() diff --git a/tests/st/pi_jit/operation/test_round.py b/tests/st/pi_jit/operation/test_round.py index c4e98659bdf8ac01c1a3265f9200c9297556223d..a105b8a41aa7e94a36b372297f5e7e7e275c1bea 100644 --- a/tests/st/pi_jit/operation/test_round.py +++ b/tests/st/pi_jit/operation/test_round.py @@ -1,6 +1,10 @@ import pytest +import mindspore.ops.operations as P +from mindspore import nn from mindspore import jit, context from ..share.utils import match_array +import numpy as np +from ..share.ops.primitive.round_ops import RoundFactory @jit(mode="PIJit") @@ -38,3 +42,194 @@ def test_round_operations(func, ms_func, x, n, error): context.set_context(mode=context.GRAPH_MODE) ms_res = ms_func(x, n) match_array(res, ms_res, error=error, err_msg=str(ms_res)) + + +class VmapRound(nn.Cell): + def __init__(self): + super().__init__() + self.round = P.Round() + + def construct(self, x): + return self.round(x) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_512x12(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 512x12, and data_type float16 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(512, 12), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_512(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 512, and data_type float16 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(512,), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_64x128x1(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 64x128x1, and data_type float16 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(64, 128, 1), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_64x128x1x512(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 64x128x1x512, and data_type float16 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(64, 128, 1, 512), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_2048(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 2048, and data_type float32 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(2048,), dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_16x1024(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape(16, 1024), and data_type float32 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(16, 1024), dtype=np.int32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_20x48_fp64(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 20x48, and data_type float64 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(20, 48), dtype=np.float64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_5x12x4_fp64(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 5x12x4, and data_type float64 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(5, 12, 4), dtype=np.float64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_32x16x128x8_int64(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 32x16x128x8, and data_type int64 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(32, 16, 128, 8), dtype=np.int64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_32x4x28x8x6_int64(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 32x4x28x8x6, and data_type int64 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(32, 4, 28, 8, 6), dtype=np.int64) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_5d(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 64x128x1x512x32, and data_type float16 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(64, 128, 1, 512, 32), dtype=np.float16) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_6d(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 64x128x1x512x32x3, and data_type float32 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(64, 128, 1, 512, 32, 3), dtype=np.float32) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_round_input_7d(): + """ + Feature: ALL TO ALL + Description: test operator round with input shape 24x8x1x12x32x3x4, and data_type int32 + Expectation: the result match + """ + fact = RoundFactory(input_shape=(24, 8, 1, 12, 32, 3, 4), dtype=np.int32) + fact.forward_cmp() + fact.grad_cmp() diff --git a/tests/st/pi_jit/operation/test_sin.py b/tests/st/pi_jit/operation/test_sin.py new file mode 100644 index 0000000000000000000000000000000000000000..071630a3c7d6a85b7a68fd5ce15970712f539cb5 --- /dev/null +++ b/tests/st/pi_jit/operation/test_sin.py @@ -0,0 +1,267 @@ +import pytest +import numpy as np +from mindspore.common.tensor import Tensor +from ..share.ops.primitive.sin_ops import SinMock +from ..dynamic_shape_operations.sin import SinDynamicShapeFactory + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_32x1024x1269(): + """ + Feature: ALL TO ALL + Description: sin算子测试,input:[32 * 1024, 1269], np.float32 + Expectation: the result match + """ + input_x = Tensor(np.random.randn(32 * 1024, 1269).astype(np.float32)) + fact = SinMock(inputs=[input_x]) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_x1269(): + """ + Feature: ALL TO ALL + Description: sin算子测试,input:[1269], np.float32 + Expectation: the result match + """ + input_x = Tensor(np.random.randn(1269).astype(np.float32)) + fact = SinMock(inputs=[input_x]) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_2x4x8(): + """ + Feature: ALL TO ALL + Description: sin算子测试,input:[2, 4, 8], np.float16 + Expectation: the result match + """ + input_x = Tensor(np.random.randn(2, 4, 8).astype(np.float16)) + fact = SinMock(inputs=[input_x]) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_2x4x8x16(): + """ + Feature: ALL TO ALL + Description: sin算子测试,input:[2, 4, 8, 16], np.float32 + Expectation: the result match + """ + input_x = Tensor(np.random.randn(2, 4, 8, 16).astype(np.float32)) + fact = SinMock(inputs=[input_x]) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_2x4x8x16_fp64(): + """ + Feature: ALL TO ALL + Description: sin算子测试,input:[2, 4, 8, 16], np.float64 + Expectation: the result match + """ + input_x = Tensor(np.random.randn(2, 4, 8, 16).astype(np.float64)) + fact = SinMock(inputs=[input_x]) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_1x2x4x8x16(): + """ + Feature: ALL TO ALL + Description: sin算子测试,input:[1, 2, 4, 8, 16], np.float16 + Expectation: the result match + """ + input_x = Tensor(np.random.randn(1, 2, 4, 8, 16).astype(np.float16)) + fact = SinMock(inputs=[input_x]) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_2x4x8x16x1x16(): + """ + Feature: ALL TO ALL + Description: sin算子测试,input:[2, 4, 8, 16, 1, 16], np.float32 + Expectation: the result match + """ + input_x = Tensor(np.random.randn(2, 4, 8, 16, 1, 16).astype(np.float32)) + fact = SinMock(inputs=[input_x]) + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_input_4d_cp64(): + """ + Feature: ALL TO ALL + Description: test Sin with 4D input, dtype=complex64 + Expectation: the result match + """ + input_x_real = np.random.rand(2, 3, 5, 7).astype(np.float32) + input_x_imag = np.random.rand(2, 3, 5, 7).astype(np.float32) + input_x = Tensor((input_x_real + 1j * input_x_imag).astype(np.complex64)) + fact = SinMock(inputs=[input_x]) + fact.loss = 2e-6 + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_input_5d_cp128(): + """ + Feature: ALL TO ALL + Description: test Sin with 5D input, dtype=complex128 + Expectation: the result match + """ + input_x_real = np.random.rand(8, 4, 3, 12, 7).astype(np.float64) + input_x_imag = np.random.rand(8, 4, 3, 12, 7).astype(np.float64) + input_x = Tensor((input_x_real + 1j * input_x_imag).astype(np.complex128)) + fact = SinMock(inputs=[input_x]) + fact.loss = 2e-10 + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_input_2d_cp128(): + """ + Feature: ALL TO ALL + Description: test Sin with 2D input, dtype=complex128 + Expectation: the result match + """ + input_x_real = np.random.rand(38, 65).astype(np.float64) + input_x_imag = np.random.rand(38, 65).astype(np.float64) + input_x = Tensor((input_x_real + 1j * input_x_imag).astype(np.complex128)) + fact = SinMock(inputs=[input_x]) + fact.loss = 2e-10 + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_input_7d_cp64(): + """ + Feature: ALL TO ALL + Description: test Sin with 7D input, dtype=complex64 + Expectation: the result match + """ + input_x_real = np.random.rand(9, 6, 4, 2, 9, 8, 12).astype(np.float32) + input_x_imag = np.random.rand(9, 6, 4, 2, 9, 8, 12).astype(np.float32) + input_x = Tensor((input_x_real + 1j * input_x_imag).astype(np.complex64)) + fact = SinMock(inputs=[input_x]) + fact.loss = 2e-6 + fact.forward_cmp() + fact.grad_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_sin_input_type_not_support(): + """ + Feature: ALL TO ALL + Description: sin算子测试,input:int32,int8,uint8 + Expectation: the result match + """ + input_x1 = Tensor(np.random.randn(2, 4, 8).astype(np.int32)) + fact1 = SinMock(inputs=[input_x1]) + + input_x2 = Tensor(np.random.randn(2, 4, 8).astype(np.int8)) + fact2 = SinMock(inputs=[input_x2]) + + input_x3 = Tensor(np.random.randn(2, 4, 8).astype(np.uint8)) + fact3 = SinMock(inputs=[input_x3]) + + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact1.forward_cmp() + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact2.forward_cmp() + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact3.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_sin_input_float32(): + """ + Feature: ALL TO ALL + Description: Sin算子正反向dynamic shape测试,input_shape=(3, 16, 32), dtype=np.float32 + Expectation: the result match + """ + input_x = Tensor(np.random.randn(3, 16, 32).astype(np.float32)) + fact = SinMock(inputs=[input_x]) + fact.forward_dynamic_shape_cmp() + fact.grad_dynamic_shape_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_sin_float32(): + """ + Feature: ALL TO ALL + Description: test sin with dynamic shape input, dtype=float32 + Expectation: the result match + """ + input_x = Tensor(np.random.rand(2, 10, 5, 10).astype(np.float32)) + indices = Tensor(np.random.choice(3, 2, replace=False).astype(np.int32)) + fact = SinDynamicShapeFactory([input_x, indices], dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_sin_float16(): + """ + Feature: ALL TO ALL + Description: test sin with dynamic shape input, dtype=float16 + Expectation: the result match + """ + input_x = Tensor(np.random.rand(1, 1, 2, 4, 10).astype(np.float16)) + indices = Tensor(np.random.choice(3, 1, replace=False).astype(np.int32)) + fact = SinDynamicShapeFactory([input_x, indices], dtype=np.float16) + fact.forward_cmp() + + +@pytest.mark.level2 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_tensor_sin(): + """ + Feature: ALL TO ALL + Description: test tensor API sin + Expectation: the result match + """ + input_x = Tensor(np.random.random((8, 3, 6)).astype(np.float32)) + fact = SinMock(inputs=[input_x]) + fact.forward_tensor_cmp() diff --git a/tests/st/pi_jit/operation/test_slice.py b/tests/st/pi_jit/operation/test_slice.py new file mode 100644 index 0000000000000000000000000000000000000000..31c8b0f56ab3e782efcb52099122d6cab974670d --- /dev/null +++ b/tests/st/pi_jit/operation/test_slice.py @@ -0,0 +1,477 @@ +import numpy as np +import pytest +from mindspore import Tensor +from mindspore.common import dtype as mstype +from ..share.ops.primitive.slice_ops import SliceFactory +from ..share.ops.primitive.slice_ops import SliceMock +from ..dynamic_shape_operations.slice import DynamicShapeSliceFactory + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_n1024x1270_0x0_4x4(): + """ + Feature: ALL TO ALL + Description: test sin with dynamic shape input, dtype=float16 + Expectation: the result match + """ + for n in (128,): + input_shape = (n * 1024, 1270) + begin = (0, 0) + size = (4, 4) + fact = SliceFactory(input_shape, begin, size, dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_8x32x6_0x28x0_8x4x6(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=(8, 32, 6), begin=(0, 28, 0), size=(8, 4, 6) + Expectation: the result match + """ + input_shape = (8, 32, 6) + begin = (0, 28, 0) + size = (8, 4, 6) + fact = SliceFactory(input_shape, begin, size) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_2d(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=(8, 87), begin=(0, 56), size=(8, 27) + Expectation: the result match + """ + input_shape = (8, 87) + begin = (0, 56) + size = (8, 27) + fact = SliceFactory(input_shape, begin, size) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_3d(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=(8, 87, 4), begin=(0, 56, 0), size=(8, 27, 4) + Expectation: the result match + """ + input_shape = (8, 87, 4) + begin = (0, 56, 0) + size = (8, 27, 4) + fact = SliceFactory(input_shape, begin, size) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_0d_fp64(): + """ + Feature: ALL TO ALL + Description: slice算子测试,the type of input is float64, shape 0d + Expectation: the result match + """ + input_x = Tensor(np.random.randn(), dtype=mstype.float64) + begin = () + size = () + fact = SliceMock(inputs=[input_x, begin, size]) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_0d_dtype_complex64(): + """ + Feature: ALL TO ALL + Description: test slice with input shape from 0d, type complex64 + Expectation: the result match + """ + x_real = np.random.randn() + x_imag = np.random.randn() + x = Tensor((x_real + 1j * x_imag), dtype=mstype.complex64) + begin = () + size = () + fact = SliceMock(inputs=[x, begin, size]) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_5d_dtype_complex64_begin_size_shape_larger_than_input_x(): + """ + Feature: ALL TO ALL + Description: slice算子测试,test slice with input shape from 5d, type complex128, real type float16 + Expectation: the result match + """ + x_real = np.random.randn(12, 32, 18, 24, 8).astype(np.float16) + x_imag = np.random.randn(12, 32, 18, 24, 8).astype(np.float64) + x = Tensor((x_real + 1j * x_imag), dtype=mstype.complex64) + begin = (0, 12, 6, 12, 5) + size = (8, 9, 6, 12, 5) + fact = SliceMock(inputs=[x, begin, size]) + with pytest.raises(ValueError): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_begin_bool(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape(8,87,4), begin=bool, size=(8, 57, 4) + Expectation: the result match + """ + input_shape = (8, 87, 4) + begin = [True, False, True] + size = (8, 57, 4) + fact = SliceFactory(input_shape, begin, size) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_begin_int(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape(8,87,4), begin=int, size=(8, 57, 4) + Expectation: the result match + """ + input_shape = (8, 87, 4) + begin = 0 + size = (8, 57, 4) + fact = SliceFactory(input_shape, begin, size) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_begin_list(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape(8,87,4), begin=list, size=(8, 57, 4) + Expectation: the result match + """ + input_shape = (8, 87, 4) + begin = [1, 0.1] + size = (8, 57, 4) + fact = SliceFactory(input_shape, begin, size) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_size_bool(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape(8, 87, 4), size=bool, begin=(8, 57, 4) + Expectation: the result match + """ + input_shape = (8, 87, 4) + size = True + begin = (8, 57, 4) + fact = SliceFactory(input_shape, begin, size) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_size_list(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape(8, 87, 4), size=list, begin=(8, 57, 4) + Expectation: the result match + """ + input_shape = (8, 87, 4) + size = [1, 0.1] + begin = (8, 57, 4) + fact = SliceFactory(input_shape, begin, size) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_size_int(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape(8, 87, 4), size=int, begin=(8, 57, 4) + Expectation: the result match + """ + input_shape = (8, 87, 4) + size = 2 + begin = (8, 57, 4) + fact = SliceFactory(input_shape, begin, size) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_4d_dtype_float16(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=4d, float64 + Expectation: the result match + """ + input_shape = (1, 2, 3, 4) + begin = (0, 0, 0, 0) + size = (1, 1, 1, 4) + fact = SliceFactory(input_shape, begin, size, dtype=np.float16) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_5d_dtype_float32(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=5d dtype=fp32 + Expectation: the result match + """ + input_shape = (1, 2, 3, 4, 5) + begin = (0, 0, 0, 0, 0) + size = (1, 1, 1, 1, 5) + fact = SliceFactory(input_shape, begin, size, dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_shape_2x32x112x112x48_begin_1x2x3x4x5_size_1x10x1x20x40_fp32(): + """ + Feature: ALL TO ALL + Description: slice算子测试, input shape [2, 32, 112, 112, 48] + begin=[1, 2, 3, 4, 5] + size=[1, 10, 1, 20, 40] + type=float32 + Expectation: the result match + """ + input_shape = (2, 32, 112, 112, 48) + begin = (1, 2, 3, 4, 5) + size = (1, 10, 1, 20, 40) + fact = SliceFactory(input_shape, begin, size, dtype=np.float32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_5d_dtype_fp16(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=5d dtype=fp16 + Expectation: the result match + """ + input_shape = (2, 32, 112, 112, 48) + begin = (1, 2, 3, 4, 5) + size = (1, 10, 1, 20, 40) + fact = SliceFactory(input_shape, begin, size, dtype=np.float16) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_5d_dtype_fp64(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=5d dtype=fp64 + Expectation: the result match + """ + input_shape = (2, 32, 112, 112, 48) + begin = (1, 2, 3, 4, 5) + size = (1, 10, 1, 20, 40) + fact = SliceFactory(input_shape, begin, size, dtype=np.float64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_5d_dtype_int64(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=5d dtype=int64 + Expectation: the result match + """ + input_shape = (2, 32, 112, 112, 48) + begin = (1, 2, 3, 4, 5) + size = (1, 10, 1, 20, 40) + fact = SliceFactory(input_shape, begin, size, dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_5d_dtype_int32(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=5d dtype=int32 + Expectation: the result match + """ + input_shape = (2, 32, 112, 112, 48) + begin = (1, 2, 3, 4, 5) + size = (1, 10, 1, 20, 40) + fact = SliceFactory(input_shape, begin, size, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_5d_dtype_int16(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=5d dtype=int16 + Expectation: the result match + """ + input_shape = (2, 32, 112, 112, 48) + begin = (1, 2, 3, 4, 5) + size = (1, 10, 1, 20, 40) + fact = SliceFactory(input_shape, begin, size, dtype=np.int16) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_6d_dtype_int64(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=5d dtype=int64 + Expectation: the result match + """ + input_shape = (1, 2, 3, 4, 5, 6) + begin = (0, 0, 0, 0, 0, 3) + size = (1, 1, 1, 1, 2, 2) + fact = SliceFactory(input_shape, begin, size, dtype=np.int64) + fact.forward_cmp() + + +@pytest.mark.level5 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_input_dtype_int32(): + """ + Feature: ALL TO ALL + Description: slice算子测试,input_shape=int32 + Expectation: the result match + """ + input_shape = (56, 45) + begin = (10, 9) + size = (3, 6) + fact = SliceFactory(input_shape, begin, size, dtype=np.int32) + fact.forward_cmp() + + +@pytest.mark.level3 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_p_slice_size_greater_than_input(): + """ + Feature: ALL TO ALL + Description: slice算子测试,size greater than input + Expectation: the result match + """ + input_shape = (56, 45) + begin = (8, 9) + size = (1, 1, 1) + fact = SliceFactory(input_shape, begin, size, dtype=np.float32) + with pytest.raises((RuntimeError, TypeError, ValueError)): + fact.forward_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_slice_input_3d(): + """ + Feature: ALL TO ALL + Description: slice算子动态shape测试,input_shape 3d + Expectation: the result match + """ + input_shape = (8, 32, 6) + begin = (0, 28, 0) + size = (8, 4, 6) + fact = SliceFactory(input_shape, begin, size) + fact.forward_cmp() + fact.forward_dynamic_shape_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_slice_input_5d(): + """ + Feature: ALL TO ALL + Description: slice算子动态shape测试,input_shape 5d + Expectation: the result match + """ + input_shape = (12, 32, 18, 24, 8) + begin = (0, 12, 6, 12, 2) + size = (8, 9, 6, 12, 5) + fact = SliceFactory(input_shape, begin, size) + fact.forward_cmp() + fact.forward_dynamic_shape_cmp() + + +@pytest.mark.level1 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_slice_input_7d(): + """ + Feature: ALL TO ALL + Description: slice算子动态shape测试,input_shape 7d + Expectation: the result match + """ + input_shape = (1, 2, 3, 4, 5, 6, 7) + begin = (0, 0, 1, 2, 0, 3, 1) + size = (1, 1, 1, 1, 2, 2, 5) + fact = SliceFactory(input_shape, begin, size) + fact.forward_cmp() + fact.forward_dynamic_shape_cmp() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_dynamic_shape_p_slice_2d_fp32(): + """ + Feature: ALL TO ALL + Description: slice算子动态shape测试,input_shape 2d with float32 + Expectation: the result match + """ + input_shape = (8, 512) + begin = (0,) + size = (128,) + axis = np.array([0]) + fact = DynamicShapeSliceFactory(input_shape, begin, size, axis, dtype=np.float32) + fact.forward_cmp() diff --git a/tests/st/profiler/test_ascend_profiler.py b/tests/st/profiler/test_ascend_profiler.py index caf5d8c9d1e2f18933a0233e68beda40fad582ba..7df5877508843924d202dbcf2994542d56237835 100644 --- a/tests/st/profiler/test_ascend_profiler.py +++ b/tests/st/profiler/test_ascend_profiler.py @@ -175,7 +175,7 @@ def test_collect_custom_aicpu(): profiler.analyse() aicpu_intermediate_file_list = glob.glob(f"{tmpdir}/profiler/aicpu_intermediate_*.csv") assert len(aicpu_intermediate_file_list) == 1 - s1 = {'Select', 'Xlogy', 'Cast'} + s1 = {'Cast', 'BroadcastTo', 'Select', 'Xlogy'} s2 = set() with open(aicpu_intermediate_file_list[0], 'r') as fr: reader = csv.DictReader(fr) diff --git a/tests/st/profiler/test_profiler.py b/tests/st/profiler/test_profiler.py index 8e4cd18c544206166905f5a4edea988b31a41dd1..e3eb2ba6f853919d4f4e0b25dfcfdbbd7e57fcf3 100644 --- a/tests/st/profiler/test_profiler.py +++ b/tests/st/profiler/test_profiler.py @@ -185,7 +185,6 @@ class TestProfiler: def test_ascend_profiler(self): self._train_with_profiler(device_target="Ascend", profile_memory=True) self._check_d_profiling_file() - self._check_d_profiling_step_trace_on_multisubgraph() self._check_host_profiling_file() @pytest.mark.level1 @@ -267,14 +266,6 @@ class TestProfiler: for file in d_profiler_files: assert os.path.isfile(file) - def _check_d_profiling_step_trace_on_multisubgraph(self): - step_trace_file = self.profiler_path + f'step_trace_raw_{self.rank_id}_detail_time.csv' - assert os.path.isfile(step_trace_file) - with open(step_trace_file, 'r') as csvfile: - reader = csv.DictReader(csvfile) - row_count = sum(1 for _ in reader) - assert row_count == 11 - def _check_cpu_profiling_file(self): op_detail_file = self.profiler_path + f'cpu_op_detail_info_{self.device_id}.csv' op_type_file = self.profiler_path + f'cpu_op_type_info_{self.device_id}.csv' diff --git a/tests/st/pynative/pyboost/test_pyboost_ops_abs.py b/tests/st/pynative/pyboost/test_pyboost_ops_abs.py new file mode 100644 index 0000000000000000000000000000000000000000..37ea1507ad710172b64ee2e4213859dbf357cd91 --- /dev/null +++ b/tests/st/pynative/pyboost/test_pyboost_ops_abs.py @@ -0,0 +1,67 @@ +# Copyright 2023 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import pytest +import numpy as np +import test_utils +import mindspore +from mindspore import Tensor +from mindspore.ops.auto_generate import abs +from mindspore import ops + + +@test_utils.run_with_cell +def abs_forward_func(x): + return abs(x) + + +@test_utils.run_with_cell +def abs_backward_func(x): + return ops.grad(abs_forward_func, (0))(x) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pyboost_abs_forward(): + """ + Feature: test abs operator + Description: test abs forward by pyboost + Expectation: success + """ + x = Tensor([1.0, -2.0, -3.0], mindspore.float32) + output1 = abs_forward_func(x) + assert np.allclose(output1.asnumpy(), [1.0, 2.0, 3.0]) + x = Tensor([1, 0, 0], mindspore.bool_) + output2 = abs_forward_func(x) + assert np.allclose(output2.asnumpy(), [True, False, False]) + + +@pytest.mark.level0 +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.env_onecard +def test_pyboost_abs_backward(): + """ + Feature: test abs operator + Description: test abs backward by pyboost + Expectation: success + """ + x = Tensor([1.0, -2.0, -3.0], mindspore.float32) + output1 = abs_backward_func(x) + assert np.allclose(output1.asnumpy(), [1.0, -1.0, -1.0]) + x = Tensor([1, 0, 0], mindspore.float32) + output2 = abs_backward_func(x) + assert np.allclose(output2.asnumpy(), [1.0, 0, 0]) diff --git a/tests/st/pynative/pyboost/test_utils.py b/tests/st/pynative/pyboost/test_utils.py index b6561b5eb58f2dbd19a75d47d9e2a32a6b8bb0fc..283446c345ddfcb2f1ad362df1f557cb500149b9 100644 --- a/tests/st/pynative/pyboost/test_utils.py +++ b/tests/st/pynative/pyboost/test_utils.py @@ -16,9 +16,11 @@ import os import inspect from functools import wraps + +import pytest from mindspore import nn import mindspore as ms -from mindspore import Tensor +from mindspore import Tensor, ops import numpy as np ms.set_context(jit_syntax_level=ms.STRICT) @@ -101,7 +103,6 @@ def need_run_graph_op_mode(func, args, kwargs): def run_test_func(test_func): - @wraps(test_func) def wrapper(*args, **kwargs): # call original test function @@ -118,3 +119,20 @@ def run_test_func(test_func): del os.environ['GRAPH_OP_RUN'] return wrapper + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.platform_arm_ascend_training +@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_pynative_base_tensor_data_converter(): + """ + Feature: test base-tensor convert + Description: test base-tensor convert by pynative + Expectation: success + """ + x = Tensor([1, 2, 3, 4, 5]) + out = ops.ReduceSum()(x, ops.ReLU()(Tensor(0))) + assert out == 15 diff --git a/tests/st/runtime/test_runtime_inline_control_flow.py b/tests/st/runtime/test_runtime_inline_control_flow.py index 7054ca93b15138245832d23847d08b7677339472..9dca41395223da5690363a338f2b79caeb7bc2fc 100644 --- a/tests/st/runtime/test_runtime_inline_control_flow.py +++ b/tests/st/runtime/test_runtime_inline_control_flow.py @@ -760,6 +760,54 @@ def test_if_in_if(): assert ret2 +def test_output_ref_of_parameter(): + """ + Feature: Contrtol flow inline. + Description: Inline switch node into kernel graph. + Expectation: Not throw exception. + """ + param_a = Parameter(Tensor(5, mstype.int32), name='a') + + @jit + def foo(x, y, param_a): + if x > y: + out = ops.addn([x, x, param_a]) + else: + out = ops.assign(param_a, x) + return out + + x = Tensor(2, mstype.int32) + y = Tensor(1, mstype.int32) + ret1 = foo(x, x, param_a) + ret2 = foo(x, y, param_a) + assert ret1 + assert ret2 + + +def test_gather_switch_gather_output(): + """ + Feature: Contrtol flow inline. + Description: Inline switch node into kernel graph. + Expectation: Not throw exception. + """ + param_a = Parameter(Tensor(5, mstype.int32), name='a') + + @jit + def foo(x, y, param_a): + if x > y: + out = param_a + else: + out = ops.addn([x, x, x]) + if x > y: + out = ops.assign(param_a, x) + return out + + x = Tensor(1, mstype.int32) + y = Tensor(1, mstype.int32) + ret1 = foo(x, y, param_a) + assert ret1 + + def test_if_in_if_directly(): """ Feature: Contrtol flow inline. diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index bec86c14f3bcf803ac3d0c633d4ae4175c7297ca..0176d04668cfdc86f41d7f343e4296b201713d75 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -42,7 +42,8 @@ link_directories(${MS_CCSRC_BUILD_PATH}/minddata/mindrecord) include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset) include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/minddata/dataset/kernels/image) -file(GLOB_RECURSE UT_CORE_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./ops/*.cc) +file(GLOB_RECURSE UT_CORE_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./core/abstract/*.cc ./core/utils/*.cc + ./ir/dtype/*.cc ./ir/*.cc ./mindapi/*.cc ./mindir/*.cc ./ops/*.cc ./ops/view/*.cc ./base/*.cc) file(GLOB_RECURSE UT_MINDDATA_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./dataset/*.cc ./mindrecord/*.cc) file(GLOB_RECURSE UT_MINDDATA_COMMON_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./dataset/common/*.cc) file(GLOB_RECURSE UT_API_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ./cxx_api/*.cc) diff --git a/tests/ut/cpp/backend/test_sparse_softmax_cross_entropy_with_logits_unify_mindir.cc b/tests/ut/cpp/backend/test_sparse_softmax_cross_entropy_with_logits_unify_mindir.cc index c3e5877757c14b91bf8093673e11e99e4f5d4693..720e0428f03ddaf129f709095586e9f4fe23b74f 100644 --- a/tests/ut/cpp/backend/test_sparse_softmax_cross_entropy_with_logits_unify_mindir.cc +++ b/tests/ut/cpp/backend/test_sparse_softmax_cross_entropy_with_logits_unify_mindir.cc @@ -32,7 +32,8 @@ class SparseSoftmaxCrossEntropyWithLogitsUnifyMindIR : public UT::Common { /// Description: Convert SparseSoftmaxCrossEntropyWithLogits(is_grad=false) to /// OneHot+SoftmaxCrossEntropyWithLogits+ReduceMean /// Expectation: After optimize, match OneHot+SoftmaxCrossEntropyWithLogits+ReduceMean. -TEST_F(SparseSoftmaxCrossEntropyWithLogitsUnifyMindIR, test_sparse_softmax_cross_entropy_with_logits_is_grad_is_false) { +TEST_F(SparseSoftmaxCrossEntropyWithLogitsUnifyMindIR, + DISABLED_test_sparse_softmax_cross_entropy_with_logits_is_grad_is_false) { test::ConstructGraph c; auto logits = c.NewTensorInput("logits", kFloat, {2, 3}); auto labels = c.NewTensorInput("labels", kInt32, {2}); diff --git a/tests/ut/cpp/common/common_test.cc b/tests/ut/cpp/common/common_test.cc index e4287a2458ae507db0db634ab1eb21c5192f9660..057671ca9964646edd0b6e154daf9d082221e841 100644 --- a/tests/ut/cpp/common/common_test.cc +++ b/tests/ut/cpp/common/common_test.cc @@ -15,6 +15,7 @@ */ #include "common/common_test.h" #include "utils/log_adapter.h" +#include "resource.h" #ifdef __cplusplus #if __cplusplus @@ -30,7 +31,11 @@ void Common::TearDownTestCase() {} void Common::SetUp() {} -void Common::TearDown() {} +void Common::TearDown() { + const char *suite_name = testing::UnitTest::GetInstance()->current_test_suite()->name(); + const char *test_name = testing::UnitTest::GetInstance()->current_test_info()->name(); + UT::UTResourceManager::GetInstance()->DropFuncGraph(UTKeyInfo{suite_name, test_name}); +} } // namespace UT diff --git a/tests/ut/cpp/common/common_test.h b/tests/ut/cpp/common/common_test.h index b5cb96e58da9ee4ef7fae68457e695800f4cb698..fba6ce3711209dc4934f0353bd7ed78a13112ad3 100644 --- a/tests/ut/cpp/common/common_test.h +++ b/tests/ut/cpp/common/common_test.h @@ -19,8 +19,8 @@ #include #include #include +#include #include "gtest/gtest.h" -#include "ops/auto_generate/gen_ops_primitive.h" namespace UT { class Common : public testing::Test { diff --git a/tests/ut/cpp/common/py_func_graph_fetcher.h b/tests/ut/cpp/common/py_func_graph_fetcher.h index be9d4d3c12377e3a4678bd677c8973aa9303f130..df913721accaaafd94bf83b7a3cc14e77d059713 100644 --- a/tests/ut/cpp/common/py_func_graph_fetcher.h +++ b/tests/ut/cpp/common/py_func_graph_fetcher.h @@ -18,6 +18,7 @@ #include #include +#include "resource.h" #include "ir/anf.h" #include "ir/primitive.h" #include "ir/manager.h" @@ -25,6 +26,7 @@ #include "pipeline/jit/ps/parse/parse_base.h" #include "pipeline/jit/ps/parse/parse.h" #include "pipeline/jit/ps/parse/resolve.h" +#include "gtest/gtest.h" namespace UT { diff --git a/tests/ut/cpp/common/resource.cc b/tests/ut/cpp/common/resource.cc new file mode 100644 index 0000000000000000000000000000000000000000..5713097125d9a72b265af9589994af8641f2d67a --- /dev/null +++ b/tests/ut/cpp/common/resource.cc @@ -0,0 +1,32 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "resource.h" +#include + +namespace UT { +std::shared_ptr UTResourceManager::inst_resource_manager_ = nullptr; + +std::shared_ptr UTResourceManager::GetInstance() { + static std::once_flag init_flag_ = {}; + std::call_once(init_flag_, [&]() { + if (inst_resource_manager_ == nullptr) { + inst_resource_manager_ = std::make_shared(); + } + }); + MS_EXCEPTION_IF_NULL(inst_resource_manager_); + return inst_resource_manager_; +} +} // namespace UT diff --git a/tests/ut/cpp/common/resource.h b/tests/ut/cpp/common/resource.h new file mode 100644 index 0000000000000000000000000000000000000000..080c4886acfb99033cf41979a9f14d6d01f8c930 --- /dev/null +++ b/tests/ut/cpp/common/resource.h @@ -0,0 +1,74 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ir/func_graph.h" +#include "gtest/gtest.h" + +#ifndef MINDSPORE_UT_COMMON_RESOURCE_H +#define MINDSPORE_UT_COMMON_RESOURCE_H +namespace UT { +using UTKeyInfo = std::pair; + +class UTResourceManager { + public: + UTResourceManager() = default; + ~UTResourceManager() { + for (const auto &it : all_func_graphs_) { + auto key_info = it.first; + std::cout << "Unexpected unreleased func graph resource of case:" << key_info.first << "." << key_info.second + << std::endl; + } + if (!all_func_graphs_.empty()) { + std::cout << "Please check `TearDown` function of testcase, and make sure all func graphs can be dropped after " + "case executed, otherwise core dumped might occur." + << std::endl; + } + } + + void HoldFuncGraph(const mindspore::FuncGraphPtr &fg) { + const char *suite_name = testing::UnitTest::GetInstance()->current_test_suite()->name(); + const char *test_name = testing::UnitTest::GetInstance()->current_test_info()->name(); + auto new_fg = std::make_shared(); + std::cout << "Hold func graph of case:" << suite_name << "." << test_name << std::endl; + (void)all_func_graphs_[UTKeyInfo{suite_name, test_name}].insert(fg); + } + + mindspore::FuncGraphPtr MakeAndHoldFuncGraph() { + auto func_graph = std::make_shared(); + HoldFuncGraph(func_graph); + return func_graph; + } + + void DropFuncGraph(const UTKeyInfo &ut_info) { + if (all_func_graphs_.find(ut_info) == all_func_graphs_.cend()) { + return; + } + std::cout << "Drop func graph of case:" << ut_info.first << "." << ut_info.second << std::endl; + (void)all_func_graphs_.erase(ut_info); + } + + void DropAllFuncGraphs() { all_func_graphs_.clear(); } + + static std::shared_ptr GetInstance(); + + private: + static std::shared_ptr inst_resource_manager_; + std::map> all_func_graphs_; +}; + +} // namespace UT + +#endif // MINDSPORE_UT_COMMON_RESOURCE_H diff --git a/tests/ut/cpp/distributed/cluster/topology/test_dynamic_networking.cc b/tests/ut/cpp/distributed/cluster/topology/test_dynamic_networking.cc index c9dc6a575fcedc0e4141ed83fbb514376346c1f1..552dc5552969a6409ab6f4c91c1fd6fd7478a5ad 100644 --- a/tests/ut/cpp/distributed/cluster/topology/test_dynamic_networking.cc +++ b/tests/ut/cpp/distributed/cluster/topology/test_dynamic_networking.cc @@ -37,7 +37,7 @@ class TestDynamicNetworking : public UT::Common { /// Feature: test the normal node registration from compute graph nodes to meta server node. /// Description: start some compute graph nodes and meta server node and send a register message. /// Expectation: these register messages are received by meta server node successfully. -TEST_F(TestDynamicNetworking, NodeRegister) { +TEST_F(TestDynamicNetworking, DISABLED_NodeRegister) { std::string server_host = "127.0.0.1"; std::string server_port = "8090"; common::SetEnv(kEnvMetaServerHost, server_host.c_str()); @@ -86,7 +86,7 @@ TEST_F(TestDynamicNetworking, NodeRegister) { /// Feature: test sending message through compute graph node to meta server node. /// Description: send a special kind of message to msn and register the corresponding message handler. /// Expectation: the registered handler received the sent message successfully. -TEST_F(TestDynamicNetworking, AddMessageHandler) { +TEST_F(TestDynamicNetworking, DISABLED_AddMessageHandler) { std::string server_host = "127.0.0.1"; std::string server_port = "8090"; common::SetEnv(kEnvMetaServerHost, server_host.c_str()); @@ -137,7 +137,7 @@ TEST_F(TestDynamicNetworking, AddMessageHandler) { /// Feature: test retrieve message from the meta server node. /// Description: send a retrieve request to msn. /// Expectation: get message from msn successfully. -TEST_F(TestDynamicNetworking, RetrieveMessageFromMSN) { +TEST_F(TestDynamicNetworking, DISABLED_RetrieveMessageFromMSN) { std::string server_host = "127.0.0.1"; std::string server_port = "8090"; common::SetEnv(kEnvMetaServerHost, server_host.c_str()); @@ -184,7 +184,7 @@ TEST_F(TestDynamicNetworking, RetrieveMessageFromMSN) { /// Feature: test the recovery of meta server node. /// Description: construct a cluster and restart the meta server node under recovery mode. /// Expectation: the meta server node is restarted successfully and all the metadata is restored. -TEST_F(TestDynamicNetworking, MetaServerNodeRecovery) { +TEST_F(TestDynamicNetworking, DISABLED_MetaServerNodeRecovery) { // Prepare the environment. std::string local_file = "recovery.dat"; char *dir = getcwd(nullptr, 0); @@ -266,7 +266,7 @@ TEST_F(TestDynamicNetworking, MetaServerNodeRecovery) { /// Description: start a cluster with one meta server node and three compute graph nodes, and then kill one of the /// compute graph node. /// Expectation: the number of alive compute graph node is equal to two. -TEST_F(TestDynamicNetworking, HeartbeatTimeout) { +TEST_F(TestDynamicNetworking, DISABLED_HeartbeatTimeout) { // Start the meta server node in the parent process. std::string server_host = "127.0.0.1"; std::string server_port = "8090"; @@ -326,7 +326,7 @@ TEST_F(TestDynamicNetworking, HeartbeatTimeout) { /// Feature: test reconnect to meta server node if needed during node registration period. /// Description: first start the compute graph node and then start the meta server node. /// Expectation: the cluster topology is constructed successfully. -TEST_F(TestDynamicNetworking, ReconnectToMetaServerDuringReg) { +TEST_F(TestDynamicNetworking, DISABLED_ReconnectToMetaServerDuringReg) { // Init the environment variables. std::string server_host = "127.0.0.1"; std::string server_port = "8090"; @@ -387,7 +387,7 @@ TEST_F(TestDynamicNetworking, ReconnectToMetaServerDuringReg) { /// Description: start the meta server node and several compute graph nodes, then restart the meta server node after the /// cluster is initialized successfully. /// Expectation: the cluster topology is shutdown finally. -TEST_F(TestDynamicNetworking, ReconnectToMetaServerDuringUnreg) { +TEST_F(TestDynamicNetworking, DISABLED_ReconnectToMetaServerDuringUnreg) { // Init the environment variables. std::string local_file = "recovery.dat"; char *dir = getcwd(nullptr, 0); @@ -470,7 +470,7 @@ TEST_F(TestDynamicNetworking, ReconnectToMetaServerDuringUnreg) { /// Feature: test get hostnames from meta server node from compute graph node. /// Description: build a cluster and call the gethostname of compute graph node. /// Expectation: the hostnames of specified compute graph node are returned. -TEST_F(TestDynamicNetworking, GetHostNames) { +TEST_F(TestDynamicNetworking, DISABLED_GetHostNames) { std::string server_host = "127.0.0.1"; std::string server_port = "8090"; common::SetEnv(kEnvMetaServerHost, server_host.c_str()); diff --git a/tests/ut/cpp/distributed/rpc/tcp/tcp_test.cc b/tests/ut/cpp/distributed/rpc/tcp/tcp_test.cc index 22e63490983d8837f502b82e0dc5241ef85d178c..899445a952b691dfb87344e3ac600459ecb72e6e 100644 --- a/tests/ut/cpp/distributed/rpc/tcp/tcp_test.cc +++ b/tests/ut/cpp/distributed/rpc/tcp/tcp_test.cc @@ -141,7 +141,7 @@ TEST_F(TCPTest, StartServerFail) { /// Feature: test start a socket server. /// Description: start the socket server with a specified socket. /// Expectation: the socket server is started successfully. -TEST_F(TCPTest, StartServerSucc) { +TEST_F(TCPTest, DISABLED_StartServerSucc) { std::unique_ptr server = std::make_unique(); bool ret = server->Initialize("127.0.0.1:8081"); ASSERT_TRUE(ret); @@ -151,7 +151,7 @@ TEST_F(TCPTest, StartServerSucc) { /// Feature: test normal tcp message sending. /// Description: start a socket server and send a normal message to it. /// Expectation: the server received the message sented from client. -TEST_F(TCPTest, SendOneMessage) { +TEST_F(TCPTest, DISABLED_SendOneMessage) { Init(); // Start the tcp server. @@ -193,7 +193,7 @@ TEST_F(TCPTest, SendOneMessage) { /// Feature: test sending two message continuously. /// Description: start a socket server and send two normal message to it. /// Expectation: the server received the two messages sented from client. -TEST_F(TCPTest, SendTwoMessages) { +TEST_F(TCPTest, DISABLED_SendTwoMessages) { Init(); // Start the tcp server. @@ -248,7 +248,7 @@ TEST_F(TCPTest, StartServerWithRandomPort) { /// Feature: test send the message synchronously. /// Description: start a socket server and send the message synchronously. /// Expectation: the number of bytes sent could be got synchronously. -TEST_F(TCPTest, SendSyncMessage) { +TEST_F(TCPTest, DISABLED_SendSyncMessage) { Init(); // Start the tcp server. @@ -338,7 +338,7 @@ TEST_F(TCPTest, SendLargeMessages) { /// Feature: test delete invalid tcp connection used in connection pool in tcp client when some socket error happened. /// Description: start a socket server and tcp client pair and stop the tcp server. /// Expectation: the connection from the tcp client to the tcp server will be deleted automatically. -TEST_F(TCPTest, DeleteInvalidConnectionForTcpClient) { +TEST_F(TCPTest, DISABLED_DeleteInvalidConnectionForTcpClient) { pid_t pid = fork(); EXPECT_LE(0, pid); diff --git a/tests/ut/cpp/func_graph_builder/func_graph_builder_test.cc b/tests/ut/cpp/func_graph_builder/func_graph_builder_test.cc index 0ca8f56ace13021cfeece9ec74c9595ce6f4af34..3fb2618d666e6ac85e7aeff248d5a79064fb14d0 100644 --- a/tests/ut/cpp/func_graph_builder/func_graph_builder_test.cc +++ b/tests/ut/cpp/func_graph_builder/func_graph_builder_test.cc @@ -22,16 +22,13 @@ #include "include/common/utils/convert_utils.h" #include "ops/arithmetic_ops.h" #include "ops/other_ops.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore { class TestFuncGraphBuilder : public UT::Common { public: TestFuncGraphBuilder() : get_py_fun_("gtest_input.pipeline.pi.func_graph_builder", true) {} - virtual void SetUp(); - - virtual void TearDown(); - bool CheckEqual(const FuncGraphPtr &fg1, const FuncGraphPtr &fg2) { equiv_graph_.clear(); equiv_node_.clear(); @@ -44,10 +41,6 @@ class TestFuncGraphBuilder : public UT::Common { NodeMapEquiv equiv_node_; }; -void TestFuncGraphBuilder::SetUp() {} - -void TestFuncGraphBuilder::TearDown() {} - // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to add inputs and add outputs. // Expectation: The expected graph is constructed. @@ -69,7 +62,7 @@ TEST_F(TestFuncGraphBuilder, TestAddInputAddOutput) { // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to add cnode. // Expectation: The expected graph is constructed. -TEST_F(TestFuncGraphBuilder, TestAddNodeAndSingleOutput) { +TEST_F(TestFuncGraphBuilder, DISABLED_TestAddNodeAndSingleOutput) { FuncGraphBuilder func_graph_builder; py::int_ int_v1 = 1; auto input1 = func_graph_builder.AddInput(int_v1); @@ -94,7 +87,7 @@ TEST_F(TestFuncGraphBuilder, TestAddNodeAndSingleOutput) { // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to add cnode. // Expectation: The expected graph is constructed. -TEST_F(TestFuncGraphBuilder, TestAddNodeAndMultiOutput) { +TEST_F(TestFuncGraphBuilder, DISABLED_TestAddNodeAndMultiOutput) { FuncGraphBuilder func_graph_builder; py::int_ int_v1 = 1; auto input1 = func_graph_builder.AddInput(int_v1); @@ -120,7 +113,7 @@ TEST_F(TestFuncGraphBuilder, TestAddNodeAndMultiOutput) { // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to remove an output. // Expectation: The expected graph is constructed. -TEST_F(TestFuncGraphBuilder, TestRemoveOutput) { +TEST_F(TestFuncGraphBuilder, DISABLED_TestRemoveOutput) { FuncGraphBuilder func_graph_builder; py::int_ int_v1 = 1; auto input1 = func_graph_builder.AddInput(int_v1); @@ -157,7 +150,7 @@ TEST_F(TestFuncGraphBuilder, TestRemoveOutput) { // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to add cnode with constant input. // Expectation: Failed to add the node. -TEST_F(TestFuncGraphBuilder, TestAddNodeConstantInput) { +TEST_F(TestFuncGraphBuilder, DISABLED_TestAddNodeConstantInput) { FuncGraphBuilder func_graph_builder; py::int_ int_v1 = 1; auto input1 = func_graph_builder.AddInput(int_v1); @@ -194,7 +187,7 @@ TEST_F(TestFuncGraphBuilder, TestAddNodeUnCallable) { // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to add cnode with constant input. // Expectation: The expected graph is constructed. -TEST_F(TestFuncGraphBuilder, TestAddMultiNode) { +TEST_F(TestFuncGraphBuilder, DISABLED_TestAddMultiNode) { FuncGraphBuilder func_graph_builder; py::int_ int_v1 = 1; auto input1 = func_graph_builder.AddInput(int_v1); @@ -213,7 +206,7 @@ TEST_F(TestFuncGraphBuilder, TestAddMultiNode) { // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to add func_graph called node. // Expectation: The expected graph is constructed. -TEST_F(TestFuncGraphBuilder, TestAddFgCallNodeSingleOutput) { +TEST_F(TestFuncGraphBuilder, DISABLED_TestAddFgCallNodeSingleOutput) { FuncGraphBuilder func_graph_builder1; py::int_ int_v1 = 1; auto input1 = func_graph_builder1.AddInput(int_v1); @@ -250,7 +243,7 @@ TEST_F(TestFuncGraphBuilder, TestAddFgCallNodeSingleOutput) { // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to add func_graph called node. // Expectation: The expected graph is constructed. -TEST_F(TestFuncGraphBuilder, TestAddFgCallNodeMultiOutput) { +TEST_F(TestFuncGraphBuilder, DISABLED_TestAddFgCallNodeMultiOutput) { FuncGraphBuilder func_graph_builder1; py::int_ int_v1 = 1; auto input1 = func_graph_builder1.AddInput(int_v1); @@ -292,7 +285,7 @@ TEST_F(TestFuncGraphBuilder, TestAddFgCallNodeMultiOutput) { // Feature: Build graph in pi_jit. // Description: Use the func_graph_builder api to get the function or primitive from a method. // Expectation: Get the correct function or primitive. -TEST_F(TestFuncGraphBuilder, TestGetFunctionFromMethod) { +TEST_F(TestFuncGraphBuilder, DISABLED_TestGetFunctionFromMethod) { py::tuple t; auto func = FuncGraphBuilder::ConvertMethod(t.attr("index")); ASSERT_NE(func.ptr(), nullptr); diff --git a/tests/ut/cpp/ir/anf_test.cc b/tests/ut/cpp/ir/anf_test.cc index a0b4948c4879d75e7625db04167cac217c1845ac..2864ae67dd3001bcba70f0dcd6458d262a84c2b4 100644 --- a/tests/ut/cpp/ir/anf_test.cc +++ b/tests/ut/cpp/ir/anf_test.cc @@ -24,6 +24,7 @@ #include "ir/func_graph.h" #include "frontend/operator/ops.h" #include "utils/anf_utils.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore { diff --git a/tests/ut/cpp/mindir/test_node_attr_export.cc b/tests/ut/cpp/mindir/test_node_attr_export.cc index 2805cae1597f0e9098d56d730c691a27bd83d1ab..61766138f867a8d5c4b22e826f4dffbdcdd5c8d5 100644 --- a/tests/ut/cpp/mindir/test_node_attr_export.cc +++ b/tests/ut/cpp/mindir/test_node_attr_export.cc @@ -34,7 +34,7 @@ class TestLoadExport : public BackendCommon { /// Feature: MindIR node attribute export and load. /// Description: Node attribute export and load. /// Expectation: success. -TEST_F(TestLoadExport, test_export_attr) { +TEST_F(TestLoadExport, DISABLED_test_export_attr) { auto func_graph = getPyFun.CallAndParseRet("export_test", "add_node_attr_test"); tensor::TensorPtr t = std::make_shared(kFloat32->type_id(), std::vector{1, 2, 3}); @@ -65,7 +65,7 @@ TEST_F(TestLoadExport, test_export_attr) { /// Feature: MindIR export abstract scalar. /// Description: abstract scalar export and load. /// Expectation: success. -TEST_F(TestLoadExport, test_export_abstract_scalar) { +TEST_F(TestLoadExport, DISABLED_test_export_abstract_scalar) { auto func_graph = getPyFun.CallAndParseRet("export_test_scalar", "node_scalar_out_test"); // Renormalize func_graph to infer and set shape and type information. diff --git a/tests/ut/cpp/operator/grad_implementations_test.cc b/tests/ut/cpp/operator/grad_implementations_test.cc index 46a15fadada026e60b07b79ff025756c96944ae5..83c0efa815978d919742c77c34fc6a3bbee8d0ec 100644 --- a/tests/ut/cpp/operator/grad_implementations_test.cc +++ b/tests/ut/cpp/operator/grad_implementations_test.cc @@ -25,6 +25,7 @@ #include "include/common/utils/convert_utils.h" #include "ir/manager.h" #include "ir/value.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore { namespace prim { @@ -34,7 +35,7 @@ class TestGradImplementations : public UT::Common { virtual void SetUp() {} }; -TEST_F(TestGradImplementations, TestGetAugmentedGraph) { +TEST_F(TestGradImplementations, DISABLED_TestGetAugmentedGraph) { FuncGraphPtr fg = ad::g_k_prims.KPrimitive(nullptr, NewValueNode(kPrimScalarMul), nullptr); ASSERT_TRUE(fg != nullptr); diff --git a/tests/ut/cpp/operator/ops_test.cc b/tests/ut/cpp/operator/ops_test.cc index ba35d414c17a312f809d90fa1c10094c3b436f67..f81d262a112023df1a72670072ba83c319103892 100644 --- a/tests/ut/cpp/operator/ops_test.cc +++ b/tests/ut/cpp/operator/ops_test.cc @@ -150,7 +150,7 @@ TEST_F(TestOps, ScalarGeTest) { } TEST_F(TestOps, BoolNotTest) { - auto prim = std::make_shared("bool_not"); + auto prim = std::make_shared("BoolNot"); ASSERT_EQ(prim->name(), kPrimBoolNot->name()); } @@ -330,7 +330,7 @@ TEST_F(TestOps, ReturnTest) { // Miscellaneous TEST_F(TestOps, IdentityTest) { - auto prim = std::make_shared("identity"); + auto prim = std::make_shared("Identity"); ASSERT_EQ(prim->name(), kPrimIdentity->name()); } diff --git a/tests/ut/cpp/ops/test_batchmatmul.cc b/tests/ut/cpp/ops/test_batchmatmul.cc index b78d88f6472ac956ae4a65b6e7057597d75d1d35..f533a2d4bafd1f275b5e157eff86a58cc326e636 100644 --- a/tests/ut/cpp/ops/test_batchmatmul.cc +++ b/tests/ut/cpp/ops/test_batchmatmul.cc @@ -25,6 +25,7 @@ #include "ops/test_ops.h" #include "ops/test_value_utils.h" #include "ops/test_ops_cmp_utils.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore { namespace ops { diff --git a/tests/ut/cpp/ops/test_ops_argmax_ext.cc b/tests/ut/cpp/ops/test_ops_argmax_ext.cc index 0fb2bd7941a6e27557d85f7fadd4e227edc72e08..c18ebec9edc9bcda75fd2b1a6c4fcf73b187904c 100644 --- a/tests/ut/cpp/ops/test_ops_argmax_ext.cc +++ b/tests/ut/cpp/ops/test_ops_argmax_ext.cc @@ -19,6 +19,7 @@ #include "ops/ops_func_impl/argmax_ext.h" #include "ops/test_value_utils.h" #include "abstract/dshape.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore { namespace ops { diff --git a/tests/ut/cpp/ops/test_ops_concat.cc b/tests/ut/cpp/ops/test_ops_concat.cc index 69680bdbc6eacae6c408d120ecf176f41a016ae7..121b697a2d2d1d8fb97b6c1f533de73b1ba14878 100644 --- a/tests/ut/cpp/ops/test_ops_concat.cc +++ b/tests/ut/cpp/ops/test_ops_concat.cc @@ -23,6 +23,7 @@ #include "ir/primitive.h" #include "abstract/abstract_value.h" #include "abstract/ops/primitive_infer_map.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore::ops { struct ConcatParams { diff --git a/tests/ut/cpp/ops/test_ops_divmod.cc b/tests/ut/cpp/ops/test_ops_divmod.cc new file mode 100644 index 0000000000000000000000000000000000000000..e04627ea86f13fba1719090107228a2fdec62cbb --- /dev/null +++ b/tests/ut/cpp/ops/test_ops_divmod.cc @@ -0,0 +1,98 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include "common/common_test.h" +#include "ir/dtype/type.h" +#include "abstract/dshape.h" +#include "utils/tensor_construct_utils.h" +#include "ir/primitive.h" +#include "abstract/abstract_value.h" +#include "include/backend/optimizer/helper.h" +#include "ops/test_ops.h" +#include "ops/ops_func_impl/divmod.h" +#include "ops/test_value_utils.h" + +namespace mindspore { +namespace ops { + +struct DivModShape { + std::vector x_shape; + std::vector y_shape; + ValuePtr rounding_mode; + std::vector out_shape; +}; + +struct DivModType { + TypePtr x_type; + TypePtr y_type; + TypePtr out_type; +}; + +class TestDivMod : public TestOps, public testing::WithParamInterface> {}; + +TEST_P(TestDivMod, DivMod_dyn_shape) { + const auto &shape_param = std::get<0>(GetParam()); + const auto &dtype_param = std::get<1>(GetParam()); + + DivModFuncImpl DivMod_func_impl; + auto prim = std::make_shared("DivMod"); + auto x = std::make_shared(dtype_param.x_type, shape_param.x_shape); + auto y = std::make_shared(dtype_param.y_type, shape_param.y_shape); + auto expect_shape = std::make_shared(shape_param.out_shape); + auto expect_dtype = std::make_shared(dtype_param.out_type); + + auto out_shape = DivMod_func_impl.InferShape(prim, {x, y, shape_param.rounding_mode->ToAbstract()}); + ASSERT_TRUE(*out_shape == *expect_shape); + auto out_dtype = DivMod_func_impl.InferType(prim, {x, y, shape_param.rounding_mode->ToAbstract()}); + ASSERT_TRUE(*out_dtype == *expect_dtype); +} + +auto DivModOpShapeTestCases = testing::ValuesIn({ + /* y is number */ + DivModShape{{10}, {}, CreateScalar(2), {10}}, + DivModShape{{10, 1, 2}, {}, CreateScalar(2), {10, 1, 2}}, + DivModShape{{10, 4, 2}, {}, CreateScalar(2), {10, 4, 2}}, + DivModShape{{10, 1, -1}, {}, CreateScalar(2), {10, 1, -1}}, + DivModShape{{-2}, {}, CreateScalar(2), {-2}}, + /* x is number */ + DivModShape{{}, {10}, CreateScalar(2), {10}}, + DivModShape{{}, {10, 1, 2}, CreateScalar(2), {10, 1, 2}}, + DivModShape{{}, {10, 4, 2}, CreateScalar(2), {10, 4, 2}}, + DivModShape{{}, {10, 1, -1}, CreateScalar(2), {10, 1, -1}}, + DivModShape{{}, {-2}, CreateScalar(2), {-2}}, + /* x and y both tensor */ + DivModShape{{4, 5}, {2, 3, 4, 5}, CreateScalar(2), {2, 3, 4, 5}}, + DivModShape{{2, 1, 4, 5, 6, 9}, {9}, CreateScalar(2), {2, 1, 4, 5, 6, 9}}, + DivModShape{{2, 3, 4, -1}, {2, 3, 4, 5}, CreateScalar(2), {2, 3, 4, 5}}, + DivModShape{{2, 3, 4, -1}, {-1, -1, 4, 5}, CreateScalar(2), {2, 3, 4, 5}}, + DivModShape{{2, 1, 4, -1}, {-1, -1, 4, 5}, CreateScalar(2), {2, -1, 4, 5}}, + DivModShape{{2, 1, 4, 5, 6, 9}, {-2}, CreateScalar(2), {-2}}, + DivModShape{{2, 1, 4, 5, -1, 9}, {-2}, CreateScalar(2), {-2}}, + DivModShape{{-2}, {2, 1, 4, 5, 6, 9}, CreateScalar(2), {-2}}, + DivModShape{{-2}, {2, 1, 4, 5, -1, 9}, CreateScalar(2), {-2}}, + DivModShape{{-2}, {-2}, CreateScalar(2), {-2}} +}); + +auto DivModOpTypeTestCases = testing::ValuesIn({ + DivModType{kFloat16, kFloat16, kFloat16}, + DivModType{kFloat32, kFloat32, kFloat32}, + DivModType{kFloat64, kFloat64, kFloat64}, +}); + +INSTANTIATE_TEST_CASE_P(TestDivMod, TestDivMod, testing::Combine(DivModOpShapeTestCases, DivModOpTypeTestCases)); +} // namespace ops +} // namespace mindspore diff --git a/tests/ut/cpp/ops/test_ops_erf.cc b/tests/ut/cpp/ops/test_ops_erf.cc index 6bd456959ea0b945573914bf54e79af541592841..4e55450c29efcfb19feba4b5a5e9cb452871491a 100644 --- a/tests/ut/cpp/ops/test_ops_erf.cc +++ b/tests/ut/cpp/ops/test_ops_erf.cc @@ -22,7 +22,9 @@ namespace mindspore { namespace ops { OP_FUNC_IMPL_TEST_DECLARE(Erf, EltwiseOpParams); -OP_FUNC_IMPL_TEST_CASES(Erf, testing::Values(EltwiseOpParams{{2, 3}, kFloat32, {2, 3}, kFloat32}, +OP_FUNC_IMPL_TEST_CASES(Erf, testing::Values(EltwiseOpParams{{2, 3}, kBool, {2, 3}, kFloat32}, + EltwiseOpParams{{2, 3}, kInt64, {2, 3}, kFloat32}, + EltwiseOpParams{{2, 3}, kFloat32, {2, 3}, kFloat32}, EltwiseOpParams{{-1, 3}, kFloat32, {-1, 3}, kFloat32}, EltwiseOpParams{{-1, -1}, kFloat32, {-1, -1}, kFloat32}, EltwiseOpParams{{-2}, kFloat32, {-2}, kFloat32})); diff --git a/tests/ut/cpp/ops/test_ops_lin_space_ext.cc b/tests/ut/cpp/ops/test_ops_lin_space_ext.cc new file mode 100644 index 0000000000000000000000000000000000000000..e308b0223bfdba0c8874348ce699642259ea0d5f --- /dev/null +++ b/tests/ut/cpp/ops/test_ops_lin_space_ext.cc @@ -0,0 +1,59 @@ +/** + * Copyright 2023 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ops/test_ops.h" +#include "ops/ops_func_impl/lin_space_ext.h" +#include "ops/test_value_utils.h" + +namespace mindspore { +namespace ops { +struct LinSpaceExtParams { + ShapeVector start_shape; + TypePtr start_type; + ShapeVector end_shape; + TypePtr end_type; + ValuePtr steps; + ShapeVector output_shape; + TypePtr output_type; + ValuePtr dtype; +}; + + +class TestLinSpaceExt : public TestOps, public testing::WithParamInterface {}; + +TEST_P(TestLinSpaceExt, dyn_shape) { + const auto ¶m = GetParam(); + auto start = std::make_shared(param.start_type, param.start_shape); + auto end = std::make_shared(param.end_type, param.end_shape); + auto steps = param.steps->ToAbstract(); + auto dtype = param.dtype->ToAbstract(); + auto expect = std::make_shared(param.output_type, param.output_shape); + + LinSpaceExtFuncImpl lin_space_ext_func_impl; + auto prim = std::make_shared("LinSpaceExt"); + + auto out_dtype = lin_space_ext_func_impl.InferType(prim, {start, end, steps, dtype}); + ASSERT_TRUE(*out_dtype == *expect->GetType()); + auto out_shape = lin_space_ext_func_impl.InferShape(prim, {start, end, steps, dtype}); + ASSERT_TRUE(*out_shape == *expect->GetShape()); +} + +INSTANTIATE_TEST_CASE_P( + TestLinSpaceExt, TestLinSpaceExt, + testing::Values(LinSpaceExtParams{{}, kFloat64, {}, kFloat64, CreateScalar(3), {3}, kFloat32, CreatePyInt(kNumberTypeFloat32)}, + LinSpaceExtParams{{}, kFloat64, {}, kFloat64, CreateScalar(kValueAny), {-1}, kFloat64, CreatePyInt(kNumberTypeFloat64)})); +} // namespace ops +} // namespace mindspore diff --git a/tests/ut/cpp/ops/test_ops_max_pool_grad_with_indices.cc b/tests/ut/cpp/ops/test_ops_max_pool_grad_with_indices.cc new file mode 100644 index 0000000000000000000000000000000000000000..af7ca120bf21f81603322179cca88a9e196a3456 --- /dev/null +++ b/tests/ut/cpp/ops/test_ops_max_pool_grad_with_indices.cc @@ -0,0 +1,67 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "ops/test_ops.h" +#include "common/common_test.h" +#include "ir/dtype/type.h" +#include "abstract/dshape.h" +#include "utils/tensor_construct_utils.h" +#include "ir/primitive.h" +#include "abstract/abstract_value.h" +#include "ops/op_name.h" +#include "ops/ops_func_impl/max_pool_grad_with_indices.h" +#include "include/backend/optimizer/helper.h" + +namespace mindspore { +namespace ops { +struct MaxPoolGradWithIndicesParams { + ShapeVector x_shape; + TypePtr x_dtype; + ShapeVector out_shape; + TypePtr out_dtype; +}; + +class TestMaxPoolGradWithIndices : public TestOps, public testing::WithParamInterface {}; + +TEST_P(TestMaxPoolGradWithIndices, dyn_shape) { + const auto ¶m = GetParam(); + auto max_pool_grad_with_indices_func_impl = std::make_shared(); + auto prim = std::make_shared("MaxPoolGradWithIndices"); + + auto x = std::make_shared(param.x_dtype, param.x_shape); + ASSERT_NE(x, nullptr); + auto expect_shape = std::make_shared(param.out_shape); + auto expect_type = std::make_shared(param.out_dtype); + auto infer_shape = max_pool_grad_with_indices_func_impl->InferShape(prim, {x}); + ASSERT_NE(infer_shape, nullptr); + ASSERT_TRUE(*infer_shape == *expect_shape); + auto infer_type = max_pool_grad_with_indices_func_impl->InferType(prim, {x}); + ASSERT_NE(infer_type, nullptr); + ASSERT_TRUE(*infer_type == *expect_type); +} + +INSTANTIATE_TEST_CASE_P( + TestMaxPoolGradWithIndicesGroup, TestMaxPoolGradWithIndices, + testing::Values(MaxPoolGradWithIndicesParams{{1, 3, 5, 5}, kFloat16, {1, 3, 5, 5}, kFloat16}, + MaxPoolGradWithIndicesParams{{1, 3, -1, -1}, kFloat32, {1, 3, -1, -1}, kFloat32}, + MaxPoolGradWithIndicesParams{{-1, -1, -1, -1}, kFloat16, {-1, -1, -1, -1}, kFloat16}, + MaxPoolGradWithIndicesParams{{-2}, kFloat32, {-1, -1, -1, -1}, kFloat32})); + +} // namespace ops +} // namespace mindspore diff --git a/tests/ut/cpp/ops/test_ops_max_pool_with_indices.cc b/tests/ut/cpp/ops/test_ops_max_pool_with_indices.cc new file mode 100644 index 0000000000000000000000000000000000000000..7f60b81becc6928b57643ca9bd2013db70b5f98d --- /dev/null +++ b/tests/ut/cpp/ops/test_ops_max_pool_with_indices.cc @@ -0,0 +1,140 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "ops/test_ops.h" +#include "common/common_test.h" +#include "ir/dtype/type.h" +#include "abstract/dshape.h" +#include "utils/tensor_construct_utils.h" +#include "ir/primitive.h" +#include "abstract/abstract_value.h" +#include "ops/op_name.h" +#include "ops/ops_func_impl/max_pool_with_indices.h" +#include "include/backend/optimizer/helper.h" +#include "ops/test_value_utils.h" + +namespace mindspore { +namespace ops { +#define I64(x) (static_cast((x))) +struct MaxPoolWithIndicesParams { + ShapeVector x_shape; + TypePtr x_dtype; + ValuePtr kernel_size; + ValuePtr strides; + ValuePtr pads; + ValuePtr dilation; + ValuePtr ceil_mode; + ValuePtr argmax_type; + ShapeVector out1_shape; + TypePtr out1_type; + ShapeVector out2_shape; + TypePtr out2_type; +}; + +class TestMaxPoolWithIndices : public TestOps, public testing::WithParamInterface {}; + +TEST_P(TestMaxPoolWithIndices, dyn_shape) { + const auto ¶m = GetParam(); + auto max_pool_with_indices_func_impl = std::make_shared(); + auto prim = std::make_shared("MaxPoolWithIndices"); + + auto x = std::make_shared(param.x_dtype, param.x_shape); + ASSERT_NE(x, nullptr); + auto kernel_size = param.kernel_size->ToAbstract(); + ASSERT_NE(kernel_size, nullptr); + auto strides = param.strides->ToAbstract(); + ASSERT_NE(strides, nullptr); + auto pads = param.pads->ToAbstract(); + ASSERT_NE(pads, nullptr); + auto dilation = param.dilation->ToAbstract(); + ASSERT_NE(dilation, nullptr); + auto ceil_mode = param.ceil_mode->ToAbstract(); + ASSERT_NE(ceil_mode, nullptr); + auto argmax_type = param.argmax_type->ToAbstract(); + ASSERT_NE(argmax_type, nullptr); + auto expect1_shape = std::make_shared(param.out1_shape); + auto expect1_type = std::make_shared(param.out1_type); + auto expect2_shape = std::make_shared(param.out2_shape); + auto expect2_type = std::make_shared(param.out2_type); + std::vector shape_list = {expect1_shape, expect2_shape}; + auto expect_shape = std::make_shared(shape_list); + std::vector type_list = {expect1_type, expect2_type}; + auto expect_type = std::make_shared(type_list); + auto infer_shape = max_pool_with_indices_func_impl->InferShape( + prim, {x, kernel_size, strides, pads, dilation, ceil_mode, argmax_type}); + ASSERT_NE(infer_shape, nullptr); + ASSERT_TRUE(*infer_shape == *expect_shape); + auto infer_type = + max_pool_with_indices_func_impl->InferType(prim, {x, kernel_size, strides, pads, dilation, ceil_mode, argmax_type}); + ASSERT_NE(infer_type, nullptr); + ASSERT_TRUE(*infer_type == *expect_type); +} + +INSTANTIATE_TEST_CASE_P(TestMaxPoolWithIndicesGroup, TestMaxPoolWithIndices, + testing::Values(MaxPoolWithIndicesParams{{-2}, + kFloat16, + CreateTuple({I64(4), I64(4)}), + CreateTuple({I64(2), I64(2)}), + CreateTuple({I64(1), I64(1)}), + CreateTuple({I64(2), I64(2)}), + CreateScalar(false), + CreatePyInt(kNumberTypeInt64), + {-1, -1, -1, -1}, + kFloat16, + {-1, -1, -1, -1}, + kInt64}, + MaxPoolWithIndicesParams{{-1, -1, -1, -1}, + kFloat16, + CreateTuple({I64(4), I64(4)}), + CreateTuple({I64(2), I64(2)}), + CreateTuple({I64(1), I64(1)}), + CreateTuple({I64(2), I64(2)}), + CreateScalar(false), + CreatePyInt(kNumberTypeInt64), + {-1, -1, -1, -1}, + kFloat16, + {-1, -1, -1, -1}, + kInt64}, + MaxPoolWithIndicesParams{{1, 1, 8, 8}, + kFloat16, + CreateTuple({I64(4), I64(4)}), + CreateTuple({I64(2), I64(2)}), + CreateTuple({I64(1), I64(1)}), + CreateTuple({I64(2), I64(2)}), + CreateScalar(false), + CreatePyInt(kNumberTypeInt64), + {1, 1, 2, 2}, + kFloat16, + {1, 1, 2, 2}, + kInt64}, + MaxPoolWithIndicesParams{{1, 1, 8, 8}, + kFloat16, + CreateTuple({I64(4), I64(4)}), + CreateTuple({I64(2), I64(2)}), + CreateTuple({I64(1), I64(1)}), + CreateTuple({I64(2), I64(2)}), + CreateScalar(true), + CreatePyInt(kNumberTypeInt64), + {1, 1, 3, 3}, + kFloat16, + {1, 1, 3, 3}, + kInt64})); + +} // namespace ops +} // namespace mindspore diff --git a/tests/ut/cpp/ops/test_ops_select.cc b/tests/ut/cpp/ops/test_ops_select.cc new file mode 100644 index 0000000000000000000000000000000000000000..8a853dece9ee1ddb5068fdd9b9520a75902f7ab6 --- /dev/null +++ b/tests/ut/cpp/ops/test_ops_select.cc @@ -0,0 +1,44 @@ +/** + * Copyright 2024 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include "common/common_test.h" +#include "ops/ops_func_impl/select.h" +#include "ops/test_ops.h" +#include "ops/test_ops_cmp_utils.h" +#include "ops/test_value_utils.h" + +namespace mindspore { +namespace ops { +OP_FUNC_IMPL_TEST_DECLARE(Select, MultiInputOpParams); + +OP_FUNC_IMPL_TEST_CASES( + Select, + testing::Values( + MultiInputOpParams{{{2, 3}, {2, 3}, {2, 3}}, {kBool, kFloat32, kFloat32}, {{2, 3}}, {kFloat32}, {}}, + MultiInputOpParams{{{2, 3}, {2, 3}, {2, 3}}, {kBool, kFloat32, kInt32}, {{2, 3}}, {kFloat32}, {}}, + MultiInputOpParams{{{-1, 3}, {2, 3}, {2, 3}}, {kBool, kFloat32, kFloat32}, {{2, 3}}, {kFloat32}, {}}, + MultiInputOpParams{{{2, -1}, {2, 3}, {2, 3}}, {kBool, kFloat32, kFloat32}, {{2, 3}}, {kFloat32}, {}}, + MultiInputOpParams{{{2, -1}, {2, -1}, {2, -1}}, {kBool, kFloat32, kFloat32}, {{2, -1}}, {kFloat32}, {}}, + MultiInputOpParams{{{-1, -1}, {-1, -1}, {2, -1}}, {kBool, kFloat32, kFloat32}, {{2, -1}}, {kFloat32}, {}}, + MultiInputOpParams{{{-1, -1}, {-1, -1}, {-1, -1}}, {kBool, kFloat32, kFloat32}, {{-1, -1}}, {kFloat32}, {}}, + MultiInputOpParams{{{4, 5, 8}, {1, 5, 8}, {4, 1, 8}}, {kBool, kFloat32, kFloat32}, {{4, 5, 8}}, {kFloat32}, {}}, + MultiInputOpParams{{{1, 65, 54, 12, 5, 2}, {5, 5, 65, 1, 12, 5, 2}, {65, 54, 1, 5, 2}}, + {kBool, kFloat32, kFloat32}, + {{5, 5, 65, 54, 12, 5, 2}}, + {kFloat32}, + {}})); +} // namespace ops +} // namespace mindspore diff --git a/tests/ut/cpp/optimizer/ad/ad_test.cc b/tests/ut/cpp/optimizer/ad/ad_test.cc index 3095e334764252246fb8acb70487504c71a33ec0..2a8dff61bdf50917de99ee94d55840e2960397b0 100644 --- a/tests/ut/cpp/optimizer/ad/ad_test.cc +++ b/tests/ut/cpp/optimizer/ad/ad_test.cc @@ -105,17 +105,17 @@ TEST_F(TestAD, test_prim_scalar_add) { AssertExpect("test_prim_scalar_add", dg); } -TEST_F(TestAD, test_prim_scalar_mul) { +TEST_F(TestAD, DISABLED_test_prim_scalar_mul) { FuncGraphPtr dg = Kprim(NewValueNode(prim::kPrimScalarMul), resourcePtr); AssertExpect("test_prim_scalar_mul", dg); } -TEST_F(TestAD, test_prim_scalar_sub) { +TEST_F(TestAD, DISABLED_test_prim_scalar_sub) { FuncGraphPtr dg = Kprim(NewValueNode(prim::kPrimScalarSub), resourcePtr); AssertExpect("test_prim_scalar_sub", dg); } -TEST_F(TestAD, test_prim_scalar_div) { +TEST_F(TestAD, DISABLED_test_prim_scalar_div) { FuncGraphPtr dg = Kprim(NewValueNode(prim::kPrimScalarDiv), resourcePtr); AssertExpect("test_prim_scalar_div", dg); } @@ -140,22 +140,22 @@ TEST_F(TestAD, test_prim_scalar_usub) { AssertExpect("test_prim_scalar_usub", dg); } -TEST_F(TestAD, test_prim_scalar_gt) { +TEST_F(TestAD, DISABLED_test_prim_scalar_gt) { FuncGraphPtr dg = Kprim(NewValueNode(prim::kPrimScalarGt), resourcePtr); AssertExpect("test_prim_scalar_gt", dg); } -TEST_F(TestAD, test_prim_scalar_lt) { +TEST_F(TestAD, DISABLED_test_prim_scalar_lt) { FuncGraphPtr dg = Kprim(NewValueNode(prim::kPrimScalarLt), resourcePtr); AssertExpect("test_prim_scalar_lt", dg); } -TEST_F(TestAD, test_prim_scalar_ge) { +TEST_F(TestAD, DISABLED_test_prim_scalar_ge) { FuncGraphPtr dg = Kprim(NewValueNode(prim::kPrimScalarGe), resourcePtr); AssertExpect("test_prim_scalar_ge", dg); } -TEST_F(TestAD, test_prim_scalar_le) { +TEST_F(TestAD, DISABLED_test_prim_scalar_le) { FuncGraphPtr dg = Kprim(NewValueNode(prim::kPrimScalarLe), resourcePtr); AssertExpect("test_prim_scalar_le", dg); } @@ -165,7 +165,7 @@ TEST_F(TestAD, test_prim_tuple_getitem) { AssertExpect("test_prim_tuple_getitem", dg); } -TEST_F(TestAD, test_prim_identity) { +TEST_F(TestAD, DISABLED_test_prim_identity) { FuncGraphPtr dg = Kprim(NewValueNode(prim::kPrimIdentity), resourcePtr); AssertExpect("test_prim_identity", dg); } diff --git a/tests/ut/cpp/optimizer/assign_add_opt_test.cc b/tests/ut/cpp/optimizer/assign_add_opt_test.cc index 9cf396f17f83822882a2d04b911bad2ca7a0b131..0464d288d80cc162e43c8befd0523cb9ea0d0547 100644 --- a/tests/ut/cpp/optimizer/assign_add_opt_test.cc +++ b/tests/ut/cpp/optimizer/assign_add_opt_test.cc @@ -17,7 +17,7 @@ #include #include "common/common_test.h" - +#include "common/resource.h" #include "mindspore/core/ops/sequence_ops.h" #include "common/py_func_graph_fetcher.h" #include "ir/anf.h" @@ -51,7 +51,7 @@ class TestAssignAddOpt : public UT::Common { }; FuncGraphPtr GenerateBackwardFuncGraph() { - FuncGraphPtr bg = std::make_shared(); + FuncGraphPtr bg = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); bg->set_flag(FUNC_GRAPH_FLAG_CORE, true); bg->debug_info()->set_name("Backward"); std::vector shape = {64, 64}; @@ -133,7 +133,7 @@ FuncGraphPtr GenerateBackwardFuncGraph() { } FuncGraphPtr GenerateForwardGraph(FuncGraphPtr bg) { - FuncGraphPtr fg = std::make_shared(); + FuncGraphPtr fg = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); fg->set_flag(FUNC_GRAPH_FLAG_CORE, true); fg->debug_info()->set_name("Forward"); std::vector shape = {64, 64}; @@ -160,7 +160,7 @@ FuncGraphPtr GenerateForwardGraph(FuncGraphPtr bg) { // Feature: Assign add and concat eliminate opt. // Description: Merge matmul and move concat to forward for ge no_task opt. // Expectation: Each graph has one concat. -TEST_F(TestAssignAddOpt, test_assign_add_opt) { +TEST_F(TestAssignAddOpt, DISABLED_test_assign_add_opt) { auto ms_context = MsContext::GetInstance(); ms_context->set_param(MS_CTX_ENABLE_CONCAT_ELIMINATE_OPT, true); mindspore::parallel::g_device_manager = std::make_shared(); diff --git a/tests/ut/cpp/optimizer/cconv_test.cc b/tests/ut/cpp/optimizer/cconv_test.cc index 0581baa89f52c593fd7e7636ca01edbe2e6cad2c..20ad4d3768771ccb9ee07e8a5a229722cfca51cf 100644 --- a/tests/ut/cpp/optimizer/cconv_test.cc +++ b/tests/ut/cpp/optimizer/cconv_test.cc @@ -60,16 +60,12 @@ class TestCconv : public UT::Common { virtual void SetUp(); - virtual void TearDown(); - public: UT::PyFuncGraphFetcher getPyFun; }; void TestCconv::SetUp() {} -void TestCconv::TearDown() {} - TEST_F(TestCconv, TestStraight) { FuncGraphPtr func_graph = getPyFun.CallAndParseRet("get_test_cconv_fn", "test_straight"); ASSERT_TRUE(nullptr != func_graph); diff --git a/tests/ut/cpp/optimizer/lib_test.cc b/tests/ut/cpp/optimizer/lib_test.cc index 87d881e30e9be3c1e913237f6df4d7dada3cd439..49852cc3a91fa99d28eca9cc4e32fb9ece2383c4 100644 --- a/tests/ut/cpp/optimizer/lib_test.cc +++ b/tests/ut/cpp/optimizer/lib_test.cc @@ -40,13 +40,14 @@ using abstract::AnalysisResult; class TestOptLib : public UT::Common { public: TestOptLib() : getPyFun("gtest_input.optimizer.opt_test", true), irpass() {} - void SetUp() { + void SetUp() override { UT::InitPythonPath(); parse::data_converter::ClearObjectCache(); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); ms_context->set_param(MS_CTX_EXECUTION_MODE, kGraphMode); } + FuncGraphPtr RunTransform(FuncGraphPtr gbefore, const SubstitutionList &transform) { equiv_node.clear(); equiv_graph.clear(); @@ -164,7 +165,7 @@ TEST_F(TestOptLib, test_arithmetic) { ASSERT_TRUE(CheckOpt(b5, after, patterns)); } -TEST_F(TestOptLib, test_elim_cast_same_dtype) { +TEST_F(TestOptLib, DISABLED_test_elim_cast_same_dtype) { FuncGraphPtr before = getPyFun.CallAndParseRet("test_elim_cast_same_dtype", "fp32_cast_fp32"); FuncGraphPtr after = getPyFun.CallAndParseRet("test_elim_cast_same_dtype", "after"); // construct such case that cast srcT equal dstT @@ -474,7 +475,7 @@ TEST_F(TestOptLib, test_minmax_grad) { ASSERT_TRUE(CheckOpt(before4, before4, patterns)); } -TEST_F(TestOptLib, test_reducesum_one) { +TEST_F(TestOptLib, DISABLED_test_reducesum_one) { FuncGraphPtr before1 = getPyFun.CallAndParseRet("test_reducesum_one", "before_1"); FuncGraphPtr before2 = getPyFun.CallAndParseRet("test_reducesum_one", "before_2"); FuncGraphPtr before3 = getPyFun.CallAndParseRet("test_reducesum_one", "before_3"); diff --git a/tests/ut/cpp/optimizer/opt_test.cc b/tests/ut/cpp/optimizer/opt_test.cc index d8d227f1cee3635c735ebff40b0cdaa1687c44c0..66b0df2472c94ab8a93142cf46ca7c2c2a952a6b 100644 --- a/tests/ut/cpp/optimizer/opt_test.cc +++ b/tests/ut/cpp/optimizer/opt_test.cc @@ -312,7 +312,7 @@ size_t TupleArgAndParamSum(const FuncGraphPtr &func_graph) { // Description: Test switch call's tuple arg transform.This case include partial's tuple arg and the call's tuple arg in // the same time. // Expectation: All tuple args are correctly transformed to tensor args. -TEST_F(TestOptOpt, SwitchPartialTupleTrans) { +TEST_F(TestOptOpt, DISABLED_SwitchPartialTupleTrans) { FuncGraphPtr test_graph = getPyFun.CallAndParseRet("test_tuple_flatten", "test_flatten_switch_partial_arg"); ASSERT_TRUE(nullptr != test_graph); diff --git a/tests/ut/cpp/optimizer/renormalize_test.cc b/tests/ut/cpp/optimizer/renormalize_test.cc index 36104c625f62c617490cd66ebb83e04d4cbe601e..e04c4c440223479648fe5e86868b5baef81324da 100644 --- a/tests/ut/cpp/optimizer/renormalize_test.cc +++ b/tests/ut/cpp/optimizer/renormalize_test.cc @@ -49,7 +49,7 @@ class TestRenormalize : public UT::Common { // Feature: Specialize. // Description: If a poly node's parent are not specialized, poly node should be delay specialized. // Expectation: graph can be executed and no exception raised. -TEST_F(TestRenormalize, TestPolyDelaySpecialize) { +TEST_F(TestRenormalize, DISABLED_TestPolyDelaySpecialize) { FuncGraphPtr test_graph = getPyFun.CallAndParseRet("test_renormalize", "test_poly_delay_specialize_ut"); ASSERT_TRUE(nullptr != test_graph); pipeline::ResourcePtr res = std::make_shared(); @@ -62,7 +62,7 @@ TEST_F(TestRenormalize, TestPolyDelaySpecialize) { // Feature: Static analysis of control flow. // Description: IgnoreValue flag should not be tagged when a function called twice if the function is header of 'if'. // Expectation: No tuple-getitem exist in specialized graph. -TEST_F(TestRenormalize, TestIgnoreValueTag) { +TEST_F(TestRenormalize, DISABLED_TestIgnoreValueTag) { FuncGraphPtr test_graph = getPyFun.CallAndParseRet("test_renormalize", "test_ignore_flag_with_twice_call_if"); ASSERT_TRUE(nullptr != test_graph); pipeline::ResourcePtr res = std::make_shared(); diff --git a/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc b/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc index 3ce0b12fbd4a9fed28d4f02bc6f0d74ae3f1daa9..8748575edf35aa19669840f9e1da8b31062fc378 100644 --- a/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/dp_algo_test.cc @@ -1248,79 +1248,79 @@ void TestDPAlgo::ConstructIdentityDiamondGraph() { cost_graph->AddEdge(mm2_ptr, mm3_ptr, edge_m2_m3); } -TEST_F(TestDPAlgo, test_ConstructTwoLargeMatMul) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructTwoLargeMatMul) { ConstructTwoLargeMatMul(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); ASSERT_EQ(cost_graph->InitSelectedStrategy(), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructBatmanGraph) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructBatmanGraph) { ConstructBatmanGraph(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); ASSERT_EQ(cost_graph->InitSelectedStrategy(), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructTriangleGraph) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructTriangleGraph) { ConstructTriangleGraph(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructTriangleGraph2) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructTriangleGraph2) { ConstructTriangleGraph2(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructStarGraph2) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructStarGraph2) { ConstructStarGraph2(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructStarGraph3) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructStarGraph3) { ConstructStarGraph3(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructTwoSeparateGraphs2) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructTwoSeparateGraphs2) { ConstructTwoSeparateGraphs2(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructTwoSeparateSingleNodeGraph) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructTwoSeparateSingleNodeGraph) { ConstructTwoSeparateSingleNodeGraph(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructThreeSeparateGraphs) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructThreeSeparateGraphs) { ConstructThreeSeparateGraphs(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_ConstructTwoSeparateGraphs) { +TEST_F(TestDPAlgo, DISABLED_test_ConstructTwoSeparateGraphs) { ConstructTwoSeparateGraphs(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_GetStrategy) { +TEST_F(TestDPAlgo, DISABLED_test_GetStrategy) { ConstructDiamondGraph(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_GetStrategy_for_MMR_graph) { +TEST_F(TestDPAlgo, DISABLED_test_GetStrategy_for_MMR_graph) { ConstructMMRGraph(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_GetStrategy_for_IdentityDiamondGraph) { +TEST_F(TestDPAlgo, DISABLED_test_GetStrategy_for_IdentityDiamondGraph) { ConstructIdentityDiamondGraph(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_GetStrategy_for_StarGraph) { +TEST_F(TestDPAlgo, DISABLED_test_GetStrategy_for_StarGraph) { ConstructStarGraph(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); } -TEST_F(TestDPAlgo, test_GetStrategy_for_DoubleStarGraph) { +TEST_F(TestDPAlgo, DISABLED_test_GetStrategy_for_DoubleStarGraph) { ConstructDoubleStarGraph(); ASSERT_EQ(GetStrategy(cost_graph), SUCCESS); diff --git a/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc index da4cbbc6af5af8d785604e05a8cf940c6610159e..a9f0609483bf47a9c5907fb44deb0a40f359846c 100644 --- a/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/edge_costmodel_test.cc @@ -105,7 +105,7 @@ void TestEdgeCostModel::SetUp() { matmul5->set_outputs_type({kFloat32}); } -TEST_F(TestEdgeCostModel, test_InitEdgeCost) { +TEST_F(TestEdgeCostModel, DISABLED_test_InitEdgeCost) { std::string edge_name = "MatMul-MatMul"; std::shared_ptr edge_m1_m2 = std::make_shared(edge_name, matmul1, matmul2, 0, 0, false); matmul1->GenerateStrategies(0); @@ -115,7 +115,7 @@ TEST_F(TestEdgeCostModel, test_InitEdgeCost) { ASSERT_EQ(edge_m1_m2->InitEdgeCost(), SUCCESS); } -TEST_F(TestEdgeCostModel, test_OpEliminationSetNewCost) { +TEST_F(TestEdgeCostModel, DISABLED_test_OpEliminationSetNewCost) { std::string edge_name = "MatMul-MatMul"; std::shared_ptr edge_m1_m2 = std::make_shared(edge_name, matmul1, matmul2, 0, 0, false); std::shared_ptr edge_m2_m4 = std::make_shared(edge_name, matmul2, matmul4, 0, 0, false); @@ -135,7 +135,7 @@ TEST_F(TestEdgeCostModel, test_OpEliminationSetNewCost) { new_edge->OpEliminationSetNewCost(edge_m1_m2, matmul2, edge_m2_m4); } -TEST_F(TestEdgeCostModel, test_EdgeEliminationSetNewCost) { +TEST_F(TestEdgeCostModel, DISABLED_test_EdgeEliminationSetNewCost) { std::string edge_name = "MatMul-MatMul"; std::shared_ptr edge_m1_m5 = std::make_shared(edge_name, matmul1, matmul5, 0, 0, false); std::shared_ptr edge_m1_m5_2 = std::make_shared(edge_name, matmul1, matmul5, 0, 1, false); diff --git a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc index d0bf0019b16508dd58260a93dd5b8c4914dd836d..6061a099cc24e089752d2b2f787d672fe9791aee 100644 --- a/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc +++ b/tests/ut/cpp/parallel/auto_parallel/graph_costmodel_test.cc @@ -233,7 +233,7 @@ void TestCostGraph::ConstructSingleNodeGraph() { cost_graph.AddOperator(matmul1); } -TEST_F(TestCostGraph, test_CheckMergeElimination) { +TEST_F(TestCostGraph, DISABLED_test_CheckMergeElimination) { ConstructStarGraph(); ASSERT_EQ(cost_graph.CheckMergeElimination().get(), matmul1.get()); cost_graph.EliminationOp(matmul2); @@ -241,20 +241,20 @@ TEST_F(TestCostGraph, test_CheckMergeElimination) { cost_graph.EliminationMerge(matmul1); } -TEST_F(TestCostGraph, test_CheckContractAndMergeElimination) { +TEST_F(TestCostGraph, DISABLED_test_CheckContractAndMergeElimination) { ConstructStarGraph2(); ASSERT_EQ(cost_graph.CheckMergeElimination().get(), matmul0.get()); cost_graph.EliminationMerge(matmul0); ASSERT_EQ(cost_graph.CheckContractElimination().get(), matmul2.get()); } -TEST_F(TestCostGraph, test_EliminationMerge) { +TEST_F(TestCostGraph, DISABLED_test_EliminationMerge) { ConstructStarGraph(); ASSERT_EQ(cost_graph.EliminationMerge(matmul3).get(), matmul4.get()); ASSERT_EQ(matmul3->is_alive(), false); } -TEST_F(TestCostGraph, test_SearchStrategy_for_single_node_graph) { +TEST_F(TestCostGraph, DISABLED_test_SearchStrategy_for_single_node_graph) { ConstructSingleNodeGraph(); cost_graph.SearchStrategy(); auto cost = matmul1->selected_cost(); @@ -330,12 +330,12 @@ TEST_F(TestCostGraph, test_SelectCostListWithMinTrainingTimeMultiple) { ASSERT_DOUBLE_EQ(ret_list[1]->computation_cost_, 1010); } -TEST_F(TestCostGraph, test_CheckOpElimination) { +TEST_F(TestCostGraph, DISABLED_test_CheckOpElimination) { ConstructLinearGraph(); ASSERT_EQ(cost_graph.CheckOpElimination().get(), matmul2.get()); } -TEST_F(TestCostGraph, test_CheckEdgesElimination) { +TEST_F(TestCostGraph, DISABLED_test_CheckEdgesElimination) { std::string edge_name = "MatMul-MatMul"; std::shared_ptr edge_m1_m5 = std::make_shared(edge_name, matmul1, matmul5, 0, 0, false); std::shared_ptr edge_m1_m5_2 = std::make_shared(edge_name, matmul1, matmul5, 0, 1, false); @@ -355,7 +355,7 @@ TEST_F(TestCostGraph, test_CheckEdgesElimination) { ASSERT_EQ(cost_graph.CheckEdgeElimination()[1].get(), edge_m1_m5_2.get()); } -TEST_F(TestCostGraph, test_CreateFinalCostList_AND_Select) { +TEST_F(TestCostGraph, DISABLED_test_CreateFinalCostList_AND_Select) { std::string edge_name = "MatMul-MatMul"; std::shared_ptr edge_m1_m2 = std::make_shared(edge_name, matmul1, matmul2, 0, 0, false); matmul1->GenerateStrategies(0); @@ -373,14 +373,14 @@ TEST_F(TestCostGraph, test_CreateFinalCostList_AND_Select) { cost_graph.SelectCostWithMinInferenceTime(cost_list, device_mem_capacity); } -TEST_F(TestCostGraph, test_EliminationOp) { +TEST_F(TestCostGraph, DISABLED_test_EliminationOp) { ConstructLinearGraph(); auto new_edge = cost_graph.EliminationOp(matmul2); ASSERT_EQ(new_edge.get(), matmul1->succ_edges()[0].get()); ASSERT_EQ(new_edge.get(), matmul4->prev_edges()[0].get()); } -TEST_F(TestCostGraph, test_EliminationEdges) { +TEST_F(TestCostGraph, DISABLED_test_EliminationEdges) { std::string edge_name = "MatMul-MatMul"; std::shared_ptr edge_m1_m5 = std::make_shared(edge_name, matmul1, matmul5, 0, 0, false); std::shared_ptr edge_m1_m5_2 = std::make_shared(edge_name, matmul1, matmul5, 0, 1, false); @@ -407,7 +407,7 @@ TEST_F(TestCostGraph, test_EliminationEdges) { ASSERT_EQ(new_edge.get(), matmul5->prev_edges()[0].get()); } -TEST_F(TestCostGraph, test_SearchStrategy) { +TEST_F(TestCostGraph, DISABLED_test_SearchStrategy) { std::string edge_name = "MatMul-MatMul"; std::shared_ptr edge_m1_m2 = std::make_shared(edge_name, matmul1, matmul2, 0, 0, false); matmul1->GenerateStrategies(0); @@ -423,7 +423,7 @@ TEST_F(TestCostGraph, test_SearchStrategy) { cost_graph.SearchStrategy(); } -TEST_F(TestCostGraph, test_SearchStrategyV2) { +TEST_F(TestCostGraph, DISABLED_test_SearchStrategyV2) { std::string edge_name = "MatMul-MatMul"; std::shared_ptr edge_m1_m2 = std::make_shared(edge_name, matmul1, matmul2, 0, 0, false); matmul1->GenerateStrategies(0); diff --git a/tests/ut/cpp/parallel/ops_info/activation_test.cc b/tests/ut/cpp/parallel/ops_info/activation_test.cc index b042a8cf8c14fa790bdc3ebc956483f646ff350c..69859b75962298f3d2268ce7cf5d76a1d068ddcc 100644 --- a/tests/ut/cpp/parallel/ops_info/activation_test.cc +++ b/tests/ut/cpp/parallel/ops_info/activation_test.cc @@ -91,7 +91,7 @@ TEST_F(TestActivation, test_activation_strategies) { } } -TEST_F(TestActivation, test_softmax_strategies) { +TEST_F(TestActivation, DISABLED_test_softmax_strategies) { ASSERT_EQ(soft_ptr_->GenerateStrategies(0), Status::SUCCESS); std::vector> sc = soft_ptr_->GetStrategyCost(); for (const auto& swc : sc) { diff --git a/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc b/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc index 1e2dc9be3bdcfdfbe9bdeaa5a6b02076c3daa3bd..f4e6426fdf70b1e35ac00016d5fbf353ee920288 100644 --- a/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/log_softmax_info_test.cc @@ -63,7 +63,7 @@ void TestLogSoftmaxInfo::SetUp() { log_softmax = std::make_shared("log_softmax_info", inputs_shape, outputs_shape, attr); } -TEST_F(TestLogSoftmaxInfo, InferDevMatrixShape1) { +TEST_F(TestLogSoftmaxInfo, DISABLED_InferDevMatrixShape1) { Strategies inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -74,7 +74,7 @@ TEST_F(TestLogSoftmaxInfo, InferDevMatrixShape1) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestLogSoftmaxInfo, InferSliceShape1) { +TEST_F(TestLogSoftmaxInfo, DISABLED_InferSliceShape1) { Strategies str = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -95,7 +95,7 @@ TEST_F(TestLogSoftmaxInfo, InferSliceShape1) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestLogSoftmaxInfo, GetTensorLayout1) { +TEST_F(TestLogSoftmaxInfo, DISABLED_GetTensorLayout1) { Strategies str = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -116,7 +116,7 @@ TEST_F(TestLogSoftmaxInfo, GetTensorLayout1) { ASSERT_EQ(output_tensor_map.array(), output_expect); } -TEST_F(TestLogSoftmaxInfo, GetForwardOp1) { +TEST_F(TestLogSoftmaxInfo, DISABLED_GetForwardOp1) { Strategies inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -127,7 +127,7 @@ TEST_F(TestLogSoftmaxInfo, GetForwardOp1) { ASSERT_EQ(size, 0); } -TEST_F(TestLogSoftmaxInfo, GetMirrorOPs1) { +TEST_F(TestLogSoftmaxInfo, DISABLED_GetMirrorOPs1) { Strategies inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -139,7 +139,7 @@ TEST_F(TestLogSoftmaxInfo, GetMirrorOPs1) { ASSERT_EQ(size, 0); } -TEST_F(TestLogSoftmaxInfo, CheckStrategy1) { +TEST_F(TestLogSoftmaxInfo, DISABLED_CheckStrategy1) { // Success: {{2,4,1,16}} Strategies inputs = {{2, 2, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -148,7 +148,7 @@ TEST_F(TestLogSoftmaxInfo, CheckStrategy1) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestLogSoftmaxInfo, CheckStrategy2) { +TEST_F(TestLogSoftmaxInfo, DISABLED_CheckStrategy2) { // Success: {{2,4,1,16}} Strategies inputs = {{2, 4, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -157,7 +157,7 @@ TEST_F(TestLogSoftmaxInfo, CheckStrategy2) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestLogSoftmaxInfo, CheckStrategy3) { +TEST_F(TestLogSoftmaxInfo, DISABLED_CheckStrategy3) { // Success: {{2,4,1,16}} Strategies inputs = {{2, 4, 8, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -166,7 +166,7 @@ TEST_F(TestLogSoftmaxInfo, CheckStrategy3) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestLogSoftmaxInfo, GetDeviceList1) { +TEST_F(TestLogSoftmaxInfo, DISABLED_GetDeviceList1) { Strategies inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); diff --git a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc index 454c5860daa974e36e98c7a460533ad1b37702ad..2e4ea591a1dfc2efe0c14e7519010a962e63fa39 100644 --- a/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/matmul_info_test.cc @@ -102,7 +102,7 @@ void TestMatmulInfo::SetUp() { /// Feature: test matmul info /// Description: infer dev matrix /// Expectation: the dev matrix is right -TEST_F(TestMatmulInfo, InferDevMatrixShape1) { +TEST_F(TestMatmulInfo, DISABLED_InferDevMatrixShape1) { Strategies inputs = {{2, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -116,7 +116,7 @@ TEST_F(TestMatmulInfo, InferDevMatrixShape1) { /// Feature: test matmul info /// Description: infer dev matrix /// Expectation: the dev matrix is right -TEST_F(TestMatmulInfo, InferDevMatrixShape2) { +TEST_F(TestMatmulInfo, DISABLED_InferDevMatrixShape2) { Strategies inputs = {{2, 4, 8, 8}, {2, 4, 8, 2}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -130,7 +130,7 @@ TEST_F(TestMatmulInfo, InferDevMatrixShape2) { /// Feature: test matmul info /// Description: infer dev matrix /// Expectation: the dev matrix is right -TEST_F(TestMatmulInfo, InferDevMatrixShape3) { +TEST_F(TestMatmulInfo, DISABLED_InferDevMatrixShape3) { Strategies inputs = {{2, 4, 8, 16}, {1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -144,7 +144,7 @@ TEST_F(TestMatmulInfo, InferDevMatrixShape3) { /// Feature: test matmul info /// Description: infer dev matrix /// Expectation: the dev matrix is right -TEST_F(TestMatmulInfo, InferDevMatrixShape4) { +TEST_F(TestMatmulInfo, DISABLED_InferDevMatrixShape4) { Strategies inputs = {{2, 4, 8, 8}, {2, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -158,7 +158,7 @@ TEST_F(TestMatmulInfo, InferDevMatrixShape4) { /// Feature: test matmul info /// Description: infer dev matrix /// Expectation: the dev matrix is right -TEST_F(TestMatmulInfo, InferDevMatrixShape5) { +TEST_F(TestMatmulInfo, DISABLED_InferDevMatrixShape5) { Strategies inputs = {{8, 16}, {2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -172,7 +172,7 @@ TEST_F(TestMatmulInfo, InferDevMatrixShape5) { /// Feature: test matmul info /// Description: infer dev matrix /// Expectation: the dev matrix is right -TEST_F(TestMatmulInfo, InferDevMatrixShape6) { +TEST_F(TestMatmulInfo, DISABLED_InferDevMatrixShape6) { Strategies inputs = {{8, 8}, {2, 4, 2, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -186,7 +186,7 @@ TEST_F(TestMatmulInfo, InferDevMatrixShape6) { /// Feature: test matmul info /// Description: infer tensor map /// Expectation: the tensor map is right -TEST_F(TestMatmulInfo, InferTensorMap1) { +TEST_F(TestMatmulInfo, DISABLED_InferTensorMap1) { Strategies str = {{2, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, str); @@ -214,7 +214,7 @@ TEST_F(TestMatmulInfo, InferTensorMap1) { /// Feature: test matmul info /// Description: infer tensor map /// Expectation: the tensor map is right -TEST_F(TestMatmulInfo, InferTensorMap2) { +TEST_F(TestMatmulInfo, DISABLED_InferTensorMap2) { Strategies str = {{2, 4, 8, 16}, {1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -242,7 +242,7 @@ TEST_F(TestMatmulInfo, InferTensorMap2) { /// Feature: test matmul info /// Description: infer tensor map /// Expectation: the tensor map is right -TEST_F(TestMatmulInfo, InferTensorMap3) { +TEST_F(TestMatmulInfo, DISABLED_InferTensorMap3) { Strategies str = {{8, 16}, {2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -270,7 +270,7 @@ TEST_F(TestMatmulInfo, InferTensorMap3) { /// Feature: test matmul info /// Description: infer slice shape /// Expectation: the slice shape is right -TEST_F(TestMatmulInfo, InferSliceShape1) { +TEST_F(TestMatmulInfo, DISABLED_InferSliceShape1) { Strategies str = {{2, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, str); @@ -298,7 +298,7 @@ TEST_F(TestMatmulInfo, InferSliceShape1) { /// Feature: test matmul info /// Description: infer slice shape /// Expectation: the slice shape is right -TEST_F(TestMatmulInfo, InferSliceShape2) { +TEST_F(TestMatmulInfo, DISABLED_InferSliceShape2) { Strategies str = {{2, 4, 8, 16}, {1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -326,7 +326,7 @@ TEST_F(TestMatmulInfo, InferSliceShape2) { /// Feature: test matmul info /// Description: infer slice shape /// Expectation: the slice shape is right -TEST_F(TestMatmulInfo, InferSliceShape3) { +TEST_F(TestMatmulInfo, DISABLED_InferSliceShape3) { Strategies str = {{8, 16}, {2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -354,7 +354,7 @@ TEST_F(TestMatmulInfo, InferSliceShape3) { /// Feature: test matmul info /// Description: get tensor layout /// Expectation: the tensor layout is right -TEST_F(TestMatmulInfo, GetTensorLayout3) { +TEST_F(TestMatmulInfo, DISABLED_GetTensorLayout3) { Strategies str = {{8, 16}, {2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -382,7 +382,7 @@ TEST_F(TestMatmulInfo, GetTensorLayout3) { /// Feature: test matmul info /// Description: infer forward op /// Expectation: the forward op is right -TEST_F(TestMatmulInfo, GetForwardOp1) { +TEST_F(TestMatmulInfo, DISABLED_GetForwardOp1) { Strategies inputs = {{2, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -411,7 +411,7 @@ TEST_F(TestMatmulInfo, GetForwardOp1) { /// Feature: test matmul info /// Description: infer forward op /// Expectation: the forward op is right -TEST_F(TestMatmulInfo, GetForwardOp2) { +TEST_F(TestMatmulInfo, DISABLED_GetForwardOp2) { Strategies inputs = {{2, 4, 8, 1}, {2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -424,7 +424,7 @@ TEST_F(TestMatmulInfo, GetForwardOp2) { /// Feature: test matmul info /// Description: infer virtual_div op /// Expectation: the virtual_div op is right -TEST_F(TestMatmulInfo, GetVirtualDivOp1) { +TEST_F(TestMatmulInfo, DISABLED_GetVirtualDivOp1) { Strategies inputs = {{2, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -446,7 +446,7 @@ TEST_F(TestMatmulInfo, GetVirtualDivOp1) { /// Feature: test matmul info /// Description: infer mirror op /// Expectation: the mirror op is right -TEST_F(TestMatmulInfo, GetMirrorOPs1) { +TEST_F(TestMatmulInfo, DISABLED_GetMirrorOPs1) { Strategies inputs = {{2, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -468,7 +468,7 @@ TEST_F(TestMatmulInfo, GetMirrorOPs1) { /// Feature: test matmul info /// Description: infer mirror op /// Expectation: the mirror op is right -TEST_F(TestMatmulInfo, GetMirrorOPs2) { +TEST_F(TestMatmulInfo, DISABLED_GetMirrorOPs2) { Strategies inputs = {{2, 4, 1, 16}, {8, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -490,7 +490,7 @@ TEST_F(TestMatmulInfo, GetMirrorOPs2) { /// Feature: test matmul info /// Description: infer mirror op /// Expectation: the mirror op is right -TEST_F(TestMatmulInfo, GetMirrorOPs3) { +TEST_F(TestMatmulInfo, DISABLED_GetMirrorOPs3) { Strategies inputs = {{8, 16}, {2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -511,7 +511,7 @@ TEST_F(TestMatmulInfo, GetMirrorOPs3) { /// Feature: test matmul info /// Description: infer mirror op /// Expectation: the mirror op is right -TEST_F(TestMatmulInfo, GetMirrorOPs4) { +TEST_F(TestMatmulInfo, DISABLED_GetMirrorOPs4) { Strategies inputs = {{2, 4, 1, 16}, {2, 4, 16, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -524,7 +524,7 @@ TEST_F(TestMatmulInfo, GetMirrorOPs4) { /// Feature: test matmul info /// Description: init twice /// Expectation: the mirror op is right -TEST_F(TestMatmulInfo, InitTwice) { +TEST_F(TestMatmulInfo, DISABLED_InitTwice) { Strategies inputs = {{2, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -548,7 +548,7 @@ TEST_F(TestMatmulInfo, InitTwice) { /// Feature: test matmul info /// Description: check strategy, the strategy is invalid /// Expectation: return FAILED -TEST_F(TestMatmulInfo, CheckStrategy1) { +TEST_F(TestMatmulInfo, DISABLED_CheckStrategy1) { // Success: {{2,4,8,16}, {2,4,16,1}} Strategies inputs = {{2, 2, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -560,7 +560,7 @@ TEST_F(TestMatmulInfo, CheckStrategy1) { /// Feature: test matmul info /// Description: check strategy, the strategy is invalid /// Expectation: return FAILED -TEST_F(TestMatmulInfo, CheckStrategy2) { +TEST_F(TestMatmulInfo, DISABLED_CheckStrategy2) { // Success: {{2,4,8,16}, {2,4,16,1}} Strategies inputs = {{2, 4, 8, 16}, {4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -572,7 +572,7 @@ TEST_F(TestMatmulInfo, CheckStrategy2) { /// Feature: test matmul info /// Description: check strategy, the strategy is invalid /// Expectation: return FAILED -TEST_F(TestMatmulInfo, CheckStrategy3) { +TEST_F(TestMatmulInfo, DISABLED_CheckStrategy3) { // Success: {{2,4,8,16}, {2,4,16,1}} Strategies inputs = {{2, 4, 8, 16}, {2, 4, 8, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -584,7 +584,7 @@ TEST_F(TestMatmulInfo, CheckStrategy3) { /// Feature: test matmul info /// Description: check strategy, the strategy is invalid /// Expectation: return FAILED -TEST_F(TestMatmulInfo, CheckStrategy4) { +TEST_F(TestMatmulInfo, DISABLED_CheckStrategy4) { // Success: {{2,4,8,16}, {2,4,16,1}} Strategies inputs = {{2, 4, 8, 16}, {2, 3, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -596,7 +596,7 @@ TEST_F(TestMatmulInfo, CheckStrategy4) { /// Feature: test matmul info /// Description: check strategy, the strategy is invalid /// Expectation: return FAILED -TEST_F(TestMatmulInfo, CheckStrategy5) { +TEST_F(TestMatmulInfo, DISABLED_CheckStrategy5) { // Success: {{2,4,8,16}, {2,4,16,1}} Strategies inputs = {{0, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -608,7 +608,7 @@ TEST_F(TestMatmulInfo, CheckStrategy5) { /// Feature: test matmul info /// Description: check strategy, the strategy is invalid /// Expectation: return FAILED -TEST_F(TestMatmulInfo, CheckStrategy6) { +TEST_F(TestMatmulInfo, DISABLED_CheckStrategy6) { // Success: {{2,4,8,16}, {2,4,16,1}} Strategies inputs = {{-1, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -620,7 +620,7 @@ TEST_F(TestMatmulInfo, CheckStrategy6) { /// Feature: test matmul info /// Description: check strategy, the strategy is invalid /// Expectation: return FAILED -TEST_F(TestMatmulInfo, CheckStrategy7) { +TEST_F(TestMatmulInfo, DISABLED_CheckStrategy7) { // Success: {{2,4,8,16}, {2,4,16,1}} Strategies inputs = {{4, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -632,7 +632,7 @@ TEST_F(TestMatmulInfo, CheckStrategy7) { /// Feature: test matmul info /// Description: init, invalid strategy /// Expectation: return FAILED -TEST_F(TestMatmulInfo, InitFailed) { +TEST_F(TestMatmulInfo, DISABLED_InitFailed) { // matmul4 attr is wrong Strategies inputs = {{4, 4, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -644,7 +644,7 @@ TEST_F(TestMatmulInfo, InitFailed) { /// Feature: test matmul info /// Description: generate strategy /// Expectation: the computation cost is right -TEST_F(TestMatmulInfo, test_GenerateStrategies1) { +TEST_F(TestMatmulInfo, DISABLED_test_GenerateStrategies1) { // the parameter '0' indicates that the stageId = 0, there are 1024 devices in the stage 0 ASSERT_EQ(matmul1->GenerateStrategies(0), Status::SUCCESS); std::vector> sc = matmul1->GetStrategyCost(); diff --git a/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc b/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc index c1e4917be031222f6ccd291008b005c393ac498b..1e3aeb51573ab625c0d11bd5097888148fb6595c 100644 --- a/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/onehot_info_test.cc @@ -63,7 +63,7 @@ void TestOneHotInfo::SetUp() { onehot_info = std::make_shared("OneHotInfo", inputs_shape, outputs_shape, attr); } -TEST_F(TestOneHotInfo, InferDevMatrixShape1) { +TEST_F(TestOneHotInfo, DISABLED_InferDevMatrixShape1) { Strategies inputs = {{8, 1}, {}, {}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -75,7 +75,7 @@ TEST_F(TestOneHotInfo, InferDevMatrixShape1) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestOneHotInfo, InferDevMatrixShape2) { +TEST_F(TestOneHotInfo, DISABLED_InferDevMatrixShape2) { Strategies inputs = {{4, 1}, {}, {}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -87,7 +87,7 @@ TEST_F(TestOneHotInfo, InferDevMatrixShape2) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestOneHotInfo, InferDevMatrixShape3) { +TEST_F(TestOneHotInfo, DISABLED_InferDevMatrixShape3) { Strategies inputs = {{4, 2}, {}, {}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -99,7 +99,7 @@ TEST_F(TestOneHotInfo, InferDevMatrixShape3) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestOneHotInfo, InferTensorMap2) { +TEST_F(TestOneHotInfo, DISABLED_InferTensorMap2) { Strategies str = {{8, 1}, {}, {}}; StrategyPtr strategy = NewStrategy(0, str); @@ -121,7 +121,7 @@ TEST_F(TestOneHotInfo, InferTensorMap2) { ASSERT_EQ(output_tensor_map.array(), output_expect); } -TEST_F(TestOneHotInfo, InferSliceShape1) { +TEST_F(TestOneHotInfo, DISABLED_InferSliceShape1) { Strategies str = {{8, 1}, {}, {}}; StrategyPtr strategy = NewStrategy(0, str); @@ -143,7 +143,7 @@ TEST_F(TestOneHotInfo, InferSliceShape1) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestOneHotInfo, InferSliceShape2) { +TEST_F(TestOneHotInfo, DISABLED_InferSliceShape2) { Strategies str = {{4, 2}, {}, {}}; StrategyPtr strategy = NewStrategy(0, str); @@ -165,7 +165,7 @@ TEST_F(TestOneHotInfo, InferSliceShape2) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestOneHotInfo, InferSliceShape3) { +TEST_F(TestOneHotInfo, DISABLED_InferSliceShape3) { Strategies str = {{2, 2}, {}, {}}; StrategyPtr strategy = NewStrategy(0, str); @@ -187,7 +187,7 @@ TEST_F(TestOneHotInfo, InferSliceShape3) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestOneHotInfo, GetMirrorOPs1) { +TEST_F(TestOneHotInfo, DISABLED_GetMirrorOPs1) { Strategies inputs = {{8, 1}, {}, {}}; StrategyPtr strategy = NewStrategy(0, inputs); diff --git a/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc b/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc index cd5f1e5448eb39e6abbf6a20ac18bfe1364c8cfa..12cd637b70133aa6b0b0407e19146eb3ca47fc2d 100644 --- a/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc +++ b/tests/ut/cpp/parallel/ops_info/onehot_info_test_axis_0.cc @@ -63,7 +63,7 @@ void TestOneHotInfo2::SetUp() { onehot_info2 = std::make_shared("onehot_info", inputs_shape, outputs_shape, attr); } -TEST_F(TestOneHotInfo2, InferDevMatrixShape1) { +TEST_F(TestOneHotInfo2, DISABLED_InferDevMatrixShape1) { Strategies inputs = {{1, 8}, {}, {}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -75,7 +75,7 @@ TEST_F(TestOneHotInfo2, InferDevMatrixShape1) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestOneHotInfo2, InferDevMatrixShape2) { +TEST_F(TestOneHotInfo2, DISABLED_InferDevMatrixShape2) { Strategies inputs = {{1, 4}, {}, {}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -87,7 +87,7 @@ TEST_F(TestOneHotInfo2, InferDevMatrixShape2) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestOneHotInfo2, InferDevMatrixShape3) { +TEST_F(TestOneHotInfo2, DISABLED_InferDevMatrixShape3) { Strategies inputs = {{2, 4}, {}, {}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -99,7 +99,7 @@ TEST_F(TestOneHotInfo2, InferDevMatrixShape3) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestOneHotInfo2, InferTensorMap2) { +TEST_F(TestOneHotInfo2, DISABLED_InferTensorMap2) { Strategies str = {{1, 8}, {}, {}}; StrategyPtr strategy = NewStrategy(0, str); @@ -121,7 +121,7 @@ TEST_F(TestOneHotInfo2, InferTensorMap2) { ASSERT_EQ(output_tensor_map.array(), output_expect); } -TEST_F(TestOneHotInfo2, InferSliceShape1) { +TEST_F(TestOneHotInfo2, DISABLED_InferSliceShape1) { Strategies str = {{1, 8}, {}, {}}; StrategyPtr strategy = NewStrategy(0, str); @@ -143,7 +143,7 @@ TEST_F(TestOneHotInfo2, InferSliceShape1) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestOneHotInfo2, InferSliceShape2) { +TEST_F(TestOneHotInfo2, DISABLED_InferSliceShape2) { Strategies str = {{2, 4}, {}, {}}; StrategyPtr strategy = NewStrategy(0, str); @@ -165,7 +165,7 @@ TEST_F(TestOneHotInfo2, InferSliceShape2) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestOneHotInfo2, InferSliceShape3) { +TEST_F(TestOneHotInfo2, DISABLED_InferSliceShape3) { Strategies str = {{2, 2}, {}, {}}; StrategyPtr strategy = NewStrategy(0, str); diff --git a/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc b/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc index b126ac7a54485a06922c9f8d0b0b51daa745a002..7388c40ab62b4da317f95b2d5e11b7c640449338 100644 --- a/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc +++ b/tests/ut/cpp/parallel/ops_info/reduce_method_test.cc @@ -68,7 +68,7 @@ void TestReduceSumInfo::SetUp() { reduce_sum->set_input_value(val); } -TEST_F(TestReduceSumInfo, InferDevMatrixShape1) { +TEST_F(TestReduceSumInfo, DISABLED_InferDevMatrixShape1) { Strategies inputs = {{4, 8, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -79,7 +79,7 @@ TEST_F(TestReduceSumInfo, InferDevMatrixShape1) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestReduceSumInfo, InferSliceShape1) { +TEST_F(TestReduceSumInfo, DISABLED_InferSliceShape1) { Strategies str = {{4, 8, 1}}; StrategyPtr strategy = NewStrategy(0, str); @@ -100,7 +100,7 @@ TEST_F(TestReduceSumInfo, InferSliceShape1) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestReduceSumInfo, GetTensorLayout1) { +TEST_F(TestReduceSumInfo, DISABLED_GetTensorLayout1) { Strategies str = {{4, 8, 1}}; StrategyPtr strategy = NewStrategy(0, str); @@ -121,7 +121,7 @@ TEST_F(TestReduceSumInfo, GetTensorLayout1) { ASSERT_EQ(output_tensor_map.array(), output_expect); } -TEST_F(TestReduceSumInfo, GetForwardOp1) { +TEST_F(TestReduceSumInfo, DISABLED_GetForwardOp1) { Strategies inputs = {{4, 8, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -132,7 +132,7 @@ TEST_F(TestReduceSumInfo, GetForwardOp1) { ASSERT_EQ(size, 0); } -TEST_F(TestReduceSumInfo, GetForwardOp2) { +TEST_F(TestReduceSumInfo, DISABLED_GetForwardOp2) { Strategies inputs = {{4, 4, 2}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -156,7 +156,7 @@ TEST_F(TestReduceSumInfo, GetForwardOp2) { ASSERT_EQ(arg1_name, "group"); } -TEST_F(TestReduceSumInfo, GetMirrorOPs1) { +TEST_F(TestReduceSumInfo, DISABLED_GetMirrorOPs1) { Strategies inputs = {{4, 8, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -168,7 +168,7 @@ TEST_F(TestReduceSumInfo, GetMirrorOPs1) { ASSERT_EQ(size, 0); } -TEST_F(TestReduceSumInfo, GetMirrorOPs2) { +TEST_F(TestReduceSumInfo, DISABLED_GetMirrorOPs2) { Strategies inputs = {{4, 4, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -187,7 +187,7 @@ TEST_F(TestReduceSumInfo, GetMirrorOPs2) { ASSERT_EQ(arg0_name, "group"); } -TEST_F(TestReduceSumInfo, CheckStrategy1) { +TEST_F(TestReduceSumInfo, DISABLED_CheckStrategy1) { Strategies inputs = {{2, 2, 8, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -195,7 +195,7 @@ TEST_F(TestReduceSumInfo, CheckStrategy1) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestReduceSumInfo, CheckStrategy2) { +TEST_F(TestReduceSumInfo, DISABLED_CheckStrategy2) { Strategies inputs = {{2, 4, 8}, {2, 4, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -203,7 +203,7 @@ TEST_F(TestReduceSumInfo, CheckStrategy2) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestReduceSumInfo, CheckStrategy3) { +TEST_F(TestReduceSumInfo, DISABLED_CheckStrategy3) { Strategies inputs = {{4, 4, 2}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -211,7 +211,7 @@ TEST_F(TestReduceSumInfo, CheckStrategy3) { ASSERT_EQ(ret, SUCCESS); } -TEST_F(TestReduceSumInfo, CheckStrategy4) { +TEST_F(TestReduceSumInfo, DISABLED_CheckStrategy4) { Strategies inputs = {{4, 8, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); diff --git a/tests/ut/cpp/parallel/ops_info/reshape_test.cc b/tests/ut/cpp/parallel/ops_info/reshape_test.cc index 444aebbe51f41da055c773cb4ca646637716390f..b36ddc18420deb823e5c41c6520f3a7548cb963b 100644 --- a/tests/ut/cpp/parallel/ops_info/reshape_test.cc +++ b/tests/ut/cpp/parallel/ops_info/reshape_test.cc @@ -67,7 +67,7 @@ void TestReshapeInfo::SetUp() { reshape->set_input_value(val); } -TEST_F(TestReshapeInfo, InferDevMatrixShape1) { +TEST_F(TestReshapeInfo, DISABLED_InferDevMatrixShape1) { Strategies inputs = {{4, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -78,7 +78,7 @@ TEST_F(TestReshapeInfo, InferDevMatrixShape1) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestReshapeInfo, InferDevMatrixShape2) { +TEST_F(TestReshapeInfo, DISABLED_InferDevMatrixShape2) { Strategies inputs = {{32, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -89,7 +89,7 @@ TEST_F(TestReshapeInfo, InferDevMatrixShape2) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestReshapeInfo, InferSliceShape1) { +TEST_F(TestReshapeInfo, DISABLED_InferSliceShape1) { Strategies str = {{4, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, str); @@ -110,7 +110,7 @@ TEST_F(TestReshapeInfo, InferSliceShape1) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestReshapeInfo, InferSliceShape2) { +TEST_F(TestReshapeInfo, DISABLED_InferSliceShape2) { Strategies str = {{32, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, str); @@ -131,7 +131,7 @@ TEST_F(TestReshapeInfo, InferSliceShape2) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestReshapeInfo, GetTensorLayout1) { +TEST_F(TestReshapeInfo, DISABLED_GetTensorLayout1) { Strategies str = {{4, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, str); @@ -152,7 +152,7 @@ TEST_F(TestReshapeInfo, GetTensorLayout1) { ASSERT_EQ(output_tensor_map.array(), output_expect); } -TEST_F(TestReshapeInfo, GetTensorLayout2) { +TEST_F(TestReshapeInfo, DISABLED_GetTensorLayout2) { Strategies str = {{32, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, str); @@ -173,7 +173,7 @@ TEST_F(TestReshapeInfo, GetTensorLayout2) { ASSERT_EQ(output_tensor_map.array(), output_expect); } -TEST_F(TestReshapeInfo, GetForwardOp1) { +TEST_F(TestReshapeInfo, DISABLED_GetForwardOp1) { Strategies inputs = {{4, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -184,7 +184,7 @@ TEST_F(TestReshapeInfo, GetForwardOp1) { ASSERT_EQ(size, 0); } -TEST_F(TestReshapeInfo, GetMirrorOPs1) { +TEST_F(TestReshapeInfo, DISABLED_GetMirrorOPs1) { Strategies inputs = {{4, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -196,7 +196,7 @@ TEST_F(TestReshapeInfo, GetMirrorOPs1) { ASSERT_EQ(size, 2); } -TEST_F(TestReshapeInfo, CheckStrategy1) { +TEST_F(TestReshapeInfo, DISABLED_CheckStrategy1) { Strategies inputs = {{1, 4, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -204,7 +204,7 @@ TEST_F(TestReshapeInfo, CheckStrategy1) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestReshapeInfo, CheckStrategy2) { +TEST_F(TestReshapeInfo, DISABLED_CheckStrategy2) { Strategies inputs = {{2, 4, 8}, {2, 4, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -212,7 +212,7 @@ TEST_F(TestReshapeInfo, CheckStrategy2) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestReshapeInfo, CheckStrategy3) { +TEST_F(TestReshapeInfo, DISABLED_CheckStrategy3) { Strategies inputs = {{4, 1, 1, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); diff --git a/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc b/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc index ef6c50ec43f9d0400adc235bb4fafce9b3dc07e5..abc09da6ea6f48897079b0a88fdcbdf9672cbc12 100644 --- a/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc +++ b/tests/ut/cpp/parallel/ops_info/softmax_info_test.cc @@ -67,7 +67,7 @@ void TestSoftmaxInfo::SetUp() { softmax2 = std::make_shared("softmax_info2", inputs_shape, outputs_shape, attr2); } -TEST_F(TestSoftmaxInfo, InferDevMatrixShape1) { +TEST_F(TestSoftmaxInfo, DISABLED_InferDevMatrixShape1) { Strategies inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -78,7 +78,7 @@ TEST_F(TestSoftmaxInfo, InferDevMatrixShape1) { ASSERT_EQ(dev_matrix_shape, expect); } -TEST_F(TestSoftmaxInfo, InferSliceShape1) { +TEST_F(TestSoftmaxInfo, DISABLED_InferSliceShape1) { Strategies str = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -99,7 +99,7 @@ TEST_F(TestSoftmaxInfo, InferSliceShape1) { ASSERT_EQ(output_slice_shape, output_slice_shape_expect); } -TEST_F(TestSoftmaxInfo, GetTensorLayout1) { +TEST_F(TestSoftmaxInfo, DISABLED_GetTensorLayout1) { Strategies str = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, str); @@ -120,7 +120,7 @@ TEST_F(TestSoftmaxInfo, GetTensorLayout1) { ASSERT_EQ(output_tensor_map.array(), output_expect); } -TEST_F(TestSoftmaxInfo, GetForwardOp1) { +TEST_F(TestSoftmaxInfo, DISABLED_GetForwardOp1) { Strategies inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -131,7 +131,7 @@ TEST_F(TestSoftmaxInfo, GetForwardOp1) { ASSERT_EQ(size, 0); } -TEST_F(TestSoftmaxInfo, GetMirrorOPs1) { +TEST_F(TestSoftmaxInfo, DISABLED_GetMirrorOPs1) { Strategies inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -143,7 +143,7 @@ TEST_F(TestSoftmaxInfo, GetMirrorOPs1) { ASSERT_EQ(size, 0); } -TEST_F(TestSoftmaxInfo, CheckStrategy1) { +TEST_F(TestSoftmaxInfo, DISABLED_CheckStrategy1) { // Success: {{2,4,1,16}} Strategies inputs = {{2, 2, 8, 16}, {2, 4, 16, 1}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -152,7 +152,7 @@ TEST_F(TestSoftmaxInfo, CheckStrategy1) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestSoftmaxInfo, CheckStrategy2) { +TEST_F(TestSoftmaxInfo, DISABLED_CheckStrategy2) { // Success: {{2,4,1,16}} Strategies inputs = {{2, 4, 8}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -161,7 +161,7 @@ TEST_F(TestSoftmaxInfo, CheckStrategy2) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestSoftmaxInfo, CheckStrategy3) { +TEST_F(TestSoftmaxInfo, DISABLED_CheckStrategy3) { // Success: {{2,4,1,16}} Strategies inputs = {{2, 4, 8, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -170,7 +170,7 @@ TEST_F(TestSoftmaxInfo, CheckStrategy3) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestSoftmaxInfo, InitFailed1) { +TEST_F(TestSoftmaxInfo, DISABLED_InitFailed1) { // softmax2's axis is wrong Strategies inputs = {{2, 4, 1, 16}}; StrategyPtr strategy = NewStrategy(0, inputs); @@ -179,7 +179,7 @@ TEST_F(TestSoftmaxInfo, InitFailed1) { ASSERT_EQ(ret, FAILED); } -TEST_F(TestSoftmaxInfo, InitFailed2) { +TEST_F(TestSoftmaxInfo, DISABLED_InitFailed2) { // dev num is wrong Strategies inputs = {{2, 4, 1, 100}}; StrategyPtr strategy = NewStrategy(0, inputs); diff --git a/tests/ut/cpp/parallel/step_auto_parallel_test.cc b/tests/ut/cpp/parallel/step_auto_parallel_test.cc index 189976c494f80d2d42b76f181e889e390f4ffd0f..83d47d84a9c7ab30207c6710a9786abda30a5e1f 100644 --- a/tests/ut/cpp/parallel/step_auto_parallel_test.cc +++ b/tests/ut/cpp/parallel/step_auto_parallel_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" +#include "common/resource.h" #include "mindspore/core/ops/math_ops.h" #include "frontend/parallel/step_parallel.h" #include "frontend/parallel/step_parallel_utils.h" @@ -29,13 +30,11 @@ namespace parallel { class TestStepAutoParallel : public UT::Common { public: TestStepAutoParallel() {} - void SetUp(); - void TearDown() {} + void SetUp() override; }; void TestStepAutoParallel::SetUp() { RankList dev_list; - for (int32_t i = 0; i < 20; i++) { dev_list.push_back(i); } @@ -52,7 +51,7 @@ void TestStepAutoParallel::SetUp() { } CNodePtr Create_Node(Shape x, Shape y, Shape out) { - FuncGraphPtr func_graph = std::make_shared(); + FuncGraphPtr func_graph = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); ParameterPtr param1 = func_graph->add_parameter(); ParameterPtr param2 = func_graph->add_parameter(); param1->set_name("x"); @@ -84,7 +83,7 @@ CNodePtr Create_Node(Shape x, Shape y, Shape out) { } CNodePtr Create_two_nodes(Shape x, Shape y, Shape z, Shape w, Shape out) { - FuncGraphPtr func_graph = std::make_shared(); + FuncGraphPtr func_graph = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); ParameterPtr paramX = func_graph->add_parameter(); ParameterPtr paramY = func_graph->add_parameter(); ParameterPtr paramW = func_graph->add_parameter(); @@ -115,6 +114,7 @@ CNodePtr Create_two_nodes(Shape x, Shape y, Shape z, Shape w, Shape out) { MatMul_1_inputs.push_back(paramX); MatMul_1_inputs.push_back(paramY); CNodePtr MatMul_1_node = func_graph->NewCNode(MatMul_1_inputs); + PrimitivePtr prim = MatMul_1_node->input(0)->cast()->value()->cast(); ValuePtr transpose_a = MakeValue(false); ValuePtr transpose_b = MakeValue(false); @@ -140,7 +140,7 @@ CNodePtr Create_two_nodes(Shape x, Shape y, Shape z, Shape w, Shape out) { /// Features: test create op instance /// Description: /// Expectation: -TEST_F(TestStepAutoParallel, test_create_op_instance) { +TEST_F(TestStepAutoParallel, DISABLED_test_create_op_instance) { Shape inputs_x_dims = {64, 32}; Shape inputs_y_dims = {32, 64}; Shape outputs_dims = {64, 64}; diff --git a/tests/ut/cpp/parallel/step_parallel_test.cc b/tests/ut/cpp/parallel/step_parallel_test.cc index b59907b1cf96ad9ce1737ff323cebd6ca34cb7f6..8c446155062de8bfe7cb93636e63ef0c22b15e31 100644 --- a/tests/ut/cpp/parallel/step_parallel_test.cc +++ b/tests/ut/cpp/parallel/step_parallel_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" +#include "common/resource.h" #include "mindspore/core/ops/math_ops.h" #include "mindspore/core/ops/array_ops.h" #include "mindspore/core/ops/framework_ops.h" @@ -36,7 +37,6 @@ class TestStepParallel : public UT::Common { public: TestStepParallel() {} void SetUp(); - void TearDown() {} }; void Init_Device_Manager() { @@ -65,7 +65,7 @@ void TestStepParallel::SetUp() { } CNodePtr Make_Node(Shape x, Shape y, Shape out, int64_t condition = 0) { - FuncGraphPtr func_graph = std::make_shared(); + FuncGraphPtr func_graph = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); ParameterPtr param1 = func_graph->add_parameter(); ParameterPtr param2 = func_graph->add_parameter(); param1->set_name("x"); @@ -129,7 +129,7 @@ FuncGraphManagerPtr Make_Manager(int64_t condition = 0) { std::vector inputs_z = {64, 128}; std::vector outputs_1 = {64, 64}; std::vector outputs_2 = {64, 128}; - FuncGraphPtr func_graph = std::make_shared(); + FuncGraphPtr func_graph = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); ParameterPtr param1 = func_graph->add_parameter(); ParameterPtr param2 = func_graph->add_parameter(); ParameterPtr param3 = func_graph->add_parameter(); @@ -408,7 +408,7 @@ TEST_F(TestStepParallel, OperatorInstance) { /// Feature: test ExtractInformation in auto parallel. /// Description: /// Expectation: success. -TEST_F(TestStepParallel, ExtractInformation) { +TEST_F(TestStepParallel, DISABLED_ExtractInformation) { FuncGraphManagerPtr manager = Make_Manager(); FuncGraphSet graphs = manager->func_graphs(); FuncGraphPtr graph = *graphs.begin(); @@ -444,7 +444,7 @@ TEST_F(TestStepParallel, ExtractInformation3) { /// Feature: test ForwardCommunication. /// Description: /// Expectation: success. -TEST_F(TestStepParallel, ForwardCommunication1) { +TEST_F(TestStepParallel, DISABLED_ForwardCommunication1) { ValuePtr attr0_value = MakeValue(REDUCE_OP_SUM); ValuePtr attr1_value = MakeValue("0-1-2"); Attr attr0 = std::make_pair("op", attr0_value); @@ -498,7 +498,7 @@ TEST_F(TestStepParallel, ForwardCommunication1) { /// Feature: test ForwardCommunication. /// Description: /// Expectation: success. -TEST_F(TestStepParallel, ForwardCommunication2) { +TEST_F(TestStepParallel, DISABLED_ForwardCommunication2) { OperatorVector op_list; FuncGraphManagerPtr manager = Make_Manager(); FuncGraphSet graphs = manager->func_graphs(); @@ -524,7 +524,7 @@ TEST_F(TestStepParallel, ForwardCommunication2) { /// Feature: test ForwardCommunication. /// Description: /// Expectation: success. -TEST_F(TestStepParallel, ForwardCommunication3) { +TEST_F(TestStepParallel, DISABLED_ForwardCommunication3) { OperatorVector op_list; FuncGraphManagerPtr manager = Make_Manager(); FuncGraphSet graphs = manager->func_graphs(); @@ -554,9 +554,9 @@ TEST_F(TestStepParallel, ForwardCommunication3) { /// Feature: test GetTensorInLayout. /// Description: /// Expectation: success. -TEST_F(TestStepParallel, GetTensorInLayout) { +TEST_F(TestStepParallel, DISABLED_GetTensorInLayout) { // create attrs and prim - FuncGraphPtr func_graph = std::make_shared(); + FuncGraphPtr func_graph = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); Shape inputs_x_dims = {64, 32}; Shape inputs_y_dims = {32, 64}; Shape outputs_dims = {64, 64}; @@ -592,7 +592,7 @@ TEST_F(TestStepParallel, GetTensorInLayout) { /// Expectation: the status is correct TEST_F(TestStepParallel, UpdateMicroBatchInterleavedStatus) { std::vector inputs; - FuncGraphPtr func_graph = std::make_shared(); + FuncGraphPtr func_graph = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); ValueNodePtr stridedSlicePtr = NewValueNode(prim::kPrimStridedSlice); PrimitivePtr prim = stridedSlicePtr->value()->cast(); diff --git a/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc b/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc index 542564790c486b3006c7f5e14c9337eabde9faf4..e3a26ac11c4ef7b328250332f12607be25e806eb 100644 --- a/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc +++ b/tests/ut/cpp/parallel/tensor_layout/construct_operator_test.cc @@ -72,12 +72,12 @@ void TestConstructOperator::SetUp() { constructor.UpdateTensorShape(tensor_shape); } -TEST_F(TestConstructOperator, TestReshapeOP) { +TEST_F(TestConstructOperator, DISABLED_TestReshapeOP) { Shape shape = {512, 512, 2}; ASSERT_EQ(constructor.ReshapeOP(shape), Status::SUCCESS); } -TEST_F(TestConstructOperator, TestStridedSliceOP) { +TEST_F(TestConstructOperator, DISABLED_TestStridedSliceOP) { Args args = {1, 2, 3}; int64_t split_count = args[0]; int64_t split_dim = args[1]; @@ -111,22 +111,22 @@ TEST_F(TestConstructOperator, TestStridedSliceOP) { } } -TEST_F(TestConstructOperator, TestAllGatherOP) { +TEST_F(TestConstructOperator, DISABLED_TestAllGatherOP) { int64_t dev_dim = 2; ASSERT_EQ(constructor.AllGatherOP(dev_dim), Status::SUCCESS); } -TEST_F(TestConstructOperator, TestConcatOP) { +TEST_F(TestConstructOperator, DISABLED_TestConcatOP) { int64_t concat_dim = 0; ASSERT_EQ(constructor.ConcatOP(concat_dim), Status::SUCCESS); } -TEST_F(TestConstructOperator, TestSplitOP) { +TEST_F(TestConstructOperator, DISABLED_TestSplitOP) { int64_t split_count = 2; ASSERT_EQ(constructor.SplitOP(split_count), Status::SUCCESS); } -TEST_F(TestConstructOperator, TestAlltoAllOP) { +TEST_F(TestConstructOperator, DISABLED_TestAlltoAllOP) { int64_t split_count = 2; int64_t split_dim = 0; int64_t concat_dim = 1; diff --git a/tests/ut/cpp/pipeline/graph_executor_test.cc b/tests/ut/cpp/pipeline/graph_executor_test.cc index 6ba48ded255e2e2171c6a575e376a88bf7a4d401..532eca5c6b505077e85205a13dad9b1ec699c0a3 100644 --- a/tests/ut/cpp/pipeline/graph_executor_test.cc +++ b/tests/ut/cpp/pipeline/graph_executor_test.cc @@ -31,7 +31,7 @@ class TestGraphExecutor : public UT::Common { /// Feature: Test jit_config /// Description: Test set jit_level = o0 /// Expectation: success -TEST_F(TestGraphExecutor, test_jit_config_with_jit_level_equal_o0) { +TEST_F(TestGraphExecutor, DISABLED_test_jit_config_with_jit_level_equal_o0) { py::dict obj = python_adapter::CallPyFn("gtest_input.pipeline.graph_executor_test", "get_jit_config_o0"); pipeline::GraphExecutorPy::GetInstance()->SetJitConfig(obj); diff --git a/tests/ut/cpp/pipeline/parse/boost_parse_test.cc b/tests/ut/cpp/pipeline/parse/boost_parse_test.cc index f80526b7527215d84667d69b7200ed2f38e08a6c..b16a199c8491577c55b5ed47ae49ca0a891af5c5 100644 --- a/tests/ut/cpp/pipeline/parse/boost_parse_test.cc +++ b/tests/ut/cpp/pipeline/parse/boost_parse_test.cc @@ -26,10 +26,6 @@ class TestBoostParse : public UT::Common { public: TestBoostParse() : getPyFun_("gtest_input.pipeline.parse.boost_parse") {} - virtual void SetUp(); - - virtual void TearDown(); - void CheckFalseBranch(const FuncGraphPtr &func_graph, bool folded = true) { auto manager = Manage(func_graph); EXPECT_TRUE(manager != nullptr); @@ -55,10 +51,6 @@ class TestBoostParse : public UT::Common { UT::PyFuncGraphFetcher getPyFun_; }; -void TestBoostParse::SetUp() {} - -void TestBoostParse::TearDown() {} - // Feature: Boost parse. // Description: Parse the network witch has "if var:" statement. // Expectation: The false branch should be folded. diff --git a/tests/ut/cpp/pipeline/parse/parallel_if.cc b/tests/ut/cpp/pipeline/parse/parallel_if.cc index 1b0935c589a4ff133a098af8e82bf04cc52e8a51..7ff9f8caaf0158d006d009b08a1c02ccbf9ce53c 100644 --- a/tests/ut/cpp/pipeline/parse/parallel_if.cc +++ b/tests/ut/cpp/pipeline/parse/parallel_if.cc @@ -35,7 +35,6 @@ class TestParallelIf : public UT::Common { public: TestParallelIf() : getPyFun("gtest_input.pipeline.parse.parallel_if") {} virtual void SetUp(); - virtual void TearDown(); py::function GetPythonFunction(std::string function); bool CheckIsomorphic(FuncGraphPtr basic, FuncGraphPtr manual, std::vector opts = {}) { @@ -126,27 +125,25 @@ class TestParallelIf : public UT::Common { void TestParallelIf::SetUp() { UT::InitPythonPath(); } -void TestParallelIf::TearDown() {} - // Feature: Parallel if transformation // Description: Check parallel if transformatin for test code with single if/else. // Expectation: The funcgraph after transformation should be isomorphic with the funcgraph manually constructed. -TEST_F(TestParallelIf, SimpleIf) { CheckParallelIfTransform("test_simple_if"); } +TEST_F(TestParallelIf, DISABLED_SimpleIf) { CheckParallelIfTransform("test_simple_if"); } // Feature: Parallel if transformation // Description: Check parallel if transformatin for test code with if-by-if. // Expectation: The funcgraph after transformation should be isomorphic with the funcgraph manually constructed. -TEST_F(TestParallelIf, IfByIf) { CheckParallelIfTransform("test_if_by_if"); } +TEST_F(TestParallelIf, DISABLED_IfByIf) { CheckParallelIfTransform("test_if_by_if"); } // Feature: Parallel if transformation // Description: Check parallel if transformatin for test code with if-in-if. // Expectation: The funcgraph after transformation should be isomorphic with the funcgraph manually constructed. -TEST_F(TestParallelIf, IfInIf) { CheckParallelIfTransform("test_if_in_if"); } +TEST_F(TestParallelIf, DISABLED_IfInIf) { CheckParallelIfTransform("test_if_in_if"); } // Feature: Parallel if transformation // Description: Check parallel if transformatin for test code with if-elif-else. // Expectation: The funcgraph after transformation should be isomorphic with the funcgraph manually constructed. -TEST_F(TestParallelIf, IfElifElse) { CheckParallelIfTransform("test_if_elif_else"); } +TEST_F(TestParallelIf, DISABLED_IfElifElse) { CheckParallelIfTransform("test_if_elif_else"); } // Return statement section. // Feature: Parallel if transformation diff --git a/tests/ut/cpp/pipeline/resource_test.cc b/tests/ut/cpp/pipeline/resource_test.cc index f5f37408b6d0068a457e6fe2830d2aae62072379..b9e7779c1de6c490f77b94e3a67e35cb9ae1273b 100644 --- a/tests/ut/cpp/pipeline/resource_test.cc +++ b/tests/ut/cpp/pipeline/resource_test.cc @@ -22,6 +22,7 @@ #include "pipeline/jit/ps/resource.h" #include "ir/primitive.h" #include "frontend/operator/ops.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore { namespace pipeline { diff --git a/tests/ut/cpp/pipeline/static_analysis/data_test.cc b/tests/ut/cpp/pipeline/static_analysis/data_test.cc index d56814f64e795b437e157aa4d5a5a125c0e0f9fd..5d78b9c2b5adba488e04155fe70fc323b89ff500 100644 --- a/tests/ut/cpp/pipeline/static_analysis/data_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/data_test.cc @@ -23,6 +23,7 @@ #include "pipeline/jit/ps/static_analysis/prim.h" #include "frontend/operator/ops.h" #include "abstract/utils.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore { namespace abstract { diff --git a/tests/ut/cpp/pipeline/static_analysis/prim_test.cc b/tests/ut/cpp/pipeline/static_analysis/prim_test.cc index 54d482208263dacdbe3e8e727dae3400b8c1e019..6728f9a52576fe9c9ac694c314b2f88b7e0f857a 100644 --- a/tests/ut/cpp/pipeline/static_analysis/prim_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/prim_test.cc @@ -151,7 +151,7 @@ TEST_F(TestPrim, test_typeof) { ASSERT_TRUE(*res_value == Int(64)); } -TEST_F(TestPrim, test_list_reduce) { +TEST_F(TestPrim, DISABLED_test_list_reduce) { AbstractBasePtrList args_spec_list; int64_t v1 = 1; diff --git a/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc b/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc index 461b7cf5f731795384e1da320a392d176de4b53c..ba32ad78053846afde9ace2de493b000dc507b84 100644 --- a/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc +++ b/tests/ut/cpp/pipeline/static_analysis/static_analysis_test.cc @@ -33,6 +33,7 @@ #include "pipeline/static_analysis/helper.h" #include "utils/log_adapter.h" #include "include/common/debug/anf_ir_dump.h" +#include "ops/auto_generate/gen_ops_primitive.h" namespace mindspore { namespace abstract { @@ -421,7 +422,6 @@ class TestEvalCNode : public UT::Common { public: TestEvalCNode() : getPyFun_("gtest_input.pipeline.infer.infer_test", true, true), engine_(nullptr) {} void SetUp(); - void TearDown(); UT::PyFuncGraphFetcher getPyFun_; AnalysisEnginePtr engine_; @@ -429,10 +429,6 @@ class TestEvalCNode : public UT::Common { void TestEvalCNode::SetUp() { engine_ = SetupAnalysisEngineStub(); } -void TestEvalCNode::TearDown() { - // destroy resource -} - abstract::AbstractBasePtr EvalFunction(const ValuePtr &value, const abstract::AbstractBasePtrList &args_abs) { return pipeline::AbstractAnalyze(value, args_abs).eval_result->abstract(); } diff --git a/tests/ut/cpp/plugin/device/cpu/hal/test_ms_collective_topo.cc b/tests/ut/cpp/plugin/device/cpu/hal/test_ms_collective_topo.cc index 33dde90441de5b573405bcf318c9f356d83d6f0e..60c1c4a02083a3f75e254f4a1f780a8a79b18282 100644 --- a/tests/ut/cpp/plugin/device/cpu/hal/test_ms_collective_topo.cc +++ b/tests/ut/cpp/plugin/device/cpu/hal/test_ms_collective_topo.cc @@ -33,7 +33,7 @@ class TestMSCollectiveTopo : public UT::Common { /// Feature: test create cpu collective topology node. /// Description: create the topology node. /// Expectation: the topology node is created successfully. -TEST_F(TestMSCollectiveTopo, InitCollectiveTopoNode) { +TEST_F(TestMSCollectiveTopo, DISABLED_InitCollectiveTopoNode) { std::string server_host = "127.0.0.1"; std::string server_port = "8090"; common::SetEnv(distributed::cluster::topology::kEnvMetaServerHost, server_host.c_str()); diff --git a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc index bce9e278e6eb1df423ca5cde5843251f2bf7b165..803a1122660cfa10e223009417fdbd0bf21150b2 100644 --- a/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc +++ b/tests/ut/cpp/pre_activate/mem_reuse/mem_reuse_test.cc @@ -30,6 +30,7 @@ #include "backend/common/mem_reuse/mem_reuse.h" #include "common/common_test.h" +#include "common/resource.h" #include "common/py_func_graph_fetcher.h" namespace mindspore { @@ -155,7 +156,7 @@ static KernelGraphPtr CreateGraphWithExecOrder() { * mul * return */ - auto anf_graph = std::make_shared(); + auto anf_graph = UT::UTResourceManager::GetInstance()->MakeAndHoldFuncGraph(); std::vector shape = {2, 32, 224, 224}; auto abstract = std::make_shared(kFloat32, shape); EXPECT_NE(abstract, nullptr); diff --git a/tests/ut/cpp/pynative/pynative_execute_test.cc b/tests/ut/cpp/pynative/pynative_execute_test.cc index 80876d12913894096ca6c5363bd3da8e0f414227..63887205e6a05253246aa7f3e87e5876944d0f3a 100644 --- a/tests/ut/cpp/pynative/pynative_execute_test.cc +++ b/tests/ut/cpp/pynative/pynative_execute_test.cc @@ -111,7 +111,7 @@ TEST_F(TestPynativeExecute, TestDefaultContext) { /// Feature: Test pynative infer operation /// Description: Test pynative infer interface by using `matmul` ops /// Expectation: success -TEST_F(TestPynativeExecute, TestInferOperator) { +TEST_F(TestPynativeExecute, DISABLED_TestInferOperator) { auto conv_obj = prim::GetPythonOps("matmul", "gtest_input.pynative"); auto t1 = prim::GetPythonOps("tensor1", "gtest_input.pynative"); auto t2 = prim::GetPythonOps("tensor2", "gtest_input.pynative"); diff --git a/tests/ut/cpp/runtest.sh b/tests/ut/cpp/runtest.sh index 8c8cf919139981b02281f5805fd766954e0d5478..68478dbc7d2d827728fa7b74db7d9d08b6588c26 100755 --- a/tests/ut/cpp/runtest.sh +++ b/tests/ut/cpp/runtest.sh @@ -41,11 +41,31 @@ python ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/data/dataset/testAlbum/gen_j RET=0 if [ $# -gt 0 ]; then ./ut_CORE_tests --gtest_filter=$1 + ./ut_API_tests --gtest_filter=$1 + ./ut_FRONTEND_tests --gtest_filter=$1 + ./ut_OLD_BACKEND_tests --gtest_filter=$1 + ./ut_BACKEND_tests --gtest_filter=$1 + ./ut_PS_tests --gtest_filter=$1 + ./ut_OTHERS_tests --gtest_filter=$1 + ./ut_MINDDATA0_tests --gtest_filter=$1 + ./ut_MINDDATA1_tests --gtest_filter=$1 exit 0 fi +set +e + +#./ut_CORE_tests +#./ut_API_tests +#./ut_FRONTEND_tests +#./ut_OLD_BACKEND_tests +#./ut_BACKEND_tests +#./ut_PS_tests +#./ut_OTHERS_tests +#./ut_MINDDATA0_tests +#./ut_MINDDATA1_tests + pids=() -tasks=(./ut_CORE_tests) +tasks=(./ut_CORE_tests ./ut_API_tests ./ut_FRONTEND_tests ./ut_BACKEND_tests ./ut_PS_tests ./ut_OTHERS_tests ./ut_MINDDATA0_tests) set +e for task in "${tasks[@]}"; do $task & diff --git a/tests/ut/cpp/stub/ge/ge_operator_stub.cc b/tests/ut/cpp/stub/ge/ge_operator_stub.cc index 3bc9317ec577cee05ea431c2a592f8aa5905574b..0e74f416dbf6823da6c5a3e80f50af93ff097bdd 100644 --- a/tests/ut/cpp/stub/ge/ge_operator_stub.cc +++ b/tests/ut/cpp/stub/ge/ge_operator_stub.cc @@ -19,6 +19,7 @@ namespace ge { AscendString::AscendString(char const *name) {} +Operator::Operator(const string &name, const string &type) {} Operator::Operator(const AscendString &name, const AscendString &type) {} Operator::Operator(const char *name, const char *type) {} Operator::Operator(const std::string &type) {} diff --git a/tests/ut/python/rewrite/test_control_flow_if.py b/tests/ut/python/rewrite/test_control_flow_if.py index 1e4c6546f974c4398ac1bfd98bc8e8e37e5e1b2b..9d5adadf82f7632fe5d976a4a2689aa4456ba84f 100644 --- a/tests/ut/python/rewrite/test_control_flow_if.py +++ b/tests/ut/python/rewrite/test_control_flow_if.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ +import mindspore as ms from mindspore.rewrite import SymbolTree as SymbolTreeApi from mindspore.rewrite import NodeType, Node from mindspore import nn, ops, context @@ -201,16 +202,24 @@ def test_flatten_if_control_flow(): assert codes.count("x = self.abs5(x)") == 0 +def custom_func(x): + return x.shape == (2, 2) + class IfNet(nn.Cell): def __init__(self): super().__init__() - self.relu = nn.ReLU() self.abs = ops.Abs() def construct(self, x, y): + if isinstance(x, ms.Tensor) and custom_func(x): + x = self.abs(x) + if isinstance(y, ms.Tensor) and custom_func(y): + x = self.abs(x) if isinstance(y, ms.Tensor) and y.shape == (2, 2): - x = self.relu(x) - else: + x = self.abs(x) + if isinstance(y, ms.Tensor) and y.shape: + x = self.abs(x) + if isinstance(x, ms.Tensor) and isinstance(y, ms.Tensor) and x.shape and custom_func(x) and custom_func(y): x = self.abs(x) return x @@ -218,10 +227,37 @@ def test_flatten_if_with_and(): """ Feature: Test flatten rewrite if control flow node. Description: Test flatten if with and. - Expectation: The first node in and is flatten and other nodes are not flatten. + Expectation: Success. """ net = IfNet() stree = SymbolTreeApi.create(net) codes = stree.get_code() - assert codes.count("isinstance_var = isinstance(y, ms.Tensor)") == 1 - assert codes.count("and_var = (isinstance_var and (y.shape == (2, 2)))") == 1 + assert codes.count("isinstance_var_5 = isinstance(x, ms.Tensor)") == 1 + assert codes.count("if isinstance_var_5:") == 1 + assert codes.count("custom_func_var_3 = custom_func(x)") == 1 + assert codes.count("custom_func_var_3 = False") == 1 + assert codes.count("isinstance_var_4 = isinstance(y, ms.Tensor)") == 1 + assert codes.count("if isinstance_var_4:") == 1 + assert codes.count("custom_func_var_2 = custom_func_1(y)") == 1 + assert codes.count("custom_func_var_2 = False") == 1 + assert codes.count("isinstance_var_3 = isinstance(y, ms.Tensor)") == 1 + assert codes.count("if isinstance_var_3:") == 1 + assert codes.count("tuple_var_2 = (2, 2)") == 1 + assert codes.count("compare_var = (y.shape == tuple_var_2)") == 1 + assert codes.count("compare_var = False") == 1 + assert codes.count("isinstance_var_2 = isinstance(y, ms.Tensor)") == 1 + assert codes.count("and_var_3 = (isinstance_var_2 and y.shape)") == 1 + assert codes.count("isinstance_var = isinstance(x, ms.Tensor)") == 1 + assert codes.count("if isinstance_var:") == 1 + assert codes.count("isinstance_var_1 = isinstance(y, ms.Tensor)") == 1 + assert codes.count("isinstance_var_1 = False") == 1 + assert codes.count("and_var_2 = (isinstance_var and isinstance_var_1 and x.shape)") == 1 + assert codes.count("if and_var_2:") == 1 + assert codes.count("custom_func_var = custom_func_2(x)") == 1 + assert codes.count("custom_func_var = False") == 1 + assert codes.count("and_var_1 = (isinstance_var and isinstance_var_1 and x.shape and custom_func_var)") == 1 + assert codes.count("if and_var_1:") == 1 + assert codes.count("custom_func_var_1 = custom_func_3(y)") == 1 + assert codes.count("custom_func_var_1 = False") == 1 + assert codes.count("and_var = (isinstance_var and isinstance_var_1 and x.shape " + "and custom_func_var and custom_func_var_1)") == 1 diff --git a/third_party/patch/openssl/CVE-2024-2511.patch b/third_party/patch/openssl/CVE-2024-2511.patch new file mode 100644 index 0000000000000000000000000000000000000000..8be177e5ae0da5ebb59bd81db3f66a35575a0c1f --- /dev/null +++ b/third_party/patch/openssl/CVE-2024-2511.patch @@ -0,0 +1,487 @@ +From fc43b2b1abae58c1b261962299d2bbeee770810a Mon Sep 17 00:00:00 2001 +From: jxlang910 +Date: Thu, 11 Apr 2024 17:24:44 +0800 +Subject: [PATCH] fix CVE-2024-2511 + +--- + include/openssl/sslerr.h | 4 +- + ssl/ssl_err.c | 5 +- + ssl/ssl_lib.c | 5 +- + ssl/ssl_sess.c | 36 ++++- + ssl/statem/statem_srvr.c | 5 +- + test/sslapitest.c | 300 +++++++++++++++++++++++++++++++++++++++ + 6 files changed, 339 insertions(+), 16 deletions(-) + +diff --git a/include/openssl/sslerr.h b/include/openssl/sslerr.h +index aa5f56a482..3e99ffc27f 100644 +--- a/include/openssl/sslerr.h ++++ b/include/openssl/sslerr.h +@@ -1,6 +1,6 @@ + /* + * Generated by util/mkerr.pl DO NOT EDIT +- * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. ++ * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy +@@ -224,7 +224,7 @@ int ERR_load_SSL_strings(void); + # define SSL_F_SSL_RENEGOTIATE_ABBREVIATED 546 + # define SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT 320 + # define SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT 321 +-# define SSL_F_SSL_SESSION_DUP 348 ++# define SSL_F_SSL_SESSION_DUP_INTERN 668 + # define SSL_F_SSL_SESSION_NEW 189 + # define SSL_F_SSL_SESSION_PRINT_FP 190 + # define SSL_F_SSL_SESSION_SET1_ID 423 +diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c +index 5a7c42a88c..c4144bb8b4 100644 +--- a/ssl/ssl_err.c ++++ b/ssl/ssl_err.c +@@ -1,6 +1,6 @@ + /* + * Generated by util/mkerr.pl DO NOT EDIT +- * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. ++ * Copyright 1995-2024 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy +@@ -325,7 +325,8 @@ static const ERR_STRING_DATA SSL_str_functs[] = { + "SSL_renegotiate_abbreviated"}, + {ERR_PACK(ERR_LIB_SSL, SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT, 0), ""}, + {ERR_PACK(ERR_LIB_SSL, SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT, 0), ""}, +- {ERR_PACK(ERR_LIB_SSL, SSL_F_SSL_SESSION_DUP, 0), "ssl_session_dup"}, ++ {ERR_PACK(ERR_LIB_SSL, SSL_F_SSL_SESSION_DUP_INTERN, 0), ++ "ssl_session_dup_intern"}, + {ERR_PACK(ERR_LIB_SSL, SSL_F_SSL_SESSION_NEW, 0), "SSL_SESSION_new"}, + {ERR_PACK(ERR_LIB_SSL, SSL_F_SSL_SESSION_PRINT_FP, 0), + "SSL_SESSION_print_fp"}, +diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c +index 618549a2ca..2a44960fac 100644 +--- a/ssl/ssl_lib.c ++++ b/ssl/ssl_lib.c +@@ -3541,9 +3541,10 @@ void ssl_update_cache(SSL *s, int mode) + + /* + * If the session_id_length is 0, we are not supposed to cache it, and it +- * would be rather hard to do anyway :-) ++ * would be rather hard to do anyway :-). Also if the session has already ++ * been marked as not_resumable we should not cache it for later reuse. + */ +- if (s->session->session_id_length == 0) ++ if (s->session->session_id_length == 0 || s->session->not_resumable) + return; + + /* +diff --git a/ssl/ssl_sess.c b/ssl/ssl_sess.c +index 1b4c85b60c..5cc816b0fc 100644 +--- a/ssl/ssl_sess.c ++++ b/ssl/ssl_sess.c +@@ -94,16 +94,11 @@ SSL_SESSION *SSL_SESSION_new(void) + return ss; + } + +-SSL_SESSION *SSL_SESSION_dup(SSL_SESSION *src) +-{ +- return ssl_session_dup(src, 1); +-} +- + /* + * Create a new SSL_SESSION and duplicate the contents of |src| into it. If + * ticket == 0 then no ticket information is duplicated, otherwise it is. + */ +-SSL_SESSION *ssl_session_dup(SSL_SESSION *src, int ticket) ++static SSL_SESSION *ssl_session_dup_intern(SSL_SESSION *src, int ticket) + { + SSL_SESSION *dest; + +@@ -221,11 +216,32 @@ SSL_SESSION *ssl_session_dup(SSL_SESSION *src, int ticket) + + return dest; + err: +- SSLerr(SSL_F_SSL_SESSION_DUP, ERR_R_MALLOC_FAILURE); ++ SSLerr(SSL_F_SSL_SESSION_DUP_INTERN, ERR_R_MALLOC_FAILURE); + SSL_SESSION_free(dest); + return NULL; + } + ++SSL_SESSION *SSL_SESSION_dup(SSL_SESSION *src) ++{ ++ return ssl_session_dup_intern(src, 1); ++} ++ ++/* ++ * Used internally when duplicating a session which might be already shared. ++ * We will have resumed the original session. Subsequently we might have marked ++ * it as non-resumable (e.g. in another thread) - but this copy should be ok to ++ * resume from. ++ */ ++SSL_SESSION *ssl_session_dup(SSL_SESSION *src, int ticket) ++{ ++ SSL_SESSION *sess = ssl_session_dup_intern(src, ticket); ++ ++ if (sess != NULL) ++ sess->not_resumable = 0; ++ ++ return sess; ++} ++ + const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s, unsigned int *len) + { + if (len) +@@ -455,6 +471,12 @@ SSL_SESSION *lookup_sess_in_cache(SSL *s, const unsigned char *sess_id, + ret = s->session_ctx->get_session_cb(s, sess_id, sess_id_len, ©); + + if (ret != NULL) { ++ if (ret->not_resumable) { ++ /* If its not resumable then ignore this session */ ++ if (!copy) ++ SSL_SESSION_free(ret); ++ return NULL; ++ } + tsan_counter(&s->session_ctx->stats.sess_cb_hit); + + /* +diff --git a/ssl/statem/statem_srvr.c b/ssl/statem/statem_srvr.c +index 1b3b8002ee..d242e98024 100644 +--- a/ssl/statem/statem_srvr.c ++++ b/ssl/statem/statem_srvr.c +@@ -2418,9 +2418,8 @@ int tls_construct_server_hello(SSL *s, WPACKET *pkt) + * so the following won't overwrite an ID that we're supposed + * to send back. + */ +- if (s->session->not_resumable || +- (!(s->ctx->session_cache_mode & SSL_SESS_CACHE_SERVER) +- && !s->hit)) ++ if (!(s->ctx->session_cache_mode & SSL_SESS_CACHE_SERVER) ++ && !s->hit) + s->session->session_id_length = 0; + + if (usetls13) { +diff --git a/test/sslapitest.c b/test/sslapitest.c +index 5ee982ab06..395b1e5457 100644 +--- a/test/sslapitest.c ++++ b/test/sslapitest.c +@@ -6669,6 +6669,128 @@ static int test_ca_names(int tst) + return testresult; + } + ++/* ++ * Test that a session cache overflow works as expected ++ * Test 0: TLSv1.3, timeout on new session later than old session ++ * Test 1: TLSv1.2, timeout on new session later than old session ++ * Test 2: TLSv1.3, timeout on new session earlier than old session ++ * Test 3: TLSv1.2, timeout on new session earlier than old session ++ */ ++#if !defined(OPENSSL_NO_TLS1_3) || !defined(OPENSSL_NO_TLS1_2) ++static int test_session_cache_overflow(int idx) ++{ ++ SSL_CTX *sctx = NULL, *cctx = NULL; ++ SSL *serverssl = NULL, *clientssl = NULL; ++ int testresult = 0; ++ SSL_SESSION *sess = NULL; ++ ++#ifdef OPENSSL_NO_TLS1_3 ++ /* If no TLSv1.3 available then do nothing in this case */ ++ if (idx % 2 == 0) ++ TEST_info("No TLSv1.3 available"); ++ return 1; ++#endif ++#ifdef OPENSSL_NO_TLS1_2 ++ /* If no TLSv1.2 available then do nothing in this case */ ++ if (idx % 2 == 1) ++ TEST_info("No TLSv1.2 available"); ++ return 1; ++#endif ++ ++ if (!TEST_true(create_ssl_ctx_pair(TLS_server_method(), ++ TLS_client_method(), TLS1_VERSION, ++ (idx % 2 == 0) ? TLS1_3_VERSION ++ : TLS1_2_VERSION, ++ &sctx, &cctx, cert, privkey)) ++ || !TEST_true(SSL_CTX_set_options(sctx, SSL_OP_NO_TICKET))) ++ goto end; ++ ++ SSL_CTX_sess_set_get_cb(sctx, get_session_cb); ++ get_sess_val = NULL; ++ ++ SSL_CTX_sess_set_cache_size(sctx, 1); ++ ++ if (!TEST_true(create_ssl_objects(sctx, cctx, &serverssl, &clientssl, ++ NULL, NULL))) ++ goto end; ++ ++ if (!TEST_true(create_ssl_connection(serverssl, clientssl, SSL_ERROR_NONE))) ++ goto end; ++ ++ if (idx > 1) { ++ sess = SSL_get_session(serverssl); ++ if (!TEST_ptr(sess)) ++ goto end; ++ ++ /* ++ * Cause this session to have a longer timeout than the next session to ++ * be added. ++ */ ++ if (!TEST_true(SSL_SESSION_set_timeout(sess, LONG_MAX / 2))) { ++ sess = NULL; ++ goto end; ++ } ++ sess = NULL; ++ } ++ ++ SSL_shutdown(serverssl); ++ SSL_shutdown(clientssl); ++ SSL_free(serverssl); ++ SSL_free(clientssl); ++ serverssl = clientssl = NULL; ++ ++ /* ++ * Session cache size is 1 and we already populated the cache with a session ++ * so the next connection should cause an overflow. ++ */ ++ ++ if (!TEST_true(create_ssl_objects(sctx, cctx, &serverssl, &clientssl, ++ NULL, NULL))) ++ goto end; ++ ++ if (!TEST_true(create_ssl_connection(serverssl, clientssl, SSL_ERROR_NONE))) ++ goto end; ++ ++ /* ++ * The session we just negotiated may have been already removed from the ++ * internal cache - but we will return it anyway from our external cache. ++ */ ++ get_sess_val = SSL_get_session(serverssl); ++ if (!TEST_ptr(get_sess_val)) ++ goto end; ++ sess = SSL_get1_session(clientssl); ++ if (!TEST_ptr(sess)) ++ goto end; ++ ++ SSL_shutdown(serverssl); ++ SSL_shutdown(clientssl); ++ SSL_free(serverssl); ++ SSL_free(clientssl); ++ serverssl = clientssl = NULL; ++ ++ if (!TEST_true(create_ssl_objects(sctx, cctx, &serverssl, &clientssl, ++ NULL, NULL))) ++ goto end; ++ ++ if (!TEST_true(SSL_set_session(clientssl, sess))) ++ goto end; ++ ++ if (!TEST_true(create_ssl_connection(serverssl, clientssl, SSL_ERROR_NONE))) ++ goto end; ++ ++ testresult = 1; ++ ++ end: ++ SSL_free(serverssl); ++ SSL_free(clientssl); ++ SSL_CTX_free(sctx); ++ SSL_CTX_free(cctx); ++ SSL_SESSION_free(sess); ++ ++ return testresult; ++} ++#endif /* !defined(OPENSSL_NO_TLS1_3) || !defined(OPENSSL_NO_TLS1_2) */ ++ + /* + * Test 0: Client sets servername and server acknowledges it (TLSv1.2) + * Test 1: Client sets servername and server does not acknowledge it (TLSv1.2) +@@ -7288,6 +7410,180 @@ static int test_inherit_verify_param(void) + return testresult; + } + ++struct resume_servername_cb_data { ++ int i; ++ SSL_CTX *cctx; ++ SSL_CTX *sctx; ++ SSL_SESSION *sess; ++ int recurse; ++}; ++ ++/* ++ * Servername callback. We use it here to run another complete handshake using ++ * the same session - and mark the session as not_resuamble at the end ++ */ ++static int resume_servername_cb(SSL *s, int *ad, void *arg) ++{ ++ struct resume_servername_cb_data *cbdata = arg; ++ SSL *serverssl = NULL, *clientssl = NULL; ++ int ret = SSL_TLSEXT_ERR_ALERT_FATAL; ++ ++ if (cbdata->recurse) ++ return SSL_TLSEXT_ERR_ALERT_FATAL; ++ ++ if ((cbdata->i % 3) != 1) ++ return SSL_TLSEXT_ERR_OK; ++ ++ cbdata->recurse = 1; ++ ++ if (!TEST_true(create_ssl_objects(cbdata->sctx, cbdata->cctx, &serverssl, ++ &clientssl, NULL, NULL)) ++ || !TEST_true(SSL_set_session(clientssl, cbdata->sess))) ++ goto end; ++ ++ ERR_set_mark(); ++ /* ++ * We expect this to fail - because the servername cb will fail. This will ++ * mark the session as not_resumable. ++ */ ++ if (!TEST_false(create_ssl_connection(serverssl, clientssl, SSL_ERROR_NONE))) { ++ ERR_clear_last_mark(); ++ goto end; ++ } ++ ERR_pop_to_mark(); ++ ++ ret = SSL_TLSEXT_ERR_OK; ++ end: ++ SSL_free(serverssl); ++ SSL_free(clientssl); ++ cbdata->recurse = 0; ++ return ret; ++} ++ ++/* ++ * Test multiple resumptions and cache size handling ++ * Test 0: TLSv1.3 (max_early_data set) ++ * Test 1: TLSv1.3 (SSL_OP_NO_TICKET set) ++ * Test 2: TLSv1.3 (max_early_data and SSL_OP_NO_TICKET set) ++ * Test 3: TLSv1.3 (SSL_OP_NO_TICKET, simultaneous resumes) ++ * Test 4: TLSv1.2 ++ */ ++static int test_multi_resume(int idx) ++{ ++ SSL_CTX *sctx = NULL, *cctx = NULL; ++ SSL *serverssl = NULL, *clientssl = NULL; ++ SSL_SESSION *sess = NULL; ++ int max_version = TLS1_3_VERSION; ++ int i, testresult = 0; ++ struct resume_servername_cb_data cbdata; ++ ++#if defined(OPENSSL_NO_TLS1_2) ++ if (idx == 4) ++ TEST_info("TLSv1.2 is disabled in this build"); ++ return 1; ++#else ++ if (idx == 4) ++ max_version = TLS1_2_VERSION; ++#endif ++#if defined(OPENSSL_NO_TLS1_3) ++ if (idx != 4) ++ TEST_info("No usable TLSv1.3 in this build"); ++ return 1; ++#endif ++ ++ if (!TEST_true(create_ssl_ctx_pair(TLS_server_method(), ++ TLS_client_method(), TLS1_VERSION, ++ max_version, &sctx, &cctx, cert, ++ privkey))) ++ goto end; ++ ++ /* ++ * TLSv1.3 only uses a session cache if either max_early_data > 0 (used for ++ * replay protection), or if SSL_OP_NO_TICKET is in use ++ */ ++ if (idx == 0 || idx == 2) { ++ if (!TEST_true(SSL_CTX_set_max_early_data(sctx, 1024))) ++ goto end; ++ } ++ if (idx == 1 || idx == 2 || idx == 3) ++ SSL_CTX_set_options(sctx, SSL_OP_NO_TICKET); ++ ++ SSL_CTX_sess_set_cache_size(sctx, 5); ++ ++ if (idx == 3) { ++ SSL_CTX_set_tlsext_servername_callback(sctx, resume_servername_cb); ++ SSL_CTX_set_tlsext_servername_arg(sctx, &cbdata); ++ cbdata.cctx = cctx; ++ cbdata.sctx = sctx; ++ cbdata.recurse = 0; ++ } ++ ++ for (i = 0; i < 30; i++) { ++ if (!TEST_true(create_ssl_objects(sctx, cctx, &serverssl, &clientssl, ++ NULL, NULL)) ++ || !TEST_true(SSL_set_session(clientssl, sess))) ++ goto end; ++ ++ /* ++ * Check simultaneous resumes. We pause the connection part way through ++ * the handshake by (mis)using the servername_cb. The pause occurs after ++ * session resumption has already occurred, but before any session ++ * tickets have been issued. While paused we run another complete ++ * handshake resuming the same session. ++ */ ++ if (idx == 3) { ++ cbdata.i = i; ++ cbdata.sess = sess; ++ } ++ ++ /* ++ * Recreate a bug where dynamically changing the max_early_data value ++ * can cause sessions in the session cache which cannot be deleted. ++ */ ++ if ((idx == 0 || idx == 2) && (i % 3) == 2) ++ SSL_set_max_early_data(serverssl, 0); ++ ++ if (!TEST_true(create_ssl_connection(serverssl, clientssl, SSL_ERROR_NONE))) ++ goto end; ++ ++ if (sess == NULL || (idx == 0 && (i % 3) == 2)) { ++ if (!TEST_false(SSL_session_reused(clientssl))) ++ goto end; ++ } else { ++ if (!TEST_true(SSL_session_reused(clientssl))) ++ goto end; ++ } ++ SSL_SESSION_free(sess); ++ ++ /* Do a full handshake, followed by two resumptions */ ++ if ((i % 3) == 2) { ++ sess = NULL; ++ } else { ++ if (!TEST_ptr((sess = SSL_get1_session(clientssl)))) ++ goto end; ++ } ++ ++ SSL_shutdown(clientssl); ++ SSL_shutdown(serverssl); ++ SSL_free(serverssl); ++ SSL_free(clientssl); ++ serverssl = clientssl = NULL; ++ } ++ ++ /* We should never exceed the session cache size limit */ ++ if (!TEST_long_le(SSL_CTX_sess_number(sctx), 5)) ++ goto end; ++ ++ testresult = 1; ++ end: ++ SSL_free(serverssl); ++ SSL_free(clientssl); ++ SSL_CTX_free(sctx); ++ SSL_CTX_free(cctx); ++ SSL_SESSION_free(sess); ++ return testresult; ++} ++ + int setup_tests(void) + { + if (!TEST_ptr(certsdir = test_get_argument(0)) +@@ -7422,6 +7718,10 @@ int setup_tests(void) + #if !defined(OPENSSL_NO_TLS1_2) && !defined(OPENSSL_NO_TLS1_3) + ADD_ALL_TESTS(test_serverinfo_custom, 4); + #endif ++#if !defined(OPENSSL_NO_TLS1_2) || !defined(OPENSSL_NO_TLS1_3) ++ ADD_ALL_TESTS(test_session_cache_overflow, 4); ++#endif ++ ADD_ALL_TESTS(test_multi_resume, 5); + return 1; + } + +-- +2.43.0.windows.1 +