diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc index 7e18ab23368d6b17e8cb8f5337073d885bd99db4..8c7b2462b8556e054b0279e3588c8e1e7f465d84 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/hardware/ge_kernel_executor.cc @@ -990,7 +990,7 @@ bool GeKernelExecutor::MemoryCopyAsync(const CNodePtr &node, const vector &inputs, const vector &workspace, const vector &outputs, KernelMod *kernel_mod, void *stream) const { - profiler::ascend::ProfilingFrameworkData::RecordLaunchGETaskBegin(kernel); + profiler::ascend::ProfilingFrameworkData::RecordLaunchGETaskBegin(kernel->fullname_with_scope()); // launch kernel uint64_t start_time = 0; PROFILER_START(start_time); @@ -1009,7 +1009,7 @@ bool GeKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vectorfullname_with_scope()); // for PyNative Sync Run mode auto ret = PySyncRuning(stream); if (!ret) { diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc index a2de31ba20619b6ac27fe82e89edd79312197a7e..59948ce6bdfc71e53b45d325d0737a1c4da5506f 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.cc @@ -113,9 +113,9 @@ void AscendProfiler::Init(const std::string &profiling_path, uint32_t device_id, uint32_t device_list[1] = {device_id_}; uint32_t device_num = 1; - uint64_t mask = GetOptionsMask(); aclprofAicoreMetrics aic_metrics = GetAicMetrics(); - acl_config_ = CALL_ASCEND_API(aclprofCreateConfig, device_list, device_num, aic_metrics, nullptr, GetOptionsMask()); + uint64_t mask = GetOptionsMask(aic_metrics); + acl_config_ = CALL_ASCEND_API(aclprofCreateConfig, device_list, device_num, aic_metrics, nullptr, mask); if (acl_config_ == nullptr) { MS_LOG(EXCEPTION) << "Failed to call aclprofCreateConfig function."; } @@ -124,9 +124,8 @@ void AscendProfiler::Init(const std::string &profiling_path, uint32_t device_id, init_flag_ = true; } -uint64_t AscendProfiler::GetOptionsMask() const { - uint64_t mask = ACL_PROF_ACL_API | ACL_PROF_AICORE_METRICS; - +uint64_t AscendProfiler::GetOptionsMask(aclprofAicoreMetrics aic_metrics) const { + uint64_t mask = 0; nlohmann::json options_json; try { options_json = nlohmann::json::parse(profiling_options_); @@ -135,6 +134,10 @@ uint64_t AscendProfiler::GetOptionsMask() const { return ACL_AICORE_NONE; } + if (aic_metrics != ACL_AICORE_NONE) { + mask |= ACL_PROF_AICORE_METRICS; + } + if (options_json["task_trace"] == "on") { mask |= ACL_PROF_TASK_TIME; } diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.h b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.h index f0482d2b113dfaa896d7c58720b9176b71322a7f..e5d85253b7cf88e3bf32048044e64c4d60744eaf 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/ascend_profiling.h @@ -43,7 +43,7 @@ class AscendProfiler : public Profiler { void StepStop() override; void StepProfilingEnable(const bool enable_flag) override; void OpDataProducerEnd() override { return; } - uint64_t GetOptionsMask() const; + uint64_t GetOptionsMask(aclprofAicoreMetrics aic_metrics) const; void MsprofInitProfiler() const; void MsprofStopProfiler() const; aclprofAicoreMetrics GetAicMetrics() const; diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_data_dumper.h b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_data_dumper.h index c80fb551ca4966555ce3857d4e92b3b8e8a6dd22..3d93e34b4f8f088a1aae43bb2064afab0d7790ef 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_data_dumper.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_data_dumper.h @@ -38,8 +38,8 @@ namespace profiler { namespace ascend { constexpr uint32_t kDefaultRingBuffer = 1024; constexpr uint32_t kBatchMaxLen = 5 * 1024 * 1024; // 5 MB -constexpr uint32_t kMaxWaitTimeUs = 1024; -constexpr uint32_t kNotifyInterval = 2; +constexpr uint32_t kMaxWaitTimeUs = 1000 * 1000; +constexpr uint32_t kNotifyInterval = 1000; class Utils { public: diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_framework_data.cc b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_framework_data.cc index 9eb6768d7773c2fa33fdca3342428148f92e8191..62da2edf1b8a7695067716daebd009cdfebccaab 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_framework_data.cc +++ b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_framework_data.cc @@ -118,8 +118,7 @@ std::vector OpRangeData::encode() { return resultTLV; } -void ProfilingFrameworkData::RecordLaunchGETaskBegin(const CNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); +void ProfilingFrameworkData::RecordLaunchGETaskBegin(const std::string &scope_name) { auto ascend_profiler = Profiler::GetInstance(kAscendDevice); MS_EXCEPTION_IF_NULL(ascend_profiler); if (!ascend_profiler->GetEnableFlag()) { @@ -128,11 +127,10 @@ void ProfilingFrameworkData::RecordLaunchGETaskBegin(const CNodePtr &node) { int64_t start_ns = GetClockSyscnt(); auto tid = syscall(SYS_gettid); - kernel_launch_begin_[std::to_string(tid) + "_" + node->fullname_with_scope()] = start_ns; + kernel_launch_begin_[std::to_string(tid) + "_" + scope_name] = start_ns; } -void ProfilingFrameworkData::RecordGETask(const CNodePtr &node) { - MS_EXCEPTION_IF_NULL(node); +void ProfilingFrameworkData::RecordGETask(const std::string &scope_name) { auto ascend_profiler = Profiler::GetInstance(kAscendDevice); MS_EXCEPTION_IF_NULL(ascend_profiler); if (!ascend_profiler->GetEnableFlag()) { @@ -140,23 +138,22 @@ void ProfilingFrameworkData::RecordGETask(const CNodePtr &node) { } auto tid = syscall(SYS_gettid); - std::string full_scope_name = node->fullname_with_scope(); - auto iter = kernel_launch_begin_.find(std::to_string(tid) + "_" + full_scope_name); + auto iter = kernel_launch_begin_.find(std::to_string(tid) + "_" + scope_name); if (iter == kernel_launch_begin_.end()) { - MS_LOG(WARNING) << "Do not find op info: " << full_scope_name; + MS_LOG(WARNING) << "Do not find op info: " << scope_name; return; } int64_t start_ns = iter->second; int64_t end_ns = GetClockSyscnt(); int64_t sequence_number = 0; - uint64_t process_id = getpid(); + uint64_t process_id = 0; uint64_t start_thread_id = static_cast(tid); uint64_t end_thread_id = start_thread_id; uint64_t forward_thread_id = start_thread_id; bool is_async = false; OpRangeData report = OpRangeData(start_ns, end_ns, sequence_number, process_id, start_thread_id, end_thread_id, - forward_thread_id, is_async, full_scope_name, ProfilingFrameworkData::Device_Id); + forward_thread_id, is_async, scope_name, ProfilingFrameworkData::Device_Id); ProfilingDataDumper::GetInstance()->Report(std::make_unique(report)); } diff --git a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_framework_data.h b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_framework_data.h index eb0654c1d2911a0c8efb8aad54bce8a42d121794..b4a092dd1c9e1244ab1983550a908b15bfa05dd7 100644 --- a/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_framework_data.h +++ b/mindspore/ccsrc/plugin/device/ascend/hal/profiler/profiling_framework_data.h @@ -74,8 +74,8 @@ struct OpRangeData : BaseReportData { class ProfilingFrameworkData { public: - static void RecordLaunchGETaskBegin(const CNodePtr &node); - static void RecordGETask(const CNodePtr &node); + static void RecordLaunchGETaskBegin(const std::string &scope_name); + static void RecordGETask(const std::string &scope_name); inline static std::map kernel_launch_begin_; inline static int32_t Device_Id = 0;