PaddlePaddle / Paddle
LSTM layer does not release GPU memory

Status: To do
Created on 2023-07-02 15:57

Please ask your question

In the official Paddle demo below, once the dataset exceeds 200,000 samples, the LSTM layer keeps allocating GPU memory and only releases it when the epoch ends:
https://www.paddlepaddle.org.cn/documentation/docs/zh/practices/nlp/seq2seq_with_attention.html
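One common reason peak memory keeps climbing with variable-length sequence data is that each batch is padded to its own longest sequence, so an occasionally very long sample forces large activations that the caching allocator then holds onto. A framework-agnostic sketch of length-bucketing that keeps sequences of similar length in the same batch (the function and names here are illustrative, not from the linked demo):

```python
from typing import List, Sequence

def bucket_by_length(lengths: Sequence[int], batch_size: int) -> List[List[int]]:
    """Group sample indices so each batch holds sequences of similar
    length, reducing per-batch padding and therefore peak activation
    memory in the RNN layers."""
    # Sort indices by sequence length, then slice into fixed-size batches.
    order = sorted(range(len(lengths)), key=lambda i: lengths[i])
    return [order[i:i + batch_size] for i in range(0, len(order), batch_size)]

# Example: six sequences with mixed lengths, batches of two.
# Short sequences end up together instead of being padded to length 52.
batches = bucket_by_length([5, 50, 7, 48, 6, 52], batch_size=2)
```

In practice the resulting index batches would be fed to the dataloader's sampler; shuffling bucket order each epoch preserves most of the randomness while keeping padding small.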

  File "/root/code/pre_train.py", line 94, in run
    loss.backward()
  File "/home/pai/lib/python3.9/site-packages/decorator.py", line 232, in fun
    return caller(func, *(extras + args), **kw)
  File "/home/pai/lib/python3.9/site-packages/paddle/fluid/wrapped_decorator.py", line 26, in __impl__
    return wrapped_func(*args, **kwargs)
  File "/home/pai/lib/python3.9/site-packages/paddle/fluid/framework.py", line 534, in __impl__
    return func(*args, **kwargs)
  File "/home/pai/lib/python3.9/site-packages/paddle/fluid/dygraph/varbase_patch_methods.py", line 297, in backward
    core.eager.run_backward([self], grad_tensor, retain_graph)
SystemError: (Fatal) Operator rnn_grad raises an paddle::memory::allocation::BadAlloc exception.
The exception content is:

--------------------------------------
C++ Traceback (most recent call last):
--------------------------------------
0   egr::Backward(std::vector<paddle::experimental::Tensor, std::allocator<paddle::experimental::Tensor> > const&, std::vector<paddle::experimental::Tensor, std::allocator<paddle::experimental::Tensor> > const&, bool)
1   egr::RunBackward(std::vector<paddle::experimental::Tensor, std::allocator<paddle::experimental::Tensor> > const&, std::vector<paddle::experimental::Tensor, std::allocator<paddle::experimental::Tensor> > const&, bool, bool, std::vector<paddle::experimental::Tensor, std::allocator<paddle::experimental::Tensor> > const&, bool, std::vector<paddle::experimental::Tensor, std::allocator<paddle::experimental::Tensor> > const&)
2   rnnGradNodeCompat::operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor, std::allocator<paddle::experimental::Tensor> >, 15u>&, bool, bool)
3   paddle::imperative::Tracer::TraceOp(std::string const&, paddle::imperative::NameTensorMap const&, paddle::imperative::NameTensorMap const&, paddle::framework::AttributeMap&, phi::Place const&, paddle::framework::AttributeMap*, bool, std::map<std::string, std::string, std::less<std::string >, std::allocator<std::pair<std::string const, std::string > > > const&)
4   void paddle::imperative::Tracer::TraceOpImpl<egr::EagerVariable>(std::string const&, paddle::imperative::details::NameVarMapTrait<egr::EagerVariable>::Type const&, paddle::imperative::details::NameVarMapTrait<egr::EagerVariable>::Type const&, paddle::framework::AttributeMap&, phi::Place const&, bool, std::map<std::string, std::string, std::less<std::string >, std::allocator<std::pair<std::string const, std::string > > > const&, paddle::framework::AttributeMap*, bool)
5   paddle::imperative::PreparedOp::Run(paddle::imperative::NameTensorMap const&, paddle::imperative::NameTensorMap const&, paddle::framework::AttributeMap const&, paddle::framework::AttributeMap const&)
6   phi::KernelImpl<void (*)(phi::GPUContext const&, phi::DenseTensor const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, paddle::optional<phi::DenseTensor> const&, phi::DenseTensor const&, phi::DenseTensor const&, phi::DenseTensor const&, phi::DenseTensor const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, float, bool, int, int, int, std::string const&, int, bool, phi::DenseTensor*, std::vector<phi::DenseTensor*, std::allocator<phi::DenseTensor*> >, std::vector<phi::DenseTensor*, std::allocator<phi::DenseTensor*> >), &(void phi::RnnGradKernel<float, phi::GPUContext>(phi::GPUContext const&, phi::DenseTensor const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, paddle::optional<phi::DenseTensor> const&, phi::DenseT...
7   void phi::KernelImpl<void (*)(phi::GPUContext const&, phi::DenseTensor const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, paddle::optional<phi::DenseTensor> const&, phi::DenseTensor const&, phi::DenseTensor const&, phi::DenseTensor const&, phi::DenseTensor const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, float, bool, int, int, int, std::string const&, int, bool, phi::DenseTensor*, std::vector<phi::DenseTensor*, std::allocator<phi::DenseTensor*> >, std::vector<phi::DenseTensor*, std::allocator<phi::DenseTensor*> >), &(void phi::RnnGradKernel<float, phi::GPUContext>(phi::GPUContext const&, phi::DenseTensor const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, paddle::optional<phi::DenseTensor> const&, phi::D...
8   void phi::RnnGradKernel<float, phi::GPUContext>(phi::GPUContext const&, phi::DenseTensor const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, paddle::optional<phi::DenseTensor> const&, phi::DenseTensor const&, phi::DenseTensor const&, phi::DenseTensor const&, phi::DenseTensor const&, std::vector<phi::DenseTensor const*, std::allocator<phi::DenseTensor const*> > const&, float, bool, int, int, int, std::string const&, int, bool, phi::DenseTensor*, std::vector<phi::DenseTensor*, std::allocator<phi::DenseTensor*> >, std::vector<phi::DenseTensor*, std::allocator<phi::DenseTensor*> >)
9   void phi::EmptyKernel<unsigned char, phi::GPUContext>(phi::GPUContext const&, paddle::experimental::IntArrayBase<phi::DenseTensor> const&, paddle::experimental::DataType, phi::DenseTensor*)
10  unsigned char* phi::DeviceContext::Alloc<unsigned char>(phi::TensorBase*, unsigned long, bool) const
11  phi::DeviceContext::Impl::Alloc(phi::TensorBase*, phi::Place const&, paddle::experimental::DataType, unsigned long, bool) const
12  phi::DenseTensor::AllocateFrom(phi::Allocator*, paddle::experimental::DataType, unsigned long)
13  paddle::memory::allocation::StatAllocator::AllocateImpl(unsigned long)
14  paddle::memory::allocation::Allocator::Allocate(unsigned long)
15  paddle::memory::allocation::Allocator::Allocate(unsigned long)
16  paddle::memory::allocation::Allocator::Allocate(unsigned long)
17  paddle::memory::allocation::CUDAAllocator::AllocateImpl(unsigned long)
18  std::string phi::enforce::GetCompleteTraceBackString<std::string >(std::string&&, char const*, int)
19  phi::enforce::GetCurrentTraceBackString[abi:cxx11](bool)

----------------------
Error Message Summary:
----------------------
ResourceExhaustedError: 

Out of memory error on GPU 0. Cannot allocate 8.812500MB memory on GPU 0, 22.187256GB memory has been allocated and available memory is only 12.125000MB.

Please check whether there is any other process using GPU 0.
1. If yes, please stop them, or start PaddlePaddle on another GPU.
2. If no, please decrease the batch size of your model. 
If the above ways do not solve the out of memory problem, you can try to use CUDA managed memory. The command is `export FLAGS_use_cuda_managed_memory=false`.
 (at /paddle/paddle/fluid/memory/allocation/cuda_allocator.cc:95)
. (at /paddle/paddle/fluid/imperative/tracer.cc:351)
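Beyond reducing the batch size, the error message itself points at CUDA managed memory. Note that `false` is the off state for that flag, so the value to experiment with is presumably `true` (this is an assumption; verify against the Paddle environment-FLAGS documentation for your version). A sketch of settings to try before launching training:

```shell
# Allow GPU allocations to spill into host memory via CUDA managed
# memory. Assumption: `true` enables it; the log above prints `false`,
# which appears to be the disabled/default state.
export FLAGS_use_cuda_managed_memory=true

# Optionally cap how much of the GPU Paddle's caching allocator may
# reserve, leaving headroom for cuDNN workspace allocations.
export FLAGS_fraction_of_gpu_memory_to_use=0.8
```

These flags change allocator behavior only; if memory still grows monotonically within an epoch, the root cause is more likely graph or activation accumulation in the training loop.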
