MindSpore / models

Null pointer error while migrating BDCN to MindSpore

REJECTED
Question
Created on 2023-03-06 10:53

The model zoo has no BDCN implementation, and I ran into an operator problem while migrating BDCN to MindSpore.
Framework: mindspore-gpu 2.0, CUDA 10.1, Python 3.7
Problem: a pointer-related error is raised when the loss is computed. Error message:
Code at the error site:

The full training script follows:

import numpy as np
import argparse
import time
import os
import BDCN as bdcn
import cfg
import log
import mindspore
from mindspore import nn
from mindspore import ops
from mindspore import Parameter
import mindspore.context as context
from dataset import GetDatasetGenerator
import mindspore.dataset as ds
# from Transform import SegDataSet
import re
from mindspore import Tensor
import cv2
import random

device_target = 'GPU'
if device_target == 'GPU':
    os.environ['CUDA_VISIBLE_DEVICES'] = '7'
    # Graph mode was the default before MindSpore 2.0; force PyNative mode here.
    context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
else:
    # Graph mode was the default before MindSpore 2.0; force PyNative mode here.
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
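# Note: MindSpore 2.0 also exposes a top-level functional API, so the call
# above can equivalently be written as (a sketch, assuming MindSpore >= 2.0):
# mindspore.set_context(mode=mindspore.PYNATIVE_MODE, device_target=device_target)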


def adjust_learning_rate(optimizer, steps, step_size, gamma=0.1, logger=None):
    """Decays the learning rate(s) in place by `gamma`."""
    # MindSpore optimizers have no PyTorch-style `param_groups`; scale
    # `optimizer.learning_rate` instead (assumption: with grouped parameters
    # the attribute is an iterable of per-group learning-rate Parameters).
    lrs = optimizer.learning_rate
    if isinstance(lrs, Parameter):
        lrs = [lrs]
    for lr in lrs:
        lr.set_data(Tensor(lr.asnumpy() * gamma, mindspore.float32))
        if logger:
            logger.info('lr decayed to %s' % lr.asnumpy())


def cross_entropy_loss2d(inputs, targets, cuda=False, balance=1.1):
    """
    :param inputs: 4-dimensional logits, n x 1 x h x w
    :param targets: 4-dimensional binary labels, n x 1 x h x w
    :return: class-balanced binary cross-entropy loss
    """
    n, c, h, w = inputs.shape
    weights = np.zeros((n, c, h, w))
    for i in range(n):
        t = targets.asnumpy()[i]
        pos = (t == 1).sum()
        neg = (t == 0).sum()
        valid = neg + pos
        weights[i, t == 1] = neg * 1. / valid
        weights[i, t == 0] = pos * balance / valid
    weights = mindspore.Tensor(weights, dtype=mindspore.float32)
    inputs = mindspore.ops.sigmoid(inputs)
    loss = nn.BCELoss(weights, reduction='mean')(inputs, targets)
    return loss
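

# A vectorized sketch of the same class-balanced weighting, kept entirely in
# Tensors. It avoids the Tensor -> numpy -> Tensor round trip inside the loss,
# which is slow and a common source of backend errors when the loss is
# differentiated (an illustrative alternative, not the original BDCN code).
def cross_entropy_loss2d_vec(inputs, targets, balance=1.1):
    targets = targets.astype(mindspore.float32)
    pos = ops.equal(targets, 1).astype(mindspore.float32)  # positive-pixel mask
    neg = ops.equal(targets, 0).astype(mindspore.float32)  # negative-pixel mask
    num_pos = pos.sum(axis=(1, 2, 3), keepdims=True)       # positives per sample
    num_neg = neg.sum(axis=(1, 2, 3), keepdims=True)       # negatives per sample
    valid = num_pos + num_neg
    weights = pos * (num_neg / valid) + neg * (num_pos * balance / valid)
    probs = ops.sigmoid(inputs)
    return nn.BCELoss(weight=weights, reduction='mean')(probs, targets)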


def train(model, args):
    # data_root = r'/public/xdw/lcb/mindspore/BDCN/Data_512/mindrecord'
    # trainloader = SegDataSet(mindrecord_root=data_root, batch_size=args.batch_size)
    # trainloader = trainloader.get_dataset(repeat=1)

    train_img = GetDatasetGenerator()
    trainloader = ds.GeneratorDataset(source=train_img, column_names=["images", "labels"])
    trainloader = trainloader.shuffle(4)
    trainloader = trainloader.batch(args.batch_size)
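    # The pipeline above wraps the Python generator via GeneratorDataset; note
    # that shuffle(4) uses only a 4-sample buffer, so shuffling is local.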

    start_step = 1
    mean_loss = []
    pos = 0
    iter_per_epoch = trainloader.get_dataset_size()
    logger = args.logger
    logger.info('*' * 40)
    logger.info('batches per epoch: %d' % iter_per_epoch)
    logger.info('*' * 40)
    start_time = time.time()

    params_dict = dict(model.parameters_and_names())
    base_lr = args.base_lr
    weight_decay = args.weight_decay
    params = []
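    # The grouping below assigns per-layer lr / weight-decay multipliers by
    # parameter name: down-sampling convs, VGG convs, score_dsn heads, frozen
    # upsample layers, and MSBlock convs each get their own scaling.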
    for key, n in params_dict.items():
        v = [n]  # each group holds a single Parameter
        if re.match(r'conv[1-5]_[1-3]_down', key):
            if 'weight' in key:
                params += [{'params': v, 'lr': base_lr * 0.1, 'weight_decay': weight_decay * 1}]
            elif 'bias' in key:
                params += [{'params': v, 'lr': base_lr * 0.2, 'weight_decay': weight_decay * 0}]
        elif re.match(r'.*conv[1-4]_[1-3]', key):
            if 'weight' in key:
                params += [{'params': v, 'lr': base_lr * 1, 'weight_decay': weight_decay * 1}]
            elif 'bias' in key:
                params += [{'params': v, 'lr': base_lr * 2, 'weight_decay': weight_decay * 0}]
        elif re.match(r'.*conv5_[1-3]', key):
            if 'weight' in key:
                params += [{'params': v, 'lr': base_lr * 100, 'weight_decay': weight_decay * 1}]
            elif 'bias' in key:
                params += [{'params': v, 'lr': base_lr * 200, 'weight_decay': weight_decay * 0}]
        elif re.match(r'score_dsn[1-5]', key):
            if 'weight' in key:
                params += [{'params': v, 'lr': base_lr * 0.01, 'weight_decay': weight_decay * 1}]
            elif 'bias' in key:
                params += [{'params': v, 'lr': base_lr * 0.02, 'weight_decay': weight_decay * 0}]
        elif re.match(r'upsample_[248](_5)?', key):
            if 'weight' in key:
                params += [{'params': v, 'lr': base_lr * 0, 'weight_decay': weight_decay * 0}]
            elif 'bias' in key:
                params += [{'params': v, 'lr': base_lr * 0, 'weight_decay': weight_decay * 0}]
        elif re.match(r'.*msblock[1-5]_[1-3]\.conv', key):
            if 'weight' in key:
                params += [{'params': v, 'lr': base_lr * 1, 'weight_decay': weight_decay * 1}]
            elif 'bias' in key:
                params += [{'params': v, 'lr': base_lr * 2, 'weight_decay': weight_decay * 0}]
        else:
            if 'weight' in key:
                params += [{'params': v, 'lr': base_lr * 0.001, 'weight_decay': weight_decay * 1}]
            elif 'bias' in key:
                params += [{'params': v, 'lr': base_lr * 0.002, 'weight_decay': weight_decay * 0}]

    optimizer = nn.SGD(params, momentum=args.momentum,
                       learning_rate=args.base_lr, weight_decay=args.weight_decay)
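    # nn.SGD above receives grouped parameters as a list of dicts with
    # 'params', 'lr' and 'weight_decay' keys; the top-level learning_rate and
    # weight_decay act as defaults for groups that omit those keys.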

    # for param_group in optimizer.param_groups:
    #     if logger:
    #         logger.info('%s: %s' % (param_group['name'], param_group['lr']))
    # if args.cuda:
    #     model.cuda()
    # if args.resume:
    #     logger.info('resume from %s' % args.resume)
    #     state = torch.load(args.resume)
    #     start_step = state['step']
    #     optimizer.load_state_dict(state['solver'])
    #     model.load_state_dict(state['param'])

    batch_size = args.iter_size * args.batch_size
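    # No gradient accumulation is actually performed here; args.iter_size only
    # enters the loss normalization below, mirroring the original BDCN scaling.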

    # Define forward function
    def forward_fn(images, labels):
        out = model(images)
        loss = 0
        for k in range(10):
            loss += args.side_weight * cross_entropy_loss2d(out[k], labels, args.cuda, args.balance) / batch_size
        loss += args.fuse_weight * cross_entropy_loss2d(out[-1], labels, args.cuda, args.balance) / batch_size
        # loss = cross_entropy_loss2d(out, labels, args.cuda, args.balance) / batch_size
        return loss, out

    # Get gradient function
    grad_fn = mindspore.ops.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)
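    # grad_position=None plus weights=optimizer.parameters differentiates only
    # w.r.t. the optimizer's Parameters; has_aux=True makes grad_fn return
    # ((loss, out), grads) while gradients are taken from `loss` alone.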

    # Define function of one-step training
    def train_step(images, labels):
        (loss, logits), grads = grad_fn(images, labels)
        optimizer(grads)
        return loss

    for step in range(start_step, args.max_iter + 1):
        batch_loss = 0
        model.set_train()
        for batch, (images, labels) in enumerate(trainloader.create_tuple_iterator()):
            loss = train_step(images, labels)
            batch_loss += float(loss.asnumpy())  # accumulate for the smoothed loss below
            if batch % 5 == 0:
                print(f"loss: {float(loss.asnumpy()):>7f}  [{batch:>3d}/{iter_per_epoch:>3d}]")

        if step % args.step_size == 0:
            adjust_learning_rate(optimizer, step, args.step_size, args.gamma)
        if step % args.snapshots == 0:
            mindspore.save_checkpoint(model, '{}/bdcn_step{}.ckpt'.format(args.param_dir, step))
            print("****** saving ckpt in param ******")

        if len(mean_loss) < args.average_loss:
            mean_loss.append(batch_loss)
        else:
            mean_loss[pos] = batch_loss
            pos = (pos + 1) % args.average_loss
        if step % args.display == 0:
            tm = time.time() - start_time
            print("time: ", tm, " mean loss: ", mean_loss)
            start_time = time.time()


def main():
    args = parse_args()
    logger = log.get_logger(args.log)
    args.logger = logger
    logger.info('*' * 80)
    logger.info('the args are the below')
    logger.info('*' * 80)
    for x in args.__dict__:
        logger.info(x + ',' + str(args.__dict__[x]))
    logger.info(cfg.config[args.dataset])
    logger.info('*' * 80)
    # os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    if not os.path.exists(args.param_dir):
        os.mkdir(args.param_dir)
    # torch.manual_seed(int(time.time()))  # long
    model = bdcn.BDCN(pretrain=args.pretrain, logger=logger)
    # if args.complete_pretrain:
    #     model.load_state_dict(torch.load(args.complete_pretrain))
    logger.info(model)
    train(model, args)


def parse_args():
    parser = argparse.ArgumentParser(description='Train BDCN for different args')
    parser.add_argument('-d', '--dataset', type=str, choices=cfg.config.keys(),
                        default='bsds500', help='The dataset to train')
    parser.add_argument('--param-dir', type=str, default='params',
                        help='the directory to store the params')
    parser.add_argument('--lr', dest='base_lr', type=float, default=1e-4,
                        help='the base learning rate of the model, default 1e-4')
    parser.add_argument('-m', '--momentum', type=float, default=0.9,
                        help='the momentum')
    parser.add_argument('-c', '--cuda', action='store_true',
                        help='whether use gpu to train network')
    parser.add_argument('-g', '--gpu', type=str, default='0',
                        help='the gpu id to train net')
    parser.add_argument('--weight-decay', type=float, default=0.0002,
                        help='the weight_decay of net')
    parser.add_argument('-r', '--resume', type=str, default=None,
                        help='checkpoint to resume training from, default is None')
    parser.add_argument('-p', '--pretrain', type=str, default=None,
                        help='init net from pretrained model default is None')
    parser.add_argument('--max-iter', type=int, default=50,
                        help='max iters to train the network, default is 50')
    parser.add_argument('--iter-size', type=int, default=10,
                        help='iters per effective batch (loss normalization), default 10')
    parser.add_argument('--average-loss', type=int, default=50,
                        help='window size of the smoothed loss, default is 50')
    parser.add_argument('-s', '--snapshots', type=int, default=10,
                        help='how many iters between checkpoint snapshots, default is 10')
    parser.add_argument('--step-size', type=int, default=10000,
                        help='the number of iters to decrease the learning rate, default is 10000')
    parser.add_argument('--display', type=int, default=20,
                        help='how many iters display one time, default is 20')
    parser.add_argument('-b', '--balance', type=float, default=1.1,
                        help='the parameter to balance the neg and pos, default is 1.1')
    parser.add_argument('-l', '--log', type=str, default='log.txt',
                        help='the file to store log, default is log.txt')
    parser.add_argument('-k', type=int, default=1,
                        help='the k-th split set of multicue')
    parser.add_argument('--batch-size', type=int, default=1,
                        help='batch size of one iteration, default 1')
    parser.add_argument('--crop-size', type=int, default=None,
                        help='the size of image to crop, default not crop')
    parser.add_argument('--yita', type=float, default=None,
                        help='the param to operate gt, default is data in the config file')
    parser.add_argument('--complete-pretrain', type=str, default=None,
                        help='finetune on the complete_pretrain, default None')
    parser.add_argument('--side-weight', type=float, default=0.5,
                        help='the loss weight of sideout, default 0.5')
    parser.add_argument('--fuse-weight', type=float, default=1.1,
                        help='the loss weight of fuse, default 1.1')
    parser.add_argument('--gamma', type=float, default=0.1,
                        help='the decay of learning rate, default 0.1')
    return parser.parse_args()


if __name__ == '__main__':
    main()
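
To narrow down where the crash comes from, the loss function can be exercised on its own with random tensors of the training shape. If the same pointer error appears here, the problem is in the loss path rather than in the BDCN model. A minimal sketch, assuming hypothetical 1 x 1 x 256 x 256 inputs and reusing the cross_entropy_loss2d defined above:

import numpy as np
import mindspore
from mindspore import Tensor

# Random logits and binary labels with the layout the loss expects (n x 1 x h x w).
logits = Tensor(np.random.randn(1, 1, 256, 256), mindspore.float32)
labels = Tensor((np.random.rand(1, 1, 256, 256) > 0.9).astype(np.float32))
print(cross_entropy_loss2d(logits, labels))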

Comments (3)

任意 created this Question

Please assign a maintainer to check this issue.
@fangwenyi @chengxiaoli @wuweikang

Please add labels (comp or sig); you can visit https://gitee.com/mindspore/community/blob/master/sigs/dx/docs/labels.md to find more.
To get your pull request reviewed quickly, add a component (comp) or special interest group (sig) label, and it will be routed straight to a reviewer.
For example, for a change to the data component you can comment:
//comp/data
You can also invite the data SIG to review the code:
//sig/data
You can additionally mark the type of the PR, e.g. bugfix or feature request:
//kind/bug or //kind/feature
Congratulations, you now know how to add labels with commands; add them in a comment below!

Issue I6K1N0 already tracks the same problem, so this issue is closed as a duplicate. If the problem persists, please share the details and set the issue status to WIP, and we will follow up. Thanks.

wuweikang changed the status from TODO to REJECTED
