本文共 8818 字,大约阅读时间需要 29 分钟。
这篇和上一篇教程一样,都是简单的图像分类,只是针对的数据集不同。鉴于是入门教程,这里再仔细写一遍。
分享一篇我觉得非常适合新手理解CNN的博文
MNIST数据集(训练集6万张,测试集1万张;每张都是28×28×1的单通道灰度图,数字0-9对应标签值0-9)如下:
1. 全代码名称展示
因为是最简单的一个手写体数据集,随便跑个几十轮,在测试集上的准确率就到了99%以上
2. 代码
import os
import shutil

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.datasets import MNIST

# torch.nn is the modular interface designed for neural networks. It is built
# on top of autograd and is used to define and run networks.
# nn.Module is the central class: it holds the layer definitions and the
# forward method.
# To define your own network, subclass nn.Module and implement forward().
#   - Layers with learnable parameters normally go in __init__().
#   - Layers without learnable parameters (e.g. ReLU) may go in __init__()
#     or be replaced by nn.functional calls inside forward().
#   - Once forward() is defined, backward is derived automatically by autograd.
# Note: nn.Module models only accept mini-batch input. A single image must
# also be reshaped to N x C x H x W:
#     input_image = torch.FloatTensor(1, 28, 28)
#     input_image = input_image.unsqueeze(0)  # 1 x 1 x 28 x 28


class Unit(nn.Module):
    """Conv(3x3, padding=1) -> BatchNorm -> ReLU; keeps the spatial size.

    Args:
        inc: number of input channels.
        ouc: number of output channels.
    """

    def __init__(self, inc, ouc):
        super(Unit, self).__init__()
        self.unit_net = nn.Sequential(
            nn.Conv2d(inc, ouc, kernel_size=3, padding=1),
            nn.BatchNorm2d(ouc),
            nn.ReLU())

    def forward(self, x):
        return self.unit_net(x)


class Net(nn.Module):
    """Small VGG-style CNN mapping a 1-channel image batch to 10 class logits."""

    def __init__(self):
        super(Net, self).__init__()
        self.net = nn.Sequential(
            Unit(1, 32),  # 28 x 28 for MNIST input
            Unit(32, 32),
            Unit(32, 32),
            nn.MaxPool2d(2),  # 14
            Unit(32, 64),
            Unit(64, 64),
            Unit(64, 64),
            Unit(64, 64),
            nn.MaxPool2d(2),  # 7
            Unit(64, 128),
            Unit(128, 128),
            Unit(128, 128),
            Unit(128, 128),
            nn.MaxPool2d(2),  # 3
            Unit(128, 128),
            Unit(128, 128),
            Unit(128, 128),
            # A fixed 4x4 average pool only fits a 32x32 input (4x4 feature
            # map here); with 28x28 input the map is 3x3. Adaptive pooling
            # yields 1x1 for any input size and has no parameters, so the
            # state_dict layout is unchanged.
            nn.AdaptiveAvgPool2d(1),  # 1
        )
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        y = self.net(x)
        y = y.view(-1, 128)  # flatten N x 128 x 1 x 1 -> N x 128
        return self.fc(y)


# Training-set transform.
# NOTE(review): horizontally flipping digits is not label-preserving for most
# classes; consider removing RandomHorizontalFlip for MNIST.
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # random flip
    transforms.ToTensor(),
    # MNIST images have a single channel, so mean/std need one entry each.
    transforms.Normalize((0.5,), (0.5,)),
])
# train=True loads the training split.
train_set = MNIST('./data/', train=True, transform=train_transforms, download=True)
train_dataloader = DataLoader(train_set, batch_size=512, shuffle=True)

# Test-set transform.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
# train=False loads the test split.
test_set = MNIST('./data/', train=False, transform=test_transforms, download=True)
test_dataloader = DataLoader(test_set, batch_size=512, shuffle=False)

param_path = r'./param/mnist_cnn.pkl'
tmp_param_path = r'./param/mnist_cnn_temp.pkl'

CUDA = torch.cuda.is_available()
module = Net()
if CUDA:
    module.cuda()

optimizer = Adam(module.parameters(), lr=0.001, weight_decay=0.0001)
loss_f = nn.CrossEntropyLoss()  # cross entropy for classification


def adjust_lr_rate(epoch):
    """Set the optimizer learning rate for the given epoch.

    The base rate 0.001 is divided by 10 once epoch exceeds 30, 60, 90, 120,
    150 and 180 (so at most by 10**6).

    Args:
        epoch: zero-based epoch index.
    """
    decay_steps = min(max(epoch - 1, 0) // 30, 6)
    lr = 0.001 / 10 ** decay_steps
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


def test():
    """Evaluate the model on the test set.

    Returns:
        Fraction of correctly classified test images, as a float.
    """
    correct = 0
    module.eval()
    # No loss / gradients are needed for evaluation; skip autograd bookkeeping.
    with torch.no_grad():
        for imgs, labels in test_dataloader:  # 512 images per batch
            if CUDA:
                imgs = imgs.cuda()
                labels = labels.cuda()
            outs = module(imgs)
            _, prediction = torch.max(outs, 1)
            correct += (prediction == labels).sum().item()
    return correct / len(test_set)


def train(num_epoch):
    """Train the model, evaluating and checkpointing after every epoch.

    If a checkpoint exists at ``param_path`` training resumes from it. A new
    checkpoint is written only when test accuracy improves.

    Args:
        num_epoch: number of epochs to run.
    """
    # torch.save does not create missing directories.
    os.makedirs(os.path.dirname(param_path), exist_ok=True)

    best_acc = 0
    if os.path.exists(param_path):
        # map_location lets a checkpoint saved on GPU be restored on CPU too.
        module.load_state_dict(torch.load(param_path, map_location='cpu'))
        # Start from the resumed model's accuracy so a worse epoch cannot
        # overwrite the existing checkpoint.
        best_acc = test()

    num_train = len(train_set)
    for epoch in range(num_epoch):
        train_loss = 0
        train_correct = 0
        module.train()
        for imgs, labels in train_dataloader:  # 512 images per batch
            # Each label is an integer 0-9.
            if CUDA:
                imgs = imgs.cuda()
                labels = labels.cuda()
            outs = module(imgs)
            loss = loss_f(outs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # loss is a 0-dim tensor: use .item() to get the Python number.
            # (loss.data[0] raises IndexError; loss.numpy() raises while the
            # tensor requires grad -- detach() first.)
            train_loss += loss.item() * imgs.size(0)  # imgs.size(0) = batch size
            # For classification, torch.max(outs, 1) gives the index of the
            # largest logit per row, i.e. the predicted class 0-9.
            _, prediction = torch.max(outs, 1)
            train_correct += (prediction == labels).sum().item()

        adjust_lr_rate(epoch)
        train_loss = train_loss / num_train
        train_acc = train_correct / num_train

        # Evaluate on the test set after every epoch.
        test_acc = test()
        if test_acc > best_acc:
            best_acc = test_acc
            # Write to a temp file first, then swap it in, so an interrupted
            # save can never corrupt the last good checkpoint.
            torch.save(module.state_dict(), tmp_param_path)
            os.replace(tmp_param_path, param_path)
        print('Epoch:',epoch,'Train_Loss:',train_loss,'Train_Acc:',train_acc,'Test_Acc:',test_acc)


if __name__ == '__main__':
    train(1000)
3. 扩展
用如上训练好的模型做推断
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.models import squeezenet1_1
from PIL import Image


class Unit(nn.Module):
    """Conv(3x3, padding=1) -> BatchNorm -> ReLU; keeps the spatial size.

    Args:
        inc: number of input channels.
        ouc: number of output channels.
    """

    def __init__(self, inc, ouc):
        super(Unit, self).__init__()
        self.unit_net = nn.Sequential(
            nn.Conv2d(inc, ouc, kernel_size=3, padding=1),
            nn.BatchNorm2d(ouc),
            nn.ReLU())

    def forward(self, x):
        return self.unit_net(x)


class Net(nn.Module):
    """Small VGG-style CNN mapping a 1-channel image batch to 10 class logits."""

    def __init__(self):
        super(Net, self).__init__()
        self.net = nn.Sequential(
            Unit(1, 32),  # 28 x 28 for MNIST-sized input
            Unit(32, 32),
            Unit(32, 32),
            nn.MaxPool2d(2),  # 14
            Unit(32, 64),
            Unit(64, 64),
            Unit(64, 64),
            Unit(64, 64),
            nn.MaxPool2d(2),  # 7
            Unit(64, 128),
            Unit(128, 128),
            Unit(128, 128),
            Unit(128, 128),
            nn.MaxPool2d(2),  # 3
            Unit(128, 128),
            Unit(128, 128),
            Unit(128, 128),
            # A fixed 4x4 average pool does not fit the 3x3 map produced by a
            # 28x28 input. Adaptive pooling gives 1x1 for any input size and
            # has no parameters, so saved weights load unchanged.
            nn.AdaptiveAvgPool2d(1),  # 1
        )
        self.fc = nn.Linear(128, 10)

    def forward(self, x):
        y = self.net(x)
        y = y.view(-1, 128)  # flatten N x 128 x 1 x 1 -> N x 128
        return self.fc(y)


CUDA = torch.cuda.is_available()
# module = squeezenet1_1(pretrained=True)  # a built-in torchvision model can be used the same way
module = Net()  # our own model
# map_location lets weights saved on a GPU be loaded on a CPU-only machine.
module.load_state_dict(torch.load(r'./param/mnist_cnn.pkl', map_location='cpu'))
module.eval()
if CUDA:
    module.cuda()


def predict_img(img_path):
    """Classify one image file and return the predicted digit.

    Args:
        img_path: path of the image to classify.

    Returns:
        The index (0-9) of the largest output logit, as a Python int.
    """
    with Image.open(img_path) as img:
        # The network takes exactly one input channel.
        gray = img.convert('L')
    img2data = transforms.Compose([
        # Resize(28) would only scale the shorter edge; force 28 x 28.
        transforms.Resize((28, 28)),
        transforms.ToTensor(),
        # Single-channel input: one mean / std entry.
        transforms.Normalize((0.5,), (0.5,)),
    ])
    # The model only accepts batched 4-D input: 1 x 1 x 28 x 28.
    data = img2data(gray).unsqueeze(0)
    if CUDA:
        data = data.cuda()
    with torch.no_grad():  # inference only, no gradients needed
        out = module(data)
    _, index = torch.max(out, 1)
    return index.item()


if __name__ == '__main__':
    index = predict_img(r'C:\Users\87419\Desktop/00.jpg')
    print(index)
随便拿一张灰度图测试一下:
注:测试图也需是灰度图
RGB图转灰度图代码
from PIL import Image

# Convert an RGB picture to a single-channel ('L' mode) grayscale image and
# save it under a new name. The context manager closes the source file as
# soon as the converted copy has been produced.
with Image.open(r'C:\Users\87419\Desktop/0.jpg') as rgb_img:
    gray_img = rgb_img.convert('L')
gray_img.save(r'C:\Users\87419\Desktop/00.jpg')  # grayscale image
转载地址:http://xgpi.baihongyu.com/