The VGG Architecture

import torch  
import torch.nn as nn  
  
class VGG16(nn.Module):
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 2
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 3
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 4
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 5
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),  # padding=1 keeps the 14x14 map so flatten yields 512*7*7
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x
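
A quick shape check helps confirm the architecture: with a 224x224 input, the five max-pooling stages halve the spatial size from 224 down to 7, which matches the 512 * 7 * 7 features the first linear layer expects. The snippet below is a minimal sketch; the names `net` and `dummy` are illustrative and not part of the original code.

# Sanity check: run a dummy batch through the untrained network
net = VGG16(num_classes=1000)
dummy = torch.randn(2, 3, 224, 224)      # batch of 2 RGB images
with torch.no_grad():
    feats = net.features(dummy)          # expected: torch.Size([2, 512, 7, 7])
    logits = net(dummy)                  # expected: torch.Size([2, 1000])
print(feats.shape, logits.shape)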

Typical example: CIFAR-100 image classification

  • Goal: classify images into 100 object categories (fish, insects, vehicles, etc.)
  • Dataset: CIFAR-100 (50k training images and 10k test images).
import torchvision  
import torch.optim as optim  
from torchvision import transforms  
  
# Data preprocessing (match the VGG input size)
transform = transforms.Compose([
    transforms.Resize(224),  # upsample 32x32 CIFAR images to 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
  
# Load the datasets
train_set = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)  
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)  
test_set = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)  
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=False)  
  
# Initialize the model (adjusted to 100 output classes)
model = VGG16(num_classes=100)  
criterion = nn.CrossEntropyLoss()  
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)  
  
# Train the model (on the GPU when available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  
model.to(device)  
  
for epoch in range(30):  
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')
# Test accuracy
model.eval()  
correct = 0  
total = 0  
with torch.no_grad():  
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')

Feature map visualization: extract and visualize the output of an intermediate convolutional layer:

import matplotlib.pyplot as plt  
  
# Activation of the first convolutional layer for one image from the last test batch
activation = model.features[0](images[0].unsqueeze(0).to(device))
plt.imshow(activation[0, 0].detach().cpu().numpy(), cmap='viridis')  
plt.axis('off')  
plt.show()  
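
To look at more than a single channel, the same activation can be shown as a small grid. This is a sketch that assumes `model`, `images`, and `device` are still in scope from the code above; plotting the first 16 of the 64 channels in a 4x4 grid is an arbitrary choice, not part of the original example.

with torch.no_grad():
    act = model.features[0](images[0].unsqueeze(0).to(device))  # shape: (1, 64, 224, 224)

fig, axes = plt.subplots(4, 4, figsize=(8, 8))
for i, ax in enumerate(axes.flat):
    ax.imshow(act[0, i].cpu().numpy(), cmap='viridis')
    ax.axis('off')
plt.tight_layout()
plt.show()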
