VGG框架

- 一月 30, 2025

Welcome file

import torch  
import torch.nn as nn  
  
from DeepLearning.CNN.AlexNet import running_loss, correct  
from numexpr import is_cpu_amd_intel  
  
  
class VGG16(nn.Module):
    """VGG-16 style convolutional classifier.

    The network is a stack of five convolutional blocks (``features``)
    followed by a three-layer fully connected head (``classifier``).

    Every convolution is 3x3 with ``padding=1`` so it keeps the spatial size;
    only the five 2x2/stride-2 max-pools shrink the feature map.  The first
    linear layer is sized for a ``512 x 7 x 7`` feature map, which is what a
    ``224 x 224`` input produces (224 / 2**5 == 7).

    Args:
        num_classes: Number of output logits produced by the last linear layer.
    """

    def __init__(self, num_classes: int = 1000):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            # Block 1: 3 -> 64 channels
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 2: 64 -> 128 channels
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 3: 128 -> 256 channels
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 4: 256 -> 512 channels
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Block 5: 512 -> 512 channels
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            # padding=1 is required here as well: without it this layer trims
            # the map from 14x14 to 12x12, the final pool yields 6x6, and the
            # flattened size no longer matches the 512 * 7 * 7 classifier input.
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the feature extractor and the classifier head.

        Args:
            x: Batch of images; dimension 0 is kept as the batch dimension and
                all remaining dimensions of the feature map are flattened.

        Returns:
            The output of the classifier, one row of ``num_classes`` values
            per batch element.
        """
        x = self.features(x)
        # Flatten everything except the batch dimension for the linear layers.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

典型示例：CIFAR-100 图像分类

目标：对 100 类物体（鱼、昆虫、交通工具等）进行分类
数据集：CIFAR-100 数据集（包含 50k 训练样本和 10k 测试样本）。

import torchvision  
import torch.optim as optim  
from torchvision import transforms  
  
# Data preprocessing (adapts the images to the VGG input size).
# NOTE(review): the mean/std values look like the commonly used ImageNet
# channel statistics — confirm they are the intended ones for CIFAR-100.
transform = transforms.Compose(
    [
        transforms.Resize(224),  # upsample to 224x224
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
  
# Load the CIFAR-100 train and test splits (downloaded into ./data if absent),
# applying the preprocessing pipeline to every image.
train_set = torchvision.datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_set = torchvision.datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Batches of 64; only the training data is shuffled.
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=64,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=64,
    shuffle=False,
)
  
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the model with a 100-way output and move it to the chosen device.
model = VGG16(num_classes=100).to(device)

# Cross-entropy loss, optimised with SGD + momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
  
# Train for 30 epochs, printing the mean per-batch loss after each epoch.
for epoch in range(30):
    model.train()  # training mode: the classifier's Dropout layers are active
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Reported once per epoch: summed batch losses divided by the batch count.
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')
# Measure top-1 accuracy on the test set.
model.eval()  # evaluation mode: disables the classifier's Dropout layers
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the largest output per sample.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Test Accuracy: {100 * correct / total:.2f}%')

特征图可视化：提取中间卷积层的输出，并将其绘制成图像。

import matplotlib.pyplot as plt  
  
# Pass the first image of the current `images` batch through the first layer
# of the feature extractor and display channel 0 of the result.
first_layer = model.features[0]
sample = images[0].unsqueeze(0).to(device)  # add a leading batch dimension
activation = first_layer(sample)

# Detach from the autograd graph and move to the CPU before converting to NumPy.
feature_map = activation[0, 0].detach().cpu().numpy()
plt.imshow(feature_map, cmap='viridis')
plt.axis('off')
plt.show()

搜索此博客

forliage's computer life

VGG框架

典型示例：CIFAR-100 图像分类

此博客中的热门博文

Numerical Analysis --- Interpolation & Polynomial Approximation

Computer Animation (Postgraduate Course): Lecture 4: Keyframe interpolation and velocity control

Computer Animation (Postgraduate Course): Lecture 3: Representation of transformation and rotation