VGG框架
import torch
import torch.nn as nn
from DeepLearning.CNN.AlexNet import running_loss, correct
from numexpr import is_cpu_amd_intel
class VGG16(nn.Module):
def __init__(self, num_classes=1000): super(VGG16, self).__init__() self.features = nn.Sequential( # Block1 nn.Conv2d(3, 64, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), # Block2 nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), # Block3 nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), # Block4 nn.Conv2d(256, 512, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), # Block5 nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(512, 512, kernel_size=3, padding=1), nn.ReLU(inplace=True), nn.Conv2d(512, 512, kernel_size=3, stride=1), nn.ReLU(inplace=True), nn.MaxPool2d(kernel_size=2, stride=2), ) self.classifier = nn.Sequential( nn.Linear(512 * 7 * 7, 4096), nn.ReLU(inplace=True), nn.Dropout(0.5), nn.Linear(4096, 4096), nn.ReLU(inplace=True), nn.Dropout(0.5), nn.Linear(4096, num_classes), )
def forward(self, x): x = self.features(x) x = torch.flatten(x, 1) x = self.classifier(x) return x
典型示例:CIFAR-100 图像分类
- 目标:对 100 类物体(鱼、昆虫、交通工具等)进行分类
- 数据集:CIFAR-100 数据集(包含 50k 训练样本和 10k 测试样本)。
import torchvision
import torch.optim as optim
from torchvision import transforms
# 数据预处理(适配VGG输入尺寸)
transform = transforms.Compose([
transforms.Resize(224), # 上采样至224x224
transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
# 加载数据集
train_set = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_set = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=False)
# 初始化模型(调整为100类输出)
model = VGG16(num_classes=100)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# 训练模型(使用GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
for epoch in range(30):
model.train() running_loss = 0.0 for images, labels in train_loader: images, labels = images.to(device), labels.to(device) optimizer.zero_grad() outputs = model(images) loss = criterion(outputs, labels) loss.backward() optimizer.step() running_loss += loss.item() print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')
# 测试准确率
model.eval()
correct = 0
total = 0
with torch.no_grad():
for images, labels in test_loader: images, labels = images.to(device), labels.to(device) outputs = model(images) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) correct += (predicted == labels).sum().item()print(f'Test Accuracy: {100 * correct / total:.2f}%')
特征图可视化:提取中间卷积层输出并可视化:
import matplotlib.pyplot as plt
activation = model.features[0](images[0].unsqueeze(0).to(device))
plt.imshow(activation[0, 0].detach().cpu().numpy(), cmap='viridis')
plt.axis('off')
plt.show()