AlexNet框架
import torch
import torch.nn as nn
class AlexNet(nn.Module):
    """AlexNet-style convolutional image classifier.

    The network is a stack of five convolution layers (``self.features``)
    followed by a three-layer fully connected head (``self.classifier``).

    Args:
        num_classes: Size of the final output layer (number of classes).
    """

    def __init__(self, num_classes: int = 1000) -> None:
        super().__init__()
        self.features = nn.Sequential(
            # 3 input channels (RGB)
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Pool the feature map to a fixed 6x6 grid so the first Linear layer
        # always receives 256 * 6 * 6 values. For 224x224 inputs the feature
        # map is already 6x6, so this is a no-op there; it only matters for
        # other input resolutions. The layer has no parameters.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),  # the original paper uses Dropout
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class scores of shape ``(batch, num_classes)``.

        Args:
            x: Image batch with 3 channels, laid out as ``(batch, 3, H, W)``.
        """
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)
        return self.classifier(x)
CIFAR-10 图像分类
- 目标:对 CIFAR-10 中的 10 类物体(飞机、汽车、鸟等)进行分类。
- 数据集:CIFAR-10 数据集(包含 50k 训练样本和 10k 测试样本)。
import torchvision
import torch.optim as optim
from torchvision import transforms
import torch
# Preprocessing pipeline (CIFAR-10 images are natively 32x32).
transform = transforms.Compose(
    [
        # Upsample every image to 224x224 to match the AlexNet input size.
        transforms.Resize(224),
        transforms.ToTensor(),
        # Per-channel mean/std normalisation; these values are the commonly
        # used ImageNet statistics, not ones computed from CIFAR-10.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
# Load the datasets. Both splits are downloaded into ./data if missing and
# share the same preprocessing pipeline.
train_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_set = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=128,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=128,
    shuffle=False,
)
# Build the model with a 10-way output layer (one score per CIFAR-10 class).
model = AlexNet(num_classes=10)

# Cross-entropy loss over the class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
# Train the model: use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(20):
    model.train()  # enable training-mode behaviour (e.g. Dropout)
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Standard optimisation step: clear old gradients, forward,
        # backward, then update the weights.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean per-batch loss once per epoch.
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')
# Measure accuracy on the test set.
model.eval()  # switch off training-only behaviour such as Dropout
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # The predicted class is the index of the highest score per sample.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Accuracy: {100 * correct / total:.2f}%')
特征可视化:使用 Grad-CAM 展示卷积层的注意力区域。
import matplotlib.pyplot as plt
from torchcam.methods import GradCAM
# 'features.10' is the last convolution layer of AlexNet.features.
cam_extractor = GradCAM(model, 'features.10')

# Dropout must be disabled so the visualised prediction is deterministic.
model.eval()

# `images` is the batch left over from the evaluation loop; visualise only
# its first sample, keeping the batch dimension so the model accepts it.
sample = images[:1]

# Grad-CAM backpropagates the class score to the target layer, so this
# forward pass must NOT run under torch.no_grad().
out = model(sample)
activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)

# One map is returned per hooked layer; take the first and drop the
# singleton dimensions before plotting.
plt.imshow(activation_map[0].squeeze().detach().cpu().numpy(), cmap='jet')
plt.axis('off')
plt.show()