手把手教你用DCGAN和WGAN-GP生成逼真人脸:伪造者与鉴别者的博弈实战
生成对抗网络(Generative Adversarial Network, GAN)是深度学习领域中一项革命性的技术,由Ian Goodfellow在2014年提出。它的核心思想如同一个"伪造者"与"鉴别者"之间的博弈:生成器(Generator)不断学习如何生成以假乱真的数据,而判别器(Discriminator)则努力区分真实数据与生成数据。两者在相互竞争中共同进步,最终生成器能够产生高度逼真的样本。
本文将介绍两种主流的GAN变体:DCGAN(Deep Convolutional GAN) 和 WGAN-GP(Wasserstein GAN with Gradient Penalty),并使用它们来生成逼真的人脸图像。我们将使用CelebA数据集,这是一个包含超过20万张名人面部图像的大型数据集,非常适合用于人脸生成任务。
DCGAN:深度卷积生成对抗网络
DCGAN是将卷积神经网络引入GAN架构的重要改进。它使用卷积层替代传统的全连接层,并采用批归一化、LeakyReLU等技术,极大地提升了生成图像的质量和训练的稳定性。
网络结构
在DCGAN中,生成器使用转置卷积(ConvTranspose2d)将随机噪声向量逐步上采样为图像,判别器则使用标准卷积层对图像进行特征提取和真假判别。
代码实现
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# Training hyperparameters.
num_epochs = 20
# Mini-batch size. The DataLoader below reads this constant, so it is the
# single source of truth; the loader used to hard-code 128 and silently ignore
# the declared value, so 128 (the size actually used) is kept here.
batch_size = 128
learning_rate = 0.0002
betas = (0.5, 0.999)  # Adam (beta1, beta2)

# CelebA training split, resized to 64x64. ToTensor followed by
# Normalize(mean=0.5, std=0.5) per channel rescales pixels to [-1, 1], which
# matches the Tanh output range of the generator.
dataset = datasets.CelebA(
    root='./data/celeba',
    split='train',
    transform=transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
    download=True,
)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Sample latent noise.
def generate_random(batch_size, size):
    """Return a ``(batch_size, size)`` tensor of standard-normal noise.

    The tensor is allocated directly on the module-level ``device`` instead of
    being created on the CPU and copied over afterwards.

    Args:
        batch_size: Number of noise vectors to draw.
        size: Length of each noise vector.
    """
    return torch.randn(batch_size, size, device=device)
# Generator built from transposed convolutions.
class Generator(nn.Module):
    """DCGAN generator: upsamples a latent vector to a 64x64 image.

    The output passes through ``Tanh`` and therefore lies in [-1, 1].

    Args:
        input_size: Length of the latent noise vector (default 100).
        ngf: Base number of feature maps; the layers use ``ngf * 8`` down to
            ``ngf`` channels (default 64).
        out_channels: Number of channels of the generated image (default 3).
    """

    def __init__(self, input_size=100, ngf=64, out_channels=3):
        super().__init__()
        # Kept as attributes so callers can read the latent size they need.
        self.input_size = input_size
        self.ngf = ngf
        self.main = nn.Sequential(
            # latent [batch, input_size, 1, 1] -> [batch, ngf*8, 4, 4]
            nn.ConvTranspose2d(input_size, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # -> [batch, ngf*4, 8, 8]
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # -> [batch, ngf*2, 16, 16]
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # -> [batch, ngf, 32, 32]
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # -> [batch, out_channels, 64, 64]
            nn.ConvTranspose2d(ngf, out_channels, 4, 2, 1, bias=False),
            nn.Tanh(),
        )

    def forward(self, x):
        """Map noise of shape ``[batch, input_size]`` to a batch of images."""
        # Transposed convolutions need a 4-D input: [batch, input_size, 1, 1].
        x = x.view(x.size(0), self.input_size, 1, 1)
        return self.main(x)
# Discriminator built from strided convolutions.
class Discriminator(nn.Module):
    """DCGAN discriminator: maps a 64x64 image to a probability of being real.

    Args:
        ndf: Base number of feature maps; the layers use ``ndf`` up to
            ``ndf * 8`` channels (default 64).
        in_channels: Number of channels of the input image (default 3).
    """

    def __init__(self, ndf=64, in_channels=3):
        super().__init__()
        self.ndf = ndf
        self.main = nn.Sequential(
            # [batch, in_channels, 64, 64] -> [batch, ndf, 32, 32]
            nn.Conv2d(in_channels, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # -> [batch, ndf*2, 16, 16]
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # -> [batch, ndf*4, 8, 8]
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # -> [batch, ndf*8, 4, 4]
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),
            # -> [batch, 1, 1, 1], squashed to (0, 1) by the sigmoid
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return one probability per image, shaped ``[batch, 1]``."""
        return self.main(x).view(-1, 1)
# Build both networks (generator first) and place them on the chosen device.
generator, discriminator = Generator().to(device), Discriminator().to(device)
def weights_init(m):
    """Initialise one module in place following the DCGAN recipe.

    Intended to be passed to ``nn.Module.apply``. Modules whose class name
    contains ``"Conv"`` get weights drawn from N(0, 0.02); modules whose class
    name contains ``"BatchNorm"`` get scale weights drawn from N(1, 0.02).
    A bias, when present on either kind, is set to zero. All other modules are
    left untouched.

    Args:
        m: The module to initialise.
    """
    classname = m.__class__.__name__
    if "Conv" in classname:
        weight_mean = 0.0
    elif "BatchNorm" in classname:
        # BatchNorm weights are multiplicative scales, so they are centred on
        # 1; centring them on 0 would shrink every normalised activation to
        # almost nothing at the start of training.
        weight_mean = 1.0
    else:
        return
    if getattr(m, "weight", None) is not None:
        nn.init.normal_(m.weight.data, weight_mean, 0.02)
    if getattr(m, "bias", None) is not None:
        nn.init.constant_(m.bias.data, 0)
# Re-initialise every sub-module of both networks with weights_init.
generator.apply(weights_init)
discriminator.apply(weights_init)

# One Adam optimiser per network, sharing the same learning rate and betas.
optimizer_g = torch.optim.Adam(
    params=generator.parameters(),
    lr=learning_rate,
    betas=betas,
)
optimizer_d = torch.optim.Adam(
    params=discriminator.parameters(),
    lr=learning_rate,
    betas=betas,
)

# Binary cross-entropy between discriminator outputs and target labels.
criterion = nn.BCELoss()

# Switch both networks to training mode.
generator.train()
discriminator.train()

# Loss histories appended to by train() and drawn by plot_loss().
loss_G, loss_D = [], []
# Adversarial training loop.
def train():
    """Alternate discriminator and generator updates for ``num_epochs`` epochs.

    For every batch the discriminator is updated once on real and generated
    images, then the generator is updated once. Every 64 batches the current
    losses are appended to ``loss_G`` / ``loss_D`` and printed.
    """
    # Read the latent size from the generator instead of repeating the literal.
    latent_dim = generator.input_size
    for epoch in range(num_epochs):
        for i, (images, _) in enumerate(dataloader):
            # The last batch of an epoch may be smaller than the nominal size.
            current_batch_size = images.size(0)
            images = images.to(device)

            # Smoothed targets (0.9 / 0.1 instead of 1 / 0) keep the
            # discriminator from becoming over-confident.
            real_labels = torch.full((current_batch_size, 1), 0.9, device=device)
            fake_labels = torch.full((current_batch_size, 1), 0.1, device=device)

            # ---- discriminator update ----
            optimizer_d.zero_grad()
            real_output = discriminator(images)
            d_loss_real = criterion(real_output, real_labels)

            z = generate_random(current_batch_size, latent_dim)
            # detach(): the generator must not receive gradients in this step.
            fake_data = generator(z).detach()
            fake_output = discriminator(fake_data)
            d_loss_fake = criterion(fake_output, fake_labels)

            d_loss = (d_loss_real + d_loss_fake) / 2
            d_loss.backward()
            optimizer_d.step()

            # ---- generator update ----
            optimizer_g.zero_grad()
            z = generate_random(current_batch_size, latent_dim)
            fake_data = generator(z)
            fake_output = discriminator(fake_data)
            # The generator is rewarded when its images are scored as real.
            g_loss = criterion(fake_output, real_labels)
            g_loss.backward()
            optimizer_g.step()

            # Record and report the losses every 64 batches. The step shown is
            # the 1-based batch index within the epoch.
            if i % 64 == 0:
                loss_G.append(g_loss.item())
                loss_D.append(d_loss.item())
                print(
                    f"Epoch {epoch + 1}, Step {i + 1}, loss_G: {loss_G[-1]}, loss_D: {loss_D[-1]}"
                )
# Persist the trained weights once training has finished.
def save_model():
    """Write the state dicts of both networks to the working directory."""
    checkpoints = (
        (generator, "generator.pth"),
        (discriminator, "discriminator.pth"),
    )
    for network, filename in checkpoints:
        torch.save(network.state_dict(), filename)
# Plot the loss curves.
def plot_loss():
    """Show the recorded generator and discriminator losses in one figure."""
    plt.figure(figsize=(10, 5))
    curves = (
        (loss_G, "Generator Loss"),
        (loss_D, "Discriminator Loss"),
    )
    for history, caption in curves:
        plt.plot(history, label=caption)
    plt.legend()
    plt.show()
# Sample images from the trained generator.
def generate_image():
    """Draw a 4x4 grid of generated images and show it with matplotlib.

    The generator is switched to evaluation mode and stays in it afterwards.
    """
    rows, cols = 4, 4
    figure = plt.figure(figsize=(8, 8))
    # Evaluation mode makes BatchNorm use its running statistics.
    generator.eval()
    # Inference only: no_grad skips building an autograd graph, and all
    # samples are produced in a single forward pass.
    with torch.no_grad():
        z = generate_random(rows * cols, generator.input_size)
        images = generator(z).cpu()
    # [N, C, H, W] in [-1, 1] -> [N, H, W, C] in [0, 1] for imshow.
    images = ((images + 1) / 2).permute(0, 2, 3, 1).numpy()
    for index, img in enumerate(images):
        plot = figure.add_subplot(rows, cols, index + 1)
        plot.imshow(img)
        plot.axis("off")
    plt.show()
if __name__ == "__main__":
    # Pipeline stages, executed in order: train, save weights, plot the loss
    # curves, then show sample images.
    for stage in (train, save_model, plot_loss, generate_image):
        stage()
训练结果分析
损失曲线

从上图可以看出,DCGAN的生成器损失和判别器损失在训练过程中呈现出一定的振荡特性。生成器损失整体呈下降趋势,表明生成图像的质量在逐步提升;判别器损失则保持在相对稳定的范围内,说明判别器能够与生成器保持平衡的对抗状态。
生成的人脸图像

DCGAN生成的16张人脸图像展现了较好的多样性,涵盖了不同的肤色、发型和面部朝向。虽然部分图像存在一些模糊或不够精细的细节,但整体上已经能够让人一眼认出是人脸,展现了GAN在图像生成任务上的强大能力。
WGAN-GP:带梯度惩罚的Wasserstein GAN
WGAN-GP是对原始GAN的重要改进。它通过Wasserstein距离替代JS散度,并引入梯度惩罚(Gradient Penalty)来满足Lipschitz约束,从而解决了传统GAN训练不稳定、容易模式崩溃等问题。
主要改进
- 移除判别器最后一层的Sigmoid:判别器输出的是一个实数值(critic score),而非0-1之间的概率
- 使用Wasserstein损失:没有对数函数,直接使用输出值的均值
- 梯度惩罚:在真实样本与生成样本之间的随机插值点上,惩罚判别器梯度的L2范数偏离1的程度,以软约束的方式近似满足1-Lipschitz条件
- 更多的判别器训练次数:每更新1次生成器,通常先更新5次判别器(即判别器与生成器的更新次数之比为5:1)
代码实现
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
# Training hyperparameters.
num_epochs = 20
# Mini-batch size. The DataLoader below reads this constant, so it is the
# single source of truth; the loader used to hard-code 128 and silently ignore
# the declared value, so 128 (the size actually used) is kept here.
batch_size = 128
learning_rate = 0.0001  # WGAN-GP is usually trained with a smaller learning rate
n_critic = 5  # critic (discriminator) updates per generator update
lambda_gp = 10  # weight of the gradient-penalty term

# CelebA training split, resized to 64x64. ToTensor followed by
# Normalize(mean=0.5, std=0.5) per channel rescales pixels to [-1, 1], which
# matches the Tanh output range of the generator.
dataset = datasets.CelebA(
    root='./data/celeba',
    split='train',
    transform=transforms.Compose([
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]),
    download=True,
)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Sample latent noise.
def generate_random(batch_size, size):
    """Return a ``(batch_size, size)`` tensor of standard-normal noise.

    The tensor is allocated directly on the module-level ``device`` instead of
    being created on the CPU and copied over afterwards.

    Args:
        batch_size: Number of noise vectors to draw.
        size: Length of each noise vector.
    """
    return torch.randn(batch_size, size, device=device)
# Generator built from transposed convolutions.
class Generator(nn.Module):
    """Generator: upsamples a latent vector to a 64x64 image.

    The output passes through ``Tanh`` and therefore lies in [-1, 1].

    Args:
        input_size: Length of the latent noise vector (default 100).
        ngf: Base number of feature maps; the layers use ``ngf * 8`` down to
            ``ngf`` channels (default 64).
        out_channels: Number of channels of the generated image (default 3).
    """

    def __init__(self, input_size=100, ngf=64, out_channels=3):
        super().__init__()
        # Kept as attributes so callers can read the latent size they need.
        self.input_size = input_size
        self.ngf = ngf
        self.main = nn.Sequential(
            # latent [batch, input_size, 1, 1] -> [batch, ngf*8, 4, 4]
            nn.ConvTranspose2d(input_size, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # -> [batch, ngf*4, 8, 8]
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # -> [batch, ngf*2, 16, 16]
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # -> [batch, ngf, 32, 32]
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            # -> [batch, out_channels, 64, 64]
            nn.ConvTranspose2d(ngf, out_channels, 4, 2, 1, bias=False),
            nn.Tanh(),
        )

    def forward(self, x):
        """Map noise of shape ``[batch, input_size]`` to a batch of images."""
        # Transposed convolutions need a 4-D input: [batch, input_size, 1, 1].
        x = x.view(x.size(0), self.input_size, 1, 1)
        return self.main(x)
# Critic (discriminator) built from strided convolutions, without a sigmoid.
class Discriminator(nn.Module):
    """WGAN-GP critic: maps a 64x64 image to an unbounded real-valued score.

    Normalisation is done with ``InstanceNorm2d`` rather than ``BatchNorm2d``.
    The gradient penalty is defined on each sample separately, whereas batch
    normalisation makes one sample's output depend on the other samples in
    the batch; instance normalisation keeps every sample independent.

    Args:
        ndf: Base number of feature maps; the layers use ``ndf`` up to
            ``ndf * 8`` channels (default 64).
        in_channels: Number of channels of the input image (default 3).
    """

    def __init__(self, ndf=64, in_channels=3):
        super().__init__()
        self.ndf = ndf
        self.main = nn.Sequential(
            # [batch, in_channels, 64, 64] -> [batch, ndf, 32, 32]
            nn.Conv2d(in_channels, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # -> [batch, ndf*2, 16, 16]
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.InstanceNorm2d(ndf * 2, affine=True),
            nn.LeakyReLU(0.2, inplace=True),
            # -> [batch, ndf*4, 8, 8]
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.InstanceNorm2d(ndf * 4, affine=True),
            nn.LeakyReLU(0.2, inplace=True),
            # -> [batch, ndf*8, 4, 4]
            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),
            nn.InstanceNorm2d(ndf * 8, affine=True),
            nn.LeakyReLU(0.2, inplace=True),
            # -> [batch, 1, 1, 1]; no sigmoid, the raw score is the output
            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),
        )

    def forward(self, x):
        """Return one critic score per image, shaped ``[batch, 1]``."""
        return self.main(x).view(-1, 1)
# Build both networks (generator first) and place them on the chosen device.
generator, discriminator = Generator().to(device), Discriminator().to(device)
def weights_init(m):
    """Initialise one module in place following the DCGAN recipe.

    Intended to be passed to ``nn.Module.apply``. Modules whose class name
    contains ``"Conv"`` get weights drawn from N(0, 0.02); modules whose class
    name contains ``"BatchNorm"`` get scale weights drawn from N(1, 0.02).
    A bias, when present on either kind, is set to zero. All other modules are
    left untouched.

    Args:
        m: The module to initialise.
    """
    classname = m.__class__.__name__
    if "Conv" in classname:
        weight_mean = 0.0
    elif "BatchNorm" in classname:
        # BatchNorm weights are multiplicative scales, so they are centred on
        # 1; centring them on 0 would shrink every normalised activation to
        # almost nothing at the start of training.
        weight_mean = 1.0
    else:
        return
    if getattr(m, "weight", None) is not None:
        nn.init.normal_(m.weight.data, weight_mean, 0.02)
    if getattr(m, "bias", None) is not None:
        nn.init.constant_(m.bias.data, 0)
# Re-initialise every sub-module of both networks with weights_init.
generator.apply(weights_init)
discriminator.apply(weights_init)

# One Adam optimiser per network, sharing the same learning rate and betas.
optimizer_g = torch.optim.Adam(
    generator.parameters(), lr=learning_rate, betas=(0.5, 0.999)
)
optimizer_d = torch.optim.Adam(
    discriminator.parameters(), lr=learning_rate, betas=(0.5, 0.999)
)

# No loss-function object is created here: train() computes the Wasserstein
# losses directly from the mean critic scores, so the BCELoss instance that
# used to be defined at this point was never used.

# Switch both networks to training mode.
generator.train()
discriminator.train()

# Loss histories appended to by train() and drawn by plot_loss().
loss_G = []
loss_D = []
# Gradient penalty for WGAN-GP.
def compute_gradient_penalty(D, real_samples, fake_samples):
    """Return the WGAN-GP gradient penalty for one batch.

    Random per-sample interpolations between ``real_samples`` and
    ``fake_samples`` are passed through ``D``; the penalty is the batch mean
    of ``(||grad_x D(x)||_2 - 1) ** 2`` evaluated at those points.

    Args:
        D: Callable critic mapping a batch of samples to a batch of scores.
        real_samples: Batch of real samples; its first dimension is the batch.
        fake_samples: Batch of generated samples with the same shape, device
            and dtype as ``real_samples``.

    Returns:
        A scalar tensor that can be back-propagated into the critic's
        parameters.
    """
    batch = real_samples.size(0)
    # One mixing coefficient per sample, broadcast over all remaining dims.
    # It is created on the inputs' own device and dtype, so the function does
    # not depend on a module-level device variable.
    alpha_shape = (batch,) + (1,) * (real_samples.dim() - 1)
    alpha = torch.rand(
        alpha_shape, device=real_samples.device, dtype=real_samples.dtype
    )
    # Points on the straight lines between real and generated samples.
    interpolates = (
        alpha * real_samples + (1 - alpha) * fake_samples
    ).requires_grad_(True)
    d_interpolates = D(interpolates)
    # Gradient of the critic scores with respect to the interpolated inputs.
    # create_graph=True keeps the graph so the penalty itself is
    # differentiable with respect to the critic's parameters.
    gradients = torch.autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=torch.ones_like(d_interpolates),
        create_graph=True,
    )[0]
    gradients = gradients.reshape(batch, -1)
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean()
# WGAN-GP training loop.
def train():
    """Train critic and generator with the WGAN-GP objective.

    For every batch the critic is updated ``n_critic`` times, then the
    generator is updated once. Every 64 batches the current losses are
    appended to ``loss_G`` / ``loss_D`` and printed.
    """
    # Read the latent size from the generator instead of repeating the literal.
    latent_dim = generator.input_size
    for epoch in range(num_epochs):
        for i, (images, _) in enumerate(dataloader):
            # The last batch of an epoch may be smaller than the nominal size.
            current_batch_size = images.size(0)
            images = images.to(device)

            # ---- critic updates ----
            # NOTE(review): all n_critic steps reuse the same real batch and
            # only resample the noise; drawing a fresh real batch per critic
            # step would follow the published algorithm more closely.
            for _critic_step in range(n_critic):
                optimizer_d.zero_grad()
                # The critic maximises D(real) - D(fake), i.e. it minimises
                # -D(real) + D(fake), plus the gradient penalty.
                real_output = discriminator(images)
                d_loss_real = -real_output.mean()

                z = generate_random(current_batch_size, latent_dim)
                # detach(): the generator must not receive gradients here.
                fake_data = generator(z).detach()
                fake_output = discriminator(fake_data)
                d_loss_fake = fake_output.mean()

                # Neither tensor carries autograd history at this point, so
                # they are passed as they are.
                gradient_penalty = compute_gradient_penalty(
                    discriminator, images, fake_data
                )
                d_loss = d_loss_real + d_loss_fake + lambda_gp * gradient_penalty
                d_loss.backward()
                optimizer_d.step()

            # ---- generator update ----
            optimizer_g.zero_grad()
            z = generate_random(current_batch_size, latent_dim)
            fake_data = generator(z)
            fake_output = discriminator(fake_data)
            # The generator tries to raise the critic score of its samples.
            g_loss = -fake_output.mean()
            g_loss.backward()
            optimizer_g.step()

            # Record and report the losses every 64 batches. The step shown is
            # the 1-based batch index within the epoch.
            if i % 64 == 0:
                loss_G.append(g_loss.item())
                loss_D.append(d_loss.item())
                print(
                    f"Epoch {epoch + 1}, Step {i + 1}, loss_G: {loss_G[-1]}, loss_D: {loss_D[-1]}"
                )
# Persist the trained weights once training has finished.
def save_model():
    """Write the state dicts of both networks to the working directory."""
    checkpoints = (
        (generator, "generator.pth"),
        (discriminator, "discriminator.pth"),
    )
    for network, filename in checkpoints:
        torch.save(network.state_dict(), filename)
# Plot the loss curves.
def plot_loss():
    """Show the recorded generator and discriminator losses in one figure."""
    plt.figure(figsize=(10, 5))
    curves = (
        (loss_G, "Generator Loss"),
        (loss_D, "Discriminator Loss"),
    )
    for history, caption in curves:
        plt.plot(history, label=caption)
    plt.legend()
    plt.show()
# Sample images from the trained generator.
def generate_image():
    """Draw a 4x4 grid of generated images and show it with matplotlib.

    The generator is switched to evaluation mode and stays in it afterwards.
    """
    rows, cols = 4, 4
    figure = plt.figure(figsize=(8, 8))
    # Evaluation mode makes BatchNorm use its running statistics.
    generator.eval()
    # Inference only: no_grad skips building an autograd graph, and all
    # samples are produced in a single forward pass.
    with torch.no_grad():
        z = generate_random(rows * cols, generator.input_size)
        images = generator(z).cpu()
    # [N, C, H, W] in [-1, 1] -> [N, H, W, C] in [0, 1] for imshow.
    images = ((images + 1) / 2).permute(0, 2, 3, 1).numpy()
    for index, img in enumerate(images):
        plot = figure.add_subplot(rows, cols, index + 1)
        plot.imshow(img)
        plot.axis("off")
    plt.show()
if __name__ == "__main__":
    # Pipeline stages, executed in order: train, save weights, plot the loss
    # curves, then show sample images.
    for stage in (train, save_model, plot_loss, generate_image):
        stage()
训练结果分析
损失曲线

WGAN-GP的损失曲线呈现出与传统GAN显著不同的特征。由于Wasserstein损失的特性,损失值可以为负,且曲线更加平滑稳定。生成器损失和判别器损失在训练过程中逐渐收敛并趋于平稳,这反映了WGAN-GP在训练稳定性方面的优势。
生成的人脸图像

WGAN-GP生成的16张人脸图像质量相比DCGAN有显著提升。图像更加清晰,面部特征更加完整和自然,伪影明显减少。从皮肤质感、光影效果到五官细节,WGAN-GP都展现出了更好的生成能力,这证明了梯度惩罚和Wasserstein距离在提升生成质量方面的有效性。
DCGAN vs WGAN-GP:对比总结
| 特性 | DCGAN | WGAN-GP |
|---|---|---|
| 损失函数 | 二元交叉熵(BCE) | Wasserstein损失 |
| 判别器输出 | Sigmoid概率 | 实数值(critic score) |
| 训练稳定性 | 中等,可能振荡 | 较高,收敛平滑 |
| 模式崩溃风险 | 存在 | 显著降低 |
| 超参数敏感性 | 较高 | 较低 |
| 生成质量 | 良好 | 更优 |
| 训练速度 | 较快 | 稍慢(需多次更新判别器) |
实践建议
- 数据预处理至关重要:将图像归一化到[-1, 1]范围配合生成器的Tanh激活函数是标准做法
- 标签平滑:在DCGAN中使用0.9和0.1而非1和0作为标签,可以有效防止判别器过强
- 学习率选择:GAN对学习率非常敏感,建议使用较小的学习率(如0.0002)
- 监控损失:生成器损失持续上升而判别器损失趋近于0,通常表示判别器过强、生成器难以获得有效梯度(训练失衡);模式崩溃则主要表现为生成样本的多样性明显下降,需要结合生成图像来判断
- 梯度惩罚:在WGAN-GP中,λ=10是经过验证的有效参数
总结
本文通过DCGAN和WGAN-GP两种架构,展示了如何使用生成对抗网络生成逼真的人脸图像。从实验结果可以看出,WGAN-GP在训练稳定性和生成质量上都优于原始DCGAN。GAN作为生成式模型的重要代表,在图像生成、风格迁移、数据增强等领域有着广阔的应用前景。
希望本教程能够帮助读者理解GAN的基本原理和实现方法。完整代码已在上文提供,读者可以根据自己的需求调整网络结构和超参数,尝试生成不同风格和类型的人脸图像。