How Can We Use AI to Recreate Our Own Along the River During the Qingming Festival (《清明上河圖》)?


Source: CSDN   Time: 2019-11-16 15:00:09


Author | Li Qiujian (李秋鍵)

Editor | Liu Jing (劉靜)

Produced by | CSDN (ID: CSDNnews)

We know that Along the River During the Qingming Festival is one of the representative works of Chinese painting and one of China's ten great masterpieces handed down through the ages. It is a Northern Song genre painting, the only surviving masterpiece by the Northern Song painter Zhang Zeduan, and a national-treasure-grade cultural relic, now held in the Palace Museum in Beijing.

The scroll is 24.8 cm tall and 528.7 cm long, painted in color on silk. In handscroll format and composed with the "scattered perspective" technique, it vividly records the cityscape of Dongjing (also called Bianjing, present-day Kaifeng, Henan), the capital of the Northern Song dynasty in the twelfth century, and the life of people from every social class of the time. It bears witness to the prosperity of the capital Bianjing and reflects the urban economy of the Northern Song period.

This makes it unique in the history of painting in China and even the world. Across the more-than-five-meter-long scroll are painted a huge number of figures; livestock such as oxen, mules, and donkeys; carts, sedan chairs, and boats large and small; and houses, bridges, and gate towers, each with its own character and reflecting Song-dynasty architecture. The work therefore has great historical and artistic value. Although the scene is lively, what it depicts is not simply a flourishing market but a "prosperous age in peril" tinged with a sense of crisis: idle soldiers and heavy taxes.

Today's project is to adapt an algorithm so that we can produce our own version of Along the River During the Qingming Festival.

Below we will use a pretrained VGG19 model to transfer the painting's style onto our own picture. The detailed steps follow, and I have added comments to the code to make it easier to follow.

First, import the relevant libraries:

import tensorflow as tf
import numpy as np
import scipy.io
import scipy.misc
import os
import time

Next, define a few constants for convenience: the content image (the picture to be stylized), the style image (the painting), and the output directory:

CONTENT_IMG = "1.png"
STYLE_IMG = "sty.jpg"
OUTPUT_DIR = "neural_style_transfer_tensorflow/"

Then create a directory in which to save the generated images:

if not os.path.exists(OUTPUT_DIR):
    os.mkdir(OUTPUT_DIR)

Define the width, height, and number of color channels of the generated image, along with the noise blending ratio and the content/style loss weights:

IMAGE_W = 400
IMAGE_H = 300
COLOR_C = 3
NOISE_RATIO = 0.7   # how much of the initial image comes from random noise
BETA = 5            # weight of the content loss
ALPHA = 100         # weight of the style loss

Next, define the path to the pretrained model, the VGG-19 weights in MatConvNet .mat format:

VGG_MODEL = "imagenet-vgg-verydeep-19.mat"
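The weight file is not bundled with the code; it is the VGG-19 model exported by the MatConvNet project (several hundred megabytes). The snippet below is an optional helper I am adding, not part of the original article, and the download URL is the commonly cited model-zoo location, so verify it is still live before relying on it:

# Optional helper (my addition): fetch the VGG-19 weights if they are not already present.
import urllib.request

VGG_URL = "http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat"
if not os.path.exists("imagenet-vgg-verydeep-19.mat"):
    urllib.request.urlretrieve(VGG_URL, "imagenet-vgg-verydeep-19.mat")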

Define the mean-value matrix used to preprocess the images: these are the per-channel ImageNet means, which are subtracted from the pixel values before an image is fed into VGG:

MEAN_VALUES = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))

Next, define the function that loads the model; the comments explain each step:

def load_vgg_model(path):
    """
    Details of the VGG19 model:
    - 0 is conv1_1 (3, 3, 3, 64)
    - 1 is relu
    - 2 is conv1_2 (3, 3, 64, 64)
    - 3 is relu
    - 4 is maxpool
    - 5 is conv2_1 (3, 3, 64, 128)
    - 6 is relu
    - 7 is conv2_2 (3, 3, 128, 128)
    - 8 is relu
    - 9 is maxpool
    - 10 is conv3_1 (3, 3, 128, 256)
    - 11 is relu
    - 12 is conv3_2 (3, 3, 256, 256)
    - 13 is relu
    - 14 is conv3_3 (3, 3, 256, 256)
    - 15 is relu
    - 16 is conv3_4 (3, 3, 256, 256)
    - 17 is relu
    - 18 is maxpool
    - 19 is conv4_1 (3, 3, 256, 512)
    - 20 is relu
    - 21 is conv4_2 (3, 3, 512, 512)
    - 22 is relu
    - 23 is conv4_3 (3, 3, 512, 512)
    - 24 is relu
    - 25 is conv4_4 (3, 3, 512, 512)
    - 26 is relu
    - 27 is maxpool
    - 28 is conv5_1 (3, 3, 512, 512)
    - 29 is relu
    - 30 is conv5_2 (3, 3, 512, 512)
    - 31 is relu
    - 32 is conv5_3 (3, 3, 512, 512)
    - 33 is relu
    - 34 is conv5_4 (3, 3, 512, 512)
    - 35 is relu
    - 36 is maxpool
    - 37 is fullyconnected (7, 7, 512, 4096)
    - 38 is relu
    - 39 is fullyconnected (1, 1, 4096, 4096)
    - 40 is relu
    - 41 is fullyconnected (1, 1, 4096, 1000)
    - 42 is softmax
    """
    # Load the VGG model and get the parameters and name of each layer.
    vgg = scipy.io.loadmat(path)
    vgg_layers = vgg["layers"]

    # Extract the weights and bias of a given layer and check its name.
    def _weights(layer, expected_layer_name):
        W = vgg_layers[0][layer][0][0][2][0][0]
        b = vgg_layers[0][layer][0][0][2][0][1]
        layer_name = vgg_layers[0][layer][0][0][0][0]
        assert layer_name == expected_layer_name
        return W, b

    # Wrap the loaded weights as TensorFlow constants; the return value is the
    # output of the convolution followed by the ReLU activation.
    def _conv2d_relu(prev_layer, layer, layer_name):
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.relu(tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding="SAME") + b)

    # Pooling layer. Average pooling is used instead of VGG's max pooling,
    # which tends to give smoother style-transfer results.
    def _avgpool(prev_layer):
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # Put each layer's output into a dictionary so it can be looked up by name.
    graph = {}
    graph["input"] = tf.Variable(np.zeros((1, IMAGE_H, IMAGE_W, COLOR_C)), dtype="float32")
    # "conv1_1" uses layer 0 of the VGG model, with "input" as its input.
    graph["conv1_1"] = _conv2d_relu(graph["input"], 0, "conv1_1")
    graph["conv1_2"] = _conv2d_relu(graph["conv1_1"], 2, "conv1_2")
    graph["avgpool1"] = _avgpool(graph["conv1_2"])
    graph["conv2_1"] = _conv2d_relu(graph["avgpool1"], 5, "conv2_1")
    graph["conv2_2"] = _conv2d_relu(graph["conv2_1"], 7, "conv2_2")
    graph["avgpool2"] = _avgpool(graph["conv2_2"])
    graph["conv3_1"] = _conv2d_relu(graph["avgpool2"], 10, "conv3_1")
    graph["conv3_2"] = _conv2d_relu(graph["conv3_1"], 12, "conv3_2")
    graph["conv3_3"] = _conv2d_relu(graph["conv3_2"], 14, "conv3_3")
    graph["conv3_4"] = _conv2d_relu(graph["conv3_3"], 16, "conv3_4")
    graph["avgpool3"] = _avgpool(graph["conv3_4"])
    graph["conv4_1"] = _conv2d_relu(graph["avgpool3"], 19, "conv4_1")
    graph["conv4_2"] = _conv2d_relu(graph["conv4_1"], 21, "conv4_2")
    graph["conv4_3"] = _conv2d_relu(graph["conv4_2"], 23, "conv4_3")
    graph["conv4_4"] = _conv2d_relu(graph["conv4_3"], 25, "conv4_4")
    graph["avgpool4"] = _avgpool(graph["conv4_4"])
    graph["conv5_1"] = _conv2d_relu(graph["avgpool4"], 28, "conv5_1")
    graph["conv5_2"] = _conv2d_relu(graph["conv5_1"], 30, "conv5_2")
    graph["conv5_3"] = _conv2d_relu(graph["conv5_2"], 32, "conv5_3")
    graph["conv5_4"] = _conv2d_relu(graph["conv5_3"], 34, "conv5_4")
    graph["avgpool5"] = _avgpool(graph["conv5_4"])
    return graph

To achieve the effect we want, define the loss functions:

# Content loss: takes the TF session and the VGG model dictionary and returns the loss value.
def content_loss_func(sess, model):
    # p is the content image's activation at model["conv4_2"] (evaluated with sess.run),
    # x is the corresponding symbolic tensor for the generated image.
    def _content_loss(p, x):
        # N = p.shape[3] is the number of feature maps (512 at conv4_2),
        # M = p.shape[1] * p.shape[2] is the size (height * width) of each feature map.
        N = p.shape[3]
        M = p.shape[1] * p.shape[2]
        return (1 / (4 * N * M)) * tf.reduce_sum(tf.pow(x - p, 2))
    return _content_loss(sess.run(model["conv4_2"]), model["conv4_2"])

STYLE_LAYERS = [("conv1_1", 0.5), ("conv2_1", 1.0), ("conv3_1", 1.5), ("conv4_1", 3.0), ("conv5_1", 4.0)]

# Style loss: the weighted sum of the per-layer losses, using the weights
# 0.5, 1.0, 1.5, 3.0, 4.0 defined in STYLE_LAYERS above.
def style_loss_func(sess, model):
    # Gram matrix of a feature map F with N filters, each of size M.
    def _gram_matrix(F, N, M):
        Ft = tf.reshape(F, (M, N))
        return tf.matmul(tf.transpose(Ft), Ft)

    # a is the style image's activation at one of the layers in STYLE_LAYERS
    # (a numpy array), x is the corresponding symbolic tensor for the generated image.
    def _style_loss(a, x):
        # Same as in the content loss: N is the number of feature maps, M their size.
        N = a.shape[3]
        M = a.shape[1] * a.shape[2]
        A = _gram_matrix(a, N, M)
        G = _gram_matrix(x, N, M)
        return (1 / (4 * N ** 2 * M ** 2)) * tf.reduce_sum(tf.pow(G - A, 2))

    return sum([_style_loss(sess.run(model[layer_name]), model[layer_name]) * w for layer_name, w in STYLE_LAYERS])
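For reference, up to the constant factors in the code, these two functions implement the content and style losses from Gatys et al.'s neural style transfer formulation; in the notation below, F and P are the generated and content feature maps at a layer, and G and A are the Gram matrices of the generated and style features:

L_{content} = \frac{1}{4NM} \sum_{i,j} \left(F_{ij} - P_{ij}\right)^2

G_{ij} = \sum_{k} F_{ik} F_{jk}

L_{style} = \sum_{l} w_l \, \frac{1}{4 N_l^2 M_l^2} \sum_{i,j} \left(G^{l}_{ij} - A^{l}_{ij}\right)^2

In this script the total loss, defined further below, is BETA * content_loss + ALPHA * style_loss, so with BETA = 5 and ALPHA = 100 the style term is weighted far more heavily than the content term.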

Next, define the functions that generate the initial image, load an image, and save an image:

# Generate the initial input image from noise.
def generate_noise_image(content_image, noise_ratio=NOISE_RATIO):
    # Random image whose pixel values are drawn uniformly from [-20, 20].
    noise_image = np.random.uniform(-20, 20, (1, IMAGE_H, IMAGE_W, COLOR_C)).astype("float32")
    # Blend the noise with the content image according to the noise ratio.
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)
    return input_image

# Load an image, resize it, add a batch dimension, and subtract the channel means.
def load_image(path):
    image = scipy.misc.imread(path)
    image = scipy.misc.imresize(image, (IMAGE_H, IMAGE_W))
    # image.shape is (IMAGE_H, IMAGE_W, 3), i.e. (300, 400, 3);
    # (1, ) + image.shape makes it (1, 300, 400, 3).
    image = np.reshape(image, ((1, ) + image.shape))
    # MEAN_VALUES has shape (1, 1, 1, 3), so it broadcasts against the image
    # and subtracts the per-channel means from every pixel.
    image = image - MEAN_VALUES
    return image

# Save an image.
def save_image(path, image):
    # Undo the mean subtraction.
    image = image + MEAN_VALUES
    # Drop the batch dimension that was added when the image was loaded.
    image = image[0]
    # Clip to the valid pixel range 0-255, since the optimized values may fall outside it.
    image = np.clip(image, 0, 255).astype("uint8")
    # Save to disk.
    scipy.misc.imsave(path, image)
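One caveat: scipy.misc.imread, scipy.misc.imresize, and scipy.misc.imsave were deprecated and later removed from SciPy, so the two functions above need an older SciPy release (with Pillow installed) to run. If only a recent SciPy is available, a rough substitute using imageio and Pillow could look like the sketch below; these replacement calls are my suggestion, not part of the original article:

# A minimal substitute for the scipy.misc calls, assuming imageio and Pillow are installed.
import imageio
from PIL import Image

def load_image(path):
    image = np.array(imageio.imread(path))[:, :, :3]  # keep RGB, drop any alpha channel
    image = np.array(Image.fromarray(image).resize((IMAGE_W, IMAGE_H)))  # PIL expects (width, height)
    image = np.reshape(image, ((1, ) + image.shape)).astype("float32")
    return image - MEAN_VALUES

def save_image(path, image):
    image = np.clip(image[0] + MEAN_VALUES[0], 0, 255).astype("uint8")
    imageio.imwrite(path, image)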

Below is the training part:

# Launch the computation graph.
with tf.Session() as sess:
    # Load the images; each returns a mean-subtracted matrix of shape (1, 300, 400, 3).
    content_image = load_image(CONTENT_IMG)
    style_image = load_image(STYLE_IMG)
    # Load the VGG19 model; the return value is a dictionary of the layer tensors.
    model = load_vgg_model(VGG_MODEL)
    # Generate the initial image: random noise blended with the content image.
    input_image = generate_noise_image(content_image)
    # Initialize the variables.
    sess.run(tf.global_variables_initializer())
    # Feed the content image through the network and build the content loss.
    sess.run(model["input"].assign(content_image))
    content_loss = content_loss_func(sess, model)
    # Feed the style image through the network and build the style loss.
    sess.run(model["input"].assign(style_image))
    style_loss = style_loss_func(sess, model)
    # Total loss is the weighted sum of the content loss and the style loss.
    total_loss = BETA * content_loss + ALPHA * style_loss
    # Build the optimizer that will adjust the generated image.
    optimizer = tf.train.AdamOptimizer(2.0)
    # Minimize the total loss. Since the VGG weights are constants, only the
    # "input" variable (the generated image) gets updated.
    train = optimizer.minimize(total_loss)
    sess.run(tf.global_variables_initializer())
    # Start the optimization from the blended noise image.
    sess.run(model["input"].assign(input_image))
    # Train for 2000 iterations.
    ITERATIONS = 2000
    for i in range(ITERATIONS):
        sess.run(train)
        print("Iteration %d" % i)
        print("Cost: ", sess.run(total_loss))
        if i % 100 == 0:
            # Every 100 iterations, fetch the current generated image and save it.
            output_image = sess.run(model["input"])
            print("Iteration %d" % i)
            print("Cost: ", sess.run(total_loss))
            save_image(os.path.join(OUTPUT_DIR, "output_%d.jpg" % i), output_image)
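A final practical note: the script uses the TensorFlow 1.x API (tf.Session, tf.train.AdamOptimizer), so it will not run as-is on TensorFlow 2.x. A commonly used workaround, which I am suggesting here rather than something covered in the original article, is to swap the first import for the v1 compatibility module; other minor adjustments may still be needed:

# Run the TF1-style graph code on TensorFlow 2.x via the compatibility module.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # restores graph-mode, Session-based execution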

The final result is shown in the figure:

On the left is the original picture (found from a TV program), and on the right is the generated one; the result is quite acceptable. The main idea of the program is to start from a random noise matrix blended with the content image and to feed it through the pretrained network, whose weights stay fixed. The pixel values of this input image are then adjusted according to losses computed, by the formulas above, from the image features that the deep layers extract; an optimizer minimizes the weighted sum of these losses, and as the iterations accumulate the input image gradually improves until it becomes the style-transferred picture we want.

About the author: Li Qiujian (李秋鍵), CSDN blog expert and author of a CSDN premium course, is a master's student at China University of Mining and Technology. He has developed an Android martial-arts game, a VIP video parser, a text-rewriting bot, and other projects, has published several papers, and has won prizes in advanced-mathematics competitions several times.

Statement: this article is an original contribution from the author; please do not reproduce it without permission.

【END】

