Keras: insufficient accuracy with MobileNetV2 on CIFAR-10, CIFAR-100, and ImageNet

rvpgvaaj · asked 2023-05-18

I have been running experiments with MobileNetV2 on the CIFAR-10 and CIFAR-100 datasets.
My code does not get above 80% accuracy (measured on the validation dataset).
Since MobileNetV2 was designed for the ImageNet dataset, whose images are about 224x224, I resized the CIFAR-10/CIFAR-100 images to 96x96 with tf.image.resize(..., [96, 96]). But when I search for MobileNetV2 results on CIFAR-10/100, people usually report accuracies above 90%.
1. Am I doing something wrong in my code?
2. When people report 90%+ accuracy, which test set are they using? CIFAR-10 has 50,000 training images and 10,000 test images; they are evaluating on those 10,000 test images, right?
3. I have not done any tuning in my code, only the model architecture itself. Do I need some fine-tuning to reach higher accuracy, and if so, what kind of tuning should I do?
4. I also tried tf.keras.applications.MobileNetV2 as the model, but it gives the same ~80% accuracy on CIFAR-10 and ~55% on CIFAR-100 (both resized to 96x96).
I will attach the code just in case.

#!/usr/bin/env python
# coding: utf-8

# In[1]:

# Sanity check left over from an earlier run: mean of the recorded test accuracy
test_list_acc = [0.7975999712944031]
import statistics
print(statistics.mean(test_list_acc))

# In[2]:

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
os.system("clear")

from tensorflow import keras
from keras import layers, models, datasets, activations
from keras.layers import (Conv2D, MaxPooling2D, GlobalMaxPooling2D,
                          Dense, Dropout, Activation, Flatten)
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical  # keras.utils.np_utils is deprecated

import numpy as np
import matplotlib.pyplot as plt

# In[3]:

from tensorflow.keras.layers import Conv2D, DepthwiseConv2D, ReLU, BatchNormalization, add,Softmax, AveragePooling2D, Dense, Input, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# In[4]:

import tensorflow as tf

# from tensorflow import datasets

print(tf.__version__)
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

# In[5]:

gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print(gpu_devices)
# tf.config.experimental.set_memory_growth(gpu_devices[0], True)

# In[6]:

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# In[7]:

#import data
(train_images,train_labels),(test_images,test_labels) = keras.datasets.cifar10.load_data()
print("Training Images Shape (x train shape) :", train_images.shape)
print("Label of training images (y train shape) :",train_labels.shape)
print("Test Images Shape (x test shape) :",test_images.shape)
print("Label of test images (y test shape) :",test_labels.shape)

# In[8]:

# MobileNetV2 pretrained variants are published for 96, 128, 160, and 192 px inputs
train_images = tf.image.resize(train_images,[96,96])
test_images = tf.image.resize(test_images,[96,96])
print("Training Images Shape (x train shape) :", train_images.shape)
print("Label of training images (y train shape) :",train_labels.shape)
print("Test Images Shape (x test shape) :",test_images.shape)
print("Label of test images (y test shape) :",test_labels.shape)

# In[9]:

train_images, test_images = train_images / 255, test_images / 255
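
# Note: dividing by 255 maps pixels to [0, 1], which is fine for training from
# scratch as done here. If you instead load tf.keras.applications.MobileNetV2
# with pretrained ImageNet weights (question 4), those weights expect inputs
# in [-1, 1]; apply the matching preprocessing to the raw [0, 255] images
# instead of dividing by 255:
#
#   from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
#   train_images = preprocess_input(train_images)  # expects raw [0, 255] pixels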

# In[10]:

def expansion_block(x,t,filters,block_id):
    # 1x1 pointwise conv that expands the channel count by factor t
    prefix = 'block_{}_'.format(block_id)
    total_filters = t*filters
    x = layers.Conv2D(total_filters,1,padding='same',use_bias=False, name = prefix +'expand')(x)
    x = layers.BatchNormalization(name=prefix +'expand_bn')(x)
    x = layers.ReLU(6,name = prefix +'expand_relu')(x)
    return x

def depthwise_block(x,stride,block_id):
    # 3x3 depthwise conv; stride 2 performs the block's downsampling
    prefix = 'block_{}_'.format(block_id)
    x = layers.DepthwiseConv2D(3,strides=(stride,stride),padding ='same', use_bias = False, name = prefix + 'depthwise_conv')(x)
    x = layers.BatchNormalization(name=prefix +'dw_bn')(x)
    x = layers.ReLU(6,name=prefix +'dw_relu')(x)
    return x

def projection_block(x,out_channels,block_id):
    # 1x1 linear bottleneck: projects channels back down, deliberately no activation
    prefix = 'block_{}_'.format(block_id)
    x = layers.Conv2D(filters = out_channels,kernel_size = 1,padding='same',use_bias=False,name= prefix + 'compress')(x)
    x = layers.BatchNormalization(name=prefix +'compress_bn')(x)
    return x

# In[11]:

def Bottleneck(x,t,filters, out_channels,stride,block_id):
    y = expansion_block(x,t,filters,block_id)
    y = depthwise_block(y,stride,block_id)
    y = projection_block(y, out_channels,block_id)
    # Residual connection only when spatial size and channel count are unchanged
    if stride == 1 and y.shape[-1] == x.shape[-1]:
        y = layers.add([x,y])
    return y

# In[14]:

def MobileNetV2(input_image = (96,96,3), n_classes=10):
    inputs = keras.Input(shape = input_image)

    x = layers.Conv2D(32,kernel_size=3,strides=(2,2),padding = 'same', use_bias=False)(inputs)
    x = layers.BatchNormalization(name='conv1_bn')(x)
    x = layers.ReLU(6, name = 'conv1_relu')(x)

    # 17 inverted-residual bottleneck layers; the first block omits the expansion conv

    x = depthwise_block(x,stride=1,block_id=1)
    x = projection_block(x, out_channels=16,block_id=1)

    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 24, stride = 2,block_id = 2)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 24, stride = 1,block_id = 3)

    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 32, stride = 2,block_id = 4)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 32, stride = 1,block_id = 5)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 32, stride = 1,block_id = 6)

    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 64, stride = 2,block_id = 7)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 64, stride = 1,block_id = 8)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 64, stride = 1,block_id = 9)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 64, stride = 1,block_id = 10)

    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 96, stride = 1,block_id = 11)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 96, stride = 1,block_id = 12)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 96, stride = 1,block_id = 13)

    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 160, stride = 2,block_id = 14)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 160, stride = 1,block_id = 15)
    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 160, stride = 1,block_id = 16)

    x = Bottleneck(x, t = 6, filters = x.shape[-1], out_channels = 320, stride = 1,block_id = 17)

    # final 1x1 conv
    x = layers.Conv2D(filters = 1280,kernel_size = 1,padding='same',use_bias=False, name = 'last_conv')(x)
    x = layers.BatchNormalization(name='last_bn')(x)
    x = layers.ReLU(6,name='last_relu')(x)

    # global average pooling (a 7x7 average pool in the paper)
    x = layers.GlobalAveragePooling2D(name='global_average_pool')(x)

    output = layers.Dense(n_classes,activation='softmax')(x)

    model = Model(inputs, output)

    return model

# In[15]:

n_classes = 10
input_shape = (96,96,3)

model = MobileNetV2(input_shape,n_classes)
model.summary()

# In[16]:

model.compile(loss="sparse_categorical_crossentropy",
              optimizer="Adam", metrics=["accuracy"])

# In[17]:

#Fit the model
hist= model.fit(train_images, train_labels, batch_size = 256, epochs=100, 
                validation_data = (test_images, test_labels))
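
# Hedged tuning note (not verified on this setup): from-scratch CIFAR runs
# that reach 90%+ usually add data augmentation and a learning-rate schedule
# rather than a fixed Adam rate on raw images. One illustrative option with
# the already-imported ImageDataGenerator (it expects NumPy arrays, so call
# .numpy() on the resized tensors first):
#
#   datagen = ImageDataGenerator(width_shift_range=0.1,
#                                height_shift_range=0.1,
#                                horizontal_flip=True)
#   hist = model.fit(datagen.flow(train_images.numpy(), train_labels,
#                                 batch_size=256),
#                    epochs=100, validation_data=(test_images, test_labels))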

# In[18]:

test_loss, test_acc = model.evaluate(test_images, test_labels, batch_size = 256)
print("test loss : ", test_loss)
print("test acc  : ", test_acc)

# In[19]:

#loss curve
plt.figure(figsize=[6,4])
plt.plot(hist.history['loss'], 'black', linewidth=2.0)
plt.plot(hist.history['val_loss'], 'green', linewidth=2.0)

# The legend shows which curve is which
plt.legend(['Training Loss', 'Validation Loss'], fontsize=14)
plt.xlabel('Epochs', fontsize = 10)
plt.ylabel('Loss', fontsize=10)
plt.title('Loss Curves', fontsize =12)

# In[20]:

#Accuracy Curve
plt.figure(figsize = [6,4])
plt.plot(hist.history['accuracy'], 'black', linewidth=2.0)
plt.plot(hist.history['val_accuracy'], 'blue', linewidth=2.0)

# The legend shows which curve is which
plt.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=14)
plt.xlabel('Epochs', fontsize = 10)
plt.ylabel('Accuracy', fontsize=10)
plt.title('Accuracy Curves', fontsize = 12)

# In[21]:

prediction = model.predict(test_images)
print(prediction.shape)
history_dict = hist.history
print(history_dict.keys())
Answer (ig9co6j1):

Why did you change the input resolution to 96x96 instead of the 224x224 used for the ImageNet dataset? Neural networks are usually optimized for a particular input size, and that may be the cause of the poor performance (at least "Not getting Proper Accurcacy for cifar10 dadatset with mobilenetv2 on CPU" suggests so). Another tuning step could be to vary the batch size during training for better performance, but without testing it myself I would not bet on that being the fix.
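
To illustrate the fine-tuning route raised in questions 3 and 4, here is a minimal transfer-learning sketch (untested on this exact setup, so treat it as a starting point rather than a guaranteed fix): load tf.keras.applications.MobileNetV2 with pretrained ImageNet weights, which are also published for 96x96 inputs, attach a fresh classification head, train the head first, then unfreeze the base at a low learning rate. This is commonly how 90%+ CIFAR-10 numbers are reached.

import tensorflow as tf

# Assumes images resized to 96x96 and scaled with
# tf.keras.applications.mobilenet_v2.preprocess_input, labels as integer ids.
base = tf.keras.applications.MobileNetV2(input_shape=(96, 96, 3),
                                         include_top=False,
                                         weights='imagenet')
base.trainable = False  # train only the new head first

model = tf.keras.Sequential([
    base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Once the head converges, unfreeze and fine-tune at a much lower rate:
# base.trainable = True
# model.compile(optimizer=tf.keras.optimizers.Adam(1e-5), ...)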
