Python ConvNet 图像分类器 - 为二值图像分类拟合模型时出现“ValueError”

我对深度学习和 TensorFlow/Keras 非常陌生,因此在尝试拟合模型以将图像分类为“狗”或“猫”时,我无法理解为什么会抛出错误。 (图像数据库可以在这里找到: https://www.microsoft.com/en-us/download/details.aspx?id=54765 )。该模型是在我学习并遵循 YouTube 教程 ( https://www.youtube.com/watch?v=WvoLTXIjBYU ) 时在单独的模块中编写、保存和打开的。第一个代码块涉及创建和保存模型(使用 pickle),第二个代码块是训练实际卷积网络的部分。

下载图像数据库,保存到文件目录,并编写模型来训练分类器。代码如下:

import numpy as np
import matplotlib.pyplot as plt
import os
import cv2

DATADIR = "Pictureskagglecatsanddogs_3367aPetImages" 
#Workspace directory changed for posting
CATEGORIES = ["Dog", "Cat"]

#Iterate between all photos of dogs and cats
for category in CATEGORIES:
    path = os.path.join(DATADIR, category) #path to cats or dogs dir
    for img in os.listdir(path):
        img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE) #Converts to grayscale, does not need color in this specific instance)
        plt.imshow(img_array, cmap = "gray")
        break
    break

#Print image dimensions
print(img_array.shape)

#All the images are different-shaped photos, so they must be normalized
#Everything must be made the same shape
#Decide on the image size you want to go with
IMG_SIZE = 180
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))

training_data = []

def create_training_data(): #With goal of iterating through everything and building the dataset
    for category in CATEGORIES:
        path = os.path.join(DATADIR, category) #path to cats or dogs dir
        class_num = CATEGORIES.index(category)
        for img in os.listdir(path):
            try:
                img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE) #Converts to grayscale, does not need color in this specific instance)
                new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
                training_data.append([new_array, class_num])
            except Exception as e:
                pass

create_training_data()

print(len(training_data))

#Shuffle the data
import random
random.shuffle(training_data)

for sample in training_data[:10]:
    print(sample[1])

#Packs data into variables we will use
x = []
y = []

for features, label in training_data:
    x.append(features)
    y.append(label)
x = np.array(x).reshape(-1, IMG_SIZE, IMG_SIZE, 1)

#Model was saved with pickle
import pickle
pickle_out = open("x.pickle", "wb")
pickle.dump(x, pickle_out)
pickle_out.close()

pickle_out = open("y.pickle", "wb")
pickle.dump(y, pickle_out)
pickle_out.close()

然后在另一个 Jupyter Notebook 文件中打开代码并用于构建 CNN:

#Import necessary packages
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import pickle

#Load models generated in previous tutorial
x = pickle.load(open("x.pickle", "rb"))
y = pickle.load(open("y.pickle", "rb"))

#Normalize the data
#255 is used due to RGB imagery
x = x/255

#Model building: First layer
model = Sequential()
#Convolutional network
model.add(Conv2D(64, (3,3), input_shape = x.shape[1:]))
model.add(Activation("relu"))
#Pooling
model.add(MaxPooling2D(pool_size = (2,2)))

#Model building: Second layer
#Convolutional network
model.add(Conv2D(64, (3,3), input_shape = x.shape[1:]))
model.add(Activation("relu"))
#Pooling
model.add(MaxPooling2D(pool_size = (2,2)))

#Final output layer
model.add(Flatten())
model.add(Dense(64))

model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss= "binary_crossentropy",
             optimizer = "adam",
             metrics = ['accuracy'])

model.fit(x, y, batch_size = 32, epochs = 3, validation_split = 0.1)

然后抛出异常:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-6-bb5f154147cd> in <module>
     39              metrics = ['accuracy'])
     40 
---> 41 model.fit(x, y, batch_size = 32, epochs = 3, validation_split = 0.1)

~AppDataRoamingPythonPython36site-packagestensorflowpythonkerasenginetraining.py in _method_wrapper(self, *args, **kwargs)
    106   def _method_wrapper(self, *args, **kwargs):
    107     if not self._in_multi_worker_mode():  # pylint: disable=protected-access
--> 108       return method(self, *args, **kwargs)
    109 
    110     # Running inside `run_distribute_coordinator` already.

~AppDataRoamingPythonPython36site-packagestensorflowpythonkerasenginetraining.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1038       (x, y, sample_weight), validation_data = (
   1039           data_adapter.train_validation_split(
-> 1040               (x, y, sample_weight), validation_split=validation_split))
   1041 
   1042     if validation_data:

~AppDataRoamingPythonPython36site-packagestensorflowpythonkerasenginedata_adapter.py in train_validation_split(arrays, validation_split)
   1374     raise ValueError(
   1375         "`validation_split` is only supported for Tensors or NumPy "
-> 1376         "arrays, found following types in the input: {}".format(unsplitable))
   1377 
   1378   if all(t is None for t in flat_arrays):

ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>, <class 'int'>

如何解决此错误?似乎错误出现在 model.fit(x, y, batch_size = 32, epochs = 3, validation_split = 0.1) 行中,因为当我在没有此行的情况下运行代码时,不会引发异常。 谢谢!

stack overflow Python ConvNet Image Classifier - "ValueError" when fitting a model for a binary image classification
原文答案
author avatar

接受的答案

You should do transformation proccess for numpy array of y, not just X.

X = []
y = []

for features,label in training_data:
    X.append(features)
    y.append(label)

print(X[0].reshape(-1, IMG_SIZE, IMG_SIZE, 1))

X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y = np.array(y)

答案:

作者头像
import numpy as np 
X = np.array(X).reshape(-1,IMG_SIZE,IMG_SIZE,1)  
y = np.array(y) 

import tensorflow as tf 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D


import pickle  

. . .

and continue. I have also faced the same problem but it worked after loading the data into NumPy array as I have mentioned by adding an extra row defining X and y.

作者头像

The model is expecting that the input is in the form of a numpy array. What it received is a list of integers. You will have to transform the loaded data into numpy arrays and then pass them into the model

作者头像

Just add

 y = np.array(y) 

In your last program after

 #Load models generated in previous tutorial
 x = pickle.load(open("x.pickle", "rb"))
 y = pickle.load(open("y.pickle", "rb"))

 y = np.array(y)