Skip to content

image_classification_local.ipynb

# Load data locally with structure:
# directory
#  - class1
#    - image1
#    - image2
#    - ...
#  - class2
#    - image1
#    - image2
#    - ...
import os
import pandas as pd

# Load data
# Each immediate sub-directory of data_dir is treated as one class; its name
# becomes the label of every image file found directly inside it.
data_dir = 'sample'

# os.listdir() returns entries in arbitrary, filesystem-dependent order and
# also lists stray files and hidden entries (.DS_Store, .ipynb_checkpoints).
# Keep only visible directories and sort them so the sample order -- and
# therefore the seeded train/test split later on -- is the same on every run.
classes = sorted(
    c for c in os.listdir(data_dir)
    if not c.startswith('.') and os.path.isdir(os.path.join(data_dir, c))
)

image_filenames = []
y = []
for c in classes:
    class_dir = os.path.join(data_dir, c)
    for f in sorted(os.listdir(class_dir)):
        path = os.path.join(class_dir, f)
        # Skip hidden files and nested directories: they are not images and
        # would make the image-loading step fail.
        if f.startswith('.') or not os.path.isfile(path):
            continue
        image_filenames.append(path)
        y.append(c)

# Load images
from PIL import Image
import numpy as np

# Build the feature matrix: one flattened 64x64 RGB image per row.
X = []
for f in image_filenames:
    # Image.open() is lazy and keeps the underlying file open; the context
    # manager closes it once the pixel data has been copied out.
    with Image.open(f) as raw_im:
        # Force 3 channels so grayscale / palette / RGBA files all flatten to
        # the same 64 * 64 * 3 = 12288 features. Without this the rows can
        # have different lengths and cannot form a single 2-D array.
        im = raw_im.convert('RGB')

        # Resize to 64x64
        im = im.resize((64, 64))

        # Convert to numpy array
        some_x = np.array(im)

    # Reshape to one row
    some_x = some_x.reshape(-1)
    X.append(some_x)

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

X.shape, y.shape
((335, 12288), (335,))
# Look at some images
import matplotlib.pyplot as plt

# Undo the flattening for one sample so it can be drawn as a 64x64 RGB image,
# and print its label and shape as a sanity check.
sample_idx = 8
some_image = np.reshape(X[sample_idx], (64, 64, 3))
print(y[sample_idx])
print(some_image.shape)
plt.imshow(some_image)
parcel
(64, 64, 3)





<matplotlib.image.AxesImage at 0x7fdbb1b0ceb0>

png

# Split data into train and test
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing; the fixed random_state makes the
# shuffle repeatable for a given ordering of X and y.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=918379,
)

SGD (Stochastic Gradient Descent) Classifier

# SGD Classifier
from sklearn.linear_model import SGDClassifier

# Fit a linear classifier with stochastic gradient descent on the training
# split; random_state is fixed so repeated runs use the same seed.
# NOTE(review): x_train appears to hold raw, unscaled pixel values; SGD is
# usually sensitive to feature scale -- consider standardising the inputs.
sgd_clf = SGDClassifier(random_state=101208).fit(x_train, y_train)
sgd_clf
SGDClassifier(random_state=101208)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Predict a single image and compare with its label.
# NOTE(review): the sample is taken from x_train, i.e. data the model was
# fitted on, so this is a smoke test rather than an evaluation.
sample_idx = 8
sample_row = x_train[sample_idx]
some_image = sample_row.reshape(64, 64, 3)
print("Predicted: ", sgd_clf.predict([sample_row]))
print("Truth: ", y_train[sample_idx])
plt.imshow(some_image)
Predicted:  ['parcel']
Truth:  parcel





<matplotlib.image.AxesImage at 0x7fdbb1b88cd0>

png

# Evaluate with cross validation
from sklearn.model_selection import cross_val_score
# 3-fold cross-validated accuracy, computed on the training split only.
cross_val_score(
    estimator=sgd_clf,
    X=x_train,
    y=y_train,
    cv=3,
    scoring="accuracy",
)
array([0.65555556, 0.6741573 , 0.79775281])
# Evaluate model on test set
from sklearn.metrics import accuracy_score

# Accuracy of the fitted SGD classifier on the held-out test split.
y_pred = sgd_clf.predict(x_test)
accuracy_score(y_true=y_test, y_pred=y_pred)
0.8656716417910447

Random Forest Classifier

# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the same training split, with a fixed seed so
# repeated runs build the same forest.
rf_clf = RandomForestClassifier(random_state=101208).fit(x_train, y_train)
rf_clf
RandomForestClassifier(random_state=101208)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# Evaluate on test set
# Note: y_pred is rebound here and now holds the random-forest predictions.
y_pred = rf_clf.predict(x_test)
accuracy_score(y_true=y_test, y_pred=y_pred)
0.9402985074626866