image_clasification_online.ipynb

from sklearn.datasets import fetch_openml

# Load the MNIST digits dataset from OpenML.
# as_frame=True is requested explicitly because the cells below index the
# features and labels with `.iloc`, which needs pandas objects rather than
# whatever container the library's 'auto' default happens to pick.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
print(mnist.keys())
# x: one row per image, one column per pixel; y: the digit label of each row
x, y = mnist["data"], mnist["target"]
dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])
# Check the dimensions of the feature table: (number of samples, number of features)
x.shape
(70000, 784)
# Show the first sample: its flat row of pixel values is laid out as a 28x28 grid
from matplotlib import pyplot as plt

some_image = x.iloc[0].to_numpy().reshape((28, 28))
plt.imshow(some_image, cmap="binary")
<matplotlib.image.AxesImage at 0x7f9128ff98a0>

png

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)
x_train.shape
(56000, 784)

SGD (Stochastic Gradient Descent) Classifier

# SGD Classifier
from sklearn.linear_model import SGDClassifier

# The fixed seed makes training repeatable between runs
sgd_clf = SGDClassifier(random_state=129301937)

# Train the model
sgd_clf.fit(x_train, y_train)
# Try to predict the first image. `x.iloc[[0]]` (list indexer) keeps the row
# as a one-row DataFrame with its column names, so the input has the same
# layout the model was fitted on, instead of a nameless list wrapping a Series.
# The true label is printed next to the prediction so the two can be compared.
print("Prediction: ", sgd_clf.predict(x.iloc[[0]]))
print("Actual: ", y.iloc[0])
# Estimate accuracy with 3-fold cross-validation on the training split
from sklearn.model_selection import cross_val_score

cross_val_score(
    sgd_clf,
    x_train,
    y_train,
    cv=3,
    scoring="accuracy",
)
array([0.88482349, 0.81925323, 0.85122683])

Random Forest Classifier

# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

# The fixed seed makes training repeatable between runs
forest_clf = RandomForestClassifier(random_state=129301937)

# Train the model
forest_clf.fit(x_train, y_train)
# Try to predict the first image. `x.iloc[[0]]` (list indexer) keeps the row
# as a one-row DataFrame with its column names, so the input has the same
# layout the model was fitted on, instead of a nameless list wrapping a Series.
print("Prediction: ", forest_clf.predict(x.iloc[[0]]))
print("Actual: ", y.iloc[0])
# Measure the forest's accuracy on the held-out test split
from sklearn.metrics import accuracy_score

y_pred = forest_clf.predict(x_test)
accuracy_score(y_true=y_test, y_pred=y_pred)
0.9665