image_clasification_online.ipynb
from sklearn.datasets import fetch_openml

# Load the MNIST digits from OpenML.
# as_frame=True is requested explicitly: the cells below index the features
# with `.iloc`, which only exists when the data comes back as pandas objects,
# and the default for `as_frame` has differed between scikit-learn releases.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
print(mnist.keys())
# Recorded notebook output:
# dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

# Features (one row per image) and labels.
x, y = mnist["data"], mnist["target"]

# Check the shape of the data: one row per image, one column per pixel.
print(x.shape)
# Recorded notebook output: (70000, 784)
# Visualize one image
from matplotlib import pyplot as plt

# Each row stores the 784 pixel values of one image; reshape the first row
# back into its 28x28 grid so it can be drawn.
some_image = x.iloc[0].to_numpy().reshape(28, 28)
plt.imshow(some_image, cmap="binary")
# Recorded notebook output: <matplotlib.image.AxesImage at 0x7f9128ff98a0>

# Explicitly render the figure so the image also appears when this code is
# run outside a notebook with an inline backend.
plt.show()

# Split the data into train and test
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split (and therefore the scores recorded below) reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
print(x_train.shape)
# Recorded notebook output: (56000, 784)
SGD (Stochastic Gradient Descent) Classifier
# SGD Classifier
from sklearn.linear_model import SGDClassifier

# Fixed random_state so training is reproducible between runs.
sgd_clf = SGDClassifier(random_state=129301937)

# Train the model
# NOTE(review): the pixel features are passed in unscaled; the scikit-learn
# docs describe SGD as sensitive to feature scaling, so standardizing the
# inputs may be worth trying -- confirm before changing, as it would alter
# the scores recorded below.
sgd_clf.fit(x_train, y_train)
# Recorded notebook output: SGDClassifier(random_state=129301937)
# Try to predict
# `x.iloc[[0]]` (double brackets) keeps the sample as a one-row DataFrame, so
# it carries the same column names the model was fitted with; wrapping the
# Series in a list would drop them.
# NOTE: row 0 may have landed in the training split, so this is only a smoke
# test, not an evaluation.
print("Prediction: ", sgd_clf.predict(x.iloc[[0]]))
print("Actual: ", y.iloc[0])

# Evaluate the model
from sklearn.model_selection import cross_val_score

# 3-fold cross-validated accuracy on the training split.
print(cross_val_score(sgd_clf, x_train, y_train, cv=3, scoring="accuracy"))
# Recorded notebook output: array([0.88482349, 0.81925323, 0.85122683])
Random Forest Classifier
# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

# Fixed random_state so training is reproducible between runs.
forest_clf = RandomForestClassifier(random_state=129301937)

# Train the model
forest_clf.fit(x_train, y_train)
# Recorded notebook output: RandomForestClassifier(random_state=129301937)
# Try to predict
# `x.iloc[[0]]` (double brackets) keeps the sample as a one-row DataFrame, so
# it carries the same column names the model was fitted with; wrapping the
# Series in a list would drop them.
# NOTE: row 0 may have landed in the training split, so this is only a smoke
# test, not an evaluation.
print("Prediction: ", forest_clf.predict(x.iloc[[0]]))
print("Actual: ", y.iloc[0])

# Evaluate the model on the test set
from sklearn.metrics import accuracy_score

# Accuracy on the held-out 20% that the model never saw during training.
y_pred = forest_clf.predict(x_test)
print(accuracy_score(y_test, y_pred))
# Recorded notebook output: 0.9665