image_clasification_online.ipynb
image_clasification_online.ipynb
from sklearn.datasets import fetch_openml
# Load the MNIST digits data set from OpenML.
# as_frame=True is requested explicitly: the later cells index the features
# and labels with `.iloc`, which only exists on pandas objects, so the
# notebook should not depend on the library's default return type.
mnist = fetch_openml("mnist_784", version=1, as_frame=True)
print(mnist.keys())

# x: one row per image, one column per pixel; y: the digit label of each row
x, y = mnist["data"], mnist["target"]
dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])
# Inspect the dimensions of the feature table: (number of rows, number of columns)
x.shape
(70000, 784)
# Visualize one image
from matplotlib import pyplot as plt

# Each row is a flattened image of 784 pixel values; take the first row and
# restore its 28x28 layout. `to_numpy()` is used instead of `.values` because
# it is the accessor pandas recommends for getting the underlying ndarray.
some_image = x.iloc[0].to_numpy().reshape(28, 28)
plt.imshow(some_image, cmap="binary")
<matplotlib.image.AxesImage at 0x7f9128ff98a0>
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible between runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Size of the training portion
x_train.shape
(56000, 784)
SGD (Stochastic Gradient Descent) Classifier
# Build an (untrained) SGD classifier; the seed makes its randomness repeatable
from sklearn.linear_model import SGDClassifier

sgd_clf = SGDClassifier(
    random_state=129301937,
)
# Fit the SGD classifier on the training split
sgd_clf.fit(x_train, y_train)
# Try to predict the label of the first image.
# `x.iloc[[0]]` (list indexer) yields a one-row DataFrame that keeps the
# column names the model was fitted with; wrapping the Series in a plain
# list would drop them and make scikit-learn warn about missing feature names.
# NOTE(review): row 0 may be part of the training split, so this is a smoke
# test rather than an evaluation.
sgd_clf.predict(x.iloc[[0]])
# Estimate accuracy with 3-fold cross-validation on the training split
from sklearn.model_selection import cross_val_score

cross_val_score(
    sgd_clf,
    x_train,
    y_train,
    cv=3,
    scoring="accuracy",
)
array([0.88482349, 0.81925323, 0.85122683])
Random Forest Classifier
# Build an (untrained) random forest; the seed makes its randomness repeatable
from sklearn.ensemble import RandomForestClassifier

forest_clf = RandomForestClassifier(
    random_state=129301937,
)
# Fit the random forest on the training split
forest_clf.fit(x_train, y_train)
# Try to predict the label of the first image and compare it with the truth.
# `x.iloc[[0]]` (list indexer) yields a one-row DataFrame that keeps the
# column names the model was fitted with; wrapping the Series in a plain
# list would drop them and make scikit-learn warn about missing feature names.
# NOTE(review): row 0 may be part of the training split, so this is a smoke
# test rather than an evaluation.
print("Prediction: ", forest_clf.predict(x.iloc[[0]]))
print("Actual: ", y.iloc[0])
# Score the random forest on the held-out test split: predict every test
# row, then compute the fraction of predictions that match the true labels.
from sklearn.metrics import accuracy_score

y_pred = forest_clf.predict(x_test)
accuracy_score(y_test, y_pred)
0.9665