While many object recognition models use RGB images, the larger set of spectral bands found in hyperspectral images (HSI) can provide more information about the objects in an image and their surroundings, and thus the potential for improved classification and broader applications (e.g., material identification, agricultural health monitoring). Working with HSI presents some challenges, however, including the large dimensionality of such images and high data storage requirements. For the project below, we will be using an HSI dataset from the Telecommunications and Remote Sensing Laboratory at Pavia University (Italy) containing a scene of the city center of Pavia, with 9 ground truth classes (water, trees, asphalt, self-blocking bricks, bitumen, tiles, shadows, meadows, and bare soil). Link to the dataset is below:
The Pavia Centre scene was originally 1096x1096 pixels, but as we'll see below, a central portion of the scene had to be discarded because of missing data, resulting in a 1096x715 pixel image, with each pixel containing 102 spectral bands of data.
The hyperspectral image was acquired with the Reflective Optics System Imaging Spectrometer (ROSIS) sensor.
As a note, the geometric resolution for the image is 1.3 meters, meaning each pixel covers roughly 1.3 meters on the ground.
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import IPython
import spectral
import plotly.express as px
# Read the data (found in .mat format):
# Image scene of Pavia City:
mat_data = loadmat('Pavia.mat')
# Ground truth file:
gt_mat_data = loadmat('Pavia_gt.mat')
mat_data
# We see some meta data held within the first few keys
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Fri May 20 18:20:56 2011', '__version__': '1.0', '__globals__': [], 'pavia': array([[[ 854, 601, 350, ..., 3664, 3636, 3643], [ 527, 642, 575, ..., 3834, 3725, 3768], [ 374, 322, 179, ..., 4318, 4311, 4321], ..., [ 367, 432, 461, ..., 2582, 2504, 2512], [ 261, 311, 366, ..., 2269, 2174, 2163], [1059, 678, 403, ..., 2245, 2135, 2136]], [[1060, 909, 596, ..., 2963, 2967, 2974], [ 707, 757, 646, ..., 3508, 3534, 3648], [ 143, 419, 417, ..., 4650, 4612, 4638], ..., [ 465, 547, 537, ..., 3156, 3052, 3035], [ 884, 615, 401, ..., 2792, 2667, 2639], [ 756, 401, 213, ..., 2600, 2484, 2445]], [[ 532, 545, 594, ..., 1675, 1653, 1680], [ 523, 491, 321, ..., 3339, 3349, 3403], [ 816, 681, 369, ..., 4627, 4600, 4650], ..., [ 408, 539, 436, ..., 3099, 3005, 3006], [ 393, 447, 476, ..., 3172, 3048, 3032], [ 798, 615, 489, ..., 3039, 2876, 2800]], ..., [[ 689, 560, 701, ..., 1314, 1265, 1271], [ 497, 785, 1029, ..., 1226, 1237, 1255], [ 947, 634, 587, ..., 1260, 1232, 1252], ..., [ 812, 483, 220, ..., 1791, 1699, 1641], [ 840, 538, 494, ..., 1506, 1456, 1411], [ 187, 305, 343, ..., 1512, 1415, 1399]], [[ 895, 586, 635, ..., 1302, 1281, 1290], [ 211, 373, 733, ..., 1270, 1248, 1244], [ 971, 881, 701, ..., 1261, 1272, 1302], ..., [ 802, 635, 481, ..., 2447, 2365, 2342], [ 328, 656, 680, ..., 2234, 2137, 2140], [ 897, 677, 411, ..., 2107, 1961, 1925]], [[ 610, 673, 592, ..., 1331, 1320, 1301], [ 961, 642, 491, ..., 1348, 1333, 1300], [ 443, 641, 779, ..., 1250, 1241, 1274], ..., [ 592, 589, 659, ..., 2688, 2584, 2541], [ 406, 416, 395, ..., 2849, 2714, 2707], [ 631, 405, 473, ..., 2810, 2688, 2672]]], dtype=uint16)}
# Limit to only the array we need containing the pixel data:
data = mat_data['pavia']
data
array([[[ 854, 601, 350, ..., 3664, 3636, 3643], [ 527, 642, 575, ..., 3834, 3725, 3768], [ 374, 322, 179, ..., 4318, 4311, 4321], ..., [ 367, 432, 461, ..., 2582, 2504, 2512], [ 261, 311, 366, ..., 2269, 2174, 2163], [1059, 678, 403, ..., 2245, 2135, 2136]], [[1060, 909, 596, ..., 2963, 2967, 2974], [ 707, 757, 646, ..., 3508, 3534, 3648], [ 143, 419, 417, ..., 4650, 4612, 4638], ..., [ 465, 547, 537, ..., 3156, 3052, 3035], [ 884, 615, 401, ..., 2792, 2667, 2639], [ 756, 401, 213, ..., 2600, 2484, 2445]], [[ 532, 545, 594, ..., 1675, 1653, 1680], [ 523, 491, 321, ..., 3339, 3349, 3403], [ 816, 681, 369, ..., 4627, 4600, 4650], ..., [ 408, 539, 436, ..., 3099, 3005, 3006], [ 393, 447, 476, ..., 3172, 3048, 3032], [ 798, 615, 489, ..., 3039, 2876, 2800]], ..., [[ 689, 560, 701, ..., 1314, 1265, 1271], [ 497, 785, 1029, ..., 1226, 1237, 1255], [ 947, 634, 587, ..., 1260, 1232, 1252], ..., [ 812, 483, 220, ..., 1791, 1699, 1641], [ 840, 538, 494, ..., 1506, 1456, 1411], [ 187, 305, 343, ..., 1512, 1415, 1399]], [[ 895, 586, 635, ..., 1302, 1281, 1290], [ 211, 373, 733, ..., 1270, 1248, 1244], [ 971, 881, 701, ..., 1261, 1272, 1302], ..., [ 802, 635, 481, ..., 2447, 2365, 2342], [ 328, 656, 680, ..., 2234, 2137, 2140], [ 897, 677, 411, ..., 2107, 1961, 1925]], [[ 610, 673, 592, ..., 1331, 1320, 1301], [ 961, 642, 491, ..., 1348, 1333, 1300], [ 443, 641, 779, ..., 1250, 1241, 1274], ..., [ 592, 589, 659, ..., 2688, 2584, 2541], [ 406, 416, 395, ..., 2849, 2714, 2707], [ 631, 405, 473, ..., 2810, 2688, 2672]]], dtype=uint16)
gt_mat_data
# Here is our 'ground truth' dataset
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Fri May 20 18:26:22 2011', '__version__': '1.0', '__globals__': [], 'pavia_gt': array([[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)}
# Limit to only the array we need as we did with the data for the scene:
gt_data = gt_mat_data['pavia_gt']
gt_data
array([[0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], ..., [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)
print('Image shape: ', data.shape, '\nGround truth shape: ', gt_data.shape)
Image shape:  (1096, 715, 102) 
Ground truth shape:  (1096, 715)
# Our spectral image is 3 dimensions:
# 1096 pixels * 715 pixels
# 102 spectral bands
# In a hyperspectral image, each pixel is a vector of values representing the emission or reflectance in each
# spectral band
fig = plt.figure(figsize = (10,5))
print('\nThe image represented with 12 random bands. Note that the shown color is merely for representation of band intensity')
for i in range(1, 13): # we want 12 images
    fig.add_subplot(3, 4, i)
    band = np.random.randint(data.shape[2]) # choose a random band from among the 102 available
    plt.imshow(data[:, :, band], cmap='terrain')
    plt.title(f'Band # {band}')
    plt.axis('off')
The image represented with 12 random bands. Note that the shown color is merely for representation of band intensity
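Since each pixel in the cube is a full spectral vector, it is also useful to be able to pull out a single pixel's signature. A minimal sketch, using a small synthetic cube as a stand-in for `data` (the slicing is identical on the real (1096, 715, 102) array):

```python
import numpy as np

# Synthetic stand-in for the hyperspectral cube loaded above
rng = np.random.default_rng(0)
cube = rng.integers(0, 8000, size=(10, 8, 102), dtype=np.uint16)

# The spectral signature of a pixel is its vector along the band axis
row, col = 4, 3
signature = cube[row, col, :]
print(signature.shape)  # (102,)
```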
# Use plotly express so we can hover the cursor over the ground truth image for easier class identification:
import plotly.express as px
ground_truth = px.imshow(gt_data, color_continuous_scale='jet')
ground_truth.show()
In the ground truth image we see more clearly, around x = 221, the location where the scene was cut due to missing data.
Here are the labels for the classes above:
[1] water
[2] trees
[3] meadows
[4] self-blocking bricks
[5] bare soil
[6] asphalt
[7] bitumen
[8] tiles
[9] shadows
# A visualization of the scene with bands assigned to R,G,B
import spectral
spectral.imshow(data,(39,23,7))
plt.show()
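As a note, if the `spectral` package is unavailable, a similar false-color composite can be assembled with numpy and matplotlib alone by stacking three bands and rescaling each to [0, 1]. A sketch with a synthetic cube standing in for `data`:

```python
import numpy as np
import matplotlib.pyplot as plt

# Synthetic stand-in for the (1096, 715, 102) cube
rng = np.random.default_rng(1)
cube = rng.integers(0, 8000, size=(50, 40, 102)).astype(float)

def normalize(band):
    # Rescale a single band to the [0, 1] range imshow expects for RGB input
    return (band - band.min()) / (band.max() - band.min())

# Same band indices as spectral.imshow(data, (39, 23, 7)) above
rgb = np.dstack([normalize(cube[:, :, b]) for b in (39, 23, 7)])
plt.imshow(rgb)
plt.axis('off')
```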
# Take the 2D ground truth data, which is a 2D array for each row of pixels:
gt_data.shape
(1096, 715)
# And flatten to one dimension, with a 1D array for all the pixels (1096x715=783640)
gt_data_1d = gt_data.ravel()
gt_data_1d.shape
(783640,)
# Data is a 3D array:
data.shape
(1096, 715, 102)
# The band data is the third dimension, and it is of length 102:
data.shape[2]
102
# Reshape into a 2D array (to get rows of pixels and columns representing the 102 bands)
# The first dimension will be 1096x715=783640, but putting '-1' will make numpy infer this automatically
# The second dimension is 102, i.e. data.shape[2]
data_2d = data.reshape(-1, data.shape[2])
data_2d.shape
(783640, 102)
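This reshape is lossless: every pixel keeps its band vector, and the flat array can be folded back into the image grid at any time. A quick round-trip check on a tiny synthetic cube:

```python
import numpy as np

cube = np.arange(4 * 3 * 5).reshape(4, 3, 5)  # (rows, cols, bands)
flat = cube.reshape(-1, cube.shape[2])        # (12, 5): one row per pixel
restored = flat.reshape(cube.shape)           # fold back to (4, 3, 5)
print(np.array_equal(cube, restored))  # True
```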
range(1,data.shape[2])
range(1, 102)
# combine both the 102 column band data and the single column class data together in a df:
df_data = pd.DataFrame(data_2d)
df_gt = pd.DataFrame(gt_data_1d)
df = pd.concat([df_data, df_gt], axis=1)
df.columns = [f'band{i}' for i in range(1, 1+data.shape[2])] + ['gt_class']
df.head(20)
band1 | band2 | band3 | band4 | band5 | band6 | band7 | band8 | band9 | band10 | ... | band94 | band95 | band96 | band97 | band98 | band99 | band100 | band101 | band102 | gt_class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 854 | 601 | 350 | 266 | 138 | 118 | 178 | 194 | 257 | 269 | ... | 3759 | 3773 | 3779 | 3752 | 3690 | 3671 | 3664 | 3636 | 3643 | 0 |
1 | 527 | 642 | 575 | 294 | 123 | 168 | 207 | 154 | 209 | 299 | ... | 3873 | 3902 | 3921 | 3861 | 3854 | 3882 | 3834 | 3725 | 3768 | 0 |
2 | 374 | 322 | 179 | 87 | 169 | 268 | 360 | 339 | 286 | 309 | ... | 4443 | 4472 | 4428 | 4353 | 4306 | 4284 | 4318 | 4311 | 4321 | 0 |
3 | 706 | 520 | 560 | 572 | 425 | 243 | 271 | 272 | 258 | 276 | ... | 3972 | 4006 | 4032 | 3975 | 3946 | 3954 | 3944 | 3936 | 3939 | 0 |
4 | 1120 | 1027 | 592 | 414 | 407 | 463 | 417 | 365 | 332 | 334 | ... | 4502 | 4485 | 4479 | 4445 | 4364 | 4290 | 4268 | 4235 | 4272 | 0 |
5 | 568 | 278 | 211 | 507 | 544 | 346 | 271 | 291 | 346 | 380 | ... | 3862 | 3856 | 3866 | 3816 | 3738 | 3719 | 3691 | 3682 | 3713 | 0 |
6 | 627 | 520 | 308 | 231 | 211 | 162 | 211 | 289 | 314 | 292 | ... | 2493 | 2477 | 2441 | 2466 | 2488 | 2424 | 2337 | 2302 | 2332 | 0 |
7 | 332 | 536 | 476 | 268 | 310 | 400 | 374 | 341 | 331 | 312 | ... | 1965 | 1944 | 1931 | 1956 | 1934 | 1887 | 1879 | 1890 | 1920 | 0 |
8 | 0 | 181 | 522 | 437 | 370 | 463 | 520 | 530 | 485 | 402 | ... | 1206 | 1205 | 1204 | 1230 | 1231 | 1207 | 1200 | 1222 | 1223 | 0 |
9 | 597 | 367 | 221 | 196 | 150 | 138 | 325 | 396 | 336 | 257 | ... | 513 | 525 | 509 | 511 | 541 | 555 | 551 | 534 | 514 | 0 |
10 | 605 | 588 | 448 | 374 | 344 | 302 | 319 | 264 | 218 | 223 | ... | 438 | 458 | 468 | 457 | 451 | 451 | 453 | 456 | 451 | 0 |
11 | 286 | 299 | 346 | 417 | 468 | 385 | 233 | 232 | 221 | 203 | ... | 369 | 371 | 369 | 390 | 413 | 403 | 390 | 408 | 397 | 0 |
12 | 794 | 571 | 321 | 381 | 346 | 218 | 296 | 326 | 307 | 278 | ... | 515 | 510 | 508 | 557 | 585 | 569 | 575 | 597 | 621 | 0 |
13 | 407 | 224 | 251 | 306 | 407 | 429 | 386 | 356 | 354 | 390 | ... | 657 | 670 | 699 | 714 | 708 | 687 | 695 | 713 | 706 | 0 |
14 | 407 | 511 | 648 | 492 | 405 | 376 | 362 | 430 | 492 | 491 | ... | 623 | 637 | 645 | 670 | 716 | 714 | 692 | 655 | 641 | 0 |
15 | 729 | 677 | 529 | 484 | 435 | 299 | 267 | 303 | 335 | 342 | ... | 733 | 725 | 722 | 722 | 694 | 679 | 675 | 680 | 680 | 0 |
16 | 784 | 609 | 536 | 472 | 431 | 361 | 325 | 338 | 339 | 321 | ... | 1140 | 1177 | 1174 | 1200 | 1231 | 1239 | 1227 | 1192 | 1209 | 0 |
17 | 628 | 661 | 450 | 484 | 480 | 385 | 461 | 512 | 504 | 527 | ... | 2122 | 2109 | 2107 | 2110 | 2096 | 2083 | 2068 | 2050 | 2069 | 0 |
18 | 985 | 797 | 549 | 676 | 756 | 650 | 644 | 761 | 707 | 654 | ... | 2263 | 2252 | 2232 | 2225 | 2242 | 2203 | 2126 | 2103 | 2160 | 0 |
19 | 1029 | 844 | 817 | 863 | 915 | 826 | 714 | 689 | 739 | 828 | ... | 2238 | 2236 | 2233 | 2210 | 2191 | 2158 | 2125 | 2093 | 2121 | 0 |
20 rows × 103 columns
# Examples where the class (final column) is not 0 and thus has a label (see last column on the right):
df.loc[df['gt_class']!=0,:]
band1 | band2 | band3 | band4 | band5 | band6 | band7 | band8 | band9 | band10 | ... | band94 | band95 | band96 | band97 | band98 | band99 | band100 | band101 | band102 | gt_class | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
161 | 265 | 499 | 351 | 339 | 324 | 296 | 371 | 357 | 289 | 292 | ... | 138 | 142 | 167 | 177 | 177 | 169 | 173 | 169 | 170 | 1 |
162 | 212 | 313 | 190 | 154 | 241 | 243 | 232 | 243 | 323 | 381 | ... | 149 | 147 | 136 | 149 | 176 | 173 | 175 | 143 | 127 | 1 |
163 | 373 | 276 | 200 | 150 | 275 | 274 | 217 | 278 | 314 | 369 | ... | 189 | 175 | 147 | 155 | 171 | 181 | 157 | 133 | 128 | 1 |
164 | 155 | 182 | 194 | 228 | 150 | 112 | 186 | 246 | 331 | 373 | ... | 144 | 143 | 160 | 164 | 149 | 139 | 139 | 138 | 132 | 1 |
165 | 695 | 510 | 477 | 566 | 528 | 517 | 399 | 270 | 294 | 335 | ... | 113 | 130 | 157 | 194 | 227 | 216 | 201 | 176 | 145 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
783481 | 343 | 273 | 183 | 36 | 113 | 261 | 357 | 456 | 504 | 469 | ... | 4383 | 4373 | 4337 | 4317 | 4313 | 4286 | 4272 | 4173 | 4117 | 2 |
783491 | 880 | 681 | 574 | 401 | 297 | 278 | 256 | 229 | 210 | 208 | ... | 4310 | 4306 | 4321 | 4326 | 4274 | 4210 | 4205 | 4186 | 4252 | 2 |
783492 | 450 | 460 | 363 | 371 | 340 | 324 | 357 | 384 | 372 | 301 | ... | 4396 | 4405 | 4398 | 4389 | 4306 | 4250 | 4271 | 4260 | 4264 | 2 |
783493 | 777 | 429 | 347 | 360 | 433 | 474 | 485 | 410 | 347 | 330 | ... | 4105 | 4160 | 4146 | 4079 | 4037 | 4001 | 3979 | 3934 | 3939 | 2 |
783494 | 466 | 284 | 260 | 260 | 293 | 416 | 441 | 393 | 399 | 389 | ... | 3594 | 3598 | 3597 | 3567 | 3535 | 3531 | 3531 | 3511 | 3529 | 2 |
148152 rows × 103 columns
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 783640 entries, 0 to 783639
Columns: 103 entries, band1 to gt_class
dtypes: uint16(102), uint8(1)
memory usage: 153.2 MB
# Stats for all bands:
df.drop('gt_class',axis=1).describe()
band1 | band2 | band3 | band4 | band5 | band6 | band7 | band8 | band9 | band10 | ... | band93 | band94 | band95 | band96 | band97 | band98 | band99 | band100 | band101 | band102 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | ... | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 | 783640.000000 |
mean | 756.377920 | 690.307315 | 643.929436 | 650.127791 | 666.007831 | 671.834180 | 675.212941 | 672.903627 | 674.686433 | 680.781220 | ... | 1697.386833 | 1699.732885 | 1703.646257 | 1699.515219 | 1692.456623 | 1682.842181 | 1671.153438 | 1658.587115 | 1638.005885 | 1643.461353 |
std | 396.311133 | 395.274284 | 403.694296 | 427.116754 | 449.411658 | 462.099314 | 469.061070 | 471.706490 | 478.086505 | 488.054049 | ... | 1101.857332 | 1101.298899 | 1101.531161 | 1094.214808 | 1080.862130 | 1066.634222 | 1058.692782 | 1054.097477 | 1044.365696 | 1051.107124 |
min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
25% | 496.000000 | 428.000000 | 368.000000 | 348.000000 | 345.000000 | 342.000000 | 339.000000 | 332.000000 | 327.000000 | 324.000000 | ... | 883.000000 | 886.000000 | 890.000000 | 891.000000 | 897.000000 | 902.000000 | 898.000000 | 889.000000 | 874.000000 | 874.000000 |
50% | 730.000000 | 647.000000 | 589.000000 | 595.000000 | 613.000000 | 617.000000 | 614.000000 | 606.000000 | 604.000000 | 607.000000 | ... | 1683.000000 | 1686.000000 | 1689.000000 | 1686.000000 | 1680.000000 | 1672.000000 | 1661.000000 | 1648.000000 | 1627.000000 | 1634.000000 |
75% | 974.000000 | 893.000000 | 846.000000 | 863.000000 | 884.000000 | 888.000000 | 890.000000 | 884.000000 | 885.000000 | 894.000000 | ... | 2410.000000 | 2412.000000 | 2417.000000 | 2411.000000 | 2395.000000 | 2374.000000 | 2356.000000 | 2341.000000 | 2316.000000 | 2328.000000 |
max | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | ... | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 | 8000.000000 |
8 rows × 102 columns
df['gt_class'].value_counts()
0    635488
1     65971
8     42826
6      9248
2      7598
7      7287
5      6584
3      3090
9      2863
4      2685
Name: gt_class, dtype: int64
# drop zeros for the purpose of viewing summary stats:
df = df.loc[df['gt_class']!=0]
df['gt_class'].value_counts()
1    65971
8    42826
6     9248
2     7598
7     7287
5     6584
3     3090
9     2863
4     2685
Name: gt_class, dtype: int64
# Use PCA to reduce the band data to 3 dimensions so that we can visualize a pair plot we can actually
# interpret, since 102 dimensions cannot realistically be visualized:
from sklearn.decomposition import PCA
pca = PCA(n_components = 3)
band_data_reduced = pca.fit_transform(df.drop('gt_class', axis=1).values)
band_data_reduced = pd.DataFrame(band_data_reduced)
band_data_reduced['gt_class'] = df['gt_class'].values
band_data_reduced.columns = ['DIM1','DIM2','DIM3','gt_class']
class_key = {
1: 'water',
2: 'trees',
3: 'meadows',
4: 'self-blocking bricks',
5: 'bare soil',
6: 'asphalt',
7: 'bitumen',
8: 'tiles',
9: 'shadows',
}
band_data_reduced['labels'] = band_data_reduced['gt_class'].apply(lambda x: class_key[x])
band_data_reduced
DIM1 | DIM2 | DIM3 | gt_class | labels | |
---|---|---|---|---|---|
0 | -7463.614614 | 393.864615 | -44.729911 | 1 | water |
1 | -7544.965493 | 381.214775 | -177.865891 | 1 | water |
2 | -7522.273608 | 370.450019 | -55.195668 | 1 | water |
3 | -7532.555647 | 404.161272 | -179.121952 | 1 | water |
4 | -7399.577812 | 246.545020 | 106.923650 | 1 | water |
... | ... | ... | ... | ... | ... |
148147 | 9873.095362 | 11826.994285 | 1539.203707 | 2 | trees |
148148 | 9331.170835 | 12057.453596 | 1480.846986 | 2 | trees |
148149 | 9833.617927 | 12387.541952 | 1480.413840 | 2 | trees |
148150 | 8932.464503 | 11446.088992 | 1420.602517 | 2 | trees |
148151 | 6572.026961 | 10222.795627 | 1238.605972 | 2 | trees |
148152 rows × 5 columns
plt.figure(figsize=(12,6))
sns.histplot(band_data_reduced['labels']);
sns.pairplot(data=band_data_reduced, x_vars=['DIM1','DIM2','DIM3'],hue='labels')
# This will take a while to load:
<seaborn.axisgrid.PairGrid at 0x7f94de19e4f0>
We see in the final row of the pairplot that, for each dimension (DIM1, DIM2, DIM3), each class tends to stay within a fairly narrow range of values. For example, water appears near or just below 0 in each dimension. When we compare one dimension against another, we also see some clustering: the 'tiles' class, for instance, congregates where DIM3 is below 0 and DIM1 is between 10000 and 15000.
# Now instead of picking 3 dimensions (which we chose out of convenience for visualization),
# let's find the optimum number of dimensions to explain variance before applying our models:
# Let's take an arbitrary number less than 102 bands (we can adjust):
pca = PCA(n_components = 40)
p_components = pca.fit_transform(df.drop('gt_class',axis=1))
# Pull the explained variance ratio, which describes how much variance is attributed to each principal component
evr = pca.explained_variance_ratio_
# calculate cumulative sum to visualize at what point most variance is explained
evr_cumulative = np.cumsum(evr)
plt.xlabel('Components')
plt.ylabel('Explained variance')
plt.plot(evr_cumulative)
[<matplotlib.lines.Line2D at 0x7f9494072ee0>]
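Rather than eyeballing the curve, the smallest number of components reaching a given variance threshold can be read off `evr_cumulative` directly. A sketch on synthetic correlated data (the indexing logic carries over unchanged):

```python
import numpy as np
from sklearn.decomposition import PCA

# Synthetic correlated features standing in for the band data
rng = np.random.default_rng(42)
X = rng.normal(size=(500, 20)) @ rng.normal(size=(20, 20))

pca = PCA(n_components=20).fit(X)
evr_cumulative = np.cumsum(pca.explained_variance_ratio_)

# argmax returns the index of the first True, i.e. the first point at which
# the cumulative explained variance crosses the threshold
k = int(np.argmax(evr_cumulative >= 0.99)) + 1
print(k)
```

Note that scikit-learn can also do this selection internally: passing a float between 0 and 1 as `n_components` (e.g. `PCA(n_components=0.99)`) keeps just enough components to explain that fraction of the variance.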
# Let's reduce to fewer principal components to narrow down the best number of principal components:
pca = PCA(n_components = 5)
p_components = pca.fit_transform(df.drop('gt_class',axis=1))
# Pull the explained variance ratio, which describes how much variance is attributed to each principal component
evr = pca.explained_variance_ratio_
# calculate cumulative sum to visualize at what point most variance is explained
evr_cumulative = np.cumsum(evr)
plt.xlabel('Components')
plt.ylabel('Explained variance')
plt.plot(evr_cumulative)
[<matplotlib.lines.Line2D at 0x7f9496786160>]
# It looks like 3 was in fact a good number of components for us, so we'll leave it at that.
# We still need to recreate our df and df_gt dataframes because we removed the zeros earlier for visualizing
# correlations but need them now for displaying the image with all of the requisite pixels as below:
df_data = pd.DataFrame(data_2d)
df_gt = pd.DataFrame(gt_data_1d)
df = pd.concat([df_data, df_gt], axis=1)
df.columns = [f'band{i}' for i in range(1, 1+data.shape[2])] + ['gt_class']
# Reduce to three dimensions again. (Note: this refits PCA on all pixels, including the unlabeled zeros;
# strictly speaking, reusing the earlier fit via pca.transform would keep the projection consistent with training.)
pca = PCA(n_components = 3)
band_data_reduced = pca.fit_transform(df.drop('gt_class', axis=1).values)
band_data_reduced = pd.DataFrame(band_data_reduced)
band_data_reduced['gt_class'] = df['gt_class'].values
band_data_reduced.columns = ['DIM1','DIM2','DIM3','gt_class']
# Visualize the image with the three bands after applying PCA:
fig = plt.figure(figsize=(12,5))
for i in range(1, 1+3):
    fig.add_subplot(1, 3, i)
    plt.imshow(band_data_reduced.loc[:, f'DIM{i}'].values.reshape(1096, 715), cmap='nipy_spectral')
    plt.axis('off')
    plt.title(f'Band - {i}')
band_data_reduced
DIM1 | DIM2 | DIM3 | gt_class | |
---|---|---|---|---|
0 | 3903.296351 | -10579.424391 | 969.223926 | 0 |
1 | 4489.219702 | -10495.362304 | 1200.288715 | 0 |
2 | 7537.525472 | -11984.664882 | 1063.019707 | 0 |
3 | 5856.807574 | -10092.589084 | 1308.820550 | 0 |
4 | 7812.803615 | -11935.049725 | 1544.780731 | 0 |
... | ... | ... | ... | ... |
783635 | 344.641085 | -7218.020858 | 926.371369 | 0 |
783636 | -2501.022210 | -5406.711438 | 330.277938 | 0 |
783637 | 947.076623 | -6022.277158 | 192.120444 | 0 |
783638 | 2686.873944 | -5559.307416 | 146.715406 | 0 |
783639 | 2744.519126 | -4893.697411 | 152.339669 | 0 |
783640 rows × 4 columns
We now build SVM, Random Forest, and simple Deep Learning models:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# drop the unclassified 0s and create our X and y
X = band_data_reduced.loc[band_data_reduced['gt_class']!=0].drop('gt_class', axis=1).values
y = band_data_reduced.loc[band_data_reduced['gt_class']!=0]['gt_class'].values
# Because of the imbalance in classes, we can use 'stratify' to maintain the class proportions in the train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=786, stratify=y)
X_train.shape
# Number of training pixels:
(118521, 3)
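The effect of `stratify` can be verified by comparing class proportions before and after the split. A small sketch on a deliberately imbalanced synthetic dataset:

```python
import numpy as np
from sklearn.model_selection import train_test_split

# Deliberately imbalanced synthetic labels: 80% / 15% / 5%
rng = np.random.default_rng(7)
X = rng.normal(size=(1000, 3))
y = np.concatenate([np.full(800, 1), np.full(150, 2), np.full(50, 3)])

X_tr, X_te, y_tr, y_te = train_test_split(
    X, y, test_size=0.20, random_state=0, stratify=y)

# Each split preserves the original class proportions
for cls in (1, 2, 3):
    print(cls, (y == cls).mean(), (y_tr == cls).mean(), (y_te == cls).mean())
```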
# Set our regularization parameter to 100. A higher C allows less misclassification but potentially risks overfitting.
# After experimenting with a few different values for C, 100 appears to give close to the best results with SVM
svm = SVC(C = 100, kernel = 'rbf', verbose=False)
svm.fit(X_train,y_train)
SVC(C=100)
svm_predictions = svm.predict(X_test)
svm_predictions
array([6, 8, 8, ..., 2, 8, 8], dtype=uint8)
print(confusion_matrix(y_test,svm_predictions))
[[13193     0     0     0     0     1     0     0     0]
 [    0  1380   140     0     0     0     0     0     0]
 [    0    93   520     0     5     0     0     0     0]
 [    0     0     0   408    40     3    75    11     0]
 [    0     0     5    51  1253     0     3     5     0]
 [    2     0     0     6     0  1747    68    27     0]
 [    0     0     0    32     6   123  1289     7     0]
 [    0     0     0    15    15    28     5  8502     0]
 [    0     0     0     0     0     0     0     0   573]]
# Let's visualize the above better:
labels = ['Water','Trees','Meadows','Self Blocking Bricks','Bare Soil','Asphalt','Bitumen','Tiles','Shadows']
conf_matrix = confusion_matrix(y_test, svm_predictions)
conf_matrix_df = pd.DataFrame(conf_matrix, columns=labels, index = labels)
conf_matrix_df.index.name = 'Actual'
conf_matrix_df.columns.name = 'Predicted'
plt.figure(figsize = (10,10))
sns.set(font_scale=1.2)
sns.heatmap(conf_matrix_df, cmap="Reds", annot=True,annot_kws={"size": 16}, fmt='d')
<AxesSubplot:xlabel='Predicted', ylabel='Actual'>
We see that the model makes some mistakes between meadows and trees, between self-blocking bricks and bare soil/bitumen/tiles, and between asphalt and bitumen/tiles, all of which is somewhat to be expected. The results seem fairly accurate, but let's see our precision, recall, and f1-scores:
print(classification_report(y_test, svm_predictions, target_names = labels))
                      precision    recall  f1-score   support

               Water       1.00      1.00      1.00     13194
               Trees       0.94      0.91      0.92      1520
             Meadows       0.78      0.84      0.81       618
Self Blocking Bricks       0.80      0.76      0.78       537
           Bare Soil       0.95      0.95      0.95      1317
             Asphalt       0.92      0.94      0.93      1850
             Bitumen       0.90      0.88      0.89      1457
               Tiles       0.99      0.99      0.99      8565
             Shadows       1.00      1.00      1.00       573

            accuracy                           0.97     29631
           macro avg       0.92      0.92      0.92     29631
        weighted avg       0.97      0.97      0.97     29631
These are strong results, indicating we have a robust model. Meadows were not classified as well as trees, likely due in part to the imbalance in the data. Self-blocking bricks were also not identified as well as the other classes, that class likewise having fewer training points.
from sklearn.ensemble import RandomForestClassifier
# I chose only n_estimators (number of trees) and criterion for my parameters, but more can be added
# Through some trial and error, 150 gave us good results (additional hyperparameter tuning methods can be
# used for better results, in theory)
random_forest = RandomForestClassifier(n_estimators = 150, criterion = 'entropy', random_state = 42)
random_forest.fit(X_train,y_train)
RandomForestClassifier(criterion='entropy', n_estimators=150, random_state=42)
rf_predictions = random_forest.predict(X_test)
rf_predictions
array([6, 8, 8, ..., 2, 8, 8], dtype=uint8)
# Let's visualize the above better:
labels = ['Water','Trees','Meadows','Self Blocking Bricks','Bare Soil','Asphalt','Bitumen','Tiles','Shadows']
rf_conf_matrix = confusion_matrix(y_test, rf_predictions)
rf_conf_matrix_df = pd.DataFrame(rf_conf_matrix, columns=labels, index = labels)
rf_conf_matrix_df.index.name = 'Actual'
rf_conf_matrix_df.columns.name = 'Predicted'
plt.figure(figsize = (10,10))
sns.set(font_scale=1.2)
sns.heatmap(rf_conf_matrix_df, cmap="Blues", annot=True,annot_kws={"size": 16}, fmt='d')
<AxesSubplot:xlabel='Predicted', ylabel='Actual'>
print(classification_report(y_test, rf_predictions, target_names = labels))
                      precision    recall  f1-score   support

               Water       1.00      1.00      1.00     13194
               Trees       0.94      0.93      0.93      1520
             Meadows       0.81      0.84      0.82       618
Self Blocking Bricks       0.81      0.80      0.80       537
           Bare Soil       0.95      0.95      0.95      1317
             Asphalt       0.94      0.94      0.94      1850
             Bitumen       0.91      0.91      0.91      1457
               Tiles       0.99      0.99      0.99      8565
             Shadows       1.00      1.00      1.00       573

            accuracy                           0.98     29631
           macro avg       0.93      0.93      0.93     29631
        weighted avg       0.98      0.98      0.98     29631
For comparison's sake, we will include the classification report for our SVM model:
print(classification_report(y_test, svm_predictions, target_names = labels))
                      precision    recall  f1-score   support

               Water       1.00      1.00      1.00     13194
               Trees       0.94      0.91      0.92      1520
             Meadows       0.78      0.84      0.81       618
Self Blocking Bricks       0.80      0.76      0.78       537
           Bare Soil       0.95      0.95      0.95      1317
             Asphalt       0.92      0.94      0.93      1850
             Bitumen       0.90      0.88      0.89      1457
               Tiles       0.99      0.99      0.99      8565
             Shadows       1.00      1.00      1.00       573

            accuracy                           0.97     29631
           macro avg       0.92      0.92      0.92     29631
        weighted avg       0.97      0.97      0.97     29631
Our random forest model achieves slightly better scores than our SVM model, with recall and precision improving for self blocking bricks and bitumen, and precision improving slightly for asphalt.
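The two reports can also be boiled down to a single macro-averaged F1 per model, which weights each class equally regardless of support; that makes it a useful summary here given the class imbalance. A sketch with small placeholder arrays standing in for `y_test` and the two models' predictions:

```python
import numpy as np
from sklearn.metrics import f1_score

# Hypothetical placeholder arrays; in this notebook these would be
# y_test, svm_predictions, and rf_predictions
y_true = np.array([1, 1, 2, 2, 3, 3])
preds_a = np.array([1, 1, 2, 3, 3, 3])  # one class-2 pixel mislabeled
preds_b = np.array([1, 1, 2, 2, 3, 3])  # all correct

for name, preds in [('model A', preds_a), ('model B', preds_b)]:
    print(name, round(f1_score(y_true, preds, average='macro'), 3))
```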
from tensorflow.keras.models import Sequential
2023-04-17 15:14:05.747121: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
from tensorflow.keras.layers import Dense
X_train.shape
(118521, 3)
y_train.shape
(118521,)
model = Sequential()
model.add(Dense(3,activation='relu'))
model.add(Dense(6,activation='relu'))
# Because this is a multiclass classification problem, we use softmax activation in the final output layer
model.add(Dense(10,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
# Implement early stopping:
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss',patience=1)
from tensorflow.keras.utils import to_categorical
y_train.shape
# We need to one-hot encode this for multiclass classification with categorical crossentropy
(118521,)
y_cat_train = to_categorical(y_train)
y_cat_test = to_categorical(y_test)
y_cat_train.shape
(118521, 10)
y_cat_train[0]
array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)
model.fit(X_train,y_cat_train,epochs=50,validation_data=(X_test,y_cat_test),callbacks=[early_stop])
Epoch 1/50
3704/3704 [==============================] - 5s 1ms/step - loss: 10.8929 - accuracy: 0.8320 - val_loss: 0.4345 - val_accuracy: 0.8902
Epoch 2/50
3704/3704 [==============================] - 5s 1ms/step - loss: 0.3816 - accuracy: 0.9008 - val_loss: 0.3394 - val_accuracy: 0.9089
Epoch 3/50
3704/3704 [==============================] - 5s 1ms/step - loss: 0.3232 - accuracy: 0.9064 - val_loss: 0.2983 - val_accuracy: 0.9070
Epoch 4/50
3704/3704 [==============================] - 5s 1ms/step - loss: 0.2858 - accuracy: 0.9147 - val_loss: 0.2636 - val_accuracy: 0.9184
Epoch 5/50
3704/3704 [==============================] - 5s 1ms/step - loss: 0.2526 - accuracy: 0.9195 - val_loss: 0.2284 - val_accuracy: 0.9230
Epoch 6/50
3704/3704 [==============================] - 5s 1ms/step - loss: 0.2197 - accuracy: 0.9259 - val_loss: 0.2095 - val_accuracy: 0.9260
Epoch 7/50
3704/3704 [==============================] - 5s 1ms/step - loss: 0.2139 - accuracy: 0.9268 - val_loss: 0.2107 - val_accuracy: 0.9345
<keras.callbacks.History at 0x7f9420539820>
ann_predictions = model.predict(X_test)
ann_predictions = np.argmax(ann_predictions, axis=1)
ann_predictions
926/926 [==============================] - 1s 852us/step
array([6, 8, 8, ..., 2, 8, 8])
# Let's visualize the above better:
labels = ['Water','Trees','Meadows','Self Blocking Bricks','Bare Soil','Asphalt','Bitumen','Tiles','Shadows']
ann_conf_matrix = confusion_matrix(y_test, ann_predictions)
ann_conf_matrix_df = pd.DataFrame(ann_conf_matrix, columns=labels, index = labels)
ann_conf_matrix_df.index.name = 'Actual'
ann_conf_matrix_df.columns.name = 'Predicted'
plt.figure(figsize = (10,10))
sns.set(font_scale=1.2)
sns.heatmap(ann_conf_matrix_df, cmap="Greens", annot=True,annot_kws={"size": 16}, fmt='d')
<AxesSubplot:xlabel='Predicted', ylabel='Actual'>
print(classification_report(y_test, ann_predictions, target_names = labels))
                      precision    recall  f1-score   support

               Water       0.99      1.00      0.99     13194
               Trees       0.71      1.00      0.83      1520
             Meadows       0.70      0.02      0.04       618
Self Blocking Bricks       0.22      0.01      0.01       537
           Bare Soil       0.70      0.98      0.82      1317
             Asphalt       0.86      0.90      0.88      1850
             Bitumen       0.86      0.83      0.84      1457
               Tiles       0.99      0.98      0.99      8565
             Shadows       0.90      0.67      0.77       573

            accuracy                           0.93     29631
           macro avg       0.77      0.71      0.69     29631
        weighted avg       0.93      0.93      0.92     29631
Our ANN model was not particularly well developed and was very basic in construction, but it still performed satisfactorily. It was noticeably worse, however, than our SVM and RF models. Other configurations could improve these results, e.g., running the model on the full set of bands rather than the 3 PCA components, experimenting with an autoencoder for dimensionality reduction before running the model, adding a dropout layer, etc.
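As one example of such a configuration, a sketch of a wider network with a dropout layer is below. The layer sizes and dropout rate are illustrative guesses, not tuned values, and the model would be trained on the same `to_categorical` targets used above:

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout

# Hypothetical wider configuration with dropout; sizes are illustrative, not tuned
model = Sequential([
    Input(shape=(3,)),               # 3 PCA components as input features
    Dense(64, activation='relu'),
    Dropout(0.2),                    # randomly drop 20% of units during training
    Dense(32, activation='relu'),
    Dense(10, activation='softmax')  # 10 outputs to match the to_categorical encoding
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
```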
Of the three models and their current configurations, random forest gave us the best results, so let's render the predicted image from the random forest model and compare it with the actual:
# Reintroduce 0 values that were not classified by the ground truth data to recreate the image.
# Predicting all labeled pixels in one vectorized call is far faster than calling predict row by row,
# and writing scalar labels avoids building a ragged array:
mask = band_data_reduced['gt_class'].values != 0
predictions_with_zero = np.zeros(band_data_reduced.shape[0], dtype=int)
predictions_with_zero[mask] = random_forest.predict(band_data_reduced.loc[mask, ['DIM1', 'DIM2', 'DIM3']].values)
# Load the image data
predicted_img = np.array(predictions_with_zero).reshape(1096, 715).astype('float')
# Create the figure and subplots
fig, ax = plt.subplots(1, 2, figsize=(15, 6))
# Plot the first subplot
a = ax[0].imshow(predicted_img, cmap='nipy_spectral')
ax[0].set_title('Predicted')
ax[0].set_axis_off()
plt.colorbar(a,ax=ax[0])
# Plot the second subplot
b = ax[1].imshow(gt_data, cmap='nipy_spectral')
ax[1].set_title('Actual')
ax[1].set_axis_off()
plt.colorbar(b,ax=ax[1])
# Display the figure
plt.show()
As can be seen visually, our random forest predictions are very close to the actual image. Some mistakes are visible within the bitumen-classified space if you zoom in (golden colored above), but overall our model, trained on over 118k pixels whose 102 hyperspectral bands were reduced to 3 principal components, performed very well.