Deep Neural Networks (CNNs,...)

Merged Jupyter Notebook


from file: CNN_mnist

Introduction

This notebook trains a Convolutional Neural Network (CNN) for classifying MNIST data. We will implement a CNN in PyTorch for handwritten-digit recognition on the MNIST dataset.

Table of contents


  • 1. Classification using CNN

    • Importing libraries
    • Loading the Data Set
    • Visualizing some samples
    • Defining the CNN model
    • Training the CNN model on MNIST
    • Evaluation on test data
  • 2. Batch Normalization

  • 3. Dropout
  • 4. Visualizing Filters
  • 5. Visualizing Feature Maps
  • 6. References

1. Classification using CNN

Importing libraries

In [ ]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import random_split
from tqdm.auto import tqdm, trange
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('running on', device)
running on cuda

Loading the Data Set

In [ ]:
batch_size = 32
transform = transforms.ToTensor()
train_val_data = datasets.MNIST('./data',
                                train=True,
                                download=True,
                                transform=transform)

test_data = datasets.MNIST('./data',
                           train=False,
                           transform=transform)
In [ ]:
test_data.classes
Out[ ]:
['0 - zero',
 '1 - one',
 '2 - two',
 '3 - three',
 '4 - four',
 '5 - five',
 '6 - six',
 '7 - seven',
 '8 - eight',
 '9 - nine']
In [ ]:
train_size = int(0.9 * len(train_val_data))
val_size = len(train_val_data) - train_size
train_data, val_data = random_split(train_val_data, [train_size, val_size])
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=True, drop_last=True)

print('number of batches in train data, test data and validation data are:')
print(len(train_loader), len(test_loader), len(val_loader))  # number of batches in each split
number of batches in train data, test data and validation data are:
1687 312 187
In [ ]:
images,labels=next(iter(train_loader))
print(images.shape,labels.shape)
print(test_data)
torch.Size([32, 1, 28, 28]) torch.Size([32])
Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()

Visualizing some samples

In [ ]:
x = next(iter(test_loader))[0][:10].squeeze(1)  # get a batch and take the first 10 images

fig = plt.figure(figsize=(20, 20))  # figure size in inches
for i in range(len(x)):
    ax = fig.add_subplot(1, 20, i + 1, xticks=[], yticks=[])
    ax.imshow(x[i].numpy(), cmap='gray', interpolation='nearest')

Defining the CNN model

In [ ]:
class CNN(nn.Module):
    def __init__(self,BN,Dropout):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.bn2 = nn.BatchNorm2d(64)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(64*24*24, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.Dropout=Dropout
        self.BN=BN

    def forward(self, x):
        x = self.conv1(x)
        if self.BN:
          x=self.bn1(x)

        x = self.relu(x)
        x = self.conv2(x)
        
        if self.BN:
          x=self.bn2(x)

        x = self.relu(x)
        if self.Dropout:
          x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.relu(x)
        if self.Dropout:
          x = self.dropout2(x)
        x = self.fc2(x)
        # Note: the training loop below uses nn.CrossEntropyLoss, which already
        # applies log-softmax internally, so returning softmax probabilities here
        # means softmax is effectively applied twice. The model still trains this
        # way, but returning the raw logits x would be the more usual choice.
        m = torch.nn.Softmax(dim=1)
        return m(x)

Training the CNN model on MNIST

In [53]:
#Training on val and train data

def train(net):
  
  from tqdm.notebook import tqdm
  costFunc = torch.nn.CrossEntropyLoss()
  optimizer=torch.optim.SGD(net.parameters(),lr=1e-4,momentum=0.9)
  val_loss,train_loss,train_acc,val_acc=[],[],[],[]
  for epoch in tqdm(range(41)):
          bchloss = 0
          net.train()
          correct=0
          total=0
          for i,batch in enumerate(train_loader,0):
              data,output=batch
              data,output = data.to(device),output.to(device)
              prediction = net(data)
              loss = costFunc(prediction,output)
              bchloss += int(loss.item()*1000)

              optimizer.zero_grad()
              loss.backward()
              optimizer.step()
        
              pred=torch.argmax(prediction,dim=1)
              total += output.size(0)#batch_size
              correct += (pred==output).sum().item()
          train_loss.append(bchloss/(len(train_loader)*32))
          train_acc.append((correct/total)*100)
          if epoch%5==0:
            print('***************** epoch',epoch,'*****************')
            print('train loss = ',bchloss/(len(train_loader)*32))
            print('train accuracy ','= ',str((correct/total)*100),'%')
          
          
          correct=0
          total=0
          bchloss=0
          for data,output in val_loader:
              data,output = data.to(device),output.to(device)
              prediction = net(data)
              loss = costFunc(prediction,output)
              bchloss += int(loss.item()*1000)
  
              pred=torch.argmax(prediction,dim=1)
              total += output.size(0)
              correct += (pred==output).sum().item()
          val_acc.append((correct/total)*100)
          val_loss.append(bchloss/(len(val_loader)*32))
          
          if epoch%5==0:
            print('validation loss = ',bchloss/(len(val_loader)*32))
            print('validation accuracy ','= ',str((correct/total)*100),'%')
          
  return val_loss,train_loss,train_acc,val_acc

In the next parts we will train three models to analyse the effect of batch normalization and dropout:

  1. model1: defined with neither batch normalization nor dropout (BN=False, Dropout=False)

  2. model2: defined with batch normalization but without dropout (BN=True, Dropout=False)

  3. model3: defined without batch normalization but with dropout (BN=False, Dropout=True)
In [56]:
model1 = CNN(BN=False,Dropout=False).to(device)
val1_loss,train1_loss,train1_acc,val1_acc=train(model1)
***************** epoch 0 *****************
train loss =  71.87635225251927
train accuracy  =  19.229771784232366 %
validation loss =  71.80314171122994
vaidation accuracy  =  33.1216577540107 %
***************** epoch 5 *****************
train loss =  52.3941908713693
train accuracy  =  81.63529934795494 %
validation loss =  51.66794786096256
vaidation accuracy  =  83.23863636363636 %
***************** epoch 10 *****************
train loss =  50.93153526970954
train accuracy  =  83.83224659158269 %
validation loss =  50.70755347593583
vaidation accuracy  =  84.6590909090909 %
***************** epoch 15 *****************
train loss =  50.625
train accuracy  =  84.54727326615293 %
validation loss =  50.405080213903744
vaidation accuracy  =  85.17713903743316 %
***************** epoch 20 *****************
train loss =  50.44565056312982
train accuracy  =  84.98258743331357 %
validation loss =  50.248495989304814
vaidation accuracy  =  85.46122994652407 %
***************** epoch 25 *****************
train loss =  50.300255631298164
train accuracy  =  85.4253112033195 %
validation loss =  50.11380347593583
vaidation accuracy  =  85.86229946524064 %
***************** epoch 30 *****************
train loss =  50.17634854771784
train accuracy  =  85.82913455838766 %
validation loss =  50.00835561497326
vaidation accuracy  =  86.38034759358288 %
***************** epoch 35 *****************
train loss =  50.06216656787196
train accuracy  =  86.11810906935389 %
validation loss =  49.90524732620321
vaidation accuracy  =  86.6644385026738 %
***************** epoch 40 *****************
train loss =  49.959228660343804
train accuracy  =  86.49599881446355 %
validation loss =  49.81751336898396
vaidation accuracy  =  86.99866310160428 %

In [41]:
plt.figure()
plt.subplot(2,1,1)
plt.title('accuracy')
plt.plot(val1_acc,label='validation')
plt.plot(train1_acc,label='train')
plt.legend()
plt.show()
In [42]:
plt.figure()
plt.subplot(2,1,1)
plt.title('loss')
plt.plot(val1_loss,label='validation')
plt.plot(train1_loss,label='train')
plt.legend()
plt.show()

Evaluation on test data

In [40]:
correct=0
total=0
model1.eval()
for data,output in test_loader:
            total += output.size(0)
            data,output = data.to(device),output.to(device)
            prediction = model1(data)
            pred=torch.argmax(prediction,dim=1)
            correct += (pred==output).sum().item()
print('test Accuracy on epoch ',30,'= ',str((correct/total)*100),'%')
test Accuracy on epoch  30 =  93.64983974358975 %

2. Batch Normalization

How does batch normalization change the training process of a CNN? Batch normalization helps the network converge faster: as the plots below show, the loss of the network with batch normalization decreases much faster than that of the network without it. By normalizing the activations of each input batch, batch normalization reduces internal covariate shift (the shift in the distribution of hidden activations from batch to batch), which speeds up convergence, reduces training time, and improves the speed, performance, and stability of our neural networks.
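
To make this concrete, here is a minimal, self-contained sketch (separate from the models above) of what nn.BatchNorm2d does in training mode: each channel is normalized with the mean and variance of the current batch and then rescaled by learnable parameters, which start at 1 and 0, so at initialization the layer is a pure normalization.

import torch
import torch.nn as nn

x = torch.randn(32, 8, 28, 28)                      # (batch, channels, H, W)
bn = nn.BatchNorm2d(8)
bn.train()                                          # use batch statistics
y = bn(x)

# manual normalization over the (batch, H, W) dimensions for comparison
mean = x.mean(dim=(0, 2, 3), keepdim=True)
var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True)
y_manual = (x - mean) / torch.sqrt(var + bn.eps)    # gamma=1, beta=0 at init
print(torch.allclose(y, y_manual, atol=1e-5))       # expected: True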

In [43]:
model2 = CNN(BN=True,Dropout=False).to(device)
val2_loss,train2_loss,train2_acc,val2_acc=train(model2)
***************** epoch 0 *****************
train loss =  57.30516449318316
train accuracy  =  73.44027860106699 %
validation loss =  50.4884692513369
vaidation accuracy  =  90.9090909090909 %
***************** epoch 5 *****************
train loss =  47.38783713692946
train accuracy  =  95.91730883224659 %
validation loss =  47.25300802139037
vaidation accuracy  =  96.2566844919786 %
***************** epoch 10 *****************
train loss =  46.75913233550682
train accuracy  =  97.38811499703615 %
validation loss =  46.77991310160428
vaidation accuracy  =  97.14237967914438 %
***************** epoch 15 *****************
train loss =  46.50611292234736
train accuracy  =  97.97903082394784 %
validation loss =  46.587901069518715
vaidation accuracy  =  97.7105614973262 %
***************** epoch 20 *****************
train loss =  46.35755038529935
train accuracy  =  98.34210136336692 %
validation loss =  46.474598930481285
vaidation accuracy  =  97.96122994652407 %
***************** epoch 25 *****************
train loss =  46.25653897451097
train accuracy  =  98.61625666864256 %
validation loss =  46.3793449197861
vaidation accuracy  =  98.21189839572193 %
***************** epoch 30 *****************
train loss =  46.18835210432721
train accuracy  =  98.74407231772378 %
validation loss =  46.35360962566845
vaidation accuracy  =  98.34558823529412 %

In [44]:
plt.title('loss')
plt.plot(val1_loss, 'r', label='without BN')
plt.plot(val2_loss, 'g', label='with BN')
plt.legend()
plt.show()
plt.title('accuracy')
plt.plot(val1_acc, 'm', label='without BN')
plt.plot(val2_acc, 'b', label='with BN')
plt.legend()
plt.show()

3. Dropout

Dropout is a technique used in neural networks to prevent overfitting the training data by dropping out neurons with probability p > 0. It forces the model to avoid relying too much on any particular set of features.

It helps to reduce overfitting and the generalization error. At each training step, dropout randomly deactivates some neurons instead of training on the full original network. In the next training step, a different set of hidden neurons is deactivated because of this probabilistic behaviour. In this way, by deactivating individual nodes at random during training, dropout effectively simulates an ensemble of neural networks with different architectures.
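
A tiny stand-alone sketch (separate from the models above) makes this behaviour visible: in training mode nn.Dropout zeroes activations at random and scales the survivors by 1/(1-p), while in evaluation mode it is a no-op.

import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

drop.train()               # training mode: roughly half the values become 0,
print(drop(x))             # the survivors are scaled to 1/(1-p) = 2.0

drop.eval()                # evaluation mode: dropout does nothing
print(drop(x))             # prints the input unchanged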

In [54]:
model3 = CNN(BN=False,Dropout=True).to(device)
val3_loss,train3_loss,train3_acc,val3_acc=train(model3)
***************** epoch 0 *****************
train loss =  71.8227252519265
train accuracy  =  16.458580320094843 %
validation loss =  71.64388368983957
vaidation accuracy  =  26.15307486631016 %
***************** epoch 5 *****************
train loss =  54.058054238292826
train accuracy  =  75.42234736218138 %
validation loss =  53.05347593582888
vaidation accuracy  =  78.72660427807486 %
***************** epoch 10 *****************
train loss =  49.96578615886188
train accuracy  =  87.73895969176053 %
validation loss =  49.65524732620321
vaidation accuracy  =  88.58622994652407 %
***************** epoch 15 *****************
train loss =  49.13205764671014
train accuracy  =  89.92849733254297 %
validation loss =  48.90992647058823
vaidation accuracy  =  90.57486631016043 %
***************** epoch 20 *****************
train loss =  48.728919679905154
train accuracy  =  91.03623295791346 %
validation loss =  48.5076871657754
vaidation accuracy  =  91.5942513368984 %
***************** epoch 25 *****************
train loss =  48.427941612329576
train accuracy  =  91.87722288085358 %
validation loss =  48.17613636363637
vaidation accuracy  =  92.59692513368985 %
***************** epoch 30 *****************
train loss =  48.2403304682869
train accuracy  =  92.3606994665086 %
validation loss =  48.02139037433155
vaidation accuracy  =  93.09826203208557 %
***************** epoch 35 *****************
train loss =  48.052663752222884
train accuracy  =  92.86640486069948 %
validation loss =  47.98495989304813
vaidation accuracy  =  93.09826203208557 %
***************** epoch 40 *****************
train loss =  47.920365293420275
train accuracy  =  93.32024303497333 %
validation loss =  47.84876336898396
vaidation accuracy  =  93.54946524064172 %

In [57]:
plt.title('loss')
plt.plot(val1_loss, 'r', label='without dropout')
plt.plot(val3_loss, 'g', label='with dropout')
plt.legend()
plt.show()
plt.title('accuracy')
plt.plot(val1_acc, 'm', label='without dropout')
plt.plot(val3_acc, 'b', label='with dropout')
plt.legend()
plt.show()

4. Visualizing Filters

When we talk about filters in convolutional neural networks, we are specifically talking about the weights. If you do a lot of practical deep learning coding, you may know them by the name of kernels. These filters determine which pixels or parts of the image the model will focus on.

In [47]:
model1.parameters
Out[47]:
<bound method Module.parameters of CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=36864, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
  (relu): ReLU()
)>
In [48]:
# collect the convolutional layers and their weights from the trained model
model_weights = [] 
conv_layers = [] 
model_children = list(model1.children())
In [49]:
counter = 0 
 
for i in range(len(model_children)):
    if type(model_children[i]) == nn.Conv2d:
        counter += 1
        model_weights.append(model_children[i].weight)
        conv_layers.append(model_children[i])
    elif type(model_children[i]) == nn.Sequential:
        for j in range(len(model_children[i])):
            for child in model_children[i][j].children():
                if type(child) == nn.Conv2d:
                    counter += 1
                    model_weights.append(child.weight)
                    conv_layers.append(child)
print(f"Total convolutional layers: {counter}")
Total convolutional layers: 2
In [50]:
# the first conv layer filters visualization
plt.figure(figsize=(20, 17))
for i, filter in enumerate(model_weights[0]):
    plt.subplot(8, 8, i+1) 
    plt.imshow(filter[0, :, :].detach().cpu(), cmap='gray')
    plt.axis('off')
plt.show()

5. Visualizing Feature Maps

Feature maps are what we get after a filter has passed over the pixel values of an input image. Specifically, they are what the convolutional layer sees after applying its filters to the image; this is what we call a convolution operation in deep learning.

In [51]:
images,labels=next(iter(train_loader))
results = [conv_layers[0](images.cuda())]
for i in range(1, len(conv_layers)):
    results.append(conv_layers[i](results[-1]))

outputs = results
In [52]:
# visualizing features
for num_layer in range(len(outputs)):
    plt.figure(figsize=(30, 30))
    layer_viz = outputs[num_layer][0, :, :, :]
    layer_viz = layer_viz.data
    print(layer_viz.size())
    for i, filter in enumerate(layer_viz):
        if i == 64:
            break
        plt.subplot(8, 8, i + 1)
        plt.imshow(filter.cpu(), cmap='gray')
        plt.axis("off")
    print(f" layer {num_layer} feature maps...")
    plt.show()
    plt.close()
torch.Size([32, 26, 26])
Saving layer 0 feature maps...
torch.Size([64, 24, 24])
Saving layer 1 feature maps...


from file: Linear Regression

Hamid Shojaee 96021955

Table of Contents

  • Introduction
  • Import packages
  • Reading data
  • Data overall info
  • Regression Models
  • Linear Regression
  • Simple Linear Regression
  • Multiple Regression
  • Ridge Regression
  • Lasso Regression
  • Measuring the error
  • Visualization

Introduction

In this Jupyter notebook, several linear regression methods are implemented and, as a result, the performance and accuracy of the different algorithms are reported. The main dataset contains information about Toyota Corolla cars with different options and features.

Import Packages

In [3]:
import numpy as np 
import pandas as pd 
In [5]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.metrics import mean_squared_error
from collections import Counter

Reading Data

In [6]:
data_df = pd.read_csv("./ToyotaCorolla.csv")

Data Overall Info

In [8]:
data_df.head()
Out[8]:
Price Age KM FuelType HP MetColor Automatic CC Doors Weight
0 13500 23 46986 Diesel 90 1 0 2000 3 1165
1 13750 23 72937 Diesel 90 1 0 2000 3 1165
2 13950 24 41711 Diesel 90 1 0 2000 3 1165
3 14950 26 48000 Diesel 90 0 0 2000 3 1165
4 13750 30 38500 Diesel 90 0 0 2000 3 1170
In [10]:
data_df.count()
Out[10]:
Price        1436
Age          1436
KM           1436
FuelType     1436
HP           1436
MetColor     1436
Automatic    1436
CC           1436
Doors        1436
Weight       1436
dtype: int64
In [11]:
data_df.describe()
Out[11]:
Price Age KM HP MetColor Automatic CC Doors Weight
count 1436.000000 1436.000000 1436.000000 1436.000000 1436.000000 1436.000000 1436.000000 1436.000000 1436.00000
mean 10730.824513 55.947075 68533.259749 101.502089 0.674791 0.055710 1566.827994 4.033426 1072.45961
std 3626.964585 18.599988 37506.448872 14.981080 0.468616 0.229441 187.182436 0.952677 52.64112
min 4350.000000 1.000000 1.000000 69.000000 0.000000 0.000000 1300.000000 2.000000 1000.00000
25% 8450.000000 44.000000 43000.000000 90.000000 0.000000 0.000000 1400.000000 3.000000 1040.00000
50% 9900.000000 61.000000 63389.500000 110.000000 1.000000 0.000000 1600.000000 4.000000 1070.00000
75% 11950.000000 70.000000 87020.750000 110.000000 1.000000 0.000000 1600.000000 5.000000 1085.00000
max 32500.000000 80.000000 243000.000000 192.000000 1.000000 1.000000 2000.000000 5.000000 1615.00000
In [12]:
data_df.isnull().sum()
Out[12]:
Price        0
Age          0
KM           0
FuelType     0
HP           0
MetColor     0
Automatic    0
CC           0
Doors        0
Weight       0
dtype: int64
In [13]:
# Check correlation among parameters
corr = data_df.corr()
fig, ax = plt.subplots(figsize=(8,8))
# Generate a heatmap
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f")
plt.xticks(range(len(corr.columns)), corr.columns)

plt.yticks(range(len(corr.columns)), corr.columns)

plt.show()
In [14]:
# plot regplots for Age, KM, CC & Weight against Price
f, axes = plt.subplots(2,2, figsize=(12,8))
# Age Vs Price
sns.regplot(x = 'Price', y = 'Age', data = data_df, ax = axes[0,0], scatter_kws={'alpha':0.6})
axes[0,0].set_xlabel('Price', fontsize = 14)
axes[0,0].set_ylabel('Age', fontsize=14)
axes[0,0].yaxis.tick_left()

# KM Vs Price
sns.regplot(x = 'Price', y = 'KM', data = data_df, ax = axes[0,1], scatter_kws={'alpha':0.6})
axes[0,1].set_xlabel('Price', fontsize = 14)
axes[0,1].set_ylabel('KM', fontsize=14)
axes[0,1].yaxis.set_label_position("right")
axes[0,1].yaxis.tick_right()

# CC Vs Price
sns.regplot(x = 'Price', y = 'CC', data = data_df, ax = axes[1,0], scatter_kws={'alpha':0.6})
axes[1,0].set_xlabel('Price', fontsize = 14)
axes[1,0].set_ylabel('CC', fontsize=14)
axes[1,0].yaxis.tick_left()

# Weight Vs Price
sns.regplot(x = 'Price', y = 'Weight', data = data_df, ax = axes[1,1], scatter_kws={'alpha':0.6})
axes[1,1].set_xlabel('Price', fontsize = 14)
axes[1,1].set_ylabel('Weight', fontsize=14)
axes[1,1].yaxis.set_label_position("right")
axes[1,1].yaxis.tick_right()

plt.show()
In [15]:
# One-hot encode the categorical column (FuelType).
data_df = pd.get_dummies(data_df)
In [16]:
data_df.head()
Out[16]:
Price Age KM HP MetColor Automatic CC Doors Weight FuelType_CNG FuelType_Diesel FuelType_Petrol
0 13500 23 46986 90 1 0 2000 3 1165 0 1 0
1 13750 23 72937 90 1 0 2000 3 1165 0 1 0
2 13950 24 41711 90 1 0 2000 3 1165 0 1 0
3 14950 26 48000 90 0 0 2000 3 1165 0 1 0
4 13750 30 38500 90 0 0 2000 3 1170 0 1 0

Regression Models

Linear Regression

In statistics, simple linear regression is a linear regression model with a single explanatory variable. That is, it concerns two-dimensional sample points with one independent variable and one dependent variable (conventionally, the x and y coordinates in a Cartesian coordinate system) and finds a linear function (a non-vertical straight line) that, as accurately as possible, predicts the dependent variable values as a function of the independent variable. The adjective simple refers to the fact that the outcome variable is related to a single predictor.
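
As a small illustration of what the fit computes (a sketch with made-up numbers, not the Toyota data), the least-squares slope and intercept have a closed form: slope = cov(x, y) / var(x) and intercept = mean(y) - slope * mean(x).

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([2.1, 3.9, 6.2, 8.1])

slope = np.cov(x, y, bias=True)[0, 1] / np.var(x)   # ~2.03
intercept = y.mean() - slope * x.mean()             # ~0.0
print(slope, intercept)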

In [17]:
from sklearn.linear_model import LinearRegression

Simple Linear Regression

Let's see how our model performs if we consider only one independent variable (Age) to predict the price.

In [19]:
X_simple_lreg = data_df[["Age"]].values
y_simple_lreg = data_df["Price"].values

print(X_simple_lreg[0:5])
print(y_simple_lreg[0:5])
[[23]
 [23]
 [24]
 [26]
 [30]]
[13500 13750 13950 14950 13750]
In [20]:
# Create train test dataset
from sklearn.model_selection import train_test_split
X_train_slreg, X_test_slreg, y_train_slreg, y_test_slreg = train_test_split(X_simple_lreg,y_simple_lreg, test_size = 0.25, random_state = 4)
print('Train Dataset : ', X_train_slreg.shape, y_train_slreg.shape)
print('Test Dataset : ', X_test_slreg.shape, y_test_slreg.shape)
Train Dataset :  (1077, 1) (1077,)
Test Dataset :  (359, 1) (359,)
In [21]:
simple_lreg = LinearRegression()
simple_lreg.fit(X_train_slreg, y_train_slreg)
print('Intercept : ', simple_lreg.intercept_)
print('Slope : ', simple_lreg.coef_)
Intercept :  20137.410273159752
Slope :  [-169.09157285]

As we can see, the slope is about -169.09, which means the price of the vehicle is strongly affected by its age: each unit increase in Age lowers the predicted price by roughly 169. In other words, Age is negatively related to Price.
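
As a quick sanity check (not part of the original analysis), we can plug a value into the fitted line; with the intercept and slope above, a car with Age = 60 is predicted to cost about 20137.41 - 169.09 * 60 ≈ 9992.

# hypothetical example: predicted Price for Age = 60
print(simple_lreg.predict([[60]]))   # ≈ [9991.9]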

In [22]:
# Use the model to predict the test dataset.
y_simplelreg_pred_test = simple_lreg.predict(X_test_slreg)

# Use the model to predict the train dataset.
y_simplelreg_pred_train = simple_lreg.predict(X_train_slreg)
In [23]:
# Calculate the evaluation metrics of the model.
from sklearn.metrics import r2_score
r2_score_slreg_train = r2_score(y_simplelreg_pred_train, y_train_slreg)
r2_score_slreg_test = r2_score(y_simplelreg_pred_test, y_test_slreg)
rmse_slreg = np.sqrt(mean_squared_error(y_simplelreg_pred_test, y_test_slreg)**2)
print('r2_ score for train dataset for simple linear reg : ', r2_score_slreg_train)
print('r2_ score for test dataset for simple linear reg : ', r2_score_slreg_test)
print('root mean squared error for simple linear reg : ', rmse_slreg)
r2_ score for train dataset for simple linear reg :  0.6978153650611345
r2_ score for test dataset for simple linear reg :  0.6734388905656996
root mean squared error for simple linear reg :  3438902.3311535786

Multiple Regression

Multiple linear regression (MLR), also known simply as multiple regression, is a statistical technique that uses several explanatory variables to predict the outcome of a response variable. The goal of multiple linear regression (MLR) is to model the linear relationship between the explanatory (independent) variables and response (dependent) variable.

Let us include some more independent variables to predict the price of the vehicle.

In [24]:
# Separating the independent and dependent variable.
X_multi_lreg = data_df.drop('Price', axis = 1).values
y_multi_lreg = data_df["Price"].values.reshape(-1,1)
In [25]:
# Create train test dataset
from sklearn.model_selection import train_test_split
X_train_mlreg, X_test_mlreg, y_train_mlreg, y_test_mlreg = train_test_split(X_multi_lreg,y_multi_lreg, test_size = 0.25, random_state = 4)
print('Train Dataset : ', X_train_mlreg.shape, y_train_mlreg.shape)
print('Test Dataset : ', X_test_mlreg.shape, y_test_mlreg.shape)
Train Dataset :  (1077, 11) (1077, 1)
Test Dataset :  (359, 11) (359, 1)
In [26]:
multi_lreg = LinearRegression()
multi_lreg.fit(X_train_mlreg, y_train_mlreg)
print('Intercept : ', multi_lreg.intercept_)
print('Slope : ', multi_lreg.coef_)
Intercept :  [-3502.88149804]
Slope :  [[-1.20452220e+02 -1.55550211e-02  6.03642847e+01  2.06335655e+01
   3.04043156e+02 -4.36662302e+00 -1.69942436e+01  2.12472911e+01
  -1.74877821e+03  1.98227868e+03 -2.33500476e+02]]
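
The raw coefficient array is hard to read on its own; a small helper (not in the original notebook) that pairs each coefficient with its column name makes it clearer which feature each slope belongs to.

# assumes data_df and multi_lreg defined above
feature_names = data_df.drop('Price', axis=1).columns
for name, coef in zip(feature_names, multi_lreg.coef_[0]):
    print(f"{name}: {coef:.4f}")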
In [27]:
# Use the model to predict the test dataset.
y_mlreg_pred_test = multi_lreg.predict(X_test_mlreg)

# Use the model to predict the train dataset.
y_mlreg_pred_train = multi_lreg.predict(X_train_mlreg)
In [30]:
# Have a look at the predicted & actual values.
print(y_mlreg_pred_test[0:5])
# print(y_test[0:5])

print(y_mlreg_pred_train[0:5])
# print(y_train[0:5])
[[ 7903.20434738]
 [10249.8764368 ]
 [ 9573.1006555 ]
 [11689.26155808]
 [ 8921.60255708]]
[[10138.88623699]
 [ 6324.52345301]
 [11437.15043997]
 [13059.85723899]
 [ 8895.51258966]]
In [31]:
# Calculate the evaluation metrics of the model.
from sklearn.metrics import r2_score
r2_score_mlreg_train = r2_score(y_mlreg_pred_train, y_train_mlreg)
r2_score_mlreg_test = r2_score(y_mlreg_pred_test, y_test_mlreg)
rmse_mlreg = np.sqrt(mean_squared_error(y_mlreg_pred_test, y_test_mlreg)**2)
print('r2_ score for train dataset for multi linear reg : ', r2_score_mlreg_train)
print('r2_ score for test dataset for multi linear reg : ', r2_score_mlreg_test)
print('root mean squared error for multi linear reg : ', rmse_mlreg)
r2_ score for train dataset for multi linear reg :  0.8453913190051008
r2_ score for test dataset for multi linear reg :  0.854121832445731
root mean squared error for multi linear reg :  1836109.732041979

As we can see, using multiple independent variables improves the accuracy of the model.

Ridge Regression

Ridge regression is a way to create a parsimonious model when the number of predictor variables in a set exceeds the number of observations, or when a data set has multicollinearity (correlations between predictor variables).
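
For reference, ridge regression with regularization strength α (the alpha parameter used below) minimizes the penalized least-squares objective

$$\min_{\beta}\; \lVert y - X\beta \rVert_2^2 + \alpha \lVert \beta \rVert_2^2,$$

so a larger α shrinks the coefficients more strongly towards zero.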

Let us fit a ridge regression model on the full set of features.

In [32]:
# Separating the independent and dependent variable.
X_ridge_reg = data_df.drop('Price', axis = 1).values
y_ridge_reg = data_df["Price"].values.reshape(-1,1)
In [33]:
# Create train test dataset
from sklearn.model_selection import train_test_split
X_train_ridge_reg, X_test_ridge_reg, y_train_ridge_reg, y_test_ridge_reg = train_test_split(X_ridge_reg,y_ridge_reg, test_size = 0.25, random_state = 4)
print('Train Dataset : ', X_train_ridge_reg.shape, y_train_ridge_reg.shape)
print('Test Dataset : ', X_test_ridge_reg.shape, y_test_ridge_reg.shape)
Train Dataset :  (1077, 11) (1077, 1)
Test Dataset :  (359, 11) (359, 1)
In [34]:
from sklearn.linear_model import Ridge

## training the model

ridgeReg = Ridge(alpha=0.05, normalize=True)

ridgeReg.fit(X_train_ridge_reg,y_train_ridge_reg)

# Use the model to predict the test dataset.
y_ridgereg_pred_test = ridgeReg.predict(X_test_ridge_reg)

# Use the model to predict the train dataset.
y_ridgereg_pred_train = ridgeReg.predict(X_train_ridge_reg)

# Calculate the evaluation metrics of the model.
from sklearn.metrics import r2_score
r2_score_ridgereg_train = r2_score(y_ridgereg_pred_train, y_train_ridge_reg)
r2_score_ridgereg_test = r2_score(y_ridgereg_pred_test, y_test_ridge_reg)
rmse_ridgereg = np.sqrt(mean_squared_error(y_ridgereg_pred_test, y_test_ridge_reg)**2)
print('r2_ score for train dataset for ridge reg : ', r2_score_ridgereg_train)
print('r2_ score for test dataset for ridge reg : ', r2_score_ridgereg_test)
print('root mean squared error for ridge reg : ', rmse_ridgereg)
r2_ score for train dataset for ridge reg :  0.831886671104807
r2_ score for test dataset for ridge reg :  0.8384733768946311
root mean squared error for ridge reg :  1879140.3375855063

Lasso Regression

Lasso regression is a type of linear regression that uses shrinkage. Shrinkage is where data values are shrunk towards a central point, like the mean. The lasso procedure encourages simple, sparse models (i.e. models with fewer parameters). This particular type of regression is well suited for models showing high levels of multicollinearity, or when you want to automate certain parts of model selection, like variable selection/parameter elimination.
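
For reference, scikit-learn's Lasso minimizes

$$\min_{\beta}\; \frac{1}{2n} \lVert y - X\beta \rVert_2^2 + \alpha \lVert \beta \rVert_1,$$

and the L1 penalty tends to drive some coefficients exactly to zero, which is what produces the sparse models described above.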

In [35]:
from sklearn.linear_model import Lasso

## training the model

lassoReg = Lasso(alpha=0.3, normalize=True)

lassoReg.fit(X_train_ridge_reg,y_train_ridge_reg)

# Use the model to predict the test dataset.
y_lassoreg_pred_test = lassoReg.predict(X_test_ridge_reg)

# Use the model to predict the train dataset.
y_lassoreg_pred_train = lassoReg.predict(X_train_ridge_reg)

# Calculate the evaluation metrics of the model.
from sklearn.metrics import r2_score
r2_score_lassoreg_train = r2_score(y_lassoreg_pred_train, y_train_ridge_reg)
r2_score_lassoreg_test = r2_score(y_lassoreg_pred_test, y_test_ridge_reg)
rmse_lassoreg = np.sqrt(mean_squared_error(y_lassoreg_pred_test, y_test_ridge_reg)**2)
print('r2_ score for train dataset for lasso reg : ', r2_score_lassoreg_train)
print('r2_ score for test dataset for lasso reg : ', r2_score_lassoreg_test)
print('root mean squared error for lasso reg : ', rmse_lassoreg)
r2_ score for train dataset for lasso reg :  0.8428580545349307
r2_ score for test dataset for lasso reg :  0.8502844251108707
root mean squared error for lasso reg :  1846994.3544547232
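
The next cell uses ElasticNet, which combines the two penalties above; in scikit-learn its objective is

$$\min_{\beta}\; \frac{1}{2n} \lVert y - X\beta \rVert_2^2 + \alpha \rho \lVert \beta \rVert_1 + \frac{\alpha (1 - \rho)}{2} \lVert \beta \rVert_2^2,$$

where ρ is the l1_ratio parameter.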
In [36]:
from sklearn.linear_model import ElasticNet

## training the model

elasticNetReg = ElasticNet(alpha=1, l1_ratio=0.5, normalize=True)

elasticNetReg.fit(X_train_ridge_reg,y_train_ridge_reg)

# Use the model to predict the test dataset.
y_elasticNetReg_pred_test = elasticNetReg.predict(X_test_ridge_reg)

# Use the model to predict the train dataset.
y_elasticNetReg_pred_train = elasticNetReg.predict(X_train_ridge_reg)

# Calculate the evaluation metrics of the model.
from sklearn.metrics import r2_score
r2_score_elasticNetReg_train = r2_score(y_elasticNetReg_pred_train, y_train_ridge_reg)
r2_score_elasticNetReg_test = r2_score(y_elasticNetReg_pred_test, y_test_ridge_reg)
rmse_elasticNetReg = np.sqrt(mean_squared_error(y_lassoreg_pred_test, y_test_ridge_reg)**2)
print('r2_ score for train dataset for elastic net reg : ', r2_score_elasticNetReg_train)
print('r2_ score for test dataset for elastic net reg : ', r2_score_elasticNetReg_test)
print('root mean squared error for elastic net reg : ', rmse_elasticNetReg)
r2_ score for train dataset for elastic net reg :  -97076.91299047269
r2_ score for test dataset for elastic net reg :  -103047.22391574454
root mean squared error for elastic net reg :  1846994.3544547232

Measuring the Error

In [40]:
Models = [('Simple Linear Regression', r2_score_slreg_train, r2_score_slreg_test, rmse_slreg),
          ('Multiple Linear Regression', r2_score_mlreg_train, r2_score_mlreg_test, rmse_mlreg),
          ('Ridge Regression', r2_score_ridgereg_train, r2_score_ridgereg_test, rmse_ridgereg),
          ('Lasso Regression', r2_score_lassoreg_train, r2_score_lassoreg_test, rmse_lassoreg),]
In [41]:
predict = pd.DataFrame(data = Models, columns = ['Models', 'r2_score Training', 'r2_score Testing', 'RMSE'])
predict
Out[41]:
Models r2_score Training r2_score Testing RMSE
0 Simple Linear Regression 0.697815 0.673439 3.438902e+06
1 Multiple Linear Regression 0.845391 0.854122 1.836110e+06
2 Ridge Regression 0.831887 0.838473 1.879140e+06
3 Lasso Regression 0.842858 0.850284 1.846994e+06

Visualization

The performance of each algorithm is visualized below:

In [42]:
f, axes = plt.subplots(3,1, figsize=(18,8))

sns.barplot(x='Models', y='r2_score Training', data = predict, ax = axes[0])
axes[0].set_xlabel('Models')
axes[0].set_ylabel('r2_score Training')
axes[0].set_ylim(0,1.0)

sns.barplot(x='Models', y='r2_score Testing', data = predict, ax = axes[1])
axes[1].set_xlabel('Models')
axes[1].set_ylabel('r2_score Testing')
axes[1].set_ylim(0,1.0)

sns.barplot(x='Models', y='RMSE', data = predict, ax = axes[2])
axes[2].set_xlabel('Models')
axes[2].set_ylabel('RMSE')

plt.tight_layout()
plt.show()
In [ ]:
 


from file: NN_MLP

Introduction:

This notebook and its contents will try to shed some light on NNs (Neural Networks) and MLPs (Multilayer Perceptrons).

Neural Networks:

Neural networks are modeled loosely on the human brain. A neural net consists of thousands or even millions of simple processing nodes that are densely interconnected. Most of today’s neural nets are organized into layers of nodes, and they’re “feed-forward,” meaning that data moves through them in only one direction. An individual node might be connected to several nodes in the layer beneath it, from which it receives data, and several nodes in the layer above it, to which it sends data.

To each of its incoming connections, a node will assign a number known as a “weight.” When the network is active, the node receives a different data item — a different number — over each of its connections and multiplies it by the associated weight. It then adds the resulting products together, yielding a single number. If that number is below a threshold value, the node passes no data to the next layer. If the number exceeds the threshold value, the node “fires,” which in today’s neural nets generally means sending the number — the sum of the weighted inputs — along all its outgoing connections.
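
As a toy illustration of the weighted-sum-and-threshold behaviour just described (all numbers below are made up for the example), a single node with three inputs could be simulated like this:

import numpy as np

inputs = np.array([0.5, 0.3, 0.9])        # data arriving on the incoming connections
weights = np.array([0.4, -0.6, 0.8])      # the weight assigned to each connection
threshold = 0.5

activation = np.dot(inputs, weights)      # 0.5*0.4 + 0.3*(-0.6) + 0.9*0.8 = 0.74
fires = activation > threshold            # True: the node "fires" and passes 0.74 on
print(activation, fires)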

When a neural net is being trained, all of its weights and thresholds are initially set to random values. Training data is fed to the bottom layer — the input layer — and it passes through the succeeding layers, getting multiplied and added together in complex ways, until it finally arrives, radically transformed, at the output layer. During training, the weights and thresholds are continually adjusted until training data with the same labels consistently yield similar outputs.


MLP:

A multilayer perceptron (MLP) is a feedforward artificial neural network that generates a set of outputs from a set of inputs. An MLP is characterized by several layers of input nodes connected as a directed graph between the input and output layers. MLP uses backpropagation for training the network.

Is a Multilayer Perceptron (MLP) the same thing as a Deep Neural Network (DNN)?

An MLP is a subset of DNNs: while DNNs can contain loops, an MLP is always feed-forward, i.e. a multilayer perceptron is a finite acyclic graph.

Usages:

NNs have become the infrastructure of deep learning, voice recognition and almost every successful AI agent and model in the modern world.

Example:

We have summarized the importance of neural networks and their applications; now we will work through an example to shed some light on the subject and make it more tangible!

Below are the steps to learn the Fashion MNIST dataset with neural networks and a handful of built-in libraries.

First we have to import needed libraries:

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

We import the Fashion MNIST dataset, a dataset which contains 70,000 grayscale images in 10 categories. The images show individual articles of clothing at low resolution (28 by 28 pixels).

Here, 60,000 images are used to train the network and 10,000 images to evaluate how accurately the network learned to classify images. We can access the Fashion MNIST directly from TensorFlow.

In [3]:
fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
32768/29515 [=================================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
26427392/26421880 [==============================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
8192/5148 [===============================================] - 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
4423680/4422102 [==============================] - 0s 0us/step

The labels are an array of integers, ranging from 0 to 9. These correspond to the class of clothing the image represents:

Label   Class
0       T-shirt/top
1       Trouser
2       Pullover
3       Dress
4       Coat
5       Sandal
6       Shirt
7       Sneaker
8       Bag
9       Ankle boot

In [4]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

If you inspect the first image in the training set, you will see that the pixel values fall in the range 0 to 255. We scale these values to a range of 0 to 1 before feeding them to the neural network model; to do so, we divide the values by 255.

In [6]:
train_images = train_images / 255.0
test_images = test_images / 255.0

To check that the scaling worked correctly, and to get a sense of the pictures, we display the first 25 training images with their class names.

In [7]:
plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i], cmap=plt.cm.binary)
    plt.xlabel(class_names[train_labels[i]])
plt.show()

Now we have to create our neural network. The basic building block of a neural network is the layer. Layers extract representations from the data fed into them. Hopefully, these representations are meaningful for the problem at hand.

Most of deep learning consists of chaining together simple layers. Most layers, such as tf.keras.layers.Dense, have parameters that are learned during training.

The first layer in this network, tf.keras.layers.Flatten, transforms the format of the images from a two-dimensional array (of 28 by 28 pixels) to a one-dimensional array (of 28 * 28 = 784 pixels). In other words, this layer flattens the data; think of it as unstacking rows of pixels in the image and lining them up. This layer has no parameters to learn; it only reformats the data.

After the pixels are flattened, the network consists of a sequence of two tf.keras.layers.Dense layers. These are densely connected, or fully connected, neural layers. The first Dense layer has 128 nodes (or neurons). The second (and last) layer returns a logits array with length 10. Each node contains a score that indicates how likely the current image is to belong to one of the 10 classes.

In [10]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10)
])
#compiling the model
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
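
To double-check the layer shapes and parameter counts described above, we can ask Keras for a summary: the Flatten layer has no parameters, the first Dense layer has 784*128 + 128 = 100,480 and the output layer has 128*10 + 10 = 1,290.

model.summary()   # prints the layers, their output shapes and parameter counts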

Train the model

Training the neural network model requires the following steps:

  • Feed the training data to the model. In this example, the training data is in the train_images and train_labels arrays.
  • The model learns to associate images and labels.
  • You ask the model to make predictions about a test set—in this example, the test_images array.
  • Verify that the predictions match the labels from the test_labels array.
In [11]:
model.fit(train_images, train_labels, epochs=10)
Epoch 1/10
1875/1875 [==============================] - 5s 2ms/step - loss: 0.4943 - accuracy: 0.8267
Epoch 2/10
1875/1875 [==============================] - 4s 2ms/step - loss: 0.3721 - accuracy: 0.8671
Epoch 3/10
1875/1875 [==============================] - 4s 2ms/step - loss: 0.3369 - accuracy: 0.8756
Epoch 4/10
1875/1875 [==============================] - 4s 2ms/step - loss: 0.3126 - accuracy: 0.8849
Epoch 5/10
1875/1875 [==============================] - 4s 2ms/step - loss: 0.2944 - accuracy: 0.8913
Epoch 6/10
1875/1875 [==============================] - 4s 2ms/step - loss: 0.2788 - accuracy: 0.8969
Epoch 7/10
1875/1875 [==============================] - 4s 2ms/step - loss: 0.2670 - accuracy: 0.9014
Epoch 8/10
1875/1875 [==============================] - 4s 2ms/step - loss: 0.2563 - accuracy: 0.9044
Epoch 9/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.2457 - accuracy: 0.9075
Epoch 10/10
1875/1875 [==============================] - 3s 2ms/step - loss: 0.2366 - accuracy: 0.9110
Out[11]:
<tensorflow.python.keras.callbacks.History at 0x7f4a7bc54f90>

As the model trains, the loss and accuracy metrics are displayed. This model reaches an accuracy of about 0.91 (or 91%) on the training data.

Next, we have to test the trained model against the test data.

In [12]:
test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

print('\nTest accuracy:', test_acc)
313/313 - 0s - loss: 0.3595 - accuracy: 0.8737

Test accuracy: 0.8737000226974487

It turns out that the accuracy on the test dataset is a little less than the accuracy on the training dataset. This gap between training accuracy and test accuracy represents overfitting. Overfitting happens when a machine learning model performs worse on new, previously unseen inputs than it does on the training data. An overfitted model "memorizes" the noise and details in the training dataset to a point where it negatively impacts the performance of the model on the new data.

With the model trained, you can use it to make predictions about some images.

Let's plot several images with their predictions. Note that the model can be wrong even when very confident.

In [16]:
probability_model = tf.keras.Sequential([model, 
                                         tf.keras.layers.Softmax()])
predictions = probability_model.predict(test_images)

def plot_image(i, predictions_array, true_label, img):
  true_label, img = true_label[i], img[i]
  plt.grid(False)
  plt.xticks([])
  plt.yticks([])

  plt.imshow(img, cmap=plt.cm.binary)

  predicted_label = np.argmax(predictions_array)
  if predicted_label == true_label:
    color = 'blue'
  else:
    color = 'red'

  plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                100*np.max(predictions_array),
                                class_names[true_label]),
                                color=color)

def plot_value_array(i, predictions_array, true_label):
  true_label = true_label[i]
  plt.grid(False)
  plt.xticks(range(10))
  plt.yticks([])
  thisplot = plt.bar(range(10), predictions_array, color="#777777")
  plt.ylim([0, 1])
  predicted_label = np.argmax(predictions_array)

  thisplot[predicted_label].set_color('red')
  thisplot[true_label].set_color('blue')
In [17]:
# Plot the first X test images, their predicted labels, and the true labels.
# Color correct predictions in blue and incorrect predictions in red.
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
  plt.subplot(num_rows, 2*num_cols, 2*i+1)
  plot_image(i, predictions[i], test_labels, test_images)
  plt.subplot(num_rows, 2*num_cols, 2*i+2)
  plot_value_array(i, predictions[i], test_labels)
plt.tight_layout()
plt.show()

We can also use the model to classify a single image:

In [19]:
img = test_images[20]
img = (np.expand_dims(img,0))
predictions_single = probability_model.predict(img)
plot_value_array(20, predictions_single[0], test_labels)
_ = plt.xticks(range(10), class_names, rotation=45)

Authors

  • Mahdi Salmani (Author)
  • Parsa Hosseini (Author)
  • Alireza Dehghanpour Farashah (Author)
  • Ali J. Alaee (Supervisor)