# Recitation - 6
___
### Image Dataset
Custom Dataset & Dataloader\
Torchvision ImageFolder Dataset
### Model
Residual Block\
CNN Models with Residual Block
### Cosine Similarity
### Losses
Center Loss\
Triplet Loss




In [None]:
!unzip mnist

Archive: mnist.zip
replace mnist/testing/0/1487.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace mnist/testing/0/1768.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: no
replace mnist/testing/0/192.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
!nvidia-smi

Fri Oct 8 13:33:46 2021 
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.74 Driver Version: 460.32.03 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
| 0 Tesla K80 Off | 00000000:00:04.0 Off | 0 |
| N/A 73C P0 74W / 149W | 1621MiB / 11441MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
 
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
| No running processes found |
+-----------------------------------------------------------------------------+


## Imports

In [None]:
import os
import numpy as np
from PIL import Image

import numpy as np
import torch
import torchvision 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

## Custom DataSet with DataLoader
___
We have used a subset of MNIST

In [None]:
class ImageDataset(Dataset):
 def __init__(self, file_list, target_list):
 self.file_list = file_list
 self.target_list = target_list
 self.n_class = len(list(set(target_list)))

 def __len__(self):
 return len(self.file_list)

 def __getitem__(self, index):
 img = Image.open(self.file_list[index])
 img = torchvision.transforms.ToTensor()(img)
 label = self.target_list[index]
 return img, label

#### Parse the given directory to accumulate all the images

In [None]:
def parse_data(datadir):
 img_list = []
 ID_list = []
 for root, directories, filenames in os.walk(datadir): #root: median/1
 for filename in filenames:
 if filename.endswith('.png'):
 filei = os.path.join(root, filename)
 img_list.append(filei)
 ID_list.append(root.split('/')[-1])

 # construct a dictionary, where key and value correspond to ID and target
 uniqueID_list = list(set(ID_list))
 class_n = len(uniqueID_list)
 target_dict = dict(zip(uniqueID_list, range(class_n)))
 label_list = [target_dict[ID_key] for ID_key in ID_list]

 print('{}\t\t{}\n{}\t\t{}'.format('#Images', '#Labels', len(img_list), len(set(label_list))))
 return img_list, label_list, class_n

In [None]:
img_list, label_list, class_n = parse_data('mnist/training')

#Images		#Labels
5000		10


In [None]:
print(img_list[1888])

mnist/training/7/11854.png


In [None]:
trainset = ImageDataset(img_list, label_list)

In [None]:
train_data_item, train_data_label = trainset.__getitem__(0)

In [None]:
print('data item shape: {}\t data item label: {}'.format(train_data_item.shape, train_data_label))

data item shape: torch.Size([1, 28, 28])	 data item label: 0


In [None]:
dataloader = DataLoader(trainset, batch_size=10, shuffle=True, num_workers=1, drop_last=False)

## Torchvision DataSet and DataLoader

In [None]:
imageFolder_dataset = torchvision.datasets.ImageFolder(root='mnist/training/', transform=torchvision.transforms.ToTensor())

In [None]:
imageFolder_dataloader = DataLoader(imageFolder_dataset, batch_size=10, shuffle=True, num_workers=1)

In [None]:
print(imageFolder_dataset.__len__(), len(imageFolder_dataset.classes))

5000 10


## Residual Block

Resnet: https://arxiv.org/pdf/1512.03385.pdf

Here is a basic usage of shortcut in Resnet

In [None]:
# This is the simplest possible residual block, with only one CNN layer.
# Looking at the paper, you can extend this block to have more layers, bottleneck, grouped convs (from shufflenet), etc.
# Or even look at more recent papers like resnext, regnet, resnest, senet, etc.
class SimpleResidualBlock(nn.Module):
 def __init__(self, channel_size, stride=1):
 super().__init__()
 self.conv1 = nn.Conv2d(channel_size, channel_size, kernel_size=3, stride=stride, padding=1, bias=False)
 self.bn1 = nn.BatchNorm2d(channel_size)
 if stride == 1:
 self.shortcut = nn.Identity()
 else:
 self.shortcut = nn.Conv2d(channel_size, channel_size, kernel_size=1, stride=stride)
 self.relu = nn.ReLU()
 
 def forward(self, x):
 out = self.conv1(x)
 out = self.bn1(out)
 
 shortcut = self.shortcut(x)
 
 out = self.relu(out + shortcut)
 
 return out

In [None]:
# This has hard-coded hidden feature sizes.
# You can extend this to take in a list of hidden sizes as argument if you want.
class ClassificationNetwork(nn.Module):
 def __init__(self, in_features, num_classes):
 super().__init__()
 
 self.layers = nn.Sequential(
 nn.Conv2d(in_features, 64, kernel_size=3, stride=1, padding=1, bias=False),
 nn.BatchNorm2d(64),
 nn.ReLU(),
 SimpleResidualBlock(64),
 SimpleResidualBlock(64),
 SimpleResidualBlock(64),
 SimpleResidualBlock(64),
 nn.AdaptiveAvgPool2d((1, 1)), # For each channel, collapses (averages) the entire feature map (height & width) to 1x1
 nn.Flatten(), # the above ends up with batch_size x 64 x 1 x 1, flatten to batch_size x 64
 )
 self.linear = nn.Linear(64, num_classes)
 
 def forward(self, x, return_embedding=False):
 embedding = self.layers(x) 
 
 if return_embedding:
 return embedding
 else:
 return self.linear(embedding) 

In [None]:
train_dataset = torchvision.datasets.ImageFolder(root='mnist/training/', 
 transform=torchvision.transforms.ToTensor())
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, 
 shuffle=True, num_workers=8)

dev_dataset = torchvision.datasets.ImageFolder(root='mnist/testing/', 
 transform=torchvision.transforms.ToTensor())
dev_dataloader = torch.utils.data.DataLoader(dev_dataset, batch_size=128, 
 shuffle=False, num_workers=8)

 cpuset_checked))


In [None]:
numEpochs = 10
in_features = 3 # RGB channels

learningRate = 5e-2
weightDecay = 5e-5

num_classes = len(train_dataset.classes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

network = ClassificationNetwork(in_features, num_classes)
network = network.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(network.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)

In [None]:
# Train!
for epoch in range(numEpochs):
 
 # Train
 network.train()
 avg_loss = 0.0
 for batch_num, (x, y) in enumerate(train_dataloader):
 optimizer.zero_grad()
 
 x, y = x.to(device), y.to(device)

 outputs = network(x)

 loss = criterion(outputs, y.long())
 loss.backward()
 optimizer.step()

 avg_loss += loss.item()

 if batch_num % 10 == 9:
 print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch, batch_num+1, avg_loss/50))
 avg_loss = 0.0
 
 # Validate
 network.eval()
 num_correct = 0
 for batch_num, (x, y) in enumerate(dev_dataloader):
 x, y = x.to(device), y.to(device)
 outputs = network(x)
 num_correct += (torch.argmax(outputs, axis=1) == y).sum().item()
 
 print('Epoch: {}, Validation Accuracy: {:.2f}'.format(epoch, num_correct / len(dev_dataset)))

 cpuset_checked))


Epoch: 0	Batch: 10	Avg-Loss: 0.4459
Epoch: 0	Batch: 20	Avg-Loss: 0.3750
Epoch: 0	Batch: 30	Avg-Loss: 0.3318
Epoch: 0	Batch: 40	Avg-Loss: 0.3124
Epoch: 0, Validation Accuracy: 0.15
Epoch: 1	Batch: 10	Avg-Loss: 0.2855
Epoch: 1	Batch: 20	Avg-Loss: 0.2545
Epoch: 1	Batch: 30	Avg-Loss: 0.2309
Epoch: 1	Batch: 40	Avg-Loss: 0.2029
Epoch: 1, Validation Accuracy: 0.16
Epoch: 2	Batch: 10	Avg-Loss: 0.2189
Epoch: 2	Batch: 20	Avg-Loss: 0.1736
Epoch: 2	Batch: 30	Avg-Loss: 0.1480
Epoch: 2	Batch: 40	Avg-Loss: 0.1320
Epoch: 2, Validation Accuracy: 0.33
Epoch: 3	Batch: 10	Avg-Loss: 0.1201
Epoch: 3	Batch: 20	Avg-Loss: 0.0961
Epoch: 3	Batch: 30	Avg-Loss: 0.0913
Epoch: 3	Batch: 40	Avg-Loss: 0.0877
Epoch: 3, Validation Accuracy: 0.58
Epoch: 4	Batch: 10	Avg-Loss: 0.0983
Epoch: 4	Batch: 20	Avg-Loss: 0.0801
Epoch: 4	Batch: 30	Avg-Loss: 0.0660
Epoch: 4	Batch: 40	Avg-Loss: 0.0541
Epoch: 4, Validation Accuracy: 0.73
Epoch: 5	Batch: 10	Avg-Loss: 0.0688
Epoch: 5	Batch: 20	Avg-Loss: 0.0593
Epoch: 5	Batch: 30	Avg-Loss:

## Computing Cosine Similarity between Feature Embeddings


In [None]:
# Let's try cosine similarity

compute_sim = nn.CosineSimilarity(dim=0)

img_a = dev_dataset[0][0] # this is class 0
img_b = dev_dataset[1][0] # this is also class 0
img_c = dev_dataset[51][0] # this is class 1
img_d = dev_dataset[451][0] # this is class 9

In [None]:
network.eval()
feats_a = network(img_a.cuda().unsqueeze(0), return_embedding=True).squeeze(0)
feats_b = network(img_b.cuda().unsqueeze(0), return_embedding=True).squeeze(0)
feats_c = network(img_c.cuda().unsqueeze(0), return_embedding=True).squeeze(0)
feats_d = network(img_d.cuda().unsqueeze(0), return_embedding=True).squeeze(0)

In [None]:
print("CS between two images of class 0: {:.4f}".format(compute_sim(feats_a, feats_b)))
print("CS between an image of class 0 and image of class 1: {:.4f}".format(compute_sim(feats_a, feats_c)))
print("CS between an image of class 0 and image of class 9: {:.4f}".format(compute_sim(feats_a, feats_d)))

CS between two images of class 0: 0.9911
CS between an image of class 0 and image of class 1: 0.8435
CS between an image of class 0 and image of class 9: 0.8955


## Center Loss
___
The following piece of code for Center Loss has been pulled and modified based on the code from the GitHub Repo: https://github.com/KaiyangZhou/pytorch-center-loss
 
Reference:
Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.

In [None]:
class CenterLoss(nn.Module):
 """
 Args:
 num_classes (int): number of classes.
 feat_dim (int): feature dimension.
 """
 def __init__(self, num_classes, feat_dim, device=torch.device('cpu')):
 super(CenterLoss, self).__init__()
 self.num_classes = num_classes
 self.feat_dim = feat_dim
 self.device = device
 
 self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).to(self.device))

 def forward(self, x, labels):
 """
 Args:
 x: feature matrix with shape (batch_size, feat_dim).
 labels: ground truth labels with shape (batch_size).
 """
 batch_size = x.size(0)
 distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
 torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
 distmat.addmm_(1, -2, x, self.centers.t())

 classes = torch.arange(self.num_classes).long().to(self.device)
 labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
 mask = labels.eq(classes.expand(batch_size, self.num_classes))

 dist = []
 for i in range(batch_size):
 value = distmat[i][mask[i]]
 value = value.clamp(min=1e-12, max=1e+12) # for numerical stability
 dist.append(value)
 dist = torch.cat(dist)
 loss = dist.mean()

 return loss

In [None]:
class Network(nn.Module):
 def __init__(self, num_feats, hidden_sizes, num_classes, feat_dim=10):
 super(Network, self).__init__()
 
 self.hidden_sizes = [num_feats] + hidden_sizes + [num_classes]
 
 self.layers = []
 for idx, channel_size in enumerate(hidden_sizes):
 self.layers.append(nn.Conv2d(in_channels=self.hidden_sizes[idx], 
 out_channels=self.hidden_sizes[idx+1], 
 kernel_size=3, stride=2, bias=False))
 self.layers.append(nn.ReLU(inplace=True))
 self.layers.append(SimpleResidualBlock(channel_size = channel_size))
 
 self.layers = nn.Sequential(*self.layers)
 self.linear_label = nn.Linear(self.hidden_sizes[-2], self.hidden_sizes[-1], bias=False)
 
 # For creating the embedding to be passed into the Center Loss criterion
 self.linear_closs = nn.Linear(self.hidden_sizes[-2], feat_dim, bias=False)
 self.relu_closs = nn.ReLU(inplace=True)
 
 def forward(self, x, evalMode=False):
 output = x
 output = self.layers(output)
 
 output = F.avg_pool2d(output, [output.size(2), output.size(3)], stride=1)
 output = output.reshape(output.shape[0], output.shape[1])
 
 label_output = self.linear_label(output)
 label_output = label_output/torch.norm(self.linear_label.weight, dim=1)
 
 # Create the feature embedding for the Center Loss
 closs_output = self.linear_closs(output)
 closs_output = self.relu_closs(closs_output)

 return closs_output, label_output

def init_weights(m):
 if type(m) == nn.Conv2d or type(m) == nn.Linear:
 torch.nn.init.xavier_normal_(m.weight.data)

In [None]:
def train_closs(model, data_loader, test_loader, task='Classification'):
 model.train()

 for epoch in range(numEpochs):
 avg_loss = 0.0
 for batch_num, (feats, labels) in enumerate(data_loader):
 feats, labels = feats.to(device), labels.to(device)
 
 optimizer_label.zero_grad()
 optimizer_closs.zero_grad()
 
 feature, outputs = model(feats)

 l_loss = criterion_label(outputs, labels.long())
 c_loss = criterion_closs(feature, labels.long())
 loss = l_loss + closs_weight * c_loss
 
 loss.backward()
 
 optimizer_label.step()
 # by doing so, weight_cent would not impact on the learning of centers
 for param in criterion_closs.parameters():
 param.grad.data *= (1. / closs_weight)
 optimizer_closs.step()
 
 avg_loss += loss.item()

 if batch_num % 50 == 49:
 print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, batch_num+1, avg_loss/50))
 avg_loss = 0.0 
 
 torch.cuda.empty_cache()
 del feats
 del labels
 del loss
 
 if task == 'Classification':
 val_loss, val_acc = test_classify_closs(model, test_loader)
 train_loss, train_acc = test_classify_closs(model, data_loader)
 print('Train Loss: {:.4f}\tTrain Accuracy: {:.4f}\tVal Loss: {:.4f}\tVal Accuracy: {:.4f}'.
 format(train_loss, train_acc, val_loss, val_acc))
 else:
 test_verify(model, test_loader)


def test_classify_closs(model, test_loader):
 model.eval()
 test_loss = []
 accuracy = 0
 total = 0

 for batch_num, (feats, labels) in enumerate(test_loader):
 feats, labels = feats.to(device), labels.to(device)
 feature, outputs = model(feats)
 
 _, pred_labels = torch.max(F.softmax(outputs, dim=1), 1)
 pred_labels = pred_labels.view(-1)
 
 l_loss = criterion_label(outputs, labels.long())
 c_loss = criterion_closs(feature, labels.long())
 loss = l_loss + closs_weight * c_loss
 
 accuracy += torch.sum(torch.eq(pred_labels, labels)).item()
 total += len(labels)
 test_loss.extend([loss.item()]*feats.size()[0])
 del feats
 del labels

 model.train()
 return np.mean(test_loss), accuracy/total

In [None]:
numEpochs = 10
num_feats = 3
closs_weight = 1
lr_cent = 0.5
feat_dim = 10

weightDecay = 5e-5

hidden_sizes = [32, 64]
num_classes = len(train_dataset.classes)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

network = Network(num_feats, hidden_sizes, num_classes, feat_dim)
network.apply(init_weights)

criterion_label = nn.CrossEntropyLoss()
criterion_closs = CenterLoss(num_classes, feat_dim, device)
optimizer_label = torch.optim.SGD(network.parameters(), lr=learningRate, weight_decay=weightDecay, momentum=0.9)
optimizer_closs = torch.optim.SGD(criterion_closs.parameters(), lr=lr_cent)

In [None]:
network.train()
network.to(device)
train_closs(network, train_dataloader, dev_dataloader)

 cpuset_checked))
	addmm_(Number beta, Number alpha, Tensor mat1, Tensor mat2)
Consider using one of the following signatures instead:
	addmm_(Tensor mat1, Tensor mat2, *, Number beta, Number alpha) (Triggered internally at /pytorch/torch/csrc/utils/python_arg_parser.cpp:1025.)


Train Loss: 1.9780	Train Accuracy: 0.2150	Val Loss: 1.9809	Val Accuracy: 0.2080
Train Loss: 1.2386	Train Accuracy: 0.6250	Val Loss: 1.2105	Val Accuracy: 0.6480
Train Loss: 0.9858	Train Accuracy: 0.6466	Val Loss: 0.9397	Val Accuracy: 0.6620
Train Loss: 0.9313	Train Accuracy: 0.6600	Val Loss: 0.9176	Val Accuracy: 0.6620
Train Loss: 0.5583	Train Accuracy: 0.8600	Val Loss: 0.5493	Val Accuracy: 0.8600
Train Loss: 4.5172	Train Accuracy: 0.1360	Val Loss: 4.4614	Val Accuracy: 0.1320
Train Loss: 0.4353	Train Accuracy: 0.8812	Val Loss: 0.4230	Val Accuracy: 0.8980
Train Loss: 0.4435	Train Accuracy: 0.8686	Val Loss: 0.3987	Val Accuracy: 0.9060
Train Loss: 0.3900	Train Accuracy: 0.8876	Val Loss: 0.3889	Val Accuracy: 0.8920
Train Loss: 0.5629	Train Accuracy: 0.8208	Val Loss: 0.5867	Val Accuracy: 0.8140


## Triplet Loss
___
You can make a dataloader that returns a tuple of three images. Two being from the same class and one from a different class. You can then use triplet loss to seperate out the different class pair distance and decrease same class pair distance.

More on this link: https://github.com/adambielski/siamese-triplet/blob/master/losses.py

In [None]:
triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
face_img1, label_img1 = trainset.__getitem__(0)
face_img2, label_img2 = trainset.__getitem__(1)
face_img3, label_img3 = trainset.__getitem__(-1)

print(label_img1, label_img2, label_img3)
## face_img1 and face_img2 are from the same class and face_img3 is from a different class.
loss = triplet_loss(face_img1, face_img2, face_img3)
print ("Loss={:0.2f}".format(loss))

0 0 1
Loss=0.85
