Commit e8881280 authored by Anichenko, Anastasia (UG - Comp Sci & Elec Eng)

added missing baseline code

parent ac32e2bb
%% Cell type:code id:048616b7 tags:
``` python
# Code for running the 3D CNN, adapted from: https://github.com/latte488/smth-smth-v2
import os
import cv2
import sys
import importlib
import torch
import torchvision
import numpy as np
from torch import nn
import json
# imports for displaying a video in an IPython cell
import io
import base64
from IPython.display import HTML
from data_parser import WebmDataset
from data_loader_av import VideoFolder
from models.multi_column import MultiColumn
from transforms_video import *
from utils import load_json_config, remove_module_from_checkpoint_state_dict
from pprint import pprint
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from tqdm import tqdm
from matplotlib import pyplot as plt
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.deterministic = True
print(f"Using {'GPU' if str(DEVICE) == 'cuda' else 'CPU'}.")
```
%% Output
Using GPU.
%% Cell type:code id:75e80d4f tags:
``` python
# helper functions
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def train(model, dataloader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    running_correct_preds = 0
    count = 0
    for i, (input, target) in tqdm(enumerate(dataloader), total=len(dataloader)):
        count += 1
        optimizer.zero_grad()
        if config['nclips_train'] > 1:
            input_var = list(input.split(config['clip_size'], 2))
            for idx, inp in enumerate(input_var):
                input_var[idx] = inp.to(device)
        else:
            input_var = [input.to(device)]
        target = target.to(device)
        # compute output and loss
        output = model(input_var)
        loss = criterion(output, target)
        running_loss += loss.item()
        # compute accuracy
        _, preds = torch.max(output.data, 1)
        running_correct_preds += (preds == target).sum().item()
        # backward pass
        loss.backward()
        optimizer.step()
    # calculate loss and accuracy over the epoch
    epoch_loss = running_loss / count
    epoch_acc = 100. * (running_correct_preds / len(dataloader.dataset))
    return epoch_loss, epoch_acc


def eval_model(model, dataloader, device):
    y_pred = []
    y_true = []
    running_acc = 0
    count = 0
    with torch.no_grad():
        for i, (input, target) in tqdm(enumerate(dataloader), total=len(dataloader)):
            if config['nclips_train'] > 1:
                input_var = list(input.split(config['clip_size'], 2))
                for idx, inp in enumerate(input_var):
                    input_var[idx] = inp.to(device)
            else:
                input_var = [input.to(device)]
            target = target.to(device)
            output = model(input_var)
            _, preds = torch.max(output, 1)
            count += target.size(0)
            running_acc += (preds == target).sum().item()
            y_pred.extend(preds.to('cpu').tolist())
            y_true.extend(target.to('cpu').tolist())
    acc = 100 * running_acc / count
    print("Overall accuracy: " + str(acc))
    # classification report
    print(classification_report(y_true, y_pred, target_names=['106', '112', '118'], zero_division=0))
    # confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1, 2], normalize='true')
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(include_values=False)
    disp.ax_.get_images()[0].set_clim(0, 1.0)  # fix the colour scale so it does not vary
    plt.show()
```
%% Cell type:markdown id:3b79d192 tags:
Create annotation files for the subset of 3 classes that the model will be fine-tuned on.
%% Cell type:code id:9b4cdee0 tags:
``` python
label_train_path = '/vol/research/TopDownVideo/labels/something-something-v2-train.json'
label_val_path = '/vol/research/TopDownVideo/labels/something-something-v2-validation.json'
action_list = [
    'Putting [something] into [something]',
    'Putting [something] onto [something]',
    'Putting [something] underneath [something]'
]
with open(label_train_path) as json_file:
    train_json = json.load(json_file)
with open(label_val_path) as json_file:
    val_json = json.load(json_file)
train_json_updated = []
for d in train_json:
    if d['template'] in action_list:
        train_json_updated.append(d)
print("Length of train set: " + str(len(train_json_updated)))
val_json_updated = []
for d in val_json:
    if d['template'] in action_list:
        val_json_updated.append(d)
print("Length of validation set: " + str(len(val_json_updated)))
label_train_target = '/vol/research/TopDownVideo/aa03813/LabelsForBaseline/something-something-v2-train3.json'
label_val_target = '/vol/research/TopDownVideo/aa03813/LabelsForBaseline/something-something-v2-val3.json'
with open(label_train_target, "w") as write_file:
    json.dump(train_json_updated, write_file, indent=1)
with open(label_val_target, "w") as write_file:
    json.dump(val_json_updated, write_file, indent=1)
```
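%% Cell type:markdown tags:
Optional sanity check (not part of the original run): the filtered lists can be inspected to confirm that only the three chosen templates remain and to see how balanced the subset is.
%% Cell type:code tags:
``` python
# Illustrative check of the filtered annotation lists defined above.
from collections import Counter

print(Counter(d['template'] for d in train_json_updated))
print(Counter(d['template'] for d in val_json_updated))
```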
%% Cell type:code id:07b888a1 tags:
``` python
# load config
config = load_json_config('./configs/pretrained/config_model1_for_finetuning.json')
# set up the model definition and locate its checkpoint
column_cnn_def = importlib.import_module("{}".format(config['conv_model']))
model_name = config["model_name"]
print("=> Name of the model -- {}".format(model_name))
# path to the pretrained model checkpoint
checkpoint_path = os.path.join("../", config["output_dir"], config["model_name"], "model_best.pth.tar")
print("=> Checkpoint path --> {}".format(checkpoint_path))
```
%% Output
=> Name of the model -- model3D_1
=> Checkpoint path --> ../trained_models/pretrained/model3D_1/model_best.pth.tar
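%% Cell type:markdown tags:
The cell above only locates the checkpoint; `remove_module_from_checkpoint_state_dict` is imported at the top, but the weight-restoring step itself is not shown in these cells. Below is a minimal sketch of how the upstream repository loads such a checkpoint, assuming the file stores its weights under a `'state_dict'` key and may have been saved from an `nn.DataParallel` wrapper; it would be applied to the `MultiColumn` model (built in the next cell) before its classification head is replaced.
%% Cell type:code tags:
``` python
# Sketch only -- assumes the checkpoint layout of the upstream smth-smth-v2 repo.
checkpoint = torch.load(checkpoint_path, map_location=DEVICE)
pretrained_state = remove_module_from_checkpoint_state_dict(checkpoint['state_dict'])
# The restored weights would then be copied into the full pretrained model before
# swapping in the new 3-class head, e.g.:
# model.load_state_dict(pretrained_state)
```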
%% Cell type:code id:dd213cfb tags:
``` python
# initialize the model and freeze its pretrained layers
model = MultiColumn(config['num_classes'], column_cnn_def.Model, int(config["column_units"]))
print("Num of trainable parameters before freezing: " + str(count_parameters(model)))
for param in model.parameters():
    param.requires_grad = False
# replace the last layer so that its output covers only 3 classes
model.clf_layers = nn.Linear(512, 3)
print("Num of trainable parameters after freezing: " + str(count_parameters(model)))
model.to(DEVICE)
```
%% Output
Num of trainable parameters before freezing: 23384430
Num of trainable parameters after freezing: 1539
MultiColumn(
  (conv_column): Model(
    (block1): Sequential(
      (0): Conv3d(3, 32, kernel_size=(3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2))
      (1): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Dropout3d(p=0.2, inplace=False)
    )
    (block2): Sequential(
      (0): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1))
      (4): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Dropout3d(p=0.2, inplace=False)
    )
    (block3): Sequential(
      (0): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (4): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1))
      (7): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ReLU(inplace=True)
      (9): Dropout3d(p=0.2, inplace=False)
    )
    (block4): Sequential(
      (0): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (4): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1))
      (7): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (8): ReLU(inplace=True)
      (9): Dropout3d(p=0.2, inplace=False)
    )
    (block5): Sequential(
      (0): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
      (1): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 2, 2), padding=(1, 1, 1))
      (4): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
    )
  )
  (clf_layers): Linear(in_features=512, out_features=3, bias=True)
)
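%% Cell type:markdown tags:
A quick optional check (not in the original notebook) that freezing worked as intended: after the swap, only the new `clf_layers` head should still require gradients, which matches the 1539 trainable parameters reported above (512 * 3 weights + 3 biases).
%% Cell type:code tags:
``` python
# List the parameters that remain trainable after freezing the backbone.
trainable = [name for name, p in model.named_parameters() if p.requires_grad]
print(trainable)  # expected: ['clf_layers.weight', 'clf_layers.bias']
```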
%% Cell type:code id:9fae288b tags:
``` python
# define augmentation pipeline
upscale_size_train = int(config['input_spatial_size'] * config["upscale_factor_train"])
upscale_size_eval = int(config['input_spatial_size'] * config["upscale_factor_eval"])
# Random crop videos during training
transform_train_pre = ComposeMix([
    [RandomRotationVideo(15), "vid"],
    [Scale(upscale_size_train), "img"],
    [RandomCropVideo(config['input_spatial_size']), "vid"],
])
# Center crop videos during evaluation
transform_eval_pre = ComposeMix([
    [Scale(upscale_size_eval), "img"],
    [torchvision.transforms.ToPILImage(), "img"],
    [torchvision.transforms.CenterCrop(config['input_spatial_size']), "img"],
])
# Transforms common to the train and eval sets, applied after the "pre" transforms
transform_post = ComposeMix([
    [torchvision.transforms.ToTensor(), "img"],
    [torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],  # default values for ImageNet
        std=[0.229, 0.224, 0.225]), "img"]
])
```
%% Cell type:code id:3b5c018d tags:
``` python
train_data = VideoFolder(root=config['data_folder'],
                         json_file_input=config['json_data_train'],
                         json_file_labels=config['json_file_labels'],
                         clip_size=config['clip_size'],
                         nclips=config['nclips_train'],
                         step_size=config['step_size_train'],
                         is_val=False,
                         transform_pre=transform_train_pre,
                         transform_post=transform_post,
                         augmentation_mappings_json=config['augmentation_mappings_json'],
                         augmentation_types_todo=config['augmentation_types_todo'],
                         get_item_id=False,
                         )
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=config['batch_size'], shuffle=True,
    num_workers=config['num_workers'], pin_memory=True,
    drop_last=True)
val_data = VideoFolder(root=config['data_folder'],
                       json_file_input=config['json_data_val'],
                       json_file_labels=config['json_file_labels'],
                       clip_size=config['clip_size'],
                       nclips=config['nclips_val'],
                       step_size=config['step_size_val'],
                       is_val=True,
                       transform_pre=transform_eval_pre,
                       transform_post=transform_post,
                       get_item_id=False,
                       )
val_loader = torch.utils.data.DataLoader(
    val_data,
    batch_size=config['batch_size'], shuffle=False,
    num_workers=config['num_workers'], pin_memory=True,
    drop_last=False)
```
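%% Cell type:markdown tags:
An optional sanity check on the loaders (illustrative, not part of the original run):
%% Cell type:code tags:
``` python
# Number of clips in each split and batches per epoch.
print("Train clips:", len(train_data), "| train batches:", len(train_loader))
print("Val clips:  ", len(val_data), "| val batches:  ", len(val_loader))
```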
%% Cell type:code id:fafe4c40 tags:
``` python
LR = 5e-2
OPTIMIZER = torch.optim.SGD(model.parameters(), LR)
CRITERION = nn.CrossEntropyLoss().to(DEVICE)
```
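%% Cell type:markdown tags:
Passing `model.parameters()` works here because SGD simply skips parameters whose gradient stays `None`; an equivalent and slightly more explicit option is to hand the optimizer only the trainable head parameters:
%% Cell type:code tags:
``` python
# Optional alternative: optimize only the parameters that still require gradients.
OPTIMIZER = torch.optim.SGD((p for p in model.parameters() if p.requires_grad), lr=LR)
```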
%% Cell type:code id:bdbab144 tags:
``` python
loss, acc = train(model, train_loader, OPTIMIZER, CRITERION, DEVICE)
print("Training loss: " + str(loss))
print("Training accuracy:" + str(acc))
```
%% Output
100%|█████████████████████████████████████████| 146/146 [04:35<00:00, 1.89s/it]
Training loss: 1.7852653726323011
Training accuracy:39.977298524404084
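%% Cell type:markdown tags:
The cell above performs a single pass over the training data. A minimal multi-epoch sketch built from the same `train` helper is shown below; the epoch count is an arbitrary illustrative value, not a setting from the original experiment.
%% Cell type:code tags:
``` python
# Illustrative multi-epoch fine-tuning loop (NUM_EPOCHS is an example value).
NUM_EPOCHS = 5
for epoch in range(NUM_EPOCHS):
    epoch_loss, epoch_acc = train(model, train_loader, OPTIMIZER, CRITERION, DEVICE)
    print(f"Epoch {epoch + 1}/{NUM_EPOCHS} - loss: {epoch_loss:.4f}, acc: {epoch_acc:.2f}%")
```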
%% Cell type:code id:2dd0a6ae tags:
``` python
torch.save(model.state_dict(), '/vol/research/TopDownVideo/aa03813/LabelsForBaseline/model.pkl')
model.load_state_dict(torch.load('/vol/research/TopDownVideo/aa03813/LabelsForBaseline/model.pkl'))
```
%% Output
<All keys matched successfully>
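%% Cell type:markdown tags:
If the saved weights are later reloaded on a machine without a GPU, `map_location` avoids a deserialization error; a small illustrative variant of the load above:
%% Cell type:code tags:
``` python
# Load the fine-tuned weights onto the CPU before copying them into the model.
state = torch.load('/vol/research/TopDownVideo/aa03813/LabelsForBaseline/model.pkl',
                   map_location=torch.device('cpu'))
model.load_state_dict(state)
```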
%% Cell type:code id:b5990018 tags:
``` python
eval_model(model, val_loader, DEVICE)
```