Commit 961470ab authored by Patel, Shrey G (UG - Comp Sci & Elec Eng)

Added comments to midterm codebase

Added comments in PEP8 style to midterm submission codebase.

Renamed and backed up old script contents before changes.
parent eb53f213
import cv2
import numpy as np
import os
import shutil
from python_speech_features import mfcc
import scipy.io.wavfile as wav

DATASET_SIZE = 20
#DATASET_SIZE = 2
def frame_processing(img, face_cascade, eye_cascade):
    """Process an individual frame via the face descriptor: rotate, crop and scale.

    Keyword arguments:
    img -- the current frame read from the video file
    face_cascade -- pretrained Haar cascade classifier model for detecting faces
    eye_cascade -- pretrained Haar cascade classifier model for detecting eyes
    """
    # --- Downscale ---
    dimensions = img.shape
    # print(dimensions)
    y = dimensions[0]
    x = dimensions[1]
    #print(y, x)
    # Assumes the frame is never taller than it is wide
    if y > 1920:
        pixel_y_scale = 1080 / y
        # Downscale the frame to 1080 pixels tall (1920x1080 for 16:9 input)
        # while maintaining the vertical FOV
        img = cv2.resize(img, None, fx=pixel_y_scale,
                         fy=pixel_y_scale, interpolation=cv2.INTER_AREA)
"""Rotate"""
# Convert the image into grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Detect faces using face cascade and stores into faces variable
faces = face_cascade.detectMultiScale(gray, 1.1, 4)
# Obtain the coords of the rectangular crop region around the face
for (x, y, w, h) in faces:
#print("faces: x =", x, "y =", y, "w =", w, "h =", h)
pass
# Create a square region of interest using the crop region coords
#print("ROI: x =", x, "y =", y, "w =", w, "h =", h)
roi_gray = gray[y:(y+h), x:(x+w)]
#roi_color = img[y:(y+h), x:(x+w)]
# Detect eyes using eye cascade and store into eyes variable
eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 4)
index = 0
# Separate the two eyes into different variables
for (ex, ey, ew, eh) in eyes:
if index == 0:
eye_1 = (ex, ey, ew, eh)
elif index == 1:
eye_2 = (ex, ey, ew, eh)
index = index + 1
# Assumes that left eye is smaller! - Does not appear to work or change anything if left or right leading...
if eye_1[0] < eye_2[0]:
left_eye = eye_1
right_eye = eye_2
else:
left_eye = eye_2
right_eye = eye_1
# Calculate the coords of the central points of the rectangles
left_eye_center = (int(left_eye[0] + (left_eye[2] / 2)),
int(left_eye[1] + (left_eye[3] / 2)))
left_eye_x = left_eye_center[0]
left_eye_y = left_eye_center[1]
right_eye_center = (
int(right_eye[0] + (right_eye[2]/2)), int(right_eye[1] + (right_eye[3]/2)))
right_eye_x = right_eye_center[0]
right_eye_y = right_eye_center[1]
if left_eye_y > right_eye_y:
A = (right_eye_x, left_eye_y)
# Integer -1 indicates that the image will rotate in the clockwise direction
direction = -1
else:
A = (left_eye_x, right_eye_y)
# Integer 1 indicates that image will rotate in the counter clockwise direction
direction = 1
delta_x = right_eye_x - left_eye_x
delta_y = right_eye_y - left_eye_y
angle = np.arctan(delta_y/delta_x)
angle = (angle * 180) / np.pi
# Width and height of the image
h, w = img.shape[:2]
#print("img: h = ", h, "w = ", w)
# Calculate centre point of the image
# Integer division "//"" ensures that we receive whole numbers
center = (w // 2, h // 2)
# Defining a matrix M and calling
# cv2.getRotationMatrix2D method
M = cv2.getRotationMatrix2D(center, (angle), 1.0)
# Applying the rotation to our image using the
# cv2.warpAffine method
rotated = cv2.warpAffine(img, M, (w, h))
"""Crop"""
# Convert the image into grayscale
crop_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Detect faces using face cascade and stores into faces variable
crop_faces = face_cascade.detectMultiScale(crop_gray, 1.1, 4)
# Crop around the face using coords of the rectangular crop region
for (x, y, w, h) in crop_faces:
#print("crop_faces: x =", x, "y =", y, "w =", w, "h =", h)
# Crops around face
cropped = rotated[y:(y+h), x:(x+w)]
"""Scale"""
# Recalculate width and height of the image
h, w = cropped.shape[:2]
#print("cropped: h = ", h, "w = ", w)
if (w != h):
raise Exception("Error! Crop region is not square!")
# Note that by default python uses double precision floats (i.e. 64 bit representation)
# Numpy specific floats will still be limited to 64 bits, due to the C compiler... check logbook for more information
scaledown_ratio = 256 / h
#print("scaldown_ratio =", scaledown_ratio)
#print(np.finfo(np.longdouble))
# Scale frame up or down to a 256x256 representation
resized = cv2.resize(cropped, None, fx=scaledown_ratio,
fy=scaledown_ratio, interpolation=cv2.INTER_AREA)
return resized
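
# A minimal usage sketch (hypothetical helper, not called by the pipeline):
# run frame_processing on one still image. Assumes the Haar cascade XML files
# sit next to this script, as they do for the main run below.
def example_process_single_image(image_path):
    """Sketch only: return the 256x256 face crop for one image file."""
    f_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    e_cascade = cv2.CascadeClassifier("haarcascade_eye.xml")
    return frame_processing(cv2.imread(image_path), f_cascade, e_cascade)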
def audio_processing(clip_num, frame_rate, frame_count):
    """Process audio clips via the MFCC descriptor.

    Keyword arguments:
    clip_num -- the number of the current video file
    frame_rate -- the frame rate of the current video file
    frame_count -- the total number of frames in the current video file
    """
    print("frame_rate =", frame_rate)
    print("frame_count =", frame_count)
    print("estimated video length = " + str(frame_count / frame_rate) + "s")
    (rate, sig) = wav.read("vids/dataset/audio" + str(clip_num) + ".wav")
    #frame_rate = 30
    #frame_rate = 29.95904248021286
    # One MFCC window step per video frame keeps the audio and video in sync
    frame_time = 1 / frame_rate
    print("frame_time = " + str(frame_time) + "s")
    # mfcc() returns a numpy array of size (NUMFRAMES by numcep); each row
    # holds one feature vector. numcep is the number of cepstral coefficients
    # to return (default 13). For a very basic understanding, the cepstrum
    # captures the rate of change across spectral bands.
    # Full signature: mfcc(signal, samplerate=16000, winlen=0.025,
    #     winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0,
    #     highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
    #     winfunc=<function <lambda>>)
    mfcc_feat = mfcc(sig, rate, winlen=0.025, winstep=frame_time,
                     numcep=13, nfilt=26, nfft=1200)
    return mfcc_feat
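
# A quick alignment sketch (hypothetical, not called by the pipeline): with
# winstep set to 1 / frame_rate, the number of MFCC rows should roughly match
# the video's frame count, so each saved row pairs with one extracted image.
def example_check_mfcc_alignment(clip_num, frame_rate, frame_count):
    """Sketch only: compare the MFCC row count against the video frame count."""
    feats = audio_processing(clip_num, frame_rate, frame_count)
    print("mfcc rows =", feats.shape[0], "video frames =", int(frame_count))
    print("coefficients per row =", feats.shape[1])  # 13 as configured above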
def data_processing(vid_num, face_cascade, eye_cascade):
    """Process the dataset of video and audio files for neural network training.

    Keyword arguments:
    vid_num -- the number of the current video file
    face_cascade -- pretrained Haar cascade classifier model for detecting faces
    eye_cascade -- pretrained Haar cascade classifier model for detecting eyes
    """
    # Create a VideoCapture object and read from the input file
    # (to read from a camera instead, pass 0 in place of the file name)
    #print(vid_num)
    print('\nvids/dataset/video' + str(vid_num) + '.mp4')
    cap = cv2.VideoCapture('vids/dataset/video' + str(vid_num) + '.mp4')
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT)
    vid_fps = cap.get(cv2.CAP_PROP_FPS)
    vid_total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    print("result of cap.get(FPS) is:", vid_fps)
    print("result of cap.get(FRAME_COUNT) is:", vid_total_frames)
    # Check that the video file opened successfully
    if not cap.isOpened():
        raise Exception("Error opening video stream or file")
    new_frame_path = 'data/clip' + str(vid_num) + '/images/'
    new_audio_path = 'data/clip' + str(vid_num) + '/audio/'
    # Create the data folder and its subdirectories; the images directory
    # raises an error if it already exists (exist_ok=False)
    os.makedirs(new_frame_path, exist_ok=False)
    os.makedirs(new_audio_path, exist_ok=True)
    # Read until the video is completed
    i = 0
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if ret:
            print("vid", vid_num, "frame", i)
            processed_frame = frame_processing(
                frame, face_cascade, eye_cascade)
            cv2.imwrite(new_frame_path + 'image' + str(i).zfill(4) + '.jpg',
                        processed_frame)
            i += 1
        else:
            # No frame returned: end of video, so break the loop
            break
    # When everything is done, release the video capture object
    cap.release()
    # Calculate the MFCCs of the audio recording
    mfcc_array = audio_processing(vid_num, vid_fps, vid_total_frames)
    # Split the 2D MFCC array into frame-specific rows and save each row as
    # its own file (np.save writes a fresh .npy binary file per row; it does
    # not append to an existing file)
    for row_num, row in enumerate(mfcc_array):
        #print(row)
        np.save("data/clip" + str(vid_num) + "/audio/" + str(row_num).zfill(6),
                row)
    print("all mfcc rows saved!")
    return
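
# A minimal read-back sketch (hypothetical, for a later training stage): load
# one image/MFCC pair produced by data_processing. Note the differing
# zero-padding widths used above: zfill(4) for images, zfill(6) for audio.
def example_load_pair(clip_num=1, frame_num=0):
    """Sketch only: return one (frame, mfcc_row) training pair."""
    image = cv2.imread('data/clip' + str(clip_num) + '/images/image'
                       + str(frame_num).zfill(4) + '.jpg')
    audio_row = np.load('data/clip' + str(clip_num) + '/audio/'
                        + str(frame_num).zfill(6) + '.npy')
    return image, audio_row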
# Create the face_cascade and eye_cascade objects
print("about to create cascades")
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier("haarcascade_eye.xml")
print("created cascades")
# Delete any previous data folder and old frames
print("about to delete data folder")
try:
    shutil.rmtree('data/')
    print("data folder deleted")
except FileNotFoundError:
    print("data folder did not exist")
except PermissionError:
    print("Error! Cannot delete data folder: an internal file is currently "
          "open. Close all open file(s) and try again!")
    exit(1)
# Execute the data processing pipeline over every clip in the dataset
for i in range(DATASET_SIZE):
    data_processing(i + 1, face_cascade, eye_cascade)
print("complete!")
File moved