Commit 8818db1c authored by Patel, Shrey G (UG - Comp Sci & Elec Eng)

Added MFCC code

Added MFCC code to codebase as audio_processing() function

Improved debug print() output

Removed vid_num argument from frame_processing() function
parent 9971f0f1
import cv2
import numpy as np
import os
import shutil
from python_speech_features import mfcc
import scipy.io.wavfile as wav
#DATASET_SIZE = 20
DATASET_SIZE = 2


def frame_processing(img, face_cascade, eye_cascade):
    dimensions = img.shape
    #print(dimensions)
    y = dimensions[0]
    x = dimensions[1]
    #print(y, x)
    # This assumes that the image is never taller than it is wide
    if (y > 1920):
        pixel_y_scale = 1080 / y
        # scaling to maintain vertical FOV
        img = cv2.resize(img, None, fx=pixel_y_scale,
                         fy=pixel_y_scale, interpolation=cv2.INTER_AREA)
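    # INTER_AREA resampling is the method the OpenCV docs recommend for
    # shrinking an image, as it gives moire-free results when decimating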
    #winname = 'image' + str(img_num).zfill(4) + '.jpg'
    # Converting the image into grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Creating variable faces
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)
    # Defining and drawing the rectangle around the face
    for (x, y, w, h) in faces:
        #print("faces: x =", x, "y =", y, "w =", w, "h =", h)
        #cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 3)
        pass
    # Creating two regions of interest
    # (x, y, w, h) carry over from the loop above, i.e. the last face detected
    #print("ROI: x =", x, "y =", y, "w =", w, "h =", h)
    roi_gray = gray[y:(y+h), x:(x+w)]
    roi_color = img[y:(y+h), x:(x+w)]
    # Creating variable eyes
    eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 4)
    # eye_1 and eye_2 below are only defined if at least two eyes were found
    if len(eyes) < 2:
        raise Exception("Error! Could not detect two eyes!")
    index = 0
    # Creating for loop in order to divide one eye from another
    for (ex, ey, ew, eh) in eyes:
        if index == 0:
            eye_1 = (ex, ey, ew, eh)
        elif index == 1:
            eye_2 = (ex, ey, ew, eh)
        # Drawing rectangles around the eyes
        #cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 0, 255), 3)
        index = index + 1
    #cv2.imshow(winname, img)
    # Assumes that the left eye has the smaller x-coordinate!!! - need to relook at this later
    if eye_1[0] < eye_2[0]:
        left_eye = eye_1
        right_eye = eye_2
    else:
        left_eye = eye_2
        right_eye = eye_1
    # Calculating the coordinates of the central points of the rectangles
    left_eye_center = (int(left_eye[0] + (left_eye[2] / 2)),
                       int(left_eye[1] + (left_eye[3] / 2)))
    left_eye_x = left_eye_center[0]
    left_eye_y = left_eye_center[1]
    right_eye_center = (
        int(right_eye[0] + (right_eye[2]/2)), int(right_eye[1] + (right_eye[3]/2)))
    right_eye_x = right_eye_center[0]
    right_eye_y = right_eye_center[1]
    """cv2.circle(roi_color, left_eye_center, 5, (255, 0, 0), -1)
    cv2.circle(roi_color, right_eye_center, 5, (255, 0, 0), -1)
    cv2.line(roi_color, right_eye_center, left_eye_center, (0, 200, 200), 3)"""
    if left_eye_y > right_eye_y:
        A = (right_eye_x, left_eye_y)
        # Integer -1 indicates that the image will rotate in the clockwise direction
        direction = -1
    else:
        A = (left_eye_x, right_eye_y)
        # Integer 1 indicates that the image will rotate in the counter-clockwise
        # direction
        direction = 1
    """cv2.circle(roi_color, A, 5, (255, 0, 0), -1)
    cv2.line(roi_color, right_eye_center, left_eye_center, (0, 200, 200), 3)
    cv2.line(roi_color, left_eye_center, A, (0, 200, 200), 3)
    cv2.line(roi_color, right_eye_center, A, (0, 200, 200), 3)
    #cv2.imshow(winname, img)"""
    delta_x = right_eye_x - left_eye_x
    delta_y = right_eye_y - left_eye_y
    # arctan2 avoids a ZeroDivisionError when the eye centres share an x-coordinate
    angle = np.arctan2(delta_y, delta_x)
    angle = (angle * 180) / np.pi
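    # cv2.getRotationMatrix2D() below expects the angle in degrees, with
    # positive values rotating the image counter-clockwise, hence the
    # radians-to-degrees conversion above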
    # Width and height of the image
    h, w = img.shape[:2]
    #print("img: h = ", h, "w = ", w)
    # Calculating a center point of the image
    # Integer division "//" ensures that we receive whole numbers
    center = (w // 2, h // 2)
    # Defining a matrix M by calling the
    # cv2.getRotationMatrix2D method
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    # Applying the rotation to our image using the
    # cv2.warpAffine method
    rotated = cv2.warpAffine(img, M, (w, h))
    # Converting the rotated image into grayscale, so that the face is
    # re-detected in the same image that gets cropped
    crop_gray = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
    # Creating variable faces
    crop_faces = face_cascade.detectMultiScale(crop_gray, 1.1, 4)
    # Defining and drawing the rectangle around the face
    for (x, y, w, h) in crop_faces:
        #print("crop_faces: x =", x, "y =", y, "w =", w, "h =", h)
        # Crops around face
        cropped = rotated[y:(y+h), x:(x+w)]
        #cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 3)
    # Recalculate the width and height of the image
    h, w = cropped.shape[:2]
    #print("cropped: h = ", h, "w = ", w)
    if (w != h):
        raise Exception("Error! Crop region is not square!")
    # note that by default python uses double precision floats (i.e. 64 bit representation)
    # numpy specific floats will still be limited to 64 bits, due to the C compiler... check logbook for more information
    scaledown_ratio = 256 / h
    #print("scaledown_ratio =", scaledown_ratio)
    # print(np.finfo(np.longdouble))
    #resized = cv2.resize(img, (256, 256), interpolation = cv2.INTER_AREA)
    resized = cv2.resize(cropped, None, fx=scaledown_ratio,
                         fy=scaledown_ratio, interpolation=cv2.INTER_AREA)
    # cv2.destroyAllWindows()
    return resized


def audio_processing(clip_num):
    (rate, sig) = wav.read("vids/dataset/audio" + str(clip_num) + ".wav")
    frame_time = 1 / 30
    print("frame_time = " + str(frame_time) + "s")
    #Returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
    #Parameters: numcep - the number of cepstra to return, default 13
    #For a very basic understanding, the cepstrum captures the rate of change across spectral bands
    #mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True, winfunc=<function <lambda>>)
    mfcc_feat = mfcc(sig, rate, winlen=0.025, winstep=frame_time,
                     numcep=13, nfilt=26, nfft=1200)
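    # With winstep equal to the video frame time (1/30 s), mfcc_feat holds one
    # 13-coefficient feature vector per video frame, e.g. roughly 300 rows for
    # a 10-second clip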
    #numpy.save(file, arr, allow_pickle=True, fix_imports=True)
    #Save an array to a binary file in NumPy .npy format.
    #Notes: Any data saved to the file is appended to the end of the file.
    return mfcc_feat


def data_processing(vid_num, face_cascade, eye_cascade):
    # Create a VideoCapture object and read from input file
    # If the input is the camera, pass 0 instead of the video file name
    #cap = cv2.VideoCapture('D:vids/dataset/video1.mp4')
    #print(vid_num)
    print('vids/dataset/video' + str(vid_num) + '.mp4')
    cap = cv2.VideoCapture('vids/dataset/video' + str(vid_num) + '.mp4')
    # Check if the video opened successfully
    if not cap.isOpened():
        raise Exception("Error opening video stream or file")
    new_path = 'data/clip' + str(vid_num) + '/images/'
    # Creates data folder and other subdirectories, throws an error if they already exist
    os.makedirs(new_path, exist_ok=False)
    # Read until the video is completed
    i = 0
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if ret:
            print("vid", vid_num, "frame", i)
            processed_frame = frame_processing(
                frame, face_cascade, eye_cascade)
            cv2.imwrite(
                (new_path + 'image' + str(i).zfill(4) + '.jpg'), processed_frame)
            i += 1
        else:
            # Break the loop once the last frame has been read
            break
    # When everything is done, release the video capture object
    cap.release()
    mfcc_array = audio_processing(vid_num)
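    # np.save() appends the .npy extension automatically, so this writes
    # data/clipN/mfcc.npy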
    np.save("data/clip" + str(vid_num) + "/mfcc", mfcc_array)
    print("mfcc for audio" + str(vid_num) + " saved!")
    return


print("about to create cascades")
# Creating face_cascade and eye_cascade objects
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier("haarcascade_eye.xml")
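# these Haar cascade XML files ship with OpenCV (see cv2.data.haarcascades)
# and are assumed to have been copied into the working directory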
print("created cascades")
print("about to delete data folder")
# NEED TO ADD ERROR CHECKING / EXCEPTION HANDLING HERE!!!
# Deletes any previous data folder and old frames
try:
    shutil.rmtree('data/')
    print("data folder deleted")
except FileNotFoundError:
    print("data folder did not exist")
for i in range(DATASET_SIZE):
    data_processing(i+1, face_cascade, eye_cascade)
print("Complete!")
@@ -4,7 +4,7 @@ import scipy.io.wavfile as wav
 import numpy as np
 frame_rate = 30
-clip_num = 2
+clip_num = 1
 (rate, sig) = wav.read("vids/dataset/audio" + str(clip_num) + ".wav")