Commit 961470ab authored by Patel, Shrey G (UG - Comp Sci & Elec Eng)

Added comments to midterm codebase

Added comments in PEP8 style to midterm submission codebase.

Renamed and backed up old script contents before changes.
parent eb53f213
import cv2
import numpy as np
import os
import shutil
from python_speech_features import mfcc
import scipy.io.wavfile as wav

DATASET_SIZE = 20
#DATASET_SIZE = 2
def frame_processing(img, face_cascade, eye_cascade):
    """Process an individual frame via the face descriptor: rotate, crop and scale.

    Keyword arguments:
    img -- the current frame read from the video file
    face_cascade -- pretrained Haar cascade classifier model for detecting faces
    eye_cascade -- pretrained Haar cascade classifier model for detecting eyes
    """
    # --- Downscale ---
    dimensions = img.shape
    # print(dimensions)
    y = dimensions[0]
    x = dimensions[1]
    #print(y, x)
    # Assumes the frame is never taller than it is wide
    if y > 1920:
        pixel_y_scale = 1080 / y
        # Downscale the frame to 1080 pixels tall (1920x1080 for 16:9 input)
        # while maintaining the vertical FOV
        img = cv2.resize(img, None, fx=pixel_y_scale,
                         fy=pixel_y_scale, interpolation=cv2.INTER_AREA)
"""Rotate"""
# Convert the image into grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Detect faces using face cascade and stores into faces variable
faces = face_cascade.detectMultiScale(gray, 1.1, 4)
# Obtain the coords of the rectangular crop region around the face
for (x, y, w, h) in faces:
#print("faces: x =", x, "y =", y, "w =", w, "h =", h)
pass
# Create a square region of interest using the crop region coords
#print("ROI: x =", x, "y =", y, "w =", w, "h =", h)
roi_gray = gray[y:(y+h), x:(x+w)]
#roi_color = img[y:(y+h), x:(x+w)]
# Detect eyes using eye cascade and store into eyes variable
eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 4)
index = 0
# Separate the two eyes into different variables
for (ex, ey, ew, eh) in eyes:
if index == 0:
eye_1 = (ex, ey, ew, eh)
elif index == 1:
eye_2 = (ex, ey, ew, eh)
index = index + 1
# Assumes that left eye is smaller! - Does not appear to work or change anything if left or right leading...
if eye_1[0] < eye_2[0]:
left_eye = eye_1
right_eye = eye_2
else:
left_eye = eye_2
right_eye = eye_1
# Calculate the coords of the central points of the rectangles
left_eye_center = (int(left_eye[0] + (left_eye[2] / 2)),
int(left_eye[1] + (left_eye[3] / 2)))
left_eye_x = left_eye_center[0]
left_eye_y = left_eye_center[1]
right_eye_center = (
int(right_eye[0] + (right_eye[2]/2)), int(right_eye[1] + (right_eye[3]/2)))
right_eye_x = right_eye_center[0]
right_eye_y = right_eye_center[1]
if left_eye_y > right_eye_y:
A = (right_eye_x, left_eye_y)
# Integer -1 indicates that the image will rotate in the clockwise direction
direction = -1
else:
A = (left_eye_x, right_eye_y)
# Integer 1 indicates that image will rotate in the counter clockwise direction
direction = 1
delta_x = right_eye_x - left_eye_x
delta_y = right_eye_y - left_eye_y
angle = np.arctan(delta_y/delta_x)
angle = (angle * 180) / np.pi
# Width and height of the image
h, w = img.shape[:2]
#print("img: h = ", h, "w = ", w)
# Calculate centre point of the image
# Integer division "//"" ensures that we receive whole numbers
center = (w // 2, h // 2)
# Defining a matrix M and calling
# cv2.getRotationMatrix2D method
M = cv2.getRotationMatrix2D(center, (angle), 1.0)
# Applying the rotation to our image using the
# cv2.warpAffine method
rotated = cv2.warpAffine(img, M, (w, h))
"""Crop"""
# Convert the image into grayscale
crop_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Detect faces using face cascade and stores into faces variable
crop_faces = face_cascade.detectMultiScale(crop_gray, 1.1, 4)
# Crop around the face using coords of the rectangular crop region
for (x, y, w, h) in crop_faces:
#print("crop_faces: x =", x, "y =", y, "w =", w, "h =", h)
# Crops around face
cropped = rotated[y:(y+h), x:(x+w)]
"""Scale"""
# Recalculate width and height of the image
h, w = cropped.shape[:2]
#print("cropped: h = ", h, "w = ", w)
if (w != h):
raise Exception("Error! Crop region is not square!")
# Note that by default python uses double precision floats (i.e. 64 bit representation)
# Numpy specific floats will still be limited to 64 bits, due to the C compiler... check logbook for more information
scaledown_ratio = 256 / h
#print("scaldown_ratio =", scaledown_ratio)
#print(np.finfo(np.longdouble))
# Scale frame up or down to a 256x256 representation
resized = cv2.resize(cropped, None, fx=scaledown_ratio,
fy=scaledown_ratio, interpolation=cv2.INTER_AREA)
return resized
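
# A minimal usage sketch (hypothetical helper, not called by the pipeline):
# run frame_processing on one still image. Assumes the Haar cascade XML files
# sit next to this script, as they do for the main run below.
def example_process_single_image(image_path):
    """Sketch only: return the 256x256 face crop for one image file."""
    f_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    e_cascade = cv2.CascadeClassifier("haarcascade_eye.xml")
    return frame_processing(cv2.imread(image_path), f_cascade, e_cascade)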
def audio_processing(clip_num, frame_rate, frame_count):
    """Process audio clips via the MFCC descriptor.

    Keyword arguments:
    clip_num -- the number of the current video file
    frame_rate -- the frame rate of the current video file
    frame_count -- the total number of frames in the current video file
    """
    print("frame_rate =", frame_rate)
    print("frame_count =", frame_count)
    print("estimated video length = " + str(frame_count / frame_rate) + "s")
    (rate, sig) = wav.read("vids/dataset/audio" + str(clip_num) + ".wav")
    #frame_rate = 30
    #frame_rate = 29.95904248021286
    # One MFCC window step per video frame keeps the audio and video in sync
    frame_time = 1 / frame_rate
    print("frame_time = " + str(frame_time) + "s")
    # mfcc() returns a numpy array of size (NUMFRAMES by numcep); each row
    # holds one feature vector. numcep is the number of cepstral coefficients
    # to return (default 13). For a very basic understanding, the cepstrum
    # captures the rate of change across spectral bands.
    # Full signature: mfcc(signal, samplerate=16000, winlen=0.025,
    #     winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0,
    #     highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True,
    #     winfunc=<function <lambda>>)
    mfcc_feat = mfcc(sig, rate, winlen=0.025, winstep=frame_time,
                     numcep=13, nfilt=26, nfft=1200)
    return mfcc_feat
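
# A quick alignment sketch (hypothetical, not called by the pipeline): with
# winstep set to 1 / frame_rate, the number of MFCC rows should roughly match
# the video's frame count, so each saved row pairs with one extracted image.
def example_check_mfcc_alignment(clip_num, frame_rate, frame_count):
    """Sketch only: compare the MFCC row count against the video frame count."""
    feats = audio_processing(clip_num, frame_rate, frame_count)
    print("mfcc rows =", feats.shape[0], "video frames =", int(frame_count))
    print("coefficients per row =", feats.shape[1])  # 13 as configured above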
def data_processing(vid_num, face_cascade, eye_cascade):
    """Process the dataset of video and audio files for neural network training.

    Keyword arguments:
    vid_num -- the number of the current video file
    face_cascade -- pretrained Haar cascade classifier model for detecting faces
    eye_cascade -- pretrained Haar cascade classifier model for detecting eyes
    """
    # Create a VideoCapture object and read from the input file
    # (to read from a camera instead, pass 0 in place of the file name)
    #print(vid_num)
    print('\nvids/dataset/video' + str(vid_num) + '.mp4')
    cap = cv2.VideoCapture('vids/dataset/video' + str(vid_num) + '.mp4')
    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT)
    vid_fps = cap.get(cv2.CAP_PROP_FPS)
    vid_total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    print("result of cap.get(FPS) is:", vid_fps)
    print("result of cap.get(FRAME_COUNT) is:", vid_total_frames)
    # Check that the video file opened successfully
    if not cap.isOpened():
        raise Exception("Error opening video stream or file")
    new_frame_path = 'data/clip' + str(vid_num) + '/images/'
    new_audio_path = 'data/clip' + str(vid_num) + '/audio/'
    # Create the data folder and its subdirectories; the images directory
    # raises an error if it already exists (exist_ok=False)
    os.makedirs(new_frame_path, exist_ok=False)
    os.makedirs(new_audio_path, exist_ok=True)
    # Read until the video is completed
    i = 0
    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if ret:
            print("vid", vid_num, "frame", i)
            processed_frame = frame_processing(
                frame, face_cascade, eye_cascade)
            cv2.imwrite(new_frame_path + 'image' + str(i).zfill(4) + '.jpg',
                        processed_frame)
            i += 1
        else:
            # No frame returned: end of video, so break the loop
            break
    # When everything is done, release the video capture object
    cap.release()
    # Calculate the MFCCs of the audio recording
    mfcc_array = audio_processing(vid_num, vid_fps, vid_total_frames)
    # Split the 2D MFCC array into frame-specific rows and save each row as
    # its own file (np.save writes a fresh .npy binary file per row; it does
    # not append to an existing file)
    for row_num, row in enumerate(mfcc_array):
        #print(row)
        np.save("data/clip" + str(vid_num) + "/audio/" + str(row_num).zfill(6),
                row)
    print("all mfcc rows saved!")
    return
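
# A minimal read-back sketch (hypothetical, for a later training stage): load
# one image/MFCC pair produced by data_processing. Note the differing
# zero-padding widths used above: zfill(4) for images, zfill(6) for audio.
def example_load_pair(clip_num=1, frame_num=0):
    """Sketch only: return one (frame, mfcc_row) training pair."""
    image = cv2.imread('data/clip' + str(clip_num) + '/images/image'
                       + str(frame_num).zfill(4) + '.jpg')
    audio_row = np.load('data/clip' + str(clip_num) + '/audio/'
                        + str(frame_num).zfill(6) + '.npy')
    return image, audio_row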
# Create the face_cascade and eye_cascade objects
print("about to create cascades")
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier("haarcascade_eye.xml")
print("created cascades")
# Delete any previous data folder and old frames
print("about to delete data folder")
try:
    shutil.rmtree('data/')
    print("data folder deleted")
except FileNotFoundError:
    print("data folder did not exist")
except PermissionError:
    print("Error! Cannot delete data folder: an internal file is currently "
          "open. Close all open file(s) and try again!")
    exit(1)
# Execute the data processing pipeline over every clip in the dataset
for i in range(DATASET_SIZE):
    data_processing(i + 1, face_cascade, eye_cascade)
print("complete!")
File moved