Added debug prints for video and MFCC files

Added debug print() statements for fps, framerate and length. Temporarily changed MFCC output to .txt file for debugging purposes.

Added debug prints for video and MFCC files
Added debug print() statements for fps, framerate and length. Temporarily changed MFCC output to .txt file for debugging purposes.
c4b39d5d · Patel, Shrey G (UG - Comp Sci & Elec Eng) · 5f12a95d · c4b39d5d · c4b39d5d
Commit c4b39d5d authored 2 years ago by Patel, Shrey G (UG - Comp Sci & Elec Eng)
--- a/step2_and_step3.py
+++ b/step2_and_step3.py
@@ -142,11 +142,17 @@ def frame_processing(img, face_cascade, eye_cascade):
    return resized


-def audio_processing(clip_num):
+def audio_processing(clip_num, frame_rate, frame_count):
+
+    print("frame_rate =", frame_rate)
+    print("frame_count =", frame_count)
+    print("estimated video length = " + str(frame_count/frame_rate) + "s")

    (rate, sig) = wav.read("vids/dataset/audio" + str(clip_num) + ".wav")

-    frame_time = 1 / 30
+    #frame_rate = 30
+    #frame_rate = 29.95904248021286
+    frame_time = 1 / frame_rate
    print("frame_time = " + str(frame_time) + "s")

    # Returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
@@ -169,17 +175,25 @@ def data_processing(vid_num, face_cascade, eye_cascade):
    # If the input is the camera, pass 0 instead of the video file name
    #cap = cv2.VideoCapture('D:vids/dataset/video1.mp4')
    # print(vid_num)
-    print('vids/dataset/video' + str(vid_num) + '.mp4')
+    print('\nvids/dataset/video' + str(vid_num) + '.mp4')
    cap = cv2.VideoCapture('vids/dataset/video' + str(vid_num) + '.mp4')

+    #cap.set(cv2.CAP_PROP_FRAME_HEIGHT)
+    vid_fps = cap.get(cv2.CAP_PROP_FPS)
+    vid_total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
+    print("Result of cap.get(FPS) is: ", vid_fps)
+    print("Result of cap.get(FRAME_COUNT) is: ", vid_total_frames)
+
    # Check if camera opened successfully
    if (cap.isOpened() == False):
        raise Exception("Error opening video stream or file")

-    new_path = 'data/clip' + str(vid_num) + '/images/'
+    new_frame_path = 'data/clip' + str(vid_num) + '/images/'
+    new_audio_path = 'data/clip' + str(vid_num) + '/audio/'

    # Creates data folder and other subdirectories, throws error if it already exists
-    os.makedirs(new_path, exist_ok=False)
+    os.makedirs(new_frame_path, exist_ok=False)
+    os.makedirs(new_audio_path, exist_ok=True)

    # Read until video is completed
    i = 0
@@ -191,7 +205,7 @@ def data_processing(vid_num, face_cascade, eye_cascade):
            processed_frame = frame_processing(
                frame, face_cascade, eye_cascade)
            cv2.imwrite(
-                (new_path + 'image' + str(i).zfill(4) + '.jpg'), processed_frame)
+                (new_frame_path + 'image' + str(i).zfill(4) + '.jpg'), processed_frame)
            i += 1
        # Break the loop
        else:
@@ -200,8 +214,10 @@ def data_processing(vid_num, face_cascade, eye_cascade):
    # When everything done, release the video capture object
    cap.release()

-    mfcc_array = audio_processing(vid_num)
-    np.save("data/clip" + str(vid_num) + "/mfcc", mfcc_array)
+    mfcc_array = audio_processing(vid_num, vid_fps, vid_total_frames)
+    #np.save("data/clip" + str(vid_num) + "/mfcc", mfcc_array)
+    mfcc_array_row = 0
+    np.savetxt("data/clip" + str(vid_num) + "/audio/" + str(mfcc_array_row).zfill(6) + ".txt", mfcc_array)
    print("mfcc for audio" + str(vid_num) + " saved!")

    return

--- a/step3_test_script_multi_mfcc.py
+++ b/step3_test_script_multi_mfcc.py
 from python_speech_features import mfcc
-from python_speech_features import logfbank
+#from python_speech_features import logfbank
 import scipy.io.wavfile as wav
 import numpy as np

-frame_rate = 30
-clip_num = 1
+#frame_rate = 24.9743
+frame_rate = 29.95904248021286
+clip_num = 2

 mfcc_array_row = 0

@@ -17,6 +18,11 @@ print("frame_time = " + str(frame_time) + "s")
 #Parameters: numcep – the number of cepstrum to return, default 13
 #For a very basic understanding, cepstrum is the information of rate of change in spectral bands

+# NEED 146 MFCC rows!!!
+# 30 fps gives us 177 rows
+# 25 fps gives us 148 rows for video1, that's 146 rows if we discard the top and bottom row?
+
+
 #mfcc(signal, samplerate=16000, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512, lowfreq=0, highfreq=None, preemph=0.97, ceplifter=22, appendEnergy=True, winfunc=<function <lambda>>)
 mfcc_feat = mfcc(sig, rate, 0.025, frame_time, 13, 26, 1200)

@@ -28,7 +34,8 @@ print(fbank_feat[1:3, :])"""
 #Save an array to a binary file in NumPy .npy format.
 #Notes: Any data saved to the file is appended to the end of the file.

-np.save("data/clip" + str(clip_num) + "/audio/" + str(mfcc_array_row).zfill(6), mfcc_feat)
+#np.save("data/clip" + str(clip_num) + "/audio/" + str(mfcc_array_row).zfill(6), mfcc_feat)
+np.savetxt("data/clip" + str(clip_num) + "/audio/" + str(mfcc_array_row).zfill(6) + ".txt", mfcc_feat)
 print("MFCC saved!")

 #maybe test using np.savetxt() to see what the mfcc array output actually is first?