Skip to content
Snippets Groups Projects
Commit 6d15e6c4 authored by Chrol-Cannon, Joseph Dr (Computer Science)'s avatar Chrol-Cannon, Joseph Dr (Computer Science)
Browse files

finalize i3D processing in feature regen

parent 3ece3c41
No related branches found
No related tags found
No related merge requests found
......@@ -27,7 +27,7 @@ class FrameFV:
def __init__(self,path,args):
self.anno = annot_np(path)
self.net = VideoModelGlobalCoordLatent(args)
self.pre_resize_shape = (224, 224)
self.pre_resize_shape = (256, 340)
self.random_crop = gtransforms.GroupMultiScaleCrop(output_size=224,
scales=[1],
max_distort=0,
......@@ -39,23 +39,27 @@ class FrameFV:
# load video to ndarray list
img_array = load_av(finput)
print(img_array[0].shape)
#for i in range(len(img_array)):
# img_array[i] = cv2.resize(img_array[i],self.pre_resize_shape)
img_array = [cv2.resize(img, (self.pre_resize_shape[1], self.pre_resize_shape[0])) for img in img_array]
# convert BGR to RGB
frames = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in img_array]
# convert ndarray to array of PIL Images for resize and cropping
frames = [Image.fromarray(img.astype('uint8'), 'RGB') for img in frames]
# resize
frames = [img.resize((self.pre_resize_shape[1], self.pre_resize_shape[0]), Image.BILINEAR) for img in frames]
# crop
frames, (offset_h, offset_w, crop_h, crop_w) = self.random_crop(frames)
# convert back from PIL to ndarray for cv2 channel separation
frames = [np.array(img) for img in frames]
# separate channels into R,G,B frame sequences
rs = []
gs = []
bs = []
for i in range(len(img_array)//3):
B, R, G = cv2.split(img_array[i])
for i in range(len(frames)):
R, G, B = cv2.split(frames[i])
rs.append(R)
gs.append(G)
bs.append(B)
frames = [rs, gs, bs]
#frames = [Image.fromarray(img.astype('uint8'), 'RGB') for img in img_array]
#frames, (offset_h, offset_w, crop_h, crop_w) = self.random_crop(frames)
# read frame annotations into Sequence
seq = SmthSequence()
......@@ -72,12 +76,8 @@ class FrameFV:
relations.append(fv)
relations = np.asarray(relations)
# TODO bb category embedding per frame
# i3D features per frame
#clip = torch.from_numpy(np.asarray([[img_array[0],img_array[1],img_array[2]]]))
clip = torch.from_numpy(np.asarray([frames]))
#clip = img_array
print(clip.shape)
clip = clip.float()
glo, vid = self.net.i3D(clip)
......@@ -111,7 +111,7 @@ if __name__ == '__main__':
help='primary image input size')
parser.add_argument('--batch_size', '-b', default=72, type=int,
metavar='N', help='mini-batch size (default: 72)')
parser.add_argument('--num_classes', default=50, type=int,
parser.add_argument('--num_classes', default=174, type=int,
help='num of class in the model')
parser.add_argument('--num_boxes', default=4, type=int,
help='num of boxes for each image')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment