diff --git a/vitookit/datasets/ffcv_transform.py b/vitookit/datasets/ffcv_transform.py
index 89d749e18474bf6bb8deeb490582271d6d415bc9..a8ea5890d7a92f5d25b1efe78e13a481d80c98c1 100644
--- a/vitookit/datasets/ffcv_transform.py
+++ b/vitookit/datasets/ffcv_transform.py
@@ -29,6 +29,7 @@ from ffcv.fields.decoders import IntDecoder, RandomResizedCropRGBImageDecoder, S
 
 import torch
 import torchvision.transforms as tfms
+from torchvision.transforms import functional as F
 from torch import nn
 
 IMAGENET_MEAN = np.array([0.485, 0.456, 0.406]) * 255
@@ -315,6 +316,69 @@ class Solarization(Operation):
         return previous_state, None
 
 
+@njit
+def generate_gaussian_filter(sigma: int | float,filter_shape: list | tuple = (3, 3)):
+    """Normalized float32 Gaussian kernel; sigma is the std-dev, filter_shape must be odd-sized."""
+
+    m, n = filter_shape
+    m_half = m // 2
+    n_half = n // 2
+
+    # initializing the filter
+    gaussian_filter = np.zeros((m, n), np.float32)
+    k = 1 / (2.0 * sigma**2.0)
+    # generating the filter
+    for y in range(-m_half, m_half+1):
+        for x in range(-n_half, n_half+1):
+            exp_term = np.exp(-(x**2 + y**2) *k)
+            gaussian_filter[y+m_half, x+n_half] = exp_term
+    kernel = gaussian_filter/gaussian_filter.sum()
+    return kernel
+
+@njit
+def convolution(image: np.ndarray, kernel: np.ndarray, output: np.ndarray) -> np.ndarray:
+    '''
+    Per-channel sliding-window filter (kernel is not flipped) over an HxWxC image.
+    Writes into `output`; border pixels the window cannot reach are copied from `image`.
+    ### Example
+    >>> import numpy as np
+    >>> from PIL import Image
+    >>>
+    >>> kernel = np.array(
+    >>>     [[-1, 0, 1],
+    >>>      [-2, 0, 2],
+    >>>      [-1, 0, 1]], np.float32
+    >>> )
+    >>> img = np.array(Image.open('./lenna.png'))
+    >>> res = convolution(img, kernel, np.empty_like(img))
+    '''
+    if len(image.shape) == 3:
+        m_i, n_i, c_i = image.shape
+    else:
+        raise Exception('Shape of image not supported')
+
+    m_k, n_k = kernel.shape
+
+    y_strides = m_i - m_k + 1  # possible number of strides in y direction
+    x_strides = n_i - n_k + 1  # possible number of strides in x direction
+
+    pad_y = (m_k - 1) // 2
+    pad_x = (n_k - 1) // 2
+    # center region
+    for i in range(y_strides):
+        for j in range(x_strides):
+            for c in range(c_i):  # looping over the all channels
+                sub_matrix = image[i:i+m_k, j:j+n_k,c]
+                output[i+pad_y,j+pad_x, c] = np.sum(sub_matrix * kernel)
+    # no filter for the padding region
+    output[:pad_y] = image[:pad_y]
+    output[pad_y+y_strides:] = image[pad_y+y_strides:]
+    output[:, :pad_x] = image[:, :pad_x]
+    output[:, pad_x+x_strides:] = image[:, pad_x+x_strides:]
+
+    return output
+
+
 class ThreeAugmentation(Operation):
     def __init__(
         self, threshold=128, radius_min=0.1, radius_max=2.
@@ -330,32 +394,35 @@ class ThreeAugmentation(Operation):
         radius_min = self.radius_min
         radius_max = self.radius_max
 
-        def randchoice(images, _):
+        def randchoice(images, dst):
             for i in my_range(images.shape[0]):
                 idx = random.randint(0, 2)
                 if idx == 0:
                     # solarize
                     mask = images[i] >= threshold
-                    images[i] = np.where(mask, 255 - images[i], images[i])
+                    dst[i] = np.where(mask, 255 - images[i], images[i])
                 elif idx == 1:
                     # grayscale
-                    images[i] = (
+                    dst[i] = (
                         0.2989 * images[i, ..., 0:1]
                         + 0.5870 * images[i, ..., 1:2]
                         + 0.1140 * images[i, ..., 2:3]
                     )
                 else:
-                    # TODO: GaussianBlur
-                    radius = np.random.uniform(radius_min, radius_max)
-                    # images[i] = gaussian_filter(images[i], radius)
-            return images
-        # randchoice.is_parallel = True
-
+                    sigma = np.random.uniform(radius_min, radius_max)
+                    kernel = generate_gaussian_filter(sigma,filter_shape=(5, 5))
+                    convolution(images[i], kernel, dst[i])
+
+            return dst
+        randchoice.is_parallel = True
         return randchoice
 
     def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]:
-        # No updates to state or extra memory necessary!
-        return previous_state, None
+        # Request an output buffer shaped like the input so randchoice can write into dst.
+        mem_alloc = AllocationQuery(previous_state.shape,dtype=previous_state.dtype)
+        return previous_state, mem_alloc
+
+
 
 @gin.configurable
 def ThreeAugmentPipeline(img_size=224,scale=(0.08,1), color_jitter=None):
@@ -376,7 +443,7 @@ def ThreeAugmentPipeline(img_size=224,scale=(0.08,1), color_jitter=None):
         # ToDevice(torch.device('cuda')),
         ToTorchImage(),
     ])
-    label_pipeline = [IntDecoder(), ToTensor(),ToDevice(torch.device('cuda')),View(-1)]
+    label_pipeline = [IntDecoder(), ToTensor(),View(-1)]
     # Pipeline for each data field
     pipelines = {
         'image': image_pipeline,
diff --git a/vitookit/datasets/transform.py b/vitookit/datasets/transform.py
index 4fdfb0f384d9e218e1fd047bc1a7da72f2697084..24f877c932987a09d0cdb08036bc7f785d16e4ad 100644
--- a/vitookit/datasets/transform.py
+++ b/vitookit/datasets/transform.py
@@ -13,7 +13,7 @@ IMAGENET_MEAN = np.array([0.485, 0.456, 0.406]) * 255
 IMAGENET_STD = np.array([0.229, 0.224, 0.225]) * 255
 
 @gin.configurable
-def SimplePipeline(img_size=224,scale=(0.2,1), ratio=(3.0/4.0, 4.0/3.0)):
+def SimplePipeline(img_size=224,scale=(0.2,1), ratio=(3.0/4.0, 4.0/3.0),blur=False):
     image_pipeline = [
         RandomResizedCropRGBImageDecoder((img_size, img_size), scale=scale,ratio=ratio),
         RandomHorizontalFlip(),
@@ -22,6 +22,8 @@ def SimplePipeline(img_size=224,scale=(0.2,1), ratio=(3.0/4.0, 4.0/3.0)):
         ToDevice(torch.device('cuda')),
         ToTorchImage(),
     ]
+    if blur:
+        image_pipeline.append(transforms.GaussianBlur(3))
     label_pipeline = [IntDecoder(), ToTensor(),ToDevice(torch.device('cuda'))]
     # Pipeline for each data field
     pipelines = {
diff --git a/vitookit/evaluation/eval_cls.py b/vitookit/evaluation/eval_cls.py
index d06c688632b723d5ea4deadf2613218a5006904d..e0896cff0c0291a6bb21aff0117389deb8c1a646 100644
--- a/vitookit/evaluation/eval_cls.py
+++ b/vitookit/evaluation/eval_cls.py
@@ -335,7 +335,10 @@ def main(args):
     if args.pretrained_weights:
         load_pretrained_weights(model, args.pretrained_weights, checkpoint_key=args.checkpoint_key, prefix=args.prefix)
     if args.compile:
-        model = torch.compile(model)
+        model = torch.compile(model)
+        import torch._dynamo
+        torch._dynamo.config.suppress_errors = True
+
     trunc_normal_(model.head.weight, std=2e-5)
     model.to(device)
 
diff --git a/vitookit/evaluation/eval_cls_ffcv.py b/vitookit/evaluation/eval_cls_ffcv.py
index 9a477d16fb5ec0ac310b2b0396d0e87f8ac607f5..b71e3aa99207fba4aa80c0868d3e58c27ae10bd5 100644
--- a/vitookit/evaluation/eval_cls_ffcv.py
+++ b/vitookit/evaluation/eval_cls_ffcv.py
@@ -106,7 +106,7 @@ def get_args_parser():
                         help='LR decay rate (default: 0.1)')
 
     # Augmentation parameters
-    parser.add_argument('--ThreeAugment', action='store_true', default=False) #3augment
+    parser.add_argument('--ThreeAugment', action='store_true', default=True) #3augment
     parser.add_argument('--src',action='store_true', default=False,
                         help="Use Simple Random Crop (SRC) or Random Resized Crop (RRC). Use SRC when there is less risk of overfitting, such as on ImageNet-21k.")
     parser.add_argument('--color_jitter', type=float, default=None, metavar='PCT',
@@ -300,6 +300,8 @@ def main(args):
         load_pretrained_weights(model, args.pretrained_weights, checkpoint_key=args.checkpoint_key, prefix=args.prefix)
     if args.compile:
         model = torch.compile(model)
+        import torch._dynamo
+        torch._dynamo.config.suppress_errors = True
     trunc_normal_(model.head.weight, std=2e-5)
     model.to(device)
 
diff --git a/vitookit/evaluation/eval_linear.py b/vitookit/evaluation/eval_linear.py
index 8a2ec0eea121f5ce162e83c417b0aa80b563d6e1..aeac274b79e0354d5abe18357364804a6db6f861 100644
--- a/vitookit/evaluation/eval_linear.py
+++ b/vitookit/evaluation/eval_linear.py
@@ -213,6 +213,8 @@ def main(args):
 
     if args.compile:
         model = torch.compile(model)
+        import torch._dynamo
+        torch._dynamo.config.suppress_errors = True
 
     model.to(device)
     model_without_ddp = model
diff --git a/vitookit/evaluation/eval_linear_ffcv.py b/vitookit/evaluation/eval_linear_ffcv.py
index 5368cc36fa1a4b30cbdeb968844c936fbe2c036b..b37c0792811624006d8bd680178117c8213feff7 100644
--- a/vitookit/evaluation/eval_linear_ffcv.py
+++ b/vitookit/evaluation/eval_linear_ffcv.py
@@ -167,7 +167,9 @@ def main(args):
 
     if args.compile:
         model = torch.compile(model)
-
+        import torch._dynamo
+        torch._dynamo.config.suppress_errors = True
+
     model.to(device)
 
     model_without_ddp = model