diff --git a/condor/eval.sbatch b/condor/eval.sbatch
index 5e2882807a0a42ede6f765200926135a5c092a0b..1b83d453bd9f6bf66d88040eaa5511d4dd49903c 100644
--- a/condor/eval.sbatch
+++ b/condor/eval.sbatch
@@ -1,4 +1,22 @@
 #!/bin/bash
+prog=$1
+weight_path=$2
+output_dir=$3
+# node-local destinations for the staged FFCV files; /tmp is an assumption,
+# adjust to your cluster's local scratch
+train_path=/tmp/IN1K_train_1000.ffcv
+val_path=/tmp/IN1K_val_1000.ffcv
+
+if [ -z "$prog" ] || [ -z "$weight_path" ] || [ -z "$output_dir" ]
+then
+    echo "usage: $0 <prog> <weight_path> <output_dir>" >&2
+    exit 1
+fi
+echo "running $prog with $weight_path to $output_dir"
+
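+# unquoted heredoc: the $variables below are expanded here, at submission time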
+sbatch <<EOT
+#!/bin/bash
 #SBATCH -p big
 #SBATCH --time=24:00:00
 #SBATCH --nodes=1
@@ -7,8 +25,14 @@
 #SBATCH --mem=400G
 #SBATCH --job-name=eval
 #SBATCH --output=slurm.out
+#SBATCH --error=slurm.err
+
 
-eval "$(conda shell.bash hook)"
-conda activate pytorch
+# eval "$(conda shell.bash hook)"
+# conda activate pytorch
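+# stage the FFCV files onto node-local storage before training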
+rsync data/IN1K_train_1000.ffcv $train_path
+rsync data/IN1K_val_1000.ffcv $val_path
 
-vitrun --nproc_per_node=8 ${@}
\ No newline at end of file
+vitrun --nproc_per_node=8 $prog --gin VisionTransformer.global_pool='"avg"' --train_path $train_path --val_path $val_path -w $weight_path --output_dir=$output_dir
+EOT
diff --git a/vitookit/evaluation/eval_cls.py b/vitookit/evaluation/eval_cls.py
index de4568bc1992cbdfb423061aad6dcb01845ed27c..3c2863a3b2d98d840db75a3328329db57d5154ae 100644
--- a/vitookit/evaluation/eval_cls.py
+++ b/vitookit/evaluation/eval_cls.py
@@ -29,7 +29,7 @@ from vitookit.datasets.transform import three_augmentation
 from vitookit.utils.helper import *
 from vitookit.utils import misc
 from vitookit.models.build_model import build_model
-from vitookit.datasets import build_dataset
+from vitookit.datasets.build_dataset import build_dataset
 import wandb
 
 
diff --git a/vitookit/evaluation/eval_cls1_ffcv.py b/vitookit/evaluation/eval_cls1_ffcv.py
index 310a3c73f7aa870207de813854e49b324b072381..ddb2268a2d04701234e072875efd7b061a28f7d3 100644
--- a/vitookit/evaluation/eval_cls1_ffcv.py
+++ b/vitookit/evaluation/eval_cls1_ffcv.py
@@ -101,24 +101,8 @@ def get_args_parser():
 
     # Augmentation parameters
     parser.add_argument('--ThreeAugment', action='store_true', default=True) #3augment
-    parser.add_argument('--src',action='store_true', default=False, 
-                        help="Use Simple Random Crop (SRC) or Random Resized Crop (RRC). Use SRC when there is less risk of overfitting, such as on ImageNet-21k.")
-    parser.add_argument('--color_jitter', type=float, default=None, metavar='PCT',
-                        help='Color jitter factor (enabled only when not using Auto/RandAug)')
-    parser.add_argument('--aa', type=str, default='rand-m9-mstd0.5-inc1', metavar='NAME',
-                        help='Use AutoAugment policy. "v0" or "original". " + "(default: rand-m9-mstd0.5-inc1)'),
     parser.add_argument('--smoothing', type=float, default=0.1, help='Label smoothing (default: 0.1)')
 
-    # * Random Erase params
-    parser.add_argument('--reprob', type=float, default=0.25, metavar='PCT',
-                        help='Random erase prob (default: 0.25)')
-    parser.add_argument('--remode', type=str, default='pixel',
-                        help='Random erase mode (default: "pixel")')
-    parser.add_argument('--recount', type=int, default=1,
-                        help='Random erase count (default: 1)')
-    parser.add_argument('--resplit', action='store_true', default=False,
-                        help='Do not random erase first (clean) augmentation split')
-
     # * Mixup params
     parser.add_argument('--mixup', type=float, default=0.8,
                         help='mixup alpha, mixup enabled if > 0. (default: 0.8)')
@@ -297,7 +281,13 @@ def main(args):
         import torch._dynamo
         torch._dynamo.config.suppress_errors = True 
     trunc_normal_(model.head.weight, std=2e-5)
-    
+    # attention-only fine-tuning ("Three things everyone should know about
+    # vision transformers"): train only the attention blocks, head, and pos_embed
+    for name, param in model.named_parameters():
+        if 'head' in name or '.attn.' in name or 'pos_embed' in name:
+            param.requires_grad = True
+        else:
+            param.requires_grad = False
     model.to(device)
 
     model_without_ddp = model
@@ -320,8 +310,6 @@
         model_without_ddp = model.module
     optimizer = create_optimizer(args, model_without_ddp)
-    # hack to optimize patch embedding
-    print([ i.shape for i in optimizer.param_groups[1]['params']])
-    optimizer.param_groups[1]['lr_scale'] = 1.0
+
     loss_scaler = NativeScaler()
 
     lr_scheduler, _ = create_scheduler(args, optimizer)
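
Aside: the `requires_grad` filter above is attention-only fine-tuning in the
spirit of "Three things everyone should know about vision transformers" (the
"[3 Things]" comment). A minimal standalone sketch of the same filter, assuming
a timm ViT as a stand-in model (the model name and the parameter-count check
are illustrative, not part of this patch):

```python
import timm

# timm ViTs use the same 'blocks.N.attn.*', 'head.*', 'pos_embed' naming
# scheme that the substring filter above matches against
model = timm.create_model("vit_small_patch16_224", pretrained=False)

for name, param in model.named_parameters():
    # train only the attention blocks, classifier head, and position embeddings
    param.requires_grad = "head" in name or ".attn." in name or "pos_embed" in name

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable / 1e6:.1f}M of {total / 1e6:.1f}M parameters")
```
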
diff --git a/vitookit/evaluation/eval_cls_ffcv.py b/vitookit/evaluation/eval_cls_ffcv.py
index 0ee6485c0ae9930c41c3966ea5f69b42ea4cff4c..c63853e54917b5d5bb371a38eaa92d78338f6764 100644
--- a/vitookit/evaluation/eval_cls_ffcv.py
+++ b/vitookit/evaluation/eval_cls_ffcv.py
@@ -110,7 +110,7 @@ def get_args_parser():
     parser.add_argument('--smoothing', type=float, default=0.1, help='Label smoothing (default: 0.1)')
 
     # * Random Erase params
-    parser.add_argument('--reprob', type=float, default=0.25, metavar='PCT',
+    parser.add_argument('--reprob', type=float, default=None, metavar='PCT',
-                        help='Random erase prob (default: 0.25)')
+                        help='Random erase prob (default: None)')
     parser.add_argument('--remode', type=str, default='pixel',
                         help='Random erase mode (default: "pixel")')