diff --git a/.gitignore b/.gitignore index 5fdd7ec4cd446bd96cf06f220df574fa640e8188..f99d6555e83aa8b29f402fa27b461026b897cd2a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ outputs wandb results - +*.pth # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/EVALUATION.md b/EVALUATION.md index 9ed4f0fe63b2a1b0869704b2541cf25643956a16..d3ba7c60c29b0c4a8a524c7a7f18e4a536d79254 100644 --- a/EVALUATION.md +++ b/EVALUATION.md @@ -48,14 +48,14 @@ Reference results for [MAE in linear probing](https://github.com/facebookresearc |:------------------:|:--------:|:---------:|:--------:| | paper (TF/TPU) | 68.0 | 75.8 | 76.6 | | MAE repo (PT/GPU) | 67.8 | 76.0 | 77.2 | -| Our repo (PT/GPU) | 67.8 | 76.0 | 77.2 | +| Our repo (PT/GPU) | [67.67](https://wandb.ai/dlib/EfficientSSL/runs/6r00w5jk) | - | - | To train a single classifier on frozen weights, run: ``` -submitit --module vitookit.evaluation.eval_linear_ffcv --train_path ~/data/ffcv/IN1K_train_500_95.ffcv --val_path ~/data/ffcv/IN1K_val_500_95.ffcv -w ~/models/mae_pretrain_vit_base.pth --checkpoint_key=model --gin VisionTransformer.global_pool='"avg"' --fast_dir /raid/local_scratch/jxw30-hxc19/ --batch_size=128 --accum_iter=16 --blr=0.05 +submitit --module vitookit.evaluation.eval_linear_ffcv --train_path ~/data/ffcv/IN1K_train_500_95.ffcv --val_path ~/data/ffcv/IN1K_val_500_95.ffcv -w ~/models/mae_pretrain_vit_base.pth --checkpoint_key=model --gin VisionTransformer.global_pool='"avg"' --fast_dir /raid/local_scratch/jxw30-hxc19/ --batch_size=128 --accum_iter=16 --blr=0.1 ``` -Effective batch size is 16384 = 128 (batch_size per gpu) * 16 (accum_iter) * 8. Learning rate is 3.2 = 0.05 * 16384 / 256. +Effective batch size is 16384 = 128 (batch_size per gpu) * 16 (accum_iter) * 8. Learning rate is 6.4 = 0.1 * 16384 / 256. 
diff --git a/vitookit/utils/helper.py b/vitookit/utils/helper.py index c5f664b8313c0845591f740548c6b0a218f18046..50749bbfdfc1c685d9dd5aefcae6f434ed53ac09 100644 --- a/vitookit/utils/helper.py +++ b/vitookit/utils/helper.py @@ -29,7 +29,8 @@ def aug_parse(parser: argparse.ArgumentParser): parser.add_argument('--gin', nargs='+', help='Overrides config values. e.g. --gin "section.option=value"') - args, _ = parser.parse_known_args() + args, unkowns = parser.parse_known_args() + print("warn! unknown args: ", unkowns) if args.output_dir: output_dir=Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) @@ -165,7 +166,7 @@ def load_pretrained_weights(model, pretrained_weights, path = pretrained_weights.replace("artifact:","") import wandb api = wandb.Api() - artifact = api.artifact(path+":v0", type='model') + artifact = api.artifact(path, type='model') artifact_dir = artifact.download(os.getenv("output_dir","/tmp/models")) print("Load pre-trained checkpoint from: %s" % (artifact_dir)) pretrained_weights = os.path.join(artifact_dir, "weights.pth") @@ -181,7 +182,7 @@ def load_pretrained_weights(model, pretrained_weights, elif os.path.isfile(pretrained_weights): state_dict = torch.load(pretrained_weights, map_location='cpu') else: - raise ValueError(f'load pretrained weights from {pretrained_weights} failed!') + raise ValueError(f'load pretrained weights from {pretrained_weights} failed!') epoch = state_dict['epoch'] if 'epoch' in state_dict else -1 print("Load pre-trained checkpoint from: %s[%s] at %d epoch" % (pretrained_weights, checkpoint_key, epoch)) diff --git a/vitookit/utils/submitit.py b/vitookit/utils/submitit.py index b275a94343f37d363d34fbcd7c3fc69c7c5995e0..3dc5233bfd32814c52ebd9efe7751d378f99e556 100644 --- a/vitookit/utils/submitit.py +++ b/vitookit/utils/submitit.py @@ -90,9 +90,9 @@ class Trainer(object): import os import submitit job_env = submitit.JobEnvironment() - print("Requeuing ", self.args) + print("Requeuing ", self.args, 
self.module_args) - output_dir = Path(str(self.args.job_dir)) + output_dir = self.module_args.output_dir checkpoint_file = os.path.join(output_dir, "checkpoint.pth") self.args.dist_url = get_init_file(output_dir).as_uri() @@ -107,7 +107,7 @@ class Trainer(object): import submitit module_args = self.module_args job_env = submitit.JobEnvironment() - output_dir = Path(str(self.args.job_dir).replace("%j", str(job_env.job_id))) + output_dir = str(self.args.job_dir).replace("%j", str(job_env.job_id)) module_args.output_dir = output_dir module_args.gpu = job_env.local_rank diff --git a/weights.pth b/weights.pth deleted file mode 100644 index cb59d5a289ac86d135da3cd6e204e5869ed415a7..0000000000000000000000000000000000000000 Binary files a/weights.pth and /dev/null differ