| { | |
| "query_token_id": "[unused0]", | |
| "doc_token_id": "[unused1]", | |
| "query_token": "[Q]", | |
| "doc_token": "[D]", | |
| "ncells": null, | |
| "centroid_score_threshold": null, | |
| "ndocs": null, | |
| "load_index_with_mmap": false, | |
| "index_path": null, | |
| "nbits": 1, | |
| "kmeans_niters": 20, | |
| "resume": false, | |
| "similarity": "cosine", | |
| "bsize": 4, | |
| "accumsteps": 1, | |
| "lr": 3e-6, | |
| "maxsteps": 400000, | |
| "save_every": null, | |
| "warmup": 20000, | |
| "warmup_bert": null, | |
| "relu": false, | |
| "nway": 64, | |
| "use_ib_negatives": false, | |
| "reranker": false, | |
| "distillation_alpha": 1.0, | |
| "ignore_scores": false, | |
| "model_name": null, | |
| "query_maxlen": 512, | |
| "attend_to_mask_tokens": false, | |
| "interaction": "colbert", | |
| "dim": 128, | |
| "doc_maxlen": 128, | |
| "mask_punctuation": true, | |
| "checkpoint": "\/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir", | |
| "triples": "\/future\/u\/okhattab\/root\/unit\/experiments\/2021.10\/downstream.distillation.round2.2_score\/round2.nway6.cosine.ib\/examples.64.json", | |
| "collection": "\/future\/u\/okhattab\/data\/MSMARCO\/collection.tsv", | |
| "queries": "\/future\/u\/okhattab\/data\/MSMARCO\/queries.train.tsv", | |
| "index_name": null, | |
| "overwrite": false, | |
| "root": "\/future\/u\/okhattab\/root\/unit\/experiments", | |
| "experiment": "2021.10", | |
| "index_root": null, | |
| "name": "kldR2.nway64.ib", | |
| "rank": 0, | |
| "nranks": 4, | |
| "amp": true, | |
| "gpus": 8, | |
| "meta": { | |
| "hostname": "gamma", | |
| "git_branch": "main", | |
| "git_hash": "58087227c8ead73499b761c7f7a569844d01d248", | |
| "git_commit_datetime": "2023-10-16 12:32:12+08:00", | |
| "current_datetime": "Apr 21, 2024 ; 12:29AM UTC (+0000)", | |
| "cmd": "supervised-fine-tune2.py --model_name_or_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-reason-hf --colbert_path \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/checkpoint-3500\/ir --bf16 True --data_path error_20.json --output_dir \/data\/experiment_data\/junda\/chatdoctor\/llama-13b-32k-medqa-open-ir\/ --cache_dir \/home\/jwang\/.cache --model_max_length 32768 --use_flash_attn True --low_rank_training True --num_train_epochs 1 --per_device_train_batch_size 1 --per_device_eval_batch_size 2 --gradient_accumulation_steps 8 --save_strategy epoch --save_total_limit 5 --learning_rate 1e-5 --weight_decay 0.0 --warmup_steps 20 --lr_scheduler_type constant_with_warmup --logging_steps 1 --deepspeed ds_configs\/stage2.json --tf32 True", | |
| "version": "colbert-v0.4" | |
| } | |
| } | |