H-EmbodVis
/

PUMA

dynamic-manipulation

vision-language-action

Model card Files Files and versions

PUMA / config.full.yaml

HENGFANG's picture

Add files using upload-large-folder tool

45bc8f7 verified about 2 months ago

history blame contribute delete

3.24 kB

	run_id: puma-domino-dynamic-35task
	run_root_dir: ./result/output/Dynamic_VLA
	seed: 42
	trackers:
	- jsonl
	- wandb
	wandb_entity: heng_
	wandb_project: Dynamic_VLA
	is_debug: false
	framework:
	name: PUMA
	history_flow_stage: stage2
	qwenvl:
	base_vlm: ./playground/Pretrained_models/Qwen3-VL-4B-Instruct-Action
	attn_implementation: sdpa
	vl_hidden_dim: 2560
	action_model:
	action_model_type: MLP
	action_hidden_dim: 2560
	action_dim: 14
	state_dim: 14
	future_action_window_size: 15
	past_action_window_size: 0
	world_model:
	enabled: true
	world_query_num: 4
	loss_weight: 0.05
	supervision: per_frame
	feature_loss: cosine
	grounding_mode: image
	future_view_index: 0
	dino_backbone: dinov2_vitb14
	world_token: <\|world\|>
	grounding:
	sam2_model_config: configs/sam2.1/sam2.1_hiera_l.yaml
	sam2_checkpoint: ./playground/Pretrained_models/grounded_sam2/sam2.1_hiera_large.pt
	grounding_dino_config: ./playground/Pretrained_models/grounded_sam2/GroundingDINO_SwinT_OGC.py
	grounding_dino_checkpoint: ./playground/Pretrained_models/grounded_sam2/groundingdino_swint_ogc.pth
	box_threshold: 0.35
	text_threshold: 0.25
	multimask_output: false
	max_boxes: 1
	video_prompt: mask
	cache:
	enabled: true
	read: true
	write: true
	dirname: grounding_cache
	version: v1
	debug:
	enabled: false
	output_dir: ./grounding_output
	include_box: true
	include_mask: true
	datasets:
	vla_data:
	dataset_py: lerobot_datasets
	num_workers: 8
	data_root_dir: ./data/robotwin/dynamic-35tasks-clean-level1
	data_mix: robotwin_dynamic_task
	action_type: abs_qpos
	default_image_resolution:
	- 3
	- 224
	- 224
	per_device_batch_size: 8
	load_all_data_for_training: true
	obs:
	- image_0
	image_size:
	- 224
	- 224
	video_backend: torchvision_av
	include_state: false
	future_k: 4
	future_stride: 4
	history_k: 4
	history_stride: 4
	history_mode: flow
	history_image_size:
	- 64
	- 64
	history_flow:
	compute_size:
	- 64
	- 64
	cpu_worker_num: 12
	cache:
	enabled: true
	read: true
	write: true
	dirname: history_flow_cache
	version: v1
	trainer:
	epochs: 100
	max_train_steps: 100000
	num_warmup_steps: 5000
	save_interval: 10000
	eval_interval: 1000
	learning_rate:
	base: 1.0e-05
	qwen_vl_interface: 1.0e-05
	action_model: 0.0001
	lr_scheduler_type: cosine_with_min_lr
	scheduler_specific_kwargs:
	min_lr: 5.0e-07
	freeze_modules: null
	loss_scale:
	vla: 1.0
	vlm: 0.0
	repeated_diffusion_steps: 4
	max_grad_norm: 1.0
	warmup_ratio: 0.1
	weight_decay: 0.0
	logging_frequency: 100
	gradient_clipping: 1.0
	gradient_accumulation_steps: 1
	optimizer:
	name: AdamW
	betas:
	- 0.9
	- 0.95
	eps: 1.0e-08
	weight_decay: 1.0e-08
	is_resume: false
	resume_epoch: null
	resume_step: null
	enable_gradient_checkpointing: true
	enable_mixed_precision_training: true
	output_dir: ./result/output/Dynamic_VLA/20260301-qwenoft-robotwin_dynamic_task-qwenaction-world-query-flow-stage2-h4s4f4s4-h64w64-dynamic-35task