""" A convenience script to playback random demonstrations using the decoupled_wbc controller from a set of demonstrations stored in a hdf5 file. Arguments: --dataset (str): Path to demonstrations --use-actions (optional): If this flag is provided, the actions are played back through the MuJoCo simulator, instead of loading the simulator states one by one. --use-wbc-goals (optional): If set, will use the stored WBC goals to control the robot, otherwise will use the actions directly. Only relevant if --use-actions is set. --use-teleop-cmd (optional): If set, will use teleop IK directly with WBC timing for action generation. Only relevant if --use-actions is set. --visualize-gripper (optional): If set, will visualize the gripper site --save-video (optional): If set, will save video of the playback using offscreen rendering --video-path (optional): Path to save the output video. If not specified, will use the nearest folder to dataset and save as playback_video.mp4 --num-episodes (optional): Number of episodes to playback/record (if None, plays random episodes) Example: $ python decoupled_wbc/control/main/teleop/playback_sync_sim_data.py --dataset output/robocasa_datasets/ --use-actions --use-wbc-goals $ python decoupled_wbc/control/main/teleop/playback_sync_sim_data.py --dataset output/robocasa_datasets/ --use-actions --use-teleop-cmd # Record video of the first 5 episodes using WBC goals $ python decoupled_wbc/control/main/teleop/playback_sync_sim_data.py --dataset output/robocasa_datasets/ --use-actions --use-wbc-goals --save-video --num-episodes 5 """ import json import os from pathlib import Path import time from typing import Optional import cv2 import numpy as np import rclpy from robosuite.environments.robot_env import RobotEnv from tqdm import tqdm import tyro from decoupled_wbc.control.main.teleop.configs.configs import SyncSimPlaybackConfig from decoupled_wbc.control.robot_model.instantiation import get_robot_type_and_model from decoupled_wbc.control.utils.sync_sim_utils import ( generate_frame, get_data_exporter, get_env, get_policies, ) from decoupled_wbc.data.constants import RS_VIEW_CAMERA_HEIGHT, RS_VIEW_CAMERA_WIDTH from decoupled_wbc.data.exporter import TypedLeRobotDataset CONTROL_NODE_NAME = "playback_node" GREEN_BOLD = "\033[1;32m" RED_BOLD = "\033[1;31m" RESET = "\033[0m" def load_lerobot_dataset(root_path, max_episodes=None): task_name = None episodes = [] start_index = 0 with open(Path(root_path) / "meta/episodes.jsonl", "r") as f: for line in f: episode = json.loads(line) episode["start_index"] = start_index start_index += episode["length"] assert ( task_name is None or task_name == episode["tasks"][0] ), "All episodes should have the same task name" task_name = episode["tasks"][0] episodes.append(episode) dataset = TypedLeRobotDataset( repo_id="tmp/test", root=root_path, load_video=False, ) script_config = dataset.meta.info["script_config"] assert len(dataset) == start_index, "Dataset length does not match expected length" # Limit episodes if specified if max_episodes is not None: episodes = episodes[:max_episodes] print( f"Loading only first {len(episodes)} episodes (limited by max_episodes={max_episodes})" ) f = {} seeds = [] for ep in tqdm(range(len(episodes))): seed = None f[f"data/demo_{ep + 1}/states"] = [] f[f"data/demo_{ep + 1}/actions"] = [] f[f"data/demo_{ep + 1}/teleop_cmd"] = [] f[f"data/demo_{ep + 1}/wbc_goal"] = [] start_index = episodes[ep]["start_index"] end_index = start_index + episodes[ep]["length"] for i in tqdm(range(start_index, end_index)): frame = dataset[i] # load the seed assert ( seed is None or seed == np.array(frame["observation.sim.seed"]).item() ), "All observations in an episode should have the same seed" seed = np.array(frame["observation.sim.seed"]).item() # load the state mujoco_state_len = frame["observation.sim.mujoco_state_len"] mujoco_state = frame["observation.sim.mujoco_state"] f[f"data/demo_{ep + 1}/states"].append(np.array(mujoco_state[:mujoco_state_len])) # load the action action = frame["action"] f[f"data/demo_{ep + 1}/actions"].append(np.array(action)) # load the teleop command teleop_cmd = { "left_wrist": np.array(frame["observation.sim.left_wrist"].reshape(4, 4)), "right_wrist": np.array(frame["observation.sim.right_wrist"].reshape(4, 4)), "left_fingers": { "position": np.array(frame["observation.sim.left_fingers"].reshape(25, 4, 4)), }, "right_fingers": { "position": np.array(frame["observation.sim.right_fingers"].reshape(25, 4, 4)), }, "target_upper_body_pose": np.array(frame["observation.sim.target_upper_body_pose"]), "base_height_command": np.array(frame["teleop.base_height_command"]), "navigate_cmd": np.array(frame["teleop.navigate_command"]), } f[f"data/demo_{ep + 1}/teleop_cmd"].append(teleop_cmd) # load the WBC goal wbc_goal = { "wrist_pose": np.array(frame["action.eef"]), "target_upper_body_pose": np.array(frame["observation.sim.target_upper_body_pose"]), "navigate_cmd": np.array(frame["teleop.navigate_command"]), "base_height_command": np.array(frame["teleop.base_height_command"]), } f[f"data/demo_{ep + 1}/wbc_goal"].append(wbc_goal) seeds.append(seed) return seeds, f, script_config def validate_state(recorded_state, playback_state, ep, step, tolerance=1e-5): """Validate that playback state matches recorded state within tolerance.""" if not np.allclose(recorded_state, playback_state, atol=tolerance): err = np.linalg.norm(recorded_state - playback_state) print(f"[warning] state diverged by {err:.12f} for ep {ep} at step {step}") return False return True def generate_and_save_frame( config, sync_env, obs, wbc_action, seed, teleop_cmd, wbc_goal, gr00t_exporter ): """Generate and save a frame to LeRobot dataset if enabled.""" if config.save_lerobot: max_mujoco_state_len, mujoco_state_len, mujoco_state = sync_env.get_mujoco_state_info() frame = generate_frame( obs, wbc_action, seed, mujoco_state, mujoco_state_len, max_mujoco_state_len, teleop_cmd, wbc_goal, config.save_img_obs, ) gr00t_exporter.add_frame(frame) def playback_wbc_goals( sync_env, wbc_policy, wbc_goals, teleop_cmds, states, env, onscreen, config, video_writer, ep, seed, gr00t_exporter, end_steps, ): """Playback using WBC goals to control the robot.""" ret = True num_wbc_goals = len(wbc_goals) if end_steps == -1 else min(end_steps, len(wbc_goals)) for jj in range(num_wbc_goals): wbc_goal = wbc_goals[jj] obs = sync_env.observe() wbc_policy.set_observation(obs) wbc_policy.set_goal(wbc_goal) wbc_action = wbc_policy.get_action() sync_env.queue_action(wbc_action) # Save frame if needed if config.save_lerobot: teleop_cmd = teleop_cmds[jj] generate_and_save_frame( config, sync_env, obs, wbc_action, seed, teleop_cmd, wbc_goal, gr00t_exporter ) capture_or_render_frame(env, onscreen, config, video_writer) if jj < len(states) - 1: state_playback = env.sim.get_state().flatten() if not validate_state(states[jj + 1], state_playback, ep, jj): ret = False return ret def playback_teleop_cmd( sync_env, wbc_policy, teleop_policy, wbc_goals, teleop_cmds, states, env, onscreen, config, video_writer, ep, seed, gr00t_exporter, end_steps, ): """Playback using teleop commands to control the robot.""" ret = True num_steps = len(wbc_goals) if end_steps == -1 else min(end_steps, len(wbc_goals)) for jj in range(num_steps): wbc_goal = wbc_goals[jj] teleop_cmd = teleop_cmds[jj] # Set IK goal from teleop command ik_data = { "body_data": { teleop_policy.retargeting_ik.body.supplemental_info.hand_frame_names[ "left" ]: teleop_cmd["left_wrist"], teleop_policy.retargeting_ik.body.supplemental_info.hand_frame_names[ "right" ]: teleop_cmd["right_wrist"], }, "left_hand_data": teleop_cmd["left_fingers"], "right_hand_data": teleop_cmd["right_fingers"], } teleop_policy.retargeting_ik.set_goal(ik_data) # Store original and get new upper body pose target_upper_body_pose = wbc_goal["target_upper_body_pose"].copy() wbc_goal["target_upper_body_pose"] = teleop_policy.retargeting_ik.get_action() # Execute WBC policy obs = sync_env.observe() wbc_policy.set_observation(obs) wbc_policy.set_goal(wbc_goal) wbc_action = wbc_policy.get_action() sync_env.queue_action(wbc_action) # Save frame if needed generate_and_save_frame( config, sync_env, obs, wbc_action, seed, teleop_cmd, wbc_goal, gr00t_exporter ) # Render or capture frame capture_or_render_frame(env, onscreen, config, video_writer) # Validate states if jj < len(states) - 1: if not np.allclose( target_upper_body_pose, wbc_goal["target_upper_body_pose"], atol=1e-5 ): err = np.linalg.norm(target_upper_body_pose - wbc_goal["target_upper_body_pose"]) print( f"[warning] target_upper_body_pose diverged by {err:.12f} for ep {ep} at step {jj}" ) ret = False state_playback = env.sim.get_state().flatten() if not validate_state(states[jj + 1], state_playback, ep, jj): ret = False return ret def playback_actions( sync_env, actions, teleop_cmds, wbc_goals, states, env, onscreen, config, video_writer, ep, seed, gr00t_exporter, end_steps, ): """Playback using actions directly.""" ret = True num_actions = len(actions) if end_steps == -1 else min(end_steps, len(actions)) for j in range(num_actions): sync_env.queue_action({"q": actions[j]}) # Save frame if needed if config.save_lerobot: obs = sync_env.observe() teleop_cmd = teleop_cmds[j] wbc_goal = wbc_goals[j] wbc_action = {"q": actions[j]} generate_and_save_frame( config, sync_env, obs, wbc_action, seed, teleop_cmd, wbc_goal, gr00t_exporter ) capture_or_render_frame(env, onscreen, config, video_writer) if j < len(states) - 1: state_playback = env.sim.get_state().flatten() if not validate_state(states[j + 1], state_playback, ep, j): ret = False return ret def playback_states( sync_env, states, actions, teleop_cmds, wbc_goals, env, onscreen, config, video_writer, seed, gr00t_exporter, end_steps, ep, ): """Playback by forcing mujoco states directly.""" ret = True num_states = len(states) if end_steps == -1 else min(end_steps, len(states)) for i in range(num_states): sync_env.reset_to({"states": states[i]}) sync_env.render() # Validate that the state was set correctly if i < len(states): state_playback = env.sim.get_state().flatten() if not validate_state(states[i], state_playback, ep, i): ret = False # Save frame if needed if config.save_lerobot: obs = sync_env.observe() teleop_cmd = teleop_cmds[i] wbc_goal = wbc_goals[i] wbc_action = {"q": actions[i]} generate_and_save_frame( config, sync_env, obs, wbc_action, seed, teleop_cmd, wbc_goal, gr00t_exporter ) capture_or_render_frame(env, onscreen, config, video_writer) return ret def main(config: SyncSimPlaybackConfig): ret = True start_time = time.time() np.set_printoptions(precision=5, suppress=True, linewidth=120) assert config.dataset is not None, "Folder must be specified for playback" seeds, f, script_config = load_lerobot_dataset(config.dataset) config.update( script_config, allowed_keys=[ "wbc_version", "wbc_model_path", "wbc_policy_class", "control_frequency", "enable_waist", "with_hands", "env_name", "robot", "task_name", "teleop_frequency", "data_collection_frequency", "enable_gravity_compensation", "gravity_compensation_joints", ], ) config.validate_args() robot_type, robot_model = get_robot_type_and_model(config.robot, config.enable_waist) # Setup rendering if config.save_video or config.save_img_obs: onscreen = False offscreen = True else: onscreen = True offscreen = False # Set default video path if not specified if config.save_video and config.video_path is None: if os.path.isfile(config.dataset): video_folder = Path(config.dataset).parent else: video_folder = Path(config.dataset) video_folder.mkdir(parents=True, exist_ok=True) config.video_path = str(video_folder / "playback_video.mp4") print(f"Video recording enabled. Output: {config.video_path}") sync_env = get_env(config, onscreen=onscreen, offscreen=offscreen) gr00t_exporter = None if config.save_lerobot: obs = sync_env.observe() gr00t_exporter = get_data_exporter(config, obs, robot_model) # Initialize policies wbc_policy, teleop_policy = get_policies( config, robot_type, robot_model, activate_keyboard_listener=False ) # List of all demonstrations episodes demos = [f"demo_{i + 1}" for i in range(len(seeds))] print(f"Loaded and will playback {len(demos)} episodes") env = sync_env.base_env # Setup video writer video_writer = None fourcc = None if config.save_video: fourcc = cv2.VideoWriter_fourcc(*"mp4v") video_writer = cv2.VideoWriter( config.video_path, fourcc, 20, (RS_VIEW_CAMERA_WIDTH, RS_VIEW_CAMERA_HEIGHT) ) print("Loaded {} episodes from {}".format(len(demos), config.dataset)) print("seeds:", seeds) print("demos:", demos, "\n\n") # Handle episode selection - either limited number or infinite random max_episodes = len(demos) episode_count = 0 while True: if episode_count >= max_episodes: break ep = demos[episode_count] print(f"Playing back episode: {ep}") episode_count += 1 # read the model xml, using the metadata stored in the attribute for this episode seed = seeds[int(ep.split("_")[-1]) - 1] sync_env.reset(seed=seed) # load the actions and states states = f["data/{}/states".format(ep)] actions = f["data/{}/actions".format(ep)] teleop_cmds = f["data/{}/teleop_cmd".format(ep)] wbc_goals = f["data/{}/wbc_goal".format(ep)] # reset the policies wbc_policy, teleop_policy, _ = get_policies( config, robot_type, robot_model, activate_keyboard_listener=False ) end_steps = 20 if config.ci_test else -1 if config.use_actions: # load the initial state sync_env.reset_to({"states": states[0]}) # load the actions and play them back open-loop if config.use_wbc_goals: # use the wbc_goals to control the robot episode_ret = playback_wbc_goals( sync_env, wbc_policy, wbc_goals, teleop_cmds, states, env, onscreen, config, video_writer, ep, seed, gr00t_exporter, end_steps, ) ret = ret and episode_ret elif config.use_teleop_cmd: # use the teleop commands to control the robot episode_ret = playback_teleop_cmd( sync_env, wbc_policy, teleop_policy, wbc_goals, teleop_cmds, states, env, onscreen, config, video_writer, ep, seed, gr00t_exporter, end_steps, ) ret = ret and episode_ret else: episode_ret = playback_actions( sync_env, actions, teleop_cmds, wbc_goals, states, env, onscreen, config, video_writer, ep, seed, gr00t_exporter, end_steps, ) ret = ret and episode_ret else: # force the sequence of internal mujoco states one by one episode_ret = playback_states( sync_env, states, actions, teleop_cmds, wbc_goals, env, onscreen, config, video_writer, seed, gr00t_exporter, end_steps, ep, ) ret = ret and episode_ret if config.save_lerobot: gr00t_exporter.save_episode() print(f"Episode {ep} playback finished.\n\n") # close the env sync_env.close() # Cleanup if video_writer is not None: video_writer.release() print(f"Video saved to: {config.video_path}") end_time = time.time() elapsed_time = end_time - start_time if config.save_lerobot: print(f"LeRobot dataset saved to: {gr00t_exporter.root}") print( f"{GREEN_BOLD}Playback with WBC version: {config.wbc_version}, {config.wbc_model_path}, " f"{config.wbc_policy_class}, use_actions: {config.use_actions}, use_wbc_goals: {config.use_wbc_goals}, " f"use_teleop_cmd: {config.use_teleop_cmd}{RESET}" ) if ret: print(f"{GREEN_BOLD}Playback completed successfully in {elapsed_time:.2f} seconds!{RESET}") else: print(f"{RED_BOLD}Playback encountered an error in {elapsed_time:.2f} seconds!{RESET}") return ret def capture_or_render_frame( env: RobotEnv, onscreen: bool, config: SyncSimPlaybackConfig, video_writer: Optional[cv2.VideoWriter], ): """Capture frame for video recording if enabled, or render the environment.""" if config.save_video: if hasattr(env, "sim") and hasattr(env.sim, "render"): img = env.sim.render( width=RS_VIEW_CAMERA_WIDTH, height=RS_VIEW_CAMERA_HEIGHT, camera_name=env.render_camera[0], ) img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) img_bgr = np.flipud(img_bgr) video_writer.write(img_bgr) elif onscreen: env.render() if __name__ == "__main__": config = tyro.cli(SyncSimPlaybackConfig) rclpy.init(args=None) node = rclpy.create_node("playback_decoupled_wbc_control") main(config) rclpy.shutdown()