From 68ec67f0a540dd0a554df8ac109cb1429ddb5ae7 Mon Sep 17 00:00:00 2001
From: runyud <runyud@nvidia.com>
Date: Fri, 14 Nov 2025 15:36:33 -0800
Subject: [PATCH 1/2] [fix] fix lang key

---
 gr00t_wbc/control/envs/robocasa/sync_env.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gr00t_wbc/control/envs/robocasa/sync_env.py b/gr00t_wbc/control/envs/robocasa/sync_env.py
index bdfe7a2..62aa414 100644
--- a/gr00t_wbc/control/envs/robocasa/sync_env.py
+++ b/gr00t_wbc/control/envs/robocasa/sync_env.py
@@ -183,7 +183,7 @@ class SyncEnv(gym.Env):
         # Add state keys for model input
         obs = prepare_observation_for_eval(self.robot_model, obs)
 
-        obs["language.language_instruction"] = raw_obs["language.language_instruction"]
+        obs["annotation.human.task_description"] = raw_obs["language.language_instruction"]
 
         if hasattr(self.base_env, "get_privileged_obs_keys"):
             for key in self.base_env.get_privileged_obs_keys():
@@ -205,7 +205,7 @@ class SyncEnv(gym.Env):
     def get_observation(self):
         return self.base_env._get_observations()  # assumes base env is robosuite
 
-    def get_step_info(self) -> Dict[str, any]:
+    def get_step_info(self) -> Tuple[Dict[str, any], float, bool, bool, Dict[str, any]]:
         return (
             self.observe(),
             self.cache["reward"],
@@ -319,7 +319,7 @@ class SyncEnv(gym.Env):
 
         obs_space = prepare_gym_space_for_eval(self.robot_model, obs_space)
 
-        obs_space["language.language_instruction"] = gym.spaces.Text(
+        obs_space["annotation.human.task_description"] = gym.spaces.Text(
             max_length=256, charset=ALLOWED_LANGUAGE_CHARSET
         )
 

From 65e4c6cf4a16f20788cbd7af87b693e7c9953293 Mon Sep 17 00:00:00 2001
From: runyud <runyud@nvidia.com>
Date: Sun, 16 Nov 2025 11:45:49 -0800
Subject: [PATCH 2/2] [add] add W1 and W2 envs

---
 .../control/envs/robocasa/async_env_server.py |   4 +-
 gr00t_wbc/control/envs/robocasa/sync_env.py   |   7 +-
 .../control/main/teleop/configs/configs.py    |   3 +-
 .../control/policy/lerobot_replay_policy.py   |   6 +-
 gr00t_wbc/control/teleop/gui/main.py          |   4 +-
 .../instantiation/g1_hand_ik_instantiation.py |   4 +-
 gr00t_wbc/control/teleop/teleop_streamer.py   |   4 +-
 gr00t_wbc/control/utils/sync_sim_utils.py     |   1 +
 .../dexmg/gr00trobocasa/robocasa/__init__.py  |   6 +
 .../locomanipulation/locomanip_basic.py       | 157 +++++++++++
 .../locomanipulation/locomanip_dc.py          |  15 ++
 .../sim2mujoco/scripts/run_mujoco_gear_wbc.py | 244 +++++++++--------
 .../scripts/run_mujoco_gear_wbc_gait.py       | 254 +++++++++---------
 .../main/teleop/test_g1_control_loop.py       |   6 +-
 .../test_meshcat_visualizer_env.py            |   8 +-
 15 files changed, 469 insertions(+), 254 deletions(-)
 create mode 100644 gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_dc.py

diff --git a/gr00t_wbc/control/envs/robocasa/async_env_server.py b/gr00t_wbc/control/envs/robocasa/async_env_server.py
index b2bd746..c83f417 100644
--- a/gr00t_wbc/control/envs/robocasa/async_env_server.py
+++ b/gr00t_wbc/control/envs/robocasa/async_env_server.py
@@ -8,7 +8,9 @@ import numpy as np
 import rclpy
 
 from gr00t_wbc.control.envs.g1.sim.image_publish_utils import ImagePublishProcess
-from gr00t_wbc.control.envs.robocasa.utils.robocasa_env import Gr00tLocomanipRoboCasaEnv  # noqa: F401
+from gr00t_wbc.control.envs.robocasa.utils.robocasa_env import (
+    Gr00tLocomanipRoboCasaEnv,
+)  # noqa: F401
 from gr00t_wbc.control.robot_model.robot_model import RobotModel
 from gr00t_wbc.control.utils.keyboard_dispatcher import KeyboardListenerSubscriber
 
diff --git a/gr00t_wbc/control/envs/robocasa/sync_env.py b/gr00t_wbc/control/envs/robocasa/sync_env.py
index 62aa414..747c0f6 100644
--- a/gr00t_wbc/control/envs/robocasa/sync_env.py
+++ b/gr00t_wbc/control/envs/robocasa/sync_env.py
@@ -10,11 +10,14 @@ from robosuite.environments.robot_env import RobotEnv
 from scipy.spatial.transform import Rotation as R
 
 from gr00t_wbc.control.envs.g1.utils.joint_safety import JointSafetyMonitor
-from gr00t_wbc.control.envs.robocasa.utils.controller_utils import update_robosuite_controller_configs
+from gr00t_wbc.control.envs.robocasa.utils.controller_utils import (
+    update_robosuite_controller_configs,
+)
 from gr00t_wbc.control.envs.robocasa.utils.robocasa_env import (  # noqa: F401
     ALLOWED_LANGUAGE_CHARSET,
     Gr00tLocomanipRoboCasaEnv,
 )
+from gr00t_wbc.control.envs.robocasa.utils.sim_utils import change_simulation_timestep
 from gr00t_wbc.control.robot_model.instantiation import get_robot_type_and_model
 from gr00t_wbc.control.utils.n1_utils import (
     prepare_gym_space_for_eval,
@@ -36,6 +39,8 @@ class SyncEnv(gym.Env):
             robot_name, enable_waist_ik=kwargs.pop("enable_waist", False)
         )
 
+        change_simulation_timestep(kwargs.get("sim_freq", 1 / 0.005))
+
         env_kwargs = {
             "onscreen": kwargs.get("onscreen", True),
             "offscreen": kwargs.get("offscreen", False),
diff --git a/gr00t_wbc/control/main/teleop/configs/configs.py b/gr00t_wbc/control/main/teleop/configs/configs.py
index b1f6251..9a52a0d 100644
--- a/gr00t_wbc/control/main/teleop/configs/configs.py
+++ b/gr00t_wbc/control/main/teleop/configs/configs.py
@@ -77,8 +77,7 @@ class BaseConfig(ArgsConfigTemplate):
     """Version of the whole body controller."""
 
     wbc_model_path: str = (
-        "policy/GR00T-WholeBodyControl-Balance.onnx,"
-        "policy/GR00T-WholeBodyControl-Walk.onnx"
+        "policy/GR00T-WholeBodyControl-Balance.onnx," "policy/GR00T-WholeBodyControl-Walk.onnx"
     )
     """Path to WBC model file (relative to gr00t_wbc/sim2mujoco/resources/robots/g1)"""
     """gear_wbc model path: policy/GR00T-WholeBodyControl-Balance.onnx,policy/GR00T-WholeBodyControl-Walk.onnx"""
diff --git a/gr00t_wbc/control/policy/lerobot_replay_policy.py b/gr00t_wbc/control/policy/lerobot_replay_policy.py
index 2783ca8..78a89f1 100644
--- a/gr00t_wbc/control/policy/lerobot_replay_policy.py
+++ b/gr00t_wbc/control/policy/lerobot_replay_policy.py
@@ -3,7 +3,11 @@ import time
 import pandas as pd
 
 from gr00t_wbc.control.base.policy import Policy
-from gr00t_wbc.control.main.constants import DEFAULT_BASE_HEIGHT, DEFAULT_NAV_CMD, DEFAULT_WRIST_POSE
+from gr00t_wbc.control.main.constants import (
+    DEFAULT_BASE_HEIGHT,
+    DEFAULT_NAV_CMD,
+    DEFAULT_WRIST_POSE,
+)
 from gr00t_wbc.control.robot_model.robot_model import RobotModel
 from gr00t_wbc.data.viz.rerun_viz import RerunViz
 
diff --git a/gr00t_wbc/control/teleop/gui/main.py b/gr00t_wbc/control/teleop/gui/main.py
index a46af87..a2edcee 100644
--- a/gr00t_wbc/control/teleop/gui/main.py
+++ b/gr00t_wbc/control/teleop/gui/main.py
@@ -29,9 +29,7 @@ import gr00t_wbc
 signal.signal(signal.SIGINT, signal.SIG_DFL)
 
 
-GR00T_TELEOP_DATA_ROOT = os.path.join(
-    os.path.dirname(gr00t_wbc.__file__), "./external/teleop/data"
-)
+GR00T_TELEOP_DATA_ROOT = os.path.join(os.path.dirname(gr00t_wbc.__file__), "./external/teleop/data")
 
 
 class MainWindow(QMainWindow):
diff --git a/gr00t_wbc/control/teleop/solver/hand/instantiation/g1_hand_ik_instantiation.py b/gr00t_wbc/control/teleop/solver/hand/instantiation/g1_hand_ik_instantiation.py
index 23ec9d5..fc234d1 100644
--- a/gr00t_wbc/control/teleop/solver/hand/instantiation/g1_hand_ik_instantiation.py
+++ b/gr00t_wbc/control/teleop/solver/hand/instantiation/g1_hand_ik_instantiation.py
@@ -1,4 +1,6 @@
-from gr00t_wbc.control.teleop.solver.hand.g1_gripper_ik_solver import G1GripperInverseKinematicsSolver
+from gr00t_wbc.control.teleop.solver.hand.g1_gripper_ik_solver import (
+    G1GripperInverseKinematicsSolver,
+)
 
 
 # initialize hand ik solvers for g1 robot
diff --git a/gr00t_wbc/control/teleop/teleop_streamer.py b/gr00t_wbc/control/teleop/teleop_streamer.py
index 6d0449d..a2b80a7 100644
--- a/gr00t_wbc/control/teleop/teleop_streamer.py
+++ b/gr00t_wbc/control/teleop/teleop_streamer.py
@@ -45,7 +45,9 @@ class TeleopStreamer:
                 self.body_streamer = IphoneStreamer()
                 self.body_streamer.start_streaming()
             elif body_control_device == "leapmotion":
-                from gr00t_wbc.control.teleop.streamers.leapmotion_streamer import LeapMotionStreamer
+                from gr00t_wbc.control.teleop.streamers.leapmotion_streamer import (
+                    LeapMotionStreamer,
+                )
 
                 self.body_streamer = LeapMotionStreamer()
                 self.body_streamer.start_streaming()
diff --git a/gr00t_wbc/control/utils/sync_sim_utils.py b/gr00t_wbc/control/utils/sync_sim_utils.py
index 0048f95..65c3984 100644
--- a/gr00t_wbc/control/utils/sync_sim_utils.py
+++ b/gr00t_wbc/control/utils/sync_sim_utils.py
@@ -503,6 +503,7 @@ def get_env(config: SyncSimDataCollectionConfig, **kwargs) -> SyncEnv:
             "enable_waist": config.enable_waist,
             "enable_gravity_compensation": config.enable_gravity_compensation,
             "gravity_compensation_joints": config.gravity_compensation_joints,
+            "sim_freq": config.sim_frequency,
         }
     )
     if robot_type == "g1":
diff --git a/gr00t_wbc/dexmg/gr00trobocasa/robocasa/__init__.py b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/__init__.py
index d8288ae..eab921c 100644
--- a/gr00t_wbc/dexmg/gr00trobocasa/robocasa/__init__.py
+++ b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/__init__.py
@@ -26,12 +26,18 @@ from robocasa.environments.locomanipulation.locomanip_basic import (
     LMNavPickBottleShelf,
     LMPickBottleShelfLow,
     LMNavPickBottleShelfLow,
+    LMPnPBottleToPlate,
+    LMPnPAppleToPlate,
 )
 from robocasa.environments.locomanipulation.locomanip_pnp import (
     LMBottlePnP,
     LMBoxPnP,
 )
 
+from robocasa.environments.locomanipulation.locomanip_dc import (
+    LMNavPickBottleDC,
+    LMPnPAppleToPlateDC,
+)
 
 # from robosuite.controllers import ALL_CONTROLLERS, load_controller_config
 from robosuite.controllers import ALL_PART_CONTROLLERS, load_composite_controller_config
diff --git a/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_basic.py b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_basic.py
index c54e67c..363ed18 100644
--- a/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_basic.py
+++ b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_basic.py
@@ -574,3 +574,160 @@ class LMNavPickBottleShelfLow(LMNavPickBottleShelf):
             )
         )
         return [self.shelf, self.bottle]
+
+
+class LMPnPBottleToPlate(LMPnPBottle):
+    def _get_objects(self) -> list[SceneObject]:
+        super()._get_objects()
+        self.plate = SceneObject(
+            ObjectConfig(
+                name="plate",
+                mjcf_path="objects/omniverse/locomanip/plate_1/model.xml",
+                scale=1.0,
+                static=True,
+                sampler_config=SamplingConfig(
+                    x_range=np.array([-0.2 - 0.08, -0.2 + 0.04]),
+                    y_range=np.array([-0.08, 0.08]),
+                    rotation=np.array([-np.pi, np.pi]),
+                    reference=ReferenceConfig(self.table_target),
+                ),
+            )
+        )
+        return [self.table, self.table_target, self.bottle, self.plate]
+
+    def _get_success_criteria(self) -> SuccessCriteria:
+        return AllCriteria(
+            IsUpright(self.bottle, symmetric=True), IsInContact(self.bottle, self.plate)
+        )
+
+    def _get_instruction(self) -> str:
+        return "Pick up the bottle and place it on the plate."
+
+    def get_object(self):
+        return dict(
+            bottle=dict(obj_name=self.bottle.mj_obj.root_body, obj_type="body"),
+            plate=dict(obj_name=self.plate.mj_obj.root_body, obj_type="body"),
+        )
+
+    def get_subtask_term_signals(self):
+        obj_z = self.sim.data.body_xpos[self.obj_body_id(self.bottle.mj_obj.name)][2]
+        target_table_pos = self.sim.data.body_xpos[self.obj_body_id(self.table_target.mj_obj.name)]
+        target_table_z = target_table_pos[2] + self.table_target.mj_obj.top_offset[2]
+        return dict(obj_off_table=int(obj_z - target_table_z > self.LIFT_OFFSET))
+
+    @staticmethod
+    def task_config():
+        task = DexMGConfigHelper.AttrDict()
+        task.task_spec_0.subtask_1 = dict(
+            object_ref="bottle",
+            subtask_term_signal="obj_off_table",
+            subtask_term_offset_range=(5, 10),
+            selection_strategy="random",
+            selection_strategy_kwargs=None,
+            action_noise=0.05,
+            num_interpolation_steps=5,
+            num_fixed_steps=0,
+            apply_noise_during_interpolation=False,
+        )
+        # Second subtask for placing on plate
+        task.task_spec_0.subtask_2 = dict(
+            object_ref="plate",
+            subtask_term_signal=None,
+            subtask_term_offset_range=None,
+            selection_strategy="random",
+            selection_strategy_kwargs=None,
+            action_noise=0.05,
+            num_interpolation_steps=5,
+            num_fixed_steps=0,
+            apply_noise_during_interpolation=False,
+        )
+        task.task_spec_1.subtask_1 = dict(
+            object_ref=None,
+            subtask_term_signal=None,
+            subtask_term_offset_range=None,
+            selection_strategy="random",
+            selection_strategy_kwargs=None,
+            action_noise=0.05,
+            num_interpolation_steps=5,
+            num_fixed_steps=0,
+            apply_noise_during_interpolation=False,
+        )
+        return task.to_dict()
+
+
+class LMPnPAppleToPlate(LMPnPBottleToPlate):
+    def _get_objects(self) -> list[SceneObject]:
+        super()._get_objects()
+        self.apple = SceneObject(
+            ObjectConfig(
+                name="apple",
+                mjcf_path="objects/omniverse/locomanip/apple_0/model.xml",
+                static=False,
+                scale=1.0,
+                sampler_config=SamplingConfig(
+                    x_range=np.array([-0.08, 0.04]),
+                    y_range=np.array([-0.08, 0.08]),
+                    rotation=np.array([-np.pi, np.pi]),
+                    reference_pos=np.array([0.4, 0, self.table.mj_obj.top_offset[2]]),
+                ),
+                rgba=(0.85, 0.1, 0.1, 1.0),
+            )
+        )
+        return [self.table, self.table_target, self.apple, self.plate]
+
+    def _get_success_criteria(self) -> SuccessCriteria:
+        return IsInContact(self.apple, self.plate)
+
+    def _get_instruction(self) -> str:
+        return "pick up the apple, walk left and place the apple on the plate."
+
+    def get_object(self):
+        return dict(
+            apple=dict(obj_name=self.apple.mj_obj.root_body, obj_type="body"),
+            plate=dict(obj_name=self.plate.mj_obj.root_body, obj_type="body"),
+        )
+
+    def get_subtask_term_signals(self):
+        obj_z = self.sim.data.body_xpos[self.obj_body_id(self.apple.mj_obj.name)][2]
+        target_table_pos = self.sim.data.body_xpos[self.obj_body_id(self.table_target.mj_obj.name)]
+        target_table_z = target_table_pos[2] + self.table_target.mj_obj.top_offset[2]
+        return dict(obj_off_table=int(obj_z - target_table_z > self.LIFT_OFFSET))
+
+    @staticmethod
+    def task_config():
+        task = DexMGConfigHelper.AttrDict()
+        task.task_spec_0.subtask_1 = dict(
+            object_ref="apple",
+            subtask_term_signal="obj_off_table",
+            subtask_term_offset_range=(5, 10),
+            selection_strategy="random",
+            selection_strategy_kwargs=None,
+            action_noise=0.05,
+            num_interpolation_steps=5,
+            num_fixed_steps=0,
+            apply_noise_during_interpolation=False,
+        )
+        # Second subtask for placing on plate
+        task.task_spec_0.subtask_2 = dict(
+            object_ref="plate",
+            subtask_term_signal=None,
+            subtask_term_offset_range=None,
+            selection_strategy="random",
+            selection_strategy_kwargs=None,
+            action_noise=0.05,
+            num_interpolation_steps=5,
+            num_fixed_steps=0,
+            apply_noise_during_interpolation=False,
+        )
+        task.task_spec_1.subtask_1 = dict(
+            object_ref=None,
+            subtask_term_signal=None,
+            subtask_term_offset_range=None,
+            selection_strategy="random",
+            selection_strategy_kwargs=None,
+            action_noise=0.05,
+            num_interpolation_steps=5,
+            num_fixed_steps=0,
+            apply_noise_during_interpolation=False,
+        )
+        return task.to_dict()
diff --git a/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_dc.py b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_dc.py
new file mode 100644
index 0000000..465df91
--- /dev/null
+++ b/gr00t_wbc/dexmg/gr00trobocasa/robocasa/environments/locomanipulation/locomanip_dc.py
@@ -0,0 +1,15 @@
+from robocasa import (
+    LMNavPickBottle,
+    LMPnPAppleToPlate,
+)
+from robocasa.models.scenes.lab_arena import LabArena
+
+
+class LabEnvMixin:
+    MUJOCO_ARENA_CLS = LabArena
+
+
+class LMNavPickBottleDC(LabEnvMixin, LMNavPickBottle): ...
+
+
+class LMPnPAppleToPlateDC(LabEnvMixin, LMPnPAppleToPlate): ...
diff --git a/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc.py b/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc.py
index 6d5b859..d3206f5 100644
--- a/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc.py
+++ b/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc.py
@@ -1,21 +1,16 @@
-import sys
-import time
 import collections
-import yaml
-import torch
-import numpy as np
-import sys
+import os
+import threading
 import time
-import collections
-import yaml
-import torch
-import numpy as np
+
 import mujoco
 import mujoco.viewer
+import numpy as np
 import onnxruntime as ort
-import threading
 from pynput import keyboard as pkb
-import os
+import torch
+import yaml
+
 
 class GearWbcController:
     def __init__(self, config_path):
@@ -24,22 +19,22 @@ class GearWbcController:
         self.config = self.load_config(os.path.join(self.CONFIG_PATH, "g1_gear_wbc.yaml"))
 
         self.control_dict = {
-            'loco_cmd': self.config['cmd_init'],
-            "height_cmd": self.config['height_cmd'],
-            "rpy_cmd": self.config.get('rpy_cmd', [0.0, 0.0, 0.0]),
-            "freq_cmd": self.config.get('freq_cmd', 1.5)
+            "loco_cmd": self.config["cmd_init"],
+            "height_cmd": self.config["height_cmd"],
+            "rpy_cmd": self.config.get("rpy_cmd", [0.0, 0.0, 0.0]),
+            "freq_cmd": self.config.get("freq_cmd", 1.5),
         }
 
-        self.model = mujoco.MjModel.from_xml_path(self.config['xml_path'])
+        self.model = mujoco.MjModel.from_xml_path(self.config["xml_path"])
         self.data = mujoco.MjData(self.model)
-        self.model.opt.timestep = self.config['simulation_dt']
+        self.model.opt.timestep = self.config["simulation_dt"]
         self.n_joints = self.data.qpos.shape[0] - 7
         self.torso_index = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "torso_link")
         self.base_index = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "pelvis")
-        self.action = np.zeros(self.config['num_actions'], dtype=np.float32)
-        self.target_dof_pos = self.config['default_angles'].copy()
-        self.policy = self.load_onnx_policy(self.config['policy_path'])
-        self.walk_policy = self.load_onnx_policy(self.config['walk_policy_path'])
+        self.action = np.zeros(self.config["num_actions"], dtype=np.float32)
+        self.target_dof_pos = self.config["default_angles"].copy()
+        self.policy = self.load_onnx_policy(self.config["policy_path"])
+        self.walk_policy = self.load_onnx_policy(self.config["walk_policy_path"])
         self.gait_indices = torch.zeros((1), dtype=torch.float32)
         self.counter = 0
         self.just_started = 0.0
@@ -50,15 +45,15 @@ class GearWbcController:
             self.data, self.config, self.action, self.control_dict, self.n_joints
         )
         self.obs_history = collections.deque(
-            [np.zeros(self.single_obs_dim, dtype=np.float32)] * self.config['obs_history_len'],
-            maxlen=self.config['obs_history_len']
+            [np.zeros(self.single_obs_dim, dtype=np.float32)] * self.config["obs_history_len"],
+            maxlen=self.config["obs_history_len"],
         )
-        self.obs = np.zeros(self.config['num_obs'], dtype=np.float32)
+        self.obs = np.zeros(self.config["num_obs"], dtype=np.float32)
         self.keyboard_listener(self.control_dict, self.config)
-        
 
     def keyboard_listener(self, control_dict, config):
         """Listen to key press events and update cmd and height_cmd"""
+
         def on_press(key):
             try:
                 k = key.char
@@ -66,58 +61,60 @@ class GearWbcController:
                 return  # Special keys ignored
 
             with self.cmd_lock:
-                if k == 'w':
-                    control_dict['loco_cmd'][0] += 0.1
-                elif k == 's':
-                    control_dict['loco_cmd'][0] -= 0.1
-                elif k == 'a':
-                    control_dict['loco_cmd'][1] += 0.1
-                elif k == 'd':
-                    control_dict['loco_cmd'][1] -= 0.1
-                elif k == 'q':
-                    control_dict['loco_cmd'][2] += 0.1
-                elif k == 'e':
-                    control_dict['loco_cmd'][2] -= 0.1
-                elif k == 'z':
-                    control_dict['loco_cmd'][:] = config['cmd_init']
-                    control_dict["height_cmd"] = config['height_cmd']
-                    control_dict['rpy_cmd'][:] = config['rpy_cmd']
-                    control_dict['freq_cmd'] = config['freq_cmd']
-                elif k == '1':
+                if k == "w":
+                    control_dict["loco_cmd"][0] += 0.1
+                elif k == "s":
+                    control_dict["loco_cmd"][0] -= 0.1
+                elif k == "a":
+                    control_dict["loco_cmd"][1] += 0.1
+                elif k == "d":
+                    control_dict["loco_cmd"][1] -= 0.1
+                elif k == "q":
+                    control_dict["loco_cmd"][2] += 0.1
+                elif k == "e":
+                    control_dict["loco_cmd"][2] -= 0.1
+                elif k == "z":
+                    control_dict["loco_cmd"][:] = config["cmd_init"]
+                    control_dict["height_cmd"] = config["height_cmd"]
+                    control_dict["rpy_cmd"][:] = config["rpy_cmd"]
+                    control_dict["freq_cmd"] = config["freq_cmd"]
+                elif k == "1":
                     control_dict["height_cmd"] += 0.05
-                elif k == '2':
+                elif k == "2":
                     control_dict["height_cmd"] -= 0.05
-                elif k == '3':
-                    control_dict['rpy_cmd'][0] += 0.2
-                elif k == '4':
-                    control_dict['rpy_cmd'][0] -= 0.2
-                elif k == '5':
-                    control_dict['rpy_cmd'][1] += 0.2
-                elif k == '6':
-                    control_dict['rpy_cmd'][1] -= 0.2
-                elif k == '7':
-                    control_dict['rpy_cmd'][2] += 0.2
-                elif k == '8':
-                    control_dict['rpy_cmd'][2] -= 0.2
-                elif k == 'm':
-                    control_dict['freq_cmd'] += 0.1
-                elif k == 'n':
-                    control_dict['freq_cmd'] -= 0.1
-
-                print(f"Current Commands: loco_cmd = {control_dict['loco_cmd']}, height_cmd = {control_dict['height_cmd']}, rpy_cmd = {control_dict['rpy_cmd']}, freq_cmd = {control_dict['freq_cmd']}")
+                elif k == "3":
+                    control_dict["rpy_cmd"][0] += 0.2
+                elif k == "4":
+                    control_dict["rpy_cmd"][0] -= 0.2
+                elif k == "5":
+                    control_dict["rpy_cmd"][1] += 0.2
+                elif k == "6":
+                    control_dict["rpy_cmd"][1] -= 0.2
+                elif k == "7":
+                    control_dict["rpy_cmd"][2] += 0.2
+                elif k == "8":
+                    control_dict["rpy_cmd"][2] -= 0.2
+                elif k == "m":
+                    control_dict["freq_cmd"] += 0.1
+                elif k == "n":
+                    control_dict["freq_cmd"] -= 0.1
+
+                print(
+                    f"Current Commands: loco_cmd = {control_dict['loco_cmd']}, height_cmd = {control_dict['height_cmd']}, rpy_cmd = {control_dict['rpy_cmd']}, freq_cmd = {control_dict['freq_cmd']}"
+                )
 
         listener = pkb.Listener(on_press=on_press)
         listener.daemon = True
         listener.start()
 
     def load_config(self, config_path):
-        with open(config_path, 'r') as f:
+        with open(config_path, "r") as f:
             config = yaml.safe_load(f)
 
-        for path_key in ['policy_path', 'xml_path', 'walk_policy_path']:
+        for path_key in ["policy_path", "xml_path", "walk_policy_path"]:
             config[path_key] = os.path.join(CONFIG_PATH, config[path_key])
 
-        array_keys = ['kps', 'kds', 'default_angles', 'cmd_scale', 'cmd_init']
+        array_keys = ["kps", "kds", "default_angles", "cmd_scale", "cmd_init"]
         for key in array_keys:
             config[key] = np.array(config[key], dtype=np.float32)
 
@@ -129,17 +126,19 @@ class GearWbcController:
     def quat_rotate_inverse(self, q, v):
         w, x, y, z = q
         q_conj = np.array([w, -x, -y, -z])
-        return np.array([
-            v[0] * (q_conj[0]**2 + q_conj[1]**2 - q_conj[2]**2 - q_conj[3]**2) +
-            v[1] * 2 * (q_conj[1]*q_conj[2] - q_conj[0]*q_conj[3]) +
-            v[2] * 2 * (q_conj[1]*q_conj[3] + q_conj[0]*q_conj[2]),
-            v[0] * 2 * (q_conj[1]*q_conj[2] + q_conj[0]*q_conj[3]) +
-            v[1] * (q_conj[0]**2 - q_conj[1]**2 + q_conj[2]**2 - q_conj[3]**2) +
-            v[2] * 2 * (q_conj[2]*q_conj[3] - q_conj[0]*q_conj[1]),
-            v[0] * 2 * (q_conj[1]*q_conj[3] - q_conj[0]*q_conj[2]) +
-            v[1] * 2 * (q_conj[2]*q_conj[3] + q_conj[0]*q_conj[1]) +
-            v[2] * (q_conj[0]**2 - q_conj[1]**2 - q_conj[2]**2 + q_conj[3]**2)
-        ])
+        return np.array(
+            [
+                v[0] * (q_conj[0] ** 2 + q_conj[1] ** 2 - q_conj[2] ** 2 - q_conj[3] ** 2)
+                + v[1] * 2 * (q_conj[1] * q_conj[2] - q_conj[0] * q_conj[3])
+                + v[2] * 2 * (q_conj[1] * q_conj[3] + q_conj[0] * q_conj[2]),
+                v[0] * 2 * (q_conj[1] * q_conj[2] + q_conj[0] * q_conj[3])
+                + v[1] * (q_conj[0] ** 2 - q_conj[1] ** 2 + q_conj[2] ** 2 - q_conj[3] ** 2)
+                + v[2] * 2 * (q_conj[2] * q_conj[3] - q_conj[0] * q_conj[1]),
+                v[0] * 2 * (q_conj[1] * q_conj[3] - q_conj[0] * q_conj[2])
+                + v[1] * 2 * (q_conj[2] * q_conj[3] + q_conj[0] * q_conj[1])
+                + v[2] * (q_conj[0] ** 2 - q_conj[1] ** 2 - q_conj[2] ** 2 + q_conj[3] ** 2),
+            ]
+        )
 
     def get_gravity_orientation(self, quat):
         gravity_vec = np.array([0.0, 0.0, -1.0])
@@ -147,11 +146,11 @@ class GearWbcController:
 
     def compute_observation(self, d, config, action, control_dict, n_joints):
         command = np.zeros(7, dtype=np.float32)
-        command[:3] = control_dict['loco_cmd'][:3] * config['cmd_scale']
-        command[3] = control_dict['height_cmd']
+        command[:3] = control_dict["loco_cmd"][:3] * config["cmd_scale"]
+        command[3] = control_dict["height_cmd"]
         # command[4] = control_dict['freq_cmd']
-        command[4:7] = control_dict['rpy_cmd']
-        
+        command[4:7] = control_dict["rpy_cmd"]
+
         # # gait indice
         # is_static = np.linalg.norm(command[:3]) < 0.1
         # just_entered_walk = (not is_static) and (not self.walking_mask)
@@ -192,7 +191,6 @@ class GearWbcController:
         # # Clock signal
         # clock = [torch.sin(2 * np.pi * fi) for fi in gait_pair]
 
-
         # for i, (clk, frozen_mask_attr) in enumerate(
         #     zip(clock, ['frozen_FL', 'frozen_FR'])
         # ):
@@ -206,25 +204,27 @@ class GearWbcController:
         #     clock[i] = clk
 
         # self.clock_inputs = torch.stack(clock).unsqueeze(0)
-        qj = d.qpos[7:7+n_joints].copy()
-        dqj = d.qvel[6:6+n_joints].copy()
+        qj = d.qpos[7 : 7 + n_joints].copy()
+        dqj = d.qvel[6 : 6 + n_joints].copy()
         quat = d.qpos[3:7].copy()
         omega = d.qvel[3:6].copy()
         # omega = self.data.xmat[self.base_index].reshape(3, 3).T @ self.data.cvel[self.base_index][3:6]
         padded_defaults = np.zeros(n_joints, dtype=np.float32)
-        L = min(len(config['default_angles']), n_joints)
-        padded_defaults[:L] = config['default_angles'][:L]
+        L = min(len(config["default_angles"]), n_joints)
+        padded_defaults[:L] = config["default_angles"][:L]
 
-        qj_scaled = (qj - padded_defaults) * config['dof_pos_scale']
-        dqj_scaled = dqj * config['dof_vel_scale']
+        qj_scaled = (qj - padded_defaults) * config["dof_pos_scale"]
+        dqj_scaled = dqj * config["dof_vel_scale"]
         gravity_orientation = self.get_gravity_orientation(quat)
-        omega_scaled = omega * config['ang_vel_scale']
+        omega_scaled = omega * config["ang_vel_scale"]
 
         torso_quat = self.data.xquat[self.torso_index]
-        torso_omega = self.data.xmat[self.torso_index].reshape(3, 3).T @ self.data.cvel[self.torso_index][3:6]
-        torso_omega_scaled = torso_omega * config['ang_vel_scale']
+        torso_omega = (
+            self.data.xmat[self.torso_index].reshape(3, 3).T @ self.data.cvel[self.torso_index][3:6]
+        )
+        torso_omega_scaled = torso_omega * config["ang_vel_scale"]
         torso_gravity_orientation = self.get_gravity_orientation(torso_quat)
-        
+
         single_obs_dim = 86
         single_obs = np.zeros(single_obs_dim, dtype=np.float32)
         single_obs[0:7] = command[:7]
@@ -232,19 +232,21 @@ class GearWbcController:
         single_obs[10:13] = gravity_orientation
         # single_obs[14:17] = 0.#torso_omega_scaled
         # single_obs[17:20] = 0.#torso_gravity_orientation
-        single_obs[13:13+n_joints] = qj_scaled
-        single_obs[13+n_joints:13+2*n_joints] = dqj_scaled
-        single_obs[13+2*n_joints:13+2*n_joints+15] = action
+        single_obs[13 : 13 + n_joints] = qj_scaled
+        single_obs[13 + n_joints : 13 + 2 * n_joints] = dqj_scaled
+        single_obs[13 + 2 * n_joints : 13 + 2 * n_joints + 15] = action
         # single_obs[20+2*n_joints+15:20+2*n_joints+15+2] = self.clock_inputs.cpu().numpy().reshape(2)
 
         return single_obs, single_obs_dim
 
     def load_onnx_policy(self, path):
         model = ort.InferenceSession(path)
+
         def run_inference(input_tensor):
             ort_inputs = {model.get_inputs()[0].name: input_tensor.cpu().numpy()}
             ort_outs = model.run(None, ort_inputs)
             return torch.tensor(ort_outs[0], device="cuda:0")
+
         return run_inference
 
     def run(self):
@@ -253,55 +255,61 @@ class GearWbcController:
 
         with mujoco.viewer.launch_passive(self.model, self.data) as viewer:
             start = time.time()
-            while viewer.is_running() and time.time() - start < self.config['simulation_duration']:
+            while viewer.is_running() and time.time() - start < self.config["simulation_duration"]:
                 step_start = time.time()
 
                 leg_tau = self.pd_control(
                     self.target_dof_pos,
-                    self.data.qpos[7:7+self.config['num_actions']],
-                    self.config['kps'],
-                    np.zeros_like(self.config['kps']),
-                    self.data.qvel[6:6+self.config['num_actions']],
-                    self.config['kds']
+                    self.data.qpos[7 : 7 + self.config["num_actions"]],
+                    self.config["kps"],
+                    np.zeros_like(self.config["kps"]),
+                    self.data.qvel[6 : 6 + self.config["num_actions"]],
+                    self.config["kds"],
                 )
-                self.data.ctrl[:self.config['num_actions']] = leg_tau
+                self.data.ctrl[: self.config["num_actions"]] = leg_tau
 
-                if self.n_joints > self.config['num_actions']:
+                if self.n_joints > self.config["num_actions"]:
                     arm_tau = self.pd_control(
-                        np.zeros(self.n_joints - self.config['num_actions'], dtype=np.float32),
-                        self.data.qpos[7+self.config['num_actions']:7+self.n_joints],
-                        np.full(self.n_joints - self.config['num_actions'], 100.0),
-                        np.zeros(self.n_joints - self.config['num_actions']),
-                        self.data.qvel[6+self.config['num_actions']:6+self.n_joints],
-                        np.full(self.n_joints - self.config['num_actions'], 0.5)
+                        np.zeros(self.n_joints - self.config["num_actions"], dtype=np.float32),
+                        self.data.qpos[7 + self.config["num_actions"] : 7 + self.n_joints],
+                        np.full(self.n_joints - self.config["num_actions"], 100.0),
+                        np.zeros(self.n_joints - self.config["num_actions"]),
+                        self.data.qvel[6 + self.config["num_actions"] : 6 + self.n_joints],
+                        np.full(self.n_joints - self.config["num_actions"], 0.5),
                     )
-                    self.data.ctrl[self.config['num_actions']:] = arm_tau
+                    self.data.ctrl[self.config["num_actions"] :] = arm_tau
 
                 mujoco.mj_step(self.model, self.data)
-                
+
                 self.counter += 1
-                if self.counter % self.config['control_decimation'] == 0:
+                if self.counter % self.config["control_decimation"] == 0:
                     with self.cmd_lock:
                         current_cmd = self.control_dict
 
-                    single_obs, _ = self.compute_observation(self.data, self.config, self.action, current_cmd,  self.n_joints)
+                    single_obs, _ = self.compute_observation(
+                        self.data, self.config, self.action, current_cmd, self.n_joints
+                    )
                     self.obs_history.append(single_obs)
 
                     for i, hist_obs in enumerate(self.obs_history):
-                        self.obs[i * self.single_obs_dim:(i + 1) * self.single_obs_dim] = hist_obs
+                        self.obs[i * self.single_obs_dim : (i + 1) * self.single_obs_dim] = hist_obs
 
                     obs_tensor = torch.from_numpy(self.obs).unsqueeze(0)
-                    if (np.linalg.norm(np.array(current_cmd['loco_cmd']))) <= 0.05:
+                    if (np.linalg.norm(np.array(current_cmd["loco_cmd"]))) <= 0.05:
                         self.action = self.policy(obs_tensor).cpu().detach().numpy().squeeze()
                     else:
                         self.action = self.walk_policy(obs_tensor).cpu().detach().numpy().squeeze()
-                    self.target_dof_pos = self.action * self.config['action_scale'] + self.config['default_angles']
+                    self.target_dof_pos = (
+                        self.action * self.config["action_scale"] + self.config["default_angles"]
+                    )
 
                 viewer.sync()
                 # time.sleep(max(0, self.model.opt.timestep - (time.time() - step_start)))
 
 
 if __name__ == "__main__":
-    CONFIG_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "resources", "robots", "g1")
+    CONFIG_PATH = os.path.join(
+        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "resources", "robots", "g1"
+    )
     controller = GearWbcController(CONFIG_PATH)
-    controller.run()
\ No newline at end of file
+    controller.run()
diff --git a/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc_gait.py b/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc_gait.py
index e909dcc..df85a0c 100644
--- a/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc_gait.py
+++ b/gr00t_wbc/sim2mujoco/scripts/run_mujoco_gear_wbc_gait.py
@@ -1,21 +1,16 @@
-import sys
-import time
 import collections
-import yaml
-import torch
-import numpy as np
-import sys
+import os
+import threading
 import time
-import collections
-import yaml
-import torch
-import numpy as np
+
 import mujoco
 import mujoco.viewer
+import numpy as np
 import onnxruntime as ort
-import threading
 from pynput import keyboard as pkb
-import os
+import torch
+import yaml
+
 
 class GearWbcController:
     def __init__(self, config_path):
@@ -24,21 +19,21 @@ class GearWbcController:
         self.config = self.load_config(os.path.join(self.CONFIG_PATH, "g1_gear_wbc.yaml"))
 
         self.control_dict = {
-            'loco_cmd': self.config['cmd_init'],
-            "height_cmd": self.config['height_cmd'],
-            "rpy_cmd": self.config.get('rpy_cmd', [0.0, 0.0, 0.0]),
-            "freq_cmd": self.config.get('freq_cmd', 1.5)
+            "loco_cmd": self.config["cmd_init"],
+            "height_cmd": self.config["height_cmd"],
+            "rpy_cmd": self.config.get("rpy_cmd", [0.0, 0.0, 0.0]),
+            "freq_cmd": self.config.get("freq_cmd", 1.5),
         }
 
-        self.model = mujoco.MjModel.from_xml_path(self.config['xml_path'])
+        self.model = mujoco.MjModel.from_xml_path(self.config["xml_path"])
         self.data = mujoco.MjData(self.model)
-        self.model.opt.timestep = self.config['simulation_dt']
+        self.model.opt.timestep = self.config["simulation_dt"]
         self.n_joints = self.data.qpos.shape[0] - 7
         self.torso_index = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "torso_link")
         self.base_index = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "pelvis")
-        self.action = np.zeros(self.config['num_actions'], dtype=np.float32)
-        self.target_dof_pos = self.config['default_angles'].copy()
-        self.policy = self.load_onnx_policy(self.config['policy_path'])
+        self.action = np.zeros(self.config["num_actions"], dtype=np.float32)
+        self.target_dof_pos = self.config["default_angles"].copy()
+        self.policy = self.load_onnx_policy(self.config["policy_path"])
         self.gait_indices = torch.zeros((1), dtype=torch.float32)
         self.counter = 0
         self.just_started = 0.0
@@ -49,15 +44,15 @@ class GearWbcController:
             self.data, self.config, self.action, self.control_dict, self.n_joints
         )
         self.obs_history = collections.deque(
-            [np.zeros(self.single_obs_dim, dtype=np.float32)] * self.config['obs_history_len'],
-            maxlen=self.config['obs_history_len']
+            [np.zeros(self.single_obs_dim, dtype=np.float32)] * self.config["obs_history_len"],
+            maxlen=self.config["obs_history_len"],
         )
-        self.obs = np.zeros(self.config['num_obs'], dtype=np.float32)
+        self.obs = np.zeros(self.config["num_obs"], dtype=np.float32)
         self.keyboard_listener(self.control_dict, self.config)
-        
 
     def keyboard_listener(self, control_dict, config):
         """Listen to key press events and update cmd and height_cmd"""
+
         def on_press(key):
             try:
                 k = key.char
@@ -65,58 +60,60 @@ class GearWbcController:
                 return  # Special keys ignored
 
             with self.cmd_lock:
-                if k == 'w':
-                    control_dict['loco_cmd'][0] += 0.2
-                elif k == 's':
-                    control_dict['loco_cmd'][0] -= 0.2
-                elif k == 'a':
-                    control_dict['loco_cmd'][1] += 0.5
-                elif k == 'd':
-                    control_dict['loco_cmd'][1] -= 0.5
-                elif k == 'q':
-                    control_dict['loco_cmd'][2] += 0.5
-                elif k == 'e':
-                    control_dict['loco_cmd'][2] -= 0.5
-                elif k == 'z':
-                    control_dict['loco_cmd'][:] = config['cmd_init']
-                    control_dict["height_cmd"] = config['height_cmd']
-                    control_dict['rpy_cmd'][:] = config['rpy_cmd']
-                    control_dict['freq_cmd'] = config['freq_cmd']
-                elif k == '1':
+                if k == "w":
+                    control_dict["loco_cmd"][0] += 0.2
+                elif k == "s":
+                    control_dict["loco_cmd"][0] -= 0.2
+                elif k == "a":
+                    control_dict["loco_cmd"][1] += 0.5
+                elif k == "d":
+                    control_dict["loco_cmd"][1] -= 0.5
+                elif k == "q":
+                    control_dict["loco_cmd"][2] += 0.5
+                elif k == "e":
+                    control_dict["loco_cmd"][2] -= 0.5
+                elif k == "z":
+                    control_dict["loco_cmd"][:] = config["cmd_init"]
+                    control_dict["height_cmd"] = config["height_cmd"]
+                    control_dict["rpy_cmd"][:] = config["rpy_cmd"]
+                    control_dict["freq_cmd"] = config["freq_cmd"]
+                elif k == "1":
                     control_dict["height_cmd"] += 0.05
-                elif k == '2':
+                elif k == "2":
                     control_dict["height_cmd"] -= 0.05
-                elif k == '3':
-                    control_dict['rpy_cmd'][0] += 0.2
-                elif k == '4':
-                    control_dict['rpy_cmd'][0] -= 0.2
-                elif k == '5':
-                    control_dict['rpy_cmd'][1] += 0.2
-                elif k == '6':
-                    control_dict['rpy_cmd'][1] -= 0.2
-                elif k == '7':
-                    control_dict['rpy_cmd'][2] += 0.2
-                elif k == '8':
-                    control_dict['rpy_cmd'][2] -= 0.2
-                elif k == 'm':
-                    control_dict['freq_cmd'] += 0.1
-                elif k == 'n':
-                    control_dict['freq_cmd'] -= 0.1
-
-                print(f"Current Commands: loco_cmd = {control_dict['loco_cmd']}, height_cmd = {control_dict['height_cmd']}, rpy_cmd = {control_dict['rpy_cmd']}, freq_cmd = {control_dict['freq_cmd']}")
+                elif k == "3":
+                    control_dict["rpy_cmd"][0] += 0.2
+                elif k == "4":
+                    control_dict["rpy_cmd"][0] -= 0.2
+                elif k == "5":
+                    control_dict["rpy_cmd"][1] += 0.2
+                elif k == "6":
+                    control_dict["rpy_cmd"][1] -= 0.2
+                elif k == "7":
+                    control_dict["rpy_cmd"][2] += 0.2
+                elif k == "8":
+                    control_dict["rpy_cmd"][2] -= 0.2
+                elif k == "m":
+                    control_dict["freq_cmd"] += 0.1
+                elif k == "n":
+                    control_dict["freq_cmd"] -= 0.1
+
+                print(
+                    f"Current Commands: loco_cmd = {control_dict['loco_cmd']}, height_cmd = {control_dict['height_cmd']}, rpy_cmd = {control_dict['rpy_cmd']}, freq_cmd = {control_dict['freq_cmd']}"
+                )
 
         listener = pkb.Listener(on_press=on_press)
         listener.daemon = True
         listener.start()
 
     def load_config(self, config_path):
-        with open(config_path, 'r') as f:
+        with open(config_path, "r") as f:
             config = yaml.safe_load(f)
 
-        for path_key in ['policy_path', 'xml_path']:
+        for path_key in ["policy_path", "xml_path"]:
             config[path_key] = os.path.join(CONFIG_PATH, config[path_key])
 
-        array_keys = ['kps', 'kds', 'default_angles', 'cmd_scale', 'cmd_init']
+        array_keys = ["kps", "kds", "default_angles", "cmd_scale", "cmd_init"]
         for key in array_keys:
             config[key] = np.array(config[key], dtype=np.float32)
 
@@ -128,17 +125,19 @@ class GearWbcController:
     def quat_rotate_inverse(self, q, v):
         w, x, y, z = q
         q_conj = np.array([w, -x, -y, -z])
-        return np.array([
-            v[0] * (q_conj[0]**2 + q_conj[1]**2 - q_conj[2]**2 - q_conj[3]**2) +
-            v[1] * 2 * (q_conj[1]*q_conj[2] - q_conj[0]*q_conj[3]) +
-            v[2] * 2 * (q_conj[1]*q_conj[3] + q_conj[0]*q_conj[2]),
-            v[0] * 2 * (q_conj[1]*q_conj[2] + q_conj[0]*q_conj[3]) +
-            v[1] * (q_conj[0]**2 - q_conj[1]**2 + q_conj[2]**2 - q_conj[3]**2) +
-            v[2] * 2 * (q_conj[2]*q_conj[3] - q_conj[0]*q_conj[1]),
-            v[0] * 2 * (q_conj[1]*q_conj[3] - q_conj[0]*q_conj[2]) +
-            v[1] * 2 * (q_conj[2]*q_conj[3] + q_conj[0]*q_conj[1]) +
-            v[2] * (q_conj[0]**2 - q_conj[1]**2 - q_conj[2]**2 + q_conj[3]**2)
-        ])
+        return np.array(
+            [
+                v[0] * (q_conj[0] ** 2 + q_conj[1] ** 2 - q_conj[2] ** 2 - q_conj[3] ** 2)
+                + v[1] * 2 * (q_conj[1] * q_conj[2] - q_conj[0] * q_conj[3])
+                + v[2] * 2 * (q_conj[1] * q_conj[3] + q_conj[0] * q_conj[2]),
+                v[0] * 2 * (q_conj[1] * q_conj[2] + q_conj[0] * q_conj[3])
+                + v[1] * (q_conj[0] ** 2 - q_conj[1] ** 2 + q_conj[2] ** 2 - q_conj[3] ** 2)
+                + v[2] * 2 * (q_conj[2] * q_conj[3] - q_conj[0] * q_conj[1]),
+                v[0] * 2 * (q_conj[1] * q_conj[3] - q_conj[0] * q_conj[2])
+                + v[1] * 2 * (q_conj[2] * q_conj[3] + q_conj[0] * q_conj[1])
+                + v[2] * (q_conj[0] ** 2 - q_conj[1] ** 2 - q_conj[2] ** 2 + q_conj[3] ** 2),
+            ]
+        )
 
     def get_gravity_orientation(self, quat):
         gravity_vec = np.array([0.0, 0.0, -1.0])
@@ -146,11 +145,11 @@ class GearWbcController:
 
     def compute_observation(self, d, config, action, control_dict, n_joints):
         command = np.zeros(8, dtype=np.float32)
-        command[:3] = control_dict['loco_cmd'][:3] * config['cmd_scale']
-        command[3] = control_dict['height_cmd']
-        command[4] = control_dict['freq_cmd']
-        command[5:8] = control_dict['rpy_cmd']
-        
+        command[:3] = control_dict["loco_cmd"][:3] * config["cmd_scale"]
+        command[3] = control_dict["height_cmd"]
+        command[4] = control_dict["freq_cmd"]
+        command[5:8] = control_dict["rpy_cmd"]
+
         # gait indice
         is_static = np.linalg.norm(command[:3]) < 0.1
         just_entered_walk = (not is_static) and (not self.walking_mask)
@@ -191,10 +190,7 @@ class GearWbcController:
         # Clock signal
         clock = [torch.sin(2 * np.pi * fi) for fi in gait_pair]
 
-
-        for i, (clk, frozen_mask_attr) in enumerate(
-            zip(clock, ['frozen_FL', 'frozen_FR'])
-        ):
+        for i, (clk, frozen_mask_attr) in enumerate(zip(clock, ["frozen_FL", "frozen_FR"])):
             frozen_mask = getattr(self, frozen_mask_attr)
             # Freeze condition: static and at sin peak
             if is_static and (not frozen_mask) and clk.item() > 0.98:
@@ -205,45 +201,51 @@ class GearWbcController:
             clock[i] = clk
 
         self.clock_inputs = torch.stack(clock).unsqueeze(0)
-        qj = d.qpos[7:7+n_joints].copy()
-        dqj = d.qvel[6:6+n_joints].copy()
+        qj = d.qpos[7 : 7 + n_joints].copy()
+        dqj = d.qvel[6 : 6 + n_joints].copy()
         quat = d.qpos[3:7].copy()
         omega = d.qvel[3:6].copy()
         # omega = self.data.xmat[self.base_index].reshape(3, 3).T @ self.data.cvel[self.base_index][3:6]
         padded_defaults = np.zeros(n_joints, dtype=np.float32)
-        L = min(len(config['default_angles']), n_joints)
-        padded_defaults[:L] = config['default_angles'][:L]
+        L = min(len(config["default_angles"]), n_joints)
+        padded_defaults[:L] = config["default_angles"][:L]
 
-        qj_scaled = (qj - padded_defaults) * config['dof_pos_scale']
-        dqj_scaled = dqj * config['dof_vel_scale']
+        qj_scaled = (qj - padded_defaults) * config["dof_pos_scale"]
+        dqj_scaled = dqj * config["dof_vel_scale"]
         gravity_orientation = self.get_gravity_orientation(quat)
-        omega_scaled = omega * config['ang_vel_scale']
+        omega_scaled = omega * config["ang_vel_scale"]
 
         torso_quat = self.data.xquat[self.torso_index]
-        torso_omega = self.data.xmat[self.torso_index].reshape(3, 3).T @ self.data.cvel[self.torso_index][3:6]
-        torso_omega_scaled = torso_omega * config['ang_vel_scale']
+        torso_omega = (
+            self.data.xmat[self.torso_index].reshape(3, 3).T @ self.data.cvel[self.torso_index][3:6]
+        )
+        torso_omega_scaled = torso_omega * config["ang_vel_scale"]
         torso_gravity_orientation = self.get_gravity_orientation(torso_quat)
-        
+
         single_obs_dim = 95
         single_obs = np.zeros(single_obs_dim, dtype=np.float32)
         single_obs[0:8] = command[:8]
         single_obs[8:11] = omega_scaled
         single_obs[11:14] = gravity_orientation
-        single_obs[14:17] = 0.#torso_omega_scaled
-        single_obs[17:20] = 0.#torso_gravity_orientation
-        single_obs[20:20+n_joints] = qj_scaled
-        single_obs[20+n_joints:20+2*n_joints] = dqj_scaled
-        single_obs[20+2*n_joints:20+2*n_joints+15] = action
-        single_obs[20+2*n_joints+15:20+2*n_joints+15+2] = self.clock_inputs.cpu().numpy().reshape(2)
+        single_obs[14:17] = 0.0  # torso_omega_scaled
+        single_obs[17:20] = 0.0  # torso_gravity_orientation
+        single_obs[20 : 20 + n_joints] = qj_scaled
+        single_obs[20 + n_joints : 20 + 2 * n_joints] = dqj_scaled
+        single_obs[20 + 2 * n_joints : 20 + 2 * n_joints + 15] = action
+        single_obs[20 + 2 * n_joints + 15 : 20 + 2 * n_joints + 15 + 2] = (
+            self.clock_inputs.cpu().numpy().reshape(2)
+        )
 
         return single_obs, single_obs_dim
 
     def load_onnx_policy(self, path):
         model = ort.InferenceSession(path)
+
         def run_inference(input_tensor):
             ort_inputs = {model.get_inputs()[0].name: input_tensor.cpu().numpy()}
             ort_outs = model.run(None, ort_inputs)
             return torch.tensor(ort_outs[0], device="cuda:0")
+
         return run_inference
 
     def run(self):
@@ -252,52 +254,58 @@ class GearWbcController:
 
         with mujoco.viewer.launch_passive(self.model, self.data) as viewer:
             start = time.time()
-            while viewer.is_running() and time.time() - start < self.config['simulation_duration']:
+            while viewer.is_running() and time.time() - start < self.config["simulation_duration"]:
                 step_start = time.time()
 
                 leg_tau = self.pd_control(
                     self.target_dof_pos,
-                    self.data.qpos[7:7+self.config['num_actions']],
-                    self.config['kps'],
-                    np.zeros_like(self.config['kps']),
-                    self.data.qvel[6:6+self.config['num_actions']],
-                    self.config['kds']
+                    self.data.qpos[7 : 7 + self.config["num_actions"]],
+                    self.config["kps"],
+                    np.zeros_like(self.config["kps"]),
+                    self.data.qvel[6 : 6 + self.config["num_actions"]],
+                    self.config["kds"],
                 )
-                self.data.ctrl[:self.config['num_actions']] = leg_tau
+                self.data.ctrl[: self.config["num_actions"]] = leg_tau
 
-                if self.n_joints > self.config['num_actions']:
+                if self.n_joints > self.config["num_actions"]:
                     arm_tau = self.pd_control(
-                        np.zeros(self.n_joints - self.config['num_actions'], dtype=np.float32),
-                        self.data.qpos[7+self.config['num_actions']:7+self.n_joints],
-                        np.full(self.n_joints - self.config['num_actions'], 100.0),
-                        np.zeros(self.n_joints - self.config['num_actions']),
-                        self.data.qvel[6+self.config['num_actions']:6+self.n_joints],
-                        np.full(self.n_joints - self.config['num_actions'], 0.5)
+                        np.zeros(self.n_joints - self.config["num_actions"], dtype=np.float32),
+                        self.data.qpos[7 + self.config["num_actions"] : 7 + self.n_joints],
+                        np.full(self.n_joints - self.config["num_actions"], 100.0),
+                        np.zeros(self.n_joints - self.config["num_actions"]),
+                        self.data.qvel[6 + self.config["num_actions"] : 6 + self.n_joints],
+                        np.full(self.n_joints - self.config["num_actions"], 0.5),
                     )
-                    self.data.ctrl[self.config['num_actions']:] = arm_tau
+                    self.data.ctrl[self.config["num_actions"] :] = arm_tau
 
                 mujoco.mj_step(self.model, self.data)
-                
+
                 self.counter += 1
-                if self.counter % self.config['control_decimation'] == 0:
+                if self.counter % self.config["control_decimation"] == 0:
                     with self.cmd_lock:
                         current_cmd = self.control_dict
 
-                    single_obs, _ = self.compute_observation(self.data, self.config, self.action, current_cmd,  self.n_joints)
+                    single_obs, _ = self.compute_observation(
+                        self.data, self.config, self.action, current_cmd, self.n_joints
+                    )
                     self.obs_history.append(single_obs)
 
                     for i, hist_obs in enumerate(self.obs_history):
-                        self.obs[i * self.single_obs_dim:(i + 1) * self.single_obs_dim] = hist_obs
+                        self.obs[i * self.single_obs_dim : (i + 1) * self.single_obs_dim] = hist_obs
 
                     obs_tensor = torch.from_numpy(self.obs).unsqueeze(0)
                     self.action = self.policy(obs_tensor).cpu().detach().numpy().squeeze()
-                    self.target_dof_pos = self.action * self.config['action_scale'] + self.config['default_angles']
+                    self.target_dof_pos = (
+                        self.action * self.config["action_scale"] + self.config["default_angles"]
+                    )
 
                 viewer.sync()
                 # time.sleep(max(0, self.model.opt.timestep - (time.time() - step_start)))
 
 
 if __name__ == "__main__":
-    CONFIG_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "resources", "robots", "g1")
+    CONFIG_PATH = os.path.join(
+        os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "resources", "robots", "g1"
+    )
     controller = GearWbcController(CONFIG_PATH)
-    controller.run()
\ No newline at end of file
+    controller.run()
diff --git a/tests/control/main/teleop/test_g1_control_loop.py b/tests/control/main/teleop/test_g1_control_loop.py
index 1248ab2..e19f667 100644
--- a/tests/control/main/teleop/test_g1_control_loop.py
+++ b/tests/control/main/teleop/test_g1_control_loop.py
@@ -12,7 +12,11 @@ import rclpy
 from scipy.spatial.transform import Rotation as R
 from std_msgs.msg import String as RosStringMsg
 
-from gr00t_wbc.control.main.constants import CONTROL_GOAL_TOPIC, KEYBOARD_INPUT_TOPIC, STATE_TOPIC_NAME
+from gr00t_wbc.control.main.constants import (
+    CONTROL_GOAL_TOPIC,
+    KEYBOARD_INPUT_TOPIC,
+    STATE_TOPIC_NAME,
+)
 from gr00t_wbc.control.utils.ros_utils import ROSMsgPublisher, ROSMsgSubscriber
 from gr00t_wbc.control.utils.term_color_constants import GREEN_BOLD, RESET, YELLOW_BOLD
 from gr00t_wbc.data.viz.rerun_viz import RerunViz
diff --git a/tests/control/visualization/test_meshcat_visualizer_env.py b/tests/control/visualization/test_meshcat_visualizer_env.py
index a09ed90..ee78e03 100644
--- a/tests/control/visualization/test_meshcat_visualizer_env.py
+++ b/tests/control/visualization/test_meshcat_visualizer_env.py
@@ -5,7 +5,9 @@ import numpy as np
 import pytest
 
 from gr00t_wbc.control.robot_model import RobotModel
-from gr00t_wbc.control.robot_model.supplemental_info.g1.g1_supplemental_info import G1SupplementalInfo
+from gr00t_wbc.control.robot_model.supplemental_info.g1.g1_supplemental_info import (
+    G1SupplementalInfo,
+)
 
 
 @pytest.fixture
@@ -17,7 +19,9 @@ def env_fixture():
     from gr00t_wbc.control.visualization.meshcat_visualizer_env import MeshcatVisualizerEnv
 
     root_dir = pathlib.Path(__file__).parent.parent.parent.parent
-    urdf_path = str(root_dir / "gr00t_wbc/control/robot_model/model_data/g1/g1_29dof_with_hand.urdf")
+    urdf_path = str(
+        root_dir / "gr00t_wbc/control/robot_model/model_data/g1/g1_29dof_with_hand.urdf"
+    )
     asset_path = str(root_dir / "gr00t_wbc/control/robot_model/model_data/g1")
     robot_config = {
         "asset_path": asset_path,