From 93652916ad24f9f6f827cb140c97fb41b9e9c986 Mon Sep 17 00:00:00 2001
From: Joe DiPrima <epilectrik@gmail.com>
Date: Thu, 19 Feb 2026 11:36:04 -0600
Subject: [PATCH] Add head rotation compensation, USB webcam support, graceful
 R3 fallback

- Head rotation compensation: de-rotate wrist data by head's rotation since
  calibration to prevent arm swing during body rotation (--head-rot-comp flag)
- USB webcam support: --webcam and --webcam-res flags bypass teleimager,
  stream webcam directly to Quest 3 via Vuer shared memory pipeline
- Graceful R3 controller: hasattr guards on get_wireless_remote() so teleop
  works with both modified and stock robot_arm.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 teleop/teleop_hand_and_arm.py | 167 +++++++++++++++++++++-------------
 1 file changed, 103 insertions(+), 64 deletions(-)

diff --git a/teleop/teleop_hand_and_arm.py b/teleop/teleop_hand_and_arm.py
index 2e3e2b2..3c01eab 100644
--- a/teleop/teleop_hand_and_arm.py
+++ b/teleop/teleop_hand_and_arm.py
@@ -54,7 +54,7 @@ RESET_ARMS       = False  # Trigger arm reset via 'h' key
 
 def parse_r3_buttons(data):
     """Parse R3 controller button bytes from wireless_remote. Returns dict of button states."""
-    if len(data) < 4:
+    if data is None or len(data) < 4:
         return {}
     btn1 = data[2]  # R1=0x01, L1=0x02, Start=0x04, Select=0x08, R2=0x10, L2=0x20
     btn2 = data[3]  # A=0x01, B=0x02, X=0x04, Y=0x08, Up=0x10, Right=0x20, Down=0x40, Left=0x80
@@ -156,6 +156,13 @@ if __name__ == '__main__':
                         help='Seconds to ramp tracking after calibration (default: 0.5)')
     parser.add_argument('--rot-blend', type=float, default=0.7,
                         help='Rotation tracking blend (0=position-only, 1=full rotation, default: 0.7)')
+    parser.add_argument('--head-rot-comp', type=float, default=1.0,
+                        help='Head rotation compensation blend (0=none, 1=full, default: 1.0)')
+    # webcam
+    parser.add_argument('--webcam', type=int, default=None,
+                        help='USB webcam device index (e.g. 0 for /dev/video0). Bypasses teleimager.')
+    parser.add_argument('--webcam-res', type=str, default='720p', choices=['480p', '720p', '1080p'],
+                        help='Webcam resolution (default: 720p)')
     # record mode and task info
     parser.add_argument('--record', action = 'store_true', help = 'Enable data recording mode')
     parser.add_argument('--task-dir', type = str, default = './utils/data/', help = 'path to save data')
@@ -200,11 +207,37 @@ if __name__ == '__main__':
                                                       daemon=True)
             listen_keyboard_thread.start()
 
-        # image client
-        img_client = ImageClient(host=args.img_server_ip, request_bgr=True)
-        camera_config = img_client.get_cam_config()
+        # image source: USB webcam or teleimager
+        webcam_cap = None
+        img_client = None
+        if args.webcam is not None:
+            import cv2 as _cv2
+            webcam_cap = _cv2.VideoCapture(args.webcam)
+            if not webcam_cap.isOpened():
+                logger_mp.error(f"[webcam] Cannot open /dev/video{args.webcam}")
+                exit(1)
+            res_map = {'480p': (640, 480), '720p': (1280, 720), '1080p': (1920, 1080)}
+            cam_w, cam_h = res_map[args.webcam_res]
+            webcam_cap.set(_cv2.CAP_PROP_FRAME_WIDTH, cam_w)
+            webcam_cap.set(_cv2.CAP_PROP_FRAME_HEIGHT, cam_h)
+            actual_w = int(webcam_cap.get(_cv2.CAP_PROP_FRAME_WIDTH))
+            actual_h = int(webcam_cap.get(_cv2.CAP_PROP_FRAME_HEIGHT))
+            logger_mp.info(f"[webcam] Opened /dev/video{args.webcam} at {actual_w}x{actual_h}")
+            camera_config = {
+                'head_camera': {
+                    'enable_zmq': True, 'enable_webrtc': False,
+                    'binocular': False, 'image_shape': (actual_h, actual_w),
+                    'fps': 30, 'webrtc_port': 0,
+                },
+                'left_wrist_camera': {'enable_zmq': False},
+                'right_wrist_camera': {'enable_zmq': False},
+            }
+            xr_need_local_img = True
+        else:
+            img_client = ImageClient(host=args.img_server_ip, request_bgr=True)
+            camera_config = img_client.get_cam_config()
+            xr_need_local_img = not (args.display_mode == 'pass-through' or camera_config['head_camera']['enable_webrtc'])
         logger_mp.debug(f"Camera config: {camera_config}")
-        xr_need_local_img = not (args.display_mode == 'pass-through' or camera_config['head_camera']['enable_webrtc'])
 
         # televuer_wrapper: obtain hand pose data from the XR device and transmit the robot's head camera image to the XR device.
         tv_wrapper = TeleVuerWrapper(use_hand_tracking=args.input_mode == "hand", 
@@ -341,15 +374,21 @@ if __name__ == '__main__':
         while not START and not STOP: # wait for start or stop signal.
             time.sleep(0.033)
             # Poll R3 A button in wait loop (edge-detected)
-            r3_data = arm_ctrl.get_wireless_remote()
-            r3_btns = parse_r3_buttons(r3_data)
-            if r3_btns.get('A') and not _r3_prev_buttons.get('A'):
-                START = True
-                logger_mp.info("[R3] A pressed → START tracking")
-            _r3_prev_buttons = r3_btns
-            if camera_config['head_camera']['enable_zmq'] and xr_need_local_img:
-                head_img = img_client.get_head_frame()
-                tv_wrapper.render_to_xr(head_img)
+            if hasattr(arm_ctrl, 'get_wireless_remote'):
+                r3_data = arm_ctrl.get_wireless_remote()
+                r3_btns = parse_r3_buttons(r3_data)
+                if r3_btns.get('A') and not _r3_prev_buttons.get('A'):
+                    START = True
+                    logger_mp.info("[R3] A pressed → START tracking")
+                _r3_prev_buttons = r3_btns
+            if xr_need_local_img:
+                if webcam_cap is not None:
+                    ret, frame = webcam_cap.read()
+                    if ret:
+                        tv_wrapper.render_to_xr(frame)
+                elif camera_config['head_camera']['enable_zmq']:
+                    head_img = img_client.get_head_frame()
+                    tv_wrapper.render_to_xr(head_img)
 
         logger_mp.info("---------------------🚀start Tracking🚀-------------------------")
         arm_ctrl.speed_gradual_max()
@@ -369,9 +408,7 @@ if __name__ == '__main__':
         vp_ref_right = None
         robot_ref_left = None    # 4x4: Robot FK pose at calibration moment
         robot_ref_right = None
-        head_rot_prev = None         # 3x3: previous frame head rotation (for gating)
-        head_pos_prev = None         # 3-vec: previous frame head position
-        head_compensation = np.zeros(3)  # accumulated rotation-gated head position correction
+        head_R_at_cal = None         # 3x3: head rotation at calibration (for body rotation compensation)
         prev_start = False
 
         # Initial calibration: capture reference poses on first START
@@ -381,9 +418,7 @@ if __name__ == '__main__':
             robot_ref_left, robot_ref_right = arm_ik.compute_fk_pose(_cal_arm_q)
             vp_ref_left = _cal_tele.left_wrist_pose.copy()
             vp_ref_right = _cal_tele.right_wrist_pose.copy()
-            head_rot_prev = _cal_tele.head_pose[:3, :3].copy()
-            head_pos_prev = _cal_tele.head_pose[:3, 3].copy()
-            head_compensation = np.zeros(3)
+            head_R_at_cal = _cal_tele.head_pose[:3, :3].copy()
             calibrated = True
             calibration_time = time.time()
             prev_start = True
@@ -402,7 +437,6 @@ if __name__ == '__main__':
                 i_error_right = np.zeros(3)
                 i_rot_error_left = np.zeros(3)
                 i_rot_error_right = np.zeros(3)
-                head_compensation = np.zeros(3)
                 arm_ctrl.ctrl_dual_arm(np.zeros(14), np.zeros(14))
                 reset_start = time.time()
                 while time.time() - reset_start < 3.0:
@@ -418,7 +452,7 @@ if __name__ == '__main__':
                 continue
 
             # Poll R3 controller buttons (edge-detected)
-            r3_data = arm_ctrl.get_wireless_remote()
+            r3_data = arm_ctrl.get_wireless_remote() if hasattr(arm_ctrl, 'get_wireless_remote') else None
             r3_btns = parse_r3_buttons(r3_data)
             if r3_btns.get('A') and not _r3_prev_buttons.get('A'):
                 START = not START
@@ -445,9 +479,7 @@ if __name__ == '__main__':
                 robot_ref_left, robot_ref_right = arm_ik.compute_fk_pose(_cal_arm_q)
                 vp_ref_left = _cal_tele.left_wrist_pose.copy()
                 vp_ref_right = _cal_tele.right_wrist_pose.copy()
-                head_rot_prev = _cal_tele.head_pose[:3, :3].copy()
-                head_pos_prev = _cal_tele.head_pose[:3, 3].copy()
-                head_compensation = np.zeros(3)
+                head_R_at_cal = _cal_tele.head_pose[:3, :3].copy()
                 calibrated = True
                 calibration_time = time.time()
                 i_error_left = np.zeros(3)
@@ -464,7 +496,6 @@ if __name__ == '__main__':
                 i_error_right = np.zeros(3)
                 i_rot_error_left = np.zeros(3)
                 i_rot_error_right = np.zeros(3)
-                head_compensation = np.zeros(3)
                 current_time = time.time()
                 time_elapsed = current_time - start_time
                 sleep_time = max(0, (1 / args.frequency) - time_elapsed)
@@ -472,17 +503,23 @@ if __name__ == '__main__':
                 continue
 
             # get image
-            if camera_config['head_camera']['enable_zmq']:
-                if args.record or xr_need_local_img:
-                    head_img = img_client.get_head_frame()
-                if xr_need_local_img:
-                    tv_wrapper.render_to_xr(head_img)
-            if camera_config['left_wrist_camera']['enable_zmq']:
-                if args.record:
-                    left_wrist_img = img_client.get_left_wrist_frame()
-            if camera_config['right_wrist_camera']['enable_zmq']:
-                if args.record:
-                    right_wrist_img = img_client.get_right_wrist_frame()
+            if webcam_cap is not None:
+                ret, webcam_frame = webcam_cap.read()
+                if ret:
+                    head_img = webcam_frame
+                    tv_wrapper.render_to_xr(webcam_frame)
+            else:
+                if camera_config['head_camera']['enable_zmq']:
+                    if args.record or xr_need_local_img:
+                        head_img = img_client.get_head_frame()
+                    if xr_need_local_img:
+                        tv_wrapper.render_to_xr(head_img)
+                if camera_config['left_wrist_camera']['enable_zmq']:
+                    if args.record:
+                        left_wrist_img = img_client.get_left_wrist_frame()
+                if camera_config['right_wrist_camera']['enable_zmq']:
+                    if args.record:
+                        right_wrist_img = img_client.get_right_wrist_frame()
 
             # record mode
             if args.record and RECORD_TOGGLE:
@@ -547,35 +584,29 @@ if __name__ == '__main__':
                 settle_elapsed = now - calibration_time
                 settle_alpha = min(1.0, settle_elapsed / max(args.settle_time, 0.01))
 
-                # Head decoupling via rotation gating: only compensate head position
-                # drift caused by head ROTATION (looking around), not body translation
-                # (stepping). When head rotates, gate opens and position change is accumulated.
-                # When head is still, gate stays closed so tv_wrapper's natural body-movement
-                # cancellation works correctly.
-                head_rot_now = tele_data.head_pose[:3, :3]
-                head_pos_now = tele_data.head_pose[:3, 3]
-                if head_rot_prev is not None:
-                    rot_delta = rotation_to_rotvec(head_rot_now @ head_rot_prev.T)
-                    angular_speed = np.linalg.norm(rot_delta) / max(dt, 0.001)
-                    # Gate: 0 when head still (stepping), 1 when rotating (looking around)
-                    gate = np.clip((angular_speed - 0.1) / 0.3, 0.0, 1.0)
-                    pos_delta = head_pos_now - head_pos_prev
-                    head_compensation += gate * pos_delta
-                head_rot_prev = head_rot_now.copy()
-                head_pos_prev = head_pos_now.copy()
-
-                # Position: robot_ref + settle_alpha * (vp_current + head_comp - vp_ref)
-                left_delta_pos = (tele_data.left_wrist_pose[:3, 3] + head_compensation) - vp_ref_left[:3, 3]
-                right_delta_pos = (tele_data.right_wrist_pose[:3, 3] + head_compensation) - vp_ref_right[:3, 3]
+                # Head rotation compensation: de-rotate wrist data by head's rotation
+                # since calibration. Converts world-frame positions/orientations back to
+                # a body-aligned frame, preventing arm swing during body rotation.
+                if head_R_at_cal is not None and args.head_rot_comp > 0.01:
+                    R_head_delta = tele_data.head_pose[:3, :3] @ head_R_at_cal.T
+                    R_comp = scale_rotation(R_head_delta.T, args.head_rot_comp)
+                else:
+                    R_comp = np.eye(3)
+
+                # De-rotate wrist positions, then compute delta from calibration
+                left_pos_comp = R_comp @ tele_data.left_wrist_pose[:3, 3]
+                right_pos_comp = R_comp @ tele_data.right_wrist_pose[:3, 3]
+                left_delta_pos = left_pos_comp - vp_ref_left[:3, 3]
+                right_delta_pos = right_pos_comp - vp_ref_right[:3, 3]
 
                 left_wrist_adjusted = np.eye(4)
                 right_wrist_adjusted = np.eye(4)
                 left_wrist_adjusted[:3, 3] = robot_ref_left[:3, 3] + settle_alpha * left_delta_pos
                 right_wrist_adjusted[:3, 3] = robot_ref_right[:3, 3] + settle_alpha * right_delta_pos
 
-                # Rotation: compute delta, scale by rot_blend (and settle_alpha)
-                left_delta_R = tele_data.left_wrist_pose[:3, :3] @ vp_ref_left[:3, :3].T
-                right_delta_R = tele_data.right_wrist_pose[:3, :3] @ vp_ref_right[:3, :3].T
+                # Rotation: compute delta (de-rotated), scale by rot_blend (and settle_alpha)
+                left_delta_R = (R_comp @ tele_data.left_wrist_pose[:3, :3]) @ vp_ref_left[:3, :3].T
+                right_delta_R = (R_comp @ tele_data.right_wrist_pose[:3, :3]) @ vp_ref_right[:3, :3].T
                 rot_alpha = args.rot_blend * settle_alpha
                 if rot_alpha < 0.01:
                     left_wrist_adjusted[:3, :3] = robot_ref_left[:3, :3]
@@ -846,10 +877,18 @@ if __name__ == '__main__':
         except Exception as e:
             logger_mp.error(f"Failed to stop keyboard listener or ipc server: {e}")
         
-        try:
-            img_client.close()
-        except Exception as e:
-            logger_mp.error(f"Failed to close image client: {e}")
+        if webcam_cap is not None:
+            try:
+                webcam_cap.release()
+                logger_mp.info("[webcam] Released")
+            except:
+                pass
+
+        if img_client is not None:
+            try:
+                img_client.close()
+            except Exception as e:
+                logger_mp.error(f"Failed to close image client: {e}")
 
         try:
             tv_wrapper.close()