Browse Source

Add head rotation compensation, USB webcam support, graceful R3 fallback

- Head rotation compensation: de-rotate wrist data by head's rotation since
  calibration to prevent arm swing during body rotation (--head-rot-comp flag)
- USB webcam support: --webcam and --webcam-res flags bypass teleimager,
  stream webcam directly to Quest 3 via Vuer shared memory pipeline
- Graceful R3 controller: hasattr guards on get_wireless_remote() so teleop
  works with both modified and stock robot_arm.py

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
main
Joe DiPrima 1 month ago
parent
commit
93652916ad
  1. 117
      teleop/teleop_hand_and_arm.py

117
teleop/teleop_hand_and_arm.py

@@ -54,7 +54,7 @@ RESET_ARMS = False # Trigger arm reset via 'h' key
 def parse_r3_buttons(data):
     """Parse R3 controller button bytes from wireless_remote. Returns dict of button states."""
-    if len(data) < 4:
+    if data is None or len(data) < 4:
         return {}
     btn1 = data[2] # R1=0x01, L1=0x02, Start=0x04, Select=0x08, R2=0x10, L2=0x20
     btn2 = data[3] # A=0x01, B=0x02, X=0x04, Y=0x08, Up=0x10, Right=0x20, Down=0x40, Left=0x80
@@ -156,6 +156,13 @@ if __name__ == '__main__':
                         help='Seconds to ramp tracking after calibration (default: 0.5)')
     parser.add_argument('--rot-blend', type=float, default=0.7,
                         help='Rotation tracking blend (0=position-only, 1=full rotation, default: 0.7)')
+    parser.add_argument('--head-rot-comp', type=float, default=1.0,
+                        help='Head rotation compensation blend (0=none, 1=full, default: 1.0)')
+    # webcam
+    parser.add_argument('--webcam', type=int, default=None,
+                        help='USB webcam device index (e.g. 0 for /dev/video0). Bypasses teleimager.')
+    parser.add_argument('--webcam-res', type=str, default='720p', choices=['480p', '720p', '1080p'],
+                        help='Webcam resolution (default: 720p)')
     # record mode and task info
     parser.add_argument('--record', action = 'store_true', help = 'Enable data recording mode')
     parser.add_argument('--task-dir', type = str, default = './utils/data/', help = 'path to save data')
@ -200,11 +207,37 @@ if __name__ == '__main__':
daemon=True)
listen_keyboard_thread.start()
# image client
# image source: USB webcam or teleimager
webcam_cap = None
img_client = None
if args.webcam is not None:
import cv2 as _cv2
webcam_cap = _cv2.VideoCapture(args.webcam)
if not webcam_cap.isOpened():
logger_mp.error(f"[webcam] Cannot open /dev/video{args.webcam}")
exit(1)
res_map = {'480p': (640, 480), '720p': (1280, 720), '1080p': (1920, 1080)}
cam_w, cam_h = res_map[args.webcam_res]
webcam_cap.set(_cv2.CAP_PROP_FRAME_WIDTH, cam_w)
webcam_cap.set(_cv2.CAP_PROP_FRAME_HEIGHT, cam_h)
actual_w = int(webcam_cap.get(_cv2.CAP_PROP_FRAME_WIDTH))
actual_h = int(webcam_cap.get(_cv2.CAP_PROP_FRAME_HEIGHT))
logger_mp.info(f"[webcam] Opened /dev/video{args.webcam} at {actual_w}x{actual_h}")
camera_config = {
'head_camera': {
'enable_zmq': True, 'enable_webrtc': False,
'binocular': False, 'image_shape': (actual_h, actual_w),
'fps': 30, 'webrtc_port': 0,
},
'left_wrist_camera': {'enable_zmq': False},
'right_wrist_camera': {'enable_zmq': False},
}
xr_need_local_img = True
else:
img_client = ImageClient(host=args.img_server_ip, request_bgr=True)
camera_config = img_client.get_cam_config()
logger_mp.debug(f"Camera config: {camera_config}")
xr_need_local_img = not (args.display_mode == 'pass-through' or camera_config['head_camera']['enable_webrtc'])
logger_mp.debug(f"Camera config: {camera_config}")
# televuer_wrapper: obtain hand pose data from the XR device and transmit the robot's head camera image to the XR device.
tv_wrapper = TeleVuerWrapper(use_hand_tracking=args.input_mode == "hand",
@ -341,13 +374,19 @@ if __name__ == '__main__':
while not START and not STOP: # wait for start or stop signal.
time.sleep(0.033)
# Poll R3 A button in wait loop (edge-detected)
if hasattr(arm_ctrl, 'get_wireless_remote'):
r3_data = arm_ctrl.get_wireless_remote()
r3_btns = parse_r3_buttons(r3_data)
if r3_btns.get('A') and not _r3_prev_buttons.get('A'):
START = True
logger_mp.info("[R3] A pressed → START tracking")
_r3_prev_buttons = r3_btns
if camera_config['head_camera']['enable_zmq'] and xr_need_local_img:
if xr_need_local_img:
if webcam_cap is not None:
ret, frame = webcam_cap.read()
if ret:
tv_wrapper.render_to_xr(frame)
elif camera_config['head_camera']['enable_zmq']:
head_img = img_client.get_head_frame()
tv_wrapper.render_to_xr(head_img)
@@ -369,9 +408,7 @@ if __name__ == '__main__':
     vp_ref_right = None
     robot_ref_left = None # 4x4: Robot FK pose at calibration moment
     robot_ref_right = None
-    head_rot_prev = None # 3x3: previous frame head rotation (for gating)
-    head_pos_prev = None # 3-vec: previous frame head position
-    head_compensation = np.zeros(3) # accumulated rotation-gated head position correction
+    head_R_at_cal = None # 3x3: head rotation at calibration (for body rotation compensation)
     prev_start = False
     # Initial calibration: capture reference poses on first START
@ -381,9 +418,7 @@ if __name__ == '__main__':
robot_ref_left, robot_ref_right = arm_ik.compute_fk_pose(_cal_arm_q)
vp_ref_left = _cal_tele.left_wrist_pose.copy()
vp_ref_right = _cal_tele.right_wrist_pose.copy()
head_rot_prev = _cal_tele.head_pose[:3, :3].copy()
head_pos_prev = _cal_tele.head_pose[:3, 3].copy()
head_compensation = np.zeros(3)
head_R_at_cal = _cal_tele.head_pose[:3, :3].copy()
calibrated = True
calibration_time = time.time()
prev_start = True
@@ -402,7 +437,6 @@ if __name__ == '__main__':
             i_error_right = np.zeros(3)
             i_rot_error_left = np.zeros(3)
             i_rot_error_right = np.zeros(3)
-            head_compensation = np.zeros(3)
             arm_ctrl.ctrl_dual_arm(np.zeros(14), np.zeros(14))
             reset_start = time.time()
             while time.time() - reset_start < 3.0:
@@ -418,7 +452,7 @@ if __name__ == '__main__':
                 continue
             # Poll R3 controller buttons (edge-detected)
-            r3_data = arm_ctrl.get_wireless_remote()
+            r3_data = arm_ctrl.get_wireless_remote() if hasattr(arm_ctrl, 'get_wireless_remote') else None
             r3_btns = parse_r3_buttons(r3_data)
             if r3_btns.get('A') and not _r3_prev_buttons.get('A'):
                 START = not START
@@ -445,9 +479,7 @@ if __name__ == '__main__':
                 robot_ref_left, robot_ref_right = arm_ik.compute_fk_pose(_cal_arm_q)
                 vp_ref_left = _cal_tele.left_wrist_pose.copy()
                 vp_ref_right = _cal_tele.right_wrist_pose.copy()
-                head_rot_prev = _cal_tele.head_pose[:3, :3].copy()
-                head_pos_prev = _cal_tele.head_pose[:3, 3].copy()
-                head_compensation = np.zeros(3)
+                head_R_at_cal = _cal_tele.head_pose[:3, :3].copy()
                 calibrated = True
                 calibration_time = time.time()
                 i_error_left = np.zeros(3)
@@ -464,7 +496,6 @@ if __name__ == '__main__':
                 i_error_right = np.zeros(3)
                 i_rot_error_left = np.zeros(3)
                 i_rot_error_right = np.zeros(3)
-                head_compensation = np.zeros(3)
             current_time = time.time()
             time_elapsed = current_time - start_time
             sleep_time = max(0, (1 / args.frequency) - time_elapsed)
@ -472,6 +503,12 @@ if __name__ == '__main__':
continue
# get image
if webcam_cap is not None:
ret, webcam_frame = webcam_cap.read()
if ret:
head_img = webcam_frame
tv_wrapper.render_to_xr(webcam_frame)
else:
if camera_config['head_camera']['enable_zmq']:
if args.record or xr_need_local_img:
head_img = img_client.get_head_frame()
@ -547,35 +584,29 @@ if __name__ == '__main__':
settle_elapsed = now - calibration_time
settle_alpha = min(1.0, settle_elapsed / max(args.settle_time, 0.01))
# Head decoupling via rotation gating: only compensate head position
# drift caused by head ROTATION (looking around), not body translation
# (stepping). When head rotates, gate opens and position change is accumulated.
# When head is still, gate stays closed so tv_wrapper's natural body-movement
# cancellation works correctly.
head_rot_now = tele_data.head_pose[:3, :3]
head_pos_now = tele_data.head_pose[:3, 3]
if head_rot_prev is not None:
rot_delta = rotation_to_rotvec(head_rot_now @ head_rot_prev.T)
angular_speed = np.linalg.norm(rot_delta) / max(dt, 0.001)
# Gate: 0 when head still (stepping), 1 when rotating (looking around)
gate = np.clip((angular_speed - 0.1) / 0.3, 0.0, 1.0)
pos_delta = head_pos_now - head_pos_prev
head_compensation += gate * pos_delta
head_rot_prev = head_rot_now.copy()
head_pos_prev = head_pos_now.copy()
# Position: robot_ref + settle_alpha * (vp_current + head_comp - vp_ref)
left_delta_pos = (tele_data.left_wrist_pose[:3, 3] + head_compensation) - vp_ref_left[:3, 3]
right_delta_pos = (tele_data.right_wrist_pose[:3, 3] + head_compensation) - vp_ref_right[:3, 3]
# Head rotation compensation: de-rotate wrist data by head's rotation
# since calibration. Converts world-frame positions/orientations back to
# a body-aligned frame, preventing arm swing during body rotation.
if head_R_at_cal is not None and args.head_rot_comp > 0.01:
R_head_delta = tele_data.head_pose[:3, :3] @ head_R_at_cal.T
R_comp = scale_rotation(R_head_delta.T, args.head_rot_comp)
else:
R_comp = np.eye(3)
# De-rotate wrist positions, then compute delta from calibration
left_pos_comp = R_comp @ tele_data.left_wrist_pose[:3, 3]
right_pos_comp = R_comp @ tele_data.right_wrist_pose[:3, 3]
left_delta_pos = left_pos_comp - vp_ref_left[:3, 3]
right_delta_pos = right_pos_comp - vp_ref_right[:3, 3]
left_wrist_adjusted = np.eye(4)
right_wrist_adjusted = np.eye(4)
left_wrist_adjusted[:3, 3] = robot_ref_left[:3, 3] + settle_alpha * left_delta_pos
right_wrist_adjusted[:3, 3] = robot_ref_right[:3, 3] + settle_alpha * right_delta_pos
# Rotation: compute delta, scale by rot_blend (and settle_alpha)
left_delta_R = tele_data.left_wrist_pose[:3, :3] @ vp_ref_left[:3, :3].T
right_delta_R = tele_data.right_wrist_pose[:3, :3] @ vp_ref_right[:3, :3].T
# Rotation: compute delta (de-rotated), scale by rot_blend (and settle_alpha)
left_delta_R = (R_comp @ tele_data.left_wrist_pose[:3, :3]) @ vp_ref_left[:3, :3].T
right_delta_R = (R_comp @ tele_data.right_wrist_pose[:3, :3]) @ vp_ref_right[:3, :3].T
rot_alpha = args.rot_blend * settle_alpha
if rot_alpha < 0.01:
left_wrist_adjusted[:3, :3] = robot_ref_left[:3, :3]
@@ -846,6 +877,14 @@ if __name__ == '__main__':
        except Exception as e:
            logger_mp.error(f"Failed to stop keyboard listener or ipc server: {e}")
+       if webcam_cap is not None:
+           try:
+               webcam_cap.release()
+               logger_mp.info("[webcam] Released")
+           except:
+               pass
        if img_client is not None:
            try:
                img_client.close()
            except Exception as e:

Loading…
Cancel
Save