From 93652916ad24f9f6f827cb140c97fb41b9e9c986 Mon Sep 17 00:00:00 2001 From: Joe DiPrima Date: Thu, 19 Feb 2026 11:36:04 -0600 Subject: [PATCH] Add head rotation compensation, USB webcam support, graceful R3 fallback - Head rotation compensation: de-rotate wrist data by head's rotation since calibration to prevent arm swing during body rotation (--head-rot-comp flag) - USB webcam support: --webcam and --webcam-res flags bypass teleimager, stream webcam directly to Quest 3 via Vuer shared memory pipeline - Graceful R3 controller: hasattr guards on get_wireless_remote() so teleop works with both modified and stock robot_arm.py Co-Authored-By: Claude Opus 4.6 --- teleop/teleop_hand_and_arm.py | 167 +++++++++++++++++++++------------- 1 file changed, 103 insertions(+), 64 deletions(-) diff --git a/teleop/teleop_hand_and_arm.py b/teleop/teleop_hand_and_arm.py index 2e3e2b2..3c01eab 100644 --- a/teleop/teleop_hand_and_arm.py +++ b/teleop/teleop_hand_and_arm.py @@ -54,7 +54,7 @@ RESET_ARMS = False # Trigger arm reset via 'h' key def parse_r3_buttons(data): """Parse R3 controller button bytes from wireless_remote. Returns dict of button states.""" - if len(data) < 4: + if data is None or len(data) < 4: return {} btn1 = data[2] # R1=0x01, L1=0x02, Start=0x04, Select=0x08, R2=0x10, L2=0x20 btn2 = data[3] # A=0x01, B=0x02, X=0x04, Y=0x08, Up=0x10, Right=0x20, Down=0x40, Left=0x80 @@ -156,6 +156,13 @@ if __name__ == '__main__': help='Seconds to ramp tracking after calibration (default: 0.5)') parser.add_argument('--rot-blend', type=float, default=0.7, help='Rotation tracking blend (0=position-only, 1=full rotation, default: 0.7)') + parser.add_argument('--head-rot-comp', type=float, default=1.0, + help='Head rotation compensation blend (0=none, 1=full, default: 1.0)') + # webcam + parser.add_argument('--webcam', type=int, default=None, + help='USB webcam device index (e.g. 0 for /dev/video0). Bypasses teleimager.') + parser.add_argument('--webcam-res', type=str, default='720p', choices=['480p', '720p', '1080p'], + help='Webcam resolution (default: 720p)') # record mode and task info parser.add_argument('--record', action = 'store_true', help = 'Enable data recording mode') parser.add_argument('--task-dir', type = str, default = './utils/data/', help = 'path to save data') @@ -200,11 +207,37 @@ if __name__ == '__main__': daemon=True) listen_keyboard_thread.start() - # image client - img_client = ImageClient(host=args.img_server_ip, request_bgr=True) - camera_config = img_client.get_cam_config() + # image source: USB webcam or teleimager + webcam_cap = None + img_client = None + if args.webcam is not None: + import cv2 as _cv2 + webcam_cap = _cv2.VideoCapture(args.webcam) + if not webcam_cap.isOpened(): + logger_mp.error(f"[webcam] Cannot open /dev/video{args.webcam}") + exit(1) + res_map = {'480p': (640, 480), '720p': (1280, 720), '1080p': (1920, 1080)} + cam_w, cam_h = res_map[args.webcam_res] + webcam_cap.set(_cv2.CAP_PROP_FRAME_WIDTH, cam_w) + webcam_cap.set(_cv2.CAP_PROP_FRAME_HEIGHT, cam_h) + actual_w = int(webcam_cap.get(_cv2.CAP_PROP_FRAME_WIDTH)) + actual_h = int(webcam_cap.get(_cv2.CAP_PROP_FRAME_HEIGHT)) + logger_mp.info(f"[webcam] Opened /dev/video{args.webcam} at {actual_w}x{actual_h}") + camera_config = { + 'head_camera': { + 'enable_zmq': True, 'enable_webrtc': False, + 'binocular': False, 'image_shape': (actual_h, actual_w), + 'fps': 30, 'webrtc_port': 0, + }, + 'left_wrist_camera': {'enable_zmq': False}, + 'right_wrist_camera': {'enable_zmq': False}, + } + xr_need_local_img = True + else: + img_client = ImageClient(host=args.img_server_ip, request_bgr=True) + camera_config = img_client.get_cam_config() + xr_need_local_img = not (args.display_mode == 'pass-through' or camera_config['head_camera']['enable_webrtc']) logger_mp.debug(f"Camera config: {camera_config}") - xr_need_local_img = not (args.display_mode == 'pass-through' or camera_config['head_camera']['enable_webrtc']) # televuer_wrapper: obtain hand pose data from the XR device and transmit the robot's head camera image to the XR device. tv_wrapper = TeleVuerWrapper(use_hand_tracking=args.input_mode == "hand", @@ -341,15 +374,21 @@ if __name__ == '__main__': while not START and not STOP: # wait for start or stop signal. time.sleep(0.033) # Poll R3 A button in wait loop (edge-detected) - r3_data = arm_ctrl.get_wireless_remote() - r3_btns = parse_r3_buttons(r3_data) - if r3_btns.get('A') and not _r3_prev_buttons.get('A'): - START = True - logger_mp.info("[R3] A pressed → START tracking") - _r3_prev_buttons = r3_btns - if camera_config['head_camera']['enable_zmq'] and xr_need_local_img: - head_img = img_client.get_head_frame() - tv_wrapper.render_to_xr(head_img) + if hasattr(arm_ctrl, 'get_wireless_remote'): + r3_data = arm_ctrl.get_wireless_remote() + r3_btns = parse_r3_buttons(r3_data) + if r3_btns.get('A') and not _r3_prev_buttons.get('A'): + START = True + logger_mp.info("[R3] A pressed → START tracking") + _r3_prev_buttons = r3_btns + if xr_need_local_img: + if webcam_cap is not None: + ret, frame = webcam_cap.read() + if ret: + tv_wrapper.render_to_xr(frame) + elif camera_config['head_camera']['enable_zmq']: + head_img = img_client.get_head_frame() + tv_wrapper.render_to_xr(head_img) logger_mp.info("---------------------🚀start Tracking🚀-------------------------") arm_ctrl.speed_gradual_max() @@ -369,9 +408,7 @@ if __name__ == '__main__': vp_ref_right = None robot_ref_left = None # 4x4: Robot FK pose at calibration moment robot_ref_right = None - head_rot_prev = None # 3x3: previous frame head rotation (for gating) - head_pos_prev = None # 3-vec: previous frame head position - head_compensation = np.zeros(3) # accumulated rotation-gated head position correction + head_R_at_cal = None # 3x3: head rotation at calibration (for body rotation compensation) prev_start = False # Initial calibration: capture reference poses on first START @@ -381,9 +418,7 @@ if __name__ == '__main__': robot_ref_left, robot_ref_right = arm_ik.compute_fk_pose(_cal_arm_q) vp_ref_left = _cal_tele.left_wrist_pose.copy() vp_ref_right = _cal_tele.right_wrist_pose.copy() - head_rot_prev = _cal_tele.head_pose[:3, :3].copy() - head_pos_prev = _cal_tele.head_pose[:3, 3].copy() - head_compensation = np.zeros(3) + head_R_at_cal = _cal_tele.head_pose[:3, :3].copy() calibrated = True calibration_time = time.time() prev_start = True @@ -402,7 +437,6 @@ if __name__ == '__main__': i_error_right = np.zeros(3) i_rot_error_left = np.zeros(3) i_rot_error_right = np.zeros(3) - head_compensation = np.zeros(3) arm_ctrl.ctrl_dual_arm(np.zeros(14), np.zeros(14)) reset_start = time.time() while time.time() - reset_start < 3.0: @@ -418,7 +452,7 @@ if __name__ == '__main__': continue # Poll R3 controller buttons (edge-detected) - r3_data = arm_ctrl.get_wireless_remote() + r3_data = arm_ctrl.get_wireless_remote() if hasattr(arm_ctrl, 'get_wireless_remote') else None r3_btns = parse_r3_buttons(r3_data) if r3_btns.get('A') and not _r3_prev_buttons.get('A'): START = not START @@ -445,9 +479,7 @@ if __name__ == '__main__': robot_ref_left, robot_ref_right = arm_ik.compute_fk_pose(_cal_arm_q) vp_ref_left = _cal_tele.left_wrist_pose.copy() vp_ref_right = _cal_tele.right_wrist_pose.copy() - head_rot_prev = _cal_tele.head_pose[:3, :3].copy() - head_pos_prev = _cal_tele.head_pose[:3, 3].copy() - head_compensation = np.zeros(3) + head_R_at_cal = _cal_tele.head_pose[:3, :3].copy() calibrated = True calibration_time = time.time() i_error_left = np.zeros(3) @@ -464,7 +496,6 @@ if __name__ == '__main__': i_error_right = np.zeros(3) i_rot_error_left = np.zeros(3) i_rot_error_right = np.zeros(3) - head_compensation = np.zeros(3) current_time = time.time() time_elapsed = current_time - start_time sleep_time = max(0, (1 / args.frequency) - time_elapsed) @@ -472,17 +503,23 @@ if __name__ == '__main__': continue # get image - if camera_config['head_camera']['enable_zmq']: - if args.record or xr_need_local_img: - head_img = img_client.get_head_frame() - if xr_need_local_img: - tv_wrapper.render_to_xr(head_img) - if camera_config['left_wrist_camera']['enable_zmq']: - if args.record: - left_wrist_img = img_client.get_left_wrist_frame() - if camera_config['right_wrist_camera']['enable_zmq']: - if args.record: - right_wrist_img = img_client.get_right_wrist_frame() + if webcam_cap is not None: + ret, webcam_frame = webcam_cap.read() + if ret: + head_img = webcam_frame + tv_wrapper.render_to_xr(webcam_frame) + else: + if camera_config['head_camera']['enable_zmq']: + if args.record or xr_need_local_img: + head_img = img_client.get_head_frame() + if xr_need_local_img: + tv_wrapper.render_to_xr(head_img) + if camera_config['left_wrist_camera']['enable_zmq']: + if args.record: + left_wrist_img = img_client.get_left_wrist_frame() + if camera_config['right_wrist_camera']['enable_zmq']: + if args.record: + right_wrist_img = img_client.get_right_wrist_frame() # record mode if args.record and RECORD_TOGGLE: @@ -547,35 +584,29 @@ if __name__ == '__main__': settle_elapsed = now - calibration_time settle_alpha = min(1.0, settle_elapsed / max(args.settle_time, 0.01)) - # Head decoupling via rotation gating: only compensate head position - # drift caused by head ROTATION (looking around), not body translation - # (stepping). When head rotates, gate opens and position change is accumulated. - # When head is still, gate stays closed so tv_wrapper's natural body-movement - # cancellation works correctly. - head_rot_now = tele_data.head_pose[:3, :3] - head_pos_now = tele_data.head_pose[:3, 3] - if head_rot_prev is not None: - rot_delta = rotation_to_rotvec(head_rot_now @ head_rot_prev.T) - angular_speed = np.linalg.norm(rot_delta) / max(dt, 0.001) - # Gate: 0 when head still (stepping), 1 when rotating (looking around) - gate = np.clip((angular_speed - 0.1) / 0.3, 0.0, 1.0) - pos_delta = head_pos_now - head_pos_prev - head_compensation += gate * pos_delta - head_rot_prev = head_rot_now.copy() - head_pos_prev = head_pos_now.copy() - - # Position: robot_ref + settle_alpha * (vp_current + head_comp - vp_ref) - left_delta_pos = (tele_data.left_wrist_pose[:3, 3] + head_compensation) - vp_ref_left[:3, 3] - right_delta_pos = (tele_data.right_wrist_pose[:3, 3] + head_compensation) - vp_ref_right[:3, 3] + # Head rotation compensation: de-rotate wrist data by head's rotation + # since calibration. Converts world-frame positions/orientations back to + # a body-aligned frame, preventing arm swing during body rotation. + if head_R_at_cal is not None and args.head_rot_comp > 0.01: + R_head_delta = tele_data.head_pose[:3, :3] @ head_R_at_cal.T + R_comp = scale_rotation(R_head_delta.T, args.head_rot_comp) + else: + R_comp = np.eye(3) + + # De-rotate wrist positions, then compute delta from calibration + left_pos_comp = R_comp @ tele_data.left_wrist_pose[:3, 3] + right_pos_comp = R_comp @ tele_data.right_wrist_pose[:3, 3] + left_delta_pos = left_pos_comp - vp_ref_left[:3, 3] + right_delta_pos = right_pos_comp - vp_ref_right[:3, 3] left_wrist_adjusted = np.eye(4) right_wrist_adjusted = np.eye(4) left_wrist_adjusted[:3, 3] = robot_ref_left[:3, 3] + settle_alpha * left_delta_pos right_wrist_adjusted[:3, 3] = robot_ref_right[:3, 3] + settle_alpha * right_delta_pos - # Rotation: compute delta, scale by rot_blend (and settle_alpha) - left_delta_R = tele_data.left_wrist_pose[:3, :3] @ vp_ref_left[:3, :3].T - right_delta_R = tele_data.right_wrist_pose[:3, :3] @ vp_ref_right[:3, :3].T + # Rotation: compute delta (de-rotated), scale by rot_blend (and settle_alpha) + left_delta_R = (R_comp @ tele_data.left_wrist_pose[:3, :3]) @ vp_ref_left[:3, :3].T + right_delta_R = (R_comp @ tele_data.right_wrist_pose[:3, :3]) @ vp_ref_right[:3, :3].T rot_alpha = args.rot_blend * settle_alpha if rot_alpha < 0.01: left_wrist_adjusted[:3, :3] = robot_ref_left[:3, :3] @@ -846,10 +877,18 @@ if __name__ == '__main__': except Exception as e: logger_mp.error(f"Failed to stop keyboard listener or ipc server: {e}") - try: - img_client.close() - except Exception as e: - logger_mp.error(f"Failed to close image client: {e}") + if webcam_cap is not None: + try: + webcam_cap.release() + logger_mp.info("[webcam] Released") + except: + pass + + if img_client is not None: + try: + img_client.close() + except Exception as e: + logger_mp.error(f"Failed to close image client: {e}") try: tv_wrapper.close()