From 321ae3c3222129ec5f36c5f2edbf64e1d55357a7 Mon Sep 17 00:00:00 2001 From: Joe DiPrima Date: Fri, 20 Feb 2026 11:04:54 -0600 Subject: [PATCH] Initial commit: Native Quest 3 teleop app with body tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Godot 4.3 project for Quest 3 that replaces the browser-based WebXR teleoperation with a native app using XR_FB_body_tracking (70 joints). Key advantage: chest-relative wrist tracking eliminates head-position subtraction artifacts and decouples body rotation from arm control. Godot app: body_tracker.gd, teleop_client.gd, webcam_display.gd Robot server: teleop_server.py (WebSocket, replaces Vuer ~900→~250 lines) Drop-in wrapper: native_tv_wrapper.py (compatible with teleop_hand_and_arm.py) Co-Authored-By: Claude Opus 4.6 --- .gitignore | 26 +++ Main.gd | 81 +++++++++ Main.tscn | 29 ++++ SETUP.md | 79 +++++++++ android/AndroidManifest.xml | 36 ++++ export_presets.cfg | 47 +++++ icon.svg | 5 + openxr_action_map.tres | 3 + project.godot | 47 +++++ scenes/webcam_quad.tscn | 17 ++ scripts/body_tracker.gd | 202 ++++++++++++++++++++++ scripts/teleop_client.gd | 179 +++++++++++++++++++ scripts/webcam_display.gd | 87 ++++++++++ server/__init__.py | 6 + server/native_tv_wrapper.py | 330 ++++++++++++++++++++++++++++++++++++ server/requirements.txt | 4 + server/teleop_server.py | 315 ++++++++++++++++++++++++++++++++++ 17 files changed, 1493 insertions(+) create mode 100644 .gitignore create mode 100644 Main.gd create mode 100644 Main.tscn create mode 100644 SETUP.md create mode 100644 android/AndroidManifest.xml create mode 100644 export_presets.cfg create mode 100644 icon.svg create mode 100644 openxr_action_map.tres create mode 100644 project.godot create mode 100644 scenes/webcam_quad.tscn create mode 100644 scripts/body_tracker.gd create mode 100644 scripts/teleop_client.gd create mode 100644 scripts/webcam_display.gd create mode 100644 server/__init__.py create mode 100644 server/native_tv_wrapper.py create mode 100644 server/requirements.txt create mode 100644 server/teleop_server.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6f289cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +# Godot +.godot/ +*.import +android/build/ +build/ +*.apk + +# OS +.DS_Store +Thumbs.db +*.swp +*~ + +# IDE +.vscode/ +.idea/ + +# Python +__pycache__/ +*.pyc +*.pyo +.venv/ +venv/ + +# Export +*.aab diff --git a/Main.gd b/Main.gd new file mode 100644 index 0000000..63394a1 --- /dev/null +++ b/Main.gd @@ -0,0 +1,81 @@ +extends Node3D +## Main entry point for G1 Teleop Quest 3 app. +## Initializes XR session with passthrough, wires body tracker to WebSocket client. 
+
+@onready var body_tracker: Node = $BodyTracker
+@onready var teleop_client: Node = $TeleopClient
+@onready var xr_origin: XROrigin3D = $XROrigin3D
+@onready var xr_camera: XRCamera3D = $XROrigin3D/XRCamera3D
+@onready var webcam_quad: MeshInstance3D = $XROrigin3D/XRCamera3D/WebcamQuad
+
+var xr_interface: XRInterface
+var xr_is_focused: bool = false
+
+
+func _ready() -> void:
+    # Initialize OpenXR interface
+    xr_interface = XRServer.find_interface("OpenXR")
+    if xr_interface and xr_interface.is_initialized():
+        print("[Main] OpenXR already initialized")
+        _on_openxr_ready()
+    elif xr_interface:
+        xr_interface.connect("session_begun", _on_openxr_session_begun)
+        xr_interface.connect("session_focussed", _on_openxr_focused)
+        xr_interface.connect("session_stopping", _on_openxr_stopping)
+        if not xr_interface.initialize():
+            printerr("[Main] Failed to initialize OpenXR")
+            get_tree().quit()
+            return
+        print("[Main] OpenXR initialized, waiting for session")
+    else:
+        printerr("[Main] OpenXR interface not found. Is the plugin enabled?")
+        get_tree().quit()
+        return
+
+    # Enable XR on the viewport
+    get_viewport().use_xr = true
+
+    # Wire body tracker output to teleop client
+    body_tracker.tracking_data_ready.connect(teleop_client._on_tracking_data)
+
+    # Wire webcam frames from teleop client to webcam display
+    teleop_client.webcam_frame_received.connect(webcam_quad._on_webcam_frame)
+
+
+func _on_openxr_session_begun() -> void:
+    print("[Main] OpenXR session begun")
+    _on_openxr_ready()
+
+
+func _on_openxr_ready() -> void:
+    # Enable passthrough (Quest 3 mixed reality)
+    _enable_passthrough()
+
+
+func _on_openxr_focused() -> void:
+    xr_is_focused = true
+    print("[Main] OpenXR session focused")
+
+
+func _on_openxr_stopping() -> void:
+    xr_is_focused = false
+    print("[Main] OpenXR session stopping")
+
+
+func _enable_passthrough() -> void:
+    # Request the alpha-blend environment blend mode for passthrough mixed reality.
+    # Note on values: the OpenXR spec calls this XR_ENVIRONMENT_BLEND_MODE_ALPHA_BLEND
+    # (spec value 3), while Godot exposes it as XRInterface.XR_ENV_BLEND_MODE_ALPHA_BLEND
+    # (enum value 2).
+    var openxr = xr_interface as OpenXRInterface
+    if openxr:
+        var modes = openxr.get_supported_environment_blend_modes()
+        print("[Main] Supported blend modes: ", modes)
+        if XRInterface.XR_ENV_BLEND_MODE_ALPHA_BLEND in modes:
+            openxr.set_environment_blend_mode(XRInterface.XR_ENV_BLEND_MODE_ALPHA_BLEND)
+            print("[Main] Passthrough enabled (alpha blend)")
+            # Clear background to transparent
+            get_viewport().transparent_bg = true
+            RenderingServer.set_default_clear_color(Color(0, 0, 0, 0))
+        else:
+            print("[Main] Alpha blend not supported, using opaque mode")
diff --git a/Main.tscn b/Main.tscn
new file mode 100644
index 0000000..4d3e127
--- /dev/null
+++ b/Main.tscn
@@ -0,0 +1,29 @@
+[gd_scene load_steps=6 format=3 uid="uid://main_scene"]
+
+[ext_resource type="Script" path="res://Main.gd" id="1"]
+[ext_resource type="Script" path="res://scripts/body_tracker.gd" id="2"]
+[ext_resource type="Script" path="res://scripts/teleop_client.gd" id="3"]
+[ext_resource type="Script" path="res://scripts/webcam_display.gd" id="4"]
+[ext_resource type="PackedScene" path="res://scenes/webcam_quad.tscn" id="5"]
+
+[node name="Main" type="Node3D"]
+script = ExtResource("1")
+
+[node name="XROrigin3D" type="XROrigin3D" parent="."]
+
+[node name="XRCamera3D" type="XRCamera3D" parent="XROrigin3D"]
+
+[node name="LeftController" type="XRController3D" parent="XROrigin3D"]
+tracker = &"left_hand"
+
+[node name="RightController" type="XRController3D" parent="XROrigin3D"]
+tracker = 
&"right_hand" + +[node name="BodyTracker" type="Node" parent="."] +script = ExtResource("2") + +[node name="TeleopClient" type="Node" parent="."] +script = ExtResource("3") + +[node name="WebcamQuad" parent="XROrigin3D/XRCamera3D" instance=ExtResource("5")] +transform = Transform3D(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, -0.3, -1.5) diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 0000000..53589ce --- /dev/null +++ b/SETUP.md @@ -0,0 +1,79 @@ +# G1 Teleop — Quest 3 Native App Setup + +## Prerequisites + +- **Godot 4.3+** (download from https://godotengine.org) +- **Android SDK + NDK** (install via Android Studio or standalone) +- **Meta Quest Developer Hub** (for sideloading) or `adb` +- Quest 3 in **developer mode** + +## Step 1: Install Meta OpenXR Vendors Plugin + +1. Open project in Godot: `godot --editor --path C:\git\g1-teleop` +2. Go to **AssetLib** tab (top center) +3. Search for "Godot OpenXR Vendors" +4. Download and install (includes Meta Quest support) +5. Enable the plugin: **Project → Project Settings → Plugins → godotopenxrvendors** → Enable +6. Restart Godot when prompted + +## Step 2: Configure Android Export + +1. Go to **Editor → Editor Settings → Export → Android** +2. Set the path to your Android SDK +3. Set Java SDK path +4. Go to **Project → Export → Quest 3** +5. Download Android export templates if prompted (Editor → Manage Export Templates → Download) + +## Step 3: Enable Body Tracking + +Body tracking is configured via: +- `export_presets.cfg`: `xr_features/hand_tracking=2` (required) +- `android/AndroidManifest.xml`: body tracking features and metadata + +The Meta OpenXR Vendors plugin v4.1.1+ exposes `XRBodyTracker` in GDScript. + +## Step 4: Build and Sideload + +1. Connect Quest 3 via USB (or Wi-Fi ADB) +2. In Godot: **Project → Export → Quest 3 → Export Project** (or one-click deploy) +3. APK will be at `build/g1-teleop.apk` +4. Sideload: `adb install build/g1-teleop.apk` + +## Step 5: Robot Server + +On the robot (or dev machine for testing): + +```bash +cd server/ +pip install -r requirements.txt +python teleop_server.py --port 8765 +``` + +## Step 6: Configure App + +Edit `scripts/teleop_client.gd` to set the robot's IP: +```gdscript +@export var server_host: String = "10.0.0.64" # Robot IP +@export var server_port: int = 8765 +``` + +Or configure at runtime via the Godot editor's export properties. 
+
+## Network
+
+- Quest 3 and robot must be on the same network
+- No SSL required (raw WebSocket)
+- Default port: 8765
+- Firewall: allow TCP 8765 on the robot
+
+## Troubleshooting
+
+### Body tracking not activating
+- Ensure Quest 3 system software is up to date
+- Check Settings → Movement Tracking → Body Tracking is enabled
+- Ensure the Meta OpenXR Vendors plugin version is ≥ 4.1.1
+
+### WebSocket connection fails
+- Verify robot IP is correct and reachable: `ping 10.0.0.64`
+- Check server is running: `ss -tlnp | grep 8765`
+- Check firewall: `sudo ufw allow 8765/tcp`
diff --git a/android/AndroidManifest.xml b/android/AndroidManifest.xml
new file mode 100644
index 0000000..8d39e82
--- /dev/null
+++ b/android/AndroidManifest.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Quest 3 permissions, features, and metadata for hand and body tracking -->
+<manifest xmlns:android="http://schemas.android.com/apk/res/android">
+
+    <!-- Network access for the WebSocket link to the robot -->
+    <uses-permission android:name="android.permission.INTERNET" />
+    <uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
+    <uses-permission android:name="android.permission.ACCESS_WIFI_STATE" />
+
+    <!-- Meta hand and body tracking -->
+    <uses-permission android:name="com.oculus.permission.HAND_TRACKING" />
+    <uses-permission android:name="com.oculus.permission.BODY_TRACKING" />
+
+    <uses-feature android:name="android.hardware.vr.headtracking" android:required="true" android:version="1" />
+    <uses-feature android:name="oculus.software.handtracking" android:required="true" />
+    <uses-feature android:name="com.oculus.software.body_tracking" android:required="true" />
+
+    <application>
+
+        <meta-data android:name="com.oculus.supportedDevices" android:value="quest3" />
+
+        <!-- High-frequency hand tracking (matches hand_tracking_frequency=1) -->
+        <meta-data android:name="com.oculus.handtracking.frequency" android:value="HIGH" />
+        <meta-data android:name="com.oculus.handtracking.version" android:value="V2.0" />
+
+        <!-- Godot's Android activity -->
+        <activity android:name="com.godot.game.GodotApp">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+                <category android:name="com.oculus.intent.category.VR" />
+            </intent-filter>
+        </activity>
+
+    </application>
+</manifest>
diff --git a/export_presets.cfg b/export_presets.cfg
new file mode 100644
index 0000000..bd56fb3
--- /dev/null
+++ b/export_presets.cfg
@@ -0,0 +1,47 @@
+[preset.0]
+
+name="Quest 3"
+platform="Android"
+runnable=true
+dedicated_server=false
+custom_features=""
+export_filter="all_resources"
+include_filter=""
+exclude_filter=""
+export_path="build/g1-teleop.apk"
+encryption_include_filters=""
+encryption_exclude_filters=""
+encrypt_pck=false
+encrypt_directory=false
+
+[preset.0.options]
+
+custom_template/debug=""
+custom_template/release=""
+gradle_build/use_gradle_build=true
+gradle_build/export_format=0
+gradle_build/min_sdk="29"
+gradle_build/target_sdk="32"
+architectures/arm64-v8a=true
+architectures/armeabi-v7a=false
+architectures/x86=false
+architectures/x86_64=false
+version/code=1
+version/name="1.0.0"
+package/unique_name="org.opentesla.g1teleop"
+package/name="G1 Teleop"
+package/signed=true
+package/classify_as_game=false
+package/retain_data_on_uninstall=false
+package/exclude_from_recents=false
+launcher_icons/main_192x192=""
+launcher_icons/adaptive_foreground_432x432=""
+launcher_icons/adaptive_background_432x432=""
+graphics/opengl_debug=false
+xr_features/xr_mode=1
+xr_features/hand_tracking=2
+xr_features/hand_tracking_frequency=1
+xr_features/passthrough=2
+permissions/internet=true
+permissions/access_network_state=true
+permissions/access_wifi_state=true
diff --git a/icon.svg b/icon.svg
new file mode 100644
index 0000000..b6f0903
--- /dev/null
+++ b/icon.svg
@@ -0,0 +1,5 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="128" height="128" viewBox="0 0 128 128">
+  <rect width="128" height="128" rx="16" fill="#202433"/>
+  <text x="64" y="60" text-anchor="middle" font-family="sans-serif" font-size="40" font-weight="bold" fill="#4fc3f7">G1</text>
+  <text x="64" y="96" text-anchor="middle" font-family="sans-serif" font-size="22" fill="#ffffff">TELEOP</text>
+</svg>
diff --git a/openxr_action_map.tres b/openxr_action_map.tres
new file mode 100644
index 0000000..6c7a5c5
--- /dev/null
+++ b/openxr_action_map.tres
@@ -0,0 +1,3 @@
+[gd_resource type="OpenXRActionMap" format=3 uid="uid://openxr_actions"]
+
+[resource]
diff --git a/project.godot b/project.godot
new file mode 100644
index 0000000..e0ab4d9
--- /dev/null
+++ b/project.godot
@@ -0,0 +1,47 @@
+; Engine configuration file.
+; It's best edited using the editor UI and not directly,
+; since the parameters that go here are not all obvious.
+; +; Format: +; [section] is a section +; param=value ; assigned a value +; param=value ; assigned a value + +config_version=5 + +[application] + +config/name="G1 Teleop" +config/description="Native Quest 3 teleoperation app for Unitree G1 humanoid robot with body tracking" +run/main_scene="res://Main.tscn" +config/features=PackedStringArray("4.3", "Mobile") +config/icon="res://icon.svg" + +[autoload] + +[display] + +window/size/viewport_width=1920 +window/size/viewport_height=1920 + +[rendering] + +renderer/rendering_method="mobile" +renderer/rendering_method.mobile="mobile" +textures/vram_compression/import_etc2_astc=true + +[xr] + +openxr/enabled=true +openxr/default_action_map="res://openxr_action_map.tres" +openxr/form_factor=0 +openxr/view_configuration=1 +openxr/reference_space=1 +openxr/environment_blend_mode=0 +openxr/foveation_level=3 +openxr/foveation_dynamic=true +shaders/enabled=true + +[editor_plugins] + +enabled=PackedStringArray("res://addons/godotopenxrvendors/plugin.cfg") diff --git a/scenes/webcam_quad.tscn b/scenes/webcam_quad.tscn new file mode 100644 index 0000000..60b2ecd --- /dev/null +++ b/scenes/webcam_quad.tscn @@ -0,0 +1,17 @@ +[gd_scene load_steps=4 format=3 uid="uid://webcam_quad"] + +[ext_resource type="Script" path="res://scripts/webcam_display.gd" id="1"] + +[sub_resource type="QuadMesh" id="QuadMesh_1"] +size = Vector2(0.8, 0.6) + +[sub_resource type="StandardMaterial3D" id="StandardMaterial3D_1"] +albedo_color = Color(1, 1, 1, 1) +shading_mode = 0 +disable_ambient_light = true +disable_fog = true + +[node name="WebcamQuad" type="MeshInstance3D"] +mesh = SubResource("QuadMesh_1") +material_override = SubResource("StandardMaterial3D_1") +script = ExtResource("1") diff --git a/scripts/body_tracker.gd b/scripts/body_tracker.gd new file mode 100644 index 0000000..240eeb9 --- /dev/null +++ b/scripts/body_tracker.gd @@ -0,0 +1,202 @@ +extends Node +## Reads XR_FB_body_tracking joints each frame via Godot's XRBodyTracker. +## Computes chest-relative wrist positions and emits tracking data. +## +## Meta body tracking provides 70 joints. We use: +## CHEST (5) - torso orientation (solves body rotation problem) +## HEAD (7) - head pose +## LEFT_HAND_WRIST (12) - left wrist +## RIGHT_HAND_WRIST (17) - right wrist +## Hand joints (18-69) - finger positions +## +## Output poses are in Godot's coordinate system (Y-up, -Z forward). +## The server handles conversion to robot conventions. + +## Emitted every frame with tracking data dict ready to send. 
+signal tracking_data_ready(data: Dictionary)
+
+## Joint indices from XR_FB_body_tracking
+## Reference: Meta OpenXR body tracking extension
+const JOINT_HIPS := 1
+const JOINT_SPINE_LOWER := 2
+const JOINT_SPINE_MIDDLE := 3
+const JOINT_SPINE_UPPER := 4
+const JOINT_CHEST := 5
+const JOINT_NECK := 6
+const JOINT_HEAD := 7
+
+const JOINT_LEFT_SHOULDER := 8
+const JOINT_LEFT_SCAPULA := 9
+const JOINT_LEFT_ARM_UPPER := 10
+const JOINT_LEFT_ARM_LOWER := 11
+const JOINT_LEFT_HAND_WRIST := 12
+
+const JOINT_RIGHT_SHOULDER := 13
+const JOINT_RIGHT_SCAPULA := 14
+const JOINT_RIGHT_ARM_UPPER := 15
+const JOINT_RIGHT_ARM_LOWER := 16
+const JOINT_RIGHT_HAND_WRIST := 17
+
+# Hand joints start at index 18 for left hand, 43 for right hand.
+# 25 joints are read per hand, matching the 25-joint hand arrays used by
+# the existing teleop pipeline (XR_EXT_hand_tracking itself defines 26).
+const JOINT_LEFT_HAND_START := 18
+const JOINT_RIGHT_HAND_START := 43
+const HAND_JOINT_COUNT := 25
+
+## Total body joint count
+const BODY_JOINT_COUNT := 70
+
+## Tracking state
+var body_tracker_name: StringName = &"/user/body_tracker"
+var is_tracking: bool = false
+var frames_since_last_send: int = 0
+
+## Send rate control: ~36 Hz (every other frame at the Quest's 72 Hz refresh)
+@export var send_every_n_frames: int = 2
+
+## Debug logging
+@export var debug_log: bool = false
+var _log_counter: int = 0
+
+
+func _ready() -> void:
+    print("[BodyTracker] Initialized, waiting for body tracking data...")
+
+
+func _process(_delta: float) -> void:
+    frames_since_last_send += 1
+    if frames_since_last_send < send_every_n_frames:
+        return
+    frames_since_last_send = 0
+
+    var tracker := XRServer.get_tracker(body_tracker_name) as XRBodyTracker
+    if tracker == null:
+        if is_tracking:
+            print("[BodyTracker] Lost body tracking")
+            is_tracking = false
+        return
+
+    if not tracker.get_has_tracking_data():
+        if is_tracking:
+            print("[BodyTracker] Body tracking data unavailable")
+            is_tracking = false
+        return
+
+    if not is_tracking:
+        print("[BodyTracker] Body tracking active!")
+        is_tracking = true
+
+    # Read key joint poses
+    var chest_xform := tracker.get_joint_transform(JOINT_CHEST)
+    var head_xform := tracker.get_joint_transform(JOINT_HEAD)
+    var left_wrist_xform := tracker.get_joint_transform(JOINT_LEFT_HAND_WRIST)
+    var right_wrist_xform := tracker.get_joint_transform(JOINT_RIGHT_HAND_WRIST)
+
+    # Compute chest-relative wrist poses.
+    # This is the key advantage: it eliminates head-position subtraction artifacts.
+    var chest_inv := chest_xform.affine_inverse()
+    var left_wrist_rel := chest_inv * left_wrist_xform
+    var right_wrist_rel := chest_inv * right_wrist_xform
+
+    # Build tracking data packet
+    var data := {}
+    data["type"] = "tracking"
+    data["timestamp"] = Time.get_ticks_msec()
+
+    # Head pose (world space) - 7 floats: pos(3) + quat(4)
+    data["head"] = _xform_to_pose7(head_xform)
+
+    # Chest pose (world space) - 7 floats: pos(3) + quat(4)
+    data["chest"] = _xform_to_pose7(chest_xform)
+
+    # Chest-relative wrist poses as 4x4 matrices (16 floats each, column-major).
+    # These replace the world-space wrist poses; no head subtraction needed.
+    data["left_wrist"] = _xform_to_mat16(left_wrist_rel)
+    data["right_wrist"] = _xform_to_mat16(right_wrist_rel)
+
+    # Hand joint positions (25 joints per hand, relative to wrist)
+    var left_hand_pos := _get_hand_positions(tracker, JOINT_LEFT_HAND_START, left_wrist_xform)
+    var right_hand_pos := _get_hand_positions(tracker, JOINT_RIGHT_HAND_START, right_wrist_xform)
+    data["left_hand_pos"] = left_hand_pos
+    data["right_hand_pos"] = right_hand_pos
+
+    # Hand joint rotations (25 joints per hand, relative to wrist)
+    var left_hand_rot := _get_hand_rotations(tracker, JOINT_LEFT_HAND_START, left_wrist_xform)
+    var right_hand_rot := _get_hand_rotations(tracker, JOINT_RIGHT_HAND_START, right_wrist_xform)
+    data["left_hand_rot"] = left_hand_rot
+    data["right_hand_rot"] = right_hand_rot
+
+    # Debug logging every ~2 seconds
+    if debug_log:
+        _log_counter += 1
+        if _log_counter >= 60:
+            _log_counter = 0
+            print("[BodyTracker] chest=", chest_xform.origin,
+                    " head=", head_xform.origin,
+                    " L_wrist_rel=", left_wrist_rel.origin,
+                    " R_wrist_rel=", right_wrist_rel.origin)
+
+    tracking_data_ready.emit(data)
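+
+## Wire format summary (parsed by server/teleop_server.py; stored as float64):
+##   head, chest:              7 floats  [x, y, z, qx, qy, qz, qw]
+##   left_wrist, right_wrist: 16 floats  (4x4 SE(3), column-major, chest-relative)
+##   left/right_hand_pos:     75 floats  (25 joints × 3, wrist-relative)
+##   left/right_hand_rot:    225 floats  (25 joints × 3×3, wrist-relative)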
+
+
+## Convert Transform3D to 7-float pose: [x, y, z, qx, qy, qz, qw]
+func _xform_to_pose7(xform: Transform3D) -> Array:
+    var pos := xform.origin
+    var quat := xform.basis.get_rotation_quaternion()
+    return [pos.x, pos.y, pos.z, quat.x, quat.y, quat.z, quat.w]
+
+
+## Convert Transform3D to 16-float column-major 4x4 matrix
+## Layout: [m00, m10, m20, 0, m01, m11, m21, 0, m02, m12, m22, 0, tx, ty, tz, 1]
+## This matches NumPy's Fortran ('F') order used by the existing teleop code.
+func _xform_to_mat16(xform: Transform3D) -> Array:
+    var b := xform.basis
+    var o := xform.origin
+    # Basis.x/y/z are the basis axis vectors, i.e. the columns of the rotation
+    # matrix, so emitting them in order yields column-major data.
+    # Column 0: basis x-axis
+    # Column 1: basis y-axis
+    # Column 2: basis z-axis
+    # Column 3: origin (translation)
+    return [
+        b.x.x, b.x.y, b.x.z, 0.0, # Column 0
+        b.y.x, b.y.y, b.y.z, 0.0, # Column 1
+        b.z.x, b.z.y, b.z.z, 0.0, # Column 2
+        o.x, o.y, o.z, 1.0 # Column 3
+    ]
+
+
+## Get 25 hand joint positions relative to wrist, as flat array (75 floats)
+## Each joint: [x, y, z] relative to wrist
+func _get_hand_positions(tracker: XRBodyTracker, start_idx: int, wrist_xform: Transform3D) -> Array:
+    var wrist_inv := wrist_xform.affine_inverse()
+    var positions := []
+    positions.resize(HAND_JOINT_COUNT * 3)
+    for i in range(HAND_JOINT_COUNT):
+        var joint_xform := tracker.get_joint_transform(start_idx + i)
+        var rel := wrist_inv * joint_xform
+        positions[i * 3 + 0] = rel.origin.x
+        positions[i * 3 + 1] = rel.origin.y
+        positions[i * 3 + 2] = rel.origin.z
+    return positions
+
+
+## Get 25 hand joint rotations relative to wrist, as flat array (225 floats)
+## Each joint: 9 floats (3x3 rotation matrix, column-major)
+func _get_hand_rotations(tracker: XRBodyTracker, start_idx: int, wrist_xform: Transform3D) -> Array:
+    var wrist_rot_inv := wrist_xform.basis.inverse()
+    var rotations := []
+    rotations.resize(HAND_JOINT_COUNT * 9)
+    for i in range(HAND_JOINT_COUNT):
+        var joint_xform := tracker.get_joint_transform(start_idx + i)
+        var rel_basis := wrist_rot_inv * joint_xform.basis
+        # Store as column-major 3x3
+        rotations[i * 9 + 0] = rel_basis.x.x
+        rotations[i * 9 + 1] = rel_basis.x.y
+        rotations[i * 9 + 2] = rel_basis.x.z
+        rotations[i * 9 + 3] = rel_basis.y.x
+        rotations[i * 9 + 4] = rel_basis.y.y
+        rotations[i * 9 + 5] = rel_basis.y.z
+        rotations[i * 9 + 6] = rel_basis.z.x
+        rotations[i * 9 + 7] = rel_basis.z.y
+        rotations[i * 9 + 8] = rel_basis.z.z
+    return rotations
diff --git a/scripts/teleop_client.gd b/scripts/teleop_client.gd
new file mode 100644
index 0000000..e2bfb34
--- /dev/null
+++ b/scripts/teleop_client.gd
@@ -0,0 +1,179 @@
+extends Node
+## WebSocket client that connects to teleop_server.py on the robot.
+## Sends body tracking data as JSON, receives JPEG webcam frames as binary.
+##
+## Protocol:
+## Client → Server: JSON text messages with tracking data
+## Server → Client: Binary messages with JPEG webcam frames
+##
+## No SSL required — raw WebSocket over local network.
+
+## Emitted when a JPEG webcam frame is received from the server.
+signal webcam_frame_received(jpeg_bytes: PackedByteArray)
+
+## Emitted when connection state changes.
+signal connection_state_changed(connected: bool)
+
+## Server address — robot's IP and teleop_server.py port
+@export var server_host: String = "10.0.0.64"
+@export var server_port: int = 8765
+@export var auto_connect: bool = true
+
+## Reconnection settings
+@export var reconnect_delay_sec: float = 2.0
+@export var max_reconnect_attempts: int = 0 # 0 = unlimited
+
+## Connection state
+var ws := WebSocketPeer.new()
+# Named to avoid shadowing Object.is_connected(), which GDScript forbids.
+var is_socket_connected: bool = false
+var _reconnect_timer: float = 0.0
+var _reconnect_attempts: int = 0
+var _was_connected: bool = false
+var _pending_data: Array = []
+
+## Stats
+var messages_sent: int = 0
+var messages_received: int = 0
+var bytes_sent: int = 0
+var bytes_received: int = 0
+var last_send_time: int = 0
+var last_receive_time: int = 0
+
+
+func _ready() -> void:
+    if auto_connect:
+        connect_to_server()
+
+
+func _process(delta: float) -> void:
+    ws.poll()
+
+    var state := ws.get_ready_state()
+
+    match state:
+        WebSocketPeer.STATE_OPEN:
+            if not is_socket_connected:
+                is_socket_connected = true
+                _was_connected = true
+                _reconnect_attempts = 0
+                print("[TeleopClient] Connected to ws://%s:%d" % [server_host, server_port])
+                connection_state_changed.emit(true)
+
+            # Process incoming messages
+            while ws.get_available_packet_count() > 0:
+                var packet := ws.get_packet()
+                bytes_received += packet.size()
+                messages_received += 1
+                last_receive_time = Time.get_ticks_msec()
+
+                # Check if this is a binary (JPEG) or text message
+                if ws.was_string_packet():
+                    _handle_text_message(packet.get_string_from_utf8())
+                else:
+                    # Binary = JPEG webcam frame
+                    webcam_frame_received.emit(packet)
+
+            # Send any pending tracking data
+            for data in _pending_data:
+                _send_json(data)
+            _pending_data.clear()
+
+        WebSocketPeer.STATE_CLOSING:
+            pass # Wait for close to complete
+
+        WebSocketPeer.STATE_CLOSED:
+            if is_socket_connected:
+                # Log and emit only on the open→closed transition, not every frame
+                var code := ws.get_close_code()
+                var reason := ws.get_close_reason()
+                print("[TeleopClient] Disconnected (code=%d, reason=%s)" % [code, reason])
+                is_socket_connected = false
+                connection_state_changed.emit(false)
+
+            # Auto-reconnect
+            if auto_connect and _was_connected:
+                _reconnect_timer -= delta
+                if _reconnect_timer <= 0:
+                    _reconnect_timer = reconnect_delay_sec
+                    _attempt_reconnect()
+
+        WebSocketPeer.STATE_CONNECTING:
+            pass # Still connecting
+
+
+func connect_to_server() -> void:
+    var url := "ws://%s:%d" % [server_host, server_port]
+    print("[TeleopClient] Connecting to %s..." % url)
+    var err := ws.connect_to_url(url)
+    if err != OK:
+        printerr("[TeleopClient] Failed to initiate connection: ", err)
+
+
+func disconnect_from_server() -> void:
+    _was_connected = false
+    ws.close()
+
+
+func _attempt_reconnect() -> void:
+    if max_reconnect_attempts > 0:
+        _reconnect_attempts += 1
+        if _reconnect_attempts > max_reconnect_attempts:
+            print("[TeleopClient] Max reconnect attempts reached, giving up")
+            _was_connected = false
+            return
+        print("[TeleopClient] Reconnect attempt %d/%d..." % [_reconnect_attempts, max_reconnect_attempts])
+    else:
+        _reconnect_attempts += 1
+        if _reconnect_attempts % 5 == 1: # Log every 5th attempt to avoid spam
+            print("[TeleopClient] Reconnect attempt %d..." % _reconnect_attempts)
+
+    connect_to_server()
+
+
+## Called by body_tracker via signal when new tracking data is ready.
+func _on_tracking_data(data: Dictionary) -> void:
+    if is_socket_connected:
+        _send_json(data)
+    # Don't buffer if not connected — tracking data is perishable
+
+
+func _send_json(data: Dictionary) -> void:
+    var json_str := JSON.stringify(data)
+    var err := ws.send_text(json_str)
+    if err == OK:
+        messages_sent += 1
+        bytes_sent += json_str.length()
+        last_send_time = Time.get_ticks_msec()
+    else:
+        printerr("[TeleopClient] Failed to send: ", err)
+
+
+func _handle_text_message(text: String) -> void:
+    # Server may send JSON status/config messages
+    var json := JSON.new()
+    var err := json.parse(text)
+    if err != OK:
+        printerr("[TeleopClient] Invalid JSON from server: ", text.left(100))
+        return
+
+    var data: Dictionary = json.data
+    var msg_type: String = data.get("type", "")
+
+    match msg_type:
+        "config":
+            print("[TeleopClient] Server config: ", data)
+        "status":
+            print("[TeleopClient] Server status: ", data.get("message", ""))
+        _:
+            print("[TeleopClient] Unknown message type: ", msg_type)
+
+
+## Get connection statistics as a dictionary
+func get_stats() -> Dictionary:
+    return {
+        "connected": is_socket_connected,
+        "messages_sent": messages_sent,
+        "messages_received": messages_received,
+        "bytes_sent": bytes_sent,
+        "bytes_received": bytes_received,
+        "reconnect_attempts": _reconnect_attempts,
+    }
diff --git a/scripts/webcam_display.gd b/scripts/webcam_display.gd
new file mode 100644
index 0000000..58d4190
--- /dev/null
+++ b/scripts/webcam_display.gd
@@ -0,0 +1,87 @@
+extends MeshInstance3D
+## Displays JPEG webcam frames received from the robot on a quad mesh.
+## The quad is positioned in front of the user's view (child of XRCamera3D).
+##
+## Receives JPEG bytes via the webcam_frame_received signal from TeleopClient.
+
+## Display settings
+@export var default_color := Color(0.1, 0.1, 0.1, 0.8)
+
+## State
+var _texture: ImageTexture
+var _material: StandardMaterial3D
+var _frame_count: int = 0
+var _last_frame_time: int = 0
+var _fps: float = 0.0
+var _fps_update_timer: float = 0.0
+var _has_received_frame: bool = false
+
+
+func _ready() -> void:
+    # Get or create the material
+    _material = material_override as StandardMaterial3D
+    if _material == null:
+        _material = StandardMaterial3D.new()
+        material_override = _material
+
+    # Configure material for unlit display (no scene lighting affects the webcam feed)
+    _material.shading_mode = BaseMaterial3D.SHADING_MODE_UNSHADED
+    _material.albedo_color = default_color
+    _material.transparency = BaseMaterial3D.TRANSPARENCY_ALPHA
+
+    # Create initial texture (will be replaced on first frame)
+    _texture = ImageTexture.new()
+
+    print("[WebcamDisplay] Ready, waiting for frames...")
+
+
+func _process(delta: float) -> void:
+    # Update FPS counter
+    _fps_update_timer += delta
+    if _fps_update_timer >= 1.0:
+        _fps = _frame_count / _fps_update_timer
+        _frame_count = 0
+        _fps_update_timer = 0.0
+
+
+## Called when a JPEG webcam frame is received from the server.
+## Connected via signal from TeleopClient in Main.gd.
+func _on_webcam_frame(jpeg_bytes: PackedByteArray) -> void:
+    if jpeg_bytes.size() < 2:
+        return
+
+    var image := Image.new()
+    var err := image.load_jpg_from_buffer(jpeg_bytes)
+    if err != OK:
+        if _frame_count == 0:
+            printerr("[WebcamDisplay] Failed to decode JPEG frame (size=%d, err=%d)" % [jpeg_bytes.size(), err])
+        return
+
+    # Update texture from decoded image
+    if _texture.get_image() == null or _texture.get_width() != image.get_width() or _texture.get_height() != image.get_height():
+        _texture = ImageTexture.create_from_image(image)
+        _material.albedo_texture = _texture
+        _material.albedo_color = Color.WHITE # Full brightness once we have a real image
+        print("[WebcamDisplay] Texture created: %dx%d" % [image.get_width(), image.get_height()])
+    else:
+        _texture.update(image)
+
+    _frame_count += 1
+    _last_frame_time = Time.get_ticks_msec()
+
+    if not _has_received_frame:
+        _has_received_frame = true
+        print("[WebcamDisplay] First frame received!")
+
+
+## Get display FPS
+func get_fps() -> float:
+    return _fps
+
+
+## Check if frames are being received
+func is_receiving() -> bool:
+    if not _has_received_frame:
+        return false
+    # Consider stale if no frame for 2 seconds
+    return (Time.get_ticks_msec() - _last_frame_time) < 2000
diff --git a/server/__init__.py b/server/__init__.py
new file mode 100644
index 0000000..5f9751d
--- /dev/null
+++ b/server/__init__.py
@@ -0,0 +1,6 @@
+"""
+G1 Teleop Server — Quest 3 native app backend.
+"""
+
+from .teleop_server import TeleopServer
+from .native_tv_wrapper import NativeTeleWrapper, TeleData
diff --git a/server/native_tv_wrapper.py b/server/native_tv_wrapper.py
new file mode 100644
index 0000000..10cf90f
--- /dev/null
+++ b/server/native_tv_wrapper.py
@@ -0,0 +1,330 @@
+"""
+Native TeleVuerWrapper — drop-in replacement for TeleVuerWrapper that uses
+the Quest 3 native app via TeleopServer instead of Vuer/browser.
+
+Returns TeleData in the same format as the original TeleVuerWrapper, but:
+- Wrist poses arrive chest-relative from body tracking (no head subtraction needed)
+- Chest orientation is available for body rotation decoupling
+- No SSL, no browser, no Vuer dependency
+
+The coordinate transform pipeline is preserved:
+1. Quest 3 sends poses in Godot's coordinate system (Y-up, -Z forward)
+2. This wrapper converts to robot convention (X-forward, Y-left, Z-up)
+3. Applies Unitree arm URDF convention transforms
+4. Applies a chest-to-waist offset (the analogue of the original head-to-waist offset)
+
+Usage:
+    # Instead of:
+    # tv_wrapper = TeleVuerWrapper(use_hand_tracking=True, ...)
+    # Use:
+    # tv_wrapper = NativeTeleWrapper(port=8765)
+    # tv_wrapper.start()  # Starts WebSocket server in background
+"""
+
+import numpy as np
+from dataclasses import dataclass
+
+try:
+    # Relative import when used as part of the `server` package...
+    from .teleop_server import TeleopServer
+except ImportError:
+    # ...absolute import when run from inside server/ directly.
+    from teleop_server import TeleopServer
+
+
+# --- Coordinate Transform Constants ---
+# Godot coordinate system: Y-up, -Z forward, X-right.
+# This is equivalent to the OpenXR convention.
+# Robot convention: X-forward, Y-left, Z-up
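+#
+# Sanity check of the mapping (derived from the two matrices below):
+#   OpenXR forward (0, 0, -1)  ->  robot forward (1, 0, 0)
+#   OpenXR up      (0, 1,  0)  ->  robot up      (0, 0, 1)
+#   OpenXR right   (1, 0,  0)  ->  robot         (0, -1, 0)   (robot +Y is left)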
+
+# Basis change: OpenXR (Y-up, -Z fwd) → Robot (Z-up, X fwd)
+T_ROBOT_OPENXR = np.array([[ 0,  0, -1,  0],
+                           [-1,  0,  0,  0],
+                           [ 0,  1,  0,  0],
+                           [ 0,  0,  0,  1]], dtype=np.float64)
+
+T_OPENXR_ROBOT = np.array([[ 0, -1,  0,  0],
+                           [ 0,  0,  1,  0],
+                           [-1,  0,  0,  0],
+                           [ 0,  0,  0,  1]], dtype=np.float64)
+
+R_ROBOT_OPENXR = T_ROBOT_OPENXR[:3, :3]
+R_OPENXR_ROBOT = T_OPENXR_ROBOT[:3, :3]
+
+# Arm initial pose convention transforms
+T_TO_UNITREE_HUMANOID_LEFT_ARM = np.array([[1, 0,  0, 0],
+                                           [0, 0, -1, 0],
+                                           [0, 1,  0, 0],
+                                           [0, 0,  0, 1]], dtype=np.float64)
+
+T_TO_UNITREE_HUMANOID_RIGHT_ARM = np.array([[1,  0, 0, 0],
+                                            [0,  0, 1, 0],
+                                            [0, -1, 0, 0],
+                                            [0,  0, 0, 1]], dtype=np.float64)
+
+# Hand convention transform
+T_TO_UNITREE_HAND = np.array([[ 0,  0, 1, 0],
+                              [-1,  0, 0, 0],
+                              [ 0, -1, 0, 0],
+                              [ 0,  0, 0, 1]], dtype=np.float64)
+
+# Default poses (fallback when no data)
+CONST_HEAD_POSE = np.array([[1, 0, 0, 0],
+                            [0, 1, 0, 1.5],
+                            [0, 0, 1, -0.2],
+                            [0, 0, 0, 1]], dtype=np.float64)
+
+CONST_LEFT_ARM_POSE = np.array([[1, 0, 0, -0.15],
+                                [0, 1, 0, 1.13],
+                                [0, 0, 1, -0.3],
+                                [0, 0, 0, 1]], dtype=np.float64)
+
+CONST_RIGHT_ARM_POSE = np.array([[1, 0, 0, 0.15],
+                                 [0, 1, 0, 1.13],
+                                 [0, 0, 1, -0.3],
+                                 [0, 0, 0, 1]], dtype=np.float64)
+
+
+def fast_mat_inv(mat):
+    """Fast SE(3) inverse using the rotation transpose."""
+    ret = np.eye(4)
+    ret[:3, :3] = mat[:3, :3].T
+    ret[:3, 3] = -mat[:3, :3].T @ mat[:3, 3]
+    return ret
+
+
+def safe_mat_update(prev_mat, mat):
+    """Return the previous matrix if the new matrix is singular."""
+    det = np.linalg.det(mat)
+    if not np.isfinite(det) or np.isclose(det, 0.0, atol=1e-6):
+        return prev_mat, False
+    return mat, True
+
+
+@dataclass
+class TeleData:
+    """Matches the TeleData from tv_wrapper.py, extended with chest_pose."""
+    head_pose: np.ndarray  # (4,4) SE(3) pose of head
+    left_wrist_pose: np.ndarray  # (4,4) SE(3) pose of left wrist (IK frame)
+    right_wrist_pose: np.ndarray  # (4,4) SE(3) pose of right wrist (IK frame)
+    chest_pose: np.ndarray = None  # (4,4) SE(3) NEW: chest orientation
+    # Hand tracking
+    left_hand_pos: np.ndarray = None  # (25,3) 3D positions of left hand joints
+    right_hand_pos: np.ndarray = None  # (25,3) 3D positions of right hand joints
+    left_hand_rot: np.ndarray = None  # (25,3,3) rotation matrices
+    right_hand_rot: np.ndarray = None  # (25,3,3) rotation matrices
+    # Hand state (computed from finger positions)
+    left_hand_pinch: bool = False
+    left_hand_pinchValue: float = 10.0
+    left_hand_squeeze: bool = False
+    left_hand_squeezeValue: float = 0.0
+    right_hand_pinch: bool = False
+    right_hand_pinchValue: float = 10.0
+    right_hand_squeeze: bool = False
+    right_hand_squeezeValue: float = 0.0
+
+
+class NativeTeleWrapper:
+    """Drop-in replacement for TeleVuerWrapper using the Quest 3 native app.
+
+    The key difference: wrist poses from body tracking are chest-relative,
+    so the head-position subtraction in the original TeleVuerWrapper is
+    unnecessary. Instead, we use the chest-relative data directly.
+    """
+
+    def __init__(self, port: int = 8765, host: str = "0.0.0.0",
+                 chest_to_waist_x: float = 0.15,
+                 chest_to_waist_z: float = 0.28):
+        """
+        Args:
+            port: WebSocket server port
+            host: WebSocket bind address
+            chest_to_waist_x: Forward offset from chest to IK solver origin (meters).
+                Original head-to-waist was 0.15; the chest is slightly forward
+                of the head, so this may need tuning.
+            chest_to_waist_z: Vertical offset from chest to IK solver origin (meters).
+                Original head-to-waist was 0.45; chest-to-waist is shorter
+                (~0.25-0.30).
Default 0.28 is a starting estimate. + """ + self.server = TeleopServer(host=host, port=port) + self._server_thread = None + self._chest_to_waist_x = chest_to_waist_x + self._chest_to_waist_z = chest_to_waist_z + + def start(self): + """Start the WebSocket server in a background thread.""" + self._server_thread = self.server.start_in_thread() + + def get_tele_data(self) -> TeleData: + """Get processed motion data, transformed to robot convention. + + Returns TeleData compatible with teleop_hand_and_arm.py. + """ + # Read raw poses from shared memory (column-major 4x4) + with self.server.head_pose_shared.get_lock(): + head_raw = np.array(self.server.head_pose_shared[:]).reshape(4, 4, order='F') + with self.server.chest_pose_shared.get_lock(): + chest_raw = np.array(self.server.chest_pose_shared[:]).reshape(4, 4, order='F') + with self.server.left_arm_pose_shared.get_lock(): + left_wrist_raw = np.array(self.server.left_arm_pose_shared[:]).reshape(4, 4, order='F') + with self.server.right_arm_pose_shared.get_lock(): + right_wrist_raw = np.array(self.server.right_arm_pose_shared[:]).reshape(4, 4, order='F') + + # Validate matrices + head_raw, head_valid = safe_mat_update(CONST_HEAD_POSE, head_raw) + left_wrist_raw, left_valid = safe_mat_update(CONST_LEFT_ARM_POSE, left_wrist_raw) + right_wrist_raw, right_valid = safe_mat_update(CONST_RIGHT_ARM_POSE, right_wrist_raw) + + # --- Transform from Godot/OpenXR convention to Robot convention --- + + # Head: basis change (world-space pose) + Brobot_world_head = T_ROBOT_OPENXR @ head_raw @ T_OPENXR_ROBOT + + # Chest: basis change (world-space pose) + Brobot_world_chest = T_ROBOT_OPENXR @ chest_raw @ T_OPENXR_ROBOT + + # Wrist poses: these are CHEST-RELATIVE from body tracking. + # Apply basis change to chest-relative poses. + left_Brobot = T_ROBOT_OPENXR @ left_wrist_raw @ T_OPENXR_ROBOT + right_Brobot = T_ROBOT_OPENXR @ right_wrist_raw @ T_OPENXR_ROBOT + + # Apply Unitree arm URDF convention transforms + left_IPunitree = left_Brobot @ (T_TO_UNITREE_HUMANOID_LEFT_ARM if left_valid else np.eye(4)) + right_IPunitree = right_Brobot @ (T_TO_UNITREE_HUMANOID_RIGHT_ARM if right_valid else np.eye(4)) + + # CHEST-TO-WAIST offset: the IK solver origin is near the waist. + # Body tracking gives us chest-relative poses. The chest is lower + # than the head, so the vertical offset is smaller than the original + # head-to-waist offset (0.45). Configurable via constructor args. + left_wrist_final = left_IPunitree.copy() + right_wrist_final = right_IPunitree.copy() + left_wrist_final[0, 3] += self._chest_to_waist_x # x (forward) + right_wrist_final[0, 3] += self._chest_to_waist_x + left_wrist_final[2, 3] += self._chest_to_waist_z # z (up) + right_wrist_final[2, 3] += self._chest_to_waist_z + + # --- Hand positions --- + left_hand_pos = np.zeros((25, 3)) + right_hand_pos = np.zeros((25, 3)) + + if left_valid and right_valid: + with self.server.left_hand_position_shared.get_lock(): + raw_left = np.array(self.server.left_hand_position_shared[:]) + with self.server.right_hand_position_shared.get_lock(): + raw_right = np.array(self.server.right_hand_position_shared[:]) + + if np.any(raw_left != 0): + # Hand positions are wrist-relative from Quest 3, in OpenXR coords. + # Must apply basis change (OpenXR→Robot) before hand convention transform. 
+                # Derivation: the original code does
+                #   T_TO_UNITREE_HAND @ inv(arm_robot) @ (T_ROBOT_OPENXR @ world_pos)
+                # which simplifies to
+                #   T_TO_UNITREE_HAND @ T_ROBOT_OPENXR @ wrist_relative_openxr
+                left_pos_25x3 = raw_left.reshape(25, 3)
+                left_hom = np.concatenate([left_pos_25x3.T, np.ones((1, 25))])
+                left_hand_pos = (T_TO_UNITREE_HAND @ T_ROBOT_OPENXR @ left_hom)[0:3, :].T
+
+            if np.any(raw_right != 0):
+                right_pos_25x3 = raw_right.reshape(25, 3)
+                right_hom = np.concatenate([right_pos_25x3.T, np.ones((1, 25))])
+                right_hand_pos = (T_TO_UNITREE_HAND @ T_ROBOT_OPENXR @ right_hom)[0:3, :].T
+
+        # --- Hand rotations ---
+        left_hand_rot = None
+        right_hand_rot = None
+        # TODO: Transform hand rotations if needed for dexterous hand control
+
+        # --- Pinch/squeeze from finger distances ---
+        # Compute pinch from the thumb-tip to index-tip distance
+        left_pinch_val, left_pinch = _compute_pinch(left_hand_pos)
+        right_pinch_val, right_pinch = _compute_pinch(right_hand_pos)
+        left_squeeze_val, left_squeeze = _compute_squeeze(left_hand_pos)
+        right_squeeze_val, right_squeeze = _compute_squeeze(right_hand_pos)
+
+        return TeleData(
+            head_pose=Brobot_world_head,
+            left_wrist_pose=left_wrist_final,
+            right_wrist_pose=right_wrist_final,
+            chest_pose=Brobot_world_chest,
+            left_hand_pos=left_hand_pos,
+            right_hand_pos=right_hand_pos,
+            left_hand_rot=left_hand_rot,
+            right_hand_rot=right_hand_rot,
+            left_hand_pinch=left_pinch,
+            left_hand_pinchValue=left_pinch_val,
+            left_hand_squeeze=left_squeeze,
+            left_hand_squeezeValue=left_squeeze_val,
+            right_hand_pinch=right_pinch,
+            right_hand_pinchValue=right_pinch_val,
+            right_hand_squeeze=right_squeeze,
+            right_hand_squeezeValue=right_squeeze_val,
+        )
+
+    def render_to_xr(self, img):
+        """Send a webcam frame to the Quest 3 app.
+
+        Args:
+            img: BGR numpy array (OpenCV format). Will be JPEG-encoded and sent.
+        """
+        import cv2
+        # Encode as JPEG
+        _, jpeg_buf = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 70])
+        self.server.set_webcam_frame(jpeg_buf.tobytes())
+
+    def close(self):
+        """Shut down the server."""
+        # The server thread is daemonic; it stops when the process exits.
+        pass
+
+    @property
+    def has_client(self) -> bool:
+        """Check if a Quest 3 client is connected."""
+        return len(self.server._clients) > 0
+
+    @property
+    def last_data_time(self) -> float:
+        """Timestamp of the last received tracking data."""
+        with self.server.last_data_time_shared.get_lock():
+            return self.server.last_data_time_shared.value
+
+
+def _compute_pinch(hand_pos_25x3: np.ndarray) -> tuple:
+    """Compute pinch state from the thumb-tip to index-tip distance.
+
+    Returns (pinchValue, is_pinching).
+    pinchValue: thumb-index distance in centimeters (meters × 100), so
+    ~10.0 when open and → 0.0 when pinched, matching the TeleData default
+    of 10.0. Capped at 15.0.
+    """
+    if np.all(hand_pos_25x3 == 0):
+        return 10.0, False
+
+    # Tip indices in the 25-joint hand layout used by this pipeline
+    thumb_tip = hand_pos_25x3[4]
+    index_tip = hand_pos_25x3[8]
+    dist = np.linalg.norm(thumb_tip - index_tip)
+
+    # Map distance (meters) to centimeters, capping the open-hand value
+    pinch_value = min(dist * 100.0, 15.0)
+    is_pinching = dist < 0.025  # 2.5 cm threshold
+
+    return pinch_value, is_pinching
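+
+# Example (hypothetical values): a thumb-index gap of 0.01 m yields
+# pinch_value = 1.0 with is_pinching = True; a relaxed 0.10 m gap yields
+# pinch_value = 10.0 with is_pinching = False, matching the TeleData defaults.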
+ """ + if np.all(hand_pos_25x3 == 0): + return 0.0, False + + # Measure distance from fingertips to palm center + # Joint indices: palm=0, index_tip=8, middle_tip=12, ring_tip=16, pinky_tip=20 + palm = hand_pos_25x3[0] + tips = hand_pos_25x3[[8, 12, 16, 20]] + dists = np.linalg.norm(tips - palm, axis=1) + avg_dist = np.mean(dists) + + # Map: ~0.02m (fist) → 1.0, ~0.10m (open) → 0.0 + squeeze_value = np.clip(1.0 - (avg_dist - 0.02) / 0.08, 0.0, 1.0) + is_squeezing = squeeze_value > 0.7 + + return squeeze_value, is_squeezing diff --git a/server/requirements.txt b/server/requirements.txt new file mode 100644 index 0000000..939c4d4 --- /dev/null +++ b/server/requirements.txt @@ -0,0 +1,4 @@ +# Robot-side server dependencies +websockets>=12.0 +numpy>=1.24 +opencv-python>=4.8 diff --git a/server/teleop_server.py b/server/teleop_server.py new file mode 100644 index 0000000..e4e4290 --- /dev/null +++ b/server/teleop_server.py @@ -0,0 +1,315 @@ +#!/usr/bin/env python3 +""" +Lightweight WebSocket server for Quest 3 native teleop app. +Replaces Vuer (~900 lines) with ~200 lines. + +Receives body tracking data (JSON) from the Quest 3 native app, +writes poses to shared memory arrays compatible with teleop_hand_and_arm.py, +and sends webcam JPEG frames back to the app. + +Protocol: + Client → Server: JSON text messages with tracking data + Server → Client: Binary messages with JPEG webcam frames + Server → Client: JSON text messages for status/config + +Shared Memory Format (matches TeleVuer/TeleVuerWrapper interface): + head_pose_shared: Array('d', 16) — 4x4 SE(3), column-major ('F' order) + left_arm_pose_shared: Array('d', 16) — 4x4 SE(3), column-major ('F' order) + right_arm_pose_shared: Array('d', 16) — 4x4 SE(3), column-major ('F' order) + last_data_time_shared: Value('d') — Unix timestamp of last received data + chest_pose_shared: Array('d', 16) — 4x4 SE(3), NEW: chest orientation + + Hand tracking (if enabled): + left_hand_position_shared: Array('d', 75) — 25×3 positions + right_hand_position_shared: Array('d', 75) — 25×3 positions + left_hand_orientation_shared: Array('d', 225) — 25×3×3 rotations (col-major) + right_hand_orientation_shared: Array('d', 225) — 25×3×3 rotations (col-major) + +Usage: + # Standalone (for testing): + python teleop_server.py --port 8765 + + # Integrated with teleop_hand_and_arm.py: + # Import TeleopServer and pass shared memory arrays +""" + +import asyncio +import json +import time +import logging +import argparse +import threading +import numpy as np +from multiprocessing import Array, Value + +try: + import websockets + import websockets.asyncio.server +except ImportError: + print("ERROR: 'websockets' package required. 
+
+logger = logging.getLogger("teleop_server")
+
+
+class TeleopServer:
+    """WebSocket server that bridges the Quest 3 native app to teleop shared memory."""
+
+    def __init__(self, host="0.0.0.0", port=8765,
+                 head_pose_shared=None,
+                 left_arm_pose_shared=None,
+                 right_arm_pose_shared=None,
+                 last_data_time_shared=None,
+                 chest_pose_shared=None,
+                 left_hand_position_shared=None,
+                 right_hand_position_shared=None,
+                 left_hand_orientation_shared=None,
+                 right_hand_orientation_shared=None,
+                 left_hand_pinch_shared=None,
+                 left_hand_squeeze_shared=None,
+                 right_hand_pinch_shared=None,
+                 right_hand_squeeze_shared=None):
+        self.host = host
+        self.port = port
+
+        # Create shared memory if not provided (standalone mode)
+        self.head_pose_shared = head_pose_shared or Array('d', 16, lock=True)
+        self.left_arm_pose_shared = left_arm_pose_shared or Array('d', 16, lock=True)
+        self.right_arm_pose_shared = right_arm_pose_shared or Array('d', 16, lock=True)
+        self.last_data_time_shared = last_data_time_shared or Value('d', 0.0, lock=True)
+        self.chest_pose_shared = chest_pose_shared or Array('d', 16, lock=True)
+
+        # Hand tracking arrays
+        self.left_hand_position_shared = left_hand_position_shared or Array('d', 75, lock=True)
+        self.right_hand_position_shared = right_hand_position_shared or Array('d', 75, lock=True)
+        self.left_hand_orientation_shared = left_hand_orientation_shared or Array('d', 225, lock=True)
+        self.right_hand_orientation_shared = right_hand_orientation_shared or Array('d', 225, lock=True)
+
+        # Pinch/squeeze (computed from finger positions)
+        self.left_hand_pinch_shared = left_hand_pinch_shared
+        self.left_hand_squeeze_shared = left_hand_squeeze_shared
+        self.right_hand_pinch_shared = right_hand_pinch_shared
+        self.right_hand_squeeze_shared = right_hand_squeeze_shared
+
+        # Initialize with identity matrices
+        identity_flat = np.eye(4).flatten(order='F').tolist()
+        with self.head_pose_shared.get_lock():
+            self.head_pose_shared[:] = identity_flat
+        with self.left_arm_pose_shared.get_lock():
+            self.left_arm_pose_shared[:] = identity_flat
+        with self.right_arm_pose_shared.get_lock():
+            self.right_arm_pose_shared[:] = identity_flat
+        with self.chest_pose_shared.get_lock():
+            self.chest_pose_shared[:] = identity_flat
+
+        # Webcam frame (set by external code, polled by the per-client send loop)
+        self._webcam_frame = None
+        self._webcam_lock = threading.Lock()
+
+        # Connection tracking
+        self._clients = set()
+        self._message_count = 0
+        self._last_log_time = 0
+
+    def set_webcam_frame(self, jpeg_bytes: bytes):
+        """Called by external code (e.g., ThreadedWebcam) to provide a new JPEG frame.
+
+        Thread-safe: only the lock-guarded buffer is touched; the asyncio send
+        loop picks the frame up on its next tick.
+        """
+        with self._webcam_lock:
+            self._webcam_frame = jpeg_bytes
+
+    async def _handle_client(self, websocket):
+        """Handle a single WebSocket client connection."""
+        remote = websocket.remote_address
+        logger.info(f"Client connected: {remote}")
+        self._clients.add(websocket)
+
+        # Send initial config
+        await websocket.send(json.dumps({
+            "type": "config",
+            "version": "1.0",
+            "body_tracking": True,
+        }))
+
+        # Start webcam sender task for this client
+        webcam_task = asyncio.create_task(self._send_webcam_loop(websocket))
+
+        try:
+            async for message in websocket:
+                if isinstance(message, str):
+                    self._handle_tracking_message(message)
+                # Binary messages from the client are ignored
+        except websockets.exceptions.ConnectionClosed:
+ logger.info(f"Client disconnected: {remote}") + finally: + self._clients.discard(websocket) + webcam_task.cancel() + try: + await webcam_task + except asyncio.CancelledError: + pass + + def _handle_tracking_message(self, message: str): + """Parse tracking JSON and write to shared memory.""" + try: + data = json.loads(message) + except json.JSONDecodeError: + logger.warning("Invalid JSON received") + return + + msg_type = data.get("type") + if msg_type != "tracking": + return + + self._message_count += 1 + now = time.time() + + # Rate-limited logging (every 5 seconds) + if now - self._last_log_time > 5.0: + logger.info(f"Tracking messages received: {self._message_count}") + self._last_log_time = now + + # Update timestamp + with self.last_data_time_shared.get_lock(): + self.last_data_time_shared.value = now + + # Head pose: 7 floats [x, y, z, qx, qy, qz, qw] → 4x4 column-major + head = data.get("head") + if head and len(head) == 7: + mat = _pose7_to_mat16(head) + with self.head_pose_shared.get_lock(): + self.head_pose_shared[:] = mat + + # Chest pose: 7 floats → 4x4 column-major + chest = data.get("chest") + if chest and len(chest) == 7: + mat = _pose7_to_mat16(chest) + with self.chest_pose_shared.get_lock(): + self.chest_pose_shared[:] = mat + + # Left wrist: 16 floats (already column-major 4x4) + left_wrist = data.get("left_wrist") + if left_wrist and len(left_wrist) == 16: + with self.left_arm_pose_shared.get_lock(): + self.left_arm_pose_shared[:] = left_wrist + + # Right wrist: 16 floats (already column-major 4x4) + right_wrist = data.get("right_wrist") + if right_wrist and len(right_wrist) == 16: + with self.right_arm_pose_shared.get_lock(): + self.right_arm_pose_shared[:] = right_wrist + + # Hand positions: 75 floats each (25 joints × 3) + left_hand_pos = data.get("left_hand_pos") + if left_hand_pos and len(left_hand_pos) == 75: + with self.left_hand_position_shared.get_lock(): + self.left_hand_position_shared[:] = left_hand_pos + + right_hand_pos = data.get("right_hand_pos") + if right_hand_pos and len(right_hand_pos) == 75: + with self.right_hand_position_shared.get_lock(): + self.right_hand_position_shared[:] = right_hand_pos + + # Hand rotations: 225 floats each (25 joints × 9) + left_hand_rot = data.get("left_hand_rot") + if left_hand_rot and len(left_hand_rot) == 225: + with self.left_hand_orientation_shared.get_lock(): + self.left_hand_orientation_shared[:] = left_hand_rot + + right_hand_rot = data.get("right_hand_rot") + if right_hand_rot and len(right_hand_rot) == 225: + with self.right_hand_orientation_shared.get_lock(): + self.right_hand_orientation_shared[:] = right_hand_rot + + async def _send_webcam_loop(self, websocket): + """Send webcam JPEG frames to a client at ~15 fps.""" + interval = 1.0 / 15.0 + while True: + await asyncio.sleep(interval) + with self._webcam_lock: + frame = self._webcam_frame + if frame is not None: + try: + await websocket.send(frame) + except websockets.exceptions.ConnectionClosed: + break + + async def serve(self): + """Start the WebSocket server.""" + logger.info(f"Starting teleop server on ws://{self.host}:{self.port}") + async with websockets.asyncio.server.serve( + self._handle_client, + self.host, + self.port, + max_size=2**20, # 1 MB max message size + ping_interval=20, + ping_timeout=20, + ) as server: + logger.info(f"Teleop server running on ws://{self.host}:{self.port}") + await asyncio.Future() # Run forever + + def start_in_thread(self): + """Start the server in a background thread. 
Returns the thread.""" + def _run(): + asyncio.run(self.serve()) + thread = threading.Thread(target=_run, daemon=True) + thread.start() + logger.info("Teleop server started in background thread") + return thread + + +def _pose7_to_mat16(pose7): + """Convert [x, y, z, qx, qy, qz, qw] to 16-float column-major 4x4 matrix. + + Compatible with NumPy reshape(4,4, order='F'). + """ + x, y, z, qx, qy, qz, qw = pose7 + + # Quaternion to rotation matrix + xx = qx * qx; yy = qy * qy; zz = qz * qz + xy = qx * qy; xz = qx * qz; yz = qy * qz + wx = qw * qx; wy = qw * qy; wz = qw * qz + + r00 = 1 - 2 * (yy + zz) + r01 = 2 * (xy - wz) + r02 = 2 * (xz + wy) + r10 = 2 * (xy + wz) + r11 = 1 - 2 * (xx + zz) + r12 = 2 * (yz - wx) + r20 = 2 * (xz - wy) + r21 = 2 * (yz + wx) + r22 = 1 - 2 * (xx + yy) + + # Column-major (Fortran order): column 0, column 1, column 2, column 3 + return [ + r00, r10, r20, 0.0, # Column 0 + r01, r11, r21, 0.0, # Column 1 + r02, r12, r22, 0.0, # Column 2 + x, y, z, 1.0, # Column 3 + ] + + +# --- Standalone mode --- + +def main(): + parser = argparse.ArgumentParser(description="Teleop WebSocket server for Quest 3 native app") + parser.add_argument("--host", default="0.0.0.0", help="Bind address (default: 0.0.0.0)") + parser.add_argument("--port", type=int, default=8765, help="WebSocket port (default: 8765)") + parser.add_argument("--verbose", action="store_true", help="Enable debug logging") + args = parser.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.verbose else logging.INFO, + format="%(asctime)s [%(name)s] %(levelname)s: %(message)s" + ) + + server = TeleopServer(host=args.host, port=args.port) + asyncio.run(server.serve()) + + +if __name__ == "__main__": + main()
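+
+# Integration sketch (assumes the wrapper API defined in
+# server/native_tv_wrapper.py within this commit):
+#
+#     from server.native_tv_wrapper import NativeTeleWrapper
+#     tv_wrapper = NativeTeleWrapper(port=8765)
+#     tv_wrapper.start()                       # server in a daemon thread
+#     tele_data = tv_wrapper.get_tele_data()   # chest-relative wrist poses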