#!/bin/bash
# Docker run script for gr00t_wbc with branch-based container isolation
#
# Usage:
#   ./docker/run_docker.sh [OPTIONS]
#
# Options:
#   --build     Build Docker image
#   --clean     Clean containers
#   --deploy    Run in deploy mode
#   --install   Pull prebuilt Docker image
#   --push      Push built image to Docker Hub
#   --root      Build/run the image as the root user (see ROOT below)
#   --branch    Use branch-specific container names
#
# Branch-based Container Isolation (when --branch flag is used):
#   - Each git branch gets its own isolated containers
#   - Container names include branch identifier (e.g., gr00t_wbc-deploy-user-main)
#   - Works with git worktrees, separate clones, or nested repositories
#   - Clean and build operations only affect the current branch

# Exit on error
set -e

# Default values
BUILD=false
CLEAN=false
DEPLOY=false
INSTALL=false
# Flag to push the built Docker image to Docker Hub.
# This should be used when someone updates the Docker image dependencies,
# because this image is used for CI/CD pipelines.
# When true, the image will be tagged and pushed to
# docker.io/nvgear/gr00t_wbc:latest (lowercased in practice).
DOCKER_HUB_PUSH=false
# Flag to build the docker image with the root user.
# This could cause some of your local files to be owned by root.
# If you get an error like "PermissionError: [Errno 13] Permission denied:"
# you can run `sudo chown -R $USER:$USER .` on the local machine to fix it.
ROOT=false
BRANCH_MODE=false
# Unknown CLI arguments are collected here and forwarded to the deploy script.
EXTRA_ARGS=()
PROJECT_NAME="gr00t_wbc"
# Lowercased project name, used for image/container/cache naming.
PROJECT_SLUG=$(echo "$PROJECT_NAME" | tr '[:upper:]' '[:lower:]')
REMOTE_IMAGE="nvgear/${PROJECT_SLUG}:latest"
# Parse command line arguments.
# Every recognized flag consumes exactly one argument, so a single shift
# after the case covers all arms.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --build)   BUILD=true ;;
    --clean)   CLEAN=true ;;
    --deploy)  DEPLOY=true ;;
    --install) INSTALL=true ;;
    --push)    DOCKER_HUB_PUSH=true ;;
    --root)    ROOT=true ;;
    --branch)  BRANCH_MODE=true ;;
    # Anything unrecognized is forwarded to the deployment script.
    *)         EXTRA_ARGS+=("$1") ;;
  esac
  shift
done

# --install and --build are mutually exclusive: one pulls a prebuilt image,
# the other builds locally.
if [ "$INSTALL" = true ] && [ "$BUILD" = true ]; then
  echo "Cannot use --install and --build together. Choose one."
  exit 1
fi
# Function to get branch name for container naming.
# Prints the current branch with '/' mapped to '-' (slashes are not valid in
# container names), or an empty string when not inside a git repository.
function get_branch_id {
  # Not in a git checkout: no branch identifier.
  if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
    echo ""
    return
  fi
  # Current branch name (prints "HEAD" in detached state). `|| true` keeps
  # a git failure from aborting the script under `set -e`.
  local branch_name
  branch_name=$(git rev-parse --abbrev-ref HEAD) || true
  echo "${branch_name//\//-}"
}
# Architecture detection helpers (Debian-style names from dpkg).
host_arch() { dpkg --print-architecture; }
is_arm64() { [ "$(host_arch)" = "arm64" ]; }
is_amd64() { [ "$(host_arch)" = "amd64" ]; }
# Resolve the identity the container runs as, plus the build-cache tag.
if [ "$ROOT" != true ]; then
  # Normal case: mirror the invoking user inside the container.
  USERNAME=$(whoami)
  USERID=$(id -u)
  DOCKER_HOME_DIR=/home/${USERNAME}
  CACHE_FROM=${PROJECT_SLUG}-deploy-cache
else
  # --root: run as root with a separate cache tag so root builds don't
  # clobber per-user build caches.
  USERNAME=root
  USERID=0
  DOCKER_HOME_DIR=/root
  CACHE_FROM=${PROJECT_SLUG}-deploy-cache-root
fi

# GID of the 'input' group, used later with --group-add for device access.
# NOTE(review): empty when the 'input' group does not exist — verify on
# minimal hosts.
INPUT_GID=$(getent group input | cut -d: -f3)
# Absolute directory containing this script, used for path fallbacks below.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Function to get the actual project directory (worktree-aware).
# Prefers the git toplevel; outside a repository it assumes this script
# lives one level below the project root (docker/…).
function get_project_dir {
  if ! git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
    dirname "$SCRIPT_DIR"
    return
  fi
  # Inside a checkout (including worktrees): the repo toplevel is the root.
  git rev-parse --show-toplevel
}
# Get branch identifier
BRANCH_ID=$(get_branch_id)

# Set project directory (needs to be after branch detection)
PROJECT_DIR="$(get_project_dir)"

# Function to generate container name with optional branch support.
# Base name is <slug>-<type>-<user>; with --branch and a detected branch,
# the branch id is appended for per-branch isolation.
function get_container_name {
  local container_type="$1"
  local name="${PROJECT_SLUG}-${container_type}-${USERNAME}"
  if [[ "$BRANCH_MODE" = true ]] && [[ -n "$BRANCH_ID" ]]; then
    name="${name}-${BRANCH_ID}"
  fi
  echo "$name"
}

# Set common variables used throughout the script
DEPLOY_CONTAINER=$(get_container_name "deploy")
BASH_CONTAINER=$(get_container_name "bash")
WORKTREE_NAME=$(basename "$PROJECT_DIR")
# Debug output for branch detection.
# Three cases: outside git, branch mode on, branch mode off.
if [[ -z "$BRANCH_ID" ]]; then
  echo "Running outside git repository"
elif [[ "$BRANCH_MODE" = true ]]; then
  echo "Branch mode enabled - using branch: $BRANCH_ID"
else
  echo "Branch mode disabled - using default containers"
fi
echo "Project directory: $PROJECT_DIR"

# Get host's hostname and append -docker (used as the container hostname).
HOSTNAME=$(hostname)-docker
# Stop and remove both per-user containers. Missing containers are not an
# error (`|| true`), so cleanup is idempotent and safe under `set -e`.
function clean_container {
  echo "Cleaning up Docker containers..."

  local name
  for name in "$DEPLOY_CONTAINER" "$BASH_CONTAINER"; do
    sudo docker stop "$name" 2>/dev/null || true
  done

  echo "Removing containers..."
  for name in "$DEPLOY_CONTAINER" "$BASH_CONTAINER"; do
    sudo docker rm "$name" 2>/dev/null || true
  done
  echo "Containers cleaned!"
}
# Function to install Docker Buildx if needed.
# Installs the buildx CLI plugin for both the invoking user and root
# (docker is run through sudo elsewhere in this script, so root needs the
# plugin too), then creates and bootstraps a builder instance.
function install_docker_buildx {
  # No-op when the plugin already resolves.
  if sudo docker buildx version &> /dev/null; then
    echo "Docker Buildx is already installed."
    return 0
  fi

  echo "Installing Docker Buildx..."

  # Plugin directories for user and root.
  mkdir -p ~/.docker/cli-plugins/ && sudo mkdir -p /root/.docker/cli-plugins/

  # Map the Debian architecture to the buildx release artifact suffix.
  local arch buildx_arch
  arch=$(dpkg --print-architecture)
  if [[ "$arch" == "arm64" ]]; then
    buildx_arch="linux-arm64"
  else
    buildx_arch="linux-amd64"
  fi

  # Latest release tag from the GitHub API, with a pinned fallback when the
  # lookup returns nothing.
  local buildx_version
  buildx_version=$(curl -s https://api.github.com/repos/docker/buildx/releases/latest | grep tag_name | cut -d '"' -f 4)
  buildx_version=${buildx_version:-v0.13.1}

  # Download once, install for both user and root, mark executable.
  curl -L "https://github.com/docker/buildx/releases/download/${buildx_version}/buildx-${buildx_version}.${buildx_arch}" -o ~/.docker/cli-plugins/docker-buildx
  sudo cp ~/.docker/cli-plugins/docker-buildx /root/.docker/cli-plugins/docker-buildx
  chmod +x ~/.docker/cli-plugins/docker-buildx && sudo chmod +x /root/.docker/cli-plugins/docker-buildx

  # Create (or reuse, via || true) a builder instance and warm it up.
  sudo docker buildx create --use --name mybuilder || true
  sudo docker buildx inspect --bootstrap

  echo "Docker Buildx installation complete!"
}
# Function to install NVIDIA Container Toolkit if needed.
# Configures NVIDIA's apt repository, installs the toolkit (and docker.io if
# docker is missing), registers the nvidia runtime with Docker, and restarts
# the Docker daemon. Requires sudo and network access.
function install_nvidia_toolkit {
  # Check if NVIDIA Container Toolkit is already installed
  if command -v nvidia-container-toolkit &> /dev/null; then
    echo "NVIDIA Container Toolkit is already installed."
    return 0
  fi

  echo "Installing NVIDIA Container Toolkit..."

  # Add the package repositories.
  # Distribution id (e.g. "ubuntu22.04") selects the matching apt repo list.
  distribution=$(. /etc/os-release;echo $ID$VERSION_ID)

  # Check if GPG key exists and remove it if it does, so a re-run does not
  # fail on an existing keyring file.
  if [ -f "/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg" ]; then
    echo "Removing existing NVIDIA GPG key..."
    sudo rm /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
  fi

  # Add new GPG key
  curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg

  # Add repository, rewriting each 'deb' line to trust the key installed above
  curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
    sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list

  # Install nvidia-container-toolkit and docker if needed
  sudo apt-get update
  sudo apt-get install -y nvidia-container-toolkit

  # Install docker if not already installed
  if ! command -v docker &> /dev/null; then
    sudo apt-get install -y docker.io
  fi

  # Configure Docker to use the NVIDIA runtime
  sudo nvidia-ctk runtime configure --runtime=docker

  # Restart the Docker daemon so the new runtime is picked up
  sudo systemctl restart docker

  echo "NVIDIA Container Toolkit installation complete!"
}
# Function to build Docker image for the current branch.
# Builds docker/Dockerfile.deploy from the current directory, tagging the
# image with the per-user (and optionally per-branch) container name.
# Globals read: DEPLOY_CONTAINER, USERNAME, USERID, DOCKER_HOME_DIR,
#               WORKTREE_NAME, CACHE_FROM
function build_docker_image {
  echo "Building Docker image: $DEPLOY_CONTAINER"

  # Quote every expansion: a worktree/branch name containing spaces or glob
  # characters would otherwise split into separate build arguments.
  sudo docker buildx build \
    --build-arg USERNAME="$USERNAME" \
    --build-arg USERID="$USERID" \
    --build-arg HOME_DIR="$DOCKER_HOME_DIR" \
    --build-arg WORKTREE_NAME="$WORKTREE_NAME" \
    --cache-from "$CACHE_FROM" \
    -t "$DEPLOY_CONTAINER" \
    -f docker/Dockerfile.deploy \
    --load \
    .

  # Tag for persistent cache (done in build_with_cleanup before the old
  # image is removed, so this stays disabled here).
  # sudo docker tag $DEPLOY_CONTAINER $CACHE_FROM
  echo "Docker image build complete!"
}
# Build function: remove old containers/images, ensure build prerequisites
# (buildx, NVIDIA toolkit) exist, then rebuild the image.
function build_with_cleanup {
  echo "Building Docker image..."
  echo "Removing existing containers and images..."
  clean_container

  # Preserve the outgoing image as the build cache before deleting it, so
  # the rebuild can reuse its layers. Both steps are best effort.
  sudo docker tag "$DEPLOY_CONTAINER" "$CACHE_FROM" 2>/dev/null || true
  sudo docker rmi "$DEPLOY_CONTAINER" 2>/dev/null || true
  echo "Images cleaned!"

  install_docker_buildx
  install_nvidia_toolkit
  build_docker_image
}
# Pull the prebuilt image and retag it under the local per-user name so the
# rest of the script can treat it exactly like a locally built image.
function install_remote_image {
  echo "Installing Docker image from remote registry: $REMOTE_IMAGE"
  echo "Removing existing containers to ensure a clean install..."
  clean_container

  sudo docker pull "$REMOTE_IMAGE"
  sudo docker tag "$REMOTE_IMAGE" "$DEPLOY_CONTAINER"
  # Seed the build-cache tag as well (best effort).
  sudo docker tag "$REMOTE_IMAGE" "$CACHE_FROM" 2>/dev/null || true

  echo "Docker image install complete!"
}
# Mode dispatch. --clean and --push exit immediately after their action;
# --build and --install fall through so a container can be started below.
if [ "$CLEAN" = true ]; then
  clean_container
  exit 0
fi

if [ "$BUILD" = true ]; then
  build_with_cleanup
fi

if [ "$INSTALL" = true ]; then
  install_remote_image
fi

if [ "$DOCKER_HUB_PUSH" = true ]; then
  # Retag the local image under the public name and push it.
  echo "Pushing Docker image to Docker Hub: docker.io/nvgear/${PROJECT_SLUG}:latest"
  sudo docker tag "$DEPLOY_CONTAINER" "docker.io/nvgear/${PROJECT_SLUG}:latest"
  sudo docker push "docker.io/nvgear/${PROJECT_SLUG}:latest"
  echo "Docker image pushed to Docker Hub!"
  exit 0
fi
# Setup X11 display forwarding.
# Returns 0 when X11 forwarding was enabled, 1 for a headless environment
# (in which case DISPLAY is blanked so containers don't try to render).
setup_x11() {
  # Fall back to display :1 when DISPLAY is unset but an X server answers.
  if [ -z "$DISPLAY" ] && command -v xset >/dev/null 2>&1 && xset q >/dev/null 2>&1; then
    export DISPLAY=:1
    echo "No DISPLAY set, using :1"
  fi

  # Grant local docker clients access to the X server.
  if [ -n "$DISPLAY" ] && command -v xhost >/dev/null 2>&1 && xhost +local:docker 2>/dev/null; then
    echo "X11 forwarding enabled"
    return 0
  fi

  echo "Headless environment - X11 disabled"
  export DISPLAY=""
  return 1
}

X11_ENABLED=false
setup_x11 && X11_ENABLED=true
# Mount entire /dev directory for dynamic device access (including hidraw
# for joycon); this allows JoyCon controllers to be detected even when
# connected after container launch. Grant group read/write on input devices
# so the container user (via --group-add) can read them.
# Best effort: on hosts with no /dev/input entries the glob doesn't match
# and chmod would fail, which must not abort the script under `set -e`.
sudo chmod g+r+w /dev/input/* 2>/dev/null || true

# Detect GPU setup and set appropriate environment variables
echo "Detecting GPU setup..."
GPU_ENV_VARS=""

# Count display adapters per vendor to recognize hybrid (Optimus) setups.
HAS_AMD_GPU=$(lspci | grep -i "vga\|3d\|display" | grep -i amd | wc -l)
HAS_INTEL_GPU=$(lspci | grep -i "vga\|3d\|display" | grep -i intel | wc -l)
HAS_NVIDIA_GPU=$(lspci | grep -i "vga\|3d\|display" | grep -i nvidia | wc -l)

# Hybrid setup = (Intel OR AMD integrated GPU) AND an NVIDIA discrete GPU.
# The previous `a || b && c` only worked because bash evaluates && and ||
# with equal precedence left-to-right; group explicitly instead.
if { [[ "$HAS_INTEL_GPU" -gt 0 ]] || [[ "$HAS_AMD_GPU" -gt 0 ]]; } && [[ "$HAS_NVIDIA_GPU" -gt 0 ]]; then
  echo "Detected hybrid GPU setup (Intel/AMD integrated + NVIDIA discrete)"
  echo "Setting NVIDIA Optimus environment variables for proper rendering offload..."
  GPU_ENV_VARS="-e __NV_PRIME_RENDER_OFFLOAD=1 \
    -e __VK_LAYER_NV_optimus=NVIDIA_only"
else
  GPU_ENV_VARS=""
fi

# Set GPU runtime based on architecture: Jetson (arm64) exposes the GPU via
# device nodes rather than the nvidia container runtime.
if is_arm64; then
  echo "Detected ARM64 architecture (Jetson Orin), using device access instead of nvidia runtime..."
  GPU_RUNTIME_ARGS="--device /dev/nvidia0 --device /dev/nvidiactl --device /dev/nvidia-modeset --device /dev/nvidia-uvm --device /dev/nvidia-uvm-tools"
else
  GPU_RUNTIME_ARGS="--gpus all --runtime=nvidia"
fi
# Common Docker run parameters, shared by deploy and bash modes.
# NOTE: kept as a single whitespace-joined string that is expanded UNQUOTED
# at the call sites, so none of these paths/values may contain spaces.
# Fix vs. previous version: the pulse cookie now mounts into
# $DOCKER_HOME_DIR (it hardcoded /home/$USERNAME, which is wrong for --root
# where the home directory is /root), and the project-mount line regained
# its missing line-continuation backslash for consistency.
DOCKER_RUN_ARGS="--hostname $HOSTNAME \
  --user $USERNAME \
  --group-add $INPUT_GID \
  $GPU_RUNTIME_ARGS \
  --ipc=host \
  --network=host \
  --privileged \
  --device=/dev \
  $GPU_ENV_VARS \
  -p 5678:5678 \
  -e DISPLAY=$DISPLAY \
  -e NVIDIA_VISIBLE_DEVICES=all \
  -e NVIDIA_DRIVER_CAPABILITIES=graphics,compute,utility \
  -e __GLX_VENDOR_LIBRARY_NAME=nvidia \
  -e USERNAME=$USERNAME \
  -e GR00T_WBC_DIR=$DOCKER_HOME_DIR/Projects/$WORKTREE_NAME \
  -v /dev/bus/usb:/dev/bus/usb \
  -v /tmp/.X11-unix:/tmp/.X11-unix \
  -v $HOME/.ssh:$DOCKER_HOME_DIR/.ssh \
  -v $HOME/.gear:$DOCKER_HOME_DIR/.gear \
  -v $HOME/.Xauthority:$DOCKER_HOME_DIR/.Xauthority \
  -v $PROJECT_DIR:$DOCKER_HOME_DIR/Projects/$(basename "$PROJECT_DIR") \
  --device /dev/snd \
  --group-add audio \
  -e PULSE_SERVER=unix:/run/user/$(id -u)/pulse/native \
  -v /run/user/$(id -u)/pulse/native:/run/user/$(id -u)/pulse/native \
  -v $HOME/.config/pulse/cookie:$DOCKER_HOME_DIR/.config/pulse/cookie"
# Check if RL mode first, then handle container logic
if [ "$DEPLOY" = true ]; then
  # Deploy mode - use gr00t_wbc-deploy-${USERNAME} container

  # Always clean up old processes and create a new container.
  # Kill all gr00t_wbc processes across containers to prevent message
  # passing conflicts.
  "$SCRIPT_DIR/kill_gr00t_wbc_processors.sh"
  echo "Creating new deploy container..."

  # Clean up old processes and create a fresh deploy container.
  # Remove existing deploy container if it exists (exact-name match via
  # the ^...$ anchors).
  if sudo docker ps -a --format '{{.Names}}' | grep -q "^$DEPLOY_CONTAINER$"; then
    echo "Removing existing deploy container..."
    sudo docker rm -f $DEPLOY_CONTAINER
  fi

  # Run the deploy entrypoint in a fresh, auto-removed (--rm) container.
  # `bash -ic 'exec "$0" "$@"'` starts an interactive shell (so rc files are
  # sourced) and then replaces it with the entrypoint script, forwarding any
  # EXTRA_ARGS collected from the command line.
  sudo docker run -it --rm $DOCKER_RUN_ARGS \
    -w $DOCKER_HOME_DIR/Projects/$WORKTREE_NAME \
    --name $DEPLOY_CONTAINER \
    $DEPLOY_CONTAINER \
    /bin/bash -ic 'exec "$0" "$@"' \
    "${DOCKER_HOME_DIR}/Projects/${WORKTREE_NAME}/docker/entrypoint/deploy.sh" \
    "${EXTRA_ARGS[@]}"
else
  # Bash mode - use gr00t_wbc-bash-${USERNAME} container.
  # Reuse an existing (possibly stopped) container when present; otherwise
  # create a new one.
  if sudo docker ps -a --format '{{.Names}}' | grep -q "^$BASH_CONTAINER$"; then
    echo "Bash container exists, starting it..."
    sudo docker start $BASH_CONTAINER > /dev/null
    sudo docker exec -it $BASH_CONTAINER /bin/bash
  else
    echo "Creating new bash container with auto-install gr00t_wbc..."
    # Note: no --rm here, so the bash container persists for later reuse.
    sudo docker run -it $DOCKER_RUN_ARGS \
      -w $DOCKER_HOME_DIR/Projects/$WORKTREE_NAME \
      --name $BASH_CONTAINER \
      $DEPLOY_CONTAINER \
      /bin/bash -ic 'exec "$0"' \
      "${DOCKER_HOME_DIR}/Projects/${WORKTREE_NAME}/docker/entrypoint/bash.sh"
  fi
fi
# Cleanup X11 permissions.
# Use an explicit `if` instead of `$X11_ENABLED && xhost ...`: as the final
# command of the script, that bare && list made the whole script exit with
# a nonzero status whenever X11 was disabled (or xhost failed), even though
# everything else succeeded.
if [ "$X11_ENABLED" = true ]; then
  xhost -local:docker 2>/dev/null || true
fi