"""
A convenience script to play back recorded demonstrations, in dataset order, using the
decoupled_wbc controller. Demonstrations are loaded from a LeRobot dataset directory.

Arguments:
    --dataset (str): Path to demonstrations
    --use-actions (optional): If this flag is provided, the actions are played back
        through the MuJoCo simulator, instead of loading the simulator states
        one by one.
    --use-wbc-goals (optional): If set, will use the stored WBC goals to control the robot,
        otherwise will use the actions directly. Only relevant if --use-actions is set.
    --use-teleop-cmd (optional): If set, will use teleop IK directly with WBC timing
        for action generation. Only relevant if --use-actions is set.
    --visualize-gripper (optional): If set, will visualize the gripper site
    --save-video (optional): If set, will save video of the playback using offscreen rendering
    --video-path (optional): Path to save the output video. If not specified, will use the nearest
        folder to dataset and save as playback_video.mp4
    --num-episodes (optional): Number of episodes to playback/record (if None, plays random episodes)

Example:
    $ python decoupled_wbc/control/main/teleop/playback_sync_sim_data.py --dataset output/robocasa_datasets/
        --use-actions --use-wbc-goals

    $ python decoupled_wbc/control/main/teleop/playback_sync_sim_data.py --dataset output/robocasa_datasets/
        --use-actions --use-teleop-cmd

    # Record video of the first 5 episodes using WBC goals
    $ python decoupled_wbc/control/main/teleop/playback_sync_sim_data.py --dataset output/robocasa_datasets/
        --use-actions --use-wbc-goals --save-video --num-episodes 5
"""

import json
import os
from pathlib import Path
import time
from typing import Optional

import cv2
import numpy as np
import rclpy
from robosuite.environments.robot_env import RobotEnv
from tqdm import tqdm
import tyro

from decoupled_wbc.control.main.teleop.configs.configs import SyncSimPlaybackConfig
from decoupled_wbc.control.robot_model.instantiation import get_robot_type_and_model
from decoupled_wbc.control.utils.sync_sim_utils import (
    generate_frame,
    get_data_exporter,
    get_env,
    get_policies,
)
from decoupled_wbc.data.constants import RS_VIEW_CAMERA_HEIGHT, RS_VIEW_CAMERA_WIDTH
from decoupled_wbc.data.exporter import TypedLeRobotDataset

# Node name constant; not referenced elsewhere in the visible part of this file.
CONTROL_NODE_NAME = "playback_node"

# ANSI escape sequences used to colour the final status lines printed by main().
GREEN_BOLD = "\x1b[1;32m"
RED_BOLD = "\x1b[1;31m"
RESET = "\x1b[0m"


def load_lerobot_dataset(root_path, max_episodes=None):
    """Load per-episode playback data from a LeRobot dataset directory.

    Episode metadata is read from ``<root_path>/meta/episodes.jsonl`` (one JSON
    object per line); every episode must carry the same first task name. Frames
    are then read from a ``TypedLeRobotDataset`` opened on ``root_path``.

    Args:
        root_path: Root directory of the dataset.
        max_episodes: If not None, only the first ``max_episodes`` episodes are
            loaded.

    Returns:
        A tuple ``(seeds, data, script_config)`` where ``seeds`` holds one seed per
        loaded episode, ``data`` maps ``"data/demo_<k>/<field>"`` (``k`` starting
        at 1; fields ``states``, ``actions``, ``teleop_cmd``, ``wbc_goal``) to
        per-frame lists, and ``script_config`` is the ``"script_config"`` entry of
        the dataset's meta info.

    Raises:
        AssertionError: If task names differ between episodes, the dataset length
            does not equal the summed episode lengths, or the seed changes within
            an episode.
    """
    episode_records = []
    expected_task = None
    total_frames = 0
    with open(Path(root_path) / "meta/episodes.jsonl", "r") as meta_file:
        for raw_line in meta_file:
            record = json.loads(raw_line)
            # Episodes are stored back to back, so each one starts where the
            # running frame total currently stands.
            record["start_index"] = total_frames
            total_frames += record["length"]
            current_task = record["tasks"][0]
            assert (
                expected_task is None or expected_task == current_task
            ), "All episodes should have the same task name"
            expected_task = current_task
            episode_records.append(record)

    dataset = TypedLeRobotDataset(
        repo_id="tmp/test",
        root=root_path,
        load_video=False,
    )

    script_config = dataset.meta.info["script_config"]

    assert len(dataset) == total_frames, "Dataset length does not match expected length"

    # Optionally keep only the leading episodes.
    if max_episodes is not None:
        episode_records = episode_records[:max_episodes]
        print(
            f"Loading only first {len(episode_records)} episodes (limited by max_episodes={max_episodes})"
        )

    data = {}
    seeds = []
    for demo_number, record in enumerate(tqdm(episode_records), start=1):
        prefix = f"data/demo_{demo_number}"
        states = data[f"{prefix}/states"] = []
        actions = data[f"{prefix}/actions"] = []
        teleop_cmds = data[f"{prefix}/teleop_cmd"] = []
        wbc_goals = data[f"{prefix}/wbc_goal"] = []

        episode_seed = None
        first_frame = record["start_index"]
        for frame_index in tqdm(range(first_frame, first_frame + record["length"])):
            frame = dataset[frame_index]

            # Seed: must be constant across the whole episode.
            frame_seed = np.array(frame["observation.sim.seed"]).item()
            assert (
                episode_seed is None or episode_seed == frame_seed
            ), "All observations in an episode should have the same seed"
            episode_seed = frame_seed

            # Simulator state: only the first `mujoco_state_len` entries are kept.
            state_len = frame["observation.sim.mujoco_state_len"]
            padded_state = frame["observation.sim.mujoco_state"]
            states.append(np.array(padded_state[:state_len]))

            # Action.
            actions.append(np.array(frame["action"]))

            # Fields shared by the teleop command and the WBC goal.
            upper_body_key = "observation.sim.target_upper_body_pose"
            navigate_key = "teleop.navigate_command"
            height_key = "teleop.base_height_command"

            # Teleop command: wrists are reshaped to 4x4, fingers to 25x4x4.
            teleop_cmds.append(
                {
                    "left_wrist": np.array(frame["observation.sim.left_wrist"].reshape(4, 4)),
                    "right_wrist": np.array(frame["observation.sim.right_wrist"].reshape(4, 4)),
                    "left_fingers": {
                        "position": np.array(
                            frame["observation.sim.left_fingers"].reshape(25, 4, 4)
                        ),
                    },
                    "right_fingers": {
                        "position": np.array(
                            frame["observation.sim.right_fingers"].reshape(25, 4, 4)
                        ),
                    },
                    "target_upper_body_pose": np.array(frame[upper_body_key]),
                    "base_height_command": np.array(frame[height_key]),
                    "navigate_cmd": np.array(frame[navigate_key]),
                }
            )

            # WBC goal.
            wbc_goals.append(
                {
                    "wrist_pose": np.array(frame["action.eef"]),
                    "target_upper_body_pose": np.array(frame[upper_body_key]),
                    "navigate_cmd": np.array(frame[navigate_key]),
                    "base_height_command": np.array(frame[height_key]),
                }
            )

        seeds.append(episode_seed)

    return seeds, data, script_config


def validate_state(recorded_state, playback_state, ep, step, tolerance=1e-5):
    """Check that a played-back state matches the recorded one within tolerance.

    Both inputs are converted with ``np.asarray`` so plain sequences are accepted.
    States of different shapes are reported as diverged instead of being broadcast
    against each other (or raising inside ``np.allclose``).

    Args:
        recorded_state: State stored in the dataset.
        playback_state: State read back from the simulator.
        ep: Episode identifier, used only in the warning message.
        step: Step index, used only in the warning message.
        tolerance: Absolute tolerance passed to ``np.allclose`` as ``atol``.

    Returns:
        True if the states match, False otherwise (a warning is printed).
    """
    recorded = np.asarray(recorded_state)
    played = np.asarray(playback_state)

    if recorded.shape != played.shape:
        print(
            f"[warning] state shape mismatch {recorded.shape} vs {played.shape} "
            f"for ep {ep} at step {step}"
        )
        return False

    if np.allclose(recorded, played, atol=tolerance):
        return True

    err = np.linalg.norm(recorded - played)
    print(f"[warning] state diverged by {err:.12f} for ep {ep} at step {step}")
    return False


def generate_and_save_frame(
    config, sync_env, obs, wbc_action, seed, teleop_cmd, wbc_goal, gr00t_exporter
):
    """Build one dataset frame and hand it to the exporter.

    Does nothing unless ``config.save_lerobot`` is truthy. Otherwise the current
    MuJoCo state info is read from ``sync_env``, combined with the given
    observation, action, seed, teleop command and WBC goal via ``generate_frame``,
    and appended with ``gr00t_exporter.add_frame``.
    """
    if not config.save_lerobot:
        return

    max_state_len, state_len, state = sync_env.get_mujoco_state_info()
    gr00t_exporter.add_frame(
        generate_frame(
            obs,
            wbc_action,
            seed,
            state,
            state_len,
            max_state_len,
            teleop_cmd,
            wbc_goal,
            config.save_img_obs,
        )
    )


def playback_wbc_goals(
    sync_env,
    wbc_policy,
    wbc_goals,
    teleop_cmds,
    states,
    env,
    onscreen,
    config,
    video_writer,
    ep,
    seed,
    gr00t_exporter,
    end_steps,
):
    """Replay an episode by feeding the recorded WBC goals to the WBC policy.

    Each step observes the environment, lets ``wbc_policy`` turn the recorded goal
    into an action, queues that action, optionally saves a frame, renders or
    captures video, and compares the simulator state with the next recorded state.

    Args:
        end_steps: ``-1`` plays every goal; any other value caps the step count at
            ``min(end_steps, len(wbc_goals))``.

    Returns:
        True if every compared state matched, False if any diverged.
    """
    step_limit = len(wbc_goals)
    if end_steps != -1:
        step_limit = min(end_steps, step_limit)

    all_matched = True
    for step in range(step_limit):
        goal = wbc_goals[step]

        observation = sync_env.observe()
        wbc_policy.set_observation(observation)
        wbc_policy.set_goal(goal)
        action = wbc_policy.get_action()
        sync_env.queue_action(action)

        # Record the frame when dataset export is enabled.
        if config.save_lerobot:
            generate_and_save_frame(
                config,
                sync_env,
                observation,
                action,
                seed,
                teleop_cmds[step],
                goal,
                gr00t_exporter,
            )

        capture_or_render_frame(env, onscreen, config, video_writer)

        # The state after this step is compared with the *next* recorded state,
        # so the last recorded index has nothing to compare against.
        if step >= len(states) - 1:
            continue
        current_state = env.sim.get_state().flatten()
        if not validate_state(states[step + 1], current_state, ep, step):
            all_matched = False

    return all_matched


def playback_teleop_cmd(
    sync_env,
    wbc_policy,
    teleop_policy,
    wbc_goals,
    teleop_cmds,
    states,
    env,
    onscreen,
    config,
    video_writer,
    ep,
    seed,
    gr00t_exporter,
    end_steps,
):
    """Replay an episode by re-running teleop IK on the recorded teleop commands.

    For each step the recorded wrist and finger data are given to
    ``teleop_policy.retargeting_ik``; its output replaces the goal's
    ``target_upper_body_pose`` before the goal is passed to ``wbc_policy``. The
    recomputed pose is compared with the recorded one, and the simulator state is
    compared with the next recorded state.

    The recorded goals in ``wbc_goals`` are not modified: each step works on a
    shallow copy, so the recorded ``target_upper_body_pose`` stays available to the
    caller after playback.

    Args:
        end_steps: ``-1`` plays every step; any other value caps the step count at
            ``min(end_steps, len(wbc_goals))``.

    Returns:
        True if no pose or state divergence was reported, False otherwise.
    """
    ret = True
    num_steps = len(wbc_goals) if end_steps == -1 else min(end_steps, len(wbc_goals))

    for jj in range(num_steps):
        teleop_cmd = teleop_cmds[jj]
        # Shallow copy: only the "target_upper_body_pose" key is rebound below, so
        # the caller's recorded goal dict keeps its original value.
        wbc_goal = dict(wbc_goals[jj])

        # Set IK goal from teleop command
        hand_frame_names = teleop_policy.retargeting_ik.body.supplemental_info.hand_frame_names
        ik_data = {
            "body_data": {
                hand_frame_names["left"]: teleop_cmd["left_wrist"],
                hand_frame_names["right"]: teleop_cmd["right_wrist"],
            },
            "left_hand_data": teleop_cmd["left_fingers"],
            "right_hand_data": teleop_cmd["right_fingers"],
        }
        teleop_policy.retargeting_ik.set_goal(ik_data)

        # Keep the recorded pose for comparison, then substitute the IK result.
        target_upper_body_pose = wbc_goal["target_upper_body_pose"].copy()
        wbc_goal["target_upper_body_pose"] = teleop_policy.retargeting_ik.get_action()

        # Execute WBC policy
        obs = sync_env.observe()
        wbc_policy.set_observation(obs)
        wbc_policy.set_goal(wbc_goal)
        wbc_action = wbc_policy.get_action()
        sync_env.queue_action(wbc_action)

        # Save frame if needed (the helper is a no-op unless config.save_lerobot)
        generate_and_save_frame(
            config, sync_env, obs, wbc_action, seed, teleop_cmd, wbc_goal, gr00t_exporter
        )

        # Render or capture frame
        capture_or_render_frame(env, onscreen, config, video_writer)

        # Validate against the recording; skipped once there is no next recorded state.
        if jj < len(states) - 1:
            if not np.allclose(
                target_upper_body_pose, wbc_goal["target_upper_body_pose"], atol=1e-5
            ):
                err = np.linalg.norm(target_upper_body_pose - wbc_goal["target_upper_body_pose"])
                print(
                    f"[warning] target_upper_body_pose diverged by {err:.12f} for ep {ep} at step {jj}"
                )
                ret = False

            state_playback = env.sim.get_state().flatten()
            if not validate_state(states[jj + 1], state_playback, ep, jj):
                ret = False

    return ret


def playback_actions(
    sync_env,
    actions,
    teleop_cmds,
    wbc_goals,
    states,
    env,
    onscreen,
    config,
    video_writer,
    ep,
    seed,
    gr00t_exporter,
    end_steps,
):
    """Replay an episode by queuing the recorded actions directly.

    Each recorded action is queued as ``{"q": action}``. When dataset export is
    enabled, an observation is taken after queuing and a frame is saved. The
    simulator state is then compared with the next recorded state.

    Args:
        end_steps: ``-1`` plays every action; any other value caps the step count
            at ``min(end_steps, len(actions))``.

    Returns:
        True if every compared state matched, False if any diverged.
    """
    step_limit = len(actions)
    if end_steps != -1:
        step_limit = min(end_steps, step_limit)

    all_matched = True
    for step in range(step_limit):
        sync_env.queue_action({"q": actions[step]})

        # Record the frame when dataset export is enabled.
        if config.save_lerobot:
            observation = sync_env.observe()
            generate_and_save_frame(
                config,
                sync_env,
                observation,
                {"q": actions[step]},
                seed,
                teleop_cmds[step],
                wbc_goals[step],
                gr00t_exporter,
            )

        capture_or_render_frame(env, onscreen, config, video_writer)

        # Compare with the next recorded state, when there is one.
        if step >= len(states) - 1:
            continue
        current_state = env.sim.get_state().flatten()
        if not validate_state(states[step + 1], current_state, ep, step):
            all_matched = False

    return all_matched


def playback_states(
    sync_env,
    states,
    actions,
    teleop_cmds,
    wbc_goals,
    env,
    onscreen,
    config,
    video_writer,
    seed,
    gr00t_exporter,
    end_steps,
    ep,
):
    """Replay an episode by forcing each recorded MuJoCo state in turn.

    Every recorded state is pushed into the environment with ``reset_to``, rendered,
    and read back to confirm it was applied. When dataset export is enabled a frame
    is saved using the recorded action, teleop command and WBC goal of that step.

    Args:
        end_steps: ``-1`` plays every state; any other value caps the step count at
            ``min(end_steps, len(states))``.

    Returns:
        True if every read-back state matched the recorded one, False otherwise.
    """
    ret = True
    num_states = len(states) if end_steps == -1 else min(end_steps, len(states))

    for i in range(num_states):
        sync_env.reset_to({"states": states[i]})
        sync_env.render()

        # Validate that the state was set correctly. No bounds check is needed:
        # i < num_states <= len(states) always holds here.
        state_playback = env.sim.get_state().flatten()
        if not validate_state(states[i], state_playback, ep, i):
            ret = False

        # Save frame if needed
        if config.save_lerobot:
            obs = sync_env.observe()
            teleop_cmd = teleop_cmds[i]
            wbc_goal = wbc_goals[i]
            wbc_action = {"q": actions[i]}
            generate_and_save_frame(
                config, sync_env, obs, wbc_action, seed, teleop_cmd, wbc_goal, gr00t_exporter
            )

        capture_or_render_frame(env, onscreen, config, video_writer)

    return ret


def main(config: SyncSimPlaybackConfig):
    """Play back every episode of ``config.dataset`` and report whether playback matched.

    The dataset's stored ``script_config`` overrides a fixed allow-list of fields on
    ``config`` before the environment and policies are built. Each episode is then
    replayed in one of four modes:

    * ``use_actions`` and ``use_wbc_goals``: recorded WBC goals drive the WBC policy;
    * ``use_actions`` and ``use_teleop_cmd``: teleop IK is re-run on recorded commands;
    * ``use_actions`` only: recorded actions are queued directly;
    * otherwise: recorded simulator states are forced one by one.

    The environment and the video writer (if any) are released even when playback
    raises.

    Args:
        config: Playback configuration. Mutated in place: dataset settings are merged
            in, and ``video_path`` receives a default when video saving is requested
            without an explicit path.

    Returns:
        True if every episode's playback function returned True, False otherwise.

    Raises:
        AssertionError: If ``config.dataset`` is None.
    """
    ret = True
    start_time = time.time()

    np.set_printoptions(precision=5, suppress=True, linewidth=120)

    assert config.dataset is not None, "Folder must be specified for playback"

    # NOTE(review): load_lerobot_dataset accepts a max_episodes limit, but none is
    # forwarded here, so every episode in the dataset is loaded and played. Confirm
    # whether a config field (e.g. the documented --num-episodes) should be passed.
    seeds, data, script_config = load_lerobot_dataset(config.dataset)

    config.update(
        script_config,
        allowed_keys=[
            "wbc_version",
            "wbc_model_path",
            "wbc_policy_class",
            "control_frequency",
            "enable_waist",
            "with_hands",
            "env_name",
            "robot",
            "task_name",
            "teleop_frequency",
            "data_collection_frequency",
            "enable_gravity_compensation",
            "gravity_compensation_joints",
        ],
    )
    config.validate_args()

    robot_type, robot_model = get_robot_type_and_model(config.robot, config.enable_waist)

    # Saving video or image observations switches to offscreen rendering;
    # otherwise the environment is rendered onscreen.
    offscreen = bool(config.save_video or config.save_img_obs)
    onscreen = not offscreen

    # Set default video path if not specified
    if config.save_video and config.video_path is None:
        if os.path.isfile(config.dataset):
            video_folder = Path(config.dataset).parent
        else:
            video_folder = Path(config.dataset)
        video_folder.mkdir(parents=True, exist_ok=True)
        config.video_path = str(video_folder / "playback_video.mp4")
        print(f"Video recording enabled. Output: {config.video_path}")

    sync_env = get_env(config, onscreen=onscreen, offscreen=offscreen)

    gr00t_exporter = None
    video_writer = None
    try:
        if config.save_lerobot:
            obs = sync_env.observe()
            gr00t_exporter = get_data_exporter(config, obs, robot_model)

        # Initialize policies. Any values returned beyond the two policies are
        # ignored, so this call and the per-episode one below unpack the same way.
        wbc_policy, teleop_policy, *_ = get_policies(
            config, robot_type, robot_model, activate_keyboard_listener=False
        )

        # Demo names follow the "demo_<k>" keys produced by load_lerobot_dataset.
        demos = [f"demo_{i + 1}" for i in range(len(seeds))]
        print(f"Loaded and will playback {len(demos)} episodes")
        env = sync_env.base_env

        # Setup video writer
        if config.save_video:
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            video_writer = cv2.VideoWriter(
                config.video_path, fourcc, 20, (RS_VIEW_CAMERA_WIDTH, RS_VIEW_CAMERA_HEIGHT)
            )
            if not video_writer.isOpened():
                # Writing to an unopened writer produces no file and no error,
                # so make the failure visible up front.
                print(f"[warning] could not open video writer for {config.video_path}")

        print(f"Loaded {len(demos)} episodes from {config.dataset}")
        print("seeds:", seeds)
        print("demos:", demos, "\n\n")

        # In CI mode only the first 20 steps of each episode are played.
        end_steps = 20 if config.ci_test else -1

        for ep, seed in zip(demos, seeds):
            print(f"Playing back episode: {ep}")

            # Reset the environment with the seed recorded for this episode.
            sync_env.reset(seed=seed)

            # load the actions and states
            states = data[f"data/{ep}/states"]
            actions = data[f"data/{ep}/actions"]
            teleop_cmds = data[f"data/{ep}/teleop_cmd"]
            wbc_goals = data[f"data/{ep}/wbc_goal"]

            # reset the policies
            wbc_policy, teleop_policy, *_ = get_policies(
                config, robot_type, robot_model, activate_keyboard_listener=False
            )

            if config.use_actions:
                # load the initial state
                sync_env.reset_to({"states": states[0]})
                if config.use_wbc_goals:
                    # use the wbc_goals to control the robot
                    episode_ret = playback_wbc_goals(
                        sync_env,
                        wbc_policy,
                        wbc_goals,
                        teleop_cmds,
                        states,
                        env,
                        onscreen,
                        config,
                        video_writer,
                        ep,
                        seed,
                        gr00t_exporter,
                        end_steps,
                    )
                elif config.use_teleop_cmd:
                    # use the teleop commands to control the robot
                    episode_ret = playback_teleop_cmd(
                        sync_env,
                        wbc_policy,
                        teleop_policy,
                        wbc_goals,
                        teleop_cmds,
                        states,
                        env,
                        onscreen,
                        config,
                        video_writer,
                        ep,
                        seed,
                        gr00t_exporter,
                        end_steps,
                    )
                else:
                    # play the recorded actions back open-loop
                    episode_ret = playback_actions(
                        sync_env,
                        actions,
                        teleop_cmds,
                        wbc_goals,
                        states,
                        env,
                        onscreen,
                        config,
                        video_writer,
                        ep,
                        seed,
                        gr00t_exporter,
                        end_steps,
                    )
            else:
                # force the sequence of internal mujoco states one by one
                episode_ret = playback_states(
                    sync_env,
                    states,
                    actions,
                    teleop_cmds,
                    wbc_goals,
                    env,
                    onscreen,
                    config,
                    video_writer,
                    seed,
                    gr00t_exporter,
                    end_steps,
                    ep,
                )
            ret = ret and episode_ret

            if config.save_lerobot:
                gr00t_exporter.save_episode()

            print(f"Episode {ep} playback finished.\n\n")
    finally:
        # Release the environment and the video file even if playback raised.
        try:
            sync_env.close()
        finally:
            if video_writer is not None:
                video_writer.release()

    if video_writer is not None:
        print(f"Video saved to: {config.video_path}")

    end_time = time.time()
    elapsed_time = end_time - start_time

    if config.save_lerobot:
        print(f"LeRobot dataset saved to: {gr00t_exporter.root}")

    print(
        f"{GREEN_BOLD}Playback with WBC version: {config.wbc_version}, {config.wbc_model_path}, "
        f"{config.wbc_policy_class}, use_actions: {config.use_actions}, use_wbc_goals: {config.use_wbc_goals}, "
        f"use_teleop_cmd: {config.use_teleop_cmd}{RESET}"
    )
    if ret:
        print(f"{GREEN_BOLD}Playback completed successfully in {elapsed_time:.2f} seconds!{RESET}")
    else:
        print(f"{RED_BOLD}Playback encountered an error in {elapsed_time:.2f} seconds!{RESET}")

    return ret


def capture_or_render_frame(
    env: RobotEnv,
    onscreen: bool,
    config: SyncSimPlaybackConfig,
    video_writer: Optional[cv2.VideoWriter],
):
    """Write the current frame to the video file, or render the environment onscreen.

    When ``config.save_video`` is set, a frame is rendered from the first entry of
    ``env.render_camera`` and written to ``video_writer``; nothing is rendered
    onscreen in that case. Without video saving, ``env.render()`` is called only
    if ``onscreen`` is true.
    """
    if not config.save_video:
        if onscreen:
            env.render()
        return

    # Environments without a renderable simulator are silently skipped.
    if not (hasattr(env, "sim") and hasattr(env.sim, "render")):
        return

    rgb_frame = env.sim.render(
        camera_name=env.render_camera[0],
        width=RS_VIEW_CAMERA_WIDTH,
        height=RS_VIEW_CAMERA_HEIGHT,
    )
    # The frame is converted from RGB to BGR channel order before writing.
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    # Reverse the row order (vertical flip) — presumably the offscreen render
    # comes back upside down; TODO confirm.
    video_writer.write(np.flipud(bgr_frame))


if __name__ == "__main__":
    config = tyro.cli(SyncSimPlaybackConfig)

    rclpy.init(args=None)
    try:
        # The node handle is not used directly by main().
        # NOTE(review): presumably a node must exist for ROS-dependent helpers — confirm.
        node = rclpy.create_node("playback_decoupled_wbc_control")

        main(config)
    finally:
        # Shut ROS down even if playback raises.
        rclpy.shutdown()