gr00t-WholeBodyControl/decoupled_wbc/control/policy/interpolation_policy.py


								import numbers

								import time as time_module

								from typing import Any, Dict, Optional, Union


								import gymnasium as gym

								import numpy as np

								import scipy.interpolate as si


								from decoupled_wbc.control.base.policy import Policy


								class InterpolationPolicy(Policy):
								    """Policy that interpolates a set of named 1D feature vectors over time.

								    Goals are queued with :meth:`set_goal` and sampled with :meth:`get_action`.
								    All features are concatenated (in sorted key order) into one vector that is
								    driven by a ``PoseTrajectoryInterpolator``.
								    """

								    def __init__(
								        self,
								        init_time: float,
								        init_values: dict[str, np.ndarray],
								        max_change_rate: float,
								    ):
								        """
								        Args:
								            init_time: The time of recording the initial values.
								            init_values: The initial values of the features.
								                The keys are the names of the features, and the values
								                are the initial values of the features (1D array, or a
								                2D array of shape (1, D) which is squeezed to (D,)).
								            max_change_rate: The maximum change rate, forwarded to
								                ``PoseTrajectoryInterpolator.schedule_waypoint`` for every
								                waypoint scheduled by :meth:`set_goal`.
								        """
								        super().__init__()
								        self.concat_order = sorted(init_values.keys())
								        self.concat_dims = []
								        # Keep a private, normalized copy of the initial values: `get_action`
								        # updates `last_action` in place, which must not leak into (or rewrite)
								        # the dict owned by the caller.
								        self.last_action = {}  # Vecs are 1D arrays
								        for key in self.concat_order:
								            vec = np.array(init_values[key])
								            if vec.ndim == 2 and vec.shape[0] == 1:
								                vec = vec[0]
								            assert vec.ndim == 1, f"The shape of {key} should be (D,). Got {vec.shape}."
								            self.last_action[key] = vec
								            self.concat_dims.append(vec.shape[0])

								        self.init_values_concat = self._concat_vecs(self.last_action, 1)
								        self.max_change_rate = max_change_rate
								        self.reset(init_time)

								    def reset(self, init_time: Optional[float] = None):
								        """Restart the trajectory from the initial values at ``init_time``.

								        Args:
								            init_time: Time stamp of the single initial waypoint. When omitted,
								                the current ``time.monotonic()`` value is read at call time.
								        """
								        # The default is resolved here rather than in the signature: a default
								        # of `time_module.monotonic()` would be evaluated only once, when the
								        # function is defined.
								        if init_time is None:
								            init_time = time_module.monotonic()
								        self.interp = PoseTrajectoryInterpolator(np.array([init_time]), self.init_values_concat)
								        self.last_waypoint_time = init_time

								    def _concat_vecs(self, values: dict[str, np.ndarray], length: int) -> np.ndarray:
								        """
								        Concatenate the vectors into a 2D array to be used for interpolation.

								        Keys are taken from ``self.concat_order``; keys of ``values`` that are
								        not in ``self.concat_order`` are ignored.

								        Args:
								            values: The values to concatenate.
								            length: The length of the concatenated vectors (time dimension).
								        Returns:
								            The concatenated vectors (T, D) arrays.
								        """
								        concat_vecs = []
								        for key in self.concat_order:
								            if key in values:
								                vec = np.array(values[key])
								                if vec.ndim == 1:
								                    # If the vector is 1D, tile it to the length of the time dimension
								                    vec = np.tile(vec, (length, 1))
								                assert vec.ndim == 2, f"The shape of {key} should be (T, D). Got {vec.shape}."
								                concat_vecs.append(vec)
								            else:
								                # If the vector is not in the values, use the last action
								                # Since the last action is 1D, we need to tile it to the length of the time dimension
								                concat_vecs.append(np.tile(self.last_action[key], (length, 1)))
								        return np.concatenate(concat_vecs, axis=1)  # Vecs are 2D (T, D) arrays

								    def _unconcat_vecs(self, concat_vec: np.ndarray) -> dict[str, np.ndarray]:
								        """Split a concatenated 1D vector back into per-feature 1D slices.

								        Args:
								            concat_vec: A (D,) vector laid out in ``self.concat_order``.
								        Returns:
								            A dict mapping each feature name to its slice of ``concat_vec``.
								        """
								        curr_idx = 0
								        action = {}
								        assert (
								            concat_vec.ndim == 1
								        ), f"The shape of the concatenated vector should be (D,). Got {concat_vec.shape}."
								        for key, dim in zip(self.concat_order, self.concat_dims):
								            action[key] = concat_vec[curr_idx : curr_idx + dim]
								            curr_idx += dim
								        return action  # Vecs are 1D arrays

								    def __call__(
								        self, observation: Dict[str, Any], goal: Dict[str, Any], time: float
								    ) -> Dict[str, np.ndarray]:
								        """Not supported; use :meth:`set_goal` and :meth:`get_action` instead."""
								        raise NotImplementedError(
								            "`InterpolationPolicy` accepts goal and provide action in two separate methods."
								        )

								    def set_goal(self, goal: Dict[str, Any]) -> None:
								        """Schedule one or several waypoints described by ``goal``.

								        Args:
								            goal: Must contain ``"target_time"`` (a scalar, or a list of times)
								                and ``"interpolation_garbage_collection_time"``; the remaining
								                entries are feature targets. With a list of times every feature
								                must be a list of the same length. Features missing from
								                ``goal`` are filled with the last action. If ``"target_time"``
								                is absent the call does nothing. ``goal`` is not modified.
								        """
								        if "target_time" not in goal:
								            return
								        assert (
								            "interpolation_garbage_collection_time" in goal
								        ), "`interpolation_garbage_collection_time` is required."

								        # Work on a shallow copy so the caller's dict is left untouched.
								        goal = dict(goal)
								        target_time = goal.pop("target_time")
								        interpolation_garbage_collection_time = goal.pop("interpolation_garbage_collection_time")

								        if isinstance(target_time, list):
								            for key, vec in goal.items():
								                assert isinstance(vec, list)
								                assert len(vec) == len(target_time), (
								                    f"The length of {key} and `target_time` should be the same. "
								                    f"Got {len(vec)} and {len(target_time)}."
								                )
								        else:
								            # Promote a single target to a length-1 sequence of targets.
								            target_time = [target_time]
								            goal = {key: [vec] for key, vec in goal.items()}

								        # Concatenate all vectors in goal
								        concat_vecs = self._concat_vecs(goal, len(target_time))
								        assert concat_vecs.shape[0] == len(target_time), (
								            f"The length of the concatenated goal and `target_time` should be the same. "
								            f"Got {concat_vecs.shape[0]} and {len(target_time)}."
								        )

								        for tt, vec in zip(target_time, concat_vecs):
								            # Targets older than the garbage-collection time are dropped.
								            if tt < interpolation_garbage_collection_time:
								                continue
								            self.interp = self.interp.schedule_waypoint(
								                pose=vec,
								                time=tt,
								                max_change_rate=self.max_change_rate,
								                interpolation_garbage_collection_time=interpolation_garbage_collection_time,
								                last_waypoint_time=self.last_waypoint_time,
								            )
								            self.last_waypoint_time = tt

								    def get_action(self, time: Optional[float] = None) -> dict[str, Any]:
								        """Get the next action based on the (current) monotonic time.

								        Args:
								            time: Query time; defaults to ``time.monotonic()`` when ``None``.
								        Returns:
								            ``self.last_action`` (the same dict object on every call), updated
								            with the interpolated 1D vector of every feature.
								        """
								        if time is None:
								            time = time_module.monotonic()
								        concat_vec = self.interp(time)
								        self.last_action.update(self._unconcat_vecs(concat_vec))
								        return self.last_action

								    def observation_space(self) -> gym.spaces.Dict:
								        """Return the observation space (not implemented; returns ``None``)."""
								        pass

								    def action_space(self) -> gym.spaces.Dict:
								        """Return the action space (not implemented; returns ``None``)."""
								        pass

								    def close(self) -> None:
								        """Clean up resources (nothing to release)."""
								        pass


								class PoseTrajectoryInterpolator:
								    """Linear interpolator over time-stamped pose vectors.

								    With a single waypoint the pose is constant. With several waypoints a
								    ``scipy.interpolate.interp1d`` over the time axis is used, and query times
								    are clamped to the covered time range.
								    """

								    def __init__(self, times: np.ndarray, poses: np.ndarray):
								        """
								        Args:
								            times: Waypoint times, length T >= 1, non-decreasing.
								            poses: Waypoint poses, one row per entry of ``times``.
								        """
								        assert len(times) >= 1
								        assert len(poses) == len(times)

								        times = np.asarray(times)
								        poses = np.asarray(poses)

								        self.num_joint = len(poses[0])

								        if len(times) == 1:
								            # special treatment for single step interpolation
								            self.single_step = True
								            self._times = times
								            self._poses = poses
								        else:
								            self.single_step = False
								            assert np.all(times[1:] >= times[:-1])
								            self.pose_interp = si.interp1d(times, poses, axis=0, assume_sorted=True)

								    @property
								    def times(self) -> np.ndarray:
								        """Waypoint times of the trajectory."""
								        if self.single_step:
								            return self._times
								        return self.pose_interp.x

								    @property
								    def poses(self) -> np.ndarray:
								        """Waypoint poses of the trajectory, one row per waypoint time."""
								        if self.single_step:
								            return self._poses
								        return self.pose_interp.y

								    def trim(self, start_t: float, end_t: float) -> "PoseTrajectoryInterpolator":
								        """Return a new interpolator restricted to ``[start_t, end_t]``.

								        The result keeps the waypoints strictly inside the interval and adds
								        waypoints at both ends, whose poses are sampled from this interpolator.
								        """
								        assert start_t <= end_t
								        times = self.times
								        should_keep = (start_t < times) & (times < end_t)
								        keep_times = times[should_keep]
								        all_times = np.concatenate([[start_t], keep_times, [end_t]])
								        # remove duplicates (e.g. start_t == end_t) so that no knot is repeated
								        all_times = np.unique(all_times)
								        # interpolate
								        all_poses = self(all_times)
								        return PoseTrajectoryInterpolator(times=all_times, poses=all_poses)

								    def schedule_waypoint(
								        self,
								        pose,
								        time,
								        max_change_rate=np.inf,
								        interpolation_garbage_collection_time=None,
								        last_waypoint_time=None,
								    ) -> "PoseTrajectoryInterpolator":
								        """Return a new interpolator with ``pose`` scheduled at (or after) ``time``.

								        The current trajectory is trimmed to ``[start_time, end_time]`` (see the
								        inline comments for how both are derived) and ``pose`` is appended. The
								        new waypoint is pushed later than ``time`` when reaching it from the
								        pose at ``end_time`` would exceed ``max_change_rate``.

								        Args:
								            pose: Target pose vector with ``num_joint`` entries.
								            time: Requested time of the new waypoint.
								            max_change_rate: Maximum change per unit of time; a scalar applied
								                to every joint, or one value per joint.
								            interpolation_garbage_collection_time: Waypoints before this time
								                are discarded. If ``time`` is not later than it, ``self`` is
								                returned unchanged.
								            last_waypoint_time: Time of the previously requested waypoint;
								                requires ``interpolation_garbage_collection_time``.
								        Returns:
								            The new interpolator, or ``self`` when nothing is scheduled.
								        """
								        # A scalar rate applies to every joint; sequences are per-joint rates.
								        change_rate = np.asarray(max_change_rate, dtype=float)
								        if change_rate.ndim == 0:
								            change_rate = np.full(self.num_joint, float(change_rate))

								        assert len(change_rate) == self.num_joint
								        assert np.max(change_rate) > 0

								        if last_waypoint_time is not None:
								            assert interpolation_garbage_collection_time is not None

								        # trim current interpolator to between interpolation_garbage_collection_time and last_waypoint_time
								        start_time = self.times[0]
								        end_time = self.times[-1]
								        assert start_time <= end_time
								        if interpolation_garbage_collection_time is not None:
								            if time <= interpolation_garbage_collection_time:
								                # if insert time is earlier than current time
								                # no effect should be done to the interpolator
								                return self
								            # now, interpolation_garbage_collection_time < time
								            start_time = max(interpolation_garbage_collection_time, start_time)

								            if last_waypoint_time is not None and time > last_waypoint_time:
								                # keep the trajectory up to the previously requested waypoint
								                end_time = max(last_waypoint_time, interpolation_garbage_collection_time)
								            else:
								                # the new waypoint is not later than the previous one (or there
								                # is none): drop everything after the garbage-collection time
								                end_time = interpolation_garbage_collection_time

								        end_time = min(end_time, time)
								        start_time = min(start_time, end_time)

								        # Invariants after the two `min` operations above:
								        #   start_time <= end_time <= time
								        #   interpolation_garbage_collection_time <= start_time
								        #   interpolation_garbage_collection_time <= time
								        assert start_time <= end_time
								        assert end_time <= time
								        if last_waypoint_time is not None:
								            if time <= last_waypoint_time:
								                assert end_time == interpolation_garbage_collection_time
								            else:
								                assert end_time == max(last_waypoint_time, interpolation_garbage_collection_time)

								        if interpolation_garbage_collection_time is not None:
								            assert interpolation_garbage_collection_time <= start_time
								            assert interpolation_garbage_collection_time <= time
								        trimmed_interp = self.trim(start_time, end_time)
								        # after this, all waypoints in trimmed_interp is within start_time and end_time
								        # and is earlier than time

								        # determine speed
								        duration = time - end_time
								        end_pose = trimmed_interp(end_time)
								        pose_min_duration = np.max(np.abs(end_pose - pose) / change_rate)
								        duration = max(duration, pose_min_duration)
								        assert duration >= 0
								        last_waypoint_time = end_time + duration

								        if last_waypoint_time > trimmed_interp.times[-1]:
								            # insert new pose
								            times = np.append(trimmed_interp.times, [last_waypoint_time], axis=0)
								            poses = np.append(trimmed_interp.poses, [pose], axis=0)
								        else:
								            # Zero-length move: appending would repeat the last knot time, which
								            # makes the linear interpolation degenerate (division by zero, or
								            # the new pose is never reached). Overwrite the last knot instead.
								            times = np.array(trimmed_interp.times, copy=True)
								            poses = np.array(trimmed_interp.poses, copy=True)
								            poses[-1] = pose

								        # create new interpolator
								        return PoseTrajectoryInterpolator(times, poses)

								    def __call__(self, t: Union[numbers.Number, np.ndarray]) -> np.ndarray:
								        """Evaluate the trajectory at ``t``.

								        Args:
								            t: A scalar time (number or 0-d array) or an array of times. Times
								                outside the waypoint range are clamped to it.
								        Returns:
								            A 1D pose for scalar ``t``; otherwise one pose row per time.
								        """
								        is_single = np.ndim(t) == 0
								        t = np.atleast_1d(np.asarray(t))

								        if self.single_step:
								            # constant trajectory: repeat the only pose for every query time
								            pose = np.zeros((len(t), self.num_joint))
								            pose[:] = self._poses[0]
								        else:
								            t = np.clip(t, self.times[0], self.times[-1])
								            pose = self.pose_interp(t)

								        if is_single:
								            pose = pose[0]
								        return pose