Part of solution to #795: add HumanReadableWrapper #796

Closed

wants to merge 60 commits

Commits
5182ecf
first pass of dict obs functionality
NixGD Sep 13, 2023
61d816b
cleanup DictObs
NixGD Sep 13, 2023
c3331f6
add dict space to test_types.py, fix some problems
NixGD Sep 14, 2023
fc9838d
add dict-obs test for rollout
NixGD Sep 14, 2023
fb9498b
add bc.py test
NixGD Sep 14, 2023
e54c36c
cleanup
NixGD Sep 14, 2023
ee04383
small fixes
NixGD Sep 14, 2023
6e2218a
small fixes
NixGD Sep 14, 2023
68fe666
fix type error in interactive.py
NixGD Sep 14, 2023
9ad2aaf
fix introduced error in mce_irl.py
NixGD Sep 14, 2023
67341d5
fix minor ci complaint
NixGD Sep 14, 2023
c497b56
add basic dictobs tests
NixGD Sep 14, 2023
d3f79bf
change default bc policy for dict obs space
NixGD Sep 14, 2023
2de9e49
refine rollout.py typechecks, comments
NixGD Sep 14, 2023
c47cca6
check rollout produces dictobs of correct shape
NixGD Sep 14, 2023
276294b
cleanup types and dictobs helpers
NixGD Sep 14, 2023
071d2a7
clean useless lines
NixGD Sep 14, 2023
a2ccd7e
clean up print statements
NixGD Sep 14, 2023
93baa2d
fix typos
NixGD Sep 15, 2023
54f33af
assert matching keys in from_obs_list
NixGD Sep 15, 2023
c711abf
move maybe_wrap, clean rollout
NixGD Sep 15, 2023
58a0d70
change policy callable to take dict[str, np.ndarray] not dictobs
NixGD Sep 15, 2023
0f080d4
rollout info wrapper supports dictobs
NixGD Sep 15, 2023
c4d3e11
fix from_obs_list key consistency check
NixGD Sep 15, 2023
b93294a
xfail save/load tests with dictobs
NixGD Sep 15, 2023
3f17ff2
doc for dictobs wrapper
NixGD Sep 15, 2023
0212e0e
don't error on int observations
NixGD Sep 15, 2023
070ebf9
lint fixes
NixGD Sep 15, 2023
657e17e
cleanup bc test for dict obs
NixGD Sep 15, 2023
1f8c12a
cleanup bc.py unwrapping
NixGD Sep 15, 2023
bd70ecd
cleanup rollout.py
NixGD Sep 15, 2023
bec464c
cleanup dictobs interface
NixGD Sep 15, 2023
bef19e6
small cleanups
NixGD Sep 15, 2023
9aaf73f
coverage fixes, test fix
NixGD Sep 15, 2023
5d6aa77
adjust error types
NixGD Sep 15, 2023
86fbcf1
docstrings for type helpers
NixGD Sep 15, 2023
8d1e0d6
add dict obs space support for density
NixGD Sep 15, 2023
96978d5
fix typos
NixGD Sep 15, 2023
e95df9d
Adam suggestions from code review
NixGD Sep 16, 2023
161ec95
small changes for code review
NixGD Sep 16, 2023
90bdf57
fix docstring
NixGD Sep 16, 2023
6aa25ff
remove FloatReward
ZiyueWang25 Oct 2, 2023
bf48c76
Merge remote-tracking branch 'origin/master' into support-dict-obs-space
ZiyueWang25 Oct 2, 2023
4ce1b57
Fix test_bc
ZiyueWang25 Oct 2, 2023
de1b1c8
Turn off GPU finding to avoid using gpu device
ZiyueWang25 Oct 2, 2023
1a1a458
Check None to ensure __add__ can work
ZiyueWang25 Oct 2, 2023
f7866f4
fix docstring
ZiyueWang25 Oct 2, 2023
daa838d
bypass pytype and lint test
ZiyueWang25 Oct 2, 2023
803eab0
format with black
ZiyueWang25 Oct 2, 2023
0ac6f54
Test dict space in density algo
ZiyueWang25 Oct 2, 2023
be9798b
black format
ZiyueWang25 Oct 2, 2023
c7e6809
small fix
ZiyueWang25 Oct 2, 2023
82fb558
Add DictObs into test_wrappers
ZiyueWang25 Oct 3, 2023
03714cc
fix format
ZiyueWang25 Oct 3, 2023
187e881
minor fix
ZiyueWang25 Oct 3, 2023
ae96521
type and lint fix
ZiyueWang25 Oct 3, 2023
535a986
Add policy training test
ZiyueWang25 Oct 3, 2023
de027c4
suppress line too long lint check on a line
ZiyueWang25 Oct 3, 2023
be79cf5
acts to obs for clarity
ZiyueWang25 Oct 3, 2023
6e5c3e8
Add HumanReadableWrapper
ZiyueWang25 Oct 3, 2023
Files changed
40 changes: 33 additions & 7 deletions src/imitation/algorithms/bc.py
@@ -9,6 +9,7 @@
from typing import (
Any,
Callable,
Dict,
Iterable,
Iterator,
Mapping,
@@ -22,7 +23,7 @@
import numpy as np
import torch as th
import tqdm
from stable_baselines3.common import policies, utils, vec_env
from stable_baselines3.common import policies, torch_layers, utils, vec_env

from imitation.algorithms import base as algo_base
from imitation.data import rollout, types
@@ -99,7 +100,12 @@ class BehaviorCloningLossCalculator:
def __call__(
self,
policy: policies.ActorCriticPolicy,
obs: Union[th.Tensor, np.ndarray],
obs: Union[
types.AnyTensor,
types.DictObs,
Dict[str, np.ndarray],
Dict[str, th.Tensor],
],
acts: Union[th.Tensor, np.ndarray],
) -> BCTrainingMetrics:
"""Calculate the supervised learning loss used to train the behavioral clone.
@@ -113,9 +119,18 @@ def __call__(
A BCTrainingMetrics object with the loss and all the components it
consists of.
"""
obs = util.safe_to_tensor(obs)
tensor_obs = types.map_maybe_dict(
util.safe_to_tensor,
types.maybe_unwrap_dictobs(obs),
)
acts = util.safe_to_tensor(acts)
_, log_prob, entropy = policy.evaluate_actions(obs, acts)

# policy.evaluate_actions's type signatures are incorrect.
# See https://github.com/DLR-RM/stable-baselines3/issues/1679
(_, log_prob, entropy) = policy.evaluate_actions(
tensor_obs, # type: ignore[arg-type]
acts,
)
prob_true_act = th.exp(log_prob).mean()
log_prob = log_prob.mean()
entropy = entropy.mean() if entropy is not None else None
@@ -324,12 +339,18 @@ def __init__(
self.rng = rng

if policy is None:
extractor = (
torch_layers.CombinedExtractor
if isinstance(observation_space, gym.spaces.Dict)
else torch_layers.FlattenExtractor
)
policy = policy_base.FeedForward32Policy(
observation_space=observation_space,
action_space=action_space,
# Set lr_schedule to max value to force error if policy.optimizer
# is used by mistake (should use self.optimizer instead).
lr_schedule=lambda _: th.finfo(th.float32).max,
features_extractor_class=extractor,
)
self._policy = policy.to(utils.get_device(device))
# TODO(adam): make policy mandatory and delete observation/action space params?
@@ -464,9 +485,14 @@ def process_batch():
minibatch_size,
num_samples_so_far,
), batch in batches_with_stats:
obs = th.as_tensor(batch["obs"], device=self.policy.device).detach()
acts = th.as_tensor(batch["acts"], device=self.policy.device).detach()
training_metrics = self.loss_calculator(self.policy, obs, acts)
obs_tensor: Union[th.Tensor, Dict[str, th.Tensor]]
# unwraps the observation if it's a dictobs and converts arrays to tensors
obs_tensor = types.map_maybe_dict(
lambda x: util.safe_to_tensor(x, device=self.policy.device),
types.maybe_unwrap_dictobs(batch["obs"]),
)
acts = util.safe_to_tensor(batch["acts"], device=self.policy.device)
training_metrics = self.loss_calculator(self.policy, obs_tensor, acts)

# Renormalise the loss to be averaged over the whole
# batch size instead of the minibatch size.
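For context on the dict-observation handling above: a minimal, self-contained sketch of the unwrap-then-map pattern that `types.maybe_unwrap_dictobs` / `types.map_maybe_dict` appear to implement. The `map_maybe_dict` below is a simplified stand-in for illustration, not the PR's implementation.

```python
from typing import Callable, Dict, Union

import numpy as np
import torch as th

MaybeDict = Union[np.ndarray, Dict[str, np.ndarray]]


def map_maybe_dict(fn: Callable[[np.ndarray], th.Tensor], obs: MaybeDict):
    """Apply ``fn`` to an array, or to every value of a dict of arrays."""
    if isinstance(obs, dict):
        return {k: fn(v) for k, v in obs.items()}
    return fn(obs)


# A batch drawn from a Dict observation space: each key maps to (batch, *shape).
batch_obs: Dict[str, np.ndarray] = {
    "image": np.zeros((32, 4, 84, 84), dtype=np.float32),
    "state": np.zeros((32, 7), dtype=np.float32),
}
tensor_obs = map_maybe_dict(th.as_tensor, batch_obs)
assert tensor_obs["state"].shape == (32, 7)  # still a dict, now of tensors
```

A dict of tensors is the input format SB3's `CombinedExtractor` expects, which is why the `__init__` change above selects it as the features extractor whenever `observation_space` is a `gym.spaces.Dict`.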
2 changes: 1 addition & 1 deletion src/imitation/algorithms/dagger.py
@@ -508,7 +508,7 @@ def create_trajectory_collector(self) -> InteractiveTrajectoryCollector:
beta = self.beta_schedule(self.round_num)
collector = InteractiveTrajectoryCollector(
venv=self.venv,
get_robot_acts=lambda acts: self.bc_trainer.policy.predict(acts)[0],
get_robot_acts=lambda obs: self.bc_trainer.policy.predict(obs)[0],
beta=beta,
save_dir=save_dir,
rng=self.rng,
106 changes: 51 additions & 55 deletions src/imitation/algorithms/density.py
@@ -134,9 +134,9 @@

def _get_demo_from_batch(
self,
obs_b: np.ndarray,
obs_b: types.Observation,
act_b: np.ndarray,
next_obs_b: Optional[np.ndarray],
next_obs_b: Optional[types.Observation],
) -> Dict[Optional[int], List[np.ndarray]]:
if next_obs_b is None and self.density_type == DensityType.STATE_STATE_DENSITY:
raise ValueError(
@@ -145,11 +145,17 @@ def _get_demo_from_batch(
)

assert act_b.shape[1:] == self.venv.action_space.shape
assert obs_b.shape[1:] == self.venv.observation_space.shape

if isinstance(obs_b, types.DictObs):
exp_shape = {k: v.shape for k, v in self.venv.observation_space.items()} # type: ignore[attr-defined] # noqa: E501

obs_shape = {k: v.shape[1:] for k, v in obs_b.items()}
assert exp_shape == obs_shape, f"Expected {exp_shape}, got {obs_shape}"
else:
assert obs_b.shape[1:] == self.venv.observation_space.shape
assert len(act_b) == len(obs_b)
if next_obs_b is not None:
assert next_obs_b.shape[1:] == self.venv.observation_space.shape
assert len(next_obs_b) == len(obs_b)
assert next_obs_b.shape == obs_b.shape

if next_obs_b is not None:
next_obs_b_iterator: Iterable = next_obs_b
@@ -200,14 +206,17 @@ def set_demonstrations(self, demonstrations: base.AnyTransitions) -> None:
# analogous to cast above.
demonstrations = cast(Iterable[types.TransitionMapping], demonstrations)

def to_np_maybe_dictobs(x):
if isinstance(x, types.DictObs):
return x
else:
return util.safe_to_numpy(x, warn=True)

for batch in demonstrations:
transitions.update(
self._get_demo_from_batch(
util.safe_to_numpy(batch["obs"], warn=True),
util.safe_to_numpy(batch["acts"], warn=True),
util.safe_to_numpy(batch.get("next_obs"), warn=True),
),
)
obs = to_np_maybe_dictobs(batch["obs"])
acts = util.safe_to_numpy(batch["acts"], warn=True)
next_obs = to_np_maybe_dictobs(batch.get("next_obs"))
transitions.update(self._get_demo_from_batch(obs, acts, next_obs))
else:
raise TypeError(
f"Unsupported demonstration type {type(demonstrations)}",
@@ -253,65 +262,40 @@ def _fit_density(self, transitions: np.ndarray) -> neighbors.KernelDensity:

def _preprocess_transition(
self,
obs: np.ndarray,
obs: types.Observation,
act: np.ndarray,
next_obs: Optional[np.ndarray],
next_obs: Optional[types.Observation],
) -> np.ndarray:
"""Compute flattened transition on subset specified by `self.density_type`."""
flattened_obs = space_utils.flatten(
self.venv.observation_space,
types.maybe_unwrap_dictobs(obs),
)
flattened_obs = _check_data_is_np_array(flattened_obs, "observation")
if self.density_type == DensityType.STATE_DENSITY:
flat_observations = space_utils.flatten(self.venv.observation_space, obs)
if not isinstance(flat_observations, np.ndarray):
raise ValueError(
"The density estimator only supports spaces that "
"flatten to a numpy array but the observation space "
f"flattens to {type(flat_observations)}",
)

return flat_observations
return flattened_obs
elif self.density_type == DensityType.STATE_ACTION_DENSITY:
flat_observation = space_utils.flatten(self.venv.observation_space, obs)
flat_action = space_utils.flatten(self.venv.action_space, act)

if not isinstance(flat_observation, np.ndarray):
raise ValueError(
"The density estimator only supports spaces that "
"flatten to a numpy array but the observation space "
f"flattens to {type(flat_observation)}",
)
if not isinstance(flat_action, np.ndarray):
raise ValueError(
"The density estimator only supports spaces that "
"flatten to a numpy array but the action space "
f"flattens to {type(flat_action)}",
)

return np.concatenate([flat_observation, flat_action])
flattened_action = space_utils.flatten(self.venv.action_space, act)
flattened_action = _check_data_is_np_array(flattened_action, "action")
return np.concatenate([flattened_obs, flattened_action])
elif self.density_type == DensityType.STATE_STATE_DENSITY:
assert next_obs is not None
flat_observation = space_utils.flatten(self.venv.observation_space, obs)
flat_next_observation = space_utils.flatten(
flat_next_obs = space_utils.flatten(
self.venv.observation_space,
next_obs,
types.maybe_unwrap_dictobs(next_obs),
)
flat_next_obs = _check_data_is_np_array(flat_next_obs, "observation")
assert type(flattened_obs) is type(flat_next_obs)

if not isinstance(flat_observation, np.ndarray):
raise ValueError(
"The density estimator only supports spaces that "
"flatten to a numpy array but the observation space "
f"flattens to {type(flat_observation)}",
)

assert type(flat_observation) is type(flat_next_observation)

return np.concatenate([flat_observation, flat_next_observation])
return np.concatenate([flattened_obs, flat_next_obs])
else:
raise ValueError(f"Unknown density type {self.density_type}")

def __call__(
self,
state: np.ndarray,
state: types.Observation,
action: np.ndarray,
next_state: np.ndarray,
next_state: types.Observation,
done: np.ndarray,
steps: Optional[np.ndarray] = None,
) -> np.ndarray:
@@ -347,6 +331,8 @@ def __call__(

rew_list = []
assert len(state) == len(action) and len(state) == len(next_state)
state = types.maybe_wrap_in_dictobs(state)
next_state = types.maybe_wrap_in_dictobs(next_state)
for idx, (obs, act, next_obs) in enumerate(zip(state, action, next_state)):
flat_trans = self._preprocess_transition(obs, act, next_obs)
assert self._scaler is not None
@@ -424,3 +410,13 @@ def policy(self) -> base_class.BasePolicy:
assert self.rl_algo is not None
assert self.rl_algo.policy is not None
return self.rl_algo.policy


def _check_data_is_np_array(data: space_utils.FlatType, name: str) -> np.ndarray:
"""Raises error if the flattened data is not a numpy array."""
assert isinstance(data, np.ndarray), (
"The density estimator only supports spaces that "
f"flatten to a numpy array but the {name} space "
f"flattens to {type(data)}",
)
return data
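As a sanity check on the density changes above (standard gymnasium behaviour, not code from this PR): `gymnasium.spaces.flatten` concatenates a Dict observation into a single 1-D array, which is why unwrapping a DictObs and flattening it is enough for the kernel density estimator.

```python
import gymnasium as gym
import numpy as np

space = gym.spaces.Dict(
    {
        "position": gym.spaces.Box(-1.0, 1.0, shape=(3,)),
        "velocity": gym.spaces.Box(-1.0, 1.0, shape=(2,)),
    }
)
obs = space.sample()
flat = gym.spaces.flatten(space, obs)  # concatenates the per-key arrays
assert isinstance(flat, np.ndarray) and flat.shape == (3 + 2,)
```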
36 changes: 28 additions & 8 deletions src/imitation/algorithms/mce_irl.py
@@ -7,7 +7,18 @@
"""
import collections
import warnings
from typing import Any, Iterable, List, Mapping, NoReturn, Optional, Tuple, Type, Union
from typing import (
Any,
Dict,
Iterable,
List,
Mapping,
NoReturn,
Optional,
Tuple,
Type,
Union,
)

import gymnasium as gym
import numpy as np
@@ -347,7 +358,7 @@ def _set_demo_from_trajectories(self, trajs: Iterable[types.Trajectory]) -> None:
num_demos = 0
for traj in trajs:
cum_discount = 1.0
for obs in traj.obs:
for obs in types.assert_not_dictobs(traj.obs):
self.demo_state_om[obs] += cum_discount
cum_discount *= self.discount
num_demos += 1
@@ -411,23 +422,32 @@ def set_demonstrations(self, demonstrations: MCEDemonstrations) -> None:

if isinstance(demonstrations, types.Transitions):
self._set_demo_from_obs(
demonstrations.obs,
types.assert_not_dictobs(demonstrations.obs),
demonstrations.dones,
demonstrations.next_obs,
types.assert_not_dictobs(demonstrations.next_obs),
)
elif isinstance(demonstrations, types.TransitionsMinimal):
self._set_demo_from_obs(demonstrations.obs, None, None)
self._set_demo_from_obs(
types.assert_not_dictobs(demonstrations.obs),
None,
None,
)
elif isinstance(demonstrations, Iterable):
# Demonstrations are a Torch DataLoader or other Mapping iterable
# Collect them together into one big NumPy array. This is inefficient,
# we could compute the running statistics instead, but in practice do
# not expect large dataset sizes together with MCE IRL.
collated_list = collections.defaultdict(list)
collated_list: Dict[
str,
List[types.AnyTensor],
] = collections.defaultdict(list)
for batch in demonstrations:
assert isinstance(batch, Mapping)
for k in ("obs", "dones", "next_obs"):
if k in batch:
collated_list[k].append(batch[k])
x = batch.get(k)
if x is not None:
assert isinstance(x, (np.ndarray, th.Tensor))
collated_list[k].append(x)
collated = {k: np.concatenate(v) for k, v in collated_list.items()}

assert "obs" in collated
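The `types.assert_not_dictobs` calls above (and in the files below) read as a type-narrowing guard: tabular algorithms such as MCE IRL only support array observations, so dict observations should fail loudly. A rough sketch of that pattern, assuming the helper simply rejects DictObs (the real implementation lives in `imitation.data.types`):

```python
from typing import Union

import numpy as np


class DictObs:
    """Stand-in for imitation.data.types.DictObs (dict-valued observations)."""


def assert_not_dictobs(x: Union[np.ndarray, DictObs]) -> np.ndarray:
    """Narrow an observation batch to np.ndarray, failing loudly on DictObs."""
    if isinstance(x, DictObs):
        raise TypeError("Dict observations are not supported by this algorithm.")
    return x


obs = assert_not_dictobs(np.arange(4))  # plain arrays pass through unchanged
```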
4 changes: 2 additions & 2 deletions src/imitation/algorithms/preference_comparisons.py
@@ -465,9 +465,9 @@ def rewards(self, transitions: Transitions) -> th.Tensor:
Shape - (num_transitions, ) for Single reward network and
(num_transitions, num_networks) for ensemble of networks.
"""
state = transitions.obs
state = types.assert_not_dictobs(transitions.obs)
action = transitions.acts
next_state = transitions.next_obs
next_state = types.assert_not_dictobs(transitions.next_obs)
done = transitions.dones
if self.ensemble_model is not None:
rews_np = self.ensemble_model.predict_processed_all(
10 changes: 5 additions & 5 deletions src/imitation/data/buffer.py
@@ -1,6 +1,5 @@
"""Buffers to store NumPy arrays and transitions in."""

import dataclasses
from typing import Any, Mapping, Optional, Tuple

import numpy as np
@@ -368,15 +367,16 @@ def from_data(
Returns:
A new ReplayBuffer.
"""
obs_shape = transitions.obs.shape[1:]
obs = types.assert_not_dictobs(transitions.obs)
obs_shape = obs.shape[1:]
act_shape = transitions.acts.shape[1:]
if capacity is None:
capacity = transitions.obs.shape[0]
capacity = obs.shape[0]
instance = cls(
capacity=capacity,
obs_shape=obs_shape,
act_shape=act_shape,
obs_dtype=transitions.obs.dtype,
obs_dtype=obs.dtype,
act_dtype=transitions.acts.dtype,
)
instance.store(transitions, truncate_ok=truncate_ok)
@@ -406,7 +406,7 @@ def store(self, transitions: types.Transitions, truncate_ok: bool = True) -> None:
Raises:
ValueError: The arguments didn't have the same length.
""" # noqa: DAR402
trans_dict = dataclasses.asdict(transitions)
trans_dict = types.dataclass_quick_asdict(transitions)
# Remove unnecessary fields
trans_dict = {k: trans_dict[k] for k in self._buffer.sample_shapes.keys()}
self._buffer.store(trans_dict, truncate_ok=truncate_ok)
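The switch from `dataclasses.asdict` to `types.dataclass_quick_asdict` above is presumably motivated by the fact that `dataclasses.asdict` recurses into nested dataclasses and deep-copies every value, which would both mangle a DictObs field and copy large observation arrays. A minimal sketch of what such a shallow helper could look like (hypothetical, not the PR's implementation):

```python
import dataclasses
from typing import Any, Dict

import numpy as np


@dataclasses.dataclass
class Transitions:  # toy example, not imitation.data.types.Transitions
    obs: np.ndarray
    acts: np.ndarray


def dataclass_quick_asdict(obj: Any) -> Dict[str, Any]:
    """Shallow field dict: values are kept as-is, no deepcopy, no recursion."""
    return {f.name: getattr(obj, f.name) for f in dataclasses.fields(obj)}


trans = Transitions(obs=np.zeros((2, 3)), acts=np.zeros((2, 1)))
assert dataclass_quick_asdict(trans)["obs"] is trans.obs  # same array object
assert dataclasses.asdict(trans)["obs"] is not trans.obs  # deep copy
```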
2 changes: 2 additions & 0 deletions src/imitation/data/huggingface_utils.py
@@ -124,6 +124,8 @@ def trajectories_to_dict(
],
terminal=[traj.terminal for traj in trajectories],
)
if any(isinstance(traj.obs, types.DictObs) for traj in trajectories):
raise ValueError("DictObs are not currently supported")

# Encode infos as jsonpickled strings
trajectory_dict["infos"] = [