Source code for texar.tf.agents.episodic_agent_base

# Copyright 2018 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Base class for episodic reinforcement learning agents.
"""

import tensorflow as tf

from texar.tf.agents.agent_base import AgentBase

# pylint: disable=too-many-instance-attributes


class EpisodicAgentBase(AgentBase):
    """Base class inherited by episodic RL agents.

    An agent is a wrapper of the **training process** that trains a model
    with RL algorithms. The agent itself does not create new trainable
    variables.

    An episodic RL agent typically provides 3 interfaces, namely,
    :meth:`reset`, :meth:`get_action` and :meth:`observe`, and is used as
    in the following example.

    Example:

        .. code-block:: python

            env = SomeEnvironment(...)
            agent = PGAgent(...)

            while True:
                # Starts one episode
                agent.reset()
                observ = env.reset()
                while True:
                    action = agent.get_action(observ)
                    next_observ, reward, terminal = env.step(action)
                    agent.observe(reward, terminal)
                    observ = next_observ
                    if terminal:
                        break

    Args:
        env_config: An instance of :class:`~texar.tf.agents.EnvConfig`
            specifying action space, observation space, reward range, etc.
            Use :func:`~texar.tf.agents.get_gym_env_config` to create an
            EnvConfig from a gym environment.
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparameters will be set to default values. See
            :meth:`default_hparams` for the hyperparameter structure and
            default values.
    """

    def __init__(self, env_config, hparams=None):
        AgentBase.__init__(self, hparams)

        self._env_config = env_config

        # Wrap the subclass hooks in templates so that any variables they
        # create are shared across repeated calls.
        self._reset_tmplt_fn = tf.make_template(
            "{}_reset".format(self.name), self._reset)
        self._observe_tmplt_fn = tf.make_template(
            "{}_observe".format(self.name), self._observe)
        self._get_action_tmplt_fn = tf.make_template(
            "{}_get_action".format(self.name), self._get_action)
    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                "name": "agent"
            }
        """
        return {
            'name': 'agent'
        }
    def reset(self):
        """Resets the states to begin a new episode.
        """
        self._reset_tmplt_fn()
    def _reset(self):
        raise NotImplementedError
    def observe(self, reward, terminal, train_policy=True, feed_dict=None):
        """Observes experience from the environment.

        Args:
            reward: Reward of the action. The configuration (e.g., shape)
                of the reward is defined in :attr:`env_config`.
            terminal (bool): Whether the episode is terminated.
            train_policy (bool): Whether to update the policy for this step.
            feed_dict (dict, optional): Any additional data fed to the
                training operator when it is run.
        """
        return self._observe_tmplt_fn(reward, terminal, train_policy, feed_dict)
    def _observe(self, reward, terminal, train_policy, feed_dict):
        raise NotImplementedError
    def get_action(self, observ, feed_dict=None):
        """Gets an action according to the observation.

        Args:
            observ: Observation from the environment.
            feed_dict (dict, optional): Any additional data fed when
                computing the action.

        Returns:
            The action taken according to the policy.
        """
        return self._get_action_tmplt_fn(observ, feed_dict)
    def _get_action(self, observ, feed_dict):
        raise NotImplementedError

    @property
    def env_config(self):
        """Environment configuration.
        """
        return self._env_config
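
The sketch below is an illustrative addition, not part of the Texar source: a minimal subclass showing how the `_reset`, `_observe`, and `_get_action` hooks are meant to be filled in. The `RandomAgent` name and its `n_actions` argument are hypothetical; a real agent would build a policy network and a training op instead of sampling uniformly.

import numpy as np


class RandomAgent(EpisodicAgentBase):
    """Hypothetical agent that ignores rewards and picks discrete actions
    uniformly at random (illustration only).
    """

    def __init__(self, env_config, n_actions, hparams=None):
        EpisodicAgentBase.__init__(self, env_config, hparams)
        self._n_actions = n_actions

    def _reset(self):
        # A random policy keeps no per-episode state to clear.
        pass

    def _observe(self, reward, terminal, train_policy, feed_dict):
        # A learning agent would record the reward here and, when the
        # episode terminates (or periodically), run its training op.
        pass

    def _get_action(self, observ, feed_dict):
        # Sample uniformly from the discrete action space.
        return np.random.randint(self._n_actions)


Usage mirrors the episode loop in the class docstring above: create the agent, call `agent.reset()` at the start of each episode, then alternate `agent.get_action(observ)` and `agent.observe(reward, terminal)` until the environment reports a terminal step.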