Source code for texar.tf.modules.qnets.qnets

# Copyright 2018 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Q networks for RL.
"""

import numpy as np

import tensorflow as tf

from texar.tf.module_base import ModuleBase
from texar.tf.agents.agent_utils import Space
from texar.tf.utils import utils

# pylint: disable=no-member

__all__ = [
    'QNetBase',
    'CategoricalQNet'
]


[docs]class QNetBase(ModuleBase):
    """Base class inheritted by all Q net classes. A Q net takes in states
    and outputs Q value of actions.

    Args:
        network (optional): A network that takes in state and returns
            Q values. For example, an instance of subclass
            of :class:`~texar.tf.modules.FeedForwardNetworkBase`. If `None`,
            a network is created as specified in :attr:`hparams`.
        network_kwargs (dict, optional): Keyword arguments for network
            constructor.
            Note that the `hparams` argument for network
            constructor is specified in the "network_hparams" field of
            :attr:`hparams` and should not be included in `network_kwargs`.
            Ignored if :attr:`network` is given.
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparamerter will be set to default values. See
            :meth:`default_hparams` for the hyperparameter sturcture and
            default values.
    """
    def __init__(self,
                 network=None,
                 network_kwargs=None,
                 hparams=None):
        ModuleBase.__init__(self, hparams=hparams)

        with tf.variable_scope(self.variable_scope):
            self._build_network(network, network_kwargs)

[docs]    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. role:: python(code)
           :language: python

        .. code-block:: python

            {
                'network_type': 'FeedForwardNetwork',
                'network_hparams': {
                    'layers': [
                        {
                            'type': 'Dense',
                            'kwargs': {'units': 256, 'activation': 'relu'}
                        },
                        {
                            'type': 'Dense',
                            'kwargs': {'units': 256, 'activation': 'relu'}
                        },
                    ]
                },
                'name': 'q_net',
            }

        Here:

        "network_type": str or class or instance
            A network that takes in state and returns outputs for
            generating actions. This can be a class, its name or module path,
            or a class instance. Ignored if `network` is given to the
            constructor.

        "network_hparams": dict
            Hyperparameters for the network. With the :attr:`network_kwargs`
            argument to the constructor, a network is created with
            :python:`network_class(**network_kwargs, hparams=network_hparams)`.

            For example, the default values creates a two-layer dense network.

        "name": str
            Name of the Q net.
        """
        return {
            'network_type': 'FeedForwardNetwork',
            'network_hparams': {
                'layers': [
                    {
                        'type': 'Dense',
                        'kwargs': {'units': 256, 'activation': 'relu'}
                    },
                    {
                        'type': 'Dense',
                        'kwargs': {'units': 256, 'activation': 'relu'}
                    },
                ]
            },
            'name': 'q_net',
            '@no_typecheck': ['network_type', 'network_hparams']
        }

    def _build_network(self, network, kwargs):
        if network is not None:
            self._network = network
        else:
            kwargs = utils.get_instance_kwargs(
                kwargs, self._hparams.network_hparams)
            self._network = utils.check_or_get_instance(
                self._hparams.network_type,
                kwargs,
                module_paths=['texar.tf.modules', 'texar.tf.custom'])

    def _build(self, inputs, mode=None):  # pylint: disable=arguments-differ
        raise NotImplementedError

    @property
    def network(self):
        """The network.
        """
        return self._network


[docs]class CategoricalQNet(QNetBase):
    """Q net with categorical scalar action space.

    Args:
        action_space (optional): An instance of :class:`~texar.tf.agents.Space`
            specifying the action space. If not given, an discrete action space
            `[0, high]` is created with `high` specified in :attr:`hparams`.
        network (optional): A network that takes in state and returns
            Q values. For example, an instance of subclass
            of :class:`~texar.tf.modules.FeedForwardNetworkBase`. If `None`,
            a network is created as specified in :attr:`hparams`.
        network_kwargs (dict, optional): Keyword arguments for network
            constructor.
            Note that the `hparams` argument for network
            constructor is specified in the "network_hparams" field of
            :attr:`hparams` and should not be included in `network_kwargs`.
            Ignored if :attr:`network` is given.
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparamerter will be set to default values. See
            :meth:`default_hparams` for the hyperparameter sturcture and
            default values.

    .. document private functions
    .. automethod:: _build
    """
    def __init__(self,
                 action_space=None,
                 network=None,
                 network_kwargs=None,
                 hparams=None):
        QNetBase.__init__(self, hparams=hparams)

        with tf.variable_scope(self.variable_scope):
            if action_space is None:
                action_space = Space(
                    low=0, high=self._hparams.action_space, dtype=np.int32)
            self._action_space = action_space
            self._append_output_layer()

[docs]    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                'network_type': 'FeedForwardNetwork',
                'network_hparams': {
                    'layers': [
                        {
                            'type': 'Dense',
                            'kwargs': {'units': 256, 'activation': 'relu'}
                        },
                        {
                            'type': 'Dense',
                            'kwargs': {'units': 256, 'activation': 'relu'}
                        },
                    ]
                },
                'action_space': 2,
                'make_output_layer': True,
                'name': 'q_net'
            }

        Here:

        "action_space": int
            Upper bound of the action space. The resulting action space is
            all discrete scalar numbers between 0 and the upper bound specified
            here (both inclusive).

        "make_output_layer": bool
            Whether to append a dense layer to the network to transform
            features to Q values. If `False`, the final layer
            output of network must match the action space.

        See :class:`~texar.tf.modules.QNetBase.default_hparams` for details
        of other hyperparameters.
        """
        hparams = QNetBase.default_hparams()
        hparams.update({
            'action_space': 2,
            'make_output_layer': True})
        return hparams

    def _append_output_layer(self):
        if not self._hparams.make_output_layer:
            return

        if self._action_space.shape != ():
            raise ValueError('Only scalar discrete action is supported.')
        else:
            output_size = self._action_space.high - self._action_space.low

        layer_hparams = {
            'type': 'Dense',
            'kwargs': {'units': output_size}}
        self._network.append_layer(layer_hparams)

[docs]    def _build(self, inputs, mode=None):
        """Takes in states and outputs Q values.

        Args:
            inputs: Inputs to the Q net with the first dimension
                the batch dimension.
            mode (optional): A tensor taking value in
                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including
                `TRAIN`, `EVAL`, and `PREDICT`. If `None`,
                :func:`texar.tf.global_mode` is used.

        Returns
            A `dict` including fields `"qvalues"`.
            where

            - **"qvalues"**: A Tensor of shape \
            `[batch_size] + action_space size` containing Q values of all\
            possible actions.
        """
        outputs = {
            "qvalues": self._network(inputs, mode=mode)
        }

        if not self._built:
            self._add_internal_trainable_variables()
            self._add_trainable_variable(self._network.trainable_variables)
            self._built = True

        return outputs

    @property
    def action_space(self):
        """An instance of :class:`~texar.tf.agents.Space` specifiying the
        action space.
        """
        return self._action_space