# Source code for texar.tf.modules.encoders.rnn_encoders

# Copyright 2018 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Various RNN encoders.
"""

import functools
import numpy as np

import tensorflow as tf
from tensorflow.contrib.framework import nest

from texar.tf.modules.encoders.encoder_base import EncoderBase
from texar.tf.modules.networks.conv_networks import _to_list
from texar.tf.core import layers
from texar.tf.utils.mode import is_train_mode
from texar.tf.utils.shapes import mask_sequences
from texar.tf.hyperparams import HParams

# pylint: disable=too-many-arguments, too-many-locals, invalid-name, no-member

# Names exported by a wildcard import of this module.
# NOTE(review): the underscore-prefixed helper `_forward_single_output_layer`
# is exported alongside the encoder classes -- presumably because other
# modules import it; confirm before removing it from this list.
__all__ = [
    "_forward_single_output_layer",
    "RNNEncoderBase",
    "UnidirectionalRNNEncoder",
    "BidirectionalRNNEncoder"
]


def _default_output_layer_hparams():
    """Returns the default hyperparameters of the dense output layers.

    A fresh dictionary (with a fresh `dropout_layer_ids` list) is created on
    every call, so callers may mutate the result freely.

    The `"@no_typecheck"` entry lists the fields that accept values of
    several types (presumably consumed by `HParams` to skip type checking
    for those fields -- confirm against `HParams`).
    """
    untyped_fields = ["activation", "final_layer_activation",
                      "layer_size", "dropout_layer_ids"]
    defaults = dict(
        num_layers=0,
        layer_size=128,
        activation="identity",
        final_layer_activation=None,
        other_dense_kwargs=None,
        dropout_layer_ids=[],
        dropout_rate=0.5,
        variational_dropout=False,
    )
    defaults["@no_typecheck"] = untyped_fields
    return defaults


def _build_dense_output_layer(hparams):
    """Creates the dense output layer(s) described by :attr:`hparams`.

    Args:
        hparams: Output layer hyperparameters, with the fields returned by
            :func:`_default_output_layer_hparams`.

    Returns:
        `None` if `hparams.num_layers <= 0`; a single layer if exactly one
        layer is built; otherwise a list of layers, in application order.

    Raises:
        ValueError: If `hparams.other_dense_kwargs` is neither empty, an
            `HParams`, nor a dict.
    """
    num_layers = hparams.num_layers
    if num_layers <= 0:
        return None

    sizes = _to_list(
        hparams.layer_size, 'output_layer.layer_size', num_layers)

    extra_kwargs = hparams.other_dense_kwargs or {}
    if isinstance(extra_kwargs, HParams):
        extra_kwargs = extra_kwargs.todict()
    if not isinstance(extra_kwargs, dict):
        raise ValueError(
            "hparams 'output_layer.other_dense_kwargs' must be a dict.")

    last_idx = num_layers - 1
    built = []
    for idx in range(num_layers):
        # Only the last layer uses the dedicated final activation.
        act = (hparams.final_layer_activation if idx == last_idx
               else hparams.activation)

        dense_kwargs = dict(units=sizes[idx],
                            activation=act,
                            name="dense_%d" % (idx + 1))
        # User-supplied kwargs take precedence over the ones derived above.
        dense_kwargs.update(extra_kwargs)

        built.append(layers.get_layer(
            hparams={"type": "Dense", "kwargs": dense_kwargs}))

    return built[0] if len(built) == 1 else built


def _forward_single_output_layer(inputs, input_size, output_layer):
    """Forwards the input through a single output layer.

    The two leading (batch/time) dimensions are collapsed before the layer is
    applied and restored afterwards.

    Args:
        inputs: A Tensor of shape `[batch_size, max_time] + input_size` if
            :attr:`time_major=False`, or shape
            `[max_time, batch_size] + input_size` if :attr:`time_major=True`.
        input_size: An `int` or 1D `int` array.
        output_layer: The layer to apply. It is called on the flattened
            inputs and must provide `compute_output_shape`.

    Returns:
        A pair `(output, output_size)`, where `output` has the same two
        leading dimensions as :attr:`inputs` followed by `output_size`, and
        `output_size` is a 1D numpy array.
    """
    flat_dim = np.prod(input_size)
    # Collapse everything but the feature dimension(s) and feed the layer.
    flat_outputs = output_layer(tf.reshape(inputs, [-1, flat_dim]))
    # Per-step output size, obtained by dropping the batch dimension.
    output_size = np.array(
        output_layer.compute_output_shape([1, flat_dim]).as_list()[1:])
    # Restore [batch_size/max_time, max_time/batch_size] + output_size.
    leading_dims = tf.shape(inputs)[:2]
    restored_shape = tf.concat([leading_dims, output_size], axis=0)
    return tf.reshape(flat_outputs, restored_shape), output_size


def _apply_dropout(inputs, time_major, hparams, training):
    """Applies dropout to the inputs.

    Args:
        inputs: A Tensor of shape `[batch_size, max_time, dim]`
            if :attr:`time_major=False`, or shape
            `[max_time, batch_size, dim]` if :attr:`time_major=True`.
        time_major (bool): Which of the two leading axes is the time axis.
        hparams: Provides `dropout_rate` and `variational_dropout`.
        training: Passed through to `tf.layers.dropout`.

    Returns:
        The result of `tf.layers.dropout`.
    """
    if not hparams.variational_dropout:
        noise_shape = None
    elif time_major:
        # Size 1 on the time axis (axis 0).
        noise_shape = [1, None, None]
    else:
        # Size 1 on the time axis (axis 1).
        noise_shape = [None, 1, None]
    return tf.layers.dropout(inputs,
                             rate=hparams.dropout_rate,
                             noise_shape=noise_shape,
                             training=training)


def _forward_output_layers(inputs, input_size, output_layer, time_major,
                           hparams, mode, sequence_length=None):
    """Forwards inputs through the output layers.

    Args:
        inputs: A Tensor of shape `[batch_size, max_time] + input_size` if
            :attr:`time_major=False`, or shape
            `[max_time, batch_size] + input_size` if :attr:`time_major=True`.
        input_size: An `int` or 1D `int` array, the size of :attr:`inputs`.
        output_layer: `None` (inputs are returned untouched), a single layer,
            or a list of layers built from :attr:`hparams`.
        time_major (bool): The shape format of :attr:`inputs`.
        hparams: The hyperparameters the layers were built from, or `None`
            if :attr:`output_layer` was given to the encoder constructor.
        mode: Used to decide whether dropout is active.
        sequence_length (optional): If given, the outputs are masked with
            `mask_sequences`.

    Returns:
        A pair :attr:`(outputs, outputs_size)`, where

        - :attr:`outputs`: A Tensor with the same two leading dimensions \
          as :attr:`inputs`, followed by `outputs_size`.

        - :attr:`outputs_size`: An `int` or 1D `int` array representing the \
          output size.

    Raises:
        ValueError: If :attr:`hparams` is `None` and :attr:`output_layer` is
            a list or tuple.
    """
    if output_layer is None:
        return inputs, input_size

    if hparams is not None:
        # The layers were built from hparams: there may be several of them,
        # optionally interleaved with dropout.
        layer_seq = _to_list(output_layer)

        dropout_ids = _to_list(hparams.dropout_layer_ids)
        if len(dropout_ids) > 0:
            training = is_train_mode(mode)

        output, output_size = inputs, input_size
        for idx, layer in enumerate(layer_seq):
            if idx in dropout_ids:
                output = _apply_dropout(output, time_major, hparams, training)
            output, output_size = _forward_single_output_layer(
                output, output_size, layer)

        # Index == number of layers means dropout on the final output.
        if len(layer_seq) in dropout_ids:
            output = _apply_dropout(output, time_major, hparams, training)
    else:
        # The layer was handed to the constructor: exactly one is allowed.
        if isinstance(output_layer, (list, tuple)):
            raise ValueError('output_layer must not be a list or tuple.')
        output, output_size = _forward_single_output_layer(
            inputs, input_size, output_layer)

    if sequence_length is not None:
        output = mask_sequences(
            output, sequence_length, time_major=time_major, tensor_rank=3)

    return output, output_size


def _apply_rnn_encoder_output_layer(output_layer, time_major, hparams, mode,
                                    cell_outputs, cell_output_size):
    """Applies the output layer(s) to every tensor of the RNN cell outputs.

    :attr:`cell_outputs` and :attr:`cell_output_size` may be (nested)
    structures. They are flattened, processed pairwise with
    :func:`_forward_output_layers`, and the results are packed back into the
    structure of :attr:`cell_outputs`.

    Returns:
        A pair `(outputs, output_size)`, both having the nest structure of
        :attr:`cell_outputs`.
    """
    def _forward(tensor, size):
        return _forward_output_layers(
            inputs=tensor,
            input_size=size,
            output_layer=output_layer,
            time_major=time_major,
            hparams=hparams,
            mode=mode)

    flat_tensors = nest.flatten(cell_outputs)
    flat_sizes = nest.flatten(cell_output_size)
    results = list(map(_forward, flat_tensors, flat_sizes))
    # Transpose [(output, size), ...] into (outputs...), (sizes...).
    outputs_flat, output_size_flat = zip(*results)
    outputs = nest.pack_sequence_as(cell_outputs, outputs_flat)
    output_size = nest.pack_sequence_as(cell_outputs, output_size_flat)
    return outputs, output_size


class RNNEncoderBase(EncoderBase):
    """Base class for all RNN encoder classes to inherit.

    Subclasses must override :meth:`_build`.

    Args:
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparameters will be set to default values. See
            :meth:`default_hparams` for the hyperparameter structure and
            default values.
    """

    def __init__(self, hparams=None):
        EncoderBase.__init__(self, hparams)

    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                "name": "rnn_encoder"
            }
        """
        return {
            "name": "rnn_encoder"
        }

    def _build(self, inputs, *args, **kwargs):
        """Encodes the inputs.

        Args:
            inputs: Inputs to the encoder.
            *args: Other arguments.
            **kwargs: Keyword arguments.

        Returns:
            Encoding results.

        Raises:
            NotImplementedError: Always; subclasses provide the
                implementation.
        """
        raise NotImplementedError


class UnidirectionalRNNEncoder(RNNEncoderBase):
    """One directional RNN encoder.

    Args:
        cell: (RNNCell, optional) If not specified,
            a cell is created as specified in :attr:`hparams["rnn_cell"]`.
        cell_dropout_mode (optional): A Tensor taking value of
            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which
            toggles dropout in the RNN cell (e.g., activates dropout in
            TRAIN mode). If `None`, :func:`~texar.tf.global_mode` is used.
            Ignored if :attr:`cell` is given.
        output_layer (optional): An instance of
            :tf_main:`tf.layers.Layer <layers/Layer>`. Applies to the RNN cell
            output of each step. If `None` (default), the output layer is
            created as specified in :attr:`hparams["output_layer"]`.
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparameters will be set to default values. See
            :meth:`default_hparams` for the hyperparameter structure and
            default values.

    See :meth:`_build` for the inputs and outputs of the encoder.

    Example:

        .. code-block:: python

            # Use with embedder
            embedder = WordEmbedder(vocab_size, hparams=emb_hparams)
            encoder = UnidirectionalRNNEncoder(hparams=enc_hparams)

            outputs, final_state = encoder(
                inputs=embedder(data_batch['text_ids']),
                sequence_length=data_batch['length'])

    .. document private functions
    .. automethod:: _build
    """

    def __init__(self,
                 cell=None,
                 cell_dropout_mode=None,
                 output_layer=None,
                 hparams=None):
        RNNEncoderBase.__init__(self, hparams)

        # Make RNN cell
        with tf.variable_scope(self.variable_scope):
            if cell is not None:
                self._cell = cell
            else:
                self._cell = layers.get_rnn_cell(
                    self._hparams.rnn_cell, cell_dropout_mode)

        # Make output layer
        with tf.variable_scope(self.variable_scope):
            if output_layer is not None:
                self._output_layer = output_layer
                # `None` hparams marks the layer as externally provided.
                self._output_layer_hparams = None
            else:
                self._output_layer = _build_dense_output_layer(
                    self._hparams.output_layer)
                self._output_layer_hparams = self._hparams.output_layer

    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                "rnn_cell": default_rnn_cell_hparams(),
                "output_layer": {
                    "num_layers": 0,
                    "layer_size": 128,
                    "activation": "identity",
                    "final_layer_activation": None,
                    "other_dense_kwargs": None,
                    "dropout_layer_ids": [],
                    "dropout_rate": 0.5,
                    "variational_dropout": False
                },
                "name": "unidirectional_rnn_encoder"
            }

        Here:

        "rnn_cell": dict
            A dictionary of RNN cell hyperparameters. Ignored if
            :attr:`cell` is given to the encoder constructor.

            The default value is defined in
            :func:`~texar.tf.core.default_rnn_cell_hparams`.

        "output_layer": dict
            Output layer hyperparameters. Ignored if :attr:`output_layer`
            is given to the encoder constructor. Includes:

            "num_layers": int
                The number of output (dense) layers. Set to 0 to avoid any
                output layers applied to the cell outputs.

            "layer_size": int or list
                The size of each of the output (dense) layers.

                If an `int`, each output layer will have the same size. If
                a list, the length must equal to :attr:`num_layers`.

            "activation": str or callable or None
                Activation function for each of the output (dense)
                layer except for the final layer. This can be
                a function, or its string name or module path.
                If function name is given, the function must be from
                module :tf_main:`tf.nn <nn>` or :tf_main:`tf < >`.
                For example

                .. code-block:: python

                    "activation": "relu" # function name
                    "activation": "my_module.my_activation_fn" # module path
                    "activation": my_module.my_activation_fn # function

                Default is `"identity"` which maintains a linear activation.

            "final_layer_activation": str or callable or None
                The activation function for the final output layer.

            "other_dense_kwargs": dict or None
                Other keyword arguments to construct each of the output
                dense layers, e.g., `use_bias`. See
                :tf_main:`Dense <layers/Dense>` for the keyword arguments.

            "dropout_layer_ids": int or list
                The indexes of layers (starting from `0`) whose inputs
                are applied with dropout. The index = :attr:`num_layers`
                means dropout applies to the final layer output. E.g.,

                .. code-block:: python

                    {
                        "num_layers": 2,
                        "dropout_layer_ids": [0, 2]
                    }

                leads to a series of layers as
                `-dropout-layer0-layer1-dropout-`.

                The dropout mode (training or not) is controlled
                by the :attr:`mode` argument of :meth:`_build`.

            "dropout_rate": float
                The dropout rate, between 0 and 1. E.g.,
                `"dropout_rate": 0.1` would drop out 10% of elements.

            "variational_dropout": bool
                Whether the dropout mask is the same across all time steps.

        "name": str
            Name of the encoder
        """
        hparams = RNNEncoderBase.default_hparams()
        hparams.update({
            "rnn_cell": layers.default_rnn_cell_hparams(),
            "output_layer": _default_output_layer_hparams(),
            "name": "unidirectional_rnn_encoder"
        })
        return hparams

    def _build(self,
               inputs,
               sequence_length=None,
               initial_state=None,
               time_major=False,
               mode=None,
               return_cell_output=False,
               return_output_size=False,
               **kwargs):
        """Encodes the inputs.

        Args:
            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`.
                The first two dimensions
                :attr:`batch_size` and :attr:`max_time` are exchanged if
                :attr:`time_major=True` is specified.
            sequence_length (optional): A 1D int tensor of shape `[batch_size]`.
                Sequence lengths
                of the batch inputs. Used to copy-through state and zero-out
                outputs when past a batch element's sequence length.
            initial_state (optional): Initial state of the RNN.
            time_major (bool): The shape format of the :attr:`inputs` and
                :attr:`outputs` Tensors. If `True`, these tensors are of shape
                `[max_time, batch_size, depth]`. If `False` (default),
                these tensors are of shape `[batch_size, max_time, depth]`.
            mode (optional): A tensor taking value in
                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including
                `TRAIN`, `EVAL`, and `PREDICT`. Controls output layer dropout
                if the output layer is specified with :attr:`hparams`.
                If `None` (default), :func:`texar.tf.global_mode`
                is used.
            return_cell_output (bool): Whether to return the output of the RNN
                cell. This is the results prior to the output layer.
            return_output_size (bool): Whether to return the size of the
                output (i.e., the results after output layers).
            **kwargs: Optional keyword arguments of
                :tf_main:`tf.nn.dynamic_rnn <nn/dynamic_rnn>`,
                such as `swap_memory`, `dtype`, `parallel_iterations`, etc.
                If neither `dtype` nor :attr:`initial_state` is given,
                `dtype` defaults to `tf.float32`.

        Returns:
            - By default (both `return_cell_output` and \
            `return_output_size` are False), returns a pair \
            :attr:`(outputs, final_state)`

                - :attr:`outputs`: The RNN output tensor by the output layer \
                (if exists) or the RNN cell (otherwise). The tensor is of \
                shape `[batch_size, max_time, output_size]` if \
                `time_major` is False, or \
                `[max_time, batch_size, output_size]` if \
                `time_major` is True. \
                If RNN cell output is a (nested) tuple of Tensors, then the \
                :attr:`outputs` will be a (nested) tuple having the same \
                nest structure as the cell output.

                - :attr:`final_state`: The final state of the RNN, which is a \
                Tensor of shape `[batch_size] + cell.state_size` or \
                a (nested) tuple of Tensors if `cell.state_size` is a (nested)\
                tuple.

            - If `return_cell_output` is True, returns a triple \
            :attr:`(outputs, final_state, cell_outputs)`

                - :attr:`cell_outputs`: The outputs by the RNN cell prior to \
                the \
                output layer, having the same structure with :attr:`outputs` \
                except for the `output_dim`.

            - If `return_output_size` is `True`, returns a tuple \
            :attr:`(outputs, final_state, output_size)`

                - :attr:`output_size`: A (possibly nested tuple of) int \
                representing the size of :attr:`outputs`. If a single int or \
                an int array, then `outputs` has shape \
                `[batch/time, time/batch] + output_size`. If \
                a (nested) tuple, then `output_size` has the same \
                structure as with `outputs`.

            - If both `return_cell_output` and \
            `return_output_size` are True, returns \
            :attr:`(outputs, final_state, cell_outputs, output_size)`.
        """
        if initial_state is None:
            # Without an initial state, `dynamic_rnn` needs an explicit dtype;
            # fall back to float32 unless the caller supplied one.
            kwargs.setdefault('dtype', tf.float32)

        cell_outputs, state = tf.nn.dynamic_rnn(
            cell=self._cell,
            inputs=inputs,
            sequence_length=sequence_length,
            initial_state=initial_state,
            time_major=time_major,
            **kwargs)

        outputs, output_size = _apply_rnn_encoder_output_layer(
            self._output_layer, time_major, self._output_layer_hparams,
            mode, cell_outputs, self._cell.output_size)

        if not self._built:
            self._add_internal_trainable_variables()
            # Add trainable variables of `self._cell` and `self._output_layer`
            # which may be constructed externally.
            self._add_trainable_variable(
                layers.get_rnn_cell_trainable_variables(self._cell))
            if self._output_layer and \
                    not isinstance(self._output_layer, (list, tuple)):
                self._add_trainable_variable(
                    self._output_layer.trainable_variables)
            self._built = True

        # Optional extras are appended in a fixed order.
        rets = (outputs, state)
        if return_cell_output:
            rets += (cell_outputs, )
        if return_output_size:
            rets += (output_size, )
        return rets

    @property
    def cell(self):
        """The RNN cell.
        """
        return self._cell

    @property
    def state_size(self):
        """The state size of encoder cell.

        Same as :attr:`encoder.cell.state_size`.
        """
        return self.cell.state_size

    @property
    def output_layer(self):
        """The output layer.
        """
        return self._output_layer


class BidirectionalRNNEncoder(RNNEncoderBase):
    """Bidirectional forward-backward RNN encoder.

    Args:
        cell_fw (RNNCell, optional): The forward RNN cell. If not given,
            a cell is created as specified in :attr:`hparams["rnn_cell_fw"]`.
        cell_bw (RNNCell, optional): The backward RNN cell. If not given,
            a cell is created as specified in :attr:`hparams["rnn_cell_bw"]`.
        cell_dropout_mode (optional): A tensor taking value of
            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which
            toggles dropout in the RNN cells (e.g., activates dropout in
            TRAIN mode). If `None`, :func:`~texar.tf.global_mode()` is
            used. Ignored if respective cell is given.
        output_layer_fw (optional): An instance of
            :tf_main:`tf.layers.Layer <layers/Layer>`. Apply to the forward
            RNN cell output of each step. If `None` (default), the output
            layer is created as specified in :attr:`hparams["output_layer_fw"]`.
        output_layer_bw (optional): An instance of
            :tf_main:`tf.layers.Layer <layers/Layer>`. Apply to the backward
            RNN cell output of each step. If `None` (default), the output
            layer is created as specified in :attr:`hparams["output_layer_bw"]`.
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparameters will be set to default values. See
            :meth:`default_hparams` for the hyperparameter structure and
            default values.

    See :meth:`_build` for the inputs and outputs of the encoder.

    Example:

        .. code-block:: python

            # Use with embedder
            embedder = WordEmbedder(vocab_size, hparams=emb_hparams)
            encoder = BidirectionalRNNEncoder(hparams=enc_hparams)

            outputs, final_state = encoder(
                inputs=embedder(data_batch['text_ids']),
                sequence_length=data_batch['length'])
            # outputs == (outputs_fw, outputs_bw)
            # final_state == (final_state_fw, final_state_bw)

    .. document private functions
    .. automethod:: _build
    """

    def __init__(self,
                 cell_fw=None,
                 cell_bw=None,
                 cell_dropout_mode=None,
                 output_layer_fw=None,
                 output_layer_bw=None,
                 hparams=None):
        RNNEncoderBase.__init__(self, hparams)

        # RNN cells: use the given ones, otherwise build them from hparams.
        with tf.variable_scope(self.variable_scope):
            if cell_fw is None:
                cell_fw = layers.get_rnn_cell(
                    self._hparams.rnn_cell_fw, cell_dropout_mode)
            self._cell_fw = cell_fw

            if cell_bw is None:
                # With a shared config the backward cell is built from the
                # forward cell's hyperparameters (a separate cell instance).
                if self._hparams.rnn_cell_share_config:
                    bw_cell_hparams = self._hparams.rnn_cell_fw
                else:
                    bw_cell_hparams = self._hparams.rnn_cell_bw
                cell_bw = layers.get_rnn_cell(
                    bw_cell_hparams, cell_dropout_mode)
            self._cell_bw = cell_bw

        # Output layers: `None` hparams marks an externally provided layer.
        with tf.variable_scope(self.variable_scope):
            if output_layer_fw is None:
                self._output_layer_fw = _build_dense_output_layer(
                    self._hparams.output_layer_fw)
                self._output_layer_hparams_fw = self._hparams.output_layer_fw
            else:
                self._output_layer_fw = output_layer_fw
                self._output_layer_hparams_fw = None

            if output_layer_bw is None:
                if self._hparams.output_layer_share_config:
                    bw_layer_hparams = self._hparams.output_layer_fw
                else:
                    bw_layer_hparams = self._hparams.output_layer_bw
                self._output_layer_bw = _build_dense_output_layer(
                    bw_layer_hparams)
                self._output_layer_hparams_bw = bw_layer_hparams
            else:
                self._output_layer_bw = output_layer_bw
                self._output_layer_hparams_bw = None

    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                "rnn_cell_fw": default_rnn_cell_hparams(),
                "rnn_cell_bw": default_rnn_cell_hparams(),
                "rnn_cell_share_config": True,
                "output_layer_fw": {
                    "num_layers": 0,
                    "layer_size": 128,
                    "activation": "identity",
                    "final_layer_activation": None,
                    "other_dense_kwargs": None,
                    "dropout_layer_ids": [],
                    "dropout_rate": 0.5,
                    "variational_dropout": False
                },
                "output_layer_bw": {
                    # Same hyperparams and default values as "output_layer_fw"
                    # ...
                },
                "output_layer_share_config": True,
                "name": "bidirectional_rnn_encoder"
            }

        Here:

        "rnn_cell_fw": dict
            Hyperparameters of the forward RNN cell.
            Ignored if :attr:`cell_fw` is given to the encoder constructor.

            The default value is defined in
            :func:`~texar.tf.core.default_rnn_cell_hparams`.

        "rnn_cell_bw": dict
            Hyperparameters of the backward RNN cell.
            Ignored if :attr:`cell_bw` is given to the encoder constructor,
            or if :attr:`"rnn_cell_share_config"` is `True`.

            The default value is defined in
            :meth:`~texar.tf.core.default_rnn_cell_hparams`.

        "rnn_cell_share_config": bool
            Whether the backward cell uses the hyperparameters of the
            forward cell. Note that the cell parameters (variables) are not
            shared.

        "output_layer_fw": dict
            Hyperparameters of the forward output layer. Ignored if
            :attr:`output_layer_fw` is given to the constructor.
            See the "output_layer" field of
            :meth:`~texar.tf.modules.UnidirectionalRNNEncoder.default_hparams` for
            details.

        "output_layer_bw": dict
            Hyperparameters of the backward output layer. Ignored if
            :attr:`output_layer_bw` is given to the constructor. Has the
            same structure and defaults as :attr:`"output_layer_fw"`.

            Ignored if :attr:`"output_layer_share_config"` is True.

        "output_layer_share_config": bool
            Whether the backward output layer uses the hyperparameters of
            the forward output layer. Note that the layer parameters
            (variables) are not shared.

        "name": str
            Name of the encoder
        """
        hparams = RNNEncoderBase.default_hparams()
        # Cell and output-layer defaults are created separately for each
        # direction, so the two configs never share a dictionary.
        hparams["rnn_cell_fw"] = layers.default_rnn_cell_hparams()
        hparams["rnn_cell_bw"] = layers.default_rnn_cell_hparams()
        hparams["rnn_cell_share_config"] = True
        hparams["output_layer_fw"] = _default_output_layer_hparams()
        hparams["output_layer_bw"] = _default_output_layer_hparams()
        hparams["output_layer_share_config"] = True
        hparams["name"] = "bidirectional_rnn_encoder"
        return hparams

    def _build(self,
               inputs,
               sequence_length=None,
               initial_state_fw=None,
               initial_state_bw=None,
               time_major=False,
               mode=None,
               return_cell_output=False,
               return_output_size=False,
               **kwargs):
        """Encodes the inputs.

        Args:
            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`.
                The first two dimensions
                `batch_size` and `max_time` may be exchanged if
                `time_major=True` is specified.
            sequence_length (optional): A 1D int tensor of shape `[batch_size]`.
                Sequence lengths
                of the batch inputs. Used to copy-through state and zero-out
                outputs when past a batch element's sequence length.
            initial_state_fw (optional): Initial state of the forward RNN.
            initial_state_bw (optional): Initial state of the backward RNN.
            time_major (bool): The shape format of the :attr:`inputs` and
                :attr:`outputs` Tensors. If `True`, these tensors are of shape
                `[max_time, batch_size, depth]`. If `False` (default),
                these tensors are of shape `[batch_size, max_time, depth]`.
            mode (optional): A tensor taking value in
                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including
                `TRAIN`, `EVAL`, and `PREDICT`. Controls output layer dropout
                if the output layer is specified with :attr:`hparams`.
                If `None` (default), :func:`texar.tf.global_mode()`
                is used.
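            return_cell_output (bool): Whether to return the output of the RNN
                cell. This is the results prior to the output layer.
            return_output_size (bool): Whether to return the size of the
                output (i.e., the results after output layers).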
            **kwargs: Optional keyword arguments of
                :tf_main:`tf.nn.dynamic_rnn <nn/dynamic_rnn>`,
                such as `swap_memory`, `dtype`, `parallel_iterations`, etc.

        Returns:
            - By default (both `return_cell_output` and `return_output_size` \
            are False), returns a pair :attr:`(outputs, final_state)`

                - :attr:`outputs`: A tuple `(outputs_fw, outputs_bw)` \
                containing \
                the forward and the backward RNN outputs, each of which is of \
                shape `[batch_size, max_time, output_dim]` if \
                `time_major` is False, or \
                `[max_time, batch_size, output_dim]` if \
                `time_major` is True. \
                If RNN cell output is a (nested) tuple of Tensors, then \
                `outputs_fw` and `outputs_bw` will be a (nested) tuple having \
                the same structure as the cell output.

                - :attr:`final_state`: A tuple \
                `(final_state_fw, final_state_bw)` \
                containing the final states of the forward and backward \
                RNNs, each of which is a \
                Tensor of shape `[batch_size] + cell.state_size`, or \
                a (nested) tuple of Tensors if `cell.state_size` is a (nested)\
                tuple.

            - If `return_cell_output` is True, returns a triple \
            :attr:`(outputs, final_state, cell_outputs)` where

                - :attr:`cell_outputs`: A tuple \
                `(cell_outputs_fw, cell_outputs_bw)` containing the outputs \
                by the forward and backward RNN cells prior to the \
                output layers, having the same structure with :attr:`outputs` \
                except for the `output_dim`.

            - If `return_output_size` is True, returns a tuple \
            :attr:`(outputs, final_state, output_size)` where

                - :attr:`output_size`: A tuple \
                `(output_size_fw, output_size_bw)` containing the size of \
                `outputs_fw` and `outputs_bw`, respectively. \
                Take `*_fw` for example, \
                `output_size_fw` is a (possibly nested tuple of) int. \
                If a single int or an int array, then `outputs_fw` has shape \
                `[batch/time, time/batch] + output_size_fw`. If \
                a (nested) tuple, then `output_size_fw` has the same \
                structure as with `outputs_fw`. The same applies to  \
                `output_size_bw`.

            - If both `return_cell_output` and \
            `return_output_size` are True, returns \
            :attr:`(outputs, final_state, cell_outputs, output_size)`.
        """
        no_initial_state = initial_state_fw is None and initial_state_bw is None
        if ('dtype' not in kwargs) and no_initial_state:
            cell_outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=self._cell_fw,
                cell_bw=self._cell_bw,
                inputs=inputs,
                sequence_length=sequence_length,
                initial_state_fw=initial_state_fw,
                initial_state_bw=initial_state_bw,
                time_major=time_major,
                dtype=tf.float32,
                **kwargs)
        else:
            cell_outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=self._cell_fw,
                cell_bw=self._cell_bw,
                inputs=inputs,
                sequence_length=sequence_length,
                initial_state_fw=initial_state_fw,
                initial_state_bw=initial_state_bw,
                time_major=time_major,
                **kwargs)

        outputs_fw, output_size_fw = _apply_rnn_encoder_output_layer(
            self._output_layer_fw, time_major, self._output_layer_hparams_fw,
            mode, cell_outputs[0], self._cell_fw.output_size)

        outputs_bw, output_size_bw = _apply_rnn_encoder_output_layer(
            self._output_layer_bw, time_major, self._output_layer_hparams_bw,
            mode, cell_outputs[1], self._cell_bw.output_size)

        outputs = (outputs_fw, outputs_bw)
        output_size = (output_size_fw, output_size_bw)

        if not self._built:
            self._add_internal_trainable_variables()
            # Add trainable variables of cells and output layers
            # which may be constructed externally.
            self._add_trainable_variable(
                layers.get_rnn_cell_trainable_variables(self._cell_fw))
            self._add_trainable_variable(
                layers.get_rnn_cell_trainable_variables(self._cell_bw))
            if self._output_layer_fw and \
                    not isinstance(self._output_layer_fw, (list, tuple)):
                self._add_trainable_variable(
                    self._output_layer_fw.trainable_variables)
            if self._output_layer_bw and \
                    not isinstance(self._output_layer_bw, (list, tuple)):
                self._add_trainable_variable(
                    self._output_layer_bw.trainable_variables)
            self._built = True

        returns = (outputs, states)
        if return_cell_output:
            returns += (cell_outputs, )
        if return_output_size:
            returns += (output_size, )
        return returns

    @property
    def cell_fw(self):
        """The RNN cell that this encoder runs in the forward direction
        (the one passed as `cell_fw` to the bidirectional RNN).
        """
        forward_cell = self._cell_fw
        return forward_cell

    @property
    def cell_bw(self):
        """The RNN cell that this encoder runs in the backward direction
        (the one passed as `cell_bw` to the bidirectional RNN).
        """
        backward_cell = self._cell_bw
        return backward_cell

    @property
    def state_size_fw(self):
        """State size reported by the forward RNN cell.

        Equivalent to :attr:`encoder.cell_fw.state_size`.
        """
        forward_cell = self.cell_fw
        return forward_cell.state_size

    @property
    def state_size_bw(self):
        """State size reported by the backward RNN cell.

        Equivalent to :attr:`encoder.cell_bw.state_size`.
        """
        backward_cell = self.cell_bw
        return backward_cell.state_size

    @property
    def output_layer_fw(self):
        """The output layer that is applied to the outputs of the
        forward RNN cell.
        """
        forward_layer = self._output_layer_fw
        return forward_layer

    @property
    def output_layer_bw(self):
        """The output layer that is applied to the outputs of the
        backward RNN cell.
        """
        backward_layer = self._output_layer_bw
        return backward_layer