
# Copyright 2018 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Various RNN encoders.
"""

import functools
import numpy as np

import tensorflow as tf
from tensorflow.contrib.framework import nest

from texar.tf.modules.encoders.encoder_base import EncoderBase
from texar.tf.modules.networks.conv_networks import _to_list
from texar.tf.core import layers
from texar.tf.utils.mode import is_train_mode
from texar.tf.utils.shapes import mask_sequences
from texar.tf.hyperparams import HParams

# pylint: disable=too-many-arguments, too-many-locals, invalid-name, no-member

__all__ = [
    "_forward_single_output_layer",
    "RNNEncoderBase",
    "UnidirectionalRNNEncoder",
    "BidirectionalRNNEncoder"
]


def _default_output_layer_hparams():
    return {
        "num_layers": 0,
        "layer_size": 128,
        "activation": "identity",
        "final_layer_activation": None,
        "other_dense_kwargs": None,
        "dropout_layer_ids": [],
        "dropout_rate": 0.5,
        "variational_dropout": False,
        "@no_typecheck": ["activation", "final_layer_activation",
                          "layer_size", "dropout_layer_ids"]
    }


def _build_dense_output_layer(hparams):
    nlayers = hparams.num_layers

    if nlayers <= 0:
        return None

    layer_size = _to_list(
        hparams.layer_size, 'output_layer.layer_size', nlayers)

    other_kwargs = hparams.other_dense_kwargs or {}
    if isinstance(other_kwargs, HParams):
        other_kwargs = other_kwargs.todict()
    if not isinstance(other_kwargs, dict):
        raise ValueError(
            "hparams 'output_layer.other_dense_kwargs' must be a dict.")

    dense_layers = []
    for i in range(nlayers):
        if i == nlayers - 1:
            activation = hparams.final_layer_activation
        else:
            activation = hparams.activation

        kwargs_i = {"units": layer_size[i],
                    "activation": activation,
                    "name": "dense_%d" % (i + 1)}
        kwargs_i.update(other_kwargs)

        layer_hparams = {"type": "Dense", "kwargs": kwargs_i}
        dense_layers.append(layers.get_layer(hparams=layer_hparams))

    if len(dense_layers) == 1:
        dense_layers = dense_layers[0]

    return dense_layers
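

# Illustrative sketch (not part of the original module): how the two helpers
# above combine. With hypothetical hparams "num_layers": 2 and
# "layer_size": [64, 32], `_build_dense_output_layer` yields two
# `tf.layers.Dense` layers; the first uses "activation" and the last uses
# "final_layer_activation".
def _example_build_dense_output_layer():
    hparams = HParams(
        {"num_layers": 2, "layer_size": [64, 32]},
        _default_output_layer_hparams())
    dense_layers = _build_dense_output_layer(hparams)
    # dense_layers is a list: [Dense(units=64), Dense(units=32)]
    return dense_layers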


def _forward_single_output_layer(inputs, input_size, output_layer):
    """Forwards the input through a single output layer.

    Args:
        inputs: A Tensor of shape `[batch_size, max_time] + input_size` if
            :attr:`time_major=False`, or shape
            `[max_time, batch_size] + input_size` if :attr:`time_major=True`.
        input_size: An `int` or 1D `int` array.
        output_layer: A layer applied to the (flattened) inputs, e.g., an
            instance of :tf_main:`tf.layers.Dense <layers/Dense>`.

    Returns:
        A pair `(output, output_size)` of the layer output and its
        feature size.
    """
    dim = np.prod(input_size)
    # Flatten the batch and time dimensions before feeding to the layer
    inputs_flat = tf.reshape(inputs, [-1, dim])
    output_flat = output_layer(inputs_flat)
    output_size = output_layer.compute_output_shape([1, dim]).as_list()[1:]
    output_size = np.array(output_size)
    # Reshape output to [batch_size/max_time, max_time/batch_size] + output_size
    output_shape = tf.concat([tf.shape(inputs)[:2], output_size], axis=0)
    output = tf.reshape(output_flat, output_shape)
    return output, output_size
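

# Illustrative sketch (not part of the original module) of the
# flatten-apply-reshape trick above, with hypothetical sizes: a
# [batch=32, time=10, dim=8] input is flattened to [320, 8], passed through
# the layer, then reshaped back using the layer's output size.
def _example_forward_single_output_layer():
    inputs = tf.zeros([32, 10, 8])  # [batch_size, max_time, dim]
    layer = tf.layers.Dense(16)
    output, output_size = _forward_single_output_layer(inputs, 8, layer)
    # output has shape [32, 10, 16]; output_size == [16]
    return output, output_size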


def _apply_dropout(inputs, time_major, hparams, training):
    """Applies dropout to the inputs.

    :attr:`inputs` is a Tensor of shape `[batch_size, max_time, dim]`
    if :attr:`time_major=False`, or shape `[max_time, batch_size, dim]`
    if :attr:`time_major=True`.
    """
    noise_shape = None
    if hparams.variational_dropout:
        if time_major:
            noise_shape = [1, None, None]
        else:
            noise_shape = [None, 1, None]
    return tf.layers.dropout(inputs, rate=hparams.dropout_rate,
                             noise_shape=noise_shape, training=training)
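

# Illustrative sketch (not part of the original module): `noise_shape` makes
# the dropout mask broadcast over the time axis. With batch-major inputs and
# "variational_dropout": True, a single [batch_size, 1, dim] mask is sampled
# and reused at every time step; otherwise a fresh mask is sampled per step.
# Hypothetical sizes below.
def _example_apply_dropout():
    hparams = HParams(
        {"variational_dropout": True}, _default_output_layer_hparams())
    inputs = tf.zeros([32, 10, 8])  # [batch_size, max_time, dim]
    return _apply_dropout(inputs, time_major=False, hparams=hparams,
                          training=True)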


def _forward_output_layers(inputs, input_size, output_layer, time_major,
                           hparams, mode, sequence_length=None):
    """Forwards inputs through the output layers.

    Args:
        inputs: A Tensor of shape `[batch_size, max_time] + input_size` if
            :attr:`time_major=False`, or shape
            `[max_time, batch_size] + input_size` if :attr:`time_major=True`.

    Returns:
        A pair :attr:`(output, output_size)`, where

        - :attr:`output`: A Tensor of shape \
          `[batch_size, max_time] + output_size` if \
          :attr:`time_major=False`, or \
          `[max_time, batch_size] + output_size` if :attr:`time_major=True`.

        - :attr:`output_size`: An `int` or 1D `int` array representing the \
          output size.
    """
    if output_layer is None:
        return inputs, input_size

    if hparams is None:
        # output_layer was passed in from the constructor
        if isinstance(output_layer, (list, tuple)):
            raise ValueError('output_layer must not be a list or tuple.')
        output, output_size = _forward_single_output_layer(
            inputs, input_size, output_layer)
    else:
        # output_layer was built based on hparams
        output_layer = _to_list(output_layer)

        dropout_layer_ids = _to_list(hparams.dropout_layer_ids)
        if len(dropout_layer_ids) > 0:
            training = is_train_mode(mode)

        output = inputs
        output_size = input_size
        for i, layer in enumerate(output_layer):
            if i in dropout_layer_ids:
                output = _apply_dropout(output, time_major, hparams, training)
            output, output_size = _forward_single_output_layer(
                output, output_size, layer)

        if len(output_layer) in dropout_layer_ids:
            output = _apply_dropout(output, time_major, hparams, training)

    if sequence_length is not None:
        output = mask_sequences(
            output, sequence_length, time_major=time_major, tensor_rank=3)

    return output, output_size
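

# Illustrative sketch (not part of the original module): chaining the helpers
# above with hypothetical hparams. "dropout_layer_ids": [0, 2] with two
# layers produces the series `-dropout-layer0-layer1-dropout-`.
def _example_forward_output_layers():
    hparams = HParams(
        {"num_layers": 2, "layer_size": 16, "dropout_layer_ids": [0, 2]},
        _default_output_layer_hparams())
    output_layer = _build_dense_output_layer(hparams)
    output, output_size = _forward_output_layers(
        inputs=tf.zeros([32, 10, 8]), input_size=8,
        output_layer=output_layer, time_major=False, hparams=hparams,
        mode=tf.estimator.ModeKeys.TRAIN)
    # output has shape [32, 10, 16]; output_size == [16]
    return output, output_size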


def _apply_rnn_encoder_output_layer(output_layer, time_major, hparams, mode,
                                    cell_outputs, cell_output_size):
    map_func = functools.partial(
        _forward_output_layers,
        output_layer=output_layer,
        time_major=time_major,
        hparams=hparams,
        mode=mode)
    cell_outputs_flat = nest.flatten(cell_outputs)
    cell_output_size_flat = nest.flatten(cell_output_size)
    o = [map_func(inputs=x, input_size=xs)
         for x, xs in zip(cell_outputs_flat, cell_output_size_flat)]
    outputs_flat, output_size_flat = zip(*o)
    outputs = nest.pack_sequence_as(cell_outputs, outputs_flat)
    output_size = nest.pack_sequence_as(cell_outputs, output_size_flat)
    return outputs, output_size
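

# Illustrative sketch (not part of the original module): `nest.flatten` and
# `nest.pack_sequence_as` are what let the output layers apply uniformly even
# when a cell emits a (nested) tuple of outputs. Each flattened leaf tensor
# is processed independently, then re-packed into the original structure.
# Hypothetical shapes below.
def _example_nest_roundtrip():
    structure = (tf.zeros([2, 3]), (tf.zeros([2, 4]), tf.zeros([2, 5])))
    flat = nest.flatten(structure)       # [t1, t2, t3]
    processed = [t * 2.0 for t in flat]  # map over each leaf tensor
    return nest.pack_sequence_as(structure, processed)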


class RNNEncoderBase(EncoderBase):
    """Base class for all RNN encoder classes to inherit.

    Args:
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparameters will be set to default values. See
            :meth:`default_hparams` for the hyperparameter structure and
            default values.
    """

    def __init__(self, hparams=None):
        EncoderBase.__init__(self, hparams)

    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                "name": "rnn_encoder"
            }
        """
        return {
            "name": "rnn_encoder"
        }

    def _build(self, inputs, *args, **kwargs):
        """Encodes the inputs.

        Args:
            inputs: Inputs to the encoder.
            *args: Other arguments.
            **kwargs: Keyword arguments.

        Returns:
            Encoding results.
        """
        raise NotImplementedError


class UnidirectionalRNNEncoder(RNNEncoderBase):
    """One-directional RNN encoder.

    Args:
        cell (RNNCell, optional): If not specified, a cell is created as
            specified in :attr:`hparams["rnn_cell"]`.
        cell_dropout_mode (optional): A Tensor taking value of
            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which
            toggles dropout in the RNN cell (e.g., activates dropout in
            TRAIN mode). If `None`, :func:`~texar.tf.global_mode` is used.
            Ignored if :attr:`cell` is given.
        output_layer (optional): An instance of
            :tf_main:`tf.layers.Layer <layers/Layer>`. Applied to the RNN
            cell output of each step. If `None` (default), the output layer
            is created as specified in :attr:`hparams["output_layer"]`.
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparameters will be set to default values. See
            :meth:`default_hparams` for the hyperparameter structure and
            default values.

    See :meth:`_build` for the inputs and outputs of the encoder.

    Example:

        .. code-block:: python

            # Use with embedder
            embedder = WordEmbedder(vocab_size, hparams=emb_hparams)
            encoder = UnidirectionalRNNEncoder(hparams=enc_hparams)

            outputs, final_state = encoder(
                inputs=embedder(data_batch['text_ids']),
                sequence_length=data_batch['length'])

    .. document private functions
    .. automethod:: _build
    """

    def __init__(self,
                 cell=None,
                 cell_dropout_mode=None,
                 output_layer=None,
                 hparams=None):
        RNNEncoderBase.__init__(self, hparams)

        # Make RNN cell
        with tf.variable_scope(self.variable_scope):
            if cell is not None:
                self._cell = cell
            else:
                self._cell = layers.get_rnn_cell(
                    self._hparams.rnn_cell, cell_dropout_mode)

        # Make output layer
        with tf.variable_scope(self.variable_scope):
            if output_layer is not None:
                self._output_layer = output_layer
                self._output_layer_hparams = None
            else:
                self._output_layer = _build_dense_output_layer(
                    self._hparams.output_layer)
                self._output_layer_hparams = self._hparams.output_layer

    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                "rnn_cell": default_rnn_cell_hparams(),
                "output_layer": {
                    "num_layers": 0,
                    "layer_size": 128,
                    "activation": "identity",
                    "final_layer_activation": None,
                    "other_dense_kwargs": None,
                    "dropout_layer_ids": [],
                    "dropout_rate": 0.5,
                    "variational_dropout": False
                },
                "name": "unidirectional_rnn_encoder"
            }

        Here:

        "rnn_cell": dict
            A dictionary of RNN cell hyperparameters. Ignored if
            :attr:`cell` is given to the encoder constructor.
            The default value is defined in
            :func:`~texar.tf.core.default_rnn_cell_hparams`.

        "output_layer": dict
            Output layer hyperparameters. Ignored if :attr:`output_layer`
            is given to the encoder constructor. Includes:

            "num_layers": int
                The number of output (dense) layers. Set to 0 to avoid any
                output layers applied to the cell outputs.

            "layer_size": int or list
                The size of each of the output (dense) layers.
                If an `int`, each output layer will have the same size.
                If a list, the length must equal :attr:`num_layers`.

            "activation": str or callable or None
                Activation function for each of the output (dense) layers
                except for the final layer. This can be a function, or its
                string name or module path. If a function name is given,
                the function must be from module :tf_main:`tf.nn <nn>` or
                :tf_main:`tf < >`. For example

                .. code-block:: python

                    "activation": "relu" # function name
                    "activation": "my_module.my_activation_fn" # module path
                    "activation": my_module.my_activation_fn # function

                The default value is `"identity"`, i.e., a linear
                activation.

            "final_layer_activation": str or callable or None
                The activation function for the final output layer.

            "other_dense_kwargs": dict or None
                Other keyword arguments to construct each of the output
                dense layers, e.g., `use_bias`. See
                :tf_main:`Dense <layers/Dense>` for the keyword arguments.

            "dropout_layer_ids": int or list
                The indexes of layers (starting from `0`) whose inputs
                have dropout applied. An index equal to
                :attr:`num_layers` means dropout is applied to the final
                layer's output. E.g.,

                .. code-block:: python

                    {
                        "num_layers": 2,
                        "dropout_layer_ids": [0, 2]
                    }

                leads to a series of layers as
                `-dropout-layer0-layer1-dropout-`.

                The dropout mode (training or not) is controlled by the
                :attr:`mode` argument of :meth:`_build`.

            "dropout_rate": float
                The dropout rate, between 0 and 1. E.g.,
                `"dropout_rate": 0.1` would drop out 10% of elements.

            "variational_dropout": bool
                Whether the dropout mask is the same across all time steps.

        "name": str
            Name of the encoder.
        """
        hparams = RNNEncoderBase.default_hparams()
        hparams.update({
            "rnn_cell": layers.default_rnn_cell_hparams(),
            "output_layer": _default_output_layer_hparams(),
            "name": "unidirectional_rnn_encoder"
        })
        return hparams

    def _build(self,
               inputs,
               sequence_length=None,
               initial_state=None,
               time_major=False,
               mode=None,
               return_cell_output=False,
               return_output_size=False,
               **kwargs):
        """Encodes the inputs.

        Args:
            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`.
                The first two dimensions `batch_size` and `max_time` are
                exchanged if :attr:`time_major=True` is specified.
            sequence_length (optional): A 1D int tensor of shape
                `[batch_size]`. Sequence lengths of the batch inputs. Used
                to copy-through state and zero-out outputs when past a
                batch element's sequence length.
            initial_state (optional): Initial state of the RNN.
            time_major (bool): The shape format of the :attr:`inputs` and
                :attr:`outputs` Tensors. If `True`, these tensors are of
                shape `[max_time, batch_size, depth]`. If `False`
                (default), these tensors are of shape
                `[batch_size, max_time, depth]`.
            mode (optional): A tensor taking value in
                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`,
                including `TRAIN`, `EVAL`, and `PREDICT`. Controls output
                layer dropout if the output layer is specified with
                :attr:`hparams`. If `None` (default),
                :func:`texar.tf.global_mode` is used.
            return_cell_output (bool): Whether to return the output of the
                RNN cell. This is the result prior to the output layer.
            return_output_size (bool): Whether to return the size of the
                output (i.e., the results after output layers).
            **kwargs: Optional keyword arguments of
                :tf_main:`tf.nn.dynamic_rnn <nn/dynamic_rnn>`, such as
                `swap_memory`, `dtype`, `parallel_iterations`, etc.

        Returns:
            - By default (both `return_cell_output` and
              `return_output_size` are False), returns a pair
              :attr:`(outputs, final_state)`, where

              - :attr:`outputs`: The RNN output tensor by the output layer
                (if exists) or the RNN cell (otherwise). The tensor is of
                shape `[batch_size, max_time, output_size]` if
                `time_major` is False, or
                `[max_time, batch_size, output_size]` if `time_major` is
                True. If the RNN cell output is a (nested) tuple of
                Tensors, then :attr:`outputs` will be a (nested) tuple
                having the same nest structure as the cell output.

              - :attr:`final_state`: The final state of the RNN, which is
                a Tensor of shape `[batch_size] + cell.state_size` or a
                (nested) tuple of Tensors if `cell.state_size` is a
                (nested) tuple.

            - If `return_cell_output` is True, returns a triple
              :attr:`(outputs, final_state, cell_outputs)`, where

              - :attr:`cell_outputs`: The outputs by the RNN cell prior to
                the output layer, having the same structure as
                :attr:`outputs` except for the `output_dim`.

            - If `return_output_size` is `True`, returns a tuple
              :attr:`(outputs, final_state, output_size)`, where

              - :attr:`output_size`: A (possibly nested tuple of) int
                representing the size of :attr:`outputs`. If a single int
                or an int array, then `outputs` has shape
                `[batch/time, time/batch] + output_size`. If a (nested)
                tuple, then `output_size` has the same structure as
                `outputs`.

            - If both `return_cell_output` and `return_output_size` are
              True, returns
              :attr:`(outputs, final_state, cell_outputs, output_size)`.
        """
        if ('dtype' not in kwargs) and (initial_state is None):
            cell_outputs, state = tf.nn.dynamic_rnn(
                cell=self._cell,
                inputs=inputs,
                sequence_length=sequence_length,
                initial_state=initial_state,
                time_major=time_major,
                dtype=tf.float32,
                **kwargs)
        else:
            cell_outputs, state = tf.nn.dynamic_rnn(
                cell=self._cell,
                inputs=inputs,
                sequence_length=sequence_length,
                initial_state=initial_state,
                time_major=time_major,
                **kwargs)

        outputs, output_size = _apply_rnn_encoder_output_layer(
            self._output_layer, time_major, self._output_layer_hparams,
            mode, cell_outputs, self._cell.output_size)

        if not self._built:
            self._add_internal_trainable_variables()
            # Add trainable variables of `self._cell` and
            # `self._output_layer` which may be constructed externally.
            self._add_trainable_variable(
                layers.get_rnn_cell_trainable_variables(self._cell))
            if self._output_layer and \
                    not isinstance(self._output_layer, (list, tuple)):
                self._add_trainable_variable(
                    self._output_layer.trainable_variables)
            self._built = True

        rets = (outputs, state)
        if return_cell_output:
            rets += (cell_outputs, )
        if return_output_size:
            rets += (output_size, )
        return rets

    @property
    def cell(self):
        """The RNN cell.
        """
        return self._cell

    @property
    def state_size(self):
        """The state size of the encoder cell. Same as
        :attr:`encoder.cell.state_size`.
        """
        return self.cell.state_size

    @property
    def output_layer(self):
        """The output layer.
        """
        return self._output_layer
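

# Usage sketch (hypothetical sizes, not part of the original module): encode
# pre-embedded inputs and apply a single 64-unit dense output layer to each
# step's cell output.
def _example_unidirectional_encoder():
    encoder = UnidirectionalRNNEncoder(
        hparams={"output_layer": {"num_layers": 1, "layer_size": 64}})
    inputs = tf.zeros([32, 10, 8])          # [batch_size, max_time, dim]
    outputs, final_state = encoder(inputs)  # outputs: [32, 10, 64]
    return outputs, final_state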


class BidirectionalRNNEncoder(RNNEncoderBase):
    """Bidirectional forward-backward RNN encoder.

    Args:
        cell_fw (RNNCell, optional): The forward RNN cell. If not given,
            a cell is created as specified in
            :attr:`hparams["rnn_cell_fw"]`.
        cell_bw (RNNCell, optional): The backward RNN cell. If not given,
            a cell is created as specified in
            :attr:`hparams["rnn_cell_bw"]`.
        cell_dropout_mode (optional): A tensor taking value of
            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which
            toggles dropout in the RNN cells (e.g., activates dropout in
            TRAIN mode). If `None`, :func:`~texar.tf.global_mode` is used.
            Ignored if the respective cell is given.
        output_layer_fw (optional): An instance of
            :tf_main:`tf.layers.Layer <layers/Layer>`. Applied to the
            forward RNN cell output of each step. If `None` (default), the
            output layer is created as specified in
            :attr:`hparams["output_layer_fw"]`.
        output_layer_bw (optional): An instance of
            :tf_main:`tf.layers.Layer <layers/Layer>`. Applied to the
            backward RNN cell output of each step. If `None` (default),
            the output layer is created as specified in
            :attr:`hparams["output_layer_bw"]`.
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparameters will be set to default values. See
            :meth:`default_hparams` for the hyperparameter structure and
            default values.

    See :meth:`_build` for the inputs and outputs of the encoder.

    Example:

        .. code-block:: python

            # Use with embedder
            embedder = WordEmbedder(vocab_size, hparams=emb_hparams)
            encoder = BidirectionalRNNEncoder(hparams=enc_hparams)

            outputs, final_state = encoder(
                inputs=embedder(data_batch['text_ids']),
                sequence_length=data_batch['length'])
            # outputs == (outputs_fw, outputs_bw)
            # final_state == (final_state_fw, final_state_bw)

    .. document private functions
    .. automethod:: _build
    """

    def __init__(self,
                 cell_fw=None,
                 cell_bw=None,
                 cell_dropout_mode=None,
                 output_layer_fw=None,
                 output_layer_bw=None,
                 hparams=None):
        RNNEncoderBase.__init__(self, hparams)

        # Make RNN cells
        with tf.variable_scope(self.variable_scope):
            if cell_fw is not None:
                self._cell_fw = cell_fw
            else:
                self._cell_fw = layers.get_rnn_cell(
                    self._hparams.rnn_cell_fw, cell_dropout_mode)

            if cell_bw is not None:
                self._cell_bw = cell_bw
            elif self._hparams.rnn_cell_share_config:
                self._cell_bw = layers.get_rnn_cell(
                    self._hparams.rnn_cell_fw, cell_dropout_mode)
            else:
                self._cell_bw = layers.get_rnn_cell(
                    self._hparams.rnn_cell_bw, cell_dropout_mode)

        # Make output layers
        with tf.variable_scope(self.variable_scope):
            if output_layer_fw is not None:
                self._output_layer_fw = output_layer_fw
                self._output_layer_hparams_fw = None
            else:
                self._output_layer_fw = _build_dense_output_layer(
                    self._hparams.output_layer_fw)
                self._output_layer_hparams_fw = self._hparams.output_layer_fw

            if output_layer_bw is not None:
                self._output_layer_bw = output_layer_bw
                self._output_layer_hparams_bw = None
            elif self._hparams.output_layer_share_config:
                self._output_layer_bw = _build_dense_output_layer(
                    self._hparams.output_layer_fw)
                self._output_layer_hparams_bw = self._hparams.output_layer_fw
            else:
                self._output_layer_bw = _build_dense_output_layer(
                    self._hparams.output_layer_bw)
                self._output_layer_hparams_bw = self._hparams.output_layer_bw

    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                "rnn_cell_fw": default_rnn_cell_hparams(),
                "rnn_cell_bw": default_rnn_cell_hparams(),
                "rnn_cell_share_config": True,
                "output_layer_fw": {
                    "num_layers": 0,
                    "layer_size": 128,
                    "activation": "identity",
                    "final_layer_activation": None,
                    "other_dense_kwargs": None,
                    "dropout_layer_ids": [],
                    "dropout_rate": 0.5,
                    "variational_dropout": False
                },
                "output_layer_bw": {
                    # Same hyperparams and default values as
                    # "output_layer_fw"
                    # ...
                },
                "output_layer_share_config": True,
                "name": "bidirectional_rnn_encoder"
            }

        Here:

        "rnn_cell_fw": dict
            Hyperparameters of the forward RNN cell. Ignored if
            :attr:`cell_fw` is given to the encoder constructor. The
            default value is defined in
            :func:`~texar.tf.core.default_rnn_cell_hparams`.

        "rnn_cell_bw": dict
            Hyperparameters of the backward RNN cell. Ignored if
            :attr:`cell_bw` is given to the encoder constructor, or if
            :attr:`"rnn_cell_share_config"` is `True`. The default value
            is defined in :func:`~texar.tf.core.default_rnn_cell_hparams`.

        "rnn_cell_share_config": bool
            Whether to share the hyperparameters of the forward cell with
            the backward cell. Note that the cell parameters (variables)
            are not shared.

        "output_layer_fw": dict
            Hyperparameters of the forward output layer. Ignored if
            :attr:`output_layer_fw` is given to the constructor. See the
            "output_layer" field of
            :meth:`~texar.tf.modules.UnidirectionalRNNEncoder.default_hparams`
            for details.

        "output_layer_bw": dict
            Hyperparameters of the backward output layer. Ignored if
            :attr:`output_layer_bw` is given to the constructor, or if
            :attr:`"output_layer_share_config"` is `True`. Has the same
            structure and defaults as :attr:`"output_layer_fw"`.

        "output_layer_share_config": bool
            Whether to share the hyperparameters of the forward output
            layer with the backward output layer. Note that the layer
            parameters (variables) are not shared.

        "name": str
            Name of the encoder.
        """
        hparams = RNNEncoderBase.default_hparams()
        hparams.update({
            "rnn_cell_fw": layers.default_rnn_cell_hparams(),
            "rnn_cell_bw": layers.default_rnn_cell_hparams(),
            "rnn_cell_share_config": True,
            "output_layer_fw": _default_output_layer_hparams(),
            "output_layer_bw": _default_output_layer_hparams(),
            "output_layer_share_config": True,
            "name": "bidirectional_rnn_encoder"
        })
        return hparams

    def _build(self,
               inputs,
               sequence_length=None,
               initial_state_fw=None,
               initial_state_bw=None,
               time_major=False,
               mode=None,
               return_cell_output=False,
               return_output_size=False,
               **kwargs):
        """Encodes the inputs.

        Args:
            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`.
                The first two dimensions `batch_size` and `max_time` are
                exchanged if :attr:`time_major=True` is specified.
            sequence_length (optional): A 1D int tensor of shape
                `[batch_size]`. Sequence lengths of the batch inputs. Used
                to copy-through state and zero-out outputs when past a
                batch element's sequence length.
            initial_state_fw (optional): Initial state of the forward RNN.
            initial_state_bw (optional): Initial state of the backward RNN.
            time_major (bool): The shape format of the :attr:`inputs` and
                :attr:`outputs` Tensors. If `True`, these tensors are of
                shape `[max_time, batch_size, depth]`. If `False`
                (default), these tensors are of shape
                `[batch_size, max_time, depth]`.
            mode (optional): A tensor taking value in
                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`,
                including `TRAIN`, `EVAL`, and `PREDICT`. Controls output
                layer dropout if the output layers are specified with
                :attr:`hparams`. If `None` (default),
                :func:`texar.tf.global_mode` is used.
            return_cell_output (bool): Whether to return the outputs of
                the RNN cells. These are the results prior to the output
                layers.
            return_output_size (bool): Whether to return the sizes of the
                outputs (i.e., the results after output layers).
            **kwargs: Optional keyword arguments of
                :tf_main:`tf.nn.bidirectional_dynamic_rnn
                <nn/bidirectional_dynamic_rnn>`, such as `swap_memory`,
                `dtype`, `parallel_iterations`, etc.

        Returns:
            - By default (both `return_cell_output` and
              `return_output_size` are False), returns a pair
              :attr:`(outputs, final_state)`, where

              - :attr:`outputs`: A tuple `(outputs_fw, outputs_bw)`
                containing the forward and the backward RNN outputs, each
                of which is of shape `[batch_size, max_time, output_dim]`
                if `time_major` is False, or
                `[max_time, batch_size, output_dim]` if `time_major` is
                True. If the RNN cell output is a (nested) tuple of
                Tensors, then `outputs_fw` and `outputs_bw` will be a
                (nested) tuple having the same structure as the cell
                output.

              - :attr:`final_state`: A tuple
                `(final_state_fw, final_state_bw)` containing the final
                states of the forward and backward RNNs, each of which is
                a Tensor of shape `[batch_size] + cell.state_size`, or a
                (nested) tuple of Tensors if `cell.state_size` is a
                (nested) tuple.

            - If `return_cell_output` is True, returns a triple
              :attr:`(outputs, final_state, cell_outputs)`, where

              - :attr:`cell_outputs`: A tuple
                `(cell_outputs_fw, cell_outputs_bw)` containing the
                outputs by the forward and backward RNN cells prior to
                the output layers, having the same structure as
                :attr:`outputs` except for the `output_dim`.

            - If `return_output_size` is True, returns a tuple
              :attr:`(outputs, final_state, output_size)`, where

              - :attr:`output_size`: A tuple
                `(output_size_fw, output_size_bw)` containing the sizes of
                `outputs_fw` and `outputs_bw`, respectively. Taking `*_fw`
                for example, `output_size_fw` is a (possibly nested tuple
                of) int. If a single int or an int array, then
                `outputs_fw` has shape
                `[batch/time, time/batch] + output_size_fw`. If a (nested)
                tuple, then `output_size_fw` has the same structure as
                `outputs_fw`. The same applies to `output_size_bw`.

            - If both `return_cell_output` and `return_output_size` are
              True, returns
              :attr:`(outputs, final_state, cell_outputs, output_size)`.
        """
        no_initial_state = initial_state_fw is None and \
                           initial_state_bw is None
        if ('dtype' not in kwargs) and no_initial_state:
            cell_outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=self._cell_fw,
                cell_bw=self._cell_bw,
                inputs=inputs,
                sequence_length=sequence_length,
                initial_state_fw=initial_state_fw,
                initial_state_bw=initial_state_bw,
                time_major=time_major,
                dtype=tf.float32,
                **kwargs)
        else:
            cell_outputs, states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=self._cell_fw,
                cell_bw=self._cell_bw,
                inputs=inputs,
                sequence_length=sequence_length,
                initial_state_fw=initial_state_fw,
                initial_state_bw=initial_state_bw,
                time_major=time_major,
                **kwargs)

        outputs_fw, output_size_fw = _apply_rnn_encoder_output_layer(
            self._output_layer_fw, time_major, self._output_layer_hparams_fw,
            mode, cell_outputs[0], self._cell_fw.output_size)

        outputs_bw, output_size_bw = _apply_rnn_encoder_output_layer(
            self._output_layer_bw, time_major, self._output_layer_hparams_bw,
            mode, cell_outputs[1], self._cell_bw.output_size)

        outputs = (outputs_fw, outputs_bw)
        output_size = (output_size_fw, output_size_bw)

        if not self._built:
            self._add_internal_trainable_variables()
            # Add trainable variables of cells and output layers
            # which may be constructed externally.
            self._add_trainable_variable(
                layers.get_rnn_cell_trainable_variables(self._cell_fw))
            self._add_trainable_variable(
                layers.get_rnn_cell_trainable_variables(self._cell_bw))
            if self._output_layer_fw and \
                    not isinstance(self._output_layer_fw, (list, tuple)):
                self._add_trainable_variable(
                    self._output_layer_fw.trainable_variables)
            if self._output_layer_bw and \
                    not isinstance(self._output_layer_bw, (list, tuple)):
                self._add_trainable_variable(
                    self._output_layer_bw.trainable_variables)
            self._built = True

        returns = (outputs, states)
        if return_cell_output:
            returns += (cell_outputs, )
        if return_output_size:
            returns += (output_size, )
        return returns

    @property
    def cell_fw(self):
        """The forward RNN cell.
        """
        return self._cell_fw

    @property
    def cell_bw(self):
        """The backward RNN cell.
        """
        return self._cell_bw

    @property
    def state_size_fw(self):
        """The state size of the forward encoder cell. Same as
        :attr:`encoder.cell_fw.state_size`.
        """
        return self.cell_fw.state_size

    @property
    def state_size_bw(self):
        """The state size of the backward encoder cell. Same as
        :attr:`encoder.cell_bw.state_size`.
        """
        return self.cell_bw.state_size

    @property
    def output_layer_fw(self):
        """The output layer of the forward RNN.
        """
        return self._output_layer_fw

    @property
    def output_layer_bw(self):
        """The output layer of the backward RNN.
        """
        return self._output_layer_bw
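

# Usage sketch (hypothetical sizes, not part of the original module): the
# bidirectional encoder returns per-direction outputs; a common pattern is
# concatenating them along the feature axis.
def _example_bidirectional_encoder():
    encoder = BidirectionalRNNEncoder()
    inputs = tf.zeros([32, 10, 8])  # [batch_size, max_time, dim]
    (outputs_fw, outputs_bw), _ = encoder(inputs)
    # Shape [32, 10, 2 * num_units], where num_units is the cell size.
    return tf.concat([outputs_fw, outputs_bw], axis=-1)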