Source code for texar.tf.modules.classifiers.rnn_classifiers

# Copyright 2018 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Various RNN classifiers.
"""

import numpy as np

import tensorflow as tf
from tensorflow.contrib.framework import nest

from texar.tf.modules.classifiers.classifier_base import ClassifierBase
from texar.tf.modules.encoders.rnn_encoders import \
        UnidirectionalRNNEncoder, _forward_single_output_layer
from texar.tf.core import layers
from texar.tf.utils import utils, shapes
from texar.tf.hyperparams import HParams

# pylint: disable=too-many-arguments, invalid-name, no-member,
# pylint: disable=too-many-branches, too-many-locals, too-many-statements

__all__ = [
    "UnidirectionalRNNClassifier"
]

# def RNNClassifierBase(ClassifierBase):
#    """Base class inherited by all RNN classifiers.
#    """
#
#    def __init__(self, hparams=None):
#        ClassifierBase.__init__(self, hparams)


[docs]class UnidirectionalRNNClassifier(ClassifierBase):
    """One directional RNN classifier.
    This is a combination of the
    :class:`~texar.tf.modules.UnidirectionalRNNEncoder` with a classification
    layer. Both step-wise classification and sequence-level classification
    are supported, specified in :attr:`hparams`.

    Arguments are the same as in
    :class:`~texar.tf.modules.UnidirectionalRNNEncoder`.

    Args:
        cell: (RNNCell, optional) If not specified,
            a cell is created as specified in :attr:`hparams["rnn_cell"]`.
        cell_dropout_mode (optional): A Tensor taking value of
            :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, which
            toggles dropout in the RNN cell (e.g., activates dropout in
            TRAIN mode). If `None`, :func:`~texar.tf.global_mode` is used.
            Ignored if :attr:`cell` is given.
        output_layer (optional): An instance of
            :tf_main:`tf.layers.Layer <layers/Layer>`. Applies to the RNN cell
            output of each step. If `None` (default), the output layer is
            created as specified in :attr:`hparams["output_layer"]`.
        hparams (dict or HParams, optional): Hyperparameters. Missing
            hyperparamerter will be set to default values. See
            :meth:`default_hparams` for the hyperparameter sturcture and
            default values.

    .. document private functions
    .. automethod:: _build
    """

    def __init__(self,
                 cell=None,
                 cell_dropout_mode=None,
                 output_layer=None,
                 hparams=None):
        ClassifierBase.__init__(self, hparams)

        with tf.variable_scope(self.variable_scope):
            # Creates the underlying encoder
            encoder_hparams = utils.dict_fetch(
                hparams, UnidirectionalRNNEncoder.default_hparams())
            if encoder_hparams is not None:
                encoder_hparams['name'] = None
            self._encoder = UnidirectionalRNNEncoder(
                cell=cell,
                cell_dropout_mode=cell_dropout_mode,
                output_layer=output_layer,
                hparams=encoder_hparams)

            # Creates an additional classification layer if needed
            self._num_classes = self._hparams.num_classes
            if self._num_classes <= 0:
                self._logit_layer = None
            else:
                logit_kwargs = self._hparams.logit_layer_kwargs
                if logit_kwargs is None:
                    logit_kwargs = {}
                elif not isinstance(logit_kwargs, HParams):
                    raise ValueError(
                        "hparams['logit_layer_kwargs'] must be a dict.")
                else:
                    logit_kwargs = logit_kwargs.todict()
                logit_kwargs.update({"units": self._num_classes})
                if 'name' not in logit_kwargs:
                    logit_kwargs['name'] = "logit_layer"

                layer_hparams = {"type": "Dense", "kwargs": logit_kwargs}
                self._logit_layer = layers.get_layer(hparams=layer_hparams)

[docs]    @staticmethod
    def default_hparams():
        """Returns a dictionary of hyperparameters with default values.

        .. code-block:: python

            {
                # (1) Same hyperparameters as in UnidirectionalRNNEncoder
                ...

                # (2) Additional hyperparameters
                "num_classes": 2,
                "logit_layer_kwargs": None,
                "clas_strategy": "final_time",
                "max_seq_length": None,
                "name": "unidirectional_rnn_classifier"
            }

        Here:

        1. Same hyperparameters as in
        :class:`~texar.tf.modules.UnidirectionalRNNEncoder`.
        See the :meth:`~texar.tf.modules.UnidirectionalRNNEncoder.default_hparams`.
        An instance of UnidirectionalRNNEncoder is created for feature
        extraction.

        2. Additional hyperparameters:

            "num_classes": int
                Number of classes:

                - If **`> 0`**, an additional :tf_main:`Dense <layers/Dense>` \
                layer is appended to the encoder to compute the logits over \
                classes.
                - If **`<= 0`**, no dense layer is appended. The number of \
                classes is assumed to be the final dense layer size of the \
                encoder.

            "logit_layer_kwargs": dict
                Keyword arguments for the logit Dense layer constructor,
                except for argument "units" which is set to "num_classes".
                Ignored if no extra logit layer is appended.

            "clas_strategy": str
                The classification strategy, one of:

                - **"final_time"**: Sequence-leve classification based on \
                the output of the final time step. One sequence has one class.
                - **"all_time"**: Sequence-level classification based on \
                the output of all time steps. One sequence has one class.
                - **"time_wise"**: Step-wise classfication, i.e., make \
                classification for each time step based on its output.

            "max_seq_length": int, optional
                Maximum possible length of input sequences. Required if
                "clas_strategy" is "all_time".

            "name": str
                Name of the classifier.
        """
        hparams = UnidirectionalRNNEncoder.default_hparams()
        hparams.update({
            "num_classes": 2,
            "logit_layer_kwargs": None,
            "clas_strategy": "final_time",
            "max_seq_length": None,
            "name": "unidirectional_rnn_classifier"
        })
        return hparams

[docs]    def _build(self,
               inputs,
               sequence_length=None,
               initial_state=None,
               time_major=False,
               mode=None,
               **kwargs):
        """Feeds the inputs through the network and makes classification.

        The arguments are the same as in
        :class:`~texar.tf.modules.UnidirectionalRNNEncoder`.

        Args:
            inputs: A 3D Tensor of shape `[batch_size, max_time, dim]`.
                The first two dimensions
                `batch_size` and `max_time` may be exchanged if
                `time_major=True` is specified.
            sequence_length (optional): A 1D int tensor of shape `[batch_size]`.
                Sequence lengths
                of the batch inputs. Used to copy-through state and zero-out
                outputs when past a batch element's sequence length.
            initial_state (optional): Initial state of the RNN.
            time_major (bool): The shape format of the :attr:`inputs` and
                :attr:`outputs` Tensors. If `True`, these tensors are of shape
                `[max_time, batch_size, depth]`. If `False` (default),
                these tensors are of shape `[batch_size, max_time, depth]`.
            mode (optional): A tensor taking value in
                :tf_main:`tf.estimator.ModeKeys <estimator/ModeKeys>`, including
                `TRAIN`, `EVAL`, and `PREDICT`. Controls output layer dropout
                if the output layer is specified with :attr:`hparams`.
                If `None` (default), :func:`texar.tf.global_mode()`
                is used.
            return_cell_output (bool): Whether to return the output of the RNN
                cell. This is the results prior to the output layer.
            **kwargs: Optional keyword arguments of
                :tf_main:`tf.nn.dynamic_rnn <nn/dynamic_rnn>`,
                such as `swap_memory`, `dtype`, `parallel_iterations`, etc.

        Returns:
            A tuple `(logits, pred)`, containing the logits over classes and
            the predictions, respectively.

            - If "clas_strategy"=="final_time" or "all_time"

                - If "num_classes"==1, `logits` and `pred` are of both \
                shape `[batch_size]`
                - If "num_classes">1, `logits` is of shape \
                `[batch_size, num_classes]` and `pred` is of shape \
                `[batch_size]`.

            - If "clas_strategy"=="time_wise",

                - If "num_classes"==1, `logits` and `pred` are of both \
                shape `[batch_size, max_time]`
                - If "num_classes">1, `logits` is of shape \
                `[batch_size, max_time, num_classes]` and `pred` is of shape \
                `[batch_size, max_time]`.
                - If `time_major` is `True`, the batch and time dimensions are\
                exchanged.
        """
        enc_outputs, _, enc_output_size = self._encoder(
            inputs=inputs,
            sequence_length=sequence_length,
            initial_state=initial_state,
            time_major=time_major,
            mode=mode,
            return_output_size=True,
            **kwargs)

        # Flatten enc_outputs
        enc_outputs_flat = nest.flatten(enc_outputs)
        enc_output_size_flat = nest.flatten(enc_output_size)
        enc_output_dims_flat = [np.prod(xs) for xs in enc_output_size_flat]
        enc_outputs_flat = [shapes.flatten(x, 2, xs) for x, xs
                            in zip(enc_outputs_flat, enc_output_dims_flat)]
        if len(enc_outputs_flat) == 1:
            enc_outputs_flat = enc_outputs_flat[0]
        else:
            enc_outputs_flat = tf.concat(enc_outputs_flat, axis=2)

        # Compute logits
        stra = self._hparams.clas_strategy
        if stra == 'time_wise':
            logits = enc_outputs_flat
        elif stra == 'final_time':
            if time_major:
                logits = enc_outputs_flat[-1, :, :]
            else:
                logits = enc_outputs_flat[:, -1, :]
        elif stra == 'all_time':
            if self._logit_layer is None:
                raise ValueError(
                    'logit layer must not be `None` if '
                    'clas_strategy="all_time". Specify the logit layer by '
                    'either passing the layer in the constructor or '
                    'specifying the hparams.')
            if self._hparams.max_seq_length is None:
                raise ValueError(
                    'hparams.max_seq_length must not be `None` if '
                    'clas_strategy="all_time"')
        else:
            raise ValueError('Unknown classification strategy: {}'.format(stra))

        if self._logit_layer is not None:
            logit_input_dim = np.sum(enc_output_dims_flat)
            if stra == 'time_wise':
                logits, _ = _forward_single_output_layer(
                    logits, logit_input_dim, self._logit_layer)
            elif stra == 'final_time':
                logits = self._logit_layer(logits)
            elif stra == 'all_time':
                # Pad `enc_outputs_flat` to have max_seq_length before flatten
                length_diff = self._hparams.max_seq_length - tf.shape(inputs)[1]
                length_diff = tf.reshape(length_diff, [1, 1])
                # Set `paddings = [[0, 0], [0, length_dif], [0, 0]]`
                paddings = tf.pad(length_diff, paddings=[[1, 1], [1, 0]])
                logit_input = tf.pad(enc_outputs_flat, paddings=paddings)

                logit_input_dim *= self._hparams.max_seq_length
                logit_input = tf.reshape(logit_input, [-1, logit_input_dim])

                logits = self._logit_layer(logit_input)

        # Compute predications
        num_classes = self._hparams.num_classes
        is_binary = num_classes == 1
        is_binary = is_binary or (num_classes <= 0 and logits.shape[-1] == 1)

        if stra == 'time_wise':
            if is_binary:
                pred = tf.squeeze(tf.greater(logits, 0), -1)
                logits = tf.squeeze(logits, -1)
            else:
                pred = tf.argmax(logits, axis=-1)
        else:
            if is_binary:
                pred = tf.greater(logits, 0)
                logits = tf.reshape(logits, [-1])
            else:
                pred = tf.argmax(logits, axis=-1)
            pred = tf.reshape(pred, [-1])
        pred = tf.cast(pred, tf.int64)

        if not self._built:
            self._add_internal_trainable_variables()
            # Add trainable variables of `self._logit_layer`
            # which may be constructed externally.
            if self._logit_layer:
                self._add_trainable_variable(
                    self._logit_layer.trainable_variables)
            self._built = True

        return logits, pred

    @property
    def num_classes(self):
        """The number of classes, specified in :attr:`hparams`.
        """
        return self._hparams.num_classes