# Copyright 2018 The Texar Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Various losses
"""

import tensorflow as tf

from texar.tf.losses.losses_utils import mask_and_reduce, reduce_dimensions
from texar.tf.utils import shapes

# pylint: disable=invalid-name, not-context-manager, protected-access,
# pylint: disable=too-many-arguments

__all__ = [
    "sequence_softmax_cross_entropy",
    "sequence_sparse_softmax_cross_entropy",
    "sequence_sigmoid_cross_entropy",
    "binary_sigmoid_cross_entropy",
    "binary_sigmoid_cross_entropy_with_clas"
]


def sequence_softmax_cross_entropy(labels,
                                   logits,
                                   sequence_length,
                                   average_across_batch=True,
                                   average_across_timesteps=False,
                                   sum_over_batch=False,
                                   sum_over_timesteps=True,
                                   time_major=False,
                                   stop_gradient_to_label=False,
                                   name=None):
    """Computes softmax cross entropy for each time step of sequence
    predictions.

    Args:
        labels: Target class distributions.

            - If :attr:`time_major` is `False` (default), this must be a\
              Tensor of shape `[batch_size, max_time, num_classes]`.
            - If :attr:`time_major` is `True`, this must be a Tensor of shape\
              `[max_time, batch_size, num_classes]`.

            Each row of `labels` should be a valid probability
            distribution, otherwise the computation of the gradient will be
            incorrect.
        logits: Unscaled log probabilities. This must have the shape of
            `[max_time, batch_size, num_classes]` or
            `[batch_size, max_time, num_classes]` according to
            the value of `time_major`.
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond
            the respective sequence lengths will have zero losses.
        average_across_timesteps (bool): If set, average the loss across
            the time dimension. Must not set `average_across_timesteps`
            and `sum_over_timesteps` at the same time.
        average_across_batch (bool): If set, average the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        sum_over_timesteps (bool): If set, sum the loss across the
            time dimension. Must not set `average_across_timesteps`
            and `sum_over_timesteps` at the same time.
        sum_over_batch (bool): If set, sum the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        time_major (bool): The shape format of the inputs. If `True`,
            :attr:`labels` and :attr:`logits` must have shape
            `[max_time, batch_size, ...]`. If `False` (default), they must
            have shape `[batch_size, max_time, ...]`.
        stop_gradient_to_label (bool): If set, gradient propagation to
            :attr:`labels` will be disabled.
        name (str, optional): A name for the operation.

    Returns:
        A Tensor containing the loss, of rank 0, 1, or 2 depending on the
        arguments :attr:`{average_across}/{sum_over}_{timesteps}/{batch}`.
        For example:

        - If :attr:`sum_over_timesteps` and :attr:`average_across_batch` \
          are `True` (default), the return Tensor is of rank 0.
        - If :attr:`average_across_batch` is `True` and other arguments are \
          `False`, the return Tensor is of shape `[max_time]`.
    """
    with tf.name_scope(name, "sequence_softmax_cross_entropy"):
        if stop_gradient_to_label:
            labels = tf.stop_gradient(labels)

        losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=labels, logits=logits)

        losses = mask_and_reduce(
            losses,
            sequence_length,
            rank=2,
            average_across_batch=average_across_batch,
            average_across_timesteps=average_across_timesteps,
            sum_over_batch=sum_over_batch,
            sum_over_timesteps=sum_over_timesteps,
            time_major=time_major)
        return losses
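
# Hedged usage sketch (not part of the original module): with integer target
# ids, soft label distributions for this dense-label loss can be built with
# `tf.one_hot`. `target_ids`, `logits`, `lengths`, and `vocab_size` are
# hypothetical placeholders.
#
#     soft_labels = tf.one_hot(target_ids, depth=vocab_size)
#     loss = sequence_softmax_cross_entropy(
#         labels=soft_labels, logits=logits, sequence_length=lengths)
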
def sequence_sparse_softmax_cross_entropy(labels,
                                          logits,
                                          sequence_length,
                                          average_across_batch=True,
                                          average_across_timesteps=False,
                                          sum_over_batch=False,
                                          sum_over_timesteps=True,
                                          time_major=False,
                                          name=None):
    """Computes sparse softmax cross entropy for each time step of sequence
    predictions.

    Args:
        labels: Target class indexes. I.e., classes are mutually exclusive
            (each entry is in exactly one class).

            - If :attr:`time_major` is `False` (default), this must be\
              a Tensor of shape `[batch_size, max_time]`.
            - If :attr:`time_major` is `True`, this must be a Tensor of shape\
              `[max_time, batch_size]`.
        logits: Unscaled log probabilities. This must have the shape of
            `[max_time, batch_size, num_classes]` or
            `[batch_size, max_time, num_classes]` according to
            the value of `time_major`.
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond
            the respective sequence lengths will have zero losses.
        average_across_timesteps (bool): If set, average the loss across
            the time dimension. Must not set `average_across_timesteps`
            and `sum_over_timesteps` at the same time.
        average_across_batch (bool): If set, average the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        sum_over_timesteps (bool): If set, sum the loss across the
            time dimension. Must not set `average_across_timesteps`
            and `sum_over_timesteps` at the same time.
        sum_over_batch (bool): If set, sum the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        time_major (bool): The shape format of the inputs. If `True`,
            :attr:`labels` and :attr:`logits` must have shape
            `[max_time, batch_size, ...]`. If `False` (default), they must
            have shape `[batch_size, max_time, ...]`.
        name (str, optional): A name for the operation.

    Returns:
        A Tensor containing the loss, of rank 0, 1, or 2 depending on the
        arguments :attr:`{average_across}/{sum_over}_{timesteps}/{batch}`.
        For example:

        - If :attr:`sum_over_timesteps` and :attr:`average_across_batch` \
          are `True` (default), the return Tensor is of rank 0.
        - If :attr:`average_across_batch` is `True` and other arguments are \
          `False`, the return Tensor is of shape `[max_time]`.

    Example:

        .. code-block:: python

            embedder = WordEmbedder(vocab_size=data.vocab.size)
            decoder = BasicRNNDecoder(vocab_size=data.vocab.size)
            outputs, _, _ = decoder(
                decoding_strategy='train_greedy',
                inputs=embedder(data_batch['text_ids']),
                sequence_length=data_batch['length'] - 1)

            loss = sequence_sparse_softmax_cross_entropy(
                labels=data_batch['text_ids'][:, 1:],
                logits=outputs.logits,
                sequence_length=data_batch['length'] - 1)
    """
    with tf.name_scope(name, "sequence_sparse_softmax_cross_entropy"):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)

        losses = mask_and_reduce(
            losses,
            sequence_length,
            rank=2,
            average_across_batch=average_across_batch,
            average_across_timesteps=average_across_timesteps,
            sum_over_batch=sum_over_batch,
            sum_over_timesteps=sum_over_timesteps,
            time_major=time_major)
        return losses
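
# Hedged sketch of the reduction flags (illustrative, placeholder tensor
# names): to obtain one loss value per example rather than a batch-level
# scalar, disable the batch reductions and keep summing over time.
#
#     per_example_loss = sequence_sparse_softmax_cross_entropy(
#         labels=target_ids, logits=logits, sequence_length=lengths,
#         average_across_batch=False, sum_over_batch=False,
#         sum_over_timesteps=True)  # shape: [batch_size]
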
def sequence_sigmoid_cross_entropy(labels,
                                   logits,
                                   sequence_length,
                                   average_across_batch=True,
                                   average_across_timesteps=False,
                                   average_across_classes=True,
                                   sum_over_batch=False,
                                   sum_over_timesteps=True,
                                   sum_over_classes=False,
                                   time_major=False,
                                   stop_gradient_to_label=False,
                                   name=None):
    """Computes sigmoid cross entropy for each time step of sequence
    predictions.

    Args:
        labels: Target class distributions.

            - If :attr:`time_major` is `False` (default), this must be a\
              Tensor of shape `[batch_size, max_time(, num_classes)]`.
            - If :attr:`time_major` is `True`, this must be a Tensor of shape\
              `[max_time, batch_size(, num_classes)]`.

            Each entry of `labels` should be a valid probability in
            `[0, 1]`, otherwise the computation of the gradient will be
            incorrect.
        logits: Unscaled log probabilities having the same shape as
            :attr:`labels`.
        sequence_length: A Tensor of shape `[batch_size]`. Time steps beyond
            the respective sequence lengths will have zero losses.
        average_across_timesteps (bool): If set, average the loss across
            the time dimension. Must not set `average_across_timesteps`
            and `sum_over_timesteps` at the same time.
        average_across_batch (bool): If set, average the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        average_across_classes (bool): If set, average the loss across the
            class dimension (if it exists). Must not set
            `average_across_classes` and `sum_over_classes`
            at the same time. Ignored if :attr:`logits` is a 2D Tensor.
        sum_over_timesteps (bool): If set, sum the loss across the
            time dimension. Must not set `average_across_timesteps`
            and `sum_over_timesteps` at the same time.
        sum_over_batch (bool): If set, sum the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        sum_over_classes (bool): If set, sum the loss across the
            class dimension. Must not set `average_across_classes`
            and `sum_over_classes` at the same time. Ignored if
            :attr:`logits` is a 2D Tensor.
        time_major (bool): The shape format of the inputs. If `True`,
            :attr:`labels` and :attr:`logits` must have shape
            `[max_time, batch_size, ...]`. If `False` (default), they must
            have shape `[batch_size, max_time, ...]`.
        stop_gradient_to_label (bool): If set, gradient propagation to
            :attr:`labels` will be disabled.
        name (str, optional): A name for the operation.

    Returns:
        A Tensor containing the loss, of rank 0, 1, or 2 depending on the
        arguments
        :attr:`{average_across}/{sum_over}_{timesteps}/{batch}/{classes}`.
        For example, if the class dimension does not exist:

        - If :attr:`sum_over_timesteps` and :attr:`average_across_batch` \
          are `True` (default), the return Tensor is of rank 0.
        - If :attr:`average_across_batch` is `True` and other arguments are \
          `False`, the return Tensor is of shape `[max_time]`.
    """
    with tf.name_scope(name, "sequence_sigmoid_cross_entropy"):
        if stop_gradient_to_label:
            labels = tf.stop_gradient(labels)

        losses = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels, logits=logits)

        rank = shapes.get_rank(logits) or shapes.get_rank(labels)
        if rank is None:
            raise ValueError(
                'Cannot determine the rank of `logits` or `labels`.')

        losses = mask_and_reduce(
            losses,
            sequence_length,
            rank=rank,
            average_across_batch=average_across_batch,
            average_across_timesteps=average_across_timesteps,
            average_across_remaining=average_across_classes,
            sum_over_batch=sum_over_batch,
            sum_over_timesteps=sum_over_timesteps,
            sum_over_remaining=sum_over_classes,
            time_major=time_major)
        return losses
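
# Hedged usage sketch (not part of the original module): multi-label sequence
# tagging, where each time step carries `num_classes` independent binary
# targets. `binary_targets` and `tag_logits` are hypothetical tensors of shape
# [batch_size, max_time, num_classes]; `lengths` has shape [batch_size].
#
#     loss = sequence_sigmoid_cross_entropy(
#         labels=tf.cast(binary_targets, tf.float32),
#         logits=tag_logits,
#         sequence_length=lengths)
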
def binary_sigmoid_cross_entropy(pos_logits=None,
                                 neg_logits=None,
                                 average_across_batch=True,
                                 average_across_classes=True,
                                 sum_over_batch=False,
                                 sum_over_classes=False,
                                 return_pos_neg_losses=False,
                                 name=None):
    """Computes sigmoid cross entropy of binary predictions.

    Args:
        pos_logits: The logits of predicting positive on positive data. A
            tensor of shape `[batch_size(, num_classes)]`.
        neg_logits: The logits of predicting positive on negative data. A
            tensor of shape `[batch_size(, num_classes)]`.
        average_across_batch (bool): If set, average the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        average_across_classes (bool): If set, average the loss across the
            class dimension (if it exists). Must not set
            `average_across_classes` and `sum_over_classes`
            at the same time. Ignored if :attr:`pos_logits` and
            :attr:`neg_logits` are 1D Tensors.
        sum_over_batch (bool): If set, sum the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        sum_over_classes (bool): If set, sum the loss across the
            class dimension. Must not set `average_across_classes`
            and `sum_over_classes` at the same time. Ignored if
            :attr:`pos_logits` and :attr:`neg_logits` are 1D Tensors.
        return_pos_neg_losses (bool): If set, additionally returns the losses
            on :attr:`pos_logits` and :attr:`neg_logits`, respectively.
        name (str, optional): A name for the operation.

    Returns:
        By default, a Tensor containing the loss, of rank 0, 1, or 2 depending
        on the arguments
        :attr:`{average_across}/{sum_over}_{batch}/{classes}`.
        For example:

        - If :attr:`average_across_batch` and :attr:`average_across_classes` \
          are `True` (default), the return Tensor is of rank 0.
        - If all reduction arguments are `False`, the return Tensor is of \
          shape `[batch_size(, num_classes)]`.

        If :attr:`return_pos_neg_losses` is `True`, returns a tuple
        `(loss, pos_loss, neg_loss)`, where `loss` is the loss above;
        `pos_loss` is the loss on `pos_logits` only; and
        `neg_loss` is the loss on `neg_logits` only. They have
        `loss = pos_loss + neg_loss`.
    """
    with tf.name_scope(name, "binary_sigmoid_cross_entropy"):
        average_axes, sum_axes = [], []
        average_axes += [0] if average_across_batch else []
        average_axes += [1] if average_across_classes else []
        sum_axes += [0] if sum_over_batch else []
        sum_axes += [1] if sum_over_classes else []

        pos_loss = 0
        if pos_logits is not None:
            pos_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=pos_logits, labels=tf.ones_like(pos_logits))

            pos_loss = reduce_dimensions(pos_loss, average_axes, sum_axes)

        neg_loss = 0
        if neg_logits is not None:
            neg_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=neg_logits, labels=tf.zeros_like(neg_logits))

            neg_loss = reduce_dimensions(neg_loss, average_axes, sum_axes)

        loss = pos_loss + neg_loss

        if return_pos_neg_losses:
            return loss, pos_loss, neg_loss
        else:
            return loss
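
# Hedged usage sketch (illustrative): a GAN-style discriminator loss, where
# the discriminator should assign high logits to real data and low logits to
# generated data. `disc_real_logits` and `disc_fake_logits` are hypothetical
# tensors of shape [batch_size].
#
#     d_loss = binary_sigmoid_cross_entropy(
#         pos_logits=disc_real_logits, neg_logits=disc_fake_logits)
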
def binary_sigmoid_cross_entropy_with_clas(clas_fn,
                                           pos_inputs=None,
                                           neg_inputs=None,
                                           average_across_batch=True,
                                           average_across_classes=True,
                                           sum_over_batch=False,
                                           sum_over_classes=False,
                                           return_pos_neg_losses=False,
                                           name=None):
    """Computes sigmoid cross entropy of a binary classifier.

    .. role:: python(code)
        :language: python

    Args:
        clas_fn: A callable that takes data (e.g., :attr:`pos_inputs` and
            :attr:`neg_inputs`) and returns the logits of being positive. The
            signature of `clas_fn` must be:
            :python:`logits (, ...) = clas_fn(inputs)`.
            The return value of `clas_fn` can be the logits, or
            a tuple where the logits are the first element.
        pos_inputs: The positive data fed into `clas_fn`.
        neg_inputs: The negative data fed into `clas_fn`.
        average_across_batch (bool): If set, average the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        average_across_classes (bool): If set, average the loss across the
            class dimension (if it exists). Must not set
            `average_across_classes` and `sum_over_classes`
            at the same time. Ignored if the logits are a 1D Tensor.
        sum_over_batch (bool): If set, sum the loss across the
            batch dimension. Must not set `average_across_batch`
            and `sum_over_batch` at the same time.
        sum_over_classes (bool): If set, sum the loss across the
            class dimension. Must not set `average_across_classes`
            and `sum_over_classes` at the same time. Ignored if the
            logits are a 1D Tensor.
        return_pos_neg_losses (bool): If set, additionally returns the losses
            on the positive and negative logits, respectively.
        name (str, optional): A name for the operation.

    Returns:
        By default, a Tensor containing the loss, of rank 0, 1, or 2 depending
        on the arguments
        :attr:`{average_across}/{sum_over}_{batch}/{classes}`.
        For example:

        - If :attr:`average_across_batch` and :attr:`average_across_classes` \
          are `True` (default), the return Tensor is of rank 0.
        - If all reduction arguments are `False`, the return Tensor is of \
          shape `[batch_size(, num_classes)]`.

        If :attr:`return_pos_neg_losses` is `True`, returns a tuple
        `(loss, pos_loss, neg_loss)`, where `loss` is the loss above;
        `pos_loss` is the loss on the positive logits only; and
        `neg_loss` is the loss on the negative logits only. They have
        `loss = pos_loss + neg_loss`.
    """
    pos_logits = None
    if pos_inputs is not None:
        pos_logits = clas_fn(pos_inputs)
        if isinstance(pos_logits, (list, tuple)):
            pos_logits = pos_logits[0]

    neg_logits = None
    if neg_inputs is not None:
        neg_logits = clas_fn(neg_inputs)
        if isinstance(neg_logits, (list, tuple)):
            neg_logits = neg_logits[0]

    return binary_sigmoid_cross_entropy(
        pos_logits=pos_logits,
        neg_logits=neg_logits,
        average_across_batch=average_across_batch,
        average_across_classes=average_across_classes,
        sum_over_batch=sum_over_batch,
        sum_over_classes=sum_over_classes,
        return_pos_neg_losses=return_pos_neg_losses,
        name=name)
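
# Hedged usage sketch (illustrative): passing a classifier callable so the
# logits are computed inside the loss. `discriminator`, `real_data`, and
# `fake_data` are hypothetical.
#
#     loss, pos_loss, neg_loss = binary_sigmoid_cross_entropy_with_clas(
#         clas_fn=discriminator,
#         pos_inputs=real_data,
#         neg_inputs=fake_data,
#         return_pos_neg_losses=True)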