
Caffe Sigmoid Cross Entropy Loss (cross-entropy loss function)


Sigmoid Cross Entropy Loss (sigmoid cross-entropy loss function)

[Related reading: caffe study (3) — on activation functions and loss functions]

Official documentation: http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SigmoidCrossEntropyLossLayer.html

Input:

  1. Shape: $ (N \times C \times H \times W) $ — the scores $ x \in [-\infty, +\infty] $, which this layer maps to probability predictions $ \hat{p}_n = \sigma(x_n) \in [0, 1] $ using the sigmoid function $ \sigma(\cdot) $.

  2. Shape: $ (N \times C \times H \times W) $ — the targets (labels) $ y \in [0, 1] $.

Output:

  1. Shape: $ (1 \times 1 \times 1 \times 1) $ — the loss, computed as $ E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n) \right] $.

Typical use:
Predicting targets interpreted as probabilities (i.e. target probability distributions).

Parameters
bottom input Blob vector (length 2)
  1. $ (N \times C \times H \times W) $ the scores $ x \in [-\infty, +\infty]$, which this layer maps to probability predictions $ \hat{p}_n = \sigma(x_n) \in [0, 1] $ using the sigmoid function $ \sigma(.) $ (see SigmoidLayer).
  2. $ (N \times C \times H \times W) $ the targets $ y \in [0, 1] $
top output Blob vector (length 1)
  1. $ (1 \times 1 \times 1 \times 1) $ the computed cross-entropy loss: $ E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n) \right] $
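
Written per element with $ \hat{p}_n = \sigma(x_n) $, the bracketed summand reduces to $ \log(1 + e^{x_n}) - p_n x_n $, since $ \log \hat{p}_n = x_n - \log(1 + e^{x_n}) $ and $ \log(1 - \hat{p}_n) = -\log(1 + e^{x_n}) $. Evaluating $ e^{x_n} $ directly would overflow for large positive scores, so the Forward_cpu code further below uses the usual stable rewrite (a short derivation sketch in the same notation):

$ \log(1 + e^{x_n}) - p_n x_n = \max(x_n, 0) - p_n x_n + \log(1 + e^{-|x_n|}) $

which is exactly what the loop computes via the $ (x_n \ge 0) $ indicator.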


Computes the sigmoid cross-entropy loss error gradient w.r.t. the predictions.

Gradients cannot be computed with respect to the target inputs (bottom[1]), so this method ignores bottom[1] and requires !propagate_down[1], crashing if propagate_down[1] is set.

Parameters
top output Blob vector (length 1), providing the error gradient with respect to the outputs
  1. $ (1 \times 1 \times 1 \times 1) $ This Blob's diff will simply contain the loss_weight * $ \lambda $, as $ \lambda $ is the coefficient of this layer's output $ \ell_i $ in the overall Net loss $ E = \lambda_i \ell_i + \mbox{other loss terms} $; hence $ \frac{\partial E}{\partial \ell_i} = \lambda_i $. (*Assuming that this top Blob is not used as a bottom (input) by any other layer of the Net.)
propagate_down see Layer::Backward. propagate_down[1] must be false as gradient computation with respect to the targets is not implemented.
bottom input Blob vector (length 2)
  1. $ (N \times C \times H \times W) $ the predictions $x$; Backward computes diff $ \frac{\partial E}{\partial x} = \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n) $
  2. $ (N \times 1 \times 1 \times 1) $ the labels – ignored as we can't compute their error gradients
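
The per-element gradient follows directly from the stable form of the loss given earlier (a short sketch, same notation): with $ \ell_n = \log(1 + e^{x_n}) - p_n x_n $,

$ \frac{\partial \ell_n}{\partial x_n} = \frac{e^{x_n}}{1 + e^{x_n}} - p_n = \sigma(x_n) - p_n = \hat{p}_n - p_n $

which is why Backward_cpu below only needs an element-wise subtraction of the targets from the sigmoid outputs (caffe_sub), followed by scaling the whole diff by loss_weight / num.
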
The CPU implementation of the layer:

#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

// Computes the cross-entropy (logistic) loss; it is often used for predicting
// targets interpreted as probabilities. Detailed reference: the official document at
// http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SigmoidCrossEntropyLossLayer.html

namespace caffe {

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  sigmoid_bottom_vec_.clear();
  sigmoid_bottom_vec_.push_back(bottom[0]);
  sigmoid_top_vec_.clear();
  sigmoid_top_vec_.push_back(sigmoid_output_.get());
  sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_);
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
      "SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count.";
  sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_);
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the sigmoid outputs.
  sigmoid_bottom_vec_[0] = bottom[0];
  sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
  // Compute the loss (negative log likelihood)
  const int count = bottom[0]->count();
  const int num = bottom[0]->num();
  // Stable version of loss computation from input data
  const Dtype* input_data = bottom[0]->cpu_data();
  const Dtype* target = bottom[1]->cpu_data();
  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
        log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}

#ifdef CPU_ONLY
STUB_GPU_BACKWARD(SigmoidCrossEntropyLossLayer, Backward);
#endif

INSTANTIATE_CLASS(SigmoidCrossEntropyLossLayer);
REGISTER_LAYER_CLASS(SigmoidCrossEntropyLoss);

}  // namespace caffe
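
To sanity-check the numbers outside of Caffe, here is a minimal standalone C++ sketch (the helper names element_loss and element_diff are hypothetical, not part of Caffe) that mirrors the stable forward loss and the per-element diff computed by the layer above:

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Stable per-element sigmoid cross-entropy loss: log(1 + exp(x)) - y * x,
// rewritten so the exp() argument is never positive (mirrors Forward_cpu).
double element_loss(double x, double y) {
  double pos = (x >= 0) ? 1.0 : 0.0;
  return -(x * (y - pos) - std::log(1.0 + std::exp(x - 2.0 * x * pos)));
}

// Per-element gradient w.r.t. the score x: sigmoid(x) - y (mirrors Backward_cpu).
double element_diff(double x, double y) {
  return 1.0 / (1.0 + std::exp(-x)) - y;
}

int main() {
  // One "image" with four scores and binary targets, so num = 1, count = 4.
  std::vector<double> scores  = {2.0, -1.5, 0.0, 30.0};  // large score: still no overflow
  std::vector<double> targets = {1.0,  0.0, 1.0,  0.0};
  const int num = 1;

  double loss = 0.0;
  for (std::size_t i = 0; i < scores.size(); ++i) {
    loss += element_loss(scores[i], targets[i]);
  }
  loss /= num;  // the layer divides by the batch size num, not by count
  std::printf("loss = %f\n", loss);

  // Inside a Net the diff would additionally be scaled by the loss_weight
  // read from top[0]->cpu_diff()[0].
  for (std::size_t i = 0; i < scores.size(); ++i) {
    std::printf("diff[%zu] = %f\n", i, element_diff(scores[i], targets[i]) / num);
  }
  return 0;
}

In a network definition the layer is selected by the registered name SigmoidCrossEntropyLoss (see REGISTER_LAYER_CLASS above), with the score blob and the target blob as its two bottoms.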
