
Caffe Sigmoid Cross Entropy Loss (cross-entropy loss function)


Sigmoid Cross Entropy Loss (sigmoid cross-entropy loss function)

[Related reading: caffe study (3) — on activation functions and loss functions]

Official documentation: http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SigmoidCrossEntropyLossLayer.html

Input:

  1. Shape: $ (N \times C \times H \times W) $ — the scores $ x \in [-\infty, +\infty] $, which this layer maps to probability predictions $ \hat{p}_n = \sigma(x_n) \in [0, 1] $ using the sigmoid function $ \sigma(\cdot) $.

  2. Shape: $ (N \times C \times H \times W) $ — the targets (labels) $ y \in [0, 1] $.

Output:

  1. Shape: $ (1 \times 1 \times 1 \times 1) $ — the loss, computed as $ E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n) \right] $.

Typical use:
Predicting targets interpreted as probabilities (i.e. target probability distributions).

Parameters
bottom input Blob vector (length 2)
  1. $ (N \times C \times H \times W) $ the scores $ x \in [-\infty, +\infty]$, which this layer maps to probability predictions $ \hat{p}_n = \sigma(x_n) \in [0, 1] $ using the sigmoid function $ \sigma(.) $ (see SigmoidLayer).
  2. $ (N \times C \times H \times W) $ the targets $ y \in [0, 1] $
top output Blob vector (length 1)
  1. $ (1 \times 1 \times 1 \times 1) $ the computed cross-entropy loss: $ E = \frac{-1}{n} \sum\limits_{n=1}^N \left[ p_n \log \hat{p}_n + (1 - p_n) \log(1 - \hat{p}_n) \right] $
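
Written per element with $ \hat{p}_n = \sigma(x_n) $, the bracketed summand reduces to $ \log(1 + e^{x_n}) - p_n x_n $, since $ \log \hat{p}_n = x_n - \log(1 + e^{x_n}) $ and $ \log(1 - \hat{p}_n) = -\log(1 + e^{x_n}) $. Evaluating $ e^{x_n} $ directly would overflow for large positive scores, so the Forward_cpu code further below uses the usual stable rewrite (a short derivation sketch in the same notation):

$ \log(1 + e^{x_n}) - p_n x_n = \max(x_n, 0) - p_n x_n + \log(1 + e^{-|x_n|}) $

which is exactly what the loop computes via the $ (x_n \ge 0) $ indicator.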


Computes the sigmoid cross-entropy loss error gradient w.r.t. the predictions.

Gradients cannot be computed with respect to the target inputs (bottom[1]), so this method ignores bottom[1] and requires !propagate_down[1], crashing if propagate_down[1] is set.

Parameters
top output Blob vector (length 1), providing the error gradient with respect to the outputs
  1. $ (1 \times 1 \times 1 \times 1) $ This Blob's diff will simply contain the loss_weight * $ \lambda $, as $ \lambda $ is the coefficient of this layer's output $ \ell_i $ in the overall Net loss $ E = \lambda_i \ell_i + \mbox{other loss terms} $; hence $ \frac{\partial E}{\partial \ell_i} = \lambda_i $. (*Assuming that this top Blob is not used as a bottom (input) by any other layer of the Net.)
propagate_down see Layer::Backward. propagate_down[1] must be false as gradient computation with respect to the targets is not implemented.
bottom input Blob vector (length 2)
  1. $ (N \times C \times H \times W) $ the predictions $x$; Backward computes diff $ \frac{\partial E}{\partial x} = \frac{1}{n} \sum\limits_{n=1}^N (\hat{p}_n - p_n) $
  2. $ (N \times 1 \times 1 \times 1) $ the labels – ignored as we can't compute their error gradients
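
The per-element gradient follows directly from the stable form of the loss given earlier (a short sketch, same notation): with $ \ell_n = \log(1 + e^{x_n}) - p_n x_n $,

$ \frac{\partial \ell_n}{\partial x_n} = \frac{e^{x_n}}{1 + e^{x_n}} - p_n = \sigma(x_n) - p_n = \hat{p}_n - p_n $

which is why Backward_cpu below only needs an element-wise subtraction of the targets from the sigmoid outputs (caffe_sub), followed by scaling the whole diff by loss_weight / num.
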
The CPU implementation of the layer:

#include <algorithm>
#include <cfloat>
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

// Computes the cross-entropy (logistic) loss; it is often used for predicting
// targets interpreted as probabilities. Detailed reference: the official document at
// http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SigmoidCrossEntropyLossLayer.html

namespace caffe {

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  sigmoid_bottom_vec_.clear();
  sigmoid_bottom_vec_.push_back(bottom[0]);
  sigmoid_top_vec_.clear();
  sigmoid_top_vec_.push_back(sigmoid_output_.get());
  sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_);
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
      "SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count.";
  sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_);
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // The forward pass computes the sigmoid outputs.
  sigmoid_bottom_vec_[0] = bottom[0];
  sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
  // Compute the loss (negative log likelihood)
  const int count = bottom[0]->count();
  const int num = bottom[0]->num();
  // Stable version of loss computation from input data
  const Dtype* input_data = bottom[0]->cpu_data();
  const Dtype* target = bottom[1]->cpu_data();
  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
        log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
  }
  top[0]->mutable_cpu_data()[0] = loss / num;
}

template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (propagate_down[0]) {
    // First, compute the diff
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
    const Dtype* target = bottom[1]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_sub(count, sigmoid_output_data, target, bottom_diff);
    // Scale down gradient
    const Dtype loss_weight = top[0]->cpu_diff()[0];
    caffe_scal(count, loss_weight / num, bottom_diff);
  }
}

#ifdef CPU_ONLY
STUB_GPU_BACKWARD(SigmoidCrossEntropyLossLayer, Backward);
#endif

INSTANTIATE_CLASS(SigmoidCrossEntropyLossLayer);
REGISTER_LAYER_CLASS(SigmoidCrossEntropyLoss);

}  // namespace caffe
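
To sanity-check the numbers outside of Caffe, here is a minimal standalone C++ sketch (the helper names element_loss and element_diff are hypothetical, not part of Caffe) that mirrors the stable forward loss and the per-element diff computed by the layer above:

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <vector>

// Stable per-element sigmoid cross-entropy loss: log(1 + exp(x)) - y * x,
// rewritten so the exp() argument is never positive (mirrors Forward_cpu).
double element_loss(double x, double y) {
  double pos = (x >= 0) ? 1.0 : 0.0;
  return -(x * (y - pos) - std::log(1.0 + std::exp(x - 2.0 * x * pos)));
}

// Per-element gradient w.r.t. the score x: sigmoid(x) - y (mirrors Backward_cpu).
double element_diff(double x, double y) {
  return 1.0 / (1.0 + std::exp(-x)) - y;
}

int main() {
  // One "image" with four scores and binary targets, so num = 1, count = 4.
  std::vector<double> scores  = {2.0, -1.5, 0.0, 30.0};  // large score: still no overflow
  std::vector<double> targets = {1.0,  0.0, 1.0,  0.0};
  const int num = 1;

  double loss = 0.0;
  for (std::size_t i = 0; i < scores.size(); ++i) {
    loss += element_loss(scores[i], targets[i]);
  }
  loss /= num;  // the layer divides by the batch size num, not by count
  std::printf("loss = %f\n", loss);

  // Inside a Net the diff would additionally be scaled by the loss_weight
  // read from top[0]->cpu_diff()[0].
  for (std::size_t i = 0; i < scores.size(); ++i) {
    std::printf("diff[%zu] = %f\n", i, element_diff(scores[i], targets[i]) / num);
  }
  return 0;
}

In a network definition the layer is selected by the registered name SigmoidCrossEntropyLoss (see REGISTER_LAYER_CLASS above), with the score blob and the target blob as its two bottoms.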
