DL Development Note

本文记录 Deep Learning 库的开发文档。

Layers

公共父类Layer

#pragma once
#include <vector>
#include <string>
#include <map>
#include <cmath>
#include "core/base.h"
#include "core/utils/tensor.h"
#include "methods/neural_network/initializers/initializer.h"

using std::vector;
using std::string;
using std::map;

class Layer {
 public:
  Layer();
  virtual ~Layer();
  virtual void InitParams(Initializer& weights_initializer, Initializer& bias_initializere);
  virtual void InitParams(Initializer& weights_initializer);


  virtual void Eval(shared_ptr<Tensor4D>& bottom, shared_ptr<Tensor4D>& top);

  virtual void Eval(shared_ptr<Tensor4D>& bottom, shared_ptr<Tensor4D>& top, Layer* previous_layer);

  virtual void Eval(shared_ptr<Tensor4D>& X);
  /**
   * @brief To do the forward phase.
   *
   * @param bottom A container contains 4D tensor {num, channels, height, width} which represents the input of layer. Each 4D tensor in the container represents a input from the previous layer or data source.
   * @param top A container contains 4D tensor {num, channels, height, width} which represents the output of layer
   */
  virtual void Forward(shared_ptr<Tensor4D>& bottom, shared_ptr<Tensor4D>& top);

  virtual void Forward(shared_ptr<Tensor4D>& bottom, shared_ptr<Tensor4D>& top, Layer* previous_layer);

  virtual void Forward(shared_ptr<Tensor4D>& X);
  /**
   * @brief To do the backward phase.
   *
   */
  virtual void Backward(shared_ptr<Tensor4D>& bottom_grad, shared_ptr<Tensor4D>& top_grad);
  virtual void Backward(shared_ptr<Tensor4D>& bottom_grad, shared_ptr<Tensor4D>& top_grad, Layer* next_layer);
  virtual void Backward(shared_ptr<Tensor4D>& Y);
  virtual void Backward(); // used in loss layer
  virtual void LinkNext(shared_ptr<Layer>& next_layer);
  virtual void LinkPrevious(shared_ptr<Layer>& previous_layers);
  virtual shared_ptr<Tensor4D>& get_prediction();

  virtual string get_name();
  virtual void set_name(string name);
  virtual LayerParam* get_params();
  virtual LayerParam* get_params_grad();
  virtual void Reset();
  virtual void SetLossLayer();
  virtual void SetNotLossLayer();
  virtual void Summary();
  virtual void Backtrace();
  /* the data come from previous layer or input data */
  vector<shared_ptr<Tensor4D> > bottom_;
  vector<shared_ptr<Tensor4D> > top_;
  vector<shared_ptr<Tensor4D> > bottom_grads_;
  vector<shared_ptr<Tensor4D> > top_grads_;
  /* Store some additional coefficients in the layer */
  map<string, double> coeff_;
  map<string, vector<double> > coeff2_;
  vector<shared_ptr<Layer> > next_layers_;
  vector<Layer*> previous_layers_;

 protected:
  /* Activate the next layers evaluation*/
  virtual void NextEval();
  /* Activate the next layers forward*/
  virtual void NextForward();
  /* Activate the previous layers backward*/
  virtual void PreviousBackward();
  /* Layer's parameters contains weights and bias */
  LayerParam params_;
  /* Layer's parameters gradients */
  LayerParam params_grad_;

  LayerParam weights_;
  LayerParam bias_;

  /* Additional message need be stored in layer */
  vector<shared_ptr<Tensor4D> > msg_;
  /* The name of layer. Notice: the name of each layer must be different */
  string name_;
  /* indicate the layer is loss layer or not */
  bool loss_layer_;
  /* count the layer has been created */
  static int layer_count_;
};

/* Chaining operators to wire layers together: `a >> b` links a before b
 * (and returns `next` so calls can be chained); `a << b` is the mirrored
 * form linking b before a. */
shared_ptr<Layer>& operator >> (shared_ptr<Layer>& pre, shared_ptr<Layer>& next);
shared_ptr<Layer>& operator << (shared_ptr<Layer>& next, shared_ptr<Layer>& pre);

对于继承 Layer 的子类有几个需要注意的地方:

  1. 一般必须要实现三个函数:
    void Eval(shared_ptr<Tensor4D>& bottom, shared_ptr<Tensor4D>& top) override;
    void Forward(shared_ptr<Tensor4D>& bottom, shared_ptr<Tensor4D>& top) override;
    void Backward(shared_ptr<Tensor4D>& bottom_grad, shared_ptr<Tensor4D>& top_grad) override;
  2. 对于包含权重系数的子类 layer,必须将系数 append 到父类变量 params_、weights_、bias_:
  params_.Append(ParameterD(kernels_, size));
  weights_.Append(ParameterD(kernels_, size));
  use_bias_ = use_bias;
  if (use_bias) {
    b_ = new double [num_filters];
    params_.Append(ParameterD(b_, num_filters));
    bias_.Append(ParameterD(b_, num_filters));
  }
  3. 对于包含权重系数的子类 layer,必须对 coeff_["fan_in"]、coeff_["fan_out"] 两个参数进行赋值(用于系数初始化):
  coeff_["fan_in"] = in_channels * kernel_size_[0] * kernel_size_[1];
  coeff_["fan_out"] = num_filters * kernel_size_[0] * kernel_size_[1];
  4. 子类在 Backward 函数内要初始化 params_grad_:
  if (params_grad_.Size() == 0)
    params_grad_.Copy(params_, 0);
  5. 子类在 Forward 和 Backward 中都需要将输入和输出进行保存,并调用下一个 layer:
/* Example subclass Forward: cache the output and input tensors on the base
 * class, then trigger the forward pass of all linked downstream layers. */
void Forward(shared_ptr<Tensor4D>& bottom, shared_ptr<Tensor4D>& top) {
  top_.push_back(top);
  bottom_.push_back(bottom);
  NextForward();
}

/* Example subclass Backward: publish the computed input gradient, cache it,
 * then trigger the backward pass of all linked upstream layers.
 * NOTE(review): `input_grad` is assumed to be the gradient the subclass
 * computed earlier in this function — confirm against a concrete layer. */
void Backward(shared_ptr<Tensor4D>& bottom_grad, shared_ptr<Tensor4D>& top_grad) {
  bottom_grad = input_grad;
  bottom_grads_.push_back(bottom_grad);
  PreviousBackward();
}
updated 2021-11-06