This paper presents Layer Folding: an approach that reduces the depth of a pre-trained deep neural network by identifying non-linear activations, such as ReLU, Tanh and Sigmoid, that can be removed, and then folding the adjacent linear layers, i.e. fully connected and convolutional layers, into a single linear layer. The depth reduction can lead to lower run times, especially on edge devices. In addition, the paper shows that some tasks are characterized by a so-called "Effective Degree of Non-Linearity (EDNL)", which indicates how much the model's non-linear activations can be reduced without heavily compromising its performance.
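As a rough illustration of the folding step (a minimal sketch, not the authors' code; the function name and shapes are hypothetical), once the activation between two fully connected layers is removed, the two affine maps collapse into one:

```python
import numpy as np

def fold_linear_layers(W1, b1, W2, b2):
    """Collapse two stacked affine layers into a single equivalent layer:
    W2 @ (W1 @ x + b1) + b2  ==  (W2 @ W1) @ x + (W2 @ b1 + b2)."""
    W_folded = W2 @ W1
    b_folded = W2 @ b1 + b2
    return W_folded, b_folded

# Quick check that the folded layer matches the original two-layer stack.
rng = np.random.default_rng(0)
x = rng.standard_normal(8)
W1, b1 = rng.standard_normal((16, 8)), rng.standard_normal(16)
W2, b2 = rng.standard_normal((4, 16)), rng.standard_normal(4)

W_f, b_f = fold_linear_layers(W1, b1, W2, b2)
assert np.allclose(W2 @ (W1 @ x + b1) + b2, W_f @ x + b_f)
```

The same algebra applies to consecutive convolutions, which compose into a single convolution with a larger receptive field.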
@inproceedings{BZRAV2022,
author = {Ben Dror, Amir and Zenghut, Niv and Raviv, Avraham and Artyomov, Evgeny and Vitek, Ran},
title = {Layer Folding: Neural Network Depth Reduction using Activation Linearization},
booktitle = {BMVC},
year = {2022},
}