Comments (10)
Hi,
Can you please share the prototxt of the model?
Thanks,
from caffe-jacinto.
Hi Manu,
Below is my train.prototxt.
I have tried setting both True and False for the "in_place" parameter of the BatchNorm layers, but it doesn't seem to make a difference.
name: "train"
layer {
name: "data"
type: "ImageLabelListData"
top: "data"
top: "label"
transform_param {
mirror: true
crop_size: 72
mean_value: 0.0
}
image_label_data_param {
image_list_path: "train_image_list.txt"
label_list_path: "train_label_list.txt"
batch_size: 128
shuffle: true
scale_prob: 0.5
scale_min: 0.75
scale_max: 1.25
threads: 1
}
}
layer {
name: "data/bias"
type: "Bias"
bottom: "data"
top: "data/bias"
param {
lr_mult: 0.0
decay_mult: 0.0
}
bias_param {
filler {
type: "constant"
value: -128.0
}
}
}
layer {
name: "conv1a"
type: "Convolution"
bottom: "data/bias"
top: "conv1a"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 32
bias_term: true
pad: 1
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 1
}
}
layer {
name: "conv1a/bn"
type: "BatchNorm"
bottom: "conv1a"
top: "conv1a/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv1a/relu"
type: "ReLU"
bottom: "conv1a/bn"
top: "conv1a/bn"
}
layer {
name: "down1"
type: "Convolution"
bottom: "conv1a/bn"
top: "down1"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 32
bias_term: true
pad: 1
kernel_size: 3
group: 4
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 1
}
}
layer {
name: "down1/bn"
type: "BatchNorm"
bottom: "down1"
top: "down1/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "down1/relu"
type: "ReLU"
bottom: "down1/bn"
top: "down1/bn"
}
layer {
name: "conv2a"
type: "Convolution"
bottom: "down1/bn"
top: "conv2a"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 32
bias_term: true
pad: 2
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 2
}
}
layer {
name: "conv2a/bn"
type: "BatchNorm"
bottom: "conv2a"
top: "conv2a/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv2a/relu"
type: "ReLU"
bottom: "conv2a/bn"
top: "conv2a/bn"
}
layer {
name: "conv3a"
type: "Convolution"
bottom: "conv2a/bn"
top: "conv3a"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 32
bias_term: true
pad: 2
kernel_size: 3
group: 4
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 2
}
}
layer {
name: "conv3a/bn"
type: "BatchNorm"
bottom: "conv3a"
top: "conv3a/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv3a/relu"
type: "ReLU"
bottom: "conv3a/bn"
top: "conv3a/bn"
}
layer {
name: "res1_sum"
type: "Eltwise"
bottom: "down1/bn"
bottom: "conv3a/bn"
top: "res1_sum"
}
layer {
name: "down2"
type: "Convolution"
bottom: "res1_sum"
top: "down2"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 1
kernel_size: 3
group: 4
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 1
}
}
layer {
name: "down2/bn"
type: "BatchNorm"
bottom: "down2"
top: "down2/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "down2/relu"
type: "ReLU"
bottom: "down2/bn"
top: "down2/bn"
}
layer {
name: "conv4a"
type: "Convolution"
bottom: "down2/bn"
top: "conv4a"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 2
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 2
}
}
layer {
name: "conv4a/bn"
type: "BatchNorm"
bottom: "conv4a"
top: "conv4a/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv4a/relu"
type: "ReLU"
bottom: "conv4a/bn"
top: "conv4a/bn"
}
layer {
name: "conv5a"
type: "Convolution"
bottom: "conv4a/bn"
top: "conv5a"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 2
kernel_size: 3
group: 4
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 2
}
}
layer {
name: "conv5a/bn"
type: "BatchNorm"
bottom: "conv5a"
top: "conv5a/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv5a/relu"
type: "ReLU"
bottom: "conv5a/bn"
top: "conv5a/bn"
}
layer {
name: "res2_sum"
type: "Eltwise"
bottom: "down2/bn"
bottom: "conv5a/bn"
top: "res2_sum"
}
layer {
name: "conv6a"
type: "Convolution"
bottom: "res2_sum"
top: "conv6a"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 2
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 2
}
}
layer {
name: "conv6a/bn"
type: "BatchNorm"
bottom: "conv6a"
top: "conv6a/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv6a/relu"
type: "ReLU"
bottom: "conv6a/bn"
top: "conv6a/bn"
}
layer {
name: "conv7a"
type: "Convolution"
bottom: "conv6a/bn"
top: "conv7a"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 2
kernel_size: 3
group: 4
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 2
}
}
layer {
name: "conv7a/bn"
type: "BatchNorm"
bottom: "conv7a"
top: "conv7a/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv7a/relu"
type: "ReLU"
bottom: "conv7a/bn"
top: "conv7a/bn"
}
layer {
name: "res3_sum"
type: "Eltwise"
bottom: "res2_sum"
bottom: "conv7a/bn"
top: "res3_sum"
}
layer {
name: "down3"
type: "Convolution"
bottom: "res2_sum"
top: "down3"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 128
bias_term: true
pad: 1
kernel_size: 3
group: 4
stride: 2
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 1
}
}
layer {
name: "down3/bn"
type: "BatchNorm"
bottom: "down3"
top: "down3/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "down3/relu"
type: "ReLU"
bottom: "down3/bn"
top: "down3/bn"
}
layer {
name: "conv6b"
type: "Convolution"
bottom: "down3/bn"
top: "conv6b"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 128
bias_term: true
pad: 2
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 2
}
}
layer {
name: "conv6b/bn"
type: "BatchNorm"
bottom: "conv6b"
top: "conv6b/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv6b/relu"
type: "ReLU"
bottom: "conv6b/bn"
top: "conv6b/bn"
}
layer {
name: "conv7b"
type: "Convolution"
bottom: "conv6b/bn"
top: "conv7b"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 128
bias_term: true
pad: 2
kernel_size: 3
group: 4
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 2
}
}
layer {
name: "conv7b/bn"
type: "BatchNorm"
bottom: "conv7b"
top: "conv7b/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv7b/relu"
type: "ReLU"
bottom: "conv7b/bn"
top: "conv7b/bn"
}
layer {
name: "res4_sum"
type: "Eltwise"
bottom: "down3/bn"
bottom: "conv7b/bn"
top: "res4_sum"
}
layer {
name: "conv8b"
type: "Convolution"
bottom: "res4_sum"
top: "conv8b"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 1
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 1
}
}
layer {
name: "conv8b/bn"
type: "BatchNorm"
bottom: "conv8b"
top: "conv8b/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv8b/relu"
type: "ReLU"
bottom: "conv8b/bn"
top: "conv8b/bn"
}
layer {
name: "deconv3"
type: "Deconvolution"
bottom: "conv8b/bn"
top: "deconv3"
param {
lr_mult: 0.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 1
kernel_size: 4
group: 64
stride: 2
weight_filler {
type: "bilinear"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "concat"
type: "Concat"
bottom: "res3_sum"
bottom: "deconv3"
top: "concat"
}
layer {
name: "conv_final1a"
type: "Convolution"
bottom: "concat"
top: "conv_final1a"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 1
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 1
}
}
layer {
name: "conv_final1a/bn"
type: "BatchNorm"
bottom: "conv_final1a"
top: "conv_final1a/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv_final1a/relu"
type: "ReLU"
bottom: "conv_final1a/bn"
top: "conv_final1a/bn"
}
layer {
name: "conv_final1b"
type: "Convolution"
bottom: "conv_final1a/bn"
top: "conv_final1b"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 64
bias_term: true
pad: 1
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 1
}
}
layer {
name: "conv_final1b/bn"
type: "BatchNorm"
bottom: "conv_final1b"
top: "conv_final1b/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv_final1b/relu"
type: "ReLU"
bottom: "conv_final1b/bn"
top: "conv_final1b/bn"
}
layer {
name: "conv_final1c"
type: "Convolution"
bottom: "conv_final1b/bn"
top: "conv_final1c"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 2.0
decay_mult: 0.0
}
convolution_param {
num_output: 7
bias_term: true
pad: 1
kernel_size: 3
group: 1
stride: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0.0
}
dilation: 1
}
}
layer {
name: "conv_final1c/bn"
type: "BatchNorm"
bottom: "conv_final1c"
top: "conv_final1c/bn"
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 1.0
decay_mult: 1.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
param {
lr_mult: 0.0
decay_mult: 0.0
}
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv_final1c/relu"
type: "ReLU"
bottom: "conv_final1c/bn"
top: "conv_final1c/bn"
}
layer {
name: "deconv2"
type: "Deconvolution"
bottom: "conv_final1c/bn"
top: "deconv2"
param {
lr_mult: 0.0
decay_mult: 0.0
}
convolution_param {
num_output: 7
bias_term: true
pad: 1
kernel_size: 4
group: 7
stride: 2
weight_filler {
type: "bilinear"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "deconv1"
type: "Deconvolution"
bottom: "deconv2"
top: "deconv1"
param {
lr_mult: 0.0
decay_mult: 0.0
}
convolution_param {
num_output: 7
bias_term: true
pad: 1
kernel_size: 4
group: 7
stride: 2
weight_filler {
type: "bilinear"
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "deconv1"
bottom: "label"
top: "loss"
propagate_down: true
propagate_down: false
loss_param {
normalization: VALID
}
}
layer {
name: "accuracy/top1"
type: "Accuracy"
bottom: "deconv1"
bottom: "label"
top: "accuracy/top1"
include {
phase: TEST
}
}
from caffe-jacinto.
If you notice the latest prototxt, you will notice that I am now using ImageLabelData and not ImageLabelListData. This is because ImageLabelData is significantly faster. But the downside is that you need to prepare LMDBs for it.
Try training with ImageLabelData and let me know if there is a speed improvement. That will help to identify where the problem is. caffe-0.16 has some changes in the way data is loaded.
from caffe-jacinto.
Hi Manu,
I have tried training with ImageLabelData and LMDB. The training speed does improve a little bit but is still slower than caffe-0.15.
When I monitor the GPU utilization, caffe-0.15 runs at >75% utilization while caffe-0.16 fluctuates between 35%-65%.
William
from caffe-jacinto.
I am not sure what is happening, but let us try to analyze.
Improved BatchNorm layer is a key difference between caffe-0.15 and caffe-0.16. In the definition of BatchNorm layers:
What happens if you remove all those param {} blocks and also make it in-place by having the same blob for input and output? Note: you need to change the input of the subsequent layer as well, since we don't use /bn in the blob name for the output of BatchNorm. Example:
layer {
name: "conv1a/bn"
type: "BatchNorm"
bottom: "conv1a"
top: "conv1a"
batch_norm_param {
moving_average_fraction: 0.990000009537
eps: 9.99999974738e-05
scale_filler {
type: "constant"
value: 1.0
}
bias_filler {
type: "constant"
value: 0.0
}
}
}
layer {
name: "conv1a/relu"
type: "ReLU"
bottom: "conv1a"
top: "conv1a"
}
layer {
name: "down1"
type: "Convolution"
bottom: "conv1a"
top: "down1"
Note that the above in-place BatchNorm configuration will not train properly in caffe-0.15 - only in caffe-0.16.
from caffe-jacinto.
I have made the BatchNorm layers in-place and removed the param {} blocks. The training speed is the same. My latest prototxt is in the link below:
https://drive.google.com/file/d/1kZYGXeek1k-Z4Uf1V4rq79qk9DteaenV/view?usp=sharing
Could it be the switch from cudnn5.0 to cudnn6.0? My caffe-0.15 was compiled with cudnn5.0 while caffe-0.16 with cudnn6.0 since it is not compatible with lower version.
from caffe-jacinto.
Its possible. Let's try some changes: Try these changes one by one and check the speed.
(1) You have a few layers with dilations. Remove the dilations
(2) You have a layer with large number of groups (Deconvolution layer). Reduce the number of groups to 1
(3) If the above doesn't give a clue, remove the deconvolution layer if possible. I have seen that the Deconvolution layer affects the speed under certain conditions - but I have not been able to pinpoint exactly what that condition is. Note that the CUDNN implementation itself is missing for deconvolution.
from caffe-jacinto.
Hi Manu,
I have tried (1) and (2). Both result in faster training speed in caffe-0.15 than in caffe-0.16 as before.
And your hypothesis about (3) is right. Caffe-0.16 is now running faster than caffe-0.15 if I remove the deconv layers, for both ImageLabelListData and ImageLabelData input layer type. Having said that, I have not identified under what circumstance would the training speed remain this way while still having the deconv layers.
Many thanks for helping to identify the source of problem.
William
from caffe-jacinto.
Not sure if it is related to the fact that CUDNN implementation is missing for Deconvolution - but then that would impact both caffe-0.15 and caffe-0.16 equally. NVIDIA#386
Btw, how were you able to remove Deconvolution completely? In an image to image problem we need to have Deconvolution layers. It is important to root cause this issue and get it fixed.
Can you file an issue in https://github.com/NVIDIA/caffe? You may have to share your prototxt model there so that they can try to root cause the issue.
from caffe-jacinto.
Thanks for the information. I shall try the cudnn_deconv_layer commit and see how it affects caffe-0.15 and caffe-0.16.
I removed deconvolution layers together with their corresponding downsampling/pooling layers.
from caffe-jacinto.
Related Issues (20)
- Upgrade to NVIDIA Caffe 0.17 HOT 3
- Make runtest failed HOT 3
- How to test ssd with fp16 inference time??? HOT 2
- Open CV issue on Jetson TX2 HOT 1
- Mobile SSD HOT 11
- Absorbing batch norm into conv weights HOT 2
- Quantization failed when testing SSD HOT 7
- Weighted Softmax Loss HOT 1
- how to use quantization HOT 2
- centerLossLayer HOT 3
- Depthswise Convolution from existing Convolution Layer? HOT 1
- TIDL quantize HOT 13
- Build with CPU HOT 6
- Incorrect results for certain models (Batch Normalization Issue)
- error occur while compilation HOT 2
- Build errors on Ubuntu 18.04
- Support for RoiPooling Layer HOT 10
- Incorrect results for SSD Mobilenet model trained on caffe (forked from ssd caffe by weiliu) HOT 6
- Fixed filter bank neural networks HOT 5
- Segmentation Fault
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from caffe-jacinto.