Hello there,
In the paper you say that the model has four views and that the pipelines for the four views are separate until the global average pool and are then concatenated — something like the following ASCII art:
R-MLO |-a1-|-a2-|-a3-|--|--|--|--|--|--| \
L-MLO |-b1-|-b2-|-b3-|--|--|--|--|--|--|\ \
> >--|--|--|--|
R-CC |-c1-|-c2-|-c3-|--|--|--|--|--|--|/ /
L-CC |-d1-|-d2-|-d3-|--|--|--|--|--|--| /
But when I look at the layers of the Python model, only one layer is defined for each of MLO and CC, instead of two. In the forward pass, the left and right images of each orientation are put through the same layer. Something like:
L-CC \ \ \
\|-a1-/|-a1-\|-a2-/|-a2-\|-a3-/|-a3-|...
R-CC / / /
Which creates a bizarre architecture like:
R-MLO |-a1-|-a2-|-a3-|--|--|--|--|--|--| \
L-MLO |-a1-|-a2-|-a3-|--|--|--|--|--|--|\ \
> >--|--|--|--|
R-CC |-b1-|-b2-|-b3-|--|--|--|--|--|--|/ /
L-CC |-b1-|-b2-|-b3-|--|--|--|--|--|--| /
Here is the code for conv. layers taken from layers_torch.py
def __init__(self, in_channels, number_of_filters=32, filter_size=(3, 3), stride=(1, 1)):
    """Build the convolutional layers for the four-view model.

    Creates two Conv2d layers with identical hyperparameters: `self.cc`
    (used for the CC views) and `self.mlo` (used for the MLO views).
    """
    super(AllViewsConvLayer, self).__init__()

    def _make_conv():
        # Both orientation-specific convolutions share the same hyperparameters.
        return nn.Conv2d(
            in_channels=in_channels,
            out_channels=number_of_filters,
            kernel_size=filter_size,
            stride=stride,
        )

    # Creation order (cc first, then mlo) matches the original, so random
    # weight initialization consumes the RNG in the same sequence.
    self.cc = _make_conv()
    self.mlo = _make_conv()
def forward(self, x):
    """Apply conv + ReLU to each of the four views.

    The same `self.cc` module processes both L-CC and R-CC, and the same
    `self.mlo` module processes both L-MLO and R-MLO — i.e. left and right
    images of an orientation share weights.
    """
    # Map each view name to the conv module that handles it; insertion
    # order matches the original returned dict.
    layer_for_view = {
        "L-CC": self.cc,   # [Addition] (1)
        "L-MLO": self.mlo,
        "R-CC": self.cc,   # [Addition] (2)
        "R-MLO": self.mlo,
    }
    return {view: F.relu(conv(x[view])) for view, conv in layer_for_view.items()}
Notice that in lines (1) and (2), L-CC and R-CC are forwarded through the same layer. The same issue applies to R-MLO and L-MLO.
Here is what I get when I inspect the first layer of the model: there are only two conv layers instead of four. Therefore, the model effectively has two views, not four.
model._conv_layer_ls[0]
Out[20]:
AllViewsConvLayer(
(cc): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2))
(mlo): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2))
)
For TF, however, the layers seem to be in line with the paper:
def all_views_conv_layer(input_layer, layer_name, number_of_filters=32, filter_size=(3, 3), stride=(1, 1),
                         padding='VALID', biases_initializer=tf.zeros_initializer()):
    """Convolutional layers across all 4 views"""
    # Unpack the four mammography views.
    input_l_cc, input_r_cc, input_l_mlo, input_r_mlo = input_layer
    # One variable scope per orientation; CC weights live under "<layer_name>_CC".
    with tf.variable_scope(layer_name + "_CC") as cc_cope:
        h_l_cc = tf.contrib.layers.convolution2d(inputs=input_l_cc, num_outputs=number_of_filters,
                                                 kernel_size=filter_size, stride=stride, padding=padding,
                                                 scope=cc_cope, biases_initializer=biases_initializer)
        # NOTE(review): reuse=True makes the right-CC conv reuse the SAME
        # variables created for the left-CC conv above — so L-CC and R-CC
        # share one set of weights in this TF version as well.
        h_r_cc = tf.contrib.layers.convolution2d(inputs=input_r_cc, num_outputs=number_of_filters,
                                                 kernel_size=filter_size, stride=stride, padding=padding, reuse=True,
                                                 scope=cc_cope, biases_initializer=biases_initializer)
    # MLO weights live under "<layer_name>_MLO".
    with tf.variable_scope(layer_name + "_MLO") as mlo_cope:
        h_l_mlo = tf.contrib.layers.convolution2d(inputs=input_l_mlo, num_outputs=number_of_filters,
                                                  kernel_size=filter_size, stride=stride, padding=padding,
                                                  scope=mlo_cope, biases_initializer=biases_initializer)
        # reuse=True again: L-MLO and R-MLO share the MLO-scope weights.
        h_r_mlo = tf.contrib.layers.convolution2d(inputs=input_r_mlo, num_outputs=number_of_filters,
                                                  kernel_size=filter_size, stride=stride, padding=padding, reuse=True,
                                                  scope=mlo_cope, biases_initializer=biases_initializer)
    # Return the four transformed views as a tuple (l_cc, r_cc, l_mlo, r_mlo).
    h = (h_l_cc, h_r_cc, h_l_mlo, h_r_mlo)
    return h
Is this perhaps an oversight, or am I missing something? Does this PyTorch model achieve similar accuracy on the dataset?
Thanks.