This assessment covers building and training a tf.keras Sequential model, then applying regularization. The dataset comes from a "don't overfit" Kaggle competition. There are 300 features labeled 0-299 and a binary target called "target". With only 250 records total, this is a very small dataset to be using with a neural network.
You can assume that the dataset has already been scaled.
# Run this cell without changes
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Sequential, regularizers
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
- Using pandas, open the file data.csv as a DataFrame
- Drop the "id" column, since this is a unique identifier and not a feature
- Separate the data into X (a DataFrame with all columns except "target") and y (a Series with just the "target" column)
- The train-test split should work as-is once you create these variables
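For reference, a minimal sketch of one way these steps can look (assuming data.csv sits in the same directory as the notebook; adapt the path if yours differs):

# Sketch only -- one possible approach, not the only valid one
df = pd.read_csv("data.csv")
# Drop the unique identifier, which carries no predictive signal
df = df.drop(columns="id")
# Features are every remaining column except the target
X = df.drop(columns="target")
y = df["target"]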
# Replace None with appropriate code
# Read in the data
df = None
# Drop the "id" column
None
# Separate into X and y
X = None
y = None
# your code here
raise NotImplementedError
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2021)
X_train.shape
assert type(df) == pd.DataFrame
assert type(X) == pd.DataFrame
assert type(y) == pd.Series
# PUT ALL WORK FOR THE ABOVE QUESTION ABOVE THIS CELL
# THIS UNALTERABLE CELL CONTAINS HIDDEN TESTS
In the cell below, create an instance of a Sequential model (documentation here) called dense_model with a name of "dense" and otherwise default arguments.

In other words, create a model without any layers. We will add layers in a future step.
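To illustrate the name argument on an otherwise-default model (placeholder name here, not the one this question asks for):

# Sketch: a Sequential model with a custom name and no layers yet
example = Sequential(name="example")
example.name          # 'example'
len(example.layers)   # 0 -- empty until .add() is called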
# Replace None with appropriate code
dense_model = None
# your code here
raise NotImplementedError
dense_model.name
# Model should not have any layers yet
assert len(dense_model.layers) == 0
# PUT ALL WORK FOR THE ABOVE QUESTION ABOVE THIS CELL
# THIS UNALTERABLE CELL CONTAINS HIDDEN TESTS
How many input and output nodes should this model have?

Feel free to explore the attributes of X and y to determine this answer, or just enter numbers based on the problem description above.
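If you'd rather read the counts off the data than the description, a sketch of that exploration (note that DataFrame.shape returns plain Python ints, which is what the type checks below expect):

# Sketch: exploring X and y to find the node counts
X.shape      # (rows, columns) -- one input node per feature column
y.nunique()  # number of distinct target values -- binary targets
             # are conventionally handled with a single output node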
# Replace None with appropriate code
num_input_nodes = None
num_output_nodes = None
# your code here
raise NotImplementedError
# Both values should be integers
assert type(num_input_nodes) == int
assert type(num_output_nodes) == int
# PUT ALL WORK FOR THE ABOVE QUESTION ABOVE THIS CELL
# THIS UNALTERABLE CELL CONTAINS HIDDEN TESTS
The code below will use the input and output shapes you specified to add Dense layers to the model:
# Run this cell without changes
# Add input layer
dense_model.add(Dense(units=64, input_shape=(num_input_nodes,)))
# Add hidden layers
dense_model.add(Dense(units=64))
dense_model.add(Dense(units=64))
dense_model.layers
Specify an appropriate activation function (documentation here).

We'll simplify the problem by specifying that you should use the string identifier for the function, and it should be one of these options:

- sigmoid
- softmax

Hint: is this a binary or a multi-class problem? This should guide your choice of activation function.
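To see the difference concretely, a quick sketch of how the two candidates treat the same logits (sigmoid squashes each value independently into (0, 1); softmax normalizes the whole vector into a probability distribution over classes):

# Sketch: comparing the two candidate activations on sample logits
logits = tf.constant([2.0, 1.0, 0.5])
tf.sigmoid(logits)     # each entry independently in (0, 1)
tf.nn.softmax(logits)  # entries sum to 1 across the vector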
# Replace None with appropriate code
activation_function = None
# your code here
raise NotImplementedError
# activation_function should be a string
assert type(activation_function) == str
# PUT ALL WORK FOR THE ABOVE QUESTION ABOVE THIS CELL
# THIS UNALTERABLE CELL CONTAINS HIDDEN TESTS
Now we'll use that information to finalize the model.
If this code produces an error, consider restarting the kernel and re-running the code above. If it still produces an error, that is an indication that one or more of your answers above is incorrect.
# Run this cell without changes
# Add output layer
dense_model.add(Dense(units=num_output_nodes, activation=activation_function))
# Determine appropriate loss function
if num_output_nodes == 1:
    loss = "binary_crossentropy"
else:
    loss = "categorical_crossentropy"

# Compile model
dense_model.compile(
    optimizer="adam",
    loss=loss,
    metrics=["accuracy"]
)
dense_model.summary()
# Run this cell without changes
# Fit the model to the training data, using a subset of the
# training data as validation data
dense_model_results = dense_model.fit(
    x=X_train,
    y=y_train,
    batch_size=None,
    epochs=20,
    verbose=1,
    validation_split=0.4,
    shuffle=False
)
# Run this cell without changes
def plot_loss_and_accuracy(results, final=False):
    if final:
        val_label = "test"
    else:
        val_label = "validation"

    # Extracting metrics from model fitting
    train_loss = results.history['loss']
    val_loss = results.history['val_loss']
    train_accuracy = results.history['accuracy']
    val_accuracy = results.history['val_accuracy']

    # Setting up plots
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

    # Plotting loss info
    ax1.set_title("Loss")
    sns.lineplot(x=results.epoch, y=train_loss, ax=ax1, label="train")
    sns.lineplot(x=results.epoch, y=val_loss, ax=ax1, label=val_label)
    ax1.legend()

    # Plotting accuracy info
    ax2.set_title("Accuracy")
    sns.lineplot(x=results.epoch, y=train_accuracy, ax=ax2, label="train")
    sns.lineplot(x=results.epoch, y=val_accuracy, ax=ax2, label=val_label)
    ax2.legend()

plot_loss_and_accuracy(dense_model_results)
The model appears to be overfitting. To deal with this, modify the code below to include regularization in the model. You can add L1, L2, both L1 and L2, or dropout regularization.

Hint: these might be helpful. (EarlyStopping is a type of regularization that is not applicable to this problem framing, since it's a callback and not a layer.)

Evaluation and regularization of neural networks will be discussed in more detail in the future.
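For orientation before you fill in the function, the hidden layers you add might take shapes like these (the 0.01 penalty and 0.3 dropout rate are arbitrary placeholders, not tuned recommendations; regularizers.l1 and regularizers.l1_l2 work the same way as regularizers.l2):

# Sketch: a Dense layer with an L2 penalty on its weights...
classifier.add(Dense(units=64, kernel_regularizer=regularizers.l2(0.01)))
# ...and/or a Dropout layer that randomly zeroes 30% of activations during training
classifier.add(Dropout(0.3))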
def build_model_with_regularization(n_input, n_output, activation, loss):
    """
    Creates and compiles a tf.keras Sequential model with two hidden layers
    This time regularization has been added
    """
    # create classifier
    classifier = Sequential(name="regularized")

    # add input layer
    classifier.add(Dense(units=64, input_shape=(n_input,)))

    # add hidden layers
    # your code here
    raise NotImplementedError

    # add output layer
    classifier.add(Dense(units=n_output, activation=activation))
    classifier.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    return classifier

model_with_regularization = build_model_with_regularization(
    num_input_nodes, num_output_nodes, activation_function, loss
)
model_with_regularization.summary()
# Testing function to build model
assert type(model_with_regularization) == Sequential
# PUT ALL WORK FOR THE ABOVE QUESTION ABOVE THIS CELL
# THIS UNALTERABLE CELL CONTAINS HIDDEN TESTS
Now we'll evaluate the new model on the training set as well:
# Run this cell without changes
# Fit the model to the training data, using a subset of the
# training data as validation data
reg_model_results = model_with_regularization.fit(
    x=X_train,
    y=y_train,
    batch_size=None,
    epochs=20,
    verbose=0,
    validation_split=0.4,
    shuffle=False
)
plot_loss_and_accuracy(reg_model_results)
(Whether or not your regularization made a difference will depend partly on how strong a regularization penalty you applied, as well as on some random elements of your current TensorFlow configuration.)
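If you want your runs to be more repeatable while you experiment, you can seed the random number generators before building and fitting (a sketch for TF 2.x; this pins weight initialization and dropout masks, though some op-level nondeterminism can remain):

# Sketch: seeding for (more) reproducible runs
tf.random.set_seed(2021)
np.random.seed(2021)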
Now we evaluate both models on the holdout set:
# Run this cell without changes
final_dense_model_results = dense_model.fit(
    x=X_train,
    y=y_train,
    batch_size=None,
    epochs=20,
    verbose=0,
    validation_data=(X_test, y_test),
    shuffle=False
)
plot_loss_and_accuracy(final_dense_model_results, final=True)
final_reg_model_results = model_with_regularization.fit(
    x=X_train,
    y=y_train,
    batch_size=None,
    epochs=20,
    verbose=0,
    validation_data=(X_test, y_test),
    shuffle=False
)
plot_loss_and_accuracy(final_reg_model_results, final=True)
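To put single numbers next to the plots, a sketch that scores both fitted models on the holdout set (evaluate returns the loss followed by any compiled metrics, here accuracy):

# Sketch: holdout loss and accuracy for each model
dense_loss, dense_acc = dense_model.evaluate(X_test, y_test, verbose=0)
reg_loss, reg_acc = model_with_regularization.evaluate(X_test, y_test, verbose=0)
print(f"dense accuracy: {dense_acc:.3f} | regularized accuracy: {reg_acc:.3f}")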