This project predicts whether a client will subscribe to a term deposit, based on a set of client features.
The code shows how to use Amazon SageMaker to train a model on this dataset, deploy it as an endpoint, and use that endpoint to make predictions on new data.
# Evaluate the deployed endpoint on the held-out test set and print a
# confusion-matrix style summary of observed vs. predicted outcomes.
#
# Relies on names defined earlier in the script/notebook: `test_data`,
# `xgb_predictor`, `np` (numpy) and `pd` (pandas).
from sagemaker.predictor import csv_serializer

# Send only the feature columns: both `y_no` and `y_yes` are dropped before
# the data is turned into a plain array for inference.
test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).values

# Set the data type for an inference.
# NOTE(review): the original assigned to `content_type_csv`, an attribute the
# predictor does not appear to read; `content_type` is the attribute used by
# the SDK v1 predictor that `csv_serializer` belongs to -- confirm against the
# installed SDK version.
xgb_predictor.content_type = 'text/csv'
xgb_predictor.serializer = csv_serializer  # set the serializer type

# Predict! The endpoint returns bytes, decoded here into comma-separated text.
predictions = xgb_predictor.predict(test_data_array).decode('utf-8')

# Turn the prediction into an array. The first character of the response is
# skipped before parsing (presumably a leading delimiter -- TODO confirm
# against the raw endpoint output).
predictions_array = np.fromstring(predictions[1:], sep=',')

# Rows are the observed labels, columns are the rounded predictions.
cm = pd.crosstab(
    index=test_data['y_yes'],
    columns=np.round(predictions_array),
    rownames=['Observed'],
    colnames=['Predicted'],
)

# The positional lookups below need both classes on both axes; fail with a
# clear message instead of a bare IndexError when one class is missing.
if cm.shape != (2, 2):
    raise ValueError(
        "Expected a 2x2 confusion matrix (both classes observed and "
        "predicted), got shape {0}".format(cm.shape)
    )

tn = cm.iloc[0, 0]  # observed no,  predicted no
fn = cm.iloc[1, 0]  # observed yes, predicted no
tp = cm.iloc[1, 1]  # observed yes, predicted yes
fp = cm.iloc[0, 1]  # observed no,  predicted yes

# Overall share of correct predictions, as a percentage.
p = (tp + tn) / (tp + tn + fp + fn) * 100

# Percentages in the table are normalised per *predicted* column: each cell
# is its share of all rows predicted as that class.
predicted_no_total = tn + fn
predicted_yes_total = tp + fp

print("\n{0:<20}{1:<4.1f}%\n".format("Overall Classification Rate: ", p))
print("{0:<15}{1:<15}{2:>8}".format("Predicted", "No Purchase", "Purchase"))
print("Observed")
print("{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})".format(
    "No Purchase",
    tn / predicted_no_total * 100, tn,
    fp / predicted_yes_total * 100, fp,
))
print("{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \n".format(
    "Purchase",
    fn / predicted_no_total * 100, fn,
    tp / predicted_yes_total * 100, tp,
))