-
Notifications
You must be signed in to change notification settings - Fork 2
/
NN_Playground.py
executable file
·133 lines (111 loc) · 3.94 KB
/
NN_Playground.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/python3
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import plot_confusion_matrix
import seaborn as sns
from math import sqrt
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=FutureWarning)
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Load the Indian liver patient dataset; the 'Dataset' column is the label
# (1 = liver patient, 2 = healthy in the raw file).
data = pd.read_csv('datasets_2607_4342_indian_liver_patient_labelled.csv')
# preprocessing
headers = list(data.columns)
headers.remove('Dataset')  # the label is not an input feature
# Replace missing values with 0 across all columns in one pass
# (the raw file has NaNs, e.g. in the albumin/globulin ratio).
data = data.fillna(0)
# One-hot encode the categorical Gender column.
# BUGFIX: the dummy columns were previously created but never added to
# `headers`, so X silently excluded gender from the model's inputs.
gender_dummies = pd.get_dummies(data['Gender'], prefix='Gender')
data = pd.concat([data, gender_dummies], axis=1)
headers.remove('Gender')            # drop the raw string column...
headers.extend(gender_dummies.columns)  # ...and use its dummies instead
# Remap labels to {0, 1}: 1 (patient) -> 0, then 2 (healthy) -> 1.
data['Dataset'] = data['Dataset'].replace([1], 0)
data['Dataset'] = data['Dataset'].replace([2], 1)
# creating input features and labels
X = data[headers]
Y_numeric = data[['Dataset']]          # integer labels, kept for the confusion matrix
Y = to_categorical(data[['Dataset']])  # one-hot labels for training
# Build a small fully-connected classifier sized to the feature matrix.
# NOTE(review): two sigmoid outputs with one-hot labels and binary
# cross-entropy treats the classes as independent; softmax would be the
# conventional choice, but the original behavior is preserved here.
model = Sequential()
n_features = X.shape[1]  # X is (n_samples, n_features)
model.add(Dense(10, activation='relu', input_dim=n_features))
model.add(Dense(8, activation='relu'))
model.add(Dense(2, activation='sigmoid'))
# Compile with Adam and track accuracy during training.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Fit on the data, holding out 20% of it for per-epoch validation metrics.
history = model.fit(X, Y, epochs=100, batch_size=10, validation_split=0.2)
# Evaluate the fitted model.
# NOTE(review): this evaluates on the same data used for fitting, so the
# numbers measure fit quality, not generalization.
loss, accuracy = model.evaluate(X, Y)
print('Loss on training data: %.2f' % loss)
print('Accuracy on training data: %.2f' % (100 * accuracy))
# Print the layer-by-layer architecture summary.
print(model.summary())
# (history.history holds the per-epoch metric curves used for plotting below.)
# Plot per-epoch accuracy for the training and validation splits and save
# it to Accuracy.png (no interactive display).
plt.figure()
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('Accuracy.png')
# Plot per-epoch loss for the same splits and save it to Loss.png.
# (Removed the unused `ax2 =` binding — plt.plot's return value was never used.)
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.savefig('Loss.png')
# Obtain the confusion matrix on the full dataset.
# BUGFIX: Sequential.predict_classes was removed in TensorFlow 2.6; take
# the argmax over the per-class outputs instead (equivalent for this
# 2-output model).
prediction = np.argmax(model.predict(X), axis=1)
conf_matrix = confusion_matrix(Y_numeric, prediction)
print('Confusion Matrix')
print(conf_matrix)
# sklearn convention: rows are actual labels, columns are predictions, so
# for a 2x2 matrix the layout is [[tn, fp], [fn, tp]].
(tn, fp), (fn, tp) = conf_matrix
# Plot the confusion matrix as an annotated heatmap and save it.
class_names = [0, 1]
fig, ax = plt.subplots()
tick_marks = np.arange(len(class_names))
plt.xticks(tick_marks, class_names)
plt.yticks(tick_marks, class_names)
sns.heatmap(pd.DataFrame(conf_matrix), annot=True, cmap="viridis", fmt='g')
ax.xaxis.set_label_position("bottom")
plt.title('Confusion matrix')
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
plt.tight_layout()
plt.savefig('Confusion_matrix.png')
plt.show()
# Derive scalar classification metrics from the confusion-matrix counts.
total = tp + tn + fp + fn
error_rate = (fp + fn) / total
accuracy = 1 - error_rate
sensitivity = tp / (tp + fn)         # recall / true-positive rate
specificity = tn / (tn + fp)         # true-negative rate
precision = tp / (tp + fp)
false_positive_rate = fp / (tn + fp)
# Matthews correlation coefficient.
mcc = ((tp * tn) - (fp * fn)) / (sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)))
# beta = 1 makes the F-score the standard F1 (harmonic mean of
# precision and sensitivity).
beta = 1
f_score = ((1 + beta * beta) * (precision * sensitivity)) / (beta * beta * precision + sensitivity)
metrics = {
    'Error rate': error_rate,
    "Accuracy": accuracy,
    "Loss": loss,
    "Sensitivity": sensitivity,
    "Specificity": specificity,
    "Precision": precision,
    "FPR": false_positive_rate,
    "MCC": mcc,
    "F score": f_score,
}
print('=========================')
print(" Metrics")
print('=========================')
# Series.T is a no-op on a 1-D Series, so print the Series directly.
print(pd.Series(metrics))