forked from rtk-rnjn/Diabetes-Checker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ml.py
123 lines (101 loc) · 3.53 KB
/
ml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from __future__ import annotations
from collections import namedtuple
from typing import Literal
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from utils import MISSING, ToAsync
# One patient's feature row. The field order is the positional order in which
# ``ML.predict`` passes its keyword arguments when it builds a row.
Patient = namedtuple(
    "Patient",
    "gender age hypertension heart_disease smoking_history "
    "bmi HbA1c_level blood_glucose_level",
)
class ML:
    """Random-forest predictor trained on a CSV dataset.

    The CSV is read once at construction time. Every column except the last
    is treated as a feature and the last column as the label.

    NOTE(review): feature columns are assumed to follow ``Patient`` field
    order (gender first, smoking_history fourth from the end). This is
    inferred from the encoder column indices and from how ``predict`` builds
    its rows - confirm against the dataset.
    """

    _encode_gender: ColumnTransformer
    _encode_smoking: ColumnTransformer
    _inputs: list[tuple] | np.ndarray | None
    _classifier: RandomForestClassifier
    # Becomes True once ``predict`` has fitted the encoders and trained the
    # forest, so later calls reuse them instead of retraining.
    _trained: bool = False

    def __init__(self, file_name: str) -> None:
        """Load the dataset.

        Args:
            file_name: Path of the CSV file, passed to ``pandas.read_csv``.
        """
        self.file_name = file_name
        self.data = pd.read_csv(self.file_name)

    def init(self) -> None:
        """Split the data into features/labels and fit both one-hot encoders.

        If ``self._inputs`` holds rows, they are encoded as a side effect.
        """
        self._x = self.data.iloc[:, :-1]
        self._y = self.data.iloc[:, -1]
        self.encoded_gender()
        self.encoded_smoking()

    @staticmethod
    def _one_hot(column: int) -> ColumnTransformer:
        """Build a transformer that one-hot encodes ``column`` only."""
        return ColumnTransformer(
            transformers=[("encode", OneHotEncoder(), [column])],
            remainder="passthrough",
            # Always return a dense array: the callers wrap the result in
            # ``np.array``, which would not densify a sparse matrix.
            sparse_threshold=0,
        )

    def _fit_encoder(self, encoder: ColumnTransformer) -> None:
        """Fit ``encoder`` on ``self._x`` and encode any pending input rows."""
        self._x = np.array(encoder.fit_transform(self._x))
        # ``_inputs`` may be unset when ``init()`` is called directly.
        pending = getattr(self, "_inputs", None)
        if pending is not None:
            self._inputs = np.array(encoder.transform(pending))

    def _encode_rows(self, rows) -> np.ndarray:
        """Run raw feature rows through both fitted encoders, in fit order."""
        after_gender = np.array(self._encode_gender.transform(rows))
        return np.array(self._encode_smoking.transform(after_gender))

    def encoded_gender(self) -> ColumnTransformer:
        """Fit the gender encoder (feature column 0) and apply it.

        Returns:
            The fitted transformer, also stored as ``self._encode_gender``.
        """
        self._encode_gender = self._one_hot(0)
        self._fit_encoder(self._encode_gender)
        return self._encode_gender

    def encoded_smoking(self) -> ColumnTransformer:
        """Fit the smoking-history encoder and apply it.

        The column is located by counting from the right edge. The one-hot
        gender columns are placed at the front, so the smoking column's
        absolute index depends on how many gender categories the data holds,
        while its distance from the end does not. With three gender
        categories this yields index 6.

        Returns:
            The fitted transformer, also stored as ``self._encode_smoking``.
        """
        fields = Patient._fields
        from_end = len(fields) - fields.index("smoking_history")
        self._encode_smoking = self._one_hot(self._x.shape[1] - from_end)
        self._fit_encoder(self._encode_smoking)
        return self._encode_smoking

    def prepare_classifier(
        self, train_size: float = 0.9, random_state: int = 0
    ) -> RandomForestClassifier:
        """Train the random forest on a split of the encoded data.

        ``init()`` must have run first so ``self._x`` and ``self._y`` exist.

        Args:
            train_size: Fraction of rows used for training.
            random_state: Seed for the train/test split. The forest itself
                always uses seed 0.

        Returns:
            The fitted classifier, also stored as ``self._classifier``.
        """
        # Only the training part of the split is used.
        x_train, _, y_train, _ = train_test_split(
            self._x, self._y, train_size=train_size, random_state=random_state
        )
        classifier = RandomForestClassifier(n_estimators=10, random_state=0)
        classifier.fit(x_train, y_train)
        self._classifier = classifier
        return self._classifier

    # NOTE(review): ``ToAsync`` comes from ``utils``; presumably it makes this
    # method awaitable - confirm.
    @ToAsync()
    def predict(
        self,
        input_data: tuple | None = None,
        *,
        gender: Literal["Male", "Female"] = MISSING,
        age: float = MISSING,
        hypertension: int = MISSING,
        heart_disease: int = MISSING,
        smoking_history: Literal["current", "former", "never"] = MISSING,
        bmi: float = MISSING,
        hb1ac_level: float = MISSING,
        blood_glucose_level: float = MISSING,
    ) -> int:
        """Predict the label for one patient.

        Pass either ``input_data`` (a row in ``Patient`` field order) or all
        eight keyword fields. The encoders and the classifier are fitted on
        the first call and reused afterwards. Training is seeded, so this
        gives the same predictions as retraining on every call.

        Returns:
            The predicted label for the row.

        Raises:
            ValueError: If ``input_data`` is omitted and a keyword field is
                missing.
        """
        if input_data is not None:
            rows = [input_data]
        else:
            supplied = {
                "gender": gender,
                "age": age,
                "hypertension": hypertension,
                "heart_disease": heart_disease,
                "smoking_history": smoking_history,
                "bmi": bmi,
                "hb1ac_level": hb1ac_level,
                "blood_glucose_level": blood_glucose_level,
            }
            absent = [name for name, value in supplied.items() if value is MISSING]
            if absent:
                raise ValueError(
                    "predict() needs input_data or every keyword field; "
                    "missing: " + ", ".join(absent)
                )
            # The dict keeps insertion order, which is Patient field order.
            rows = [Patient(*supplied.values())]

        if not self._trained:
            # No rows are pending, so init() only fits. A bad input row then
            # cannot abort training halfway.
            self._inputs = None
            self.init()
            self.prepare_classifier()
            self._trained = True

        encoded = self._encode_rows(rows)
        self._inputs = encoded
        y_pred = self._classifier.predict(encoded)
        return y_pred[0]
if __name__ == "__main__":
    # Imported here because they are only needed for this manual smoke run.
    import asyncio
    import inspect

    data = Patient("Male", 78.0, 1, 1, "current", 38.05, 13.0, 190)
    ml = ML(r"quart_app/assests/datasets.csv")

    async def _demo():
        """Run one prediction, awaiting it only if the call is awaitable."""
        # NOTE(review): ``predict`` is wrapped by ``ToAsync()``; presumably the
        # call returns an awaitable, in which case printing it without
        # awaiting would show the awaitable rather than the prediction.
        outcome = ml.predict(input_data=data)
        if inspect.isawaitable(outcome):
            outcome = await outcome
        return outcome

    r = asyncio.run(_demo())
    print(r)