-
Notifications
You must be signed in to change notification settings - Fork 1
/
UnifiedClassificationModel.py
executable file
·289 lines (193 loc) · 12.1 KB
/
UnifiedClassificationModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
import numpy as np
from DepthCoefficients import max_SP
from DB_Calculations import extract_DB_vectors
from GW_Calculations import extract_GW_vectors
from Alignment import Align_Multimodal_Connectomes as Align_MC
from LDA import dim_reduction_with_LDA
from ModalityCombination import get_train_test_data
from Classification import predict_test_labels, calculate_scores, Classify_Unimodal_Connectomes
from Plot import plot_scores, get_Names_Labels_Colors
def Extract_Connectomic_Features(All_Graphs, feat_method='DB', P=3, d=20, eta=0.9, stdScale_GW=True):
    """
    Compute per-node feature vectors (DB or GW) for the graphs of every modality.

    Parameters:
    ----------
    All_Graphs  : list of M sets of graphs from M distinct modalities, each with shape (N_m, n_m, n_m)

    feat_method : 'DB' (default) or 'GW',
                  Which kind of connectomic features to extract.
                  'DB' -> depth-based vector representations obtained via "subgraph expansions".
                  'GW' -> structural GraphWave node embeddings obtained via "heat diffusion wavelets".

    P           : int (default: 3),
                  Only used when feat_method == 'DB'. Maximum depth level at which graphs are almost
                  completely covered during subgraph expansions; it is also the length of the
                  extracted DB feature vectors. The value returned by max_SP(All_Graphs) is divided
                  by P to obtain the depth coefficients handed to the DB extractor.

    d           : int (default: 20),
                  Only used when feat_method == 'GW'. Number of (equally spaced) time points at which
                  the diffusion wavelets are evaluated; the resulting feature vectors have length 2*d.

    eta         : float (default: 0.9),
                  Only used when feat_method == 'GW'. Scaling parameter that adjusts the radius of the
                  local network neighborhoods being explored: larger values take larger neighborhoods
                  into account and vice versa. It must lie in the range (eta_min, eta_max) determined
                  by the second and the largest eigenvalues of the graph laplacian plus pre-determined
                  coefficients.
                  (See the paper "Learning Structural Node Embeddings via Diffusion Wavelets".)

    stdScale_GW : Bool (default: True),
                  Only used when feat_method == 'GW'. If True, the features of each modality are
                  independently standard-scaled (zero mean, unit standard deviation) before graph
                  alignment; if False, they are passed on unchanged.

    Return:
    -------
    out : list of M sets of feature matrices, each with shape (N_m, n_m, P) for 'DB'
          (feature length is 2*d instead of P for 'GW', as described above)

    Raises:
    -------
    ValueError : if feat_method is neither 'DB' nor 'GW'
    """
    # Reject unknown methods up front so the two valid branches below stay flat.
    if feat_method not in ('DB', 'GW'):
        raise ValueError('Invalid feature method\n')

    if feat_method == 'GW':
        features = extract_GW_vectors(All_Graphs, d=d, eta=eta, stdScale=stdScale_GW)
        done_message = '\n\n--> All GraphWave embeddings are extracted.\n'
    else:
        # Depth coefficients: max_SP output divided by the number of depth levels.
        depth_coeffs = max_SP(All_Graphs) / P
        features = extract_DB_vectors(All_Graphs, P=P, depth_coeffs=depth_coeffs)
        done_message = '\n\n--> All depth-based vector representations are extracted.\n'

    print(done_message)
    return features
def Classify_Aligned_Connectomes(All_Aligned_Graphs, All_Labels, Tr_Ind, Tst_Ind):
    """
    Train on the aligned training connectomes, predict the labels of the aligned testing
    connectomes and score the predictions (accuracy, sensitivity, specificity).

    The pipeline is: LDA-based dimensionality reduction -> train/test label split ->
    label prediction -> score computation. The classifier itself lives in
    predict_test_labels (described by this project as a linear SVM).

    Parameters:
    ----------
    All_Aligned_Graphs : list of M sets of aligned graphs of M distinct modalities, each with shape (N_m, n_t, n_t)

    All_Labels         : list of M label arrays, each with length N_m

    Tr_Ind             : list of M 1-D index arrays,
                         the m-th array holds the indices of the training subjects of
                         modality m for the current fold

    Tst_Ind            : list of M 1-D index arrays,
                         the m-th array holds the indices of the testing subjects of
                         modality m for the current fold

    Return:
    -------
    out : array of 3 performance metrics (i.e., accuracy, sensitivity, specificity)
    """
    # Reduce the aligned graphs to low-dimensional train/test representations.
    X_train, X_test = dim_reduction_with_LDA(All_Aligned_Graphs, All_Labels, Tr_Ind, Tst_Ind)

    # Split the labels with the same fold indices.
    y_train, y_test = get_train_test_data(All_Labels, Tr_Ind, Tst_Ind)

    y_pred = predict_test_labels(X_train, X_test, y_train)

    # Acc, Sens, Spec
    return calculate_scores(y_test, y_pred)
def UMC(All_Graphs, All_Labels, nt=None, feat_method='DB', P=3, d=20, eta=0.9, stdScale_GW=True, corr='hard', Fold=5, seed=100):
    """
    <<< MAIN FUNCTION OF THE PROPOSED METHOD ("UNIFIED MULTI-MODAL CLASSIFICATION") >>>

    Classify (heterogeneous) multi-modal and multi-sized brain connectomes
    derived from "M" distinct neuroimaging modalities using graph alignment.

    Steps: extract connectomic features, align the graphs of all modalities fold by fold,
    classify the aligned graphs of each fold, then average the fold scores.

    Parameters:
    ----------
    All_Graphs  : list of M sets of graphs from M distinct modalities, each with shape (N_m, n_m, n_m)

    All_Labels  : list of M label arrays, each with length N_m

    nt          : int or None (default)
                  number of nodes in template graphs, number of nodes in resulting aligned graphs,
                  or number of cluster centroids during K-Means clustering. It should be lower than
                  the size of the smallest graphs across different modalities. If not provided (None),
                  it is set to int(min_size * 0.9), where "min_size" is the size of the smallest graphs.

    feat_method : 'DB' (default) or 'GW',
                  Type of extracted connectomic features.
                  If 'DB', depth-based vector representations are calculated from graphs using "subgraph expansions".
                  If 'GW', structural GraphWave node embeddings are extracted using "heat diffusion wavelets".

    P           : int (default: 3),
                  Maximum depth level at which graphs are almost completely covered during subgraph
                  expansions in DB-based alignment. Also, length of the feature vectors extracted from
                  graphs. It is used if feat_method=='DB', otherwise discarded.

    d           : int (default: 20),
                  Number of (equally spaced) time points at which diffusion wavelets are evaluated in
                  GW-based alignment. Length of the feature vectors extracted from graphs is 2*d.
                  It is used if feat_method=='GW', otherwise discarded.

    eta         : float (default: 0.9),
                  Scaling parameter used in extraction of "GW" feature vectors. It is used if
                  feat_method=='GW', otherwise discarded. It adjusts the radius of the local network
                  neighborhoods to be discovered. If it is large, larger local neighborhoods are taken
                  into account during feature extraction and vice versa. It must be selected in the
                  range (eta_min, eta_max) which are determined by the second and the largest
                  eigenvalues of the graph laplacian plus pre-determined coefficients.
                  (For more, refer to the paper: "Learning Structural Node Embeddings via Diffusion Wavelets")

    stdScale_GW : Bool (default: True),
                  Whether or not to independently apply standard scaling to extracted features of each
                  modality before graph alignment. It is used if feat_method=='GW', otherwise discarded.
                  If "True", the feature vectors of any modality-m are scaled so that the distribution
                  mean and standard deviation are 0 and 1, respectively. If "False", the feature vectors
                  are input to the alignment phase in their original forms.

    corr        : 'hard' (default) or 'soft',
                  correspondence strategy used in feature extraction phase. If 'hard', correspondence
                  of a node in a graph is entirely reserved to the closest cluster centroid that is
                  generated by KMeans clustering (correspondence: 1) and the correspondences to the
                  other centroids are set to 0. If 'soft', correspondence of a node is distributed
                  between cluster centroids based on their euclidean distances to the node of interest
                  such that all correspondences add up to 1. In this case, correspondences are
                  inversely proportional to pairwise distances.

    Fold        : int (default: 5),
                  number of folds to use in Cross-Validation

    seed        : int (default: 100),
                  seed value to anchor randomization for multiple runs

    Return:
    -------
    out : array of 3 performance metrics (i.e., accuracy, sensitivity, specificity),
          averaged over the folds, expressed in percent and rounded to 2 decimals

    Raises:
    -------
    ValueError : if All_Graphs is empty, if All_Graphs and All_Labels do not have the
                 same number of modalities, or if feat_method is invalid
    """
    # Fail early with a clear message instead of an opaque error from min() or a helper.
    n_modalities = len(All_Graphs)
    if n_modalities == 0:
        raise ValueError('All_Graphs must contain at least one modality')
    if n_modalities != len(All_Labels):
        raise ValueError(
            f'All_Graphs and All_Labels must have the same length '
            f'(got {n_modalities} and {len(All_Labels)})'
        )

    print('\nCONNECTOMIC DATASET')
    print('--------------------------')
    for m, Graphs_m in enumerate(All_Graphs, start=1):
        print(f'Modality-{m}: {Graphs_m.shape}')

    if nt is None:
        # Default template size: 90% of the smallest graph size across modalities.
        min_size = min(G.shape[-1] for G in All_Graphs)
        nt = int(min_size * 0.9)

    All_Features = Extract_Connectomic_Features(All_Graphs, feat_method, P, d, eta, stdScale_GW)

    # Align_MC is consumed as an iterable yielding (aligned graphs, train idx, test idx) per fold.
    folds = Align_MC(All_Graphs, All_Features, All_Labels, nt, feat_method=feat_method, corr=corr, Fold=Fold, seed=seed)
    fold_scores = [
        Classify_Aligned_Connectomes(All_Aligned_Graphs, All_Labels, Tr_Ind, Tst_Ind)
        for All_Aligned_Graphs, Tr_Ind, Tst_Ind in folds
    ]

    # Average over folds, then report as percentages with two decimals.
    Scores = np.average(fold_scores, axis=0)
    return np.round(Scores * 100, 2)
def MultipleClassifications(All_Graphs, All_Labels, nt_list, K_list, P=3, d=20, eta=0.9, stdScale_GW=True, Fold=5, seed=100, Names_UC=None):
    """
    Run the multi-modal classification for every combination of feature type ('GW', 'DB')
    and correspondence strategy ('hard', 'soft') over a list of template sizes, run the
    unimodal baselines for every modality, and plot all scores together.

    Parameters:
    ----------
    All_Graphs  : list of M sets of graphs from M distinct modalities, each with shape (N_m, n_m, n_m)

    All_Labels  : list of M label arrays, each with length N_m

    nt_list     : non-empty sequence of int,
                  template sizes (nt) to evaluate; one multi-modal score row is produced per value

    K_list      : non-empty sequence,
                  values forwarded one by one as the `K` argument of Classify_Unimodal_Connectomes;
                  unimodal scores are the mean over these values and the unimodal errors are the
                  standard deviation over them

    P, d, eta, stdScale_GW :
                  feature-extraction settings forwarded to Extract_Connectomic_Features
                  (P is used by 'DB'; d, eta and stdScale_GW are used by 'GW')

    Fold        : int (default: 5),
                  number of folds to use in Cross-Validation

    seed        : int (default: 100),
                  seed value to anchor randomization for multiple runs

    Names_UC    : list of str or None (default),
                  display names of the modalities passed to get_Names_Labels_Colors.
                  If empty or None, the names '$m_1$', '$m_2$', ... are generated.

    Return:
    -------
    None. The results are handed to plot_scores:
      - multi-modal scores with shape (2, len(nt_list), 2, 3), indexed as
        [corr ('hard', 'soft'), nt, feature type ('GW', 'DB'), metric]
      - multi-modal errors: zeros of the same shape (no spread is computed for them)
      - unimodal scores/errors with shape (M, 2, 3), indexed as
        [modality, fusion strategy ('SNF', 'Averaging'), metric]

    Raises:
    -------
    ValueError : if All_Graphs, nt_list or K_list is empty, or if All_Graphs and
                 All_Labels do not have the same number of modalities
    """
    # Validate before the expensive feature extraction; otherwise a mismatch is silently
    # truncated by zip() below and only surfaces later as a confusing reshape/index error.
    n_modalities = len(All_Graphs)
    if n_modalities == 0:
        raise ValueError('All_Graphs must contain at least one modality')
    if n_modalities != len(All_Labels):
        raise ValueError(
            f'All_Graphs and All_Labels must have the same length '
            f'(got {n_modalities} and {len(All_Labels)})'
        )
    if len(nt_list) == 0:
        raise ValueError('nt_list must contain at least one template size')
    if len(K_list) == 0:
        raise ValueError('K_list must contain at least one value')

    def Classify_Diff_nts(All_Features, feat_method, corr):
        """Return one row of fold-averaged percentage scores per template size in nt_list."""
        SCORES = []
        for nt in nt_list:
            # Keyword arguments mirror the Align_MC call made by UMC.
            folds = Align_MC(All_Graphs, All_Features, All_Labels, nt,
                             feat_method=feat_method, corr=corr, Fold=Fold, seed=seed)
            fold_scores = [
                Classify_Aligned_Connectomes(All_Aligned_Graphs, All_Labels, Tr_Ind, Tst_Ind)
                for All_Aligned_Graphs, Tr_Ind, Tst_Ind in folds
            ]
            SCORES.append(np.round(np.average(fold_scores, axis=0) * 100, 2))
        return np.array(SCORES)

    # ---------------------------------------------------

    def Classify_UC():
        """Return (mean, std) of the unimodal scores over K_list per modality and fusion strategy."""
        FS_list = ['SNF', 'Averaging']
        SCORES, ERRORS = [], []
        for Graphs_m, Labels_m in zip(All_Graphs, All_Labels):
            for FS_strategy in FS_list:
                Score_list_K = [
                    Classify_Unimodal_Connectomes(Graphs_m, Labels_m, FS_strategy, K, Fold, seed)
                    for K in K_list
                ]
                SCORES.append(np.mean(Score_list_K, axis=0))
                ERRORS.append(np.std(Score_list_K, axis=0))
        out_shape = (n_modalities, len(FS_list), 3)
        return np.array(SCORES).reshape(out_shape), np.array(ERRORS).reshape(out_shape)

    # ---------------------------------------------------

    print('\nCONNECTOMIC DATASET')
    print('--------------------------')
    for m, Graphs_m in enumerate(All_Graphs, start=1):
        print(f'Modality-{m}: {Graphs_m.shape}')

    # Features are extracted once per type and reused for both correspondence strategies.
    Features = {
        'GW': Extract_Connectomic_Features(All_Graphs, 'GW', P, d, eta, stdScale_GW),
        'DB': Extract_Connectomic_Features(All_Graphs, 'DB', P, d, eta, stdScale_GW),
    }

    # Evaluation order: GW-hard, DB-hard, GW-soft, DB-soft.
    # Stacking on axis 1 yields (len(nt_list), 2, 3) per strategy.
    Scores_MC = np.array([
        np.stack([Classify_Diff_nts(Features[feat], feat, corr) for feat in ('GW', 'DB')], axis=1)
        for corr in ('hard', 'soft')
    ])
    # No spread is computed for the multi-modal scores; zero error bars are plotted.
    Errors_MC = np.zeros_like(Scores_MC)

    Scores_UC, Errors_UC = Classify_UC()

    if not Names_UC:
        Names_UC = [f'$m_{n}$' for n in range(1, n_modalities + 1)]
    Params1, Params2 = get_Names_Labels_Colors(Names_UC)

    Params_MC = [Scores_MC, Errors_MC] + Params1
    Params_UC = [Scores_UC, Errors_UC] + Params2
    plot_scores(Params_MC, Params_UC, nt_list)