# ari_zpercentage_normed_scores.py
import numpy as np
import pandas as pd
import os
from calcSiCHAffy import calcSiCHAffy
from matplotlib import pyplot as plt
from ari_scores_zpercentage import ari_scores_zpercentage_normed
import plotly.graph_objs as go
from support_functions import rgb_color_random
import xlsxwriter
import plotly.io as pio
# Directories that are listed below to obtain the data and label file names.
path_data = '/home/vladimir/Desktop/po_projekat/Affy/data/'
path_label = '/home/vladimir/Desktop/po_projekat/Affy/labels/'

# Column titles written to the first row of the results worksheet.
file_header = [
    " ",
    "Silhouette_cosine",
    "Silhouette_braycurtis",
    "Silhouette_canberra",
    "Silhouette_pearson",
    "Silhouette_hellinger",
    "Silhouette_wasserstein",
    "Silhouette_energy",
    "Silhouette_kulczynski",
    "Calinski_harabaz",
    "Davies_Bouldin",
]

# Series names used for the grouped bar chart, one bar series per entry.
distance_names = [
    'cosine',
    'braycurtis',
    'pearson',
    'canberra',
    'wasserstein',
    'energy',
    'kulczynski',
    'eucl',
]

# Both listings are sorted; later code pairs data and label files by position.
data_file_names = sorted(os.listdir(path_data))
label_file_names = sorted(os.listdir(path_label))

# Collects one result per data file.
scores = []

# Workbook (with a single worksheet) that receives the results table.
data_book = xlsxwriter.Workbook(
    "/home/vladimir/Desktop/po_projekat/Affy/ARI/ARI_zpercentage_normed_results.xlsx"
)
worksheet = data_book.add_worksheet()
# Each data file is paired with the label file at the same position in the
# sorted listings. Fail up front, with a readable message, if there are not
# enough label files, instead of hitting an IndexError part-way through the
# run after earlier files have already been processed.
if len(label_file_names) < len(data_file_names):
    raise ValueError(
        "Found %d data files but only %d label files; cannot pair them by position"
        % (len(data_file_names), len(label_file_names))
    )

# Compute one result per data file and collect it in `scores`.
for data_file_name, label_file_name in zip(data_file_names, label_file_names):
    data = pd.read_csv(path_data + data_file_name, delimiter=' ')
    label = pd.read_csv(path_label + label_file_name, delimiter=' ')
    ari = ari_scores_zpercentage_normed(
        np.array(data, dtype=float), label, data_file_name, 0.1, 0.05
    )
    scores.append(ari)

# NOTE(review): the original indentation was lost; the blank-line print is
# assumed to run once after the loop, right before the summary - confirm.
print('\n')
print(scores)
# Row 0 of the worksheet holds the column titles.
for header_col, title in enumerate(file_header):
    worksheet.write(0, header_col, title)

# Every entry of `scores` fills one row, starting at row 1, one item per cell.
for sheet_row, file_scores in enumerate(scores, start=1):
    for sheet_col, value in enumerate(file_scores):
        worksheet.write(sheet_row, sheet_col, value)

# Closing the workbook finishes the output file.
data_book.close()
# Keep items 1..9 of every dataset's result (item 0 is skipped) and stack them
# into a 2-D array: one row per dataset, one column per score.
# The array is built from all of `scores` rather than a fixed count of 21, so
# the chart follows however many data files were actually processed and always
# lines up with `data_file_names` on the x axis.
scrs = np.array([file_scores[1:10] for file_scores in scores])

# One bar series per distance name; series i plots column i of `scrs`.
# NOTE(review): the slice keeps 9 columns while `distance_names` has 8 entries,
# so the last column is never plotted - confirm this is intended and that the
# name order matches the column order.
trace = []
for i, distance_name in enumerate(distance_names):
    trace.append(
        go.Bar(
            x=data_file_names,
            y=scrs[:, i],
            name=distance_name,
            marker=dict(color=rgb_color_random()),
        )
    )
    print(scrs[:, i])

data = trace
layout = go.Layout(
    # xaxis=dict(tickangle=-45),
    barmode='group',
)
fig = go.Figure(data=data, layout=layout)

# Export the grouped bar chart as a PNG image.
pio.write_image(fig, file='/home/vladimir/Desktop/po_projekat/Affy/ARI/ari_zpercentagenormed_scores.png', format='png')
#np.savetxt('calculations.txt', scores,fmt='%s')
# Column order: cosine, braycurtis, canberra, pearson, hellinger, wasserstein, energy distance, kulczynski, jackknife, calinski harabaz
# 0.6706 is the highest silhouette coefficient, and it was obtained with the Wasserstein distance.
# The jackknife was done with the Wasserstein distance instead of the Pearson one.
# TODO: add the Euclidean distance
# TODO: ARI with k-means prediction
#   - with the new normalizations
#   - without the new normalizations
# TODO: do the new normalizations and ARI in a separate file
# TODO: check whether the functions work... at first glance everything looks OK
# -*- coding: utf-8 -*-