# Riemannian_herding_26_10_2023.py
# After calculating sigma, mu and pi with the previous code:
# kernel herding using gradient descent.
# Adapted from https://github.com/rrphys/KernelHerding/blob/master/Herding.ipynb
# Note: still not getting a good plot.
import numpy as np
import matplotlib.pyplot as plt
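
# The real `samples` are drawn from the GMM fitted by the previous code (its pi, mu,
# sigma).  The block below is only a minimal stand-in with made-up 2-D mixture
# parameters so this file can run on its own; it is skipped when `samples` already exists.
try:
    samples  # reuse the GMM samples from the previous code if they are present
except NameError:
    rng = np.random.default_rng(0)
    _pi = np.array([0.5, 0.5])                    # placeholder mixture weights
    _mu = np.array([[-2.0, 0.0], [2.0, 0.0]])     # placeholder component means
    _sigma = np.array([np.eye(2), np.eye(2)])     # placeholder covariances
    _comp = rng.choice(len(_pi), size=200, p=_pi)
    samples = np.array([rng.multivariate_normal(_mu[c], _sigma[c]) for c in _comp])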
def expKer(x, samples, gamma):
    # Estimates the expectation value of the exponential kernel so that argmax_x can be found.
    # x       = candidate super sample to optimize
    # samples = the GMM samples
    # gamma   = kernel hyperparameter, always 1 for my demo
    numSamples = samples.shape[0]
    k = np.zeros(numSamples)
    # estimate the expectation of the kernel over the GMM samples
    for i in range(numSamples):
        # other kernels tried earlier:
        # k[i] = np.exp(-distance_euclid(x, samples[i, :], squared=True))   # Euclidean-distance based
        # k[i] = np.exp(-distance_riemann(x, samples[i, :], squared=True))  # Riemannian (eigenvalue) based
        k[i] = np.exp(-np.linalg.norm(x - samples[i, :]) / gamma**2)
    exp_est = np.sum(k) / numSamples
    return exp_est
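
# A minimal sketch of the Riemannian-kernel variant hinted at in the commented lines
# above, assuming the data were SPD matrices rather than 2-D points; the helper name
# is ours and it is not called anywhere below.
def expKerRiemann(x, spd_samples, gamma):
    from pyriemann.utils.distance import distance_riemann  # local import: optional dependency
    # affine-invariant Riemannian distance replaces the Euclidean norm in the kernel
    k = np.array([np.exp(-distance_riemann(x, s) ** 2 / gamma ** 2) for s in spd_samples])
    return k.mean()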
def sumKer(x, xss, numSSsoFar, gamma):
    # Calculates the (normalized) sum of k(x, x_ss) over the super samples chosen so far.
    # x          = candidate super sample to optimize
    # xss        = the super samples chosen so far
    # numSSsoFar = number of super samples so far
    # gamma      = kernel hyperparameter, always 1 for my demo
    k = np.zeros(numSSsoFar)
    # sum the kernel over the existing super samples
    for i in range(numSSsoFar):
        k[i] = np.exp(-np.linalg.norm(x - xss[i, :]) / gamma**2)
        # k[i] = np.exp(-distance_riemann(x, xss[i, :], squared=True))  # Riemannian (eigenvalue) based
    total = np.sum(k)
    s = total / (numSSsoFar + 1)
    return s
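
# Kernel herding (Chen, Welling & Smola) chooses each new super sample as
#   x_new = argmax_x  E_{x'~p}[k(x, x')] - 1/(n+1) * sum_{j=1..n} k(x, x_j),
# where n super samples have been chosen so far: the expectation is estimated by
# expKer() over the GMM samples and the second term is sumKer() over the existing
# super samples.  The lambda f inside herd() below is the negative of this
# objective, so minimizing f performs the argmax.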
from geoopt.optim import RiemannianAdam
from geoopt.manifolds import Euclidean, ProductManifold, SymmetricPositiveDefinite
from scipy.optimize import minimize  # gradient-free search used below to find the argmax

D = 12; K = 320
# (1) Instantiate the manifold.  The call below uses pymanopt-style arguments (a size
# and a k= batch count), which geoopt's constructors do not accept, so it is left
# commented out; the working path in herd() uses scipy's minimize instead.
# manifold = ProductManifold([SymmetricPositiveDefinite(D + 1, k=K), Euclidean(K - 1)])
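
# A minimal sketch (not used below) of what a working geoopt Riemannian optimization
# loop looks like: the parameter must be a torch ManifoldParameter and the cost must
# be torch-differentiable.  The target matrix and the squared-error cost here are
# placeholders, not the herding objective.
import torch
import geoopt

_spd = SymmetricPositiveDefinite()
_X = geoopt.ManifoldParameter(torch.eye(D + 1), manifold=_spd)  # start at the identity (SPD)
_target = 2.0 * torch.eye(D + 1)                                # hypothetical SPD target
_opt = RiemannianAdam([_X], lr=1e-2, stabilize=10)
for _ in range(10):
    _opt.zero_grad()
    _loss = ((_X - _target) ** 2).sum()  # placeholder cost in place of the herding objective
    _loss.backward()
    _opt.step()                          # Riemannian step that keeps _X on the SPD manifold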
def herd(samples, totalSS, gamma):
    # Calculate totalSS super samples from the distribution estimated by `samples`,
    # using kernel hyperparameter gamma.
    # get GMM dimensionality and number of samples
    numDim = samples.shape[1]
    numSamples = samples.shape[0]
    # init vars
    gradientFail = 0                    # count how often the optimization fails (debugging)
    xss = np.zeros((totalSS, numDim))   # space for the array of super samples
    i = 1
    # the search can have some problems, so set bounds to reject points that wander too far
    minBound = np.min(samples)
    maxBound = np.max(samples)
    # start the search at the origin; it could also be a random point
    bestSeed = np.zeros(numDim)
    # tick = time.perf_counter()
    while i < totalSS - 1:
        print(".")
        # print("Working on SS number i=%d" % i)
        # build the objective for the search: minimizing f maximizes the herding criterion
        f = lambda x: -expKer(x, samples, gamma) + sumKer(x, xss, i, gamma)
        # If we were using a Riemannian gradient descent method, f would be the cost
        # function; but geoopt defines no Problem class and its optimizers require torch
        # ManifoldParameters, so this pymanopt-style attempt stays commented out
        # (see the geoopt sketch above).
        # problem = Problem(manifold, f)
        # optimizer = RiemannianAdam(params=[samples], lr=1e-2, stabilize=1)
        # Xopt = optimizer.step()
        results = minimize(f,
                           bestSeed,
                           method='nelder-mead',
                           options={'xatol': 1e-4, 'disp': False})
        # print("results.x:", results.x)
        # if the search failed (left the data bounds), pick a random sample and try again
        if np.min(results.x) < minBound or np.max(results.x) > maxBound:
            bestSeed = samples[np.random.choice(numSamples)]
            gradientFail = gradientFail + 1
            # print("Gradient descent failed")
            continue
        # pick the next starting point the way Chen, Welling and Smola do: the existing
        # super sample that best maximizes the objective seeds the next search
        seed = np.array([])
        for j in range(i):
            seed = np.append(seed, -expKer(xss[j, :], samples, gamma) + sumKer(xss[j, :], xss, i, gamma))
        bestSeedIdx = np.argmin(seed)
        bestSeed = xss[bestSeedIdx, :]
        # the search succeeded, so store the new super sample
        xss[i, :] = results.x
        i = i + 1
    # toc = time.perf_counter()
    # print("Time elapsed %d" % (toc - tick))
    return xss
totalSS=70
xss = herd(samples[:80],totalSS,gamma=1)
plt.plot(samples[:,0], samples[:,1], '.')
plt.plot(xss[:,0],xss[:,1],'o')
# Calculate the herding error: distance between the sample mean and the running
# mean of the first i super samples (skip i=0 to avoid dividing by zero).
mu_p = np.mean(samples, axis=0)
err = np.zeros(totalSS)
for i in range(1, totalSS):
    err[i] = np.linalg.norm(mu_p - np.mean(xss[:i, :], axis=0))
# compare against the same estimate built from i.i.d. draws of the samples
idx = np.random.choice(samples.shape[0], totalSS)
samples_iid = samples[idx, :]
err_iid = np.zeros(totalSS)
for i in range(1, totalSS):
    err_iid[i] = np.linalg.norm(mu_p - np.mean(samples_iid[:i, :], axis=0))
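
# Kernel herding is expected to drive this moment-matching error down at roughly O(1/N),
# versus the O(1/sqrt(N)) rate of i.i.d. sampling, which is what the dashed ~1/N
# reference line on the log-log plot below is meant to show.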
plt.plot(err, '-o')
plt.plot(err_iid)
plt.plot([1, totalSS], [1, 1. / totalSS], '--')
plt.yscale('log')
plt.xscale('log')
plt.legend(['Herding error', 'iid error', '~1/N'], loc='lower left')
plt.show()