mittlerweile hat sich mein Hauptthread sehr weit von der Anfangsfrage entfernt, deswegen fange ich hier einen neuen an.
Zum Thema:
Ich versuche einen Demo-Code von https://github.com/yaringal/VSSGP unter Windows zum Laufen zu bringen.
Im Moment debugge ich herum und komme an manchen Stellen nicht weiter.
Da ich ursprünglich mit matlab viel zu tun hatte und nicht mit Python, stellt sich mir manches einfaches Problem als sehr schwer heraus und ich bitte euch mir hierbei zu helfen.
Mein derzeitiger Code ist:
vssgp_example.py:
[codebox=python file=Unbenannt.txt]import os;
os.chdir('C:/Users/flo9fe/Desktop/vSSGP_LVM')
from vssgp_opt import VSSGP_opt
from scipy.optimize import minimize
import numpy as np
from numpy.random import randn, rand
np.set_printoptions(precision=2, suppress=True)
import pylab; pylab.ion() # turn interactive mode on
def STARTME(N = 1000, Q=1, D=1, K=50, components=2, init_period=1e32, init_lengthscales=1, sf2s=np.array([1, 5]), tau=1):
# Some synthetic data to play with
X = rand(N,Q) * 5*np.pi
X = np.sort(X, axis=0)
Z = rand(Q,K,components) * 5*np.pi
#a, b, c, d, e, f = randn(), randn(), randn(), randn(), randn(), randn()
#a, b, c, d, e, f = 0.6, 0.7, -0.6, 0.5, -0.1, -0.8
#a, b, c, d, e, f = -0.6, -0.3, -0.6, 0.6, 0.7, 0.6
#a, b, c, d, e, f = -0.5, -0.3, -0.6, 0.1, 1.1, 0.1
a, b, c, d, e, f = 0.6, -1.8, -0.5, -0.5, 1.7, 0
Y = a*np.sin(b*X+c) + d*np.sin(e*X+f)
# Initialise near the posterior:
mu = randn(Q,K,components)
# TODO: Currently tuned by hand to smallest value that doesn't diverge; we break symmetry to allow for some to get very small while others very large
feature_lengthscale = 5 # features are non-diminishing up to feature_lengthscale / lengthscale from z / lengthscale
lSigma = np.log(randn(Q,K,components)**2 / feature_lengthscale**2) # feature weights are np.exp(-0.5 * (x-z)**2 * Sigma / lengthscale**2)
lalpha = np.log(rand(K,components)*2*np.pi)
lalpha_delta = np.log(rand(K,components) * (2*np.pi - lalpha))
m = randn(components*K,D)
ls = np.zeros((components*K,D)) - 4
lhyp = np.log(1 + 1e-2*randn(2*Q+1, components)) # break symmetry
lhyp[0,:] += np.log(sf2s) # sf2
lhyp[1:Q+1,:] += np.log(init_lengthscales) # length-scales
lhyp[Q+1:,:] += np.log(init_period) # period
ltau = np.log(tau) # precision
lstsq = np.linalg.lstsq(np.hstack([X, np.ones((N,1))]), Y)[0]
a = 0*np.atleast_2d(lstsq[0]) # mean function slope
b = 0*lstsq[1] # mean function intercept
opt_params = {'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lSigma': lSigma, 'lhyp': lhyp, 'ltau': ltau}
fixed_params = {'lalpha': lalpha, 'lalpha_delta': lalpha_delta, 'a': a, 'b': b}
inputs = {'X': X, 'Y': Y}
vssgp_opt = VSSGP_opt(N, Q, D, K, inputs, opt_params, fixed_params, use_exact_A=True, parallel = True, batch_size = 100, print_interval=1)
# LBFGS
x0 = np.concatenate([np.atleast_2d(opt_params[n]).flatten() for n in vssgp_opt.opt_param_names])
pylab.figure(num=None, figsize=(12, 9), dpi=80, facecolor='w', edgecolor='w')
vssgp_opt.callback(x0)
res = minimize(vssgp_opt.func, x0, method='L-BFGS-B', jac=vssgp_opt.fprime,
options={'ftol': 0, 'disp': False, 'maxiter': 500}, tol=0, callback=vssgp_opt.callback)
raw_input("PRESS ENTER TO CONTINUE.")
return (res)[/code]
sowie
vssgp_model.py:
[codebox=python file=Unbenannt.txt]# To speed Theano up, create ram disk: mount -t tmpfs -o size=512m tmpfs /mnt/ramdisk
# Then use flag THEANO_FLAGS='base_compiledir=/mnt/ramdisk' python script.py
import sys; sys.path.insert(0, "../Theano"); sys.path.insert(0, "../../Theano")
import theano; import theano.tensor as T; import theano.sandbox.linalg as sT
import numpy as np
import cPickle
print('Theano version: ' + theano.__version__ + ', base compile dir: ' + theano.config.base_compiledir)
theano.config.mode = 'FAST_RUN'
theano.config.optimizer = 'fast_run'
theano.config.reoptimize_unpickled_function = False
class VSSGP:
def __init__(self, use_exact_A = False):
try:
print('Trying to load model...')
with open('model_exact_A.save' if use_exact_A else 'model.save', 'rb') as file_handle:
self.f, self.g = cPickle.load(file_handle)
print('Loaded!')
return
except:
print('Failed. Creating a new model...')
print('Setting up variables...')
Z, mu, lSigma = T.dtensor3s('Z', 'mu', 'lSigma')
X, Y, m, ls, lhyp, lalpha, lalpha_delta, a = T.dmatrices('X', 'Y', 'm', 'ls', 'lhyp', 'lalpha', 'lalpha_delta', 'a')
b = T.dvector('b')
ltau = T.dscalar('ltau')
Sigma, alpha, alpha_delta, tau = T.exp(lSigma), T.exp(lalpha), T.exp(lalpha_delta), T.exp(ltau)
alpha = alpha % 2*np.pi
beta = T.minimum(alpha + alpha_delta, 2*np.pi)
(N, Q), D, K = X.shape, Y.shape[1], mu.shape[1]
sf2s, lss, ps = T.exp(lhyp[0]), T.exp(lhyp[1:1+Q]), T.exp(lhyp[1+Q:]) # length-scales abd periods
print('Setting up model...')
if not use_exact_A:
LL, KL, Y_pred_mean, Y_pred_var, EPhi, EPhiTPhi, opt_A_mean, opt_A_cov = self.get_model(Y, X, Z, alpha, beta,
mu, Sigma, m, ls, sf2s, lss, ps, tau, a, b, N, Q, D, K)
else:
LL, KL, Y_pred_mean, Y_pred_var, EPhi, EPhiTPhi, opt_A_mean, opt_A_cov = self.get_model_exact_A(Y, X, Z, alpha, beta,
mu, Sigma, m, ls, sf2s, lss, ps, tau, a, b, N, Q, D, K)
print('Compiling model...')
inputs = {'X': X, 'Y': Y, 'Z': Z, 'mu': mu, 'lSigma': lSigma, 'm': m, 'ls': ls, 'lalpha': lalpha,
'lalpha_delta': lalpha_delta, 'lhyp': lhyp, 'ltau': ltau, 'a': a, 'b': b}
z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
f = zip(['opt_A_mean', 'opt_A_cov', 'EPhi', 'EPhiTPhi', 'Y_pred_mean', 'Y_pred_var', 'LL', 'KL'],
[opt_A_mean, opt_A_cov, EPhi, EPhiTPhi, Y_pred_mean, Y_pred_var, LL, KL])
self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore') for n,f in f}
g = zip(['LL', 'KL'], [LL, KL])
wrt = {'Z': Z, 'mu': mu, 'lSigma': lSigma, 'm': m, 'ls': ls, 'lalpha': lalpha,
'lalpha_delta': lalpha_delta, 'lhyp': lhyp, 'ltau': ltau, 'a': a, 'b': b}
self.g = {vn: {gn: theano.function(inputs.values(), T.grad(gv+z, vv), name='d'+gn+'_d'+vn,
on_unused_input='ignore') for gn,gv in g} for vn, vv in wrt.iteritems()}
with open('model_exact_A.save' if use_exact_A else 'model.save', 'wb') as file_handle:
print('Saving model...')
sys.setrecursionlimit(20000)
cPickle.dump([self.f, self.g], file_handle, protocol=cPickle.HIGHEST_PROTOCOL)
def get_EPhi(self, X, Z, alpha, beta, mu, Sigma, sf2s, lss, ps, K):
two_over_K = 2.*sf2s[None, None, :]/K # N x K x comp
mean_p, std_p = ps**-1, (2*np.pi*lss)**-1 # Q x comp
Ew = std_p[:, None, :] * mu + mean_p[:, None, :] # Q x K x comp
XBAR = 2 * np.pi * (X[:, :, None, None] - Z[None, :, :, :]) # N x Q x K x comp
decay = T.exp(-0.5 * ((std_p[None, :, None, :] * XBAR)**2 * Sigma[None, :, :, :]).sum(1)) # N x K x comp
cos_w = T.cos(alpha + (XBAR * Ew[None, :, :, :]).sum(1)) # N x K x comp
EPhi = two_over_K**0.5 * decay * cos_w
EPhi = EPhi.flatten(2) # N x K*comp
cos_2w = T.cos(2 * alpha + 2 * (XBAR * Ew[None, :, :, :]).sum(1)) # N x K x comp
E_cos_sq = two_over_K * (0.5 + 0.5*decay**4 * cos_2w) # N x K x comp
EPhiTPhi = (EPhi.T).dot(EPhi)
EPhiTPhi = EPhiTPhi - T.diag(T.diag(EPhiTPhi)) + T.diag(E_cos_sq.sum(0).flatten(1))
return EPhi, EPhiTPhi, E_cos_sq
def get_opt_A(self, tau, EPhiTPhi, YT_EPhi):
SigInv = EPhiTPhi + (tau**-1 + 1e-4) * T.identity_like(EPhiTPhi)
cholTauSigInv = tau**0.5 * sT.cholesky(SigInv)
invCholTauSigInv = sT.matrix_inverse(cholTauSigInv)
tauInvSig = invCholTauSigInv.T.dot(invCholTauSigInv)
Sig_EPhiT_Y = tau * tauInvSig.dot(YT_EPhi.T)
return Sig_EPhiT_Y, tauInvSig, cholTauSigInv
def get_model(self, Y, X, Z, alpha, beta, mu, Sigma, m, ls, sf2s, lss, ps, tau, a, b, N, Q, D, K):
s = T.exp(ls)
Y = Y - (X.dot(a) + b[None,:])
EPhi, EPhiTPhi, E_cos_sq = self.get_EPhi(X, Z, alpha, beta, mu, Sigma, sf2s, lss, ps, K)
YT_EPhi = Y.T.dot(EPhi)
LL = (-0.5*N*D * np.log(2 * np.pi) + 0.5*N*D * T.log(tau) - 0.5*tau*T.sum(Y**2)
- 0.5*tau * T.sum(EPhiTPhi * (T.diag(s.sum(1)) + T.sum(m[:,None,:]*m[None,:,:], axis=2)))
+ tau * T.sum((Y.T.dot(EPhi)) * m.T))
KL_A = 0.5 * (s + m**2 - ls - 1).sum()
KL_w = 0.5 * (Sigma + mu**2 - T.log(Sigma) - 1).sum()
KL = KL_A + KL_w
Y_pred_mean = EPhi.dot(m) + (X.dot(a) + b[None,:])
Psi = T.sum(E_cos_sq.flatten(2)[:, :, None] * s[None, :, :], 1) # N x K*comp
flat_diag_n = E_cos_sq.flatten(2) - EPhi**2 # N x K*comp
Y_pred_var = tau**-1 * T.eye(D) + np.transpose(m.T.dot(flat_diag_n[:, :, None] * m),(1,0,2)) \
+ T.eye(D)[None, :, :] * Psi[:, :, None]
opt_A_mean, opt_A_cov, _ = self.get_opt_A(tau, EPhiTPhi, YT_EPhi)
return LL, KL, Y_pred_mean, Y_pred_var, EPhi, EPhiTPhi, opt_A_mean, opt_A_cov
def get_model_exact_A(self, Y, X, Z, alpha, beta, mu, Sigma, m, ls, sf2s, lss, ps, tau, a, b, N, Q, D, K):
Y = Y - (X.dot(a) + b[None,:])
EPhi, EPhiTPhi, E_cos_sq = self.get_EPhi(X, Z, alpha, beta, mu, Sigma, sf2s, lss, ps, K)
YT_EPhi = Y.T.dot(EPhi)
opt_A_mean, opt_A_cov, cholSigInv = self.get_opt_A(tau, EPhiTPhi, YT_EPhi)
LL = (-0.5*N*D * np.log(2 * np.pi) + 0.5*N*D * T.log(tau) - 0.5*tau*T.sum(Y**2)
- 0.5*D * T.sum(2*T.log(T.diag(cholSigInv)))
+ 0.5*tau * T.sum(opt_A_mean.T * YT_EPhi))
KL_w = 0.5 * (Sigma + mu**2 - T.log(Sigma) - 1).sum()
''' For prediction, m is assumed to be [m_1, ..., m_d] with m_i = opt_a_i, and and ls = opt_A_cov '''
Y_pred_mean = EPhi.dot(m) + (X.dot(a) + b[None,:])
EphiTphi = EPhi[:, :, None] * EPhi[:, None, :] # N x K*comp x K*comp
comp = sf2s.shape[0]
EphiTphi = EphiTphi - T.eye(K*comp)[None, :, :] * EphiTphi + T.eye(K*comp)[None, :, :] * E_cos_sq.flatten(2)[:, :, None]
Psi = T.sum(T.sum(EphiTphi * ls[None, :, :], 2), 1) # N
flat_diag_n = E_cos_sq.flatten(2) - EPhi**2 # N x K*comp
Y_pred_var = tau**-1 * T.eye(D) + np.transpose(m.T.dot(flat_diag_n[:, :, None] * m),(1,0,2)) \
+ T.eye(D)[None, :, :] * Psi[:, None, None]
return LL, KL_w, Y_pred_mean, Y_pred_var, EPhi, EPhiTPhi, opt_A_mean, opt_A_cov[/code]
und zu guter letzt
vssgp_opt.py:
[codebox=python file=Unbenannt.txt]import numpy as np
from vssgp_model import VSSGP
# import multiprocessing
from pathos.pools import ThreadPool
class VSSGP_opt():
def __init__(self, N, Q, D, K, inputs, opt_params, fixed_params, use_exact_A = False, test_set = {},
parallel = False, batch_size = None, components = None, print_interval = None):
self.vssgp, self.N, self.Q, self.K, self.fixed_params = VSSGP(use_exact_A), N, Q, K, fixed_params
self.use_exact_A, self.parallel, self.batch_size = use_exact_A, parallel, batch_size
self.inputs, self.test_set = inputs, test_set
self.print_interval = 10 if print_interval is None else print_interval
self.opt_param_names = [n for n,_ in opt_params.iteritems()]
opt_param_values = [np.atleast_2d(opt_params[n]) for n in self.opt_param_names]
self.shapes = [v.shape for v in opt_param_values]
self.sizes = [sum([np.prod(x) for x in self.shapes[:i]]) for i in xrange(len(self.shapes)+1)]
self.components = opt_params['lSigma'].shape[2] if components is None else components
self.colours = [np.random.rand(3,1) for c in xrange(self.components)]
self.callback_counter = [0]
if batch_size is not None:
if parallel:
self.pool = ThreadPool(int(self.N / self.batch_size))
else:
self.params = np.concatenate([v.flatten() for v in opt_param_values])
self.param_updates = np.zeros_like(self.params)
self.moving_mean_squared = np.zeros_like(self.params)
self.learning_rates = 1e-2*np.ones_like(self.params)
def extend(self, x, y, z = {}):
return dict(x.items() + y.items() + z.items())
def eval_f_LL(self, arguments):
out_f = self.vssgp.f['LL'](**arguments)
return (out_f)
def eval_g_LL(self, arguments):
out_g = self.vssgp.g['LL'](**arguments)
return (out_g)
def unpack(self, x):
x_param_values = [x[self.sizes[i-1]:self.sizes].reshape(self.shapes[i-1]) for i in xrange(1,len(self.shapes)+1)]
params = {n:v for (n,v) in zip(self.opt_param_names, x_param_values)}
if 'ltau' in params:
params['ltau'] = params['ltau'].squeeze()
return params
def func(self, x):
params = self.extend(self.fixed_params, self.unpack(x))
if self.batch_size is not None:
X, Y, splits = self.inputs['X'], self.inputs['Y'], int(self.N / self.batch_size)
if self.parallel:
arguments = [(X[i::splits], Y[i::splits], params) for i in xrange(splits)]
LL = sum(self.pool.map(self.eval_f_LL, arguments).get(9999999))
KL = self.vssgp.f['KL'](**self.extend({'X': [[0]], 'Y': [[0]]}, params))
else:
split = np.random.randint(splits)
LL = self.N / self.batch_size * self.vssgp.f['LL'](**self.extend({'X': X[split::splits], 'Y': Y[split::splits]}, params))
print LL
KL = self.vssgp.f['KL'](**self.extend({'X': [[0]], 'Y': [[0]]}, params))
else:
params = self.extend(self.inputs, params)
LL, KL = self.vssgp.f['LL'](**params), self.vssgp.f['KL'](**params)
return -(LL - KL)
def fprime(self, x):
grads, params = [], self.extend(self.fixed_params, self.unpack(x))
for n in self.opt_param_names:
if self.batch_size is not None:
X, Y, splits = self.inputs['X'], self.inputs['Y'], int(self.N / self.batch_size)
if self.parallel:
arguments = [(n, X[i::splits], Y[i::splits], params) for i in xrange(splits)]
dLL = sum(self.pool.map(self.eval_g_LL, arguments).get(9999999))
dKL = self.vssgp.g[n]['KL'](**self.extend({'X': [[0]], 'Y': [[0]]}, params))
else:
split = np.random.randint(splits)
dLL = self.N / self.batch_size * self.vssgp.g[n]['LL'](**self.extend({'X': X[split::splits], 'Y': Y[split::splits]}, params))
dKL = self.vssgp.g[n]['KL'](**self.extend({'X': [[0]], 'Y': [[0]]}, params))
else:
params = self.extend(self.inputs, params)
dLL, dKL = self.vssgp.g[n]['LL'](**params), self.vssgp.g[n]['KL'](**params)
grads += [-(dLL - dKL)]
return np.concatenate([grad.flatten() for grad in grads])
def callback(self, x):
if self.callback_counter[0]%self.print_interval == 0:
opt_params = self.unpack(x)
params = self.extend(self.inputs, self.fixed_params, opt_params)
LL = self.vssgp.f['LL'](**params)
KL = self.vssgp.f['KL'](**params)
print LL - KL
self.callback_counter[0] += 1[/code]
Ich lade die Module, indem ich vssgp_example.py ausführe.
Anschließend mache ich
[codebox=pycon file=Unbenannt.txt]if __name__== '__main__' : STARTME()[/code]
Ich erhalte nun folgenden Fehler:
Code: Alles auswählen
Traceback (most recent call last):
File "<ipython-input-2-f919e99b6eea>", line 1, in <module>
if __name__== '__main__' : STARTME()
File "C:/Users/flo9fe/Desktop/DEEPvSSGP/vssgp_example.py", line 50, in STARTME
options={'ftol': 0, 'disp': False, 'maxiter': 500}, tol=0, callback=vssgp_opt.callback)
File "C:\Program Files\Anaconda2\lib\site-packages\scipy\optimize\_minimize.py", line 450, in minimize
callback=callback, **options)
File "C:\Program Files\Anaconda2\lib\site-packages\scipy\optimize\lbfgsb.py", line 328, in _minimize_lbfgsb
f, g = func_and_grad(x)
File "C:\Program Files\Anaconda2\lib\site-packages\scipy\optimize\lbfgsb.py", line 278, in func_and_grad
f = fun(x, *args)
File "C:\Program Files\Anaconda2\lib\site-packages\scipy\optimize\optimize.py", line 292, in function_wrapper
return function(*(wrapper_args + args))
File "vssgp_opt.py", line 54, in func
LL = sum(self.pool.map(self.eval_f_LL, arguments).get(9999999))
File "C:\Program Files\Anaconda2\lib\site-packages\pathos\threading.py", line 133, in map
return _pool.map(star(f), zip(*args)) # chunksize
File "C:\Program Files\Anaconda2\lib\site-packages\multiprocess\pool.py", line 251, in map
return self.map_async(func, iterable, chunksize).get()
File "C:\Program Files\Anaconda2\lib\site-packages\multiprocess\pool.py", line 567, in get
raise self._value
TypeError: Function object argument after ** must be a mapping, not tuple
Danke