import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
mpl.rcParams['figure.figsize'] = (8, 6)
x = tf.Variable(6.0)
y = tf.Variable(5.0)
with tf.GradientTape() as t:
  x_sq = x**3 - 2*x - 15
  with t.stop_recording():
    # Operations in this block are not recorded on the tape.
    y_sq = y**3 - 6*x**2 + 10
  z = x_sq + y_sq
grad = t.gradient(z, {'x': x, 'y': y})
print(grad)
print('dz/dx:', grad['x'])
print('dz/dy:', grad['y'])  # None: y was only used inside stop_recording()
x = tf.Variable(6.0)
y = tf.Variable(5.0)
with tf.GradientTape() as t:
  # np.sin runs eagerly, so it is a constant as far as the tape is concerned.
  y_sq = y**3 + np.sin(x**2)
  # tf.stop_gradient blocks gradient flow through y_sq.
  z = x**3 + tf.stop_gradient(y_sq)
grad = t.gradient(z, {'x': x, 'y': y})
print('dz/dx:', grad['x'])
print('dz/dy:', grad['y'])  # None: the only path to y is blocked by stop_gradient
# Establish an identity operation, but clip during the gradient pass.
@tf.custom_gradient
def clip_gradients(y):
  def backward(dy):
    return tf.clip_by_norm(dy, 0.5)
  return y, backward

v = tf.Variable(3.0)
with tf.GradientTape() as t:
  output = clip_gradients(v * v)
print(t.gradient(output, v))  # calls "backward", which clips the gradient 6 to 3
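# For comparison, a small sketch (my addition, not in the original post): the same
# gradient without the custom clipping op should come out as 2*v = 6.0.
with tf.GradientTape() as t_plain:
  out_plain = v * v
print(t_plain.gradient(out_plain, v))  # tf.Tensor(6.0, shape=(), dtype=float32)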
class MyModule(tf.Module):
  @tf.function(input_signature=[tf.TensorSpec(None)])
  def call_custom_grad(self, x):
    return clip_gradients(x)

model = MyModule()
tf.saved_model.save(
    model,
    'saved_model',
    options=tf.saved_model.SaveOptions(experimental_custom_gradients=True))
# The loaded model applies the same gradient clipping as the example above.
v = tf.Variable(5.0)
loaded = tf.saved_model.load('saved_model')
with tf.GradientTape() as t:
  output = loaded.call_custom_grad(v * v)
print(t.gradient(output, v))
x0 = tf.constant(0.0)
x1 = tf.constant(0.0)
with tf.GradientTape() as tape0, tf.GradientTape() as tape1:
  tape0.watch(x0)
  tape1.watch(x1)
  y0 = tf.math.sin(x0)
  y1 = tf.nn.sigmoid(x1)
  y = y0 + y1
  ys = tf.reduce_sum(y)
tape0.gradient(ys, x0).numpy()  # cos(x0) => 1.0
1.0
tape1.gradient(ys, x1).numpy()  # sigmoid(x1)*(1-sigmoid(x1)) => 0.25
0.25
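# A small alternative sketch (my addition, not in the original post): a single
# persistent tape can provide both gradients instead of two separate tapes.
with tf.GradientTape(persistent=True) as tape:
  tape.watch(x0)
  tape.watch(x1)
  ys = tf.reduce_sum(tf.math.sin(x0) + tf.nn.sigmoid(x1))
print(tape.gradient(ys, x0).numpy())  # 1.0
print(tape.gradient(ys, x1).numpy())  # 0.25
del tape  # drop the persistent tape's resources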
#############################
x = tf.Variable(1.0)  # Create a TensorFlow variable initialized to 1.0
with tf.GradientTape() as t2:
  with tf.GradientTape() as t1:
    # np.cos is evaluated eagerly, so the cosine term is a constant for the tape;
    # only x**3 contributes to the recorded gradients below.
    y = x**3 + np.cos(4*x)
  # Compute the gradient inside the outer `t2` context manager
  # which means the gradient computation is differentiable as well.
  dy_dx = t1.gradient(y, x)
d2y_dx2 = t2.gradient(dy_dx, x)
print('dy_dx:', dy_dx.numpy())
print('d2y_dx2:', d2y_dx2.numpy())
dy_dx: 3.0
d2y_dx2: 6.0
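# Hedged side note (my addition, not in the original post): with tf.cos instead of
# np.cos the cosine term is differentiated as well, and the results match the analytic
# derivatives 3*x**2 - 4*sin(4*x) and 6*x - 16*cos(4*x).
x = tf.Variable(1.0)
with tf.GradientTape() as t2:
  with tf.GradientTape() as t1:
    y = x**3 + tf.cos(4*x)
  dy_dx = t1.gradient(y, x)
d2y_dx2 = t2.gradient(dy_dx, x)
print('dy_dx:', dy_dx.numpy(), 'analytic:', 3*1.0**2 - 4*np.sin(4.0))
print('d2y_dx2:', d2y_dx2.numpy(), 'analytic:', 6*1.0 - 16*np.cos(4.0))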
#######################################
x = tf.random.normal([6, 5])
layer = tf.keras.layers.Dense(10, activation=tf.nn.relu)
with tf.GradientTape() as t2:
  # The inner tape only takes the gradient with respect to the input,
  # not the variables.
  with tf.GradientTape(watch_accessed_variables=False) as t1:
    t1.watch(x)
    y = layer(x)
    out = tf.reduce_sum(layer(x)**2)
  # 1. Calculate the input gradient.
  g1 = t1.gradient(out, x)
  # 2. Calculate the magnitude of the input gradient.
  g1_mag = tf.norm(g1)
# 3. Calculate the gradient of the magnitude with respect to the model.
dg1_mag = t2.gradient(g1_mag, layer.trainable_variables)
[var.shape for var in dg1_mag]
# As a first example, here is the Jacobian of a vector target with respect to a scalar source.
x = tf.linspace(-10.0, 10.0, 200+1)
delta = tf.Variable(3.0)
with tf.GradientTape() as tape:
  y = tf.nn.sigmoid(x + delta)
dy_dx = tape.jacobian(y, delta)
print(y.shape)
print(dy_dx.shape)
# (201,)
# (201,)
plt.plot(x.numpy(), y, label='y')
plt.plot(x.numpy(), dy_dx, label='dy/dx')
plt.legend()
_ = plt.xlabel('x')
plt.title("Funktion und ihre automatische Differenzierung")
plt.grid()
plt.show()
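# Hedged aside (my addition, not in the original post): tape.gradient would sum the
# per-element derivatives into one scalar, while tape.jacobian above keeps one value
# per element of y.
with tf.GradientTape(persistent=True) as tape:
  y_tmp = tf.nn.sigmoid(x + delta)
print(tape.jacobian(y_tmp, delta).shape)   # (201,)
print(tape.gradient(y_tmp, delta).shape)   # () -- the element-wise derivatives summed
del tape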
###########################################
x = tf.random.normal([7, 5])
layer = tf.keras.layers.Dense(10, activation=tf.nn.relu)
with tf.GradientTape(persistent=True) as tape:
  y = layer(x)
print(y.shape)
## And the shape of the layer's kernel is (5, 10):
layer.kernel.shape
#TensorShape([5, 10])
############################
## The shape of the Jacobian of the output with respect to the kernel
# is those two shapes concatenated together:
j = tape.jacobian(y, layer.kernel)
j.shape
#TensorShape([7, 10, 5, 10])
################################
g = tape.gradient(y, layer.kernel)
print('g.shape:', g.shape)
j_sum = tf.reduce_sum(j, axis=[0, 1])
delta = tf.reduce_max(abs(g - j_sum)).numpy()
assert delta < 1e-3
print('delta:', delta)
g.shape: (5, 10)
delta: 2.3841858e-07
############################
x = tf.random.normal([6, 5])
layer1 = tf.keras.layers.Dense(8, activation=tf.nn.relu)
layer2 = tf.keras.layers.Dense(6, activation=tf.nn.relu)
with tf.GradientTape() as t2:
  with tf.GradientTape() as t1:
    x = layer1(x)
    x = layer2(x)
    loss = tf.reduce_mean(x**2)
  # Compute the gradient inside the outer tape so it can be differentiated again.
  g = t1.gradient(loss, layer1.kernel)
h = t2.jacobian(g, layer1.kernel)
print(f'layer.kernel.shape: {layer1.kernel.shape}')
print(f'h.shape: {h.shape}')
layer.kernel.shape: (5, 8)
h.shape: (5, 8, 5, 8)
#############################
n_params = tf.reduce_prod(layer1.kernel.shape)
g_vec = tf.reshape(g, [n_params, 1])
h_mat = tf.reshape(h, [n_params, n_params])
# The Hessian matrix should be symmetric:
def imshow_zero_center(image, **kwargs):
  lim = tf.reduce_max(abs(image))
  plt.imshow(image, vmin=-lim, vmax=lim, cmap='seismic', **kwargs)
  plt.colorbar()
imshow_zero_center(h_mat)
plt.show()
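# Quick numeric check (my addition, not in the original post): the flattened Hessian
# should be symmetric up to float32 noise.
asym = tf.reduce_max(abs(h_mat - tf.transpose(h_mat))).numpy()
print('max |H - H^T|:', asym)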
# The Hessian and gradient can be combined into a Newton-style update,
# sketched below.
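# A hedged sketch (my addition, based on the standard Newton-step recipe; not code from
# the original post): solve H @ update = g with a small ridge term for stability, then
# reshape the flat update back to the kernel's shape and apply it.
eps = 1e-3
eye_eps = tf.eye(h_mat.shape[0]) * eps
# x_new = x_old - H^-1 @ g
update = tf.linalg.solve(h_mat + eye_eps, g_vec)
# Reshape the update and apply it to the variable.
_ = layer1.kernel.assign_sub(tf.reshape(update, layer1.kernel.shape))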
##################################
x = tf.random.normal([8, 6])
layer1 = tf.keras.layers.Dense(6, activation=tf.nn.elu)
layer2 = tf.keras.layers.Dense(5, activation=tf.nn.elu)
with tf.GradientTape(persistent=True, watch_accessed_variables=False) as tape:
  tape.watch(x)
  y = layer1(x)
  y = layer2(y)
y.shape
#TensorShape([8, 5])
# The full Jacobian of y with respect to x has the shape (batch, outs, batch, ins):
#
j = tape.jacobian(y, x)
j.shape
#TensorShape([8, 5, 8, 6])
#
imshow_zero_center(j[:, 0, :, 0])
_ = plt.title('A (batch, batch) slice')
def plot_as_patches(j):
  # Reorder axes so the diagonals will each form a contiguous patch.
  j = tf.transpose(j, [1, 0, 3, 2])
  # Pad in between each patch.
  lim = tf.reduce_max(abs(j))
  j = tf.pad(j, [[0, 0], [1, 1], [0, 0], [1, 1]],
             constant_values=-lim)
  # Reshape to form a single image.
  s = j.shape
  j = tf.reshape(j, [s[0]*s[1], s[2]*s[3]])
  imshow_zero_center(j, extent=[-0.5, s[2]-0.5, s[0]-0.5, -0.5])
plot_as_patches(j)
_ = plt.title('All (batch, batch) slices are diagonal')
plt.title("Funktion und ihre autom,atische Ableitung")
plt.show()
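# Hedged follow-up (my addition, not in the original post): because every (batch, batch)
# slice is diagonal, the cross-batch entries are zero, and tape.batch_jacobian returns
# only the per-example blocks with shape (batch, outs, ins).
jb = tape.batch_jacobian(y, x)
print(jb.shape)  # (8, 5, 6)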