Simple DNN Framework

参考自：Tensorflow 简明原理

class Node(object):
	"""
	Base class for nodes in the network.

	Arguments:
		`inbound_nodes`: A list of nodes with edges into this node.
			Omit it (or pass None) for a node without inputs.
	"""
	def __init__(self, inbound_nodes=None):
		"""
		Node's constructor (runs when the object is instantiated). Sets
		properties that all nodes need and registers this node as an
		outbound node of each of its inputs.
		"""
		# A list of nodes with edges into this node. A fresh list is created
		# when none is given: a `[]` default in the signature would be one
		# shared object, so appending to one node's list would leak into
		# every other node built without inputs.
		self.inbound_nodes = [] if inbound_nodes is None else inbound_nodes
		# The eventual value of this node. Set by running
		# the forward() method.
		self.value = None
		# A list of nodes that this node outputs to.
		self.outbound_nodes = []
		# Keys are the inputs to this node and their values are the
		# partials of this node with respect to that input.
		self.gradients = {}
		# Sets this node as an outbound node for all of
		# this node's inputs.
		for node in self.inbound_nodes:
			node.outbound_nodes.append(self)
	def forward(self):
		"""
		Every node that uses this class as a base class will
		need to define its own `forward` method.

		Raises NotImplementedError when not overridden.
		"""
		raise NotImplementedError
	def backward(self):
		"""
		Every node that uses this class as a base class will
		need to define its own `backward` method.

		Raises NotImplementedError when not overridden.
		"""
		raise NotImplementedError
class Input(Node):
	"""
	A source node of the graph (data, weights or biases).

	It has no inbound nodes and computes nothing itself: its `value` is
	assigned from outside the node.
	"""
	def __init__(self):
		# The base constructor is called without inbound nodes.
		Node.__init__(self)
	def forward(self):
		"""
		Nothing to compute for an input node.
		"""
		pass
	def backward(self):
		"""
		Adds up the gradients that every outbound node reports for this
		input and stores the total under the key `self`.
		"""
		total = 0
		for consumer in self.outbound_nodes:
			total += consumer.gradients[self]
		self.gradients = {self: total}
class Linear(Node):
	"""
	A node computing the affine transform `np.dot(X, W) + b`.
	"""
	def __init__(self, X, W, b):
		# The inbound order [X, W, b] is relied upon by forward/backward.
		Node.__init__(self, [X, W, b])
	def forward(self):
		"""
		Sets `value` to the affine transform of the current input values.
		"""
		x_node, w_node, b_node = self.inbound_nodes
		self.value = np.dot(x_node.value, w_node.value) + b_node.value
	def backward(self):
		"""
		Accumulates, over all outbound nodes, the partials with respect to
		X, W and b, starting from zero for each of them.
		"""
		x_node, w_node, b_node = self.inbound_nodes
		self.gradients = {node: np.zeros_like(node.value) for node in self.inbound_nodes}
		for consumer in self.outbound_nodes:
			upstream = consumer.gradients[self]
			# Partial with respect to the input: upstream . W^T
			self.gradients[x_node] += np.dot(upstream, w_node.value.T)
			# Partial with respect to the weights: X^T . upstream
			self.gradients[w_node] += np.dot(x_node.value.T, upstream)
			# Partial with respect to the bias: upstream summed over axis 0
			self.gradients[b_node] += np.sum(upstream, axis=0, keepdims=False)
class Sigmoid(Node):
	"""
	A node applying the logistic sigmoid to its single input.
	"""
	def __init__(self, node):
		Node.__init__(self, [node])
	def _sigmoid(self, x):
		"""
		The logistic function 1 / (1 + exp(-x)), kept as its own method
		apart from `forward`.
		`x`: A numpy array-like object.
		"""
		denominator = 1. + np.exp(-x)
		return 1. / denominator
	def forward(self):
		"""
		Sets `value` to the sigmoid of the inbound node's value.
		"""
		source = self.inbound_nodes[0]
		self.value = self._sigmoid(source.value)
	def backward(self):
		"""
		Accumulates the gradient with respect to the input, using
		sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)) evaluated from the
		stored forward value.
		"""
		self.gradients = {node: np.zeros_like(node.value) for node in self.inbound_nodes}
		source = self.inbound_nodes[0]
		activation = self.value
		local_slope = activation * (1 - activation)
		for consumer in self.outbound_nodes:
			upstream = consumer.gradients[self]
			self.gradients[source] += local_slope * upstream
class Tanh(Node):
	"""
	Represents a node that performs the tanh activation function.
	"""
	def __init__(self, node):
		"""
		`node`: The node whose value is passed through tanh.
		"""
		Node.__init__(self, [node])
	def forward(self):
		"""
		Calculates the tanh of the inbound node's value.
		"""
		input_value = self.inbound_nodes[0].value
		self.value  = np.tanh(input_value)
	def backward(self):
		"""
		Calculates the gradient with respect to the input, using
		tanh'(x) = 1 - tanh(x)**2 = (1 + tanh(x)) * (1 - tanh(x)).
		"""
		self.gradients = {n: np.zeros_like(n.value) for n in self.inbound_nodes}
		tanh = self.value
		for n in self.outbound_nodes:
			# The upstream gradient is brought to the shape of this node's
			# output before the element-wise product. MSE in this file
			# reports its gradient as a flattened (N, 1) column (row-major
			# order), which a reshape undoes exactly; a gradient that
			# already has the output's shape passes through unchanged.
			# A transpose would only line up for a one-row output.
			grad_cost = np.reshape(n.gradients[self], np.shape(tanh))
			self.gradients[self.inbound_nodes[0]] += (1 + tanh) * (1 - tanh) * grad_cost
class MSE(Node):
	"""
	The mean squared error cost function.
	Should be used as the last node for a network.
	"""
	def __init__(self, y, a):
		"""
		`y`: The node holding the target values.
		`a`: The node holding the network's output.
		"""
		Node.__init__(self, [y, a])
	def forward(self):
		"""
		Calculates the mean squared error over all elements of `y - a`.
		"""
		# Both inputs are flattened to (N, 1) columns so they subtract
		# element by element.
		y = self.inbound_nodes[0].value.reshape(-1, 1)
		a = self.inbound_nodes[1].value.reshape(-1, 1)
		# Number of rows of the target as fed in (kept for callers that
		# read it; the gradient below is normalised by the element count).
		self.m = self.inbound_nodes[0].value.shape[0]
		self.diff = y - a
		self.value = np.mean(self.diff**2)
	def backward(self):
		"""
		Calculates the gradient of the cost with respect to `y` and `a`.

		The cost is the mean over all N elements of `diff**2`, so each
		partial is +/- 2 * diff / N. The gradients keep the flattened
		(N, 1) layout of `self.diff`.
		"""
		# Divide by the number of averaged elements, matching np.mean in
		# forward(). Dividing by the row count instead over-scales the
		# gradient whenever the target has more than one column. The float
		# literal avoids integer division.
		scale = 2.0 / self.diff.size
		self.gradients[self.inbound_nodes[0]] = scale * self.diff
		self.gradients[self.inbound_nodes[1]] = -scale * self.diff
		
def topological_sort(feed_dict):
	"""
	Sort the nodes in topological order using Kahn's Algorithm.

	Every node reachable from the keys of `feed_dict` through
	`outbound_nodes` is included. As a side effect, each `Input` node that
	is visited gets its `value` set from `feed_dict`.

	Arguments:
		`feed_dict`: A dictionary where the key is an `Input` node and the
			value is the respective value fed to that node.

	Returns a list of nodes in which every node comes after all of the
	nodes feeding into it. Ready nodes are emitted first-in, first-out,
	starting from the iteration order of `feed_dict`, so the order does not
	depend on set iteration order.
	"""
	from collections import deque

	input_nodes = list(feed_dict)

	# Pass 1: walk the graph once and record, for every reachable node, the
	# set of distinct nodes feeding into it. The dict keys double as the
	# "already discovered" marker, so each node is expanded exactly once.
	pending_inputs = {n: set() for n in input_nodes}
	to_expand = deque(input_nodes)
	while to_expand:
		n = to_expand.popleft()
		for m in n.outbound_nodes:
			if m not in pending_inputs:
				pending_inputs[m] = set()
				to_expand.append(m)
			pending_inputs[m].add(n)

	# Pass 2: repeatedly emit a node whose inputs have all been emitted.
	ordered = []
	ready = deque(input_nodes)
	while ready:
		n = ready.popleft()
		if isinstance(n, Input):
			n.value = feed_dict[n]
		ordered.append(n)
		for m in n.outbound_nodes:
			waiting_on = pending_inputs[m]
			# `n` may be listed more than once in `outbound_nodes` of the
			# same producer (a node used twice as input of one consumer);
			# only the first occurrence releases the edge.
			if n in waiting_on:
				waiting_on.remove(n)
				if not waiting_on:
					ready.append(m)
	return ordered
def forward_and_backward(graph):
	"""
	Runs one forward pass followed by one backward pass.

	Arguments:
		`graph`: The result of calling `topological_sort`: nodes ordered so
			that each one comes after its inputs.

	`forward()` is called on every node from first to last, then
	`backward()` on every node from last to first.
	"""
	for node in graph:
		node.forward()
	for node in reversed(graph):
		node.backward()
def sgd_update(trainables, learning_rate=1e-2):
	"""
	Applies one step of stochastic gradient descent to each trainable.

	Arguments:
		`trainables`: A list of `Input` Nodes representing weights/biases.
		`learning_rate`: The learning rate.

	Each trainable's `value` is rebound to a new object
	(`value - learning_rate * gradient`); the previous value object is not
	modified in place.
	"""
	for param in trainables:
		step = learning_rate * param.gradients[param]
		param.value = param.value - step
# usage	
import numpy as np
from sklearn.utils import resample
np.random.seed(0)

# Fixed initial parameters of a network with 3 inputs, 4 hidden units and
# 2 outputs.
w1_0 = np.array([
	[0.1, 0.2, 0.3, 0.4],
	[0.5, 0.6, 0.7, 0.8],
	[0.9, 1.0, 1.1, 1.2],
])
w2_0 = np.array([
	[1.3, 1.4],
	[1.5, 1.6],
	[1.7, 1.8],
	[1.9, 2.0],
])
b1_0 = np.array([-2.0, -6.0, -1.0, -7.0])
b2_0 = np.array([-2.5, -5.0])

# A single training sample and its target.
X_ = np.array([[1.0, 2.0, 3.0]])
y_ = np.array([[-0.85, 0.75]])
n_features = X_.shape[1]
W1_ = w1_0
b1_ = b1_0
W2_ = w2_0
b2_ = b2_0

# Graph: X -> Linear -> Sigmoid -> Linear -> Tanh -> MSE against y.
X, y = Input(), Input()
W1, b1 = Input(), Input()
W2, b2 = Input(), Input()
l1 = Linear(X, W1, b1)
s1 = Sigmoid(l1)
l2 = Linear(s1, W2, b2)
t1 = Tanh(l2)
cost = MSE(y, t1)
feed_dict = {
	X: X_,   y: y_,
	W1: W1_, b1: b1_,
	W2: W2_, b2: b2_
}

epochs = 10
m = X_.shape[0]
batch_size = 1
steps_per_epoch = m // batch_size

graph = topological_sort(feed_dict)
trainables = [W1, b1, W2, b2]

# Per-epoch history: the weight matrices (starting with their initial
# values) and the network output.
l_Mat_W1 = [w1_0]
l_Mat_W2 = [w2_0]
l_Mat_out = []
l_val = []

for i in range(epochs):
	# Running sum of the cost over the steps of this epoch.
	loss = 0
	for j in range(steps_per_epoch):
		X_batch, y_batch = resample(X_, y_, n_samples=batch_size)
		X.value = X_batch
		y.value = y_batch
		forward_and_backward(graph)
		sgd_update(trainables, 0.1)
		loss += cost.value
	# Read the tracked quantities from the nodes themselves. Looking them
	# up by position in `graph` or by array shape depends on the order the
	# sort happens to return and on no other node sharing that shape.
	mat_W1 = W1.value
	mat_W2 = W2.value
	l_Mat_W1.append(mat_W1)
	l_Mat_W2.append(mat_W2)
	# Output of the Tanh node from the forward pass of the last step,
	# i.e. computed before that step's weight update.
	l_Mat_out.append(t1.value)

# Visualization
import matplotlib.pyplot as plt
%matplotlib inline
# One figure with three heat maps side by side: the network output and the
# two weight matrices. After the transpose each column is one recorded
# snapshot and each row is one output / one flattened weight.
fig = plt.figure(figsize=(14, 10))

# Left panel: the two outputs, colour scale fixed to [-1, 1].
ax0 = fig.add_subplot(131)
out_history = np.array(l_Mat_out).reshape(-1, 2).T
c0 = ax0.imshow(out_history, interpolation='nearest', aspect='auto', cmap="Reds", vmax=1, vmin=-1)
ax0.set_title("Output")
cbar = fig.colorbar(c0, ticks=[-1, 0, 1])

# Middle panel: the 12 entries of w1; colour-bar ticks at its min and max.
ax1 = fig.add_subplot(132)
w1_history = np.array(l_Mat_W1)
c1 = ax1.imshow(w1_history.reshape(len(l_Mat_W1), 12).T, interpolation='nearest', aspect='auto', cmap="Reds")
ax1.set_title("w1")
cbar = fig.colorbar(c1, ticks=[np.min(w1_history), np.max(w1_history)])

# Right panel: the 8 entries of w2; colour-bar ticks at its min and max.
ax2 = fig.add_subplot(133)
w2_history = np.array(l_Mat_W2)
c2 = ax2.imshow(w2_history.reshape(len(l_Mat_W2), 8).T, interpolation='nearest', aspect='auto', cmap="Reds")
ax2.set_title("w2")
cbar = fig.colorbar(c2, ticks=[np.min(w2_history), np.max(w2_history)])

# Axis labelling.
ax0.set_yticks([0, 1])
ax0.set_yticklabels(["out0", "out1"])
ax1.set_xlabel("epochs")
#for i in range(len(l_Mat_W1)):