I've got the neural net with both launches and partials written, although some internals need to be redone because I misunderstood ReLU. Those misunderstandings are documented in the Background.

temporaryWork^2
youainti 5 years ago
parent 2657f4d821
commit 7b51044057

@ -17,3 +17,18 @@ Also, neural networks can return more than just a single output as long as the
loss function that is used for optimization can consume both of them.
Thus I could write two separate neural networks (such as for launch and partials),
and then write a third NN that binds the two together.
## Notes on functions
ReLU is a linear rectifier; it has no trainable parameters, so no training is involved.
This makes it good for working as a final cleanup of the launch function.
This also makes it not so good for the partial derivatives.
Linear is a good but basic network type.
Upscaling allows you to create more features.
Downscaling reduces the number of features (by throwing data away?).
Instead of downscaling, use a linear function to change the dimensions.
# Remaining Questions
- How do you set it up to run over a set of variables, i.e. batches?

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"id": "comprehensive-toyota",
"id": "electric-scratch",
"metadata": {},
"source": [
"Note on pytorch. NN optimization acts imperatively/by side effect as follows.\n",
@ -23,17 +23,18 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "together-jewelry",
"id": "outdoor-essay",
"metadata": {},
"outputs": [],
"source": [
"import torch"
"import torch\n",
"import combined as c"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "hispanic-grain",
"id": "played-reward",
"metadata": {
"tags": []
},
@ -64,7 +65,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "practical-gilbert",
"id": "tribal-manor",
"metadata": {},
"outputs": [
{
@ -72,45 +73,45 @@
"output_type": "stream",
"text": [
"\n",
" tensor(10.7553, grad_fn=<AddBackward0>)\n",
" tensor(4.8121, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(64.3239, grad_fn=<AddBackward0>)\n",
" tensor(17.3775, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(17.9537, grad_fn=<AddBackward0>)\n",
" tensor(30.0737, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(60.9679, grad_fn=<AddBackward0>)\n",
" tensor(0.6026, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(30.1436, grad_fn=<AddBackward0>)\n",
" tensor(0.3996, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(89.3963, grad_fn=<AddBackward0>)\n",
" tensor(0.3020, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(70.8575, grad_fn=<AddBackward0>)\n",
" tensor(0.2092, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(24.7911, grad_fn=<AddBackward0>)\n",
" tensor(0.1412, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(695.9885, grad_fn=<AddBackward0>)\n",
" tensor(0.0893, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(339753.2500, grad_fn=<AddBackward0>)\n",
" tensor(0.0561, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(8.0135e+13, grad_fn=<AddBackward0>)\n",
" tensor(0.0341, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(inf, grad_fn=<AddBackward0>)\n",
" tensor(0.0208, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(nan, grad_fn=<AddBackward0>)\n",
" tensor(0.0125, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(nan, grad_fn=<AddBackward0>)\n",
" tensor(0.0075, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(nan, grad_fn=<AddBackward0>)\n",
" tensor(0.0045, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(nan, grad_fn=<AddBackward0>)\n",
" tensor(0.0027, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(nan, grad_fn=<AddBackward0>)\n",
" tensor(0.0016, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(nan, grad_fn=<AddBackward0>)\n",
" tensor(0.0010, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(nan, grad_fn=<AddBackward0>)\n",
" tensor(0.0006, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(nan, grad_fn=<AddBackward0>)\n"
" tensor(0.0003, grad_fn=<AddBackward0>)\n"
]
}
],
@ -149,7 +150,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "early-victoria",
"id": "uniform-union",
"metadata": {},
"outputs": [],
"source": [
@ -184,197 +185,386 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "sustained-avatar",
"metadata": {},
"outputs": [],
"id": "grand-vietnamese",
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'loss_fn3' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0mTraceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-4ecf63ceaaa2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0ml\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloss_fn3\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_a\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_b\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'loss_fn3' is not defined"
]
}
],
"source": [
"model = SplitNetwork(input_size = 6, output_size_a=5, output_size_b=7, layers_size=15)\n",
"#Prep Optimizer\n",
"optimizer = torch.optim.SGD(model.parameters(),lr=0.01)\n",
"\n",
"data_in = torch.tensor([1.5,2,3,4,5,6])\n",
"for i in range(25):\n",
" #training loop\n",
" optimizer.zero_grad()\n",
"\n",
" output = model.forward(data_in)\n",
" output\n",
"\n",
"target_a = torch.zeros(5)\n",
"target_b = torch.ones(7)\n",
" l = loss_fn3(output, target_a, target_b)\n",
"\n",
"def loss_fn3(output,target_a, target_b):\n",
" return sum((output[0] - target_a)**2) + sum((output[1] - target_b)**2)\n",
" #could add a simplicity assumption i.e. l1 on parameters."
" l.backward()\n",
"\n",
" optimizer.step()\n",
"\n",
" print(\"\\n\",l)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "settled-maple",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"id": "inclusive-rouge",
"id": "appointed-sandwich",
"metadata": {},
"outputs": [],
"source": [
"#This is a custom upscale module.\n",
"class PartialDerivativesEstimand(torch.nn.Module):\n",
" def __init__(self, state_tensor_size,layers_size,number_constellations):\n",
" \"\"\"\n",
" Description\n",
" \"\"\"\n",
" super().__init__()\n",
" self.number_constellations = number_constellations\n",
" \n",
" #Scale up the input from just the tensor of states to the layer_size X number_constellations\n",
" \n",
" #Increase to the layer size\n",
" self.linear_step_1 = torch.nn.Linear(in_features=state_tensor_size, out_features=layers_size)\n",
" #Upscale the tensor to be able to estimate for each constellation\n",
" #TODO: change to the standard upscaler\n",
" self.upscale_step = lambda x: torch.nn.functional.interpolate(x, scale_factor=self.number_constellations).view(x.numel(), self.number_constellations)\n",
" \n",
" #start adding useful layers (start small).\n",
" self.relu_3 = torch.nn.ReLU()\n",
" self.relu_4 = torch.nn.ReLU() #TODO:swap to linear or something like that.\n",
" #TODO:downscale to match the proper output values\n",
" \n",
" def forward(self, input_values):\n",
" \n",
" intermediate_values = self.linear_step_1(input_values)\n",
" intermediate_values = self.upscale_step(intermediate_values)\n",
" intermediate_values = self.relu_3(intermediate_values)\n",
" \n",
" intermediate_values = self.relu_4(intermediate_values)\n",
" \n",
" return intermediate_values"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "complete-gather",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0.0000, 0.0000, 0.0000],\n",
" [0.4215, 0.4215, 0.4215],\n",
" [0.5668, 0.5668, 0.5668],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0675, 0.0675, 0.0675],\n",
" [1.8888, 1.8888, 1.8888],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn = PartialDerivativesEstimand(3,12,3)\n",
"\n",
"test = torch.tensor([[[1.0,3,4]]])\n",
"\n",
"t = nn.forward(test)\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "iraqi-italic",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(30.5458, grad_fn=<AddBackward0>)\n",
"tensor(26.5162, grad_fn=<AddBackward0>)\n",
"tensor(24.9672, grad_fn=<AddBackward0>)\n",
"tensor(24.3718, grad_fn=<AddBackward0>)\n",
"tensor(24.1429, grad_fn=<AddBackward0>)\n",
"tensor(24.0549, grad_fn=<AddBackward0>)\n",
"tensor(24.0211, grad_fn=<AddBackward0>)\n",
"tensor(24.0081, grad_fn=<AddBackward0>)\n",
"tensor(24.0031, grad_fn=<AddBackward0>)\n",
"tensor(24.0012, grad_fn=<AddBackward0>)\n"
]
},
{
"data": {
"text/plain": [
"tensor([[0.0000, 0.0000, 0.0000],\n",
" [0.9951, 0.9951, 0.9951],\n",
" [0.9964, 0.9964, 0.9964],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.9922, 0.9922, 0.9922],\n",
" [1.0075, 1.0075, 1.0075],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Prep Optimizer\n",
"optimizer = torch.optim.SGD(nn.parameters(),lr=0.01)\n",
"\n",
" tensor(8.4134, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(5.9490, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(4.8652, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(3.7577, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(2.5462, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(1.3803, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.5700, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.2055, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0747, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0274, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0101, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0037, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0014, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0005, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0002, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(7.1453e-05, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(2.6635e-05, grad_fn=<AddBackward0>)\n",
"#get loss function\n",
"def loss_fn4(output):\n",
" return sum(sum((1-output)**2))\n",
"\n",
" tensor(9.9370e-06, grad_fn=<AddBackward0>)\n",
"for i in range(10):\n",
" #training loop\n",
" optimizer.zero_grad()\n",
"\n",
" tensor(3.7096e-06, grad_fn=<AddBackward0>)\n",
" output = nn.forward(test)\n",
"\n",
" tensor(1.3858e-06, grad_fn=<AddBackward0>)\n",
" l = loss_fn4(output)\n",
"\n",
" tensor(5.1807e-07, grad_fn=<AddBackward0>)\n",
" l.backward()\n",
"\n",
" tensor(1.9388e-07, grad_fn=<AddBackward0>)\n",
" optimizer.step()\n",
"\n",
" tensor(7.2581e-08, grad_fn=<AddBackward0>)\n",
" print(l)\n",
"\n",
" tensor(2.7196e-08, grad_fn=<AddBackward0>)\n",
"nn.forward(test)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "adaptive-period",
"metadata": {},
"outputs": [],
"source": [
"#This is a custom upscale module.\n",
"class LaunchFnEstimand(torch.nn.Module):\n",
" def __init__(self, state_tensor_size,layers_size,number_constellations):\n",
" \"\"\"\n",
" Description\n",
" \"\"\"\n",
" super().__init__()\n",
" self.number_constellations = number_constellations\n",
" \n",
" #Scale up the input from just the tensor of states to the layer_size X number_constellations\n",
" \n",
" #Increase to the layer size\n",
" self.linear_1 = torch.nn.Linear(in_features=state_tensor_size, out_features=layers_size)\n",
" self.relu = torch.nn.ReLU()\n",
" self.linear_3 = torch.nn.Linear(in_features=layers_size, out_features=layers_size)\n",
" self.linear_5 = torch.nn.Linear(in_features=layers_size, out_features=number_constellations)\n",
"\n",
" tensor(1.0235e-08, grad_fn=<AddBackward0>)\n"
" \n",
" def forward(self, input_values):\n",
" \n",
" intermediate_values = self.linear_1(input_values)\n",
" intermediate_values = self.relu(intermediate_values)\n",
" intermediate_values = self.linear_3(intermediate_values)\n",
" intermediate_values = self.relu(intermediate_values)\n",
" intermediate_values = self.linear_5(intermediate_values)\n",
" intermediate_values = self.relu(intermediate_values) #launches are always positive\n",
" \n",
" return intermediate_values"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "northern-vault",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(0.0315, grad_fn=<AddBackward0>)\n",
"tensor(0.0241, grad_fn=<AddBackward0>)\n",
"tensor(0.0184, grad_fn=<AddBackward0>)\n",
"tensor(0.0141, grad_fn=<AddBackward0>)\n",
"tensor(0.0107, grad_fn=<AddBackward0>)\n",
"tensor(0.0082, grad_fn=<AddBackward0>)\n",
"tensor(0.0062, grad_fn=<AddBackward0>)\n",
"tensor(0.0048, grad_fn=<AddBackward0>)\n",
"tensor(0.0036, grad_fn=<AddBackward0>)\n",
"tensor(0.0028, grad_fn=<AddBackward0>)\n"
]
},
{
"data": {
"text/plain": [
"tensor([[[0.0457, 0.0000]]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"launch = LaunchFnEstimand(3,12,2)\n",
"\n",
"#Prep Optimizer\n",
"optimizer = torch.optim.SGD(model.parameters(),lr=0.01)\n",
"optimizer = torch.optim.SGD(launch.parameters(),lr=0.01)\n",
"\n",
"for i in range(25):\n",
"#get loss function\n",
"def loss_fn5(output):\n",
" return sum(sum(sum((output)**2)))\n",
"\n",
"for i in range(10):\n",
" #training loop\n",
" optimizer.zero_grad()\n",
"\n",
" output = model.forward(data_in)\n",
" output\n",
" output = launch.forward(test)\n",
"\n",
" l = loss_fn3(output, target_a, target_b)\n",
" l = loss_fn5(output)\n",
"\n",
" l.backward()\n",
"\n",
" optimizer.step()\n",
"\n",
" print(\"\\n\",l)"
" print(l)\n",
" \n",
"\n",
"launch.forward(test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "sound-insulation",
"execution_count": 11,
"id": "sensitive-pennsylvania",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[0.0457, 0.0000]]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"launch(test)"
]
},
{
"cell_type": "code",
"execution_count": 144,
"id": "promotional-accent",
"execution_count": 12,
"id": "utility-giant",
"metadata": {},
"outputs": [],
"source": [
"#This is a custom upscale module.\n",
"class CustomUpscale(torch.nn.Module):\n",
" def __init__(self, input_size,layers_size,scale_factor):\n",
"class EstimandNN(torch.nn.Module):\n",
" def __init__(self, state_tensor_size,layers_size,number_constellations):\n",
" super().__init__()\n",
" self.scale_factor = scale_factor\n",
" \n",
" #So, this next section constructs different layers within the NN\n",
" #sinlge linear section\n",
" self.linear_step_1a = torch.nn.Linear(in_features=input_size, out_features=layers_size)\n",
" self.upscale_step = lambda x: torch.nn.functional.interpolate(x, scale_factor=self.scale_factor).view(x.numel(),self.scale_factor)\n",
" #single linear section\n",
" \n",
" def forward(self, input_values):\n",
" \n",
" intermediate_values_a = self.linear_step_1a(input_values)\n",
" intermediate_values_b = self.upscale_step(intermediate_values_a)\n",
" self.partials_estimator = PartialDerivativesEstimand(state_tensor_size,layers_size,number_constellations)\n",
" self.launch_estimator = LaunchFnEstimand(state_tensor_size,layers_size,number_constellations)\n",
" \n",
" return intermediate_values_b"
]
},
{
"cell_type": "code",
"execution_count": 145,
"id": "english-basement",
"metadata": {},
"outputs": [],
"source": [
"nn = MultiDimOut(3,12,3)"
" def forward(self, input_values):\n",
" partials = self.partials_estimator(input_values)\n",
" launch = self.launch_estimator(input_values)\n",
" return c.EstimandInterface(partials,launch)"
]
},
{
"cell_type": "code",
"execution_count": 146,
"id": "passive-chapel",
"execution_count": 13,
"id": "molecular-factory",
"metadata": {},
"outputs": [],
"source": [
"test = torch.tensor([[[1.0,3,4]]])"
"enn = EstimandNN(3,12,2)"
]
},
{
"cell_type": "code",
"execution_count": 147,
"id": "passing-heath",
"execution_count": 15,
"id": "artistic-washer",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[ 1.9191, 1.9191, 1.9191],\n",
" [-1.4519, -1.4519, -1.4519],\n",
" [ 0.4698, 0.4698, 0.4698],\n",
" [ 0.5203, 0.5203, 0.5203],\n",
" [-2.8474, -2.8474, -2.8474],\n",
" [ 2.1781, 2.1781, 2.1781],\n",
" [ 0.1220, 0.1220, 0.1220],\n",
" [ 3.4155, 3.4155, 3.4155],\n",
" [-0.5984, -0.5984, -0.5984],\n",
" [-0.8493, -0.8493, -0.8493],\n",
" [-0.6150, -0.6150, -0.6150],\n",
" [ 0.6329, 0.6329, 0.6329]], grad_fn=<ViewBackward>)"
"name": "stdout",
"output_type": "stream",
"text": [
"Launch Decisions and Partial Derivativs of value function with\n",
"\tlaunches\n",
"\t\t tensor([[[0.0000, 0.0020]]], grad_fn=<ReluBackward0>)\n",
"\tPartials\n",
"\t\ttensor([[0.0000, 0.0000],\n",
" [1.7938, 1.7938],\n",
" [0.0000, 0.0000],\n",
" [2.8751, 2.8751],\n",
" [1.4894, 1.4894],\n",
" [1.4614, 1.4614],\n",
" [0.0000, 0.0000],\n",
" [2.9800, 2.9800],\n",
" [0.0000, 0.0000],\n",
" [0.0000, 0.0000],\n",
" [0.0000, 0.0000],\n",
" [0.0000, 0.0000]], grad_fn=<ReluBackward0>)\n"
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn.forward(test)"
"print(enn(test))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "herbal-mission",
"id": "purple-filling",
"metadata": {},
"outputs": [],
"source": []

Loading…
Cancel
Save