From 7b510440579e84ed4eb17c0fababa8f2759d0d10 Mon Sep 17 00:00:00 2001 From: youainti Date: Thu, 30 Sep 2021 21:45:24 -0700 Subject: [PATCH] I've got the neural net with both launches and partials written, although some internals need redone because I misunderstood ReLU. Those misunderstandings are documented in the Background. --- Code/Background information I've learned.md | 15 + Code/BasicNeuralNet2.ipynb | 468 ++++++++++++++------ 2 files changed, 344 insertions(+), 139 deletions(-) diff --git a/Code/Background information I've learned.md b/Code/Background information I've learned.md index f049b2c..508328b 100644 --- a/Code/Background information I've learned.md +++ b/Code/Background information I've learned.md @@ -17,3 +17,18 @@ Also, neural networks can return more than just a single output as long as the loss function that is used for optimization can consume both of them. Thus I could write two separate neural networks (such as for launch and partials), and then write a third NN that binds the two together. + + +## Notes on functions +ReLU is a linear rectifier, it does not have any training involved. +This makes it good for working as a final cleanup of the launch function. +This also makes it not so good for the partial derivatives. + +Linear is a good but basic network type. + +Upscaling allows you to create more features. +Downscaling reduces the number of features (by throwing data away?). +Instead of downscaling, use a linear function to change the dimensions. + +# Remaining Questions + - How do you set it up to run over a set of variables, i.e. batches? diff --git a/Code/BasicNeuralNet2.ipynb b/Code/BasicNeuralNet2.ipynb index 11756a7..f4b37ea 100644 --- a/Code/BasicNeuralNet2.ipynb +++ b/Code/BasicNeuralNet2.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "comprehensive-toyota", + "id": "electric-scratch", "metadata": {}, "source": [ "Note on pytorch. 
NN optimization acts imperitively/by side effect as follows.\n", @@ -23,17 +23,18 @@ { "cell_type": "code", "execution_count": 1, - "id": "together-jewelry", + "id": "outdoor-essay", "metadata": {}, "outputs": [], "source": [ - "import torch" + "import torch\n", + "import combined as c" ] }, { "cell_type": "code", "execution_count": 2, - "id": "hispanic-grain", + "id": "played-reward", "metadata": { "tags": [] }, @@ -64,7 +65,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "practical-gilbert", + "id": "tribal-manor", "metadata": {}, "outputs": [ { @@ -72,45 +73,45 @@ "output_type": "stream", "text": [ "\n", - " tensor(10.7553, grad_fn=)\n", + " tensor(4.8121, grad_fn=)\n", "\n", - " tensor(64.3239, grad_fn=)\n", + " tensor(17.3775, grad_fn=)\n", "\n", - " tensor(17.9537, grad_fn=)\n", + " tensor(30.0737, grad_fn=)\n", "\n", - " tensor(60.9679, grad_fn=)\n", + " tensor(0.6026, grad_fn=)\n", "\n", - " tensor(30.1436, grad_fn=)\n", + " tensor(0.3996, grad_fn=)\n", "\n", - " tensor(89.3963, grad_fn=)\n", + " tensor(0.3020, grad_fn=)\n", "\n", - " tensor(70.8575, grad_fn=)\n", + " tensor(0.2092, grad_fn=)\n", "\n", - " tensor(24.7911, grad_fn=)\n", + " tensor(0.1412, grad_fn=)\n", "\n", - " tensor(695.9885, grad_fn=)\n", + " tensor(0.0893, grad_fn=)\n", "\n", - " tensor(339753.2500, grad_fn=)\n", + " tensor(0.0561, grad_fn=)\n", "\n", - " tensor(8.0135e+13, grad_fn=)\n", + " tensor(0.0341, grad_fn=)\n", "\n", - " tensor(inf, grad_fn=)\n", + " tensor(0.0208, grad_fn=)\n", "\n", - " tensor(nan, grad_fn=)\n", + " tensor(0.0125, grad_fn=)\n", "\n", - " tensor(nan, grad_fn=)\n", + " tensor(0.0075, grad_fn=)\n", "\n", - " tensor(nan, grad_fn=)\n", + " tensor(0.0045, grad_fn=)\n", "\n", - " tensor(nan, grad_fn=)\n", + " tensor(0.0027, grad_fn=)\n", "\n", - " tensor(nan, grad_fn=)\n", + " tensor(0.0016, grad_fn=)\n", "\n", - " tensor(nan, grad_fn=)\n", + " tensor(0.0010, grad_fn=)\n", "\n", - " tensor(nan, grad_fn=)\n", + " tensor(0.0006, grad_fn=)\n", "\n", - " 
tensor(nan, grad_fn=)\n" + " tensor(0.0003, grad_fn=)\n" ] } ], @@ -149,7 +150,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "early-victoria", + "id": "uniform-union", "metadata": {}, "outputs": [], "source": [ @@ -184,83 +185,20 @@ { "cell_type": "code", "execution_count": 5, - "id": "sustained-avatar", - "metadata": {}, - "outputs": [], - "source": [ - "model = SplitNetwork(input_size = 6, output_size_a=5, output_size_b=7, layers_size=15)\n", - "\n", - "data_in = torch.tensor([1.5,2,3,4,5,6])\n", - "\n", - "\n", - "target_a = torch.zeros(5)\n", - "target_b = torch.ones(7)\n", - "\n", - "def loss_fn3(output,target_a, target_b):\n", - " return sum((output[0] - target_a)**2) + sum((output[1] - target_b)**2)\n", - " #could add a simplicity assumption i.e. l1 on parameters." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "inclusive-rouge", - "metadata": {}, + "id": "grand-vietnamese", + "metadata": { + "tags": [] + }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " tensor(8.4134, grad_fn=)\n", - "\n", - " tensor(5.9490, grad_fn=)\n", - "\n", - " tensor(4.8652, grad_fn=)\n", - "\n", - " tensor(3.7577, grad_fn=)\n", - "\n", - " tensor(2.5462, grad_fn=)\n", - "\n", - " tensor(1.3803, grad_fn=)\n", - "\n", - " tensor(0.5700, grad_fn=)\n", - "\n", - " tensor(0.2055, grad_fn=)\n", - "\n", - " tensor(0.0747, grad_fn=)\n", - "\n", - " tensor(0.0274, grad_fn=)\n", - "\n", - " tensor(0.0101, grad_fn=)\n", - "\n", - " tensor(0.0037, grad_fn=)\n", - "\n", - " tensor(0.0014, grad_fn=)\n", - "\n", - " tensor(0.0005, grad_fn=)\n", - "\n", - " tensor(0.0002, grad_fn=)\n", - "\n", - " tensor(7.1453e-05, grad_fn=)\n", - "\n", - " tensor(2.6635e-05, grad_fn=)\n", - "\n", - " tensor(9.9370e-06, grad_fn=)\n", - "\n", - " tensor(3.7096e-06, grad_fn=)\n", - "\n", - " tensor(1.3858e-06, grad_fn=)\n", - "\n", - " tensor(5.1807e-07, grad_fn=)\n", - "\n", - " tensor(1.9388e-07, grad_fn=)\n", - "\n", - " tensor(7.2581e-08, 
class PartialDerivativesEstimand(torch.nn.Module):
    """Estimate one non-negative column of partials per constellation.

    The state tensor is passed through a single linear layer, every
    resulting feature is copied once per constellation, and the result is
    rectified.  The output is a 2-D tensor with one row per feature element
    and ``number_constellations`` identical columns.

    Args:
        state_tensor_size: Length of the last dimension of the input.
        layers_size: Number of features produced by the linear layer.
        number_constellations: Number of copies made of every feature,
            i.e. the number of output columns.  Must be an integer.
    """

    def __init__(self, state_tensor_size, layers_size, number_constellations):
        super().__init__()
        self.number_constellations = number_constellations

        # Increase to the layer size.
        self.linear_step_1 = torch.nn.Linear(
            in_features=state_tensor_size,
            out_features=layers_size,
        )

        # Start adding useful layers (start small).
        # ReLU has no trainable parameters and is idempotent, so applying
        # relu_4 directly after relu_3 does not change any value yet; it is
        # kept as the placeholder for the layer named in the TODO below.
        self.relu_3 = torch.nn.ReLU()
        self.relu_4 = torch.nn.ReLU()  # TODO: swap to linear or something like that.
        # TODO: downscale to match the proper output values.

    def upscale_step(self, features):
        """Copy every feature once per constellation.

        This is a regular method rather than a lambda stored on the
        instance so that the module stays picklable, and it uses an explicit
        reshape/repeat instead of nearest-neighbour interpolation so that it
        accepts input of any rank.

        Args:
            features: Tensor of any shape.

        Returns:
            Tensor of shape ``(features.numel(), number_constellations)``
            whose row ``k`` holds the ``k``-th element of the flattened
            input repeated in every column.
        """
        return features.reshape(-1, 1).repeat(1, self.number_constellations)

    def forward(self, input_values):
        """Run linear -> per-constellation copy -> ReLU -> ReLU.

        Args:
            input_values: Tensor whose last dimension has
                ``state_tensor_size`` entries.

        Returns:
            Non-negative tensor of shape ``(n, number_constellations)``,
            where ``n`` is the number of elements produced by the linear
            layer.
        """
        intermediate_values = self.linear_step_1(input_values)
        intermediate_values = self.upscale_step(intermediate_values)
        intermediate_values = self.relu_3(intermediate_values)
        intermediate_values = self.relu_4(intermediate_values)
        return intermediate_values
def loss_fn4(output):
    """Return the summed squared distance of every element from 1.

    Uses the tensor reduction instead of nested builtin ``sum`` calls, so
    the result is a 0-dim tensor for input of any rank and autograd records
    a single reduction rather than one addition per element.

    Args:
        output: Tensor of any shape.

    Returns:
        0-dim tensor equal to ``sum((1 - output) ** 2)`` over all elements.
    """
    return ((1 - output) ** 2).sum()
class LaunchFnEstimand(torch.nn.Module):
    """Map a state tensor to one non-negative launch value per constellation.

    Three linear layers (``state_tensor_size -> layers_size -> layers_size
    -> number_constellations``), each followed by the same ReLU.  The final
    ReLU clamps the output at zero so that launches are never negative.
    """

    def __init__(self, state_tensor_size, layers_size, number_constellations):
        """Build the three linear layers and the shared rectifier.

        Args:
            state_tensor_size: Length of the last dimension of the input.
            layers_size: Width of the two hidden layers.
            number_constellations: Length of the last dimension of the
                output.
        """
        super().__init__()
        self.number_constellations = number_constellations

        # Layers are created in the order they are applied; the gaps in the
        # numbering are where the (parameter-free) ReLU is applied.
        self.linear_1 = torch.nn.Linear(
            in_features=state_tensor_size,
            out_features=layers_size,
        )
        self.relu = torch.nn.ReLU()
        self.linear_3 = torch.nn.Linear(
            in_features=layers_size,
            out_features=layers_size,
        )
        self.linear_5 = torch.nn.Linear(
            in_features=layers_size,
            out_features=number_constellations,
        )

    def forward(self, input_values):
        """Apply linear -> ReLU three times and return the result.

        Args:
            input_values: Tensor whose last dimension has
                ``state_tensor_size`` entries.

        Returns:
            Tensor with the same leading dimensions as the input and a last
            dimension of ``number_constellations``; every entry is >= 0.
        """
        hidden = input_values
        for layer in (self.linear_1, self.linear_3, self.linear_5):
            hidden = self.relu(layer(hidden))
        return hidden
def loss_fn5(output):
    """Return the sum of squares of every element of ``output``.

    Uses the tensor reduction instead of three nested builtin ``sum`` calls,
    so the result is a 0-dim tensor for input of any rank (the nested form
    only reduced 3-D input to a scalar) and autograd records a single
    reduction.

    Args:
        output: Tensor of any shape.

    Returns:
        0-dim tensor equal to ``sum(output ** 2)`` over all elements.
    """
    return (output ** 2).sum()
class EstimandNN(torch.nn.Module):
    """Run the partials and launch sub-networks on the same input.

    Both sub-networks are built with identical size arguments, and their
    outputs are handed to ``c.EstimandInterface(partials, launch)``.
    NOTE(review): ``c`` is the notebook's ``import combined as c``; the
    exact contract of ``EstimandInterface`` is not visible here - confirm
    against that module.
    """

    def __init__(self, state_tensor_size, layers_size, number_constellations):
        super().__init__()

        sizes = (state_tensor_size, layers_size, number_constellations)

        # Partials first, then launch: the construction order decides the
        # order in which the sub-networks draw their initial weights.
        self.partials_estimator = PartialDerivativesEstimand(*sizes)
        self.launch_estimator = LaunchFnEstimand(*sizes)

    def forward(self, input_values):
        """Evaluate both estimators and wrap the two results.

        Args:
            input_values: Tensor passed unchanged to both sub-networks.

        Returns:
            ``c.EstimandInterface`` built from ``(partials, launch)``.
        """
        return c.EstimandInterface(
            self.partials_estimator(input_values),
            self.launch_estimator(input_values),
        )