Got most of the neural network stuff working. Some parameter updates to manage in NeuralNetworkSpecifications

temporaryWork^2
youainti 5 years ago
parent 7b51044057
commit d8fff40288

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "electric-scratch", "id": "chief-solomon",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Note on pytorch. NN optimization acts imperitively/by side effect as follows.\n", "Note on pytorch. NN optimization acts imperitively/by side effect as follows.\n",
@ -23,7 +23,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"id": "outdoor-essay", "id": "accepting-telephone",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -33,357 +33,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 56,
"id": "played-reward", "id": "major-transformation",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class DoubleNetwork(torch.nn.Module):\n",
" def __init__(self, input_size,output_size,layers_size):\n",
" super().__init__()\n",
" \n",
" #So, this next section constructs different layers within the NN\n",
"    #single linear section\n",
" self.linear_step_1a = torch.nn.Linear(input_size,layers_size)\n",
" \n",
" #single linear section\n",
" self.linear_step_2a = torch.nn.Linear(layers_size,output_size)\n",
" self.linear_step_2b = torch.nn.Linear(layers_size,output_size)\n",
" \n",
" def forward(self, input_values):\n",
" \n",
" intermediate_values_a = self.linear_step_1a(input_values)\n",
" \n",
" out_values_a = self.linear_step_2a(intermediate_values_a)\n",
" out_values_b = self.linear_step_2b(intermediate_values_a)\n",
" \n",
" return out_values_a,out_values_b"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "tribal-manor",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
" tensor(4.8121, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(17.3775, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(30.0737, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.6026, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.3996, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.3020, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.2092, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.1412, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0893, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0561, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0341, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0208, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0125, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0075, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0045, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0027, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0016, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0010, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0006, grad_fn=<AddBackward0>)\n",
"\n",
" tensor(0.0003, grad_fn=<AddBackward0>)\n"
]
}
],
"source": [
"model = DoubleNetwork(input_size = 5, output_size=5, layers_size=15)\n",
"\n",
"data_in = torch.tensor([1.5,2,3,4,5])\n",
"\n",
"data_in\n",
"\n",
"target = torch.zeros(5)\n",
"\n",
"def loss_fn2(output,target):\n",
" return sum((output[1] +output[0] - target)**2)\n",
" #could add a simplicity assumption i.e. l1 on parameters.\n",
"\n",
"#Prep Optimizer\n",
"optimizer = torch.optim.SGD(model.parameters(),lr=0.01)\n",
"\n",
"for i in range(20):\n",
" #training loop\n",
" optimizer.zero_grad()\n",
"\n",
" output = model.forward(data_in)\n",
" output\n",
"\n",
" l = loss_fn2(output, target)\n",
"\n",
" l.backward()\n",
"\n",
" optimizer.step()\n",
"\n",
" print(\"\\n\",l)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "uniform-union",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"class SplitNetwork(torch.nn.Module):\n",
" def __init__(self, input_size,output_size_a,output_size_b,layers_size):\n",
" super().__init__()\n",
" \n",
" #So, this next section constructs different layers within the NN\n",
" #sinlge linear section\n",
" self.linear_step_1 = torch.nn.Linear(input_size,layers_size)\n",
" self.linear_step_2 = torch.nn.Linear(layers_size,layers_size)\n",
" self.linear_step_3 = torch.nn.Linear(layers_size,layers_size)\n",
" self.linear_step_4 = torch.nn.Linear(layers_size,layers_size)\n",
" \n",
" #single linear section\n",
" self.linear_step_split_a = torch.nn.Linear(layers_size,output_size_a)\n",
" self.linear_step_split_b = torch.nn.Linear(layers_size,output_size_b)\n",
" \n",
" def forward(self, input_values):\n",
" \n",
" intermediate_values = self.linear_step_1(input_values)\n",
" intermediate_values = self.linear_step_2(intermediate_values)\n",
" intermediate_values = self.linear_step_3(intermediate_values)\n",
" intermediate_values = self.linear_step_4(intermediate_values)\n",
" \n",
" out_values_a = self.linear_step_split_a(intermediate_values)\n",
" out_values_b = self.linear_step_split_b(intermediate_values)\n",
" \n",
" return out_values_a,out_values_b"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "grand-vietnamese",
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'loss_fn3' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0mTraceback (most recent call last)",
"\u001b[0;32m<ipython-input-5-4ecf63ceaaa2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0ml\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloss_fn3\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_a\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_b\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 12\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0ml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'loss_fn3' is not defined"
]
}
],
"source": [
"#Prep Optimizer\n",
"optimizer = torch.optim.SGD(model.parameters(),lr=0.01)\n",
"\n",
"for i in range(25):\n",
" #training loop\n",
" optimizer.zero_grad()\n",
"\n",
" output = model.forward(data_in)\n",
" output\n",
"\n",
" l = loss_fn3(output, target_a, target_b)\n",
"\n",
" l.backward()\n",
"\n", "\n",
" optimizer.step()\n",
"\n",
" print(\"\\n\",l)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "settled-maple",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"id": "appointed-sandwich",
"metadata": {},
"outputs": [],
"source": [
"#This is a custom upscale module.\n",
"class PartialDerivativesEstimand(torch.nn.Module):\n",
" def __init__(self, state_tensor_size,layers_size,number_constellations):\n",
" \"\"\"\n",
" Description\n",
" \"\"\"\n",
" super().__init__()\n",
" self.number_constellations = number_constellations\n",
" \n",
" #Scale up the input from just the tensor of states to the layer_size X number_constellations\n",
" \n",
" #Increase to the layer size\n",
" self.linear_step_1 = torch.nn.Linear(in_features=state_tensor_size, out_features=layers_size)\n",
" #Upscale the tensor to be able to estimate for each constellation\n",
" #TODO: change to the standard upscaler\n",
" self.upscale_step = lambda x: torch.nn.functional.interpolate(x, scale_factor=self.number_constellations).view(x.numel(), self.number_constellations)\n",
" \n",
" #start adding useful layers (start small).\n",
" self.relu_3 = torch.nn.ReLU()\n",
" self.relu_4 = torch.nn.ReLU() #TODO:swap to linear or something like that.\n",
" #TODO:downscale to match the proper output values\n",
" \n",
" def forward(self, input_values):\n",
" \n",
" intermediate_values = self.linear_step_1(input_values)\n",
" intermediate_values = self.upscale_step(intermediate_values)\n",
" intermediate_values = self.relu_3(intermediate_values)\n",
" \n",
" intermediate_values = self.relu_4(intermediate_values)\n",
" \n",
" return intermediate_values"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "complete-gather",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0.0000, 0.0000, 0.0000],\n",
" [0.4215, 0.4215, 0.4215],\n",
" [0.5668, 0.5668, 0.5668],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0675, 0.0675, 0.0675],\n",
" [1.8888, 1.8888, 1.8888],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nn = PartialDerivativesEstimand(3,12,3)\n",
"\n",
"test = torch.tensor([[[1.0,3,4]]])\n",
"\n",
"t = nn.forward(test)\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "iraqi-italic",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor(30.5458, grad_fn=<AddBackward0>)\n",
"tensor(26.5162, grad_fn=<AddBackward0>)\n",
"tensor(24.9672, grad_fn=<AddBackward0>)\n",
"tensor(24.3718, grad_fn=<AddBackward0>)\n",
"tensor(24.1429, grad_fn=<AddBackward0>)\n",
"tensor(24.0549, grad_fn=<AddBackward0>)\n",
"tensor(24.0211, grad_fn=<AddBackward0>)\n",
"tensor(24.0081, grad_fn=<AddBackward0>)\n",
"tensor(24.0031, grad_fn=<AddBackward0>)\n",
"tensor(24.0012, grad_fn=<AddBackward0>)\n"
]
},
{
"data": {
"text/plain": [
"tensor([[0.0000, 0.0000, 0.0000],\n",
" [0.9951, 0.9951, 0.9951],\n",
" [0.9964, 0.9964, 0.9964],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.9922, 0.9922, 0.9922],\n",
" [1.0075, 1.0075, 1.0075],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000],\n",
" [0.0000, 0.0000, 0.0000]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Prep Optimizer\n",
"optimizer = torch.optim.SGD(nn.parameters(),lr=0.01)\n",
"\n",
"#get loss function\n",
"def loss_fn4(output):\n",
" return sum(sum((1-output)**2))\n",
"\n",
"for i in range(10):\n",
" #training loop\n",
" optimizer.zero_grad()\n",
"\n",
" output = nn.forward(test)\n",
"\n",
" l = loss_fn4(output)\n",
"\n",
" l.backward()\n",
"\n",
" optimizer.step()\n",
"\n",
" print(l)\n",
"\n",
"nn.forward(test)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "adaptive-period",
"metadata": {},
"outputs": [],
"source": [
"#This is a custom upscale module.\n",
"class LaunchFnEstimand(torch.nn.Module):\n", "class LaunchFnEstimand(torch.nn.Module):\n",
" def __init__(self, state_tensor_size,layers_size,number_constellations):\n", " def __init__(self, state_tensor_size,layers_size,number_constellations):\n",
" \"\"\"\n", " \"\"\"\n",
@ -391,6 +46,8 @@
" \"\"\"\n", " \"\"\"\n",
" super().__init__()\n", " super().__init__()\n",
" self.number_constellations = number_constellations\n", " self.number_constellations = number_constellations\n",
" self.layers_size = layers_size\n",
" self.state_tensor_size = state_tensor_size\n",
" \n", " \n",
" #Scale up the input from just the tensor of states to the layer_size X number_constellations\n", " #Scale up the input from just the tensor of states to the layer_size X number_constellations\n",
" \n", " \n",
@ -403,10 +60,9 @@
" \n", " \n",
" def forward(self, input_values):\n", " def forward(self, input_values):\n",
" \n", " \n",
" intermediate_values = self.linear_1(input_values)\n", " intermediate_values = self.relu(input_values) #states should be positive anyway.\n",
" intermediate_values = self.relu(intermediate_values)\n", " intermediate_values = self.linear_1(intermediate_values)\n",
" intermediate_values = self.linear_3(intermediate_values)\n", " intermediate_values = self.linear_3(intermediate_values)\n",
" intermediate_values = self.relu(intermediate_values)\n",
" intermediate_values = self.linear_5(intermediate_values)\n", " intermediate_values = self.linear_5(intermediate_values)\n",
" intermediate_values = self.relu(intermediate_values) #launches are always positive\n", " intermediate_values = self.relu(intermediate_values) #launches are always positive\n",
" \n", " \n",
@ -415,33 +71,33 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 61,
"id": "northern-vault", "id": "further-advice",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"tensor(0.0315, grad_fn=<AddBackward0>)\n", "tensor(0.7175, grad_fn=<AddBackward0>)\n",
"tensor(0.0241, grad_fn=<AddBackward0>)\n", "tensor(0.2107, grad_fn=<AddBackward0>)\n",
"tensor(0.0184, grad_fn=<AddBackward0>)\n", "tensor(0.0724, grad_fn=<AddBackward0>)\n",
"tensor(0.0141, grad_fn=<AddBackward0>)\n", "tensor(0.0259, grad_fn=<AddBackward0>)\n",
"tensor(0.0107, grad_fn=<AddBackward0>)\n", "tensor(0.0094, grad_fn=<AddBackward0>)\n",
"tensor(0.0082, grad_fn=<AddBackward0>)\n", "tensor(0.0034, grad_fn=<AddBackward0>)\n",
"tensor(0.0062, grad_fn=<AddBackward0>)\n", "tensor(0.0012, grad_fn=<AddBackward0>)\n",
"tensor(0.0048, grad_fn=<AddBackward0>)\n", "tensor(0.0004, grad_fn=<AddBackward0>)\n",
"tensor(0.0036, grad_fn=<AddBackward0>)\n", "tensor(0.0002, grad_fn=<AddBackward0>)\n",
"tensor(0.0028, grad_fn=<AddBackward0>)\n" "tensor(5.8468e-05, grad_fn=<AddBackward0>)\n"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"tensor([[[0.0457, 0.0000]]], grad_fn=<ReluBackward0>)" "tensor([[[0.0046, 0.0000]]], grad_fn=<ReluBackward0>)"
] ]
}, },
"execution_count": 10, "execution_count": 61,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -476,17 +132,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 60,
"id": "sensitive-pennsylvania", "id": "convinced-candidate",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"tensor([[[0.0457, 0.0000]]], grad_fn=<ReluBackward0>)" "tensor([[[0.0000, 0.9998]]], grad_fn=<ReluBackward0>)"
] ]
}, },
"execution_count": 11, "execution_count": 60,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -498,7 +154,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 12,
"id": "utility-giant", "id": "prompt-order",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -509,19 +165,20 @@
" #So, this next section constructs different layers within the NN\n", " #So, this next section constructs different layers within the NN\n",
" #sinlge linear section\n", " #sinlge linear section\n",
" \n", " \n",
" self.partials_estimator = PartialDerivativesEstimand(state_tensor_size,layers_size,number_constellations)\n", " self.partials_estimator = PartialDerivativesEstimand(state_tensor_size,layers_size,number_constellations) #TODO\n",
" self.launch_estimator = LaunchFnEstimand(state_tensor_size,layers_size,number_constellations)\n", " self.launch_estimator = LaunchFnEstimand(state_tensor_size,layers_size,number_constellations)\n",
" \n", " \n",
" def forward(self, input_values):\n", " def forward(self, input_values):\n",
" partials = self.partials_estimator(input_values)\n", " partials = self.partials_estimator(input_values)\n",
" launch = self.launch_estimator(input_values)\n", " launch = self.launch_estimator(input_values)\n",
" \n",
" return c.EstimandInterface(partials,launch)" " return c.EstimandInterface(partials,launch)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 13,
"id": "molecular-factory", "id": "three-hobby",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -531,7 +188,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 15,
"id": "artistic-washer", "id": "overall-league",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -564,7 +221,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "purple-filling", "id": "weird-municipality",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []

@ -0,0 +1,109 @@
import torch
import combined as c
"""
This module holds the neural networks I am going to use to estimate
the functions of interest.
"""
class LaunchFnEstimand(torch.nn.Module):
    """
    Estimates the launch function from the state tensor.

    Args:
        state_tensor_size: number of input state features.
        layers_size: width of the hidden linear layers.
        number_constellations: number of outputs (one launch value per constellation).

    NOTE(review): linear_1 -> linear_3 -> linear_5 are applied back-to-back with no
    nonlinearity between them, so they collapse into a single affine map — confirm
    this is intentional.
    """
    def __init__(self, state_tensor_size, layers_size, number_constellations):
        super().__init__()
        # Keep the configuration around for introspection.
        self.number_constellations = number_constellations
        self.layers_size = layers_size
        self.state_tensor_size = state_tensor_size
        # Layers (construction order kept stable for reproducible seeded init).
        self.linear_1 = torch.nn.Linear(in_features=state_tensor_size, out_features=layers_size)
        self.relu = torch.nn.ReLU()
        self.linear_3 = torch.nn.Linear(in_features=layers_size, out_features=layers_size)
        self.linear_5 = torch.nn.Linear(in_features=layers_size, out_features=number_constellations)
    def forward(self, input_values):
        """Map a state tensor to a non-negative launch estimate per constellation."""
        # Clamp the inputs first: states should be positive anyway.
        clamped = self.relu(input_values)
        # Three stacked affine transforms.
        hidden = self.linear_3(self.linear_1(clamped))
        projected = self.linear_5(hidden)
        # Launches are always positive, so clamp the output as well.
        return self.relu(projected)
class PartialDerivativesEstimand(torch.nn.Module):
    """
    Estimates the partial derivatives of the value functions.

    Takes a state tensor and produces one vector of partials per constellation,
    shaped (batch_size, number_constellations, number_states).
    # assumes input is shaped (batch_size, 1, number_states) so the reshape after
    # upsampling lines up — TODO confirm against callers.
    """
    def __init__(self, batch_size, number_constellations, number_states, layer_size=12):
        super().__init__()
        self.batch_size = batch_size
        self.number_constellations = number_constellations
        self.number_states = number_states
        self.layer_size = layer_size
        # Preprocess: single linear layer in case anything needs to happen to all states.
        self.preprocess = torch.nn.Sequential(
            torch.nn.ReLU(),  # cleanup, as states must be positive
            torch.nn.Linear(in_features=self.number_states, out_features=self.number_states),
        )
        # Upscale from (batch, state) to (batch, constellation, state): nearest-neighbour
        # repeat along the last axis, then reshape to the target layout.
        self.upsample = lambda x: torch.nn.functional.interpolate(
            x, scale_factor=self.number_constellations
        ).view(self.batch_size, self.number_constellations, self.number_states)
        # Intermediate processing: a small stack of affine layers.
        self.sequential = torch.nn.Sequential(
            torch.nn.Linear(in_features=number_states, out_features=layer_size),
            # who knows if a convolution might help here.
            torch.nn.Linear(in_features=layer_size, out_features=layer_size),
            torch.nn.Linear(in_features=layer_size, out_features=layer_size),
        )
        # Reduce the feature axis back down to one partial per state.
        self.feature_reduction = torch.nn.Linear(in_features=layer_size, out_features=number_states)
    def forward(self, input_values):
        """Return the estimated partials; the input holds only the state variables."""
        # TODO: check that input values match the prepared dimension?
        x = self.preprocess(input_values)
        x = self.upsample(x)
        x = self.sequential(x)
        return self.feature_reduction(x)
class EstimandNN(torch.nn.Module):
    """
    Takes the current states as input values and returns both the partial
    derivatives of the value function and the launch function, bundled in a
    c.EstimandInterface.

    Args:
        state_tensor_size: number of input state features.
        layers_size: width of the hidden layers of the sub-estimators.
        number_constellations: number of constellations being modelled.
    """
    def __init__(self, state_tensor_size, layers_size, number_constellations):
        super().__init__()
        # TODO(review): PartialDerivativesEstimand's signature is
        # (batch_size, number_constellations, number_states, layer_size) — the
        # positional arguments passed here do not line up with it; verify before use.
        self.partials_estimator = PartialDerivativesEstimand(state_tensor_size, layers_size, number_constellations)
        self.launch_estimator = LaunchFnEstimand(state_tensor_size, layers_size, number_constellations)
    def forward(self, input_values):
        """Run both sub-estimators on the same state tensor and pair the results."""
        partials = self.partials_estimator(input_values)
        launch = self.launch_estimator(input_values)
        return c.EstimandInterface(partials, launch)

@ -0,0 +1,203 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "least-cooling",
"metadata": {},
"outputs": [],
"source": [
"import torch"
]
},
{
"cell_type": "markdown",
"id": "statistical-temperature",
"metadata": {},
"source": [
"The purpose of this notebook is to allow me to investigate proper shaping of inputs.\n",
"\n",
"Typically pytorch chooses a tensor specification\n",
"$$\n",
"(N, .*)\n",
"$$\n",
"where $N$ is the batch size.\n",
"For example a Convolutional NN layer expects\n",
"$$\n",
" NCHW\n",
"$$\n",
"for BatchSize,ChannelSize,Height,Width.\n",
"On the other hand, Linear expects\n",
"$$\n",
" N.*H\n",
"$$\n",
"for BatchSize,any number of other dimensions, in_features\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "japanese-poultry",
"metadata": {},
"outputs": [],
"source": [
"class PartialDerivativesEstimand(torch.nn.Module):\n",
" def __init__(self,batch_size, number_constellations, number_states,scale_factor=4, layer_size=12):\n",
" \"\"\"\n",
"    Estimates the partial derivatives of the value functions, one vector per constellation.\n",
" \"\"\"\n",
" super().__init__()\n",
" self.batch_size = batch_size\n",
" self.number_constellations = number_constellations\n",
" self.number_states = number_states\n",
" self.scale_factor = scale_factor\n",
" self.layer_size = layer_size\n",
" \n",
" \n",
" #preprocess (single linear layer in case there is anything that needs to happen to all states)\n",
" self.preprocess = torch.nn.Sequential(\n",
" torch.nn.ReLU() #cleanup as states must be positive\n",
" ,torch.nn.Linear(in_features = self.number_states, out_features=self.number_states)\n",
" )\n",
" #upscale to get the basic dimensionality correct. From (batch,State) to (batch, constellation, state). Includes a reshape\n",
" self.upsample = lambda x: torch.nn.Upsample(scale_factor=self.number_constellations)(x).view(self.batch_size\n",
" ,self.number_constellations\n",
" ,self.number_states)\n",
" \n",
" #sequential steps\n",
" self.sequential = torch.nn.Sequential(\n",
" torch.nn.Linear(in_features=number_states, out_features=layer_size)\n",
" #who knows if a convolution might help here.\n",
" ,torch.nn.Linear(in_features=layer_size, out_features=layer_size)\n",
" ,torch.nn.Linear(in_features=layer_size, out_features=layer_size)\n",
" )\n",
"\n",
" #reduce axis to match expectation\n",
" self.feature_reduction = torch.nn.Linear(in_features=layer_size, out_features=number_states)\n",
" \n",
" def forward(self, input_values):\n",
" #Note that the input values are just going to be the state variables\n",
" #TODO:check that input values match the prepared dimension?\n",
" \n",
" #preprocess\n",
" intermediate = self.preprocess(input_values)\n",
" \n",
" #upscale the input values\n",
" intermediate = self.upsample(intermediate)\n",
" \n",
" #intermediate processing\n",
" intermediate = self.sequential(intermediate)\n",
" \n",
" #reduce feature axis to match the expected number of partials\n",
" intermediate = self.feature_reduction(intermediate)\n",
" \n",
" \n",
" return intermediate"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "communist-teach",
"metadata": {},
"outputs": [],
"source": [
"batch_size = 2\n",
"constellations = 2\n",
"number_states = constellations+1\n",
"\n",
"#initialize the NN\n",
"a = PartialDerivativesEstimand(batch_size,constellations,number_states,scale_factor=2)\n",
"\n",
"#example state\n",
"s = torch.rand(size=(batch_size,1,number_states))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "chemical-revolution",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[0.9283, 0.9414, 0.3426]],\n",
"\n",
" [[0.1902, 0.0369, 0.4699]]])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "directed-lobby",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[-0.1991, 0.1335, 0.2821],\n",
" [-0.3549, 0.0213, 0.2322]],\n",
"\n",
" [[-0.1701, 0.1557, 0.2954],\n",
" [-0.3017, 0.0690, 0.2419]]], grad_fn=<AddBackward0>)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a(s)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "placed-coating",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "advised-execution",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading…
Cancel
Save