You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
405 lines
12 KiB
Plaintext
405 lines
12 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "comprehensive-toyota",
|
|
"metadata": {},
|
|
"source": [
|
|
"Note on PyTorch: NN optimization works imperatively (by side effect), as follows.\n",
|
|
" - Define model\n",
|
|
" - loop\n",
|
|
" - Calculate loss\n",
|
|
" - Zero gradients\n",
|
|
" - backprop to model\n",
|
|
" - check conditions for exit\n",
|
|
" - report diagnostics\n",
|
|
" - dissect results\n",
|
|
" \n",
|
|
" \n",
|
|
"## Split result from NN\n",
|
|
"The goal is to train the NN and then obtain a couple of separate outputs at the end, which can be split between the value function partials and the launch functions."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "together-jewelry",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import torch"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "hispanic-grain",
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"class DoubleNetwork(torch.nn.Module):\n",
|
|
" def __init__(self, input_size,output_size,layers_size):\n",
|
|
" super().__init__()\n",
|
|
" \n",
|
|
" #So, this next section constructs different layers within the NN\n",
|
|
" #single linear section\n",
|
|
" self.linear_step_1a = torch.nn.Linear(input_size,layers_size)\n",
|
|
" \n",
|
|
" #single linear section\n",
|
|
" self.linear_step_2a = torch.nn.Linear(layers_size,output_size)\n",
|
|
" self.linear_step_2b = torch.nn.Linear(layers_size,output_size)\n",
|
|
" \n",
|
|
" def forward(self, input_values):\n",
|
|
" \n",
|
|
" intermediate_values_a = self.linear_step_1a(input_values)\n",
|
|
" \n",
|
|
" out_values_a = self.linear_step_2a(intermediate_values_a)\n",
|
|
" out_values_b = self.linear_step_2b(intermediate_values_a)\n",
|
|
" \n",
|
|
" return out_values_a,out_values_b"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "practical-gilbert",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
" tensor(10.7553, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(64.3239, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(17.9537, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(60.9679, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(30.1436, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(89.3963, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(70.8575, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(24.7911, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(695.9885, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(339753.2500, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(8.0135e+13, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(inf, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(nan, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(nan, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(nan, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(nan, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(nan, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(nan, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(nan, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(nan, grad_fn=<AddBackward0>)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"model = DoubleNetwork(input_size = 5, output_size=5, layers_size=15)\n",
|
|
"\n",
|
|
"data_in = torch.tensor([1.5,2,3,4,5])\n",
|
|
"\n",
|
|
"data_in\n",
|
|
"\n",
|
|
"target = torch.zeros(5)\n",
|
|
"\n",
|
|
"def loss_fn2(output,target):\n",
|
|
" return sum((output[1] +output[0] - target)**2)\n",
|
|
" #could add a simplicity assumption i.e. l1 on parameters.\n",
|
|
"\n",
|
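|
"#Prep Optimizer. NOTE(review): with lr=0.01 the recorded output of this cell diverges to inf/nan -- a smaller learning rate is probably needed; confirm.\n",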
|
|
"optimizer = torch.optim.SGD(model.parameters(),lr=0.01)\n",
|
|
"\n",
|
|
"for i in range(20):\n",
|
|
" #training loop\n",
|
|
" optimizer.zero_grad()\n",
|
|
"\n",
|
|
" output = model.forward(data_in)\n",
|
|
" output\n",
|
|
"\n",
|
|
" l = loss_fn2(output, target)\n",
|
|
"\n",
|
|
" l.backward()\n",
|
|
"\n",
|
|
" optimizer.step()\n",
|
|
"\n",
|
|
" print(\"\\n\",l)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "early-victoria",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"class SplitNetwork(torch.nn.Module):\n",
|
|
" def __init__(self, input_size,output_size_a,output_size_b,layers_size):\n",
|
|
" super().__init__()\n",
|
|
" \n",
|
|
" #So, this next section constructs different layers within the NN\n",
|
|
" #single linear section\n",
|
|
" self.linear_step_1 = torch.nn.Linear(input_size,layers_size)\n",
|
|
" self.linear_step_2 = torch.nn.Linear(layers_size,layers_size)\n",
|
|
" self.linear_step_3 = torch.nn.Linear(layers_size,layers_size)\n",
|
|
" self.linear_step_4 = torch.nn.Linear(layers_size,layers_size)\n",
|
|
" \n",
|
|
" #single linear section\n",
|
|
" self.linear_step_split_a = torch.nn.Linear(layers_size,output_size_a)\n",
|
|
" self.linear_step_split_b = torch.nn.Linear(layers_size,output_size_b)\n",
|
|
" \n",
|
|
" def forward(self, input_values):\n",
|
|
" \n",
|
|
" intermediate_values = self.linear_step_1(input_values)\n",
|
|
" intermediate_values = self.linear_step_2(intermediate_values)\n",
|
|
" intermediate_values = self.linear_step_3(intermediate_values)\n",
|
|
" intermediate_values = self.linear_step_4(intermediate_values)\n",
|
|
" \n",
|
|
" out_values_a = self.linear_step_split_a(intermediate_values)\n",
|
|
" out_values_b = self.linear_step_split_b(intermediate_values)\n",
|
|
" \n",
|
|
" return out_values_a,out_values_b"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "sustained-avatar",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"model = SplitNetwork(input_size = 6, output_size_a=5, output_size_b=7, layers_size=15)\n",
|
|
"\n",
|
|
"data_in = torch.tensor([1.5,2,3,4,5,6])\n",
|
|
"\n",
|
|
"\n",
|
|
"target_a = torch.zeros(5)\n",
|
|
"target_b = torch.ones(7)\n",
|
|
"\n",
|
|
"def loss_fn3(output,target_a, target_b):\n",
|
|
" return sum((output[0] - target_a)**2) + sum((output[1] - target_b)**2)\n",
|
|
" #could add a simplicity assumption i.e. l1 on parameters."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "inclusive-rouge",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
" tensor(8.4134, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(5.9490, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(4.8652, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(3.7577, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(2.5462, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(1.3803, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.5700, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.2055, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.0747, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.0274, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.0101, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.0037, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.0014, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.0005, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(0.0002, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(7.1453e-05, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(2.6635e-05, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(9.9370e-06, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(3.7096e-06, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(1.3858e-06, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(5.1807e-07, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(1.9388e-07, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(7.2581e-08, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(2.7196e-08, grad_fn=<AddBackward0>)\n",
|
|
"\n",
|
|
" tensor(1.0235e-08, grad_fn=<AddBackward0>)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"#Prep Optimizer\n",
|
|
"optimizer = torch.optim.SGD(model.parameters(),lr=0.01)\n",
|
|
"\n",
|
|
"for i in range(25):\n",
|
|
" #training loop\n",
|
|
" optimizer.zero_grad()\n",
|
|
"\n",
|
|
" output = model.forward(data_in)\n",
|
|
" output\n",
|
|
"\n",
|
|
" l = loss_fn3(output, target_a, target_b)\n",
|
|
"\n",
|
|
" l.backward()\n",
|
|
"\n",
|
|
" optimizer.step()\n",
|
|
"\n",
|
|
" print(\"\\n\",l)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "sound-insulation",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 144,
|
|
"id": "promotional-accent",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#This is a custom upscale module.\n",
|
|
"class CustomUpscale(torch.nn.Module):\n",
|
|
" def __init__(self, input_size,layers_size,scale_factor):\n",
|
|
" super().__init__()\n",
|
|
" self.scale_factor = scale_factor\n",
|
|
" \n",
|
|
" #So, this next section constructs different layers within the NN\n",
|
|
" #single linear section\n",
|
|
" self.linear_step_1a = torch.nn.Linear(in_features=input_size, out_features=layers_size)\n",
|
|
" self.upscale_step = lambda x: torch.nn.functional.interpolate(x, scale_factor=self.scale_factor).view(x.numel(),self.scale_factor)\n",
|
|
" #single linear section\n",
|
|
" \n",
|
|
" def forward(self, input_values):\n",
|
|
" \n",
|
|
" intermediate_values_a = self.linear_step_1a(input_values)\n",
|
|
" intermediate_values_b = self.upscale_step(intermediate_values_a)\n",
|
|
" \n",
|
|
" return intermediate_values_b"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 145,
|
|
"id": "english-basement",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"nn = MultiDimOut(3,12,3)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 146,
|
|
"id": "passive-chapel",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"test = torch.tensor([[[1.0,3,4]]])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 147,
|
|
"id": "passing-heath",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"tensor([[ 1.9191, 1.9191, 1.9191],\n",
|
|
" [-1.4519, -1.4519, -1.4519],\n",
|
|
" [ 0.4698, 0.4698, 0.4698],\n",
|
|
" [ 0.5203, 0.5203, 0.5203],\n",
|
|
" [-2.8474, -2.8474, -2.8474],\n",
|
|
" [ 2.1781, 2.1781, 2.1781],\n",
|
|
" [ 0.1220, 0.1220, 0.1220],\n",
|
|
" [ 3.4155, 3.4155, 3.4155],\n",
|
|
" [-0.5984, -0.5984, -0.5984],\n",
|
|
" [-0.8493, -0.8493, -0.8493],\n",
|
|
" [-0.6150, -0.6150, -0.6150],\n",
|
|
" [ 0.6329, 0.6329, 0.6329]], grad_fn=<ViewBackward>)"
|
|
]
|
|
},
|
|
"execution_count": 147,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"nn.forward(test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "herbal-mission",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.8"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|