From d5635622ccefbfe2ec10bb9c6449f48cabe93405 Mon Sep 17 00:00:00 2001 From: youainti Date: Mon, 18 Oct 2021 17:16:10 -0700 Subject: [PATCH] current work, can't get convergence as I'd like --- Code/NeuralNetworkSpecifications.py | 70 +--- Code/SimplifiedApproach0.ipynb | 48 ++- Code/Untitled.ipynb | 304 +++++++++------- Code/Untitled1.ipynb | 520 ++++++++++++++++------------ Code/combined.py | 27 +- 5 files changed, 520 insertions(+), 449 deletions(-) diff --git a/Code/NeuralNetworkSpecifications.py b/Code/NeuralNetworkSpecifications.py index d90b3f7..0332f49 100644 --- a/Code/NeuralNetworkSpecifications.py +++ b/Code/NeuralNetworkSpecifications.py @@ -4,70 +4,6 @@ import itertools import math import abc -class LinearProfit: - """ - The simplest type of profit function available. - """ - def __init__(self, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0): - #track which constellation this is. - self.constellation_number = constellation_number - - #parameters describing the agent's situation - self.discount_factor = discount_factor - self.benefit_weights = benefit_weight - self.launch_cost = launch_cost - self.deorbit_cost = deorbit_cost - - def __str__(self): - return "LinearProfit\n Benefit weights:\t{}\n launch cost:\t{}\n Deorbit cost:\t{}".format(self.benefit_weights, self.launch_cost, self.deorbit_cost) - - def period_benefit(self,state,estimand_interface): - return self._period_benefit(state.stocks, state.debris, estimand_interface.choices) - - def _period_benefit(self,stocks,debris,choice): - profits = self.benefit_weights @ stocks \ - - self.launch_cost * choice[self.constellation_number] #\ - #- deorbit_cost @ deorbits[self.constellation_number] - return profits - - def period_benefit_jacobian_wrt_states(self, states, estimand_interface): - return self._period_benefit_jacobian_wrt_states(states.stocks, states.debris, estimand_interface.choices) - - def _period_benefit_jacobian_wrt_states(self, stocks, debris, launches): - jac = jacobian(self._period_benefit, (stocks,debris,launches)) - return torch.cat((jac[0], jac[1])) - - def period_benefit_jacobian_wrt_launches(self, states, estimand_interface): - return self._period_benefit_jacobian_wrt_launches(states.stocks, states.debris, estimand_interface.choices) - - def _period_benefit_jacobian_wrt_launches(self,stocks,debris,launches): - jac = jacobian(self._period_benefit, (stocks,debris,launches)) - return jac[2] - -class States(): - """ - This is supposed to capture the state variables of the model, to create a common interface - when passing between functions. - """ - def __init__(self, stocks,debris): - self.stocks = stocks - self.debris = debris - - - def __str__(self): - return "stocks\t{} \ndebris\t {}".format(self.stocks,self.debris) - - @property - def values(self): - - - @property - def number_constellations(self): - return len(self.stocks) - @property - def number_debris_trackers(self): - return len(self.debris) - class EstimandInterface(): """ @@ -194,9 +130,9 @@ class PartialDerivativesOfValueEstimand(torch.nn.Module): """ def __init__(self ,batch_size - , number_constellations - , number_states - , layer_size=12): + ,number_constellations + ,number_states + ,layer_size=12): super().__init__() self.batch_size = batch_size #used for upscaling self.number_constellations = number_constellations diff --git a/Code/SimplifiedApproach0.ipynb b/Code/SimplifiedApproach0.ipynb index f7ad687..8265c50 100644 --- a/Code/SimplifiedApproach0.ipynb +++ b/Code/SimplifiedApproach0.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "ceramic-doctrine", + "id": "french-experiment", "metadata": {}, "outputs": [], "source": [ @@ -275,7 +275,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "executive-royal", + "id": "suited-nothing", "metadata": {}, "outputs": [], "source": [ @@ -307,7 +307,7 @@ }, { "cell_type": "markdown", - "id": "numerical-mexico", + "id": "recognized-story", "metadata": {}, "source": [ "# Testing\n", @@ -318,7 +318,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "packed-economics", + "id": "smart-association", "metadata": {}, "outputs": [], "source": [ @@ -331,37 +331,29 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "compliant-circle", + "execution_count": 84, + "id": "unsigned-hungary", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([[[ 1., 30., 11.]],\n", - "\n", - " [[60., 74., 1.]],\n", - "\n", - " [[46., 33., 70.]],\n", - "\n", - " [[42., 29., 32.]],\n", - "\n", - " [[82., 72., 57.]]])" + "torch.Size([5, 1, 3])" ] }, - "execution_count": 4, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "stocks_and_debris" + "stocks_and_debris.size()" ] }, { "cell_type": "code", "execution_count": 6, - "id": "theoretical-spectrum", + "id": "regulated-conversation", "metadata": {}, "outputs": [ { @@ -409,7 +401,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "vulnerable-penalty", + "id": "rental-detection", "metadata": {}, "outputs": [], "source": [ @@ -421,7 +413,7 @@ { "cell_type": "code", "execution_count": 30, - "id": "classified-estimate", + "id": "mechanical-joshua", "metadata": {}, "outputs": [], "source": [ @@ -435,7 +427,7 @@ { "cell_type": "code", "execution_count": 31, - "id": "martial-premium", + "id": "charged-request", "metadata": {}, "outputs": [ { @@ -502,7 +494,7 @@ { "cell_type": "code", "execution_count": 45, - "id": "corrected-jewelry", + "id": "perceived-permit", "metadata": {}, "outputs": [], "source": [ @@ -514,7 +506,7 @@ { "cell_type": "code", "execution_count": 53, - "id": "opened-figure", + "id": "atomic-variance", "metadata": {}, "outputs": [], "source": [ @@ -528,7 +520,7 @@ { "cell_type": "code", "execution_count": 74, - "id": "chicken-inspector", + "id": "biological-badge", "metadata": {}, "outputs": [ { @@ -600,7 +592,7 @@ { "cell_type": "code", "execution_count": 78, - "id": "southwest-diamond", + "id": "compliant-johnson", "metadata": {}, "outputs": [], "source": [ @@ -612,7 +604,7 @@ { "cell_type": "code", "execution_count": 81, - "id": "brave-treat", + "id": "alive-potato", "metadata": {}, "outputs": [], "source": [ @@ -626,7 +618,7 @@ { "cell_type": "code", "execution_count": 83, - "id": "functional-render", + "id": "changed-instruction", "metadata": {}, "outputs": [ { @@ -771,7 +763,7 @@ { "cell_type": "code", "execution_count": null, - "id": "voluntary-postage", + "id": "proved-amsterdam", "metadata": {}, "outputs": [], "source": [] diff --git a/Code/Untitled.ipynb b/Code/Untitled.ipynb index e5f9f0c..3c3b01d 100644 --- a/Code/Untitled.ipynb +++ b/Code/Untitled.ipynb @@ -3,216 +3,262 @@ { "cell_type": "code", "execution_count": 1, - "id": "departmental-hardware", + "id": "royal-trace", "metadata": {}, "outputs": [], "source": [ "import torch\n", - "from torch.autograd.functional import jacobian" + "import combined as c\n", + "import NeuralNetworkSpecifications as nns" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "differential-shock", + "execution_count": 2, + "id": "atlantic-finish", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[[8., 5., 3.]],\n", + "\n", + " [[3., 6., 6.]],\n", + "\n", + " [[3., 7., 2.]],\n", + "\n", + " [[4., 8., 2.]],\n", + "\n", + " [[0., 6., 8.]]], grad_fn=) torch.Size([5, 1, 3])\n" + ] + } + ], "source": [ - "a = torch.tensor([1,2,3,4.2],requires_grad=False)\n", - "b = torch.tensor([2,2,2,2.0],requires_grad=True)" + "BATCH_SIZE = 5\n", + "STATES = 3\n", + "CONSTELLATIONS = STATES -1 #determined by debris tracking\n", + "MAX = 10\n", + "FEATURES = 1\n", + "\n", + "stocks = torch.randint(MAX,(BATCH_SIZE,1,CONSTELLATIONS), dtype=torch.float32, requires_grad=True)\n", + "debris = torch.randint(MAX,(BATCH_SIZE,1,1), dtype=torch.float32, requires_grad=True)\n", + "\n", + "s = c.States(stocks, debris)\n", + "\n", + "print(s.values,s.values.shape)" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "separated-pursuit", + "execution_count": null, + "id": "prostate-liverpool", "metadata": {}, "outputs": [], - "source": [ - "def test(x,y):\n", - " return (x@y)**2" - ] + "source": [] }, { "cell_type": "code", - "execution_count": 17, - "id": "french-trunk", + "execution_count": 3, + "id": "simplified-permission", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "tensor(416.1600, grad_fn=)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[[[1.],\n", + " [0.]],\n", + "\n", + " [[0.],\n", + " [1.]]]]) torch.Size([1, 2, 2, 1])\n", + "tensor([[ 1.0000, 0.0000],\n", + " [ 0.0000, 1.0000],\n", + " [-0.2000, -0.2000]]) torch.Size([3, 2])\n" + ] } ], "source": [ - "test(a,b)" + "#launch_costs = torch.randint(3,(1,CONSTELLATIONS,CONSTELLATIONS,FEATURES), dtype=torch.float32)\n", + "launch_costs = torch.tensor([[[[1.0],[0]],[[0.0],[1]]]])\n", + "print(launch_costs, launch_costs.shape)\n", + "#payoff = torch.randint(5,(STATES,CONSTELLATIONS), dtype=torch.float32)\n", + "payoff = torch.tensor([[1.0, 0],[0,1.0],[-0.2,-0.2]])\n", + "print(payoff, payoff.shape)" ] }, { "cell_type": "code", - "execution_count": 57, - "id": "adverse-ceremony", + "execution_count": 4, + "id": "level-angle", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "((tensor([81.6000, 81.6000, 81.6000, 81.6000]),\n", - " tensor([ 40.8000, 81.6000, 122.4000, 171.3600])),\n", - " tensor([2., 2., 2., 2.], requires_grad=True),\n", - " tensor(416.1600, grad_fn=))" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "j = jacobian(test,(a,b))\n", - "j,b,test(a,b)" + "def linear_profit(states, choices):\n", + " #Pay particular attention to the dimensions\n", + " #note that there is an extra dimension in there just ot match that of the profit vector we'll be giving out.\n", + " \n", + " #calculate launch expenses\n", + " \n", + " launch_expense = torch.tensordot(choices,launch_costs, [[-2,-1],[-2,-1]])\n", + "\n", + " #calculate revenue\n", + "\n", + " revenue = torch.tensordot(s.values, payoff, [[-1],[0]])\n", + "\n", + "\n", + " profit = revenue - launch_expense\n", + " return profit" ] }, { "cell_type": "code", - "execution_count": 58, - "id": "lovely-apple", + "execution_count": 5, + "id": "copyrighted-acting", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "((tensor([-12.8304, -3.9878, 4.8547, 15.4658]),\n", - " tensor([-10.8365, -21.6729, -32.5094, -45.5132])),\n", - " tensor([ 1.1840, 0.3680, -0.4480, -1.4272], grad_fn=),\n", - " tensor(29.3573, grad_fn=))" - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "b2 = b - j[1]*b*0.01\n", - "j2 = jacobian(test,(a,b2))\n", - "j2,b2,test(a,b2)" + "policy = nns.ChoiceFunction(BATCH_SIZE\n", + " ,STATES\n", + " ,FEATURES\n", + " ,CONSTELLATIONS\n", + " ,12\n", + " )" ] }, { - "cell_type": "code", - "execution_count": 63, - "id": "stretch-selection", + "cell_type": "markdown", + "id": "casual-career", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "((tensor([-13.6581, -4.2906, 5.2787, 17.0284]),\n", - " tensor([-11.4119, -22.8239, -34.2358, -47.9301])),\n", - " tensor([ 1.1968, 0.3760, -0.4626, -1.4922], grad_fn=),\n", - " tensor(32.5580, grad_fn=))" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "b3 = b2 - j2[1]*b2*0.001\n", - "j3 = jacobian(test,(a,b3))\n", - "j3,b3,test(a,b3)" + "example to get profit = 1\n", + "```python\n", + "optimizer = torch.optim.Adam(policy.parameters(),lr=0.001)\n", + "\n", + "for i in range(10000):\n", + " #training loop\n", + " optimizer.zero_grad()\n", + "\n", + " output = policy.forward(s.values)\n", + "\n", + " l = ((1-linear_profit(s.values,output))**2).sum()\n", + "\n", + "\n", + " l.backward()\n", + "\n", + " optimizer.step()\n", + "\n", + " if i%200==0:\n", + " print(l)\n", + " \n", + "\n", + "results = policy.forward(s.values)\n", + "print(results.mean(dim=0), \"\\n\",results.std(dim=0))\n", + "```\n" ] }, { "cell_type": "code", - "execution_count": 64, - "id": "colored-visit", + "execution_count": 6, + "id": "straight-negative", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "((tensor([-14.5816, -4.6324, 5.7628, 18.8361]),\n", - " tensor([-12.0461, -24.0921, -36.1382, -50.5935])),\n", - " tensor([ 1.2105, 0.3846, -0.4784, -1.5637], grad_fn=),\n", - " tensor(36.2769, grad_fn=))" + "tensor([[[0.0000],\n", + " [0.0000]],\n", + "\n", + " [[0.0000],\n", + " [0.0000]],\n", + "\n", + " [[0.0000],\n", + " [0.0000]],\n", + "\n", + " [[0.0000],\n", + " [0.0000]],\n", + "\n", + " [[0.3742],\n", + " [0.0000]]], grad_fn=)" ] }, - "execution_count": 64, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "b4 = b3 - j3[1]*b3*0.001\n", - "j4 = jacobian(test,(a,b4))\n", - "j4,b4,test(a,b4)" + "output = policy.forward(s.values)\n", + "output" ] }, { "cell_type": "code", - "execution_count": 65, - "id": "familiar-pizza", + "execution_count": 7, + "id": "independent-deficit", "metadata": {}, + "outputs": [], + "source": [ + "t = torch.ones_like(output, requires_grad=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "romance-force", + "metadata": { + "tags": [] + }, "outputs": [ { - "data": { - "text/plain": [ - "((tensor([-15.6173, -5.0205, 6.3191, 20.9424]),\n", - " tensor([-12.7481, -25.4962, -38.2443, -53.5421])),\n", - " tensor([ 1.2251, 0.3938, -0.4957, -1.6428], grad_fn=),\n", - " tensor(40.6286, grad_fn=))" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" + "ename": "RuntimeError", + "evalue": "element 0 of tensors does not require grad and does not have a grad_fn", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#this is where I lose the gradient. This is where I need a gradient so that I can call .backward below\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mtest_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m inputs=inputs)\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mretain_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m Variable._execution_engine.run_backward(\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag\n", + "\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn" + ] } ], "source": [ - "b5 = b4 - j4[1]*b4*0.001\n", - "j5 = jacobian(test,(a,b5))\n", - "j5,b5,test(a,b5)" + "def test_loss(options):\n", + " return torch.autograd.functional.jacobian(linear_profit, (s.values, options))[0].sum()\n", + " #something is off here ^\n", + " #this is where I lose the gradient. This is where I need a gradient so that I can call .backward below\n", + "\n", + "test_loss(output).backward()" ] }, { "cell_type": "code", - "execution_count": 66, - "id": "brilliant-squad", + "execution_count": 55, + "id": "asian-death", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "((tensor([-15.6173, -5.0205, 6.3191, 20.9424]),\n", - " tensor([-12.7481, -25.4962, -38.2443, -53.5421])),\n", - " tensor([ 1.2407, 0.4039, -0.5146, -1.7307], grad_fn=),\n", - " tensor(45.7605, grad_fn=))" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" + "ename": "RuntimeError", + "evalue": "element 0 of tensors does not require grad and does not have a grad_fn", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m inputs=inputs)\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mretain_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m Variable._execution_engine.run_backward(\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag\n", + "\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn" + ] } ], - "source": [ - "b6 = b5 - j5[1]*b5*0.001\n", - "j6 = jacobian(test,(a,b5))\n", - "j6,b6,test(a,b6)" - ] + "source": [] }, { "cell_type": "code", "execution_count": null, - "id": "discrete-engineer", + "id": "prospective-nelson", "metadata": {}, "outputs": [], "source": [] diff --git a/Code/Untitled1.ipynb b/Code/Untitled1.ipynb index 94fbc08..fef5510 100644 --- a/Code/Untitled1.ipynb +++ b/Code/Untitled1.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "similar-ebony", + "id": "geographic-wilderness", "metadata": {}, "outputs": [], "source": [ @@ -15,22 +15,22 @@ { "cell_type": "code", "execution_count": 2, - "id": "spread-hygiene", + "id": "major-glucose", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tensor([[[6., 1., 0.]],\n", + "tensor([[[4., 9., 6.]],\n", "\n", - " [[2., 0., 4.]],\n", + " [[0., 5., 4.]],\n", "\n", - " [[7., 6., 9.]],\n", + " [[3., 1., 9.]],\n", "\n", - " [[3., 6., 9.]],\n", + " [[6., 4., 8.]],\n", "\n", - " [[9., 1., 2.]]])\n" + " [[8., 7., 6.]]], grad_fn=)\n" ] } ], @@ -39,8 +39,8 @@ "constellations = states -1 #determined by debris tracking\n", "max_start_state = 10\n", "\n", - "stocks = torch.randint(max_start_state,(batch_size,1,constellations),dtype=torch.float32)\n", - "debris = torch.randint(max_start_state,(batch_size,1,1),dtype=torch.float32)\n", + "stocks = torch.randint(max_start_state,(batch_size,1,constellations), dtype=torch.float32, requires_grad=True)\n", + "debris = torch.randint(max_start_state,(batch_size,1,1), dtype=torch.float32, requires_grad=True)\n", "\n", "s = c.States(stocks, debris)\n", "\n", @@ -50,13 +50,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "attended-making", + "id": "recognized-ability", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([6.3344e-07, 4.6190e-07])" + "tensor([5.6433e-07, 6.7631e-07], grad_fn=)" ] }, "execution_count": 3, @@ -65,14 +65,14 @@ } ], "source": [ - "constellation_collision_risk = 1e-6 * torch.rand(constellations)\n", + "constellation_collision_risk = 1e-6 * torch.rand(constellations, requires_grad=True)\n", "constellation_collision_risk" ] }, { "cell_type": "code", "execution_count": 4, - "id": "strategic-american", + "id": "elect-float", "metadata": {}, "outputs": [], "source": [ @@ -80,14 +80,14 @@ "launch_debris = 0.05\n", "debris_autocatalysis_rate = 1.4\n", "\n", - "benefit_weight0 = torch.tensor([1.0,-0.02])\n", - "benefit_weight1 = torch.tensor([0.0,1.0])" + "benefit_weight0 = torch.tensor([1.0,-0.02], requires_grad=True)\n", + "benefit_weight1 = torch.tensor([0.0,1.0], requires_grad=True)" ] }, { "cell_type": "code", "execution_count": 5, - "id": "hired-consent", + "id": "compressed-individual", "metadata": {}, "outputs": [], "source": [ @@ -102,122 +102,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "copyrighted-tackle", - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "__init__() missing 1 required positional argument: 'launch_cost'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mlaunch_cost\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m ea0 = c.LinearProfit(\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;31m#constellation index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0;36m0.95\u001b[0m \u001b[0;31m#discount\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mbenefit_weight0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: __init__() missing 1 required positional argument: 'launch_cost'" - ] - } - ], - "source": [ - "launch_cost = 5\n", - "ea0 = c.LinearProfit(\n", - " 0 #constellation index\n", - " ,0.95 #discount\n", - " ,benefit_weight0\n", - " ,launch_cost #launch_cost\n", - " )\n", - "ea1 = c.LinearProfit(\n", - " 1 #constellation index\n", - " ,0.95 #discount\n", - " ,benefit_weight1\n", - " ,launch_cost #launch_cost\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "accepted-namibia", - "metadata": {}, - "outputs": [], - "source": [ - "enn = nns.EstimandNN(batch_size\n", - " ,states\n", - " ,choices\n", - " ,constellations\n", - " ,12)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "revolutionary-eight", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[[0.0000],\n", - " [0.0000]],\n", - "\n", - " [[0.0000],\n", - " [0.0021]],\n", - "\n", - " [[0.1109],\n", - " [0.0835]],\n", - "\n", - " [[0.0884],\n", - " [0.1051]],\n", - "\n", - " [[0.0000],\n", - " [0.0000]]], grad_fn=)" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "launch_decisions = enn.forward(s.values).choices\n", - "launch_decisions" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "abroad-mobile", - "metadata": {}, - "outputs": [], - "source": [ - "w = torch.tensor([[1.0,0],[0,-0.2]])\n", - "ww = torch.tensor([1.0, -0.2])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "seasonal-companion", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([5, 1, 2])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "stocks.size()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "jewish-zoning", + "id": "tracked-bachelor", "metadata": {}, "outputs": [], "source": [ @@ -240,10 +125,6 @@ " self.benefit_weights = benefit_weights\n", " self.launch_cost = launch_cost\n", " self.deorbit_cost = deorbit_cost\n", - "\n", - " def __str__(self):\n", - " return \"LinearProfit\\n Benefit weights:\\t{}\\n launch cost:\\t{}\\n Deorbit cost:\\t{}\".format(self.benefit_weights, self.launch_cost, self.deorbit_cost)\n", - "\n", " \n", " def _period_benefit(self,stocks,debris,launches):\n", " # multiply benefits times stocks\n", @@ -260,177 +141,380 @@ }, { "cell_type": "code", - "execution_count": 17, - "id": "surgical-diversity", + "execution_count": 7, + "id": "confidential-philippines", "metadata": {}, "outputs": [], "source": [ - "def test(stocks,launches):\n", - " # multiply benefits times stocks\n", - " # sum across constellations\n", - " # reshape to standard dimensions\n", - " # subtract launch costs. \n", - " profit = torch.tensordot(ww,stocks, [[0],[1]])[:,0] - (launch_cost * launch_decisions)[:,0,0]\n", - " return profit.view(batch_size,1)" + "launch_cost = 5\n", + "ea0 = LinearProfit(\n", + " batch_size\n", + " ,0 #constellation index\n", + " ,0.95 #discount\n", + " ,benefit_weight0\n", + " ,launch_cost #launch_cost\n", + " )" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "western-sixth", + "execution_count": 8, + "id": "religious-georgia", "metadata": {}, "outputs": [], "source": [ - "t = LinearProfit(batch_size #batch_size\n", - " ,0 #constellation index\n", - " ,0.95 #discount\n", - " ,benefit_weight0\n", - " ,launch_cost #launch_cost\n", - " )" + "enn = nns.EstimandNN(batch_size\n", + " ,states\n", + " ,choices\n", + " ,constellations\n", + " ,12)" ] }, { "cell_type": "code", - "execution_count": 19, - "id": "conscious-debut", + "execution_count": 9, + "id": "painful-republican", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "<__main__.LinearProfit at 0x7f0664fad4c0>" + "tensor([[[0.],\n", + " [0.]],\n", + "\n", + " [[0.],\n", + " [0.]],\n", + "\n", + " [[0.],\n", + " [0.]],\n", + "\n", + " [[0.],\n", + " [0.]],\n", + "\n", + " [[0.],\n", + " [0.]]], grad_fn=)" ] }, - "execution_count": 19, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "t" + "launch_decisions = enn.forward(s.values).choices\n", + "launch_decisions" ] }, { "cell_type": "code", - "execution_count": 20, - "id": "eight-cheat", + "execution_count": 10, + "id": "equal-raising", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor([[5.8800],\n", - " [1.9600],\n", - " [6.3054],\n", - " [2.4978],\n", - " [8.8200]], grad_fn=)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "t._period_benefit(s.stocks,s.debris,launch_decisions)" + "def test(stocks,launches):\n", + " # multiply benefits times stocks\n", + " # sum across constellations\n", + " # reshape to standard dimensions\n", + " # subtract launch costs. \n", + " profit = torch.tensordot(benefit_weight0,stocks, [[0],[1]])[:,0] - (launch_cost * launch_decisions)[:,0,0]\n", + " return profit.view(batch_size,1)" ] }, { "cell_type": "code", - "execution_count": 25, - "id": "juvenile-barcelona", - "metadata": { - "tags": [] - }, + "execution_count": 11, + "id": "divine-editor", + "metadata": {}, "outputs": [], "source": [ - "def f(stocks, debris, launches):\n", - " return torch.autograd.functional.jacobian(t._period_benefit\n", - " ,(stocks,debris,launches)\n", - " ,create_graph=True\n", - " )\n", - "def ff(stocks, debris, launches):\n", - " return torch.autograd.functional.jacobian(f\n", - " ,(stocks,debris,launches)\n", - " ,create_graph=True\n", - " )" + "t = LinearProfit(batch_size #batch_size\n", + " ,0 #constellation index\n", + " ,0.95 #discount\n", + " ,benefit_weight0\n", + " ,launch_cost #launch_cost\n", + " )" ] }, { "cell_type": "code", - "execution_count": 30, - "id": "freelance-publicity", + "execution_count": 12, + "id": "japanese-captain", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "torch.Size([5, 1, 5, 1, 2, 5, 1, 2])" + "tensor([[3.9200],\n", + " [0.0000],\n", + " [2.9400],\n", + " [5.8800],\n", + " [7.8400]], grad_fn=)" ] }, - "execution_count": 30, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], - "source": [] + "source": [ + "test(stocks,launch_decisions)" + ] }, { "cell_type": "code", - "execution_count": 55, - "id": "vocational-operator", + "execution_count": 13, + "id": "determined-difference", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([[[6.0000, 1.0000],\n", - " [2.0000, 0.0000],\n", - " [7.0000, 6.0000],\n", - " [3.0000, 6.0000],\n", - " [9.0000, 1.0000]],\n", - "\n", - " [[4.8000, 0.8000],\n", - " [1.6000, 0.0000],\n", - " [5.6000, 4.8000],\n", - " [2.4000, 4.8000],\n", - " [7.2000, 0.8000]]])" + "tensor([[3.9200],\n", + " [0.0000],\n", + " [2.9400],\n", + " [5.8800],\n", + " [7.8400]], grad_fn=)" ] }, - "execution_count": 55, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "torch.tensordot(torch.tensor([[1.0,-0.2],[0,1]]),stocks, [[0],[1]])" + "t._period_benefit(s.stocks,s.debris,launch_decisions)" ] }, { "cell_type": "code", - "execution_count": 48, - "id": "nuclear-alberta", + "execution_count": 14, + "id": "tribal-least", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "torch.Size([5, 1, 2])" + "(tensor([[[[[0.9800, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]]]],\n", + " \n", + " \n", + " \n", + " [[[[0.0000, 0.0000]],\n", + " \n", + " [[0.9800, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]]]],\n", + " \n", + " \n", + " \n", + " [[[[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.9800, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]]]],\n", + " \n", + " \n", + " \n", + " [[[[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.9800, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]]]],\n", + " \n", + " \n", + " \n", + " [[[[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.0000, 0.0000]],\n", + " \n", + " [[0.9800, 0.0000]]]]], grad_fn=),\n", + " tensor([[[[[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]]]],\n", + " \n", + " \n", + " \n", + " [[[[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]]]],\n", + " \n", + " \n", + " \n", + " [[[[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]]]],\n", + " \n", + " \n", + " \n", + " [[[[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]]]],\n", + " \n", + " \n", + " \n", + " [[[[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]],\n", + " \n", + " [[0.]]]]]),\n", + " tensor([[[[[-5.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]]]],\n", + " \n", + " \n", + " \n", + " [[[[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-5.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]]]],\n", + " \n", + " \n", + " \n", + " [[[[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-5.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]]]],\n", + " \n", + " \n", + " \n", + " [[[[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-5.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]]]],\n", + " \n", + " \n", + " \n", + " [[[[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-0.],\n", + " [ 0.]],\n", + " \n", + " [[-5.],\n", + " [ 0.]]]]]))" ] }, - "execution_count": 48, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "stocks.shape" + "#in this case, the debris isn't tracked because it isn't included, and launch_decisions has a similar issue.\n", + "torch.autograd.functional.jacobian(t._period_benefit, (s.stocks,s.debris,launch_decisions), create_graph=True)" ] }, { "cell_type": "code", "execution_count": null, - "id": "immune-machinery", + "id": "statutory-lyric", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "naked-health", "metadata": {}, "outputs": [], "source": [] diff --git a/Code/combined.py b/Code/combined.py index dc2316d..824d459 100644 --- a/Code/combined.py +++ b/Code/combined.py @@ -172,10 +172,16 @@ class States(): def __str__(self): return "stocks\t{} \ndebris\t {}".format(self.stocks,self.debris) + + @property + def values(self): + #return these as a single tensor. + return torch.cat((self.stocks,self.debris), dim=-1) @property def number_constellations(self): return len(self.stocks) + @property def number_debris_trackers(self): return len(self.debris) @@ -277,9 +283,15 @@ class LinearProfit(EconomicAgent): """ The simplest type of profit function available. """ - def __init__(self, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0): + def __init__(self, batch_size, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0, ): + self.batch_size = batch_size + + #track which constellation this is. self.constellation_number = constellation_number + + #get the number of constellations (pull from the benefit weight, in the dimension that counts across constellations) + self.number_of_constellations = benefit_weight.size()[1] #parameters describing the agent's situation self.discount_factor = discount_factor @@ -291,14 +303,15 @@ class LinearProfit(EconomicAgent): return "LinearProfit\n Benefit weights:\t{}\n launch cost:\t{}\n Deorbit cost:\t{}".format(self.benefit_weights, self.launch_cost, self.deorbit_cost) def period_benefit(self,state,estimand_interface): - return self._period_benefit(state.stocks, state.debris, estimand_interface.launches) + return self._period_benefit(state.stocks, state.debris, estimand_interface.choices) def _period_benefit(self,stocks,debris,launches): - profits = self.benefit_weights @ stocks \ - - self.launch_cost * launches[self.constellation_number] #\ - #- deorbit_cost @ deorbits[self.constellation_number] - return profits - + # multiply benefits times stocks + # sum across constellations + # reshape to standard dimensions + # subtract launch costs. + pass + def period_benefit_jacobian_wrt_states(self, states, estimand_interface): return self._period_benefit_jacobian_wrt_states(states.stocks, states.debris, estimand_interface.launches)