current work, can't get convergence as I'd like

temporaryWork^2
youainti 5 years ago
parent 90a4a56baf
commit d5635622cc

@ -4,70 +4,6 @@ import itertools
import math
import abc
class LinearProfit:
    """
    The simplest type of profit function available.

    The per-period profit is a linear benefit of the current stocks minus
    the launch cost of this constellation's own launch decision.
    """
    def __init__(self, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0):
        # Track which constellation this agent controls.
        self.constellation_number = constellation_number
        # Parameters describing the agent's situation.
        self.discount_factor = discount_factor
        self.benefit_weights = benefit_weight
        self.launch_cost = launch_cost
        self.deorbit_cost = deorbit_cost
    def __str__(self):
        return "LinearProfit\n Benefit weights:\t{}\n launch cost:\t{}\n Deorbit cost:\t{}".format(self.benefit_weights, self.launch_cost, self.deorbit_cost)
    def period_benefit(self, state, estimand_interface):
        """Per-period benefit given a States object and an estimand interface."""
        return self._period_benefit(state.stocks, state.debris, estimand_interface.choices)
    def _period_benefit(self, stocks, debris, launches):
        # Linear benefit of stocks minus this constellation's launch cost.
        # `debris` is accepted (though unused in the profit itself) so the
        # jacobian helpers below can differentiate with respect to it.
        # NOTE(review): renamed the third parameter `choice` -> `launches`
        # for consistency with the jacobian helpers; behavior unchanged.
        profits = self.benefit_weights @ stocks \
                - self.launch_cost * launches[self.constellation_number] #\
                #- deorbit_cost @ deorbits[self.constellation_number]
        return profits
    def period_benefit_jacobian_wrt_states(self, states, estimand_interface):
        """Jacobian of the period benefit with respect to the state variables."""
        return self._period_benefit_jacobian_wrt_states(states.stocks, states.debris, estimand_interface.choices)
    def _period_benefit_jacobian_wrt_states(self, stocks, debris, launches):
        jac = jacobian(self._period_benefit, (stocks, debris, launches))
        # Concatenate the stock and debris gradients into a single
        # state-gradient tensor (assumes both have matching leading shape
        # for torch.cat -- TODO confirm against callers).
        return torch.cat((jac[0], jac[1]))
    def period_benefit_jacobian_wrt_launches(self, states, estimand_interface):
        """Jacobian of the period benefit with respect to the launch decisions."""
        return self._period_benefit_jacobian_wrt_launches(states.stocks, states.debris, estimand_interface.choices)
    def _period_benefit_jacobian_wrt_launches(self, stocks, debris, launches):
        jac = jacobian(self._period_benefit, (stocks, debris, launches))
        # jac is ordered (d/dstocks, d/ddebris, d/dlaunches).
        return jac[2]
class States():
    """
    Container for the model's state variables (satellite stocks and debris),
    providing a common interface when passing state between functions.
    """
    def __init__(self, stocks, debris):
        # stocks: per-constellation satellite levels; debris: tracked debris
        # levels.  Both are torch tensors; their shapes are assumed compatible
        # for concatenation along the last dimension -- TODO confirm callers.
        self.stocks = stocks
        self.debris = debris
    def __str__(self):
        return "stocks\t{} \ndebris\t {}".format(self.stocks,self.debris)
    @property
    def values(self):
        # BUGFIX: the property previously had no body (SyntaxError).  Expose
        # stocks and debris as one tensor, concatenated on the trailing axis,
        # matching the interface the estimand networks consume.
        return torch.cat((self.stocks, self.debris), dim=-1)
    @property
    def number_constellations(self):
        # len() of a tensor is its leading dimension.
        return len(self.stocks)
    @property
    def number_debris_trackers(self):
        return len(self.debris)
class EstimandInterface():
"""
@ -194,9 +130,9 @@ class PartialDerivativesOfValueEstimand(torch.nn.Module):
"""
def __init__(self
,batch_size
, number_constellations
, number_states
, layer_size=12):
,number_constellations
,number_states
,layer_size=12):
super().__init__()
self.batch_size = batch_size #used for upscaling
self.number_constellations = number_constellations

@ -3,7 +3,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "ceramic-doctrine",
"id": "french-experiment",
"metadata": {},
"outputs": [],
"source": [
@ -275,7 +275,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "executive-royal",
"id": "suited-nothing",
"metadata": {},
"outputs": [],
"source": [
@ -307,7 +307,7 @@
},
{
"cell_type": "markdown",
"id": "numerical-mexico",
"id": "recognized-story",
"metadata": {},
"source": [
"# Testing\n",
@ -318,7 +318,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "packed-economics",
"id": "smart-association",
"metadata": {},
"outputs": [],
"source": [
@ -331,37 +331,29 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "compliant-circle",
"execution_count": 84,
"id": "unsigned-hungary",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[ 1., 30., 11.]],\n",
"\n",
" [[60., 74., 1.]],\n",
"\n",
" [[46., 33., 70.]],\n",
"\n",
" [[42., 29., 32.]],\n",
"\n",
" [[82., 72., 57.]]])"
"torch.Size([5, 1, 3])"
]
},
"execution_count": 4,
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks_and_debris"
"stocks_and_debris.size()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "theoretical-spectrum",
"id": "regulated-conversation",
"metadata": {},
"outputs": [
{
@ -409,7 +401,7 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "vulnerable-penalty",
"id": "rental-detection",
"metadata": {},
"outputs": [],
"source": [
@ -421,7 +413,7 @@
{
"cell_type": "code",
"execution_count": 30,
"id": "classified-estimate",
"id": "mechanical-joshua",
"metadata": {},
"outputs": [],
"source": [
@ -435,7 +427,7 @@
{
"cell_type": "code",
"execution_count": 31,
"id": "martial-premium",
"id": "charged-request",
"metadata": {},
"outputs": [
{
@ -502,7 +494,7 @@
{
"cell_type": "code",
"execution_count": 45,
"id": "corrected-jewelry",
"id": "perceived-permit",
"metadata": {},
"outputs": [],
"source": [
@ -514,7 +506,7 @@
{
"cell_type": "code",
"execution_count": 53,
"id": "opened-figure",
"id": "atomic-variance",
"metadata": {},
"outputs": [],
"source": [
@ -528,7 +520,7 @@
{
"cell_type": "code",
"execution_count": 74,
"id": "chicken-inspector",
"id": "biological-badge",
"metadata": {},
"outputs": [
{
@ -600,7 +592,7 @@
{
"cell_type": "code",
"execution_count": 78,
"id": "southwest-diamond",
"id": "compliant-johnson",
"metadata": {},
"outputs": [],
"source": [
@ -612,7 +604,7 @@
{
"cell_type": "code",
"execution_count": 81,
"id": "brave-treat",
"id": "alive-potato",
"metadata": {},
"outputs": [],
"source": [
@ -626,7 +618,7 @@
{
"cell_type": "code",
"execution_count": 83,
"id": "functional-render",
"id": "changed-instruction",
"metadata": {},
"outputs": [
{
@ -771,7 +763,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "voluntary-postage",
"id": "proved-amsterdam",
"metadata": {},
"outputs": [],
"source": []

@ -3,216 +3,262 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "departmental-hardware",
"id": "royal-trace",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from torch.autograd.functional import jacobian"
"import combined as c\n",
"import NeuralNetworkSpecifications as nns"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "differential-shock",
"execution_count": 2,
"id": "atlantic-finish",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[8., 5., 3.]],\n",
"\n",
" [[3., 6., 6.]],\n",
"\n",
" [[3., 7., 2.]],\n",
"\n",
" [[4., 8., 2.]],\n",
"\n",
" [[0., 6., 8.]]], grad_fn=<CatBackward>) torch.Size([5, 1, 3])\n"
]
}
],
"source": [
"a = torch.tensor([1,2,3,4.2],requires_grad=False)\n",
"b = torch.tensor([2,2,2,2.0],requires_grad=True)"
"BATCH_SIZE = 5\n",
"STATES = 3\n",
"CONSTELLATIONS = STATES -1 #determined by debris tracking\n",
"MAX = 10\n",
"FEATURES = 1\n",
"\n",
"stocks = torch.randint(MAX,(BATCH_SIZE,1,CONSTELLATIONS), dtype=torch.float32, requires_grad=True)\n",
"debris = torch.randint(MAX,(BATCH_SIZE,1,1), dtype=torch.float32, requires_grad=True)\n",
"\n",
"s = c.States(stocks, debris)\n",
"\n",
"print(s.values,s.values.shape)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "separated-pursuit",
"execution_count": null,
"id": "prostate-liverpool",
"metadata": {},
"outputs": [],
"source": [
"def test(x,y):\n",
" return (x@y)**2"
]
"source": []
},
{
"cell_type": "code",
"execution_count": 17,
"id": "french-trunk",
"execution_count": 3,
"id": "simplified-permission",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(416.1600, grad_fn=<PowBackward0>)"
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[[1.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [1.]]]]) torch.Size([1, 2, 2, 1])\n",
"tensor([[ 1.0000, 0.0000],\n",
" [ 0.0000, 1.0000],\n",
" [-0.2000, -0.2000]]) torch.Size([3, 2])\n"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test(a,b)"
"#launch_costs = torch.randint(3,(1,CONSTELLATIONS,CONSTELLATIONS,FEATURES), dtype=torch.float32)\n",
"launch_costs = torch.tensor([[[[1.0],[0]],[[0.0],[1]]]])\n",
"print(launch_costs, launch_costs.shape)\n",
"#payoff = torch.randint(5,(STATES,CONSTELLATIONS), dtype=torch.float32)\n",
"payoff = torch.tensor([[1.0, 0],[0,1.0],[-0.2,-0.2]])\n",
"print(payoff, payoff.shape)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "adverse-ceremony",
"execution_count": 4,
"id": "level-angle",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([81.6000, 81.6000, 81.6000, 81.6000]),\n",
" tensor([ 40.8000, 81.6000, 122.4000, 171.3600])),\n",
" tensor([2., 2., 2., 2.], requires_grad=True),\n",
" tensor(416.1600, grad_fn=<PowBackward0>))"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"j = jacobian(test,(a,b))\n",
"j,b,test(a,b)"
"def linear_profit(states, choices):\n",
" #Pay particular attention to the dimensions\n",
"    #note that there is an extra dimension in there just to match that of the profit vector we'll be giving out.\n",
" \n",
" #calculate launch expenses\n",
" \n",
" launch_expense = torch.tensordot(choices,launch_costs, [[-2,-1],[-2,-1]])\n",
"\n",
" #calculate revenue\n",
"\n",
" revenue = torch.tensordot(s.values, payoff, [[-1],[0]])\n",
"\n",
"\n",
" profit = revenue - launch_expense\n",
" return profit"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "lovely-apple",
"execution_count": 5,
"id": "copyrighted-acting",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-12.8304, -3.9878, 4.8547, 15.4658]),\n",
" tensor([-10.8365, -21.6729, -32.5094, -45.5132])),\n",
" tensor([ 1.1840, 0.3680, -0.4480, -1.4272], grad_fn=<SubBackward0>),\n",
" tensor(29.3573, grad_fn=<PowBackward0>))"
"outputs": [],
"source": [
"policy = nns.ChoiceFunction(BATCH_SIZE\n",
" ,STATES\n",
" ,FEATURES\n",
" ,CONSTELLATIONS\n",
" ,12\n",
" )"
]
},
"execution_count": 58,
{
"cell_type": "markdown",
"id": "casual-career",
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b2 = b - j[1]*b*0.01\n",
"j2 = jacobian(test,(a,b2))\n",
"j2,b2,test(a,b2)"
"example to get profit = 1\n",
"```python\n",
"optimizer = torch.optim.Adam(policy.parameters(),lr=0.001)\n",
"\n",
"for i in range(10000):\n",
" #training loop\n",
" optimizer.zero_grad()\n",
"\n",
" output = policy.forward(s.values)\n",
"\n",
" l = ((1-linear_profit(s.values,output))**2).sum()\n",
"\n",
"\n",
" l.backward()\n",
"\n",
" optimizer.step()\n",
"\n",
" if i%200==0:\n",
" print(l)\n",
" \n",
"\n",
"results = policy.forward(s.values)\n",
"print(results.mean(dim=0), \"\\n\",results.std(dim=0))\n",
"```\n"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "stretch-selection",
"execution_count": 6,
"id": "straight-negative",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-13.6581, -4.2906, 5.2787, 17.0284]),\n",
" tensor([-11.4119, -22.8239, -34.2358, -47.9301])),\n",
" tensor([ 1.1968, 0.3760, -0.4626, -1.4922], grad_fn=<SubBackward0>),\n",
" tensor(32.5580, grad_fn=<PowBackward0>))"
"tensor([[[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.3742],\n",
" [0.0000]]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 63,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b3 = b2 - j2[1]*b2*0.001\n",
"j3 = jacobian(test,(a,b3))\n",
"j3,b3,test(a,b3)"
"output = policy.forward(s.values)\n",
"output"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "colored-visit",
"execution_count": 7,
"id": "independent-deficit",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-14.5816, -4.6324, 5.7628, 18.8361]),\n",
" tensor([-12.0461, -24.0921, -36.1382, -50.5935])),\n",
" tensor([ 1.2105, 0.3846, -0.4784, -1.5637], grad_fn=<SubBackward0>),\n",
" tensor(36.2769, grad_fn=<PowBackward0>))"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"b4 = b3 - j3[1]*b3*0.001\n",
"j4 = jacobian(test,(a,b4))\n",
"j4,b4,test(a,b4)"
"t = torch.ones_like(output, requires_grad=True)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "familiar-pizza",
"metadata": {},
"execution_count": 57,
"id": "romance-force",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-15.6173, -5.0205, 6.3191, 20.9424]),\n",
" tensor([-12.7481, -25.4962, -38.2443, -53.5421])),\n",
" tensor([ 1.2251, 0.3938, -0.4957, -1.6428], grad_fn=<SubBackward0>),\n",
" tensor(40.6286, grad_fn=<PowBackward0>))"
"ename": "RuntimeError",
"evalue": "element 0 of tensors does not require grad and does not have a grad_fn",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-57-efee93d7c257>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#this is where I lose the gradient. This is where I need a gradient so that I can call .backward below\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mtest_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m inputs=inputs)\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mretain_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m Variable._execution_engine.run_backward(\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag\n",
"\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b5 = b4 - j4[1]*b4*0.001\n",
"j5 = jacobian(test,(a,b5))\n",
"j5,b5,test(a,b5)"
"def test_loss(options):\n",
" return torch.autograd.functional.jacobian(linear_profit, (s.values, options))[0].sum()\n",
" #something is off here ^\n",
" #this is where I lose the gradient. This is where I need a gradient so that I can call .backward below\n",
"\n",
"test_loss(output).backward()"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "brilliant-squad",
"execution_count": 55,
"id": "asian-death",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-15.6173, -5.0205, 6.3191, 20.9424]),\n",
" tensor([-12.7481, -25.4962, -38.2443, -53.5421])),\n",
" tensor([ 1.2407, 0.4039, -0.5146, -1.7307], grad_fn=<SubBackward0>),\n",
" tensor(45.7605, grad_fn=<PowBackward0>))"
"ename": "RuntimeError",
"evalue": "element 0 of tensors does not require grad and does not have a grad_fn",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-55-ac1f78ecd780>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m inputs=inputs)\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mretain_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m Variable._execution_engine.run_backward(\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag\n",
"\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b6 = b5 - j5[1]*b5*0.001\n",
"j6 = jacobian(test,(a,b5))\n",
"j6,b6,test(a,b6)"
]
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "discrete-engineer",
"id": "prospective-nelson",
"metadata": {},
"outputs": [],
"source": []

@ -3,7 +3,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "similar-ebony",
"id": "geographic-wilderness",
"metadata": {},
"outputs": [],
"source": [
@ -15,22 +15,22 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "spread-hygiene",
"id": "major-glucose",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[6., 1., 0.]],\n",
"tensor([[[4., 9., 6.]],\n",
"\n",
" [[2., 0., 4.]],\n",
" [[0., 5., 4.]],\n",
"\n",
" [[7., 6., 9.]],\n",
" [[3., 1., 9.]],\n",
"\n",
" [[3., 6., 9.]],\n",
" [[6., 4., 8.]],\n",
"\n",
" [[9., 1., 2.]]])\n"
" [[8., 7., 6.]]], grad_fn=<CatBackward>)\n"
]
}
],
@ -39,8 +39,8 @@
"constellations = states -1 #determined by debris tracking\n",
"max_start_state = 10\n",
"\n",
"stocks = torch.randint(max_start_state,(batch_size,1,constellations),dtype=torch.float32)\n",
"debris = torch.randint(max_start_state,(batch_size,1,1),dtype=torch.float32)\n",
"stocks = torch.randint(max_start_state,(batch_size,1,constellations), dtype=torch.float32, requires_grad=True)\n",
"debris = torch.randint(max_start_state,(batch_size,1,1), dtype=torch.float32, requires_grad=True)\n",
"\n",
"s = c.States(stocks, debris)\n",
"\n",
@ -50,13 +50,13 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "attended-making",
"id": "recognized-ability",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([6.3344e-07, 4.6190e-07])"
"tensor([5.6433e-07, 6.7631e-07], grad_fn=<MulBackward0>)"
]
},
"execution_count": 3,
@ -65,14 +65,14 @@
}
],
"source": [
"constellation_collision_risk = 1e-6 * torch.rand(constellations)\n",
"constellation_collision_risk = 1e-6 * torch.rand(constellations, requires_grad=True)\n",
"constellation_collision_risk"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "strategic-american",
"id": "elect-float",
"metadata": {},
"outputs": [],
"source": [
@ -80,14 +80,14 @@
"launch_debris = 0.05\n",
"debris_autocatalysis_rate = 1.4\n",
"\n",
"benefit_weight0 = torch.tensor([1.0,-0.02])\n",
"benefit_weight1 = torch.tensor([0.0,1.0])"
"benefit_weight0 = torch.tensor([1.0,-0.02], requires_grad=True)\n",
"benefit_weight1 = torch.tensor([0.0,1.0], requires_grad=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "hired-consent",
"id": "compressed-individual",
"metadata": {},
"outputs": [],
"source": [
@ -102,122 +102,7 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "copyrighted-tackle",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "__init__() missing 1 required positional argument: 'launch_cost'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-6-54bb8ddad0e2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mlaunch_cost\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m ea0 = c.LinearProfit(\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;31m#constellation index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0;36m0.95\u001b[0m \u001b[0;31m#discount\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mbenefit_weight0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: __init__() missing 1 required positional argument: 'launch_cost'"
]
}
],
"source": [
"launch_cost = 5\n",
"ea0 = c.LinearProfit(\n",
" 0 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight0\n",
" ,launch_cost #launch_cost\n",
" )\n",
"ea1 = c.LinearProfit(\n",
" 1 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight1\n",
" ,launch_cost #launch_cost\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "accepted-namibia",
"metadata": {},
"outputs": [],
"source": [
"enn = nns.EstimandNN(batch_size\n",
" ,states\n",
" ,choices\n",
" ,constellations\n",
" ,12)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "revolutionary-eight",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.0000],\n",
" [0.0021]],\n",
"\n",
" [[0.1109],\n",
" [0.0835]],\n",
"\n",
" [[0.0884],\n",
" [0.1051]],\n",
"\n",
" [[0.0000],\n",
" [0.0000]]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"launch_decisions = enn.forward(s.values).choices\n",
"launch_decisions"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "abroad-mobile",
"metadata": {},
"outputs": [],
"source": [
"w = torch.tensor([[1.0,0],[0,-0.2]])\n",
"ww = torch.tensor([1.0, -0.2])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "seasonal-companion",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([5, 1, 2])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.size()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "jewish-zoning",
"id": "tracked-bachelor",
"metadata": {},
"outputs": [],
"source": [
@ -240,10 +125,6 @@
" self.benefit_weights = benefit_weights\n",
" self.launch_cost = launch_cost\n",
" self.deorbit_cost = deorbit_cost\n",
"\n",
" def __str__(self):\n",
" return \"LinearProfit\\n Benefit weights:\\t{}\\n launch cost:\\t{}\\n Deorbit cost:\\t{}\".format(self.benefit_weights, self.launch_cost, self.deorbit_cost)\n",
"\n",
" \n",
" def _period_benefit(self,stocks,debris,launches):\n",
" # multiply benefits times stocks\n",
@ -260,177 +141,380 @@
},
{
"cell_type": "code",
"execution_count": 17,
"id": "surgical-diversity",
"execution_count": 7,
"id": "confidential-philippines",
"metadata": {},
"outputs": [],
"source": [
"def test(stocks,launches):\n",
" # multiply benefits times stocks\n",
" # sum across constellations\n",
" # reshape to standard dimensions\n",
" # subtract launch costs. \n",
" profit = torch.tensordot(ww,stocks, [[0],[1]])[:,0] - (launch_cost * launch_decisions)[:,0,0]\n",
" return profit.view(batch_size,1)"
"launch_cost = 5\n",
"ea0 = LinearProfit(\n",
" batch_size\n",
" ,0 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight0\n",
" ,launch_cost #launch_cost\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "western-sixth",
"execution_count": 8,
"id": "religious-georgia",
"metadata": {},
"outputs": [],
"source": [
"t = LinearProfit(batch_size #batch_size\n",
" ,0 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight0\n",
" ,launch_cost #launch_cost\n",
" )"
"enn = nns.EstimandNN(batch_size\n",
" ,states\n",
" ,choices\n",
" ,constellations\n",
" ,12)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "conscious-debut",
"execution_count": 9,
"id": "painful-republican",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<__main__.LinearProfit at 0x7f0664fad4c0>"
"tensor([[[0.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [0.]]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 19,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t"
"launch_decisions = enn.forward(s.values).choices\n",
"launch_decisions"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "eight-cheat",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[5.8800],\n",
" [1.9600],\n",
" [6.3054],\n",
" [2.4978],\n",
" [8.8200]], grad_fn=<ViewBackward>)"
]
},
"execution_count": 20,
"execution_count": 10,
"id": "equal-raising",
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"t._period_benefit(s.stocks,s.debris,launch_decisions)"
"def test(stocks,launches):\n",
" # multiply benefits times stocks\n",
" # sum across constellations\n",
" # reshape to standard dimensions\n",
" # subtract launch costs. \n",
" profit = torch.tensordot(benefit_weight0,stocks, [[0],[1]])[:,0] - (launch_cost * launch_decisions)[:,0,0]\n",
" return profit.view(batch_size,1)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "juvenile-barcelona",
"metadata": {
"tags": []
},
"execution_count": 11,
"id": "divine-editor",
"metadata": {},
"outputs": [],
"source": [
"def f(stocks, debris, launches):\n",
" return torch.autograd.functional.jacobian(t._period_benefit\n",
" ,(stocks,debris,launches)\n",
" ,create_graph=True\n",
" )\n",
"def ff(stocks, debris, launches):\n",
" return torch.autograd.functional.jacobian(f\n",
" ,(stocks,debris,launches)\n",
" ,create_graph=True\n",
"t = LinearProfit(batch_size #batch_size\n",
" ,0 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight0\n",
" ,launch_cost #launch_cost\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "freelance-publicity",
"execution_count": 12,
"id": "japanese-captain",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([5, 1, 5, 1, 2, 5, 1, 2])"
"tensor([[3.9200],\n",
" [0.0000],\n",
" [2.9400],\n",
" [5.8800],\n",
" [7.8400]], grad_fn=<ViewBackward>)"
]
},
"execution_count": 30,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
"source": [
"test(stocks,launch_decisions)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "vocational-operator",
"execution_count": 13,
"id": "determined-difference",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[6.0000, 1.0000],\n",
" [2.0000, 0.0000],\n",
" [7.0000, 6.0000],\n",
" [3.0000, 6.0000],\n",
" [9.0000, 1.0000]],\n",
"\n",
" [[4.8000, 0.8000],\n",
" [1.6000, 0.0000],\n",
" [5.6000, 4.8000],\n",
" [2.4000, 4.8000],\n",
" [7.2000, 0.8000]]])"
"tensor([[3.9200],\n",
" [0.0000],\n",
" [2.9400],\n",
" [5.8800],\n",
" [7.8400]], grad_fn=<ViewBackward>)"
]
},
"execution_count": 55,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.tensordot(torch.tensor([[1.0,-0.2],[0,1]]),stocks, [[0],[1]])"
"t._period_benefit(s.stocks,s.debris,launch_decisions)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "nuclear-alberta",
"execution_count": 14,
"id": "tribal-least",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([5, 1, 2])"
"(tensor([[[[[0.9800, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.0000, 0.0000]],\n",
" \n",
" [[0.9800, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.9800, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.9800, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.9800, 0.0000]]]]], grad_fn=<ViewBackward>),\n",
" tensor([[[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]]]),\n",
" tensor([[[[[-5.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-5.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-5.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-5.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-5.],\n",
" [ 0.]]]]]))"
]
},
"execution_count": 48,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.shape"
"#in this case, the debris isn't tracked because it isn't included, and launch_decisions has a similar issue.\n",
"torch.autograd.functional.jacobian(t._period_benefit, (s.stocks,s.debris,launch_decisions), create_graph=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "immune-machinery",
"id": "statutory-lyric",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "naked-health",
"metadata": {},
"outputs": [],
"source": []

@ -173,9 +173,15 @@ class States():
def __str__(self):
return "stocks\t{} \ndebris\t {}".format(self.stocks,self.debris)
@property
def values(self):
#return these as a single tensor.
return torch.cat((self.stocks,self.debris), dim=-1)
@property
def number_constellations(self):
return len(self.stocks)
@property
def number_debris_trackers(self):
return len(self.debris)
@ -277,10 +283,16 @@ class LinearProfit(EconomicAgent):
"""
The simplest type of profit function available.
"""
def __init__(self, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0):
def __init__(self, batch_size, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0, ):
self.batch_size = batch_size
#track which constellation this is.
self.constellation_number = constellation_number
#get the number of constellations (pull from the benefit weight, in the dimension that counts across constellations)
self.number_of_constellations = benefit_weight.size()[1]
#parameters describing the agent's situation
self.discount_factor = discount_factor
self.benefit_weights = benefit_weight
@ -291,13 +303,14 @@ class LinearProfit(EconomicAgent):
return "LinearProfit\n Benefit weights:\t{}\n launch cost:\t{}\n Deorbit cost:\t{}".format(self.benefit_weights, self.launch_cost, self.deorbit_cost)
def period_benefit(self,state,estimand_interface):
return self._period_benefit(state.stocks, state.debris, estimand_interface.launches)
return self._period_benefit(state.stocks, state.debris, estimand_interface.choices)
def _period_benefit(self,stocks,debris,launches):
profits = self.benefit_weights @ stocks \
- self.launch_cost * launches[self.constellation_number] #\
#- deorbit_cost @ deorbits[self.constellation_number]
return profits
# multiply benefits times stocks
# sum across constellations
# reshape to standard dimensions
# subtract launch costs.
pass
def period_benefit_jacobian_wrt_states(self, states, estimand_interface):
return self._period_benefit_jacobian_wrt_states(states.stocks, states.debris, estimand_interface.launches)

Loading…
Cancel
Save