"""
Neural networks and supporting interfaces used to estimate the functions of
interest: the launch/choice function and the partial derivatives of the
value functions.
"""
import torch
from torch.autograd.functional import jacobian
import itertools
import math
import abc


class LinearProfit:
    """
    The simplest type of profit function available: linear benefit in the
    satellite stocks minus a linear launch cost for this constellation.
    """

    def __init__(self, constellation_number, discount_factor, benefit_weight,
                 launch_cost, deorbit_cost=0):
        # Track which constellation this profit function belongs to.
        self.constellation_number = constellation_number

        # Parameters describing the agent's situation.
        self.discount_factor = discount_factor
        self.benefit_weights = benefit_weight
        self.launch_cost = launch_cost
        self.deorbit_cost = deorbit_cost

    def __str__(self):
        # Human-readable summary of the profit parameters.
        return "LinearProfit\n Benefit weights:\t{}\n launch cost:\t{}\n Deorbit cost:\t{}".format(
            self.benefit_weights, self.launch_cost, self.deorbit_cost)

    def period_benefit(self, state, estimand_interface):
        """Per-period benefit given a States object and an EstimandInterface."""
        return self._period_benefit(state.stocks, state.debris, estimand_interface.choices)

    def _period_benefit(self, stocks, debris, choice):
        # benefit_weights . stocks  -  launch_cost * this constellation's launches.
        # NOTE(review): `debris` is currently unused and deorbit costs are not
        # yet modeled (the commented term below) — confirm before relying on it.
        profits = self.benefit_weights @ stocks \
            - self.launch_cost * choice[self.constellation_number]  # \
        # - deorbit_cost @ deorbits[self.constellation_number]
        return profits

    def period_benefit_jacobian_wrt_states(self, states, estimand_interface):
        """Jacobian of the period benefit w.r.t. the state variables (stocks, debris)."""
        return self._period_benefit_jacobian_wrt_states(states.stocks, states.debris, estimand_interface.choices)

    def _period_benefit_jacobian_wrt_states(self, stocks, debris, launches):
        jac = jacobian(self._period_benefit, (stocks, debris, launches))
        # Concatenate the stock and debris partials into one state jacobian.
        return torch.cat((jac[0], jac[1]))

    def period_benefit_jacobian_wrt_launches(self, states, estimand_interface):
        """Jacobian of the period benefit w.r.t. the launch choices."""
        return self._period_benefit_jacobian_wrt_launches(states.stocks, states.debris, estimand_interface.choices)

    def _period_benefit_jacobian_wrt_launches(self, stocks, debris, launches):
        jac = jacobian(self._period_benefit, (stocks, debris, launches))
        # jac[2] is the derivative w.r.t. the third argument (the launches).
        return jac[2]


class States():
    """
    Captures the state variables of the model (satellite stocks and debris
    trackers), to create a common interface when passing between functions.
    """

    def __init__(self, stocks, debris):
        self.stocks = stocks
        self.debris = debris

    def __str__(self):
        return "stocks\t{} \ndebris\t {}".format(self.stocks, self.debris)

    @property
    def values(self):
        # FIX: the original property had an empty body, which is a
        # SyntaxError — the module could not even be imported.  Return the
        # full state vector, matching the (stocks, debris) concatenation used
        # by LinearProfit._period_benefit_jacobian_wrt_states.
        return torch.cat((self.stocks, self.debris))

    @property
    def number_constellations(self):
        return len(self.stocks)

    @property
    def number_debris_trackers(self):
        return len(self.debris)


class EstimandInterface():
    """
    A clean interface for working with the estimand (i.e. the thing we are
    trying to estimate).  In general we estimate the choice variables and the
    partial derivatives of the value functions.

    This class wraps output for the neural network (or other estimand),
    allowing me to
    - easily substitute various types of launch functions by having a common
      interface (this eases testing)
    - check dimensionality etc. without dealing with randomness (again,
      easing testing)
    - reason more cleanly about the component pieces (easing programming)
    - provide a clean interface to find constellation-level launch decisions.

    It takes inputs of two general categories:
    - the choice function results
    - the partial derivatives of the value function
    """

    def __init__(self, partials, choices, deorbits=None):
        self.partials = partials
        self.choices = choices
        # FIX: `deorbits` was accepted but silently dropped; keep it so
        # callers that pass it can read it back.
        self.deorbits = deorbits

    @property
    def number_constellations(self):
        # TODO(original author: "fix this"): this reads the last axis of
        # `choices`, which may not be the constellation axis once choices are
        # shaped (batch, constellation, choice) — confirm before relying on it.
        return self.choices.shape[-1]

    @property
    def number_states(self):
        # TODO(original author: "fix this"): depends on the debris trackers
        # technically; reads the last axis of `partials`.
        return self.partials.shape[-1]

    def choice_single(self, constellation):
        """Return the launch decision for the constellation of interest (scalar)."""
        filter_tensor = torch.zeros(self.number_constellations)
        filter_tensor[constellation] = 1.0
        return self.choices @ filter_tensor

    def choice_vector(self, constellation):
        """Return the launch decision for the constellation of interest as a vector."""
        filter_tensor = torch.zeros(self.number_constellations)
        filter_tensor[constellation] = 1.0
        return self.choices * filter_tensor

    def partial_vector(self, constellation):
        """Return the value-function partials for the constellation of interest."""
        filter_tensor = torch.zeros(self.number_states)
        filter_tensor[constellation] = 1.0
        return self.partials @ filter_tensor

    def partial_matrix(self, constellation):
        """Return the value-function partials for the constellation of interest as a matrix."""
        filter_tensor = torch.zeros(self.number_states)
        filter_tensor[constellation] = 1.0
        return self.partials * filter_tensor

    def __str__(self):
        # Just a human readable descriptor.  (FIX: corrected the misspelled
        # "Derivativs" in the displayed text.)
        return "Launch Decisions and Partial Derivatives of value function with\n\tlaunches\n\t\t {}\n\tPartials\n\t\t{}".format(
            self.choices, self.partials)


class ChoiceFunction(torch.nn.Module):
    """
    This is used to estimate the launch (choice) function.

    Forward maps a (batch_size, 1, number_states) input to a
    (batch_size, number_constellations, number_choices) output; the batch
    size is baked into the upsample reshape, so forward requires exactly
    `batch_size` rows.
    """

    def __init__(self, batch_size, number_states, number_choices,
                 number_constellations, layer_size=12):
        super().__init__()
        # Preprocess: lift the states into the hidden feature dimension.
        self.preprocess = torch.nn.Linear(in_features=number_states, out_features=layer_size)

        # Upsample to get from (batch, 1, layer) to
        # (batch, constellation, layer): one feature row per constellation.
        self.upsample = lambda x: torch.nn.Upsample(scale_factor=number_constellations)(x).view(
            batch_size, number_constellations, layer_size)

        # Used for coercion to the state space we care about.
        self.relu = torch.nn.ReLU()

        # Sequential hidden steps.
        self.sequential = torch.nn.Sequential(
            torch.nn.Linear(in_features=layer_size, out_features=layer_size)
            # Who knows if a convolution might help here.
            , torch.nn.Linear(in_features=layer_size, out_features=layer_size)
            , torch.nn.Linear(in_features=layer_size, out_features=layer_size)
        )

        # Reduce the feature axis to match the expected results.
        self.feature_reduction = torch.nn.Linear(in_features=layer_size, out_features=number_choices)

    def forward(self, input_values):
        intermediate_values = self.relu(input_values)  # states should be positive anyway

        intermediate_values = self.preprocess(intermediate_values)
        intermediate_values = self.upsample(intermediate_values)
        intermediate_values = self.sequential(intermediate_values)
        intermediate_values = self.feature_reduction(intermediate_values)

        # Launches are always positive; this may need removing for other
        # types of choices.
        intermediate_values = self.relu(intermediate_values)

        return intermediate_values


class PartialDerivativesOfValueEstimand(torch.nn.Module):
    """
    This is used to estimate the partial derivatives of the value functions.

    Forward maps a (batch_size, 1, number_states) input to a
    (batch_size, number_constellations, number_states) output.
    """

    def __init__(self, batch_size, number_constellations, number_states,
                 layer_size=12):
        super().__init__()
        self.batch_size = batch_size  # used for upscaling
        self.number_constellations = number_constellations
        self.number_states = number_states
        self.layer_size = layer_size

        # Preprocess: cleanup as states must be positive.
        self.preprocess = torch.nn.Sequential(
            torch.nn.ReLU()
            , torch.nn.Linear(in_features=self.number_states, out_features=self.number_states)
        )

        # Upsample to get the basic dimensionality correct: from
        # (batch, state) to (batch, constellation, state).  Includes a reshape.
        self.upsample = lambda x: torch.nn.Upsample(scale_factor=self.number_constellations)(x).view(
            self.batch_size, self.number_constellations, self.number_states)

        # Sequential hidden steps.
        self.sequential = torch.nn.Sequential(
            torch.nn.Linear(in_features=number_states, out_features=layer_size)
            # Who knows if a convolution or other layer type might help here.
            , torch.nn.Linear(in_features=layer_size, out_features=layer_size)
            , torch.nn.Linear(in_features=layer_size, out_features=layer_size)
        )

        # Reduce the feature axis to match the expected results.
        self.feature_reduction = torch.nn.Linear(in_features=layer_size, out_features=number_states)

    def forward(self, states):
        # Note that the input values are just the state variables.
        # TODO: check that input values match the prepared dimension?

        intermediate = self.preprocess(states)
        intermediate = self.upsample(intermediate)
        intermediate = self.sequential(intermediate)
        intermediate = self.feature_reduction(intermediate)
        return intermediate


class EstimandNN(torch.nn.Module):
    """
    This neural network takes the current states as input values and returns
    both the partial derivatives of the value function and the launch
    function, wrapped in an EstimandInterface.
    """

    def __init__(self, batch_size, number_states, number_choices,
                 number_constellations, layer_size=12):
        super().__init__()
        # TODO: verify these are correct.
        self.partials_estimator = PartialDerivativesOfValueEstimand(
            batch_size, number_constellations, number_states, layer_size)
        self.launch_estimator = ChoiceFunction(
            batch_size, number_states, number_choices, number_constellations, layer_size)

    def forward(self, input_values):
        # FIX: removed a stray `pass` statement that preceded the real body.
        partials = self.partials_estimator(input_values)
        launch = self.launch_estimator(input_values)
        return EstimandInterface(partials, launch)
this may need removed for other types of choices.\n", " \n", - " return intermediate_values" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "global-wallet", - "metadata": {}, - "outputs": [], - "source": [ + " return intermediate_values\n", + "\n", "class PartialDerivativesOfValueEstimand(torch.nn.Module):\n", " \"\"\"\n", " This is used to estimate the partial derivatives of the value functions\n", @@ -305,8 +274,8 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "resident-cooper", + "execution_count": 2, + "id": "executive-royal", "metadata": {}, "outputs": [], "source": [ @@ -338,7 +307,7 @@ }, { "cell_type": "markdown", - "id": "compatible-conviction", + "id": "numerical-mexico", "metadata": {}, "source": [ "# Testing\n", @@ -348,8 +317,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "explicit-sponsorship", + "execution_count": 3, + "id": "packed-economics", "metadata": {}, "outputs": [], "source": [ @@ -362,25 +331,25 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "desperate-color", + "execution_count": 4, + "id": "compliant-circle", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "tensor([[[88., 68., 13.]],\n", + "tensor([[[ 1., 30., 11.]],\n", "\n", - " [[23., 8., 62.]],\n", + " [[60., 74., 1.]],\n", "\n", - " [[96., 65., 89.]],\n", + " [[46., 33., 70.]],\n", "\n", - " [[16., 27., 62.]],\n", + " [[42., 29., 32.]],\n", "\n", - " [[40., 38., 20.]]])" + " [[82., 72., 57.]]])" ] }, - "execution_count": 8, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -391,22 +360,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "median-nurse", - "metadata": {}, - "outputs": [], - "source": [ - "enn = EstimandNN(batch_size\n", - " ,states\n", - " ,choices\n", - " ,constellations\n", - " ,12)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "under-monroe", + "execution_count": 6, + "id": "theoretical-spectrum", "metadata": {}, "outputs": [ { @@ 
-415,35 +370,35 @@ "text": [ "Launch Decisions and Partial Derivativs of value function with\n", "\tlaunches\n", - "\t\t tensor([[[0.8138],\n", - " [4.6481]],\n", + "\t\t tensor([[[0.0000],\n", + " [0.0000]],\n", "\n", - " [[1.1540],\n", - " [2.0568]],\n", + " [[2.0907],\n", + " [0.1053]],\n", "\n", - " [[2.1170],\n", - " [6.2769]],\n", + " [[2.9730],\n", + " [2.2000]],\n", "\n", - " [[1.3752],\n", - " [2.4555]],\n", + " [[2.3975],\n", + " [1.2877]],\n", "\n", - " [[0.7025],\n", - " [2.5947]]], grad_fn=)\n", + " [[4.2107],\n", + " [2.0752]]], grad_fn=)\n", "\tPartials\n", - "\t\ttensor([[[-1.7285, -1.5841, -1.0559],\n", - " [ 2.9694, 4.2772, 3.6800]],\n", + "\t\ttensor([[[ 0.1939, 0.3954, 0.0730],\n", + " [-0.9428, 0.6145, -0.9247]],\n", "\n", - " [[-0.6313, -1.6874, -0.1176],\n", - " [ 2.3680, 3.5758, 2.4247]],\n", + " [[ 1.1686, 3.0170, 0.3393],\n", + " [-7.1474, 2.3495, -7.0566]],\n", "\n", - " [[-2.1381, -3.2882, -0.9620],\n", - " [ 5.2646, 7.8475, 5.8994]],\n", + " [[-2.0849, 3.0883, -3.3791],\n", + " [-0.6664, 0.0361, -2.2530]],\n", "\n", - " [[-1.2167, -2.0969, -0.4998],\n", - " [ 1.7140, 2.4235, 2.1813]],\n", + " [[-0.7117, 2.5474, -1.6458],\n", + " [-2.1937, 0.6897, -3.0382]],\n", "\n", - " [[-1.1293, -1.2674, -0.6386],\n", - " [ 1.5440, 2.1548, 2.0289]]], grad_fn=)\n" + " [[-1.0262, 4.5973, -2.6606],\n", + " [-5.4307, 1.4510, -6.6972]]], grad_fn=)\n" ] } ], @@ -453,8 +408,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "nonprofit-castle", + "execution_count": 7, + "id": "vulnerable-penalty", "metadata": {}, "outputs": [], "source": [ @@ -465,12 +420,12 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "crucial-homeless", + "execution_count": 30, + "id": "classified-estimate", "metadata": {}, "outputs": [], "source": [ - "b = ChoiceFunction(batch_size\n", + "ch = ChoiceFunction(batch_size\n", " ,states\n", " ,choices\n", " ,constellations\n", @@ -479,16 +434,16 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": 
"practical-journalist", + "execution_count": 31, + "id": "martial-premium", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tensor(29.0569, grad_fn=)\n", - "tensor(23187.2695, grad_fn=)\n", + "tensor(46.8100, grad_fn=)\n", + "tensor(82442.4219, grad_fn=)\n", "tensor(0., grad_fn=)\n", "tensor(0., grad_fn=)\n", "tensor(0., grad_fn=)\n", @@ -518,19 +473,19 @@ " [0.]]], grad_fn=)" ] }, - "execution_count": 13, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "optimizer = torch.optim.SGD(b.parameters(),lr=0.01)\n", + "optimizer = torch.optim.SGD(ch.parameters(),lr=0.01)\n", "\n", "for i in range(10):\n", " #training loop\n", " optimizer.zero_grad()\n", "\n", - " output = b.forward(stocks_and_debris)\n", + " output = ch.forward(stocks_and_debris)\n", "\n", " l = lossb(output)\n", "\n", @@ -541,71 +496,260 @@ " print(l)\n", " \n", "\n", - "b.forward(stocks_and_debris)" + "ch.forward(stocks_and_debris)" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "correct-complex", + "execution_count": 45, + "id": "corrected-jewelry", "metadata": {}, "outputs": [], "source": [ - "def lossa(a):\n", + "def lossc(a):\n", " #test loss function\n", - " return (a.choices**2).sum() + (a.partials**2).sum()" + " return (a**2).sum()" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "pharmaceutical-brush", + "execution_count": 53, + "id": "opened-figure", + "metadata": {}, + "outputs": [], + "source": [ + "pd = PartialDerivativesOfValueEstimand(\n", + " batch_size\n", + " ,constellations\n", + " ,states\n", + " ,12)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "chicken-inspector", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tensor(336.1971, grad_fn=)\n", - "tensor(67583.6484, grad_fn=)\n", - "tensor(1.5658e+26, grad_fn=)\n", - "tensor(nan, grad_fn=)\n", - "tensor(nan, grad_fn=)\n", - "tensor(nan, grad_fn=)\n", - "tensor(nan, 
grad_fn=)\n", - "tensor(nan, grad_fn=)\n", - "tensor(nan, grad_fn=)\n", - "tensor(nan, grad_fn=)\n" + "tensor(1.9948e-06, grad_fn=)\n", + "tensor(1.7427e-05, grad_fn=)\n", + "tensor(5.7993e-06, grad_fn=)\n", + "tensor(2.9985e-06, grad_fn=)\n", + "tensor(6.5281e-06, grad_fn=)\n", + "tensor(7.8818e-06, grad_fn=)\n", + "tensor(4.4327e-06, grad_fn=)\n", + "tensor(1.1240e-06, grad_fn=)\n", + "tensor(1.2478e-06, grad_fn=)\n", + "tensor(3.5818e-06, grad_fn=)\n", + "tensor(4.3732e-06, grad_fn=)\n", + "tensor(2.7699e-06, grad_fn=)\n", + "tensor(8.9659e-07, grad_fn=)\n", + "tensor(5.7541e-07, grad_fn=)\n", + "tensor(1.5010e-06, grad_fn=)\n" ] }, { "data": { "text/plain": [ - "tensor([[[0.],\n", - " [0.]],\n", + "tensor([[[ 0.0002, -0.0002, -0.0003],\n", + " [ 0.0001, -0.0003, -0.0002]],\n", "\n", - " [[0.],\n", - " [0.]],\n", + " [[ 0.0002, -0.0003, -0.0003],\n", + " [ 0.0003, -0.0004, -0.0002]],\n", "\n", - " [[0.],\n", - " [0.]],\n", + " [[ 0.0002, -0.0003, -0.0003],\n", + " [ 0.0002, -0.0003, -0.0003]],\n", "\n", - " [[0.],\n", - " [0.]],\n", + " [[ 0.0002, -0.0002, -0.0004],\n", + " [ 0.0003, -0.0003, -0.0003]],\n", "\n", - " [[0.],\n", - " [0.]]], grad_fn=)" + " [[ 0.0003, -0.0003, -0.0002],\n", + " [ 0.0003, -0.0003, -0.0002]]], grad_fn=)" ] }, - "execution_count": 15, + "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "optimizer = torch.optim.SGD(enn.parameters(),lr=0.001) #note the use of enn in the optimizer\n", + "optimizer = torch.optim.Adam(pd.parameters(),lr=0.0001)\n", "\n", - "for i in range(10):\n", + "for i in range(15):\n", + " #training loop\n", + " optimizer.zero_grad()\n", + "\n", + " output = pd.forward(stocks_and_debris)\n", + "\n", + " l = lossc(output)\n", + "\n", + " l.backward()\n", + "\n", + " optimizer.step()\n", + "\n", + " print(l)\n", + " \n", + "\n", + "pd.forward(stocks_and_debris)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "southwest-diamond", + "metadata": {}, + "outputs": 
[], + "source": [ + "def lossa(a):\n", + " #test loss function\n", + " return (a.choices**2).sum() + (a.partials**2).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "brave-treat", + "metadata": {}, + "outputs": [], + "source": [ + "enn = EstimandNN(batch_size\n", + " ,states\n", + " ,choices\n", + " ,constellations\n", + " ,12)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "functional-render", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 tensor(112.1970, grad_fn=)\n", + "10 tensor(79.8152, grad_fn=)\n", + "20 tensor(55.6422, grad_fn=)\n", + "30 tensor(38.5636, grad_fn=)\n", + "40 tensor(26.9156, grad_fn=)\n", + "50 tensor(18.9986, grad_fn=)\n", + "60 tensor(13.6606, grad_fn=)\n", + "70 tensor(10.1881, grad_fn=)\n", + "80 tensor(8.0395, grad_fn=)\n", + "90 tensor(6.7618, grad_fn=)\n", + "100 tensor(6.0101, grad_fn=)\n", + "110 tensor(5.5517, grad_fn=)\n", + "120 tensor(5.2434, grad_fn=)\n", + "130 tensor(5.0054, grad_fn=)\n", + "140 tensor(4.7988, grad_fn=)\n", + "150 tensor(4.6069, grad_fn=)\n", + "160 tensor(4.4235, grad_fn=)\n", + "170 tensor(4.2468, grad_fn=)\n", + "180 tensor(4.0763, grad_fn=)\n", + "190 tensor(3.9117, grad_fn=)\n", + "200 tensor(3.7532, grad_fn=)\n", + "210 tensor(3.6005, grad_fn=)\n", + "220 tensor(3.4535, grad_fn=)\n", + "230 tensor(3.3121, grad_fn=)\n", + "240 tensor(3.1761, grad_fn=)\n", + "250 tensor(3.0454, grad_fn=)\n", + "260 tensor(2.9198, grad_fn=)\n", + "270 tensor(2.7991, grad_fn=)\n", + "280 tensor(2.6832, grad_fn=)\n", + "290 tensor(2.5720, grad_fn=)\n", + "300 tensor(2.4653, grad_fn=)\n", + "310 tensor(2.3629, grad_fn=)\n", + "320 tensor(2.2646, grad_fn=)\n", + "330 tensor(2.1704, grad_fn=)\n", + "340 tensor(2.0800, grad_fn=)\n", + "350 tensor(1.9933, grad_fn=)\n", + "360 tensor(1.9103, grad_fn=)\n", + "370 tensor(1.8306, grad_fn=)\n", + "380 tensor(1.7543, grad_fn=)\n", + "390 tensor(1.6812, grad_fn=)\n", + "400 
tensor(1.6111, grad_fn=)\n", + "410 tensor(1.5440, grad_fn=)\n", + "420 tensor(1.4797, grad_fn=)\n", + "430 tensor(1.4180, grad_fn=)\n", + "440 tensor(1.3590, grad_fn=)\n", + "450 tensor(1.3025, grad_fn=)\n", + "460 tensor(1.2484, grad_fn=)\n", + "470 tensor(1.1965, grad_fn=)\n", + "480 tensor(1.1469, grad_fn=)\n", + "490 tensor(1.0994, grad_fn=)\n", + "500 tensor(1.0540, grad_fn=)\n", + "510 tensor(1.0104, grad_fn=)\n", + "520 tensor(0.9688, grad_fn=)\n", + "530 tensor(0.9290, grad_fn=)\n", + "540 tensor(0.8908, grad_fn=)\n", + "550 tensor(0.8544, grad_fn=)\n", + "560 tensor(0.8195, grad_fn=)\n", + "570 tensor(0.7861, grad_fn=)\n", + "580 tensor(0.7542, grad_fn=)\n", + "590 tensor(0.7237, grad_fn=)\n", + "600 tensor(0.6945, grad_fn=)\n", + "610 tensor(0.6667, grad_fn=)\n", + "620 tensor(0.6400, grad_fn=)\n", + "630 tensor(0.6146, grad_fn=)\n", + "640 tensor(0.5903, grad_fn=)\n", + "650 tensor(0.5671, grad_fn=)\n", + "660 tensor(0.5449, grad_fn=)\n", + "670 tensor(0.5237, grad_fn=)\n", + "680 tensor(0.5035, grad_fn=)\n", + "690 tensor(0.4842, grad_fn=)\n", + "700 tensor(0.4658, grad_fn=)\n", + "710 tensor(0.4482, grad_fn=)\n", + "720 tensor(0.4315, grad_fn=)\n", + "730 tensor(0.4155, grad_fn=)\n", + "740 tensor(0.4002, grad_fn=)\n", + "750 tensor(0.3857, grad_fn=)\n", + "760 tensor(0.3718, grad_fn=)\n", + "770 tensor(0.3586, grad_fn=)\n", + "780 tensor(0.3460, grad_fn=)\n", + "790 tensor(0.3340, grad_fn=)\n", + "800 tensor(0.3226, grad_fn=)\n", + "810 tensor(0.3117, grad_fn=)\n", + "820 tensor(0.3013, grad_fn=)\n", + "830 tensor(0.2914, grad_fn=)\n", + "840 tensor(0.2820, grad_fn=)\n", + "850 tensor(0.2730, grad_fn=)\n", + "860 tensor(0.2645, grad_fn=)\n", + "870 tensor(0.2564, grad_fn=)\n", + "880 tensor(0.2486, grad_fn=)\n", + "890 tensor(0.2413, grad_fn=)\n", + "900 tensor(0.2342, grad_fn=)\n", + "910 tensor(0.2276, grad_fn=)\n", + "920 tensor(0.2212, grad_fn=)\n", + "930 tensor(0.2151, grad_fn=)\n", + "940 tensor(0.2094, grad_fn=)\n", + "950 tensor(0.2039, 
grad_fn=)\n", + "960 tensor(0.1986, grad_fn=)\n", + "970 tensor(0.1936, grad_fn=)\n", + "980 tensor(0.1889, grad_fn=)\n", + "990 tensor(0.1844, grad_fn=)\n" + ] + }, + { + "data": { + "text/plain": [ + "<__main__.EstimandInterface at 0x7f85609fce20>" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimizer = torch.optim.Adam(enn.parameters(),lr=0.0001) #note the use of enn in the optimizer\n", + "\n", + "for i in range(1000):\n", " #training loop\n", " optimizer.zero_grad()\n", "\n", @@ -617,16 +761,17 @@ "\n", " optimizer.step()\n", "\n", - " print(l)\n", + " if i%10==0:\n", + " print(i, l)\n", " \n", "\n", - "b.forward(stocks_and_debris)" + "enn.forward(stocks_and_debris)" ] }, { "cell_type": "code", "execution_count": null, - "id": "other-subdivision", + "id": "voluntary-postage", "metadata": {}, "outputs": [], "source": []