{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "religious-anaheim",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.autograd.functional import jacobian\n",
    "import itertools\n",
    "import math\n",
    "import abc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "green-brunei",
   "metadata": {},
   "outputs": [],
   "source": [
    "class EconomicAgent(metaclass=abc.ABCMeta):\n",
    "    @abc.abstractmethod\n",
    "    def period_benefit(self,state,estimand_interface):\n",
    "        pass\n",
    "    @abc.abstractmethod\n",
    "    def _period_benefit(self):\n",
    "        pass\n",
    "    @abc.abstractmethod\n",
    "    def period_benefit_jacobian_wrt_states(self):\n",
    "        pass\n",
    "    @abc.abstractmethod\n",
    "    def _period_benefit_jacobian_wrt_states(self):\n",
    "        pass\n",
    "    @abc.abstractmethod\n",
    "    def period_benefit_jacobian_wrt_launches(self):\n",
    "        pass\n",
    "    @abc.abstractmethod\n",
    "    def _period_benefit_jacobian_wrt_launches(self):\n",
    "        pass\n",
    "\n",
    "class LinearProfit(EconomicAgent):\n",
    "    \"\"\"\n",
    "    The simplest type of profit function available.\n",
    "    \"\"\"\n",
    "    def __init__(self, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0):\n",
    "        #track which constellation this is.\n",
    "        self.constellation_number = constellation_number\n",
    "\n",
    "        #parameters describing the agent's situation\n",
    "        self.discount_factor = discount_factor\n",
    "        self.benefit_weights = benefit_weight\n",
    "        self.launch_cost = launch_cost\n",
    "        self.deorbit_cost = deorbit_cost\n",
    "\n",
    "    def __str__(self):\n",
    "        return \"LinearProfit\\n Benefit weights:\\t{}\\n launch cost:\\t{}\\n Deorbit cost:\\t{}\".format(self.benefit_weights, self.launch_cost, self.deorbit_cost)\n",
    "\n",
    "    def period_benefit(self,state,estimand_interface):\n",
    "        return self._period_benefit(state.stocks, state.debris, estimand_interface.choices)\n",
    "    \n",
    "    def _period_benefit(self,stocks,debris,choice):\n",
    "        profits =  self.benefit_weights @ stocks \\\n",
    "                    - self.launch_cost * choice[self.constellation_number] #\\ \n",
    "                    #- deorbit_cost @ deorbits[self.constellation_number]\n",
    "        return profits\n",
    "\n",
    "    def period_benefit_jacobian_wrt_states(self, states, estimand_interface):\n",
    "        return self._period_benefit_jacobian_wrt_states(states.stocks, states.debris, estimand_interface.choices)\n",
    "\n",
    "    def _period_benefit_jacobian_wrt_states(self, stocks, debris, launches):\n",
    "        jac = jacobian(self._period_benefit, (stocks,debris,launches))\n",
    "        return torch.cat((jac[0], jac[1]))\n",
    "    \n",
    "    def period_benefit_jacobian_wrt_launches(self, states, estimand_interface):\n",
    "        return self._period_benefit_jacobian_wrt_launches(states.stocks, states.debris, estimand_interface.choices)\n",
    "\n",
    "    def _period_benefit_jacobian_wrt_launches(self,stocks,debris,launches):\n",
    "        jac = jacobian(self._period_benefit, (stocks,debris,launches))\n",
    "        return jac[2]\n",
    "\n",
    "class States():\n",
    "    \"\"\"\n",
    "    This is supposed to capture the state variables of the model, to create a common interface \n",
    "    when passing between functions.\n",
    "    \"\"\"\n",
    "    def __init__(self, stocks,debris):\n",
    "        self.stocks = stocks\n",
    "        self.debris = debris\n",
    "        \n",
    "\n",
    "    def __str__(self):\n",
    "        return \"stocks\\t{} \\ndebris\\t {}\".format(self.stocks,self.debris)\n",
    "\n",
    "    @property\n",
    "    def number_constellations(self):\n",
    "        return len(self.stocks)\n",
    "    @property\n",
    "    def number_debris_trackers(self):\n",
    "        return len(self.debris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "sweet-injection",
   "metadata": {},
   "outputs": [],
   "source": [
    "    \n",
    "class EstimandInterface():\n",
    "    \"\"\"\n",
    "    This defines a clean interface for working with the estimand (i.e. thing we are trying to estimate).\n",
    "    In general, we are trying to estimate the choice variables and the partial derivatives of the value functions.\n",
    "    This \n",
    "\n",
    "    This class wraps output for the neural network (or other estimand), allowing me to \n",
    "        - easily substitute various types of launch functions by having a common interface\n",
    "            - this eases testing\n",
    "        - check dimensionality etc without dealing with randomness\n",
    "            - again, easing testing\n",
    "        - reason more cleanly about the component pieces\n",
    "            - easing programming\n",
    "        - provide a clean interface to find constellation level launch decisions etc.\n",
    "\n",
    "    It takes inputs of two general categories:\n",
    "        - the choice function results\n",
    "        - the partial derivatives of the value function\n",
    "    \"\"\"\n",
    "    def __init__(self, partials, choices, deorbits=None):\n",
    "        self.partials = partials\n",
    "        self.choices = choices\n",
    "        \n",
    "    @property\n",
    "    def number_constellations(self):\n",
    "        pass #fix this\n",
    "        return self.choices.shape[-1]\n",
    "    @property\n",
    "    def number_states(self):\n",
    "        pass #fix this\n",
    "        return self.partials.shape[-1] #This depends on the debris trackers technically.\n",
    "\n",
    "    def choice_single(self, constellation):\n",
    "        #returns the launch decision for the constellation of interest\n",
    "        \n",
    "        filter_tensor = torch.zeros(self.number_constellations)\n",
    "        filter_tensor[constellation] = 1.0\n",
    "        \n",
    "        return self.choices @ filter_tensor\n",
    "    \n",
    "    def choice_vector(self, constellation):\n",
    "        #returns the launch decision for the constellation of interest as a vector\n",
    "        \n",
    "        filter_tensor = torch.zeros(self.number_constellations)\n",
    "        filter_tensor[constellation] = 1.0\n",
    "        \n",
    "        return self.choices * filter_tensor\n",
    "    \n",
    "    def partial_vector(self, constellation):\n",
    "        #returns the partials of the value function corresponding to the constellation of interest\n",
    "        \n",
    "        filter_tensor = torch.zeros(self.number_states)\n",
    "        filter_tensor[constellation] = 1.0\n",
    "        \n",
    "        return self.partials @ filter_tensor\n",
    "    \n",
    "    def partial_matrix(self, constellation):\n",
    "        #returns the partials of the value function corresponding to \n",
    "        #the constellation of interest as a matrix\n",
    "        \n",
    "        filter_tensor = torch.zeros(self.number_states)\n",
    "        filter_tensor[constellation] = 1.0\n",
    "        \n",
    "        return self.partials * filter_tensor\n",
    "    \n",
    "    def __str__(self):\n",
    "        #just a human readable descriptor\n",
    "        return \"Launch Decisions and Partial Derivativs of value function with\\n\\tlaunches\\n\\t\\t {}\\n\\tPartials\\n\\t\\t{}\".format(self.choices,self.partials)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "right-dinner",
   "metadata": {},
   "outputs": [],
   "source": [
    "class ChoiceFunction(torch.nn.Module):\n",
    "    \"\"\"\n",
    "    This is used to estimate the launch function\n",
    "    \"\"\"\n",
    "    def __init__(self\n",
    "                 ,batch_size\n",
    "                 ,number_states\n",
    "                 ,number_choices\n",
    "                 ,number_constellations\n",
    "                 ,layer_size=12\n",
    "                ):\n",
    "        super().__init__()\n",
    "        \n",
    "        #preprocess\n",
    "        self.preprocess = torch.nn.Linear(in_features=number_states, out_features=layer_size)\n",
    "        \n",
    "        #upsample\n",
    "        self.upsample = lambda x: torch.nn.Upsample(scale_factor=number_constellations)(x).view(batch_size\n",
    "                                                                                            ,number_constellations\n",
    "                                                                                            ,layer_size)\n",
    "        \n",
    "        self.relu = torch.nn.ReLU() #used for coersion to the state space we care about.\n",
    "        \n",
    "       \n",
    "        #sequential steps\n",
    "        self.sequential = torch.nn.Sequential(\n",
    "            torch.nn.Linear(in_features=layer_size, out_features=layer_size)\n",
    "            #who knows if a convolution might help here.\n",
    "            ,torch.nn.Linear(in_features=layer_size, out_features=layer_size)\n",
    "            ,torch.nn.Linear(in_features=layer_size, out_features=layer_size)\n",
    "        )\n",
    "\n",
    "        #reduce the feature axis to match expected results\n",
    "        self.feature_reduction = torch.nn.Linear(in_features=layer_size, out_features=number_choices)\n",
    "\n",
    "        \n",
    "    def forward(self, input_values):\n",
    "        \n",
    "        intermediate_values = self.relu(input_values) #states should be positive anyway.\n",
    "        \n",
    "        intermediate_values = self.preprocess(intermediate_values)\n",
    "        intermediate_values = self.upsample(intermediate_values)\n",
    "        intermediate_values = self.sequential(intermediate_values)\n",
    "        intermediate_values = self.feature_reduction(intermediate_values)\n",
    "        \n",
    "        intermediate_values = self.relu(intermediate_values) #launches are always positive, this may need removed for other types of choices.\n",
    "        \n",
    "        return intermediate_values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "global-wallet",
   "metadata": {},
   "outputs": [],
   "source": [
    "class PartialDerivativesOfValueEstimand(torch.nn.Module):\n",
    "    \"\"\"\n",
    "    This is used to estimate the partial derivatives of the value functions\n",
    "    \"\"\"\n",
    "    def __init__(self\n",
    "                 ,batch_size\n",
    "                 , number_constellations\n",
    "                 , number_states\n",
    "                 , layer_size=12):\n",
    "        super().__init__()\n",
    "        self.batch_size = batch_size #used for upscaling\n",
    "        self.number_constellations = number_constellations\n",
    "        self.number_states = number_states\n",
    "        self.layer_size = layer_size\n",
    "        \n",
    "        \n",
    "        #preprocess (single linear layer in case there is anything that needs to happen to all states)\n",
    "        self.preprocess = torch.nn.Sequential(\n",
    "            torch.nn.ReLU() #cleanup as states must be positive\n",
    "            ,torch.nn.Linear(in_features = self.number_states, out_features=self.number_states)\n",
    "        )\n",
    "        \n",
    "        #upsample to get the basic dimensionality correct. From (batch,State) to (batch, constellation, state). Includes a reshape\n",
    "        self.upsample = lambda x: torch.nn.Upsample(scale_factor=self.number_constellations)(x).view(self.batch_size\n",
    "                                                                                            ,self.number_constellations\n",
    "                                                                                            ,self.number_states)\n",
    "        \n",
    "        #sequential steps\n",
    "        self.sequential = torch.nn.Sequential(\n",
    "            torch.nn.Linear(in_features=number_states, out_features=layer_size)\n",
    "            #who knows if a convolution or other layer type might help here.\n",
    "            ,torch.nn.Linear(in_features=layer_size, out_features=layer_size)\n",
    "            ,torch.nn.Linear(in_features=layer_size, out_features=layer_size)\n",
    "        )\n",
    "\n",
    "        #reduce the feature axis to match expected results\n",
    "        self.feature_reduction = torch.nn.Linear(in_features=layer_size, out_features=number_states)\n",
    "        \n",
    "    def forward(self, states):\n",
    "        #Note that the input values are just going to be the state variables\n",
    "        #TODO:check that input values match the prepared dimension?\n",
    "        \n",
    "        #preprocess\n",
    "        intermediate = self.preprocess(states)\n",
    "        \n",
    "        #upscale the input values\n",
    "        intermediate = self.upsample(intermediate)\n",
    "        \n",
    "        #intermediate processing\n",
    "        intermediate = self.sequential(intermediate)\n",
    "        \n",
    "        #reduce feature axis to match the expected number of partials\n",
    "        intermediate = self.feature_reduction(intermediate)\n",
    "        \n",
    "        \n",
    "        return intermediate\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "resident-cooper",
   "metadata": {},
   "outputs": [],
   "source": [
    "class EstimandNN(torch.nn.Module):\n",
    "    \"\"\"\n",
    "    This neural network takes the current states as input values and returns both\n",
    "    the partial derivatives of the value function and the launch function.\n",
    "    \"\"\"\n",
    "    def __init__(self\n",
    "                 ,batch_size\n",
    "                 ,number_states\n",
    "                 ,number_choices\n",
    "                 ,number_constellations\n",
    "                 ,layer_size=12\n",
    "                ):\n",
    "        super().__init__()\n",
    "        \n",
    "\n",
    "        self.partials_estimator = PartialDerivativesOfValueEstimand(batch_size, number_constellations, number_states, layer_size)\n",
    "        self.launch_estimator = ChoiceFunction(batch_size, number_states, number_choices, number_constellations, layer_size)\n",
    "        \n",
    "    def forward(self, input_values):\n",
    "        pass\n",
    "        partials = self.partials_estimator(input_values)\n",
    "        launch = self.launch_estimator(input_values)\n",
    "        \n",
    "        return EstimandInterface(partials,launch)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "compatible-conviction",
   "metadata": {},
   "source": [
    "# Testing\n",
    "\n",
    "Test if states can handle the dimensionality needed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "explicit-sponsorship",
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size,states,choices = 5,3,1\n",
    "constellations = states -1 #determined by debris tracking\n",
    "max_start_state = 100\n",
    "\n",
    "stocks_and_debris = torch.randint(max_start_state,(batch_size,1,states),dtype=torch.float32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "desperate-color",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[[88., 68., 13.]],\n",
       "\n",
       "        [[23.,  8., 62.]],\n",
       "\n",
       "        [[96., 65., 89.]],\n",
       "\n",
       "        [[16., 27., 62.]],\n",
       "\n",
       "        [[40., 38., 20.]]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stocks_and_debris"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "median-nurse",
   "metadata": {},
   "outputs": [],
   "source": [
    "enn = EstimandNN(batch_size\n",
    "                 ,states\n",
    "                 ,choices\n",
    "                 ,constellations\n",
    "                 ,12)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "under-monroe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Launch Decisions and Partial Derivativs of value function with\n",
      "\tlaunches\n",
      "\t\t tensor([[[0.8138],\n",
      "         [4.6481]],\n",
      "\n",
      "        [[1.1540],\n",
      "         [2.0568]],\n",
      "\n",
      "        [[2.1170],\n",
      "         [6.2769]],\n",
      "\n",
      "        [[1.3752],\n",
      "         [2.4555]],\n",
      "\n",
      "        [[0.7025],\n",
      "         [2.5947]]], grad_fn=<ReluBackward0>)\n",
      "\tPartials\n",
      "\t\ttensor([[[-1.7285, -1.5841, -1.0559],\n",
      "         [ 2.9694,  4.2772,  3.6800]],\n",
      "\n",
      "        [[-0.6313, -1.6874, -0.1176],\n",
      "         [ 2.3680,  3.5758,  2.4247]],\n",
      "\n",
      "        [[-2.1381, -3.2882, -0.9620],\n",
      "         [ 5.2646,  7.8475,  5.8994]],\n",
      "\n",
      "        [[-1.2167, -2.0969, -0.4998],\n",
      "         [ 1.7140,  2.4235,  2.1813]],\n",
      "\n",
      "        [[-1.1293, -1.2674, -0.6386],\n",
      "         [ 1.5440,  2.1548,  2.0289]]], grad_fn=<AddBackward0>)\n"
     ]
    }
   ],
   "source": [
    "print(a := enn.forward(stocks_and_debris))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "nonprofit-castle",
   "metadata": {},
   "outputs": [],
   "source": [
    "def lossb(a):\n",
    "    #test loss function\n",
    "    return (a**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "crucial-homeless",
   "metadata": {},
   "outputs": [],
   "source": [
    "b = ChoiceFunction(batch_size\n",
    "                 ,states\n",
    "                 ,choices\n",
    "                 ,constellations\n",
    "                 ,12)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "practical-journalist",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(29.0569, grad_fn=<SumBackward0>)\n",
      "tensor(23187.2695, grad_fn=<SumBackward0>)\n",
      "tensor(0., grad_fn=<SumBackward0>)\n",
      "tensor(0., grad_fn=<SumBackward0>)\n",
      "tensor(0., grad_fn=<SumBackward0>)\n",
      "tensor(0., grad_fn=<SumBackward0>)\n",
      "tensor(0., grad_fn=<SumBackward0>)\n",
      "tensor(0., grad_fn=<SumBackward0>)\n",
      "tensor(0., grad_fn=<SumBackward0>)\n",
      "tensor(0., grad_fn=<SumBackward0>)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "tensor([[[0.],\n",
       "         [0.]],\n",
       "\n",
       "        [[0.],\n",
       "         [0.]],\n",
       "\n",
       "        [[0.],\n",
       "         [0.]],\n",
       "\n",
       "        [[0.],\n",
       "         [0.]],\n",
       "\n",
       "        [[0.],\n",
       "         [0.]]], grad_fn=<ReluBackward0>)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "optimizer = torch.optim.SGD(b.parameters(),lr=0.01)\n",
    "\n",
    "for i in range(10):\n",
    "    #training loop\n",
    "    optimizer.zero_grad()\n",
    "\n",
    "    output = b.forward(stocks_and_debris)\n",
    "\n",
    "    l = lossb(output)\n",
    "\n",
    "    l.backward()\n",
    "\n",
    "    optimizer.step()\n",
    "\n",
    "    print(l)\n",
    "    \n",
    "\n",
    "b.forward(stocks_and_debris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "correct-complex",
   "metadata": {},
   "outputs": [],
   "source": [
    "def lossa(a):\n",
    "    #test loss function\n",
    "    return (a.choices**2).sum() + (a.partials**2).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "pharmaceutical-brush",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor(336.1971, grad_fn=<AddBackward0>)\n",
      "tensor(67583.6484, grad_fn=<AddBackward0>)\n",
      "tensor(1.5658e+26, grad_fn=<AddBackward0>)\n",
      "tensor(nan, grad_fn=<AddBackward0>)\n",
      "tensor(nan, grad_fn=<AddBackward0>)\n",
      "tensor(nan, grad_fn=<AddBackward0>)\n",
      "tensor(nan, grad_fn=<AddBackward0>)\n",
      "tensor(nan, grad_fn=<AddBackward0>)\n",
      "tensor(nan, grad_fn=<AddBackward0>)\n",
      "tensor(nan, grad_fn=<AddBackward0>)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "tensor([[[0.],\n",
       "         [0.]],\n",
       "\n",
       "        [[0.],\n",
       "         [0.]],\n",
       "\n",
       "        [[0.],\n",
       "         [0.]],\n",
       "\n",
       "        [[0.],\n",
       "         [0.]],\n",
       "\n",
       "        [[0.],\n",
       "         [0.]]], grad_fn=<ReluBackward0>)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "optimizer = torch.optim.SGD(enn.parameters(),lr=0.001) #note the use of enn in the optimizer\n",
    "\n",
    "for i in range(10):\n",
    "    #training loop\n",
    "    optimizer.zero_grad()\n",
    "\n",
    "    output = enn.forward(stocks_and_debris)\n",
    "\n",
    "    l = lossa(output)\n",
    "\n",
    "    l.backward()\n",
    "\n",
    "    optimizer.step()\n",
    "\n",
    "    print(l)\n",
    "    \n",
    "\n",
    "b.forward(stocks_and_debris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "other-subdivision",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}