From c2dd67c74444e5d3a666ced4f7809031a7b68378 Mon Sep 17 00:00:00 2001
From: youainti
Date: Mon, 15 Nov 2021 23:09:47 -0800
Subject: [PATCH] finished rewriting computational methods section.

---
 .../Assets/preambles/References.bib           |  14 +
 .../sections/07_ComputationalApproach.tex     | 257 ++++++++++--------
 2 files changed, 152 insertions(+), 119 deletions(-)

diff --git a/3rdyearpresentation/Assets/preambles/References.bib b/3rdyearpresentation/Assets/preambles/References.bib
index fc3a444..64da484 100644
--- a/3rdyearpresentation/Assets/preambles/References.bib
+++ b/3rdyearpresentation/Assets/preambles/References.bib
@@ -217,4 +217,18 @@ JEL Classification Nos.: H4, Q2},
   url = {https://er.jsc.nasa.gov/seh/ricetalk.htm},
 }
 
+@Article{Flux.jl-2018,
+  author = {Michael Innes and Elliot Saba and Keno Fischer and Dhairya Gandhi and Marco Concetto Rudilosso and Neethu Mariya Joy and Tejan Karmali and Avik Pal and Viral Shah},
+  title = {Fashionable Modelling with Flux},
+  journal = {CoRR},
+  year = {2018},
+  volume = {abs/1811.01457},
+  archiveprefix = {arXiv},
+  bibsource = {dblp computer science bibliography, https://dblp.org},
+  biburl = {https://dblp.org/rec/bib/journals/corr/abs-1811-01457},
+  eprint = {1811.01457},
+  timestamp = {Thu, 22 Nov 2018 17:58:30 +0100},
+  url = {https://arxiv.org/abs/1811.01457},
+}
+
 @Comment{jabref-meta: databaseType:bibtex;}

diff --git a/CurrentWriting/sections/07_ComputationalApproach.tex b/CurrentWriting/sections/07_ComputationalApproach.tex
index 93ee881..e11af1a 100644
--- a/CurrentWriting/sections/07_ComputationalApproach.tex
+++ b/CurrentWriting/sections/07_ComputationalApproach.tex
@@ -2,16 +2,25 @@
 \graphicspath{{\subfix{Assets/img/}}}
 \begin{document}
-The computational approach I have decided to take is an application of
-\cite{Maliar2019}, where the policy function is approximated using a
-neural network.
-
-The approach uses the fact that the euler equation implicitly defines the
-optimal policy function, for example:
-$[0] = f(x(\theta),\theta)$.
-This can easily be turned into a mean square objective function,
-$0 = f^2(x(\theta),\theta)$,
-allowing one to find $x(\dot)$ as the solution to a minimization problem.
+The computational approach I take is based on
+\cite{Maliar2019}'s Bellman Residual Minimization, where the
+policy and value functions are approximated using neural networks.
+In summary, the Bellman equation is rewritten in residual form:
+\begin{align}
+    Q = V(S_t,D_t) - F(S_t,D_t,X_t(S_t,D_t)) - \beta V(S_{t+1},D_{t+1})
+\end{align}
+together with the policy maximization objective:
+\begin{align}
+    M = \left[ F(S_t,D_t,X_t(S_t,D_t)) + \beta V(S_{t+1},D_{t+1})\right]
+\end{align}
+
+In the deterministic case, a loss function can be constructed in
+either of the following equivalent forms:
+\begin{align}
+    \phi_1 = Q^2 - vM \\
+    \phi_2 = \left(M - Q - \frac{v}{2}\right)^2 - v \cdot \left(Q + \frac{v}{4}\right)
+\end{align}
+where $v$ is an external weighting parameter which can be chosen by cross-validation.
 
 By choosing a neural network as the functional approximation, we are able
 to use the fact that a NN with a single hidden layer can be used to approximate
@@ -20,129 +29,139 @@
 under certain conditions \cref{White1990}.
 We can also take advantage of the significant computational and practical
 improvements currently revolutionizing Machine Learning.
-In particular, we can now use common frameworks, such as python, PyTorch,
-and various online accerators (Google Colab)
-which have been optimized for relatively high performance and
-straightforward development.
+Some examples include the use of specialized hardware and the ability to transfer
+learning between models, both of which can speed up functional approximation.
 
 \subsection{Computational Plan}
-I have decided to use python and the PyTorch Neural Network library for this project.
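To make the loss construction above concrete, here is a minimal sketch of $Q$, $M$, and the two losses. The implementation in this project is in Flux.jl/Julia; this sketch uses plain Python, with hypothetical stand-in functions for the value function $V$, the per-period payoff $F$, and the laws of motion, and assumed values for $\beta$ and $v$.

```python
# Sketch of the Bellman residual Q, the continuation objective M, and the
# two candidate losses phi_1 and phi_2.  V, F, and `step` are hypothetical
# stand-ins for the value function, per-period payoff, and laws of motion;
# beta and v are assumed values, not calibrated parameters.

beta = 0.95  # discount factor (assumed)
v = 0.1      # external weighting parameter, to be cross-validated

def bellman_residuals(V, F, step, s, d, x):
    """Return (Q, M) at state (s, d) with decision x."""
    s_next, d_next = step(s, d, x)             # laws of motion
    M = F(s, d, x) + beta * V(s_next, d_next)  # continuation objective
    Q = V(s, d) - M                            # Bellman residual
    return Q, M

def phi_1(Q, M):
    return Q ** 2 - v * M

def phi_2(Q, M):
    return (M - Q - v / 2) ** 2 - v * (Q + v / 4)
```

As a sanity check on any implementation, $\phi_2$ expands algebraically to $(M - Q)^2 - vM$, so the two expressions can be compared numerically during debugging.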
+The neural network library I've chosen to use is Flux.jl \cite{Flux.jl-2018},
+a neural network library implemented in and for the Julia language,
+although the Bellman Residual Minimization algorithm would work equally well in
+PyTorch or TensorFlow%
+\footnote{
+    The initial reason I investigated Flux/Julia is its source-to-source
+    automatic differentiation capabilities, which I intended to use to implement
+    a generic version of \cite{Maliar2019}'s Euler equation iteration method.
+    While I still believe this is possible and that Flux represents one of the
+    best tools available for that specific purpose,
+    I've been unsuccessful at implementing the algorithm.
+}.
+Below I note some of the design, training, and implementation decisions.
+
+%Data Description
+The data used to train the network is simulated, drawn from random distributions.
+One advantage of this approach is that changing the distribution changes the
+emphasis of the training.
+Initially, training can be focused on certain areas of the state space, and later
+training can shift its focus to other areas as their importance is recognized.
+When we don't know which areas of the state space to investigate, it is possible
+to optimize over a given dataset and then iterate the stocks and debris forward
+many periods.
+If the resulting stocks and debris don't line up well with the initial training
+dataset, we can change the distribution to cover the stocks and debris from the
+iteration, thus bootstrapping the distribution of the training set.
+
+\subsubsection{Constellation Operators}
+%Operators
+% Branched Policy Topology
+% Individual Value functions
+% Training Loop
+Although there are multiple operators, the individual policy functions
+show up jointly as the code is currently implemented.
+For this reason, I've implemented each operator's policy function
+as a ``branch'' within a single neural network.
+These branches are configured such that they each receive the same
+inputs (stocks and debris), but decisions in each branch are made without
+reference to the other branches.
+These results are then concatenated together into the final policy vector.
+When training a given operator, the appropriate branch is unfrozen so that
+only that operator's parameters are updated.
+Value functions are implemented as unique neural networks at the constellation
+operator level, much like the operator's Bellman residual function.
+
+The training loop takes the form of:
+
+For each epoch
+\begin{enumerate}
+    \item generate data
+    \item for each operator
+    \begin{enumerate}
+        \item Unfreeze branch
+        \item Train policy function on data
+        \item Freeze branch
+        \item Train Value function on data
+    \end{enumerate}
+    \item Check termination conditions
+\end{enumerate}
+
+Overall, this allows each operator's policy and value functions to be approximated
+on its own Bellman residuals, while maintaining a convenient interface.
+
+\subsubsection{Planner}
+%Planner
+% policy topology
+% Value function topology
+% Training loop
+
+The policy function for the Fleet Planner does not require any separate branches,
+although it could have them if desired for comparison purposes.
+The key point, though, is that no parameter freezing is done during training,
+allowing the repercussions on other constellations to be taken into account.
+Similarly, a single neural network is used to estimate the value function.
+
+The training loop takes the form of:
+
+For each epoch
+\begin{enumerate}
+    \item generate data
+    \item Train policy function on data
+    \item Train Value function on data
+    \item Check termination conditions
+\end{enumerate}
+
+\subsubsection{Heterogeneous Agents and Nash Equilibria}
+One key question is how to handle the case of heterogeneous agents.
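The per-operator freeze/unfreeze training loop described above can be sketched schematically. This is not the Flux.jl implementation; it is a plain-Python illustration in which a dummy ``+1'' parameter update stands in for a real gradient step, and all names (`run_epoch`, `generate_data`, and so on) are hypothetical.

```python
# Schematic per-operator training loop with branch freezing.  A dummy
# "+1" update stands in for a real gradient step, and generate_data is a
# stub for sampling simulated states.  All names are hypothetical.

def generate_data():
    return []  # stub: draw (stocks, debris) from the chosen distribution

def train_step(params, frozen, data):
    """Apply the dummy update to every branch that is not frozen.
    (`data` is unused here; a real step would compute gradients on it.)"""
    return {branch: (p if branch in frozen else p + 1.0)
            for branch, p in params.items()}

def run_epoch(policy_params, value_params, operators):
    """One epoch: generate data, then train each operator in turn."""
    data = generate_data()
    for op in operators:
        # unfreeze only this operator's policy branch
        frozen = {b for b in policy_params if b != op}
        policy_params = train_step(policy_params, frozen, data)
        # each operator's value function is its own network
        value_params[op] += 1.0
    return policy_params, value_params
```

Running one epoch with a single operator in the list updates only that operator's policy branch and value network, leaving every other branch untouched.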
+In the processes outlined above, the heterogeneous agents are simply
+identified by their position in the state and action vectors, and
+the NN then learns how to operate with each of them\footnote{
+    I believe it may be possible to create classifications of
+    different heterogeneous agent types that allow for simpler function transfer,
+    but the implementation will take some extensive code design work.
+}.
-The most difficult step is creating the euler equations.
-When working with high dimensioned problems involving differentiation,
-three general computational approaches exist:
-\begin{itemize}
-    \item Using a symbolic library (sympy) or language (mathematica) to create the
-    euler equations.
-    This has the disadvantage of being (very) slow, but the advantage that
-    for a single problem specification it only needs completed once.
-    It requires taking a matrix inverse, which can easily complicate formulas
-    and is computationally complex, approximately $O(n^3)$ algorithm.
-    \item Using numerical differentiation (ND).
-    The primary issue with ND is that errors can grow quite quickly when
-    performing algebra on numerical derivatives.
-    This requires tracking how errors can grow and compound within your
-    specific formulation of the problem.
-    \item Using automatic differentiation (AD) to differentiate the computer code
-    directly.
-    This approach has a few major benefits.
-    \begin{itemize}
-        \item Precision is high, because you are calcuating symbolic
-        derivatives of your computer functions.
-        \item ML is heavily dependent on AD, thus the tools are plentiful
-        and tested.
-        \item The coupling of AD and ML lead to a tight integration with
-        the neural network libraries, simplifying the calibration procedure.
-    \end{itemize}
-\end{itemize}
-I have chosen to use the AD to generate a euler equation function, which will
-then be the basis of our objective function.
-
-
-The first step is to construct the intertemporal transition functions
-(e.g \ref{put_refs_here}).
-%Not sure how much detail to use.
-%I'm debating on describing how it is done.
-These take derivatives of the value function at time $t$ as an input, and output
-derivatives of the value function at time $t+1$.
-Once this function has been finished, it can be combined with the laws of motion
-in an iterated manner to transition between times $t$ and times $t+k$.
-I did so by coding a function that iteratively compose the transition
-and laws of motion functions, retuning a $k$-period transition function.
-
-The second step is to generate functions that represent the optimality conditions.
-By taking the appropriate derivatives with respect to the laws of motion and
-benefit functions, this can be constructed explicitly.
-Once these two functions are completed, they can be combined to create
-the euler equations, as described in appendix \ref{APX:Derivations:EulerEquations}.
-
-%%% Is it FaFCCs or recursion that allows this to occur?
-%%% I believe both are ways to approach the problem.
-%\paragraph{Functions As First Class Citizens}
-%The key computer science tool that makes this possible is the concept
-%of ``functions as first class citizens'' (FaFCCs).
-%In every computer language there are primitive values that functions
-%operate on.
-%When a language considers FaFCCs, functions are one of the primitives
-%that functions can operate on.
-%This is how we can get
-
-%AD in pytorch does not work by FaFCC though, instead constructing a computational graph.
-
-\paragraph{Training}
-
-With the euler equation and resulting objective function in place,
-standard training approachs can be used to fit the function.
-I plan on using some variation on stochastic gradient descent.
-
-Normally, neural networks are trained on real world data.
-As this is a synthetic model, I am planning on training it on random selections
-from the state space.
-If I can data on how satellites are and have been distributed, I plan on
-selecting from that distribution.
-
-\paragraph{Heterogeneous Agents}
-One key question is how to handle the case of heterogeneous agents.
-When the laws of motion depend on other agents' decisions, as is the case
-described in \ref{SEC:Laws}, intertemporal iteration may
-require knowing the other agents best response function.
-I believe I can model this in the constellation operator's case
-by solving for the policy functions of each class of operator
-simultaneously.
-I would like to verify this approach as I have not dived into
-some of the mathemeatics that deeply.
+When the laws of motion depend on other agents' decisions, the opportunity
+for Nash and other game-theoretic equilibria arises.
+One benefit of using neural networks is that they can find standard equilibrium
+concepts, including mixed Nash equilibria, if configured properly.
+%concerns about nash computability
 
 \subsection{Functional Forms}
-The simpleset functional forms for the model are similar to those in
-\autocite{RaoRondina2020}, giving:
+The reference functional forms for the model are similar to those
+given in \autocite{RaoRondina2020}.
 \begin{itemize}
-    \item The per-period benefit function:
+    \item The linear per-period benefit function:
     \begin{align}
-        u^i(\{s^j_t\}, D_t) = \pi s^i_t
+        u^i(S_t, D_t, X_t) = \pi s^i_t - f \cdot x^i_t
     \end{align}
-    \item The launch cost function:
+    \item Each constellation's satellite survival function:
     \begin{align}
-        F(x^i_t) = f \cdot x^i_t
-    \end{align}
-    \item The satellite destruction rate function:
-    \begin{align}
-        l^i(\{s^j_t\}, D_t) = 1 - e^{- d\cdot D_t - \sum^N_{j=1} h^j s^j_t}
-    \end{align}
-    \item The debris autocatalysis function:
-    \begin{align}
-        g(D_t) = g\cdot D_t
-        \\
-        g > 1
+        R^i(S_t, D_t) = e^{- d\cdot D_t - \sum^N_{j=1} h^j s^j_t}
     \end{align}
 \end{itemize}
 
-\subsection{Existence concerns}
+\subsubsection{Parameter Values}
+%I'm just guessing.
+Currently, I have not found a way to estimate the proper parameter values,
+so a discussion of how to calibrate those parameters is still needed.
+For now, my goal is to choose parameters of approximately
+the correct order of magnitude.
+
+%\subsection{Existence concerns}
 %check matrix inverses etc.
 %
-I am currently working on a plan to guarantee existence of solutions.
-Some of what I want to do is check numerically crucial values and
-mathematically necessary conditions for existence and uniqueness.
-Unfortunately this is little more than just a plan right now.
+%I am currently working on a plan to guarantee existence of solutions.
+%Some of what I want to do is check numerically crucial values and
+%mathematically necessary conditions for existence and uniqueness.
+%Unfortunately this is little more than just a plan right now.
 
 \end{document}