|
|
module Orbits
|
|
|
|
|
|
using Flux,LinearAlgebra,Zygote
|
|
|
|
|
|
include("physical_model.jl")
|
|
|
using .PhysicsModule
|
|
|
|
|
|
include("benefit_functions.jl")
|
|
|
using .BenefitFunctions
|
|
|
|
|
|
include("flux_helpers.jl")
|
|
|
using .NNTools
|
|
|
|
|
|
# Exports (none currently — module members are accessed fully qualified, e.g. `Orbits.OperatorLoss`)
|
|
|
|
|
|
|
|
|
|
|
|
# Code
|
|
|
|
|
|
"""
    GeneralizedLoss

Abstract supertype for callable loss objects used in training
(`OperatorLoss`, `PlannerLoss`).
"""
abstract type GeneralizedLoss end
|
|
|
|
|
|
#=
This struct organizes information about a given constellation operator.

Used to provide a callable (iterable) loss function for training.
=#
|
|
|
"""
    OperatorLoss <: GeneralizedLoss

Bundles everything needed to evaluate one constellation operator's training
loss: the operator's economic model, its value-function network, the policy
network shared by all operators, the subset of policy parameters this
operator may train, and the physical (orbital) model.

Instances are callable — see the call methods below — so an `OperatorLoss`
can be passed directly to Flux training loops as a loss function.
"""
struct OperatorLoss <: GeneralizedLoss

    # Economic model describing this operator's per-period payoff;
    # also carries the discount factor `β` (read as `econ_model.β` below).
    econ_model::EconomicModel

    # The operator's value-function approximator.
    operator_value_fn::Flux.Chain

    # Policy network shared by ALL operators (each operator holds the same one).
    collected_policies::Flux.Chain

    # The subset of the shared policy's parameters that THIS operator trains.
    operator_policy_params::Flux.Params

    # Orbital-mechanics model.
    # NOTE(review): the call methods below need stock/debris transition
    # functions — presumably supplied through this field; confirm against
    # physical_model.jl.
    physics::PhysicalModel

end
|
|
|
"""
    (operator::OperatorLoss)(s::Vector{Float32}, d::Vector{Float32})

Evaluate the operator's training loss at state `(s, d)` (satellite stocks
and orbital debris). Combines the squared Bellman residual of the value
function with a maximization condition on the Bellman right-hand side,
aggregated via `Flux.mae`.
"""
function (operator::OperatorLoss)(
    s::Vector{Float32},
    d::Vector{Float32},
)
    # Actions chosen by the shared policy network at the current state.
    a = operator.collected_policies((s, d))

    # Propagate stocks and debris one period forward.
    # FIX(review): the original called `operator.stocks_transition(...)`, but
    # `OperatorLoss` has no such fields (see the struct definition) — that
    # access would throw. Route through the physics model instead; confirm
    # `PhysicalModel` exposes `stocks_transition`/`debris_transition`.
    s′ = operator.physics.stocks_transition(s, d, a)
    d′ = operator.physics.debris_transition(s, d, a)

    # Bellman residual: V(s,d) - [π(s,d,a) + β·V(s′,d′)].
    bellman_residuals = operator.operator_value_fn((s, d)) -
        operator.econ_model(s, d, a) -
        operator.econ_model.β * operator.operator_value_fn((s′, d′))

    # Negated Bellman RHS: minimizing it pushes the policy toward actions
    # maximizing current benefit plus discounted continuation value.
    maximization_condition = -operator.econ_model(s, d, a) -
        operator.econ_model.β * operator.operator_value_fn((s′, d′))

    return Flux.mae(bellman_residuals .^ 2, maximization_condition)
end # function
|
|
|
"""
    (operator::OperatorLoss)(state::State, policy::Flux.Chain)

Evaluate the operator's training loss at `state`, but using a substitute
`policy` instead of the operator's shared policy network. Otherwise
identical to the `(s, d)` call method.
"""
function (operator::OperatorLoss)(
    state::State,
    policy::Flux.Chain, # allow another policy to be substituted
)
    # FIX(review): the original body referenced `s` and `d`, which were never
    # defined in this method (its parameter is `state`) — an UndefVarError at
    # runtime. Unpack them here; assumes `State` carries fields `s` and `d`,
    # TODO confirm against the State definition.
    s = state.s
    d = state.d

    # Actions chosen by the substituted policy at the current state.
    a = policy((s, d))

    # Propagate stocks and debris one period forward.
    # FIX(review): `OperatorLoss` has no `stocks_transition`/`debris_transition`
    # fields — route through the physics model; confirm `PhysicalModel` API.
    s′ = operator.physics.stocks_transition(s, d, a)
    d′ = operator.physics.debris_transition(s, d, a)

    # Bellman residual: V(s,d) - [π(s,d,a) + β·V(s′,d′)].
    bellman_residuals = operator.operator_value_fn((s, d)) -
        operator.econ_model(s, d, a) -
        operator.econ_model.β * operator.operator_value_fn((s′, d′))

    # Negated Bellman RHS (see the (s, d) call method).
    maximization_condition = -operator.econ_model(s, d, a) -
        operator.econ_model.β * operator.operator_value_fn((s′, d′))

    return Flux.mae(bellman_residuals .^ 2, maximization_condition)
end # function
|
|
|
|
|
|
#=
Describes the planner's loss function.
=#
|
|
|
"""
    PlannerLoss <: GeneralizedLoss

Loss object for the social planner, who chooses a single policy over the
combined benefit of all operators.

Ideally, with just a well-formed `PlannerLoss` and the training functions,
the approximation can be trained directly.

NOTE(review): per the original development comment, the value functions are
not training correctly yet; the cause is unconfirmed.
"""
struct PlannerLoss <: GeneralizedLoss

    # Planner's discount factor.
    β::Float32

    # Per-operator loss objects — used below only for their `econ_model`s.
    # NOTE(review): abstractly-typed container (`Array{GeneralizedLoss}`);
    # consider a concrete/parametric element type for type stability.
    operators::Array{GeneralizedLoss}

    # Planner's policy network and its trainable parameters.
    policy::Flux.Chain

    policy_params::Flux.Params

    # Planner's value network and its trainable parameters.
    value::Flux.Chain

    value_params::Flux.Params

    # Orbital-mechanics model.
    physical_model::PhysicalModel

end
|
|
|
"""
    (planner::PlannerLoss)(state::State)

Evaluate the planner's training loss at `state`: the squared Bellman residual
of the planner's value function (against the operators' summed benefits)
combined with a maximization condition, aggregated via `Flux.mae`.
"""
function (planner::PlannerLoss)(
    state::State,
)
    # FIX(review): the original body referenced `s`/`d`, never defined in this
    # method — an UndefVarError at runtime. Unpack from `state`; assumes
    # `State` carries fields `s` and `d`, TODO confirm.
    s = state.s
    d = state.d

    # Actions chosen by the planner's policy at the current state.
    a = planner.policy((s, d))

    # Propagate the full state one period forward.
    state′ = state_transition(s, d, a)

    # FIX(review): the original computed `state′` but then used undefined
    # `s′`/`d′`. Unpack them from the transitioned state.
    s′ = state′.s
    d′ = state′.d

    # Total one-period benefit across all operators. A generator (not a
    # materialized array) sidesteps the mutation issue flagged in the original
    # comment — Zygote cannot differentiate through array mutation.
    benefit = sum(co.econ_model(s, d, a) for co in planner.operators)

    # Bellman residual: V(s,d) - [benefit + β·V(s′,d′)].
    bellman_residuals = planner.value((s, d)) - benefit -
        planner.β .* planner.value((s′, d′))

    # Negated Bellman RHS: minimizing it pushes the policy toward actions
    # maximizing total benefit plus discounted continuation value.
    maximization_condition = -benefit - planner.β .* planner.value((s′, d′))

    return Flux.mae(bellman_residuals .^ 2, maximization_condition)
end
|
|
|
|
|
|
end # end module
|