|
|
|
|
@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
module Orbits
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#Add exports here
|
|
|
|
|
export State ,state_to_tuple ,state_transition ,PhysicalModel ,BasicPhysics
|
|
|
|
|
|
|
|
|
|
export BranchGenerator, value_function_generator,
|
|
|
|
|
cross_linked_planner_policy_function_generator,
|
|
|
|
|
operator_policy_function_generator
|
|
|
|
|
|
|
|
|
|
# Exports
|
|
|
|
|
export GeneralizedLoss ,OperatorLoss ,PlannerLoss ,UniformDataConstructor,
|
|
|
|
|
LinearModel ,BasicPhysics,
|
|
|
|
|
operator_policy_function_generator ,value_function_generator ,BranchGenerator,
|
|
|
|
|
TupleDuplicator, BranchGenerator
|
|
|
|
|
|
|
|
|
|
using Flux, LinearAlgebra
|
|
|
|
|
using BSON, Zygote
|
|
|
|
|
export LinearModel, BenefitFunction, EconomicModel
|
|
|
|
|
|
|
|
|
|
#==#
|
|
|
|
|
struct State
|
|
|
|
|
stocks::Array{Float32}
|
|
|
|
|
debris::Array{Float32}
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
function state_to_tuple(s::State)
|
|
|
|
|
return (s.stocks ,s.debris)
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
abstract type PhysicalModel end
|
|
|
|
|
|
|
|
|
|
struct BasicPhysics <: PhysicalModel
|
|
|
|
|
#rate at which debris hits satellites
|
|
|
|
|
debris_collision_rate::Real
|
|
|
|
|
#rate at which satellites of different constellations collide
|
|
|
|
|
satellite_collision_rates::Matrix{Float64}
|
|
|
|
|
#rate at which debris exits orbits
|
|
|
|
|
decay_rate::Real
|
|
|
|
|
#rate at which satellites
|
|
|
|
|
autocatalysis_rate::Real
|
|
|
|
|
#ratio at which a collision between satellites produced debris
|
|
|
|
|
satellite_collision_debris_ratio::Real
|
|
|
|
|
#Ratio at which launches produce debris
|
|
|
|
|
launch_debris_ratio::Real
|
|
|
|
|
end
|
|
|
|
|
function state_transition(
|
|
|
|
|
physics::BasicPhysics
|
|
|
|
|
,state::State
|
|
|
|
|
,launches::Vector{Float32}
|
|
|
|
|
)
|
|
|
|
|
#=
|
|
|
|
|
Physical Transitions
|
|
|
|
|
=#
|
|
|
|
|
survival_rate = survival(state,physics)
|
|
|
|
|
|
|
|
|
|
# Debris transitions
|
|
|
|
|
|
|
|
|
|
# get changes in debris from natural dynamics
|
|
|
|
|
natural_debris_dynamics = (1 - physics.decay_rate + physics.autocatalysis_rate) * state.debris
|
|
|
|
|
|
|
|
|
|
# get changes in debris from satellite loss
|
|
|
|
|
satellite_loss_debris = physics.satellite_collision_debris_ratio * (1 .- survival_rate)' * state.stocks
|
|
|
|
|
|
|
|
|
|
# get changes in debris from launches
|
|
|
|
|
launch_debris = physics.launch_debris_ratio * sum(launches)
|
|
|
|
|
|
|
|
|
|
# total debris level
|
|
|
|
|
debris′ = natural_debris_dynamics .+ satellite_loss_debris .+ launch_debris
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Stocks Transitions
|
|
|
|
|
stocks′ = (LinearAlgebra.diagm(survival_rate) .- physics.decay_rate)*state.stocks + launches
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return State(stocks′,debris′)
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function survival(
|
|
|
|
|
state::State
|
|
|
|
|
,physical_model::BasicPhysics
|
|
|
|
|
)
|
|
|
|
|
return exp.(-(physical_model.satellite_collision_rates .+ physical_model.decay_rate) * state.stocks .- (physical_model.debris_collision_rate * state.debris))
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#=
|
|
|
|
|
Benefit Functions:
|
|
|
|
|
|
|
|
|
|
These represent the benefits that different operators may recieve
|
|
|
|
|
=#
|
|
|
|
|
abstract type EconomicModel end
|
|
|
|
|
|
|
|
|
|
#basic linear model
|
|
|
|
|
struct LinearModel <: EconomicModel
|
|
|
|
|
β::Float32
|
|
|
|
|
payoff_array::Array{Float32}
|
|
|
|
|
policy_costs::Array{Float32}
|
|
|
|
|
end
|
|
|
|
|
function (em::LinearModel)(
|
|
|
|
|
state::State
|
|
|
|
|
,actions::Vector{Float32}
|
|
|
|
|
)
|
|
|
|
|
return em.payoff_array*state.stocks - em.policy_costs*actions
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
#basic CES model
|
|
|
|
|
struct CES <: EconomicModel
|
|
|
|
|
β::Float32
|
|
|
|
|
r::Float32 #elasticity of subsititution
|
|
|
|
|
payoff_array::Array{Float32}
|
|
|
|
|
policy_costs::Array{Float32}
|
|
|
|
|
debris_costs::Array{Float32}
|
|
|
|
|
end
|
|
|
|
|
function (em::CES)(
|
|
|
|
|
state::State
|
|
|
|
|
,actions::Vector{Float32}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
#issue here with multiplication
|
|
|
|
|
r1 = em.payoff_array .* (state.stocks.^em.r)
|
|
|
|
|
r2 = - em.debris_costs .* (state.debris.^em.r)
|
|
|
|
|
r3 = - em.policy_costs .* (actions.^em.r)
|
|
|
|
|
return (r1 + r2 + r3) .^ (1/em.r)
|
|
|
|
|
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
#basic CRRA
|
|
|
|
|
struct CRRA <: EconomicModel
|
|
|
|
|
β::Float32
|
|
|
|
|
σ::Float32 #elasticity of subsititution
|
|
|
|
|
payoff_array::Array{Float32}
|
|
|
|
|
policy_costs::Array{Float32}
|
|
|
|
|
debris_costs::Array{Float32}
|
|
|
|
|
end
|
|
|
|
|
function (em::CRRA)(
|
|
|
|
|
state::State
|
|
|
|
|
,actions::Vector{Float32}
|
|
|
|
|
)
|
|
|
|
|
#issue here with multiplication
|
|
|
|
|
core = (em.payoff_array*state.stocks - em.debris_costs*state.debris - em.policy_costs*actions).^(1 - em.σ)
|
|
|
|
|
|
|
|
|
|
return (core-1)/(1-em.σ)
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#= TupleDuplicator
|
|
|
|
|
This is used to create a tuple of size n with deepcopies of any object x
|
|
|
|
|
=#
|
|
|
|
|
struct TupleDuplicator
|
|
|
|
|
n::Int
|
|
|
|
|
end
|
|
|
|
|
#make tuples consisting of copies of whatever was provided
|
|
|
|
|
function (f::TupleDuplicator)(s::State)
|
|
|
|
|
st = state_to_tuple(s)
|
|
|
|
|
return f(st)
|
|
|
|
|
#BROKEN: Fails in test, but works in REPL.
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
(f::TupleDuplicator)(x) = tuple([deepcopy(x) for i=1:f.n]...)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct BranchGenerator
|
|
|
|
|
n::UInt
|
|
|
|
|
end
|
|
|
|
|
function (b::BranchGenerator)(branch::Flux.Parallel,join_fn::Function)
|
|
|
|
|
# used to deepcopy the branches and duplicate the inputs in the returned chain
|
|
|
|
|
f = TupleDuplicator(b.n)
|
|
|
|
|
|
|
|
|
|
return Flux.Chain(state_to_tuple,f,Flux.Parallel(join_fn,f(branch)...))
|
|
|
|
|
#note that it destructures the state to a tuple, duplicates,
|
|
|
|
|
# and then passes to the parallelized functions.
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Neural Network Generators
|
|
|
|
|
|
|
|
|
|
function value_function_generator(number_params=32)
|
|
|
|
|
return Flux.Chain(
|
|
|
|
|
Flux.Parallel(vcat
|
|
|
|
|
#parallel joins together stocks and debris, after a little bit of preprocessing
|
|
|
|
|
,Flux.Chain(
|
|
|
|
|
Flux.Dense(N_constellations, number_params*2,Flux.relu)
|
|
|
|
|
,Flux.Dense(number_params*2, number_params*2,Flux.σ)
|
|
|
|
|
)
|
|
|
|
|
,Flux.Chain(
|
|
|
|
|
Flux.Dense(N_debris, number_params,Flux.relu)
|
|
|
|
|
,Flux.Dense(number_params, number_params,Flux.σ)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
#Apply some transformations to the preprocessed data.
|
|
|
|
|
,Flux.Dense(number_params*3,number_params,Flux.σ)
|
|
|
|
|
,Flux.Dense(number_params,number_params,Flux.σ)
|
|
|
|
|
,Flux.Dense(number_params,1)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
function cross_linked_planner_policy_function_generator(number_params=32)
|
|
|
|
|
return Flux.Chain(
|
|
|
|
|
Flux.Parallel(vcat
|
|
|
|
|
#parallel joins together stocks and debris
|
|
|
|
|
,Flux.Chain(
|
|
|
|
|
Flux.Dense(N_constellations, number_params*2,Flux.relu)
|
|
|
|
|
,Flux.Dense(number_params*2, number_params*2,Flux.σ)
|
|
|
|
|
)
|
|
|
|
|
,Flux.Chain(
|
|
|
|
|
Flux.Dense(N_debris, number_params,Flux.relu)
|
|
|
|
|
,Flux.Dense(number_params, number_params)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
#Apply some transformations
|
|
|
|
|
,Flux.Dense(number_params*3,number_params,Flux.σ)
|
|
|
|
|
,Flux.Dense(number_params,number_params,Flux.σ)
|
|
|
|
|
,Flux.Dense(number_params,N_constellations,Flux.relu)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
function operator_policy_function_generator(
|
|
|
|
|
N_constellations::Int
|
|
|
|
|
,N_debris
|
|
|
|
|
,number_params=32
|
|
|
|
|
)
|
|
|
|
|
return Flux.Chain(
|
|
|
|
|
Flux.Parallel(vcat
|
|
|
|
|
#parallel joins together stocks and debris
|
|
|
|
|
,Flux.Chain(
|
|
|
|
|
Flux.Dense(N_constellations, number_params*2,Flux.relu)
|
|
|
|
|
,Flux.Dense(number_params*2, number_params*2,Flux.tanh)
|
|
|
|
|
)
|
|
|
|
|
,Flux.Chain(
|
|
|
|
|
Flux.Dense(N_debris, number_params,Flux.relu)
|
|
|
|
|
,Flux.Dense(number_params, number_params)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
#Apply some transformations
|
|
|
|
|
,Flux.Dense(number_params*3,number_params,Flux.tanh)
|
|
|
|
|
,Flux.Dense(number_params,number_params,Flux.tanh)
|
|
|
|
|
,Flux.Dense(number_params,1,x -> Flux.relu.(sinh.(x)))
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
abstract type GeneralizedLoss end
|
|
|
|
|
|
|
|
|
|
#=
|
|
|
|
|
This struct organizes information about a given constellation operator
|
|
|
|
|
Used to provide an interable loss function for training
|
|
|
|
|
=#
|
|
|
|
|
struct OperatorLoss <: GeneralizedLoss
|
|
|
|
|
#econ model describing operator
|
|
|
|
|
econ_model::EconomicModel
|
|
|
|
|
#Operator's value and policy functions, as well as which parameters the operator can train.
|
|
|
|
|
operator_value_fn::Flux.Chain
|
|
|
|
|
collected_policies::Flux.Chain #this is held by all operators
|
|
|
|
|
operator_policy_params::Flux.Params
|
|
|
|
|
physics::PhysicalModel
|
|
|
|
|
end
|
|
|
|
|
# overriding function to calculate operator loss
|
|
|
|
|
function (operator::OperatorLoss)(
|
|
|
|
|
state::State
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#get actions
|
|
|
|
|
a = operator.collected_policies((s,d))
|
|
|
|
|
|
|
|
|
|
#get updated stocks and debris
|
|
|
|
|
state′ = state_transition(operator.physics,state,a)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bellman_residuals = operator.operator_value_fn(state) - operator.econ_model(state,a) - operator.econ_model.β * operator.operator_value_fn(state′)
|
|
|
|
|
|
|
|
|
|
maximization_condition = - operator.econ_model(state ,a) - operator.econ_model.β * operator.operator_value_fn((state′))
|
|
|
|
|
|
|
|
|
|
return Flux.mae(bellman_residuals.^2 ,maximization_condition)
|
|
|
|
|
end # struct
|
|
|
|
|
function (operator::OperatorLoss)(
|
|
|
|
|
state::State
|
|
|
|
|
,policy::Flux.Chain #allow for another policy to be subsituted
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#get actions
|
|
|
|
|
a = policy(state)
|
|
|
|
|
|
|
|
|
|
#get updated stocks and debris
|
|
|
|
|
state′ = stake_transition(state,a)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bellman_residuals = operator.operator_value_fn(state) - operator.econ_model(state,a) - operator.econ_model.β * operator.operator_value_fn(state′)
|
|
|
|
|
|
|
|
|
|
maximization_condition = - operator.econ_model(state,a) - operator.econ_model.β * operator.operator_value_fn(state′)
|
|
|
|
|
|
|
|
|
|
return Flux.mae(bellman_residuals.^2 ,maximization_condition)
|
|
|
|
|
end #function
|
|
|
|
|
|
|
|
|
|
function train_operators!(op::OperatorLoss, data::State, opt)
|
|
|
|
|
Flux.train!(op, op.operator_policy_params, data, opt)
|
|
|
|
|
Flux.train!(op, Flux.params(op.operator_value_fn), data, opt)
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#=
|
|
|
|
|
Describe the Planners loss function
|
|
|
|
|
=#
|
|
|
|
|
struct PlannerLoss <: GeneralizedLoss
|
|
|
|
|
#=
|
|
|
|
|
Ideally, with just a well formed PlannerLoss and the training functions below, we should be able to train the approximation.
|
|
|
|
|
|
|
|
|
|
There is an issue with appropriately training the value functions.
|
|
|
|
|
In this case, it is not happening...
|
|
|
|
|
=#
|
|
|
|
|
β::Float32
|
|
|
|
|
|
|
|
|
|
operators::Array{GeneralizedLoss}
|
|
|
|
|
|
|
|
|
|
policy::Flux.Chain
|
|
|
|
|
policy_params::Flux.Params
|
|
|
|
|
value::Flux.Chain
|
|
|
|
|
value_params::Flux.Params
|
|
|
|
|
|
|
|
|
|
physical_model::PhysicalModel
|
|
|
|
|
end
|
|
|
|
|
function (planner::PlannerLoss)(
|
|
|
|
|
state::State
|
|
|
|
|
)
|
|
|
|
|
a = planner.policy((s ,d))
|
|
|
|
|
#get updated stocks and debris
|
|
|
|
|
state′ = state_transition(s ,d ,a)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#calculate the total benefit from each of the models
|
|
|
|
|
benefit = sum([ co.econ_model(s ,d ,a) for co in planner.operators])
|
|
|
|
|
#issue here with mutating. Maybe generators/list comprehensions?
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bellman_residuals = planner.value((s,d)) - benefit - planner.β .* planner.value((s′,d′))
|
|
|
|
|
|
|
|
|
|
maximization_condition = - benefit - planner.β .* planner.value((s′,d′))
|
|
|
|
|
|
|
|
|
|
return Flux.mae(bellman_residuals.^2 ,maximization_condition)
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
function train_planner!(pl::PlannerLoss, N_epoch::Int, opt)
|
|
|
|
|
errors = []
|
|
|
|
|
for i = 1:N_epoch
|
|
|
|
|
data = data_gen()
|
|
|
|
|
Flux.train!(pl, pl.policy_params, data, opt)
|
|
|
|
|
Flux.train!(pl, pl.value_params, data, opt)
|
|
|
|
|
append!(errors, error(pl, data1) / 200)
|
|
|
|
|
end
|
|
|
|
|
return errors
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
function train_operators!(pl::PlannerLoss, N_epoch::Int, opt)
|
|
|
|
|
errors = []
|
|
|
|
|
for i = 1:N_epoch
|
|
|
|
|
data = data_gen()
|
|
|
|
|
for op in pl.operators
|
|
|
|
|
train_operators!(op,data,opt)
|
|
|
|
|
end
|
|
|
|
|
end
|
|
|
|
|
return errors
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#Construct and manage data
|
|
|
|
|
abstract type DataConstructor end
|
|
|
|
|
|
|
|
|
|
struct UniformDataConstructor <: DataConstructor
|
|
|
|
|
N::UInt64
|
|
|
|
|
satellites_bottom::Float32
|
|
|
|
|
satellites_top::Float32
|
|
|
|
|
debris_bottom::Float32
|
|
|
|
|
debris_top::Float32
|
|
|
|
|
end
|
|
|
|
|
function (dc::UniformDataConstructor)()
|
|
|
|
|
#currently ignores the quantity of data it should construct.
|
|
|
|
|
State(
|
|
|
|
|
rand(dc.satellites_bottom:dc.satellites_top, N_constellations)
|
|
|
|
|
, rand(dc.debris_bottom:dc.debris_top, N_debris)
|
|
|
|
|
)
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
end # end module
|