current work, can't get convergence as I'd like

temporaryWork^2
youainti 5 years ago
parent 90a4a56baf
commit d5635622cc

@ -4,70 +4,6 @@ import itertools
import math
import abc
class LinearProfit:
    """
    The simplest type of profit function available.

    The per-period profit is a linear benefit of the current stocks minus
    the launch cost of this constellation's own launch decision.
    """
    def __init__(self, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0):
        # Track which constellation this agent controls.
        self.constellation_number = constellation_number
        # Parameters describing the agent's situation.
        self.discount_factor = discount_factor
        self.benefit_weights = benefit_weight
        self.launch_cost = launch_cost
        self.deorbit_cost = deorbit_cost
    def __str__(self):
        return "LinearProfit\n Benefit weights:\t{}\n launch cost:\t{}\n Deorbit cost:\t{}".format(self.benefit_weights, self.launch_cost, self.deorbit_cost)
    def period_benefit(self, state, estimand_interface):
        """Per-period benefit given a States object and an estimand interface."""
        return self._period_benefit(state.stocks, state.debris, estimand_interface.choices)
    def _period_benefit(self, stocks, debris, launches):
        # Linear benefit of stocks minus this constellation's launch cost.
        # `debris` is accepted (though unused in the profit itself) so the
        # jacobian helpers below can differentiate with respect to it.
        # NOTE(review): renamed the third parameter `choice` -> `launches`
        # for consistency with the jacobian helpers; behavior unchanged.
        profits = self.benefit_weights @ stocks \
                - self.launch_cost * launches[self.constellation_number] #\
                #- deorbit_cost @ deorbits[self.constellation_number]
        return profits
    def period_benefit_jacobian_wrt_states(self, states, estimand_interface):
        """Jacobian of the period benefit with respect to the state variables."""
        return self._period_benefit_jacobian_wrt_states(states.stocks, states.debris, estimand_interface.choices)
    def _period_benefit_jacobian_wrt_states(self, stocks, debris, launches):
        jac = jacobian(self._period_benefit, (stocks, debris, launches))
        # Concatenate the stock and debris gradients into a single
        # state-gradient tensor (assumes both have matching leading shape
        # for torch.cat -- TODO confirm against callers).
        return torch.cat((jac[0], jac[1]))
    def period_benefit_jacobian_wrt_launches(self, states, estimand_interface):
        """Jacobian of the period benefit with respect to the launch decisions."""
        return self._period_benefit_jacobian_wrt_launches(states.stocks, states.debris, estimand_interface.choices)
    def _period_benefit_jacobian_wrt_launches(self, stocks, debris, launches):
        jac = jacobian(self._period_benefit, (stocks, debris, launches))
        # jac is ordered (d/dstocks, d/ddebris, d/dlaunches).
        return jac[2]
class States():
    """
    Container for the model's state variables (satellite stocks and debris),
    providing a common interface when passing state between functions.
    """
    def __init__(self, stocks, debris):
        # stocks: per-constellation satellite levels; debris: tracked debris
        # levels.  Both are torch tensors; their shapes are assumed compatible
        # for concatenation along the last dimension -- TODO confirm callers.
        self.stocks = stocks
        self.debris = debris
    def __str__(self):
        return "stocks\t{} \ndebris\t {}".format(self.stocks,self.debris)
    @property
    def values(self):
        # BUGFIX: the property previously had no body (SyntaxError).  Expose
        # stocks and debris as one tensor, concatenated on the trailing axis,
        # matching the interface the estimand networks consume.
        return torch.cat((self.stocks, self.debris), dim=-1)
    @property
    def number_constellations(self):
        # len() of a tensor is its leading dimension.
        return len(self.stocks)
    @property
    def number_debris_trackers(self):
        return len(self.debris)
class EstimandInterface():
"""
@ -194,9 +130,9 @@ class PartialDerivativesOfValueEstimand(torch.nn.Module):
"""
def __init__(self
,batch_size
, number_constellations
, number_states
, layer_size=12):
,number_constellations
,number_states
,layer_size=12):
super().__init__()
self.batch_size = batch_size #used for upscaling
self.number_constellations = number_constellations

@ -3,7 +3,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "ceramic-doctrine",
"id": "french-experiment",
"metadata": {},
"outputs": [],
"source": [
@ -275,7 +275,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "executive-royal",
"id": "suited-nothing",
"metadata": {},
"outputs": [],
"source": [
@ -307,7 +307,7 @@
},
{
"cell_type": "markdown",
"id": "numerical-mexico",
"id": "recognized-story",
"metadata": {},
"source": [
"# Testing\n",
@ -318,7 +318,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "packed-economics",
"id": "smart-association",
"metadata": {},
"outputs": [],
"source": [
@ -331,37 +331,29 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "compliant-circle",
"execution_count": 84,
"id": "unsigned-hungary",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[ 1., 30., 11.]],\n",
"\n",
" [[60., 74., 1.]],\n",
"\n",
" [[46., 33., 70.]],\n",
"\n",
" [[42., 29., 32.]],\n",
"\n",
" [[82., 72., 57.]]])"
"torch.Size([5, 1, 3])"
]
},
"execution_count": 4,
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks_and_debris"
"stocks_and_debris.size()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "theoretical-spectrum",
"id": "regulated-conversation",
"metadata": {},
"outputs": [
{
@ -409,7 +401,7 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "vulnerable-penalty",
"id": "rental-detection",
"metadata": {},
"outputs": [],
"source": [
@ -421,7 +413,7 @@
{
"cell_type": "code",
"execution_count": 30,
"id": "classified-estimate",
"id": "mechanical-joshua",
"metadata": {},
"outputs": [],
"source": [
@ -435,7 +427,7 @@
{
"cell_type": "code",
"execution_count": 31,
"id": "martial-premium",
"id": "charged-request",
"metadata": {},
"outputs": [
{
@ -502,7 +494,7 @@
{
"cell_type": "code",
"execution_count": 45,
"id": "corrected-jewelry",
"id": "perceived-permit",
"metadata": {},
"outputs": [],
"source": [
@ -514,7 +506,7 @@
{
"cell_type": "code",
"execution_count": 53,
"id": "opened-figure",
"id": "atomic-variance",
"metadata": {},
"outputs": [],
"source": [
@ -528,7 +520,7 @@
{
"cell_type": "code",
"execution_count": 74,
"id": "chicken-inspector",
"id": "biological-badge",
"metadata": {},
"outputs": [
{
@ -600,7 +592,7 @@
{
"cell_type": "code",
"execution_count": 78,
"id": "southwest-diamond",
"id": "compliant-johnson",
"metadata": {},
"outputs": [],
"source": [
@ -612,7 +604,7 @@
{
"cell_type": "code",
"execution_count": 81,
"id": "brave-treat",
"id": "alive-potato",
"metadata": {},
"outputs": [],
"source": [
@ -626,7 +618,7 @@
{
"cell_type": "code",
"execution_count": 83,
"id": "functional-render",
"id": "changed-instruction",
"metadata": {},
"outputs": [
{
@ -771,7 +763,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "voluntary-postage",
"id": "proved-amsterdam",
"metadata": {},
"outputs": [],
"source": []

@ -3,216 +3,262 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "departmental-hardware",
"id": "royal-trace",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from torch.autograd.functional import jacobian"
"import combined as c\n",
"import NeuralNetworkSpecifications as nns"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "differential-shock",
"execution_count": 2,
"id": "atlantic-finish",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[8., 5., 3.]],\n",
"\n",
" [[3., 6., 6.]],\n",
"\n",
" [[3., 7., 2.]],\n",
"\n",
" [[4., 8., 2.]],\n",
"\n",
" [[0., 6., 8.]]], grad_fn=<CatBackward>) torch.Size([5, 1, 3])\n"
]
}
],
"source": [
"a = torch.tensor([1,2,3,4.2],requires_grad=False)\n",
"b = torch.tensor([2,2,2,2.0],requires_grad=True)"
"BATCH_SIZE = 5\n",
"STATES = 3\n",
"CONSTELLATIONS = STATES -1 #determined by debris tracking\n",
"MAX = 10\n",
"FEATURES = 1\n",
"\n",
"stocks = torch.randint(MAX,(BATCH_SIZE,1,CONSTELLATIONS), dtype=torch.float32, requires_grad=True)\n",
"debris = torch.randint(MAX,(BATCH_SIZE,1,1), dtype=torch.float32, requires_grad=True)\n",
"\n",
"s = c.States(stocks, debris)\n",
"\n",
"print(s.values,s.values.shape)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "separated-pursuit",
"execution_count": null,
"id": "prostate-liverpool",
"metadata": {},
"outputs": [],
"source": [
"def test(x,y):\n",
" return (x@y)**2"
]
"source": []
},
{
"cell_type": "code",
"execution_count": 17,
"id": "french-trunk",
"execution_count": 3,
"id": "simplified-permission",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(416.1600, grad_fn=<PowBackward0>)"
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[[1.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [1.]]]]) torch.Size([1, 2, 2, 1])\n",
"tensor([[ 1.0000, 0.0000],\n",
" [ 0.0000, 1.0000],\n",
" [-0.2000, -0.2000]]) torch.Size([3, 2])\n"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test(a,b)"
"#launch_costs = torch.randint(3,(1,CONSTELLATIONS,CONSTELLATIONS,FEATURES), dtype=torch.float32)\n",
"launch_costs = torch.tensor([[[[1.0],[0]],[[0.0],[1]]]])\n",
"print(launch_costs, launch_costs.shape)\n",
"#payoff = torch.randint(5,(STATES,CONSTELLATIONS), dtype=torch.float32)\n",
"payoff = torch.tensor([[1.0, 0],[0,1.0],[-0.2,-0.2]])\n",
"print(payoff, payoff.shape)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "adverse-ceremony",
"execution_count": 4,
"id": "level-angle",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([81.6000, 81.6000, 81.6000, 81.6000]),\n",
" tensor([ 40.8000, 81.6000, 122.4000, 171.3600])),\n",
" tensor([2., 2., 2., 2.], requires_grad=True),\n",
" tensor(416.1600, grad_fn=<PowBackward0>))"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"j = jacobian(test,(a,b))\n",
"j,b,test(a,b)"
"def linear_profit(states, choices):\n",
" #Pay particular attention to the dimensions\n",
"    #note that there is an extra dimension in there just to match that of the profit vector we'll be giving out.\n",
" \n",
" #calculate launch expenses\n",
" \n",
" launch_expense = torch.tensordot(choices,launch_costs, [[-2,-1],[-2,-1]])\n",
"\n",
" #calculate revenue\n",
"\n",
" revenue = torch.tensordot(s.values, payoff, [[-1],[0]])\n",
"\n",
"\n",
" profit = revenue - launch_expense\n",
" return profit"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "lovely-apple",
"execution_count": 5,
"id": "copyrighted-acting",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-12.8304, -3.9878, 4.8547, 15.4658]),\n",
" tensor([-10.8365, -21.6729, -32.5094, -45.5132])),\n",
" tensor([ 1.1840, 0.3680, -0.4480, -1.4272], grad_fn=<SubBackward0>),\n",
" tensor(29.3573, grad_fn=<PowBackward0>))"
"outputs": [],
"source": [
"policy = nns.ChoiceFunction(BATCH_SIZE\n",
" ,STATES\n",
" ,FEATURES\n",
" ,CONSTELLATIONS\n",
" ,12\n",
" )"
]
},
"execution_count": 58,
{
"cell_type": "markdown",
"id": "casual-career",
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b2 = b - j[1]*b*0.01\n",
"j2 = jacobian(test,(a,b2))\n",
"j2,b2,test(a,b2)"
"example to get profit = 1\n",
"```python\n",
"optimizer = torch.optim.Adam(policy.parameters(),lr=0.001)\n",
"\n",
"for i in range(10000):\n",
" #training loop\n",
" optimizer.zero_grad()\n",
"\n",
" output = policy.forward(s.values)\n",
"\n",
" l = ((1-linear_profit(s.values,output))**2).sum()\n",
"\n",
"\n",
" l.backward()\n",
"\n",
" optimizer.step()\n",
"\n",
" if i%200==0:\n",
" print(l)\n",
" \n",
"\n",
"results = policy.forward(s.values)\n",
"print(results.mean(dim=0), \"\\n\",results.std(dim=0))\n",
"```\n"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "stretch-selection",
"execution_count": 6,
"id": "straight-negative",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-13.6581, -4.2906, 5.2787, 17.0284]),\n",
" tensor([-11.4119, -22.8239, -34.2358, -47.9301])),\n",
" tensor([ 1.1968, 0.3760, -0.4626, -1.4922], grad_fn=<SubBackward0>),\n",
" tensor(32.5580, grad_fn=<PowBackward0>))"
"tensor([[[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.3742],\n",
" [0.0000]]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 63,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b3 = b2 - j2[1]*b2*0.001\n",
"j3 = jacobian(test,(a,b3))\n",
"j3,b3,test(a,b3)"
"output = policy.forward(s.values)\n",
"output"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "colored-visit",
"execution_count": 7,
"id": "independent-deficit",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-14.5816, -4.6324, 5.7628, 18.8361]),\n",
" tensor([-12.0461, -24.0921, -36.1382, -50.5935])),\n",
" tensor([ 1.2105, 0.3846, -0.4784, -1.5637], grad_fn=<SubBackward0>),\n",
" tensor(36.2769, grad_fn=<PowBackward0>))"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"b4 = b3 - j3[1]*b3*0.001\n",
"j4 = jacobian(test,(a,b4))\n",
"j4,b4,test(a,b4)"
"t = torch.ones_like(output, requires_grad=True)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "familiar-pizza",
"metadata": {},
"execution_count": 57,
"id": "romance-force",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-15.6173, -5.0205, 6.3191, 20.9424]),\n",
" tensor([-12.7481, -25.4962, -38.2443, -53.5421])),\n",
" tensor([ 1.2251, 0.3938, -0.4957, -1.6428], grad_fn=<SubBackward0>),\n",
" tensor(40.6286, grad_fn=<PowBackward0>))"
"ename": "RuntimeError",
"evalue": "element 0 of tensors does not require grad and does not have a grad_fn",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-57-efee93d7c257>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m#this is where I lose the gradient. This is where I need a gradient so that I can call .backward below\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mtest_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m inputs=inputs)\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mretain_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m Variable._execution_engine.run_backward(\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag\n",
"\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b5 = b4 - j4[1]*b4*0.001\n",
"j5 = jacobian(test,(a,b5))\n",
"j5,b5,test(a,b5)"
"def test_loss(options):\n",
" return torch.autograd.functional.jacobian(linear_profit, (s.values, options))[0].sum()\n",
" #something is off here ^\n",
" #this is where I lose the gradient. This is where I need a gradient so that I can call .backward below\n",
"\n",
"test_loss(output).backward()"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "brilliant-squad",
"execution_count": 55,
"id": "asian-death",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"((tensor([-15.6173, -5.0205, 6.3191, 20.9424]),\n",
" tensor([-12.7481, -25.4962, -38.2443, -53.5421])),\n",
" tensor([ 1.2407, 0.4039, -0.5146, -1.7307], grad_fn=<SubBackward0>),\n",
" tensor(45.7605, grad_fn=<PowBackward0>))"
"ename": "RuntimeError",
"evalue": "element 0 of tensors does not require grad and does not have a grad_fn",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-55-ac1f78ecd780>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 244\u001b[0m inputs=inputs)\n\u001b[0;32m--> 245\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 246\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 247\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/miniconda3/envs/pytorch-CPU/lib/python3.8/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[1;32m 143\u001b[0m \u001b[0mretain_graph\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 145\u001b[0;31m Variable._execution_engine.run_backward(\n\u001b[0m\u001b[1;32m 146\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 147\u001b[0m allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag\n",
"\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"b6 = b5 - j5[1]*b5*0.001\n",
"j6 = jacobian(test,(a,b5))\n",
"j6,b6,test(a,b6)"
]
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "discrete-engineer",
"id": "prospective-nelson",
"metadata": {},
"outputs": [],
"source": []

@ -3,7 +3,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "similar-ebony",
"id": "geographic-wilderness",
"metadata": {},
"outputs": [],
"source": [
@ -15,22 +15,22 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "spread-hygiene",
"id": "major-glucose",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[[6., 1., 0.]],\n",
"tensor([[[4., 9., 6.]],\n",
"\n",
" [[2., 0., 4.]],\n",
" [[0., 5., 4.]],\n",
"\n",
" [[7., 6., 9.]],\n",
" [[3., 1., 9.]],\n",
"\n",
" [[3., 6., 9.]],\n",
" [[6., 4., 8.]],\n",
"\n",
" [[9., 1., 2.]]])\n"
" [[8., 7., 6.]]], grad_fn=<CatBackward>)\n"
]
}
],
@ -39,8 +39,8 @@
"constellations = states -1 #determined by debris tracking\n",
"max_start_state = 10\n",
"\n",
"stocks = torch.randint(max_start_state,(batch_size,1,constellations),dtype=torch.float32)\n",
"debris = torch.randint(max_start_state,(batch_size,1,1),dtype=torch.float32)\n",
"stocks = torch.randint(max_start_state,(batch_size,1,constellations), dtype=torch.float32, requires_grad=True)\n",
"debris = torch.randint(max_start_state,(batch_size,1,1), dtype=torch.float32, requires_grad=True)\n",
"\n",
"s = c.States(stocks, debris)\n",
"\n",
@ -50,13 +50,13 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "attended-making",
"id": "recognized-ability",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([6.3344e-07, 4.6190e-07])"
"tensor([5.6433e-07, 6.7631e-07], grad_fn=<MulBackward0>)"
]
},
"execution_count": 3,
@ -65,14 +65,14 @@
}
],
"source": [
"constellation_collision_risk = 1e-6 * torch.rand(constellations)\n",
"constellation_collision_risk = 1e-6 * torch.rand(constellations, requires_grad=True)\n",
"constellation_collision_risk"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "strategic-american",
"id": "elect-float",
"metadata": {},
"outputs": [],
"source": [
@ -80,14 +80,14 @@
"launch_debris = 0.05\n",
"debris_autocatalysis_rate = 1.4\n",
"\n",
"benefit_weight0 = torch.tensor([1.0,-0.02])\n",
"benefit_weight1 = torch.tensor([0.0,1.0])"
"benefit_weight0 = torch.tensor([1.0,-0.02], requires_grad=True)\n",
"benefit_weight1 = torch.tensor([0.0,1.0], requires_grad=True)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "hired-consent",
"id": "compressed-individual",
"metadata": {},
"outputs": [],
"source": [
@ -102,122 +102,7 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "copyrighted-tackle",
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "__init__() missing 1 required positional argument: 'launch_cost'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-6-54bb8ddad0e2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mlaunch_cost\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m ea0 = c.LinearProfit(\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;31m#constellation index\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0;36m0.95\u001b[0m \u001b[0;31m#discount\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m,\u001b[0m\u001b[0mbenefit_weight0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mTypeError\u001b[0m: __init__() missing 1 required positional argument: 'launch_cost'"
]
}
],
"source": [
"launch_cost = 5\n",
"ea0 = c.LinearProfit(\n",
" 0 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight0\n",
" ,launch_cost #launch_cost\n",
" )\n",
"ea1 = c.LinearProfit(\n",
" 1 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight1\n",
" ,launch_cost #launch_cost\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "accepted-namibia",
"metadata": {},
"outputs": [],
"source": [
"enn = nns.EstimandNN(batch_size\n",
" ,states\n",
" ,choices\n",
" ,constellations\n",
" ,12)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "revolutionary-eight",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[0.0000],\n",
" [0.0000]],\n",
"\n",
" [[0.0000],\n",
" [0.0021]],\n",
"\n",
" [[0.1109],\n",
" [0.0835]],\n",
"\n",
" [[0.0884],\n",
" [0.1051]],\n",
"\n",
" [[0.0000],\n",
" [0.0000]]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"launch_decisions = enn.forward(s.values).choices\n",
"launch_decisions"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "abroad-mobile",
"metadata": {},
"outputs": [],
"source": [
"w = torch.tensor([[1.0,0],[0,-0.2]])\n",
"ww = torch.tensor([1.0, -0.2])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "seasonal-companion",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([5, 1, 2])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.size()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "jewish-zoning",
"id": "tracked-bachelor",
"metadata": {},
"outputs": [],
"source": [
@ -240,10 +125,6 @@
" self.benefit_weights = benefit_weights\n",
" self.launch_cost = launch_cost\n",
" self.deorbit_cost = deorbit_cost\n",
"\n",
" def __str__(self):\n",
" return \"LinearProfit\\n Benefit weights:\\t{}\\n launch cost:\\t{}\\n Deorbit cost:\\t{}\".format(self.benefit_weights, self.launch_cost, self.deorbit_cost)\n",
"\n",
" \n",
" def _period_benefit(self,stocks,debris,launches):\n",
" # multiply benefits times stocks\n",
@ -260,177 +141,380 @@
},
{
"cell_type": "code",
"execution_count": 17,
"id": "surgical-diversity",
"execution_count": 7,
"id": "confidential-philippines",
"metadata": {},
"outputs": [],
"source": [
"def test(stocks,launches):\n",
" # multiply benefits times stocks\n",
" # sum across constellations\n",
" # reshape to standard dimensions\n",
" # subtract launch costs. \n",
" profit = torch.tensordot(ww,stocks, [[0],[1]])[:,0] - (launch_cost * launch_decisions)[:,0,0]\n",
" return profit.view(batch_size,1)"
"launch_cost = 5\n",
"ea0 = LinearProfit(\n",
" batch_size\n",
" ,0 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight0\n",
" ,launch_cost #launch_cost\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "western-sixth",
"execution_count": 8,
"id": "religious-georgia",
"metadata": {},
"outputs": [],
"source": [
"t = LinearProfit(batch_size #batch_size\n",
" ,0 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight0\n",
" ,launch_cost #launch_cost\n",
" )"
"enn = nns.EstimandNN(batch_size\n",
" ,states\n",
" ,choices\n",
" ,constellations\n",
" ,12)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "conscious-debut",
"execution_count": 9,
"id": "painful-republican",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<__main__.LinearProfit at 0x7f0664fad4c0>"
"tensor([[[0.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [0.]],\n",
"\n",
" [[0.],\n",
" [0.]]], grad_fn=<ReluBackward0>)"
]
},
"execution_count": 19,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t"
"launch_decisions = enn.forward(s.values).choices\n",
"launch_decisions"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "eight-cheat",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[5.8800],\n",
" [1.9600],\n",
" [6.3054],\n",
" [2.4978],\n",
" [8.8200]], grad_fn=<ViewBackward>)"
]
},
"execution_count": 20,
"execution_count": 10,
"id": "equal-raising",
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"t._period_benefit(s.stocks,s.debris,launch_decisions)"
"def test(stocks,launches):\n",
" # multiply benefits times stocks\n",
" # sum across constellations\n",
" # reshape to standard dimensions\n",
" # subtract launch costs. \n",
" profit = torch.tensordot(benefit_weight0,stocks, [[0],[1]])[:,0] - (launch_cost * launch_decisions)[:,0,0]\n",
" return profit.view(batch_size,1)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "juvenile-barcelona",
"metadata": {
"tags": []
},
"execution_count": 11,
"id": "divine-editor",
"metadata": {},
"outputs": [],
"source": [
"def f(stocks, debris, launches):\n",
" return torch.autograd.functional.jacobian(t._period_benefit\n",
" ,(stocks,debris,launches)\n",
" ,create_graph=True\n",
" )\n",
"def ff(stocks, debris, launches):\n",
" return torch.autograd.functional.jacobian(f\n",
" ,(stocks,debris,launches)\n",
" ,create_graph=True\n",
"t = LinearProfit(batch_size #batch_size\n",
" ,0 #constellation index\n",
" ,0.95 #discount\n",
" ,benefit_weight0\n",
" ,launch_cost #launch_cost\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "freelance-publicity",
"execution_count": 12,
"id": "japanese-captain",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([5, 1, 5, 1, 2, 5, 1, 2])"
"tensor([[3.9200],\n",
" [0.0000],\n",
" [2.9400],\n",
" [5.8800],\n",
" [7.8400]], grad_fn=<ViewBackward>)"
]
},
"execution_count": 30,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
"source": [
"test(stocks,launch_decisions)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "vocational-operator",
"execution_count": 13,
"id": "determined-difference",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[[6.0000, 1.0000],\n",
" [2.0000, 0.0000],\n",
" [7.0000, 6.0000],\n",
" [3.0000, 6.0000],\n",
" [9.0000, 1.0000]],\n",
"\n",
" [[4.8000, 0.8000],\n",
" [1.6000, 0.0000],\n",
" [5.6000, 4.8000],\n",
" [2.4000, 4.8000],\n",
" [7.2000, 0.8000]]])"
"tensor([[3.9200],\n",
" [0.0000],\n",
" [2.9400],\n",
" [5.8800],\n",
" [7.8400]], grad_fn=<ViewBackward>)"
]
},
"execution_count": 55,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torch.tensordot(torch.tensor([[1.0,-0.2],[0,1]]),stocks, [[0],[1]])"
"t._period_benefit(s.stocks,s.debris,launch_decisions)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "nuclear-alberta",
"execution_count": 14,
"id": "tribal-least",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([5, 1, 2])"
"(tensor([[[[[0.9800, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.0000, 0.0000]],\n",
" \n",
" [[0.9800, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.9800, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.9800, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.0000, 0.0000]],\n",
" \n",
" [[0.9800, 0.0000]]]]], grad_fn=<ViewBackward>),\n",
" tensor([[[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]],\n",
" \n",
" [[0.]]]]]),\n",
" tensor([[[[[-5.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-5.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-5.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-5.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]]]],\n",
" \n",
" \n",
" \n",
" [[[[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-0.],\n",
" [ 0.]],\n",
" \n",
" [[-5.],\n",
" [ 0.]]]]]))"
]
},
"execution_count": 48,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"stocks.shape"
"#in this case, the debris isn't tracked because it isn't included, and launch_decisions has a similar issue.\n",
"torch.autograd.functional.jacobian(t._period_benefit, (s.stocks,s.debris,launch_decisions), create_graph=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "immune-machinery",
"id": "statutory-lyric",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "naked-health",
"metadata": {},
"outputs": [],
"source": []

@ -173,9 +173,15 @@ class States():
def __str__(self):
return "stocks\t{} \ndebris\t {}".format(self.stocks,self.debris)
@property
def values(self):
#return these as a single tensor.
return torch.cat((self.stocks,self.debris), dim=-1)
@property
def number_constellations(self):
return len(self.stocks)
@property
def number_debris_trackers(self):
return len(self.debris)
@ -277,10 +283,16 @@ class LinearProfit(EconomicAgent):
"""
The simplest type of profit function available.
"""
def __init__(self, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0):
def __init__(self, batch_size, constellation_number, discount_factor, benefit_weight, launch_cost, deorbit_cost=0, ):
self.batch_size = batch_size
#track which constellation this is.
self.constellation_number = constellation_number
#get the number of constellations (pull from the benefit weight, in the dimension that counts across constellations)
self.number_of_constellations = benefit_weight.size()[1]
#parameters describing the agent's situation
self.discount_factor = discount_factor
self.benefit_weights = benefit_weight
@ -291,13 +303,14 @@ class LinearProfit(EconomicAgent):
return "LinearProfit\n Benefit weights:\t{}\n launch cost:\t{}\n Deorbit cost:\t{}".format(self.benefit_weights, self.launch_cost, self.deorbit_cost)
def period_benefit(self,state,estimand_interface):
return self._period_benefit(state.stocks, state.debris, estimand_interface.launches)
return self._period_benefit(state.stocks, state.debris, estimand_interface.choices)
def _period_benefit(self,stocks,debris,launches):
profits = self.benefit_weights @ stocks \
- self.launch_cost * launches[self.constellation_number] #\
#- deorbit_cost @ deorbits[self.constellation_number]
return profits
# multiply benefits times stocks
# sum across constellations
# reshape to standard dimensions
# subtract launch costs.
pass
def period_benefit_jacobian_wrt_states(self, states, estimand_interface):
return self._period_benefit_jacobian_wrt_states(states.stocks, states.debris, estimand_interface.launches)

Loading…
Cancel
Save