Merge branch 'rewrite_section'

2 years ago · 52a88bcd61
parent b4c9052fd1 9238da8d6a
commit 52a88bcd61
19 changed files with 1394 additions and 219 deletions
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit c25565274403e454f03bdb2d7f72cf108a0db213
+Subproject commit d25f5c2a0e672c361937e8c3b490a575714b8ec1
--- a/Latex/Paper/Main.tex
+++ b/Latex/Paper/Main.tex
@ -12,6 +12,7 @@
 \input{../assets/preambles/GeneralPreamble}

 \usepackage{float}
+\usepackage{csquotes}


 %setup paragraph level indexing
@ -23,7 +24,8 @@
 \titlespacing*{\paragraph}
 {0pt}{3.25ex plus 1ex minus .2ex}{1.5ex plus .2ex}

-\title{The effects of market conditions on enrollment and completion of clinical trials\\ \small{Preliminary Draft}}
+\title{The effects of market conditions and enrollment on the 
+completion of clinical trials\\ \small{Preliminary Draft}}
 \author{William King}

 \usepackage{multirow}
@ -43,11 +45,14 @@
 \section{Introduction}\label{SEC:Introduction}
 %---------------------------------------------------------------

-\subfile{sections/01_introduction}
-%---------------------------------------------------------------
-%\section{Literature Review}\label{SEC:LiteratureReview}
-%---------------------------------------------------------------
-\subfile{sections/05_LitReview}
+\subfile{sections/11_intro_and_lit}
+% \subfile{sections/01_introduction}
+% %---------------------------------------------------------------
+% \section{Literature Review}\label{SEC:LiteratureReview}
+% %---------------------------------------------------------------
+% \subfile{sections/05_LitReview}
+\section{Clinical Trial Background}\label{SEC:ClinicalTrials}
+\subfile{sections/12_clinical_trial_background}

 The paper proceeds as follows. 
 Then section \ref{SEC:data} covers the data sources and the proposed
@ -63,10 +68,10 @@ Section \ref{SEC:Results} discusses the results of the analysis.
 \subfile{sections/10_CausalStory}
 \subfile{sections/02_data}

-% %---------------------------------------------------------------
-% \section{Causal Identification}\label{SEC:CausalIdentification}
-% %---------------------------------------------------------------
-% \subfile{sections/03_CausalIdentification}
+%---------------------------------------------------------------
+\section{Causal Identification}\label{SEC:CausalIdentification}
+%---------------------------------------------------------------
+\subfile{sections/03_CausalIdentification}

 %---------------------------------------------------------------
 \section{Econometric Model}\label{SEC:EconometricModel}
--- a/Latex/Paper/jmp_layout.kdl
+++ b/Latex/Paper/jmp_layout.kdl
@ -0,0 +1,84 @@
+layout {
+    tab name="Main and Compile" cwd="~/research/PhD_Deliverables/jmp/Latex/Paper/" hide_floating_panes=true focus=true {
+    // This tab is where I manage main from. 
+    // it opens up Main.tex for my JMP, opens the pdf in okular (in a floating pane), and then gets ready to build the pdf.
+        pane size=1 borderless=true {
+            plugin location="tab-bar"
+        }
+        pane split_direction="vertical" {
+            pane edit="Main.tex" focus=true // This is the editor
+
+            pane split_direction="horizontal" {
+
+                // this is the compilation window
+                pane size="60%" command="compiletex" {
+                    args "Main.tex"
+                    start_suspended true
+                }
+
+                // This is the ls of sections
+                pane size="35%" command="ls"{
+                    args "sections/"
+                }
+            }
+        }
+        floating_panes {
+            // here is where I run okular from, it is auto hidden
+            pane command="okular" { 
+                args "Main.pdf" 
+            }
+        }
+        pane size=2 borderless=true {
+            plugin location="status-bar"
+        }
+    }
+
+    tab name="sections" cwd="~/research/PhD_Deliverables/jmp/Latex/Paper/sections/" {
+        pane size=1 borderless=true {
+            plugin location="tab-bar"
+        }
+        pane split_direction="vertical" {
+            pane
+            pane stacked=true {
+                pane
+                pane
+                pane
+                pane
+                pane
+                pane
+                pane
+                pane
+                pane
+            }
+        }
+        pane size=2 borderless=true {
+            plugin location="status-bar"
+        }
+    }
+
+    tab name="git" cwd="~/research/PhD_Deliverables/jmp/Latex/Paper/" {
+        pane size=1 borderless=true {
+            plugin location="tab-bar"
+        }
+
+        pane split_direction="vertical" {
+            pane split_direction="horizontal" {
+                pane command="watch" {
+                    args "--color" "git status"
+                    // requires `git config --global color.status always` to be set
+                }
+                pane size="30%" {
+                    focus true
+                }   
+            }
+
+            pane command="git" {
+                args "log" "-n 10" "--all" "--oneline" "--graph" "--stat" "--decorate"
+            }
+        }
+
+        pane size=2 borderless=true {
+            plugin location="status-bar"
+        }
+    }
+}
--- a/Latex/Paper/jmp_layout_laptop.kdl
+++ b/Latex/Paper/jmp_layout_laptop.kdl
@ -0,0 +1,84 @@
+layout {
+    tab name="Main and Compile" cwd="~/research/phd_deliverables/jmp/Latex/Paper" hide_floating_panes=true focus=true {
+    // This tab is where I manage main from. 
+    // it opens up Main.tex for my JMP, opens the pdf in okular (in a floating pane), and then gets ready to build the pdf.
+        pane size=1 borderless=true {
+            plugin location="tab-bar"
+        }
+        pane split_direction="vertical" {
+            pane edit="./Main.tex" focus=true // This is the editor
+
+            pane split_direction="horizontal" {
+
+                // this is the compilation window
+                pane size="60%" command="comlatex.sh" {
+                    args "Main.tex"
+                    start_suspended true
+                }
+
+                // This is the ls of sections
+                pane size="35%" command="ls"{
+                    args "sections/"
+                }
+            }
+        }
+        floating_panes {
+            // here is where I run okular from, it is auto hidden
+            pane command="okular" { 
+                args "Main.pdf" 
+            }
+        }
+        pane size=2 borderless=true {
+            plugin location="status-bar"
+        }
+    }
+
+    tab name="sections" cwd="~/research/phd_deliverables/jmp/Latex/Paper/sections" {
+        pane size=1 borderless=true {
+            plugin location="tab-bar"
+        }
+        pane split_direction="vertical" {
+            pane
+            pane stacked=true {
+                pane
+                pane
+                pane
+                pane
+                pane
+                pane
+                pane
+                pane
+                pane
+            }
+        }
+        pane size=2 borderless=true {
+            plugin location="status-bar"
+        }
+    }
+
+    tab name="git" cwd="~/research/phd_deliverables/jmp/Latex/Paper/" {
+        pane size=1 borderless=true {
+            plugin location="tab-bar"
+        }
+
+        pane split_direction="vertical" {
+            pane split_direction="horizontal" {
+                pane command="watch" {
+                    args "--color" "git status"
+                    // requires `git config --global color.status always` to be set
+                }
+                pane size="30%" {
+                    focus true
+                }   
+            }
+
+            pane command="git" {
+                args "log" "-n 10" "--all" "--oneline" "--graph" "--stat" "--decorate"
+            }
+        }
+
+        pane size=2 borderless=true {
+            plugin location="status-bar"
+        }
+    }
+}
--- a/Latex/Paper/outliin4.txt
+++ b/Latex/Paper/outliin4.txt
@ -0,0 +1,18 @@
+NEXT STEPS IN WRITING
+
+- insert a description of the general approach I use: 
+    - predicting, based on snapshots, the likelihood of termination.
+    - this needs to go between the description of the snapshots and the 
+    causal inference introduction.
+- Then I can use what I've written about the graph, and follow up with more information about the data.
+
+Overall this would look like
+
+- [x] Introduction of the question and general issues of confoundedness.
+- [x] Clinical Trials Data Sources
+- [x] Explain basic econometric modelling approach
+- [ ] Then explain the graph, nodes, and confoundedness in more detail
+- [ ] Then go over the rest of the data.
+- [ ] Finally
+    - Discuss the number of datapoints.
+    - review major challenges to causal identification. (no enrollment model, small data size)
--- a/Latex/Paper/outline.txt
+++ b/Latex/Paper/outline.txt
@ -0,0 +1,34 @@
+Outlining for jmp
+<intro>
+Introduction and problem statement
+*Explain what I am doing:*
+</intro>
+<literature>
+Describe what has been done
+- measuring failure rates & impact
+Introduce different types of failure
+- Scientific
+- Strategic
+- Operational
+Efforts to measure failures
+Medbio story to illustrate failure modes.
+Operational and strategic failures undermine scientific process of discovery
+*My effort is to separate...*: place my work in context
+Introduce clinical trials' progressions, stages, and statuses.
+</literature>
+<causal model>
+Derive causal model
+</causal model>
+<data>
+Summarize data sources
+</data>
+<econometrics>
+Introduce econometric model
+</econometrics>
+<results>
+Discuss econometric results
+</results>
+Conclusion
+Appendices
+- in-depth data source info
+- More econometric results
--- a/Latex/Paper/outline2.txt
+++ b/Latex/Paper/outline2.txt
@ -0,0 +1,58 @@
+In 19xx the United States Food and Drug Administration (FDA) was created to "QUOTE".
+As of Sept 2022 \todo{Check Date} they have approved 6,602 currently-marketed compounds with Structured Product Labels (SPL) 
+and 10,983 previously-marketed SPLs. 
+%from nsde table. Get number of unique application_nubmers_or_citations with most recent end date as null.
+In 2007, they began requiring that drug developers register and publish clinical trials on \url{https://clinicaltrials.gov}.
+This provides a public mechanism where clinical trial sponsors are responsible to explain
+what they are trying to achieve and how it will be measured, as well as provide the public the ability to
+search and find trials that they might enroll in.
+Data such as this has become part of multiple datasets 
+(e.g. the Cortellis Investigational Drugs dataset or the AACT dataset from the Clinical Trials Transformation Initiative) 
+used to evaluate what drugs might be entering the market soon.
+This brings up a question: can we use this public data on clinical trials to describe what affects their success or failure?
+In this work, I use updates to records on \url{https://ClinicalTrials.gov} to disentangle 
+how participant enrollment and drugs on the market affect the success or failure of clinical trials.
+
+%Describe how clinical trials fit into the drug development landscape and how they proceed
+Clinical trials are a required part of drug development.
+Not only does the FDA require that a series of clinical trials demonstrate sufficient safety and efficacy of
+a novel pharmaceutical compound or device, producers of derivative medicines may be required to ensure that
+their generic small molecule compound -- such as ibuprofen or levothyroxine -- matches the
+performance of the originator drug if delivery or dosage is changed.
+For large molecule generics (termed biosimilars) such as Adalimumab
+(Brand name Humira, with biosimilars Abrilada, Amjevita, Cyltezo, Hadlima, Hulio,
+Hyrimoz, Idacio, Simlandi, Yuflyma, and Yusimry),
+the biosimilars are required to prove they have similar efficacy and safety to the
+reference drug.
+
+When registering a clinical trial,
+the investigators are required to 
+% discuss how these are registered and what data is published.
+% Include image and discuss stages
+% Discuss challenges faced
+
+% Introduce my work
+
+In the world of drug development, these trials are classified into different phases of development.
+Pre-clinical studies may include 
+Phase I trials are the first attempt to evaluate safety and efficacy in humans, and usually \todo{Describe trial phases, get citation}
+Phase II trials typically \todo{}
+A Phase III trial is the final trial before approval by the FDA
+Phase IV trials are used after approval to ensure safety and efficacy in the general populace ....
+
+In the economics literature, most of the focus has been on evaluating how drug candidates transition between
+different phases and then on to approval.
+
+% Now begin introducing work by Chris Adams
+% Lead into lit review
+
+
+% Causality
+
+% Data
+
+% Economic Model
+
+% Results
+
+% Conclusion
--- a/Latex/Paper/outline3.txt
+++ b/Latex/Paper/outline3.txt
@ -0,0 +1,42 @@
+How do I begin work on stuff
+
+- next step is causal story. key points include
+    - we are trying to separate strategic and operational concerns. (why is this a difficult problem?)
+        - we can't trust what we are told
+        - terminations could be due to safety, strategic, or operational concerns.
+    - explaining confounding between 
+        - population/market and enrollment.
+        - population/market and market conditions.
+        - market conditions and enrollment. 
+    - describe other confounders
+        - safety and effectiveness
+        - duration <--> enrollment/termination
+        - Condition 
+        - Decision to proceed with Phase III trial
+    - How do I handle this?
+- Introduce Do-Calculus
+    - DAG model
+    - What do I need to control for, in some form or other?
+        CURRENTLY HERE:
+- Introduce Data
+    - Clinical Trial Progression
+        - AACT gives us information on
+            - terminated/completed status
+            - compound-indication pairs
+            - MeSH/RxNorm links
+        - Snapshots
+    - Market Conditions
+        - can't directly measure alternate treatments/standards of care.
+        - Can get measures of USP - formulary alternatives
+        - Can get number of generics or brand names with same drug.
+    - Population Sizes
+        - IHME Global Burden of Disease dataset. Best measure of impact of a given disease category.
+            - DALY's
+    - How much data do I have?
+- Econometric model
+    - for a given state, what is the probability it will terminate?
+        - more accurately for my dist-diff analysis: for a given state, what is the distribution of the probabilities it will terminate?
+    - basic bernoulli-logistic model, linear in parameters.
+    - What are the specific things I am looking at?
+        - number of competing treatments.
+        - delaying close of enrollment.
--- a/Latex/Paper/sections/01_introduction.tex
+++ b/Latex/Paper/sections/01_introduction.tex
@ -3,11 +3,8 @@

 \begin{document}

-Developing new, effective pharmaceutical compounds is a fundamentally 
-difficult task.
-Starting with challenges identifying promising treatment targets and potential 
-compounds to ensuring the drug can be properly delivered within the body, the 
-scientific work that needs to succeede is massive.
+Developing novel, safe, and effective pharmaceutical compounds is difficult.
+Starting with challenges identifying promising treatment targets and potential compounds, to ensuring the drug can be properly delivered within the body, the scientific work that needs to go well is massive.
 The regulatory and market conditions in which they exist add to this difficulty. 
 For example, regulations are designed to reduce the number of drugs released 
 to market with significant issues, such as in the case of VIOXX 
@ -20,6 +17,11 @@ increase the costs of developing new drugs, adding to the business concerns
 already present, including competitors already in the market or close to 
 entering and the overall demand to address a given condition.

+This work is the first that endeavors to separate the causal effect
+of an operational concern (participant enrollment) from that of strategic 
+concerns (market size and competitors in the market) 
+on individual clinical trials. 
+
 %begin discussing failures
 %I am thinking I'll discuss marketing and operational failures
 %I somehow need to step away from the drug development framing and soften it to 
@ -34,41 +36,10 @@ Similarly
 ascribe failures of Phase III trials to issues with safety, 
 efficacy, or other (business) concerns.

+Understanding both why and how the development of drugs fails -- for both 
+novel and derivative pharmaceuticals -- is key to ensuring that both innovation
+and availability are maximized.

-% The only one most ameniable to being targeted by policy 
-% is those ``other concerns''.
-Although decisions to continue drug development are driven 
-by long term profit analyses,
-pharmaceutical companies face short term operational challenges
-which can impede the development process. 
-Some operational reasons given for why a trial was stopped include: 
-\begin{itemize}
-    \item Organizational challenges (Principle Investigator left institution, 
-        changes in research focus, staff shortages)
-    \item Troubles with recruitment, (accural to slow/low, difficulty locating 
-        qualified participants, etc).
-    \item Changes in standards of care.
-    \item Sponsor withdraws support or provides insufficient financial support,
-        e.g Funding runs out.
-    \item Beginning or end of a pandemic.
-\end{itemize}
-% As an example, while a drug may have few competitors and 
-% strong evidence of safety, difficulties recruiting trial participants may 
-% prevent the clinical trials process from being completed successfully.
-Thus being able to isolate the effect of specific operational challenges from 
-strategic decisions allows us to more accurately predict the intended or 
-unintended effects of a given policy on clinical trials.

-In this work, I focus on separating the effects of enrollment and 
-competing drugs on clinical trial completion, specifically Phase III trials.
-To do this, I create a dataset extracted from 
-\url{ClinicalTrials.gov} 
-that tracks individual clinical trials as they progress towards completion. 
-I also introduce a novel causal model of individual clinical trial progression.
-Unlike previous research which generally focuses on the drug development 
-pipeline through multiple phases, I restrict my investigation to modelling 
-individual clinical trials.
-This restriction provides a way to separate the impact of different operational
-changes, specifically enrollment troubles and changes in the market.

 \end{document}
--- a/Latex/Paper/sections/02_data.tex
+++ b/Latex/Paper/sections/02_data.tex
@ -3,6 +3,7 @@

 \begin{document}
 In the sections below, I examine each source of data, their key features,
+how they match with the variables in the Structural Model DAG,
 and describe applicable terminology (\cref{datasources}).
 I then discuss how these sources were tied together (\cref{datalinks}) and 
 describe the specific data used in the analysis (\cref{dataintegration}).
--- a/Latex/Paper/sections/03_CausalIdentification.tex
+++ b/Latex/Paper/sections/03_CausalIdentification.tex
@ -2,81 +2,78 @@
 \graphicspath{{\subfix{Assets/img/}}}

 \begin{document}
+% % Introduce clinicaltrials.gov
+% % - Describe different statuses 
+% % - status flowchart
+% % Introduce causal model
+% % - Diagram
+% % - List each node and what they influence (and why)
+% % Begin Discussing Data
+% % - Where did I get data for each node?
+%
+% When any clinical trial is conducted, it goes through three distinct stages:
+% pre-trial, active, and decision to conclude.
+% In figure \ref{Fig:Stages}, you can see the component parts of each stage. 
+%
+% \begin{figure}[H] %use [H] to fix the figure here.
+%     \includegraphics[width=\textwidth]{../assets/img/ClinicalTrialStagesAndStatuses}
+%     \caption{Model of Statuses}
+%     \label{Fig:Stages}
+% \end{figure}
+%
+% In the pre-trial stage, the sponsoring organization chooses to run the trial,
+% they register the trial on  \url{ClinicalTrials.gov}, and then decide if they
+% will begin enrollment. 
+% Many registered trials are withdrawn at this point, before the trial has opened
+% for enrollment. 
+% Once enrollment has opened 

-Because running experiments on companies running clinical trials is not going
-to happen anytime soon, causal identification will depend on creating a 
-structural causal model.
-In \cref{Fig:CausalModel} I diagram the directed acyclic graph that describes
-the data generating model.
-The proposed data generating model consists of a decision maker, the study 
-sponsor, who must decide whether to let a trial run to completion or terminate
+
+
+Because running randomized experiments on companies running clinical trials 
+is unlikely to happen anytime soon, 
+causal identification will depend on observational methods. 
+I use the do-calculus approach developed by Judea Pearl
+\cite{pearl_CausalityModels_2009}
+to describe what affects the success of a Phase III clinical trial. 
+I then use that model to derive the econometric model capable of estimating
+the effect of extending the recruiting period or of having an additional 
+competing drug.
+
+
+
+% In \cref{Fig:CausalModel} I diagram the directed acyclic graph that describes
+% the data generating model.
+The proposed data generating model consists of a decision maker
+-- the study sponsor --
+who must decide whether to let a trial run to completion or terminate
 the trial early. 
-While receiving updates regarding the status of the trial, they ask questions
-such as:
+While receiving updates regarding the status of the trial, they try to 
+answer questions such as:
 \begin{itemize}
    \item Do I need to terminate the trial due to safety incidents?
-    \item Does it appear that the drug is effective enough to achieve our 
-        goals, justifying continuing the trial?
+    \item Does it appear that the drug is effective?
    \item Are we recruiting enough participants to achieve the statistical
        results we need?
-    \item Does the current market conditions and expectations about returns on 
+    \item Do the current market conditions and expectations about 
+        returns on 
        investment justify the expenditures we are making?
 \end{itemize}
-When appropriate, the study sponsor terminates the trial.
-If there are not enough issues to terminate the trial, it continues until it 
-is completed.
-
-While conducting a trial, the safety and efficacy of a drug are driven by
-fundamental pharmacokinetic properties of the compounds. 
-These are only imperfectly measured both prior to and during any given trial.
-Previously measured safety and efficacy inform the decision to start the trial
-in the first place while currently observed safety and efficiency results
-help the sponsor judge whether or not to continue the trial.
-Of course, these decisions are both affected by the specific condition being
-treated due to differences in the severity of the symptoms.
+Although I treat this as a single agent, in reality, there are multiple 
+stakeholders involved in choosing whether the trial should continue, including
+those running the trial (which may be a separate firm), 
+the company developing the drug, additional rightsholders, 
+or funding organizations.

-When a trial has been started, it comes time to recruit participancts.
-Participants frequently depend on the advice of their physician when deciding 
-to join a trial or not. 
-As these physicians have a duty to seek their patients best interest; they, along
-with their patients will evaluate if the previously observed safety and efficacy
-results justify joining the trial over using current standard treatments.
-Thus the current market conditions may affect the rate at which participants 
-enroll in the trial.
+% When appropriate, the study sponsor terminates the trial.
+% If there are not enough issues to terminate the trial, it continues until it 
+% is completed.

-The enrollment of participants in a trial depends on a few other factors.
-The condition or disease of interest and how it progresses will determine how long
-recruitiment will be held open versus just an observation of treatment arms.
-Aditionally, a trial that has already reached a high enough enrollment will often
-close recruitment by switching to an "Active, not recruiting" stage to manage costs.
-Finally, enrolling participants depends on how difficult it is to find people 
-who suffer from the condition of interest.
+In the United States, clinical trials are required by law to be registered on 
+\url{ClinicalTrials.gov}, where they are made available to the public. 
+Trials must be registered 

-The preceeding issue of population size also affects the number of alternatives available.
-When there are less people affected by the disease, the smaller market reduces 
-possible profitability, all else equal.
-Thus the likelihood of companies paying the sunk costs to develop drugs for
-these conditions may be lower.
-Finally, the number of alternatives on the market may affect the return on
-investment directly, causing a trial to terminate early if the return is
-not high enough.
-
-\begin{figure}[H] %use [H] to fix the figure here.
-    \scalebox{0.6}{\tikzfig{../assets/tikzit/CausalGraph2}}
-    \caption{Causal Model}
-    \label{Fig:CausalModel}
-\end{figure}
 % 
-By using Judea Pearl's do-calculus, I can show that by choosing an adjustment 
-set of the decision to condut a phase III trial, the condition of interest, 
-the current status of the trial, and the population size will casually
-identify the direct effects of enrollment and market alternatives on the
-probability of termination.
-This is easily verified through the backdoor criterion, which states that
-if every path between the exposure and outcome that starts with an arrow 
-flowing into the exposure is blocked by one of the values in the adjustment
-set, then the effect of the exposure on outcome is causally identified
-(\cite{pearl_causality_2000}).
-It can be easily visually verified by the DAG on the graph that this is the case.
+

 \end{document}
--- a/Latex/Paper/sections/05_LitReview.tex
+++ b/Latex/Paper/sections/05_LitReview.tex
@ -3,22 +3,26 @@

 \begin{document}

-%%%%%%%%%%%%%%%% What do we know about clinical trials?
-\subsection{What do we know about clinical trials and their success rates?}
-
-Most studies of clinical trials attempt to model only those trials 
-which are involved in the drug approval process. 
-For example, 
-
-% Hwang, Carpenter, Lauffenburger, et al (2016)
-% - Why do investigational new drugs fail during late stage trials?
-\cite{hwang_failure_2016} 
-investigated causes for which late stage (Phase III)
-clinical trials fail across the USA, Europe, Japan, Canada, and Australia. 
-They found that for late stage trials that did not go on to recieve approval,
-57\% failed on efficacy grounds, 17\% failed on safety grounds, and 22\% failed
-on commercial or other grounds.
+% Outline
+% - Introduce and frame problem
+%   - Phases & regulatory part
+%   - Large number of failures at each phase
+%   - There are multiple ways to measure this
+%   - Estimation of failures at phase and failures per development path
+%   - Talk about impact of making these closer together
+% - Trying to develop more by tweaking external world:
+%   - Pull incentives 
+%   - Increase in market sizes.
+%   - Uncertainty in Intellectual Property
+% - Understanding failure modes
+%   - EK and Hwang
+%   - discuss missing section of operational concerns
+%   - Introduce metabio
+% - Once again bring up my work here.
+%   - 
+%   - 

+\subsection{Drug development process and failure rates}
 % Abrantes-Metz, Adams, Metz (2004)
 % - What correlates with successfully passing clinical trials and FDA review?
 % - 
@ -32,35 +36,32 @@ They found that as trials last longer, the rate of failure increases for
 Phase I and II trials, while Phase 3 trials generally have a higher rate of 
 success than failure after 91 months.

+%DiMasi FeldmanSeckler Wilson 2009
+\cite{dimasi_TrendsRisks_2010} examine the completion rate of clinical drug 
+development and find that for the 50 largest drug producers, 
+approximately X\% of their drugs under development
+\todo{FILL IN X}
+successfully completed the process. 
+They note a couple of changes in how drugs are developed over the years they 
+study (clinical development started between 1993 and 2004). 
+This included that drugs began to fail earlier in their development cycle in the 
+latter half of the time they studied. 
+This may be an operational change to reduce the cost of new drugs. 

-\cite{hay_ClinicalDevelopment_2014} tracks clinical trials based on 
-the number of indications studied. 
-They find that, for given indication, only 10.4\% of all novel drug development paths
-studied in a phase I trial are ultimately approved by the FDA.
+\cite{dimasi_ValueImproving_2002}
+used data on 68 investigational drugs from 10 firms to simulate how reducing
+time in development reduces the costs of developing drugs. 
+He estimates that reducing Phase III of clinical trials by one year would 
+reduce total costs by about 8.9\% and that moving 5\% of clinical trial failures
+from Phase III to Phase II would reduce out of pocket costs by 5.6\%. 

-\cite{wong_EstimationClinical_2019} 
-estimate the probability of completing a given phase, conditional on starting a previous phase.
-In doing so, they found that 13.8\% of all drug development programs 
-completed successfully. % slightly higherothers have found\cite{hay_ClinicalDevelopment_2014}.
-One cause of this may be that they considered that a single drug might 
-be tested for multiple indications.

+% Waring, Arrosmith, Leach, et al (2015)
+% - Attrition of drug candidates from four major pharma companies
+% - Looked at how physicochemical properties affected clinical failure due to safety issues
+% Don't think this is applicable.

-% Ekaterina Khmelnitskaya (2021)
-% - separates scientific from market failure of the clinical drug pipeline
-%In her doctoral dissertation, Ekaterina Khmelnitskaya
-\cite{khmelnitskaya_CompetitionAttrition_2021} approaches a slightly 
-different problem. 
-She created a multistage model to track the transition of 
-drug candidates between clinical trial phases. 
-Her key contribution was to find ways to disentangle strategic exits where 
-firms remove novel from the development pipeline and 
-exits due to scientific failures 
-(where safety and efficacy did not prove sufficient).
-She estimates that overall 8.4\% of all pipeline exits are due to strategic 
-terminations and that the rate of new drug production would be about 23\% 
-higher if those strategic terminatations were elimintated.
-
+\subsection{Market incentives and drug development}
 %%%%%%%%% What do we know about drug development incentives?
 \subsection{What do we know about drug development incentives?}
 % Introduce section
@ -73,6 +74,18 @@ of Medicare part D, they found that most development occurred in the least
 novel categories of drugs, in spite of a relatively constant growth in novel 
 compounds. 

+\cite{dranove_DoesConsumer_2022} use the implementation of Medicare part D 
+to examine whether the production of novel or follow up drugs increases during 
+the following 15 years. 
+They find that when Medicare part D was implemented -- increasing senior 
+citizens' ability to pay for drugs -- there was a (delayed) increase 
+in drug development, with effects concentrated among compounds that were least
+innovative according to their classification of innovations.
+They suggest that this is due to financial risk management, as novel 
+pharmaceuticals have a higher probability of failure compared to the less novel
+follow up development. 
+This is what leads risk-averse companies to prefer follow up development.
+


 %   -  acemoglu and linn 2004 - population size matters.
@ -80,7 +93,7 @@ compounds.
 %   - In general, there are going to be many confounding variables.
 % - 
 % - Exogenous demographic trends has a large impact on the entry of non-generic drugs and new molecular entitites.
-On the side of market analysis, %TODO:remove when other sections are written up.
+On the side of market analysis, 
 \citeauthor{acemoglu_market_2004} 
 (\citeyear{acemoglu_market_2004})
 used exogenous demographic changes to show that the
@ -142,11 +155,10 @@ in expected revenue.

 % Gupta
 % - Inperfect intellectual property rights in the pharmaceutical industry
-\cite{gupta_OneProduct_2020} 
-describes the impact that imperfect intellectual property rights have in the 
-the market for pharmaceuticals. 
-She describes how overlapping and ambiguous patent rights increase the time 
-to entry of generic drugs by about 3 years.
+\cite{gupta_OneProduct_2020} discovered that uncertainty around which patents
+might apply to a novel drug causes a delay in the entry of generics after 
+the primary patent has expired. 
+She found that this delay in delivery is around 3 years. 


 \subsection{What do we know about Clinical Trial operations?}
@ -175,4 +187,91 @@ trials, I learned about how clinical trials will typically proceed.

 % Thus we should look at the effects that operational concerns have.

+\subsection{Understanding Failures in Drug Development}
+
+% DISCUSS: Different types of failures
+There are a myriad of reasons that a drug candidate may not make it to market, 
+regardless of its novelty or known safety.
+In this work, I focus on the failure of individual clinical trials, but the 
+categories of failure apply to the individual trials as well as the entire
+drug development pipeline.
+They generally fall into one of the following categories:
+\begin{itemize}
+    \item Scientific  Failure: When there are issues regarding 
+        safety and efficacy that must be addressed. 
+        The preeminent question is: 
+        ``Will the drug work for patients?''
+        %E.Khm, Gupta, etc.
+    \item Strategic Failure: When the sponsors stop development because of 
+        profitability concerns:
+        %Whether or not the drug will be profitiable, or align with
+        %the drug developer's future Research \& Development directions i.e.
+        ``Will producing the drug be beneficial to the 
+        company in the long term?''
+        %E.Khm, Gupta, GLP-1s, etc.
+    \item Operational concerns are answers to: 
+        %Whether or not the developer can successfully conduct
+        %operations to meet scientific or strategic goals, i.e. 
+        ``What has prevented the company from being able to 
+        finance, develop, produce, and market the drug?''
+\end{itemize}
+It is likely that a drug fails to complete the development cycle due to some
+combination of these factors. 
+
+
+%USE MetaBio/CalBio GLP-1 story to illuistrate these different factors.
+\cite{flier_DrugDevelopment_2024} documents the case of MetaBio, a company
+he was involved in founding that was in the first stages of
+developing a GLP-1 based drug for diabetes or obesity before being shut down
+in \todo{Add the year MetaBio was shut down}. 
+MetaBio was a wholly owned subsidiary of CalBio, a metabolic drug development 
+firm, that received a \$30 million, 5-year investment from Pfizer to 
+pursue development of GLP-1 based therapies. 
+At the time it was shut down, it faced a few challenges:
+\begin{itemize}
+    \item The compound had a short half life and they were seeking methods to 
+        improve its effectiveness; a scientific failure.
+    \item Pfizer imposed a requirement that it be delivered through a route
+        other than injection (the known delivery mechanism); a strategic failure. 
+    \item When Pfizer pulled the plug, CalBio closed MetaBio because they 
+        could not find other funding sources; an operational failure.
+\end{itemize}
+
+The author states in his conclusion:
+\begin{displayquote}
+    Despite every possibility of success, 
+    MetaBio went down because there were mistaken ideas about what was 
+    possible and what was not in the realm of metabolic therapeutics, and 
+    because proper corporate structure and adequate capital are always 
+    issues when attempting to survive predictable setbacks.
+\end{displayquote} 
+
+From this we see that there was a cascade of issues leading to the failure to 
+develop this novel drug. 
+
+% NOW discuss efforts to measure the impact of different aspects
+
+\citeauthor{hwang_failure_2016} (\citeyear{hwang_failure_2016}) 
+investigated causes for which late stage (Phase III)
+clinical trials fail across the USA, Europe, Japan, Canada, and Australia. 
+They found that for late stage trials that did not go on to receive approval,
+57\% failed on efficacy grounds, 17\% failed on safety grounds, and 22\% failed
+on commercial or other grounds.
+
+
+In her doctoral dissertation, Ekaterina Khmelnitskaya studied the transition of
+drug candidates between clinical trial phases. 
+Her key contribution was to find ways to disentangle strategic exits from the 
+development pipeline and exits due to clinical failures.
+She found that overall 8.4\% of all pipeline exits are due to strategic 
+terminations and that the rate of new drug production would be about 23\% 
+higher if those strategic terminations were eliminated
+(\cite{khmelnitskaya_competition_2021}).
+% causal separation of strategic exits etc.
+
+% I don't think I need to include modelling enrollment here. 
+% If it is applicable, it can show up in those sections later.
+
+
+
 \end{document}
--- a/Latex/Paper/sections/10_CausalStory.tex
+++ b/Latex/Paper/sections/10_CausalStory.tex
@ -3,14 +3,154 @@

 \begin{document}

+% Begin by talking about goal, what does it mean? This might need some work prior to give more background.
+As I am trying to separate strategic concerns 
+(the effect of a marginal treatment methodology) 
+and an operational concern 
+(the effect of a delay in closing enrollment), 
+we need to look at what confounds these effects and how we might measure them.
+
+The primary effects one might expect to see are that
+\begin{enumerate}
+    \item Adding more drugs to the market will make it harder to 
+        finish a trial as it is
+        more likely to be terminated due to concerns about profitability.
+    \item Adding more drugs will make it harder to recruit, slowing enrollment.
+    \item Enrollment challenges increase the likelihood that a trial will 
+        terminate.
+    % Mentioned below
+    % \item A large population/market will tends to have more drugs to treat it 
+    %     because it is more profitable. 
+    % \item A large population/market will make it easier to recruit, 
+    %     reducing the likelihood of a termination due to enrollment failure.
+\end{enumerate}
+
+There are a few fundamental issues that arise when trying to estimate 
+these effects.
+The first is that the severity of the disease and the size of the population 
+with that disease affect the ease of enrolling participants. 
+For example, a large population may make it easier to find enough participants
+to achieve the required statistical discrimination between 
+control and treatment.
+Second, for some diseases there exists an endogenous dynamic 
+between the treatments available for a disease and the 
+market size/population with that disease. 
+\authorcite{cerda_EndogenousInnovations_2007} proposes two mechanisms
+that link the drugs on the market and market size. 
+The inverse is that for many chronic diseases with high mortality rates, 
+more drugs cause better survivability, increasing the size of those markets.
+The third major confound is that the drugs on the market affect enrollment. 
+If there is a treatment already on the market, patients or their doctors
+may be less inclined to participate in the trial, even if the current treatment
+has severe downsides. 
+
+There are additional problems. 
+One is that the disease being treated affects the 
+safety and efficacy standards that the drug will be held to. 
+For example, if a particular cancer is very deadly and does not respond well
+to current treatments, Phase I trials will enroll patients with that cancer, 
+as opposed to the standard of enrolling healthy volunteers 
+\cite{commissioner_DrugDevelopment_2020} to establish safe dosages.
+The trial is more likely to be terminated early if the drug is unsafe or has no
+discernible effect, therefore termination depends in part on a compound-disease 
+interaction.
+Another challenge comes from the interaction between duration and termination;
+in that if a trial terminates before closing enrollment for issues other 
+than enrollment, then the enrollment will still be low. 
+On the other hand, if enrollment is low, the trial might terminate.
+These outcomes are indistinguishable in the data provided by the final 
+\url{ClinicalTrials.gov} dataset.
+
+Finally, while conducting a trial, the safety and efficacy of a drug are driven by
+fundamental pharmacokinetic properties of the compounds. 
+These are only imperfectly measured both prior to and during any given trial.
+Previously measured safety and efficacy inform the decision to start the trial
+in the first place while currently observed safety and efficacy results
+help the sponsor judge whether or not to continue the trial.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\subsection{Clinical Trials Data Sources}
+%% Describe data here
+Since Sep 27th, 2007 those who conduct clinical trials of FDA controlled 
+drugs or devices on human subjects must register 
+their trial at \url{ClinicalTrials.gov}
+(\cite{noauthor_fdaaa_nodate}).
+This involves submitting information on the expected enrollment and duration of
+trials, drugs or devices that will be used, treatment protocols and study arms, 
+as well as contact information for the trial sponsor and treatment sites.
+
+When starting a new trial, the required information must be submitted 
+``\dots not later than 21 calendar days after enrolling the first human subject\dots''.
+After the initial submission, the data is briefly reviewed for quality and 
+then the trial record is published and the trial is assigned a 
+National Clinical Trial (NCT) identifier
+\cite{noauthor_fdaaa_nodate}.
+
+Each trial's record is updated periodically, including a final update that must occur 
+within a year of completing the primary objective, although exceptions are
+available for trials related to drug approvals or for trials with secondary
+objectives that require further observation\footnote{This rule came into effect in 2017}
+\cite{noauthor_fdaaa_nodate}.
+Other than the requirements for the first and last submissions, all other
+updates occur at the discretion of the trial sponsor.
+Because the ClinicalTrials.gov website serves as a central point of information
+on which trials are active or recruiting for a given condition or drug,
+most trials are updated multiple times during their progression.
+
+There are two primary ways to access data about clinical trials.
+The first is to search individual trials on ClinicalTrials.gov with a web browser.
+This web portal shows the current information about the trial and provides 
+access to snapshots of previously submitted information.
+Together, these features fulfill most of the needs of those seeking 
+to join a clinical trial.
+For this project I've been able to scrape these historical records to establish
+snapshots of the records provided.
+%include screenshots?
+The second way to access the data is through a normalized database set up by
+the 
+\href{https://aact.ctti-clinicaltrials.org/}{Clinical Trials Transformation Initiative}
+called AACT. %TODO: Get CITATION
+The AACT database is available as a PostgreSQL database dump or set of 
+flat-files. 
+These dumps match a near-current version of the ClinicalTrials.gov database.
+This format is amenable to large scale analysis, but does not contain 
+information about the past state of trials.
+I combined these two sources, using the AACT dataset to select 
+trials of interest and then scraping \url{ClinicalTrials.gov} to get 
+a timeline of each trial.
+
+%%%%%%%%%%%%%%%%%%%%%%%% Model Outline
+
+The way I use this data is to predict the final status of the trial 
+from the snapshots that were taken, in effect asking:
+``how does the probability of a termination change from the current state 
+of the trial if X changes?''
+
+%% Return to causal identification
+\subsection{Causal Identification}
+
 Because running experiments on companies running clinical trials is not going
-to happen anytime soon, causal identification will depend on creating a 
+to happen anytime soon, causal identification depends on using a 
 structural causal model.
+Because the data generating process for the clinical trials records is rather 
+straightforward, this is an ideal place to use
+\authorcite{pearl_causality_2000}
+Do-Calculus.
+This process involves describing the data generating process in the form of 
+a directed acyclic graph, where the nodes represent different variables
+within the causal model and the directed edges (arrows) represent
+assumptions about which variables influence the other variables. 
+There are a few algorithms that then tell the researcher which of the 
+relationships will be confounded, which ones can be statistically estimated, 
+and provide some hypotheses that can be tested to ensure the model is 
+reasonably correct.
+
+
 In \cref{Fig:CausalModel} I diagram the directed acyclic graph that describes
-the data generating model.
-The proposed data generating model consists of a decision maker, the study 
-sponsor, who must decide whether to let a trial run to completion or terminate
-the trial early. 
+my proposed data generating process.
+It revolves around the decisions made by the study sponsor, 
+who must decide whether to let a trial run to completion 
+or terminate the trial early. 
 While receiving updates regarding the status of the trial, they ask questions
 such as:
 \begin{itemize}
@ -18,65 +158,227 @@ such as:
    \item Does it appear that the drug is effective enough to achieve our 
        goals, justifying continuing the trial?
    \item Are we recruiting enough participants to achieve the statistical
-        results we need?
+        results we need in the budget we have?
    \item Do the current market conditions and expectations about returns on 
        investment justify the expenditures we are making?
 \end{itemize}
-When appropriate, the study sponsor terminates the trial.
-If there are not enough issues to terminate the trial, it continues until it 
-is completed.
-
-While conducting a trial, the safety and efficacy of a drug are driven by
-fundamental pharmacokinetic properties of the compounds. 
-These are only imperfectly measured both prior to and during any given trial.
-Previously measured safety and efficacy inform the decision to start the trial
-in the first place while currently observed safety and efficiency results
-help the sponsor judge whether or not to continue the trial.
-Of course, these decisions are both affected by the specific condition being
-treated due to differences in the severity of the symptoms.
-
-When a trial has been started, it comes time to recruit participancts.
-Participants frequently depend on the advice of their physician when deciding 
-to join a trial or not. 
-As these physicians have a duty to seek their patients best interest; they, along
-with their patients will evaluate if the previously observed safety and efficacy
-results justify joining the trial over using current standard treatments.
-Thus the current market conditions may affect the rate at which participants 
-enroll in the trial.
-
-The enrollment of participants in a trial depends on a few other factors.
-The condition or disease of interest and how it progresses will determine how long
-recruitiment will be held open versus just an observation of treatment arms.
-Aditionally, a trial that has already reached a high enough enrollment will often
-close recruitment by switching to an "Active, not recruiting" stage to manage costs.
-Finally, enrolling participants depends on how difficult it is to find people 
-who suffer from the condition of interest.
-
-The preceeding issue of population size also affects the number of alternatives available.
-When there are less people affected by the disease, the smaller market reduces 
-possible profitability, all else equal.
-Thus the likelihood of companies paying the sunk costs to develop drugs for
-these conditions may be lower.
-Finally, the number of alternatives on the market may affect the return on
-investment directly, causing a trial to terminate early if the return is
-not high enough.
+When appropriate issues arise, the study sponsor terminates the trial, otherwise
+it continues to completion.

 \begin{figure}[H] %use [H] to fix the figure here.
-    \scalebox{0.6}{\tikzfig{../assets/tikzit/CausalGraph2}}
-    \caption{Causal Model}
+    \frame{
+    \scalebox{0.65}{
+             \tikzfig{../assets/tikzit/CausalGraph2}
+    }
+    }
+    \todo{check if this is the correct graph}
+    \caption{Graphical Causal Model}
+    
+    % \small{Crimson boxes are the variables of interest, 
+    % white boxes are unobserved, while the gray boxes will be controlled for.}
    \label{Fig:CausalModel}
 \end{figure}
-% 
-By using Judea Pearl's do-calculus, I can show that by choosing an adjustment 
-set of the decision to condut a phase III trial, the condition of interest, 
-the current status of the trial, and the population size will casually
-identify the direct effects of enrollment and market alternatives on the
-probability of termination.
-This is easily verified through the backdoor criterion, which states that
-if every path between the exposure and outcome that starts with an arrow 
-flowing into the exposure is blocked by one of the values in the adjustment
-set, then the effect of the exposure on outcome is causally identified
-(\cite{pearl_causality_2000}).
-It can be easily visually verified by the DAG on the graph that this is the case.

+
+% Constructing the model more explicitly
+% - quickly describe each node and line.
+\todo{I think I need to blend the data section in before this, to give some overall information on data.}
+\todo{I may need to add some information on snapshots so that this makes sense.}
+
+A quick summary of the nodes of the DAG, the exact representation in the data, and their impact: 
+\begin{itemize}
+    \item Main Interests (Crimson Boxes)
+        \begin{enumerate}
+            \item \texttt{Will Terminate?}: 
+                Whether the final status of the trial was \textit{terminated} 
+                or \textit{completed}.
+                This comes from the AACT dataset.
+            \item \texttt{Enrollment Status}: 
+                    This describes the current enrollment status of the snapshot, e.g. 
+                    \texttt{Recruiting},
+                    \texttt{Enrolling by invitation only},
+                    or
+                    \texttt{Active, not recruiting}.
+            \item \texttt{Market Measures}: 
+                Various measures of the number of alternate drugs on the market.
+                These are either the number of other drugs with the same active ingredient as the trial
+                (both generic and originators),
+                or the number of drugs considered alternatives in various formularies published by the United States Pharmacopeia.
+        \end{enumerate}
+    \item Observed Confounders (Gray Boxes)
+        \begin{enumerate}
+            \item \texttt{Condition}: 
+                The underlying condition, classified by ICD-10 group. 
+                This impacts every other aspect of the model and is pulled from
+                the AACT dataset.
+            \item \texttt{Population (market size)}: 
+                Multiple measures of the impact of the disease.
+                These are measured by the DALY cost of the disease, and are 
+                separated by the impact on countries with
+                High, High-Medium, Medium, Medium-Low, and Low 
+                development scores.
+                This data comes from the Institute for Health Metrics' Global Burden of Disease study.
+            \item \texttt{Elapsed Duration}: 
+                A normalized measure of the time elapsed in the trial. 
+                Comes from the original estimate of the trial's primary completion date and the registered start date. 
+                I take the difference in days between these, and get the percentage of that time that has elapsed.
+                This calculation is based on data from the snapshots and the 
+                AACT final results.
+            \item \texttt{Decision to Proceed with Phase III}: 
+                If the compound development has progressed to Phase III.
+                This is included in the analysis by only including 
+                Phase III trials registered in the AACT dataset.
+        \end{enumerate}
+    \item Unobserved Confounders (White Boxes)
+        \begin{enumerate}
+            \item \texttt{Fundamental Efficacy and Safety}:
+                The underlying efficacy and safety of the compound. 
+                Cannot be observed, only estimated through scientific study.
+            \item \texttt{Previously observed Efficacy and Safety}: 
+                The information gathered in previous studies. 
+                This is not available in my dataset because I don't 
+                have links to prior studies.
+            \item \texttt{Currently observed Efficacy and Safety}:
+                The information gathered during this study.
+                This is only partially available, and so is 
+                treated as unavailable. 
+                After a study is over, the investigators 
+                often publish information about adverse events, but only
+                those that meet a certain threshold.
+                As this information doesn't appear to be provided to 
+                participants, we don't consider it.
+        \end{enumerate}
+\end{itemize}
+
+%
+
+\begin{itemize}
+    \item Relationships of interest
+        \begin{enumerate}
+            \item \texttt{Enrollment Status} $\rightarrow$ \texttt{Will Terminate?}:
+                This is the primary effect of interest.
+            \item \texttt{Market Measures} $\rightarrow$ \texttt{Will Terminate?}:
+                This is the secondary effect of interest.
+        \end{enumerate}
+    \item Confounding Pathways
+        \begin{enumerate}
+            \item 
+                \texttt{Condition}: 
+                Affects every other node. 
+                Part of the Adjustment Set.
+            \item Backdoor Pathway 
+                between \texttt{Will Terminate?} and 
+                \texttt{Enrollment Status} through safety and efficacy.
+                The concern is that since previously learned information 
+                and current information are driven by the same underlying 
+                physical reality, the enrollment process and 
+                termination decisions may be correlated.
+                Controlling for the decision to proceed with the trial is the 
+                best adjustment available to block this confounding pathway.
+                Below I describe the exact pathways.
+                \begin{enumerate}
+                    \item 
+                        \texttt{Fundamental Efficacy and Safety} 
+                        $\rightarrow$ 
+                        \texttt{Currently Observed Efficacy and Safety}:
+                        This relationship represents the measurements of
+                        safety and efficacy in the current trial. 
+                    \item 
+                        \texttt{Currently Observed Efficacy and Safety}
+                        $\rightarrow$ 
+                        \texttt{Will Terminate?}:
+                        This is how the measurements of safety and efficacy in the 
+                        current trial affect the probability of termination.
+                        % typically, evidence of a lack safety or efficacy is 
+                        % enought to terminate the trial.
+                    \item \texttt{Fundamental Efficacy and Safety} 
+                        $\rightarrow$ 
+                        \texttt{Previously Observed Efficacy and Safety}:
+                        This relationship represents the measurements of
+                        safety and efficacy in work prior to the current trial. 
+                    \item 
+                        \texttt{Previously Observed Efficacy and Safety}
+                        $\rightarrow$ 
+                        \texttt{Decision to proceed with Phase III}:
+                        Previously observed data is essential to the FDA's 
+                        decision to allow a phase III trial. 
+                \end{enumerate}
+            \item 
+                Backdoor Pathway from \texttt{Market Measures} 
+                to \texttt{Enrollment Status} 
+                through \texttt{Population}. 
+                The concern with this pathway is that the rate of enrollment, and
+                thus the enrollment status, is affected by the Population with 
+                the disease. 
+                Additionally, there is a concern that the number of competitors
+                is driven by the total market size.
+                Thus adding Population to the adjustment set is necessary.
+                \begin{enumerate}
+                    \item 
+                        \texttt{Population} 
+                        $\rightarrow$ 
+                        \texttt{Enrollment Status}:
+                        This is fairly straightforward. 
+                        How easy it is to enroll participants depends in part  
+                        on how many people have the disease.
+                    \item 
+                        \texttt{Population} 
+                        $\rightarrow$ 
+                        \texttt{Market Measures}:
+                        This assumes that the population effect flows only one
+                        direction, i.e. that a large population size increases
+                        the likelihood of a large number of drugs. 
+                        %TODO: Think about this one a bit because it does mess
+                        % with identification, particularly of market effects. 
+                        % these two are jointly determined per cerda 2007.
+                        % If I can't justify separating them, then I'll need to 
+                        % merge population (market size) and market measures (drugs on market). 
+                \end{enumerate}
+            \item 
+                \texttt{Market Measures} 
+                $\rightarrow$ 
+                \texttt{Enrollment Status}:
+                This confounds the estimation of the effect of 
+                \texttt{Enrollment} on \texttt{Will Terminate?}, and 
+                so \texttt{Market Measures} is part of the adjustment set.
+            \item 
+                \texttt{Market Measures} 
+                $\rightarrow$ 
+                \texttt{Decision to proceed with Phase III}:
+                The alternative treatments on the market will affect a sponsor's
+                decision to move forward with a Phase III trial.
+                This is controlled for by only working with trials that 
+                successfully begin recruitment for a Phase III Trial.
+            \item 
+                \texttt{Elapsed Duration} 
+                $\rightarrow$ 
+                \texttt{Will Terminate?}:
+                The amount of time passed helps drive the decision to continue
+                or terminate.
+            \item 
+                \texttt{Enrollment Status} 
+                $\leftrightarrow$ 
+                \texttt{Elapsed Duration}:
+                % This is jointly determined. and the weakest part of the causal identification without an accurate model of enrollment.
+                This is one of the weakest parts of the causal inference. 
+                Without a well defined model of enrollment, we can't separate
+                the interaction between the enrollment status and the elapsed
+                duration. 
+                For example, if enrollment is running slower than expected,
+                the trial may be terminated due to concerns that it will not
+                achieve the primary objectives or that costs will exceed 
+                the budget allocated to the project.
+            \item 
+                \texttt{Decision to Proceed with Phase III} 
+                $\rightarrow$ 
+                \texttt{Will Terminate?}:
+                %obviously required. Maybe remove from listing and graph?
+                This effect is fairly straightforward, in that 
+                there is no possibility of a termination or completion
+                if the trial does not start. 
+                This is here to block a backdoor pathway between 
+                \texttt{Will Terminate?} and the enrollment status
+                through \texttt{Previously observed Safety and Efficacy}.
+        \end{enumerate}
+\end{itemize}
 \end{document}
--- a/Latex/Paper/sections/11_intro_and_lit.tex
+++ b/Latex/Paper/sections/11_intro_and_lit.tex
@ -0,0 +1,318 @@
+\documentclass[../Main.tex]{subfiles}
+\graphicspath{{\subfix{Assets/img/}}}
+
+\begin{document}
+
+In 1938 President Franklin D.~Roosevelt signed the Food, Drug, and Cosmetic Act,
+granting the Food and Drug Administration (FDA) authority to require 
+pre-market approval of pharmaceuticals
+\cite{commissioner_MilestonesUS_2023}.
+As of Sept 2022 \todo{Check Date} they have approved 6,602 currently-marketed 
+compounds with Structured Product Labels (SPLs) 
+and 10,983 previously-marketed SPLs
+\cite{commissioner_NSDE_2024}. 
+%from nsde table. Get number of unique application_number_or_citation values with most recent end date as null.
+In 1999, they began requiring that drug developers register and 
+publish clinical trials on \url{https://clinicaltrials.gov}.
+This provides a public mechanism where clinical trial sponsors are 
+responsible to explain what they are trying to achieve and how it will be 
+measured, as well as provide the public the ability to search and find trials 
+that they might enroll in.
+Multiple derived datasets such as the Cortellis Investigational Drugs dataset 
+or the AACT dataset from the Clinical Trials Transformation Initiative
+integrate these data. 
+This brings up a question: 
+Can we use this public data on clinical trials to identify what affects the 
+success or failure of trials?
+In this work, I use updates to records on 
+\url{https://ClinicalTrials.gov} 
+to do exactly that: disentangle how participant enrollment 
+and competing drugs on the market affect the success or failure of 
+clinical trials.
+
+%Describe how clinical trials fit into the drug development landscape and how they proceed
+Clinical trials are a required part of drug development.
+Not only does the FDA require that a series of clinical trials demonstrate sufficient safety and efficacy of
+a novel pharmaceutical compound or device, producers of derivative medicines may be required to ensure that
+their generic small molecule compound -- such as ibuprofen or levothyroxine -- matches the
+performance of the originator drug if delivery or dosage is changed.
+For large molecule generics (termed biosimilars) such as Adalimumab
+(Brand name Humira, with biosimilars Abrilada, Amjevita, Cyltezo, Hadlima, Hulio,
+Hyrimoz, Idacio, Simlandi, Yuflyma, and Yusimry),
+the biosimilars are required to prove they have similar efficacy and safety to the
+reference drug.
+
+When registering these clinical trials
+% discuss how these are registered and what data is published.
+% Include image and discuss stages
+% Discuss challenges faced
+
+% Introduce my work
+
+In the world of drug development, these trials are classified into different 
+phases of development.
+\cite{FDADrugApprovalProcess_2022} 
+provides an overview of this process,
+while \cite{commissioner_DrugDevelopment_2020}
+describes the actual details.
+Pre-clinical studies primarily establish toxicity and potential dosing levels 
+\cite{commissioner_DrugDevelopment_2020}.
+Phase I trials are the first attempt to evaluate safety and efficacy in humans. 
+Participants are typically healthy individuals, and investigators measure how the drug 
+affects healthy bodies, identify potential side effects, and adjust dosing levels. 
+Sample sizes are often fewer than 100 participants
+\cite{commissioner_DrugDevelopment_2020}.
+Phase II trials typically involve a few hundred participants and are where 
+investigators will dial in dosing, research methods, and safety
+\cite{commissioner_DrugDevelopment_2020}.
+A Phase III trial is the final trial before approval by the FDA, and is where 
+the investigator must demonstrate safety and efficacy with a large number of 
+participants, usually on the order of hundreds or thousands
+\cite{commissioner_DrugDevelopment_2020}.
+Occasionally, a trial will be a multiphase trial, covering aspects of either
+Phases I and II or Phases II and III. 
+
+
+After a successful Phase III trial, the sponsor will decide whether or not 
+to submit an application for approval from the FDA. 
+Before filing this application, the developer must have completed 
+``two large, controlled clinical trials''
+\cite{commissioner_DrugDevelopment_2020}.
+Phase IV trials are used after the drug has received marketing approval to 
+validate safety and efficacy in the general populace.
+Throughout this whole process, the FDA is available to assist in decision-making
+regarding topics such as study design, document review, and whether or not
+they should terminate the trial. 
+The FDA also reserves the right to place a hold on the clinical trial for 
+safety or other operational concerns, although this is rare
+\cite{commissioner_DrugDevelopment_2020}.
+
+In the economics literature, most of the focus has been on evaluating how 
+drug candidates transition between different phases and their probability 
+of final approval.
+% Lead into lit review
+% Abrantes-Metz, Adams, Metz (2004)
+\cite{abrantes-metz_pharmaceutical_2004}
+described the relationship between
+various drug characteristics and how the drug progressed through clinical trials.
+% This descriptive estimate was notable for using a 
+% mixed state proportional hazard model and estimating the impact of 
+% observed characteristics in each of the three phases.
+They found that as Phase I and II trials last longer, 
+the rate of failure increases. 
+In contrast, Phase III trials generally have a higher rate of 
+success than failure after 91 months.
+This may be due to the fact that the purpose of Phases I and II is different
+from the purpose of Phase III.
+
+Continuing on this theme,
+%DiMasi FeldmanSeckler Wilson 2009
+\cite{dimasi_TrendsRisks_2010} examine the completion rate of clinical drug 
+development and find that for the 50 largest drug producers, 
+approximately 19\% of their drugs under development between 1993 and 2004
+successfully moved from Phase I to receiving a New Drug Application (NDA) 
+or Biologics License Application (BLA). 
+They note a couple of changes in how drugs are developed over the years they 
+study, most notably that
+drugs began to fail earlier in their development cycle in the 
+latter half of the time they studied. 
+They note that this may reduce the cost of new drugs by eliminating late 
+and costly failures in the development pipeline.
+
+Earlier work by 
+\authorcite{dimasi_ValueImproving_2002}
+used data on 68 investigational drugs from 10 firms to simulate how reducing
+time in development reduces the costs of developing drugs. 
+He estimates that reducing Phase III of clinical trials by one year would 
+reduce total costs by about 8.9\% and that moving 5\% of clinical trial failures
+from Phase III to Phase II would reduce out-of-pocket costs by 5.6\%. 
+
+Like much of the work in this field, the focus of the work by 
+\citeauthor{dimasi_ValueImproving_2002}
+and
+\citeauthor{dimasi_TrendsRisks_2010}
+tends to be on the drug development pipeline, i.e. the progression between 
+phases and towards marketing approval. 
+A key contribution to this drug development literature is the work by 
+\authorcite{khmelnitskaya_CompetitionAttrition_2021}
+on a causal identification strategy
+to disentangle strategic exits from exits due to clinical failures 
+in the drug development pipeline.
+She found that overall 8.4\% of all pipeline exits are due to strategic 
+terminations and that the rate of new drug production would be about 23\% 
+higher if those strategic terminations were eliminated.
+
+The work that is closest to mine is the work by 
+\authorcite{hwang_FailureInvestigational_2016}
+who investigated causes for which late stage (Phase III)
+clinical trials fail -- with a focus on trials in the USA, 
+Europe, Japan, Canada, and Australia. 
+They identified 640 novel therapies and then studied each therapy's 
+development history, as outlined in commercial datasets.
+They found that for late stage trials that did not go on to receive approval,
+57\% failed on efficacy grounds, 17\% failed on safety grounds, and 22\% failed
+on commercial or other grounds.
+
+% Begin Discussing what I do. Then introduce 
+Unlike the majority of the literature, I focus on the progress of 
+individual clinical trials, not on the drug development pipeline. 
+In both 
+\authorcite{khmelnitskaya_CompetitionAttrition_2021}
+and
+\authorcite{hwang_FailureInvestigational_2016}
+the authors describe failures due to safety, efficacy, or strategic concerns.
+There is another category of concerns that arise for individual clinical trials,
+that of operational failures. 
+Operational failures can arise when a trial struggles to recruit participants, 
+the principal investigator or other key member leaves for another opportunity,
+or other studies prove that the trial requires a protocol change. 
+
+% In a personal review of 199 randomly selected clinical trials from the AACT
+% database, the 
+% \begin{table}
+%     \caption{}\label{tab:}
+%     \begin{center}
+%         \begin{tabular}[c]{|l|l|}
+%             \hline
+%             Reason & Percentage Mentioned \\
+%             \hline
+%             Safety or Efficacy & 14.5\% \\
+%             Funding Problems & 9.1\% \\
+%             Enrollment Issues & 31\% \\
+%             \hline
+%         \end{tabular}
+%     \end{center}
+% \end{table}
+
+
+
+This paper proposes the first model to separate the causal effects of 
+market conditions (a strategic concern) from the effects of 
+participant enrollment (an operational concern) on Phase III Clinical trials. 
+This will allow me to answer the questions:
+\begin{itemize}
+    \item What is the marginal effect on trial completion of an additional 
+        generic drug on the market?
+    \item What is the marginal effect on trial completion of a delay in 
+        closing enrollment?
+\end{itemize}
+To understand how I do this, we'll cover some background information on 
+clinical trials in section~\ref{SEC:ClinicalTrials}, 
+explain the data in section~\ref{SEC:DataSources}, 
+and then examine the causal identification and econometric model in 
+section~\ref{SEC:CausalIdentificationAndModel}. 
+Finally I'll review the results and conclusion in sections 
+\ref{SEC:Results}
+and
+\ref{SEC:Conclusion}
+respectively.
+
+% \subsection{Market incentives and drug development}
+% %%%%%%%%% What do we know about drug development incentives?
+%
+% \cite{dranove_DoesConsumer_2022} use the implementation of Medicare part D 
+% to examine whether the production of novel or follow up drugs increases during 
+% the following 15 years. 
+% They find that when Medicare part D was implemented -- increasing senior 
+% citizens' ability to pay for drugs -- there was a (delayed) increase 
+% in drug development, with effects concentrated among compounds that were least
+% innovative according to their classification of innovations.
+% They suggest that this is due to financial risk management, as novel 
+% pharmaceuticals have a higher probability of failure compared to the less novel
+% follow up development. 
+% This is what leads risk-adverse companies to prefer follow up development.
+%
+%
+% % Acemoglu and Linn
+% % - Market size in innovation
+% % - Exogenous demographic trends has a large impact on the entry of non-generic drugs and new molecular entitites.
+% On the side of market analysis, 
+% \citeauthor{acemoglu_market_2004} 
+% (\citeyear{acemoglu_market_2004})
+% used exogenous deomographics changes to show that the
+% entry of novel compounds is highly driven by the underlying aged population.
+% They estimate that a 1\% increase in applicable demographics increase the
+% entry of new drugs by 6\%, mostly concentrated among generics.
+% Among non-generics, a 1\% increase in potential market size 
+% (as measured by demographic groups) leads to a 4\% increase in novel therapies.
+%
+% % Gupta
+% % - Inperfect intellectual property rights in the pharmaceutical industry
+% \cite{gupta_OneProduct_2020} discovered that uncertainty around which patents
+% might apply to a novel drug causes a delay in the entry of generics after 
+% the primary patent has expired. 
+% She found that this delay in delivery is around 3 years. 
+%
+% % Agarwal and Gaule 2022
+% % - Retrospective on impact from COVID-19 pandemic
+% % Not in this version
+%
+% \subsection{Understanding Failures in Drug Development}
+%
+% % DISCUSS: Different types of failures
+% There are myriad of reasons that a drug candidate may not make it to market, 
+% regardless of it's novelty or known safety.
+% In this work, I focus on the failure of individual clinical trials, but the 
+% categories of failure apply to the individual trials as well as the entire
+% drug development pipeline.
+% They generally fall into one of the following categories:
+% \begin{itemize}
+%     \item Scientific  Failure: When there are issues regarding 
+%         safety and efficacy that must be addressed. 
+%         The preeminient question is: 
+%         ``Will the drug work for patients?''
+%         %E.Khm, Gupta, etc.
+%     \item Strategic Failure: When the sponsors stop development because of 
+%         profitability
+%         %Whether or not the drug will be profitiable, or align with
+%         %the drug developer's future Research \& Development directions i.e.
+%         ``Will producing the drug be beneficial to the 
+%         company in the long term?''
+%         %E.Khm, Gupta, GLP-1s, etc.
+%     \item Operational concerns are answers to: 
+%         %Whether or not the developer can successfully conduct
+%         %operations to meet scientific or strategic goals, i.e. 
+%         ``What has prevented the the company from being able to 
+%         finance, develop, produce, and market the drug?''
+% \end{itemize}
+% It is likely that a drug fails to complete the development cycle due to some
+% combination of these factors. 
+%
+%
+% %USE MetaBio/CalBio GLP-1 story to illuistrate these different factors.
+% \cite{flier_DrugDevelopment_2024} documents the case of MetaBio, a company
+% he was involved in founding that was in the first stages of
+% developing a GLP-1 based drug for diabetes or obesety before being shut down
+% in . 
+% MetaBio was a wholy owned subsidiary of CalBio, a metabolic drug development 
+% firm, that recieved a \$30 million -- 5 year investment from Pfizer to 
+% persue development of GLP-1 based therapies. 
+% At the time it was shut down, it faced a few challenges:
+% \begin{itemize}
+%     \item The compound had a short half life and they were seeking methods to 
+%         improve it's effectiveness; a scientific failure.
+%     \item Pfizer imposed a requirement that it be delivered though a route
+%         other than injection (the known delivery mechanism); a strategic failure. 
+%     \item When Pfizer pulled the plug, CalBio closed MetaBio because they 
+%         could not find other funding sources; an operational failure.
+% \end{itemize}
+%
+% The author states in his conclusion:
+% \begin{displayquote}
+%     Despite every possibility of success, 
+%     MetaBio went down because there were mistaken ideas about what was 
+%     possible and what was not in the realm of metabolic therapeutics, and 
+%     because proper corporate structure and adequate capital are always 
+%     issues when attempting to survive predictable setbacks.
+% \end{displayquote} 
+%
+% From this we see that there was a cascade of issues leading to the failure to 
+% develop this novel drug. 
+%
+%
+% % I don't think I need to include modelling enrollment here. 
+% % If it is applicable, it can show up in those sections later.
+%
+%
+
+\end{document}
--- a/Latex/Paper/sections/12_clinical_trial_background.tex
+++ b/Latex/Paper/sections/12_clinical_trial_background.tex
@ -0,0 +1,93 @@
+\documentclass[../Main.tex]{subfiles}
+\graphicspath{{\subfix{Assets/img/}}}
+
+\begin{document}
+
+% Clinical Trials Background Outline
+% - ClinicalTrials.gov
+% - Clincial trial progression
+% - 
+% - 
+% - 
+% - 
+% - 
+%   - 
+%   - 
+
+To understand how my administrative clinical trial data is obtained
+and what it can be used for, 
+let's take a look at how trial investigators record data on 
+\url{ClinicalTrials.gov}.
+Figure~\ref{Fig:Stages} illustrates the process I describe below.
+During the Pre-Trial period the trial investigators will design the trial, 
+choose primary and secondary objectives, 
+and decide on how many participants they need to enroll. 
+Once they have decided on these details, they post the trial to \url{ClinicalTrials.gov}
+and decide on a date to begin enrolling trial participants.
+If the investigators decide to not continue with the trial before enrolling any participants,
+the trial is marked as ``Withdrawn''. 
+On the other hand, if they begin enrolling participants, there are two methods to do so.
+The first is to enter a general ``Recruiting'' state, where patients attempt to enroll.
+The second is to enter an ``Enrollment by invitation only'' state.
+After a trial has enrolled their participants, they will typically move to an 
+``Active, not recruiting'' state to inform potential participants that they are
+not recruiting. 
+Finally, when the investigators have obtained enough data to achieve their primary
+objective, the clinical trial will be closed, and marked as ``Completed'' in
+\url{ClinicalTrials.gov}.
+If the trial is closed before achieving the primary objective, the trial is 
+marked as ``Terminated'' on
+\url{ClinicalTrials.gov}.
+
+
+\begin{figure}%[H] %use [H] to fix the figure here.
+    \includegraphics[width=\textwidth]{../assets/img/ClinicalTrialStagesAndStatuses}
+    \par \small 
+        Diamonds represent decision points while
+	Squares represent states of the clinical trial and Rhombuses represent data obtained by the trial.
+    \caption[Clinical Trial Stages and Progression]{Clinical Trial Stages and Progression}
+    \label{Fig:Stages}
+\end{figure}
+
+Note the information we obtain about the trial from the final status: 
+``Withdrawn'', ``Terminated'', or ``Completed''.
+Although \cite{khmelnitskaya_CompetitionAttrition_2021} describes a clinical failure due to safety or efficacy as a 
+\textit{scientific} failure, it is better described as a compound failure.
+Discovering that a compound doesn't work as hoped is not a failure but the whole
+purpose of the clinical trials process. 
+On the other hand, when a trial terminates early due to reasons 
+other than safety or efficacy concerns, the trial operator does not learn
+if the drug is effective or safe. 
+This is a true failure in that we did not learn if the drug was effective or not.
+Unfortunately, although termination documentation typically includes a 
+description of a reason for the clinical trial termination, this doesn't necessarily
+list all the reasons contributing to the trial termination and may not exist for a given trial.
+
+As a trial goes through the different stages of recruitment, the investigators
+update the records on ClinicalTrials.gov. 
+Even though there are only a few times that investigators are required 
+to update this information, it tends to be updated somewhat regularly as it is 
+a way to communicate with potential enrollees. 
+When a trial is first posted, it tends to include information
+such as planned enrollment, 
+planned end dates, 
+the sites at which it is being conducted, 
+the diseases that it is investigating, 
+the drugs or other treatments that will be used,
+the experimental arms that will be used,
+and who is sponsoring the trial.
+As enrollment is opened and closed and sites are added or removed, 
+investigators will update the status and information
+to help doctors and potential participants understand whether they should apply. 
+
+
+
+% - 
+% - 
+% - 
+% - 
+% - 
+% - 
+
+
+\end{document}
--- a/Latex/Paper/sql
+++ b/Latex/Paper/sql
@ -0,0 +1,54 @@
+--get a list of the most recent activations that exist for a given application.
+create temp table nsde_activations as
+select
+    application_number_or_citation,
+    count(distinct package_ndc) as package_count,
+    max(marketing_start_date) as most_recent_start,
+    max(marketing_end_date) as most_recent_end,
+    max(inactivation_date) as most_recent_inactivation,
+    max(reactivation_date) as most_recent_reactivation
+from spl.nsde
+group by application_number_or_citation
+;
+
+select count(*) from nsde_activations
+where most_recent_end is null
+;
+/*
+count
+-----
+ 6602
+*/
+
+
+select count(*) from nsde_activations
+where most_recent_end is NOT null
+;
+/*
+count
+-----
+ 10983
+*/
+
+/*
+So, the current number of marketed compounds is how many NDA or ANDA (ANADA?) compounds there are.
+
+*/
+
+-- get count of drugs that you can select by first 3 letters
+select
+    left(application_number_or_citation, 3) as first_3,
+    count(*) as row_count
+from nsde_activations
+group by first_3
+;
+
+
+
+select
+    left(application_number_or_citation, 3) as first_3,
+    count(*) as row_count
+from nsde_activations
+where first_3 in ()
+group by first_3
+;
--- a/Latex/assets/img/ClinicalTrialStagesAndStatuses.png
+++ b/Latex/assets/img/ClinicalTrialStagesAndStatuses.png
--- a/Latex/assets/preambles/BibPreamble.tex
+++ b/Latex/assets/preambles/BibPreamble.tex
@ -4,6 +4,20 @@


 %%% Setup Bibliography
-\usepackage[backend=biber,autocite=inline]{biblatex}
+\usepackage[
+    backend=biber,
+    autocite=inline,
+    style=alphabetic,
+]{biblatex}


+% Simple command to read the shell variable with my zotero bibliography
+% \immediate\write18{echo $ZOTERO_BIB > \jobname.tmp}
+% \CatchFileDef{\bibpath}{\jobname.tmp}{}
+% Set up bibliography using the shell variable
+% \addbibresource{\bibpath}
+
+% Manually add zotero library
+\addbibresource{~/.local/state/nvim_telescope_local_search/ZoteroLibrary.bib}
+
+\newcommand{\authorcite}[1]{\citeauthor{#1} \cite{#1}}
--- a/Latex/assets/preambles/GeneralPreamble.tex
+++ b/Latex/assets/preambles/GeneralPreamble.tex
@ -2,6 +2,7 @@
 \usepackage{booktabs} % Allows the use of \toprule, \midrule and \bottomrule in tables
 \usepackage{hyperref} %Includes Hyperlinks
 \usepackage{cleveref} %Simplifies citations and references
+\usepackage{todonotes} % Allows using todo-notes


 %Setup some language stuff