Compare commits
94 Commits
history-do
...
main
| Author | SHA1 | Date |
|---|---|---|
|
|
d1d0dc87a7 | 1 year ago |
|
|
1782372a45 | 1 year ago |
|
|
fc478517ac | 1 year ago |
|
|
d912408456 | 1 year ago |
|
|
2488cceebc | 1 year ago |
|
|
eca4795d2f | 1 year ago |
|
|
87074aa42b | 1 year ago |
|
|
3311159ab6 | 2 years ago |
|
|
bb374dbde9 | 2 years ago |
|
|
635cfe42d9 | 2 years ago |
|
|
79902f400a | 2 years ago |
|
|
495955170c | 2 years ago |
|
|
de3698052b | 2 years ago |
|
|
dfb041d12b | 2 years ago |
|
|
9aaf007791 | 2 years ago |
|
|
b4a3cec7e6 | 2 years ago |
|
|
211151e223 | 2 years ago |
|
|
d90539a679 | 2 years ago |
|
|
142670d08a | 2 years ago |
|
|
6a931b3a49 | 3 years ago |
|
|
1c3d749ef4 | 3 years ago |
|
|
ef68adae89 | 3 years ago |
|
|
a336fb92d9 | 3 years ago |
|
|
05a96a3a29 | 3 years ago |
|
|
256177e569 | 3 years ago |
|
|
b7290c271b | 3 years ago |
|
|
c4b8484cab | 3 years ago |
|
|
2a9b8349ba | 3 years ago |
|
|
9a718f72a0 | 3 years ago |
|
|
e88f450b8c | 3 years ago |
|
|
4643351305 | 3 years ago |
|
|
c1f9f6e528 | 3 years ago |
|
|
277b5b9bd5 | 3 years ago |
|
|
1de1ff9e4a | 3 years ago |
|
|
47996ba607 | 3 years ago |
|
|
9ac4cffe61 | 3 years ago |
|
|
5600ad932d | 3 years ago |
|
|
d1edac3c4f | 3 years ago |
|
|
e2edf1eb6b | 3 years ago |
|
|
c5f3bfcdec | 3 years ago |
|
|
12c3c69304 | 3 years ago |
|
|
29644a0ad5 | 3 years ago |
|
|
123fe3b5e4 | 3 years ago |
|
|
470dfc2611 | 3 years ago |
|
|
6876779c17 | 3 years ago |
|
|
bbdd7552a8 | 3 years ago |
|
|
4831864805 | 3 years ago |
|
|
6b5a48c77a | 3 years ago |
|
|
2aba3469d3 | 3 years ago |
|
|
ef7ed7001b | 3 years ago |
|
|
2ec314180f | 3 years ago |
|
|
f6b56da261 | 3 years ago |
|
|
ed49d8728a | 3 years ago |
|
|
39397cc224 | 3 years ago |
|
|
804a90c247 | 3 years ago |
|
|
8dbf4e8c2e | 3 years ago |
|
|
1a106a553e | 3 years ago |
|
|
f6f687fff5 | 3 years ago |
|
|
016a449258 | 3 years ago |
|
|
091fd63366 | 3 years ago |
|
|
bbf8c77e6d | 3 years ago |
|
|
4283719d3d | 3 years ago |
|
|
52f8152afd | 3 years ago |
|
|
23826fb576 | 3 years ago |
|
|
966171c840 | 3 years ago |
|
|
f5788051f7 | 3 years ago |
|
|
97af862419 | 3 years ago |
|
|
4cc4c5c99f | 3 years ago |
|
|
fa37dccfff | 3 years ago |
|
|
339a83117e | 3 years ago |
|
|
266c1c9686 | 3 years ago |
|
|
dfbd82de54 | 3 years ago |
|
|
fc38a2e92c | 3 years ago |
|
|
3eb9a4130a | 3 years ago |
|
|
ee3e37e834 | 3 years ago |
|
|
4ae3064bf2 | 3 years ago |
|
|
61dc377e0a | 3 years ago |
|
|
321e756cc6 | 3 years ago |
|
|
5d2140accd | 3 years ago |
|
|
9b26cd99df | 3 years ago |
|
|
1bdcc2fd83 | 3 years ago |
|
|
4a2b1ea2ed | 3 years ago |
|
|
91eeea100e | 3 years ago |
|
|
e240fff07c | 3 years ago |
|
|
a2c0e4dcc7 | 4 years ago |
|
|
e4971ae2f6 | 4 years ago |
|
|
f3d73a5ac1 | 4 years ago |
|
|
9d5a726494 | 4 years ago |
|
|
453e82974e | 4 years ago |
|
|
a9027c9467 | 4 years ago |
|
|
b1c146d550 | 4 years ago |
|
|
71e87a9abe | 4 years ago |
|
|
d8d00101fa | 4 years ago |
|
|
9850f4c677 | 4 years ago |
@ -0,0 +1,8 @@
|
||||
*.sql.gzip filter=lfs diff=lfs merge=lfs -text
|
||||
*.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||
containers/AACT_Reloader/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||
other_data/USP[[:space:]]DC/usp_dc_pub_2023_release_2.0_updated_final.csv filter=lfs diff=lfs merge=lfs -text
|
||||
other_data/USP[[:space:]]MMG/MMG_v8.0_Alignment_File.csv filter=lfs diff=lfs merge=lfs -text
|
||||
other_data/VA[[:space:]]Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv filter=lfs diff=lfs merge=lfs -text
|
||||
containers/AACT_Reloader/backup/aact_db_backup_20250106_184236.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||
containers/AACT_Reloader/backup/aact_db_backup_20250107_133822.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>ClinicalTrialsDataProcessing</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.jkiss.dbeaver.DBeaverNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
||||
@ -1,7 +0,0 @@
|
||||
FROM youainti/aact_from_dump
|
||||
LABEL AUTHOR 'Will King (youainti@protonmail.com)'
|
||||
LABEL DESCRIPTION 'add extra processing to the aact database in preparation for downloading history.'
|
||||
|
||||
#copy additional init scripts
|
||||
COPY ./docker-entrypoint-initdb.d/ /docker-entrypoint-initdb.d/
|
||||
#these will be run after the database is initialized
|
||||
@ -1,26 +0,0 @@
|
||||
-- Create a schema handling trial history.
CREATE SCHEMA history;

-- Role for anyone who needs to both select and insert on historical data.
CREATE ROLE history_writer;
GRANT CONNECT ON DATABASE aact_db TO history_writer;

GRANT USAGE ON SCHEMA history TO history_writer;

-- The privileges must target the `history` schema created above; the
-- original statement named an unrelated `http` schema.
GRANT INSERT, SELECT ON ALL TABLES IN SCHEMA history TO history_writer;

-- GRANT ... ON ALL TABLES only affects tables that already exist. The schema
-- is empty at this point, so also set default privileges for tables that are
-- created later by the role running this script.
ALTER DEFAULT PRIVILEGES IN SCHEMA history
    GRANT INSERT, SELECT ON TABLES TO history_writer;


-- Role for anyone who only needs selection access to historical data,
-- such as for analysis.
CREATE ROLE history_reader;
GRANT CONNECT ON DATABASE aact_db TO history_reader;

GRANT USAGE ON SCHEMA history TO history_reader;

GRANT SELECT ON ALL TABLES IN SCHEMA history TO history_reader;

ALTER DEFAULT PRIVILEGES IN SCHEMA history
    GRANT SELECT ON TABLES TO history_reader;


/* History Tables
Below is where I would construct the parsed trial history tables that I need.
*/
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
version: '3'
|
||||
|
||||
volumes:
|
||||
aact_pg_database: #This is to hold the database.
|
||||
|
||||
services:
|
||||
|
||||
aact:
|
||||
build: ./ClinicalTrialHistory #build and use the clinical trial history db.
|
||||
container_name: aact_db
|
||||
#restart: always #restart after crashes
|
||||
environment:
|
||||
POSTGRES_USER: root
|
||||
POSTGRES_PASSWORD: root
|
||||
POSTGRES_DB: aact_db
|
||||
ports:
|
||||
- "5432:5432" #host:container
|
||||
volumes: #host:container is the format.
|
||||
- aact_pg_database:/var/lib/postgresql/ # this is persistant storage for the database
|
||||
- ./20220201_clinical_trials/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
|
||||
|
||||
@ -1,93 +0,0 @@
|
||||
# Adobe Pdf Character ID (cid:\d+) parser
|
||||
# The purpose is to allow someone to create their own table equivalent to the "\toUnicode" that
|
||||
# should be provided in every PDF using cid's (but is often mangled).
|
||||
|
||||
def get_digits(string):
    """
    Extract the leading digits from the tail of a cid tag.

    `string` is the text that follows "(cid:", e.g. "12)rest". Everything
    before the first ")" is parsed as an integer. If there is no ")" the
    whole string is parsed.

    Returns a tuple `(value, length)`: the parsed integer and the number of
    characters that were parsed.

    Raises ValueError if the text before the first ")" is not an integer.
    """
    # partition stops at the first ")" instead of splitting the whole string.
    digits, _, _ = string.partition(")")
    return int(digits), len(digits)
|
||||
|
||||
def token_generator(string):
    """
    Yield tokens describing a string extracted from a pdf.

    Tokens take two forms:
    - Integers: these represent CID codes, written "(cid:<int>)" in the text.
    - Characters: the arbitrary single characters found amidst the cid's.

    It is a python generator because that simplifies the ordering and allows
    us to avoid recursion.

    A "(cid:" prefix that is not followed by an integer and a closing ")" is
    not treated as a tag: its characters are yielded one at a time instead of
    raising ValueError.
    """
    prefix = "(cid:"
    prefix_len = len(prefix)
    pos = 0
    end = len(string)

    while pos < end:
        # Test the prefix in place; slicing the remainder on every iteration
        # made the original loop quadratic in the length of the string.
        if string.startswith(prefix, pos):
            close = string.find(")", pos + prefix_len)
            if close != -1:
                try:
                    code = int(string[pos + prefix_len:close])
                except ValueError:
                    # Malformed tag: fall through and emit it as literal text.
                    pass
                else:
                    pos = close + 1
                    yield code
                    continue

        yield string[pos]
        pos += 1
|
||||
|
||||
|
||||
class UnknownSymbol:
    """
    Placeholder for a token that has no entry in the parser's dictionary.
    """

    def __init__(self, symbol):
        # Keep the raw token so it can be inspected afterwards.
        self.symbol = symbol

    def __str__(self):
        # U+FFFD, the Unicode replacement character.
        return "\uFFFD"

    def __repr__(self):
        kind = type(self.symbol)
        return "UnknownSymbol: {} of type {}".format(self.symbol, kind)
|
||||
|
||||
class Parser:
    """
    Translates from tokens to character arrays or strings, handling errors as it goes.

    It requires a lookup table (e.g. a dictionary) during instantiation.
    This table is what is used to perform lookups.

    It exposes these methods
    - convert attempts to convert a single token.
    - convert_list (alias: convert_stream) lazily converts an iterable of
      tokens into an iterable of text.
    - convert_list_of_strings tokenizes each string containing cids and other
      symbols, converts it, and prints
        - a string, if there are no unknown symbols.
        - a list, containing characters and UnknownSymbol objects, otherwise.
    """

    def __init__(self, lookup_table):
        self._lookup_table = lookup_table

    def convert(self, token):
        """Return the table entry for `token`, or an UnknownSymbol wrapping it."""
        try:
            return self._lookup_table[token]
        except (LookupError, TypeError):
            # LookupError: token missing from the table.
            # TypeError: token is not usable as a key/index for this table.
            return UnknownSymbol(token)

    def convert_list(self, token_stream):
        """Lazily yield the conversion of every token in `token_stream`."""
        for token in token_stream:
            yield self.convert(token)

    # The class documentation historically called this method convert_stream.
    convert_stream = convert_list

    def convert_list_of_strings(self, list_of_strings):
        """Tokenize, convert and print each string in `list_of_strings`."""
        for text in list_of_strings:
            converted = list(self.convert_list(token_generator(text)))
            try:
                print("".join(converted))
            except TypeError:
                # join() rejects non-string items, i.e. an UnknownSymbol is
                # present; show the raw list instead.
                print(converted)
|
||||
|
||||
if __name__ == "__main__":
    # Command-line handling was never written; only this notice is printed.
    # (The original call was missing its closing parenthesis, which made the
    # whole module a SyntaxError.)
    print("Plan was to accept and proceess a symbol table and text. Apparently it has not been implemented.")
|
||||
@ -1,371 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "40358f02-c376-4431-be39-cdd477f17e7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import polars as pl"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "8fb27ee2-72c1-4e80-9d00-de54f2834fe8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"polars.datatypes.Datetime"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pl.datatypes.Datetime"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"id": "2c0edd77-c2d0-4184-a094-8c01783d2f0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"products = pl.scan_csv(file=\"./EOBZIP_2022_04/products.txt\", sep=\"~\")\n",
|
||||
"patents = pl.scan_csv(file=\"./EOBZIP_2022_04/patent.txt\", sep=\"~\")\n",
|
||||
"exclusivity = pl.scan_csv(file=\"./EOBZIP_2022_04/exclusivity.txt\", sep=\"~\", parse_dates=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"id": "023f211d-23aa-4a2c-843d-1b60cec91079",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def set_exclusivity_types(df):\n",
|
||||
" return df.with_columns([\n",
|
||||
" pl.col(\"Exclusivity_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\")\n",
|
||||
" ])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"id": "a1da42c9-e47a-4437-b089-e9b91f789a0c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1 \"class=\"dataframe \">\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
"<th>\n",
|
||||
"Appl_Type\n",
|
||||
"</th>\n",
|
||||
"<th>\n",
|
||||
"Appl_No\n",
|
||||
"</th>\n",
|
||||
"<th>\n",
|
||||
"Product_No\n",
|
||||
"</th>\n",
|
||||
"<th>\n",
|
||||
"Exclusivity_Code\n",
|
||||
"</th>\n",
|
||||
"<th>\n",
|
||||
"Exclusivity_Date\n",
|
||||
"</th>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"str\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"i64\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"i64\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"str\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"date\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"11366\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"ODE-96\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-08-07\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"20287\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"11\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"NPP\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-05-16\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"20287\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"10\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"NPP\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-05-16\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"20287\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"9\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"NPP\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-05-16\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"20287\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"8\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"NPP\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-05-16\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"shape: (5, 5)\n",
|
||||
"┌───────────┬─────────┬────────────┬──────────────────┬──────────────────┐\n",
|
||||
"│ Appl_Type ┆ Appl_No ┆ Product_No ┆ Exclusivity_Code ┆ Exclusivity_Date │\n",
|
||||
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
||||
"│ str ┆ i64 ┆ i64 ┆ str ┆ date │\n",
|
||||
"╞═══════════╪═════════╪════════════╪══════════════════╪══════════════════╡\n",
|
||||
"│ N ┆ 11366 ┆ 2 ┆ ODE-96 ┆ 2022-08-07 │\n",
|
||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
||||
"│ N ┆ 20287 ┆ 11 ┆ NPP ┆ 2022-05-16 │\n",
|
||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
||||
"│ N ┆ 20287 ┆ 10 ┆ NPP ┆ 2022-05-16 │\n",
|
||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
||||
"│ N ┆ 20287 ┆ 9 ┆ NPP ┆ 2022-05-16 │\n",
|
||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
||||
"│ N ┆ 20287 ┆ 8 ┆ NPP ┆ 2022-05-16 │\n",
|
||||
"└───────────┴─────────┴────────────┴──────────────────┴──────────────────┘"
|
||||
]
|
||||
},
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"exclusivity.pipe(set_exclusivity_types).head(5).collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"id": "92fe99fa-1963-460c-99ea-7f614b4b2e25",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def set_patent_types(df):\n",
|
||||
" return df.with_columns([\n",
|
||||
" pl.col(\"Patent_Expire_Date_Text\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\"),\n",
|
||||
" pl.col(\"Submission_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\"),\n",
|
||||
" pl.col(\"Drug_Substance_Flag\") == \"Y\",\n",
|
||||
" pl.col(\"Drug_Product_Flag\") == \"Y\",\n",
|
||||
" pl.col(\"Delist_Flag\") == \"Y\"\n",
|
||||
" ])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 90,
|
||||
"id": "13707ca6-094f-4ed7-94cb-824087e97874",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1 \"class=\"dataframe \">\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
"<th>\n",
|
||||
"Patent_Expire_Date_Text\n",
|
||||
"</th>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"date\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"2022-01-02\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"shape: (1, 1)\n",
|
||||
"┌─────────────────────────┐\n",
|
||||
"│ Patent_Expire_Date_Text │\n",
|
||||
"│ --- │\n",
|
||||
"│ date │\n",
|
||||
"╞═════════════════════════╡\n",
|
||||
"│ 2022-01-02 │\n",
|
||||
"└─────────────────────────┘"
|
||||
]
|
||||
},
|
||||
"execution_count": 90,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"patents.pipe(set_patent_types).select(\"Patent_Expire_Date_Text\").min().collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"id": "18ad8df7-45d5-4454-8955-c5f28a7d7f1e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"polars.datatypes.Null"
|
||||
]
|
||||
},
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pl.datatypes.Null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "79e4b3d9-29ae-4302-bee1-4be02e0ba654",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
||||
https://www.fda.gov/media/76860/download
|
||||
@ -1,145 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "51bf48a1-920a-4e64-ac5f-323ff3a27ebf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Will use tool 'Tesseract (sh)'\n",
|
||||
"Available languages: eng, osd\n",
|
||||
"Will use language 'eng'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Import the required libraries\n",
|
||||
"from wand.image import Image\n",
|
||||
"from PIL import Image as PI\n",
|
||||
"import pyocr\n",
|
||||
"import pyocr.builders\n",
|
||||
"import io, sys\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Get the handle of the OCR library (in this case, tesseract)\n",
|
||||
"tools = pyocr.get_available_tools()\n",
|
||||
"if len(tools) == 0:\n",
|
||||
"\tprint(\"No OCR tool found!\")\n",
|
||||
"\tsys.exit(1)\n",
|
||||
"tool = tools[0]\n",
|
||||
"print(\"Will use tool '%s'\" % (tool.get_name()))\n",
|
||||
"\n",
|
||||
"# Get the language\n",
|
||||
"langs = tool.get_available_languages()\n",
|
||||
"print(\"Available languages: %s\" % \", \".join(langs)) \n",
|
||||
"lang = langs[0] # For English\n",
|
||||
"print(\"Will use language '%s'\" % (lang))\n",
|
||||
"\n",
|
||||
"# Setup two lists which will be used to hold our images and final_text\n",
|
||||
"req_image = []\n",
|
||||
"final_text = []\n",
|
||||
"\n",
|
||||
"# Open the PDF file using wand and convert it to jpeg\n",
|
||||
"image_pdf = Image(filename=\"/home/will/research/ClinicalTrialsDataProcessing/Orangebook/Orangebooks/testprint.pdf\", resolution=300)\n",
|
||||
"image_jpeg = image_pdf.convert('pdf')\n",
|
||||
"\n",
|
||||
"# wand has converted all the separate pages in the PDF into separate image\n",
|
||||
"# blobs. We can loop over them and append them as a blob into the req_image\n",
|
||||
"# list.\n",
|
||||
"for img in image_jpeg.sequence:\n",
|
||||
"\timg_page = Image(image=img)\n",
|
||||
"\treq_image.append(img_page.make_blob('jpeg'))\n",
|
||||
"\n",
|
||||
"# Now we just need to run OCR over the image blobs and store all of the \n",
|
||||
"# recognized text in final_text.\n",
|
||||
"for img in req_image:\n",
|
||||
"\ttxt = tool.image_to_string(\n",
|
||||
"\t\tPI.open(io.BytesIO(img)),\n",
|
||||
"\t\tlang=lang,\n",
|
||||
"\t\tbuilder=pyocr.builders.TextBuilder()\n",
|
||||
"\t)\n",
|
||||
"\tfinal_text.append(txt)\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f0d5f1d6-7e15-4ee6-b4ee-cbd41c5afb99",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"The final text is: \n",
|
||||
"\n",
|
||||
"40TH EDITION - 2020 - APPROVED DRUG PRODUCT LIST\n",
|
||||
"\n",
|
||||
"PRESCRIPTION DRUG PRODUCT LIST\n",
|
||||
"\n",
|
||||
"ABACAVIR SULFATE\n",
|
||||
"SOLUTION; ORAL\n",
|
||||
"ABACAVIR SULFATE\n",
|
||||
"\n",
|
||||
"EQ 2 5 /ML\n",
|
||||
"\n",
|
||||
"EQ 2 Ee /ML\n",
|
||||
"\n",
|
||||
"EQ 300MG BASE\n",
|
||||
"EQ 300MG BASE\n",
|
||||
"EQ 300MG BASE\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"\\nThe final text is: \\n\")\n",
|
||||
"print(final_text[0][0:200])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1cac17e7-079d-4e32-bdbf-ae49194b2078",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It appears that this does not have the required precision. I'll need to do this some other way."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2283e290-fab3-4cda-8ce9-55a0b3533c98",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@ -1,56 +1,91 @@
|
||||
# ClinicalTrialsDataProcessing
|
||||
|
||||
This is used to build tools which process and standardize the data.
|
||||
This represents my
|
||||
|
||||
More data later.
|
||||
## Prerequisites
|
||||
|
||||
# Outline
|
||||
> Python >= 3.10 (requires match statement)
|
||||
> Docker >= 20.10
|
||||
> Curl >= 7
|
||||
> Just >= 1.9
|
||||
|
||||
## Directory Tree
|
||||
AACT_downloader
|
||||
|
||||
# Usage
|
||||
|
||||
## Key files index
|
||||
|
||||
# Background on Docker
|
||||
Docker uses the following flow
|
||||
## Basic usage
|
||||
|
||||
1. configuration using `docker-compose.yaml` or a `Dockerfile`
|
||||
2. `docker build .` to generate an image
|
||||
3. `docker run xxxxxx` to take the image and create a container.
|
||||
- when the container is created, it starts, running commands as configured in the dockerfile.
|
||||
- Consequently, the AACT database image when run must initialize the postgres db, then run the initialization details.
|
||||
- Here is where bind mounts come into play.
|
||||
Check prerequisites
|
||||
```bash
|
||||
just check-status
|
||||
```
|
||||
|
||||
## Multistage builds
|
||||
https://stackoverflow.com/questions/53659993/docker-multi-stage-how-to-split-up-into-multiple-dockerfiles
|
||||
Setup the underlying AACT database including downloading both
|
||||
the AACT dump and historical data.
|
||||
```bash
|
||||
just create
|
||||
just select-trials
|
||||
just count=1000 get-histories
|
||||
```
|
||||
replacing the 1000 in `count=1000` with the number of trials you want to download.
|
||||
|
||||
https://docs.docker.com/develop/develop-images/multistage-build/
|
||||
## Advanced Usage
|
||||
|
||||
Basically
|
||||
If you need to reset the db without downloading the AACT dump
|
||||
```bash
|
||||
just rebuild
|
||||
just select-trials
|
||||
just count=1000 get-histories
|
||||
```
|
||||
|
||||
## Dockerfile vs docker-compose.yaml
|
||||
|
||||
A `Dockerfile` is used to create images.
|
||||
### Description of all the `just` recipes
|
||||
|
||||
A `docker-compose.yaml` is used to automate the deployment of containers.
|
||||
# Background information
|
||||
|
||||
## Types of storage
|
||||
This is designed to run on a linux machine with bash.
|
||||
If you are using a shell other than bash you should be aware of what
|
||||
is needed to run all of this using bash
|
||||
|
||||
### COPY/ADD (Dockerfile)
|
||||
If any of the discussions below don't make sense, talk to your sysadmin,
|
||||
a local linux user, or reach out to the author.
|
||||
|
||||
In a dockerfile, this adds a file permanently to the image.
|
||||
## Just installation
|
||||
|
||||
This adds files one way to or from the container when initialized.
|
||||
I use the command runner `just` to automate/simplify setting up the
|
||||
docker containers and running many of the python scripts.
|
||||
It is similar to `make` in many ways but is designed to do less.
|
||||
|
||||
### Volumes (docker-compose.yaml && Dockerfile)
|
||||
Just can be installed from https://github.com/casey/just/
|
||||
|
||||
Useable in both docker-compose and Dockerfile's, this creates a permanent storage.
|
||||
It can be maintained by docker or stored in a particular location.
|
||||
## Python installation
|
||||
|
||||
Good for longer term storage such as databases.
|
||||
This requires python 3.10 or above due to the use of match-case statements
|
||||
in the html parser.
|
||||
|
||||
### Bind mounts (docker-compose.yaml)
|
||||
Check which version of python you have by typing `python --version`.
|
||||
If you do not have the required version, I would recommend installing
|
||||
the conda python manager and setting up a conda environment with python 3.10.
|
||||
Instructions for doing so are on the internet.
|
||||
|
||||
Bind mounts are used to make a host filesystem resource
|
||||
available
|
||||
## Docker and Postgres
|
||||
Docker is a tool to manage and run OCI containers.
|
||||
What this means in regards to this project is that docker makes it
|
||||
easy to setup containers.
|
||||
|
||||
Install docker based on instructions for your linux distribution.
|
||||
I use podman (an alternative from RedHat) because it allows for running without root permissions.
|
||||
|
||||
### Docker networking
|
||||
|
||||
It is helpful to construct an external docker network by running
|
||||
|
||||
`docker network create network_name`
|
||||
|
||||
and then including that network in the docker-compose.yaml
|
||||
|
||||
# Environment Variables (`.env` file)
|
||||
I use a single .env file to set up the docker containers and pass configuration variables to
|
||||
the python scripts. I would suggest changing the default values in `sample.env` to match your needs.
|
||||
If you do need to think about the security of your database I would recommend
|
||||
you start by changing these.
|
||||
|
||||
@ -0,0 +1,133 @@
|
||||
|
||||
/* OVERVIEW
|
||||
*
|
||||
* This links trials to the first date each drug (indexed by NDA/ANDA etc) is
|
||||
* put on the market.
|
||||
*
|
||||
* It takes 3 views to build up to it.
|
||||
* */
|
||||
|
||||
--Match trials to brands and ingredients
|
||||
create or replace view public.match_trials_to_bn_in as
|
||||
with trialncts as (
|
||||
SELECT DISTINCT nct_id FROM history.trial_snapshots TS
|
||||
)
|
||||
SELECT
|
||||
bi.nct_id ,
|
||||
bi.downcase_mesh_term,
|
||||
rr.tty2 ,
|
||||
rr.rxcui2 as bn_or_in_cui, --brand or ingredient
|
||||
count(*)
|
||||
FROM ctgov.browse_interventions bi
|
||||
left outer JOIN rxnorm_migrated.rxnorm_props AS rp
|
||||
on bi.downcase_mesh_term = rp.propvalue1 --link names to drug cuis ()
|
||||
left outer join rxnorm_migrated.rxnorm_relations rr
|
||||
on rr.rxcui1 = rp.rxcui
|
||||
WHERE
|
||||
bi.nct_id in (
|
||||
SELECT nct_id FROM trialncts
|
||||
)
|
||||
and
|
||||
bi.mesh_type='mesh-list'
|
||||
and rp.propname = 'Active_ingredient_name'
|
||||
and rr.tty2 in ('BN', 'IN', 'MIN')
|
||||
group by bi.nct_id, bi.downcase_mesh_term , rr.tty2 ,rr.rxcui2
|
||||
order by bi.nct_id
|
||||
;
|
||||
--running out of space.
|
||||
|
||||
-- get list of interventions associated with trials of interest
|
||||
create temp table tmp_interventions as
|
||||
select * from ctgov.browse_interventions bi
|
||||
where
|
||||
bi.mesh_type ='mesh-list'
|
||||
and
|
||||
bi.nct_id in (select distinct nct_id from history.trial_snapshots)
|
||||
;
|
||||
select * from tmp_interventions;
|
||||
|
||||
--drop table tmp_join_interv_rxcui;
|
||||
create temp table tmp_join_interv_rxcui as
|
||||
select *
|
||||
from
|
||||
tmp_interventions tint
|
||||
inner join
|
||||
rxnorm_migrated.rxnorm_props rp
|
||||
on tint.downcase_mesh_term = rp.propvalue1
|
||||
where propname='RxNorm Name'
|
||||
;-- get the rxcui for ingredients
|
||||
|
||||
select * from tmp_join_interv_rxcui;
|
||||
|
||||
--filter rxcui -> is human prescribable
|
||||
create temp view tmp_view_prescribable as
|
||||
select count(*) from rxnorm_migrated.rxnorm_props rp
|
||||
where
|
||||
rp.propname = 'PRESCRIBABLE'
|
||||
and
|
||||
rp.propvalue1 = 'Y'
|
||||
;
|
||||
|
||||
--link prescribable to brand ingredients or brand names.
|
||||
|
||||
|
||||
--get relationships of IN -> BN
|
||||
select *
|
||||
from
|
||||
rxnorm_migrated.rxnorm_relations rr
|
||||
where
|
||||
rr.tty1 in ('IN','MIN')
|
||||
and rr.rxcui1 in (select distinct rxcui from tmp_join_interv_rxcui tjir)
|
||||
and rr.tty2 = 'BN'
|
||||
;
|
||||
|
||||
|
||||
|
||||
--match trials to NDC11 codes through brands
|
||||
create or replace view public.match_trial_to_ndc11 as
|
||||
select
|
||||
mttbi.nct_id,
|
||||
ah.ndc,
|
||||
count(*)
|
||||
from public.match_trials_to_bn_in as mttbi
|
||||
left outer join rxnorm_migrated.rxnorm_relations as rr
|
||||
on mttbi.bn_or_in_cui = rr.rxcui1
|
||||
left outer join rxnorm_migrated."ALLNDC_HISTORY" as ah
|
||||
on rr.rxcui2 = ah.rxcui
|
||||
where
|
||||
rr.tty1 = 'BN'
|
||||
and
|
||||
rr.tty2 in ('SBD', 'BPCK')
|
||||
and
|
||||
ah.sab='RXNORM'
|
||||
group by mttbi.nct_id, ah.ndc
|
||||
order by mttbi.nct_id, ah.ndc
|
||||
;
|
||||
|
||||
|
||||
|
||||
---associate trials to marketing start dates
|
||||
create or replace view public.match_trial_to_marketing_start_date as
|
||||
select
|
||||
mttn.nct_id,
|
||||
n.application_number_or_citation,
|
||||
min(n.marketing_start_date )
|
||||
from match_trial_to_ndc11 mttn
|
||||
inner join spl.nsde n
|
||||
on mttn.ndc = n.package_ndc11
|
||||
where
|
||||
n.product_type = 'HUMAN PRESCRIPTION DRUG'
|
||||
and
|
||||
n.marketing_category in ('NDA','ANDA','BLA', 'NDA authorized generic', 'NDA AUTHORIZED GENERIC')
|
||||
group by mttn.nct_id,n.application_number_or_citation
|
||||
order by mttn.nct_id
|
||||
;
|
||||
|
||||
---Number of trials after a certain date
|
||||
select nct_id,count(distinct application_number_or_citation)
|
||||
from public.match_trial_to_marketing_start_date mttmsd
|
||||
where "min" > '2012-06-01'
|
||||
group by nct_id
|
||||
;
|
||||
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
#!/bin/bash
# Dump the aact_db database from a podman container into a timestamped SQL
# file under backup_dir, then gzip it.
# Usage: <script> [container_name]    (container_name defaults to aact_db)

# Stop on the first failure so a failed pg_dump is not followed by gzip of a
# partial dump.
set -euo pipefail

backup_dir="/mnt/will/large_data/Research_large_data/ClinicalTrialsDataProcessing/containers/AACT_Reloader/backup/"
date_stamp=$(date +%Y%m%d_%H%M%S)
filename="aact_db_backup_${date_stamp}.sql"
# No spaces around '=': with spaces bash runs "container_name" as a command
# instead of assigning the variable.
container_name="${1:-aact_db}"

podman exec "$container_name" pg_dump -U root aact_db > "${backup_dir}/${filename}"

# Optional: compress the backup
gzip "${backup_dir}/${filename}"
|
||||
@ -0,0 +1,117 @@
|
||||
/*
|
||||
I started by creating a formularies schema,
|
||||
then importing the usp - dc formulary data through DBeaver's csv import.
|
||||
*/
|
||||
|
||||
-- DROP SCHEMA "Formularies";
|
||||
|
||||
CREATE SCHEMA "Formularies" AUTHORIZATION root;
|
||||
|
||||
-- "Formularies".usp_dc_2023 definition
|
||||
|
||||
-- Drop table
|
||||
|
||||
-- DROP TABLE "Formularies".usp_dc_2023;
|
||||
|
||||
CREATE TABLE "Formularies".usp_dc_2023 (
|
||||
rxcui varchar(15) NULL, --yes even though this is a number, it is represented as a string elsewhere.
|
||||
tty varchar(10) NULL,
|
||||
"Name" varchar(256) NULL,
|
||||
"Related BN" varchar(250) NULL,
|
||||
"Related DF" varchar(25050) NULL,
|
||||
"USP Category" varchar(250) NULL,
|
||||
"USP Class" varchar(250) NULL,
|
||||
"USP Pharmacotherapeutic Group" varchar(250) NULL,
|
||||
"API Concept" varchar(250) NULL
|
||||
);
|
||||
|
||||
/*
|
||||
I then linked the data back on itself with a materialized view, using claude.ai for simplicity.
|
||||
|
||||
Claude.ai > I need a postres sql statement to create a materialized view that will take the following table and link from a given rxcui to the other rxcui's that share the same category and class
|
||||
|
||||
```sql
|
||||
CREATE TABLE "Formularies".usp_dc_2023 (
|
||||
rxcui int4 NULL,
|
||||
tty varchar(10) NULL,
|
||||
"Name" varchar(256) NULL,
|
||||
"Related BN" varchar(250) NULL,
|
||||
"Related DF" varchar(25050) NULL,
|
||||
"USP Category" varchar(250) NULL,
|
||||
"USP Class" varchar(250) NULL,
|
||||
"USP Pharmacotherapeutic Group" varchar(250) NULL,
|
||||
"API Concept" varchar(250) NULL
|
||||
);
|
||||
```
|
||||
|
||||
It links rxcuis to other rxcuis where they have a matching USP Categories and Class
|
||||
This gives alternative RXCUIs based on category and class.
|
||||
*/
|
||||
-- Pair every rxcui with each other rxcui that has the same "USP Category"
-- and the same "USP Class"; rows with a NULL rxcui on either side are left out.
CREATE MATERIALIZED VIEW "Formularies".rxcui_category_class_links AS
SELECT DISTINCT
    src.rxcui as source_rxcui,
    alt.rxcui as linked_rxcui,
    src."USP Category" as category,
    src."USP Class" as class
FROM "Formularies".usp_dc_2023 src
JOIN "Formularies".usp_dc_2023 alt
    ON src."USP Category" = alt."USP Category"
    AND src."USP Class" = alt."USP Class"
    AND src.rxcui != alt.rxcui
WHERE src.rxcui IS NOT NULL
    AND alt.rxcui IS NOT NULL;
|
||||
|
||||
-- Add indexes for better query performance
|
||||
CREATE INDEX ON "Formularies".rxcui_category_class_links (source_rxcui);
|
||||
CREATE INDEX ON "Formularies".rxcui_category_class_links (linked_rxcui);
|
||||
|
||||
/*
|
||||
Next step is linking a given nct -> compounds -> formulary alternatives -> compounds -> brands/generics.
|
||||
I'll break this into two steps.
|
||||
|
||||
1. link formulary alternatives to compounds and brands,
|
||||
2. link nct_id to formulary alternatives
|
||||
*/
|
||||
-- Drop the dependent view first, then the view it is built on, so both can be recreated below.
-- PostgreSQL syntax is DROP MATERIALIZED VIEW IF EXISTS <name>; "drop if exists materialized view" does not parse.
drop materialized view if exists "Formularies".match_trial_compound_to_alternate_bn_rxcuis;
drop materialized view if exists "Formularies".rxcui_to_brand_through_uspdc cascade;
|
||||
|
||||
create materialized view "Formularies".rxcui_to_brand_through_uspdc AS
|
||||
select distinct
|
||||
rccl.source_rxcui
|
||||
,rccl.linked_rxcui
|
||||
,rccl.category
|
||||
,rccl."class"
|
||||
,rr.tty1
|
||||
--,rr.tty2
|
||||
,rr.rxcui2
|
||||
from "Formularies".rxcui_category_class_links rccl
|
||||
join rxnorm_migrated.rxnorm_relations rr on rr.rxcui1 = rccl.linked_rxcui
|
||||
where rr.tty2 = 'BN'
|
||||
;
|
||||
|
||||
/* So this one takes each RXCUI and its associated RXCUIs from the same
|
||||
category and class, and filters it down to associated RXCUIs that
|
||||
represent brand names.
|
||||
*/
|
||||
|
||||
create materialized view "Formularies".match_trial_compound_to_alternate_bn_rxcuis as
|
||||
select distinct mttbi.nct_id, rtbtu.rxcui2 as brand_rxcuis
|
||||
from match_trials_to_bn_in mttbi
|
||||
join "Formularies".rxcui_to_brand_through_uspdc rtbtu
|
||||
--NOTE(review): the join key rxcui2 is also the column selected as brand_rxcuis, so every
--brand_rxcuis value equals the trial's own bn_or_in_cui. If the goal is the brands of the
--same-category/class alternatives, the key may need to be rtbtu.source_rxcui -- confirm.
on mttbi.bn_or_in_cui = rtbtu.rxcui2
|
||||
;
|
||||
|
||||
/*
|
||||
This takes the list of ingredients and brands associated with a trial, and
|
||||
links it to the list of alternative brand names.
|
||||
*/
|
||||
|
||||
--renamed the view
|
||||
CREATE OR REPLACE VIEW "Formularies".nct_to_brand_counts_through_uspdc
|
||||
AS SELECT mtctabr.nct_id,
|
||||
count(*) AS brand_name_counts
|
||||
FROM "Formularies".match_trial_compound_to_alternate_bn_rxcuis mtctabr
|
||||
GROUP BY mtctabr.nct_id;
|
||||
@ -0,0 +1,100 @@
|
||||
|
||||
/* How many trials were included?
|
||||
* How many trials were inspected?
|
||||
* How many trials were reserved for download?
|
||||
* How many trials didn't get included for some technical reason?
|
||||
*
|
||||
********* Data from 2023-03-29 ***********
|
||||
Of Interest 1981
|
||||
Reserved 1709 #I believe this is lower than the downloaded number because I reserved them earlier
|
||||
Downloaded 1960
|
||||
Incomplete 3 #there were a few http 500 and 404 codes
|
||||
******************************************
|
||||
* Note there were 21 missing trials of interest.
|
||||
* */
|
||||
select status,count(distinct nct_id) from http.download_status ds
|
||||
group by status;
|
||||
|
||||
/* Get a list of trials
|
||||
* -- There are currently 304 trials for which I was able to extract unique snapshots (2023-03-29)
|
||||
* -- There are currently 1138 trials for which I was able to extract unique snapshots (2023-04-03)
|
||||
* */
|
||||
select count(distinct nct_id) from history.trial_snapshots ts;
|
||||
|
||||
/* Get the number of listed conditions
|
||||
* -- There are only 609 listed (MeSH classified) conditions from 284 trials(2023-03-29)
|
||||
* I may need to expand how I address conditions
|
||||
*/
|
||||
select count(*)
|
||||
from ctgov.browse_conditions bc
|
||||
where
|
||||
mesh_type = 'mesh-list'
|
||||
and
|
||||
nct_id in (select distinct nct_id from history.trial_snapshots ts)
|
||||
;
|
||||
|
||||
select count(distinct nct_id)
|
||||
from ctgov.browse_conditions bc
|
||||
where
|
||||
mesh_type = 'mesh-list'
|
||||
and
|
||||
nct_id in (select distinct nct_id from history.trial_snapshots ts)
|
||||
;
|
||||
|
||||
/*
|
||||
* If I were to expand that to non-coded conditions that would be
|
||||
* 304 trials with 398 conditions
|
||||
* */
|
||||
select count(distinct nct_id)
|
||||
from ctgov.conditions bc
|
||||
where
|
||||
nct_id in (select distinct nct_id from history.trial_snapshots ts)
|
||||
;
|
||||
select count(*) from ctgov.conditions c
|
||||
where
|
||||
nct_id in (select distinct nct_id from history.trial_snapshots ts)
;
|
||||
|
||||
|
||||
/* Get the number of matches from UMLS
|
||||
* There are about 5,808 proposed matches.
|
||||
*
|
||||
*/
|
||||
select count(*) from "DiseaseBurden".trial_to_icd10 tti ;
|
||||
--1383 before run at 8pm 2023-03-29
|
||||
--5209 at 2023-04-03T11:21
|
||||
|
||||
|
||||
/*Get the number of trials that have links to icd10 codes*/
|
||||
select tti.approved,count(distinct nct_id) from "DiseaseBurden".trial_to_icd10 tti
|
||||
group by tti.approved;
|
||||
|
||||
-- finding and removing duplicates from the trial linking stuff. Useful when you redownload trials.
|
||||
/*
|
||||
with CTE as (
|
||||
select row_number() over (partition by nct_id, "condition",ui) as rownum, *
|
||||
from "DiseaseBurden".trial_to_icd10 tti
|
||||
)
|
||||
delete from "DiseaseBurden".trial_to_icd10 tti2
|
||||
where id in (
|
||||
select id from cte where rownum > 1
|
||||
);
|
||||
*/
|
||||
|
||||
--get the number of completed vs terminated trials
|
||||
select overall_status,count(distinct nct_id)
|
||||
from ctgov.studies s
|
||||
where nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti where tti.approved ='accepted' )
|
||||
group by overall_status
|
||||
;
|
||||
|
||||
select overall_status,count(distinct nct_id)
|
||||
from ctgov.studies s
|
||||
where nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti)
|
||||
group by overall_status
|
||||
;
|
||||
|
||||
select overall_status,count(distinct nct_id)
|
||||
from ctgov.studies s
|
||||
where nct_id in (select distinct nct_id from history.trial_snapshots ts )
|
||||
group by overall_status
|
||||
;
|
||||
@ -0,0 +1,38 @@
|
||||
--TODO: Document and migrate to setup
|
||||
|
||||
drop table if exists "DiseaseBurden".trial_to_icd10;
|
||||
drop type if exists "DiseaseBurden".validation_type;
|
||||
|
||||
create type "DiseaseBurden".validation_type as enum ('accepted', 'rejected', 'unmatched');
|
||||
comment on type "DiseaseBurden".validation_type is 'This is used to record interactions with each type. It can be accepted (yes this should be used), rejected (no this doesn`t match), or unmatched (where non of the proposed options match)';
|
||||
|
||||
|
||||
CREATE TABLE "DiseaseBurden".trial_to_icd10 (
|
||||
id integer NOT NULL GENERATED ALWAYS AS IDENTITY,
|
||||
nct_id varchar NOT NULL,
|
||||
"condition" varchar NOT NULL,
|
||||
ui varchar NULL,
|
||||
uri varchar NULL,
|
||||
rootsource varchar NULL,
|
||||
"name" varchar NULL,
|
||||
"source" varchar null,
|
||||
approved "DiseaseBurden".validation_type,
|
||||
approval_timestamp timestamp,
|
||||
CONSTRAINT trial_to_icd10_pk PRIMARY KEY (id)
|
||||
);
|
||||
-- trial_to_icd10 is created as a table above, so the description is attached with COMMENT ON TABLE.
comment on table "DiseaseBurden".trial_to_icd10 is 'This represents potential links between trials and icd10 codes. Most of the links are both automatic and wrong.';
|
||||
|
||||
|
||||
|
||||
DROP TABLE if exists "DiseaseBurden".icd10_to_cause;
|
||||
|
||||
CREATE TABLE "DiseaseBurden".icd10_to_cause (
|
||||
id SERIAL NOT NULL ,
|
||||
code varchar NOT NULL,
|
||||
cause_text varchar NOT NULL,
|
||||
CONSTRAINT icd10_to_cause_pk PRIMARY KEY (id)
|
||||
);
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,38 @@
|
||||
-- Emit one CREATE OR REPLACE VIEW statement per view outside the system schemas.
-- quote_ident keeps mixed-case schema names such as "Formularies" and
-- "DiseaseBurden" intact; unquoted they would be folded to lower case when
-- the generated DDL is run.
SELECT
    'CREATE OR REPLACE VIEW ' || quote_ident(schemaname::text) || '.' || quote_ident(viewname::text) || ' AS ' || definition
FROM pg_views
WHERE schemaname != 'pg_catalog'
  and schemaname != 'information_schema' -- narrow to a single schema here if needed
;
|
||||
|
||||
-- Emit one CREATE MATERIALIZED VIEW statement per materialized view outside
-- the system schemas.
-- Materialized views are listed in pg_matviews (column matviewname), not in
-- pg_views. The emitted statement is a plain CREATE MATERIALIZED VIEW, so drop
-- the existing object first when re-running it.
-- NOTE(review): OR REPLACE was removed on the understanding that PostgreSQL
-- does not accept it for materialized views -- confirm for the server version in use.
SELECT
    'CREATE MATERIALIZED VIEW ' || quote_ident(schemaname::text) || '.' || quote_ident(matviewname::text) || ' AS ' || definition
FROM pg_matviews
WHERE schemaname != 'pg_catalog'
  and schemaname != 'information_schema'
;
|
||||
|
||||
-- Emit one CREATE TABLE statement per table outside the system schemas,
-- built from information_schema.columns (name, data type, length, NOT NULL).
-- Identifiers go through quote_ident so mixed-case schemas and column names
-- containing spaces (e.g. "USP Category") survive, and the columns are
-- aggregated in ordinal_position order so the output matches the table layout.
SELECT
    'CREATE TABLE ' || quote_ident(schemaname::text) || '.' || quote_ident(tablename::text) || E'\n(\n' ||
    string_agg(column_definition, E',\n' ORDER BY ordinal_position) || E'\n);\n'
FROM (
    SELECT
        t.schemaname,
        t.tablename,
        c.ordinal_position,
        quote_ident(c.column_name::text) || ' ' || c.data_type ||
        CASE
            WHEN c.character_maximum_length IS NOT NULL THEN '(' || c.character_maximum_length || ')'
            ELSE ''
        END ||
        CASE
            WHEN c.is_nullable = 'NO' THEN ' NOT NULL'
            ELSE ''
        END as column_definition
    FROM pg_catalog.pg_tables t
    JOIN information_schema.columns c
        ON t.schemaname = c.table_schema
        AND t.tablename = c.table_name
    WHERE t.schemaname != 'pg_catalog'
      and t.schemaname != 'information_schema' -- narrow to a single schema here if needed
) cols
GROUP BY schemaname, tablename;
|
||||
@ -0,0 +1,658 @@
|
||||
-- if not exists: lets the script be re-run on a database where tablefunc is already installed
create extension if not exists tablefunc;
|
||||
|
||||
/*Getting Trial Data all together
|
||||
* There are 3 main datasets to join per trial:
|
||||
*
|
||||
* - Trial Data (still need to stick it together)
|
||||
* - Duration and enrollment data
|
||||
* - Compound Marketing (can get for any trial)
|
||||
* - how many individual brands per compound at the start of the trial
|
||||
* - Disease Data (can get for verified trials)
|
||||
* - Population upper limit (Global Burdens of Disease)
|
||||
* - Category (ICD10 2nd level groups)
|
||||
*/
|
||||
|
||||
/*Disease Data*/
|
||||
-- ICD10 Category and GBD data
|
||||
with cte as (
|
||||
select
|
||||
nct_id,
|
||||
max("level") as max_level
|
||||
from trial_to_cause
|
||||
group by nct_id
|
||||
), cte2 as (
|
||||
select
|
||||
ttc.nct_id,
|
||||
ttc.ui,
|
||||
ttc."condition",
|
||||
ttc.cause_text,
|
||||
ttc.cause_id,
|
||||
cte.max_level
|
||||
from trial_to_cause ttc
|
||||
join cte
|
||||
on cte.nct_id=ttc.nct_id
|
||||
where ttc."level"=cte.max_level
|
||||
group by
|
||||
ttc.nct_id,
|
||||
ttc.ui,
|
||||
ttc."condition",
|
||||
ttc.cause_text,
|
||||
ttc.cause_id,
|
||||
cte.max_level
|
||||
order by nct_id,ui
|
||||
), cte3 as (
|
||||
select
|
||||
nct_id,
|
||||
substring(cte2.ui for 3) as code,
|
||||
cte2."condition",
|
||||
cte2.cause_text,
|
||||
cte2.cause_id,
|
||||
ic.id as category_id,
|
||||
ic.group_name
|
||||
from cte2
|
||||
join "DiseaseBurden".icd10_categories ic
|
||||
on
|
||||
substring(cte2.ui for 3) <= ic.end_code
|
||||
and
|
||||
substring(cte2.ui for 3) >= ic.start_code
|
||||
)
|
||||
select nct_id, cause_id,category_id
|
||||
from cte3
|
||||
group by nct_id, cause_id, category_id
|
||||
;
|
||||
--still need to link to actual disease burdens.
|
||||
|
||||
/*Compound Marketing Data*/
|
||||
---Number of trials after a certain date
|
||||
with marketing_cte as (
|
||||
select nct_id,count(distinct application_number_or_citation)
|
||||
from public.match_trial_to_marketing_start_date mttmsd
|
||||
where "min" > '2012-06-01'
|
||||
group by nct_id
|
||||
)
|
||||
select * from marketing_cte
|
||||
;
|
||||
|
||||
/*Get versions*/
|
||||
/* Ignore this version
|
||||
with cte1 as (
|
||||
select nct_id,min("version") over (partition by nct_id) as min_version
|
||||
from history.trial_snapshots ts
|
||||
where
|
||||
ts.start_date < ts.submission_date
|
||||
), cte2 as (
|
||||
select * from cte1
|
||||
group by nct_id, min_version
|
||||
order by nct_id
|
||||
), cte3 as (
|
||||
select
|
||||
ts2.nct_id,
|
||||
ts2."version",
|
||||
ts2.overall_status,
|
||||
ts2.submission_date,
|
||||
ts2.start_date,
|
||||
ts2.enrollment,
|
||||
ts2.enrollment_category,
|
||||
ts2.primary_completion_date,
|
||||
ts2.primary_completion_date_category ,
|
||||
--mv.nct_id,
|
||||
mv.min_version
|
||||
from history.trial_snapshots ts2
|
||||
join cte2 mv
|
||||
on mv.nct_id = ts2.nct_id
|
||||
where
|
||||
ts2."version" = mv.min_version
|
||||
order by ts2.nct_id
|
||||
), cte4 as (
|
||||
select cte3.nct_id, cte3.submission_date - cte3.start_date as submission_presecence
|
||||
from cte3
|
||||
)
|
||||
select avg(submission_presecence)
|
||||
from cte4
|
||||
;
|
||||
--avg 61 day difference
|
||||
*/
|
||||
|
||||
--use this version
|
||||
with cte1 as ( --get trials
|
||||
select nct_id,max("version") over (partition by nct_id) as min_version
|
||||
from history.trial_snapshots ts
|
||||
where
|
||||
ts.start_date > ts.submission_date
|
||||
), cte2 as ( --
|
||||
select * from cte1
|
||||
group by nct_id, min_version
|
||||
order by nct_id
|
||||
), cte3 as (
|
||||
select
|
||||
ts2.nct_id,
|
||||
ts2."version",
|
||||
ts2.overall_status,
|
||||
ts2.submission_date,
|
||||
ts2.start_date,
|
||||
ts2.enrollment,
|
||||
ts2.enrollment_category,
|
||||
ts2.primary_completion_date,
|
||||
ts2.primary_completion_date_category ,
|
||||
--mv.nct_id,
|
||||
mv.min_version
|
||||
from history.trial_snapshots ts2
|
||||
join cte2 mv
|
||||
on mv.nct_id = ts2.nct_id
|
||||
where
|
||||
ts2."version" = mv.min_version
|
||||
order by ts2.nct_id
|
||||
)
|
||||
select *
|
||||
from cte3
|
||||
where
|
||||
enrollment is null
|
||||
or enrollment_category is null
|
||||
or primary_completion_date is null
|
||||
or primary_completion_date_category is null
|
||||
or start_date is null
;
|
||||
/*, cte4 as (
|
||||
select cte3.nct_id, cte3.submission_date - cte3.start_date as submission_presecence
|
||||
from cte3
|
||||
)
|
||||
select avg(submission_presecence)
|
||||
from cte4
|
||||
; -- -33 day difference on average
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
with cte1_min as (
|
||||
select nct_id,min("version") over (partition by nct_id) as min_version
|
||||
from history.trial_snapshots ts
|
||||
where
|
||||
ts.start_date <= ts.submission_date
|
||||
),cte1_max as (
|
||||
select nct_id,max("version") over (partition by nct_id) as max_version
|
||||
from history.trial_snapshots ts
|
||||
where
|
||||
ts.start_date >= ts.submission_date
|
||||
), cte2_min as (
|
||||
select * from cte1_min
|
||||
group by nct_id, min_version
|
||||
), cte2_max as (
|
||||
select * from cte1_max
|
||||
group by nct_id, max_version
|
||||
)
|
||||
select *
|
||||
from cte2_min
|
||||
join cte2_max
|
||||
on cte2_min.nct_id=cte2_max.nct_id
|
||||
where min_version >= max_version
;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* Need to take a different tack in filling out this version of the data.
|
||||
* The idea is that we need the latest of each major category
|
||||
* before the start date.
|
||||
* */
|
||||
|
||||
--get the set of trials which have
|
||||
with cte as (
|
||||
/* Get the absolute difference between the start date and the
|
||||
* submission_date for each version of the trial (measured in days)
|
||||
*
|
||||
*/
|
||||
select
|
||||
s.nct_id,
|
||||
s.start_date,
|
||||
ts."version",
|
||||
ts.submission_date,
|
||||
abs(extract(epoch from ts.submission_date - s.start_date)::float/(24*60*60)) as start_deviance
|
||||
from ctgov.studies s
|
||||
join history.trial_snapshots ts
|
||||
on s.nct_id = ts.nct_id
|
||||
where s.nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti)
|
||||
),cte2 as (
|
||||
/* Rank each version based on its proximity to the start date
|
||||
* */
|
||||
select
|
||||
cte.nct_id,
|
||||
cte."version",
|
||||
row_number() over (partition by cte.nct_id order by cte.start_deviance) as rownum,
|
||||
cte.submission_date,
|
||||
cte.start_deviance,
|
||||
cte.start_date,
|
||||
ts.primary_completion_date ,
|
||||
ts.primary_completion_date_category ,
|
||||
ts.overall_status ,
|
||||
ts.enrollment ,
|
||||
ts.enrollment_category
|
||||
from cte
|
||||
join history.trial_snapshots ts
|
||||
on cte.nct_id=ts.nct_id and cte."version"=ts."version"
|
||||
), cte3_primary_completion as (
|
||||
/* for each trial
|
||||
* select the version with a filled out primary_completion_source
|
||||
* that is closest to the start date.
|
||||
* */
|
||||
select cte2.nct_id, min(cte2.rownum) as primary_completion_source
|
||||
from cte2
|
||||
where cte2.primary_completion_date is not null
|
||||
group by cte2.nct_id
|
||||
), cte3_enrollment as (
|
||||
/* for each trial
|
||||
* select the version with a filled out enrollment
|
||||
* that is closest to the start date.
|
||||
* */
|
||||
select cte2.nct_id, min(cte2.rownum) as enrollment_source
|
||||
from cte2
|
||||
where cte2.enrollment is not null
|
||||
group by cte2.nct_id
|
||||
), cte4 as (
|
||||
/* join the best options together to get the data of interest.
|
||||
*
|
||||
* On further inspection there are just a few of those, with
|
||||
* many of them having a 7+ month difference between the two versions.
|
||||
* I think I am going to drop them.
|
||||
* */
|
||||
select
|
||||
c3e.nct_id,
|
||||
--c2a.submission_date as submission_date_a,
|
||||
--c2b.submission_date as submission_date_b,
|
||||
--c3e.enrollment_source,
|
||||
c2a."version" as version_a,
|
||||
c2a.enrollment,
|
||||
c2a.enrollment_category,
|
||||
--c3p.primary_completion_source ,
|
||||
c2b."version" as version_b,
|
||||
c2b.primary_completion_date,
|
||||
c2b.primary_completion_date_category
|
||||
from cte3_enrollment c3e
|
||||
join cte2 c2a
|
||||
on c3e.nct_id = c2a.nct_id and c3e.enrollment_source = c2a.rownum
|
||||
join cte3_primary_completion c3p
|
||||
on c3e.nct_id = c3p.nct_id
|
||||
join cte2 c2b
|
||||
on c3p.nct_id=c2b.nct_id and c3p.primary_completion_source = c2b.rownum
|
||||
), cte5 as (
|
||||
select nct_id
|
||||
from cte4 where version_a != version_b
|
||||
)
|
||||
select
|
||||
c.nct_id,
|
||||
s2.overall_status,
|
||||
c.enrollment as planned_enrollment,
|
||||
s2.enrollment,
|
||||
s2.start_date,
|
||||
c.primary_completion_date as planned_primary_completion_date,
|
||||
s2.primary_completion_date,
|
||||
extract(epoch from c.primary_completion_date - s2.start_date)/(24*60*60) as planned_duration,
|
||||
s2.primary_completion_date - s2.start_date as actual_duration
|
||||
from cte4 c
|
||||
join ctgov.studies s2
|
||||
on c.nct_id = s2.nct_id
|
||||
where c.nct_id not in (select nct_id from cte5)
|
||||
;
|
||||
|
||||
|
||||
/*
|
||||
* Concern about causal inference
|
||||
*
|
||||
* When putting the data together for CBO it looked like we got occasional updates about
|
||||
* the status of trials that included enrollment updates.
|
||||
* That doesn't appear to be the case, but that messes with the ability to causally identify
|
||||
* any results. I need to be careful about how this data is used.
|
||||
*
|
||||
* I created the statements below to get the data that I need.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
----get the set of trial snapshots
|
||||
create or replace view public.view_cte as
|
||||
select
|
||||
nct_id,
|
||||
primary_completion_date,
|
||||
primary_completion_date_category,
|
||||
enrollment,
|
||||
start_date,
|
||||
enrollment_category ,
|
||||
overall_status,
|
||||
--count("version"),
|
||||
min(submission_date) as earliest_date_observed
|
||||
from history.trial_snapshots ts
|
||||
where
|
||||
nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti where tti.approved='accepted')
|
||||
and submission_date >= start_date
|
||||
and overall_status not in ('Completed','Terminated')
|
||||
group by
|
||||
nct_id,
|
||||
primary_completion_date,
|
||||
primary_completion_date_category,
|
||||
start_date,
|
||||
enrollment,
|
||||
enrollment_category ,
|
||||
overall_status
|
||||
;
|
||||
create or replace view public.view_disbur_cte0 as
|
||||
select tti.nct_id, tti.ui , tti."condition",itc.cause_text, ch.cause_id, ch."level"
|
||||
from "DiseaseBurden".trial_to_icd10 tti
|
||||
join "DiseaseBurden".icd10_to_cause itc
|
||||
on replace(REPLACE(tti.ui,'-',''),'.','') = replace(REPLACE(itc.code ,'-',''),'.','')
|
||||
join "DiseaseBurden".cause_hierarchy ch
|
||||
on itc.cause_text = ch.cause_name
|
||||
where
|
||||
tti.approved = 'accepted'
|
||||
;
|
||||
create or replace view public.view_trial_to_cause as
|
||||
select tti.nct_id, tti.ui , tti."condition",itc.cause_text, ch.cause_id, ch."level"
|
||||
from "DiseaseBurden".trial_to_icd10 tti
|
||||
join "DiseaseBurden".icd10_to_cause itc
|
||||
on replace(REPLACE(tti.ui,'-',''),'.','') = replace(REPLACE(itc.code ,'-',''),'.','')
|
||||
join "DiseaseBurden".cause_hierarchy ch
|
||||
on itc.cause_text = ch.cause_name
|
||||
where
|
||||
tti.approved = 'accepted'
|
||||
order by nct_id
|
||||
;--NOTE(review): yes, this repeats public.view_disbur_cte0 above; the only difference is the trailing "order by nct_id"
|
||||
|
||||
create or replace view public.view_disbur_cte as
|
||||
select
|
||||
nct_id,
|
||||
max("level") as max_level
|
||||
from view_disbur_cte0
|
||||
group by nct_id
|
||||
|
||||
;
|
||||
create or replace view public.view_disbur_cte2 as
|
||||
select
|
||||
ttc.nct_id,
|
||||
ttc.ui,
|
||||
ttc."condition",
|
||||
ttc.cause_text,
|
||||
ttc.cause_id,
|
||||
disbur_cte.max_level
|
||||
from view_trial_to_cause ttc
|
||||
join view_disbur_cte as disbur_cte
|
||||
on disbur_cte.nct_id=ttc.nct_id
|
||||
where ttc."level"=disbur_cte.max_level
|
||||
group by
|
||||
ttc.nct_id,
|
||||
ttc.ui,
|
||||
ttc."condition",
|
||||
ttc.cause_text,
|
||||
ttc.cause_id,
|
||||
disbur_cte.max_level
|
||||
order by nct_id,ui
|
||||
;
|
||||
create or replace view public.view_disbur_cte3 as
|
||||
select
|
||||
nct_id,
|
||||
substring(disbur_cte2.ui for 3) as code,
|
||||
disbur_cte2."condition",
|
||||
disbur_cte2.cause_text,
|
||||
disbur_cte2.cause_id,
|
||||
ic.chapter_code as category_id,
|
||||
ic.group_name,
|
||||
disbur_cte2.max_level
|
||||
from view_disbur_cte2 as disbur_cte2
|
||||
join "DiseaseBurden".icd10_categories ic
|
||||
on
|
||||
substring(disbur_cte2.ui for 3) <= ic.end_code
|
||||
and
|
||||
substring(disbur_cte2.ui for 3) >= ic.start_code
|
||||
where ic."level" = 1
|
||||
|
||||
;
|
||||
create or replace view public.view_burdens_cte as
|
||||
select *
|
||||
from "DiseaseBurden".burdens b
|
||||
where b.sex_id = 3 --both sexes
|
||||
and b.metric_id = 1 --number/count
|
||||
and b.measure_id = 2 --DALYs
|
||||
and b.age_id =22 --all ages
|
||||
;
|
||||
create or replace view public.view_burdens_cte2 as
|
||||
select
|
||||
--c1.location_id,
|
||||
c1.cause_id,
|
||||
c1."year",
|
||||
--high sdi
|
||||
c1.val as h_sdi_val,
|
||||
c1.upper_95 as h_sdi_u95,
|
||||
c1.lower_95 as h_sdi_l95,
|
||||
--high-middle sdi
|
||||
c2.val as hm_sdi_val,
|
||||
c2.upper_95 as hm_sdi_u95,
|
||||
c2.lower_95 as hm_sdi_l95,
|
||||
--middle sdi
|
||||
c3.val as m_sdi_val,
|
||||
c3.upper_95 as m_sdi_u95,
|
||||
c3.lower_95 as m_sdi_l95,
|
||||
--low-middle sdi
|
||||
c4.val as lm_sdi_val,
|
||||
c4.upper_95 as lm_sdi_u95,
|
||||
c4.lower_95 as lm_sdi_l95,
|
||||
--low sdi
|
||||
c5.val as l_sdi_val,
|
||||
c5.upper_95 as l_sdi_u95,
|
||||
c5.lower_95 as l_sdi_l95
|
||||
from view_burdens_cte c1
|
||||
join view_burdens_cte c2
|
||||
on c1.cause_id = c2.cause_id
|
||||
and c1."year" = c2."year"
|
||||
join view_burdens_cte c3
|
||||
on c1.cause_id = c3.cause_id
|
||||
and c1."year" = c3."year"
|
||||
join view_burdens_cte c4
|
||||
on c1.cause_id = c4.cause_id
|
||||
and c1."year" = c4."year"
|
||||
join view_burdens_cte c5
|
||||
on c1.cause_id = c5.cause_id
|
||||
and c1."year" = c5."year"
|
||||
where c1.location_id = 44635
|
||||
and c2.location_id = 44634
|
||||
and c3.location_id = 44639
|
||||
and c4.location_id = 44636
|
||||
and c5.location_id = 44637
|
||||
;
|
||||
--drop view if exists public.formatted_data cascade;
|
||||
create or replace view public.formatted_data as
|
||||
select
|
||||
cte.nct_id,
|
||||
cte.start_date,
|
||||
cte.enrollment as current_enrollment,
|
||||
cte.enrollment_category,
|
||||
cte.overall_status as current_status,
|
||||
cte.earliest_date_observed,
|
||||
--fraction of the planned duration elapsed at the earliest observation; nullif yields NULL instead of a division-by-zero error when primary_completion_date equals start_date
extract( epoch from (cte.earliest_date_observed - cte.start_date))/nullif(extract( epoch from (cte.primary_completion_date - cte.start_date)), 0) as elapsed_duration
|
||||
,count(distinct mttmsd."application_number_or_citation") as n_brands
|
||||
,dbc3.code
|
||||
,dbc3."condition"
|
||||
,dbc3.cause_text
|
||||
,dbc3.cause_id
|
||||
,dbc3.category_id
|
||||
,dbc3.group_name
|
||||
,dbc3.max_level
|
||||
--c1.location_id,
|
||||
--,b.cause_id
|
||||
,b."year",
|
||||
--high sdi
|
||||
b.h_sdi_val,
|
||||
b.h_sdi_u95,
|
||||
b.h_sdi_l95,
|
||||
--high-middle sdi
|
||||
b.hm_sdi_val,
|
||||
b.hm_sdi_u95,
|
||||
b.hm_sdi_l95,
|
||||
--middle sdi
|
||||
b.m_sdi_val,
|
||||
b.m_sdi_u95,
|
||||
b.m_sdi_l95,
|
||||
--low-middle sdi
|
||||
b.lm_sdi_val,
|
||||
b.lm_sdi_u95,
|
||||
b.lm_sdi_l95,
|
||||
--low sdi
|
||||
b.l_sdi_val,
|
||||
b.l_sdi_u95,
|
||||
b.l_sdi_l95
|
||||
from view_cte as cte
|
||||
join public.match_trial_to_marketing_start_date mttmsd
|
||||
on cte.nct_id = mttmsd."nct_id"
|
||||
join view_disbur_cte3 dbc3
|
||||
on dbc3.nct_id = cte.nct_id
|
||||
join view_burdens_cte2 b
|
||||
on b.cause_id = dbc3.cause_id and extract(year from b."year") = extract(year from cte.earliest_date_observed)
|
||||
where
|
||||
mttmsd."min" <= cte.earliest_date_observed
|
||||
group by
|
||||
cte.nct_id,
|
||||
cte.start_date,
|
||||
cte.enrollment,
|
||||
cte.enrollment_category,
|
||||
cte.overall_status,
|
||||
cte.earliest_date_observed,
|
||||
elapsed_duration
|
||||
,dbc3.code
|
||||
,dbc3."condition"
|
||||
,dbc3.cause_text
|
||||
,dbc3.cause_id
|
||||
,dbc3.category_id
|
||||
,dbc3.group_name
|
||||
,dbc3.max_level
|
||||
--c1.location_id,
|
||||
,b.cause_id,
|
||||
b."year",
|
||||
--high sdi
|
||||
b.h_sdi_val,
|
||||
b.h_sdi_u95,
|
||||
b.h_sdi_l95,
|
||||
--high-middle sdi
|
||||
b.hm_sdi_val,
|
||||
b.hm_sdi_u95,
|
||||
b.hm_sdi_l95,
|
||||
--middle sdi
|
||||
b.m_sdi_val,
|
||||
b.m_sdi_u95,
|
||||
b.m_sdi_l95,
|
||||
--low-middle sdi
|
||||
b.lm_sdi_val,
|
||||
b.lm_sdi_u95,
|
||||
b.lm_sdi_l95,
|
||||
--low sdi
|
||||
b.l_sdi_val,
|
||||
b.l_sdi_u95,
|
||||
b.l_sdi_l95
|
||||
order by cte.nct_id ,cte.earliest_date_observed
|
||||
;--used this one 2023-04-05
|
||||
--get the planned enrollment
|
||||
create or replace view public.time_between_submission_and_start_view as
|
||||
/* Get the absolute difference between the start date and the
|
||||
* submission_date for each version of the trial (measured in days)
|
||||
*
|
||||
*/
|
||||
select
|
||||
s.nct_id,
|
||||
s.start_date,
|
||||
ts."version",
|
||||
ts.submission_date,
|
||||
abs(extract(epoch from ts.submission_date - s.start_date)::float/(24*60*60)) as start_deviance
|
||||
from ctgov.studies s
|
||||
join history.trial_snapshots ts
|
||||
on s.nct_id = ts.nct_id
|
||||
where s.nct_id in (select distinct nct_id from "DiseaseBurden".trial_to_icd10 tti)
|
||||
;
|
||||
create or replace view rank_proximity_to_start_time_view as
|
||||
/* Rank each version based on its proximity to the start date
|
||||
* */
|
||||
select
|
||||
cte.nct_id,
|
||||
cte."version",
|
||||
row_number() over (partition by cte.nct_id order by cte.start_deviance) as rownum,
|
||||
cte.submission_date,
|
||||
cte.start_deviance,
|
||||
cte.start_date,
|
||||
ts.primary_completion_date ,
|
||||
ts.primary_completion_date_category ,
|
||||
ts.overall_status ,
|
||||
ts.enrollment ,
|
||||
ts.enrollment_category
|
||||
from public.time_between_submission_and_start_view cte
|
||||
join history.trial_snapshots ts
|
||||
on cte.nct_id=ts.nct_id and cte."version"=ts."version"
|
||||
;
|
||||
create or replace view enrollment_closest_to_start_view as
|
||||
/* for each trial
|
||||
* select the version with a filled out enrollment
|
||||
* that is closest to the start date.
|
||||
* */
|
||||
select cte2.nct_id, min(cte2.rownum) as enrollment_source
|
||||
from rank_proximity_to_start_time_view cte2
|
||||
where cte2.enrollment is not null
|
||||
group by cte2.nct_id
|
||||
;
|
||||
--drop view public.formatted_data_with_planned_enrollment ;
|
||||
create or replace view formatted_data_with_planned_enrollment as
|
||||
select
|
||||
f.*,
|
||||
s.overall_status as final_status,
|
||||
c2a."version",
|
||||
c2a.enrollment as planned_enrollment
|
||||
from formatted_data f
|
||||
join ctgov.studies s
|
||||
on f.nct_id = s.nct_id
|
||||
join enrollment_closest_to_start_view c3e
|
||||
on c3e.nct_id = f.nct_id
|
||||
join rank_proximity_to_start_time_view c2a
|
||||
on c3e.nct_id = c2a.nct_id and c3e.enrollment_source = c2a.rownum
|
||||
;
|
||||
select * from formatted_data_with_planned_enrollment;
|
||||
|
||||
-------------------GET COUNTS------------------
|
||||
select count(distinct nct_id) from public.view_cte; --88
|
||||
select count(distinct nct_id) from public.view_disbur_cte0; --130
|
||||
select count(distinct nct_id) from public.view_trial_to_cause; --130
|
||||
select count(distinct nct_id) from public.view_disbur_cte;--130
|
||||
select count(distinct nct_id) from public.view_disbur_cte2;--130
|
||||
select count(distinct nct_id) from public.view_disbur_cte3;--130
|
||||
select count(distinct nct_id) from public.formatted_data; --48 probably because there are so many trials that don't fall into a GBD category/cause
|
||||
select count(distinct nct_id) from public.time_between_submission_and_start_view;--1067
|
||||
select count(distinct nct_id) from rank_proximity_to_start_time_view;--1067
|
||||
select count(distinct nct_id) from enrollment_closest_to_start_view;--1067
|
||||
select count(distinct nct_id) from formatted_data_with_planned_enrollment;--48
|
||||
|
||||
|
||||
|
||||
select count(distinct nct_id) from public.view_trial_to_cause; --130
|
||||
select count(distinct nct_id) from formatted_data_with_planned_enrollment;--48
|
||||
|
||||
|
||||
--get durations and count snapshots per trial
|
||||
with cte1 as (
|
||||
select
|
||||
nct_id,
|
||||
start_date ,
|
||||
primary_completion_date,
|
||||
overall_status ,
|
||||
primary_completion_date - start_date as duration
|
||||
from ctgov.studies s
|
||||
where nct_id in (select distinct nct_id from http.download_status ds)
|
||||
), cte2 as (
|
||||
select nct_id,count(*) as snapshot_count from formatted_data_with_planned_enrollment fdwpe
|
||||
group by nct_id
|
||||
)
|
||||
select a.nct_id, a.overall_status, a.duration,b.snapshot_count
|
||||
from cte1 as a
|
||||
join cte2 as b
|
||||
on a.nct_id=b.nct_id
|
||||
;
|
||||
@ -0,0 +1,104 @@
|
||||
select * from "DiseaseBurden".icd10_to_cause itc ;
|
||||
select * from "DiseaseBurden".cause c ;
|
||||
|
||||
|
||||
|
||||
|
||||
select c.id, count(distinct code)
|
||||
from "DiseaseBurden".cause c
|
||||
join "DiseaseBurden".icd10_to_cause itc
|
||||
on c.cause = itc.cause_text
|
||||
group by c.id
|
||||
order by c.id
|
||||
;
|
||||
|
||||
select tti.approved,count(distinct nct_id) from "DiseaseBurden".trial_to_icd10 tti
|
||||
group by tti.approved;
|
||||
|
||||
select nct_id, "condition", ui
|
||||
from "DiseaseBurden".trial_to_icd10 tti
|
||||
where tti.approved = 'accepted';
|
||||
|
||||
-- Link trials to their causes.
-- The view is a temp view, so it does not exist at the start of a new session;
-- "if exists" lets the script run from a fresh connection without erroring.
drop view if exists trial_to_cause;

create temp view trial_to_cause as
select tti.nct_id, tti.ui , tti."condition",itc.cause_text, ch.cause_id, ch."level"
from "DiseaseBurden".trial_to_icd10 tti
join "DiseaseBurden".icd10_to_cause itc
    -- compare the codes with '-' and '.' stripped from both sides
    on replace(REPLACE(tti.ui,'-',''),'.','') = replace(REPLACE(itc.code ,'-',''),'.','')
join "DiseaseBurden".cause_hierarchy ch
    on itc.cause_text = ch.cause_name
where
    -- only keep trial-to-ICD10 matches marked as accepted
    tti.approved = 'accepted'
order by nct_id
;
|
||||
|
||||
select distinct nct_id, count(*), min("level"), max("level")
|
||||
from trial_to_cause ttc
|
||||
group by nct_id
|
||||
;
|
||||
|
||||
select nct_id,cause_text,cause_id from trial_to_cause
|
||||
where level = 3
|
||||
group by nct_id,cause_text,cause_id
|
||||
order by cause_id
|
||||
;
|
||||
|
||||
select cause_id,"condition",cause_text,count(distinct nct_id) as c
|
||||
from trial_to_cause
|
||||
where level >= 3
|
||||
group by cause_id,"condition",cause_text
|
||||
--having count(distinct nct_id) > 2
|
||||
order by cause_id
|
||||
;
|
||||
|
||||
with cte as (
|
||||
select
|
||||
nct_id,
|
||||
max("level") as max_level
|
||||
from trial_to_cause
|
||||
group by nct_id
|
||||
), cte2 as (
|
||||
select
|
||||
ttc.nct_id,
|
||||
ttc.ui,
|
||||
ttc."condition",
|
||||
ttc.cause_text,
|
||||
ttc.cause_id,
|
||||
cte.max_level
|
||||
from trial_to_cause ttc
|
||||
join cte
|
||||
on cte.nct_id=ttc.nct_id
|
||||
where ttc."level"=cte.max_level
|
||||
group by
|
||||
ttc.nct_id,
|
||||
ttc.ui,
|
||||
ttc."condition",
|
||||
ttc.cause_text,
|
||||
ttc.cause_id,
|
||||
cte.max_level
|
||||
order by nct_id,ui
|
||||
), cte3 as (
|
||||
select
|
||||
nct_id,
|
||||
substring(cte2.ui for 3) as code,
|
||||
cte2."condition",
|
||||
cte2.cause_text,
|
||||
cte2.cause_id,
|
||||
ic.id as category_id,
|
||||
ic.group_name
|
||||
from cte2
|
||||
join "DiseaseBurden".icd10_categories ic
|
||||
on
|
||||
substring(cte2.ui for 3) <= ic.end_code
|
||||
and
|
||||
substring(cte2.ui for 3) >= ic.start_code
|
||||
)
|
||||
select nct_id, cause_id,category_id
|
||||
from cte3
|
||||
group by nct_id, cause_id, category_id
|
||||
;
|
||||
|
||||
|
||||
@ -0,0 +1,83 @@
|
||||
|
||||
drop view if exists public.match_trial_to_marketing_start_date;
|
||||
DROP VIEW if exists public.match_trial_to_ndc11;
|
||||
drop view if exists public.match_trials_to_bn_in;
|
||||
|
||||
drop view if exists history.match_drugs_to_trials;
|
||||
DROP TABLE IF EXISTS history.trial_snapshots;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS history.trial_snapshots
|
||||
(
|
||||
nct_id character varying(15) COLLATE pg_catalog."default" NOT NULL,
|
||||
version integer NOT NULL,
|
||||
submission_date timestamp without time zone,
|
||||
primary_completion_date timestamp without time zone,
|
||||
primary_completion_date_category history.updatable_catetories,
|
||||
start_date timestamp without time zone,
|
||||
start_date_category history.updatable_catetories,
|
||||
completion_date timestamp without time zone,
|
||||
completion_date_category history.updatable_catetories,
|
||||
overall_status history.study_statuses,
|
||||
enrollment integer,
|
||||
enrollment_category history.updatable_catetories,
|
||||
sponsor character varying COLLATE pg_catalog."default",
|
||||
responsible_party character varying COLLATE pg_catalog."default",
|
||||
CONSTRAINT trial_snapshots_pkey PRIMARY KEY (nct_id, version)
|
||||
);
|
||||
|
||||
|
||||
ALTER TABLE IF EXISTS history.trial_snapshots
|
||||
OWNER to root;
|
||||
|
||||
|
||||
CREATE OR REPLACE VIEW history.match_drugs_to_trials
|
||||
AS SELECT bi.nct_id,
|
||||
rp.rxcui,
|
||||
rp.propvalue1
|
||||
FROM ctgov.browse_interventions bi
|
||||
JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
|
||||
WHERE rp.propname::text = 'RxNorm Name'::text AND (bi.nct_id::text IN ( SELECT trial_snapshots.nct_id
|
||||
FROM history.trial_snapshots));
|
||||
|
||||
|
||||
CREATE OR REPLACE VIEW public.match_trials_to_bn_in
|
||||
AS WITH trialncts AS (
|
||||
SELECT DISTINCT ts.nct_id
|
||||
FROM history.trial_snapshots ts
|
||||
)
|
||||
SELECT bi.nct_id,
|
||||
bi.downcase_mesh_term,
|
||||
rr.tty2,
|
||||
rr.rxcui2 AS bn_or_in_cui,
|
||||
count(*) AS count
|
||||
FROM ctgov.browse_interventions bi
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui
|
||||
WHERE (bi.nct_id::text IN ( SELECT trialncts.nct_id
|
||||
FROM trialncts)) AND bi.mesh_type::text = 'mesh-list'::text AND rp.propname::text = 'Active_ingredient_name'::text AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))
|
||||
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
|
||||
ORDER BY bi.nct_id;
|
||||
|
||||
|
||||
CREATE OR REPLACE VIEW public.match_trial_to_ndc11
|
||||
AS SELECT mttbi.nct_id,
|
||||
ah.ndc,
|
||||
count(*) AS count
|
||||
FROM match_trials_to_bn_in mttbi
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON mttbi.bn_or_in_cui = rr.rxcui1
|
||||
LEFT JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON rr.rxcui2 = ah.rxcui
|
||||
WHERE rr.tty1 = 'BN'::bpchar AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar])) AND ah.sab::text = 'RXNORM'::text
|
||||
GROUP BY mttbi.nct_id, ah.ndc
|
||||
ORDER BY mttbi.nct_id, ah.ndc;
|
||||
|
||||
|
||||
CREATE OR REPLACE VIEW public.match_trial_to_marketing_start_date
|
||||
AS SELECT mttn.nct_id,
|
||||
n.application_number_or_citation,
|
||||
min(n.marketing_start_date) AS min
|
||||
FROM match_trial_to_ndc11 mttn
|
||||
JOIN spl.nsde n ON mttn.ndc = n.package_ndc11::bpchar
|
||||
WHERE n.product_type::text = 'HUMAN PRESCRIPTION DRUG'::text AND (n.marketing_category::text = ANY (ARRAY['NDA'::character varying, 'ANDA'::character varying, 'BLA'::character varying, 'NDA authorized generic'::character varying, 'NDA AUTHORIZED GENERIC'::character varying]::text[]))
|
||||
GROUP BY mttn.nct_id, n.application_number_or_citation
|
||||
ORDER BY mttn.nct_id;
|
||||
|
||||
@ -0,0 +1,308 @@
|
||||
select * from formatted_data_with_planned_enrollment fdwpe
|
||||
;
|
||||
|
||||
|
||||
select * from formatted_data_mat fdm
|
||||
;
|
||||
|
||||
-- Number of distinct conditions in the materialized data.
-- Terminated explicitly so the next select parses as its own statement.
select count(distinct condition ) from formatted_data_mat fdm
;
|
||||
|
||||
select nct_id, fdm.current_status , count(*)
|
||||
from formatted_data_mat fdm
|
||||
group by nct_id , fdm.current_status
|
||||
order by nct_id
|
||||
;
|
||||
|
||||
select * from formatted_data_mat fdm ;
|
||||
|
||||
|
||||
-- group with trial split
|
||||
with cte as (
|
||||
select nct_id
|
||||
from formatted_data_mat fdm
|
||||
group by nct_id
|
||||
having count(distinct current_status) > 1
|
||||
order by nct_id
|
||||
)
|
||||
select
|
||||
fdm.nct_id
|
||||
, current_status
|
||||
, earliest_date_observed
|
||||
, elapsed_duration
|
||||
, n_brands
|
||||
, category_id
|
||||
, h_sdi_val
|
||||
, h_sdi_u95
|
||||
, h_sdi_l95
|
||||
, hm_sdi_val
|
||||
, hm_sdi_u95
|
||||
, hm_sdi_l95
|
||||
, m_sdi_val
|
||||
, m_sdi_u95
|
||||
, m_sdi_l95
|
||||
, lm_sdi_val
|
||||
, lm_sdi_u95
|
||||
, lm_sdi_l95
|
||||
, l_sdi_val
|
||||
, l_sdi_u95
|
||||
, l_sdi_l95
|
||||
from formatted_data_mat fdm
|
||||
join cte on cte.nct_id = fdm.nct_id
|
||||
group by
|
||||
fdm.nct_id
|
||||
, current_status
|
||||
, earliest_date_observed
|
||||
, elapsed_duration
|
||||
, n_brands
|
||||
, category_id
|
||||
, h_sdi_val
|
||||
, h_sdi_u95
|
||||
, h_sdi_l95
|
||||
, hm_sdi_val
|
||||
, hm_sdi_u95
|
||||
, hm_sdi_l95
|
||||
, m_sdi_val
|
||||
, m_sdi_u95
|
||||
, m_sdi_l95
|
||||
, lm_sdi_val
|
||||
, lm_sdi_u95
|
||||
, lm_sdi_l95
|
||||
, l_sdi_val
|
||||
, l_sdi_u95
|
||||
, l_sdi_l95
|
||||
order by nct_id , earliest_date_observed
|
||||
;
|
||||
|
||||
-- Count the distinct categories.
-- NOTE(review): the original statement stopped after "from" with no relation;
-- formatted_data_mat is assumed from the query directly below -- confirm.
select count(distinct category_id ) from formatted_data_mat fdm
;

-- List the distinct categories themselves.
select distinct category_id from formatted_data_mat fdm
;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
-- group with trial split
|
||||
with cte as (
|
||||
select nct_id
|
||||
from formatted_data_mat fdm
|
||||
group by nct_id
|
||||
having count(distinct current_status) > 1
|
||||
order by nct_id
|
||||
)
|
||||
select
|
||||
fdm.nct_id
|
||||
, current_status
|
||||
, earliest_date_observed
|
||||
, elapsed_duration
|
||||
, n_brands
|
||||
, category_id
|
||||
, h_sdi_val
|
||||
, h_sdi_u95
|
||||
, h_sdi_l95
|
||||
, hm_sdi_val
|
||||
, hm_sdi_u95
|
||||
, hm_sdi_l95
|
||||
, m_sdi_val
|
||||
, m_sdi_u95
|
||||
, m_sdi_l95
|
||||
, lm_sdi_val
|
||||
, lm_sdi_u95
|
||||
, lm_sdi_l95
|
||||
, l_sdi_val
|
||||
, l_sdi_u95
|
||||
, l_sdi_l95
|
||||
from formatted_data_mat fdm
|
||||
join cte on cte.nct_id = fdm.nct_id
|
||||
group by
|
||||
fdm.nct_id
|
||||
, current_status
|
||||
, earliest_date_observed
|
||||
, elapsed_duration
|
||||
, n_brands
|
||||
, category_id
|
||||
, h_sdi_val
|
||||
, h_sdi_u95
|
||||
, h_sdi_l95
|
||||
, hm_sdi_val
|
||||
, hm_sdi_u95
|
||||
, hm_sdi_l95
|
||||
, m_sdi_val
|
||||
, m_sdi_u95
|
||||
, m_sdi_l95
|
||||
, lm_sdi_val
|
||||
, lm_sdi_u95
|
||||
, lm_sdi_l95
|
||||
, l_sdi_val
|
||||
, l_sdi_u95
|
||||
, l_sdi_l95
|
||||
order by nct_id , earliest_date_observed
|
||||
; --TODO: join to usp dc dataset
|
||||
|
||||
|
||||
|
||||
|
||||
WITH trialncts AS (
|
||||
SELECT DISTINCT ts.nct_id
|
||||
FROM history.trial_snapshots ts
|
||||
), nct_to_cui AS (
|
||||
SELECT bi.nct_id,
|
||||
bi.downcase_mesh_term,
|
||||
rr.tty2,
|
||||
rr.rxcui2 AS approved_drug_rxcui,
|
||||
count(*) AS count
|
||||
FROM ctgov.browse_interventions bi
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui
|
||||
WHERE (bi.nct_id::text IN ( SELECT trialncts.nct_id
|
||||
FROM trialncts)) AND bi.mesh_type::text = 'mesh-list'::text AND rp.propname::text = 'Active_ingredient_name'::text AND (rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text]))
|
||||
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
|
||||
)
|
||||
SELECT nct_to_cui.nct_id,
|
||||
ud."USP Category",
|
||||
ud."USP Class"
|
||||
FROM nct_to_cui
|
||||
JOIN "Formularies".usp_dc ud ON ud.rxcui::bpchar = nct_to_cui.approved_drug_rxcui
|
||||
GROUP BY nct_to_cui.nct_id, ud."USP Category", ud."USP Class"
|
||||
ORDER BY nct_to_cui.nct_id;
|
||||
|
||||
|
||||
|
||||
|
||||
CREATE MATERIALIZED VIEW "Formularies".nct_to_brands_through_uspdc
|
||||
AS
|
||||
WITH trialncts AS (
|
||||
SELECT DISTINCT ts.nct_id
|
||||
FROM history.trial_snapshots ts
|
||||
)
|
||||
SELECT
|
||||
bi.nct_id,
|
||||
count( distinct rr2.rxcui2 ) as brand_name_count
|
||||
FROM ctgov.browse_interventions bi
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text --match mesh terms to rxcui
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui -- match rxcui to relations between rxcuis
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr2 ON rr.rxcui2 = rr2.rxcui1 -- match rxcui to relations between rxcuis
|
||||
WHERE
|
||||
(bi.nct_id::text IN (SELECT trialncts.nct_id FROM trialncts)) --check the nct_id is in our list
|
||||
AND
|
||||
bi.mesh_type::text = 'mesh-list'::text --we are only looking at mesh "list" rxcuis
|
||||
AND rp.propname::text = 'Active_ingredient_name'::text --and we only care about active ingredients linked to \/\/\/\/\/
|
||||
AND (rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text])) --and we are linking from active ingredients ^^^^ to branded packs
|
||||
AND (rr2.tty2::text = 'BN') --and from branded packs back to brand names
|
||||
GROUP BY bi.nct_id --remove duplicates
|
||||
;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
select
|
||||
fdqpe.nct_id
|
||||
--,fdqpe.start_date
|
||||
--,fdqpe.current_enrollment
|
||||
--,fdqpe.enrollment_category
|
||||
,fdqpe.current_status
|
||||
,fdqpe.earliest_date_observed
|
||||
,fdqpe.elapsed_duration
|
||||
,fdqpe.n_brands as identical_brands
|
||||
,ntbtu.brand_name_count
|
||||
,fdqpe.category_id
|
||||
,fdqpe.final_status
|
||||
,fdqpe.h_sdi_val
|
||||
--,fdqpe.h_sdi_u95
|
||||
--,fdqpe.h_sdi_l95
|
||||
,fdqpe.hm_sdi_val
|
||||
--,fdqpe.hm_sdi_u95
|
||||
--,fdqpe.hm_sdi_l95
|
||||
,fdqpe.m_sdi_val
|
||||
--,fdqpe.m_sdi_u95
|
||||
--,fdqpe.m_sdi_l95
|
||||
,fdqpe.lm_sdi_val
|
||||
--,fdqpe.lm_sdi_u95
|
||||
--,fdqpe.lm_sdi_l95
|
||||
,fdqpe.l_sdi_val
|
||||
--,fdqpe.l_sdi_u95
|
||||
--,fdqpe.l_sdi_l95
|
||||
from formatted_data_mat fdqpe
|
||||
join "Formularies".nct_to_brands_through_uspdc ntbtu
|
||||
on fdqpe.nct_id = ntbtu.nct_id
|
||||
;
|
||||
|
||||
--example of multiple reopenings
|
||||
-- All snapshots of a single trial, used as the example of multiple reopenings.
-- Terminated explicitly so the following "with" query is a separate statement.
select *
from formatted_data_mat fdm
where nct_id = 'NCT01239797'
;
|
||||
|
||||
--attempt to automatically find transition periods
|
||||
-- For each trial, pair the earliest date observed in 'Active, not recruiting'
-- with the latest date observed in any other status, and show the difference.
with cte1 as (
    -- window min: every 'Active, not recruiting' row carries the trial's earliest such date
    select nct_id, min(earliest_date_observed) over (partition by nct_id) as earliest_closed_enrollment
    from formatted_data_mat fdm
    where current_status = 'Active, not recruiting'
), cte2 as (
    -- window max: every other-status row carries the trial's latest such date
    select nct_id, max(earliest_date_observed) over (partition by nct_id) latest_open_enrollment
    from formatted_data_mat fdm
    where current_status != 'Active, not recruiting'
)
select
    cte1.nct_id
    ,cte1.earliest_closed_enrollment
    ,cte2.latest_open_enrollment
    ,cte1.earliest_closed_enrollment - cte2.latest_open_enrollment
from cte1
join cte2 on cte1.nct_id = cte2.nct_id
/*group by
    cte1.nct_id
    ,cte1.earliest_closed_enrollment
    ,cte2.latest_open_enrollment
*/
; -- terminator added: without it this statement ran into the next "with" query
|
||||
|
||||
|
||||
|
||||
/* So occasionally a study reopens enrollment.
|
||||
* If that didn't happen, then I could just find the first enrollment matching X and/or last enrollment matching Y
|
||||
* to get the transitions
|
||||
* Instead I need to create shifts of statuses between snapshots, and then remove all of those that did not change.
|
||||
*
|
||||
* Better yet, just get the last shift to ANR.
|
||||
* */
|
||||
|
||||
|
||||
/* Take each entry and get the status from a lagged snapshot
|
||||
* Then select each snapshot moving from previous_state to ANR
|
||||
* and filter out everything except the last one.
|
||||
* */
|
||||
-- Find, per trial, the last snapshot at which the status changed to
-- 'Active, not recruiting', then join back to the full snapshot rows.
with cte as (
    -- attach the status of the previous snapshot (by date) to every snapshot
    select
        nct_id
        ,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed) as previous_status
        ,current_status
        ,earliest_date_observed as date_current
    from formatted_data_mat fdm
), cte2 as (
    -- Keep only transitions into 'Active, not recruiting' and, of those, the
    -- latest one per trial. The previous version grouped by date_current as
    -- well, so max(date_current) was computed per row and every transition
    -- survived instead of only the last.
    select distinct on (nct_id)
        nct_id
        ,previous_status
        ,current_status
        ,date_current as date_current_max
    from cte
    where
        previous_status != current_status
        and
        current_status = 'Active, not recruiting'
    order by
        nct_id
        ,date_current desc
)
select *
from formatted_data_mat fdm
join cte2
    on cte2.nct_id = fdm.nct_id
    and cte2.date_current_max = fdm.earliest_date_observed
; --join back into
|
||||
|
||||
@ -0,0 +1,35 @@
|
||||
#!/bin/bash
set -x

# Runs every ../export/export_data_*.sql file with psql inside a podman
# container and writes the psql output to ../export/output_<date>_<name>.sql.
#
# Usage: pg_export container_name [database_name] [username]
#
# Defaults: database_name=aact_db, username=root
if [[ $# -lt 1 ]]; then
    echo "Usage: pg_export container_name [database_name] [username]"
    # `return` is only valid when the file is sourced; when it is executed,
    # `return` fails and the script would keep going, so fall back to `exit`.
    return 1 2>/dev/null || exit 1
fi

CONTAINER=$1
DBNAME=${2:-aact_db}
USER=${3:-root}

#
# Earlier variants, kept for reference:
# for sqlfile in ../export/export_data_*.sql; do
#   if [[ -f "$sqlfile" ]]; then
#     outfile="../export/output_$(date -I)_$(basename ${sqlfile%.sql}).sql"
#     # podman exec -t "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f - < "$sqlfile" > "$outfile"
#     # podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f "$sqlfile" > "$outfile"
#     podman cp "$sqlfile" "$CONTAINER":/tmp/query.sql
#     podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f /tmp/query.sql > "$outfile"
#   fi
# done
#

for sqlfile in ../export/export_data_*.sql; do
    # When nothing matches, the glob stays literal; -f filters that case out.
    if [[ -f "$sqlfile" ]]; then
        # Quote the basename argument so paths containing spaces stay one word.
        outfile="../export/output_$(date -I)_$(basename "${sqlfile%.sql}").sql"
        # Copy the query into the container, then run it there; psql's stdout
        # is captured on the host as the export file.
        podman cp "$sqlfile" "$CONTAINER":/tmp/query.sql
        podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -f "/tmp/query.sql" > "$outfile"
    fi
done
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
/***************CREATE VIEWS*******************/
|
||||
create or replace view
|
||||
history.match_drugs_to_trials as
|
||||
select nct_id, rxcui, propvalue1
|
||||
from
|
||||
ctgov.browse_interventions as bi
|
||||
join
|
||||
rxnorm_migrated.rxnorm_props as rp
|
||||
on bi.downcase_mesh_term = rp.propvalue1
|
||||
where
|
||||
propname='RxNorm Name'
|
||||
and
|
||||
nct_id in (select nct_id from history.trial_snapshots)
|
||||
;
|
||||
|
||||
|
||||
/********************IN DEVELOPMENT*********************/
|
||||
|
||||
/* Get the count of brand names attached to each trial
|
||||
* I should develop this into a view that matches trials to brands
|
||||
* then create a view that gets the counts.
|
||||
*/
|
||||
select rxcui1,count(rxcui2) from rxnorm_migrated.rxnorm_relations rr
|
||||
where
|
||||
rxcui1 in (select rxcui from history.match_drugs_to_trials)
|
||||
and
|
||||
tty2 = 'BN'
|
||||
group by rxcui1
|
||||
order by count(rxcui2) desc
|
||||
;
|
||||
@ -0,0 +1,3 @@
|
||||
# TODO
|
||||
|
||||
Code up a data extraction tool that uses llama3 or a similar quality source to extract the data that I need from the extended aact_database
|
||||
@ -1,44 +0,0 @@
|
||||
SELECT why_stopped FROM ctgov.studies
|
||||
WHERE why_stopped IS NOT NULL
|
||||
LIMIT 100;
|
||||
|
||||
SELECT study_type, count(*) from ctgov.studies
|
||||
group by study_type;
|
||||
|
||||
SELECT is_fda_regulated_drug, count(*) from ctgov.studies
|
||||
GROUP BY is_fda_regulated_drug;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Note that there is a decent number of trials that have expanded access
|
||||
*/
|
||||
SELECT
|
||||
study_type
|
||||
, phase
|
||||
, has_expanded_access
|
||||
, has_dmc
|
||||
, count(*)
|
||||
FROM ctgov.studies
|
||||
WHERE
|
||||
is_fda_regulated_drug is true
|
||||
AND
|
||||
study_type = 'Interventional'
|
||||
AND
|
||||
start_date > date('2007-01-01')
|
||||
group by study_type, phase, has_expanded_access, has_dmc;
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Find different mesh terms as assigned by clinicaltrials.gov
|
||||
*/
|
||||
select * from ctgov.browse_conditions
|
||||
order by nct_id desc,mesh_type
|
||||
limit 200;
|
||||
|
||||
select * from ctgov.browse_interventions
|
||||
order by nct_id desc
|
||||
limit 200;
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
import psycopg2 as psyco
|
||||
import pandas as pd
|
||||
import nltk
|
||||
from nltk.corpus import stopwords
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.cluster import KMeans
|
||||
import re
|
||||
|
||||
|
||||
def preporcess_text(text):
    """Normalise free text before vectorisation.

    Lower-cases *text*, replaces every run of non-letter characters with a
    single space, tokenises the result with ``nltk.word_tokenize``, drops
    English stopwords and returns the remaining tokens joined by spaces.
    """
    text = text.lower()
    text = re.sub("[^A-Za-z]+", " ", text)

    # make tokens
    tokens = nltk.word_tokenize(text)

    # remove stopwords
    # Build the stopword set once per call: the previous version evaluated
    # stopwords.words("english") again for every single token.
    stop_words = set(stopwords.words("english"))
    tokens = [w for w in tokens if w not in stop_words]

    # rejoin
    return " ".join(tokens).strip()
|
||||
|
||||
if __name__ == "__main__":
|
||||
conn = psyco.connect(dbname="aact_db", user="analysis", host="localhost", password="test")
|
||||
|
||||
curse = conn.cursor()
|
||||
|
||||
curse.execute("SELECT why_stopped FROM ctgov.studies WHERE why_stopped IS NOT NULL LIMIT 2000;")
|
||||
results = curse.fetchall()
|
||||
|
||||
curse.close()
|
||||
conn.close()
|
||||
|
||||
data = pd.DataFrame(results, columns = ["corpus"])
|
||||
data["cleaned"] = data.corpus.apply(preporcess_text)
|
||||
|
||||
vectorizer = TfidfVectorizer(sublinear_tf=True)
|
||||
|
||||
X = vectorizer.fit_transform(data.cleaned)
|
||||
|
||||
kmeans = KMeans(n_clusters=10, random_state=11021585)
|
||||
kmeans.fit(X)
|
||||
|
||||
data["cluster"] = kmeans.labels_
|
||||
|
||||
print(data.groupby(["cluster"])["cleaned"].count())
|
||||
|
||||
|
||||
@ -0,0 +1 @@
|
||||
backup/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||
@ -0,0 +1,42 @@
|
||||
#!/bin/bash

# Starts a new postgres container with podman and restores a gzipped SQL dump
# into it.
#
# Usage: <this script> [dump_file_name]
#   dump_file_name is looked up inside ./backup/, which is mounted at /backup/.

RESTORE_DUMP_GZ="${1:-aact_db_backup_20250107_133822.sql.gz}"
POSTGRES_USER=root
POSTGRES_PASSWORD=root
POSTGRES_DB=aact_db

CONTAINER_NAME="${POSTGRES_DB}-restored-$(date -I)"

# Fail before creating a container when there is nothing to restore.
if [[ ! -f "./backup/${RESTORE_DUMP_GZ}" ]]; then
    echo "Dump file ./backup/${RESTORE_DUMP_GZ} not found." >&2
    exit 1
fi

#start container
# Each continuation has a space before the backslash so that arguments can
# never be glued to the next line.
if ! podman run \
    -e POSTGRES_PASSWORD="${POSTGRES_PASSWORD}" \
    -e POSTGRES_USER="${POSTGRES_USER}" \
    -e POSTGRES_DB="${POSTGRES_DB}" \
    --name "${CONTAINER_NAME}" \
    --detach \
    --network research-network \
    --shm-size=512mb \
    --volume ./backup/:/backup/ \
    -p 5432:5432 \
    postgres:14-alpine
then
    # Without this check the readiness loop below would poll a container
    # that was never created.
    echo "Failed to start container ${CONTAINER_NAME}." >&2
    exit 1
fi

sleep 10

# Function to check if PostgreSQL is ready
function check_postgres {
    podman exec -i "${CONTAINER_NAME}" psql -h localhost -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c '\q' > /dev/null 2>&1
}

# Wait for PostgreSQL to be ready, but give up after MAX_ATTEMPTS polls so a
# crashed container does not hang the script forever.
MAX_ATTEMPTS=30
attempt=0
until check_postgres; do
    attempt=$((attempt + 1))
    if (( attempt >= MAX_ATTEMPTS )); then
        echo "PostgreSQL did not become ready after ${MAX_ATTEMPTS} attempts." >&2
        exit 1
    fi
    echo "Waiting for PostgreSQL to be ready..."
    sleep 4
done

echo "PostgreSQL is ready. Restoring the database..."

# Decompress the dump file and restore it to the database
if ! podman exec -i "${CONTAINER_NAME}" sh -c "gunzip -c /backup/${RESTORE_DUMP_GZ} | psql -h localhost -U ${POSTGRES_USER} -d ${POSTGRES_DB}"; then
    echo "Database restoration failed." >&2
    exit 1
fi

echo "Database restoration complete."
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,31 @@
|
||||
version: '3'
|
||||
|
||||
networks:
|
||||
pharmaceutical_research: #because it helps to have a way to link specifically to this.
|
||||
|
||||
services:
|
||||
aact_db:
|
||||
image: postgres:14-alpine
|
||||
networks:
|
||||
- pharmaceutical_research
|
||||
shm_size: '4gb' #adjust the shared memeory /dev/shm when running
|
||||
#https://stackoverflow.com/questions/30210362/how-to-increase-the-size-of-the-dev-shm-in-docker-container
|
||||
container_name: aact_db
|
||||
#restart: always #restart after crashes
|
||||
environment:
|
||||
POSTGRES_USER: root
|
||||
POSTGRES_PASSWORD: root
|
||||
POSTGRES_DB: aact_db
|
||||
ports:
|
||||
- "5432:5432" #host:container
|
||||
volumes: #host:container is the format.
|
||||
# this is persistent storage for the database
|
||||
- ./db_store/:/var/lib/postgresql/
|
||||
# this is the database dump to restore from
|
||||
- ./aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
|
||||
# this is the folder containing entrypoint info.
|
||||
- ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
|
||||
env_file:
|
||||
../.env
|
||||
|
||||
|
||||
@ -0,0 +1,21 @@
|
||||
--Create ctti user and grant permissions
|
||||
CREATE ROLE ctti;
|
||||
GRANT ALL PRIVILEGES ON DATABASE aact_db TO ctti;
|
||||
|
||||
/*
|
||||
Add the root user if it doesn't exist.
|
||||
With the default configuration this shouldn't be an issue,
|
||||
but I can see myself forgetting and changing the default POSTGRES_USER
|
||||
*/
|
||||
DO LANGUAGE plpgsql
|
||||
$do$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT FROM pg_catalog.pg_roles -- SELECT list can be empty for this
|
||||
WHERE rolname = 'root')
|
||||
THEN
|
||||
CREATE ROLE root LOGIN PASSWORD 'root'; --SECURITY ISSUE
|
||||
GRANT ALL PRIVILEGES ON DATABASE aact_db TO root;
|
||||
END IF;
|
||||
END
|
||||
$do$
|
||||
@ -0,0 +1,18 @@
|
||||
#!/bin/bash
set -e
set -x

# This file loads the database dump into the postgres database

dump_path="/mnt/host_data/postgres_data.dmp"

#Double check the postgres user and database are set
# `||` (not `&&`): the restore needs both values, so a single missing one must
# already abort.
if [[ -z $POSTGRES_USER || -z $POSTGRES_DB ]]; #if either POSTGRES_USER or POSTGRES_DB are empty, throw an error.
then
    echo "Missing either the POSTGRES_USER or the POSTGRES_DB environment variable"
    exit 4
else
    #restore the DB
    # -e: exit on error, -v: verbose, -O: skip ownership commands (the long
    # form --no-owner was passed a second time and is dropped as redundant),
    # -x: skip privilege (grant/revoke) commands
    pg_restore -e -v -O -x --dbname="$POSTGRES_DB" --username="$POSTGRES_USER" "$dump_path"
fi
|
||||
@ -0,0 +1,127 @@
|
||||
-- Create a schema handling trial history.
CREATE SCHEMA history;

--Create role for anyone who needs to both select and insert on historical data
CREATE ROLE history_writer;
GRANT CONNECT ON DATABASE aact_db to history_writer;

GRANT USAGE ON SCHEMA history TO history_writer;

-- Table privileges now target the history schema, matching the USAGE grant
-- above (the grant previously named the http schema).
GRANT INSERT,SELECT ON ALL TABLES IN SCHEMA history TO history_writer;
-- "ALL TABLES" only covers tables that exist when the grant runs; default
-- privileges extend the same access to history tables created afterwards by
-- the role running this script.
ALTER DEFAULT PRIVILEGES IN SCHEMA history GRANT INSERT,SELECT ON TABLES TO history_writer;


--Create role for anyone who only needs selection access to historical data, such as for analysis
CREATE ROLE history_reader;
GRANT CONNECT ON DATABASE aact_db to history_reader;

GRANT USAGE ON SCHEMA history TO history_reader;

-- Same schema correction and default-privilege extension as for the writer.
GRANT SELECT ON ALL TABLES IN SCHEMA history TO history_reader;
ALTER DEFAULT PRIVILEGES IN SCHEMA history GRANT SELECT ON TABLES TO history_reader;
|
||||
|
||||
|
||||
/* History Tables
|
||||
Below is where I would construct the parsed trial history tables that I need.
|
||||
|
||||
Possible fields
|
||||
nct_id
|
||||
version
|
||||
--Study Status
|
||||
overall_status^
|
||||
primary_completion_date^
|
||||
completion_date^
|
||||
last_update_submitted_date
|
||||
--SponsorCollaborators
|
||||
sponsor (multi?)
|
||||
collaborators (multi?)
|
||||
--Oversight
|
||||
fda_regulated_drug (ignore)
|
||||
fda_regulated_device (ignore)
|
||||
dmc (ignore)
|
||||
--StudyDescription
|
||||
summary
|
||||
detailed_description
|
||||
--Conditions
|
||||
Conditions
|
||||
Keywords
|
||||
--StudyDesign
|
||||
Study type
|
||||
Primary Purpose
|
||||
Study Phase
|
||||
Interventional Study Model
|
||||
Number of Arms
|
||||
Masking
|
||||
Allocation
|
||||
Enrollment ^
|
||||
--ArmsAndInterventions
|
||||
Arms (multiple) (Ignore)
|
||||
--ProtocolOutcomeMeasures
|
||||
--Eligibility
|
||||
--ContactsLocation
|
||||
--IPDSharing
|
||||
--References
|
||||
--ParticipantFlow
|
||||
--BaselineCharacteristics
|
||||
--ROutcomeMeasures
|
||||
--AdverseEvents
|
||||
--LimitationsAndCaveats
|
||||
--More Information
|
||||
|
||||
*/
|
||||
|
||||
CREATE TYPE history.updatable_catetories AS ENUM
|
||||
('Actual', 'Anticipated', 'Expected');
|
||||
|
||||
ALTER TYPE history.updatable_catetories
|
||||
OWNER TO root;
|
||||
|
||||
COMMENT ON TYPE history.updatable_catetories
|
||||
IS 'This enum is used to capture the different types of categories that a date or enrollemnt figure may have.';
|
||||
|
||||
|
||||
|
||||
CREATE TYPE history.study_statuses AS ENUM
|
||||
('Available', 'Withdrawn', 'Withheld', 'Temporarily not available', 'Active, not recruiting', 'Recruiting', 'Not yet recruiting', 'Enrolling by invitation', 'Suspended', 'No longer available', 'Approved for marketing', 'Unknown status', 'Completed', 'Terminated');
|
||||
|
||||
ALTER TYPE history.study_statuses
|
||||
OWNER TO root;
|
||||
|
||||
COMMENT ON TYPE history.study_statuses
|
||||
IS 'This enum is used to record study status. These are pulled from the ClinicalTrials.gov documentation.';
|
||||
|
||||
|
||||
-- Table: history.trial_snapshots
|
||||
|
||||
DROP TABLE IF EXISTS history.trial_snapshots;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS history.trial_snapshots
|
||||
(
|
||||
nct_id character varying(15) COLLATE pg_catalog."default" NOT NULL,
|
||||
version integer NOT NULL,
|
||||
submission_date timestamp NOT NULL,
|
||||
primary_completion_date timestamp without time zone,
|
||||
primary_completion_date_category history.updatable_catetories,
|
||||
start_date timestamp without time zone,
|
||||
start_date_category history.updatable_catetories,
|
||||
completion_date timestamp without time zone,
|
||||
completion_date_category history.updatable_catetories,
|
||||
overall_status history.study_statuses,
|
||||
enrollment integer,
|
||||
enrollment_category history.updatable_catetories,
|
||||
sponsor character varying COLLATE pg_catalog."default",
|
||||
responsible_party character varying COLLATE pg_catalog."default",
|
||||
CONSTRAINT trial_snapshots_pkey PRIMARY KEY (nct_id, version)
|
||||
);
|
||||
|
||||
|
||||
ALTER TABLE IF EXISTS history.trial_snapshots
|
||||
OWNER to root;
|
||||
|
||||
|
||||
CREATE OR REPLACE VIEW history.match_drugs_to_trials
|
||||
AS SELECT bi.nct_id,
|
||||
rp.rxcui,
|
||||
rp.propvalue1
|
||||
FROM ctgov.browse_interventions bi
|
||||
JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
|
||||
WHERE rp.propname::text = 'RxNorm Name'::text AND (bi.nct_id::text IN ( SELECT trial_snapshots.nct_id
|
||||
FROM history.trial_snapshots));
|
||||
@ -0,0 +1,40 @@
|
||||
CREATE SCHEMA spl AUTHORIZATION root;
|
||||
|
||||
DROP TABLE IF EXISTS spl.nsde;
|
||||
|
||||
CREATE SEQUENCE IF NOT EXISTS spl.nsde_id_seq
|
||||
INCREMENT 1
|
||||
START 1
|
||||
MINVALUE 1
|
||||
MAXVALUE 9223372036854775807
|
||||
CACHE 1;
|
||||
|
||||
ALTER SEQUENCE spl.nsde_id_seq
|
||||
OWNER TO root;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS spl.nsde
|
||||
(
|
||||
id integer NOT NULL DEFAULT nextval('spl.nsde_id_seq'::regclass),
|
||||
package_ndc11 character varying(11) COLLATE pg_catalog."default",
|
||||
application_number_or_citation character varying(25) COLLATE pg_catalog."default",
|
||||
package_ndc character varying(50) COLLATE pg_catalog."default",
|
||||
proprietary_name character varying(500) COLLATE pg_catalog."default",
|
||||
product_type character varying(90) COLLATE pg_catalog."default",
|
||||
marketing_category character varying(160) COLLATE pg_catalog."default",
|
||||
dosage_form character varying(155) COLLATE pg_catalog."default",
|
||||
billing_unit character varying(35) COLLATE pg_catalog."default",
|
||||
marketing_start_date date,
|
||||
marketing_end_date date,
|
||||
inactivation_date date,
|
||||
reactivation_date date,
|
||||
CONSTRAINT nsde_pkey PRIMARY KEY (id)
|
||||
)
|
||||
|
||||
TABLESPACE pg_default;
|
||||
|
||||
ALTER TABLE IF EXISTS spl.nsde
|
||||
OWNER to root;
|
||||
|
||||
-- if the table is dropped, the sequence is as well
|
||||
ALTER SEQUENCE spl.nsde_id_seq
|
||||
OWNED BY spl.nsde.id;
|
||||
@ -0,0 +1,6 @@
|
||||
-- Create the schema that holds the migrated RxNorm tables.
|
||||
CREATE SCHEMA rxnorm_migrated;
|
||||
|
||||
--Give root full access to every table in the rxnorm_migrated schema
|
||||
|
||||
GRANT ALL ON ALL TABLES IN SCHEMA rxnorm_migrated TO root;
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,7 @@
|
||||
# Instructions:
|
||||
Go to [RxNavInABox](https://lhncbc.nlm.nih.gov/RxNav/applications/RxNav-in-a-Box.html) and download the most recent version.
|
||||
|
||||
I have included the version I use.
|
||||
|
||||
Then unzip and run docker-compose.yaml
|
||||
|
||||
@ -0,0 +1,48 @@
|
||||
version: '3'
|
||||
|
||||
networks:
|
||||
pharmaceutical_research: #because it helps to have a way to link specifically to this.
|
||||
external: true
|
||||
|
||||
services:
|
||||
aact_db:
|
||||
image: postgres:14-alpine
|
||||
restart: "no"
|
||||
networks:
|
||||
- pharmaceutical_research
|
||||
container_name: aact_db
|
||||
#restart: always #restart after crashes
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
VERSION: podman
|
||||
ports:
|
||||
- "5432:5432" #host:container
|
||||
volumes: #host:container is the format.
|
||||
# this is persistent storage for the database
|
||||
- ./AACT_downloader/postgresql/:/var/lib/postgresql/
|
||||
# this is the database dump to restore from
|
||||
- ./AACT_downloader/aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
|
||||
# this is the folder containing entrypoint info.
|
||||
- ./AACT_downloader/docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
|
||||
shm_size: 512mb
|
||||
|
||||
|
||||
rxnav-db:
|
||||
image: mariadb:10.4
|
||||
restart: "no"
|
||||
ports:
|
||||
- "3306:3306"
|
||||
volumes:
|
||||
- ./RxNav-In-a-box/rxnav-in-a-box-20230103/mysql:/docker-entrypoint-initdb.d:ro
|
||||
- ./RxNav-In-a-box/rxnav_data:/var/lib/mysql
|
||||
environment:
|
||||
MYSQL_RANDOM_ROOT_PASSWORD: "yes"
|
||||
MYSQL_USER: ${MYSQL_USER}
|
||||
MYSQL_PASSWORD: ${MYSQL_PASSWORD}
|
||||
VERSION: podman
|
||||
env_file:
|
||||
.env
|
||||
networks:
|
||||
- pharmaceutical_research
|
||||
@ -0,0 +1,23 @@
|
||||
version: '3'
|
||||
|
||||
networks:
|
||||
pharmaceutical_research: #because it helps to have a way to link specifically to this.
|
||||
|
||||
services:
|
||||
aact_db:
|
||||
image: postgres:14-alpine
|
||||
networks:
|
||||
- pharmaceutical_research
|
||||
container_name: DrugCentral
|
||||
#restart: always #restart after crashes
|
||||
environment:
|
||||
POSTGRES_PASSWORD: root
|
||||
ports:
|
||||
- "54320:5432" #host:container
|
||||
volumes: #host:container is the format.
|
||||
# this is persistent storage for the database
|
||||
- ./db_store/:/var/lib/postgresql/
|
||||
# this is the folder containing entrypoint info.
|
||||
- ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
|
||||
|
||||
|
||||
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Download the DrugCentral SQL dump into the entrypoint folder and unpack it.

# Abort on the first failing command, on unset variables and on pipeline
# failures, so a failed `cd` or download never leads to unpacking the wrong file.
set -euo pipefail

filename="drugcentral.dump.08222022.sql.gz"

cd ./docker-entrypoint-initdb.d/

# --fail makes curl exit non-zero on HTTP errors instead of saving the error page.
curl --fail "https://unmtid-shinyapps.net/download/$filename" --output "$filename"

# Quote the variable so the name is always passed as a single argument.
gzip -d "$filename"
|
||||
@ -0,0 +1,44 @@
|
||||
from flask import Flask
|
||||
import os
|
||||
from dotenv import dotenv_values
|
||||
|
||||
|
||||
|
||||
env_path = "../../containers/.env"
|
||||
ENV = dotenv_values(env_path)
|
||||
|
||||
def create_app(test_config=None):
    """Application factory: build and configure the Flask app.

    Database settings are read from the module-level ``ENV`` mapping (loaded
    from the ``.env`` file).  The database teardown hook and the validation
    blueprint are registered before the app is returned.

    Args:
        test_config: optional mapping of configuration overrides.  When given
            it is applied on top of the defaults (previously it was ignored).

    Returns:
        The configured ``Flask`` application.

    Raises:
        KeyError: if a required ``POSTGRES_*`` entry is missing from ``ENV``.
        OSError: if the instance folder cannot be created.
    """
    # create and configure the app
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        # Prefer a key supplied through the environment file.  The fallback is
        # the historical built-in key; it lives in the source tree, so
        # deployments should set SECRET_KEY in .env.
        SECRET_KEY=ENV.get("SECRET_KEY")
        or '6e674d6e41b733270fd01c6257b3a1b4769eb80f3f773cd0fe8eff25f350fc1f',
        POSTGRES_DB=ENV["POSTGRES_DB"],
        POSTGRES_USER=ENV["POSTGRES_USER"],
        POSTGRES_HOST=ENV["POSTGRES_HOST"],
        POSTGRES_PORT=ENV["POSTGRES_PORT"],
        POSTGRES_PASSWORD=ENV["POSTGRES_PASSWORD"],
    )

    if test_config is not None:
        # Explicit overrides win over the defaults above.
        app.config.from_mapping(test_config)

    # Ensure the instance folder exists.  exist_ok tolerates only the
    # "already there" case; genuine failures (e.g. permissions) are no
    # longer silently swallowed.
    os.makedirs(app.instance_path, exist_ok=True)

    # a simple page that says hello
    @app.route('/')
    def hello():
        return 'Hello, World!'

    # Imported here rather than at module top so the package can be imported
    # without pulling in the database layer first.
    from . import db_interface
    db_interface.init_database(app)

    from . import validation
    app.register_blueprint(validation.bp)

    return app
|
||||
|
||||
|
||||
@ -0,0 +1,175 @@
|
||||
import psycopg2 as psyco
|
||||
from psycopg2 import extras
|
||||
from datetime import datetime
|
||||
|
||||
import click #used for cli commands. Not needed for what I am doing.
|
||||
from flask import current_app, g
|
||||
|
||||
def get_db(**kwargs):
    """Return the PostgreSQL connection stored on ``flask.g``.

    The connection is opened on first use from the ``POSTGRES_*`` entries of
    the current app's config and cached as ``g.db``.  ``kwargs`` are forwarded
    to ``psycopg2.connect`` only when a new connection is opened; they are
    ignored once a connection is cached.
    """
    if "db" in g:
        return g.db

    settings = current_app.config
    g.db = psyco.connect(
        dbname=settings["POSTGRES_DB"],
        user=settings["POSTGRES_USER"],
        host=settings["POSTGRES_HOST"],
        port=settings["POSTGRES_PORT"],
        password=settings["POSTGRES_PASSWORD"],
        **kwargs
    )
    return g.db
|
||||
|
||||
def close_db(e=None):
    """Close and forget the connection cached on ``flask.g``, if any.

    ``e`` is accepted because the function is registered as an
    app-context teardown callback; it is not used.
    """
    connection = g.pop('db', None)
    if connection is None:
        # Nothing was opened under this context.
        return
    connection.close()
|
||||
|
||||
def check_initialization(app):
    """Run a trivial query against ``"DiseaseBurden".trial_to_icd10``.

    The result is discarded; the point is only that the call raises if the
    database or table is unavailable.  ``app`` is not used.
    """
    probe_query = 'select count(*) from "DiseaseBurden".trial_to_icd10'
    connection = get_db()
    with connection.cursor() as cursor:
        cursor.execute(probe_query)
        # just checking if everything is going to fail
        cursor.fetchall()
|
||||
|
||||
def init_database(app):
    """Register ``close_db`` as an app-context teardown callback on ``app``."""
    # The start-up probe is left disabled, as in the original:
    # check_initialization(app)
    app.teardown_appcontext(close_db)
|
||||
|
||||
|
||||
|
||||
|
||||
def select_remaing_trials_to_analyze(db_conn):
    """Fetch the distinct nct_ids whose matches have no approval decision yet.

    Returns the rows from ``cursor.fetchall()`` ordered by nct_id.
    """
    pending_query = '''
        select distinct nct_id
        from "DiseaseBurden".trial_to_icd10 tti
        where tti.approved is null
        order by nct_id
        ;
        '''
    with db_conn.cursor() as cur:
        cur.execute(pending_query)
        rows = cur.fetchall()
    return rows
|
||||
|
||||
|
||||
def select_analyzed_trials(db_conn):
    """Fetch the trials that already carry an 'accepted' or 'rejected' decision.

    Each returned row is ``(nct_id, latest approval_timestamp)``; rows are
    ordered with the most recent decision first.
    """
    analyzed_query = '''
        select distinct nct_id, max(approval_timestamp)
        from "DiseaseBurden".trial_to_icd10 tti
        where tti.approved in ('accepted','rejected')
        group by nct_id
        order by max(approval_timestamp) desc
        ;
        '''
    with db_conn.cursor() as cur:
        cur.execute(analyzed_query)
        rows = cur.fetchall()
    return rows
|
||||
|
||||
def select_unmatched_trials(db_conn):
    """Fetch the distinct nct_ids whose matches were marked 'unmatched'.

    Returns the rows from ``cursor.fetchall()`` ordered by nct_id.
    """
    unmatched_query = '''
        select distinct nct_id
        from "DiseaseBurden".trial_to_icd10 tti
        where tti.approved = 'unmatched'
        order by nct_id
        ;
        '''
    with db_conn.cursor() as cur:
        cur.execute(unmatched_query)
        rows = cur.fetchall()
    return rows
|
||||
|
||||
|
||||
def get_trial_conditions_and_proposed_matches(db_conn, nct_id):
    """Fetch every ``trial_to_icd10`` row stored for the given trial.

    ``nct_id`` is passed as a bound query parameter.  Returns the rows from
    ``cursor.fetchall()`` with all columns of the table.
    """
    matches_query = '''
        select *
        from "DiseaseBurden".trial_to_icd10 tti
        where nct_id = %s
        '''
    parameters = [nct_id]
    with db_conn.cursor() as cur:
        cur.execute(matches_query, parameters)
        rows = cur.fetchall()
    return rows
|
||||
|
||||
|
||||
def store_validation(db_conn, list_of_insert_data):
    """Write approval decisions back to ``trial_to_icd10`` and commit.

    Each element of ``list_of_insert_data`` supplies the parameters
    ``(approved, approval_timestamp, id)`` for one UPDATE.  The commit is
    issued once, after all updates have been executed.
    """
    update_statement = """
        update "DiseaseBurden".trial_to_icd10
        set approved=%s, approval_timestamp=%s
        where id=%s
        ;
        """
    with db_conn.cursor() as cur:
        for parameters in list_of_insert_data:
            cur.execute(update_statement, parameters)
    db_conn.commit()
|
||||
|
||||
def get_trial_summary(db_conn,nct_id):
    """Collect descriptive data about one trial from the ``ctgov`` schema.

    Runs three queries (summary, keywords, conditions - in that order) with
    ``nct_id`` as the bound parameter and returns a dict with the keys
    ``"summary"``, ``"keywords"`` and ``"conditions"``, each holding the rows
    from ``cursor.fetchall()``.
    """
    summary_query = """
        select
            s.nct_id,
            brief_title ,
            official_title ,
            bs.description as brief_description,
            dd.description as detailed_description
        from ctgov.studies s
        left join ctgov.brief_summaries bs
            on bs.nct_id = s.nct_id
        left join ctgov.detailed_descriptions dd
            on dd.nct_id = s.nct_id
        where s.nct_id = %s
        ;
        """
    conditions_query = """
        --conditions mentioned
        select * from ctgov.conditions c
        where c.nct_id = %s
        ;
        """
    keywords_query = """
        select nct_id ,downcase_name
        from ctgov.keywords k
        where k.nct_id = %s
        ;
        """

    collected = {}
    with db_conn.cursor() as cur:
        # Same execution order as before: summary, keywords, conditions.
        for label, query in (
            ("summary", summary_query),
            ("keywords", keywords_query),
            ("conditions", conditions_query),
        ):
            cur.execute(query, [nct_id])
            collected[label] = cur.fetchall()

    return collected
|
||||
|
||||
def get_list_icd10_codes(db_conn):
    """Return the distinct codes in ``"DiseaseBurden".icd10_to_cause``.

    The result is a flat list holding the first column of every row, in the
    order produced by the query (``order by code``).
    """
    codes_query = """
        select distinct code
        from "DiseaseBurden".icd10_to_cause itc
        order by code;
        """
    with db_conn.cursor() as cur:
        cur.execute(codes_query)
        rows = cur.fetchall()

    flat_codes = []
    for row in rows:
        flat_codes.append(row[0])
    return flat_codes
|
||||
|
||||
def record_suggested_matches(db_conn, nct_id,condition,icd10_code):
    """Insert a hand-entered match for a trial and commit it.

    The new ``trial_to_icd10`` row has source 'hand matched', approval state
    'accepted', and ``datetime.now()`` as its approval timestamp.
    """
    insert_statement = """
        INSERT INTO "DiseaseBurden".trial_to_icd10
        (nct_id,"condition",ui,"source",approved,approval_timestamp)
        VALUES (%s,%s,%s,'hand matched','accepted',%s)
        ;
        """
    with db_conn.cursor() as cur:
        parameters = [nct_id, condition, icd10_code, datetime.now()]
        cur.execute(insert_statement, parameters)
    db_conn.commit()
|
||||
@ -0,0 +1 @@
|
||||
#at some point I need to add a login or something.
|
||||
@ -0,0 +1,25 @@
|
||||
<!doctype html>
|
||||
<title>{% block title %}{% endblock %} - ClinicalTrialsProject</title>
|
||||
<!--<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">-->
|
||||
|
||||
<nav>
|
||||
<h1>Nav</h1>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="{{ url_for('validation.remaining') }}">Validation Home</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://icd.who.int/browse10/2019/en">WHO ICD-10 Codes (2019)</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://uts.nlm.nih.gov/uts/umls/home">UMLS Metathesaurus browser (requires login)</a>
|
||||
</li>
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
<section class="content">
|
||||
<header>
|
||||
{% block header %}{% endblock %}
|
||||
</header>
|
||||
{% block content %}{% endblock %}
|
||||
</section>
|
||||
@ -0,0 +1,49 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block header %}
|
||||
<h1>{% block title %} ICD-10 to Trial Conditions Validation {% endblock %}</h1>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
|
||||
<h2>Trials to Validate</h2>
|
||||
|
||||
<table>
|
||||
<th>Trials</th>
|
||||
{% for trial in list_to_validate %}
|
||||
<tr><td>
|
||||
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||
{{ trial [0] }}
|
||||
</a>
|
||||
</td></tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
|
||||
<h2>Trials that have been Validated</h2>
|
||||
|
||||
<table>
|
||||
<th>Trials Links</th>
|
||||
{% for trial in validated_list %}
|
||||
<tr><td>
|
||||
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||
{{ trial [0] }}
|
||||
</a>
|
||||
(Most recently updated {{trial[1]}})
|
||||
</td></tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
|
||||
<h2>Trials that don't have a good match</h2>
|
||||
|
||||
<table>
|
||||
<th>Trial Links</th>
|
||||
{% for trial in unmatched_list %}
|
||||
<tr><td>
|
||||
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||
{{ trial [0] }}
|
||||
</a>
|
||||
</td></tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
|
||||
{% endblock %}
|
||||
@ -0,0 +1,95 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block header %}
|
||||
<h1> ICD-10 to Trial Conditions Validation: {{ nct_id }} </h1>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
|
||||
<section class="summary">
|
||||
<h3>Trial Summary</h3>
|
||||
|
||||
<div class="text_summary">
|
||||
<ul>
|
||||
<li>NCT: {{ summary_dats["summary"][0][0] }}</li>
|
||||
<li>Brief Title: {{ summary_dats["summary"][0][1] }}</li>
|
||||
<li>Long Title: {{ summary_dats["summary"][0][2] }}</li>
|
||||
<li>Brief Description: {{ summary_dats["summary"][0][3] }}</li>
|
||||
<li>Long Description: {{ summary_dats["summary"][0][4] }}</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="keywords">
|
||||
<h4>Keywords</h4>
|
||||
<ul>
|
||||
{% for keyword in summary_dats["keywords"] %}
|
||||
<li>
|
||||
{{ keyword[1] }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
<div class="conditions">
|
||||
<h4>Raw Conditions </h4>
|
||||
<ul>
|
||||
{% for condition in summary_dats["conditions"] %}
|
||||
<li>
|
||||
{{ condition[3] }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="proposed_conditions">
|
||||
<h3>Proposed Conditions</h3>
|
||||
<form method="post">
|
||||
<table>
|
||||
<tr>
|
||||
<th>Approve</th>
|
||||
<th>Condition (MeSH normalized)</th>
|
||||
<th>Identifier</th>
|
||||
<th>Source</th>
|
||||
<th>Description</th>
|
||||
<th>Source</th>
|
||||
</tr>
|
||||
{% for condition in condition_list %}
|
||||
|
||||
<tr>
|
||||
<td> <input type="checkbox" id="{{ condition[0] }}" name="{{condition[0]}}" value="accepted" {% if condition[8] == "accepted" %}checked{% endif %}> </td>
|
||||
<td> {{condition[2]}} </td>
|
||||
<td> {{condition[3]}} </td>
|
||||
<td> {{condition[5]}} </td>
|
||||
<td> {{condition[6]}} </td>
|
||||
<td> {{condition[7]}} </td>
|
||||
</tr>
|
||||
|
||||
{% endfor %}
|
||||
</table>
|
||||
<input type="submit" name="submission" value="Submit approvals">
|
||||
<br/>
|
||||
<input type="submit" name="marked_unmatched" value="Mark unmatched">
|
||||
</form>
|
||||
</section>
|
||||
|
||||
<section class="submit_alternate">
|
||||
<h3>Submit Alternate Conditions</h3>
|
||||
<!--For each listed condition, provide a spot to enter an ICD-10 code-->
|
||||
<form method="post">
|
||||
<label for="alternate_sub">Please enter the proposed code that appears to be the best match:</label>
|
||||
<input name="alt_sub" id="alternate_sub">
|
||||
<br/>
|
||||
<label for="condition">
|
||||
Please give a name to the condition you used to match this<br/>
|
||||
Condition:
|
||||
</label>
|
||||
<input name="condition" id="condition">
|
||||
<br/>
|
||||
<input type="submit" name="alternate_submission" value="Submit alternate ICD-10 code">
|
||||
</form>
|
||||
</section>
|
||||
|
||||
<section class="approved">
|
||||
<!--TODO:This will list the already approved values-->
|
||||
</section>
|
||||
|
||||
{% endblock %}
|
||||
@ -0,0 +1,98 @@
|
||||
import functools
|
||||
from flask import (Blueprint, flash, g, redirect, render_template, request, session, url_for)
|
||||
from Icd10ConditionsMatching.db_interface import (
|
||||
get_db,select_remaing_trials_to_analyze,
|
||||
select_analyzed_trials,
|
||||
select_unmatched_trials,
|
||||
get_trial_conditions_and_proposed_matches,
|
||||
store_validation,
|
||||
get_trial_summary,
|
||||
get_list_icd10_codes,
|
||||
record_suggested_matches,
|
||||
)
|
||||
from datetime import datetime
|
||||
|
||||
#### First Blueprint: Checking Data
|
||||
bp = Blueprint("validation", __name__, url_prefix="/validation")
|
||||
|
||||
|
||||
|
||||
@bp.route("/",methods=["GET"])
def remaining():
    """Render the validation index page.

    The template receives three trial lists: trials still to validate,
    trials already validated, and trials marked as unmatched.
    """
    connection = get_db()

    # Queries run in the same order as before: pending, analyzed, unmatched.
    template_context = {
        "list_to_validate": select_remaing_trials_to_analyze(connection),
        "validated_list": select_analyzed_trials(connection),
        "unmatched_list": select_unmatched_trials(connection),
    }

    return render_template("validation_index.html", **template_context)
|
||||
|
||||
|
||||
@bp.route("/<nct_id>", methods=["GET","POST"])
def validate_trial(nct_id):
    """Show one trial's proposed ICD-10 matches (GET) or store a decision (POST).

    POST handling depends on which submit button is present in the form:

    * ``submission``: every proposed match whose id appears in the form is
      stored with the submitted value ("accepted"); all others are stored as
      "rejected".  Redirects to the validation index.
    * ``marked_unmatched``: every proposed match is stored as "unmatched".
      Redirects to the validation index.
    * ``alternate_submission``: records a hand-entered ICD-10 code.  If the
      normalized code is not in the known code list it is still recorded
      (with a marker appended to the condition) and a 422 page is returned.

    Any other POST body yields a 400 response instead of falling through and
    returning ``None``.
    """
    # Local stdlib import so this block does not depend on a file-level import.
    from html import escape

    db_conn = get_db()

    if request.method == "GET":
        condition_list = get_trial_conditions_and_proposed_matches(db_conn, nct_id)
        summary_dats = get_trial_summary(db_conn, nct_id)

        return render_template(
            "validation_of_trial.html",
            nct_id=nct_id,
            condition_list=condition_list,
            summary_dats=summary_dats,
        )

    # Only GET and POST are routed here, so this is a POST.
    condition_list = get_trial_conditions_and_proposed_matches(db_conn, nct_id)

    if "submission" in request.form:
        # Grab all match ids from the db; if a match id is in the submitted
        # form mark it with the submitted value, otherwise mark it rejected.
        list_of_insert_data = [
            (request.form.get(str(condition[0]), "rejected"), datetime.now(), condition[0])
            for condition in condition_list
        ]
        store_validation(db_conn, list_of_insert_data)
        return redirect(url_for("validation.remaining"))

    if "marked_unmatched" in request.form:
        # If this was marked as "unmatched", store that for each entry.
        list_of_insert_data = [
            ("unmatched", datetime.now(), condition[0])
            for condition in condition_list
        ]
        store_validation(db_conn, list_of_insert_data)
        return redirect(url_for("validation.remaining"))

    if "alternate_submission" in request.form:
        # Normalize to the stored format: no dots, right-padded with "-" to 7 chars.
        code = request.form["alt_sub"].strip().replace(".", '').ljust(7, "-")
        condition = request.form["condition"].strip()

        if code in get_list_icd10_codes(db_conn):
            record_suggested_matches(db_conn, nct_id, condition, code)
            return redirect(request.path)

        record_suggested_matches(db_conn, nct_id, condition + "| Code not in GBD list", code)
        # Both values originate from the request, so escape them before they
        # are placed in the HTML response, and quote the href attribute.
        return """
            Entered `{}`, which is not in the list of available ICD-10 codes. <a href="{}">Return to trial summary</a>
            """.format(escape(code.strip("-")), escape(request.path)), 422

    # None of the known submit buttons was present.
    return "Unrecognized form submission.", 400
|
||||
|
||||
|
||||
@ -0,0 +1,13 @@
|
||||
from setuptools import setup
|
||||
|
||||
# Package metadata for the ICD-10 / trial-conditions validation web app.
setup(
    name='Icd10ConditionsMatching',
    packages=['Icd10ConditionsMatching'],
    include_package_data=True,
    install_requires=[
        'flask',
        'psycopg2',
        # 'datetime' was removed: the code only uses the standard-library
        # module, while that requirement name installs an unrelated
        # third-party distribution from PyPI.
        'python-dotenv',
    ],
)
|
||||
@ -0,0 +1 @@
|
||||
waitress-serve --port=5000 --call 'Icd10ConditionsMatching:create_app'
|
||||
@ -0,0 +1,11 @@
|
||||
from drugtools.env_setup import postgres_conn, mariadb_conn, ENV
|
||||
|
||||
# Quick manual check that both databases are reachable with the loaded settings.
# NOTE(review): this prints the whole ENV mapping, which presumably includes
# passwords - confirm that is acceptable wherever this script is run.
print(ENV)

# PostgreSQL: read a few trial ids.
with postgres_conn() as pconn:
    with pconn.cursor() as pg_cursor:
        pg_cursor.execute("select nct_id FROM ctgov.studies LIMIT 10;")
        print(pg_cursor.fetchall())

# MariaDB: read a few NDC history rows.
with mariadb_conn() as mconn:
    with mconn.cursor() as maria_cursor:
        maria_cursor.execute("select * FROM ALLNDC_HISTORY LIMIT 10;")
        print(maria_cursor.fetchall())
|
||||
@ -0,0 +1,96 @@
|
||||
import json
|
||||
from psycopg2.extras import execute_values
|
||||
import datetime as dt
|
||||
from drugtools.env_setup import postgres_conn, ENV
|
||||
import requests
|
||||
import zipfile
|
||||
import io
|
||||
|
||||
URL_STEM = 'https://download.open.fda.gov/other/nsde/'
|
||||
NUMBER_OF_NSDE_FILES = int(ENV["NUMBER_OF_NSDE_FILES"])
|
||||
|
||||
def filename_generator(max_num):
    """Yield the NSDE archive names ``other-nsde-XXXX-of-YYYY.json.zip``.

    ``XXXX`` counts from 1 to ``max_num`` and ``YYYY`` is ``max_num``; both
    are zero-padded to four characters.
    """
    name_template = "other-nsde-{:0>4}-of-{:0>4}.json.zip"
    yield from (
        name_template.format(part_number, max_num)
        for part_number in range(1, max_num + 1)
    )
|
||||
|
||||
def get_date(result,key):
    """Parse ``result[key]`` as a ``YYYYMMDD`` date.

    Returns a ``datetime`` when the key holds a truthy value, otherwise
    ``None`` (missing key, ``None`` or empty string).
    """
    raw_value = result.get(key)
    if not raw_value:
        return None
    return dt.datetime.strptime(raw_value, "%Y%m%d")
|
||||
|
||||
def build_values(result):
    """Turn one NSDE result dict into a 12-item tuple of column values.

    The first eight items are taken as-is via ``result.get``; the last four
    are date fields converted with ``get_date``.  Missing keys become
    ``None``.  The order matches the column list used by the INSERT in
    ``run``.
    """
    plain_keys = (
        "proprietary_name",
        "application_number_or_citation",
        "product_type",
        "package_ndc",
        "marketing_category",
        "package_ndc11",
        "dosage_form",
        "billing_unit",
    )
    date_keys = (
        "marketing_start_date",
        "marketing_end_date",
        "inactivation_date",
        "reactivation_date",
    )

    plain_values = tuple(result.get(key) for key in plain_keys)
    # adjust types: date strings become datetime objects
    date_values = tuple(get_date(result, key) for key in date_keys)

    return plain_values + date_values
|
||||
|
||||
def download_and_extract_zip(base_url,filename, timeout=60):
    """Download ``base_url + filename`` and return the first archive member.

    Args:
        base_url: URL prefix the file name is appended to.
        filename: name of the zip file to fetch.
        timeout: seconds passed to ``requests.get`` so a stalled server
            cannot hang the run forever.

    Returns:
        The bytes of the first member of the zip archive, or ``None`` when
        the archive has no members.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (previously the error page was handed to ``zipfile`` and failed
            there with a less helpful message).
        zipfile.BadZipFile: if the downloaded payload is not a zip archive.
    """
    response = requests.get(base_url + filename, timeout=timeout)
    response.raise_for_status()

    # The archive is unpacked from memory; nothing is written to disk.
    with zipfile.ZipFile(io.BytesIO(response.content)) as the_zip:
        members = the_zip.infolist()
        if not members:
            return None
        # Only the first member is read.
        return the_zip.read(members[0])
|
||||
|
||||
def run():
    """Download every NSDE file and load its records into ``spl.nsde``.

    One database connection is used for the whole run and is always closed at
    the end.  Each file is inserted in its own transaction, so files loaded
    before a failure stay committed.
    """
    # Loop-invariant, so it is built once.
    query = """
        INSERT INTO spl.nsde (
            proprietary_name
            ,application_number_or_citation
            ,product_type
            ,package_ndc
            ,marketing_category
            ,package_ndc11
            ,dosage_form
            ,billing_unit
            ,marketing_start_date
            ,marketing_end_date
            ,inactivation_date
            ,reactivation_date
        )
        VALUES %s;
        """

    # psycopg2's `with connection` only ends the transaction; it does not
    # close the connection, so it is closed explicitly in `finally`.
    con = postgres_conn()
    try:
        for filename in filename_generator(NUMBER_OF_NSDE_FILES):
            # It would be nice to replace this^^ file_generator with something
            # that retrieves and unzips the files directly.
            print(filename)

            j = download_and_extract_zip(URL_STEM, filename)
            results = json.loads(j)["results"]
            values = [build_values(y) for y in results]

            # Commit on success, roll back on error, once per file.
            with con, con.cursor() as curse:
                execute_values(curse, query, values)
    finally:
        con.close()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
@ -0,0 +1,43 @@
|
||||
import pymysql
|
||||
import psycopg2 as psyco
|
||||
from psycopg2.sql import SQL
|
||||
from dotenv import dotenv_values
|
||||
|
||||
env_path = "../containers/.env"
|
||||
ENV = dotenv_values(env_path)
|
||||
|
||||
def mariadb_conn(**kwargs):
    """Open a new PyMySQL connection from the ``MYSQL_*`` entries of ``ENV``.

    The port is converted to ``int``.  Extra keyword arguments are forwarded
    to ``pymysql.connect``.
    """
    settings = {
        "database": ENV["MYSQL_DB"],
        "user": ENV["MYSQL_USER"],
        "host": ENV["MYSQL_HOST"],
        "port": int(ENV["MYSQL_PORT"]),
        "password": ENV["MYSQL_PASSWORD"],
    }
    return pymysql.connect(**settings, **kwargs)
|
||||
|
||||
def postgres_conn(**kwargs):
    """Open a new psycopg2 connection from the ``POSTGRES_*`` entries of ``ENV``.

    Extra keyword arguments are forwarded to ``psycopg2.connect``.
    """
    settings = {
        "dbname": ENV["POSTGRES_DB"],
        "user": ENV["POSTGRES_USER"],
        "host": ENV["POSTGRES_HOST"],
        "port": ENV["POSTGRES_PORT"],
        "password": ENV["POSTGRES_PASSWORD"],
    }
    return psyco.connect(**settings, **kwargs)
|
||||
|
||||
|
||||
def get_tables_of_interest():
    """Return the ``TABLES_OF_INTEREST`` setting split on commas."""
    configured = ENV["TABLES_OF_INTEREST"]
    table_names = configured.split(",")
    return table_names
|
||||
|
||||
def postgres_table_delete_entries(schema,table):
    """Delete every row from ``schema.table`` and commit.

    Both names are quoted as SQL identifiers, so they are never interpolated
    as raw text.

    Fixes over the previous version: ``Identifier`` is now imported, the
    ``talbe=`` keyword typo (which made ``format`` fail on the ``{table}``
    placeholder) is corrected, and the connection is closed afterwards.
    """
    # Local import: the file only imports SQL from psycopg2.sql.
    from psycopg2.sql import Identifier

    delete_statement = SQL("delete from {schema}.{table}").format(
        schema=Identifier(schema),
        table=Identifier(table)
    )

    con = postgres_conn()
    try:
        # `with con` commits on success and rolls back on error.
        with con, con.cursor() as curse:
            curse.execute(delete_statement)
    finally:
        con.close()
|
||||
|
||||
|
||||
@ -0,0 +1,465 @@
|
||||
from collections import namedtuple
|
||||
from copy import copy
|
||||
from datetime import datetime
|
||||
from bs4 import BeautifulSoup
|
||||
from drugtools.env_setup import ENV,postgres_conn
|
||||
from tqdm import tqdm
|
||||
#requires Python 3.10
|
||||
|
||||
#### GLOBALS
|
||||
# Verbose output is on only when the setting is exactly the string "True".
VERBOSE = ENV["VERBOSE"] == "True"
|
||||
|
||||
###CLASSES AND CONSTRUCTORS
|
||||
|
||||
TagDatePair = namedtuple("TagDatePair", ["tag","date"])
|
||||
TagTextPair = namedtuple("TagTextPair", ["tag","text"])
|
||||
|
||||
#superclasses
|
||||
class VersionData():
    """
    This class holds two types of data:
    - Data with a 1-to-1 relationship with the trial/version pair.
    - Data with a child relationship with the trial/version pair.

    It initializes with None attributes which are filled in later, so a
    VersionData instance can be passed around and data added along the way.

    It also implements loading the data to the database.
    """
    def __init__(self,nct_id,version_id,submission_date):
        #identifiers
        self.nct_id = nct_id.strip()
        self.version_id = version_id
        self.submission_date = submission_date

        #Study Status
        self._primary_completion_date = None
        self._primary_completion_date_category = None
        self._start_date = None
        self._start_date_category = None
        self._completion_date = None
        self._completion_date_category = None
        self._overall_status = None
        self._enrollment = None
        self._enrollment_category = None
        self._sponsor = None
        #self._sponsor_category = None #I don't believe this is included in the raw data
        self._responsible_party = None
        #self._responsible_party_category = None #I don't believe this is included in the raw data
        #self._collaborators = None #currently going to ignore as I've not found it in AACT

    def __repr__(self):
        # Show every attribute so the diagnostic print in load_to_db is useful
        # (the default object repr only shows a memory address).
        fields = ", ".join(
            "{}={!r}".format(name, value) for name, value in vars(self).items()
        )
        return "{}({})".format(type(self).__name__, fields)

    def load_to_db(self,db_connection):
        """Insert this snapshot into ``history.trial_snapshots`` and commit.

        If the insert fails, the instance is printed for diagnosis and the
        original exception is re-raised; nothing is committed in that case.
        """
        #load to initial table, then load any extra details into other tables
        sql = """
            INSERT INTO history.trial_snapshots
            (
                nct_id,
                version,
                submission_date,
                primary_completion_date,
                primary_completion_date_category,
                start_date,
                start_date_category,
                completion_date,
                completion_date_category,
                overall_status,
                enrollment,
                enrollment_category,
                sponsor,
                responsible_party
            )
            VALUES
            (
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s
            )
            """

        # Same order as the column list above.
        parameters = (
            self.nct_id,
            self.version_id,
            self.submission_date,
            self._primary_completion_date,
            self._primary_completion_date_category,
            self._start_date,
            self._start_date_category,
            self._completion_date,
            self._completion_date_category,
            self._overall_status,
            self._enrollment,
            self._enrollment_category,
            self._sponsor,
            self._responsible_party,
        )

        with db_connection.cursor() as db_cursor:
            try:
                db_cursor.execute(sql, parameters)
            except Exception:
                #catch any error, print the applicable information, and raise the error.
                print(self)
                # Bare raise keeps the original traceback untouched.
                raise

        db_connection.commit()
|
||||
|
||||
############ Functions
|
||||
def extract_submission_dates(soup):
    """
    Build a dict mapping each version number to its submission date.

    Rows come from the table inside the first <fieldset> of the page.  A cell
    is recognised by the first entry of its "headers" attribute:
    "VersionNumber" cells give the integer key, "VersionDate" cells give the
    date (format "%B %d, %Y").  A row with neither contributes None -> None.
    """
    history_rows = soup.findChildren("fieldset")[0].table.tbody.findChildren("tr")

    dates_by_version = {}

    for row in history_rows:
        number, submitted = None, None
        for cell in row.findChildren("td"):
            # if it is <td headers="VersionNumber">xx</td> then it contains what we need.
            if "headers" not in cell.attrs:
                continue
            header = cell.attrs["headers"][0]
            if header == "VersionNumber":
                number = int(cell.text)
            elif header == "VersionDate":
                submitted = datetime.strptime(cell.text.strip(), "%B %d, %Y")

        dates_by_version[number] = submitted

    return dates_by_version
|
||||
|
||||
def optional_strip(possible_string):
    """Strip surrounding whitespace if the argument is a string.

    Any non-string value (``None``, numbers, ...) is returned unchanged.
    ``isinstance`` is used instead of an exact type comparison so that
    subclasses of ``str`` are stripped as well.
    """
    if isinstance(possible_string, str):
        return possible_string.strip()
    return possible_string
|
||||
|
||||
def extract_study_statuses(study_status_form, version_a,version_b):
|
||||
"""
|
||||
This extracts data from a study_status form and returns one or two
|
||||
StudyStatusData objects,
|
||||
|
||||
"""
|
||||
|
||||
#get rows
|
||||
rows = study_status_form.table.tbody.find_all("tr")
|
||||
#iterate through rows,
|
||||
for trow in rows:
|
||||
#matching on rowLabels
|
||||
|
||||
match tr_to_td(trow):
|
||||
case ["Primary Completion:" as row_label, old,new]:
|
||||
print("row matched: {}".format(row_label)) if VERBOSE else ""
|
||||
|
||||
tagdate1 = extract_date_and_tag(old.text)
|
||||
version_a._primary_completion_date = tagdate1.date
|
||||
version_a._primary_completion_date_category = optional_strip(tagdate1.tag)
|
||||
|
||||
tagdate2 = extract_date_and_tag(new.text)
|
||||
version_b._primary_completion_date = tagdate2.date
|
||||
version_b._primary_completion_date_category = optional_strip(tagdate2.tag)
|
||||
|
||||
case ["Study Start:" as row_label, old, new]:
|
||||
print("row matched: {}".format(row_label)) if VERBOSE else ""
|
||||
tagdate1 = extract_date_and_tag(old.text)
|
||||
version_a._start_date = tagdate1.date
|
||||
version_a._start_date_category = optional_strip(tagdate1.tag)
|
||||
|
||||
tagdate2 = extract_date_and_tag(new.text)
|
||||
version_b._start_date = tagdate2.date
|
||||
version_b._start_date_category = optional_strip(tagdate2.tag)
|
||||
|
||||
case ["Study Completion:" as row_label, old,new]:
|
||||
print("row matched: {}".format(row_label)) if VERBOSE else ""
|
||||
tagdate1 = extract_date_and_tag(old.text)
|
||||
version_a._completion_date = tagdate1.date
|
||||
version_a._completion_date_category = optional_strip(tagdate1.tag)
|
||||
tagdate2 = extract_date_and_tag(new.text)
|
||||
version_b._completion_date = tagdate2.date
|
||||
version_b._completion_date_category = optional_strip(tagdate2.tag)
|
||||
|
||||
case ["Overall Status:" as row_label, old,new]:
|
||||
print("row matched: {}".format(row_label)) if VERBOSE else ""
|
||||
#split out any notes such as "Suspended [reason for suspenstion ]"
|
||||
version_a._overall_status = optional_strip(old.text.split("[")[0])
|
||||
#split out any notes such as "Suspended [reason for suspenstion ]"
|
||||
version_b._overall_status = optional_strip(new.text.split("[")[0])
|
||||
#FIX: There is an issue with NCT00789633 where the overall status includes information as to why it was suspended.
|
||||
|
||||
case _ as row_label:
|
||||
print("row not matched: {}".format(row_label)) if VERBOSE else ""
|
||||
|
||||
|
||||
def extract_study_design(study_status_form, version_a,version_b):
|
||||
"""
|
||||
This extracts data from a study_status form and returns one or two
|
||||
StudyStatusData objects,
|
||||
|
||||
"""
|
||||
#get rows
|
||||
rows = study_status_form.table.tbody.find_all("tr")
|
||||
#iterate through rows,
|
||||
for trow in rows:
|
||||
#matching on rowLabels
|
||||
match tr_to_td(trow):
|
||||
case ["Enrollment:" as row_label, old, new]:
|
||||
print("row matched: {}".format(row_label)) if VERBOSE else ""
|
||||
|
||||
#Extract tag and text, add them to preallocated object
|
||||
tagtext1 = extract_text_and_tag(old.text)
|
||||
version_a._enrollment = tagtext1.text
|
||||
version_a._enrollment_category = optional_strip(tagtext1.tag)
|
||||
|
||||
tagtext2 = extract_text_and_tag(new.text)
|
||||
version_b._enrollment = tagtext2.text
|
||||
version_b._enrollment_category = optional_strip(tagtext2.tag)
|
||||
|
||||
case _ as row_label:
|
||||
print("row not matched: {}".format(row_label)) if VERBOSE else ""
|
||||
|
||||
def extract_sponsor_data(study_status_form, version_a,version_b):
|
||||
"""
|
||||
This extracts data from a study_status form and returns one or two
|
||||
StudyStatusData objects,
|
||||
|
||||
"""
|
||||
#get rows
|
||||
rows = study_status_form.table.tbody.find_all("tr")
|
||||
#iterate through rows,
|
||||
for trow in rows:
|
||||
#matching on rowLabels
|
||||
match tr_to_td(trow):
|
||||
case ["Sponsor:" as row_label, old, new]:
|
||||
print("row matched: {}".format(row_label)) if VERBOSE else ""
|
||||
version_a._sponsor = optional_strip(old.text)
|
||||
version_b._sponsor = optional_strip(new.text)
|
||||
|
||||
case ["Responsible Party:" as row_label, old, new]:
|
||||
print("row matched: {}".format(row_label)) if VERBOSE else ""
|
||||
version_a._responsible_party = optional_strip(old.text)
|
||||
version_b._responsible_party = optional_strip(new.text)
|
||||
|
||||
case ["Collaborators:" as row_label, old, new]:
|
||||
print("row matched: {}".format(row_label)) if VERBOSE else ""
|
||||
#TODO: find a trial with multiple collaborators and figure out how to identify/count them:w
|
||||
# So far can't figure out where this is in AACT, so I'm going to ignore it.
|
||||
pass
|
||||
|
||||
case _ as row_label:
|
||||
print("row not matched: {}".format(row_label)) if VERBOSE else ""
|
||||
|
||||
|
||||
def split_by_version(tag):
    '''
    Split a highlighted diff element into its old and new versions.

    OUTDATED: With the new format that separates old and new versions, I
    don't technically need this. It is a nice place to identify exact changes
    if those are ever needed though, and it removes the highlights cleanly.

    Two copies of `tag` are made; `tag` itself is not modified.  Elements
    with class "add_hilite" are removed from the first copy and elements
    with class "drop_hilite" from the second.

    Returns the pair (old, new).
    '''
    # clone the element and drop the sub-tags that only exist in the new version
    old = copy(tag)
    for span in old.find_all(class_="add_hilite"):
        span.extract()

    # clone again and drop the sub-tags that only exist in the old version
    new = copy(tag)
    for span in new.find_all(class_="drop_hilite"):
        span.extract()

    return old, new
|
||||
|
||||
|
||||
def extract_date_and_tag(text):
    """
    Parse cell text of the form "<date> [<tag>]".

    The part before the first "[" is parsed as a date, first with the
    `date_MMMM_YYYY` format and, if that fails, with `date_MMMM_DD_YYYY`.
    The part between the first "[" and the next "]" is the tag; it is None
    when the text contains no "[".

    Returns TagDatePair(tag, date).  Text that is empty after stripping
    yields TagDatePair(None, None).

    Raises ValueError when the date part matches neither format.
    """
    text = text.strip()

    # handle various empty cases
    if not text:
        return TagDatePair(None, None)

    date_split = text.split("[")
    if len(date_split) > 1:
        estimate_tag = date_split[1].split("]")[0].strip()
    else:
        estimate_tag = None

    date_text = date_split[0].strip()
    try:
        date_object = datetime.strptime(date_text, date_MMMM_YYYY)
    except ValueError:
        # not "Month Year"; fall back to "Month Day, Year"
        date_object = datetime.strptime(date_text, date_MMMM_DD_YYYY)

    return TagDatePair(estimate_tag, date_object)
|
||||
|
||||
|
||||
def extract_text_and_tag(text):
    """
    Parse cell text of the form "<text> [<tag>]".

    The stripped part before the first "[" is the text; the stripped part
    between the first "[" and the next "]" is the tag, which is None when
    the input contains no "[".

    Returns TagTextPair(tag, text).  Input that is empty after stripping
    yields TagTextPair(None, None).
    """
    text = text.strip()

    # handle various empty cases
    if not text:
        return TagTextPair(None, None)

    text_split = text.split("[")
    if len(text_split) > 1:
        estimate_tag = text_split[1].split("]")[0].strip()
    else:
        estimate_tag = None

    text_object = text_split[0].strip()

    return TagTextPair(estimate_tag, text_object)
|
||||
|
||||
### FUNCTIONS
|
||||
|
||||
def tr_to_td(tr) -> tuple[str, str, str]:
|
||||
"""
|
||||
Takes an html data row of interest, extracts the record_name from the first <td>, and the data from the second <td>.
|
||||
|
||||
For the data, it just extracts the text.
|
||||
The text itself then needs processed separately, based on what it should contain.
|
||||
"""
|
||||
#get list of cells
|
||||
td_list = tr.find_all("td")
|
||||
if len(td_list) == 3:
|
||||
return td_list[0].text, td_list[1], td_list[2]
|
||||
else:
|
||||
return None, None, None
|
||||
|
||||
def get_forms(soup,version_a,version_b):
|
||||
|
||||
#extract all forms
|
||||
for form in soup.body.find_all("form"):
|
||||
#Match forms against ID types
|
||||
if not "id" in form.attrs:
|
||||
continue
|
||||
|
||||
#for each type of form (identified by the ID field)
|
||||
# extract and add the data to the preallocated objects
|
||||
match form.attrs["id"]:
|
||||
case "form_StudyStatus":
|
||||
extract_study_statuses(form,version_a,version_b)
|
||||
case "form_SponsorCollaborators":
|
||||
extract_sponsor_data(form, version_a, version_b)
|
||||
case "form_Oversight":
|
||||
pass
|
||||
case "form_StudyDescription":
|
||||
pass
|
||||
case "form_Conditions":
|
||||
pass
|
||||
case "form_StudyDesign":
|
||||
extract_study_design(form,version_a,version_b)
|
||||
case "form_ArmsandInterventions":
|
||||
pass
|
||||
case "form_ProtocolOutcomeMeasures":
|
||||
pass
|
||||
case "form_Eligibility":
|
||||
pass
|
||||
case "form_ContactsLocations":
|
||||
pass
|
||||
case "form_IPDSharing":
|
||||
pass
|
||||
case "form_References":
|
||||
pass
|
||||
case "form_ParticipantFlow":
|
||||
pass
|
||||
case "form_BaselineCharacteristics":
|
||||
pass
|
||||
case "form_ROutcomeMeasures":
|
||||
pass
|
||||
case "form_AdverseEvents":
|
||||
pass
|
||||
case "form_LimitationsandCaveats":
|
||||
pass
|
||||
case "form_MoreInformation":
|
||||
pass
|
||||
case _ as form_name:
|
||||
print("form not matched: {}".format(form_name)) if VERBOSE else ""
|
||||
|
||||
|
||||
### CONSTANTS
|
||||
# strptime formats for the two date layouts handled by extract_date_and_tag
date_MMMM_YYYY = "%B %Y"  # full month name and year, e.g. "March 2020"
date_MMMM_DD_YYYY = "%B %d, %Y"  # full month name, day and year, e.g. "March 5, 2020"
|
||||
|
||||
def get_data_from_versions(nct_id, html, version_a_int, version_b_int):
    """
    Extract the data of two versions of a trial from one comparison page.

    `html` is parsed with BeautifulSoup (lxml parser).  Two VersionData
    objects are preallocated, each receiving `nct_id`, its version number
    and the entry for that version number in the mapping returned by
    `extract_submission_dates` (None when the version is not in it).
    `get_forms` then fills both objects from the page.

    Returns the pair (version_a, version_b).
    """
    soup = BeautifulSoup(html, "lxml")

    version_date_dict = extract_submission_dates(soup)

    # preallocate version data
    version_a = VersionData(nct_id, version_a_int, version_date_dict.get(version_a_int))
    version_b = VersionData(nct_id, version_b_int, version_date_dict.get(version_b_int))

    # extract data from html and put it in the preallocated objects
    get_forms(soup, version_a, version_b)

    return version_a, version_b
|
||||
|
||||
|
||||
|
||||
def run():
    """
    Extract version data from every successfully downloaded comparison page.

    Reads nct_id, version_a, version_b and html from http.responses (rows
    with response_code = 200), extracts both versions from each page and
    loads them into the database through `load_to_db`.

    The later version of each pair is always loaded; the earlier version is
    loaded only when the two version numbers are consecutive.
    """
    with postgres_conn() as db_connection:
        # pull the requests from the db
        with db_connection.cursor() as curse:
            sql = """
                SELECT nct_id, version_a,version_b, html
                FROM http.responses
                WHERE response_code = 200
                """
            curse.execute(sql)

            for nct_id, version_a, version_b, html in tqdm(curse.fetchall()):
                if VERBOSE:
                    print(nct_id, version_a, version_b)

                version1, version2 = get_data_from_versions(nct_id, html, version_a, version_b)

                if version_b == version_a + 1:
                    version1.load_to_db(db_connection)
                version2.load_to_db(db_connection)


if __name__ == "__main__":
    run()
|
||||
|
||||
"""
|
||||
Documentation:
|
||||
|
||||
To add a new field to extraction-lib:
|
||||
|
||||
1. Locate the field in the HTML
|
||||
- form id (e.g. <form id="form_StudyStatus"> gives the form id "form_StudyStatus")
|
||||
- Table row's data label. This corresponds to the text of first column in the row and will
|
||||
look something like
|
||||
<td class="rowLabel" style="min-width: 210px;">Record Verification:</td>.
|
||||
"Record Verification:" is the data label in the example above.
|
||||
2. Identify what data you will be extracting
|
||||
- type (date, text, int, etc)
|
||||
- if it contains a category ([Actual] vs [Anticipated] etc)
|
||||
3. Add data to:
|
||||
- sql table: history.trial_snapshots
|
||||
- the VersionData class
|
||||
- the VersionData.load_to_db() function
|
||||
4. Ensure the field matcher in `get_forms(*)` is matching on the form ID and has a function processing the form
|
||||
5. Ensure the function processing the form has a match entry to process the row
|
||||
- This should match on data label and then process the data by
|
||||
- splitting into old and new versions
|
||||
- Extracting the data for both old and new
|
||||
- add the data to the passed VersionData objects
|
||||
"""
|
||||
@ -0,0 +1,15 @@
|
||||
from .env_setup import postgres_conn
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def run():
    """
    Execute the SQL script "selected_trials.sql" against postgres.

    The script is looked up next to this module, so the call does not depend
    on the current working directory.  Any rows produced by the script are
    not fetched.
    """
    # get path relative to this file
    sql_path = Path(__file__).with_name("selected_trials.sql")
    sqlfile = sql_path.read_text()

    with postgres_conn() as connection:
        with connection.cursor() as curse:
            curse.execute(sqlfile)


if __name__ == "__main__":
    run()
|
||||
@ -0,0 +1,118 @@
|
||||
import psycopg2 as psyco
|
||||
from psycopg2 import sql
|
||||
from psycopg2 import extras
|
||||
import pymysql
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
from .env_setup import postgres_conn, mariadb_conn, get_tables_of_interest
|
||||
|
||||
|
||||
##############NOTE
|
||||
'''
|
||||
|
||||
|
||||
mariadb --mariadb.connect--> incrementally fetched dict --psycopg2--> postgres
|
||||
|
||||
I will have the ability to reduce memory usage and simplify what I am doing.
|
||||
|
||||
|
||||
'''
|
||||
|
||||
############### GLOBALS
|
||||
#these are hardcoded so they shouldn't require any updates
|
||||
mschema = "rxnorm_current"  # schema read from on the mysql/mariadb side
pschema = "rxnorm_migrated"  # schema written to on the postgres side
|
||||
|
||||
########FUNCTIONS#################
|
||||
|
||||
|
||||
# Postgres column-definition template per supported mysql DATA_TYPE.
# None marks a known type for which no definition is generated.
_PG_COLUMN_TEMPLATES = {
    "varchar": "{COLUMN_NAME} character varying({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
    "char": "{COLUMN_NAME} character({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
    "tinyint": "{COLUMN_NAME} smallint {IS_NULLABLE}",
    "decimal": "{COLUMN_NAME} numeric({NUMERIC_PRECISION},{NUMERIC_SCALE}) {IS_NULLABLE}",
    "int": "{COLUMN_NAME} integer {IS_NULLABLE}",
    "enum": None,
    "text": None,
}


def convert_column(d):
    """
    Given the metadata about a column in mysql, make the portion of the
    `create table` statement that corresponds to that column in postgres.

    `d` is a mapping holding one row of INFORMATION_SCHEMA.columns; the keys
    DATA_TYPE, IS_NULLABLE, COLUMN_NAME and, depending on the type,
    CHARACTER_MAXIMUM_LENGTH or NUMERIC_PRECISION / NUMERIC_SCALE are read.
    `d` itself is left unmodified.

    Returns the column definition as a string, or None for "enum" and
    "text" columns.

    Raises ValueError for any other unsupported data type.
    """
    data_type = d["DATA_TYPE"]

    if data_type not in _PG_COLUMN_TEMPLATES:
        raise ValueError(
            "unsupported mysql data type {!r} for column {!r}".format(
                data_type, d.get("COLUMN_NAME")
            )
        )

    template = _PG_COLUMN_TEMPLATES[data_type]
    if template is None:
        return None

    # Format from a copy so that the caller's row keeps its original
    # IS_NULLABLE value ("YES"/"NO").
    fields = {**d, "IS_NULLABLE": "NOT NULL" if d["IS_NULLABLE"] == "NO" else ""}
    return template.format(**fields)
|
||||
|
||||
def _quote_mysql_identifier(name):
    """Return `name` as a backtick-quoted mysql identifier."""
    return "`{}`".format(str(name).replace("`", "``"))


def run():
    """
    Copy every table of interest from mysql/mariadb into postgres.

    For each table name returned by `get_tables_of_interest`:

    1. read the column metadata from INFORMATION_SCHEMA.columns,
    2. create the equivalent table in the `pschema` schema (if it does not
       exist yet) and commit,
    3. read all rows from the mysql table and insert them into the postgres
       table in pages of 1000 rows.

    Tables without rows are created but nothing is inserted into them.
    """
    # get & convert datatypes for each table of interest
    tables_of_interest = get_tables_of_interest()

    with mariadb_conn(cursorclass=pymysql.cursors.DictCursor) as mcon, postgres_conn() as pcon:
        with mcon.cursor() as mcurse, pcon.cursor(cursor_factory=extras.DictCursor) as pcurse:
            for table in tables_of_interest:  # create equivalent table in postgres

                # get columns from mysql, in their declared order
                q = (
                    "SELECT * FROM INFORMATION_SCHEMA.columns "
                    "WHERE TABLE_SCHEMA=%s and TABLE_NAME=%s "
                    "ORDER BY ORDINAL_POSITION;"
                )
                mcurse.execute(q, [mschema, table])

                # convert mysql column names and types to postgres column statements.
                columns = [convert_column(a) for a in mcurse.fetchall()]
                # TODO make sure this uses psycopg columns correctly.
                column_sql = sql.SQL(",\n".join(columns))

                # build a header and footer
                header = sql.SQL("CREATE TABLE IF NOT EXISTS {}\n(").format(sql.Identifier(pschema, table))
                footer = sql.SQL(");")

                # join the header, columns, and footer.
                create_table_statement = sql.SQL("\n").join([header, column_sql, footer])
                print(create_table_statement.as_string(pcon))

                # create the table in postgres
                pcurse.execute(create_table_statement)
                pcon.commit()

                # get the data from mysql; identifiers cannot be passed as
                # query parameters, so they are quoted explicitly
                mcurse.execute(
                    "SELECT * FROM {schema}.{table}".format(
                        schema=_quote_mysql_identifier(mschema),
                        table=_quote_mysql_identifier(table),
                    )
                )
                results = mcurse.fetchall()

                # an empty table has no first row to take the field names from
                if not results:
                    continue

                # build the insert statement template
                # get list of field names
                column_names = list(results[0])
                # Column names are inserted unquoted, like in the CREATE
                # TABLE statement above, so both resolve to the same columns.
                column_list = [sql.SQL(x) for x in column_names]
                column_inserts = [sql.Placeholder(x) for x in column_names]

                # generate insert statement
                # Note that this does not contain parentheses around the placeholder
                psql_insert = sql.SQL("INSERT INTO {table} ({columns}) VALUES %s ").format(
                    table=sql.Identifier(pschema, table),
                    columns=sql.SQL(",").join(column_list),
                )

                # Building the values template.
                # Note that it must include the parentheses so that the
                # VALUES portion is formatted correctly.
                template = sql.Composed([
                    sql.SQL("("),
                    sql.SQL(",").join(column_inserts),
                    sql.SQL(")"),
                ])

                # insert the data with page_size
                extras.execute_values(pcurse, psql_insert, argslist=results, template=template, page_size=1000)


if __name__ == "__main__":
    run()
|
||||
@ -0,0 +1,21 @@
|
||||
-- Rebuild http.download_status from scratch: remove every existing row, then
-- mark each study matching the selection criteria below as 'Of Interest'.
DELETE FROM http.download_status;

INSERT INTO http.download_status (nct_id, status)
SELECT nct_id, 'Of Interest'::http.history_download_status AS status
FROM ctgov.studies
WHERE
    is_fda_regulated_drug=TRUE
    AND
    study_type = 'Interventional'
    AND
    phase='Phase 3'
    AND
    overall_status in ('Terminated', 'Completed')
    AND
    start_date > '2010-01-01'
    AND
    completion_date < '2022-01-01'
;

-- Report how many trials were selected.
SELECT count(*) FROM http.download_status ;
|
||||
@ -0,0 +1,36 @@
|
||||
from drugtools.env_setup import ENV,postgres_conn
|
||||
from psycopg2 import extras
|
||||
from collections import namedtuple
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# Pipe-separated files, each line holding an ICD-10 code and a cause text.
# The paths are relative to the current working directory.
FILES = [
    "../non-db_data_sources/GBD and ICD-10_(2019 version)/NONFATAL_cause2code.psv",
    "../non-db_data_sources/GBD and ICD-10_(2019 version)/COD_cause2code.psv",
]
SEP = "|"

sql = """
INSERT INTO "DiseaseBurden".icd10_to_cause
(code,cause_text)
VALUES %s
"""


with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
    entries = []
    for fpath in FILES:
        print(fpath)

        with open(fpath, "r") as fh:
            for line in tqdm(fh.readlines(), desc=fpath):
                # a blank line carries no code/cause pair and cannot be unpacked
                if not line.strip():
                    continue
                code, cause = line.split(SEP)
                entries.append((code.strip(), cause.strip()))

    # insert the pairs collected from all files
    extras.execute_values(pcurse, sql, entries)
|
||||
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
#!/bin/bash
# Delete the data directories of the local containers.
# Both paths are relative to the directory the script is started from.

# everything inside the RxNav-in-a-box data directory (the directory itself is kept)
rm -r ../containers/RxNav-In-a-box/rxnav_data/*

# the postgres data directory of the AACT downloader
rm -r ../containers/AACT_downloader/postgresql/data
|
||||
@ -0,0 +1,24 @@
|
||||
from drugtools import env_setup
|
||||
from drugtools import historical_trial_selector as hts
|
||||
from drugtools import historical_nct_downloader as hnd
|
||||
from drugtools import historical_nct_extractor as hne
|
||||
from drugtools import download_and_extract_nsde as daen
|
||||
from drugtools import migrate_mysql2pgsql as mm2p
|
||||
|
||||
# Show the active environment and ask for confirmation before touching it.
print("Current Environment")
print(env_setup.ENV)

cont = input("Are you willing to continue with the current environment? y/[n]")

if cont.strip().lower() == "y":
    print("SelectingTrials")
    # hts.run()
    print("downloading trials")
    # hnd.run()
    print("extracting trials")
    hne.run()
    # NOTE(review): the pipeline stops here, so the two stages below never
    # run -- confirm whether that is still intended before removing the exit.
    raise SystemExit(0)
    daen.run()
    mm2p.run()
else:
    print("Please fix your .env file and try again")
|
||||
@ -0,0 +1,87 @@
|
||||
import requests
|
||||
import json
|
||||
from drugtools.env_setup import ENV,postgres_conn
|
||||
from psycopg2 import extras
|
||||
from collections import namedtuple
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# One search hit for a trial condition; the ui/uri/rootSource/name fields are
# None when a condition produced no hit.
RecordStuff = namedtuple("RecordStuff", "nct_id condition ui uri rootSource name")


class Requestor():
    """Issues search requests against the UMLS REST search endpoint."""

    def __init__(self, api_key):
        """Remember the API key that is sent with every search."""
        self.key = api_key

    def search(self, search_term, inputType="sourceUi", returnIdType="code", addnl_terms=None):
        """
        Search the ICD10 source vocabulary for `search_term`.

        `inputType` and `returnIdType` are passed through as query
        parameters.  `addnl_terms` is an optional dict of further query
        parameters; its entries override the default ones.

        Returns the `requests` response object unchanged; the caller is
        responsible for checking and decoding it.
        """
        query_terms = {
            "apiKey": self.key,
            "sabs": "ICD10",
            "string": search_term,
            "returnIdType": returnIdType,
            "inputType": inputType,
        } | (addnl_terms or {})
        query = "https://uts-ws.nlm.nih.gov/rest/search/current/"

        return requests.get(query, params=query_terms)
|
||||
|
||||
|
||||
r = Requestor(ENV.get("UMLS_API_KEY"))


with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
    # MeSH-only variant of the condition query; defined but not executed below.
    sql = """
    select nct_id, downcase_mesh_term
    from ctgov.browse_conditions bc
    where
        mesh_type = 'mesh-list'
        and
        nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    ;
    """
    # Conditions plus MeSH terms for every trial that has a snapshot.
    sql2 = """
    with cte as (
        /* Keywords added too much noise
        select nct_id,downcase_name
        from ctgov.keywords k
        where nct_id in (select distinct nct_id from history.trial_snapshots ts)
        union */
        select nct_id, downcase_name
        from ctgov.conditions c
        union
        select nct_id ,downcase_mesh_term as downcase_name
        from ctgov.browse_conditions bc
        where mesh_type = 'mesh-list'
    )
    select nct_id, downcase_name from cte
    where nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    """
    pcurse.execute(sql2)
    rows = pcurse.fetchall()

    entries = []

    for row in tqdm(rows, desc="Search MeSH terms"):
        nctid = row[0]
        condition = row[1]
        # print(nctid,condition)

        payload = r.search(condition).json()

        # Fail with an explicit message when the response lacks the expected
        # structure.
        result = payload.get("result")
        if result is None:
            raise ValueError("No result entry in json")
        if "results" not in result:
            raise ValueError("No results entry in json")
        results = result["results"]

        # if results are empty, keep a placeholder row for the condition
        if not results:
            entries.append(RecordStuff(nctid, condition, None, None, None, None))
        else:
            for entry in results:
                entries.append(RecordStuff(nctid, condition, entry["ui"], entry["uri"], entry["rootSource"], entry["name"]))

    sql_insert = """
    INSERT INTO "DiseaseBurden".trial_to_icd10
    (nct_id, "condition", ui,uri,rootsource,"name","source",approved,approval_timestamp)
    VALUES
    (%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s, 'UMLS API search', null,null)
    """
    for entry in tqdm(entries, desc="Inserting entries to DB"):
        pcurse.execute(sql_insert, entry._asdict())
|
||||
@ -0,0 +1,6 @@
|
||||
-- Generate one CREATE MATERIALIZED VIEW statement per materialized view
-- outside the system schemas.
-- Materialized views are listed in pg_matviews (pg_views only holds ordinary
-- views), and postgres has no "CREATE OR REPLACE MATERIALIZED VIEW" form.
SELECT
    'CREATE MATERIALIZED VIEW ' || quote_ident(schemaname::text) || '.' || quote_ident(matviewname::text) || ' AS ' || definition
FROM pg_matviews
WHERE schemaname != 'pg_catalog'
    and schemaname != 'information_schema'
;
|
||||
@ -0,0 +1,24 @@
|
||||
-- Generate one CREATE TABLE statement per table outside the system schemas.
-- Identifiers are quoted where necessary (e.g. mixed-case names) and columns
-- are listed in their declared order.
-- Only the column name, data type, character length and NOT NULL are emitted.
SELECT
    'CREATE TABLE ' || quote_ident(schemaname::text) || '.' || quote_ident(tablename::text) || E'\n(\n' ||
    string_agg(column_definition, E',\n' ORDER BY ordinal_position) || E'\n);\n'
FROM (
    SELECT
        schemaname,
        tablename,
        c.ordinal_position,
        quote_ident(column_name::text) || ' ' || data_type ||
        CASE
            WHEN character_maximum_length IS NOT NULL THEN '(' || character_maximum_length || ')'
            ELSE ''
        END ||
        CASE
            WHEN is_nullable = 'NO' THEN ' NOT NULL'
            ELSE ''
        END as column_definition
    FROM pg_catalog.pg_tables t
    JOIN information_schema.columns c
        ON t.schemaname = c.table_schema
        AND t.tablename = c.table_name
    WHERE schemaname != 'pg_catalog'
        and schemaname != 'information_schema' -- skip the system schemas
) t
GROUP BY schemaname, tablename;
|
||||
@ -0,0 +1,6 @@
|
||||
-- Generate one CREATE OR REPLACE VIEW statement per view outside the system
-- schemas. Identifiers are quoted where necessary (e.g. mixed-case names).
SELECT
    'CREATE OR REPLACE VIEW ' || quote_ident(schemaname::text) || '.' || quote_ident(viewname::text) || ' AS ' || definition
FROM pg_views
WHERE schemaname != 'pg_catalog'
    and schemaname != 'information_schema' -- skip the system schemas
;
|
||||
@ -0,0 +1,415 @@
|
||||
?column?
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_browse_conditions AS SELECT browse_conditions.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT browse_conditions.mesh_term), '|'::text) AS names +
|
||||
FROM ctgov.browse_conditions +
|
||||
GROUP BY browse_conditions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_browse_interventions AS SELECT browse_interventions.nct_id, +
|
||||
array_to_string(array_agg(browse_interventions.mesh_term), '|'::text) AS names +
|
||||
FROM ctgov.browse_interventions +
|
||||
GROUP BY browse_interventions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_cities AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT facilities.city), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_conditions AS SELECT conditions.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT conditions.name), '|'::text) AS names +
|
||||
FROM ctgov.conditions +
|
||||
GROUP BY conditions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_countries AS SELECT countries.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT countries.name), '|'::text) AS names +
|
||||
FROM ctgov.countries +
|
||||
WHERE (countries.removed IS NOT TRUE) +
|
||||
GROUP BY countries.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_design_outcomes AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_facilities AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(facilities.name), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_group_types AS SELECT design_groups.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_groups.group_type), '|'::text) AS names +
|
||||
FROM ctgov.design_groups +
|
||||
GROUP BY design_groups.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_id_information AS SELECT id_information.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT id_information.id_value), '|'::text) AS names +
|
||||
FROM ctgov.id_information +
|
||||
GROUP BY id_information.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_intervention_types AS SELECT interventions.nct_id, +
|
||||
array_to_string(array_agg(interventions.intervention_type), '|'::text) AS names +
|
||||
FROM ctgov.interventions +
|
||||
GROUP BY interventions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_interventions AS SELECT interventions.nct_id, +
|
||||
array_to_string(array_agg(interventions.name), '|'::text) AS names +
|
||||
FROM ctgov.interventions +
|
||||
GROUP BY interventions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_keywords AS SELECT keywords.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT keywords.name), '|'::text) AS names +
|
||||
FROM ctgov.keywords +
|
||||
GROUP BY keywords.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_overall_official_affiliations AS SELECT overall_officials.nct_id, +
|
||||
array_to_string(array_agg(overall_officials.affiliation), '|'::text) AS names +
|
||||
FROM ctgov.overall_officials +
|
||||
GROUP BY overall_officials.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_overall_officials AS SELECT overall_officials.nct_id, +
|
||||
array_to_string(array_agg(overall_officials.name), '|'::text) AS names +
|
||||
FROM ctgov.overall_officials +
|
||||
GROUP BY overall_officials.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_primary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
WHERE ((design_outcomes.outcome_type)::text = 'primary'::text) +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_secondary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
WHERE ((design_outcomes.outcome_type)::text = 'secondary'::text) +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_sponsors AS SELECT sponsors.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT sponsors.name), '|'::text) AS names +
|
||||
FROM ctgov.sponsors +
|
||||
GROUP BY sponsors.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_states AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT facilities.state), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.categories AS SELECT search_results.id, +
|
||||
search_results.nct_id, +
|
||||
search_results.name, +
|
||||
search_results.created_at, +
|
||||
search_results.updated_at, +
|
||||
search_results."grouping", +
|
||||
search_results.study_search_id +
|
||||
FROM ctgov.search_results;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.covid_19_studies AS SELECT s.nct_id, +
|
||||
s.overall_status, +
|
||||
s.study_type, +
|
||||
s.official_title, +
|
||||
s.acronym, +
|
||||
s.phase, +
|
||||
s.why_stopped, +
|
||||
s.has_dmc, +
|
||||
s.enrollment, +
|
||||
s.is_fda_regulated_device, +
|
||||
s.is_fda_regulated_drug, +
|
||||
s.is_unapproved_device, +
|
||||
s.has_expanded_access, +
|
||||
s.study_first_submitted_date, +
|
||||
s.last_update_posted_date, +
|
||||
s.results_first_posted_date, +
|
||||
s.start_date, +
|
||||
s.primary_completion_date, +
|
||||
s.completion_date, +
|
||||
s.study_first_posted_date, +
|
||||
cv.number_of_facilities, +
|
||||
cv.has_single_facility, +
|
||||
cv.nlm_download_date, +
|
||||
s.number_of_arms, +
|
||||
s.number_of_groups, +
|
||||
sp.name AS lead_sponsor, +
|
||||
aid.names AS other_ids, +
|
||||
e.gender, +
|
||||
e.gender_based, +
|
||||
e.gender_description, +
|
||||
e.population, +
|
||||
e.minimum_age, +
|
||||
e.maximum_age, +
|
||||
e.criteria, +
|
||||
e.healthy_volunteers, +
|
||||
ak.names AS keywords, +
|
||||
ai.names AS interventions, +
|
||||
ac.names AS conditions, +
|
||||
d.primary_purpose, +
|
||||
d.allocation, +
|
||||
d.observational_model, +
|
||||
d.intervention_model, +
|
||||
d.masking, +
|
||||
d.subject_masked, +
|
||||
d.caregiver_masked, +
|
||||
d.investigator_masked, +
|
||||
d.outcomes_assessor_masked, +
|
||||
ado.names AS design_outcomes, +
|
||||
bs.description AS brief_summary, +
|
||||
dd.description AS detailed_description +
|
||||
FROM (((((((((((ctgov.studies s +
|
||||
FULL JOIN ctgov.all_conditions ac ON (((s.nct_id)::text = (ac.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_id_information aid ON (((s.nct_id)::text = (aid.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_design_outcomes ado ON (((s.nct_id)::text = (ado.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_keywords ak ON (((s.nct_id)::text = (ak.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_interventions ai ON (((s.nct_id)::text = (ai.nct_id)::text))) +
|
||||
FULL JOIN ctgov.sponsors sp ON (((s.nct_id)::text = (sp.nct_id)::text))) +
|
||||
FULL JOIN ctgov.calculated_values cv ON (((s.nct_id)::text = (cv.nct_id)::text))) +
|
||||
FULL JOIN ctgov.designs d ON (((s.nct_id)::text = (d.nct_id)::text))) +
|
||||
FULL JOIN ctgov.eligibilities e ON (((s.nct_id)::text = (e.nct_id)::text))) +
|
||||
FULL JOIN ctgov.brief_summaries bs ON (((s.nct_id)::text = (bs.nct_id)::text))) +
|
||||
FULL JOIN ctgov.detailed_descriptions dd ON (((s.nct_id)::text = (dd.nct_id)::text))) +
|
||||
WHERE (((sp.lead_or_collaborator)::text = 'lead'::text) AND ((s.nct_id)::text IN ( SELECT search_results.nct_id +
|
||||
FROM ctgov.search_results +
|
||||
WHERE ((search_results.name)::text = 'covid-19'::text))));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW history.match_drugs_to_trials AS SELECT bi.nct_id, +
|
||||
rp.rxcui, +
|
||||
rp.propvalue1 +
|
||||
FROM (ctgov.browse_interventions bi +
|
||||
JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||
WHERE (((rp.propname)::text = 'RxNorm Name'::text) AND ((bi.nct_id)::text IN ( SELECT trial_snapshots.nct_id +
|
||||
FROM history.trial_snapshots)));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW http.most_recent_download_status AS SELECT t.nct_id, +
|
||||
t.status, +
|
||||
t.update_timestamp +
|
||||
FROM ( SELECT download_status.id, +
|
||||
download_status.nct_id, +
|
||||
download_status.status, +
|
||||
download_status.update_timestamp, +
|
||||
row_number() OVER (PARTITION BY download_status.nct_id ORDER BY download_status.update_timestamp DESC) AS rn +
|
||||
FROM http.download_status) t +
|
||||
WHERE (t.rn = 1) +
|
||||
ORDER BY t.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.time_between_submission_and_start_view AS SELECT s.nct_id, +
|
||||
s.start_date, +
|
||||
ts.version, +
|
||||
ts.submission_date, +
|
||||
abs(((EXTRACT(epoch FROM (ts.submission_date - (s.start_date)::timestamp without time zone)))::double precision / (((24 * 60) * 60))::double precision)) AS start_deviance +
|
||||
FROM (ctgov.studies s +
|
||||
JOIN history.trial_snapshots ts ON (((s.nct_id)::text = (ts.nct_id)::text))) +
|
||||
WHERE ((s.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||
FROM "DiseaseBurden".trial_to_icd10 tti));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.rank_proximity_to_start_time_view AS SELECT cte.nct_id, +
|
||||
cte.version, +
|
||||
row_number() OVER (PARTITION BY cte.nct_id ORDER BY cte.start_deviance) AS rownum, +
|
||||
cte.submission_date, +
|
||||
cte.start_deviance, +
|
||||
cte.start_date, +
|
||||
ts.primary_completion_date, +
|
||||
ts.primary_completion_date_category, +
|
||||
ts.overall_status, +
|
||||
ts.enrollment, +
|
||||
ts.enrollment_category +
|
||||
FROM (time_between_submission_and_start_view cte +
|
||||
JOIN history.trial_snapshots ts ON ((((cte.nct_id)::text = (ts.nct_id)::text) AND (cte.version = ts.version))));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.enrollment_closest_to_start_view AS SELECT cte2.nct_id, +
|
||||
min(cte2.rownum) AS enrollment_source +
|
||||
FROM rank_proximity_to_start_time_view cte2 +
|
||||
WHERE (cte2.enrollment IS NOT NULL) +
|
||||
GROUP BY cte2.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.match_trials_to_bn_in AS WITH trialncts AS ( +
|
||||
SELECT DISTINCT ts.nct_id +
|
||||
FROM history.trial_snapshots ts +
|
||||
) +
|
||||
SELECT bi.nct_id, +
|
||||
bi.downcase_mesh_term, +
|
||||
rr.tty2, +
|
||||
rr.rxcui2 AS bn_or_in_cui, +
|
||||
count(*) AS count +
|
||||
FROM ((ctgov.browse_interventions bi +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((rr.rxcui1 = rp.rxcui))) +
|
||||
WHERE (((bi.nct_id)::text IN ( SELECT trialncts.nct_id +
|
||||
FROM trialncts)) AND ((bi.mesh_type)::text = 'mesh-list'::text) AND ((rp.propname)::text = 'Active_ingredient_name'::text) AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))) +
|
||||
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2 +
|
||||
ORDER BY bi.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.match_trial_to_ndc11 AS SELECT mttbi.nct_id, +
|
||||
ah.ndc, +
|
||||
count(*) AS count +
|
||||
FROM ((match_trials_to_bn_in mttbi +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((mttbi.bn_or_in_cui = rr.rxcui1))) +
|
||||
LEFT JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON ((rr.rxcui2 = ah.rxcui))) +
|
||||
WHERE ((rr.tty1 = 'BN'::bpchar) AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar])) AND ((ah.sab)::text = 'RXNORM'::text)) +
|
||||
GROUP BY mttbi.nct_id, ah.ndc +
|
||||
ORDER BY mttbi.nct_id, ah.ndc;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.match_trial_to_marketing_start_date AS SELECT mttn.nct_id, +
|
||||
n.application_number_or_citation, +
|
||||
min(n.marketing_start_date) AS min +
|
||||
FROM (match_trial_to_ndc11 mttn +
|
||||
JOIN spl.nsde n ON ((mttn.ndc = (n.package_ndc11)::bpchar))) +
|
||||
WHERE (((n.product_type)::text = 'HUMAN PRESCRIPTION DRUG'::text) AND ((n.marketing_category)::text = ANY (ARRAY[('NDA'::character varying)::text, ('ANDA'::character varying)::text, ('BLA'::character varying)::text, ('NDA authorized generic'::character varying)::text, ('NDA AUTHORIZED GENERIC'::character varying)::text]))) +
|
||||
GROUP BY mttn.nct_id, n.application_number_or_citation +
|
||||
ORDER BY mttn.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_burdens_cte AS SELECT b.measure_id, +
|
||||
b.location_id, +
|
||||
b.sex_id, +
|
||||
b.age_id, +
|
||||
b.cause_id, +
|
||||
b.metric_id, +
|
||||
b.year, +
|
||||
b.val, +
|
||||
b.upper_95, +
|
||||
b.lower_95, +
|
||||
b.key_column +
|
||||
FROM "DiseaseBurden".burdens b +
|
||||
WHERE ((b.sex_id = 3) AND (b.metric_id = 1) AND (b.measure_id = 2) AND (b.age_id = 22));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_burdens_cte2 AS SELECT c1.cause_id, +
|
||||
c1.year, +
|
||||
c1.val AS h_sdi_val, +
|
||||
c1.upper_95 AS h_sdi_u95, +
|
||||
c1.lower_95 AS h_sdi_l95, +
|
||||
c2.val AS hm_sdi_val, +
|
||||
c2.upper_95 AS hm_sdi_u95, +
|
||||
c2.lower_95 AS hm_sdi_l95, +
|
||||
c3.val AS m_sdi_val, +
|
||||
c3.upper_95 AS m_sdi_u95, +
|
||||
c3.lower_95 AS m_sdi_l95, +
|
||||
c4.val AS lm_sdi_val, +
|
||||
c4.upper_95 AS lm_sdi_u95, +
|
||||
c4.lower_95 AS lm_sdi_l95, +
|
||||
c5.val AS l_sdi_val, +
|
||||
c5.upper_95 AS l_sdi_u95, +
|
||||
c5.lower_95 AS l_sdi_l95 +
|
||||
FROM ((((view_burdens_cte c1 +
|
||||
JOIN view_burdens_cte c2 ON (((c1.cause_id = c2.cause_id) AND (c1.year = c2.year)))) +
|
||||
JOIN view_burdens_cte c3 ON (((c1.cause_id = c3.cause_id) AND (c1.year = c3.year)))) +
|
||||
JOIN view_burdens_cte c4 ON (((c1.cause_id = c4.cause_id) AND (c1.year = c4.year)))) +
|
||||
JOIN view_burdens_cte c5 ON (((c1.cause_id = c5.cause_id) AND (c1.year = c5.year)))) +
|
||||
WHERE ((c1.location_id = 44635) AND (c2.location_id = 44634) AND (c3.location_id = 44639) AND (c4.location_id = 44636) AND (c5.location_id = 44637));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_cte AS SELECT ts.nct_id, +
|
||||
ts.primary_completion_date, +
|
||||
ts.primary_completion_date_category, +
|
||||
ts.enrollment, +
|
||||
ts.start_date, +
|
||||
ts.enrollment_category, +
|
||||
ts.overall_status, +
|
||||
min(ts.submission_date) AS earliest_date_observed +
|
||||
FROM history.trial_snapshots ts +
|
||||
WHERE (((ts.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||
FROM "DiseaseBurden".trial_to_icd10 tti +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type))) AND (ts.submission_date >= ts.start_date) AND (ts.overall_status <> ALL (ARRAY['Completed'::history.study_statuses, 'Terminated'::history.study_statuses]))) +
|
||||
GROUP BY ts.nct_id, ts.primary_completion_date, ts.primary_completion_date_category, ts.start_date, ts.enrollment, ts.enrollment_category, ts.overall_status;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte0 AS SELECT tti.nct_id, +
|
||||
tti.ui, +
|
||||
tti.condition, +
|
||||
itc.cause_text, +
|
||||
ch.cause_id, +
|
||||
ch.level +
|
||||
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type);
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte AS SELECT view_disbur_cte0.nct_id, +
|
||||
max(view_disbur_cte0.level) AS max_level +
|
||||
FROM view_disbur_cte0 +
|
||||
GROUP BY view_disbur_cte0.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_trial_to_cause AS SELECT tti.nct_id, +
|
||||
tti.ui, +
|
||||
tti.condition, +
|
||||
itc.cause_text, +
|
||||
ch.cause_id, +
|
||||
ch.level +
|
||||
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type) +
|
||||
ORDER BY tti.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte2 AS SELECT ttc.nct_id, +
|
||||
ttc.ui, +
|
||||
ttc.condition, +
|
||||
ttc.cause_text, +
|
||||
ttc.cause_id, +
|
||||
disbur_cte.max_level +
|
||||
FROM (view_trial_to_cause ttc +
|
||||
JOIN view_disbur_cte disbur_cte ON (((disbur_cte.nct_id)::text = (ttc.nct_id)::text))) +
|
||||
WHERE (ttc.level = disbur_cte.max_level) +
|
||||
GROUP BY ttc.nct_id, ttc.ui, ttc.condition, ttc.cause_text, ttc.cause_id, disbur_cte.max_level +
|
||||
ORDER BY ttc.nct_id, ttc.ui;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte3 AS SELECT disbur_cte2.nct_id, +
|
||||
SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) AS code, +
|
||||
disbur_cte2.condition, +
|
||||
disbur_cte2.cause_text, +
|
||||
disbur_cte2.cause_id, +
|
||||
ic.chapter_code AS category_id, +
|
||||
ic.group_name, +
|
||||
disbur_cte2.max_level +
|
||||
FROM (view_disbur_cte2 disbur_cte2 +
|
||||
JOIN "DiseaseBurden".icd10_categories ic ON (((SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) <= (ic.end_code)::text) AND (SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) >= (ic.start_code)::text)))) +
|
||||
WHERE (ic.level = 1);
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.formatted_data AS SELECT cte.nct_id, +
|
||||
cte.start_date, +
|
||||
cte.enrollment AS current_enrollment, +
|
||||
cte.enrollment_category, +
|
||||
cte.overall_status AS current_status, +
|
||||
cte.earliest_date_observed, +
|
||||
(EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))) AS elapsed_duration, +
|
||||
count(DISTINCT mttmsd.application_number_or_citation) AS n_brands, +
|
||||
dbc3.code, +
|
||||
dbc3.condition, +
|
||||
dbc3.cause_text, +
|
||||
dbc3.cause_id, +
|
||||
dbc3.category_id, +
|
||||
dbc3.group_name, +
|
||||
dbc3.max_level, +
|
||||
b.year, +
|
||||
b.h_sdi_val, +
|
||||
b.h_sdi_u95, +
|
||||
b.h_sdi_l95, +
|
||||
b.hm_sdi_val, +
|
||||
b.hm_sdi_u95, +
|
||||
b.hm_sdi_l95, +
|
||||
b.m_sdi_val, +
|
||||
b.m_sdi_u95, +
|
||||
b.m_sdi_l95, +
|
||||
b.lm_sdi_val, +
|
||||
b.lm_sdi_u95, +
|
||||
b.lm_sdi_l95, +
|
||||
b.l_sdi_val, +
|
||||
b.l_sdi_u95, +
|
||||
b.l_sdi_l95 +
|
||||
FROM (((view_cte cte +
|
||||
JOIN match_trial_to_marketing_start_date mttmsd ON (((cte.nct_id)::text = (mttmsd.nct_id)::text))) +
|
||||
JOIN view_disbur_cte3 dbc3 ON (((dbc3.nct_id)::text = (cte.nct_id)::text))) +
|
||||
JOIN view_burdens_cte2 b ON (((b.cause_id = dbc3.cause_id) AND (EXTRACT(year FROM b.year) = EXTRACT(year FROM cte.earliest_date_observed))))) +
|
||||
WHERE (mttmsd.min <= cte.earliest_date_observed) +
|
||||
GROUP BY cte.nct_id, cte.start_date, cte.enrollment, cte.enrollment_category, cte.overall_status, cte.earliest_date_observed, (EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))), dbc3.code, dbc3.condition, dbc3.cause_text, dbc3.cause_id, dbc3.category_id, dbc3.group_name, dbc3.max_level, b.cause_id, b.year, b.h_sdi_val, b.h_sdi_u95, b.h_sdi_l95, b.hm_sdi_val, b.hm_sdi_u95, b.hm_sdi_l95, b.m_sdi_val, b.m_sdi_u95, b.m_sdi_l95, b.lm_sdi_val, b.lm_sdi_u95, b.lm_sdi_l95, b.l_sdi_val, b.l_sdi_u95, b.l_sdi_l95+
|
||||
ORDER BY cte.nct_id, cte.earliest_date_observed;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.formatted_data_with_planned_enrollment AS SELECT f.nct_id, +
|
||||
f.start_date, +
|
||||
f.current_enrollment, +
|
||||
f.enrollment_category, +
|
||||
f.current_status, +
|
||||
f.earliest_date_observed, +
|
||||
f.elapsed_duration, +
|
||||
f.n_brands, +
|
||||
f.code, +
|
||||
f.condition, +
|
||||
f.cause_text, +
|
||||
f.cause_id, +
|
||||
f.category_id, +
|
||||
f.group_name, +
|
||||
f.max_level, +
|
||||
f.year, +
|
||||
f.h_sdi_val, +
|
||||
f.h_sdi_u95, +
|
||||
f.h_sdi_l95, +
|
||||
f.hm_sdi_val, +
|
||||
f.hm_sdi_u95, +
|
||||
f.hm_sdi_l95, +
|
||||
f.m_sdi_val, +
|
||||
f.m_sdi_u95, +
|
||||
f.m_sdi_l95, +
|
||||
f.lm_sdi_val, +
|
||||
f.lm_sdi_u95, +
|
||||
f.lm_sdi_l95, +
|
||||
f.l_sdi_val, +
|
||||
f.l_sdi_u95, +
|
||||
f.l_sdi_l95, +
|
||||
s.overall_status AS final_status, +
|
||||
c2a.version, +
|
||||
c2a.enrollment AS planned_enrollment +
|
||||
FROM (((formatted_data f +
|
||||
JOIN ctgov.studies s ON (((f.nct_id)::text = (s.nct_id)::text))) +
|
||||
JOIN enrollment_closest_to_start_view c3e ON (((c3e.nct_id)::text = (f.nct_id)::text))) +
|
||||
JOIN rank_proximity_to_start_time_view c2a ON ((((c3e.nct_id)::text = (c2a.nct_id)::text) AND (c3e.enrollment_source = c2a.rownum))));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW http.trials_to_download AS SELECT most_recent_download_status.nct_id +
|
||||
FROM http.most_recent_download_status +
|
||||
WHERE (most_recent_download_status.status = 'Of Interest'::http.history_download_status);
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.primary_design_outcomes AS SELECT do2.id, +
|
||||
do2.nct_id, +
|
||||
do2.outcome_type, +
|
||||
do2.measure, +
|
||||
do2.time_frame, +
|
||||
do2.population, +
|
||||
do2.description +
|
||||
FROM ctgov.design_outcomes do2 +
|
||||
WHERE (((do2.outcome_type)::text = 'primary'::text) AND ((do2.nct_id)::text IN ( SELECT DISTINCT fd.nct_id +
|
||||
FROM formatted_data fd)));
|
||||
(40 rows)
|
||||
|
||||
@ -0,0 +1,920 @@
|
||||
?column?
|
||||
-------------------------------------------------------
|
||||
CREATE TABLE DiseaseBurden.age_group +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
age_group character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.burdens +
|
||||
( +
|
||||
measure_id integer NOT NULL, +
|
||||
location_id integer NOT NULL, +
|
||||
sex_id integer NOT NULL, +
|
||||
age_id integer NOT NULL, +
|
||||
cause_id integer NOT NULL, +
|
||||
metric_id integer NOT NULL, +
|
||||
year date NOT NULL, +
|
||||
val double precision NOT NULL, +
|
||||
upper_95 double precision NOT NULL, +
|
||||
lower_95 double precision NOT NULL, +
|
||||
key_column integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.cause +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
cause character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.cause_hierarchy +
|
||||
( +
|
||||
cause_id integer NOT NULL, +
|
||||
cause_name character varying, +
|
||||
parent_id integer NOT NULL, +
|
||||
parent_nae character varying, +
|
||||
level integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.icd10_categories +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
start_code character varying NOT NULL, +
|
||||
end_code character varying NOT NULL, +
|
||||
group_name character varying NOT NULL, +
|
||||
level integer NOT NULL, +
|
||||
chapter character varying NOT NULL, +
|
||||
chapter_code integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.icd10_to_cause +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
code character varying NOT NULL, +
|
||||
cause_text character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.location +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
location character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.measures +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
label character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.metric +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
metric_label character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.rei +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
rei_label character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.sex +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
sex character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.trial_to_icd10 +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying NOT NULL, +
|
||||
condition character varying NOT NULL, +
|
||||
ui character varying, +
|
||||
uri character varying, +
|
||||
rootsource character varying, +
|
||||
name character varying, +
|
||||
source character varying, +
|
||||
approved USER-DEFINED, +
|
||||
approval_timestamp timestamp without time zone +
|
||||
); +
|
||||
|
||||
CREATE TABLE Formularies.usp_dc_2023 +
|
||||
( +
|
||||
USP Class character varying(250), +
|
||||
USP Pharmacotherapeutic Group character varying(250),+
|
||||
API Concept character varying(250), +
|
||||
rxcui character varying(15), +
|
||||
tty character varying(10), +
|
||||
Name character varying(256), +
|
||||
Related BN character varying(250), +
|
||||
Related DF character varying(25050), +
|
||||
USP Category character varying(250) +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.active_storage_attachments +
|
||||
( +
|
||||
id bigint NOT NULL, +
|
||||
name character varying NOT NULL, +
|
||||
record_type character varying NOT NULL, +
|
||||
record_id bigint NOT NULL, +
|
||||
blob_id bigint NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.active_storage_blobs +
|
||||
( +
|
||||
metadata text, +
|
||||
checksum character varying NOT NULL, +
|
||||
byte_size bigint NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
id bigint NOT NULL, +
|
||||
key character varying NOT NULL, +
|
||||
filename character varying NOT NULL, +
|
||||
content_type character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.baseline_counts +
|
||||
( +
|
||||
count integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
ctgov_group_code character varying, +
|
||||
units character varying, +
|
||||
scope character varying, +
|
||||
result_group_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.baseline_measurements +
|
||||
( +
|
||||
param_value character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
classification character varying, +
|
||||
category character varying, +
|
||||
title character varying, +
|
||||
description text, +
|
||||
units character varying, +
|
||||
param_type character varying, +
|
||||
param_value_num numeric, +
|
||||
dispersion_type character varying, +
|
||||
dispersion_value character varying, +
|
||||
dispersion_value_num numeric, +
|
||||
dispersion_lower_limit numeric, +
|
||||
dispersion_upper_limit numeric, +
|
||||
explanation_of_na character varying, +
|
||||
number_analyzed integer, +
|
||||
number_analyzed_units character varying, +
|
||||
population_description character varying, +
|
||||
calculate_percentage character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.brief_summaries +
|
||||
( +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
description text +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.browse_conditions +
|
||||
( +
|
||||
mesh_term character varying, +
|
||||
id integer NOT NULL, +
|
||||
mesh_type character varying, +
|
||||
downcase_mesh_term character varying, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.browse_interventions +
|
||||
( +
|
||||
downcase_mesh_term character varying, +
|
||||
mesh_term character varying, +
|
||||
mesh_type character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.calculated_values +
|
||||
( +
|
||||
number_of_secondary_outcomes_to_measure integer, +
|
||||
maximum_age_unit character varying, +
|
||||
minimum_age_unit character varying, +
|
||||
maximum_age_num integer, +
|
||||
minimum_age_num integer, +
|
||||
has_single_facility boolean, +
|
||||
has_us_facility boolean, +
|
||||
months_to_report_results integer, +
|
||||
number_of_sae_subjects integer, +
|
||||
were_results_reported boolean, +
|
||||
registered_in_calendar_year integer, +
|
||||
nlm_download_date date, +
|
||||
actual_duration integer, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
number_of_facilities integer, +
|
||||
number_of_nsae_subjects integer, +
|
||||
number_of_other_outcomes_to_measure integer, +
|
||||
number_of_primary_outcomes_to_measure integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.central_contacts +
|
||||
( +
|
||||
phone_extension character varying, +
|
||||
nct_id character varying, +
|
||||
role character varying, +
|
||||
id integer NOT NULL, +
|
||||
contact_type character varying, +
|
||||
name character varying, +
|
||||
phone character varying, +
|
||||
email character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.conditions +
|
||||
( +
|
||||
downcase_name character varying, +
|
||||
name character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.countries +
|
||||
( +
|
||||
name character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
removed boolean +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.design_group_interventions +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
design_group_id integer, +
|
||||
intervention_id integer, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.design_groups +
|
||||
( +
|
||||
group_type character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
title character varying, +
|
||||
description text +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.design_outcomes +
|
||||
( +
|
||||
description text, +
|
||||
measure text, +
|
||||
outcome_type character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
time_frame text, +
|
||||
population character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.designs +
|
||||
( +
|
||||
masking_description text, +
|
||||
subject_masked boolean, +
|
||||
caregiver_masked boolean, +
|
||||
investigator_masked boolean, +
|
||||
outcomes_assessor_masked boolean, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
allocation character varying, +
|
||||
intervention_model character varying, +
|
||||
observational_model character varying, +
|
||||
primary_purpose character varying, +
|
||||
time_perspective character varying, +
|
||||
masking character varying, +
|
||||
intervention_model_description text +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.detailed_descriptions +
|
||||
( +
|
||||
description text, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.documents +
|
||||
( +
|
||||
comment text, +
|
||||
id integer NOT NULL, +
|
||||
url character varying, +
|
||||
document_type character varying, +
|
||||
nct_id character varying, +
|
||||
document_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.drop_withdrawals +
|
||||
( +
|
||||
period character varying, +
|
||||
reason character varying, +
|
||||
count integer, +
|
||||
ctgov_group_code character varying, +
|
||||
result_group_id integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.eligibilities +
|
||||
( +
|
||||
older_adult boolean, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
sampling_method character varying, +
|
||||
gender character varying, +
|
||||
minimum_age character varying, +
|
||||
maximum_age character varying, +
|
||||
healthy_volunteers character varying, +
|
||||
population text, +
|
||||
criteria text, +
|
||||
gender_description text, +
|
||||
gender_based boolean, +
|
||||
adult boolean, +
|
||||
child boolean +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.facilities +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
status character varying, +
|
||||
name character varying, +
|
||||
city character varying, +
|
||||
state character varying, +
|
||||
zip character varying, +
|
||||
country character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.facility_contacts +
|
||||
( +
|
||||
contact_type character varying, +
|
||||
name character varying, +
|
||||
email character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
phone character varying, +
|
||||
phone_extension character varying, +
|
||||
facility_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.facility_investigators +
|
||||
( +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
facility_id integer, +
|
||||
role character varying, +
|
||||
name character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.file_records +
|
||||
( +
|
||||
url character varying, +
|
||||
id bigint NOT NULL, +
|
||||
filename character varying, +
|
||||
file_size bigint, +
|
||||
file_type character varying, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.id_information +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
id_source character varying, +
|
||||
nct_id character varying, +
|
||||
id_link character varying, +
|
||||
id_value character varying, +
|
||||
id_type_description character varying, +
|
||||
id_type character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.intervention_other_names +
|
||||
( +
|
||||
name character varying, +
|
||||
nct_id character varying, +
|
||||
intervention_id integer, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.interventions +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
name character varying, +
|
||||
intervention_type character varying, +
|
||||
description text, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.ipd_information_types +
|
||||
( +
|
||||
name character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.keywords +
|
||||
( +
|
||||
name character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
downcase_name character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.links +
|
||||
( +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
description text, +
|
||||
url character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.mesh_headings +
|
||||
( +
|
||||
qualifier character varying, +
|
||||
id integer NOT NULL, +
|
||||
subcategory character varying, +
|
||||
heading character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.mesh_terms +
|
||||
( +
|
||||
description character varying, +
|
||||
tree_number character varying, +
|
||||
qualifier character varying, +
|
||||
id integer NOT NULL, +
|
||||
downcase_mesh_term character varying, +
|
||||
mesh_term character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.milestones +
|
||||
( +
|
||||
count_units character varying, +
|
||||
count integer, +
|
||||
description text, +
|
||||
period character varying, +
|
||||
title character varying, +
|
||||
ctgov_group_code character varying, +
|
||||
result_group_id integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
milestone_description character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcome_analyses +
|
||||
( +
|
||||
other_analysis_description text, +
|
||||
param_type character varying, +
|
||||
non_inferiority_type character varying, +
|
||||
outcome_id integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
param_value numeric, +
|
||||
dispersion_type character varying, +
|
||||
dispersion_value numeric, +
|
||||
p_value_modifier character varying, +
|
||||
p_value double precision, +
|
||||
ci_n_sides character varying, +
|
||||
ci_percent numeric, +
|
||||
ci_lower_limit numeric, +
|
||||
ci_upper_limit numeric, +
|
||||
ci_upper_limit_na_comment character varying, +
|
||||
p_value_description character varying, +
|
||||
method character varying, +
|
||||
method_description text, +
|
||||
estimate_description text, +
|
||||
groups_description text, +
|
||||
non_inferiority_description text +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcome_analysis_groups +
|
||||
( +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
outcome_analysis_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcome_counts +
|
||||
( +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
scope character varying, +
|
||||
units character varying, +
|
||||
count integer, +
|
||||
outcome_id integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcome_measurements +
|
||||
( +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
classification character varying, +
|
||||
category character varying, +
|
||||
title character varying, +
|
||||
description text, +
|
||||
units character varying, +
|
||||
param_type character varying, +
|
||||
param_value character varying, +
|
||||
param_value_num numeric, +
|
||||
dispersion_type character varying, +
|
||||
dispersion_value character varying, +
|
||||
dispersion_value_num numeric, +
|
||||
dispersion_lower_limit numeric, +
|
||||
dispersion_upper_limit numeric, +
|
||||
explanation_of_na text, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
outcome_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcomes +
|
||||
( +
|
||||
population text, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
outcome_type character varying, +
|
||||
title text, +
|
||||
description text, +
|
||||
time_frame text, +
|
||||
anticipated_posting_date date, +
|
||||
anticipated_posting_month_year character varying, +
|
||||
units character varying, +
|
||||
units_analyzed character varying, +
|
||||
dispersion_type character varying, +
|
||||
param_type character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.overall_officials +
|
||||
( +
|
||||
name character varying, +
|
||||
affiliation character varying, +
|
||||
role character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.participant_flows +
|
||||
( +
|
||||
count_units integer, +
|
||||
nct_id character varying, +
|
||||
pre_assignment_details text, +
|
||||
units_analyzed character varying, +
|
||||
drop_withdraw_comment character varying, +
|
||||
reason_comment character varying, +
|
||||
recruitment_details text, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.pending_results +
|
||||
( +
|
||||
event_date_description character varying, +
|
||||
event_date date, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
event character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.provided_documents +
|
||||
( +
|
||||
has_sap boolean, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
document_type character varying, +
|
||||
has_protocol boolean, +
|
||||
has_icf boolean, +
|
||||
document_date date, +
|
||||
url character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.reported_event_totals +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
subjects_at_risk integer, +
|
||||
subjects_affected integer, +
|
||||
classification character varying NOT NULL, +
|
||||
event_type character varying, +
|
||||
ctgov_group_code character varying NOT NULL, +
|
||||
nct_id character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.reported_events +
|
||||
( +
|
||||
vocab character varying, +
|
||||
nct_id character varying, +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
time_frame text, +
|
||||
event_type character varying, +
|
||||
default_vocab character varying, +
|
||||
default_assessment character varying, +
|
||||
subjects_affected integer, +
|
||||
subjects_at_risk integer, +
|
||||
description text, +
|
||||
event_count integer, +
|
||||
organ_system character varying, +
|
||||
adverse_event_term character varying, +
|
||||
frequency_threshold integer, +
|
||||
assessment character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.responsible_parties +
|
||||
( +
|
||||
affiliation text, +
|
||||
nct_id character varying, +
|
||||
responsible_party_type character varying, +
|
||||
name character varying, +
|
||||
title character varying, +
|
||||
organization character varying, +
|
||||
id integer NOT NULL, +
|
||||
old_name_title character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.result_agreements +
|
||||
( +
|
||||
other_details text, +
|
||||
restrictive_agreement character varying, +
|
||||
restriction_type character varying, +
|
||||
agreement text, +
|
||||
pi_employee character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.result_contacts +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
organization character varying, +
|
||||
name character varying, +
|
||||
phone character varying, +
|
||||
email character varying, +
|
||||
extension character varying, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.result_groups +
|
||||
( +
|
||||
result_type character varying, +
|
||||
title character varying, +
|
||||
description text, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
ctgov_group_code character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.retractions +
|
||||
( +
|
||||
pmid character varying, +
|
||||
id bigint NOT NULL, +
|
||||
nct_id character varying, +
|
||||
source character varying, +
|
||||
reference_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.search_results +
|
||||
( +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
nct_id character varying NOT NULL, +
|
||||
id integer NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
grouping character varying NOT NULL, +
|
||||
study_search_id integer, +
|
||||
name character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.sponsors +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
name character varying, +
|
||||
lead_or_collaborator character varying, +
|
||||
agency_class character varying, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.studies +
|
||||
( +
|
||||
phase character varying, +
|
||||
delayed_posting character varying, +
|
||||
source_class character varying, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
plan_to_share_ipd_description character varying, +
|
||||
plan_to_share_ipd character varying, +
|
||||
ipd_url character varying, +
|
||||
ipd_access_criteria character varying, +
|
||||
ipd_time_frame character varying, +
|
||||
biospec_description text, +
|
||||
biospec_retention character varying, +
|
||||
is_us_export boolean, +
|
||||
is_ppsd boolean, +
|
||||
is_unapproved_device boolean, +
|
||||
is_fda_regulated_device boolean, +
|
||||
is_fda_regulated_drug boolean, +
|
||||
has_dmc boolean, +
|
||||
expanded_access_type_treatment boolean, +
|
||||
expanded_access_type_intermediate boolean, +
|
||||
expanded_access_type_individual boolean, +
|
||||
has_expanded_access boolean, +
|
||||
why_stopped character varying, +
|
||||
number_of_groups integer, +
|
||||
number_of_arms integer, +
|
||||
limitations_and_caveats character varying, +
|
||||
source character varying, +
|
||||
enrollment_type character varying, +
|
||||
enrollment integer, +
|
||||
expanded_access_nctid character varying, +
|
||||
last_known_status character varying, +
|
||||
overall_status character varying, +
|
||||
official_title text, +
|
||||
brief_title text, +
|
||||
baseline_population text, +
|
||||
acronym character varying, +
|
||||
study_type character varying, +
|
||||
target_duration character varying, +
|
||||
results_first_submitted_date date, +
|
||||
study_first_submitted_date date, +
|
||||
nlm_download_date_description character varying, +
|
||||
primary_completion_date date, +
|
||||
nct_id character varying, +
|
||||
primary_completion_date_type character varying, +
|
||||
primary_completion_month_year character varying, +
|
||||
completion_date date, +
|
||||
completion_date_type character varying, +
|
||||
completion_month_year character varying, +
|
||||
verification_date date, +
|
||||
verification_month_year character varying, +
|
||||
start_date date, +
|
||||
start_date_type character varying, +
|
||||
start_month_year character varying, +
|
||||
last_update_posted_date_type character varying, +
|
||||
last_update_posted_date date, +
|
||||
last_update_submitted_qc_date date, +
|
||||
disposition_first_posted_date_type character varying,+
|
||||
disposition_first_posted_date date, +
|
||||
disposition_first_submitted_qc_date date, +
|
||||
results_first_posted_date_type character varying, +
|
||||
results_first_posted_date date, +
|
||||
results_first_submitted_qc_date date, +
|
||||
study_first_posted_date_type character varying, +
|
||||
study_first_posted_date date, +
|
||||
study_first_submitted_qc_date date, +
|
||||
last_update_submitted_date date, +
|
||||
disposition_first_submitted_date date, +
|
||||
baseline_type_units_analyzed character varying, +
|
||||
fdaaa801_violation boolean, +
|
||||
expanded_access_status_for_nctid character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.study_records +
|
||||
( +
|
||||
nct_id character varying, +
|
||||
sha character varying, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
type character varying, +
|
||||
content json, +
|
||||
id bigint NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.study_references +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
citation text, +
|
||||
reference_type character varying, +
|
||||
pmid character varying, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.study_searches +
|
||||
( +
|
||||
query character varying NOT NULL, +
|
||||
id integer NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
beta_api boolean NOT NULL, +
|
||||
name character varying NOT NULL, +
|
||||
grouping character varying NOT NULL, +
|
||||
save_tsv boolean NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.verifiers +
|
||||
( +
|
||||
id bigint NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
source json, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
load_event_id integer, +
|
||||
last_run timestamp without time zone, +
|
||||
differences json NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE history.trial_snapshots +
|
||||
( +
|
||||
completion_date timestamp without time zone, +
|
||||
nct_id character varying(15) NOT NULL, +
|
||||
version integer NOT NULL, +
|
||||
submission_date timestamp without time zone, +
|
||||
primary_completion_date timestamp without time zone, +
|
||||
primary_completion_date_category USER-DEFINED, +
|
||||
start_date timestamp without time zone, +
|
||||
start_date_category USER-DEFINED, +
|
||||
completion_date_category USER-DEFINED, +
|
||||
overall_status USER-DEFINED, +
|
||||
enrollment integer, +
|
||||
enrollment_category USER-DEFINED, +
|
||||
sponsor character varying, +
|
||||
responsible_party character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE http.download_status +
|
||||
( +
|
||||
status USER-DEFINED NOT NULL, +
|
||||
nct_id character varying(15) NOT NULL, +
|
||||
id integer NOT NULL, +
|
||||
update_timestamp timestamp with time zone +
|
||||
); +
|
||||
|
||||
CREATE TABLE http.responses +
|
||||
( +
|
||||
nct_id character varying(15), +
|
||||
version_a smallint, +
|
||||
version_b smallint, +
|
||||
url character varying(255), +
|
||||
response_code smallint, +
|
||||
response_date timestamp with time zone, +
|
||||
id integer NOT NULL, +
|
||||
html text +
|
||||
); +
|
||||
|
||||
CREATE TABLE rxnorm_migrated.ALLNDC_HISTORY +
|
||||
( +
|
||||
sab character varying(10), +
|
||||
ndc11_left9 character(9) NOT NULL, +
|
||||
rowid integer NOT NULL, +
|
||||
ndc character(13) NOT NULL, +
|
||||
suppress character(1), +
|
||||
edate character(6), +
|
||||
sdate character(6), +
|
||||
rxcui character(16) +
|
||||
); +
|
||||
|
||||
CREATE TABLE rxnorm_migrated.ALLRXCUI_HISTORY +
|
||||
( +
|
||||
tty character varying(5), +
|
||||
sts character(1), +
|
||||
rxindb character(1), +
|
||||
indb character(1), +
|
||||
rowid integer NOT NULL, +
|
||||
rxcui character(16) NOT NULL, +
|
||||
sab character varying(20), +
|
||||
str character varying(3000), +
|
||||
sdate character(6), +
|
||||
edate character(6) +
|
||||
); +
|
||||
|
||||
CREATE TABLE rxnorm_migrated.rxnorm_props +
|
||||
( +
|
||||
rxcui character(8) NOT NULL, +
|
||||
pres smallint NOT NULL, +
|
||||
propvalue1 character varying(4000) NOT NULL, +
|
||||
propname character varying(30) NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE rxnorm_migrated.rxnorm_relations +
|
||||
( +
|
||||
tty2 character(4) NOT NULL, +
|
||||
rxcui1 character(8) NOT NULL, +
|
||||
tty1 character(4) NOT NULL, +
|
||||
cvf character(4) NOT NULL, +
|
||||
rxcui2 character(8) NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE spl.nsde +
|
||||
( +
|
||||
proprietary_name character varying(500), +
|
||||
package_ndc character varying(50), +
|
||||
application_number_or_citation character varying(25),+
|
||||
package_ndc11 character varying(11), +
|
||||
id integer NOT NULL, +
|
||||
reactivation_date date, +
|
||||
inactivation_date date, +
|
||||
marketing_start_date date, +
|
||||
marketing_end_date date, +
|
||||
billing_unit character varying(35), +
|
||||
dosage_form character varying(155), +
|
||||
marketing_category character varying(160), +
|
||||
product_type character varying(90) +
|
||||
); +
|
||||
|
||||
(76 rows)
|
||||
|
||||
@ -0,0 +1,415 @@
|
||||
?column?
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
CREATE OR REPLACE VIEW ctgov.all_browse_conditions AS SELECT browse_conditions.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT browse_conditions.mesh_term), '|'::text) AS names +
|
||||
FROM ctgov.browse_conditions +
|
||||
GROUP BY browse_conditions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_browse_interventions AS SELECT browse_interventions.nct_id, +
|
||||
array_to_string(array_agg(browse_interventions.mesh_term), '|'::text) AS names +
|
||||
FROM ctgov.browse_interventions +
|
||||
GROUP BY browse_interventions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_cities AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT facilities.city), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_conditions AS SELECT conditions.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT conditions.name), '|'::text) AS names +
|
||||
FROM ctgov.conditions +
|
||||
GROUP BY conditions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_countries AS SELECT countries.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT countries.name), '|'::text) AS names +
|
||||
FROM ctgov.countries +
|
||||
WHERE (countries.removed IS NOT TRUE) +
|
||||
GROUP BY countries.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_design_outcomes AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_facilities AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(facilities.name), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_group_types AS SELECT design_groups.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_groups.group_type), '|'::text) AS names +
|
||||
FROM ctgov.design_groups +
|
||||
GROUP BY design_groups.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_id_information AS SELECT id_information.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT id_information.id_value), '|'::text) AS names +
|
||||
FROM ctgov.id_information +
|
||||
GROUP BY id_information.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_intervention_types AS SELECT interventions.nct_id, +
|
||||
array_to_string(array_agg(interventions.intervention_type), '|'::text) AS names +
|
||||
FROM ctgov.interventions +
|
||||
GROUP BY interventions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_interventions AS SELECT interventions.nct_id, +
|
||||
array_to_string(array_agg(interventions.name), '|'::text) AS names +
|
||||
FROM ctgov.interventions +
|
||||
GROUP BY interventions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_keywords AS SELECT keywords.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT keywords.name), '|'::text) AS names +
|
||||
FROM ctgov.keywords +
|
||||
GROUP BY keywords.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_overall_official_affiliations AS SELECT overall_officials.nct_id, +
|
||||
array_to_string(array_agg(overall_officials.affiliation), '|'::text) AS names +
|
||||
FROM ctgov.overall_officials +
|
||||
GROUP BY overall_officials.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_overall_officials AS SELECT overall_officials.nct_id, +
|
||||
array_to_string(array_agg(overall_officials.name), '|'::text) AS names +
|
||||
FROM ctgov.overall_officials +
|
||||
GROUP BY overall_officials.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_primary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
WHERE ((design_outcomes.outcome_type)::text = 'primary'::text) +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_secondary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
WHERE ((design_outcomes.outcome_type)::text = 'secondary'::text) +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_sponsors AS SELECT sponsors.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT sponsors.name), '|'::text) AS names +
|
||||
FROM ctgov.sponsors +
|
||||
GROUP BY sponsors.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_states AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT facilities.state), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.categories AS SELECT search_results.id, +
|
||||
search_results.nct_id, +
|
||||
search_results.name, +
|
||||
search_results.created_at, +
|
||||
search_results.updated_at, +
|
||||
search_results."grouping", +
|
||||
search_results.study_search_id +
|
||||
FROM ctgov.search_results;
|
||||
CREATE OR REPLACE VIEW ctgov.covid_19_studies AS SELECT s.nct_id, +
|
||||
s.overall_status, +
|
||||
s.study_type, +
|
||||
s.official_title, +
|
||||
s.acronym, +
|
||||
s.phase, +
|
||||
s.why_stopped, +
|
||||
s.has_dmc, +
|
||||
s.enrollment, +
|
||||
s.is_fda_regulated_device, +
|
||||
s.is_fda_regulated_drug, +
|
||||
s.is_unapproved_device, +
|
||||
s.has_expanded_access, +
|
||||
s.study_first_submitted_date, +
|
||||
s.last_update_posted_date, +
|
||||
s.results_first_posted_date, +
|
||||
s.start_date, +
|
||||
s.primary_completion_date, +
|
||||
s.completion_date, +
|
||||
s.study_first_posted_date, +
|
||||
cv.number_of_facilities, +
|
||||
cv.has_single_facility, +
|
||||
cv.nlm_download_date, +
|
||||
s.number_of_arms, +
|
||||
s.number_of_groups, +
|
||||
sp.name AS lead_sponsor, +
|
||||
aid.names AS other_ids, +
|
||||
e.gender, +
|
||||
e.gender_based, +
|
||||
e.gender_description, +
|
||||
e.population, +
|
||||
e.minimum_age, +
|
||||
e.maximum_age, +
|
||||
e.criteria, +
|
||||
e.healthy_volunteers, +
|
||||
ak.names AS keywords, +
|
||||
ai.names AS interventions, +
|
||||
ac.names AS conditions, +
|
||||
d.primary_purpose, +
|
||||
d.allocation, +
|
||||
d.observational_model, +
|
||||
d.intervention_model, +
|
||||
d.masking, +
|
||||
d.subject_masked, +
|
||||
d.caregiver_masked, +
|
||||
d.investigator_masked, +
|
||||
d.outcomes_assessor_masked, +
|
||||
ado.names AS design_outcomes, +
|
||||
bs.description AS brief_summary, +
|
||||
dd.description AS detailed_description +
|
||||
FROM (((((((((((ctgov.studies s +
|
||||
FULL JOIN ctgov.all_conditions ac ON (((s.nct_id)::text = (ac.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_id_information aid ON (((s.nct_id)::text = (aid.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_design_outcomes ado ON (((s.nct_id)::text = (ado.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_keywords ak ON (((s.nct_id)::text = (ak.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_interventions ai ON (((s.nct_id)::text = (ai.nct_id)::text))) +
|
||||
FULL JOIN ctgov.sponsors sp ON (((s.nct_id)::text = (sp.nct_id)::text))) +
|
||||
FULL JOIN ctgov.calculated_values cv ON (((s.nct_id)::text = (cv.nct_id)::text))) +
|
||||
FULL JOIN ctgov.designs d ON (((s.nct_id)::text = (d.nct_id)::text))) +
|
||||
FULL JOIN ctgov.eligibilities e ON (((s.nct_id)::text = (e.nct_id)::text))) +
|
||||
FULL JOIN ctgov.brief_summaries bs ON (((s.nct_id)::text = (bs.nct_id)::text))) +
|
||||
FULL JOIN ctgov.detailed_descriptions dd ON (((s.nct_id)::text = (dd.nct_id)::text))) +
|
||||
WHERE (((sp.lead_or_collaborator)::text = 'lead'::text) AND ((s.nct_id)::text IN ( SELECT search_results.nct_id +
|
||||
FROM ctgov.search_results +
|
||||
WHERE ((search_results.name)::text = 'covid-19'::text))));
|
||||
CREATE OR REPLACE VIEW history.match_drugs_to_trials AS SELECT bi.nct_id, +
|
||||
rp.rxcui, +
|
||||
rp.propvalue1 +
|
||||
FROM (ctgov.browse_interventions bi +
|
||||
JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||
WHERE (((rp.propname)::text = 'RxNorm Name'::text) AND ((bi.nct_id)::text IN ( SELECT trial_snapshots.nct_id +
|
||||
FROM history.trial_snapshots)));
|
||||
CREATE OR REPLACE VIEW http.most_recent_download_status AS SELECT t.nct_id, +
|
||||
t.status, +
|
||||
t.update_timestamp +
|
||||
FROM ( SELECT download_status.id, +
|
||||
download_status.nct_id, +
|
||||
download_status.status, +
|
||||
download_status.update_timestamp, +
|
||||
row_number() OVER (PARTITION BY download_status.nct_id ORDER BY download_status.update_timestamp DESC) AS rn +
|
||||
FROM http.download_status) t +
|
||||
WHERE (t.rn = 1) +
|
||||
ORDER BY t.nct_id;
|
||||
CREATE OR REPLACE VIEW public.time_between_submission_and_start_view AS SELECT s.nct_id, +
|
||||
s.start_date, +
|
||||
ts.version, +
|
||||
ts.submission_date, +
|
||||
abs(((EXTRACT(epoch FROM (ts.submission_date - (s.start_date)::timestamp without time zone)))::double precision / (((24 * 60) * 60))::double precision)) AS start_deviance +
|
||||
FROM (ctgov.studies s +
|
||||
JOIN history.trial_snapshots ts ON (((s.nct_id)::text = (ts.nct_id)::text))) +
|
||||
WHERE ((s.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||
FROM "DiseaseBurden".trial_to_icd10 tti));
|
||||
CREATE OR REPLACE VIEW public.rank_proximity_to_start_time_view AS SELECT cte.nct_id, +
|
||||
cte.version, +
|
||||
row_number() OVER (PARTITION BY cte.nct_id ORDER BY cte.start_deviance) AS rownum, +
|
||||
cte.submission_date, +
|
||||
cte.start_deviance, +
|
||||
cte.start_date, +
|
||||
ts.primary_completion_date, +
|
||||
ts.primary_completion_date_category, +
|
||||
ts.overall_status, +
|
||||
ts.enrollment, +
|
||||
ts.enrollment_category +
|
||||
FROM (time_between_submission_and_start_view cte +
|
||||
JOIN history.trial_snapshots ts ON ((((cte.nct_id)::text = (ts.nct_id)::text) AND (cte.version = ts.version))));
|
||||
CREATE OR REPLACE VIEW public.enrollment_closest_to_start_view AS SELECT cte2.nct_id, +
|
||||
min(cte2.rownum) AS enrollment_source +
|
||||
FROM rank_proximity_to_start_time_view cte2 +
|
||||
WHERE (cte2.enrollment IS NOT NULL) +
|
||||
GROUP BY cte2.nct_id;
|
||||
CREATE OR REPLACE VIEW public.match_trials_to_bn_in AS WITH trialncts AS ( +
|
||||
SELECT DISTINCT ts.nct_id +
|
||||
FROM history.trial_snapshots ts +
|
||||
) +
|
||||
SELECT bi.nct_id, +
|
||||
bi.downcase_mesh_term, +
|
||||
rr.tty2, +
|
||||
rr.rxcui2 AS bn_or_in_cui, +
|
||||
count(*) AS count +
|
||||
FROM ((ctgov.browse_interventions bi +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((rr.rxcui1 = rp.rxcui))) +
|
||||
WHERE (((bi.nct_id)::text IN ( SELECT trialncts.nct_id +
|
||||
FROM trialncts)) AND ((bi.mesh_type)::text = 'mesh-list'::text) AND ((rp.propname)::text = 'Active_ingredient_name'::text) AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))) +
|
||||
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2 +
|
||||
ORDER BY bi.nct_id;
|
||||
CREATE OR REPLACE VIEW public.match_trial_to_ndc11 AS SELECT mttbi.nct_id, +
|
||||
ah.ndc, +
|
||||
count(*) AS count +
|
||||
FROM ((match_trials_to_bn_in mttbi +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((mttbi.bn_or_in_cui = rr.rxcui1))) +
|
||||
LEFT JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON ((rr.rxcui2 = ah.rxcui))) +
|
||||
WHERE ((rr.tty1 = 'BN'::bpchar) AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar])) AND ((ah.sab)::text = 'RXNORM'::text)) +
|
||||
GROUP BY mttbi.nct_id, ah.ndc +
|
||||
ORDER BY mttbi.nct_id, ah.ndc;
|
||||
CREATE OR REPLACE VIEW public.match_trial_to_marketing_start_date AS SELECT mttn.nct_id, +
|
||||
n.application_number_or_citation, +
|
||||
min(n.marketing_start_date) AS min +
|
||||
FROM (match_trial_to_ndc11 mttn +
|
||||
JOIN spl.nsde n ON ((mttn.ndc = (n.package_ndc11)::bpchar))) +
|
||||
WHERE (((n.product_type)::text = 'HUMAN PRESCRIPTION DRUG'::text) AND ((n.marketing_category)::text = ANY (ARRAY[('NDA'::character varying)::text, ('ANDA'::character varying)::text, ('BLA'::character varying)::text, ('NDA authorized generic'::character varying)::text, ('NDA AUTHORIZED GENERIC'::character varying)::text]))) +
|
||||
GROUP BY mttn.nct_id, n.application_number_or_citation +
|
||||
ORDER BY mttn.nct_id;
|
||||
CREATE OR REPLACE VIEW public.view_burdens_cte AS SELECT b.measure_id, +
|
||||
b.location_id, +
|
||||
b.sex_id, +
|
||||
b.age_id, +
|
||||
b.cause_id, +
|
||||
b.metric_id, +
|
||||
b.year, +
|
||||
b.val, +
|
||||
b.upper_95, +
|
||||
b.lower_95, +
|
||||
b.key_column +
|
||||
FROM "DiseaseBurden".burdens b +
|
||||
WHERE ((b.sex_id = 3) AND (b.metric_id = 1) AND (b.measure_id = 2) AND (b.age_id = 22));
|
||||
CREATE OR REPLACE VIEW public.view_burdens_cte2 AS SELECT c1.cause_id, +
|
||||
c1.year, +
|
||||
c1.val AS h_sdi_val, +
|
||||
c1.upper_95 AS h_sdi_u95, +
|
||||
c1.lower_95 AS h_sdi_l95, +
|
||||
c2.val AS hm_sdi_val, +
|
||||
c2.upper_95 AS hm_sdi_u95, +
|
||||
c2.lower_95 AS hm_sdi_l95, +
|
||||
c3.val AS m_sdi_val, +
|
||||
c3.upper_95 AS m_sdi_u95, +
|
||||
c3.lower_95 AS m_sdi_l95, +
|
||||
c4.val AS lm_sdi_val, +
|
||||
c4.upper_95 AS lm_sdi_u95, +
|
||||
c4.lower_95 AS lm_sdi_l95, +
|
||||
c5.val AS l_sdi_val, +
|
||||
c5.upper_95 AS l_sdi_u95, +
|
||||
c5.lower_95 AS l_sdi_l95 +
|
||||
FROM ((((view_burdens_cte c1 +
|
||||
JOIN view_burdens_cte c2 ON (((c1.cause_id = c2.cause_id) AND (c1.year = c2.year)))) +
|
||||
JOIN view_burdens_cte c3 ON (((c1.cause_id = c3.cause_id) AND (c1.year = c3.year)))) +
|
||||
JOIN view_burdens_cte c4 ON (((c1.cause_id = c4.cause_id) AND (c1.year = c4.year)))) +
|
||||
JOIN view_burdens_cte c5 ON (((c1.cause_id = c5.cause_id) AND (c1.year = c5.year)))) +
|
||||
WHERE ((c1.location_id = 44635) AND (c2.location_id = 44634) AND (c3.location_id = 44639) AND (c4.location_id = 44636) AND (c5.location_id = 44637));
|
||||
CREATE OR REPLACE VIEW public.view_cte AS SELECT ts.nct_id, +
|
||||
ts.primary_completion_date, +
|
||||
ts.primary_completion_date_category, +
|
||||
ts.enrollment, +
|
||||
ts.start_date, +
|
||||
ts.enrollment_category, +
|
||||
ts.overall_status, +
|
||||
min(ts.submission_date) AS earliest_date_observed +
|
||||
FROM history.trial_snapshots ts +
|
||||
WHERE (((ts.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||
FROM "DiseaseBurden".trial_to_icd10 tti +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type))) AND (ts.submission_date >= ts.start_date) AND (ts.overall_status <> ALL (ARRAY['Completed'::history.study_statuses, 'Terminated'::history.study_statuses]))) +
|
||||
GROUP BY ts.nct_id, ts.primary_completion_date, ts.primary_completion_date_category, ts.start_date, ts.enrollment, ts.enrollment_category, ts.overall_status;
|
||||
CREATE OR REPLACE VIEW public.view_disbur_cte0 AS SELECT tti.nct_id, +
|
||||
tti.ui, +
|
||||
tti.condition, +
|
||||
itc.cause_text, +
|
||||
ch.cause_id, +
|
||||
ch.level +
|
||||
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type);
|
||||
CREATE OR REPLACE VIEW public.view_disbur_cte AS SELECT view_disbur_cte0.nct_id, +
|
||||
max(view_disbur_cte0.level) AS max_level +
|
||||
FROM view_disbur_cte0 +
|
||||
GROUP BY view_disbur_cte0.nct_id;
|
||||
CREATE OR REPLACE VIEW public.view_trial_to_cause AS SELECT tti.nct_id, +
|
||||
tti.ui, +
|
||||
tti.condition, +
|
||||
itc.cause_text, +
|
||||
ch.cause_id, +
|
||||
ch.level +
|
||||
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type) +
|
||||
ORDER BY tti.nct_id;
|
||||
CREATE OR REPLACE VIEW public.view_disbur_cte2 AS SELECT ttc.nct_id, +
|
||||
ttc.ui, +
|
||||
ttc.condition, +
|
||||
ttc.cause_text, +
|
||||
ttc.cause_id, +
|
||||
disbur_cte.max_level +
|
||||
FROM (view_trial_to_cause ttc +
|
||||
JOIN view_disbur_cte disbur_cte ON (((disbur_cte.nct_id)::text = (ttc.nct_id)::text))) +
|
||||
WHERE (ttc.level = disbur_cte.max_level) +
|
||||
GROUP BY ttc.nct_id, ttc.ui, ttc.condition, ttc.cause_text, ttc.cause_id, disbur_cte.max_level +
|
||||
ORDER BY ttc.nct_id, ttc.ui;
|
||||
CREATE OR REPLACE VIEW public.view_disbur_cte3 AS SELECT disbur_cte2.nct_id, +
|
||||
SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) AS code, +
|
||||
disbur_cte2.condition, +
|
||||
disbur_cte2.cause_text, +
|
||||
disbur_cte2.cause_id, +
|
||||
ic.chapter_code AS category_id, +
|
||||
ic.group_name, +
|
||||
disbur_cte2.max_level +
|
||||
FROM (view_disbur_cte2 disbur_cte2 +
|
||||
JOIN "DiseaseBurden".icd10_categories ic ON (((SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) <= (ic.end_code)::text) AND (SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) >= (ic.start_code)::text)))) +
|
||||
WHERE (ic.level = 1);
|
||||
CREATE OR REPLACE VIEW public.formatted_data AS SELECT cte.nct_id, +
|
||||
cte.start_date, +
|
||||
cte.enrollment AS current_enrollment, +
|
||||
cte.enrollment_category, +
|
||||
cte.overall_status AS current_status, +
|
||||
cte.earliest_date_observed, +
|
||||
(EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))) AS elapsed_duration, +
|
||||
count(DISTINCT mttmsd.application_number_or_citation) AS n_brands, +
|
||||
dbc3.code, +
|
||||
dbc3.condition, +
|
||||
dbc3.cause_text, +
|
||||
dbc3.cause_id, +
|
||||
dbc3.category_id, +
|
||||
dbc3.group_name, +
|
||||
dbc3.max_level, +
|
||||
b.year, +
|
||||
b.h_sdi_val, +
|
||||
b.h_sdi_u95, +
|
||||
b.h_sdi_l95, +
|
||||
b.hm_sdi_val, +
|
||||
b.hm_sdi_u95, +
|
||||
b.hm_sdi_l95, +
|
||||
b.m_sdi_val, +
|
||||
b.m_sdi_u95, +
|
||||
b.m_sdi_l95, +
|
||||
b.lm_sdi_val, +
|
||||
b.lm_sdi_u95, +
|
||||
b.lm_sdi_l95, +
|
||||
b.l_sdi_val, +
|
||||
b.l_sdi_u95, +
|
||||
b.l_sdi_l95 +
|
||||
FROM (((view_cte cte +
|
||||
JOIN match_trial_to_marketing_start_date mttmsd ON (((cte.nct_id)::text = (mttmsd.nct_id)::text))) +
|
||||
JOIN view_disbur_cte3 dbc3 ON (((dbc3.nct_id)::text = (cte.nct_id)::text))) +
|
||||
JOIN view_burdens_cte2 b ON (((b.cause_id = dbc3.cause_id) AND (EXTRACT(year FROM b.year) = EXTRACT(year FROM cte.earliest_date_observed))))) +
|
||||
WHERE (mttmsd.min <= cte.earliest_date_observed) +
|
||||
GROUP BY cte.nct_id, cte.start_date, cte.enrollment, cte.enrollment_category, cte.overall_status, cte.earliest_date_observed, (EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))), dbc3.code, dbc3.condition, dbc3.cause_text, dbc3.cause_id, dbc3.category_id, dbc3.group_name, dbc3.max_level, b.cause_id, b.year, b.h_sdi_val, b.h_sdi_u95, b.h_sdi_l95, b.hm_sdi_val, b.hm_sdi_u95, b.hm_sdi_l95, b.m_sdi_val, b.m_sdi_u95, b.m_sdi_l95, b.lm_sdi_val, b.lm_sdi_u95, b.lm_sdi_l95, b.l_sdi_val, b.l_sdi_u95, b.l_sdi_l95+
|
||||
ORDER BY cte.nct_id, cte.earliest_date_observed;
|
||||
CREATE OR REPLACE VIEW public.formatted_data_with_planned_enrollment AS SELECT f.nct_id, +
|
||||
f.start_date, +
|
||||
f.current_enrollment, +
|
||||
f.enrollment_category, +
|
||||
f.current_status, +
|
||||
f.earliest_date_observed, +
|
||||
f.elapsed_duration, +
|
||||
f.n_brands, +
|
||||
f.code, +
|
||||
f.condition, +
|
||||
f.cause_text, +
|
||||
f.cause_id, +
|
||||
f.category_id, +
|
||||
f.group_name, +
|
||||
f.max_level, +
|
||||
f.year, +
|
||||
f.h_sdi_val, +
|
||||
f.h_sdi_u95, +
|
||||
f.h_sdi_l95, +
|
||||
f.hm_sdi_val, +
|
||||
f.hm_sdi_u95, +
|
||||
f.hm_sdi_l95, +
|
||||
f.m_sdi_val, +
|
||||
f.m_sdi_u95, +
|
||||
f.m_sdi_l95, +
|
||||
f.lm_sdi_val, +
|
||||
f.lm_sdi_u95, +
|
||||
f.lm_sdi_l95, +
|
||||
f.l_sdi_val, +
|
||||
f.l_sdi_u95, +
|
||||
f.l_sdi_l95, +
|
||||
s.overall_status AS final_status, +
|
||||
c2a.version, +
|
||||
c2a.enrollment AS planned_enrollment +
|
||||
FROM (((formatted_data f +
|
||||
JOIN ctgov.studies s ON (((f.nct_id)::text = (s.nct_id)::text))) +
|
||||
JOIN enrollment_closest_to_start_view c3e ON (((c3e.nct_id)::text = (f.nct_id)::text))) +
|
||||
JOIN rank_proximity_to_start_time_view c2a ON ((((c3e.nct_id)::text = (c2a.nct_id)::text) AND (c3e.enrollment_source = c2a.rownum))));
|
||||
CREATE OR REPLACE VIEW http.trials_to_download AS SELECT most_recent_download_status.nct_id +
|
||||
FROM http.most_recent_download_status +
|
||||
WHERE (most_recent_download_status.status = 'Of Interest'::http.history_download_status);
|
||||
CREATE OR REPLACE VIEW public.primary_design_outcomes AS SELECT do2.id, +
|
||||
do2.nct_id, +
|
||||
do2.outcome_type, +
|
||||
do2.measure, +
|
||||
do2.time_frame, +
|
||||
do2.population, +
|
||||
do2.description +
|
||||
FROM ctgov.design_outcomes do2 +
|
||||
WHERE (((do2.outcome_type)::text = 'primary'::text) AND ((do2.nct_id)::text IN ( SELECT DISTINCT fd.nct_id +
|
||||
FROM formatted_data fd)));
|
||||
(40 rows)
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
import psycopg2 as psyco
|
||||
|
||||
conn = psyco.connect(dbname="aact_db", user="root", host="will-office", password="root")
|
||||
|
||||
curse = conn.cursor()
|
||||
|
||||
curse.execute("select * FROM http.responses LIMIT 100;")
|
||||
print(curse.fetchall())
|
||||
|
||||
curse.close()
|
||||
conn.close()
|
||||
@ -1,21 +0,0 @@
|
||||
DELETE FROM http.download_status;
|
||||
|
||||
INSERT INTO http.download_status (nct_id, status)
|
||||
SELECT nct_id, 'Of Interest'::http.history_download_status AS status
|
||||
FROM ctgov.studies
|
||||
WHERE
|
||||
is_fda_regulated_drug=TRUE
|
||||
AND
|
||||
study_type = 'Interventional'
|
||||
AND
|
||||
phase='Phase 3'
|
||||
AND
|
||||
overall_status in ('Terminated', 'Completed')
|
||||
AND
|
||||
start_date > '2008-01-01'
|
||||
AND
|
||||
completion_date < '2022-01-01'
|
||||
;
|
||||
|
||||
|
||||
SELECT count(*) FROM http.download_status ;
|
||||
@ -0,0 +1,112 @@
|
||||
#justfile, used for automating build/setup
|
||||
# TODO
|
||||
# - setup a .env file so things can be shared between just and docker
|
||||
# - move network name to .env
|
||||
# - move postgres login credentials (allow them to be printed from just while setting up)
|
||||
|
||||
|
||||
data_link := "https://ctti-aact.nyc3.digitaloceanspaces.com/27grtsnhtccplxapj2o8ak9aotvv"
|
||||
data_file := "2022-12-23_postgres_data.zip"
|
||||
data_path := "./containers/AACT_downloader/aact_downloads"
|
||||
data_filepath := data_path / data_file
|
||||
|
||||
#must match the 'container name: aact_db' in the docker-compose.yaml
|
||||
docker_container := `docker container ls -a | grep aact_db | cut -f 1 -d " " | tr "\n" " "`
|
||||
|
||||
#Various paths for docker stuff
|
||||
docker-compose_path := "./AACT_downloader/docker-compose.yaml"
|
||||
|
||||
#rxnorm_mappings
|
||||
rxnorm_mappings_url := "https://dailymed-data.nlm.nih.gov/public-release-files/rxnorm_mappings.zip"
|
||||
|
||||
#Number of historical trials to download.
|
||||
count := "100"
|
||||
|
||||
|
||||
#check for necessary dependencies
|
||||
check-status:
    docker --version
    # Require Python >= 3.10. The exit status must be 0 on success: the previous
    # form, exit(<bool>), returned status 1 precisely when the version was new
    # enough, which made this recipe fail on every supported interpreter.
    python --version
    python -c 'import sys; sys.exit(0 if sys.version_info >= (3, 10) else 1)'
    curl --version
    echo "current docker containers:{{docker_container}}"
|
||||
|
||||
#Setup the AACT container
|
||||
setup-containers:
|
||||
@echo "Check for downloaded data"
|
||||
[ -s {{data_path}}/postgres_data.dmp ]
|
||||
|
||||
#run docker compose
|
||||
@echo "Setting up AACT container"
|
||||
docker-compose -f {{docker-compose_path}} up -d
|
||||
|
||||
#Stop the appropriate docker container
|
||||
stop-containers:
|
||||
#stop all docker containers if they are currently running.
|
||||
#The if [empty string] statement because sometimes there are no running containers
|
||||
if [ -n "{{docker_container}}" ]; then docker stop {{docker_container}}; fi
|
||||
@echo "confirmed that docker containers {{docker_container}} are stopped"
|
||||
|
||||
#Remove the appropriate docker container as well as associated volumes
|
||||
clean-docker: stop-containers
|
||||
# remove docker containers
|
||||
if [ -n "{{docker_container}}" ]; then docker rm {{docker_container}}; fi
|
||||
# cleanup docker network
|
||||
docker network prune
|
||||
# cleanup docker volumes
|
||||
docker volume prune
|
||||
|
||||
|
||||
#Download the AACT data
|
||||
download-aact-data:
    # Save the archive at data_filepath, the same path the unzip and rm steps
    # below read, so the three steps always agree on where the file lives.
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # error page as if it were the zip archive.
    curl --fail {{data_link}} --output {{data_filepath}}
    unzip {{data_filepath}} -d {{data_path}}
    rm {{data_filepath}}
|
||||
|
||||
|
||||
#build based on previously downloaded data
|
||||
build: check-status setup-containers
|
||||
#system built from downloaded data
|
||||
|
||||
#remove containers and rebuild based on previously downloaded data
|
||||
rebuild: clean-docker build
|
||||
#system will be built from scratch, using previously downloaded data
|
||||
|
||||
#download data and create the containers
|
||||
create: check-status download-aact-data build
|
||||
# downloaded data and built from scratch
|
||||
|
||||
#remove containers, redownload data, then rebuild containers
|
||||
recreate: clean-docker create
|
||||
# removed containers, redownloaded data, then rebuilt containers
|
||||
|
||||
#Register trials of interest in the database based on ./history_downloader/selected_trials.sql
|
||||
select-trials:
|
||||
cd history_downloader && python ./select_trials.py
|
||||
|
||||
#Download trial histories based on registered trials of interest.
|
||||
download-trial-histories:
|
||||
cd history_downloader && python ./downloader.py --count {{count}}
|
||||
|
||||
#Check if you can connect to the db
|
||||
test-db-connection:
|
||||
cd history_downloader && python db_connection.py
|
||||
|
||||
#Parse previously downloaded histories into tables.
|
||||
parse-trial-histories:
|
||||
cd Parser && python extraction_lib.py
|
||||
|
||||
#Download and install
|
||||
get-histories: download-trial-histories parse-trial-histories
|
||||
|
||||
#download market data
|
||||
get-nsde:
|
||||
cd market_data && bash download_nsde.sh
|
||||
cd market_data && python extract_nsde.py
|
||||
|
||||
get-rxnorm-mappings:
|
||||
#this may not be needed, all it does is match spls to rxcuis and I think I already have that.
|
||||
curl {{rxnorm_mappings_url}} > ./market_data/rxnorm_mappings.zip
|
||||
cd ./market_data && unzip ./rxnorm_mappings.zip
|
||||
rm ./market_data/rxnorm_mappings.zip
|
||||
@ -0,0 +1,55 @@
|
||||
import ollama
|
||||
|
||||
import psycopg
|
||||
from psycopg.rows import dict_row
|
||||
from typing import List, Dict
|
||||
|
||||
def fetch_all_rows(conn_params: dict) -> List[Dict]:
    """Return every row of ``public.primary_design_outcomes``.

    Args:
        conn_params: Keyword arguments forwarded unchanged to
            ``psycopg.connect``.

    Returns:
        The result of ``cursor.fetchall()``.  The connection is opened with
        the ``dict_row`` row factory, so each row is a dict.

    Raises:
        Any exception raised by psycopg while connecting or querying is
        propagated to the caller.
    """
    # Select all rows from the table
    sql_query = "SELECT * FROM public.primary_design_outcomes;"

    # The context managers close the cursor and the connection even when
    # execute() or fetchall() raises; the previous manual close() calls were
    # skipped on error, leaking both.
    with psycopg.connect(**conn_params, row_factory=dict_row) as conn:
        with conn.cursor() as cursor:
            cursor.execute(sql_query)
            return cursor.fetchall()
|
||||
|
||||
# Example usage
|
||||
conn_params = {
|
||||
"dbname": "aact_db",
|
||||
"user": "root",
|
||||
"password": "root",
|
||||
"host": "localhost",
|
||||
"port": "5432"
|
||||
}
|
||||
|
||||
outcome_description = '''
|
||||
Measure: {measure}
|
||||
Time Frame: {time_frame}
|
||||
Description: {description}
|
||||
'''
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # NOTE: a "check for model" step was planned here but is not implemented.
    extractor_model = 'youainti/llama3.1-extractor:2024-08-28.2'

    # Fetch every outcome row, but only send the first three to the model.
    sample_rows = fetch_all_rows(conn_params)[:3]

    for outcome in sample_rows:
        # Fill the prompt template from the row's columns.
        prompt_text = outcome_description.format(**outcome)
        reply = ollama.generate(model=extractor_model, prompt=prompt_text)

        # Show the prompt followed by the model's answer.
        print(prompt_text)
        print(reply["response"])
|
||||
|
||||
@ -0,0 +1,31 @@
|
||||
FROM llama3.1
|
||||
PARAMETER num_ctx 8192
|
||||
PARAMETER seed 11021585
|
||||
SYSTEM """
|
||||
You are a Natural Language Processor, tasked with extracting data about outcome measures from textual tables.
|
||||
You are to extract the longest observation time from the primary objectives for this clinical trial.
|
||||
I need you to distinguish between trials that have a specified period during which they track participants
|
||||
and those trials that don't.
|
||||
|
||||
Return results as JSON, with the format
|
||||
```json
|
||||
{
|
||||
"longest_observation_scalar": <number>,
|
||||
"longest_observation_unit": <string: minutes, hours, days, weeks, months, years>
|
||||
}
|
||||
```
|
||||
Do not return any other commentary.
|
||||
If the study does not have a specified end of observation, set the values to `null`.
|
||||
If the text does not appear to be related to clinical trials, return `{ null }`
|
||||
|
||||
For example:
|
||||
- 'baseline to week 3' should give: `{ "longest_observation_scalar": 3, "longest_observation_unit": "weeks" }`
|
||||
- 'tracked 4 months' should give: `{ "longest_observation_scalar": 4, "longest_observation_unit": "months"}`
|
||||
- 'randomization to 14 months' should give `{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}`
|
||||
- 'After day 1 to week 48' should give `{ "longest_observation_scalar": 48, "longest_observation_unit": "weeks"}`
|
||||
- 'randomization to 14 months' should give `{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}`
|
||||
- 'baseline until death' should give: `{ "longest_observation_scalar": null, "longest_observation_unit": null }`
|
||||
- 'progression free survival up to 4 years' should give: `{ "longest_observation_scalar": null, "longest_observation_unit": null }`
|
||||
- 'the quick brown fox jumped over the lazy dog for one hour' should give: `{null}`
|
||||
|
||||
"""
|
||||
|
@ -0,0 +1,19 @@
|
||||
### Template:
|
||||
{
|
||||
"longest_observation_scalar": "",
|
||||
"longest_observation_unit": "",
|
||||
}
|
||||
### Examples:
|
||||
|
||||
|
||||
### Text:
|
||||
|
||||
{ "longest_observation_scalar": 3, "longest_observation_unit": "weeks" }
|
||||
{ "longest_observation_scalar": 4, "longest_observation_unit": "months"}
|
||||
{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}
|
||||
{ "longest_observation_scalar": 48, "longest_observation_unit": "weeks"}
|
||||
{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}
|
||||
{ "longest_observation_scalar": null, "longest_observation_unit": null }
|
||||
{ "longest_observation_scalar": null, "longest_observation_unit": null }
|
||||
|
||||
"""
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
https://doi.org/10.6069/GHCW-8955
|
||||
@ -0,0 +1 @@
|
||||
,dad,home-pc,03.04.2023 15:13,file:///home/dad/.config/libreoffice/4;
|
||||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,365 @@
|
||||
Cause ID,Cause Name,Parent ID,Parent Name,Level,Cause Outline,Sort Order,YLL Only,YLD Only
|
||||
294,All causes,294,All causes,0,Total,1,,
|
||||
295,"Communicable, maternal, neonatal, and nutritional diseases",294,All causes,1,A,2,,
|
||||
955,HIV/AIDS and sexually transmitted infections,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.1,3,,
|
||||
298,HIV/AIDS,955,HIV/AIDS and sexually transmitted infections,3,A.1.1,4,,
|
||||
948,HIV/AIDS - Drug-susceptible Tuberculosis,298,HIV/AIDS,4,A.1.1.1,5,,
|
||||
949,HIV/AIDS - Multidrug-resistant Tuberculosis without extensive drug resistance,298,HIV/AIDS,4,A.1.1.2,6,,
|
||||
950,HIV/AIDS - Extensively drug-resistant Tuberculosis,298,HIV/AIDS,4,A.1.1.3,7,,
|
||||
300,HIV/AIDS resulting in other diseases,298,HIV/AIDS,4,A.1.1.4,8,,
|
||||
393,Sexually transmitted infections excluding HIV,955,HIV/AIDS and sexually transmitted infections,3,A.1.2,9,,
|
||||
394,Syphilis,393,Sexually transmitted infections excluding HIV,4,A.1.2.1,10,,
|
||||
395,Chlamydial infection,393,Sexually transmitted infections excluding HIV,4,A.1.2.2,11,,
|
||||
396,Gonococcal infection,393,Sexually transmitted infections excluding HIV,4,A.1.2.3,12,,
|
||||
397,Trichomoniasis,393,Sexually transmitted infections excluding HIV,4,A.1.2.4,13,,X
|
||||
398,Genital herpes,393,Sexually transmitted infections excluding HIV,4,A.1.2.5,14,,X
|
||||
399,Other sexually transmitted infections,393,Sexually transmitted infections excluding HIV,4,A.1.2.6,15,,
|
||||
956,Respiratory infections and tuberculosis,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.2,16,,
|
||||
297,Tuberculosis,956,Respiratory infections and tuberculosis,3,A.2.1,17,,
|
||||
954,Latent tuberculosis infection,297,Tuberculosis,4,A.2.1.1,18,,X
|
||||
934,Drug-susceptible tuberculosis,297,Tuberculosis,4,A.2.1.2,19,,
|
||||
946,Multidrug-resistant tuberculosis without extensive drug resistance,297,Tuberculosis,4,A.2.1.3,20,,
|
||||
947,Extensively drug-resistant tuberculosis,297,Tuberculosis,4,A.2.1.4,21,,
|
||||
322,Lower respiratory infections,956,Respiratory infections and tuberculosis,3,A.2.2,22,,
|
||||
328,Upper respiratory infections,956,Respiratory infections and tuberculosis,3,A.2.3,23,,
|
||||
329,Otitis media,956,Respiratory infections and tuberculosis,3,A.2.4,24,,
|
||||
957,Enteric infections,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.3,25,,
|
||||
302,Diarrheal diseases,957,Enteric infections,3,A.3.1,26,,
|
||||
958,Typhoid and paratyphoid,957,Enteric infections,3,A.3.2,27,,
|
||||
319,Typhoid fever,958,Typhoid and paratyphoid,4,A.3.2.1,28,,
|
||||
320,Paratyphoid fever,958,Typhoid and paratyphoid,4,A.3.2.2,29,,
|
||||
959,Invasive Non-typhoidal Salmonella (iNTS),957,Enteric infections,3,A.3.3,30,,
|
||||
321,Other intestinal infectious diseases,957,Enteric infections,3,A.3.4,31,,
|
||||
344,Neglected tropical diseases and malaria,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.4,32,,
|
||||
345,Malaria,344,Neglected tropical diseases and malaria,3,A.4.1,33,,
|
||||
346,Chagas disease,344,Neglected tropical diseases and malaria,3,A.4.2,34,,
|
||||
347,Leishmaniasis,344,Neglected tropical diseases and malaria,3,A.4.3,35,,
|
||||
348,Visceral leishmaniasis,347,Leishmaniasis,4,A.4.3.1,36,,
|
||||
349,Cutaneous and mucocutaneous leishmaniasis,347,Leishmaniasis,4,A.4.3.2,37,,X
|
||||
350,African trypanosomiasis,344,Neglected tropical diseases and malaria,3,A.4.4,38,,
|
||||
351,Schistosomiasis,344,Neglected tropical diseases and malaria,3,A.4.5,39,,
|
||||
352,Cysticercosis,344,Neglected tropical diseases and malaria,3,A.4.6,40,,
|
||||
353,Cystic echinococcosis,344,Neglected tropical diseases and malaria,3,A.4.7,41,,
|
||||
354,Lymphatic filariasis,344,Neglected tropical diseases and malaria,3,A.4.8,42,,X
|
||||
355,Onchocerciasis,344,Neglected tropical diseases and malaria,3,A.4.9,43,,X
|
||||
356,Trachoma,344,Neglected tropical diseases and malaria,3,A.4.10,44,,X
|
||||
357,Dengue,344,Neglected tropical diseases and malaria,3,A.4.11,45,,
|
||||
358,Yellow fever,344,Neglected tropical diseases and malaria,3,A.4.12,46,,
|
||||
359,Rabies,344,Neglected tropical diseases and malaria,3,A.4.13,47,,
|
||||
360,Intestinal nematode infections,344,Neglected tropical diseases and malaria,3,A.4.14,48,,
|
||||
361,Ascariasis,360,Intestinal nematode infections,4,A.4.14.1,49,,
|
||||
362,Trichuriasis,360,Intestinal nematode infections,4,A.4.14.2,50,,X
|
||||
363,Hookworm disease,360,Intestinal nematode infections,4,A.4.14.3,51,,X
|
||||
364,Food-borne trematodiases,344,Neglected tropical diseases and malaria,3,A.4.15,52,,X
|
||||
405,Leprosy,344,Neglected tropical diseases and malaria,3,A.4.16,53,,X
|
||||
843,Ebola,344,Neglected tropical diseases and malaria,3,A.4.17,54,,
|
||||
935,Zika virus,344,Neglected tropical diseases and malaria,3,A.4.18,55,,
|
||||
936,Guinea worm disease,344,Neglected tropical diseases and malaria,3,A.4.19,56,,X
|
||||
365,Other neglected tropical diseases,344,Neglected tropical diseases and malaria,3,A.4.20,57,,
|
||||
961,Other infectious diseases,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.5,58,,
|
||||
332,Meningitis,961,Other infectious diseases,3,A.5.1,59,,
|
||||
337,Encephalitis,961,Other infectious diseases,3,A.5.2,60,,
|
||||
338,Diphtheria,961,Other infectious diseases,3,A.5.3,61,,
|
||||
339,Whooping cough,961,Other infectious diseases,3,A.5.4,62,,
|
||||
340,Tetanus,961,Other infectious diseases,3,A.5.5,63,,
|
||||
341,Measles,961,Other infectious diseases,3,A.5.6,64,,
|
||||
342,Varicella and herpes zoster,961,Other infectious diseases,3,A.5.7,65,,
|
||||
400,Acute hepatitis,961,Other infectious diseases,3,A.5.8,66,,
|
||||
401,Acute hepatitis A,400,Acute hepatitis,4,A.5.8.1,67,,
|
||||
402,Acute hepatitis B,400,Acute hepatitis,4,A.5.8.2,68,,
|
||||
403,Acute hepatitis C,400,Acute hepatitis,4,A.5.8.3,69,,
|
||||
404,Acute hepatitis E,400,Acute hepatitis,4,A.5.8.4,70,,
|
||||
408,Other unspecified infectious diseases,961,Other infectious diseases,3,A.5.9,71,,
|
||||
962,Maternal and neonatal disorders,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.6,72,,
|
||||
366,Maternal disorders,962,Maternal and neonatal disorders,3,A.6.1,73,,
|
||||
367,Maternal hemorrhage,366,Maternal disorders,4,A.6.1.1,74,,
|
||||
368,Maternal sepsis and other maternal infections,366,Maternal disorders,4,A.6.1.2,75,,
|
||||
369,Maternal hypertensive disorders,366,Maternal disorders,4,A.6.1.3,76,,
|
||||
370,Maternal obstructed labor and uterine rupture,366,Maternal disorders,4,A.6.1.4,77,,
|
||||
995,Maternal abortion and miscarriage,366,Maternal disorders,4,A.6.1.5,78,,
|
||||
374,Ectopic pregnancy,366,Maternal disorders,4,A.6.1.6,79,,
|
||||
375,Indirect maternal deaths,366,Maternal disorders,4,A.6.1.7,80,X,
|
||||
376,Late maternal deaths,366,Maternal disorders,4,A.6.1.8,81,X,
|
||||
741,Maternal deaths aggravated by HIV/AIDS,366,Maternal disorders,4,A.6.1.9,82,X,
|
||||
379,Other maternal disorders,366,Maternal disorders,4,A.6.1.10,83,,
|
||||
380,Neonatal disorders,962,Maternal and neonatal disorders,3,A.6.2,84,,
|
||||
381,Neonatal preterm birth,380,Neonatal disorders,4,A.6.2.1,85,,
|
||||
382,Neonatal encephalopathy due to birth asphyxia and trauma,380,Neonatal disorders,4,A.6.2.2,86,,
|
||||
383,Neonatal sepsis and other neonatal infections,380,Neonatal disorders,4,A.6.2.3,87,,
|
||||
384,Hemolytic disease and other neonatal jaundice,380,Neonatal disorders,4,A.6.2.4,88,,
|
||||
385,Other neonatal disorders,380,Neonatal disorders,4,A.6.2.5,89,,
|
||||
386,Nutritional deficiencies,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.7,90,,
|
||||
387,Protein-energy malnutrition,386,Nutritional deficiencies,3,A.7.1,91,,
|
||||
388,Iodine deficiency,386,Nutritional deficiencies,3,A.7.2,92,,X
|
||||
389,Vitamin A deficiency,386,Nutritional deficiencies,3,A.7.3,93,,X
|
||||
390,Dietary iron deficiency,386,Nutritional deficiencies,3,A.7.4,94,,X
|
||||
391,Other nutritional deficiencies,386,Nutritional deficiencies,3,A.7.5,95,,
|
||||
409,Non-communicable diseases,294,All causes,1,B,96,,
|
||||
410,Neoplasms,409,Non-communicable diseases,2,B.1,97,,
|
||||
444,Lip and oral cavity cancer,410,Neoplasms,3,B.1.1,98,,
|
||||
447,Nasopharynx cancer,410,Neoplasms,3,B.1.2,99,,
|
||||
450,Other pharynx cancer,410,Neoplasms,3,B.1.3,100,,
|
||||
411,Esophageal cancer,410,Neoplasms,3,B.1.4,101,,
|
||||
414,Stomach cancer,410,Neoplasms,3,B.1.5,102,,
|
||||
441,Colon and rectum cancer,410,Neoplasms,3,B.1.6,103,,
|
||||
417,Liver cancer,410,Neoplasms,3,B.1.7,104,,
|
||||
418,Liver cancer due to hepatitis B,417,Liver cancer,4,B.1.7.1,105,,
|
||||
419,Liver cancer due to hepatitis C,417,Liver cancer,4,B.1.7.2,106,,
|
||||
420,Liver cancer due to alcohol use,417,Liver cancer,4,B.1.7.3,107,,
|
||||
996,Liver cancer due to NASH,417,Liver cancer,4,B.1.7.4,108,,
|
||||
1021,Liver cancer due to other causes,417,Liver cancer,4,B.1.7.5,109,,
|
||||
453,Gallbladder and biliary tract cancer,410,Neoplasms,3,B.1.8,110,,
|
||||
456,Pancreatic cancer,410,Neoplasms,3,B.1.9,111,,
|
||||
423,Larynx cancer,410,Neoplasms,3,B.1.10,112,,
|
||||
426,"Tracheal, bronchus, and lung cancer",410,Neoplasms,3,B.1.11,113,,
|
||||
459,Malignant skin melanoma,410,Neoplasms,3,B.1.12,114,,
|
||||
462,Non-melanoma skin cancer,410,Neoplasms,3,B.1.13,115,,
|
||||
849,Non-melanoma skin cancer (squamous-cell carcinoma),462,Non-melanoma skin cancer,4,B.1.13.1,116,,
|
||||
850,Non-melanoma skin cancer (basal-cell carcinoma),462,Non-melanoma skin cancer,4,B.1.13.2,117,,X
|
||||
429,Breast cancer,410,Neoplasms,3,B.1.14,118,,
|
||||
432,Cervical cancer,410,Neoplasms,3,B.1.15,119,,
|
||||
435,Uterine cancer,410,Neoplasms,3,B.1.16,120,,
|
||||
465,Ovarian cancer,410,Neoplasms,3,B.1.17,121,,
|
||||
438,Prostate cancer,410,Neoplasms,3,B.1.18,122,,
|
||||
468,Testicular cancer,410,Neoplasms,3,B.1.19,123,,
|
||||
471,Kidney cancer,410,Neoplasms,3,B.1.20,124,,
|
||||
474,Bladder cancer,410,Neoplasms,3,B.1.21,125,,
|
||||
477,Brain and central nervous system cancer,410,Neoplasms,3,B.1.22,126,,
|
||||
480,Thyroid cancer,410,Neoplasms,3,B.1.23,127,,
|
||||
483,Mesothelioma,410,Neoplasms,3,B.1.24,128,,
|
||||
484,Hodgkin lymphoma,410,Neoplasms,3,B.1.25,129,,
|
||||
485,Non-Hodgkin lymphoma,410,Neoplasms,3,B.1.26,130,,
|
||||
486,Multiple myeloma,410,Neoplasms,3,B.1.27,131,,
|
||||
487,Leukemia,410,Neoplasms,3,B.1.28,132,,
|
||||
845,Acute lymphoid leukemia,487,Leukemia,4,B.1.28.1,133,,
|
||||
846,Chronic lymphoid leukemia,487,Leukemia,4,B.1.28.2,134,,
|
||||
847,Acute myeloid leukemia,487,Leukemia,4,B.1.28.3,135,,
|
||||
848,Chronic myeloid leukemia,487,Leukemia,4,B.1.28.4,136,,
|
||||
943,Other leukemia,487,Leukemia,4,B.1.28.5,137,,
|
||||
1022,Other malignant neoplasms,410,Neoplasms,3,B.1.29,138,,
|
||||
490,Other neoplasms,410,Neoplasms,3,B.1.30,139,,
|
||||
964,"Myelodysplastic, myeloproliferative, and other hematopoietic neoplasms",490,Other neoplasms,4,B.1.30.1,140,,
|
||||
965,Benign and in situ intestinal neoplasms,490,Other neoplasms,4,B.1.30.2,141,,X
|
||||
966,Benign and in situ cervical and uterine neoplasms,490,Other neoplasms,4,B.1.30.3,142,,X
|
||||
967,Other benign and in situ neoplasms,490,Other neoplasms,4,B.1.30.4,143,,X
|
||||
491,Cardiovascular diseases,409,Non-communicable diseases,2,B.2,144,,
|
||||
492,Rheumatic heart disease,491,Cardiovascular diseases,3,B.2.1,145,,
|
||||
493,Ischemic heart disease,491,Cardiovascular diseases,3,B.2.2,146,,
|
||||
494,Stroke,491,Cardiovascular diseases,3,B.2.3,147,,
|
||||
495,Ischemic stroke,494,Stroke,4,B.2.3.1,148,,
|
||||
496,Intracerebral hemorrhage,494,Stroke,4,B.2.3.2,149,,
|
||||
497,Subarachnoid hemorrhage,494,Stroke,4,B.2.3.3,150,,
|
||||
498,Hypertensive heart disease,491,Cardiovascular diseases,3,B.2.4,151,,
|
||||
504,Non-rheumatic valvular heart disease,491,Cardiovascular diseases,3,B.2.5,152,,
|
||||
968,Non-rheumatic calcific aortic valve disease,504,Non-rheumatic valvular heart disease,4,B.2.5.1,153,,
|
||||
969,Non-rheumatic degenerative mitral valve disease,504,Non-rheumatic valvular heart disease,4,B.2.5.2,154,,
|
||||
970,Other non-rheumatic valve diseases,504,Non-rheumatic valvular heart disease,4,B.2.5.3,155,,
|
||||
499,Cardiomyopathy and myocarditis,491,Cardiovascular diseases,3,B.2.6,156,,
|
||||
942,Myocarditis,499,Cardiomyopathy and myocarditis,4,B.2.6.1,157,,
|
||||
938,Alcoholic cardiomyopathy,499,Cardiomyopathy and myocarditis,4,B.2.6.2,158,,
|
||||
944,Other cardiomyopathy,499,Cardiomyopathy and myocarditis,4,B.2.6.3,159,,
|
||||
500,Atrial fibrillation and flutter,491,Cardiovascular diseases,3,B.2.8,160,,
|
||||
501,Aortic aneurysm,491,Cardiovascular diseases,3,B.2.9,161,X,
|
||||
502,Peripheral artery disease,491,Cardiovascular diseases,3,B.2.10,162,,
|
||||
503,Endocarditis,491,Cardiovascular diseases,3,B.2.11,163,,
|
||||
1023,Other cardiovascular and circulatory diseases,491,Cardiovascular diseases,3,B.2.12,164,,
|
||||
508,Chronic respiratory diseases,409,Non-communicable diseases,2,B.3,165,,
|
||||
509,Chronic obstructive pulmonary disease,508,Chronic respiratory diseases,3,B.3.1,166,,
|
||||
510,Pneumoconiosis,508,Chronic respiratory diseases,3,B.3.2,167,,
|
||||
511,Silicosis,510,Pneumoconiosis,4,B.3.2.1,168,,
|
||||
512,Asbestosis,510,Pneumoconiosis,4,B.3.2.2,169,,
|
||||
513,Coal workers pneumoconiosis,510,Pneumoconiosis,4,B.3.2.3,170,,
|
||||
514,Other pneumoconiosis,510,Pneumoconiosis,4,B.3.2.4,171,,
|
||||
515,Asthma,508,Chronic respiratory diseases,3,B.3.3,172,,
|
||||
516,Interstitial lung disease and pulmonary sarcoidosis,508,Chronic respiratory diseases,3,B.3.4,173,,
|
||||
520,Other chronic respiratory diseases,508,Chronic respiratory diseases,3,B.3.5,174,,
|
||||
526,Digestive diseases,409,Non-communicable diseases,2,B.4,175,,
|
||||
521,Cirrhosis and other chronic liver diseases,526,Digestive diseases,3,B.4.1,176,,
|
||||
522,Cirrhosis and other chronic liver diseases due to hepatitis B,521,Cirrhosis and other chronic liver diseases,4,B.4.1.1,177,,
|
||||
523,Cirrhosis and other chronic liver diseases due to hepatitis C,521,Cirrhosis and other chronic liver diseases,4,B.4.1.2,178,,
|
||||
524,Cirrhosis and other chronic liver diseases due to alcohol use,521,Cirrhosis and other chronic liver diseases,4,B.4.1.3,179,,
|
||||
971,Cirrhosis and other chronic liver diseases due to NAFLD,521,Cirrhosis and other chronic liver diseases,4,B.4.1.4,180,,
|
||||
525,Cirrhosis and other chronic liver diseases due to other causes,521,Cirrhosis and other chronic liver diseases,4,B.4.1.5,181,,
|
||||
992,Upper digestive system diseases,526,Digestive diseases,3,B.4.2,182,,
|
||||
527,Peptic ulcer disease,992,Upper digestive system diseases,4,B.4.2.1,183,,
|
||||
528,Gastritis and duodenitis,992,Upper digestive system diseases,4,B.4.2.2,184,,
|
||||
536,Gastroesophageal reflux disease,992,Upper digestive system diseases,4,B.4.2.3,185,,X
|
||||
529,Appendicitis,526,Digestive diseases,3,B.4.3,186,,
|
||||
530,Paralytic ileus and intestinal obstruction,526,Digestive diseases,3,B.4.4,187,,
|
||||
531,"Inguinal, femoral, and abdominal hernia",526,Digestive diseases,3,B.4.5,188,,
|
||||
532,Inflammatory bowel disease,526,Digestive diseases,3,B.4.6,189,,
|
||||
533,Vascular intestinal disorders,526,Digestive diseases,3,B.4.7,190,,
|
||||
534,Gallbladder and biliary diseases,526,Digestive diseases,3,B.4.8,191,,
|
||||
535,Pancreatitis,526,Digestive diseases,3,B.4.9,192,,
|
||||
541,Other digestive diseases,526,Digestive diseases,3,B.4.10,193,,
|
||||
542,Neurological disorders,409,Non-communicable diseases,2,B.5,194,,
|
||||
543,Alzheimer's disease and other dementias,542,Neurological disorders,3,B.5.1,195,,
|
||||
544,Parkinson's disease,542,Neurological disorders,3,B.5.2,196,,
|
||||
545,Idiopathic epilepsy,542,Neurological disorders,3,B.5.3,197,,
|
||||
546,Multiple sclerosis,542,Neurological disorders,3,B.5.4,198,,
|
||||
554,Motor neuron disease,542,Neurological disorders,3,B.5.5,199,,
|
||||
972,Headache disorders,542,Neurological disorders,3,B.5.6,200,,X
|
||||
547,Migraine,972,Headache disorders,4,B.5.6.1,201,,X
|
||||
548,Tension-type headache,972,Headache disorders,4,B.5.6.2,202,,X
|
||||
557,Other neurological disorders,542,Neurological disorders,3,B.5.7,203,,
|
||||
558,Mental disorders,409,Non-communicable diseases,2,B.6,204,,
|
||||
559,Schizophrenia,558,Mental disorders,3,B.6.1,205,,X
|
||||
567,Depressive disorders,558,Mental disorders,3,B.6.2,206,,X
|
||||
568,Major depressive disorder,567,Depressive disorders,4,B.6.2.1,207,,X
|
||||
569,Dysthymia,567,Depressive disorders,4,B.6.2.2,208,,X
|
||||
570,Bipolar disorder,558,Mental disorders,3,B.6.3,209,,X
|
||||
571,Anxiety disorders,558,Mental disorders,3,B.6.4,210,,X
|
||||
572,Eating disorders,558,Mental disorders,3,B.6.5,211,,
|
||||
573,Anorexia nervosa,572,Eating disorders,4,B.6.5.1,212,,
|
||||
574,Bulimia nervosa,572,Eating disorders,4,B.6.5.2,213,,
|
||||
575,Autism spectrum disorders,558,Mental disorders,3,B.6.6,214,,X
|
||||
578,Attention-deficit/hyperactivity disorder,558,Mental disorders,3,B.6.7,215,,X
|
||||
579,Conduct disorder,558,Mental disorders,3,B.6.8,216,,X
|
||||
582,Idiopathic developmental intellectual disability,558,Mental disorders,3,B.6.9,217,,X
|
||||
585,Other mental disorders,558,Mental disorders,3,B.6.10,218,,X
|
||||
973,Substance use disorders,409,Non-communicable diseases,2,B.7,219,,
|
||||
560,Alcohol use disorders,973,Substance use disorders,3,B.7.1,220,,
|
||||
561,Drug use disorders,973,Substance use disorders,3,B.7.2,221,,
|
||||
562,Opioid use disorders,561,Drug use disorders,4,B.7.2.1,222,,
|
||||
563,Cocaine use disorders,561,Drug use disorders,4,B.7.2.2,223,,
|
||||
564,Amphetamine use disorders,561,Drug use disorders,4,B.7.2.3,224,,
|
||||
565,Cannabis use disorders,561,Drug use disorders,4,B.7.2.4,225,,X
|
||||
566,Other drug use disorders,561,Drug use disorders,4,B.7.2.5,226,,
|
||||
974,Diabetes and kidney diseases,409,Non-communicable diseases,2,B.8,227,,
|
||||
587,Diabetes mellitus,974,Diabetes and kidney diseases,3,B.8.1,228,,
|
||||
975,Diabetes mellitus type 1,587,Diabetes mellitus,4,B.8.1.1,229,,
|
||||
976,Diabetes mellitus type 2,587,Diabetes mellitus,4,B.8.1.2,230,,
|
||||
589,Chronic kidney disease,974,Diabetes and kidney diseases,3,B.8.2,231,,
|
||||
997,Chronic kidney disease due to diabetes mellitus type 1,589,Chronic kidney disease,4,B.8.2.1,232,,
|
||||
998,Chronic kidney disease due to diabetes mellitus type 2,589,Chronic kidney disease,4,B.8.2.2,233,,
|
||||
591,Chronic kidney disease due to hypertension,589,Chronic kidney disease,4,B.8.2.3,234,,
|
||||
592,Chronic kidney disease due to glomerulonephritis,589,Chronic kidney disease,4,B.8.2.4,235,,
|
||||
593,Chronic kidney disease due to other and unspecified causes,589,Chronic kidney disease,4,B.8.2.5,236,,
|
||||
588,Acute glomerulonephritis,974,Diabetes and kidney diseases,3,B.8.3,237,,
|
||||
653,Skin and subcutaneous diseases,409,Non-communicable diseases,2,B.9,238,,
|
||||
654,Dermatitis,653,Skin and subcutaneous diseases,3,B.9.1,239,,X
|
||||
977,Atopic dermatitis,654,Dermatitis,4,B.9.1.1,240,,X
|
||||
978,Contact dermatitis,654,Dermatitis,4,B.9.1.2,241,,X
|
||||
979,Seborrhoeic dermatitis,654,Dermatitis,4,B.9.1.3,242,,X
|
||||
655,Psoriasis,653,Skin and subcutaneous diseases,3,B.9.2,243,,X
|
||||
980,Bacterial skin diseases,653,Skin and subcutaneous diseases,3,B.9.3,244,,
|
||||
656,Cellulitis,980,Bacterial skin diseases,4,B.9.3.1,245,,
|
||||
657,Pyoderma,980,Bacterial skin diseases,4,B.9.3.2,246,,
|
||||
658,Scabies,653,Skin and subcutaneous diseases,3,B.9.4,247,,X
|
||||
659,Fungal skin diseases,653,Skin and subcutaneous diseases,3,B.9.5,248,,X
|
||||
660,Viral skin diseases,653,Skin and subcutaneous diseases,3,B.9.6,249,,X
|
||||
661,Acne vulgaris,653,Skin and subcutaneous diseases,3,B.9.7,250,,X
|
||||
662,Alopecia areata,653,Skin and subcutaneous diseases,3,B.9.8,251,,X
|
||||
663,Pruritus,653,Skin and subcutaneous diseases,3,B.9.9,252,,X
|
||||
664,Urticaria,653,Skin and subcutaneous diseases,3,B.9.10,253,,X
|
||||
665,Decubitus ulcer,653,Skin and subcutaneous diseases,3,B.9.11,254,,
|
||||
668,Other skin and subcutaneous diseases,653,Skin and subcutaneous diseases,3,B.9.12,255,,
|
||||
669,Sense organ diseases,409,Non-communicable diseases,2,B.10,256,,X
|
||||
981,Blindness and vision loss,669,Sense organ diseases,3,B.10.1,257,,X
|
||||
670,Glaucoma,981,Blindness and vision loss,4,B.10.1.1,258,,X
|
||||
671,Cataract,981,Blindness and vision loss,4,B.10.1.2,259,,X
|
||||
672,Age-related macular degeneration,981,Blindness and vision loss,4,B.10.1.3,260,,X
|
||||
999,Refraction disorders,981,Blindness and vision loss,4,B.10.1.4,261,,X
|
||||
1000,Near vision loss,981,Blindness and vision loss,4,B.10.1.5,262,,X
|
||||
675,Other vision loss,981,Blindness and vision loss,4,B.10.1.6,263,,X
|
||||
674,Age-related and other hearing loss,669,Sense organ diseases,3,B.10.2,264,,X
|
||||
679,Other sense organ diseases,669,Sense organ diseases,3,B.10.3,265,,X
|
||||
626,Musculoskeletal disorders,409,Non-communicable diseases,2,B.11,266,,
|
||||
627,Rheumatoid arthritis,626,Musculoskeletal disorders,3,B.11.1,267,,
|
||||
628,Osteoarthritis,626,Musculoskeletal disorders,3,B.11.2,268,,X
|
||||
1014,Osteoarthritis hip,628,Osteoarthritis,4,B.11.2.1,269,,X
|
||||
1015,Osteoarthritis knee,628,Osteoarthritis,4,B.11.2.2,270,,X
|
||||
1016,Osteoarthritis hand,628,Osteoarthritis,4,B.11.2.3,271,,X
|
||||
1017,Osteoarthritis other,628,Osteoarthritis,4,B.11.2.4,272,,X
|
||||
630,Low back pain,626,Musculoskeletal disorders,3,B.11.3,273,,X
|
||||
631,Neck pain,626,Musculoskeletal disorders,3,B.11.4,274,,X
|
||||
632,Gout,626,Musculoskeletal disorders,3,B.11.5,275,,X
|
||||
639,Other musculoskeletal disorders,626,Musculoskeletal disorders,3,B.11.6,276,,
|
||||
640,Other non-communicable diseases,409,Non-communicable diseases,2,B.12,277,,
|
||||
641,Congenital birth defects,640,Other non-communicable diseases,3,B.12.1,278,,
|
||||
642,Neural tube defects,641,Congenital birth defects,4,B.12.1.1,279,,
|
||||
643,Congenital heart anomalies,641,Congenital birth defects,4,B.12.1.2,280,,
|
||||
644,Orofacial clefts,641,Congenital birth defects,4,B.12.1.3,281,,
|
||||
645,Down syndrome,641,Congenital birth defects,4,B.12.1.4,282,,
|
||||
646,Turner syndrome,641,Congenital birth defects,4,B.12.1.5,283,,X
|
||||
647,Klinefelter syndrome,641,Congenital birth defects,4,B.12.1.6,284,,X
|
||||
648,Other chromosomal abnormalities,641,Congenital birth defects,4,B.12.1.7,285,,
|
||||
649,Congenital musculoskeletal and limb anomalies,641,Congenital birth defects,4,B.12.1.8,286,,
|
||||
650,Urogenital congenital anomalies,641,Congenital birth defects,4,B.12.1.9,287,,
|
||||
651,Digestive congenital anomalies,641,Congenital birth defects,4,B.12.1.10,288,,
|
||||
652,Other congenital birth defects,641,Congenital birth defects,4,B.12.1.11,289,,
|
||||
594,Urinary diseases and male infertility,640,Other non-communicable diseases,3,B.12.2,290,,
|
||||
595,Urinary tract infections and interstitial nephritis,594,Urinary diseases and male infertility,4,B.12.2.1,291,,
|
||||
596,Urolithiasis,594,Urinary diseases and male infertility,4,B.12.2.2,292,,
|
||||
597,Benign prostatic hyperplasia,594,Urinary diseases and male infertility,4,B.12.2.3,293,,X
|
||||
598,Male infertility,594,Urinary diseases and male infertility,4,B.12.2.4,294,,X
|
||||
602,Other urinary diseases,594,Urinary diseases and male infertility,4,B.12.2.5,295,,
|
||||
603,Gynecological diseases,640,Other non-communicable diseases,3,B.12.3,296,,
|
||||
604,Uterine fibroids,603,Gynecological diseases,4,B.12.3.1,297,,
|
||||
605,Polycystic ovarian syndrome,603,Gynecological diseases,4,B.12.3.2,298,,X
|
||||
606,Female infertility,603,Gynecological diseases,4,B.12.3.3,299,,X
|
||||
607,Endometriosis,603,Gynecological diseases,4,B.12.3.4,300,,
|
||||
608,Genital prolapse,603,Gynecological diseases,4,B.12.3.5,301,,
|
||||
609,Premenstrual syndrome,603,Gynecological diseases,4,B.12.3.6,302,,X
|
||||
612,Other gynecological diseases,603,Gynecological diseases,4,B.12.3.7,303,,
|
||||
613,Hemoglobinopathies and hemolytic anemias,640,Other non-communicable diseases,3,B.12.4,304,,
|
||||
614,Thalassemias,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.1,305,,
|
||||
837,Thalassemias trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.2,306,,X
|
||||
615,Sickle cell disorders,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.3,307,,
|
||||
838,Sickle cell trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.4,308,,X
|
||||
616,G6PD deficiency,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.5,309,,
|
||||
839,G6PD trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.6,310,,X
|
||||
618,Other hemoglobinopathies and hemolytic anemias,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.7,311,,
|
||||
619,"Endocrine, metabolic, blood, and immune disorders",640,Other non-communicable diseases,3,B.12.5,312,,
|
||||
680,Oral disorders,640,Other non-communicable diseases,3,B.12.6,313,,X
|
||||
681,Caries of deciduous teeth,680,Oral disorders,4,B.12.6.1,314,,X
|
||||
682,Caries of permanent teeth,680,Oral disorders,4,B.12.6.2,315,,X
|
||||
683,Periodontal diseases,680,Oral disorders,4,B.12.6.3,316,,X
|
||||
684,Edentulism,680,Oral disorders,4,B.12.6.4,317,,X
|
||||
685,Other oral disorders,680,Oral disorders,4,B.12.6.5,318,,X
|
||||
686,Sudden infant death syndrome,640,Other non-communicable diseases,3,B.12.7,319,X,
|
||||
687,Injuries,294,All causes,1,C,320,,
|
||||
688,Transport injuries,687,Injuries,2,C.1,321,,
|
||||
689,Road injuries,688,Transport injuries,3,C.1.1,322,,
|
||||
690,Pedestrian road injuries,689,Road injuries,4,C.1.1.1,323,,
|
||||
691,Cyclist road injuries,689,Road injuries,4,C.1.1.2,324,,
|
||||
692,Motorcyclist road injuries,689,Road injuries,4,C.1.1.3,325,,
|
||||
693,Motor vehicle road injuries,689,Road injuries,4,C.1.1.4,326,,
|
||||
694,Other road injuries,689,Road injuries,4,C.1.1.5,327,,
|
||||
695,Other transport injuries,688,Transport injuries,3,C.1.2,328,,
|
||||
696,Unintentional injuries,687,Injuries,2,C.2,329,,
|
||||
697,Falls,696,Unintentional injuries,3,C.2.1,330,,
|
||||
698,Drowning,696,Unintentional injuries,3,C.2.2,331,,
|
||||
699,"Fire, heat, and hot substances",696,Unintentional injuries,3,C.2.3,332,,
|
||||
700,Poisonings,696,Unintentional injuries,3,C.2.4,333,,
|
||||
701,Poisoning by carbon monoxide,700,Poisonings,4,C.2.4.1,334,,
|
||||
703,Poisoning by other means,700,Poisonings,4,C.2.4.2,335,,
|
||||
704,Exposure to mechanical forces,696,Unintentional injuries,3,C.2.5,336,,
|
||||
705,Unintentional firearm injuries,704,Exposure to mechanical forces,4,C.2.5.1,337,,
|
||||
707,Other exposure to mechanical forces,704,Exposure to mechanical forces,4,C.2.5.2,338,,
|
||||
708,Adverse effects of medical treatment,696,Unintentional injuries,3,C.2.6,339,,
|
||||
709,Animal contact,696,Unintentional injuries,3,C.2.7,340,,
|
||||
710,Venomous animal contact,709,Animal contact,4,C.2.7.1,341,,
|
||||
711,Non-venomous animal contact,709,Animal contact,4,C.2.7.2,342,,
|
||||
712,Foreign body,696,Unintentional injuries,3,C.2.8,343,,
|
||||
713,Pulmonary aspiration and foreign body in airway,712,Foreign body,4,C.2.8.1,344,,
|
||||
714,Foreign body in eyes,712,Foreign body,4,C.2.8.2,345,,X
|
||||
715,Foreign body in other body part,712,Foreign body,4,C.2.8.3,346,,
|
||||
842,Environmental heat and cold exposure,696,Unintentional injuries,3,C.2.9,347,,
|
||||
729,Exposure to forces of nature,696,Unintentional injuries,3,C.2.10,348,,
|
||||
716,Other unintentional injuries,696,Unintentional injuries,3,C.2.11,349,,
|
||||
717,Self-harm and interpersonal violence,687,Injuries,2,C.3,350,,
|
||||
718,Self-harm,717,Self-harm and interpersonal violence,3,C.3.1,351,,
|
||||
721,Self-harm by firearm,718,Self-harm,4,C.3.1.1,352,,
|
||||
723,Self-harm by other specified means,718,Self-harm,4,C.3.1.2,353,,
|
||||
724,Interpersonal violence,717,Self-harm and interpersonal violence,3,C.3.2,354,,
|
||||
725,Physical violence by firearm,724,Interpersonal violence,4,C.3.2.1,355,,
|
||||
726,Physical violence by sharp object,724,Interpersonal violence,4,C.3.2.2,356,,
|
||||
941,Sexual violence,724,Interpersonal violence,4,C.3.2.3,357,,X
|
||||
727,Physical violence by other means,724,Interpersonal violence,4,C.3.2.4,358,,
|
||||
945,Conflict and terrorism,717,Self-harm and interpersonal violence,3,C.3.3,359,,
|
||||
854,Executions and police conflict,717,Self-harm and interpersonal violence,3,C.3.4,360,,
|
||||
1029,Total cancers,294,All causes,1,D,361,,
|
||||
1026,Total burden related to hepatitis B,294,All causes,1,E,362,,
|
||||
1027,Total burden related to hepatitis C,294,All causes,1,F,363,,
|
||||
1028,Total burden related to Non-alcoholic fatty liver disease (NAFLD),294,All causes,1,G,364,,
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
https://doi.org/10.6069/Q0YC-CR46
|
||||
@ -0,0 +1,103 @@
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import itertools
|
||||
|
||||
|
||||
|
||||
IHME_COD_FILEPATH = "./GlobalBurdenDisease/IHME_GBD_2019_COD_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
|
||||
IHME_NONFATAL_FILEPATH = "./GlobalBurdenDisease/IHME_GBD_2019_NONFATAL_CAUSE_ICD_CODE_MAP_Y2020M10D15.XLSX"
|
||||
ICD10CM_ORDER_FILEPATH = "./icd10_combined-who-cms.psv"
|
||||
|
||||
def justify(string):
    '''
    Right-pad a code with "-" up to a fixed width of 7 characters.

    Padding gives codes of different lengths a common, sortable shape,
    e.g. "A00" becomes "A00----" and "A000" becomes "A000---".
    A string that is already 7 characters or longer is returned unchanged.
    '''
    # A non-positive count makes the repetition empty, so long inputs pass through.
    missing = 7 - len(string)
    return string + "-" * missing
|
||||
|
||||
|
||||
class CodeRange():
    '''
    Expand the code specification of one cause into concrete codebook keys.

    The specification is a comma-separated string whose items are either a
    single code ("A00.1") or a hyphenated range ("A00.1-A00.9"). Dots are
    removed and every code is padded with justify() before it is compared
    with the keys of the codebook, which must be padded the same way.

    Attributes set by the constructor:
      cause     -- label printed next to every matched code by __str__
      code_book -- mapping keyed by padded code (only its keys and .get() are used)
      code_list -- padded codes matched for this cause, in specification order
    '''

    def __init__(self, cause, code_book, codes):
        '''
        Parse `codes` and collect the matching keys of `code_book`.

        cause     -- label associated with every matched code
        code_book -- mapping whose keys are justify()-padded codes
        codes     -- comma-separated codes/ranges; None or a float (such as
                     the NaN used for a missing cell) means "no codes"
        '''
        self.cause = cause
        self.code_book = code_book
        self.code_list = []

        # Missing values are not strings; normalise them to an empty spec.
        # isinstance() also covers float subclasses, unlike an exact type test.
        if codes is None or isinstance(codes, float):
            codes = ""

        for item in codes.split(","):
            # "A00.1-A00.9" -> ["A001", "A009"]. Each endpoint is stripped so
            # whitespace around the hyphen cannot leak into the padded code.
            parts = [part.strip() for part in item.replace(".", "").split("-")]

            if len(parts) == 1:
                # Single code: keep it only when the codebook has an entry for
                # it; unknown codes are skipped on purpose (best effort).
                code = justify(parts[0])
                if self.code_book.get(code) is not None:
                    self.code_list.append(code)
                continue

            # Range: both endpoints are inclusive and compared as padded
            # strings, i.e. lexicographically against the codebook keys.
            # NOTE(review): an end point written as a bare category ("A09")
            # pads to "A09----", which sorts before its sub-codes ("A090---"),
            # so those are excluded -- confirm the source ranges always spell
            # out the last sub-code.
            begin = justify(parts[0])
            end = justify(parts[1])
            self.code_list.extend(
                key for key in self.code_book if begin <= key <= end
            )

    def __str__(self):
        '''Return one "<code> | <cause>" line per matched code (empty string if none).'''
        return "".join(
            "{} | {}\n".format(code, self.cause) for code in self.code_list
        )
|
||||
|
||||
# Load the ICD10CM codebook: one "code|description|source" record per line.

icd10_codes = {}

with open(ICD10CM_ORDER_FILEPATH, "r") as icd_fh:
    for idx, line in enumerate(icd_fh):
        # Exactly three pipe-separated fields are expected; any other count
        # raises ValueError when unpacking.
        code, descr, source = (field.strip() for field in line.split("|"))

        # Key the entry by the padded code; idx is the record's line position.
        code = justify(code)
        icd10_codes[code] = (idx, descr, source)
|
||||
|
||||
|
||||
|
||||
# Expand every row of the COD workbook into "code | cause" lines.
# header=1 takes the column names from the second row of the sheet.
cod = pd.read_excel(IHME_COD_FILEPATH, header=1)

with open("COD_cause2code.psv", "w") as outfh:
    for row in cod.itertuples():
        # itertuples() puts the DataFrame index at position 0, so row[1] and
        # row[2] are the first and second columns of the sheet.
        cause = row[1]
        codes = row[2]

        c = CodeRange(cause, icd10_codes, codes)

        # str(c) already ends every line with a newline; a cause with no
        # matching codes contributes nothing.
        outfh.write(str(c))
|
||||
|
||||
|
||||
|
||||
|
||||
# Expand every row of the NONFATAL workbook into "code | cause" lines.
# header=1 takes the column names from the second row of the sheet.
nonfatal = pd.read_excel(IHME_NONFATAL_FILEPATH, header=1)

with open("NONFATAL_cause2code.psv", "w") as outfh:
    for row in nonfatal.itertuples():
        # itertuples() puts the DataFrame index at position 0, so row[2] and
        # row[3] are the second and third columns of the sheet.
        cause = row[2]
        codes = row[3]

        c = CodeRange(cause, icd10_codes, codes)

        # str(c) already ends every line with a newline; a cause with no
        # matching codes contributes nothing.
        outfh.write(str(c))
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1 @@
|
||||
,dad,home-pc,06.04.2023 22:41,file:///home/dad/.config/libreoffice/4;
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue