Compare commits
90 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
d1d0dc87a7 | 1 year ago |
|
|
1782372a45 | 1 year ago |
|
|
fc478517ac | 1 year ago |
|
|
d912408456 | 1 year ago |
|
|
2488cceebc | 1 year ago |
|
|
eca4795d2f | 1 year ago |
|
|
87074aa42b | 1 year ago |
|
|
3311159ab6 | 2 years ago |
|
|
bb374dbde9 | 2 years ago |
|
|
635cfe42d9 | 2 years ago |
|
|
79902f400a | 2 years ago |
|
|
495955170c | 2 years ago |
|
|
de3698052b | 2 years ago |
|
|
dfb041d12b | 2 years ago |
|
|
9aaf007791 | 2 years ago |
|
|
b4a3cec7e6 | 2 years ago |
|
|
211151e223 | 2 years ago |
|
|
d90539a679 | 2 years ago |
|
|
142670d08a | 2 years ago |
|
|
6a931b3a49 | 3 years ago |
|
|
1c3d749ef4 | 3 years ago |
|
|
ef68adae89 | 3 years ago |
|
|
a336fb92d9 | 3 years ago |
|
|
05a96a3a29 | 3 years ago |
|
|
256177e569 | 3 years ago |
|
|
b7290c271b | 3 years ago |
|
|
c4b8484cab | 3 years ago |
|
|
2a9b8349ba | 3 years ago |
|
|
9a718f72a0 | 3 years ago |
|
|
e88f450b8c | 3 years ago |
|
|
4643351305 | 3 years ago |
|
|
c1f9f6e528 | 3 years ago |
|
|
277b5b9bd5 | 3 years ago |
|
|
1de1ff9e4a | 3 years ago |
|
|
47996ba607 | 3 years ago |
|
|
9ac4cffe61 | 3 years ago |
|
|
5600ad932d | 3 years ago |
|
|
d1edac3c4f | 3 years ago |
|
|
e2edf1eb6b | 3 years ago |
|
|
c5f3bfcdec | 3 years ago |
|
|
12c3c69304 | 3 years ago |
|
|
29644a0ad5 | 3 years ago |
|
|
123fe3b5e4 | 3 years ago |
|
|
470dfc2611 | 3 years ago |
|
|
6876779c17 | 3 years ago |
|
|
bbdd7552a8 | 3 years ago |
|
|
4831864805 | 3 years ago |
|
|
6b5a48c77a | 3 years ago |
|
|
2aba3469d3 | 3 years ago |
|
|
ef7ed7001b | 3 years ago |
|
|
2ec314180f | 3 years ago |
|
|
f6b56da261 | 3 years ago |
|
|
ed49d8728a | 3 years ago |
|
|
39397cc224 | 3 years ago |
|
|
804a90c247 | 3 years ago |
|
|
8dbf4e8c2e | 3 years ago |
|
|
1a106a553e | 3 years ago |
|
|
f6f687fff5 | 3 years ago |
|
|
016a449258 | 3 years ago |
|
|
091fd63366 | 3 years ago |
|
|
bbf8c77e6d | 3 years ago |
|
|
4283719d3d | 3 years ago |
|
|
52f8152afd | 3 years ago |
|
|
23826fb576 | 3 years ago |
|
|
966171c840 | 3 years ago |
|
|
f5788051f7 | 3 years ago |
|
|
97af862419 | 3 years ago |
|
|
4cc4c5c99f | 3 years ago |
|
|
fa37dccfff | 3 years ago |
|
|
339a83117e | 3 years ago |
|
|
266c1c9686 | 3 years ago |
|
|
dfbd82de54 | 3 years ago |
|
|
fc38a2e92c | 3 years ago |
|
|
3eb9a4130a | 3 years ago |
|
|
ee3e37e834 | 3 years ago |
|
|
4ae3064bf2 | 3 years ago |
|
|
61dc377e0a | 3 years ago |
|
|
321e756cc6 | 3 years ago |
|
|
5d2140accd | 3 years ago |
|
|
9b26cd99df | 3 years ago |
|
|
1bdcc2fd83 | 3 years ago |
|
|
e4971ae2f6 | 4 years ago |
|
|
f3d73a5ac1 | 4 years ago |
|
|
9d5a726494 | 4 years ago |
|
|
453e82974e | 4 years ago |
|
|
a9027c9467 | 4 years ago |
|
|
b1c146d550 | 4 years ago |
|
|
71e87a9abe | 4 years ago |
|
|
d8d00101fa | 4 years ago |
|
|
9850f4c677 | 4 years ago |
@ -0,0 +1,8 @@
|
||||
*.sql.gzip filter=lfs diff=lfs merge=lfs -text
|
||||
*.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||
containers/AACT_Reloader/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||
other_data/USP[[:space:]]DC/usp_dc_pub_2023_release_2.0_updated_final.csv filter=lfs diff=lfs merge=lfs -text
|
||||
other_data/USP[[:space:]]MMG/MMG_v8.0_Alignment_File.csv filter=lfs diff=lfs merge=lfs -text
|
||||
other_data/VA[[:space:]]Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv filter=lfs diff=lfs merge=lfs -text
|
||||
containers/AACT_Reloader/backup/aact_db_backup_20250106_184236.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||
containers/AACT_Reloader/backup/aact_db_backup_20250107_133822.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||
@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>ClinicalTrialsDataProcessing</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.jkiss.dbeaver.DBeaverNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
||||
@ -1,7 +0,0 @@
|
||||
FROM youainti/aact_from_dump
|
||||
LABEL AUTHOR 'Will King (youainti@protonmail.com)'
|
||||
LABEL DESCRIPTION 'add extra processing to the aact database in preparation for downloading history.'
|
||||
|
||||
#copy additional init scripts
|
||||
COPY ./docker-entrypoint-initdb.d/ /docker-entrypoint-initdb.d/
|
||||
#these will be run after the database is initialized
|
||||
@ -1,26 +0,0 @@
|
||||
-- Create a schema handling trial history.
|
||||
CREATE SCHEMA history;
|
||||
|
||||
--Create role for anyone who needs to both select and insert on historical data
|
||||
CREATE ROLE history_writer;
|
||||
GRANT CONNECT ON DATABASE aact_db to history_writer;
|
||||
|
||||
GRANT USAGE ON SCHEMA history TO history_writer;
|
||||
|
||||
GRANT INSERT,SELECT ON ALL TABLES IN SCHEMA http TO history_writer;
|
||||
|
||||
|
||||
--Create role for anyone who only needs selection access to historical data, such as for analysis
|
||||
CREATE ROLE history_reader;
|
||||
GRANT CONNECT ON DATABASE aact_db to history_reader;
|
||||
|
||||
GRANT USAGE ON SCHEMA history TO history_reader;
|
||||
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA http TO history_reader;
|
||||
|
||||
|
||||
|
||||
/* History Tables
|
||||
Below is where I would construct the parsed trial history tables that I need.
|
||||
*/
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
version: '3'
|
||||
|
||||
volumes:
|
||||
aact_pg_database: #This is to hold the database.
|
||||
|
||||
networks:
|
||||
pharmaceutical_research:
|
||||
external: true
|
||||
|
||||
services:
|
||||
aact:
|
||||
build: ./ClinicalTrialHistory #build and use the clinical trial history db.
|
||||
networks:
|
||||
- pharmaceutical_research
|
||||
container_name: aact_db
|
||||
#restart: always #restart after crashes
|
||||
environment:
|
||||
POSTGRES_USER: root
|
||||
POSTGRES_PASSWORD: root
|
||||
POSTGRES_DB: aact_db
|
||||
ports:
|
||||
- "5432:5432" #host:container
|
||||
volumes: #host:container is the format.
|
||||
- aact_pg_database:/var/lib/postgresql/ # this is persistant storage for the database
|
||||
- ./aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
|
||||
|
||||
@ -1 +0,0 @@
|
||||
This is what is needed to setup the database.
|
||||
@ -1,93 +0,0 @@
|
||||
# Adobe Pdf Character ID (cid:\d+) parser
|
||||
# The purpose is to allow someone to create their own table equivalent to the "\toUnicode" that
|
||||
# should be provided in every PDF using cid's (but is often mangled).
|
||||
|
||||
def get_digits(string):
|
||||
"""
|
||||
Extract the leading digits from a cid tag.
|
||||
"""
|
||||
splat = string.split(")")
|
||||
num = splat[0]
|
||||
l = len(num)
|
||||
return int(num),l
|
||||
|
||||
def token_generator(string):
|
||||
"""
|
||||
An iterable that returns tokens describing a string in a pdf.
|
||||
Tokens take two forms:
|
||||
- Integers: these represent CID codes
|
||||
- Characters: these represent the arbitrary characters often returned amidst cid's.
|
||||
|
||||
It is a python generator because that simplifies the ordering and allows us to avoid recursion.
|
||||
"""
|
||||
start = 0
|
||||
str_len = len(string)
|
||||
|
||||
while start < str_len:
|
||||
substring = string[start:]
|
||||
|
||||
#check for cid
|
||||
if (str_len - start > 6) and (substring[0:5] == "(cid:"):
|
||||
|
||||
num,length = get_digits(substring[5:])
|
||||
start += length + 6
|
||||
yield num
|
||||
|
||||
elif (str_len - start > 1):
|
||||
start += 1
|
||||
yield substring[0]
|
||||
else:
|
||||
start += 1
|
||||
yield substring
|
||||
|
||||
|
||||
class UnknownSymbol():
|
||||
"""
|
||||
Represents a token that is not in the parser's dictionary.
|
||||
"""
|
||||
def __init__(self, symbol):
|
||||
self.symbol = symbol
|
||||
|
||||
def __repr__(self):
|
||||
return "UnknownSymbol: {} of type {}".format(self.symbol, type(self.symbol))
|
||||
|
||||
def __str__(self):
|
||||
return "\uFFFD"
|
||||
|
||||
class Parser:
|
||||
"""
|
||||
Translates from tokens to character arrays or strings, handling errors as it goes.
|
||||
|
||||
It requires a dictionary during instantiation.
|
||||
This dictionary is what is used to perform lookups.
|
||||
|
||||
It exposes 3 methods
|
||||
- convert attempts to convert a single token
|
||||
- convert_list will try to convert an iterable of tokens into an iterable of text.
|
||||
- convert_list_of_strings will try to convert a list of strings containing cids and other symbols into
|
||||
- strings, if there are no Unknown symbols.
|
||||
- lists, containing characters and Unknown symbols.
|
||||
"""
|
||||
def __init__(self, lookup_table):
|
||||
self._lookup_table = lookup_table
|
||||
|
||||
def convert(self,token):
|
||||
try:
|
||||
return self._lookup_table[token]
|
||||
except:
|
||||
return UnknownSymbol(token)
|
||||
|
||||
def convert_list(self,token_stream):
|
||||
for token in token_stream:
|
||||
yield self.convert(token)
|
||||
|
||||
def convert_list_of_strings(self, list_of_strings):
|
||||
for token_stream in list_of_stings:
|
||||
arr = [x for x in self.convert_list(token_generator(token_stream))]  # use this parser's own lookup, not an external global
|
||||
try:
|
||||
print("".join(arr))
|
||||
except:
|
||||
print(arr)
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Plan was to accept and proceess a symbol table and text. Apparently it has not been implemented.")
|
||||
@ -1,371 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "40358f02-c376-4431-be39-cdd477f17e7a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import polars as pl"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "8fb27ee2-72c1-4e80-9d00-de54f2834fe8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"polars.datatypes.Datetime"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pl.datatypes.Datetime"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"id": "2c0edd77-c2d0-4184-a094-8c01783d2f0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"products = pl.scan_csv(file=\"./EOBZIP_2022_04/products.txt\", sep=\"~\")\n",
|
||||
"patents = pl.scan_csv(file=\"./EOBZIP_2022_04/patent.txt\", sep=\"~\")\n",
|
||||
"exclusivity = pl.scan_csv(file=\"./EOBZIP_2022_04/exclusivity.txt\", sep=\"~\", parse_dates=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 58,
|
||||
"id": "023f211d-23aa-4a2c-843d-1b60cec91079",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def set_exclusivity_types(df):\n",
|
||||
" return df.with_columns([\n",
|
||||
" pl.col(\"Exclusivity_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\")\n",
|
||||
" ])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 61,
|
||||
"id": "a1da42c9-e47a-4437-b089-e9b91f789a0c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1 \"class=\"dataframe \">\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
"<th>\n",
|
||||
"Appl_Type\n",
|
||||
"</th>\n",
|
||||
"<th>\n",
|
||||
"Appl_No\n",
|
||||
"</th>\n",
|
||||
"<th>\n",
|
||||
"Product_No\n",
|
||||
"</th>\n",
|
||||
"<th>\n",
|
||||
"Exclusivity_Code\n",
|
||||
"</th>\n",
|
||||
"<th>\n",
|
||||
"Exclusivity_Date\n",
|
||||
"</th>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"str\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"i64\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"i64\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"str\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"date\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"11366\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"ODE-96\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-08-07\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"20287\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"11\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"NPP\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-05-16\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"20287\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"10\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"NPP\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-05-16\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"20287\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"9\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"NPP\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-05-16\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"\"N\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"20287\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"8\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"\"NPP\"\n",
|
||||
"</td>\n",
|
||||
"<td>\n",
|
||||
"2022-05-16\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"shape: (5, 5)\n",
|
||||
"┌───────────┬─────────┬────────────┬──────────────────┬──────────────────┐\n",
|
||||
"│ Appl_Type ┆ Appl_No ┆ Product_No ┆ Exclusivity_Code ┆ Exclusivity_Date │\n",
|
||||
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
||||
"│ str ┆ i64 ┆ i64 ┆ str ┆ date │\n",
|
||||
"╞═══════════╪═════════╪════════════╪══════════════════╪══════════════════╡\n",
|
||||
"│ N ┆ 11366 ┆ 2 ┆ ODE-96 ┆ 2022-08-07 │\n",
|
||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
||||
"│ N ┆ 20287 ┆ 11 ┆ NPP ┆ 2022-05-16 │\n",
|
||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
||||
"│ N ┆ 20287 ┆ 10 ┆ NPP ┆ 2022-05-16 │\n",
|
||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
||||
"│ N ┆ 20287 ┆ 9 ┆ NPP ┆ 2022-05-16 │\n",
|
||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
||||
"│ N ┆ 20287 ┆ 8 ┆ NPP ┆ 2022-05-16 │\n",
|
||||
"└───────────┴─────────┴────────────┴──────────────────┴──────────────────┘"
|
||||
]
|
||||
},
|
||||
"execution_count": 61,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"exclusivity.pipe(set_exclusivity_types).head(5).collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"id": "92fe99fa-1963-460c-99ea-7f614b4b2e25",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def set_patent_types(df):\n",
|
||||
" return df.with_columns([\n",
|
||||
" pl.col(\"Patent_Expire_Date_Text\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\"),\n",
|
||||
" pl.col(\"Submission_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\"),\n",
|
||||
" pl.col(\"Drug_Substance_Flag\") == \"Y\",\n",
|
||||
" pl.col(\"Drug_Product_Flag\") == \"Y\",\n",
|
||||
" pl.col(\"Delist_Flag\") == \"Y\"\n",
|
||||
" ])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 90,
|
||||
"id": "13707ca6-094f-4ed7-94cb-824087e97874",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1 \"class=\"dataframe \">\n",
|
||||
"<thead>\n",
|
||||
"<tr>\n",
|
||||
"<th>\n",
|
||||
"Patent_Expire_Date_Text\n",
|
||||
"</th>\n",
|
||||
"</tr>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"date\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"</thead>\n",
|
||||
"<tbody>\n",
|
||||
"<tr>\n",
|
||||
"<td>\n",
|
||||
"2022-01-02\n",
|
||||
"</td>\n",
|
||||
"</tr>\n",
|
||||
"</tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"shape: (1, 1)\n",
|
||||
"┌─────────────────────────┐\n",
|
||||
"│ Patent_Expire_Date_Text │\n",
|
||||
"│ --- │\n",
|
||||
"│ date │\n",
|
||||
"╞═════════════════════════╡\n",
|
||||
"│ 2022-01-02 │\n",
|
||||
"└─────────────────────────┘"
|
||||
]
|
||||
},
|
||||
"execution_count": 90,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"patents.pipe(set_patent_types).select(\"Patent_Expire_Date_Text\").min().collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 81,
|
||||
"id": "18ad8df7-45d5-4454-8955-c5f28a7d7f1e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"polars.datatypes.Null"
|
||||
]
|
||||
},
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pl.datatypes.Null"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "79e4b3d9-29ae-4302-bee1-4be02e0ba654",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
||||
https://www.fda.gov/media/76860/download
|
||||
@ -1 +0,0 @@
|
||||
Most of these are related to potentially parsing orangebook data from the pdfs.
|
||||
@ -1,145 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "51bf48a1-920a-4e64-ac5f-323ff3a27ebf",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Will use tool 'Tesseract (sh)'\n",
|
||||
"Available languages: eng, osd\n",
|
||||
"Will use language 'eng'\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Import the required libraries\n",
|
||||
"from wand.image import Image\n",
|
||||
"from PIL import Image as PI\n",
|
||||
"import pyocr\n",
|
||||
"import pyocr.builders\n",
|
||||
"import io, sys\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Get the handle of the OCR library (in this case, tesseract)\n",
|
||||
"tools = pyocr.get_available_tools()\n",
|
||||
"if len(tools) == 0:\n",
|
||||
"\tprint(\"No OCR tool found!\")\n",
|
||||
"\tsys.exit(1)\n",
|
||||
"tool = tools[0]\n",
|
||||
"print(\"Will use tool '%s'\" % (tool.get_name()))\n",
|
||||
"\n",
|
||||
"# Get the language\n",
|
||||
"langs = tool.get_available_languages()\n",
|
||||
"print(\"Available languages: %s\" % \", \".join(langs)) \n",
|
||||
"lang = langs[0] # For English\n",
|
||||
"print(\"Will use language '%s'\" % (lang))\n",
|
||||
"\n",
|
||||
"# Setup two lists which will be used to hold our images and final_text\n",
|
||||
"req_image = []\n",
|
||||
"final_text = []\n",
|
||||
"\n",
|
||||
"# Open the PDF file using wand and convert it to jpeg\n",
|
||||
"image_pdf = Image(filename=\"/home/will/research/ClinicalTrialsDataProcessing/Orangebook/Orangebooks/testprint.pdf\", resolution=300)\n",
|
||||
"image_jpeg = image_pdf.convert('pdf')\n",
|
||||
"\n",
|
||||
"# wand has converted all the separate pages in the PDF into separate image\n",
|
||||
"# blobs. We can loop over them and append them as a blob into the req_image\n",
|
||||
"# list.\n",
|
||||
"for img in image_jpeg.sequence:\n",
|
||||
"\timg_page = Image(image=img)\n",
|
||||
"\treq_image.append(img_page.make_blob('jpeg'))\n",
|
||||
"\n",
|
||||
"# Now we just need to run OCR over the image blobs and store all of the \n",
|
||||
"# recognized text in final_text.\n",
|
||||
"for img in req_image:\n",
|
||||
"\ttxt = tool.image_to_string(\n",
|
||||
"\t\tPI.open(io.BytesIO(img)),\n",
|
||||
"\t\tlang=lang,\n",
|
||||
"\t\tbuilder=pyocr.builders.TextBuilder()\n",
|
||||
"\t)\n",
|
||||
"\tfinal_text.append(txt)\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "f0d5f1d6-7e15-4ee6-b4ee-cbd41c5afb99",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"The final text is: \n",
|
||||
"\n",
|
||||
"40TH EDITION - 2020 - APPROVED DRUG PRODUCT LIST\n",
|
||||
"\n",
|
||||
"PRESCRIPTION DRUG PRODUCT LIST\n",
|
||||
"\n",
|
||||
"ABACAVIR SULFATE\n",
|
||||
"SOLUTION; ORAL\n",
|
||||
"ABACAVIR SULFATE\n",
|
||||
"\n",
|
||||
"EQ 2 5 /ML\n",
|
||||
"\n",
|
||||
"EQ 2 Ee /ML\n",
|
||||
"\n",
|
||||
"EQ 300MG BASE\n",
|
||||
"EQ 300MG BASE\n",
|
||||
"EQ 300MG BASE\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"\\nThe final text is: \\n\")\n",
|
||||
"print(final_text[0][0:200])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1cac17e7-079d-4e32-bdbf-ae49194b2078",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"it appears taht this does not have the required precision. I'll need to do this some other way."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2283e290-fab3-4cda-8ce9-55a0b3533c98",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@ -1,56 +1,91 @@
|
||||
# ClinicalTrialsDataProcessing
|
||||
|
||||
This is used to build tools which process and standardize the data.
|
||||
This represents my
|
||||
|
||||
More data later.
|
||||
## Prerequisites
|
||||
|
||||
# Outline
|
||||
> Python >= 3.10 (requires match statement)
|
||||
> Docker >= 20.10
|
||||
> Curl >= 7
|
||||
> Just >= 1.9
|
||||
|
||||
## Directory Tree
|
||||
AACT_downloader
|
||||
|
||||
# Usage
|
||||
|
||||
## Key files index
|
||||
|
||||
# Background on Docker
|
||||
Docker uses the following flow
|
||||
## Basic usage
|
||||
|
||||
1. configuration using `docker-compose.yaml` or a `Dockerfile`
|
||||
2. `docker build .` to generate an image
|
||||
3. `docker run xxxxxx` to take the image and create a container.
|
||||
- when the container is created, it starts, running commands as configured in the dockerfile.
|
||||
- Consequently, the AACT database image when run must initialize the postgres db, then run the initialization details.
|
||||
- Here is where bind mounts come into play.
|
||||
Check prerequisites
|
||||
```bash
|
||||
just check-status
|
||||
```
|
||||
|
||||
## Multistage builds
|
||||
https://stackoverflow.com/questions/53659993/docker-multi-stage-how-to-split-up-into-multiple-dockerfiles
|
||||
Setup the underlying AACT database including downloading both
|
||||
the AACT dump and historical data.
|
||||
```bash
|
||||
just create
|
||||
just select-trials
|
||||
just count=1000 get-histories
|
||||
```
|
||||
replacing the 1000 in `count=1000` with the number of trials you want to download.
|
||||
|
||||
https://docs.docker.com/develop/develop-images/multistage-build/
|
||||
## Advanced Usage
|
||||
|
||||
Basically
|
||||
If you need to reset the db without downloading the AACT dump
|
||||
```bash
|
||||
just rebuild
|
||||
just select-trials
|
||||
just count=1000 get-histories
|
||||
```
|
||||
|
||||
## Dockerfile vs docker-compose.yaml
|
||||
|
||||
A `Dockerfile` is used to create images.
|
||||
### Description of all the `just` recipes
|
||||
|
||||
A `docker-compose.yaml` is used to automate the deployment of containers.
|
||||
# Background information
|
||||
|
||||
## Types of storage
|
||||
This is designed to run on a linux machine with bash.
|
||||
If you are using a shell other than bash you should be aware of what
|
||||
is needed to run all of this using bash
|
||||
|
||||
### COPY/ADD (Dockerfile)
|
||||
If any of the discussions below don't make sense, talk to your sysadmin,
|
||||
a local linux user, or reach out to the author.
|
||||
|
||||
In a dockerfile, this adds a file permanently to the image.
|
||||
## Just installation
|
||||
|
||||
This adds files one way to or from the container when initialized.
|
||||
I use the command runner `just` to automate/simplify setting up the
|
||||
docker containers and running many of the python scripts.
|
||||
It is similar to `make` in many ways but is designed to do less.
|
||||
|
||||
### Volumes (docker-compose.yaml && Dockerfile)
|
||||
Just can be installed from https://github.com/casey/just/
|
||||
|
||||
Useable in both docker-compose and Dockerfile's, this creates a permanent storage.
|
||||
It can be maintained by docker or stored in a particular location.
|
||||
## Python installation
|
||||
|
||||
Good for longer term storage such as databases.
|
||||
This requires python 3.10 or above due to the use of match-case statements
|
||||
in the html parser.
|
||||
|
||||
### Bind mounts (docker-compose.yaml)
|
||||
Check which version of python you have by typing `python --version`.
|
||||
If you do not have the required version, I would recommend installing
|
||||
the conda python manager and setting up a conda environment with python 3.10.
|
||||
Instructions for doing so are on the internet.
|
||||
|
||||
Bind mounts are used to make a host filesystem resource
|
||||
available
|
||||
## Docker and Postgres
|
||||
Docker is a tool to manage and run OCI containers.
|
||||
What this means in regards to this project is that docker makes it
|
||||
easy to setup containers.
|
||||
|
||||
Install docker based on instructions for your linux distribution.
|
||||
I use podman (an alternative from RedHat) because it allows for running without root permissions.
|
||||
|
||||
### Docker networking
|
||||
|
||||
It is helpful to construct an external docker network by running
|
||||
|
||||
`docker network create network_name`
|
||||
|
||||
and then including that network in the docker-compose.yaml
|
||||
|
||||
# Environment Variables (`.env` file)
|
||||
I use a single .env file to setup the docker containers and pass configuration variables to
|
||||
the python scripts. I would suggest changing the default values in `sample.env` to match your needs.
|
||||
If you do need to think about the security of your database I would recommend
|
||||
you start by changing these.
|
||||
|
||||
@ -0,0 +1,133 @@
|
||||
|
||||
/* OVERVIEW
|
||||
*
|
||||
* This links trials to the first date each drug (indexed by NDA/ANDA etc) is
|
||||
* put on the market.
|
||||
*
|
||||
* It takes 3 views to build up to it.
|
||||
* */
|
||||
|
||||
--Match trials to brands and ingredients
|
||||
create or replace view public.match_trials_to_bn_in as
|
||||
with trialncts as (
|
||||
SELECT DISTINCT nct_id FROM history.trial_snapshots TS
|
||||
)
|
||||
SELECT
|
||||
bi.nct_id ,
|
||||
bi.downcase_mesh_term,
|
||||
rr.tty2 ,
|
||||
rr.rxcui2 as bn_or_in_cui, --brand or ingredient
|
||||
count(*)
|
||||
FROM ctgov.browse_interventions bi
|
||||
left outer JOIN rxnorm_migrated.rxnorm_props AS rp
|
||||
on bi.downcase_mesh_term = rp.propvalue1 --link names to drug cuis ()
|
||||
left outer join rxnorm_migrated.rxnorm_relations rr
|
||||
on rr.rxcui1 = rp.rxcui
|
||||
WHERE
|
||||
bi.nct_id in (
|
||||
SELECT nct_id FROM trialncts
|
||||
)
|
||||
and
|
||||
bi.mesh_type='mesh-list'
|
||||
and rp.propname = 'Active_ingredient_name'
|
||||
and rr.tty2 in ('BN', 'IN', 'MIN')
|
||||
group by bi.nct_id, bi.downcase_mesh_term , rr.tty2 ,rr.rxcui2
|
||||
order by bi.nct_id
|
||||
;
|
||||
--running out of space.
|
||||
|
||||
-- get list of interventions associated with trials of interest
|
||||
create temp table tmp_interventions as
|
||||
select * from ctgov.browse_interventions bi
|
||||
where
|
||||
bi.mesh_type ='mesh-list'
|
||||
and
|
||||
bi.nct_id in (select distinct nct_id from history.trial_snapshots)
|
||||
;
|
||||
select * from tmp_interventions;
|
||||
|
||||
--drop table tmp_join_interv_rxcui;
|
||||
create temp table tmp_join_interv_rxcui as
|
||||
select *
|
||||
from
|
||||
tmp_interventions tint
|
||||
inner join
|
||||
rxnorm_migrated.rxnorm_props rp
|
||||
on tint.downcase_mesh_term = rp.propvalue1
|
||||
where propname='RxNorm Name'
|
||||
;-- get the rxcui for ingredients
|
||||
|
||||
select * from tmp_join_interv_rxcui;
|
||||
|
||||
--filter rxcui -> is human prescribable
|
||||
create temp view tmp_view_prescribable as
|
||||
select count(*) from rxnorm_migrated.rxnorm_props rp
|
||||
where
|
||||
rp.propname = 'PRESCRIBABLE'
|
||||
and
|
||||
rp.propvalue1 = 'Y'
|
||||
;
|
||||
|
||||
--link prescribable to brand ingredients or brand names.
|
||||
|
||||
|
||||
--get relationships of IN -> BN
|
||||
select *
|
||||
from
|
||||
rxnorm_migrated.rxnorm_relations rr
|
||||
where
|
||||
rr.tty1 in ('IN','MIN')
|
||||
and rr.rxcui1 in (select distinct rxcui from tmp_join_interv_rxcui tjir)
|
||||
and rr.tty2 = 'BN'
|
||||
;
|
||||
|
||||
|
||||
|
||||
--match trials to NDC11 codes through brands
|
||||
create or replace view public.match_trial_to_ndc11 as
|
||||
select
|
||||
mttbi.nct_id,
|
||||
ah.ndc,
|
||||
count(*)
|
||||
from public.match_trials_to_bn_in as mttbi
|
||||
left outer join rxnorm_migrated.rxnorm_relations as rr
|
||||
on mttbi.bn_or_in_cui = rr.rxcui1
|
||||
left outer join rxnorm_migrated."ALLNDC_HISTORY" as ah
|
||||
on rr.rxcui2 = ah.rxcui
|
||||
where
|
||||
rr.tty1 = 'BN'
|
||||
and
|
||||
rr.tty2 in ('SBD', 'BPCK')
|
||||
and
|
||||
ah.sab='RXNORM'
|
||||
group by mttbi.nct_id, ah.ndc
|
||||
order by mttbi.nct_id, ah.ndc
|
||||
;
|
||||
|
||||
|
||||
|
||||
---associate trials to marketing start dates
|
||||
create or replace view public.match_trial_to_marketing_start_date as
|
||||
select
|
||||
mttn.nct_id,
|
||||
n.application_number_or_citation,
|
||||
min(n.marketing_start_date )
|
||||
from match_trial_to_ndc11 mttn
|
||||
inner join spl.nsde n
|
||||
on mttn.ndc = n.package_ndc11
|
||||
where
|
||||
n.product_type = 'HUMAN PRESCRIPTION DRUG'
|
||||
and
|
||||
n.marketing_category in ('NDA','ANDA','BLA', 'NDA authorized generic', 'NDA AUTHORIZED GENERIC')
|
||||
group by mttn.nct_id,n.application_number_or_citation
|
||||
order by mttn.nct_id
|
||||
;
|
||||
|
||||
---Number of trials after a certain date
|
||||
-- Per trial: how many distinct applications have their earliest marketing
-- start date after 2012-06-01.
select
    mttmsd.nct_id,
    count(distinct mttmsd.application_number_or_citation)
from public.match_trial_to_marketing_start_date as mttmsd
where mttmsd."min" > '2012-06-01'
group by mttmsd.nct_id
;
|
||||
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
#!/bin/bash
# Dump the aact_db database from a running podman container into a
# timestamped .sql file under backup_dir, then gzip the dump.
#
# Usage: <this script> [container_name]
#   container_name  podman container to run pg_dump in (default: aact_db)

# Abort on the first failing command (or unset variable) so gzip never runs
# on a dump that pg_dump did not finish.
set -euo pipefail

backup_dir="/mnt/will/large_data/Research_large_data/ClinicalTrialsDataProcessing/containers/AACT_Reloader/backup/"
date_stamp=$(date +%Y%m%d_%H%M%S)
filename="aact_db_backup_${date_stamp}.sql"
# No spaces around '=': with spaces bash tries to run a command named
# `container_name` and the variable is never set.
container_name="${1:-aact_db}"

podman exec "$container_name" pg_dump -U root aact_db > "${backup_dir}/${filename}"

# Optional: compress the backup
gzip "${backup_dir}/${filename}"
|
||||
@ -0,0 +1,117 @@
|
||||
/*
|
||||
I started by creating a formularies schema,
|
||||
then importing the usp - dc formulary data through DBeaver's csv import.
|
||||
*/
|
||||
|
||||
-- DROP SCHEMA "Formularies";
|
||||
|
||||
create schema "Formularies" authorization root;

-- "Formularies".usp_dc_2023 definition
-- Target table for the USP-DC formulary CSV import.  Every column is nullable.
-- To rebuild it, first run:
-- DROP TABLE "Formularies".usp_dc_2023;

create table "Formularies".usp_dc_2023 (
    rxcui                           varchar(15),    --yes even though this is a number, it is represented as a string elsewhere.
    tty                             varchar(10),
    "Name"                          varchar(256),
    "Related BN"                    varchar(250),
    "Related DF"                    varchar(25050),
    "USP Category"                  varchar(250),
    "USP Class"                     varchar(250),
    "USP Pharmacotherapeutic Group" varchar(250),
    "API Concept"                   varchar(250)
);
|
||||
|
||||
/*
|
||||
I then linked the data back on itself with a materialized view, using claude.ai for simplicity.
|
||||
|
||||
Claude.ai > I need a postres sql statement to create a materialized view that will take the following table and link from a given rxcui to the other rxcui's that share the same category and class
|
||||
|
||||
```sql
|
||||
CREATE TABLE "Formularies".usp_dc_2023 (
|
||||
rxcui int4 NULL,
|
||||
tty varchar(10) NULL,
|
||||
"Name" varchar(256) NULL,
|
||||
"Related BN" varchar(250) NULL,
|
||||
"Related DF" varchar(25050) NULL,
|
||||
"USP Category" varchar(250) NULL,
|
||||
"USP Class" varchar(250) NULL,
|
||||
"USP Pharmacotherapeutic Group" varchar(250) NULL,
|
||||
"API Concept" varchar(250) NULL
|
||||
);
|
||||
```
|
||||
|
||||
It links rxcuis to other rxcuis where they have a matching USP Categories and Class
|
||||
This gives alternative RXCUIs based on category and class.
|
||||
*/
|
||||
-- Pair each formulary rxcui (source_rxcui) with every other rxcui
-- (linked_rxcui) that has the same "USP Category" and "USP Class".
-- A row is never paired with itself and rows without an rxcui are ignored.
create materialized view "Formularies".rxcui_category_class_links as
select distinct
    src.rxcui          as source_rxcui,
    alt.rxcui          as linked_rxcui,
    src."USP Category" as category,
    src."USP Class"    as class
from "Formularies".usp_dc_2023 as src
inner join "Formularies".usp_dc_2023 as alt
    on  alt."USP Category" = src."USP Category"
    and alt."USP Class"    = src."USP Class"
    and alt.rxcui <> src.rxcui
where src.rxcui is not null
  and alt.rxcui is not null
;

-- Lookups are expected from either side of the link, so index both columns.
create index on "Formularies".rxcui_category_class_links (source_rxcui);
create index on "Formularies".rxcui_category_class_links (linked_rxcui);
|
||||
|
||||
/*
|
||||
Next step is linking a given nct -> compounds -> formulary alternatives -> compounds -> brands/generics.
|
||||
I'll break this into two steps.
|
||||
|
||||
1. link formulary alternatives to compounds and brands,
|
||||
2. link nct_id to formulary alternatives
|
||||
*/
|
||||
-- Remove the two materialized views so they can be rebuilt below.
-- IF EXISTS must follow the object type; "drop if exists materialized view"
-- is a syntax error in PostgreSQL.
-- The dependent view is dropped first; cascade on the second statement takes
-- care of anything else still built on it.
drop materialized view if exists "Formularies".match_trial_compound_to_alternate_bn_rxcuis;
drop materialized view if exists "Formularies".rxcui_to_brand_through_uspdc cascade;
|
||||
|
||||
-- Extend each category/class link with the brand-name concepts related to the
-- linked rxcui: rxcui2 is the related concept, restricted to tty2 = 'BN'.
-- rr.tty2 is not selected because the filter fixes it to 'BN'.
create materialized view "Formularies".rxcui_to_brand_through_uspdc as
select distinct
    rccl.source_rxcui,
    rccl.linked_rxcui,
    rccl.category,
    rccl."class",
    rr.tty1,
    rr.rxcui2
from "Formularies".rxcui_category_class_links as rccl
inner join rxnorm_migrated.rxnorm_relations as rr
    on rccl.linked_rxcui = rr.rxcui1
where rr.tty2 = 'BN'
;
|
||||
|
||||
/* So this one takes each RXCUI and its associated RXCUIs from the same
|
||||
category and class, and filters it down to associated RXCUI's that
|
||||
represent brand names.
|
||||
*/
|
||||
|
||||
-- Link each trial to the brand names of its formulary alternatives:
--   trial compound (bn_or_in_cui) -> source_rxcui
--     -> rxcuis in the same USP category/class (linked_rxcui)
--     -> their brand names (rxcui2).
-- The trial compound is matched on source_rxcui.  Matching on rxcui2 while
-- also selecting rxcui2 made brand_rxcuis always equal to the trial's own
-- bn_or_in_cui, so no alternative brand could ever be returned.
-- NOTE(review): assumes the formulary rxcuis are at the same level as
-- bn_or_in_cui - confirm against match_trials_to_bn_in.
create materialized view "Formularies".match_trial_compound_to_alternate_bn_rxcuis as
select distinct
    mttbi.nct_id,
    rtbtu.rxcui2 as brand_rxcuis
from match_trials_to_bn_in as mttbi
inner join "Formularies".rxcui_to_brand_through_uspdc as rtbtu
    on mttbi.bn_or_in_cui = rtbtu.source_rxcui
;
|
||||
|
||||
/*
|
||||
This takes the list of ingredients and brands associated with a trial, and
|
||||
links it to the list of alternative brand names.
|
||||
*/
|
||||
|
||||
--renamed the view
|
||||
-- Number of rows (distinct brand rxcuis) recorded per trial in
-- match_trial_compound_to_alternate_bn_rxcuis.
create or replace view "Formularies".nct_to_brand_counts_through_uspdc as
select
    alt.nct_id,
    count(*) as brand_name_counts
from "Formularies".match_trial_compound_to_alternate_bn_rxcuis as alt
group by alt.nct_id
;
|
||||
@ -0,0 +1,100 @@
|
||||
|
||||
/* How many trials were included?
|
||||
* How many trials were inspected?
|
||||
* How many trials were reserved for download?
|
||||
* How many trials didn't get included for some technical reason?
|
||||
*
|
||||
********* Data from 2023-03-29 ***********
|
||||
Of Interest 1981
|
||||
Reserved 1709 #I believe this is lower than the downloaded number because I reserved them earlier
|
||||
Downloaded 1960
|
||||
Incomplete 3 #there were a few http 500 and 404 codes
|
||||
******************************************
|
||||
* Note there were 21 missing trials of interest.
|
||||
* */
|
||||
-- Distinct trials per download status.
select
    ds.status,
    count(distinct ds.nct_id)
from http.download_status as ds
group by ds.status
;
|
||||
|
||||
/* Get a list of trials
|
||||
* -- There are currently 304 trials for which I was able to extract unique snapshots (2023-03-29)
|
||||
* -- There are currently 1138 trials for which I was able to extract unique snapshots (2023-04-03)
|
||||
* */
|
||||
-- Distinct trials with at least one extracted snapshot.
-- Terminated explicitly so the file can also be run as a script; without the
-- semicolon the next select was parsed as part of this statement.
select count(distinct ts.nct_id)
from history.trial_snapshots as ts
;
|
||||
|
||||
/* Get the number of listed conditions
|
||||
* -- There are only 609 listed (MeSH classified) conditions from 284 trials(2023-03-29)
|
||||
* I may need to expand how I address conditions
|
||||
*/
|
||||
-- MeSH-list condition rows belonging to trials that have snapshots.
select count(*)
from ctgov.browse_conditions as bc
where bc.nct_id in (select ts.nct_id from history.trial_snapshots as ts)
  and bc.mesh_type = 'mesh-list'
;

-- Distinct snapshot trials that have at least one MeSH-list condition.
select count(distinct bc.nct_id)
from ctgov.browse_conditions as bc
where bc.nct_id in (select ts.nct_id from history.trial_snapshots as ts)
  and bc.mesh_type = 'mesh-list'
;
|
||||
|
||||
/*
|
||||
* If I were to expand that to non-coded conditions that would be
|
||||
* 304 trials with 398 conditions
|
||||
* */
|
||||
-- Distinct snapshot trials that list any condition in ctgov.conditions.
select count(distinct c.nct_id)
from ctgov.conditions as c
where c.nct_id in (select ts.nct_id from history.trial_snapshots as ts)
;

-- Total condition rows for those trials.
-- The terminator was missing here, which merged this statement with the next
-- one when the file is executed as a script.
select count(*)
from ctgov.conditions as c
where c.nct_id in (select ts.nct_id from history.trial_snapshots as ts)
;
|
||||
|
||||
|
||||
/* Get the number of matches from UMLS
|
||||
* There are about 5,808 proposed matches.
|
||||
*
|
||||
*/
|
||||
-- Total proposed trial -> ICD10 matches.
select count(*)
from "DiseaseBurden".trial_to_icd10 as tti
;
--1383 before run at 8pm 2023-03-29
--5209 at 2023-04-03T11:21

/* Distinct trials per approval state of their ICD10 links */
select
    tti.approved,
    count(distinct tti.nct_id)
from "DiseaseBurden".trial_to_icd10 as tti
group by tti.approved
;
|
||||
|
||||
-- finding and removing duplicates from the trial linking stuff. Useful when you redownload trials.
|
||||
/*
|
||||
with CTE as (
|
||||
select row_number() over (partition by nct_id, "condition",ui) as rownum, *
|
||||
from "DiseaseBurden".trial_to_icd10 tti
|
||||
)
|
||||
delete from "DiseaseBurden".trial_to_icd10 tti2
|
||||
where id in (
|
||||
select id from cte where rownum > 1
|
||||
);
|
||||
*/
|
||||
|
||||
--get the number of completed vs terminated trials
|
||||
-- Overall status of trials that have at least one accepted ICD10 link.
select
    s.overall_status,
    count(distinct s.nct_id)
from ctgov.studies as s
where s.nct_id in (
        select tti.nct_id
        from "DiseaseBurden".trial_to_icd10 as tti
        where tti.approved = 'accepted'
      )
group by s.overall_status
;

-- Overall status of trials that have any proposed ICD10 link.
select
    s.overall_status,
    count(distinct s.nct_id)
from ctgov.studies as s
where s.nct_id in (select tti.nct_id from "DiseaseBurden".trial_to_icd10 as tti)
group by s.overall_status
;

-- Overall status of trials that have snapshots.
select
    s.overall_status,
    count(distinct s.nct_id)
from ctgov.studies as s
where s.nct_id in (select ts.nct_id from history.trial_snapshots as ts)
group by s.overall_status
;
|
||||
@ -0,0 +1,38 @@
|
||||
--TODO: Document and migrate to setup
|
||||
|
||||
-- Rebuild the trial -> ICD10 linking objects from scratch.
-- The table goes first because its "approved" column uses the enum type.
drop table if exists "DiseaseBurden".trial_to_icd10;
drop type if exists "DiseaseBurden".validation_type;

create type "DiseaseBurden".validation_type as enum ('accepted', 'rejected', 'unmatched');
comment on type "DiseaseBurden".validation_type is 'This is used to record interactions with each type. It can be accepted (yes this should be used), rejected (no this doesn`t match), or unmatched (where non of the proposed options match)';

-- One row per proposed link between a trial condition and a code.
create table "DiseaseBurden".trial_to_icd10 (
    id                 integer not null generated always as identity,
    nct_id             varchar not null,
    "condition"        varchar not null,
    ui                 varchar null,
    uri                varchar null,
    rootsource         varchar null,
    "name"             varchar null,
    "source"           varchar null,
    approved           "DiseaseBurden".validation_type,  -- null until reviewed
    approval_timestamp timestamp,
    constraint trial_to_icd10_pk primary key (id)
);
-- trial_to_icd10 is a table, so the description is attached with COMMENT ON
-- TABLE (the script previously used COMMENT ON TYPE for it).
comment on table "DiseaseBurden".trial_to_icd10 is 'This represents potential links between trials and icd10 codes. Most of the links are both automatic and wrong.';

drop table if exists "DiseaseBurden".icd10_to_cause;

-- Maps a code to the text of a cause.
create table "DiseaseBurden".icd10_to_cause (
    id         serial  not null,
    code       varchar not null,
    cause_text varchar not null,
    constraint icd10_to_cause_pk primary key (id)
);
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,38 @@
|
||||
-- Generate a CREATE OR REPLACE VIEW statement for every non-system view.
-- %I quotes identifiers where needed, so mixed-case schemas such as
-- "Formularies" come out as valid DDL.
select
    format('CREATE OR REPLACE VIEW %I.%I AS %s', v.schemaname, v.viewname, v.definition)
from pg_views as v
where v.schemaname not in ('pg_catalog', 'information_schema')
;

-- Generate a CREATE MATERIALIZED VIEW statement for every materialized view.
-- Materialized views are listed in pg_matviews (not pg_views), and PostgreSQL
-- has no "CREATE OR REPLACE MATERIALIZED VIEW" form.
select
    format('CREATE MATERIALIZED VIEW %I.%I AS %s', m.schemaname, m.matviewname, m.definition)
from pg_matviews as m
where m.schemaname not in ('pg_catalog', 'information_schema')
;

-- Generate an approximate CREATE TABLE statement for every non-system table.
-- Columns are emitted in ordinal order so the output is deterministic.
-- Only the type name, character length and NOT NULL are reproduced.
select
    format(
        E'CREATE TABLE %I.%I\n(\n%s\n);\n',
        cols.schemaname,
        cols.tablename,
        string_agg(cols.column_definition, E',\n' order by cols.ordinal_position)
    )
from (
    select
        t.schemaname,
        t.tablename,
        c.ordinal_position,
        quote_ident(c.column_name) || ' ' || c.data_type ||
        case
            when c.character_maximum_length is not null
                then '(' || c.character_maximum_length || ')'
            else ''
        end ||
        case
            when c.is_nullable = 'NO' then ' NOT NULL'
            else ''
        end as column_definition
    from pg_catalog.pg_tables as t
    inner join information_schema.columns as c
        on  t.schemaname = c.table_schema
        and t.tablename  = c.table_name
    where t.schemaname not in ('pg_catalog', 'information_schema')
) as cols
group by cols.schemaname, cols.tablename
;
|
||||
@ -0,0 +1,658 @@
|
||||
-- Idempotent: re-running the script must not fail when tablefunc is already installed.
create extension if not exists tablefunc;
|
||||
|
||||
/*Getting Trial Data all together
|
||||
* There are 3 main datasets to join per trial:
|
||||
*
|
||||
* - Trial Data (still need to stick it together)
|
||||
* - Duration and enrollment data
|
||||
* - Compound Marketing (can get for any trial)
|
||||
* - how many individual brands per compound at the start of the trial
|
||||
* - Disease Data (can get for verified trials)
|
||||
* - Population upper limit (Global Burdens of Disease)
|
||||
* - Category (ICD10 2nd level groups)
|
||||
*/
|
||||
|
||||
/*Disease Data*/
|
||||
-- ICD10 Category and GBD data
|
||||
-- For every trial, keep only the causes at its highest cause "level", then
-- attach the ICD10 category whose code range contains the first three
-- characters of the matched code.
with deepest_level as (
    -- highest cause level available per trial
    select
        ttc.nct_id,
        max(ttc."level") as max_level
    from trial_to_cause as ttc
    group by ttc.nct_id
), deepest_causes as (
    -- distinct cause rows sitting at that level
    select distinct
        ttc.nct_id,
        ttc.ui,
        ttc."condition",
        ttc.cause_text,
        ttc.cause_id,
        dl.max_level
    from trial_to_cause as ttc
    inner join deepest_level as dl
        on  dl.nct_id    = ttc.nct_id
        and dl.max_level = ttc."level"
), categorised as (
    -- category lookup on the three-character code prefix
    select
        dc.nct_id,
        substring(dc.ui for 3) as code,
        dc."condition",
        dc.cause_text,
        dc.cause_id,
        ic.id as category_id,
        ic.group_name
    from deepest_causes as dc
    inner join "DiseaseBurden".icd10_categories as ic
        on substring(dc.ui for 3) between ic.start_code and ic.end_code
)
select distinct
    nct_id,
    cause_id,
    category_id
from categorised
;
|
||||
--still need to link to actual disease burdens.
|
||||
|
||||
/*Compound Marketing Data*/
|
||||
---Number of trials after a certain date
|
||||
-- Per trial: distinct applications whose earliest marketing start date falls
-- after 2012-06-01.
with marketing_cte as (
    select
        mttmsd.nct_id,
        count(distinct mttmsd.application_number_or_citation)
    from public.match_trial_to_marketing_start_date as mttmsd
    where mttmsd."min" > '2012-06-01'
    group by mttmsd.nct_id
)
select mc.*
from marketing_cte as mc
;
|
||||
|
||||
/*Get versions*/
|
||||
/* Ignore this version
|
||||
with cte1 as (
|
||||
select nct_id,min("version") over (partition by nct_id) as min_version
|
||||
from history.trial_snapshots ts
|
||||
where
|
||||
ts.start_date < ts.submission_date
|
||||
), cte2 as (
|
||||
select * from cte1
|
||||
group by nct_id, min_version
|
||||
order by nct_id
|
||||
), cte3 as (
|
||||
select
|
||||
ts2.nct_id,
|
||||
ts2."version",
|
||||
ts2.overall_status,
|
||||
ts2.submission_date,
|
||||
ts2.start_date,
|
||||
ts2.enrollment,
|
||||
ts2.enrollment_category,
|
||||
ts2.primary_completion_date,
|
||||
ts2.primary_completion_date_category ,
|
||||
--mv.nct_id,
|
||||
mv.min_version
|
||||
from history.trial_snapshots ts2
|
||||
join cte2 mv
|
||||
on mv.nct_id = ts2.nct_id
|
||||
where
|
||||
ts2."version" = mv.min_version
|
||||
order by ts2.nct_id
|
||||
), cte4 as (
|
||||
select cte3.nct_id, cte3.submission_date - cte3.start_date as submission_presecence
|
||||
from cte3
|
||||
)
|
||||
select avg(submission_presecence)
|
||||
from cte4
|
||||
;
|
||||
--avg 61 day difference
|
||||
*/
|
||||
|
||||
--use this version
|
||||
-- List trials whose latest snapshot submitted before its own start_date is
-- missing enrollment, completion or start-date information.
with latest_pre_start as (
    -- Highest version per trial among snapshots whose start_date lies after
    -- their submission_date.  The column keeps its historical name
    -- min_version even though it holds a maximum.
    select
        ts.nct_id,
        max(ts."version") as min_version
    from history.trial_snapshots as ts
    where ts.start_date > ts.submission_date
    group by ts.nct_id
), snapshot_details as (
    -- the full snapshot row for that version
    select
        ts2.nct_id,
        ts2."version",
        ts2.overall_status,
        ts2.submission_date,
        ts2.start_date,
        ts2.enrollment,
        ts2.enrollment_category,
        ts2.primary_completion_date,
        ts2.primary_completion_date_category,
        mv.min_version
    from history.trial_snapshots as ts2
    inner join latest_pre_start as mv
        on  mv.nct_id      = ts2.nct_id
        and mv.min_version = ts2."version"
)
select *
from snapshot_details
where enrollment is null
   or enrollment_category is null
   or primary_completion_date is null
   or primary_completion_date_category is null
   or start_date is null
;  -- terminator added: the statement previously ran into the next query
|
||||
/*, cte4 as (
|
||||
select cte3.nct_id, cte3.submission_date - cte3.start_date as submission_presecence
|
||||
from cte3
|
||||
)
|
||||
select avg(submission_presecence)
|
||||
from cte4
|
||||
; -- -33 day difference on average
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
-- Trials where the earliest snapshot submitted on/after its start_date has a
-- version number at or below the latest snapshot submitted on/before it.
with first_post_start as (
    -- lowest version whose start_date is on or before its submission_date
    select
        ts.nct_id,
        min(ts."version") as min_version
    from history.trial_snapshots as ts
    where ts.start_date <= ts.submission_date
    group by ts.nct_id
), last_pre_start as (
    -- highest version whose start_date is on or after its submission_date
    select
        ts.nct_id,
        max(ts."version") as max_version
    from history.trial_snapshots as ts
    where ts.start_date >= ts.submission_date
    group by ts.nct_id
)
select *
from first_post_start
inner join last_pre_start
    on first_post_start.nct_id = last_pre_start.nct_id
where min_version >= max_version
;  -- terminator added: the statement previously had none
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* Need to take a different tack in filling out this version of the data.
|
||||
* The idea is that we need the latest of each major category
|
||||
* before the start date.
|
||||
* */
|
||||
|
||||
--get the set of trials which have
|
||||
-- Planned vs. actual enrollment and duration for trials with ICD10 links.
-- "Planned" values come from the snapshot closest to the start date that has
-- the value filled in; "actual" values come from ctgov.studies.
with cte as (
    /* Absolute difference, in days, between the start date and the
     * submission_date of each version of the trial.
     */
    select
        s.nct_id,
        s.start_date,
        ts."version",
        ts.submission_date,
        abs(extract(epoch from ts.submission_date - s.start_date)::float/(24*60*60)) as start_deviance
    from ctgov.studies as s
    inner join history.trial_snapshots as ts
        on s.nct_id = ts.nct_id
    where s.nct_id in (select tti.nct_id from "DiseaseBurden".trial_to_icd10 as tti)
), cte2 as (
    /* Rank each version by its proximity to the start date.
     * "version" breaks ties so two equally distant snapshots always rank
     * the same way.
     */
    select
        cte.nct_id,
        cte."version",
        row_number() over (
            partition by cte.nct_id
            order by cte.start_deviance, cte."version"
        ) as rownum,
        cte.submission_date,
        cte.start_deviance,
        cte.start_date,
        ts.primary_completion_date,
        ts.primary_completion_date_category,
        ts.overall_status,
        ts.enrollment,
        ts.enrollment_category
    from cte
    inner join history.trial_snapshots as ts
        on cte.nct_id = ts.nct_id and cte."version" = ts."version"
), cte3_primary_completion as (
    /* per trial: rank of the closest version with a primary_completion_date */
    select cte2.nct_id, min(cte2.rownum) as primary_completion_source
    from cte2
    where cte2.primary_completion_date is not null
    group by cte2.nct_id
), cte3_enrollment as (
    /* per trial: rank of the closest version with an enrollment value */
    select cte2.nct_id, min(cte2.rownum) as enrollment_source
    from cte2
    where cte2.enrollment is not null
    group by cte2.nct_id
), cte4 as (
    /* Join the best options together to get the data of interest.
     *
     * On further inspection there are just a few trials where the two
     * sources differ, many of them with a 7+ month gap between the two
     * versions, so those are dropped below.
     */
    select
        c3e.nct_id,
        c2a."version" as version_a,
        c2a.enrollment,
        c2a.enrollment_category,
        c2b."version" as version_b,
        c2b.primary_completion_date,
        c2b.primary_completion_date_category
    from cte3_enrollment as c3e
    inner join cte2 as c2a
        on c3e.nct_id = c2a.nct_id and c3e.enrollment_source = c2a.rownum
    inner join cte3_primary_completion as c3p
        on c3e.nct_id = c3p.nct_id
    inner join cte2 as c2b
        on c3p.nct_id = c2b.nct_id and c3p.primary_completion_source = c2b.rownum
), cte5 as (
    /* trials whose enrollment and completion date come from different versions */
    select nct_id
    from cte4
    where version_a != version_b
)
select
    c.nct_id,
    s2.overall_status,
    c.enrollment as planned_enrollment,
    s2.enrollment,
    s2.start_date,
    c.primary_completion_date as planned_primary_completion_date,
    s2.primary_completion_date,
    extract(epoch from c.primary_completion_date - s2.start_date)/(24*60*60) as planned_duration,
    s2.primary_completion_date - s2.start_date as actual_duration
from cte4 as c
inner join ctgov.studies as s2
    on c.nct_id = s2.nct_id
-- NOT EXISTS instead of NOT IN: a single NULL in the subquery would make
-- NOT IN reject every row.
where not exists (select 1 from cte5 where cte5.nct_id = c.nct_id)
;
|
||||
|
||||
|
||||
/*
|
||||
* Concern about causal inference
|
||||
*
|
||||
* When putting the data together for CBO it looked like we got occasional updates about
|
||||
* the status of trials that included enrollment updates.
|
||||
* That doesn't appear to be the case, but that messes with the ability to causally identify
|
||||
* any results. I need to be careful about how this data is used.
|
||||
*
|
||||
* I created the statements below to get the data that I need.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
----get the set of trial snapshots
|
||||
-- One row per distinct observed state of a trial (dates, enrollment, status)
-- with the earliest submission date at which that state was seen.
-- Restricted to trials with an accepted ICD10 link, to snapshots submitted on
-- or after the start date, and to states that are not yet Completed or
-- Terminated.
create or replace view public.view_cte as
select
    ts.nct_id,
    ts.primary_completion_date,
    ts.primary_completion_date_category,
    ts.enrollment,
    ts.start_date,
    ts.enrollment_category,
    ts.overall_status,
    min(ts.submission_date) as earliest_date_observed
from history.trial_snapshots as ts
where ts.submission_date >= ts.start_date
  and ts.overall_status not in ('Completed','Terminated')
  and ts.nct_id in (
        select tti.nct_id
        from "DiseaseBurden".trial_to_icd10 as tti
        where tti.approved = 'accepted'
      )
group by
    ts.nct_id,
    ts.primary_completion_date,
    ts.primary_completion_date_category,
    ts.start_date,
    ts.enrollment,
    ts.enrollment_category,
    ts.overall_status
;
|
||||
-- Accepted trial -> ICD10 links joined to their cause and its position in
-- the cause hierarchy.  Codes are compared with '-' and '.' stripped so that
-- differently formatted codes still match.
create or replace view public.view_disbur_cte0 as
select
    tti.nct_id,
    tti.ui,
    tti."condition",
    itc.cause_text,
    ch.cause_id,
    ch."level"
from "DiseaseBurden".trial_to_icd10 as tti
inner join "DiseaseBurden".icd10_to_cause as itc
    on replace(replace(tti.ui, '-', ''), '.', '') = replace(replace(itc.code, '-', ''), '.', '')
inner join "DiseaseBurden".cause_hierarchy as ch
    on itc.cause_text = ch.cause_name
where tti.approved = 'accepted'
;

-- Same rows as view_disbur_cte0, ordered by trial.  This used to repeat the
-- whole query; it is now defined on top of view_disbur_cte0 so the join
-- logic lives in one place.  Column names and order are unchanged.
create or replace view public.view_trial_to_cause as
select
    v.nct_id,
    v.ui,
    v."condition",
    v.cause_text,
    v.cause_id,
    v."level"
from public.view_disbur_cte0 as v
order by v.nct_id
;
|
||||
|
||||
-- Highest cause level reached by each trial.
create or replace view public.view_disbur_cte as
select
    v0.nct_id,
    max(v0."level") as max_level
from view_disbur_cte0 as v0
group by v0.nct_id
;
|
||||
-- Distinct cause rows of each trial that sit at the trial's highest level.
create or replace view public.view_disbur_cte2 as
select distinct
    ttc.nct_id,
    ttc.ui,
    ttc."condition",
    ttc.cause_text,
    ttc.cause_id,
    lvl.max_level
from view_trial_to_cause as ttc
inner join view_disbur_cte as lvl
    on  lvl.nct_id    = ttc.nct_id
    and lvl.max_level = ttc."level"
order by
    ttc.nct_id,
    ttc.ui
;
|
||||
-- Attach the level-1 ICD10 category whose code range contains the first
-- three characters of each matched code.
create or replace view public.view_disbur_cte3 as
select
    dc.nct_id,
    substring(dc.ui for 3) as code,
    dc."condition",
    dc.cause_text,
    dc.cause_id,
    ic.chapter_code as category_id,
    ic.group_name,
    dc.max_level
from view_disbur_cte2 as dc
inner join "DiseaseBurden".icd10_categories as ic
    on substring(dc.ui for 3) between ic.start_code and ic.end_code
where ic."level" = 1
;
|
||||
-- Burden rows narrowed to a single slice of the data set.
create or replace view public.view_burdens_cte as
select b.*
from "DiseaseBurden".burdens as b
where b.age_id     = 22  --all ages
  and b.measure_id = 2   --DALYs
  and b.metric_id  = 1   --number/count
  and b.sex_id     = 3   --both sexes
;
|
||||
-- Pivot the burden slice so that each (cause, year) row carries the value and
-- 95% bounds of the five SDI groups side by side.  Each SDI group is a
-- location_id; the row only exists when all five locations have data.
create or replace view public.view_burdens_cte2 as
select
    hi.cause_id,
    hi."year",
    --high sdi
    hi.val       as h_sdi_val,
    hi.upper_95  as h_sdi_u95,
    hi.lower_95  as h_sdi_l95,
    --high-middle sdi
    him.val      as hm_sdi_val,
    him.upper_95 as hm_sdi_u95,
    him.lower_95 as hm_sdi_l95,
    --middle sdi
    mid.val      as m_sdi_val,
    mid.upper_95 as m_sdi_u95,
    mid.lower_95 as m_sdi_l95,
    --low-middle sdi
    lom.val      as lm_sdi_val,
    lom.upper_95 as lm_sdi_u95,
    lom.lower_95 as lm_sdi_l95,
    --low sdi
    lo.val       as l_sdi_val,
    lo.upper_95  as l_sdi_u95,
    lo.lower_95  as l_sdi_l95
from view_burdens_cte as hi
inner join view_burdens_cte as him
    on  him.cause_id = hi.cause_id
    and him."year"   = hi."year"
    and him.location_id = 44634
inner join view_burdens_cte as mid
    on  mid.cause_id = hi.cause_id
    and mid."year"   = hi."year"
    and mid.location_id = 44639
inner join view_burdens_cte as lom
    on  lom.cause_id = hi.cause_id
    and lom."year"   = hi."year"
    and lom.location_id = 44636
inner join view_burdens_cte as lo
    on  lo.cause_id = hi.cause_id
    and lo."year"   = hi."year"
    and lo.location_id = 44637
where hi.location_id = 44635
;
|
||||
--drop view if exists public.formatted_data cascade;
|
||||
-- Analysis data set: one row per observed trial state, cause and category,
-- with the number of distinct applications already marketed at the time the
-- state was first observed and the SDI burden figures for that year.
create or replace view public.formatted_data as
select
    cte.nct_id,
    cte.start_date,
    cte.enrollment as current_enrollment,
    cte.enrollment_category,
    cte.overall_status as current_status,
    cte.earliest_date_observed,
    -- Share of the planned duration elapsed when the state was observed.
    -- NULLIF yields NULL instead of a division-by-zero error (which would
    -- break every query on the view) when primary_completion_date equals
    -- start_date.
    extract(epoch from (cte.earliest_date_observed - cte.start_date))
        / nullif(extract(epoch from (cte.primary_completion_date - cte.start_date)), 0) as elapsed_duration,
    count(distinct mttmsd."application_number_or_citation") as n_brands,
    dbc3.code,
    dbc3."condition",
    dbc3.cause_text,
    dbc3.cause_id,
    dbc3.category_id,
    dbc3.group_name,
    dbc3.max_level,
    b."year",
    --high sdi
    b.h_sdi_val,  b.h_sdi_u95,  b.h_sdi_l95,
    --high-middle sdi
    b.hm_sdi_val, b.hm_sdi_u95, b.hm_sdi_l95,
    --middle sdi
    b.m_sdi_val,  b.m_sdi_u95,  b.m_sdi_l95,
    --low-middle sdi
    b.lm_sdi_val, b.lm_sdi_u95, b.lm_sdi_l95,
    --low sdi
    b.l_sdi_val,  b.l_sdi_u95,  b.l_sdi_l95
from view_cte as cte
inner join public.match_trial_to_marketing_start_date as mttmsd
    on cte.nct_id = mttmsd."nct_id"
inner join view_disbur_cte3 as dbc3
    on dbc3.nct_id = cte.nct_id
inner join view_burdens_cte2 as b
    on  b.cause_id = dbc3.cause_id
    -- burden figures are matched on the calendar year of the observation
    and extract(year from b."year") = extract(year from cte.earliest_date_observed)
where
    -- only applications already on the market when the state was observed
    mttmsd."min" <= cte.earliest_date_observed
group by
    cte.nct_id,
    cte.start_date,
    cte.enrollment,
    cte.enrollment_category,
    cte.overall_status,
    cte.earliest_date_observed,
    elapsed_duration,
    dbc3.code,
    dbc3."condition",
    dbc3.cause_text,
    dbc3.cause_id,
    dbc3.category_id,
    dbc3.group_name,
    dbc3.max_level,
    b.cause_id,
    b."year",
    b.h_sdi_val,  b.h_sdi_u95,  b.h_sdi_l95,
    b.hm_sdi_val, b.hm_sdi_u95, b.hm_sdi_l95,
    b.m_sdi_val,  b.m_sdi_u95,  b.m_sdi_l95,
    b.lm_sdi_val, b.lm_sdi_u95, b.lm_sdi_l95,
    b.l_sdi_val,  b.l_sdi_u95,  b.l_sdi_l95
order by cte.nct_id, cte.earliest_date_observed
;--used this one 2023-04-05
|
||||
--get the planned enrollment
|
||||
-- For each snapshot version of a trial with ICD10 links: the absolute
-- distance, in days, between the snapshot's submission_date and the start
-- date recorded in ctgov.studies.
create or replace view public.time_between_submission_and_start_view as
select
    s.nct_id,
    s.start_date,
    ts."version",
    ts.submission_date,
    abs(extract(epoch from ts.submission_date - s.start_date)::float/(24*60*60)) as start_deviance
from ctgov.studies as s
inner join history.trial_snapshots as ts
    on ts.nct_id = s.nct_id
where s.nct_id in (select tti.nct_id from "DiseaseBurden".trial_to_icd10 as tti)
;
|
||||
-- Rank each snapshot version of a trial by its proximity to the start date
-- (rownum = 1 is the closest) and carry the snapshot's planning fields along.
--
-- "version" is a tie-breaker: this view is read twice by
-- formatted_data_with_planned_enrollment (directly and through
-- enrollment_closest_to_start_view).  Without a total order two equally
-- distant snapshots could be numbered differently in each evaluation, so the
-- rownum picked in one no longer identifies the same snapshot in the other.
create or replace view rank_proximity_to_start_time_view as
select
    cte.nct_id,
    cte."version",
    row_number() over (
        partition by cte.nct_id
        order by cte.start_deviance, cte."version"
    ) as rownum,
    cte.submission_date,
    cte.start_deviance,
    cte.start_date,
    ts.primary_completion_date,
    ts.primary_completion_date_category,
    ts.overall_status,
    ts.enrollment,
    ts.enrollment_category
from public.time_between_submission_and_start_view as cte
inner join history.trial_snapshots as ts
    on cte.nct_id = ts.nct_id and cte."version" = ts."version"
;
|
||||
-- Per trial: the rank (rownum) of the snapshot closest to the start date
-- that has an enrollment value filled in.
create or replace view enrollment_closest_to_start_view as
select
    ranked.nct_id,
    min(ranked.rownum) as enrollment_source
from rank_proximity_to_start_time_view as ranked
where ranked.enrollment is not null
group by ranked.nct_id
;
|
||||
--drop view public.formatted_data_with_planned_enrollment ;
|
||||
-- formatted_data plus the trial's final status from ctgov.studies and the
-- planned enrollment, taken from the snapshot closest to the start date that
-- has an enrollment value.
create or replace view formatted_data_with_planned_enrollment as
select
    f.*,
    s.overall_status as final_status,
    planned."version",
    planned.enrollment as planned_enrollment
from formatted_data as f
inner join ctgov.studies as s
    on s.nct_id = f.nct_id
inner join enrollment_closest_to_start_view as src
    on src.nct_id = f.nct_id
inner join rank_proximity_to_start_time_view as planned
    on  planned.nct_id = src.nct_id
    and planned.rownum = src.enrollment_source
;
|
||||
-- Inspect the final data set.  Terminated explicitly so the count queries
-- that follow are not parsed as part of this statement.
select * from formatted_data_with_planned_enrollment;
|
||||
|
||||
-------------------GET COUNTS------------------
|
||||
-- Distinct trials surviving each stage of the pipeline; the trailing
-- comments record the counts seen when these were last run.
select count(distinct v.nct_id) from public.view_cte as v; --88
select count(distinct v.nct_id) from public.view_disbur_cte0 as v; --130
select count(distinct v.nct_id) from public.view_trial_to_cause as v; --130
select count(distinct v.nct_id) from public.view_disbur_cte as v;--130
select count(distinct v.nct_id) from public.view_disbur_cte2 as v;--130
select count(distinct v.nct_id) from public.view_disbur_cte3 as v;--130
select count(distinct v.nct_id) from public.formatted_data as v; --48 probably because there are so many trials that don't fall into a GBD category/cause
select count(distinct v.nct_id) from public.time_between_submission_and_start_view as v;--1067
select count(distinct v.nct_id) from rank_proximity_to_start_time_view as v;--1067
select count(distinct v.nct_id) from enrollment_closest_to_start_view as v;--1067
select count(distinct v.nct_id) from formatted_data_with_planned_enrollment as v;--48

-- first and last stage, side by side
select count(distinct v.nct_id) from public.view_trial_to_cause as v; --130
select count(distinct v.nct_id) from formatted_data_with_planned_enrollment as v;--48
|
||||
|
||||
|
||||
--get durations and count snapshots per trial
|
||||
-- Actual duration of each downloaded trial next to the number of rows it
-- contributes to formatted_data_with_planned_enrollment.
with trial_durations as (
    select
        s.nct_id,
        s.start_date,
        s.primary_completion_date,
        s.overall_status,
        s.primary_completion_date - s.start_date as duration
    from ctgov.studies as s
    where s.nct_id in (select ds.nct_id from http.download_status as ds)
), snapshot_counts as (
    select
        fdwpe.nct_id,
        count(*) as snapshot_count
    from formatted_data_with_planned_enrollment as fdwpe
    group by fdwpe.nct_id
)
select
    td.nct_id,
    td.overall_status,
    td.duration,
    sc.snapshot_count
from trial_durations as td
inner join snapshot_counts as sc
    on sc.nct_id = td.nct_id
;
|
||||
@ -0,0 +1,104 @@
|
||||
select * from "DiseaseBurden".icd10_to_cause itc ;
|
||||
select * from "DiseaseBurden".cause c ;
|
||||
|
||||
|
||||
|
||||
|
||||
select c.id, count(distinct code)
|
||||
from "DiseaseBurden".cause c
|
||||
join "DiseaseBurden".icd10_to_cause itc
|
||||
on c.cause = itc.cause_text
|
||||
group by c.id
|
||||
order by c.id
|
||||
;
|
||||
|
||||
select tti.approved,count(distinct nct_id) from "DiseaseBurden".trial_to_icd10 tti
|
||||
group by tti.approved;
|
||||
|
||||
select nct_id, "condition", ui
|
||||
from "DiseaseBurden".trial_to_icd10 tti
|
||||
where tti.approved = 'accepted';
|
||||
|
||||
-- "if exists" lets the script run in a session where the view has not been created yet.
drop view if exists trial_to_cause;
|
||||
|
||||
---Link trials to their causes
|
||||
|
||||
-- Accepted trial-to-ICD10 matches joined to their cause text and to the cause
-- hierarchy. ICD-10 codes are compared with '-' and '.' stripped from both sides.
create temp view trial_to_cause as
select
    tti.nct_id,
    tti.ui,
    tti."condition",
    itc.cause_text,
    ch.cause_id,
    ch."level"
from "DiseaseBurden".trial_to_icd10 as tti
inner join "DiseaseBurden".icd10_to_cause as itc
    -- translate() with an empty replacement set deletes every '-' and '.'
    on translate(tti.ui, '-.', '') = translate(itc.code, '-.', '')
inner join "DiseaseBurden".cause_hierarchy as ch
    on ch.cause_name = itc.cause_text
where tti.approved = 'accepted'
order by tti.nct_id
;
|
||||
|
||||
-- Per trial: number of matched cause rows and the shallowest/deepest hierarchy level.
-- (group by nct_id already yields one row per trial, so no distinct is needed.)
select
    ttc.nct_id,
    count(*),
    min(ttc."level"),
    max(ttc."level")
from trial_to_cause as ttc
group by ttc.nct_id
;
|
||||
|
||||
select nct_id,cause_text,cause_id from trial_to_cause
|
||||
where level = 3
|
||||
group by nct_id,cause_text,cause_id
|
||||
order by cause_id
|
||||
;
|
||||
|
||||
select cause_id,"condition",cause_text,count(distinct nct_id) as c
|
||||
from trial_to_cause
|
||||
where level >= 3
|
||||
group by cause_id,"condition",cause_text
|
||||
--having count(distinct nct_id) > 2
|
||||
order by cause_id
|
||||
;
|
||||
|
||||
-- For each trial keep only the causes at its maximum hierarchy level, then map
-- the first three characters of the ICD-10 code onto an icd10_categories range.
-- Result: distinct (nct_id, cause_id, category_id) triples.
with deepest_level as (
    -- maximum "level" reached by each trial
    select
        nct_id,
        max("level") as max_level
    from trial_to_cause
    group by nct_id
), deepest_causes as (
    -- de-duplicated cause rows sitting at that maximum level
    select distinct
        ttc.nct_id,
        ttc.ui,
        ttc."condition",
        ttc.cause_text,
        ttc.cause_id,
        dl.max_level
    from trial_to_cause as ttc
    inner join deepest_level as dl
        on dl.nct_id = ttc.nct_id
        and dl.max_level = ttc."level"
), categorized as (
    -- attach the category whose [start_code, end_code] range contains the 3-character code prefix
    select
        dc.nct_id,
        substring(dc.ui for 3) as code,
        dc."condition",
        dc.cause_text,
        dc.cause_id,
        ic.id as category_id,
        ic.group_name
    from deepest_causes as dc
    inner join "DiseaseBurden".icd10_categories as ic
        on substring(dc.ui for 3) between ic.start_code and ic.end_code
)
select distinct
    nct_id,
    cause_id,
    category_id
from categorized
;
|
||||
|
||||
|
||||
@ -0,0 +1,83 @@
|
||||
|
||||
drop view if exists public.match_trial_to_marketing_start_date;
|
||||
DROP VIEW if exists public.match_trial_to_ndc11;
|
||||
drop view if exists public.match_trials_to_bn_in;
|
||||
|
||||
drop view if exists history.match_drugs_to_trials;
|
||||
DROP TABLE IF EXISTS history.trial_snapshots;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS history.trial_snapshots
|
||||
(
|
||||
nct_id character varying(15) COLLATE pg_catalog."default" NOT NULL,
|
||||
version integer NOT NULL,
|
||||
submission_date timestamp without time zone,
|
||||
primary_completion_date timestamp without time zone,
|
||||
primary_completion_date_category history.updatable_catetories,
|
||||
start_date timestamp without time zone,
|
||||
start_date_category history.updatable_catetories,
|
||||
completion_date timestamp without time zone,
|
||||
completion_date_category history.updatable_catetories,
|
||||
overall_status history.study_statuses,
|
||||
enrollment integer,
|
||||
enrollment_category history.updatable_catetories,
|
||||
sponsor character varying COLLATE pg_catalog."default",
|
||||
responsible_party character varying COLLATE pg_catalog."default",
|
||||
CONSTRAINT trial_snapshots_pkey PRIMARY KEY (nct_id, version)
|
||||
);
|
||||
|
||||
|
||||
ALTER TABLE IF EXISTS history.trial_snapshots
|
||||
OWNER to root;
|
||||
|
||||
|
||||
-- Trials (restricted to those present in history.trial_snapshots) matched to
-- RxNorm concepts whose 'RxNorm Name' property equals the intervention's
-- lower-cased MeSH term.
CREATE OR REPLACE VIEW history.match_drugs_to_trials AS
SELECT
    bi.nct_id,
    rp.rxcui,
    rp.propvalue1
FROM ctgov.browse_interventions AS bi
INNER JOIN rxnorm_migrated.rxnorm_props AS rp
    ON rp.propvalue1::text = bi.downcase_mesh_term::text
WHERE rp.propname::text = 'RxNorm Name'::text
    AND EXISTS (
        SELECT 1
        FROM history.trial_snapshots AS ts
        WHERE ts.nct_id = bi.nct_id::text
    );
|
||||
|
||||
|
||||
-- Trials with snapshots, matched through the MeSH-list intervention term to an
-- RxNorm 'Active_ingredient_name' property and then to related concepts of
-- type BN, IN or MIN (exposed as bn_or_in_cui).
-- The joins are written as INNER JOINs: the WHERE clause filters on
-- rp.propname and rr.tty2, which already discarded every NULL-extended row the
-- former LEFT JOINs could produce, so the result set is unchanged and the
-- query now states what it actually does.
CREATE OR REPLACE VIEW public.match_trials_to_bn_in
AS WITH trialncts AS (
    SELECT DISTINCT ts.nct_id
    FROM history.trial_snapshots ts
)
SELECT bi.nct_id,
    bi.downcase_mesh_term,
    rr.tty2,
    rr.rxcui2 AS bn_or_in_cui,
    count(*) AS count
FROM ctgov.browse_interventions bi
    INNER JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
    INNER JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui
WHERE (bi.nct_id::text IN ( SELECT trialncts.nct_id
                            FROM trialncts))
    AND bi.mesh_type::text = 'mesh-list'::text
    AND rp.propname::text = 'Active_ingredient_name'::text
    AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
ORDER BY bi.nct_id;
|
||||
|
||||
|
||||
-- Trials matched to NDC codes: brand-name concepts (tty1 = 'BN') from
-- match_trials_to_bn_in are followed to SBD/BPCK concepts and from there to
-- ALLNDC_HISTORY rows with sab = 'RXNORM'.
-- INNER JOINs replace the former LEFT JOINs: the WHERE clause filters on
-- rr.tty1, rr.tty2 and ah.sab, so unmatched (NULL-extended) rows were never
-- returned anyway; the result set is unchanged.
CREATE OR REPLACE VIEW public.match_trial_to_ndc11
AS SELECT mttbi.nct_id,
    ah.ndc,
    count(*) AS count
FROM match_trials_to_bn_in mttbi
    INNER JOIN rxnorm_migrated.rxnorm_relations rr ON mttbi.bn_or_in_cui = rr.rxcui1
    INNER JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON rr.rxcui2 = ah.rxcui
WHERE rr.tty1 = 'BN'::bpchar
    AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar]))
    AND ah.sab::text = 'RXNORM'::text
GROUP BY mttbi.nct_id, ah.ndc
ORDER BY mttbi.nct_id, ah.ndc;
|
||||
|
||||
|
||||
-- Earliest marketing start date per trial and application number, considering
-- only human prescription drugs in the listed NDA/ANDA/BLA marketing categories.
CREATE OR REPLACE VIEW public.match_trial_to_marketing_start_date AS
SELECT
    mttn.nct_id,
    n.application_number_or_citation,
    min(n.marketing_start_date) AS min
FROM match_trial_to_ndc11 AS mttn
INNER JOIN spl.nsde AS n
    ON n.package_ndc11::bpchar = mttn.ndc
WHERE n.product_type::text = 'HUMAN PRESCRIPTION DRUG'::text
    AND n.marketing_category::text IN (
        'NDA',
        'ANDA',
        'BLA',
        'NDA authorized generic',
        'NDA AUTHORIZED GENERIC'
    )
GROUP BY
    mttn.nct_id,
    n.application_number_or_citation
ORDER BY mttn.nct_id;
|
||||
|
||||
@ -0,0 +1,308 @@
|
||||
select * from formatted_data_with_planned_enrollment fdwpe
|
||||
;
|
||||
|
||||
|
||||
select * from formatted_data_mat fdm
|
||||
;
|
||||
|
||||
-- Number of distinct conditions; terminated so it does not merge with the following select.
select count(distinct condition) from formatted_data_mat fdm;
|
||||
|
||||
select nct_id, fdm.current_status , count(*)
|
||||
from formatted_data_mat fdm
|
||||
group by nct_id , fdm.current_status
|
||||
order by nct_id
|
||||
;
|
||||
|
||||
select * from formatted_data_mat fdm ;
|
||||
|
||||
|
||||
-- group with trial split:
-- distinct rows of formatted_data_mat for trials that show more than one current_status
with multi_status_trials as (
    select nct_id
    from formatted_data_mat
    group by nct_id
    having count(distinct current_status) > 1
)
select distinct
    fdm.nct_id,
    current_status,
    earliest_date_observed,
    elapsed_duration,
    n_brands,
    category_id,
    h_sdi_val,
    h_sdi_u95,
    h_sdi_l95,
    hm_sdi_val,
    hm_sdi_u95,
    hm_sdi_l95,
    m_sdi_val,
    m_sdi_u95,
    m_sdi_l95,
    lm_sdi_val,
    lm_sdi_u95,
    lm_sdi_l95,
    l_sdi_val,
    l_sdi_u95,
    l_sdi_l95
from formatted_data_mat as fdm
inner join multi_status_trials as mst
    on mst.nct_id = fdm.nct_id
order by nct_id, earliest_date_observed
;
|
||||
|
||||
-- The original statement stopped after "from"; formatted_data_mat is the table
-- the neighbouring category_id queries read from.
select count(distinct category_id) from formatted_data_mat fdm;
|
||||
|
||||
|
||||
select distinct category_id from formatted_data_mat fdm
|
||||
;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
-- group with trial split:
-- distinct rows of formatted_data_mat for trials that show more than one current_status
with multi_status_trials as (
    select nct_id
    from formatted_data_mat
    group by nct_id
    having count(distinct current_status) > 1
)
select distinct
    fdm.nct_id,
    current_status,
    earliest_date_observed,
    elapsed_duration,
    n_brands,
    category_id,
    h_sdi_val,
    h_sdi_u95,
    h_sdi_l95,
    hm_sdi_val,
    hm_sdi_u95,
    hm_sdi_l95,
    m_sdi_val,
    m_sdi_u95,
    m_sdi_l95,
    lm_sdi_val,
    lm_sdi_u95,
    lm_sdi_l95,
    l_sdi_val,
    l_sdi_u95,
    l_sdi_l95
from formatted_data_mat as fdm
inner join multi_status_trials as mst
    on mst.nct_id = fdm.nct_id
order by nct_id, earliest_date_observed
; --TODO: join to usp dc dataset
|
||||
|
||||
|
||||
|
||||
|
||||
-- Map trials to USP categories/classes: MeSH-list intervention terms are
-- matched to RxNorm 'Active_ingredient_name' properties, followed to related
-- BPCK/SCD/SBD/GPCK concepts, and those rxcuis are looked up in
-- "Formularies".usp_dc.
-- INNER JOINs replace the former LEFT JOINs: the WHERE clause filters on
-- rp.propname and rr.tty2, so NULL-extended rows were always dropped and the
-- result set is unchanged.
WITH trialncts AS (
    SELECT DISTINCT ts.nct_id
    FROM history.trial_snapshots ts
), nct_to_cui AS (
    SELECT bi.nct_id,
        bi.downcase_mesh_term,
        rr.tty2,
        rr.rxcui2 AS approved_drug_rxcui,
        count(*) AS count
    FROM ctgov.browse_interventions bi
        INNER JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
        INNER JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui
    WHERE (bi.nct_id::text IN ( SELECT trialncts.nct_id
                                FROM trialncts))
        AND bi.mesh_type::text = 'mesh-list'::text
        AND rp.propname::text = 'Active_ingredient_name'::text
        AND (rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text]))
    GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
)
SELECT nct_to_cui.nct_id,
    ud."USP Category",
    ud."USP Class"
FROM nct_to_cui
    JOIN "Formularies".usp_dc ud ON ud.rxcui::bpchar = nct_to_cui.approved_drug_rxcui
GROUP BY nct_to_cui.nct_id, ud."USP Category", ud."USP Class"
ORDER BY nct_to_cui.nct_id;
|
||||
|
||||
|
||||
|
||||
|
||||
-- Number of distinct brand-name (BN) concepts reachable from each trial:
-- MeSH-list term -> RxNorm active ingredient -> BPCK/SCD/SBD/GPCK concept -> BN concept.
-- INNER JOINs replace the former LEFT JOINs: the WHERE clause filters on
-- rp.propname, rr.tty2 and rr2.tty2, so NULL-extended rows were always dropped
-- and the result set is unchanged.
CREATE MATERIALIZED VIEW "Formularies".nct_to_brands_through_uspdc
AS
WITH trialncts AS (
    SELECT DISTINCT ts.nct_id
    FROM history.trial_snapshots ts
)
SELECT
    bi.nct_id,
    count( distinct rr2.rxcui2 ) as brand_name_count
FROM ctgov.browse_interventions bi
    INNER JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text --match mesh terms to rxcui
    INNER JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui -- match rxcui to relations between rxcuis
    INNER JOIN rxnorm_migrated.rxnorm_relations rr2 ON rr.rxcui2 = rr2.rxcui1 -- follow a second relation from the related rxcui
WHERE
    (bi.nct_id::text IN (SELECT trialncts.nct_id FROM trialncts)) --check the nct_id is in our list
    AND bi.mesh_type::text = 'mesh-list'::text --we are only looking at mesh "list" rxcuis
    AND rp.propname::text = 'Active_ingredient_name'::text --and we only care about active ingredients
    AND (rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text])) --linked from those active ingredients to packs/drugs
    AND (rr2.tty2::text = 'BN') --and from those packs/drugs back to brand names
GROUP BY bi.nct_id --one row per trial
;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
-- formatted_data_mat rows joined to the per-trial brand_name_count.
-- Only the *_sdi_val columns are selected; the u95/l95 bounds, start_date,
-- current_enrollment and enrollment_category are deliberately left out.
select
    fdm.nct_id,
    fdm.current_status,
    fdm.earliest_date_observed,
    fdm.elapsed_duration,
    fdm.n_brands as identical_brands,
    brands.brand_name_count,
    fdm.category_id,
    fdm.final_status,
    fdm.h_sdi_val,
    fdm.hm_sdi_val,
    fdm.m_sdi_val,
    fdm.lm_sdi_val,
    fdm.l_sdi_val
from formatted_data_mat as fdm
inner join "Formularies".nct_to_brands_through_uspdc as brands
    on brands.nct_id = fdm.nct_id
;
|
||||
|
||||
--example of multiple reopenings
|
||||
-- All rows for one trial; terminated so the statement does not run into the following WITH query.
select *
from formatted_data_mat fdm
where nct_id = 'NCT01239797'
;
|
||||
|
||||
--attempt to automatically find transition periods
|
||||
-- One row per trial: first date observed as 'Active, not recruiting', last date
-- observed in any other status, and the difference between the two.
-- The per-trial min/max are computed with GROUP BY (not window functions), so
-- each CTE yields exactly one row per trial and the join no longer multiplies
-- every "closed" row by every "open" row of the same trial.
with first_closed as (
    select
        nct_id,
        min(earliest_date_observed) as earliest_closed_enrollment
    from formatted_data_mat
    where current_status = 'Active, not recruiting'
    group by nct_id
), last_open as (
    select
        nct_id,
        max(earliest_date_observed) as latest_open_enrollment
    from formatted_data_mat
    where current_status != 'Active, not recruiting'
    group by nct_id
)
select
    fc.nct_id
    ,fc.earliest_closed_enrollment
    ,lo.latest_open_enrollment
    ,fc.earliest_closed_enrollment - lo.latest_open_enrollment
from first_closed fc
join last_open lo on fc.nct_id = lo.nct_id
;
|
||||
|
||||
|
||||
|
||||
/* So ocassionally a study reopens enrollment.
|
||||
* If that didn't happen, then I could just find the first enrollment matching X and/or last enrollment matching Y
|
||||
* to get the transitions
|
||||
* Instead I need to create shifts of statuses between snapshots, and then remove all of those that did not change.
|
||||
*
|
||||
* Better yet, just get the last shift to ANR.
|
||||
* */
|
||||
|
||||
|
||||
/* Take each entry and get the status from a lagged snapshot
|
||||
* Then select each snapshot moving from previous_state to ANR
|
||||
* and filter out everything except the last one.
|
||||
* */
|
||||
-- Find, per trial, the LAST snapshot at which the status changed into
-- 'Active, not recruiting', then join it back to formatted_data_mat.
-- The original grouped by date_current while taking max(date_current), which
-- kept every such shift instead of only the latest one.
with status_changes as (
    -- pair each row with the status of the previous row of the same trial
    select
        nct_id
        ,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed) as previous_status
        ,current_status
        ,earliest_date_observed as date_current
    from formatted_data_mat fdm
), shifts_to_anr as (
    -- distinct changes from some other status into 'Active, not recruiting'
    -- (rows with no previous status are excluded because the comparison with NULL is not true)
    select distinct
        nct_id
        ,previous_status
        ,current_status
        ,date_current
    from status_changes
    where
        previous_status != current_status
        and
        current_status = 'Active, not recruiting'
), cte2 as (
    -- keep only the shift(s) dated at the trial's latest shift date
    select
        nct_id
        ,previous_status
        ,current_status
        ,date_current as date_current_max
    from (
        select
            s.*
            ,max(s.date_current) over (partition by s.nct_id) as latest_shift
        from shifts_to_anr s
    ) dated
    where date_current = latest_shift
)
select *
from formatted_data_mat fdm
join cte2
    on cte2.nct_id = fdm.nct_id
    and cte2.date_current_max = fdm.earliest_date_observed
; --join back into
|
||||
|
||||
@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
# Uses
|
||||
#
|
||||
# Defaults
|
||||
# Require at least the container name.
if [[ $# -lt 1 ]]; then
    echo "Usage: pg_export container_name [database_name] [username]"
    # exit, not return: in an executed (non-sourced) script `return` only prints
    # an error and the script would carry on with an empty CONTAINER.
    exit 1
fi
|
||||
|
||||
CONTAINER=$1
|
||||
DBNAME=${2:-aact_db}
|
||||
USER=${3:-root}
|
||||
|
||||
#
|
||||
# for sqlfile in ../export/export_data_*.sql; do
|
||||
# if [[ -f "$sqlfile" ]]; then
|
||||
# outfile="../export/output_$(date -I)_$(basename ${sqlfile%.sql}).sql"
|
||||
# # podman exec -t "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f - < "$sqlfile" > "$outfile"
|
||||
# # podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f "$sqlfile" > "$outfile"
|
||||
# podman cp "$sqlfile" "$CONTAINER":/tmp/query.sql
|
||||
# podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f /tmp/query.sql > "$outfile"
|
||||
# fi
|
||||
# done
|
||||
#
|
||||
|
||||
# Run every ../export/export_data_*.sql file inside the container and store the
# psql output next to it as output_<date>_<name>.sql.
for sqlfile in ../export/export_data_*.sql; do
    # -f also skips the literal pattern when the glob matches nothing
    if [[ -f "$sqlfile" ]]; then
        # quoted so a file name containing whitespace reaches basename as one argument
        outfile="../export/output_$(date -I)_$(basename "${sqlfile%.sql}").sql"
        # copy the query into the container so psql can read it with -f
        podman cp "$sqlfile" "$CONTAINER":/tmp/query.sql
        podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -f "/tmp/query.sql" > "$outfile"
    fi
done
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
/***************CREATE VIEWS*******************/
|
||||
-- Trials (restricted to those present in history.trial_snapshots) matched to
-- RxNorm concepts whose 'RxNorm Name' property equals the intervention's
-- lower-cased MeSH term.
create or replace view history.match_drugs_to_trials as
select
    bi.nct_id,
    rp.rxcui,
    rp.propvalue1
from ctgov.browse_interventions as bi
inner join rxnorm_migrated.rxnorm_props as rp
    on rp.propvalue1 = bi.downcase_mesh_term
where rp.propname = 'RxNorm Name'
    and bi.nct_id in (select ts.nct_id from history.trial_snapshots as ts)
;
|
||||
|
||||
|
||||
/********************IN DEVELOPMENT*********************/
|
||||
|
||||
/* Get the count of brand names attached to each trial
|
||||
* I should develop this into a view that matches trials to brands
|
||||
* then create a view that gets the counts.
|
||||
*/
|
||||
-- For every rxcui matched to a trial, count its related concepts of type 'BN',
-- largest counts first.
select
    rr.rxcui1,
    count(rr.rxcui2)
from rxnorm_migrated.rxnorm_relations as rr
where rr.tty2 = 'BN'
    and rr.rxcui1 in (
        select mdt.rxcui
        from history.match_drugs_to_trials as mdt
    )
group by rr.rxcui1
order by count(rr.rxcui2) desc
;
|
||||
@ -0,0 +1,3 @@
|
||||
# TODO
|
||||
|
||||
Code up a data extraction tool that uses llama3 or a similar quality source to extract the data that I need from the extended aact_database
|
||||
@ -1,44 +0,0 @@
|
||||
SELECT why_stopped FROM ctgov.studies
|
||||
WHERE why_stopped IS NOT NULL
|
||||
LIMIT 100;
|
||||
|
||||
SELECT study_type, count(*) from ctgov.studies
|
||||
group by study_type;
|
||||
|
||||
SELECT is_fda_regulated_drug, count(*) from ctgov.studies
|
||||
GROUP BY is_fda_regulated_drug;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Note that there is a decent number of trials that have expanded access
|
||||
*/
|
||||
SELECT
|
||||
study_type
|
||||
, phase
|
||||
, has_expanded_access
|
||||
, has_dmc
|
||||
, count(*)
|
||||
FROM ctgov.studies
|
||||
WHERE
|
||||
is_fda_regulated_drug is true
|
||||
AND
|
||||
study_type = 'Interventional'
|
||||
AND
|
||||
start_date > date('2007-01-01')
|
||||
group by study_type, phase, has_expanded_access, has_dmc;
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Find different mesh terms as assigned by clinicaltrials.gov
|
||||
*/
|
||||
select * from ctgov.browse_conditions
|
||||
order by nct_id desc,mesh_type
|
||||
limit 200;
|
||||
|
||||
select * from ctgov.browse_interventions
|
||||
order by nct_id desc
|
||||
limit 200;
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
import psycopg2 as psyco
|
||||
import pandas as pd
|
||||
import nltk
|
||||
from nltk.corpus import stopwords
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.cluster import KMeans
|
||||
import re
|
||||
|
||||
|
||||
def preporcess_text(text):
    """Normalise a free-text string for vectorisation.

    Lower-cases the text, replaces every run of non-letter characters with a
    single space, tokenises it with nltk, drops English stopwords and returns
    the remaining tokens joined by single spaces.
    """
    text = text.lower()
    text = re.sub("[^A-Za-z]+", " ", text)

    #make tokens
    tokens = nltk.word_tokenize(text)

    #remove stopwords
    # Build the stopword set once per call; the original re-read the whole
    # stopword list for every single token.
    english_stopwords = set(stopwords.words("english"))
    tokens = [w for w in tokens if w not in english_stopwords]

    #rejoin
    return " ".join(tokens).strip()
|
||||
|
||||
if __name__ == "__main__":
    # Pull up to 2000 non-null why_stopped texts, clean them, vectorise them
    # with TF-IDF and print the size of each of 10 k-means clusters.
    conn = psyco.connect(dbname="aact_db", user="analysis", host="localhost", password="test")

    # try/finally guarantees the cursor and connection are released even when
    # the query raises.
    try:
        curse = conn.cursor()
        try:
            curse.execute("SELECT why_stopped FROM ctgov.studies WHERE why_stopped IS NOT NULL LIMIT 2000;")
            results = curse.fetchall()
        finally:
            curse.close()
    finally:
        conn.close()

    data = pd.DataFrame(results, columns = ["corpus"])
    data["cleaned"] = data.corpus.apply(preporcess_text)

    vectorizer = TfidfVectorizer(sublinear_tf=True)

    X = vectorizer.fit_transform(data.cleaned)

    # fixed random_state so repeated runs use the same seed
    kmeans = KMeans(n_clusters=10, random_state=11021585)
    kmeans.fit(X)

    data["cluster"] = kmeans.labels_

    print(data.groupby(["cluster"])["cleaned"].count())
|
||||
|
||||
|
||||
@ -1 +0,0 @@
|
||||
I believe this is for an ML classification of reasons for terminations.
|
||||
@ -0,0 +1 @@
|
||||
backup/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||
@ -0,0 +1,42 @@
|
||||
#!/bin/bash
|
||||
|
||||
RESTORE_DUMP_GZ="${1:-aact_db_backup_20250107_133822.sql.gz}"
|
||||
POSTGRES_USER=root
|
||||
POSTGRES_PASSWORD=root
|
||||
POSTGRES_DB=aact_db
|
||||
|
||||
CONTAINER_NAME="${POSTGRES_DB}-restored-$(date -I)"
|
||||
|
||||
#start container
|
||||
# Start a detached postgres:14-alpine container with ./backup mounted at /backup.
podman run \
    -e POSTGRES_PASSWORD="${POSTGRES_PASSWORD}" \
    -e POSTGRES_USER="${POSTGRES_USER}" \
    -e POSTGRES_DB="${POSTGRES_DB}" \
    --name "${CONTAINER_NAME}" \
    --detach \
    --network research-network \
    --shm-size=512mb \
    --volume ./backup/:/backup/ \
    -p 5432:5432 \
    postgres:14-alpine
# A space now precedes the continuation backslash after the port mapping, so the
# image name can never be glued onto "5432:5432" when the lines are joined.
|
||||
|
||||
|
||||
sleep 10
|
||||
|
||||
# Function to check if PostgreSQL is ready:
# succeeds once psql inside the container can connect and run '\q'.
function check_postgres {
    podman exec -i "${CONTAINER_NAME}" psql -h localhost -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c '\q' > /dev/null 2>&1
}

# Wait for PostgreSQL to be ready, polling every 4 seconds.
# The wait is bounded (override with MAX_WAIT_ATTEMPTS) so a container that
# failed to start does not leave the script looping forever.
MAX_WAIT_ATTEMPTS="${MAX_WAIT_ATTEMPTS:-75}"
wait_attempt=0
until check_postgres; do
    wait_attempt=$((wait_attempt + 1))
    if (( wait_attempt >= MAX_WAIT_ATTEMPTS )); then
        echo "PostgreSQL did not become ready after ${wait_attempt} attempts; aborting." >&2
        exit 1
    fi
    echo "Waiting for PostgreSQL to be ready..."
    sleep 4
done
|
||||
|
||||
echo "PostgreSQL is ready. Restoring the database..."
|
||||
|
||||
# Decompress the dump file and restore it to the database
|
||||
podman exec -i "${CONTAINER_NAME}" sh -c "gunzip -c /backup/${RESTORE_DUMP_GZ} | psql -h localhost -U ${POSTGRES_USER} -d ${POSTGRES_DB}"
|
||||
|
||||
echo "Database restoration complete."
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,31 @@
|
||||
version: '3'

networks:
  pharmaceutical_research: #because it helps to have a way to link specifically to this.

services:
  aact_db:
    image: postgres:14-alpine
    networks:
      - pharmaceutical_research
    shm_size: '4gb' #adjust the shared memory /dev/shm when running
    #https://stackoverflow.com/questions/30210362/how-to-increase-the-size-of-the-dev-shm-in-docker-container
    container_name: aact_db
    #restart: always #restart after crashes
    environment:
      POSTGRES_USER: root
      POSTGRES_PASSWORD: root
      POSTGRES_DB: aact_db
    ports:
      - "5432:5432" #host:container
    volumes: #host:container is the format.
      # this is persistent storage for the database.
      # The postgres:14 image declares its own volume at /var/lib/postgresql/data,
      # so the bind mount has to target that exact path; mounting the parent
      # directory leaves the data in an anonymous volume instead of ./db_store.
      - ./db_store/data/:/var/lib/postgresql/data/
      # this is the database dump to restore from
      - ./aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
      # this is the folder containing entrypoint info.
      - ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
    env_file:
      ../.env
|
||||
|
||||
|
||||
@ -0,0 +1,21 @@
|
||||
--Create ctti user and grant permissions
|
||||
CREATE ROLE ctti;
|
||||
GRANT ALL PRIVILEGES ON DATABASE aact_db TO ctti;
|
||||
|
||||
/*
|
||||
Add the root user if it doesn't exist.
|
||||
With the default configuration this shouldn't be an issue,
|
||||
but I can see myself forgetting and changing the default POSTGRES_USER
|
||||
*/
|
||||
-- Create the root login role, and grant it all privileges on aact_db, only when
-- no role named 'root' exists yet.
DO LANGUAGE plpgsql
$do$
BEGIN
    -- PERFORM discards the row but sets FOUND when the role is present
    PERFORM 1
    FROM pg_catalog.pg_roles
    WHERE rolname = 'root';

    IF NOT FOUND THEN
        CREATE ROLE root LOGIN PASSWORD 'root'; --SECURITY ISSUE
        GRANT ALL PRIVILEGES ON DATABASE aact_db TO root;
    END IF;
END
$do$;
|
||||
@ -0,0 +1,18 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
set -x
|
||||
|
||||
|
||||
# This file loads the database dump into the postgres database
|
||||
|
||||
dump_path="/mnt/host_data/postgres_data.dmp"
|
||||
|
||||
#Double check the postgres user and database are set
|
||||
# Abort when EITHER variable is empty. The original used &&, which only failed
# when both were missing and otherwise called pg_restore with an empty argument.
if [[ -z "$POSTGRES_USER" || -z "$POSTGRES_DB" ]]; #if either POSTGRES_USER or POSTGRES_DB are empty, throw an error.
then
    echo "Missing either the POSTGRES_USER or the POSTGRES_DB environment variable"
    exit 4
else
    #restore the DB
    pg_restore -e -v -O -x --dbname="$POSTGRES_DB" --username="$POSTGRES_USER" --no-owner "$dump_path"
fi
|
||||
@ -0,0 +1,127 @@
|
||||
-- Create a schema handling trial history.
|
||||
CREATE SCHEMA history;
|
||||
|
||||
--Create role for anyone who needs to both select and insert on historical data
|
||||
-- history_writer: may connect, use the history schema, and INSERT/SELECT on its tables.
CREATE ROLE history_writer;
GRANT CONNECT ON DATABASE aact_db to history_writer;

GRANT USAGE ON SCHEMA history TO history_writer;

-- NOTE(review): the original granted on schema "http", although the role and the
-- USAGE grant above are for "history"; this looks like a copy/paste slip - confirm.
GRANT INSERT,SELECT ON ALL TABLES IN SCHEMA history TO history_writer;
-- GRANT ... ON ALL TABLES only covers tables that already exist; default
-- privileges extend the grant to tables the current role creates later in this schema.
ALTER DEFAULT PRIVILEGES IN SCHEMA history GRANT INSERT,SELECT ON TABLES TO history_writer;


--Create role for anyone who only needs selection access to historical data, such as for analysis
CREATE ROLE history_reader;
GRANT CONNECT ON DATABASE aact_db to history_reader;

GRANT USAGE ON SCHEMA history TO history_reader;

GRANT SELECT ON ALL TABLES IN SCHEMA history TO history_reader;
ALTER DEFAULT PRIVILEGES IN SCHEMA history GRANT SELECT ON TABLES TO history_reader;
|
||||
|
||||
|
||||
/* History Tables
|
||||
Below is where I would construct the parsed trial history tables that I need.
|
||||
|
||||
Possible fields
|
||||
nct_id
|
||||
version
|
||||
--Study Status
|
||||
overall_status^
|
||||
primary_completion_date^
|
||||
completion_date^
|
||||
last_update_submitted_date
|
||||
--SponsorCollaborators
|
||||
sponsor (multi?)
|
||||
collaborators (multi?)
|
||||
--Oversight
|
||||
fda_regulated_drug (ignore)
|
||||
fda_regulated_device (ignore)
|
||||
dmc (ignore)
|
||||
--StudyDescription
|
||||
summary
|
||||
detailed_description
|
||||
--Conditions
|
||||
Conditions
|
||||
Keywords
|
||||
--StudyDesign
|
||||
Study type
|
||||
Primary Purpose
|
||||
Study Phase
|
||||
Interventional Study Model
|
||||
Number of Arms
|
||||
Masking
|
||||
Allocation
|
||||
Enrollment ^
|
||||
--ArmsAndInterventions
|
||||
Arms (multiple) (Ignore)
|
||||
--ProtocolOutcomeMeasures
|
||||
--Eligibility
|
||||
--ContactsLocation
|
||||
--IPDSharing
|
||||
--References
|
||||
--ParticipantFlow
|
||||
--BaselineCharacteristics
|
||||
--ROutcomeMeasures
|
||||
--AdverseEvents
|
||||
--LimitationsAndCaveats
|
||||
--More Information
|
||||
|
||||
*/
|
||||
|
||||
CREATE TYPE history.updatable_catetories AS ENUM
|
||||
('Actual', 'Anticipated', 'Expected');
|
||||
|
||||
ALTER TYPE history.updatable_catetories
|
||||
OWNER TO root;
|
||||
|
||||
COMMENT ON TYPE history.updatable_catetories
|
||||
IS 'This enum is used to capture the different types of categories that a date or enrollemnt figure may have.';
|
||||
|
||||
|
||||
|
||||
CREATE TYPE history.study_statuses AS ENUM
|
||||
('Available', 'Withdrawn', 'Withheld', 'Temporarily not available', 'Active, not recruiting', 'Recruiting', 'Not yet recruiting', 'Enrolling by invitation', 'Suspended', 'No longer available', 'Approved for marketing', 'Unknown status', 'Completed', 'Terminated');
|
||||
|
||||
ALTER TYPE history.study_statuses
|
||||
OWNER TO root;
|
||||
|
||||
COMMENT ON TYPE history.study_statuses
|
||||
IS 'This enum is used to record study status. These are pulled from the ClinicalTrials.gov documentation.';
|
||||
|
||||
|
||||
-- Table: history.trial_snapshots
|
||||
|
||||
DROP TABLE IF EXISTS history.trial_snapshots;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS history.trial_snapshots
|
||||
(
|
||||
nct_id character varying(15) COLLATE pg_catalog."default" NOT NULL,
|
||||
version integer NOT NULL,
|
||||
submission_date timestamp NOT NULL,
|
||||
primary_completion_date timestamp without time zone,
|
||||
primary_completion_date_category history.updatable_catetories,
|
||||
start_date timestamp without time zone,
|
||||
start_date_category history.updatable_catetories,
|
||||
completion_date timestamp without time zone,
|
||||
completion_date_category history.updatable_catetories,
|
||||
overall_status history.study_statuses,
|
||||
enrollment integer,
|
||||
enrollment_category history.updatable_catetories,
|
||||
sponsor character varying COLLATE pg_catalog."default",
|
||||
responsible_party character varying COLLATE pg_catalog."default",
|
||||
CONSTRAINT trial_snapshots_pkey PRIMARY KEY (nct_id, version)
|
||||
);
|
||||
|
||||
|
||||
ALTER TABLE IF EXISTS history.trial_snapshots
|
||||
OWNER to root;
|
||||
|
||||
|
||||
CREATE OR REPLACE VIEW history.match_drugs_to_trials
|
||||
AS SELECT bi.nct_id,
|
||||
rp.rxcui,
|
||||
rp.propvalue1
|
||||
FROM ctgov.browse_interventions bi
|
||||
JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
|
||||
WHERE rp.propname::text = 'RxNorm Name'::text AND (bi.nct_id::text IN ( SELECT trial_snapshots.nct_id
|
||||
FROM history.trial_snapshots));
|
||||
@ -0,0 +1,40 @@
|
||||
CREATE SCHEMA spl AUTHORIZATION root;
|
||||
|
||||
DROP TABLE IF EXISTS spl.nsde;
|
||||
|
||||
CREATE SEQUENCE IF NOT EXISTS spl.nsde_id_seq
|
||||
INCREMENT 1
|
||||
START 1
|
||||
MINVALUE 1
|
||||
MAXVALUE 9223372036854775807
|
||||
CACHE 1;
|
||||
|
||||
ALTER SEQUENCE spl.nsde_id_seq
|
||||
OWNER TO root;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS spl.nsde
|
||||
(
|
||||
id integer NOT NULL DEFAULT nextval('spl.nsde_id_seq'::regclass),
|
||||
package_ndc11 character varying(11) COLLATE pg_catalog."default",
|
||||
application_number_or_citation character varying(25) COLLATE pg_catalog."default",
|
||||
package_ndc character varying(50) COLLATE pg_catalog."default",
|
||||
proprietary_name character varying(500) COLLATE pg_catalog."default",
|
||||
product_type character varying(90) COLLATE pg_catalog."default",
|
||||
marketing_category character varying(160) COLLATE pg_catalog."default",
|
||||
dosage_form character varying(155) COLLATE pg_catalog."default",
|
||||
billing_unit character varying(35) COLLATE pg_catalog."default",
|
||||
marketing_start_date date,
|
||||
marketing_end_date date,
|
||||
inactivation_date date,
|
||||
reactivation_date date,
|
||||
CONSTRAINT nsde_pkey PRIMARY KEY (id)
|
||||
)
|
||||
|
||||
TABLESPACE pg_default;
|
||||
|
||||
ALTER TABLE IF EXISTS spl.nsde
|
||||
OWNER to root;
|
||||
|
||||
-- if the table is dropped, the sequence is as well
|
||||
ALTER SEQUENCE spl.nsde_id_seq
|
||||
OWNED BY spl.nsde.id;
|
||||
@ -0,0 +1,6 @@
|
||||
-- Create the schema that holds the migrated RxNorm tables.
|
||||
CREATE SCHEMA rxnorm_migrated;
|
||||
|
||||
--Give root full access to every table in the rxnorm_migrated schema
|
||||
|
||||
GRANT ALL ON ALL TABLES IN SCHEMA rxnorm_migrated TO root;
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,7 @@
|
||||
# Instructions:
|
||||
Go to [RxNavInABox](https://lhncbc.nlm.nih.gov/RxNav/applications/RxNav-in-a-Box.html) and download the most recent version.
|
||||
|
||||
I have included the version I use.
|
||||
|
||||
Then unzip and run docker-compose.yaml
|
||||
|
||||
@ -0,0 +1,48 @@
|
||||
version: '3'
|
||||
|
||||
networks:
|
||||
pharmaceutical_research: #because it helps to have a way to link specifically to this.
|
||||
external: true
|
||||
|
||||
services:
|
||||
aact_db:
|
||||
image: postgres:14-alpine
|
||||
restart: "no"
|
||||
networks:
|
||||
- pharmaceutical_research
|
||||
container_name: aact_db
|
||||
#restart: always #restart after crashes
|
||||
environment:
|
||||
POSTGRES_USER: ${POSTGRES_USER}
|
||||
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
POSTGRES_DB: ${POSTGRES_DB}
|
||||
VERSION: podman
|
||||
ports:
|
||||
- "5432:5432" #host:container
|
||||
volumes: #host:container is the format.
|
||||
# this is persistent storage for the database
|
||||
- ./AACT_downloader/postgresql/:/var/lib/postgresql/
|
||||
# this is the database dump to restore from
|
||||
- ./AACT_downloader/aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
|
||||
# this is the folder containing entrypoint info.
|
||||
- ./AACT_downloader/docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
|
||||
shm_size: 512mb
|
||||
|
||||
|
||||
rxnav-db:
|
||||
image: mariadb:10.4
|
||||
restart: "no"
|
||||
ports:
|
||||
- "3306:3306"
|
||||
volumes:
|
||||
- ./RxNav-In-a-box/rxnav-in-a-box-20230103/mysql:/docker-entrypoint-initdb.d:ro
|
||||
- ./RxNav-In-a-box/rxnav_data:/var/lib/mysql
|
||||
environment:
|
||||
MYSQL_RANDOM_ROOT_PASSWORD: "yes"
|
||||
MYSQL_USER: ${MYSQL_USER}
|
||||
MYSQL_PASSWORD: ${MYSQL_PASSWORD}
|
||||
VERSION: podman
|
||||
env_file:
|
||||
.env
|
||||
networks:
|
||||
- pharmaceutical_research
|
||||
@ -0,0 +1,23 @@
|
||||
version: '3'
|
||||
|
||||
networks:
|
||||
pharmaceutical_research: #because it helps to have a way to link specifically to this.
|
||||
|
||||
services:
|
||||
aact_db:
|
||||
image: postgres:14-alpine
|
||||
networks:
|
||||
- pharmaceutical_research
|
||||
container_name: DrugCentral
|
||||
#restart: always #restart after crashes
|
||||
environment:
|
||||
POSTGRES_PASSWORD: root
|
||||
ports:
|
||||
- "54320:5432" #host:container
|
||||
volumes: #host:container is the format.
|
||||
# this is persistent storage for the database
|
||||
- ./db_store/:/var/lib/postgresql/
|
||||
# this is the folder containing entrypoint info.
|
||||
- ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
|
||||
|
||||
|
||||
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Download the DrugCentral SQL dump into ./docker-entrypoint-initdb.d/ and
# decompress it there.
#
# Abort on the first failing command (or unset variable) so that a failed
# `cd` or download is never followed by gzip running on a missing or
# partial file in the wrong directory.
set -euo pipefail

filename="drugcentral.dump.08222022.sql.gz"

cd ./docker-entrypoint-initdb.d/

# --fail: exit non-zero on an HTTP error instead of saving the error page.
# --location: follow redirects to the actual file.
curl --fail --location "https://unmtid-shinyapps.net/download/$filename" --output "$filename"

# Quoted so the name is passed as a single argument.
gzip -d "$filename"
|
||||
@ -1 +0,0 @@
|
||||
<mxfile host="Electron" modified="2022-09-19T21:58:15.288Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/16.5.1 Chrome/96.0.4664.110 Electron/16.0.7 Safari/537.36" etag="K1oYB1ahwdBUMmjzqr-S" version="16.5.1" type="device"><diagram id="-7mtYT5q5bNZQN0eJ9dG" name="Page-1">7Vxtb6M4EP41kfY+JOI1Lx/z0t5VykZVu7fX/XRywBBvASPjbJP99TcmJoSatLQJobeLVDV4MI49z3jm8dikY07DzZ8MxavP1MVBx9DcTcecdQxDH9kGfAjJdicZGvZO4DPiykq54J78xFKoSemauDgpVOSUBpzERaFDowg7vCBDjNGnYjWPBsVvjZGPFcG9gwJV+g9x+UqOwtZy+V+Y+Kvsm3VN3glRVlkKkhVy6dOByLzqmFNGKd9dhZspDoTyMr3snrs+cnffMYYjXuUBdGMFnuZynkxCf+V9vXPQ9661a+UHCtZywNOARATG/4URFCQ9n/6Q3efbTCfJEwkDFEFp4tGI38s7GpSdFQncOdrStehTwpHzmJUmK8rIT6iPArilgwBuMy4hN7VCjXvxpGyT4QTq3GYD1Z+JPqNNoeIcJTzrDQ0CFCdkmfZPPBgi5pNoQjmnoawUoCUOJvB1PqPryJ3SgDK4FdHdAEkQZKKOYboIDz0n7Tujj/jgTt8Z4qUHd6RGMeN4cxQqfW8AMHMwDTFnW6iSPSBNRs4ZPbO2p9wCdUvKVofWl1VE0ur9fdO5YcCFtI032Imt2Mli+uVmppgGjJkr2ilRpRShgPgRFAPsiceE0oTtjaU4JK4rWp4kMXJI5M/TajMrl9zJwQsRhce9IJ1iK3gQQwuTmJKIp8qwJ/AH6plqPbtjQ1+nUNbzMvyJ6oxPaQTdRySFCoMxPWFhUBNGOeJouTf1Sjgfn3Qq+BJtsyLYZl1Y9xWsbyIXYOGERtUAFxOlv+zbfXUCeZ5nOE4LfSn0/aahHyjQOzSMhVtMWuTrRH7UNPKWqUB/FTGIn6EYlqHdRxBIV5RXt4Pl0LZsrcQOhg5u7eCIHehG04YwVOzgnrO1w9cMuyC/ZdSFElzNBW9SzaElhzWTQ9soskOzeXY4Um3mqmWH73QQw4/NDrNxHC4FZmNoapx+aC3sdcDeODPMlqSHmQJJDVu8z49343xQV6f54SpQ64pkGsPYoywUOTkBeWsHZ7eD5vmgbiiGMCMJqAKDcLJmQrUtB7wwB+wOihzQrsoBR7WZibp+nI3n39LM/FrMOuOaIY7b5eOJ7mI3HT8uPdTVHQXyxuxhGyHeBHnz1FBNGC9m0+797bw7AxV0r+ZYJJGSnpO0e0mNpwvKQoWhldjLfhfq/AajpplvOBY0ckpd3DqJdzmJ/lvjQino9TkJNa8ITgIE2UKjBfysgJdEhcsCbqnrhs+IPWIOChczHcigT+X3tOCfF/yS7MGFwVezB+M4DvL0wWIdLjGDi0958vCP1hZqsIWyDMKFjUFNHR56glm7LKzfCKqSvvriv7pHdMtozAgG4i56ukBhS/1qwX7YNPaGSgXuNos0cdyuBi+7GrS04mqwar6otryhoeYNYcY4OBZHDMReovbp7mH6982sZQfvcg7GB88XGiUnkPf7ilrUBoV6cG88aWioOaBdTPg3RHEMWk96vGRLsQ0QNQeI/rMAoVf1EPt1xvlNRU0dPQ8RbOOsiduGiPe5isEHDxElx8swb88Z1YJ244Eha1jdHDBbwGsAvPGTRqa6AYBdH2ehHXS0oj6NUHCVSydpvMSuVE5eZ05pLOPtd8z5VoZ4tOZUBHkeZgQAdMa2D4eFb6IxQEcWZ1lo35W2BRxEB18OsDAeumYOrhDXgIr4+EUEzXIEGQ4
QJz+KPTk/Pkaj+GjN4TOqiM+oUXjU3XfwKR5hoTivXwJcem6/qOzMMTqgI8xeco0pzz1wS0WPN3vJUck3dOXDnf17sa86MPPIUkY2r/W0wcAqktZdqTIAsu1bMZiDKtTzEswVhPZdOAE0NZb9HnOqX3FK6Udek3kbpGPG0PaggrRWFfHsWJ3+bPVjyLfVc/h3TZ7XGEqOQwgdrUj0mH5iJs5aovR/iJOVEGIWJuKTwr8gq0iS3i8y5Y9EvAwnrWcNsj3GbJly2pzfW4BZ9CRZ+QI+Yfib+oRBVRp0qk84Lc6qq9Dqs3SFhI2FKZv+JefrkVPs+xBt6HZhXnU/fIi21MT04u66O/vSMSCImbpQ7QKWf6l2RJeoJ8azicS+lnmt4NpmLuv+0YxRMXbburqyLd337Ne1srVUai5AlabiIi5chXiRxmU0jtPXa5diQF/H6cyIA+TkwkR0CpyNjwPx6sXeBhVDa5MiVd7CfvNvcFz4zMzgNyUDGat7nQ0c8QaXYQNWw2StZwz7hxjpryCUlm4xIzB8QR92wg3hDwfXB2BDKW9JFBqFulHiZ6nEryy+/y95m/Uab9NG2qhI3E5cZ51C3MQ7ZPufS9tVz390zrz6Dw==</diagram></mxfile>
|
||||
@ -0,0 +1,44 @@
|
||||
from flask import Flask
|
||||
import os
|
||||
from dotenv import dotenv_values
|
||||
|
||||
|
||||
|
||||
env_path = "../../containers/.env"
|
||||
ENV = dotenv_values(env_path)
|
||||
|
||||
def create_app(test_config=None):
    """Application factory for the ICD-10 condition-matching Flask app.

    Builds the Flask app, loads the Postgres connection settings from the
    module-level ``ENV`` mapping, registers the database teardown hook and
    the ``validation`` blueprint.

    Args:
        test_config: optional mapping of configuration values applied on top
            of the defaults (e.g. from tests). ``None`` leaves the defaults
            untouched.

    Returns:
        The configured ``Flask`` application.

    Raises:
        KeyError: if one of the required POSTGRES_* keys is missing from ENV.
    """
    # create and configure the app
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        # Prefer a secret supplied through the .env file so the signing key
        # does not have to live in version control; the committed value is
        # kept only as a fallback so existing setups keep working.
        SECRET_KEY=ENV.get("FLASK_SECRET_KEY") or '6e674d6e41b733270fd01c6257b3a1b4769eb80f3f773cd0fe8eff25f350fc1f',
        POSTGRES_DB=ENV["POSTGRES_DB"],
        POSTGRES_USER=ENV["POSTGRES_USER"],
        POSTGRES_HOST=ENV["POSTGRES_HOST"],
        POSTGRES_PORT=ENV["POSTGRES_PORT"],
        POSTGRES_PASSWORD=ENV["POSTGRES_PASSWORD"],
    )

    # Previously accepted but ignored; apply caller-supplied overrides last.
    if test_config is not None:
        app.config.from_mapping(test_config)

    # ensure the instance folder exists (best effort: an existing folder or
    # any other OSError is deliberately ignored)
    try:
        os.makedirs(app.instance_path)
    except OSError:
        pass

    # a simple page that says hello
    @app.route('/')
    def hello():
        return 'Hello, World!'

    # Imported here rather than at module level, as in the original layout.
    from . import db_interface
    db_interface.init_database(app)

    from . import validation
    app.register_blueprint(validation.bp)

    return app
|
||||
|
||||
|
||||
@ -0,0 +1,175 @@
|
||||
import psycopg2 as psyco
|
||||
from psycopg2 import extras
|
||||
from datetime import datetime
|
||||
|
||||
import click #used for cli commands. Not needed for what I am doing.
|
||||
from flask import current_app, g
|
||||
|
||||
def get_db(**kwargs):
    """Return the Postgres connection cached on ``flask.g``, opening it on first use.

    Connection settings are read from ``current_app.config``. ``kwargs`` are
    forwarded to ``psycopg2.connect`` only when a new connection is created;
    they are ignored when a connection is already cached.
    """
    if "db" in g:
        return g.db

    g.db = psyco.connect(
        dbname=current_app.config["POSTGRES_DB"],
        user=current_app.config["POSTGRES_USER"],
        host=current_app.config["POSTGRES_HOST"],
        port=current_app.config["POSTGRES_PORT"],
        password=current_app.config["POSTGRES_PASSWORD"],
        **kwargs,
    )
    return g.db
|
||||
|
||||
def close_db(e=None):
    """Close and forget the connection cached on ``flask.g``, if there is one.

    ``e`` is accepted so the function can be used as a teardown callback;
    it is not inspected.
    """
    connection = g.pop('db', None)
    if connection is None:
        return
    connection.close()
|
||||
|
||||
def check_initialization(app):
    """Run a trivial count query so a broken database setup fails immediately.

    The result is fetched and discarded. ``app`` is not used.
    """
    connection = get_db()
    with connection.cursor() as cursor:
        cursor.execute("select count(*) from \"DiseaseBurden\".trial_to_icd10")
        # just checking if everything is going to fail
        cursor.fetchall()
|
||||
|
||||
def init_database(app):
    """Hook database cleanup into ``app``.

    Registers ``close_db`` as an app-context teardown callback. The start-up
    check is currently disabled.
    """
    #check_initialization(app)
    teardown_hook = close_db
    app.teardown_appcontext(teardown_hook)
|
||||
|
||||
|
||||
|
||||
|
||||
def select_remaing_trials_to_analyze(db_conn):
    '''
    Fetch the trials that still need review.

    Returns the distinct nct_ids (one-column rows, ordered by nct_id) of
    trial_to_icd10 entries whose ``approved`` column is NULL.
    '''
    pending_query = '''
        select distinct nct_id
        from "DiseaseBurden".trial_to_icd10 tti
        where tti.approved is null
        order by nct_id
        ;
        '''
    with db_conn.cursor() as cursor:
        cursor.execute(pending_query)
        pending_rows = cursor.fetchall()
    return pending_rows
|
||||
|
||||
|
||||
def select_analyzed_trials(db_conn):
    '''
    Fetch the trials that have already been reviewed.

    Returns rows of (nct_id, latest approval_timestamp) for entries marked
    'accepted' or 'rejected', most recently updated first.
    '''
    reviewed_query = '''
        select distinct nct_id, max(approval_timestamp)
        from "DiseaseBurden".trial_to_icd10 tti
        where tti.approved in ('accepted','rejected')
        group by nct_id
        order by max(approval_timestamp) desc
        ;
        '''
    with db_conn.cursor() as cursor:
        cursor.execute(reviewed_query)
        reviewed_rows = cursor.fetchall()
    return reviewed_rows
|
||||
|
||||
def select_unmatched_trials(db_conn):
    '''
    Fetch the trials that were marked as having no good match.

    Returns the distinct nct_ids (one-column rows, ordered by nct_id) of
    trial_to_icd10 entries whose ``approved`` column is 'unmatched'.
    '''
    unmatched_query = '''
        select distinct nct_id
        from "DiseaseBurden".trial_to_icd10 tti
        where tti.approved = 'unmatched'
        order by nct_id
        ;
        '''
    with db_conn.cursor() as cursor:
        cursor.execute(unmatched_query)
        unmatched_rows = cursor.fetchall()
    return unmatched_rows
|
||||
|
||||
|
||||
def get_trial_conditions_and_proposed_matches(db_conn, nct_id):
    """Return every trial_to_icd10 row (all columns) recorded for ``nct_id``.

    The id is bound as a query parameter rather than interpolated.
    """
    matches_query = '''
        select *
        from "DiseaseBurden".trial_to_icd10 tti
        where nct_id = %s
        '''
    with db_conn.cursor() as cursor:
        cursor.execute(matches_query, [nct_id])
        match_rows = cursor.fetchall()
    return match_rows
|
||||
|
||||
|
||||
def store_validation(db_conn, list_of_insert_data):
    """Persist review decisions and commit.

    Each element of ``list_of_insert_data`` supplies the parameters
    (approved, approval_timestamp, id) for one UPDATE of trial_to_icd10.
    A single commit is issued after all updates have been executed.
    """
    update_query = """
        update "DiseaseBurden".trial_to_icd10
        set approved=%s, approval_timestamp=%s
        where id=%s
        ;
        """
    with db_conn.cursor() as cursor:
        for decision in list_of_insert_data:
            cursor.execute(update_query, decision)
    db_conn.commit()
|
||||
|
||||
def get_trial_summary(db_conn,nct_id):
    """Collect the descriptive data shown for one trial.

    Runs three parameterised queries against the ctgov schema and returns
    ``{"summary": rows, "keywords": rows, "conditions": rows}``.
    """
    summary_query = """
        select
            s.nct_id,
            brief_title ,
            official_title ,
            bs.description as brief_description,
            dd.description as detailed_description
        from ctgov.studies s
        left join ctgov.brief_summaries bs
            on bs.nct_id = s.nct_id
        left join ctgov.detailed_descriptions dd
            on dd.nct_id = s.nct_id
        where s.nct_id = %s
        ;
        """
    conditions_query = """
        --conditions mentioned
        select * from ctgov.conditions c
        where c.nct_id = %s
        ;
        """
    keywords_query = """
        select nct_id ,downcase_name
        from ctgov.keywords k
        where k.nct_id = %s
        ;
        """
    # Queries run in this order: summary, keywords, conditions.
    plan = (
        ("summary", summary_query),
        ("keywords", keywords_query),
        ("conditions", conditions_query),
    )
    collected = {}
    with db_conn.cursor() as curse:
        for label, query in plan:
            curse.execute(query, [nct_id])
            collected[label] = curse.fetchall()
    return collected
|
||||
|
||||
def get_list_icd10_codes(db_conn):
    """Return the distinct codes in icd10_to_cause as a flat, sorted list."""
    codes_query = """
        select distinct code
        from "DiseaseBurden".icd10_to_cause itc
        order by code;
        """
    with db_conn.cursor() as curse:
        curse.execute(codes_query)
        code_rows = curse.fetchall()

    # Each row is a one-column tuple; unwrap it.
    flat_codes = []
    for row in code_rows:
        flat_codes.append(row[0])
    return flat_codes
|
||||
|
||||
def record_suggested_matches(db_conn, nct_id,condition,icd10_code):
    """Insert a hand-entered match for a trial and commit.

    The new trial_to_icd10 row has source 'hand matched', is stored as
    'accepted', and is stamped with the current local time.
    """
    insert_query = """
        INSERT INTO "DiseaseBurden".trial_to_icd10
        (nct_id,"condition",ui,"source",approved,approval_timestamp)
        VALUES (%s,%s,%s,'hand matched','accepted',%s)
        ;
        """
    with db_conn.cursor() as curse:
        row_values = [nct_id, condition, icd10_code, datetime.now()]
        curse.execute(insert_query, row_values)
    db_conn.commit()
|
||||
@ -0,0 +1 @@
|
||||
#at some point I need to add a login or something.
|
||||
@ -0,0 +1,25 @@
|
||||
<!doctype html>
|
||||
<title>{% block title %}{% endblock %} - ClinicalTrialsProject</title>
|
||||
<!--<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">-->
|
||||
|
||||
<nav>
|
||||
<h1>Nav</h1>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="{{ url_for('validation.remaining') }}">Validation Home</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://icd.who.int/browse10/2019/en">WHO ICD-10 Codes (2019)</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://uts.nlm.nih.gov/uts/umls/home">UMLS Metathesaurus browser (requires login)</a>
|
||||
</li>
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
<section class="content">
|
||||
<header>
|
||||
{% block header %}{% endblock %}
|
||||
</header>
|
||||
{% block content %}{% endblock %}
|
||||
</section>
|
||||
@ -0,0 +1,49 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block header %}
|
||||
<h1>{% block title %} ICD-10 to Trial Conditions Validation {% endblock %}</h1>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
|
||||
<h2>Trials to Validate</h2>
|
||||
|
||||
<table>
|
||||
<th>Trials</th>
|
||||
{% for trial in list_to_validate %}
|
||||
<tr><td>
|
||||
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||
{{ trial [0] }}
|
||||
</a>
|
||||
</td></tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
|
||||
<h2>Trials that have been Validated</h2>
|
||||
|
||||
<table>
|
||||
<th>Trials Links</th>
|
||||
{% for trial in validated_list %}
|
||||
<tr><td>
|
||||
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||
{{ trial [0] }}
|
||||
</a>
|
||||
(Most recently updated {{trial[1]}})
|
||||
</td></tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
|
||||
<h2>Trials that don't have a good match</h2>
|
||||
|
||||
<table>
|
||||
<th>Trial Links</th>
|
||||
{% for trial in unmatched_list %}
|
||||
<tr><td>
|
||||
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||
{{ trial [0] }}
|
||||
</a>
|
||||
</td></tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
|
||||
{% endblock %}
|
||||
@ -0,0 +1,95 @@
|
||||
{% extends 'base.html' %}
|
||||
|
||||
{% block header %}
|
||||
<h1> ICD-10 to Trial Conditions Validation: {{ nct_id }} </h1>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
|
||||
<section class="summary">
|
||||
<h3>Trial Summary</h3>
|
||||
|
||||
<div class="text_summary">
|
||||
<ul>
|
||||
<li>NCT: {{ summary_dats["summary"][0][0] }}</li>
|
||||
<li>Brief Title: {{ summary_dats["summary"][0][1] }}</li>
|
||||
<li>Long Title: {{ summary_dats["summary"][0][2] }}</li>
|
||||
<li>Brief Description: {{ summary_dats["summary"][0][3] }}</li>
|
||||
<li>Long Description: {{ summary_dats["summary"][0][4] }}</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="keywords">
|
||||
<h4>Keywords</h4>
|
||||
<ul>
|
||||
{% for keyword in summary_dats["keywords"] %}
|
||||
<li>
|
||||
{{ keyword[1] }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
<div class="conditions">
|
||||
<h4>Raw Conditions </h4>
|
||||
<ul>
|
||||
{% for condition in summary_dats["conditions"] %}
|
||||
<li>
|
||||
{{ condition[3] }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="proposed_conditions">
|
||||
<h3>Proposed Conditions</h3>
|
||||
<form method="post">
|
||||
<table>
|
||||
<tr>
|
||||
<th>Approve</th>
|
||||
<th>Condition (MeSH normalized)</th>
|
||||
<th>Identifier</th>
|
||||
<th>Source</th>
|
||||
<th>Description</th>
|
||||
<th>Source</th>
|
||||
</tr>
|
||||
{% for condition in condition_list %}
|
||||
|
||||
<tr>
|
||||
<td> <input type="checkbox" id="{{ condition[0] }}" name="{{condition[0]}}" value="accepted" {% if condition[8] == "accepted" %}checked{% endif %}> </td>
|
||||
<td> {{condition[2]}} </td>
|
||||
<td> {{condition[3]}} </td>
|
||||
<td> {{condition[5]}} </td>
|
||||
<td> {{condition[6]}} </td>
|
||||
<td> {{condition[7]}} </td>
|
||||
</tr>
|
||||
|
||||
{% endfor %}
|
||||
</table>
|
||||
<input type="submit" name="submission" value="Submit approvals">
|
||||
<br/>
|
||||
<input type="submit" name="marked_unmatched" value="Mark unmatched">
|
||||
</form>
|
||||
</section>
|
||||
|
||||
<section class="submit_alternate">
|
||||
<h3>Submit Alternate Conditions</h3>
|
||||
<!--For each listed condition, provide a spot to enter an ICD-10 code-->
|
||||
<form method="post">
|
||||
<label for="alternate_sub">Please enter the proposed code that appears to be the best match:</label>
|
||||
<input name="alt_sub" id="alternate_sub">
|
||||
<br/>
|
||||
<label for="condition">
|
||||
Please give a name to the condition you used to match this<br/>
|
||||
Condition:
|
||||
</label>
|
||||
<input name="condition" id="condition">
|
||||
<br/>
|
||||
<input type="submit" name="alternate_submission" value="Submit alternate ICD-10 code">
|
||||
</form>
|
||||
</section>
|
||||
|
||||
<section class="approved">
|
||||
<!--TODO:This will list the already approved values-->
|
||||
</section>
|
||||
|
||||
{% endblock %}
|
||||
@ -0,0 +1,98 @@
|
||||
import functools
|
||||
from flask import (Blueprint, flash, g, redirect, render_template, request, session, url_for)
|
||||
from Icd10ConditionsMatching.db_interface import (
|
||||
get_db,select_remaing_trials_to_analyze,
|
||||
select_analyzed_trials,
|
||||
select_unmatched_trials,
|
||||
get_trial_conditions_and_proposed_matches,
|
||||
store_validation,
|
||||
get_trial_summary,
|
||||
get_list_icd10_codes,
|
||||
record_suggested_matches,
|
||||
)
|
||||
from datetime import datetime
|
||||
|
||||
#### First Blueprint: Checking Data
|
||||
bp = Blueprint("validation", __name__, url_prefix="/validation")
|
||||
|
||||
|
||||
|
||||
@bp.route("/",methods=["GET"])
def remaining():
    """Validation landing page.

    Renders ``validation_index.html`` with three trial lists: still to
    validate, already validated, and marked unmatched.
    """
    connection = get_db()

    # Dict entries are evaluated top to bottom, so the queries run in the
    # order: pending, validated, unmatched.
    page_context = {
        "list_to_validate": select_remaing_trials_to_analyze(connection),
        "validated_list": select_analyzed_trials(connection),
        "unmatched_list": select_unmatched_trials(connection),
    }
    return render_template("validation_index.html", **page_context)
|
||||
|
||||
|
||||
@bp.route("/<nct_id>", methods=["GET","POST"])
def validate_trial(nct_id):
    """Show one trial's proposed ICD-10 matches (GET) or record a review (POST).

    GET renders ``validation_of_trial.html`` with the proposed matches and
    the trial summary.

    POST handles one of three submit buttons, identified by its form key:
      * ``submission``: each proposed match whose id appears in the form is
        stored with the submitted value, all others as "rejected"; then
        redirects to the validation index.
      * ``marked_unmatched``: every proposed match is stored as "unmatched";
        then redirects to the validation index.
      * ``alternate_submission``: records a hand-entered code. If the
        normalised code is in the known code list the page is reloaded;
        otherwise the entry is still recorded (flagged in the condition
        text) and a 422 message is returned.
    Any other POST returns a 400 instead of falling through with no response.
    """
    # Local stdlib import: used to escape user-controlled text echoed into HTML.
    from html import escape

    db_conn = get_db()
    condition_list = get_trial_conditions_and_proposed_matches(db_conn, nct_id)

    if request.method == "GET":
        summary_dats = get_trial_summary(db_conn, nct_id)

        return render_template(
            "validation_of_trial.html",
            nct_id=nct_id,
            condition_list=condition_list,
            summary_dats=summary_dats,
        )

    # Only GET and POST are routed here, so this is a POST.
    list_of_insert_data = []

    if "submission" in request.form:
        # if match id in submitted form, mark as approved, otherwise mark as rejected
        for condition in condition_list:
            match_id = condition[0]
            list_of_insert_data.append(
                (request.form.get(str(match_id), "rejected"), datetime.now(), match_id)
            )

        store_validation(db_conn, list_of_insert_data)
        return redirect(url_for("validation.remaining"))

    if "marked_unmatched" in request.form:
        # if this was marked as "unmatched", store that for each entry.
        for condition in condition_list:
            match_id = condition[0]
            list_of_insert_data.append(("unmatched", datetime.now(), match_id))

        store_validation(db_conn, list_of_insert_data)
        return redirect(url_for("validation.remaining"))

    if "alternate_submission" in request.form:
        # Normalise the code: drop dots and right-pad with "-" to 7 characters.
        code = request.form["alt_sub"]
        code = code.strip().replace(".",'').ljust(7,"-")

        condition = request.form["condition"].strip()

        codelist = get_list_icd10_codes(db_conn)
        if code in codelist:
            record_suggested_matches(db_conn, nct_id, condition, code)
            return redirect(request.path)

        # Unknown codes are still recorded, flagged in the condition text.
        record_suggested_matches(db_conn, nct_id, condition + "| Code not in GBD list", code)
        # Both interpolated values come from the request, so escape them
        # before placing them in HTML, and quote the href attribute.
        return """
        Entered `{}`, which is not in the list of available ICD-10 codes. <a href="{}">Return to trial summary</a>
        """.format(escape(code.strip("-")), escape(request.path, quote=True)), 422

    # None of the known submit buttons was present.
    return "Unrecognised form submission.", 400
|
||||
|
||||
|
||||
@ -0,0 +1,13 @@
|
||||
from setuptools import setup
|
||||
|
||||
# Packaging metadata for the Icd10ConditionsMatching Flask application.
setup(
    name='Icd10ConditionsMatching',
    packages=['Icd10ConditionsMatching'],
    include_package_data=True,
    # Only third-party distributions belong here. `datetime` is part of the
    # Python standard library; the PyPI project of that name is an unrelated
    # third-party package, so it must not be listed as a requirement.
    install_requires=[
        'flask',
        'psycopg2',
        'python-dotenv',
    ],
)
|
||||
@ -0,0 +1 @@
|
||||
waitress-serve --port=5000 --call 'Icd10ConditionsMatching:create_app'
|
||||
@ -0,0 +1,11 @@
|
||||
from drugtools.env_setup import postgres_conn, mariadb_conn, ENV
|
||||
|
||||
# Smoke test: print the loaded environment, then run one small query against
# each database to confirm that both connections work.
print(ENV)

with postgres_conn() as pconn:
    with pconn.cursor() as curse:
        curse.execute("select nct_id FROM ctgov.studies LIMIT 10;")
        print(curse.fetchall())

with mariadb_conn() as mconn:
    with mconn.cursor() as mcurse:
        mcurse.execute("select * FROM ALLNDC_HISTORY LIMIT 10;")
        print(mcurse.fetchall())
|
||||
@ -0,0 +1,96 @@
|
||||
import json
|
||||
from psycopg2.extras import execute_values
|
||||
import datetime as dt
|
||||
from drugtools.env_setup import postgres_conn, ENV
|
||||
import requests
|
||||
import zipfile
|
||||
import io
|
||||
|
||||
URL_STEM = 'https://download.open.fda.gov/other/nsde/'
|
||||
NUMBER_OF_NSDE_FILES = int(ENV["NUMBER_OF_NSDE_FILES"])
|
||||
|
||||
def filename_generator(max_num):
    """Yield the NSDE archive names ``other-nsde-NNNN-of-MMMM.json.zip``.

    NNNN runs from 1 to ``max_num`` inclusive and MMMM is ``max_num``; both
    are zero-padded to four digits.
    """
    name_template = "other-nsde-{:0>4}-of-{:0>4}.json.zip"
    yield from (
        name_template.format(part_number, max_num)
        for part_number in range(1, max_num + 1)
    )
|
||||
|
||||
def get_date(result,key):
    """Parse ``result[key]`` as a ``YYYYMMDD`` string into a ``datetime``.

    Returns ``None`` when the key is absent or its value is falsy.
    """
    raw_value = result.get(key)
    return dt.datetime.strptime(raw_value, "%Y%m%d") if raw_value else None
|
||||
|
||||
def build_values(result):
    """Turn one NSDE result dict into a row tuple for insertion.

    Missing keys become ``None``; the four ``*_date`` fields are parsed with
    ``get_date``. The tuple order is: proprietary_name,
    application_number_or_citation, product_type, package_ndc,
    marketing_category, package_ndc11, dosage_form, billing_unit,
    marketing_start_date, marketing_end_date, inactivation_date,
    reactivation_date.
    """
    text_fields = (
        "proprietary_name",
        "application_number_or_citation",
        "product_type",
        "package_ndc",
        "marketing_category",
        "package_ndc11",
        "dosage_form",
        "billing_unit",
    )
    date_fields = (
        "marketing_start_date",
        "marketing_end_date",
        "inactivation_date",
        "reactivation_date",
    )
    text_part = tuple(result.get(name) for name in text_fields)
    date_part = tuple(get_date(result, name) for name in date_fields)
    return text_part + date_part
|
||||
|
||||
def download_and_extract_zip(base_url,filename):
    """Download ``base_url + filename`` and return the bytes of its first member.

    Args:
        base_url: URL prefix the archive name is appended to.
        filename: name of the zip archive to fetch.

    Returns:
        The decompressed bytes of the first entry in the archive, or ``None``
        if the archive has no entries.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        zipfile.BadZipFile: if the downloaded payload is not a zip archive.
    """
    response = requests.get(base_url + filename)
    # Surface HTTP failures directly instead of as a confusing BadZipFile
    # raised while trying to unzip an error page.
    response.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(response.content)) as the_zip:
        members = the_zip.infolist()
        if not members:
            return None
        # Only the first member is read.
        return the_zip.read(members[0])
|
||||
|
||||
def run():
    """Download every NSDE archive and bulk-insert its records into spl.nsde.

    A fresh connection and cursor are opened per archive; the archive is
    downloaded, its JSON ``results`` list is converted with ``build_values``
    and inserted with ``execute_values``.
    """
    insert_sql = """
        INSERT INTO spl.nsde (
            proprietary_name
            ,application_number_or_citation
            ,product_type
            ,package_ndc
            ,marketing_category
            ,package_ndc11
            ,dosage_form
            ,billing_unit
            ,marketing_start_date
            ,marketing_end_date
            ,inactivation_date
            ,reactivation_date
            )
        VALUES %s;
        """

    #It would be nice to replace the filename generator with something that retrieves and unzips the files directly.
    for filename in filename_generator(NUMBER_OF_NSDE_FILES):
        with postgres_conn() as con:
            with con.cursor() as curse:
                print(filename)

                raw_json = download_and_extract_zip(URL_STEM, filename)
                records = json.loads(raw_json)["results"]

                rows = []
                for record in records:
                    rows.append(build_values(record))
                execute_values(curse, insert_sql, rows)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run()
|
||||
@ -0,0 +1,43 @@
|
||||
import pymysql
|
||||
import psycopg2 as psyco
|
||||
from psycopg2.sql import SQL
|
||||
from dotenv import dotenv_values
|
||||
|
||||
env_path = "../containers/.env"
|
||||
ENV = dotenv_values(env_path)
|
||||
|
||||
def mariadb_conn(**kwargs):
    """Open a new PyMySQL connection using the MYSQL_* settings in ``ENV``.

    Extra keyword arguments are forwarded to ``pymysql.connect``. The port
    is converted to ``int``.
    """
    settings = {
        "database": ENV["MYSQL_DB"],
        "user": ENV["MYSQL_USER"],
        "host": ENV["MYSQL_HOST"],
        "port": int(ENV["MYSQL_PORT"]),
        "password": ENV["MYSQL_PASSWORD"],
    }
    return pymysql.connect(**settings, **kwargs)
|
||||
|
||||
def postgres_conn(**kwargs):
    """Open a new psycopg2 connection using the POSTGRES_* settings in ``ENV``.

    Extra keyword arguments are forwarded to ``psycopg2.connect``.
    """
    settings = {
        "dbname": ENV["POSTGRES_DB"],
        "user": ENV["POSTGRES_USER"],
        "host": ENV["POSTGRES_HOST"],
        "port": ENV["POSTGRES_PORT"],
        "password": ENV["POSTGRES_PASSWORD"],
    }
    return psyco.connect(**settings, **kwargs)
|
||||
|
||||
|
||||
def get_tables_of_interest():
    """Return the comma-separated ``TABLES_OF_INTEREST`` setting as a list.

    Entries are split on "," exactly as written; no whitespace is trimmed.
    """
    configured = ENV["TABLES_OF_INTEREST"]
    table_names = configured.split(",")
    return table_names
|
||||
|
||||
def postgres_table_delete_entries(schema,table):
    """Delete every row from ``schema.table`` and commit.

    The schema and table names are composed with ``psycopg2.sql.Identifier``
    so they are quoted as identifiers rather than interpolated as text.

    Args:
        schema: name of the schema containing the table.
        table: name of the table to empty.
    """
    # Only `SQL` is imported at module level; bring Identifier into scope here.
    from psycopg2.sql import Identifier

    # Intentionally unfiltered: the purpose is to empty the whole table.
    # The keyword must be `table` to match the {table} placeholder (the
    # previous `talbe=` made format() raise KeyError).
    delete_statement = SQL("delete from {schema}.{table}").format(
        schema=Identifier(schema),
        table=Identifier(table),
    )

    with postgres_conn() as con:
        with con.cursor() as curse:
            curse.execute(delete_statement)
        con.commit()
|
||||
|
||||
|
||||
@ -0,0 +1,465 @@
|
||||
from collections import namedtuple
|
||||
from copy import copy
|
||||
from datetime import datetime
|
||||
from bs4 import BeautifulSoup
|
||||
from drugtools.env_setup import ENV,postgres_conn
|
||||
from tqdm import tqdm
|
||||
#requires Python 3.10
|
||||
|
||||
#### GLOBALS
|
||||
# Verbose output is enabled only when the VERBOSE setting is exactly "True".
VERBOSE = ENV["VERBOSE"] == "True"

###CLASSES AND CONSTRUCTORS

# Lightweight (tag, value) pairs.
TagDatePair = namedtuple("TagDatePair", "tag date")
TagTextPair = namedtuple("TagTextPair", "tag text")
|
||||
|
||||
#superclasses
|
||||
class VersionData():
    """
    Container for the data belonging to one trial/version pair.

    It is created with only its identifiers; every other attribute starts
    as None and is filled in later by the extraction functions, so one
    instance can be passed around and completed step by step.

    load_to_db() writes the collected values to history.trial_snapshots.
    """
    def __init__(self,nct_id,version_id,submission_date):
        #identifiers (nct_id is stored with surrounding whitespace removed)
        self.nct_id = nct_id.strip()
        self.version_id = version_id
        self.submission_date = submission_date

        #Study Status: each date has a value and a category
        self._primary_completion_date = self._primary_completion_date_category = None
        self._start_date = self._start_date_category = None
        self._completion_date = self._completion_date_category = None
        self._overall_status = None
        self._enrollment = self._enrollment_category = None
        self._sponsor = None
        #self._sponsor_category = None #I don't believe this is included in the raw data
        self._responsible_party = None
        #self._responsible_party_category = None #I don't believe this is included in the raw data
        #self._collaborators = None #currently going to ignore as I've not found it in AACT

    def load_to_db(self,db_connection):
        """Insert this snapshot into history.trial_snapshots and commit.

        If the insert fails, the instance is printed and the error is
        re-raised; nothing is committed in that case.
        """
        insert_sql = """
            INSERT INTO history.trial_snapshots
            (
                nct_id,
                version,
                submission_date,
                primary_completion_date,
                primary_completion_date_category,
                start_date,
                start_date_category,
                completion_date,
                completion_date_category,
                overall_status,
                enrollment,
                enrollment_category,
                sponsor,
                responsible_party
            )
            VALUES
            (
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s,
                %s
            )
            """

        # Same order as the column list above.
        row = (
            self.nct_id,
            self.version_id,
            self.submission_date,
            self._primary_completion_date,
            self._primary_completion_date_category,
            self._start_date,
            self._start_date_category,
            self._completion_date,
            self._completion_date_category,
            self._overall_status,
            self._enrollment,
            self._enrollment_category,
            self._sponsor,
            self._responsible_party,
        )

        with db_connection.cursor() as db_cursor:
            try:
                db_cursor.execute(insert_sql, row)
            except Exception:
                #catch any error, print the applicable information, and raise the error.
                print(self)
                raise

        db_connection.commit()
|
||||
|
||||
############ Functions
|
||||
def extract_submission_dates(soup):
    """
    Map each version number to its submission date.

    Reads the rows of the table inside the first <fieldset>. In each row,
    the cell whose first header is "VersionNumber" gives the key (as int)
    and the cell whose first header is "VersionDate" gives the value
    (parsed from e.g. "January 5, 2020"). One entry is stored per row; a
    row without those cells yields a None key and/or value.
    """
    rows = soup.findChildren("fieldset")[0].table.tbody.findChildren("tr")

    dates_by_version = {}

    for row in rows:
        # if it is <td headers="VersionNumber">xx</td> then it contains what we need.
        number, submitted = None, None
        for cell in row.findChildren("td"):
            if "headers" not in cell.attrs:
                continue
            first_header = cell.attrs["headers"][0]
            if first_header == "VersionNumber":
                number = int(cell.text)
            elif first_header == "VersionDate":
                submitted = datetime.strptime(cell.text.strip() , "%B %d, %Y")

        dates_by_version[number] = submitted
    return dates_by_version
|
||||
|
||||
def optional_strip(possible_string):
    """Strip surrounding whitespace if the argument is a string.

    Args:
        possible_string: any value; typically a str or None.

    Returns:
        ``possible_string.strip()`` for any ``str`` (including subclasses of
        ``str``); every other value is returned unchanged.
    """
    # isinstance rather than `type(...) == str`, so str subclasses are
    # stripped too instead of silently passing through untouched.
    if isinstance(possible_string, str):
        return possible_string.strip()
    return possible_string
|
||||
|
||||
def extract_study_statuses(study_status_form, version_a,version_b):
    """
    Copy the study-status fields of a comparison form onto two version objects.

    Every <tr> of the form's table is converted with tr_to_td() into
    (label, old_cell, new_cell). The "old" cell is written to version_a and
    the "new" cell to version_b. Nothing is returned; both objects are
    modified in place.

    Handled row labels:
      - "Primary Completion:", "Study Start:", "Study Completion:" --
        parsed with extract_date_and_tag() into a date attribute and a
        matching `<attribute>_category` attribute.
      - "Overall Status:" -- everything before the first "[" is kept, which
        drops notes such as "Suspended [reason for suspension]" (seen on
        NCT00789633).
    Any other row is only reported when VERBOSE is set.
    """
    # row label -> attribute stem; the stem names `_<stem>` and `_<stem>_category`
    date_rows = {
        "Primary Completion:": "primary_completion_date",
        "Study Start:": "start_date",
        "Study Completion:": "completion_date",
    }

    for trow in study_status_form.table.tbody.find_all("tr"):
        cells = tr_to_td(trow)
        # tr_to_td always yields exactly three items (all None for odd rows)
        label, old_cell, new_cell = cells

        if label in date_rows:
            if VERBOSE:
                print("row matched: {}".format(label))
            stem = date_rows[label]
            # old version first, then new, so a parse failure on the new
            # cell still leaves the old values assigned
            for version, cell in ((version_a, old_cell), (version_b, new_cell)):
                parsed = extract_date_and_tag(cell.text)
                setattr(version, "_" + stem, parsed.date)
                setattr(version, "_" + stem + "_category", optional_strip(parsed.tag))

        elif label == "Overall Status:":
            if VERBOSE:
                print("row matched: {}".format(label))
            for version, cell in ((version_a, old_cell), (version_b, new_cell)):
                # keep only the status itself, not a trailing "[...]" note
                status_text = cell.text.split("[")[0]
                version._overall_status = optional_strip(status_text)

        else:
            if VERBOSE:
                print("row not matched: {}".format(cells))
|
||||
|
||||
|
||||
def extract_study_design(study_status_form, version_a,version_b):
    """
    Copy the enrollment figures of a study-design form onto two version objects.

    Only the "Enrollment:" row is used. Its old cell is parsed with
    extract_text_and_tag() and stored on version_a as `_enrollment` /
    `_enrollment_category`; the new cell is stored the same way on
    version_b. Nothing is returned; both objects are modified in place.
    Other rows are only reported when VERBOSE is set.
    """
    for trow in study_status_form.table.tbody.find_all("tr"):
        cells = tr_to_td(trow)
        # tr_to_td always yields exactly three items (all None for odd rows)
        label, old_cell, new_cell = cells

        if label != "Enrollment:":
            if VERBOSE:
                print("row not matched: {}".format(cells))
            continue

        if VERBOSE:
            print("row matched: {}".format(label))

        # old version first, then new
        for version, cell in ((version_a, old_cell), (version_b, new_cell)):
            parsed = extract_text_and_tag(cell.text)
            version._enrollment = parsed.text
            version._enrollment_category = optional_strip(parsed.tag)
|
||||
|
||||
def extract_sponsor_data(study_status_form, version_a,version_b):
    """
    Copy sponsor information of a sponsor/collaborators form onto two version objects.

    "Sponsor:" fills `_sponsor` and "Responsible Party:" fills
    `_responsible_party`; the old cell goes to version_a and the new cell to
    version_b. "Collaborators:" rows are recognised but deliberately ignored.
    Nothing is returned; both objects are modified in place. Other rows are
    only reported when VERBOSE is set.
    """
    # row label -> attribute that receives the stripped cell text
    text_rows = {
        "Sponsor:": "_sponsor",
        "Responsible Party:": "_responsible_party",
    }

    for trow in study_status_form.table.tbody.find_all("tr"):
        cells = tr_to_td(trow)
        # tr_to_td always yields exactly three items (all None for odd rows)
        label, old_cell, new_cell = cells

        if label in text_rows:
            if VERBOSE:
                print("row matched: {}".format(label))
            attribute = text_rows[label]
            setattr(version_a, attribute, optional_strip(old_cell.text))
            setattr(version_b, attribute, optional_strip(new_cell.text))

        elif label == "Collaborators:":
            if VERBOSE:
                print("row matched: {}".format(label))
            # TODO: find a trial with multiple collaborators and work out how
            # to identify/count them. No matching AACT field has been found
            # so far, so the row is skipped.

        else:
            if VERBOSE:
                print("row not matched: {}".format(cells))
|
||||
|
||||
|
||||
def split_by_version(tag):
    '''
    Split a highlighted element into its old and new variants.

    Returns (old, new): two copies of `tag`, the first with every
    "add_hilite" descendant removed and the second with every "drop_hilite"
    descendant removed. `tag` itself is not modified.

    OUTDATED: the newer page format already separates old and new versions,
    so this is not strictly needed. It remains a convenient way to isolate
    the exact changes and to strip the highlight markup cleanly.
    '''
    def copy_without(css_class):
        # work on a clone so the caller's element stays intact
        clone = copy(tag)
        for node in clone.find_all(class_=css_class):
            node.extract()
        return clone

    return copy_without("add_hilite"), copy_without("drop_hilite")
|
||||
|
||||
|
||||
def extract_date_and_tag(text):
    """
    Parse cell text such as "<date> [<tag>]" into a TagDatePair.

    The part before the first "[" is parsed as a date, first with
    date_MMMM_YYYY and, if that fails, with date_MMMM_DD_YYYY. The text
    between the first "[" and the following "]" becomes the tag; without a
    "[" the tag is None.

    Returns TagDatePair(None, None) for blank input, otherwise
    TagDatePair(tag, datetime).
    Raises ValueError when the date part matches neither format.
    """
    cleaned = text.strip()

    # nothing but whitespace: no date and no tag
    if cleaned == '':
        return TagDatePair(None, None)

    pieces = cleaned.split("[")
    date_text = pieces[0].strip()
    estimate_tag = pieces[1].split("]")[0].strip() if len(pieces) > 1 else None

    try:
        date_object = datetime.strptime(date_text, date_MMMM_YYYY)
    except ValueError:
        # not month/year only; retry with the month/day/year layout
        date_object = datetime.strptime(date_text, date_MMMM_DD_YYYY)

    return TagDatePair(estimate_tag, date_object)
|
||||
|
||||
|
||||
def extract_text_and_tag(text):
    """
    Split cell text such as "<value> [<tag>]" into a TagTextPair.

    The stripped text before the first "[" is the value. The text between
    the first "[" and the following "]" becomes the tag; without a "[" the
    tag is None. The value is kept as a string and is not converted.

    Returns TagTextPair(None, None) for blank input, otherwise
    TagTextPair(tag, value).
    """
    cleaned = text.strip()

    # nothing but whitespace: no value and no tag
    if cleaned == '':
        return TagTextPair(None, None)

    pieces = cleaned.split("[")
    value = pieces[0].strip()
    estimate_tag = pieces[1].split("]")[0].strip() if len(pieces) > 1 else None

    return TagTextPair(estimate_tag, value)
|
||||
|
||||
### FUNCTIONS
|
||||
|
||||
def tr_to_td(tr) -> tuple:
    """
    Break a three-column comparison row into (label, old_cell, new_cell).

    Only the first <td> is reduced to its text (the row label). The second
    and third <td> are returned as the cell objects themselves, so callers
    read `.text` from them and parse it according to what the row holds.

    Returns (None, None, None) when the row does not have exactly three
    <td> cells. The result is therefore always a 3-tuple.
    """
    cells = tr.find_all("td")
    if len(cells) != 3:
        return None, None, None

    label_cell, old_cell, new_cell = cells
    return label_cell.text, old_cell, new_cell
|
||||
|
||||
def get_forms(soup,version_a,version_b):
    """
    Run the matching extractor on every <form> in the page body.

    Forms are identified by their `id` attribute; forms without one are
    skipped. Known forms with an extractor have their data added to
    version_a / version_b in place. Known forms without an extractor are
    ignored silently. Unknown ids are only reported when VERBOSE is set.
    """
    # form id -> function that fills the preallocated version objects
    handlers = (
        ("form_StudyStatus", extract_study_statuses),
        ("form_SponsorCollaborators", extract_sponsor_data),
        ("form_StudyDesign", extract_study_design),
    )
    # forms that are recognised but not extracted (yet)
    ignored_ids = (
        "form_Oversight",
        "form_StudyDescription",
        "form_Conditions",
        "form_ArmsandInterventions",
        "form_ProtocolOutcomeMeasures",
        "form_Eligibility",
        "form_ContactsLocations",
        "form_IPDSharing",
        "form_References",
        "form_ParticipantFlow",
        "form_BaselineCharacteristics",
        "form_ROutcomeMeasures",
        "form_AdverseEvents",
        "form_LimitationsandCaveats",
        "form_MoreInformation",
    )

    for form in soup.body.find_all("form"):
        if "id" not in form.attrs:
            continue
        form_id = form.attrs["id"]

        for known_id, handler in handlers:
            if form_id == known_id:
                handler(form, version_a, version_b)
                break
        else:
            # no extractor: stay quiet for known forms, report the rest
            if form_id not in ignored_ids and VERBOSE:
                print("form not matched: {}".format(form_id))
|
||||
|
||||
|
||||
### CONSTANTS
|
||||
date_MMMM_YYYY = "%B %Y"  # strptime format: full month name and 4-digit year, e.g. "March 2015"
|
||||
date_MMMM_DD_YYYY = "%B %d, %Y"  # strptime format: full month name, day, 4-digit year, e.g. "March 5, 2015"
|
||||
|
||||
def get_data_from_versions(nct_id,html, version_a_int, version_b_int):
    """
    Build the two VersionData objects described by one comparison page.

    `html` is parsed with BeautifulSoup (lxml). Each VersionData is created
    from the trial id, the version number, and the date that
    extract_submission_dates() reports for that version (None if the
    lookup has no entry). get_forms() then fills both objects from the
    page's forms.

    Returns (version_a, version_b).
    """
    soup = BeautifulSoup(html,"lxml")
    submission_dates = extract_submission_dates(soup)

    # preallocate both versions; older version first
    older, newer = (
        VersionData(nct_id, number, submission_dates.get(number))
        for number in (version_a_int, version_b_int)
    )

    # extract data from the html and put it in the preallocated objects
    get_forms(soup, older, newer)

    return older, newer
|
||||
|
||||
|
||||
|
||||
def run():
    """
    Extract every successfully downloaded comparison page into the database.

    Reads all rows of http.responses with response_code 200, parses each
    page into two version objects and stores them through load_to_db().
    The older version is stored only when the two version numbers are
    consecutive; the newer version is always stored.
    """
    query = """
        SELECT nct_id, version_a,version_b, html
        FROM http.responses
        WHERE response_code = 200
        """

    with postgres_conn() as db_connection:
        # pull the requests from the db
        with db_connection.cursor() as curse:
            curse.execute(query)
            for nct_id, version_a, version_b, html in tqdm(curse.fetchall()):
                if VERBOSE:
                    print(nct_id, version_a, version_b)

                older, newer = get_data_from_versions(nct_id, html, version_a, version_b)

                # NOTE(review): presumably the older version of a
                # non-consecutive pair is covered by another response;
                # confirm against the downloader
                if version_b == version_a + 1:
                    older.load_to_db(db_connection)
                newer.load_to_db(db_connection)


if __name__ == "__main__":
    run()
|
||||
|
||||
"""
|
||||
Documentation:
|
||||
|
||||
To add a new field to extraction-lib:
|
||||
|
||||
1. Locate the field in the HTML
|
||||
- form id (e.g. <form id="form_StudyStatus"> gives the form id "form_StudyStatus")
|
||||
- Table row's data label. This corresponds to the text of first column in the row and will
|
||||
look something like
|
||||
<td class="rowLabel" style="min-width: 210px;">Record Verification:</td>.
|
||||
"Record Verification:" is the data label in the example above.
|
||||
2. Identify what data you will be extracting
|
||||
- type (date, text, int, etc)
|
||||
- if it contains a category ([Actual] vs [Anticipated] etc)
|
||||
3. Add data to:
|
||||
- sql table: history.trial_snapshots
|
||||
- the VersionData class
|
||||
- the VersionData.load_to_db() function
|
||||
4. Ensure the field matcher in `get_forms(*)` is matching on the form ID and has a function processing the form
|
||||
5. Ensure the function processing the form has a match entry to process the row
|
||||
- This should match on data label and then process the data by
|
||||
- splitting into old and new versions
|
||||
- Extracting the data for both old and new
|
||||
- add the data to the passed VersionData objects
|
||||
"""
|
||||
@ -0,0 +1,15 @@
|
||||
from .env_setup import postgres_conn
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def run():
    """
    Execute selected_trials.sql against postgres.

    The SQL file is looked up next to this module, so the result does not
    depend on the current working directory.
    """
    # sibling file of this module
    script_path = Path(__file__).with_name("selected_trials.sql")
    sqlfile = script_path.read_text()

    with postgres_conn() as connection, connection.cursor() as curse:
        curse.execute(sqlfile)


if __name__ == "__main__":
    run()
|
||||
@ -0,0 +1,118 @@
|
||||
import psycopg2 as psyco
|
||||
from psycopg2 import sql
|
||||
from psycopg2 import extras
|
||||
import pymysql
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
from .env_setup import postgres_conn, mariadb_conn, get_tables_of_interest
|
||||
|
||||
|
||||
##############NOTE
|
||||
'''
|
||||
|
||||
|
||||
mariadb --mariadb.connect--> incrementally fetched dict --psycopg2--> postgres
|
||||
|
||||
I will have the ability to reduce memory usage and simplify what I am doing.
|
||||
|
||||
|
||||
'''
|
||||
|
||||
############### GLOBALS
|
||||
#these are hardcoded so they shouldn't require any updates
|
||||
mschema="rxnorm_current"
|
||||
pschema="rxnorm_migrated"
|
||||
|
||||
########FUNCTIONS#################
|
||||
|
||||
|
||||
def convert_column(d):
    """
    Translate one MySQL column description into a postgres column definition.

    `d` is a row of INFORMATION_SCHEMA.columns as a dict. The keys read are
    DATA_TYPE, COLUMN_NAME and IS_NULLABLE, plus CHARACTER_MAXIMUM_LENGTH
    (varchar/char) or NUMERIC_PRECISION and NUMERIC_SCALE (decimal).

    Returns the fragment of a `CREATE TABLE` statement for that column, or
    None when the type has no conversion (enum, text, and any type not
    listed below).

    `d` is not modified, so converting the same row twice gives the same
    result.
    """
    # one template per supported MySQL type
    templates = {
        "varchar": "{COLUMN_NAME} character varying({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
        "char": "{COLUMN_NAME} character({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
        "tinyint": "{COLUMN_NAME} smallint {IS_NULLABLE}",
        "decimal": "{COLUMN_NAME} numeric({NUMERIC_PRECISION},{NUMERIC_SCALE}) {IS_NULLABLE}",
        "int": "{COLUMN_NAME} integer {IS_NULLABLE}",
    }

    template = templates.get(d["DATA_TYPE"])
    if template is None:
        # enum, text and unknown types are not converted
        return None

    # Render the constraint on a copy: writing it back into `d` would turn
    # "NOT NULL" into "" on a second call, because "NOT NULL" != "NO".
    fields = dict(d)
    fields["IS_NULLABLE"] = "NOT NULL" if d["IS_NULLABLE"] == "NO" else ""

    return template.format(**fields)
|
||||
|
||||
def run():
    """
    Copy every table of interest from MySQL/MariaDB into postgres.

    For each table returned by get_tables_of_interest():
      1. read the column metadata from INFORMATION_SCHEMA.columns,
      2. create the equivalent table in the `pschema` schema if it does not
         exist yet, and commit,
      3. fetch all rows from the `mschema` table and insert them in pages of
         1000 rows.

    Raises ValueError when a table contains a column that convert_column()
    cannot translate. Tables without rows are created but receive no insert.
    """
    # get & convert datatypes for each table of interest
    tables_of_interest = get_tables_of_interest()

    with mariadb_conn(cursorclass=pymysql.cursors.DictCursor) as mcon, postgres_conn() as pcon:
        with mcon.cursor() as mcurse, pcon.cursor(cursor_factory=extras.DictCursor) as pcurse:
            for table in tables_of_interest:
                # get columns from mysql
                q = "SELECT * FROM INFORMATION_SCHEMA.columns WHERE TABLE_SCHEMA=%s and TABLE_NAME=%s;"
                mcurse.execute(q,[mschema,table])
                column_rows = mcurse.fetchall()

                # convert mysql column names and types to postgres column statements
                columns = [convert_column(a) for a in column_rows]

                # fail with a readable message instead of a TypeError from
                # str.join when a column type has no conversion
                unsupported = [
                    "{} ({})".format(row["COLUMN_NAME"], row["DATA_TYPE"])
                    for row, statement in zip(column_rows, columns)
                    if statement is None
                ]
                if unsupported:
                    raise ValueError(
                        "Cannot migrate {}.{}: unsupported column type(s): {}".format(
                            mschema, table, ", ".join(unsupported)
                        )
                    )
                column_sql = sql.SQL(",\n".join(columns))

                # build the CREATE TABLE statement: header, columns, footer
                header=sql.SQL("CREATE TABLE IF NOT EXISTS {}\n(").format(sql.Identifier(pschema,table))
                footer=sql.SQL(");")
                create_table_statement = sql.SQL("\n").join([header,column_sql,footer])
                print(create_table_statement.as_string(pcon))

                # create the table in postgres
                pcurse.execute(create_table_statement)
                pcon.commit()

                # Get the data from mysql. Identifiers cannot be bound as
                # parameters, so they are backtick-quoted instead.
                mcurse.execute(
                    "SELECT * FROM `{schema}`.`{table}`".format(
                        schema=mschema.replace("`", "``"),
                        table=table.replace("`", "``"),
                    )
                )
                results = mcurse.fetchall()

                # nothing to copy; also avoids an IndexError on results[0]
                if not results:
                    continue

                # The keys of the first row give the field names. They stay
                # unquoted (sql.SQL, not sql.Identifier) because the table
                # above was created with unquoted column names, which
                # postgres folds to lower case.
                field_names = list(results[0])
                column_list = [sql.SQL(x) for x in field_names]
                column_inserts = [sql.Placeholder(x) for x in field_names]

                # The insert statement has no parentheses around its single
                # %s placeholder; execute_values expands it into row tuples.
                psql_insert = sql.SQL("INSERT INTO {table} ({columns}) VALUES %s ").format(
                    table=sql.Identifier(pschema,table)
                    ,columns=sql.SQL(",").join(column_list)
                )

                # the per-row template must carry the parentheses so the
                # VALUES portion is formatted correctly
                template = sql.Composed([
                    sql.SQL("(")
                    ,sql.SQL(",").join(column_inserts)
                    ,sql.SQL(")")
                ])

                # insert the data in pages
                extras.execute_values(pcurse,psql_insert,argslist=results,template=template, page_size=1000)


if __name__ == "__main__":
    run()
|
||||
@ -0,0 +1,21 @@
|
||||
-- Rebuild the list of trials whose history should be downloaded.

-- Intentionally unfiltered: the whole status table is replaced on every run.
DELETE FROM http.download_status;

-- Mark completed or terminated FDA-regulated phase 3 interventional drug
-- trials that started after 2010-01-01 and completed before 2022-01-01.
INSERT INTO http.download_status (nct_id, status)
SELECT
    nct_id,
    'Of Interest'::http.history_download_status AS status
FROM ctgov.studies
WHERE is_fda_regulated_drug = TRUE
    AND study_type = 'Interventional'
    AND phase = 'Phase 3'
    AND overall_status IN ('Terminated', 'Completed')
    AND start_date > '2010-01-01'
    AND completion_date < '2022-01-01';

-- Report how many trials were selected.
SELECT count(*) FROM http.download_status;
|
||||
@ -0,0 +1,36 @@
|
||||
from drugtools.env_setup import ENV,postgres_conn
|
||||
from psycopg2 import extras
|
||||
from collections import namedtuple
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# Pipe-separated files; each data line is "<ICD-10 code>|<cause text>".
FILES=[
    "../non-db_data_sources/GBD and ICD-10_(2019 version)/NONFATAL_cause2code.psv",
    "../non-db_data_sources/GBD and ICD-10_(2019 version)/COD_cause2code.psv"
]
SEP="|"

# execute_values expands the single %s into one tuple per entry
sql = """
INSERT INTO "DiseaseBurden".icd10_to_cause
(code,cause_text)
VALUES %s
"""

with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
    entries = []
    for fpath in FILES:
        print(fpath)

        with open(fpath,"r") as fh:
            for line in tqdm(fh.readlines(),desc=fpath):
                # a blank line (e.g. a trailing newline at the end of the
                # file) holds no record and would break the unpacking below
                if not line.strip():
                    continue

                # still strict: a line without exactly one separator raises
                code,cause = line.split(SEP)
                entries.append((code.strip(), cause.strip()))

    # one bulk insert for the records of all files
    extras.execute_values(pcurse, sql , entries)
|
||||
|
||||
|
||||
@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Empty the rxnav_data directory but keep the directory itself.
# The path is relative to the current working directory, not to this script.
rm -r ../containers/RxNav-In-a-box/rxnav_data/*
|
||||
|
||||
# Remove the postgres data directory of the AACT downloader container,
# including the directory itself. Also relative to the current working directory.
rm -r ../containers/AACT_downloader/postgresql/data
|
||||
@ -0,0 +1,24 @@
|
||||
from drugtools import env_setup
|
||||
from drugtools import historical_trial_selector as hts
|
||||
from drugtools import historical_nct_downloader as hnd
|
||||
from drugtools import historical_nct_extractor as hne
|
||||
from drugtools import download_and_extract_nsde as daen
|
||||
from drugtools import migrate_mysql2pgsql as mm2p
|
||||
|
||||
# Show the active configuration and require explicit confirmation before
# any pipeline stage touches the databases.
print("Current Environment")
print(env_setup.ENV)

cont = input("Are you willing to continue with the current environmnet? y/[n]")

if cont not in ("Y", "y"):
    print("Please fix your .env file and try again")
else:
    # trial selection and download are currently switched off
    print("SelectingTrials")
    #hts.run()
    print("downloading trials")
    #hnd.run()

    print("extracting trials")
    hne.run()

    # The script stops here, so the two stages below never run while this
    # exit is in place.
    exit(0)
    daen.run()
    mm2p.run()
|
||||
@ -0,0 +1,87 @@
|
||||
import requests
|
||||
import json
|
||||
from drugtools.env_setup import ENV,postgres_conn
|
||||
from psycopg2 import extras
|
||||
from collections import namedtuple
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
RecordStuff = namedtuple("RecordStuff", "nct_id condition ui uri rootSource name")
|
||||
|
||||
class Requestor():
    """Thin client for the UMLS search endpoint, restricted to ICD10 sources."""

    def __init__(self,api_key):
        # API key sent with every request
        self.key = api_key

    def search(self,search_term,inputType="sourceUi", returnIdType="code", addnl_terms=None):
        """
        Query https://uts-ws.nlm.nih.gov/rest/search/current/ for `search_term`.

        inputType and returnIdType are passed through as query parameters.
        addnl_terms is an optional dict of extra query parameters; its
        entries override the defaults built here.

        Returns the requests.Response unchanged. The status code is not
        checked and the body is not decoded.
        """
        query_terms = {
            "apiKey":self.key,
            "sabs":"ICD10",
            "string":search_term,
            "returnIdType":returnIdType,
            "inputType":inputType
        } | (addnl_terms or {})  # None default avoids a shared mutable default argument
        query = "https://uts-ws.nlm.nih.gov/rest/search/current/"

        r = requests.get(query,params=query_terms)
        return r
|
||||
|
||||
|
||||
# Look up every trial condition in UMLS (ICD10) and store the candidate
# matches in "DiseaseBurden".trial_to_icd10 for later review.
r = Requestor(ENV.get("UMLS_API_KEY"))

with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
    # MeSH terms only. Not executed at the moment; sql2 below is the query in use.
    sql = """
    select nct_id, downcase_mesh_term
    from ctgov.browse_conditions bc
    where
    mesh_type = 'mesh-list'
    and
    nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    ;
    """
    # conditions plus MeSH terms for every trial that has snapshots
    sql2 = """
    with cte as (
    /* Keywords added too much noise
    select nct_id,downcase_name
    from ctgov.keywords k
    where nct_id in (select distinct nct_id from history.trial_snapshots ts)
    union */
    select nct_id, downcase_name
    from ctgov.conditions c
    union
    select nct_id ,downcase_mesh_term as downcase_name
    from ctgov.browse_conditions bc
    where mesh_type = 'mesh-list'
    )
    select nct_id, downcase_name from cte
    where nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    """
    pcurse.execute(sql2)
    rows = pcurse.fetchall()

    entries = []

    for row in tqdm(rows,desc="Search MeSH terms"):
        nctid = row[0]
        condition = row[1]

        payload = r.search(condition).json()

        # Raise the intended errors. Passing an Exception instance as the
        # .get() default only returned it, and the failure then surfaced
        # later as an unrelated AttributeError or TypeError.
        if 'result' not in payload:
            raise Exception("No result entry in json")
        if 'results' not in payload['result']:
            raise Exception("No results entry in json")
        results = payload['result']['results']

        if not results:
            # keep a placeholder row so the condition is recorded as searched
            entries.append(RecordStuff(nctid,condition,None,None,None,None))
        else:
            for entry in results:
                entries.append(RecordStuff(nctid, condition, entry["ui"], entry["uri"], entry["rootSource"], entry["name"]))

    sql_insert = """
    INSERT INTO "DiseaseBurden".trial_to_icd10
    (nct_id, "condition", ui,uri,rootsource,"name","source",approved,approval_timestamp)
    VALUES
    (%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s, 'UMLS API search', null,null)
    """
    for entry in tqdm(entries,desc="Inserting entries to DB"):
        pcurse.execute(sql_insert,entry._asdict())
|
||||
@ -0,0 +1,6 @@
|
||||
-- Generate one CREATE statement per materialized view outside the system schemas.
-- Reads pg_matviews: pg_views lists only regular views. PostgreSQL has no
-- CREATE OR REPLACE MATERIALIZED VIEW, so IF NOT EXISTS is emitted instead.
-- quote_ident keeps mixed-case names such as "DiseaseBurden" valid.
-- Rows are not returned in dependency order.
SELECT
    'CREATE MATERIALIZED VIEW IF NOT EXISTS '
    || quote_ident(schemaname) || '.' || quote_ident(matviewname)
    || ' AS ' || definition AS create_statement
FROM pg_matviews
WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
;
|
||||
@ -0,0 +1,24 @@
|
||||
-- Generate one CREATE TABLE statement per table outside the system schemas.
-- Columns are emitted in their declared order (ordinal_position), and
-- identifiers are quoted so mixed-case names such as "DiseaseBurden" or
-- "ALLNDC_HISTORY" stay valid.
-- Only type, length and NOT NULL are reproduced; defaults, keys and other
-- constraints are not part of the output.
SELECT
    'CREATE TABLE ' || quote_ident(col.schemaname) || '.' || quote_ident(col.tablename) || E'\n(\n'
    || string_agg(col.column_definition, E',\n' ORDER BY col.ordinal_position)
    || E'\n);\n' AS create_statement
FROM (
    SELECT
        t.schemaname,
        t.tablename,
        c.ordinal_position,
        quote_ident(c.column_name::text) || ' ' || c.data_type
        || CASE
            WHEN c.character_maximum_length IS NOT NULL THEN '(' || c.character_maximum_length || ')'
            ELSE ''
        END
        || CASE
            WHEN c.is_nullable = 'NO' THEN ' NOT NULL'
            ELSE ''
        END AS column_definition
    FROM pg_catalog.pg_tables AS t
    INNER JOIN information_schema.columns AS c
        ON t.schemaname = c.table_schema
        AND t.tablename = c.table_name
    WHERE t.schemaname NOT IN ('pg_catalog', 'information_schema')
) AS col
GROUP BY col.schemaname, col.tablename;
|
||||
@ -0,0 +1,6 @@
|
||||
-- Generate one CREATE OR REPLACE VIEW statement per view outside the system schemas.
-- quote_ident keeps mixed-case schema and view names valid.
-- Rows are not returned in dependency order.
SELECT
    'CREATE OR REPLACE VIEW '
    || quote_ident(schemaname) || '.' || quote_ident(viewname)
    || ' AS ' || definition AS create_statement
FROM pg_views
WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
;
|
||||
@ -0,0 +1,415 @@
|
||||
?column?
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_browse_conditions AS SELECT browse_conditions.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT browse_conditions.mesh_term), '|'::text) AS names +
|
||||
FROM ctgov.browse_conditions +
|
||||
GROUP BY browse_conditions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_browse_interventions AS SELECT browse_interventions.nct_id, +
|
||||
array_to_string(array_agg(browse_interventions.mesh_term), '|'::text) AS names +
|
||||
FROM ctgov.browse_interventions +
|
||||
GROUP BY browse_interventions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_cities AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT facilities.city), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_conditions AS SELECT conditions.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT conditions.name), '|'::text) AS names +
|
||||
FROM ctgov.conditions +
|
||||
GROUP BY conditions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_countries AS SELECT countries.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT countries.name), '|'::text) AS names +
|
||||
FROM ctgov.countries +
|
||||
WHERE (countries.removed IS NOT TRUE) +
|
||||
GROUP BY countries.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_design_outcomes AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_facilities AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(facilities.name), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_group_types AS SELECT design_groups.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_groups.group_type), '|'::text) AS names +
|
||||
FROM ctgov.design_groups +
|
||||
GROUP BY design_groups.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_id_information AS SELECT id_information.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT id_information.id_value), '|'::text) AS names +
|
||||
FROM ctgov.id_information +
|
||||
GROUP BY id_information.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_intervention_types AS SELECT interventions.nct_id, +
|
||||
array_to_string(array_agg(interventions.intervention_type), '|'::text) AS names +
|
||||
FROM ctgov.interventions +
|
||||
GROUP BY interventions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_interventions AS SELECT interventions.nct_id, +
|
||||
array_to_string(array_agg(interventions.name), '|'::text) AS names +
|
||||
FROM ctgov.interventions +
|
||||
GROUP BY interventions.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_keywords AS SELECT keywords.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT keywords.name), '|'::text) AS names +
|
||||
FROM ctgov.keywords +
|
||||
GROUP BY keywords.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_overall_official_affiliations AS SELECT overall_officials.nct_id, +
|
||||
array_to_string(array_agg(overall_officials.affiliation), '|'::text) AS names +
|
||||
FROM ctgov.overall_officials +
|
||||
GROUP BY overall_officials.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_overall_officials AS SELECT overall_officials.nct_id, +
|
||||
array_to_string(array_agg(overall_officials.name), '|'::text) AS names +
|
||||
FROM ctgov.overall_officials +
|
||||
GROUP BY overall_officials.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_primary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
WHERE ((design_outcomes.outcome_type)::text = 'primary'::text) +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_secondary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
WHERE ((design_outcomes.outcome_type)::text = 'secondary'::text) +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_sponsors AS SELECT sponsors.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT sponsors.name), '|'::text) AS names +
|
||||
FROM ctgov.sponsors +
|
||||
GROUP BY sponsors.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_states AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT facilities.state), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.categories AS SELECT search_results.id, +
|
||||
search_results.nct_id, +
|
||||
search_results.name, +
|
||||
search_results.created_at, +
|
||||
search_results.updated_at, +
|
||||
search_results."grouping", +
|
||||
search_results.study_search_id +
|
||||
FROM ctgov.search_results;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW ctgov.covid_19_studies AS SELECT s.nct_id, +
|
||||
s.overall_status, +
|
||||
s.study_type, +
|
||||
s.official_title, +
|
||||
s.acronym, +
|
||||
s.phase, +
|
||||
s.why_stopped, +
|
||||
s.has_dmc, +
|
||||
s.enrollment, +
|
||||
s.is_fda_regulated_device, +
|
||||
s.is_fda_regulated_drug, +
|
||||
s.is_unapproved_device, +
|
||||
s.has_expanded_access, +
|
||||
s.study_first_submitted_date, +
|
||||
s.last_update_posted_date, +
|
||||
s.results_first_posted_date, +
|
||||
s.start_date, +
|
||||
s.primary_completion_date, +
|
||||
s.completion_date, +
|
||||
s.study_first_posted_date, +
|
||||
cv.number_of_facilities, +
|
||||
cv.has_single_facility, +
|
||||
cv.nlm_download_date, +
|
||||
s.number_of_arms, +
|
||||
s.number_of_groups, +
|
||||
sp.name AS lead_sponsor, +
|
||||
aid.names AS other_ids, +
|
||||
e.gender, +
|
||||
e.gender_based, +
|
||||
e.gender_description, +
|
||||
e.population, +
|
||||
e.minimum_age, +
|
||||
e.maximum_age, +
|
||||
e.criteria, +
|
||||
e.healthy_volunteers, +
|
||||
ak.names AS keywords, +
|
||||
ai.names AS interventions, +
|
||||
ac.names AS conditions, +
|
||||
d.primary_purpose, +
|
||||
d.allocation, +
|
||||
d.observational_model, +
|
||||
d.intervention_model, +
|
||||
d.masking, +
|
||||
d.subject_masked, +
|
||||
d.caregiver_masked, +
|
||||
d.investigator_masked, +
|
||||
d.outcomes_assessor_masked, +
|
||||
ado.names AS design_outcomes, +
|
||||
bs.description AS brief_summary, +
|
||||
dd.description AS detailed_description +
|
||||
FROM (((((((((((ctgov.studies s +
|
||||
FULL JOIN ctgov.all_conditions ac ON (((s.nct_id)::text = (ac.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_id_information aid ON (((s.nct_id)::text = (aid.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_design_outcomes ado ON (((s.nct_id)::text = (ado.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_keywords ak ON (((s.nct_id)::text = (ak.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_interventions ai ON (((s.nct_id)::text = (ai.nct_id)::text))) +
|
||||
FULL JOIN ctgov.sponsors sp ON (((s.nct_id)::text = (sp.nct_id)::text))) +
|
||||
FULL JOIN ctgov.calculated_values cv ON (((s.nct_id)::text = (cv.nct_id)::text))) +
|
||||
FULL JOIN ctgov.designs d ON (((s.nct_id)::text = (d.nct_id)::text))) +
|
||||
FULL JOIN ctgov.eligibilities e ON (((s.nct_id)::text = (e.nct_id)::text))) +
|
||||
FULL JOIN ctgov.brief_summaries bs ON (((s.nct_id)::text = (bs.nct_id)::text))) +
|
||||
FULL JOIN ctgov.detailed_descriptions dd ON (((s.nct_id)::text = (dd.nct_id)::text))) +
|
||||
WHERE (((sp.lead_or_collaborator)::text = 'lead'::text) AND ((s.nct_id)::text IN ( SELECT search_results.nct_id +
|
||||
FROM ctgov.search_results +
|
||||
WHERE ((search_results.name)::text = 'covid-19'::text))));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW history.match_drugs_to_trials AS SELECT bi.nct_id, +
|
||||
rp.rxcui, +
|
||||
rp.propvalue1 +
|
||||
FROM (ctgov.browse_interventions bi +
|
||||
JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||
WHERE (((rp.propname)::text = 'RxNorm Name'::text) AND ((bi.nct_id)::text IN ( SELECT trial_snapshots.nct_id +
|
||||
FROM history.trial_snapshots)));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW http.most_recent_download_status AS SELECT t.nct_id, +
|
||||
t.status, +
|
||||
t.update_timestamp +
|
||||
FROM ( SELECT download_status.id, +
|
||||
download_status.nct_id, +
|
||||
download_status.status, +
|
||||
download_status.update_timestamp, +
|
||||
row_number() OVER (PARTITION BY download_status.nct_id ORDER BY download_status.update_timestamp DESC) AS rn +
|
||||
FROM http.download_status) t +
|
||||
WHERE (t.rn = 1) +
|
||||
ORDER BY t.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.time_between_submission_and_start_view AS SELECT s.nct_id, +
|
||||
s.start_date, +
|
||||
ts.version, +
|
||||
ts.submission_date, +
|
||||
abs(((EXTRACT(epoch FROM (ts.submission_date - (s.start_date)::timestamp without time zone)))::double precision / (((24 * 60) * 60))::double precision)) AS start_deviance +
|
||||
FROM (ctgov.studies s +
|
||||
JOIN history.trial_snapshots ts ON (((s.nct_id)::text = (ts.nct_id)::text))) +
|
||||
WHERE ((s.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||
FROM "DiseaseBurden".trial_to_icd10 tti));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.rank_proximity_to_start_time_view AS SELECT cte.nct_id, +
|
||||
cte.version, +
|
||||
row_number() OVER (PARTITION BY cte.nct_id ORDER BY cte.start_deviance) AS rownum, +
|
||||
cte.submission_date, +
|
||||
cte.start_deviance, +
|
||||
cte.start_date, +
|
||||
ts.primary_completion_date, +
|
||||
ts.primary_completion_date_category, +
|
||||
ts.overall_status, +
|
||||
ts.enrollment, +
|
||||
ts.enrollment_category +
|
||||
FROM (time_between_submission_and_start_view cte +
|
||||
JOIN history.trial_snapshots ts ON ((((cte.nct_id)::text = (ts.nct_id)::text) AND (cte.version = ts.version))));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.enrollment_closest_to_start_view AS SELECT cte2.nct_id, +
|
||||
min(cte2.rownum) AS enrollment_source +
|
||||
FROM rank_proximity_to_start_time_view cte2 +
|
||||
WHERE (cte2.enrollment IS NOT NULL) +
|
||||
GROUP BY cte2.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.match_trials_to_bn_in AS WITH trialncts AS ( +
|
||||
SELECT DISTINCT ts.nct_id +
|
||||
FROM history.trial_snapshots ts +
|
||||
) +
|
||||
SELECT bi.nct_id, +
|
||||
bi.downcase_mesh_term, +
|
||||
rr.tty2, +
|
||||
rr.rxcui2 AS bn_or_in_cui, +
|
||||
count(*) AS count +
|
||||
FROM ((ctgov.browse_interventions bi +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((rr.rxcui1 = rp.rxcui))) +
|
||||
WHERE (((bi.nct_id)::text IN ( SELECT trialncts.nct_id +
|
||||
FROM trialncts)) AND ((bi.mesh_type)::text = 'mesh-list'::text) AND ((rp.propname)::text = 'Active_ingredient_name'::text) AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))) +
|
||||
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2 +
|
||||
ORDER BY bi.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.match_trial_to_ndc11 AS SELECT mttbi.nct_id, +
|
||||
ah.ndc, +
|
||||
count(*) AS count +
|
||||
FROM ((match_trials_to_bn_in mttbi +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((mttbi.bn_or_in_cui = rr.rxcui1))) +
|
||||
LEFT JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON ((rr.rxcui2 = ah.rxcui))) +
|
||||
WHERE ((rr.tty1 = 'BN'::bpchar) AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar])) AND ((ah.sab)::text = 'RXNORM'::text)) +
|
||||
GROUP BY mttbi.nct_id, ah.ndc +
|
||||
ORDER BY mttbi.nct_id, ah.ndc;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.match_trial_to_marketing_start_date AS SELECT mttn.nct_id, +
|
||||
n.application_number_or_citation, +
|
||||
min(n.marketing_start_date) AS min +
|
||||
FROM (match_trial_to_ndc11 mttn +
|
||||
JOIN spl.nsde n ON ((mttn.ndc = (n.package_ndc11)::bpchar))) +
|
||||
WHERE (((n.product_type)::text = 'HUMAN PRESCRIPTION DRUG'::text) AND ((n.marketing_category)::text = ANY (ARRAY[('NDA'::character varying)::text, ('ANDA'::character varying)::text, ('BLA'::character varying)::text, ('NDA authorized generic'::character varying)::text, ('NDA AUTHORIZED GENERIC'::character varying)::text]))) +
|
||||
GROUP BY mttn.nct_id, n.application_number_or_citation +
|
||||
ORDER BY mttn.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_burdens_cte AS SELECT b.measure_id, +
|
||||
b.location_id, +
|
||||
b.sex_id, +
|
||||
b.age_id, +
|
||||
b.cause_id, +
|
||||
b.metric_id, +
|
||||
b.year, +
|
||||
b.val, +
|
||||
b.upper_95, +
|
||||
b.lower_95, +
|
||||
b.key_column +
|
||||
FROM "DiseaseBurden".burdens b +
|
||||
WHERE ((b.sex_id = 3) AND (b.metric_id = 1) AND (b.measure_id = 2) AND (b.age_id = 22));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_burdens_cte2 AS SELECT c1.cause_id, +
|
||||
c1.year, +
|
||||
c1.val AS h_sdi_val, +
|
||||
c1.upper_95 AS h_sdi_u95, +
|
||||
c1.lower_95 AS h_sdi_l95, +
|
||||
c2.val AS hm_sdi_val, +
|
||||
c2.upper_95 AS hm_sdi_u95, +
|
||||
c2.lower_95 AS hm_sdi_l95, +
|
||||
c3.val AS m_sdi_val, +
|
||||
c3.upper_95 AS m_sdi_u95, +
|
||||
c3.lower_95 AS m_sdi_l95, +
|
||||
c4.val AS lm_sdi_val, +
|
||||
c4.upper_95 AS lm_sdi_u95, +
|
||||
c4.lower_95 AS lm_sdi_l95, +
|
||||
c5.val AS l_sdi_val, +
|
||||
c5.upper_95 AS l_sdi_u95, +
|
||||
c5.lower_95 AS l_sdi_l95 +
|
||||
FROM ((((view_burdens_cte c1 +
|
||||
JOIN view_burdens_cte c2 ON (((c1.cause_id = c2.cause_id) AND (c1.year = c2.year)))) +
|
||||
JOIN view_burdens_cte c3 ON (((c1.cause_id = c3.cause_id) AND (c1.year = c3.year)))) +
|
||||
JOIN view_burdens_cte c4 ON (((c1.cause_id = c4.cause_id) AND (c1.year = c4.year)))) +
|
||||
JOIN view_burdens_cte c5 ON (((c1.cause_id = c5.cause_id) AND (c1.year = c5.year)))) +
|
||||
WHERE ((c1.location_id = 44635) AND (c2.location_id = 44634) AND (c3.location_id = 44639) AND (c4.location_id = 44636) AND (c5.location_id = 44637));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_cte AS SELECT ts.nct_id, +
|
||||
ts.primary_completion_date, +
|
||||
ts.primary_completion_date_category, +
|
||||
ts.enrollment, +
|
||||
ts.start_date, +
|
||||
ts.enrollment_category, +
|
||||
ts.overall_status, +
|
||||
min(ts.submission_date) AS earliest_date_observed +
|
||||
FROM history.trial_snapshots ts +
|
||||
WHERE (((ts.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||
FROM "DiseaseBurden".trial_to_icd10 tti +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type))) AND (ts.submission_date >= ts.start_date) AND (ts.overall_status <> ALL (ARRAY['Completed'::history.study_statuses, 'Terminated'::history.study_statuses]))) +
|
||||
GROUP BY ts.nct_id, ts.primary_completion_date, ts.primary_completion_date_category, ts.start_date, ts.enrollment, ts.enrollment_category, ts.overall_status;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte0 AS SELECT tti.nct_id, +
|
||||
tti.ui, +
|
||||
tti.condition, +
|
||||
itc.cause_text, +
|
||||
ch.cause_id, +
|
||||
ch.level +
|
||||
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type);
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte AS SELECT view_disbur_cte0.nct_id, +
|
||||
max(view_disbur_cte0.level) AS max_level +
|
||||
FROM view_disbur_cte0 +
|
||||
GROUP BY view_disbur_cte0.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_trial_to_cause AS SELECT tti.nct_id, +
|
||||
tti.ui, +
|
||||
tti.condition, +
|
||||
itc.cause_text, +
|
||||
ch.cause_id, +
|
||||
ch.level +
|
||||
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type) +
|
||||
ORDER BY tti.nct_id;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte2 AS SELECT ttc.nct_id, +
|
||||
ttc.ui, +
|
||||
ttc.condition, +
|
||||
ttc.cause_text, +
|
||||
ttc.cause_id, +
|
||||
disbur_cte.max_level +
|
||||
FROM (view_trial_to_cause ttc +
|
||||
JOIN view_disbur_cte disbur_cte ON (((disbur_cte.nct_id)::text = (ttc.nct_id)::text))) +
|
||||
WHERE (ttc.level = disbur_cte.max_level) +
|
||||
GROUP BY ttc.nct_id, ttc.ui, ttc.condition, ttc.cause_text, ttc.cause_id, disbur_cte.max_level +
|
||||
ORDER BY ttc.nct_id, ttc.ui;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte3 AS SELECT disbur_cte2.nct_id, +
|
||||
SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) AS code, +
|
||||
disbur_cte2.condition, +
|
||||
disbur_cte2.cause_text, +
|
||||
disbur_cte2.cause_id, +
|
||||
ic.chapter_code AS category_id, +
|
||||
ic.group_name, +
|
||||
disbur_cte2.max_level +
|
||||
FROM (view_disbur_cte2 disbur_cte2 +
|
||||
JOIN "DiseaseBurden".icd10_categories ic ON (((SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) <= (ic.end_code)::text) AND (SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) >= (ic.start_code)::text)))) +
|
||||
WHERE (ic.level = 1);
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.formatted_data AS SELECT cte.nct_id, +
|
||||
cte.start_date, +
|
||||
cte.enrollment AS current_enrollment, +
|
||||
cte.enrollment_category, +
|
||||
cte.overall_status AS current_status, +
|
||||
cte.earliest_date_observed, +
|
||||
(EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))) AS elapsed_duration, +
|
||||
count(DISTINCT mttmsd.application_number_or_citation) AS n_brands, +
|
||||
dbc3.code, +
|
||||
dbc3.condition, +
|
||||
dbc3.cause_text, +
|
||||
dbc3.cause_id, +
|
||||
dbc3.category_id, +
|
||||
dbc3.group_name, +
|
||||
dbc3.max_level, +
|
||||
b.year, +
|
||||
b.h_sdi_val, +
|
||||
b.h_sdi_u95, +
|
||||
b.h_sdi_l95, +
|
||||
b.hm_sdi_val, +
|
||||
b.hm_sdi_u95, +
|
||||
b.hm_sdi_l95, +
|
||||
b.m_sdi_val, +
|
||||
b.m_sdi_u95, +
|
||||
b.m_sdi_l95, +
|
||||
b.lm_sdi_val, +
|
||||
b.lm_sdi_u95, +
|
||||
b.lm_sdi_l95, +
|
||||
b.l_sdi_val, +
|
||||
b.l_sdi_u95, +
|
||||
b.l_sdi_l95 +
|
||||
FROM (((view_cte cte +
|
||||
JOIN match_trial_to_marketing_start_date mttmsd ON (((cte.nct_id)::text = (mttmsd.nct_id)::text))) +
|
||||
JOIN view_disbur_cte3 dbc3 ON (((dbc3.nct_id)::text = (cte.nct_id)::text))) +
|
||||
JOIN view_burdens_cte2 b ON (((b.cause_id = dbc3.cause_id) AND (EXTRACT(year FROM b.year) = EXTRACT(year FROM cte.earliest_date_observed))))) +
|
||||
WHERE (mttmsd.min <= cte.earliest_date_observed) +
|
||||
GROUP BY cte.nct_id, cte.start_date, cte.enrollment, cte.enrollment_category, cte.overall_status, cte.earliest_date_observed, (EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))), dbc3.code, dbc3.condition, dbc3.cause_text, dbc3.cause_id, dbc3.category_id, dbc3.group_name, dbc3.max_level, b.cause_id, b.year, b.h_sdi_val, b.h_sdi_u95, b.h_sdi_l95, b.hm_sdi_val, b.hm_sdi_u95, b.hm_sdi_l95, b.m_sdi_val, b.m_sdi_u95, b.m_sdi_l95, b.lm_sdi_val, b.lm_sdi_u95, b.lm_sdi_l95, b.l_sdi_val, b.l_sdi_u95, b.l_sdi_l95+
|
||||
ORDER BY cte.nct_id, cte.earliest_date_observed;
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.formatted_data_with_planned_enrollment AS SELECT f.nct_id, +
|
||||
f.start_date, +
|
||||
f.current_enrollment, +
|
||||
f.enrollment_category, +
|
||||
f.current_status, +
|
||||
f.earliest_date_observed, +
|
||||
f.elapsed_duration, +
|
||||
f.n_brands, +
|
||||
f.code, +
|
||||
f.condition, +
|
||||
f.cause_text, +
|
||||
f.cause_id, +
|
||||
f.category_id, +
|
||||
f.group_name, +
|
||||
f.max_level, +
|
||||
f.year, +
|
||||
f.h_sdi_val, +
|
||||
f.h_sdi_u95, +
|
||||
f.h_sdi_l95, +
|
||||
f.hm_sdi_val, +
|
||||
f.hm_sdi_u95, +
|
||||
f.hm_sdi_l95, +
|
||||
f.m_sdi_val, +
|
||||
f.m_sdi_u95, +
|
||||
f.m_sdi_l95, +
|
||||
f.lm_sdi_val, +
|
||||
f.lm_sdi_u95, +
|
||||
f.lm_sdi_l95, +
|
||||
f.l_sdi_val, +
|
||||
f.l_sdi_u95, +
|
||||
f.l_sdi_l95, +
|
||||
s.overall_status AS final_status, +
|
||||
c2a.version, +
|
||||
c2a.enrollment AS planned_enrollment +
|
||||
FROM (((formatted_data f +
|
||||
JOIN ctgov.studies s ON (((f.nct_id)::text = (s.nct_id)::text))) +
|
||||
JOIN enrollment_closest_to_start_view c3e ON (((c3e.nct_id)::text = (f.nct_id)::text))) +
|
||||
JOIN rank_proximity_to_start_time_view c2a ON ((((c3e.nct_id)::text = (c2a.nct_id)::text) AND (c3e.enrollment_source = c2a.rownum))));
|
||||
CREATE OR REPLACE MATERIALIZED VIEW http.trials_to_download AS SELECT most_recent_download_status.nct_id +
|
||||
FROM http.most_recent_download_status +
|
||||
WHERE (most_recent_download_status.status = 'Of Interest'::http.history_download_status);
|
||||
CREATE OR REPLACE MATERIALIZED VIEW public.primary_design_outcomes AS SELECT do2.id, +
|
||||
do2.nct_id, +
|
||||
do2.outcome_type, +
|
||||
do2.measure, +
|
||||
do2.time_frame, +
|
||||
do2.population, +
|
||||
do2.description +
|
||||
FROM ctgov.design_outcomes do2 +
|
||||
WHERE (((do2.outcome_type)::text = 'primary'::text) AND ((do2.nct_id)::text IN ( SELECT DISTINCT fd.nct_id +
|
||||
FROM formatted_data fd)));
|
||||
(40 rows)
|
||||
|
||||
@ -0,0 +1,920 @@
|
||||
?column?
|
||||
-------------------------------------------------------
|
||||
CREATE TABLE DiseaseBurden.age_group +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
age_group character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.burdens +
|
||||
( +
|
||||
measure_id integer NOT NULL, +
|
||||
location_id integer NOT NULL, +
|
||||
sex_id integer NOT NULL, +
|
||||
age_id integer NOT NULL, +
|
||||
cause_id integer NOT NULL, +
|
||||
metric_id integer NOT NULL, +
|
||||
year date NOT NULL, +
|
||||
val double precision NOT NULL, +
|
||||
upper_95 double precision NOT NULL, +
|
||||
lower_95 double precision NOT NULL, +
|
||||
key_column integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.cause +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
cause character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.cause_hierarchy +
|
||||
( +
|
||||
cause_id integer NOT NULL, +
|
||||
cause_name character varying, +
|
||||
parent_id integer NOT NULL, +
|
||||
parent_nae character varying, +
|
||||
level integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.icd10_categories +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
start_code character varying NOT NULL, +
|
||||
end_code character varying NOT NULL, +
|
||||
group_name character varying NOT NULL, +
|
||||
level integer NOT NULL, +
|
||||
chapter character varying NOT NULL, +
|
||||
chapter_code integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.icd10_to_cause +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
code character varying NOT NULL, +
|
||||
cause_text character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.location +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
location character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.measures +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
label character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.metric +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
metric_label character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.rei +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
rei_label character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.sex +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
sex character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE DiseaseBurden.trial_to_icd10 +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying NOT NULL, +
|
||||
condition character varying NOT NULL, +
|
||||
ui character varying, +
|
||||
uri character varying, +
|
||||
rootsource character varying, +
|
||||
name character varying, +
|
||||
source character varying, +
|
||||
approved USER-DEFINED, +
|
||||
approval_timestamp timestamp without time zone +
|
||||
); +
|
||||
|
||||
CREATE TABLE Formularies.usp_dc_2023 +
|
||||
( +
|
||||
USP Class character varying(250), +
|
||||
USP Pharmacotherapeutic Group character varying(250),+
|
||||
API Concept character varying(250), +
|
||||
rxcui character varying(15), +
|
||||
tty character varying(10), +
|
||||
Name character varying(256), +
|
||||
Related BN character varying(250), +
|
||||
Related DF character varying(25050), +
|
||||
USP Category character varying(250) +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.active_storage_attachments +
|
||||
( +
|
||||
id bigint NOT NULL, +
|
||||
name character varying NOT NULL, +
|
||||
record_type character varying NOT NULL, +
|
||||
record_id bigint NOT NULL, +
|
||||
blob_id bigint NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.active_storage_blobs +
|
||||
( +
|
||||
metadata text, +
|
||||
checksum character varying NOT NULL, +
|
||||
byte_size bigint NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
id bigint NOT NULL, +
|
||||
key character varying NOT NULL, +
|
||||
filename character varying NOT NULL, +
|
||||
content_type character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.baseline_counts +
|
||||
( +
|
||||
count integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
ctgov_group_code character varying, +
|
||||
units character varying, +
|
||||
scope character varying, +
|
||||
result_group_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.baseline_measurements +
|
||||
( +
|
||||
param_value character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
classification character varying, +
|
||||
category character varying, +
|
||||
title character varying, +
|
||||
description text, +
|
||||
units character varying, +
|
||||
param_type character varying, +
|
||||
param_value_num numeric, +
|
||||
dispersion_type character varying, +
|
||||
dispersion_value character varying, +
|
||||
dispersion_value_num numeric, +
|
||||
dispersion_lower_limit numeric, +
|
||||
dispersion_upper_limit numeric, +
|
||||
explanation_of_na character varying, +
|
||||
number_analyzed integer, +
|
||||
number_analyzed_units character varying, +
|
||||
population_description character varying, +
|
||||
calculate_percentage character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.brief_summaries +
|
||||
( +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
description text +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.browse_conditions +
|
||||
( +
|
||||
mesh_term character varying, +
|
||||
id integer NOT NULL, +
|
||||
mesh_type character varying, +
|
||||
downcase_mesh_term character varying, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.browse_interventions +
|
||||
( +
|
||||
downcase_mesh_term character varying, +
|
||||
mesh_term character varying, +
|
||||
mesh_type character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.calculated_values +
|
||||
( +
|
||||
number_of_secondary_outcomes_to_measure integer, +
|
||||
maximum_age_unit character varying, +
|
||||
minimum_age_unit character varying, +
|
||||
maximum_age_num integer, +
|
||||
minimum_age_num integer, +
|
||||
has_single_facility boolean, +
|
||||
has_us_facility boolean, +
|
||||
months_to_report_results integer, +
|
||||
number_of_sae_subjects integer, +
|
||||
were_results_reported boolean, +
|
||||
registered_in_calendar_year integer, +
|
||||
nlm_download_date date, +
|
||||
actual_duration integer, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
number_of_facilities integer, +
|
||||
number_of_nsae_subjects integer, +
|
||||
number_of_other_outcomes_to_measure integer, +
|
||||
number_of_primary_outcomes_to_measure integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.central_contacts +
|
||||
( +
|
||||
phone_extension character varying, +
|
||||
nct_id character varying, +
|
||||
role character varying, +
|
||||
id integer NOT NULL, +
|
||||
contact_type character varying, +
|
||||
name character varying, +
|
||||
phone character varying, +
|
||||
email character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.conditions +
|
||||
( +
|
||||
downcase_name character varying, +
|
||||
name character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.countries +
|
||||
( +
|
||||
name character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
removed boolean +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.design_group_interventions +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
design_group_id integer, +
|
||||
intervention_id integer, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.design_groups +
|
||||
( +
|
||||
group_type character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
title character varying, +
|
||||
description text +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.design_outcomes +
|
||||
( +
|
||||
description text, +
|
||||
measure text, +
|
||||
outcome_type character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
time_frame text, +
|
||||
population character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.designs +
|
||||
( +
|
||||
masking_description text, +
|
||||
subject_masked boolean, +
|
||||
caregiver_masked boolean, +
|
||||
investigator_masked boolean, +
|
||||
outcomes_assessor_masked boolean, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
allocation character varying, +
|
||||
intervention_model character varying, +
|
||||
observational_model character varying, +
|
||||
primary_purpose character varying, +
|
||||
time_perspective character varying, +
|
||||
masking character varying, +
|
||||
intervention_model_description text +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.detailed_descriptions +
|
||||
( +
|
||||
description text, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.documents +
|
||||
( +
|
||||
comment text, +
|
||||
id integer NOT NULL, +
|
||||
url character varying, +
|
||||
document_type character varying, +
|
||||
nct_id character varying, +
|
||||
document_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.drop_withdrawals +
|
||||
( +
|
||||
period character varying, +
|
||||
reason character varying, +
|
||||
count integer, +
|
||||
ctgov_group_code character varying, +
|
||||
result_group_id integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.eligibilities +
|
||||
( +
|
||||
older_adult boolean, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
sampling_method character varying, +
|
||||
gender character varying, +
|
||||
minimum_age character varying, +
|
||||
maximum_age character varying, +
|
||||
healthy_volunteers character varying, +
|
||||
population text, +
|
||||
criteria text, +
|
||||
gender_description text, +
|
||||
gender_based boolean, +
|
||||
adult boolean, +
|
||||
child boolean +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.facilities +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
status character varying, +
|
||||
name character varying, +
|
||||
city character varying, +
|
||||
state character varying, +
|
||||
zip character varying, +
|
||||
country character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.facility_contacts +
|
||||
( +
|
||||
contact_type character varying, +
|
||||
name character varying, +
|
||||
email character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
phone character varying, +
|
||||
phone_extension character varying, +
|
||||
facility_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.facility_investigators +
|
||||
( +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
facility_id integer, +
|
||||
role character varying, +
|
||||
name character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.file_records +
|
||||
( +
|
||||
url character varying, +
|
||||
id bigint NOT NULL, +
|
||||
filename character varying, +
|
||||
file_size bigint, +
|
||||
file_type character varying, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.id_information +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
id_source character varying, +
|
||||
nct_id character varying, +
|
||||
id_link character varying, +
|
||||
id_value character varying, +
|
||||
id_type_description character varying, +
|
||||
id_type character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.intervention_other_names +
|
||||
( +
|
||||
name character varying, +
|
||||
nct_id character varying, +
|
||||
intervention_id integer, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.interventions +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
name character varying, +
|
||||
intervention_type character varying, +
|
||||
description text, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.ipd_information_types +
|
||||
( +
|
||||
name character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.keywords +
|
||||
( +
|
||||
name character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
downcase_name character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.links +
|
||||
( +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
description text, +
|
||||
url character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.mesh_headings +
|
||||
( +
|
||||
qualifier character varying, +
|
||||
id integer NOT NULL, +
|
||||
subcategory character varying, +
|
||||
heading character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.mesh_terms +
|
||||
( +
|
||||
description character varying, +
|
||||
tree_number character varying, +
|
||||
qualifier character varying, +
|
||||
id integer NOT NULL, +
|
||||
downcase_mesh_term character varying, +
|
||||
mesh_term character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.milestones +
|
||||
( +
|
||||
count_units character varying, +
|
||||
count integer, +
|
||||
description text, +
|
||||
period character varying, +
|
||||
title character varying, +
|
||||
ctgov_group_code character varying, +
|
||||
result_group_id integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
milestone_description character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcome_analyses +
|
||||
( +
|
||||
other_analysis_description text, +
|
||||
param_type character varying, +
|
||||
non_inferiority_type character varying, +
|
||||
outcome_id integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL, +
|
||||
param_value numeric, +
|
||||
dispersion_type character varying, +
|
||||
dispersion_value numeric, +
|
||||
p_value_modifier character varying, +
|
||||
p_value double precision, +
|
||||
ci_n_sides character varying, +
|
||||
ci_percent numeric, +
|
||||
ci_lower_limit numeric, +
|
||||
ci_upper_limit numeric, +
|
||||
ci_upper_limit_na_comment character varying, +
|
||||
p_value_description character varying, +
|
||||
method character varying, +
|
||||
method_description text, +
|
||||
estimate_description text, +
|
||||
groups_description text, +
|
||||
non_inferiority_description text +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcome_analysis_groups +
|
||||
( +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
outcome_analysis_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcome_counts +
|
||||
( +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
scope character varying, +
|
||||
units character varying, +
|
||||
count integer, +
|
||||
outcome_id integer, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcome_measurements +
|
||||
( +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
classification character varying, +
|
||||
category character varying, +
|
||||
title character varying, +
|
||||
description text, +
|
||||
units character varying, +
|
||||
param_type character varying, +
|
||||
param_value character varying, +
|
||||
param_value_num numeric, +
|
||||
dispersion_type character varying, +
|
||||
dispersion_value character varying, +
|
||||
dispersion_value_num numeric, +
|
||||
dispersion_lower_limit numeric, +
|
||||
dispersion_upper_limit numeric, +
|
||||
explanation_of_na text, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
outcome_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.outcomes +
|
||||
( +
|
||||
population text, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
outcome_type character varying, +
|
||||
title text, +
|
||||
description text, +
|
||||
time_frame text, +
|
||||
anticipated_posting_date date, +
|
||||
anticipated_posting_month_year character varying, +
|
||||
units character varying, +
|
||||
units_analyzed character varying, +
|
||||
dispersion_type character varying, +
|
||||
param_type character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.overall_officials +
|
||||
( +
|
||||
name character varying, +
|
||||
affiliation character varying, +
|
||||
role character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.participant_flows +
|
||||
( +
|
||||
count_units integer, +
|
||||
nct_id character varying, +
|
||||
pre_assignment_details text, +
|
||||
units_analyzed character varying, +
|
||||
drop_withdraw_comment character varying, +
|
||||
reason_comment character varying, +
|
||||
recruitment_details text, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.pending_results +
|
||||
( +
|
||||
event_date_description character varying, +
|
||||
event_date date, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
event character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.provided_documents +
|
||||
( +
|
||||
has_sap boolean, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
document_type character varying, +
|
||||
has_protocol boolean, +
|
||||
has_icf boolean, +
|
||||
document_date date, +
|
||||
url character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.reported_event_totals +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
subjects_at_risk integer, +
|
||||
subjects_affected integer, +
|
||||
classification character varying NOT NULL, +
|
||||
event_type character varying, +
|
||||
ctgov_group_code character varying NOT NULL, +
|
||||
nct_id character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.reported_events +
|
||||
( +
|
||||
vocab character varying, +
|
||||
nct_id character varying, +
|
||||
result_group_id integer, +
|
||||
ctgov_group_code character varying, +
|
||||
time_frame text, +
|
||||
event_type character varying, +
|
||||
default_vocab character varying, +
|
||||
default_assessment character varying, +
|
||||
subjects_affected integer, +
|
||||
subjects_at_risk integer, +
|
||||
description text, +
|
||||
event_count integer, +
|
||||
organ_system character varying, +
|
||||
adverse_event_term character varying, +
|
||||
frequency_threshold integer, +
|
||||
assessment character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.responsible_parties +
|
||||
( +
|
||||
affiliation text, +
|
||||
nct_id character varying, +
|
||||
responsible_party_type character varying, +
|
||||
name character varying, +
|
||||
title character varying, +
|
||||
organization character varying, +
|
||||
id integer NOT NULL, +
|
||||
old_name_title character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.result_agreements +
|
||||
( +
|
||||
other_details text, +
|
||||
restrictive_agreement character varying, +
|
||||
restriction_type character varying, +
|
||||
agreement text, +
|
||||
pi_employee character varying, +
|
||||
nct_id character varying, +
|
||||
id integer NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.result_contacts +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
organization character varying, +
|
||||
name character varying, +
|
||||
phone character varying, +
|
||||
email character varying, +
|
||||
extension character varying, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.result_groups +
|
||||
( +
|
||||
result_type character varying, +
|
||||
title character varying, +
|
||||
description text, +
|
||||
id integer NOT NULL, +
|
||||
nct_id character varying, +
|
||||
ctgov_group_code character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.retractions +
|
||||
( +
|
||||
pmid character varying, +
|
||||
id bigint NOT NULL, +
|
||||
nct_id character varying, +
|
||||
source character varying, +
|
||||
reference_id integer +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.search_results +
|
||||
( +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
nct_id character varying NOT NULL, +
|
||||
id integer NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
grouping character varying NOT NULL, +
|
||||
study_search_id integer, +
|
||||
name character varying NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.sponsors +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
name character varying, +
|
||||
lead_or_collaborator character varying, +
|
||||
agency_class character varying, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.studies +
|
||||
( +
|
||||
phase character varying, +
|
||||
delayed_posting character varying, +
|
||||
source_class character varying, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
plan_to_share_ipd_description character varying, +
|
||||
plan_to_share_ipd character varying, +
|
||||
ipd_url character varying, +
|
||||
ipd_access_criteria character varying, +
|
||||
ipd_time_frame character varying, +
|
||||
biospec_description text, +
|
||||
biospec_retention character varying, +
|
||||
is_us_export boolean, +
|
||||
is_ppsd boolean, +
|
||||
is_unapproved_device boolean, +
|
||||
is_fda_regulated_device boolean, +
|
||||
is_fda_regulated_drug boolean, +
|
||||
has_dmc boolean, +
|
||||
expanded_access_type_treatment boolean, +
|
||||
expanded_access_type_intermediate boolean, +
|
||||
expanded_access_type_individual boolean, +
|
||||
has_expanded_access boolean, +
|
||||
why_stopped character varying, +
|
||||
number_of_groups integer, +
|
||||
number_of_arms integer, +
|
||||
limitations_and_caveats character varying, +
|
||||
source character varying, +
|
||||
enrollment_type character varying, +
|
||||
enrollment integer, +
|
||||
expanded_access_nctid character varying, +
|
||||
last_known_status character varying, +
|
||||
overall_status character varying, +
|
||||
official_title text, +
|
||||
brief_title text, +
|
||||
baseline_population text, +
|
||||
acronym character varying, +
|
||||
study_type character varying, +
|
||||
target_duration character varying, +
|
||||
results_first_submitted_date date, +
|
||||
study_first_submitted_date date, +
|
||||
nlm_download_date_description character varying, +
|
||||
primary_completion_date date, +
|
||||
nct_id character varying, +
|
||||
primary_completion_date_type character varying, +
|
||||
primary_completion_month_year character varying, +
|
||||
completion_date date, +
|
||||
completion_date_type character varying, +
|
||||
completion_month_year character varying, +
|
||||
verification_date date, +
|
||||
verification_month_year character varying, +
|
||||
start_date date, +
|
||||
start_date_type character varying, +
|
||||
start_month_year character varying, +
|
||||
last_update_posted_date_type character varying, +
|
||||
last_update_posted_date date, +
|
||||
last_update_submitted_qc_date date, +
|
||||
disposition_first_posted_date_type character varying,+
|
||||
disposition_first_posted_date date, +
|
||||
disposition_first_submitted_qc_date date, +
|
||||
results_first_posted_date_type character varying, +
|
||||
results_first_posted_date date, +
|
||||
results_first_submitted_qc_date date, +
|
||||
study_first_posted_date_type character varying, +
|
||||
study_first_posted_date date, +
|
||||
study_first_submitted_qc_date date, +
|
||||
last_update_submitted_date date, +
|
||||
disposition_first_submitted_date date, +
|
||||
baseline_type_units_analyzed character varying, +
|
||||
fdaaa801_violation boolean, +
|
||||
expanded_access_status_for_nctid character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.study_records +
|
||||
( +
|
||||
nct_id character varying, +
|
||||
sha character varying, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
type character varying, +
|
||||
content json, +
|
||||
id bigint NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.study_references +
|
||||
( +
|
||||
id integer NOT NULL, +
|
||||
citation text, +
|
||||
reference_type character varying, +
|
||||
pmid character varying, +
|
||||
nct_id character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.study_searches +
|
||||
( +
|
||||
query character varying NOT NULL, +
|
||||
id integer NOT NULL, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
beta_api boolean NOT NULL, +
|
||||
name character varying NOT NULL, +
|
||||
grouping character varying NOT NULL, +
|
||||
save_tsv boolean NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE ctgov.verifiers +
|
||||
( +
|
||||
id bigint NOT NULL, +
|
||||
created_at timestamp without time zone NOT NULL, +
|
||||
source json, +
|
||||
updated_at timestamp without time zone NOT NULL, +
|
||||
load_event_id integer, +
|
||||
last_run timestamp without time zone, +
|
||||
differences json NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE history.trial_snapshots +
|
||||
( +
|
||||
completion_date timestamp without time zone, +
|
||||
nct_id character varying(15) NOT NULL, +
|
||||
version integer NOT NULL, +
|
||||
submission_date timestamp without time zone, +
|
||||
primary_completion_date timestamp without time zone, +
|
||||
primary_completion_date_category USER-DEFINED, +
|
||||
start_date timestamp without time zone, +
|
||||
start_date_category USER-DEFINED, +
|
||||
completion_date_category USER-DEFINED, +
|
||||
overall_status USER-DEFINED, +
|
||||
enrollment integer, +
|
||||
enrollment_category USER-DEFINED, +
|
||||
sponsor character varying, +
|
||||
responsible_party character varying +
|
||||
); +
|
||||
|
||||
CREATE TABLE http.download_status +
|
||||
( +
|
||||
status USER-DEFINED NOT NULL, +
|
||||
nct_id character varying(15) NOT NULL, +
|
||||
id integer NOT NULL, +
|
||||
update_timestamp timestamp with time zone +
|
||||
); +
|
||||
|
||||
CREATE TABLE http.responses +
|
||||
( +
|
||||
nct_id character varying(15), +
|
||||
version_a smallint, +
|
||||
version_b smallint, +
|
||||
url character varying(255), +
|
||||
response_code smallint, +
|
||||
response_date timestamp with time zone, +
|
||||
id integer NOT NULL, +
|
||||
html text +
|
||||
); +
|
||||
|
||||
CREATE TABLE rxnorm_migrated.ALLNDC_HISTORY +
|
||||
( +
|
||||
sab character varying(10), +
|
||||
ndc11_left9 character(9) NOT NULL, +
|
||||
rowid integer NOT NULL, +
|
||||
ndc character(13) NOT NULL, +
|
||||
suppress character(1), +
|
||||
edate character(6), +
|
||||
sdate character(6), +
|
||||
rxcui character(16) +
|
||||
); +
|
||||
|
||||
CREATE TABLE rxnorm_migrated.ALLRXCUI_HISTORY +
|
||||
( +
|
||||
tty character varying(5), +
|
||||
sts character(1), +
|
||||
rxindb character(1), +
|
||||
indb character(1), +
|
||||
rowid integer NOT NULL, +
|
||||
rxcui character(16) NOT NULL, +
|
||||
sab character varying(20), +
|
||||
str character varying(3000), +
|
||||
sdate character(6), +
|
||||
edate character(6) +
|
||||
); +
|
||||
|
||||
CREATE TABLE rxnorm_migrated.rxnorm_props +
|
||||
( +
|
||||
rxcui character(8) NOT NULL, +
|
||||
pres smallint NOT NULL, +
|
||||
propvalue1 character varying(4000) NOT NULL, +
|
||||
propname character varying(30) NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE rxnorm_migrated.rxnorm_relations +
|
||||
( +
|
||||
tty2 character(4) NOT NULL, +
|
||||
rxcui1 character(8) NOT NULL, +
|
||||
tty1 character(4) NOT NULL, +
|
||||
cvf character(4) NOT NULL, +
|
||||
rxcui2 character(8) NOT NULL +
|
||||
); +
|
||||
|
||||
CREATE TABLE spl.nsde +
|
||||
( +
|
||||
proprietary_name character varying(500), +
|
||||
package_ndc character varying(50), +
|
||||
application_number_or_citation character varying(25),+
|
||||
package_ndc11 character varying(11), +
|
||||
id integer NOT NULL, +
|
||||
reactivation_date date, +
|
||||
inactivation_date date, +
|
||||
marketing_start_date date, +
|
||||
marketing_end_date date, +
|
||||
billing_unit character varying(35), +
|
||||
dosage_form character varying(155), +
|
||||
marketing_category character varying(160), +
|
||||
product_type character varying(90) +
|
||||
); +
|
||||
|
||||
(76 rows)
|
||||
|
||||
@ -0,0 +1,415 @@
|
||||
?column?
|
||||
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||
CREATE OR REPLACE VIEW ctgov.all_browse_conditions AS SELECT browse_conditions.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT browse_conditions.mesh_term), '|'::text) AS names +
|
||||
FROM ctgov.browse_conditions +
|
||||
GROUP BY browse_conditions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_browse_interventions AS SELECT browse_interventions.nct_id, +
|
||||
array_to_string(array_agg(browse_interventions.mesh_term), '|'::text) AS names +
|
||||
FROM ctgov.browse_interventions +
|
||||
GROUP BY browse_interventions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_cities AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT facilities.city), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_conditions AS SELECT conditions.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT conditions.name), '|'::text) AS names +
|
||||
FROM ctgov.conditions +
|
||||
GROUP BY conditions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_countries AS SELECT countries.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT countries.name), '|'::text) AS names +
|
||||
FROM ctgov.countries +
|
||||
WHERE (countries.removed IS NOT TRUE) +
|
||||
GROUP BY countries.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_design_outcomes AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_facilities AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(facilities.name), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_group_types AS SELECT design_groups.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_groups.group_type), '|'::text) AS names +
|
||||
FROM ctgov.design_groups +
|
||||
GROUP BY design_groups.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_id_information AS SELECT id_information.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT id_information.id_value), '|'::text) AS names +
|
||||
FROM ctgov.id_information +
|
||||
GROUP BY id_information.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_intervention_types AS SELECT interventions.nct_id, +
|
||||
array_to_string(array_agg(interventions.intervention_type), '|'::text) AS names +
|
||||
FROM ctgov.interventions +
|
||||
GROUP BY interventions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_interventions AS SELECT interventions.nct_id, +
|
||||
array_to_string(array_agg(interventions.name), '|'::text) AS names +
|
||||
FROM ctgov.interventions +
|
||||
GROUP BY interventions.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_keywords AS SELECT keywords.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT keywords.name), '|'::text) AS names +
|
||||
FROM ctgov.keywords +
|
||||
GROUP BY keywords.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_overall_official_affiliations AS SELECT overall_officials.nct_id, +
|
||||
array_to_string(array_agg(overall_officials.affiliation), '|'::text) AS names +
|
||||
FROM ctgov.overall_officials +
|
||||
GROUP BY overall_officials.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_overall_officials AS SELECT overall_officials.nct_id, +
|
||||
array_to_string(array_agg(overall_officials.name), '|'::text) AS names +
|
||||
FROM ctgov.overall_officials +
|
||||
GROUP BY overall_officials.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_primary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
WHERE ((design_outcomes.outcome_type)::text = 'primary'::text) +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_secondary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||
FROM ctgov.design_outcomes +
|
||||
WHERE ((design_outcomes.outcome_type)::text = 'secondary'::text) +
|
||||
GROUP BY design_outcomes.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_sponsors AS SELECT sponsors.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT sponsors.name), '|'::text) AS names +
|
||||
FROM ctgov.sponsors +
|
||||
GROUP BY sponsors.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.all_states AS SELECT facilities.nct_id, +
|
||||
array_to_string(array_agg(DISTINCT facilities.state), '|'::text) AS names +
|
||||
FROM ctgov.facilities +
|
||||
GROUP BY facilities.nct_id;
|
||||
CREATE OR REPLACE VIEW ctgov.categories AS SELECT search_results.id, +
|
||||
search_results.nct_id, +
|
||||
search_results.name, +
|
||||
search_results.created_at, +
|
||||
search_results.updated_at, +
|
||||
search_results."grouping", +
|
||||
search_results.study_search_id +
|
||||
FROM ctgov.search_results;
|
||||
CREATE OR REPLACE VIEW ctgov.covid_19_studies AS SELECT s.nct_id, +
|
||||
s.overall_status, +
|
||||
s.study_type, +
|
||||
s.official_title, +
|
||||
s.acronym, +
|
||||
s.phase, +
|
||||
s.why_stopped, +
|
||||
s.has_dmc, +
|
||||
s.enrollment, +
|
||||
s.is_fda_regulated_device, +
|
||||
s.is_fda_regulated_drug, +
|
||||
s.is_unapproved_device, +
|
||||
s.has_expanded_access, +
|
||||
s.study_first_submitted_date, +
|
||||
s.last_update_posted_date, +
|
||||
s.results_first_posted_date, +
|
||||
s.start_date, +
|
||||
s.primary_completion_date, +
|
||||
s.completion_date, +
|
||||
s.study_first_posted_date, +
|
||||
cv.number_of_facilities, +
|
||||
cv.has_single_facility, +
|
||||
cv.nlm_download_date, +
|
||||
s.number_of_arms, +
|
||||
s.number_of_groups, +
|
||||
sp.name AS lead_sponsor, +
|
||||
aid.names AS other_ids, +
|
||||
e.gender, +
|
||||
e.gender_based, +
|
||||
e.gender_description, +
|
||||
e.population, +
|
||||
e.minimum_age, +
|
||||
e.maximum_age, +
|
||||
e.criteria, +
|
||||
e.healthy_volunteers, +
|
||||
ak.names AS keywords, +
|
||||
ai.names AS interventions, +
|
||||
ac.names AS conditions, +
|
||||
d.primary_purpose, +
|
||||
d.allocation, +
|
||||
d.observational_model, +
|
||||
d.intervention_model, +
|
||||
d.masking, +
|
||||
d.subject_masked, +
|
||||
d.caregiver_masked, +
|
||||
d.investigator_masked, +
|
||||
d.outcomes_assessor_masked, +
|
||||
ado.names AS design_outcomes, +
|
||||
bs.description AS brief_summary, +
|
||||
dd.description AS detailed_description +
|
||||
FROM (((((((((((ctgov.studies s +
|
||||
FULL JOIN ctgov.all_conditions ac ON (((s.nct_id)::text = (ac.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_id_information aid ON (((s.nct_id)::text = (aid.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_design_outcomes ado ON (((s.nct_id)::text = (ado.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_keywords ak ON (((s.nct_id)::text = (ak.nct_id)::text))) +
|
||||
FULL JOIN ctgov.all_interventions ai ON (((s.nct_id)::text = (ai.nct_id)::text))) +
|
||||
FULL JOIN ctgov.sponsors sp ON (((s.nct_id)::text = (sp.nct_id)::text))) +
|
||||
FULL JOIN ctgov.calculated_values cv ON (((s.nct_id)::text = (cv.nct_id)::text))) +
|
||||
FULL JOIN ctgov.designs d ON (((s.nct_id)::text = (d.nct_id)::text))) +
|
||||
FULL JOIN ctgov.eligibilities e ON (((s.nct_id)::text = (e.nct_id)::text))) +
|
||||
FULL JOIN ctgov.brief_summaries bs ON (((s.nct_id)::text = (bs.nct_id)::text))) +
|
||||
FULL JOIN ctgov.detailed_descriptions dd ON (((s.nct_id)::text = (dd.nct_id)::text))) +
|
||||
WHERE (((sp.lead_or_collaborator)::text = 'lead'::text) AND ((s.nct_id)::text IN ( SELECT search_results.nct_id +
|
||||
FROM ctgov.search_results +
|
||||
WHERE ((search_results.name)::text = 'covid-19'::text))));
|
||||
CREATE OR REPLACE VIEW history.match_drugs_to_trials AS SELECT bi.nct_id, +
|
||||
rp.rxcui, +
|
||||
rp.propvalue1 +
|
||||
FROM (ctgov.browse_interventions bi +
|
||||
JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||
WHERE (((rp.propname)::text = 'RxNorm Name'::text) AND ((bi.nct_id)::text IN ( SELECT trial_snapshots.nct_id +
|
||||
FROM history.trial_snapshots)));
|
||||
CREATE OR REPLACE VIEW http.most_recent_download_status AS SELECT t.nct_id, +
|
||||
t.status, +
|
||||
t.update_timestamp +
|
||||
FROM ( SELECT download_status.id, +
|
||||
download_status.nct_id, +
|
||||
download_status.status, +
|
||||
download_status.update_timestamp, +
|
||||
row_number() OVER (PARTITION BY download_status.nct_id ORDER BY download_status.update_timestamp DESC) AS rn +
|
||||
FROM http.download_status) t +
|
||||
WHERE (t.rn = 1) +
|
||||
ORDER BY t.nct_id;
|
||||
CREATE OR REPLACE VIEW public.time_between_submission_and_start_view AS SELECT s.nct_id, +
|
||||
s.start_date, +
|
||||
ts.version, +
|
||||
ts.submission_date, +
|
||||
abs(((EXTRACT(epoch FROM (ts.submission_date - (s.start_date)::timestamp without time zone)))::double precision / (((24 * 60) * 60))::double precision)) AS start_deviance +
|
||||
FROM (ctgov.studies s +
|
||||
JOIN history.trial_snapshots ts ON (((s.nct_id)::text = (ts.nct_id)::text))) +
|
||||
WHERE ((s.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||
FROM "DiseaseBurden".trial_to_icd10 tti));
|
||||
CREATE OR REPLACE VIEW public.rank_proximity_to_start_time_view AS SELECT cte.nct_id, +
|
||||
cte.version, +
|
||||
row_number() OVER (PARTITION BY cte.nct_id ORDER BY cte.start_deviance) AS rownum, +
|
||||
cte.submission_date, +
|
||||
cte.start_deviance, +
|
||||
cte.start_date, +
|
||||
ts.primary_completion_date, +
|
||||
ts.primary_completion_date_category, +
|
||||
ts.overall_status, +
|
||||
ts.enrollment, +
|
||||
ts.enrollment_category +
|
||||
FROM (time_between_submission_and_start_view cte +
|
||||
JOIN history.trial_snapshots ts ON ((((cte.nct_id)::text = (ts.nct_id)::text) AND (cte.version = ts.version))));
|
||||
CREATE OR REPLACE VIEW public.enrollment_closest_to_start_view AS SELECT cte2.nct_id, +
|
||||
min(cte2.rownum) AS enrollment_source +
|
||||
FROM rank_proximity_to_start_time_view cte2 +
|
||||
WHERE (cte2.enrollment IS NOT NULL) +
|
||||
GROUP BY cte2.nct_id;
|
||||
CREATE OR REPLACE VIEW public.match_trials_to_bn_in AS WITH trialncts AS ( +
|
||||
SELECT DISTINCT ts.nct_id +
|
||||
FROM history.trial_snapshots ts +
|
||||
) +
|
||||
SELECT bi.nct_id, +
|
||||
bi.downcase_mesh_term, +
|
||||
rr.tty2, +
|
||||
rr.rxcui2 AS bn_or_in_cui, +
|
||||
count(*) AS count +
|
||||
FROM ((ctgov.browse_interventions bi +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((rr.rxcui1 = rp.rxcui))) +
|
||||
WHERE (((bi.nct_id)::text IN ( SELECT trialncts.nct_id +
|
||||
FROM trialncts)) AND ((bi.mesh_type)::text = 'mesh-list'::text) AND ((rp.propname)::text = 'Active_ingredient_name'::text) AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))) +
|
||||
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2 +
|
||||
ORDER BY bi.nct_id;
|
||||
CREATE OR REPLACE VIEW public.match_trial_to_ndc11 AS SELECT mttbi.nct_id, +
|
||||
ah.ndc, +
|
||||
count(*) AS count +
|
||||
FROM ((match_trials_to_bn_in mttbi +
|
||||
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((mttbi.bn_or_in_cui = rr.rxcui1))) +
|
||||
LEFT JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON ((rr.rxcui2 = ah.rxcui))) +
|
||||
WHERE ((rr.tty1 = 'BN'::bpchar) AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar])) AND ((ah.sab)::text = 'RXNORM'::text)) +
|
||||
GROUP BY mttbi.nct_id, ah.ndc +
|
||||
ORDER BY mttbi.nct_id, ah.ndc;
|
||||
CREATE OR REPLACE VIEW public.match_trial_to_marketing_start_date AS SELECT mttn.nct_id, +
|
||||
n.application_number_or_citation, +
|
||||
min(n.marketing_start_date) AS min +
|
||||
FROM (match_trial_to_ndc11 mttn +
|
||||
JOIN spl.nsde n ON ((mttn.ndc = (n.package_ndc11)::bpchar))) +
|
||||
WHERE (((n.product_type)::text = 'HUMAN PRESCRIPTION DRUG'::text) AND ((n.marketing_category)::text = ANY (ARRAY[('NDA'::character varying)::text, ('ANDA'::character varying)::text, ('BLA'::character varying)::text, ('NDA authorized generic'::character varying)::text, ('NDA AUTHORIZED GENERIC'::character varying)::text]))) +
|
||||
GROUP BY mttn.nct_id, n.application_number_or_citation +
|
||||
ORDER BY mttn.nct_id;
|
||||
CREATE OR REPLACE VIEW public.view_burdens_cte AS SELECT b.measure_id, +
|
||||
b.location_id, +
|
||||
b.sex_id, +
|
||||
b.age_id, +
|
||||
b.cause_id, +
|
||||
b.metric_id, +
|
||||
b.year, +
|
||||
b.val, +
|
||||
b.upper_95, +
|
||||
b.lower_95, +
|
||||
b.key_column +
|
||||
FROM "DiseaseBurden".burdens b +
|
||||
WHERE ((b.sex_id = 3) AND (b.metric_id = 1) AND (b.measure_id = 2) AND (b.age_id = 22));
|
||||
CREATE OR REPLACE VIEW public.view_burdens_cte2 AS SELECT c1.cause_id, +
|
||||
c1.year, +
|
||||
c1.val AS h_sdi_val, +
|
||||
c1.upper_95 AS h_sdi_u95, +
|
||||
c1.lower_95 AS h_sdi_l95, +
|
||||
c2.val AS hm_sdi_val, +
|
||||
c2.upper_95 AS hm_sdi_u95, +
|
||||
c2.lower_95 AS hm_sdi_l95, +
|
||||
c3.val AS m_sdi_val, +
|
||||
c3.upper_95 AS m_sdi_u95, +
|
||||
c3.lower_95 AS m_sdi_l95, +
|
||||
c4.val AS lm_sdi_val, +
|
||||
c4.upper_95 AS lm_sdi_u95, +
|
||||
c4.lower_95 AS lm_sdi_l95, +
|
||||
c5.val AS l_sdi_val, +
|
||||
c5.upper_95 AS l_sdi_u95, +
|
||||
c5.lower_95 AS l_sdi_l95 +
|
||||
FROM ((((view_burdens_cte c1 +
|
||||
JOIN view_burdens_cte c2 ON (((c1.cause_id = c2.cause_id) AND (c1.year = c2.year)))) +
|
||||
JOIN view_burdens_cte c3 ON (((c1.cause_id = c3.cause_id) AND (c1.year = c3.year)))) +
|
||||
JOIN view_burdens_cte c4 ON (((c1.cause_id = c4.cause_id) AND (c1.year = c4.year)))) +
|
||||
JOIN view_burdens_cte c5 ON (((c1.cause_id = c5.cause_id) AND (c1.year = c5.year)))) +
|
||||
WHERE ((c1.location_id = 44635) AND (c2.location_id = 44634) AND (c3.location_id = 44639) AND (c4.location_id = 44636) AND (c5.location_id = 44637));
|
||||
CREATE OR REPLACE VIEW public.view_cte AS SELECT ts.nct_id, +
|
||||
ts.primary_completion_date, +
|
||||
ts.primary_completion_date_category, +
|
||||
ts.enrollment, +
|
||||
ts.start_date, +
|
||||
ts.enrollment_category, +
|
||||
ts.overall_status, +
|
||||
min(ts.submission_date) AS earliest_date_observed +
|
||||
FROM history.trial_snapshots ts +
|
||||
WHERE (((ts.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||
FROM "DiseaseBurden".trial_to_icd10 tti +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type))) AND (ts.submission_date >= ts.start_date) AND (ts.overall_status <> ALL (ARRAY['Completed'::history.study_statuses, 'Terminated'::history.study_statuses]))) +
|
||||
GROUP BY ts.nct_id, ts.primary_completion_date, ts.primary_completion_date_category, ts.start_date, ts.enrollment, ts.enrollment_category, ts.overall_status;
|
||||
CREATE OR REPLACE VIEW public.view_disbur_cte0 AS SELECT tti.nct_id, +
|
||||
tti.ui, +
|
||||
tti.condition, +
|
||||
itc.cause_text, +
|
||||
ch.cause_id, +
|
||||
ch.level +
|
||||
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type);
|
||||
CREATE OR REPLACE VIEW public.view_disbur_cte AS SELECT view_disbur_cte0.nct_id, +
|
||||
max(view_disbur_cte0.level) AS max_level +
|
||||
FROM view_disbur_cte0 +
|
||||
GROUP BY view_disbur_cte0.nct_id;
|
||||
CREATE OR REPLACE VIEW public.view_trial_to_cause AS SELECT tti.nct_id, +
|
||||
tti.ui, +
|
||||
tti.condition, +
|
||||
itc.cause_text, +
|
||||
ch.cause_id, +
|
||||
ch.level +
|
||||
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type) +
|
||||
ORDER BY tti.nct_id;
|
||||
CREATE OR REPLACE VIEW public.view_disbur_cte2 AS SELECT ttc.nct_id, +
|
||||
ttc.ui, +
|
||||
ttc.condition, +
|
||||
ttc.cause_text, +
|
||||
ttc.cause_id, +
|
||||
disbur_cte.max_level +
|
||||
FROM (view_trial_to_cause ttc +
|
||||
JOIN view_disbur_cte disbur_cte ON (((disbur_cte.nct_id)::text = (ttc.nct_id)::text))) +
|
||||
WHERE (ttc.level = disbur_cte.max_level) +
|
||||
GROUP BY ttc.nct_id, ttc.ui, ttc.condition, ttc.cause_text, ttc.cause_id, disbur_cte.max_level +
|
||||
ORDER BY ttc.nct_id, ttc.ui;
|
||||
CREATE OR REPLACE VIEW public.view_disbur_cte3 AS SELECT disbur_cte2.nct_id, +
|
||||
SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) AS code, +
|
||||
disbur_cte2.condition, +
|
||||
disbur_cte2.cause_text, +
|
||||
disbur_cte2.cause_id, +
|
||||
ic.chapter_code AS category_id, +
|
||||
ic.group_name, +
|
||||
disbur_cte2.max_level +
|
||||
FROM (view_disbur_cte2 disbur_cte2 +
|
||||
JOIN "DiseaseBurden".icd10_categories ic ON (((SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) <= (ic.end_code)::text) AND (SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) >= (ic.start_code)::text)))) +
|
||||
WHERE (ic.level = 1);
|
||||
CREATE OR REPLACE VIEW public.formatted_data AS SELECT cte.nct_id, +
|
||||
cte.start_date, +
|
||||
cte.enrollment AS current_enrollment, +
|
||||
cte.enrollment_category, +
|
||||
cte.overall_status AS current_status, +
|
||||
cte.earliest_date_observed, +
|
||||
(EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))) AS elapsed_duration, +
|
||||
count(DISTINCT mttmsd.application_number_or_citation) AS n_brands, +
|
||||
dbc3.code, +
|
||||
dbc3.condition, +
|
||||
dbc3.cause_text, +
|
||||
dbc3.cause_id, +
|
||||
dbc3.category_id, +
|
||||
dbc3.group_name, +
|
||||
dbc3.max_level, +
|
||||
b.year, +
|
||||
b.h_sdi_val, +
|
||||
b.h_sdi_u95, +
|
||||
b.h_sdi_l95, +
|
||||
b.hm_sdi_val, +
|
||||
b.hm_sdi_u95, +
|
||||
b.hm_sdi_l95, +
|
||||
b.m_sdi_val, +
|
||||
b.m_sdi_u95, +
|
||||
b.m_sdi_l95, +
|
||||
b.lm_sdi_val, +
|
||||
b.lm_sdi_u95, +
|
||||
b.lm_sdi_l95, +
|
||||
b.l_sdi_val, +
|
||||
b.l_sdi_u95, +
|
||||
b.l_sdi_l95 +
|
||||
FROM (((view_cte cte +
|
||||
JOIN match_trial_to_marketing_start_date mttmsd ON (((cte.nct_id)::text = (mttmsd.nct_id)::text))) +
|
||||
JOIN view_disbur_cte3 dbc3 ON (((dbc3.nct_id)::text = (cte.nct_id)::text))) +
|
||||
JOIN view_burdens_cte2 b ON (((b.cause_id = dbc3.cause_id) AND (EXTRACT(year FROM b.year) = EXTRACT(year FROM cte.earliest_date_observed))))) +
|
||||
WHERE (mttmsd.min <= cte.earliest_date_observed) +
|
||||
GROUP BY cte.nct_id, cte.start_date, cte.enrollment, cte.enrollment_category, cte.overall_status, cte.earliest_date_observed, (EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))), dbc3.code, dbc3.condition, dbc3.cause_text, dbc3.cause_id, dbc3.category_id, dbc3.group_name, dbc3.max_level, b.cause_id, b.year, b.h_sdi_val, b.h_sdi_u95, b.h_sdi_l95, b.hm_sdi_val, b.hm_sdi_u95, b.hm_sdi_l95, b.m_sdi_val, b.m_sdi_u95, b.m_sdi_l95, b.lm_sdi_val, b.lm_sdi_u95, b.lm_sdi_l95, b.l_sdi_val, b.l_sdi_u95, b.l_sdi_l95+
|
||||
ORDER BY cte.nct_id, cte.earliest_date_observed;
|
||||
CREATE OR REPLACE VIEW public.formatted_data_with_planned_enrollment AS SELECT f.nct_id, +
|
||||
f.start_date, +
|
||||
f.current_enrollment, +
|
||||
f.enrollment_category, +
|
||||
f.current_status, +
|
||||
f.earliest_date_observed, +
|
||||
f.elapsed_duration, +
|
||||
f.n_brands, +
|
||||
f.code, +
|
||||
f.condition, +
|
||||
f.cause_text, +
|
||||
f.cause_id, +
|
||||
f.category_id, +
|
||||
f.group_name, +
|
||||
f.max_level, +
|
||||
f.year, +
|
||||
f.h_sdi_val, +
|
||||
f.h_sdi_u95, +
|
||||
f.h_sdi_l95, +
|
||||
f.hm_sdi_val, +
|
||||
f.hm_sdi_u95, +
|
||||
f.hm_sdi_l95, +
|
||||
f.m_sdi_val, +
|
||||
f.m_sdi_u95, +
|
||||
f.m_sdi_l95, +
|
||||
f.lm_sdi_val, +
|
||||
f.lm_sdi_u95, +
|
||||
f.lm_sdi_l95, +
|
||||
f.l_sdi_val, +
|
||||
f.l_sdi_u95, +
|
||||
f.l_sdi_l95, +
|
||||
s.overall_status AS final_status, +
|
||||
c2a.version, +
|
||||
c2a.enrollment AS planned_enrollment +
|
||||
FROM (((formatted_data f +
|
||||
JOIN ctgov.studies s ON (((f.nct_id)::text = (s.nct_id)::text))) +
|
||||
JOIN enrollment_closest_to_start_view c3e ON (((c3e.nct_id)::text = (f.nct_id)::text))) +
|
||||
JOIN rank_proximity_to_start_time_view c2a ON ((((c3e.nct_id)::text = (c2a.nct_id)::text) AND (c3e.enrollment_source = c2a.rownum))));
|
||||
CREATE OR REPLACE VIEW http.trials_to_download AS SELECT most_recent_download_status.nct_id +
|
||||
FROM http.most_recent_download_status +
|
||||
WHERE (most_recent_download_status.status = 'Of Interest'::http.history_download_status);
|
||||
CREATE OR REPLACE VIEW public.primary_design_outcomes AS SELECT do2.id, +
|
||||
do2.nct_id, +
|
||||
do2.outcome_type, +
|
||||
do2.measure, +
|
||||
do2.time_frame, +
|
||||
do2.population, +
|
||||
do2.description +
|
||||
FROM ctgov.design_outcomes do2 +
|
||||
WHERE (((do2.outcome_type)::text = 'primary'::text) AND ((do2.nct_id)::text IN ( SELECT DISTINCT fd.nct_id +
|
||||
FROM formatted_data fd)));
|
||||
(40 rows)
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
# Ad-hoc smoke test: connect to the AACT database and print up to 100 rows
# from http.responses.
from contextlib import closing

import psycopg2 as psyco

# NOTE(review): host and credentials are hard-coded; consider moving them to
# environment variables or a config file before sharing this script.
conn = psyco.connect(dbname="aact_db", user="root", host="will-office", password="root")

# closing() releases the connection even if the query raises. psycopg2's own
# `with conn:` only ends the transaction; it does not close the connection.
# The cursor context manager closes the cursor on exit.
with closing(conn), conn.cursor() as curse:
    curse.execute("select * FROM http.responses LIMIT 100;")
    print(curse.fetchall())
|
||||
@ -1,21 +0,0 @@
|
||||
DELETE FROM http.download_status;
|
||||
|
||||
INSERT INTO http.download_status (nct_id, status)
|
||||
SELECT nct_id, 'Of Interest'::http.history_download_status AS status
|
||||
FROM ctgov.studies
|
||||
WHERE
|
||||
is_fda_regulated_drug=TRUE
|
||||
AND
|
||||
study_type = 'Interventional'
|
||||
AND
|
||||
phase='Phase 3'
|
||||
AND
|
||||
overall_status in ('Terminated', 'Completed')
|
||||
AND
|
||||
start_date > '2008-01-01'
|
||||
AND
|
||||
completion_date < '2022-01-01'
|
||||
;
|
||||
|
||||
|
||||
SELECT count(*) FROM http.download_status ;
|
||||
@ -0,0 +1,55 @@
|
||||
import ollama
|
||||
|
||||
import psycopg
|
||||
from psycopg.rows import dict_row
|
||||
from typing import List, Dict
|
||||
|
||||
def fetch_all_rows(conn_params: dict) -> List[Dict]:
    """Return every row of ``public.primary_design_outcomes``.

    Args:
        conn_params: Keyword arguments forwarded unchanged to
            ``psycopg.connect`` (the caller in this file passes dbname,
            user, password, host and port).

    Returns:
        The result of ``cursor.fetchall()``; because the connection is
        opened with ``row_factory=dict_row`` each row is a dict.

    Raises:
        Whatever ``psycopg`` raises on connection or query failure; the
        exception is propagated after the connection has been closed.
    """
    sql_query = "SELECT * FROM public.primary_design_outcomes;"

    # Context managers guarantee the cursor and the connection are closed
    # even when the query raises. The previous version only closed them on
    # the success path, leaking the connection on any error.
    with psycopg.connect(**conn_params, row_factory=dict_row) as conn:
        with conn.cursor() as cursor:
            cursor.execute(sql_query)
            return cursor.fetchall()
|
||||
|
||||
# Example usage
|
||||
conn_params = {
|
||||
"dbname": "aact_db",
|
||||
"user": "root",
|
||||
"password": "root",
|
||||
"host": "localhost",
|
||||
"port": "5432"
|
||||
}
|
||||
|
||||
outcome_description = '''
|
||||
Measure: {measure}
|
||||
Time Frame: {time_frame}
|
||||
Description: {description}
|
||||
'''
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # TODO: check that the model is available before generating.

    # Pull every primary design outcome from the database.
    outcomes = fetch_all_rows(conn_params)

    # Only the first three outcomes are sent to the model.
    for outcome in outcomes[:3]:
        prompt_text = outcome_description.format(**outcome)
        reply = ollama.generate(
            model='youainti/llama3.1-extractor:2024-08-28.2',
            prompt=prompt_text,
        )

        # Show the prompt followed by the model's answer.
        print(prompt_text)
        print(reply["response"])
|
||||
|
||||
@ -0,0 +1,31 @@
|
||||
FROM llama3.1
|
||||
PARAMETER num_ctx 8192
|
||||
PARAMETER seed 11021585
|
||||
SYSTEM """
|
||||
You are a Natural Language Processor, tasked with extracting data about outcome measures from textual tables.
|
||||
You are to extract the longest observation time from the primary objectives for this clinical trial.
|
||||
I need you to distinguish between trials that have a specified period during which they track participants
|
||||
and those trials that don't.
|
||||
|
||||
Return results as JSON, with the format
|
||||
```json
|
||||
{
|
||||
"longest_observation_scalar": <number>,
|
||||
"longest_observation_unit": <string: minutes, hours, days, weeks, months, years>
|
||||
}
|
||||
```
|
||||
Do not return any other commentary.
|
||||
If the study does not have a specified end of observation, set the values to `null`.
|
||||
If the text does not appear to be related to clinical trials, return `{ null }`
|
||||
|
||||
For example:
|
||||
- 'baseline to week 3' should give: `{ "longest_observation_scalar": 3, "longest_observation_unit": "weeks" }`
|
||||
- 'tracked 4 months' should give: `{ "longest_observation_scalar": 4, "longest_observation_unit": "months"}`
|
||||
- 'randomization to 14 months' should give `{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}`
|
||||
- 'After day 1 to week 48' should give `{ "longest_observation_scalar": 48, "longest_observation_unit": "weeks"}`
|
||||
- 'randomization to 14 months' should give `{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}`
|
||||
- 'baseline until death' should give: `{ "longest_observation_scalar": null, "longest_observation_unit": null }`
|
||||
- 'progression free survival up to 4 years' should give: `{ "longest_observation_scalar": null, "longest_observation_unit": null }`
|
||||
- 'the quick brown fox jumped over the lazy dog for one hour' should give: `{null}`
|
||||
|
||||
"""
|
||||
|
@ -0,0 +1,19 @@
|
||||
### Template:
|
||||
{
|
||||
"longest_observation_scalar": "",
|
||||
"longest_observation_unit": "",
|
||||
}
|
||||
### Examples:
|
||||
|
||||
|
||||
### Text:
|
||||
|
||||
{ "longest_observation_scalar": 3, "longest_observation_unit": "weeks" }
|
||||
{ "longest_observation_scalar": 4, "longest_observation_unit": "months"}
|
||||
{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}
|
||||
{ "longest_observation_scalar": 48, "longest_observation_unit": "weeks"}
|
||||
{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}
|
||||
{ "longest_observation_scalar": null, "longest_observation_unit": null }
|
||||
{ "longest_observation_scalar": null, "longest_observation_unit": null }
|
||||
|
||||
"""
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
||||
https://doi.org/10.6069/GHCW-8955
|
||||
@ -0,0 +1 @@
|
||||
,dad,home-pc,03.04.2023 15:13,file:///home/dad/.config/libreoffice/4;
|
||||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,365 @@
|
||||
Cause ID,Cause Name,Parent ID,Parent Name,Level,Cause Outline,Sort Order,YLL Only,YLD Only
|
||||
294,All causes,294,All causes,0,Total,1,,
|
||||
295,"Communicable, maternal, neonatal, and nutritional diseases",294,All causes,1,A,2,,
|
||||
955,HIV/AIDS and sexually transmitted infections,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.1,3,,
|
||||
298,HIV/AIDS,955,HIV/AIDS and sexually transmitted infections,3,A.1.1,4,,
|
||||
948,HIV/AIDS - Drug-susceptible Tuberculosis,298,HIV/AIDS,4,A.1.1.1,5,,
|
||||
949,HIV/AIDS - Multidrug-resistant Tuberculosis without extensive drug resistance,298,HIV/AIDS,4,A.1.1.2,6,,
|
||||
950,HIV/AIDS - Extensively drug-resistant Tuberculosis,298,HIV/AIDS,4,A.1.1.3,7,,
|
||||
300,HIV/AIDS resulting in other diseases,298,HIV/AIDS,4,A.1.1.4,8,,
|
||||
393,Sexually transmitted infections excluding HIV,955,HIV/AIDS and sexually transmitted infections,3,A.1.2,9,,
|
||||
394,Syphilis,393,Sexually transmitted infections excluding HIV,4,A.1.2.1,10,,
|
||||
395,Chlamydial infection,393,Sexually transmitted infections excluding HIV,4,A.1.2.2,11,,
|
||||
396,Gonococcal infection,393,Sexually transmitted infections excluding HIV,4,A.1.2.3,12,,
|
||||
397,Trichomoniasis,393,Sexually transmitted infections excluding HIV,4,A.1.2.4,13,,X
|
||||
398,Genital herpes,393,Sexually transmitted infections excluding HIV,4,A.1.2.5,14,,X
|
||||
399,Other sexually transmitted infections,393,Sexually transmitted infections excluding HIV,4,A.1.2.6,15,,
|
||||
956,Respiratory infections and tuberculosis,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.2,16,,
|
||||
297,Tuberculosis,956,Respiratory infections and tuberculosis,3,A.2.1,17,,
|
||||
954,Latent tuberculosis infection,297,Tuberculosis,4,A.2.1.1,18,,X
|
||||
934,Drug-susceptible tuberculosis,297,Tuberculosis,4,A.2.1.2,19,,
|
||||
946,Multidrug-resistant tuberculosis without extensive drug resistance,297,Tuberculosis,4,A.2.1.3,20,,
|
||||
947,Extensively drug-resistant tuberculosis,297,Tuberculosis,4,A.2.1.4,21,,
|
||||
322,Lower respiratory infections,956,Respiratory infections and tuberculosis,3,A.2.2,22,,
|
||||
328,Upper respiratory infections,956,Respiratory infections and tuberculosis,3,A.2.3,23,,
|
||||
329,Otitis media,956,Respiratory infections and tuberculosis,3,A.2.4,24,,
|
||||
957,Enteric infections,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.3,25,,
|
||||
302,Diarrheal diseases,957,Enteric infections,3,A.3.1,26,,
|
||||
958,Typhoid and paratyphoid,957,Enteric infections,3,A.3.2,27,,
|
||||
319,Typhoid fever,958,Typhoid and paratyphoid,4,A.3.2.1,28,,
|
||||
320,Paratyphoid fever,958,Typhoid and paratyphoid,4,A.3.2.2,29,,
|
||||
959,Invasive Non-typhoidal Salmonella (iNTS),957,Enteric infections,3,A.3.3,30,,
|
||||
321,Other intestinal infectious diseases,957,Enteric infections,3,A.3.4,31,,
|
||||
344,Neglected tropical diseases and malaria,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.4,32,,
|
||||
345,Malaria,344,Neglected tropical diseases and malaria,3,A.4.1,33,,
|
||||
346,Chagas disease,344,Neglected tropical diseases and malaria,3,A.4.2,34,,
|
||||
347,Leishmaniasis,344,Neglected tropical diseases and malaria,3,A.4.3,35,,
|
||||
348,Visceral leishmaniasis,347,Leishmaniasis,4,A.4.3.1,36,,
|
||||
349,Cutaneous and mucocutaneous leishmaniasis,347,Leishmaniasis,4,A.4.3.2,37,,X
|
||||
350,African trypanosomiasis,344,Neglected tropical diseases and malaria,3,A.4.4,38,,
|
||||
351,Schistosomiasis,344,Neglected tropical diseases and malaria,3,A.4.5,39,,
|
||||
352,Cysticercosis,344,Neglected tropical diseases and malaria,3,A.4.6,40,,
|
||||
353,Cystic echinococcosis,344,Neglected tropical diseases and malaria,3,A.4.7,41,,
|
||||
354,Lymphatic filariasis,344,Neglected tropical diseases and malaria,3,A.4.8,42,,X
|
||||
355,Onchocerciasis,344,Neglected tropical diseases and malaria,3,A.4.9,43,,X
|
||||
356,Trachoma,344,Neglected tropical diseases and malaria,3,A.4.10,44,,X
|
||||
357,Dengue,344,Neglected tropical diseases and malaria,3,A.4.11,45,,
|
||||
358,Yellow fever,344,Neglected tropical diseases and malaria,3,A.4.12,46,,
|
||||
359,Rabies,344,Neglected tropical diseases and malaria,3,A.4.13,47,,
|
||||
360,Intestinal nematode infections,344,Neglected tropical diseases and malaria,3,A.4.14,48,,
|
||||
361,Ascariasis,360,Intestinal nematode infections,4,A.4.14.1,49,,
|
||||
362,Trichuriasis,360,Intestinal nematode infections,4,A.4.14.2,50,,X
|
||||
363,Hookworm disease,360,Intestinal nematode infections,4,A.4.14.3,51,,X
|
||||
364,Food-borne trematodiases,344,Neglected tropical diseases and malaria,3,A.4.15,52,,X
|
||||
405,Leprosy,344,Neglected tropical diseases and malaria,3,A.4.16,53,,X
|
||||
843,Ebola,344,Neglected tropical diseases and malaria,3,A.4.17,54,,
|
||||
935,Zika virus,344,Neglected tropical diseases and malaria,3,A.4.18,55,,
|
||||
936,Guinea worm disease,344,Neglected tropical diseases and malaria,3,A.4.19,56,,X
|
||||
365,Other neglected tropical diseases,344,Neglected tropical diseases and malaria,3,A.4.20,57,,
|
||||
961,Other infectious diseases,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.5,58,,
|
||||
332,Meningitis,961,Other infectious diseases,3,A.5.1,59,,
|
||||
337,Encephalitis,961,Other infectious diseases,3,A.5.2,60,,
|
||||
338,Diphtheria,961,Other infectious diseases,3,A.5.3,61,,
|
||||
339,Whooping cough,961,Other infectious diseases,3,A.5.4,62,,
|
||||
340,Tetanus,961,Other infectious diseases,3,A.5.5,63,,
|
||||
341,Measles,961,Other infectious diseases,3,A.5.6,64,,
|
||||
342,Varicella and herpes zoster,961,Other infectious diseases,3,A.5.7,65,,
|
||||
400,Acute hepatitis,961,Other infectious diseases,3,A.5.8,66,,
|
||||
401,Acute hepatitis A,400,Acute hepatitis,4,A.5.8.1,67,,
|
||||
402,Acute hepatitis B,400,Acute hepatitis,4,A.5.8.2,68,,
|
||||
403,Acute hepatitis C,400,Acute hepatitis,4,A.5.8.3,69,,
|
||||
404,Acute hepatitis E,400,Acute hepatitis,4,A.5.8.4,70,,
|
||||
408,Other unspecified infectious diseases,961,Other infectious diseases,3,A.5.9,71,,
|
||||
962,Maternal and neonatal disorders,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.6,72,,
|
||||
366,Maternal disorders,962,Maternal and neonatal disorders,3,A.6.1,73,,
|
||||
367,Maternal hemorrhage,366,Maternal disorders,4,A.6.1.1,74,,
|
||||
368,Maternal sepsis and other maternal infections,366,Maternal disorders,4,A.6.1.2,75,,
|
||||
369,Maternal hypertensive disorders,366,Maternal disorders,4,A.6.1.3,76,,
|
||||
370,Maternal obstructed labor and uterine rupture,366,Maternal disorders,4,A.6.1.4,77,,
|
||||
995,Maternal abortion and miscarriage,366,Maternal disorders,4,A.6.1.5,78,,
|
||||
374,Ectopic pregnancy,366,Maternal disorders,4,A.6.1.6,79,,
|
||||
375,Indirect maternal deaths,366,Maternal disorders,4,A.6.1.7,80,X,
|
||||
376,Late maternal deaths,366,Maternal disorders,4,A.6.1.8,81,X,
|
||||
741,Maternal deaths aggravated by HIV/AIDS,366,Maternal disorders,4,A.6.1.9,82,X,
|
||||
379,Other maternal disorders,366,Maternal disorders,4,A.6.1.10,83,,
|
||||
380,Neonatal disorders,962,Maternal and neonatal disorders,3,A.6.2,84,,
|
||||
381,Neonatal preterm birth,380,Neonatal disorders,4,A.6.2.1,85,,
|
||||
382,Neonatal encephalopathy due to birth asphyxia and trauma,380,Neonatal disorders,4,A.6.2.2,86,,
|
||||
383,Neonatal sepsis and other neonatal infections,380,Neonatal disorders,4,A.6.2.3,87,,
|
||||
384,Hemolytic disease and other neonatal jaundice,380,Neonatal disorders,4,A.6.2.4,88,,
|
||||
385,Other neonatal disorders,380,Neonatal disorders,4,A.6.2.5,89,,
|
||||
386,Nutritional deficiencies,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.7,90,,
|
||||
387,Protein-energy malnutrition,386,Nutritional deficiencies,3,A.7.1,91,,
|
||||
388,Iodine deficiency,386,Nutritional deficiencies,3,A.7.2,92,,X
|
||||
389,Vitamin A deficiency,386,Nutritional deficiencies,3,A.7.3,93,,X
|
||||
390,Dietary iron deficiency,386,Nutritional deficiencies,3,A.7.4,94,,X
|
||||
391,Other nutritional deficiencies,386,Nutritional deficiencies,3,A.7.5,95,,
|
||||
409,Non-communicable diseases,294,All causes,1,B,96,,
|
||||
410,Neoplasms,409,Non-communicable diseases,2,B.1,97,,
|
||||
444,Lip and oral cavity cancer,410,Neoplasms,3,B.1.1,98,,
|
||||
447,Nasopharynx cancer,410,Neoplasms,3,B.1.2,99,,
|
||||
450,Other pharynx cancer,410,Neoplasms,3,B.1.3,100,,
|
||||
411,Esophageal cancer,410,Neoplasms,3,B.1.4,101,,
|
||||
414,Stomach cancer,410,Neoplasms,3,B.1.5,102,,
|
||||
441,Colon and rectum cancer,410,Neoplasms,3,B.1.6,103,,
|
||||
417,Liver cancer,410,Neoplasms,3,B.1.7,104,,
|
||||
418,Liver cancer due to hepatitis B,417,Liver cancer,4,B.1.7.1,105,,
|
||||
419,Liver cancer due to hepatitis C,417,Liver cancer,4,B.1.7.2,106,,
|
||||
420,Liver cancer due to alcohol use,417,Liver cancer,4,B.1.7.3,107,,
|
||||
996,Liver cancer due to NASH,417,Liver cancer,4,B.1.7.4,108,,
|
||||
1021,Liver cancer due to other causes,417,Liver cancer,4,B.1.7.5,109,,
|
||||
453,Gallbladder and biliary tract cancer,410,Neoplasms,3,B.1.8,110,,
|
||||
456,Pancreatic cancer,410,Neoplasms,3,B.1.9,111,,
|
||||
423,Larynx cancer,410,Neoplasms,3,B.1.10,112,,
|
||||
426,"Tracheal, bronchus, and lung cancer",410,Neoplasms,3,B.1.11,113,,
|
||||
459,Malignant skin melanoma,410,Neoplasms,3,B.1.12,114,,
|
||||
462,Non-melanoma skin cancer,410,Neoplasms,3,B.1.13,115,,
|
||||
849,Non-melanoma skin cancer (squamous-cell carcinoma),462,Non-melanoma skin cancer,4,B.1.13.1,116,,
|
||||
850,Non-melanoma skin cancer (basal-cell carcinoma),462,Non-melanoma skin cancer,4,B.1.13.2,117,,X
|
||||
429,Breast cancer,410,Neoplasms,3,B.1.14,118,,
|
||||
432,Cervical cancer,410,Neoplasms,3,B.1.15,119,,
|
||||
435,Uterine cancer,410,Neoplasms,3,B.1.16,120,,
|
||||
465,Ovarian cancer,410,Neoplasms,3,B.1.17,121,,
|
||||
438,Prostate cancer,410,Neoplasms,3,B.1.18,122,,
|
||||
468,Testicular cancer,410,Neoplasms,3,B.1.19,123,,
|
||||
471,Kidney cancer,410,Neoplasms,3,B.1.20,124,,
|
||||
474,Bladder cancer,410,Neoplasms,3,B.1.21,125,,
|
||||
477,Brain and central nervous system cancer,410,Neoplasms,3,B.1.22,126,,
|
||||
480,Thyroid cancer,410,Neoplasms,3,B.1.23,127,,
|
||||
483,Mesothelioma,410,Neoplasms,3,B.1.24,128,,
|
||||
484,Hodgkin lymphoma,410,Neoplasms,3,B.1.25,129,,
|
||||
485,Non-Hodgkin lymphoma,410,Neoplasms,3,B.1.26,130,,
|
||||
486,Multiple myeloma,410,Neoplasms,3,B.1.27,131,,
|
||||
487,Leukemia,410,Neoplasms,3,B.1.28,132,,
|
||||
845,Acute lymphoid leukemia,487,Leukemia,4,B.1.28.1,133,,
|
||||
846,Chronic lymphoid leukemia,487,Leukemia,4,B.1.28.2,134,,
|
||||
847,Acute myeloid leukemia,487,Leukemia,4,B.1.28.3,135,,
|
||||
848,Chronic myeloid leukemia,487,Leukemia,4,B.1.28.4,136,,
|
||||
943,Other leukemia,487,Leukemia,4,B.1.28.5,137,,
|
||||
1022,Other malignant neoplasms,410,Neoplasms,3,B.1.29,138,,
|
||||
490,Other neoplasms,410,Neoplasms,3,B.1.30,139,,
|
||||
964,"Myelodysplastic, myeloproliferative, and other hematopoietic neoplasms",490,Other neoplasms,4,B.1.30.1,140,,
|
||||
965,Benign and in situ intestinal neoplasms,490,Other neoplasms,4,B.1.30.2,141,,X
|
||||
966,Benign and in situ cervical and uterine neoplasms,490,Other neoplasms,4,B.1.30.3,142,,X
|
||||
967,Other benign and in situ neoplasms,490,Other neoplasms,4,B.1.30.4,143,,X
|
||||
491,Cardiovascular diseases,409,Non-communicable diseases,2,B.2,144,,
|
||||
492,Rheumatic heart disease,491,Cardiovascular diseases,3,B.2.1,145,,
|
||||
493,Ischemic heart disease,491,Cardiovascular diseases,3,B.2.2,146,,
|
||||
494,Stroke,491,Cardiovascular diseases,3,B.2.3,147,,
|
||||
495,Ischemic stroke,494,Stroke,4,B.2.3.1,148,,
|
||||
496,Intracerebral hemorrhage,494,Stroke,4,B.2.3.2,149,,
|
||||
497,Subarachnoid hemorrhage,494,Stroke,4,B.2.3.3,150,,
|
||||
498,Hypertensive heart disease,491,Cardiovascular diseases,3,B.2.4,151,,
|
||||
504,Non-rheumatic valvular heart disease,491,Cardiovascular diseases,3,B.2.5,152,,
|
||||
968,Non-rheumatic calcific aortic valve disease,504,Non-rheumatic valvular heart disease,4,B.2.5.1,153,,
|
||||
969,Non-rheumatic degenerative mitral valve disease,504,Non-rheumatic valvular heart disease,4,B.2.5.2,154,,
|
||||
970,Other non-rheumatic valve diseases,504,Non-rheumatic valvular heart disease,4,B.2.5.3,155,,
|
||||
499,Cardiomyopathy and myocarditis,491,Cardiovascular diseases,3,B.2.6,156,,
|
||||
942,Myocarditis,499,Cardiomyopathy and myocarditis,4,B.2.6.1,157,,
|
||||
938,Alcoholic cardiomyopathy,499,Cardiomyopathy and myocarditis,4,B.2.6.2,158,,
|
||||
944,Other cardiomyopathy,499,Cardiomyopathy and myocarditis,4,B.2.6.3,159,,
|
||||
500,Atrial fibrillation and flutter,491,Cardiovascular diseases,3,B.2.8,160,,
|
||||
501,Aortic aneurysm,491,Cardiovascular diseases,3,B.2.9,161,X,
|
||||
502,Peripheral artery disease,491,Cardiovascular diseases,3,B.2.10,162,,
|
||||
503,Endocarditis,491,Cardiovascular diseases,3,B.2.11,163,,
|
||||
1023,Other cardiovascular and circulatory diseases,491,Cardiovascular diseases,3,B.2.12,164,,
|
||||
508,Chronic respiratory diseases,409,Non-communicable diseases,2,B.3,165,,
|
||||
509,Chronic obstructive pulmonary disease,508,Chronic respiratory diseases,3,B.3.1,166,,
|
||||
510,Pneumoconiosis,508,Chronic respiratory diseases,3,B.3.2,167,,
|
||||
511,Silicosis,510,Pneumoconiosis,4,B.3.2.1,168,,
|
||||
512,Asbestosis,510,Pneumoconiosis,4,B.3.2.2,169,,
|
||||
513,Coal workers pneumoconiosis,510,Pneumoconiosis,4,B.3.2.3,170,,
|
||||
514,Other pneumoconiosis,510,Pneumoconiosis,4,B.3.2.4,171,,
|
||||
515,Asthma,508,Chronic respiratory diseases,3,B.3.3,172,,
|
||||
516,Interstitial lung disease and pulmonary sarcoidosis,508,Chronic respiratory diseases,3,B.3.4,173,,
|
||||
520,Other chronic respiratory diseases,508,Chronic respiratory diseases,3,B.3.5,174,,
|
||||
526,Digestive diseases,409,Non-communicable diseases,2,B.4,175,,
|
||||
521,Cirrhosis and other chronic liver diseases,526,Digestive diseases,3,B.4.1,176,,
|
||||
522,Cirrhosis and other chronic liver diseases due to hepatitis B,521,Cirrhosis and other chronic liver diseases,4,B.4.1.1,177,,
|
||||
523,Cirrhosis and other chronic liver diseases due to hepatitis C,521,Cirrhosis and other chronic liver diseases,4,B.4.1.2,178,,
|
||||
524,Cirrhosis and other chronic liver diseases due to alcohol use,521,Cirrhosis and other chronic liver diseases,4,B.4.1.3,179,,
|
||||
971,Cirrhosis and other chronic liver diseases due to NAFLD,521,Cirrhosis and other chronic liver diseases,4,B.4.1.4,180,,
|
||||
525,Cirrhosis and other chronic liver diseases due to other causes,521,Cirrhosis and other chronic liver diseases,4,B.4.1.5,181,,
|
||||
992,Upper digestive system diseases,526,Digestive diseases,3,B.4.2,182,,
|
||||
527,Peptic ulcer disease,992,Upper digestive system diseases,4,B.4.2.1,183,,
|
||||
528,Gastritis and duodenitis,992,Upper digestive system diseases,4,B.4.2.2,184,,
|
||||
536,Gastroesophageal reflux disease,992,Upper digestive system diseases,4,B.4.2.3,185,,X
|
||||
529,Appendicitis,526,Digestive diseases,3,B.4.3,186,,
|
||||
530,Paralytic ileus and intestinal obstruction,526,Digestive diseases,3,B.4.4,187,,
|
||||
531,"Inguinal, femoral, and abdominal hernia",526,Digestive diseases,3,B.4.5,188,,
|
||||
532,Inflammatory bowel disease,526,Digestive diseases,3,B.4.6,189,,
|
||||
533,Vascular intestinal disorders,526,Digestive diseases,3,B.4.7,190,,
|
||||
534,Gallbladder and biliary diseases,526,Digestive diseases,3,B.4.8,191,,
|
||||
535,Pancreatitis,526,Digestive diseases,3,B.4.9,192,,
|
||||
541,Other digestive diseases,526,Digestive diseases,3,B.4.10,193,,
|
||||
542,Neurological disorders,409,Non-communicable diseases,2,B.5,194,,
|
||||
543,Alzheimer's disease and other dementias,542,Neurological disorders,3,B.5.1,195,,
|
||||
544,Parkinson's disease,542,Neurological disorders,3,B.5.2,196,,
|
||||
545,Idiopathic epilepsy,542,Neurological disorders,3,B.5.3,197,,
|
||||
546,Multiple sclerosis,542,Neurological disorders,3,B.5.4,198,,
|
||||
554,Motor neuron disease,542,Neurological disorders,3,B.5.5,199,,
|
||||
972,Headache disorders,542,Neurological disorders,3,B.5.6,200,,X
|
||||
547,Migraine,972,Headache disorders,4,B.5.6.1,201,,X
|
||||
548,Tension-type headache,972,Headache disorders,4,B.5.6.2,202,,X
|
||||
557,Other neurological disorders,542,Neurological disorders,3,B.5.7,203,,
|
||||
558,Mental disorders,409,Non-communicable diseases,2,B.6,204,,
|
||||
559,Schizophrenia,558,Mental disorders,3,B.6.1,205,,X
|
||||
567,Depressive disorders,558,Mental disorders,3,B.6.2,206,,X
|
||||
568,Major depressive disorder,567,Depressive disorders,4,B.6.2.1,207,,X
|
||||
569,Dysthymia,567,Depressive disorders,4,B.6.2.2,208,,X
|
||||
570,Bipolar disorder,558,Mental disorders,3,B.6.3,209,,X
|
||||
571,Anxiety disorders,558,Mental disorders,3,B.6.4,210,,X
|
||||
572,Eating disorders,558,Mental disorders,3,B.6.5,211,,
|
||||
573,Anorexia nervosa,572,Eating disorders,4,B.6.5.1,212,,
|
||||
574,Bulimia nervosa,572,Eating disorders,4,B.6.5.2,213,,
|
||||
575,Autism spectrum disorders,558,Mental disorders,3,B.6.6,214,,X
|
||||
578,Attention-deficit/hyperactivity disorder,558,Mental disorders,3,B.6.7,215,,X
|
||||
579,Conduct disorder,558,Mental disorders,3,B.6.8,216,,X
|
||||
582,Idiopathic developmental intellectual disability,558,Mental disorders,3,B.6.9,217,,X
|
||||
585,Other mental disorders,558,Mental disorders,3,B.6.10,218,,X
|
||||
973,Substance use disorders,409,Non-communicable diseases,2,B.7,219,,
|
||||
560,Alcohol use disorders,973,Substance use disorders,3,B.7.1,220,,
|
||||
561,Drug use disorders,973,Substance use disorders,3,B.7.2,221,,
|
||||
562,Opioid use disorders,561,Drug use disorders,4,B.7.2.1,222,,
|
||||
563,Cocaine use disorders,561,Drug use disorders,4,B.7.2.2,223,,
|
||||
564,Amphetamine use disorders,561,Drug use disorders,4,B.7.2.3,224,,
|
||||
565,Cannabis use disorders,561,Drug use disorders,4,B.7.2.4,225,,X
|
||||
566,Other drug use disorders,561,Drug use disorders,4,B.7.2.5,226,,
|
||||
974,Diabetes and kidney diseases,409,Non-communicable diseases,2,B.8,227,,
|
||||
587,Diabetes mellitus,974,Diabetes and kidney diseases,3,B.8.1,228,,
|
||||
975,Diabetes mellitus type 1,587,Diabetes mellitus,4,B.8.1.1,229,,
|
||||
976,Diabetes mellitus type 2,587,Diabetes mellitus,4,B.8.1.2,230,,
|
||||
589,Chronic kidney disease,974,Diabetes and kidney diseases,3,B.8.2,231,,
|
||||
997,Chronic kidney disease due to diabetes mellitus type 1,589,Chronic kidney disease,4,B.8.2.1,232,,
|
||||
998,Chronic kidney disease due to diabetes mellitus type 2,589,Chronic kidney disease,4,B.8.2.2,233,,
|
||||
591,Chronic kidney disease due to hypertension,589,Chronic kidney disease,4,B.8.2.3,234,,
|
||||
592,Chronic kidney disease due to glomerulonephritis,589,Chronic kidney disease,4,B.8.2.4,235,,
|
||||
593,Chronic kidney disease due to other and unspecified causes,589,Chronic kidney disease,4,B.8.2.5,236,,
|
||||
588,Acute glomerulonephritis,974,Diabetes and kidney diseases,3,B.8.3,237,,
|
||||
653,Skin and subcutaneous diseases,409,Non-communicable diseases,2,B.9,238,,
|
||||
654,Dermatitis,653,Skin and subcutaneous diseases,3,B.9.1,239,,X
|
||||
977,Atopic dermatitis,654,Dermatitis,4,B.9.1.1,240,,X
|
||||
978,Contact dermatitis,654,Dermatitis,4,B.9.1.2,241,,X
|
||||
979,Seborrhoeic dermatitis,654,Dermatitis,4,B.9.1.3,242,,X
|
||||
655,Psoriasis,653,Skin and subcutaneous diseases,3,B.9.2,243,,X
|
||||
980,Bacterial skin diseases,653,Skin and subcutaneous diseases,3,B.9.3,244,,
|
||||
656,Cellulitis,980,Bacterial skin diseases,4,B.9.3.1,245,,
|
||||
657,Pyoderma,980,Bacterial skin diseases,4,B.9.3.2,246,,
|
||||
658,Scabies,653,Skin and subcutaneous diseases,3,B.9.4,247,,X
|
||||
659,Fungal skin diseases,653,Skin and subcutaneous diseases,3,B.9.5,248,,X
|
||||
660,Viral skin diseases,653,Skin and subcutaneous diseases,3,B.9.6,249,,X
|
||||
661,Acne vulgaris,653,Skin and subcutaneous diseases,3,B.9.7,250,,X
|
||||
662,Alopecia areata,653,Skin and subcutaneous diseases,3,B.9.8,251,,X
|
||||
663,Pruritus,653,Skin and subcutaneous diseases,3,B.9.9,252,,X
|
||||
664,Urticaria,653,Skin and subcutaneous diseases,3,B.9.10,253,,X
|
||||
665,Decubitus ulcer,653,Skin and subcutaneous diseases,3,B.9.11,254,,
|
||||
668,Other skin and subcutaneous diseases,653,Skin and subcutaneous diseases,3,B.9.12,255,,
|
||||
669,Sense organ diseases,409,Non-communicable diseases,2,B.10,256,,X
|
||||
981,Blindness and vision loss,669,Sense organ diseases,3,B.10.1,257,,X
|
||||
670,Glaucoma,981,Blindness and vision loss,4,B.10.1.1,258,,X
|
||||
671,Cataract,981,Blindness and vision loss,4,B.10.1.2,259,,X
|
||||
672,Age-related macular degeneration,981,Blindness and vision loss,4,B.10.1.3,260,,X
|
||||
999,Refraction disorders,981,Blindness and vision loss,4,B.10.1.4,261,,X
|
||||
1000,Near vision loss,981,Blindness and vision loss,4,B.10.1.5,262,,X
|
||||
675,Other vision loss,981,Blindness and vision loss,4,B.10.1.6,263,,X
|
||||
674,Age-related and other hearing loss,669,Sense organ diseases,3,B.10.2,264,,X
|
||||
679,Other sense organ diseases,669,Sense organ diseases,3,B.10.3,265,,X
|
||||
626,Musculoskeletal disorders,409,Non-communicable diseases,2,B.11,266,,
|
||||
627,Rheumatoid arthritis,626,Musculoskeletal disorders,3,B.11.1,267,,
|
||||
628,Osteoarthritis,626,Musculoskeletal disorders,3,B.11.2,268,,X
|
||||
1014,Osteoarthritis hip,628,Osteoarthritis,4,B.11.2.1,269,,X
|
||||
1015,Osteoarthritis knee,628,Osteoarthritis,4,B.11.2.2,270,,X
|
||||
1016,Osteoarthritis hand,628,Osteoarthritis,4,B.11.2.3,271,,X
|
||||
1017,Osteoarthritis other,628,Osteoarthritis,4,B.11.2.4,272,,X
|
||||
630,Low back pain,626,Musculoskeletal disorders,3,B.11.3,273,,X
|
||||
631,Neck pain,626,Musculoskeletal disorders,3,B.11.4,274,,X
|
||||
632,Gout,626,Musculoskeletal disorders,3,B.11.5,275,,X
|
||||
639,Other musculoskeletal disorders,626,Musculoskeletal disorders,3,B.11.6,276,,
|
||||
640,Other non-communicable diseases,409,Non-communicable diseases,2,B.12,277,,
|
||||
641,Congenital birth defects,640,Other non-communicable diseases,3,B.12.1,278,,
|
||||
642,Neural tube defects,641,Congenital birth defects,4,B.12.1.1,279,,
|
||||
643,Congenital heart anomalies,641,Congenital birth defects,4,B.12.1.2,280,,
|
||||
644,Orofacial clefts,641,Congenital birth defects,4,B.12.1.3,281,,
|
||||
645,Down syndrome,641,Congenital birth defects,4,B.12.1.4,282,,
|
||||
646,Turner syndrome,641,Congenital birth defects,4,B.12.1.5,283,,X
|
||||
647,Klinefelter syndrome,641,Congenital birth defects,4,B.12.1.6,284,,X
|
||||
648,Other chromosomal abnormalities,641,Congenital birth defects,4,B.12.1.7,285,,
|
||||
649,Congenital musculoskeletal and limb anomalies,641,Congenital birth defects,4,B.12.1.8,286,,
|
||||
650,Urogenital congenital anomalies,641,Congenital birth defects,4,B.12.1.9,287,,
|
||||
651,Digestive congenital anomalies,641,Congenital birth defects,4,B.12.1.10,288,,
|
||||
652,Other congenital birth defects,641,Congenital birth defects,4,B.12.1.11,289,,
|
||||
594,Urinary diseases and male infertility,640,Other non-communicable diseases,3,B.12.2,290,,
|
||||
595,Urinary tract infections and interstitial nephritis,594,Urinary diseases and male infertility,4,B.12.2.1,291,,
|
||||
596,Urolithiasis,594,Urinary diseases and male infertility,4,B.12.2.2,292,,
|
||||
597,Benign prostatic hyperplasia,594,Urinary diseases and male infertility,4,B.12.2.3,293,,X
|
||||
598,Male infertility,594,Urinary diseases and male infertility,4,B.12.2.4,294,,X
|
||||
602,Other urinary diseases,594,Urinary diseases and male infertility,4,B.12.2.5,295,,
|
||||
603,Gynecological diseases,640,Other non-communicable diseases,3,B.12.3,296,,
|
||||
604,Uterine fibroids,603,Gynecological diseases,4,B.12.3.1,297,,
|
||||
605,Polycystic ovarian syndrome,603,Gynecological diseases,4,B.12.3.2,298,,X
|
||||
606,Female infertility,603,Gynecological diseases,4,B.12.3.3,299,,X
|
||||
607,Endometriosis,603,Gynecological diseases,4,B.12.3.4,300,,
|
||||
608,Genital prolapse,603,Gynecological diseases,4,B.12.3.5,301,,
|
||||
609,Premenstrual syndrome,603,Gynecological diseases,4,B.12.3.6,302,,X
|
||||
612,Other gynecological diseases,603,Gynecological diseases,4,B.12.3.7,303,,
|
||||
613,Hemoglobinopathies and hemolytic anemias,640,Other non-communicable diseases,3,B.12.4,304,,
|
||||
614,Thalassemias,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.1,305,,
|
||||
837,Thalassemias trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.2,306,,X
|
||||
615,Sickle cell disorders,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.3,307,,
|
||||
838,Sickle cell trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.4,308,,X
|
||||
616,G6PD deficiency,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.5,309,,
|
||||
839,G6PD trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.6,310,,X
|
||||
618,Other hemoglobinopathies and hemolytic anemias,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.7,311,,
|
||||
619,"Endocrine, metabolic, blood, and immune disorders",640,Other non-communicable diseases,3,B.12.5,312,,
|
||||
680,Oral disorders,640,Other non-communicable diseases,3,B.12.6,313,,X
|
||||
681,Caries of deciduous teeth,680,Oral disorders,4,B.12.6.1,314,,X
|
||||
682,Caries of permanent teeth,680,Oral disorders,4,B.12.6.2,315,,X
|
||||
683,Periodontal diseases,680,Oral disorders,4,B.12.6.3,316,,X
|
||||
684,Edentulism,680,Oral disorders,4,B.12.6.4,317,,X
|
||||
685,Other oral disorders,680,Oral disorders,4,B.12.6.5,318,,X
|
||||
686,Sudden infant death syndrome,640,Other non-communicable diseases,3,B.12.7,319,X,
|
||||
687,Injuries,294,All causes,1,C,320,,
|
||||
688,Transport injuries,687,Injuries,2,C.1,321,,
|
||||
689,Road injuries,688,Transport injuries,3,C.1.1,322,,
|
||||
690,Pedestrian road injuries,689,Road injuries,4,C.1.1.1,323,,
|
||||
691,Cyclist road injuries,689,Road injuries,4,C.1.1.2,324,,
|
||||
692,Motorcyclist road injuries,689,Road injuries,4,C.1.1.3,325,,
|
||||
693,Motor vehicle road injuries,689,Road injuries,4,C.1.1.4,326,,
|
||||
694,Other road injuries,689,Road injuries,4,C.1.1.5,327,,
|
||||
695,Other transport injuries,688,Transport injuries,3,C.1.2,328,,
|
||||
696,Unintentional injuries,687,Injuries,2,C.2,329,,
|
||||
697,Falls,696,Unintentional injuries,3,C.2.1,330,,
|
||||
698,Drowning,696,Unintentional injuries,3,C.2.2,331,,
|
||||
699,"Fire, heat, and hot substances",696,Unintentional injuries,3,C.2.3,332,,
|
||||
700,Poisonings,696,Unintentional injuries,3,C.2.4,333,,
|
||||
701,Poisoning by carbon monoxide,700,Poisonings,4,C.2.4.1,334,,
|
||||
703,Poisoning by other means,700,Poisonings,4,C.2.4.2,335,,
|
||||
704,Exposure to mechanical forces,696,Unintentional injuries,3,C.2.5,336,,
|
||||
705,Unintentional firearm injuries,704,Exposure to mechanical forces,4,C.2.5.1,337,,
|
||||
707,Other exposure to mechanical forces,704,Exposure to mechanical forces,4,C.2.5.2,338,,
|
||||
708,Adverse effects of medical treatment,696,Unintentional injuries,3,C.2.6,339,,
|
||||
709,Animal contact,696,Unintentional injuries,3,C.2.7,340,,
|
||||
710,Venomous animal contact,709,Animal contact,4,C.2.7.1,341,,
|
||||
711,Non-venomous animal contact,709,Animal contact,4,C.2.7.2,342,,
|
||||
712,Foreign body,696,Unintentional injuries,3,C.2.8,343,,
|
||||
713,Pulmonary aspiration and foreign body in airway,712,Foreign body,4,C.2.8.1,344,,
|
||||
714,Foreign body in eyes,712,Foreign body,4,C.2.8.2,345,,X
|
||||
715,Foreign body in other body part,712,Foreign body,4,C.2.8.3,346,,
|
||||
842,Environmental heat and cold exposure,696,Unintentional injuries,3,C.2.9,347,,
|
||||
729,Exposure to forces of nature,696,Unintentional injuries,3,C.2.10,348,,
|
||||
716,Other unintentional injuries,696,Unintentional injuries,3,C.2.11,349,,
|
||||
717,Self-harm and interpersonal violence,687,Injuries,2,C.3,350,,
|
||||
718,Self-harm,717,Self-harm and interpersonal violence,3,C.3.1,351,,
|
||||
721,Self-harm by firearm,718,Self-harm,4,C.3.1.1,352,,
|
||||
723,Self-harm by other specified means,718,Self-harm,4,C.3.1.2,353,,
|
||||
724,Interpersonal violence,717,Self-harm and interpersonal violence,3,C.3.2,354,,
|
||||
725,Physical violence by firearm,724,Interpersonal violence,4,C.3.2.1,355,,
|
||||
726,Physical violence by sharp object,724,Interpersonal violence,4,C.3.2.2,356,,
|
||||
941,Sexual violence,724,Interpersonal violence,4,C.3.2.3,357,,X
|
||||
727,Physical violence by other means,724,Interpersonal violence,4,C.3.2.4,358,,
|
||||
945,Conflict and terrorism,717,Self-harm and interpersonal violence,3,C.3.3,359,,
|
||||
854,Executions and police conflict,717,Self-harm and interpersonal violence,3,C.3.4,360,,
|
||||
1029,Total cancers,294,All causes,1,D,361,,
|
||||
1026,Total burden related to hepatitis B,294,All causes,1,E,362,,
|
||||
1027,Total burden related to hepatitis C,294,All causes,1,F,363,,
|
||||
1028,Total burden related to Non-alcoholic fatty liver disease (NAFLD),294,All causes,1,G,364,,
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue