Compare commits
75 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
d1d0dc87a7 | 1 year ago |
|
|
1782372a45 | 1 year ago |
|
|
fc478517ac | 1 year ago |
|
|
d912408456 | 1 year ago |
|
|
2488cceebc | 1 year ago |
|
|
eca4795d2f | 1 year ago |
|
|
87074aa42b | 1 year ago |
|
|
3311159ab6 | 2 years ago |
|
|
bb374dbde9 | 2 years ago |
|
|
635cfe42d9 | 2 years ago |
|
|
79902f400a | 2 years ago |
|
|
495955170c | 2 years ago |
|
|
de3698052b | 2 years ago |
|
|
dfb041d12b | 2 years ago |
|
|
9aaf007791 | 2 years ago |
|
|
b4a3cec7e6 | 2 years ago |
|
|
211151e223 | 2 years ago |
|
|
d90539a679 | 2 years ago |
|
|
142670d08a | 2 years ago |
|
|
6a931b3a49 | 3 years ago |
|
|
1c3d749ef4 | 3 years ago |
|
|
ef68adae89 | 3 years ago |
|
|
a336fb92d9 | 3 years ago |
|
|
05a96a3a29 | 3 years ago |
|
|
256177e569 | 3 years ago |
|
|
b7290c271b | 3 years ago |
|
|
c4b8484cab | 3 years ago |
|
|
2a9b8349ba | 3 years ago |
|
|
9a718f72a0 | 3 years ago |
|
|
e88f450b8c | 3 years ago |
|
|
4643351305 | 3 years ago |
|
|
c1f9f6e528 | 3 years ago |
|
|
277b5b9bd5 | 3 years ago |
|
|
1de1ff9e4a | 3 years ago |
|
|
47996ba607 | 3 years ago |
|
|
9ac4cffe61 | 3 years ago |
|
|
5600ad932d | 3 years ago |
|
|
d1edac3c4f | 3 years ago |
|
|
e2edf1eb6b | 3 years ago |
|
|
c5f3bfcdec | 3 years ago |
|
|
12c3c69304 | 3 years ago |
|
|
29644a0ad5 | 3 years ago |
|
|
123fe3b5e4 | 3 years ago |
|
|
470dfc2611 | 3 years ago |
|
|
6876779c17 | 3 years ago |
|
|
bbdd7552a8 | 3 years ago |
|
|
4831864805 | 3 years ago |
|
|
6b5a48c77a | 3 years ago |
|
|
2aba3469d3 | 3 years ago |
|
|
ef7ed7001b | 3 years ago |
|
|
2ec314180f | 3 years ago |
|
|
f6b56da261 | 3 years ago |
|
|
ed49d8728a | 3 years ago |
|
|
39397cc224 | 3 years ago |
|
|
804a90c247 | 3 years ago |
|
|
8dbf4e8c2e | 3 years ago |
|
|
1a106a553e | 3 years ago |
|
|
f6f687fff5 | 3 years ago |
|
|
016a449258 | 3 years ago |
|
|
091fd63366 | 3 years ago |
|
|
bbf8c77e6d | 3 years ago |
|
|
4283719d3d | 3 years ago |
|
|
52f8152afd | 3 years ago |
|
|
23826fb576 | 3 years ago |
|
|
966171c840 | 3 years ago |
|
|
f5788051f7 | 3 years ago |
|
|
97af862419 | 3 years ago |
|
|
4cc4c5c99f | 3 years ago |
|
|
fa37dccfff | 3 years ago |
|
|
339a83117e | 3 years ago |
|
|
266c1c9686 | 3 years ago |
|
|
dfbd82de54 | 3 years ago |
|
|
fc38a2e92c | 3 years ago |
|
|
3eb9a4130a | 3 years ago |
|
|
ee3e37e834 | 3 years ago |
@ -0,0 +1,8 @@
|
|||||||
|
*.sql.gzip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xlsx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
containers/AACT_Reloader/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
other_data/USP[[:space:]]DC/usp_dc_pub_2023_release_2.0_updated_final.csv filter=lfs diff=lfs merge=lfs -text
|
||||||
|
other_data/USP[[:space:]]MMG/MMG_v8.0_Alignment_File.csv filter=lfs diff=lfs merge=lfs -text
|
||||||
|
other_data/VA[[:space:]]Formulary/PharmacyProductSystem_NationalDrugCodeExtract.csv filter=lfs diff=lfs merge=lfs -text
|
||||||
|
containers/AACT_Reloader/backup/aact_db_backup_20250106_184236.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
containers/AACT_Reloader/backup/aact_db_backup_20250107_133822.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
@ -0,0 +1,12 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<projectDescription>
|
||||||
|
<name>ClinicalTrialsDataProcessing</name>
|
||||||
|
<comment></comment>
|
||||||
|
<projects>
|
||||||
|
</projects>
|
||||||
|
<buildSpec>
|
||||||
|
</buildSpec>
|
||||||
|
<natures>
|
||||||
|
<nature>org.jkiss.dbeaver.DBeaverNature</nature>
|
||||||
|
</natures>
|
||||||
|
</projectDescription>
|
||||||
@ -1,26 +0,0 @@
|
|||||||
-- Create a schema handling trial history.
|
|
||||||
CREATE SCHEMA history;
|
|
||||||
|
|
||||||
--Create role for anyone who needs to both select and insert on historical data
|
|
||||||
CREATE ROLE history_writer;
|
|
||||||
GRANT CONNECT ON DATABASE aact_db to history_writer;
|
|
||||||
|
|
||||||
GRANT USAGE ON SCHEMA history TO history_writer;
|
|
||||||
|
|
||||||
-- history_writer was granted USAGE on schema history above, so its table
-- privileges belong on that same schema (the original targeted schema http).
-- NOTE: ON ALL TABLES IN SCHEMA only covers tables that already exist when
-- this runs; tables created later need a new GRANT or ALTER DEFAULT PRIVILEGES.
GRANT INSERT,SELECT ON ALL TABLES IN SCHEMA history TO history_writer;
|
|
||||||
|
|
||||||
|
|
||||||
--Create role for anyone who only needs selection access to historical data, such as for analysis
|
|
||||||
CREATE ROLE history_reader;
|
|
||||||
GRANT CONNECT ON DATABASE aact_db to history_reader;
|
|
||||||
|
|
||||||
GRANT USAGE ON SCHEMA history TO history_reader;
|
|
||||||
|
|
||||||
-- history_reader was granted USAGE on schema history above, so its read
-- privilege belongs on that same schema (the original targeted schema http).
-- NOTE: ON ALL TABLES IN SCHEMA only covers tables that already exist when
-- this runs; tables created later need a new GRANT or ALTER DEFAULT PRIVILEGES.
GRANT SELECT ON ALL TABLES IN SCHEMA history TO history_reader;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* History Tables
|
|
||||||
Below is where I would construct the parsed trial history tables that I need.
|
|
||||||
*/
|
|
||||||
|
|
||||||
@ -1,93 +0,0 @@
|
|||||||
# Adobe Pdf Character ID (cid:\d+) parser
|
|
||||||
# The purpose is to allow someone to create their own table equivalent to the "\toUnicode" that
|
|
||||||
# should be provided in every PDF using cid's (but is often mangled).
|
|
||||||
|
|
||||||
def get_digits(string):
    """
    Extract the leading digits from the remainder of a cid tag.

    `string` is the text that follows the "(cid:" prefix. Everything before
    the first ")" is parsed as the number; if there is no ")" the whole
    string is parsed.

    Returns a tuple (value, length): the parsed integer and the number of
    characters that made up the number.

    Raises ValueError (from int) when the text before ")" is not an integer.
    """
    digits, _, _ = string.partition(")")
    return int(digits), len(digits)
|
|
||||||
|
|
||||||
def token_generator(string):
    """
    Yield tokens describing a string extracted from a pdf.

    Tokens take two forms:
    - Integers: these represent CID codes, written "(cid:<digits>)" in the text.
    - Characters: these represent the arbitrary characters often returned
      amidst cid's.

    It is a python generator because that simplifies the ordering and allows
    us to avoid recursion.

    Text that merely looks like the start of a cid tag (no digits, a
    non-numeric code, or a missing closing parenthesis) is not an error:
    it is emitted one character at a time like any other text.
    """
    import re  # local import so the function is self-contained

    cid_tag = re.compile(r"\(cid:([0-9]+)\)")
    position = 0
    total = len(string)

    while position < total:
        tag = cid_tag.match(string, position)
        if tag:
            # Consume the whole "(cid:N)" tag and emit its numeric code.
            position = tag.end()
            yield int(tag.group(1))
        else:
            yield string[position]
            position += 1
|
|
||||||
|
|
||||||
|
|
||||||
class UnknownSymbol:
    """
    Represents a token that is not in the parser's dictionary.

    The original token is kept in `symbol` so it can be inspected later.
    """

    def __init__(self, symbol):
        self.symbol = symbol

    def __repr__(self):
        # Debug form: shows the unresolved token and its type.
        return "UnknownSymbol: {} of type {}".format(self.symbol, type(self.symbol))

    def __str__(self):
        # Display form: the Unicode replacement character.
        return "\uFFFD"
|
|
||||||
|
|
||||||
class Parser:
    """
    Translates from tokens to characters or strings, handling errors as it goes.

    It requires a lookup table (for example a dictionary) during instantiation.
    This table is what is used to perform lookups.

    It exposes these methods
    - convert attempts to convert a single token.
    - convert_stream converts an iterable of tokens into an iterable of
      converted values (convert_list is kept as an alias of it).
    - convert_list_of_strings tokenizes each string, converts it, and prints
        - a string, if there are no unknown symbols.
        - a list, containing characters and UnknownSymbol objects, otherwise.
    """

    def __init__(self, lookup_table):
        self._lookup_table = lookup_table

    def convert(self, token):
        """Return the table entry for `token`, or UnknownSymbol(token) if the lookup fails."""
        try:
            return self._lookup_table[token]
        except (LookupError, TypeError):
            # Missing key/index, or a token the table cannot be indexed with.
            return UnknownSymbol(token)

    def convert_stream(self, token_stream):
        """Lazily convert every token of `token_stream`."""
        for token in token_stream:
            yield self.convert(token)

    # Original method name, kept so existing callers continue to work.
    convert_list = convert_stream

    def convert_list_of_strings(self, list_of_strings):
        """Tokenize, convert and print each string in `list_of_strings`."""
        for text in list_of_strings:
            converted = list(self.convert_stream(token_generator(text)))
            try:
                print("".join(converted))
            except TypeError:
                # join fails when an entry is not a str (e.g. an UnknownSymbol);
                # fall back to printing the raw list.
                print(converted)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Placeholder entry point: command-line processing was never implemented.
    print("Plan was to accept and proceess a symbol table and text. Apparently it has not been implemented.")
|
|
||||||
@ -1,371 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 18,
|
|
||||||
"id": "40358f02-c376-4431-be39-cdd477f17e7a",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import polars as pl"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 25,
|
|
||||||
"id": "8fb27ee2-72c1-4e80-9d00-de54f2834fe8",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"polars.datatypes.Datetime"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 25,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"pl.datatypes.Datetime"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 55,
|
|
||||||
"id": "2c0edd77-c2d0-4184-a094-8c01783d2f0e",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"products = pl.scan_csv(file=\"./EOBZIP_2022_04/products.txt\", sep=\"~\")\n",
|
|
||||||
"patents = pl.scan_csv(file=\"./EOBZIP_2022_04/patent.txt\", sep=\"~\")\n",
|
|
||||||
"exclusivity = pl.scan_csv(file=\"./EOBZIP_2022_04/exclusivity.txt\", sep=\"~\", parse_dates=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 58,
|
|
||||||
"id": "023f211d-23aa-4a2c-843d-1b60cec91079",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def set_exclusivity_types(df):\n",
|
|
||||||
" return df.with_columns([\n",
|
|
||||||
" pl.col(\"Exclusivity_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\")\n",
|
|
||||||
" ])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 61,
|
|
||||||
"id": "a1da42c9-e47a-4437-b089-e9b91f789a0c",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1 \"class=\"dataframe \">\n",
|
|
||||||
"<thead>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<th>\n",
|
|
||||||
"Appl_Type\n",
|
|
||||||
"</th>\n",
|
|
||||||
"<th>\n",
|
|
||||||
"Appl_No\n",
|
|
||||||
"</th>\n",
|
|
||||||
"<th>\n",
|
|
||||||
"Product_No\n",
|
|
||||||
"</th>\n",
|
|
||||||
"<th>\n",
|
|
||||||
"Exclusivity_Code\n",
|
|
||||||
"</th>\n",
|
|
||||||
"<th>\n",
|
|
||||||
"Exclusivity_Date\n",
|
|
||||||
"</th>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"str\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"i64\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"i64\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"str\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"date\n",
|
|
||||||
"</td>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"</thead>\n",
|
|
||||||
"<tbody>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"N\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"11366\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"2\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"ODE-96\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"2022-08-07\n",
|
|
||||||
"</td>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"N\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"20287\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"11\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"NPP\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"2022-05-16\n",
|
|
||||||
"</td>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"N\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"20287\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"10\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"NPP\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"2022-05-16\n",
|
|
||||||
"</td>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"N\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"20287\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"9\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"NPP\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"2022-05-16\n",
|
|
||||||
"</td>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"N\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"20287\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"8\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"\"NPP\"\n",
|
|
||||||
"</td>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"2022-05-16\n",
|
|
||||||
"</td>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"</tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
"shape: (5, 5)\n",
|
|
||||||
"┌───────────┬─────────┬────────────┬──────────────────┬──────────────────┐\n",
|
|
||||||
"│ Appl_Type ┆ Appl_No ┆ Product_No ┆ Exclusivity_Code ┆ Exclusivity_Date │\n",
|
|
||||||
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
||||||
"│ str ┆ i64 ┆ i64 ┆ str ┆ date │\n",
|
|
||||||
"╞═══════════╪═════════╪════════════╪══════════════════╪══════════════════╡\n",
|
|
||||||
"│ N ┆ 11366 ┆ 2 ┆ ODE-96 ┆ 2022-08-07 │\n",
|
|
||||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
|
||||||
"│ N ┆ 20287 ┆ 11 ┆ NPP ┆ 2022-05-16 │\n",
|
|
||||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
|
||||||
"│ N ┆ 20287 ┆ 10 ┆ NPP ┆ 2022-05-16 │\n",
|
|
||||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
|
||||||
"│ N ┆ 20287 ┆ 9 ┆ NPP ┆ 2022-05-16 │\n",
|
|
||||||
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
|
||||||
"│ N ┆ 20287 ┆ 8 ┆ NPP ┆ 2022-05-16 │\n",
|
|
||||||
"└───────────┴─────────┴────────────┴──────────────────┴──────────────────┘"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 61,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"exclusivity.pipe(set_exclusivity_types).head(5).collect()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 85,
|
|
||||||
"id": "92fe99fa-1963-460c-99ea-7f614b4b2e25",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def set_patent_types(df):\n",
|
|
||||||
" return df.with_columns([\n",
|
|
||||||
" pl.col(\"Patent_Expire_Date_Text\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\"),\n",
|
|
||||||
" pl.col(\"Submission_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\"),\n",
|
|
||||||
" pl.col(\"Drug_Substance_Flag\") == \"Y\",\n",
|
|
||||||
" pl.col(\"Drug_Product_Flag\") == \"Y\",\n",
|
|
||||||
" pl.col(\"Delist_Flag\") == \"Y\"\n",
|
|
||||||
" ])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 90,
|
|
||||||
"id": "13707ca6-094f-4ed7-94cb-824087e97874",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1 \"class=\"dataframe \">\n",
|
|
||||||
"<thead>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<th>\n",
|
|
||||||
"Patent_Expire_Date_Text\n",
|
|
||||||
"</th>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"date\n",
|
|
||||||
"</td>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"</thead>\n",
|
|
||||||
"<tbody>\n",
|
|
||||||
"<tr>\n",
|
|
||||||
"<td>\n",
|
|
||||||
"2022-01-02\n",
|
|
||||||
"</td>\n",
|
|
||||||
"</tr>\n",
|
|
||||||
"</tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
"shape: (1, 1)\n",
|
|
||||||
"┌─────────────────────────┐\n",
|
|
||||||
"│ Patent_Expire_Date_Text │\n",
|
|
||||||
"│ --- │\n",
|
|
||||||
"│ date │\n",
|
|
||||||
"╞═════════════════════════╡\n",
|
|
||||||
"│ 2022-01-02 │\n",
|
|
||||||
"└─────────────────────────┘"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 90,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"patents.pipe(set_patent_types).select(\"Patent_Expire_Date_Text\").min().collect()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 81,
|
|
||||||
"id": "18ad8df7-45d5-4454-8955-c5f28a7d7f1e",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"polars.datatypes.Null"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 81,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"pl.datatypes.Null"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "79e4b3d9-29ae-4302-bee1-4be02e0ba654",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.8.13"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
|||||||
https://www.fda.gov/media/76860/download
|
|
||||||
@ -1 +0,0 @@
|
|||||||
Most of these are related to potentially parsing orangebook data from the pdfs.
|
|
||||||
@ -1,145 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"id": "51bf48a1-920a-4e64-ac5f-323ff3a27ebf",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Will use tool 'Tesseract (sh)'\n",
|
|
||||||
"Available languages: eng, osd\n",
|
|
||||||
"Will use language 'eng'\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# Import the required libraries\n",
|
|
||||||
"from wand.image import Image\n",
|
|
||||||
"from PIL import Image as PI\n",
|
|
||||||
"import pyocr\n",
|
|
||||||
"import pyocr.builders\n",
|
|
||||||
"import io, sys\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# Get the handle of the OCR library (in this case, tesseract)\n",
|
|
||||||
"tools = pyocr.get_available_tools()\n",
|
|
||||||
"if len(tools) == 0:\n",
|
|
||||||
"\tprint(\"No OCR tool found!\")\n",
|
|
||||||
"\tsys.exit(1)\n",
|
|
||||||
"tool = tools[0]\n",
|
|
||||||
"print(\"Will use tool '%s'\" % (tool.get_name()))\n",
|
|
||||||
"\n",
|
|
||||||
"# Get the language\n",
|
|
||||||
"langs = tool.get_available_languages()\n",
|
|
||||||
"print(\"Available languages: %s\" % \", \".join(langs)) \n",
|
|
||||||
"lang = langs[0] # For English\n",
|
|
||||||
"print(\"Will use language '%s'\" % (lang))\n",
|
|
||||||
"\n",
|
|
||||||
"# Setup two lists which will be used to hold our images and final_text\n",
|
|
||||||
"req_image = []\n",
|
|
||||||
"final_text = []\n",
|
|
||||||
"\n",
|
|
||||||
"# Open the PDF file using wand and convert it to jpeg\n",
|
|
||||||
"image_pdf = Image(filename=\"/home/will/research/ClinicalTrialsDataProcessing/Orangebook/Orangebooks/testprint.pdf\", resolution=300)\n",
|
|
||||||
"image_jpeg = image_pdf.convert('pdf')\n",
|
|
||||||
"\n",
|
|
||||||
"# wand has converted all the separate pages in the PDF into separate image\n",
|
|
||||||
"# blobs. We can loop over them and append them as a blob into the req_image\n",
|
|
||||||
"# list.\n",
|
|
||||||
"for img in image_jpeg.sequence:\n",
|
|
||||||
"\timg_page = Image(image=img)\n",
|
|
||||||
"\treq_image.append(img_page.make_blob('jpeg'))\n",
|
|
||||||
"\n",
|
|
||||||
"# Now we just need to run OCR over the image blobs and store all of the \n",
|
|
||||||
"# recognized text in final_text.\n",
|
|
||||||
"for img in req_image:\n",
|
|
||||||
"\ttxt = tool.image_to_string(\n",
|
|
||||||
"\t\tPI.open(io.BytesIO(img)),\n",
|
|
||||||
"\t\tlang=lang,\n",
|
|
||||||
"\t\tbuilder=pyocr.builders.TextBuilder()\n",
|
|
||||||
"\t)\n",
|
|
||||||
"\tfinal_text.append(txt)\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"id": "f0d5f1d6-7e15-4ee6-b4ee-cbd41c5afb99",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"\n",
|
|
||||||
"The final text is: \n",
|
|
||||||
"\n",
|
|
||||||
"40TH EDITION - 2020 - APPROVED DRUG PRODUCT LIST\n",
|
|
||||||
"\n",
|
|
||||||
"PRESCRIPTION DRUG PRODUCT LIST\n",
|
|
||||||
"\n",
|
|
||||||
"ABACAVIR SULFATE\n",
|
|
||||||
"SOLUTION; ORAL\n",
|
|
||||||
"ABACAVIR SULFATE\n",
|
|
||||||
"\n",
|
|
||||||
"EQ 2 5 /ML\n",
|
|
||||||
"\n",
|
|
||||||
"EQ 2 Ee /ML\n",
|
|
||||||
"\n",
|
|
||||||
"EQ 300MG BASE\n",
|
|
||||||
"EQ 300MG BASE\n",
|
|
||||||
"EQ 300MG BASE\n",
|
|
||||||
"\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"print(\"\\nThe final text is: \\n\")\n",
|
|
||||||
"print(final_text[0][0:200])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "1cac17e7-079d-4e32-bdbf-ae49194b2078",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"It appears that this does not have the required precision. I'll need to do this some other way."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "2283e290-fab3-4cda-8ce9-55a0b3533c98",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.8.13"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1,16 +0,0 @@
|
|||||||
{
|
|
||||||
// Use IntelliSense to learn about possible attributes.
|
|
||||||
// Hover to view descriptions of existing attributes.
|
|
||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
|
||||||
"version": "0.2.0",
|
|
||||||
"configurations": [
|
|
||||||
{
|
|
||||||
"name": "Python: Current File",
|
|
||||||
"type": "python",
|
|
||||||
"request": "launch",
|
|
||||||
"program": "${file}",
|
|
||||||
"console": "integratedTerminal",
|
|
||||||
"justMyCode": true
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,133 @@
|
|||||||
|
|
||||||
|
/* OVERVIEW
|
||||||
|
*
|
||||||
|
* This links trials to the first date each drug (indexed by NDA/ANDA etc) is
|
||||||
|
* put on the market.
|
||||||
|
*
|
||||||
|
* It takes 3 views to build up to it.
|
||||||
|
* */
|
||||||
|
|
||||||
|
--Match trials to brands and ingredients
|
||||||
|
create or replace view public.match_trials_to_bn_in as
|
||||||
|
with trialncts as (
|
||||||
|
SELECT DISTINCT nct_id FROM history.trial_snapshots TS
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
bi.nct_id ,
|
||||||
|
bi.downcase_mesh_term,
|
||||||
|
rr.tty2 ,
|
||||||
|
rr.rxcui2 as bn_or_in_cui, --brand or ingredient
|
||||||
|
count(*)
|
||||||
|
FROM ctgov.browse_interventions bi
|
||||||
|
left outer JOIN rxnorm_migrated.rxnorm_props AS rp
|
||||||
|
on bi.downcase_mesh_term = rp.propvalue1 --link names to drug cuis ()
|
||||||
|
left outer join rxnorm_migrated.rxnorm_relations rr
|
||||||
|
on rr.rxcui1 = rp.rxcui
|
||||||
|
WHERE
|
||||||
|
bi.nct_id in (
|
||||||
|
SELECT nct_id FROM trialncts
|
||||||
|
)
|
||||||
|
and
|
||||||
|
bi.mesh_type='mesh-list'
|
||||||
|
and rp.propname = 'Active_ingredient_name'
|
||||||
|
and rr.tty2 in ('BN', 'IN', 'MIN')
|
||||||
|
group by bi.nct_id, bi.downcase_mesh_term , rr.tty2 ,rr.rxcui2
|
||||||
|
order by bi.nct_id
|
||||||
|
;
|
||||||
|
--running out of space.
|
||||||
|
|
||||||
|
-- get list of interventions associated with trials of interest
|
||||||
|
create temp table tmp_interventions as
|
||||||
|
select * from ctgov.browse_interventions bi
|
||||||
|
where
|
||||||
|
bi.mesh_type ='mesh-list'
|
||||||
|
and
|
||||||
|
bi.nct_id in (select distinct nct_id from history.trial_snapshots)
|
||||||
|
;
|
||||||
|
select * from tmp_interventions;
|
||||||
|
|
||||||
|
--drop table tmp_join_interv_rxcui;
|
||||||
|
create temp table tmp_join_interv_rxcui as
|
||||||
|
select *
|
||||||
|
from
|
||||||
|
tmp_interventions tint
|
||||||
|
inner join
|
||||||
|
rxnorm_migrated.rxnorm_props rp
|
||||||
|
on tint.downcase_mesh_term = rp.propvalue1
|
||||||
|
where propname='RxNorm Name'
|
||||||
|
;-- get the rxcui for ingredients
|
||||||
|
|
||||||
|
select * from tmp_join_interv_rxcui;
|
||||||
|
|
||||||
|
--filter rxcui -> is human prescribable
|
||||||
|
create temp view tmp_view_prescribable as
|
||||||
|
select count(*) from rxnorm_migrated.rxnorm_props rp
|
||||||
|
where
|
||||||
|
rp.propname = 'PRESCRIBABLE'
|
||||||
|
and
|
||||||
|
rp.propvalue1 = 'Y'
|
||||||
|
;
|
||||||
|
|
||||||
|
--link prescribable to brand ingredients or brand names.
|
||||||
|
|
||||||
|
|
||||||
|
--get relationships of IN -> BN
|
||||||
|
select *
|
||||||
|
from
|
||||||
|
rxnorm_migrated.rxnorm_relations rr
|
||||||
|
where
|
||||||
|
rr.tty1 in ('IN','MIN')
|
||||||
|
and rr.rxcui1 in (select distinct rxcui from tmp_join_interv_rxcui tjir)
|
||||||
|
and rr.tty2 = 'BN'
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
--match trials to NDC11 codes through brands
|
||||||
|
create or replace view public.match_trial_to_ndc11 as
|
||||||
|
select
|
||||||
|
mttbi.nct_id,
|
||||||
|
ah.ndc,
|
||||||
|
count(*)
|
||||||
|
from public.match_trials_to_bn_in as mttbi
|
||||||
|
left outer join rxnorm_migrated.rxnorm_relations as rr
|
||||||
|
on mttbi.bn_or_in_cui = rr.rxcui1
|
||||||
|
left outer join rxnorm_migrated."ALLNDC_HISTORY" as ah
|
||||||
|
on rr.rxcui2 = ah.rxcui
|
||||||
|
where
|
||||||
|
rr.tty1 = 'BN'
|
||||||
|
and
|
||||||
|
rr.tty2 in ('SBD', 'BPCK')
|
||||||
|
and
|
||||||
|
ah.sab='RXNORM'
|
||||||
|
group by mttbi.nct_id, ah.ndc
|
||||||
|
order by mttbi.nct_id, ah.ndc
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
---associate trials to marketing start dates
|
||||||
|
create or replace view public.match_trial_to_marketing_start_date as
|
||||||
|
select
|
||||||
|
mttn.nct_id,
|
||||||
|
n.application_number_or_citation,
|
||||||
|
min(n.marketing_start_date )
|
||||||
|
from match_trial_to_ndc11 mttn
|
||||||
|
inner join spl.nsde n
|
||||||
|
on mttn.ndc = n.package_ndc11
|
||||||
|
where
|
||||||
|
n.product_type = 'HUMAN PRESCRIPTION DRUG'
|
||||||
|
and
|
||||||
|
n.marketing_category in ('NDA','ANDA','BLA', 'NDA authorized generic', 'NDA AUTHORIZED GENERIC')
|
||||||
|
group by mttn.nct_id,n.application_number_or_citation
|
||||||
|
order by mttn.nct_id
|
||||||
|
;
|
||||||
|
|
||||||
|
---Number of trials after a certain date
|
||||||
|
select nct_id,count(distinct application_number_or_citation)
|
||||||
|
from public.match_trial_to_marketing_start_date mttmsd
|
||||||
|
where "min" > '2012-06-01'
|
||||||
|
group by nct_id
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/bash
# Dump the aact_db database from a podman container into a timestamped
# .sql file in backup_dir, then gzip it.
#
# Usage: <script> [container_name]    (container_name defaults to aact_db)

# Stop at the first failure so a failed dump is never compressed and kept
# as if it were a valid backup.
set -euo pipefail

backup_dir="/mnt/will/large_data/Research_large_data/ClinicalTrialsDataProcessing/containers/AACT_Reloader/backup/"
date_stamp=$(date +%Y%m%d_%H%M%S)
filename="aact_db_backup_${date_stamp}.sql"
# No spaces around '=': with spaces bash runs a command called
# "container_name" instead of assigning the variable.
container_name="${1:-aact_db}"

podman exec "$container_name" pg_dump -U root aact_db > "${backup_dir}/${filename}"

# Optional: compress the backup
gzip "${backup_dir}/${filename}"
|
||||||
@ -0,0 +1,117 @@
|
|||||||
|
/*
|
||||||
|
I started by creating a formularies schema,
|
||||||
|
then importing the usp - dc formulary data through DBeaver's csv import.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-- DROP SCHEMA "Formularies";
|
||||||
|
|
||||||
|
CREATE SCHEMA "Formularies" AUTHORIZATION root;
|
||||||
|
|
||||||
|
-- "Formularies".usp_dc_2023 definition
|
||||||
|
|
||||||
|
-- Drop table
|
||||||
|
|
||||||
|
-- DROP TABLE "Formularies".usp_dc_2023;
|
||||||
|
|
||||||
|
CREATE TABLE "Formularies".usp_dc_2023 (
|
||||||
|
rxcui varchar(15) NULL, --yes even though this is a number, it is represented as a string elsewhere.
|
||||||
|
tty varchar(10) NULL,
|
||||||
|
"Name" varchar(256) NULL,
|
||||||
|
"Related BN" varchar(250) NULL,
|
||||||
|
"Related DF" varchar(25050) NULL,
|
||||||
|
"USP Category" varchar(250) NULL,
|
||||||
|
"USP Class" varchar(250) NULL,
|
||||||
|
"USP Pharmacotherapeutic Group" varchar(250) NULL,
|
||||||
|
"API Concept" varchar(250) NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
/*
|
||||||
|
I then linked the data back on itself with a materialized view, using claude.ai for simplicity.
|
||||||
|
|
||||||
|
Claude.ai > I need a postres sql statement to create a materialized view that will take the following table and link from a given rxcui to the other rxcui's that share the same category and class
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE "Formularies".usp_dc_2023 (
|
||||||
|
rxcui int4 NULL,
|
||||||
|
tty varchar(10) NULL,
|
||||||
|
"Name" varchar(256) NULL,
|
||||||
|
"Related BN" varchar(250) NULL,
|
||||||
|
"Related DF" varchar(25050) NULL,
|
||||||
|
"USP Category" varchar(250) NULL,
|
||||||
|
"USP Class" varchar(250) NULL,
|
||||||
|
"USP Pharmacotherapeutic Group" varchar(250) NULL,
|
||||||
|
"API Concept" varchar(250) NULL
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
It links rxcuis to other rxcuis where they have a matching USP Categories and Class
|
||||||
|
This gives alternative RXCUIs based on category and class.
|
||||||
|
*/
|
||||||
|
CREATE MATERIALIZED VIEW "Formularies".rxcui_category_class_links AS
|
||||||
|
WITH base AS (
|
||||||
|
SELECT DISTINCT
|
||||||
|
a.rxcui as source_rxcui,
|
||||||
|
b.rxcui as linked_rxcui,
|
||||||
|
a."USP Category" as category,
|
||||||
|
a."USP Class" as class
|
||||||
|
FROM "Formularies".usp_dc_2023 a
|
||||||
|
JOIN "Formularies".usp_dc_2023 b
|
||||||
|
ON a."USP Category" = b."USP Category"
|
||||||
|
AND a."USP Class" = b."USP Class"
|
||||||
|
AND a.rxcui != b.rxcui
|
||||||
|
WHERE a.rxcui IS NOT NULL
|
||||||
|
AND b.rxcui IS NOT NULL
|
||||||
|
)
|
||||||
|
SELECT * FROM base;
|
||||||
|
|
||||||
|
-- Add indexes for better query performance
|
||||||
|
CREATE INDEX ON "Formularies".rxcui_category_class_links (source_rxcui);
|
||||||
|
CREATE INDEX ON "Formularies".rxcui_category_class_links (linked_rxcui);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Next step is linking a given nct -> compounds -> formulary alternatives -> compounds -> brands/generics.
|
||||||
|
I'll break this into two steps.
|
||||||
|
|
||||||
|
1. link formulary alternatives to compounds and brands,
|
||||||
|
2. link nct_id to formulary alternatives
|
||||||
|
*/
|
||||||
|
-- Drop the existing materialized views so they can be recreated below.
-- IF EXISTS must follow the object type: DROP MATERIALIZED VIEW IF EXISTS <name>.
drop materialized view if exists "Formularies".match_trial_compound_to_alternate_bn_rxcuis;
drop materialized view if exists "Formularies".rxcui_to_brand_through_uspdc cascade;
|
||||||
|
|
||||||
|
-- For every (source, linked) RXCUI pair from the category/class links, attach
-- the RxNorm relations of the linked RXCUI whose second term type is 'BN'.
CREATE MATERIALIZED VIEW "Formularies".rxcui_to_brand_through_uspdc AS
SELECT DISTINCT
    links.source_rxcui,
    links.linked_rxcui,
    links.category,
    links."class",
    rel.tty1,
    -- rel.tty2 is not projected: the WHERE clause fixes it to 'BN'
    rel.rxcui2
FROM "Formularies".rxcui_category_class_links AS links
JOIN rxnorm_migrated.rxnorm_relations AS rel
    ON links.linked_rxcui = rel.rxcui1
WHERE rel.tty2 = 'BN';
|
||||||
|
|
||||||
|
/* So this one takes each RXCUI and its associated RXCUIs from the same
|
||||||
|
category and class, and filters it down to associated RXCUIs that
|
||||||
|
represent brand names.
|
||||||
|
*/
|
||||||
|
|
||||||
|
-- Match each trial's BN/IN concept against the brand side (rxcui2) of the
-- USP-DC brand links and keep the distinct (trial, brand) pairs.
-- NOTE(review): because the join key and the projected column are both
-- rxcui2, every returned brand equals the trial's own bn_or_in_cui; if the
-- goal is *alternative* brands, joining on source_rxcui may have been
-- intended -- confirm before relying on these counts.
CREATE MATERIALIZED VIEW "Formularies".match_trial_compound_to_alternate_bn_rxcuis AS
SELECT DISTINCT
    trial.nct_id,
    brand.rxcui2 AS brand_rxcuis
FROM match_trials_to_bn_in AS trial
JOIN "Formularies".rxcui_to_brand_through_uspdc AS brand
    ON brand.rxcui2 = trial.bn_or_in_cui;
|
||||||
|
|
||||||
|
/*
|
||||||
|
This takes the list of ingredients and brands associated with a trial, and
|
||||||
|
links it to the list of alternative brand names.
|
||||||
|
*/
|
||||||
|
|
||||||
|
--renamed the view
|
||||||
|
-- Number of matched brand rows per trial.
CREATE OR REPLACE VIEW "Formularies".nct_to_brand_counts_through_uspdc AS
SELECT
    matches.nct_id,
    count(*) AS brand_name_counts
FROM "Formularies".match_trial_compound_to_alternate_bn_rxcuis AS matches
GROUP BY
    matches.nct_id;
|
||||||
@ -0,0 +1,100 @@
|
|||||||
|
|
||||||
|
/* How many trials were included?
|
||||||
|
* How many trials were inspected?
|
||||||
|
* How many trials were reserved for download?
|
||||||
|
* How many trials didn't get included for some technical reason?
|
||||||
|
*
|
||||||
|
********* Data from 2023-03-29 ***********
|
||||||
|
Of Interest 1981
|
||||||
|
Reserved 1709 #I believe this is lower than the downloaded number because I reserved them earlier
|
||||||
|
Downloaded 1960
|
||||||
|
Incomplete 3 #there were a few http 500 and 404 codes
|
||||||
|
******************************************
|
||||||
|
* Note there were 21 missing trials of interest.
|
||||||
|
* */
|
||||||
|
-- Distinct trials per download status.
SELECT
    ds.status,
    count(DISTINCT ds.nct_id)
FROM http.download_status AS ds
GROUP BY
    ds.status;
|
||||||
|
|
||||||
|
/* Get a list of trials
|
||||||
|
* -- There are currently 304 trials for which I was able to extract unique snapshots (2023-03-29)
|
||||||
|
* -- There are currently 1138 trials for which I was able to extract unique snapshots (2023-04-03)
|
||||||
|
* */
|
||||||
|
-- Number of trials that have at least one extracted snapshot.
-- The terminating semicolon was missing, which made this statement run into
-- the next SELECT when the file is executed as a script.
SELECT count(DISTINCT ts.nct_id)
FROM history.trial_snapshots AS ts;
|
||||||
|
|
||||||
|
/* Get the number of listed conditions
|
||||||
|
* -- There are only 609 listed (MeSH classified) conditions from 284 trials(2023-03-29)
|
||||||
|
* I may need to expand how I address conditions
|
||||||
|
*/
|
||||||
|
-- Rows of 'mesh-list' conditions belonging to trials that have snapshots.
SELECT count(*)
FROM ctgov.browse_conditions AS bc
WHERE bc.nct_id IN (SELECT ts.nct_id FROM history.trial_snapshots AS ts)
  AND bc.mesh_type = 'mesh-list';
|
||||||
|
|
||||||
|
-- Distinct snapshot trials that have at least one 'mesh-list' condition.
SELECT count(DISTINCT bc.nct_id)
FROM ctgov.browse_conditions AS bc
WHERE bc.nct_id IN (SELECT ts.nct_id FROM history.trial_snapshots AS ts)
  AND bc.mesh_type = 'mesh-list';
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If I were to expand that to non-coded conditions that would be
|
||||||
|
* 304 trials with 398 conditions
|
||||||
|
* */
|
||||||
|
-- Distinct snapshot trials that list any condition (coded or not).
SELECT count(DISTINCT cond.nct_id)
FROM ctgov.conditions AS cond
WHERE cond.nct_id IN (SELECT ts.nct_id FROM history.trial_snapshots AS ts);
|
||||||
|
-- Total condition rows for trials that have snapshots.
-- The terminating semicolon was missing, so the script failed at the next
-- SELECT.
SELECT count(*)
FROM ctgov.conditions AS c
WHERE c.nct_id IN (SELECT ts.nct_id FROM history.trial_snapshots AS ts);
|
||||||
|
|
||||||
|
|
||||||
|
/* Get the number of matches from UMLS
|
||||||
|
* There are about 5,808 proposed matches.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
-- Number of proposed trial -> ICD-10 matches.
SELECT count(*)
FROM "DiseaseBurden".trial_to_icd10 AS tti;
|
||||||
|
--1383 before run at 8pm 2023-03-29
|
||||||
|
--5209 at 2023-04-03T11:21
|
||||||
|
|
||||||
|
|
||||||
|
/*Get the number of trials that have links to icd10 codes, by approval state*/
|
||||||
|
-- Distinct trials per approval state.
SELECT
    tti.approved,
    count(DISTINCT tti.nct_id)
FROM "DiseaseBurden".trial_to_icd10 AS tti
GROUP BY
    tti.approved;
|
||||||
|
|
||||||
|
-- finding and removing duplicates from the trial linking stuff. Useful when you redownload trials.
|
||||||
|
/*
|
||||||
|
with CTE as (
|
||||||
|
select row_number() over (partition by nct_id, "condition",ui) as rownum, *
|
||||||
|
from "DiseaseBurden".trial_to_icd10 tti
|
||||||
|
)
|
||||||
|
delete from "DiseaseBurden".trial_to_icd10 tti2
|
||||||
|
where id in (
|
||||||
|
select id from cte where rownum > 1
|
||||||
|
);
|
||||||
|
*/
|
||||||
|
|
||||||
|
--get the number of completed vs terminated trials
|
||||||
|
-- Trials per overall_status, restricted to trials with an accepted ICD-10 link.
SELECT
    s.overall_status,
    count(DISTINCT s.nct_id)
FROM ctgov.studies AS s
WHERE s.nct_id IN (
    SELECT tti.nct_id
    FROM "DiseaseBurden".trial_to_icd10 AS tti
    WHERE tti.approved = 'accepted'
)
GROUP BY
    s.overall_status;

-- Same breakdown for every trial that has any proposed ICD-10 link.
SELECT
    s.overall_status,
    count(DISTINCT s.nct_id)
FROM ctgov.studies AS s
WHERE s.nct_id IN (
    SELECT tti.nct_id
    FROM "DiseaseBurden".trial_to_icd10 AS tti
)
GROUP BY
    s.overall_status;

-- Same breakdown for every trial that has snapshots.
SELECT
    s.overall_status,
    count(DISTINCT s.nct_id)
FROM ctgov.studies AS s
WHERE s.nct_id IN (
    SELECT ts.nct_id
    FROM history.trial_snapshots AS ts
)
GROUP BY
    s.overall_status;
|
||||||
@ -0,0 +1,38 @@
|
|||||||
|
--TODO: Document and migrate to setup
|
||||||
|
|
||||||
|
-- trial_to_icd10 has a column of this enum type, so the table is dropped
-- before the type.
DROP TABLE IF EXISTS "DiseaseBurden".trial_to_icd10;
DROP TYPE IF EXISTS "DiseaseBurden".validation_type;

-- Reviewer verdict recorded against a proposed match.
CREATE TYPE "DiseaseBurden".validation_type AS ENUM (
    'accepted',
    'rejected',
    'unmatched'
);

COMMENT ON TYPE "DiseaseBurden".validation_type IS
    'This is used to record interactions with each type. It can be accepted (yes this should be used), rejected (no this doesn`t match), or unmatched (where non of the proposed options match)';
|
||||||
|
|
||||||
|
|
||||||
|
-- Proposed links between a trial's listed condition and a coded concept,
-- plus the review verdict (approved / approval_timestamp) for each proposal.
CREATE TABLE "DiseaseBurden".trial_to_icd10 (
    id integer NOT NULL GENERATED ALWAYS AS IDENTITY,
    nct_id varchar NOT NULL,
    "condition" varchar NOT NULL,
    ui varchar NULL,
    uri varchar NULL,
    rootsource varchar NULL,
    "name" varchar NULL,
    "source" varchar NULL,
    approved "DiseaseBurden".validation_type,
    approval_timestamp timestamp,
    CONSTRAINT trial_to_icd10_pk PRIMARY KEY (id)
);

-- trial_to_icd10 is a table, so the description is attached with
-- COMMENT ON TABLE (the script previously used COMMENT ON TYPE).
COMMENT ON TABLE "DiseaseBurden".trial_to_icd10 IS 'This represents potential links between trials and icd10 codes. Most of the links are both automatic and wrong.';
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- Rebuild the ICD-10 code -> cause text lookup table from scratch.
DROP TABLE IF EXISTS "DiseaseBurden".icd10_to_cause;

CREATE TABLE "DiseaseBurden".icd10_to_cause (
    id         SERIAL  NOT NULL,
    code       varchar NOT NULL,
    cause_text varchar NOT NULL,
    CONSTRAINT icd10_to_cause_pk PRIMARY KEY (id)
);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,38 @@
|
|||||||
|
-- Generate a CREATE OR REPLACE VIEW statement for every view outside the
-- system schemas.
-- format('%I') quotes identifiers where needed; plain concatenation produced
-- unusable DDL for mixed-case schemas such as "Formularies" and
-- "DiseaseBurden".
SELECT
    format('CREATE OR REPLACE VIEW %I.%I AS %s', schemaname, viewname, definition)
FROM pg_views
WHERE schemaname NOT IN ('pg_catalog', 'information_schema');
|
||||||
|
|
||||||
|
-- Generate a CREATE MATERIALIZED VIEW statement for every materialized view
-- outside the system schemas.
-- Materialized views are listed in pg_matviews (column matviewname), not in
-- pg_views, and PostgreSQL has no CREATE OR REPLACE MATERIALIZED VIEW, so the
-- generated statement uses plain CREATE. Identifiers are quoted via %I.
SELECT
    format('CREATE MATERIALIZED VIEW %I.%I AS %s', schemaname, matviewname, definition)
FROM pg_matviews
WHERE schemaname NOT IN ('pg_catalog', 'information_schema');
|
||||||
|
|
||||||
|
-- Generate a simple CREATE TABLE statement (column name, type, optional
-- length, NOT NULL) for every table outside the system schemas.
-- Changes from the earlier version:
--   * columns are aggregated in ordinal_position order, so the generated
--     column list matches the table's declared order instead of being arbitrary;
--   * schema, table and column names are quoted where required.
-- NOTE(review): information_schema reports data_type as 'ARRAY' /
-- 'USER-DEFINED' for array and custom types, so those columns still need
-- manual fixing in the generated DDL.
SELECT
    format(
        E'CREATE TABLE %I.%I\n(\n%s\n);\n',
        cols.schemaname,
        cols.tablename,
        string_agg(cols.column_definition, E',\n' ORDER BY cols.ordinal_position)
    )
FROM (
    SELECT
        t.schemaname,
        t.tablename,
        c.ordinal_position,
        quote_ident(c.column_name::text) || ' ' || c.data_type ||
        CASE
            WHEN c.character_maximum_length IS NOT NULL
                THEN '(' || c.character_maximum_length || ')'
            ELSE ''
        END ||
        CASE
            WHEN c.is_nullable = 'NO' THEN ' NOT NULL'
            ELSE ''
        END AS column_definition
    FROM pg_catalog.pg_tables AS t
    JOIN information_schema.columns AS c
        ON  t.schemaname = c.table_schema
        AND t.tablename  = c.table_name
    WHERE t.schemaname NOT IN ('pg_catalog', 'information_schema')
) AS cols
GROUP BY cols.schemaname, cols.tablename;
|
||||||
@ -0,0 +1,658 @@
|
|||||||
|
-- IF NOT EXISTS keeps the script re-runnable once the extension is installed.
CREATE EXTENSION IF NOT EXISTS tablefunc;
|
||||||
|
|
||||||
|
/*Getting Trial Data all together
|
||||||
|
* There are 3 main datasets to join per trial:
|
||||||
|
*
|
||||||
|
* - Trial Data (still need to stick it together)
|
||||||
|
* - Duration and enrollment data
|
||||||
|
* - Compound Marketing (can get for any trial)
|
||||||
|
* - how many individual brands per compound at the start of the trial
|
||||||
|
* - Disease Data (can get for verified trials)
|
||||||
|
* - Population upper limit (Global Burdens of Disease)
|
||||||
|
* - Category (ICD10 2nd level groups)
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*Disease Data*/
|
||||||
|
-- ICD10 Category and GBD data
|
||||||
|
WITH deepest_level AS (
    -- largest "level" value that each trial reaches in trial_to_cause
    SELECT
        nct_id,
        max("level") AS max_level
    FROM trial_to_cause
    GROUP BY nct_id
), deepest_causes AS (
    -- the distinct cause rows of each trial that sit at that level
    SELECT DISTINCT
        ttc.nct_id,
        ttc.ui,
        ttc."condition",
        ttc.cause_text,
        ttc.cause_id,
        dl.max_level
    FROM trial_to_cause AS ttc
    JOIN deepest_level AS dl
        ON  dl.nct_id    = ttc.nct_id
        AND dl.max_level = ttc."level"
), categorised AS (
    -- place the first three characters of ui inside a category's code range
    SELECT
        dc.nct_id,
        substring(dc.ui FOR 3) AS code,
        dc."condition",
        dc.cause_text,
        dc.cause_id,
        ic.id AS category_id,
        ic.group_name
    FROM deepest_causes AS dc
    JOIN "DiseaseBurden".icd10_categories AS ic
        ON substring(dc.ui FOR 3) BETWEEN ic.start_code AND ic.end_code
)
SELECT DISTINCT
    nct_id,
    cause_id,
    category_id
FROM categorised
;
|
||||||
|
--still need to link to actual disease burdens.
|
||||||
|
|
||||||
|
/*Compound Marketing Data*/
|
||||||
|
---Number of trials after a certain date
|
||||||
|
-- Per trial: distinct applications/citations whose "min" date is after
-- 2012-06-01.
SELECT
    m.nct_id,
    count(DISTINCT m.application_number_or_citation)
FROM public.match_trial_to_marketing_start_date AS m
WHERE m."min" > '2012-06-01'
GROUP BY
    m.nct_id
;
|
||||||
|
|
||||||
|
/*Get versions*/
|
||||||
|
/* Ignore this version
|
||||||
|
with cte1 as (
|
||||||
|
select nct_id,min("version") over (partition by nct_id) as min_version
|
||||||
|
from history.trial_snapshots ts
|
||||||
|
where
|
||||||
|
ts.start_date < ts.submission_date
|
||||||
|
), cte2 as (
|
||||||
|
select * from cte1
|
||||||
|
group by nct_id, min_version
|
||||||
|
order by nct_id
|
||||||
|
), cte3 as (
|
||||||
|
select
|
||||||
|
ts2.nct_id,
|
||||||
|
ts2."version",
|
||||||
|
ts2.overall_status,
|
||||||
|
ts2.submission_date,
|
||||||
|
ts2.start_date,
|
||||||
|
ts2.enrollment,
|
||||||
|
ts2.enrollment_category,
|
||||||
|
ts2.primary_completion_date,
|
||||||
|
ts2.primary_completion_date_category ,
|
||||||
|
--mv.nct_id,
|
||||||
|
mv.min_version
|
||||||
|
from history.trial_snapshots ts2
|
||||||
|
join cte2 mv
|
||||||
|
on mv.nct_id = ts2.nct_id
|
||||||
|
where
|
||||||
|
ts2."version" = mv.min_version
|
||||||
|
order by ts2.nct_id
|
||||||
|
), cte4 as (
|
||||||
|
select cte3.nct_id, cte3.submission_date - cte3.start_date as submission_presecence
|
||||||
|
from cte3
|
||||||
|
)
|
||||||
|
select avg(submission_presecence)
|
||||||
|
from cte4
|
||||||
|
;
|
||||||
|
--avg 61 day difference
|
||||||
|
*/
|
||||||
|
|
||||||
|
--use this version
|
||||||
|
-- For each trial, take the highest snapshot version whose start_date is later
-- than its submission_date, then list those snapshots that are still missing
-- enrollment, completion or start-date information.
-- Changes from the earlier version: the statement is now terminated with a
-- semicolon (it previously ran into the next WITH), and the window function
-- followed by a de-duplicating GROUP BY is collapsed into a plain aggregate
-- that yields the same one-row-per-trial result.
WITH latest_prestart AS (
    -- NOTE: the column keeps its historical name min_version even though it
    -- holds max("version").
    SELECT
        ts.nct_id,
        max(ts."version") AS min_version
    FROM history.trial_snapshots AS ts
    WHERE ts.start_date > ts.submission_date
    GROUP BY ts.nct_id
), chosen_snapshot AS (
    SELECT
        ts2.nct_id,
        ts2."version",
        ts2.overall_status,
        ts2.submission_date,
        ts2.start_date,
        ts2.enrollment,
        ts2.enrollment_category,
        ts2.primary_completion_date,
        ts2.primary_completion_date_category,
        mv.min_version
    FROM history.trial_snapshots AS ts2
    JOIN latest_prestart AS mv
        ON  mv.nct_id      = ts2.nct_id
        AND mv.min_version = ts2."version"
)
SELECT *
FROM chosen_snapshot
WHERE enrollment IS NULL
   OR enrollment_category IS NULL
   OR primary_completion_date IS NULL
   OR primary_completion_date_category IS NULL
   OR start_date IS NULL
;
|
||||||
|
/*, cte4 as (
|
||||||
|
select cte3.nct_id, cte3.submission_date - cte3.start_date as submission_presecence
|
||||||
|
from cte3
|
||||||
|
)
|
||||||
|
select avg(submission_presecence)
|
||||||
|
from cte4
|
||||||
|
; -- -33 day difference on average
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- Consistency check: list trials where the lowest version with
-- start_date <= submission_date is not below the highest version with
-- start_date >= submission_date.
-- Changes from the earlier version: a terminating semicolon is added (the
-- statement previously ran into the next WITH), and each window-then-GROUP-BY
-- pair is replaced by the equivalent plain aggregate.
WITH first_on_or_after_start AS (
    SELECT
        ts.nct_id,
        min(ts."version") AS min_version
    FROM history.trial_snapshots AS ts
    WHERE ts.start_date <= ts.submission_date
    GROUP BY ts.nct_id
), last_on_or_before_start AS (
    SELECT
        ts.nct_id,
        max(ts."version") AS max_version
    FROM history.trial_snapshots AS ts
    WHERE ts.start_date >= ts.submission_date
    GROUP BY ts.nct_id
)
SELECT *
FROM first_on_or_after_start AS lo
JOIN last_on_or_before_start AS hi
    ON lo.nct_id = hi.nct_id
WHERE lo.min_version >= hi.max_version
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Need to take a different tack in filling out this version of the data.
|
||||||
|
* The idea is that we need the latest of each major category
|
||||||
|
* before the start date.
|
||||||
|
* */
|
||||||
|
|
||||||
|
--get the set of trials which have
|
||||||
|
WITH version_gap AS (
    /* Absolute difference, in days, between the trial's start date and the
     * submission_date of each of its snapshot versions.
     */
    SELECT
        s.nct_id,
        s.start_date,
        ts."version",
        ts.submission_date,
        abs(extract(epoch FROM ts.submission_date - s.start_date)::float / (24*60*60)) AS start_deviance
    FROM ctgov.studies AS s
    JOIN history.trial_snapshots AS ts
        ON ts.nct_id = s.nct_id
    WHERE s.nct_id IN (SELECT tti.nct_id FROM "DiseaseBurden".trial_to_icd10 AS tti)
), ranked AS (
    /* Rank each version by its proximity to the start date (1 = closest)
     * and pull in the snapshot fields of interest.
     */
    SELECT
        vg.nct_id,
        vg."version",
        row_number() OVER (PARTITION BY vg.nct_id ORDER BY vg.start_deviance) AS rownum,
        vg.submission_date,
        vg.start_deviance,
        vg.start_date,
        snap.primary_completion_date,
        snap.primary_completion_date_category,
        snap.overall_status,
        snap.enrollment,
        snap.enrollment_category
    FROM version_gap AS vg
    JOIN history.trial_snapshots AS snap
        ON  snap.nct_id    = vg.nct_id
        AND snap."version" = vg."version"
), best_completion AS (
    /* Per trial: rank of the closest version that has a
     * primary_completion_date filled in.
     */
    SELECT
        r.nct_id,
        min(r.rownum) AS primary_completion_source
    FROM ranked AS r
    WHERE r.primary_completion_date IS NOT NULL
    GROUP BY r.nct_id
), best_enrollment AS (
    /* Per trial: rank of the closest version that has enrollment filled in. */
    SELECT
        r.nct_id,
        min(r.rownum) AS enrollment_source
    FROM ranked AS r
    WHERE r.enrollment IS NOT NULL
    GROUP BY r.nct_id
), combined AS (
    /* Join the best options together to get the data of interest.
     *
     * On further inspection there are just a few trials where the two
     * sources differ, many of them with a 7+ month difference between the
     * two versions, so they are dropped in the final select.
     */
    SELECT
        be.nct_id,
        enr."version" AS version_a,
        enr.enrollment,
        enr.enrollment_category,
        pcd."version" AS version_b,
        pcd.primary_completion_date,
        pcd.primary_completion_date_category
    FROM best_enrollment AS be
    JOIN ranked AS enr
        ON  enr.nct_id = be.nct_id
        AND enr.rownum = be.enrollment_source
    JOIN best_completion AS bc
        ON bc.nct_id = be.nct_id
    JOIN ranked AS pcd
        ON  pcd.nct_id = bc.nct_id
        AND pcd.rownum = bc.primary_completion_source
), mismatched AS (
    -- trials whose enrollment and completion date come from different versions
    SELECT nct_id
    FROM combined
    WHERE version_a != version_b
)
SELECT
    c.nct_id,
    s2.overall_status,
    c.enrollment AS planned_enrollment,
    s2.enrollment,
    s2.start_date,
    c.primary_completion_date AS planned_primary_completion_date,
    s2.primary_completion_date,
    extract(epoch FROM c.primary_completion_date - s2.start_date) / (24*60*60) AS planned_duration,
    s2.primary_completion_date - s2.start_date AS actual_duration
FROM combined AS c
JOIN ctgov.studies AS s2
    ON s2.nct_id = c.nct_id
WHERE c.nct_id NOT IN (SELECT nct_id FROM mismatched)
;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Concern about causal inference
|
||||||
|
*
|
||||||
|
* When putting the data together for CBO it looked like we got occasional updates about
|
||||||
|
* the status of trials that included enrollment updates.
|
||||||
|
* That doesn't appear to be the case, but that messes with the ability to causally identify
|
||||||
|
* any results. I need to be careful about how this data is used.
|
||||||
|
*
|
||||||
|
* I created the statements below to get the data that I need.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
----get the set of trial snapshots
|
||||||
|
-- One row per distinct observed state (dates, enrollment, status) of each
-- accepted trial, taken from snapshots submitted on/after the start date
-- while the trial was not yet Completed or Terminated, with the earliest
-- submission date at which that state was seen.
CREATE OR REPLACE VIEW public.view_cte AS
SELECT
    ts.nct_id,
    ts.primary_completion_date,
    ts.primary_completion_date_category,
    ts.enrollment,
    ts.start_date,
    ts.enrollment_category,
    ts.overall_status,
    min(ts.submission_date) AS earliest_date_observed
FROM history.trial_snapshots AS ts
WHERE ts.submission_date >= ts.start_date
  AND ts.overall_status NOT IN ('Completed', 'Terminated')
  AND ts.nct_id IN (
        SELECT tti.nct_id
        FROM "DiseaseBurden".trial_to_icd10 AS tti
        WHERE tti.approved = 'accepted'
      )
GROUP BY
    ts.nct_id,
    ts.primary_completion_date,
    ts.primary_completion_date_category,
    ts.start_date,
    ts.enrollment,
    ts.enrollment_category,
    ts.overall_status
;
|
||||||
|
-- Accepted trial -> ICD-10 links resolved to a cause and its hierarchy level.
-- Codes are compared with '-' and '.' stripped from both sides.
CREATE OR REPLACE VIEW public.view_disbur_cte0 AS
SELECT
    tti.nct_id,
    tti.ui,
    tti."condition",
    itc.cause_text,
    ch.cause_id,
    ch."level"
FROM "DiseaseBurden".trial_to_icd10 AS tti
JOIN "DiseaseBurden".icd10_to_cause AS itc
    ON replace(replace(tti.ui, '-', ''), '.', '') = replace(replace(itc.code, '-', ''), '.', '')
JOIN "DiseaseBurden".cause_hierarchy AS ch
    ON ch.cause_name = itc.cause_text
WHERE tti.approved = 'accepted'
;
|
||||||
|
-- Accepted trial -> ICD-10 links resolved to a cause and its hierarchy level,
-- ordered by trial. The SELECT is the same as public.view_disbur_cte0 apart
-- from the ORDER BY.
CREATE OR REPLACE VIEW public.view_trial_to_cause AS
SELECT
    tti.nct_id,
    tti.ui,
    tti."condition",
    itc.cause_text,
    ch.cause_id,
    ch."level"
FROM "DiseaseBurden".trial_to_icd10 AS tti
JOIN "DiseaseBurden".icd10_to_cause AS itc
    ON replace(replace(tti.ui, '-', ''), '.', '') = replace(replace(itc.code, '-', ''), '.', '')
JOIN "DiseaseBurden".cause_hierarchy AS ch
    ON ch.cause_name = itc.cause_text
WHERE tti.approved = 'accepted'
ORDER BY tti.nct_id
;
|
||||||
|
|
||||||
|
-- Largest cause-hierarchy "level" reached by each trial.
CREATE OR REPLACE VIEW public.view_disbur_cte AS
SELECT
    links.nct_id,
    max(links."level") AS max_level
FROM view_disbur_cte0 AS links
GROUP BY
    links.nct_id
;
|
||||||
|
-- Distinct cause rows of each trial that sit at the trial's largest level.
CREATE OR REPLACE VIEW public.view_disbur_cte2 AS
SELECT DISTINCT
    ttc.nct_id,
    ttc.ui,
    ttc."condition",
    ttc.cause_text,
    ttc.cause_id,
    deepest.max_level
FROM view_trial_to_cause AS ttc
JOIN view_disbur_cte AS deepest
    ON  deepest.nct_id    = ttc.nct_id
    AND deepest.max_level = ttc."level"
ORDER BY nct_id, ui
;
|
||||||
|
-- Attach the level-1 ICD-10 category whose code range contains the first
-- three characters of each trial's ui.
CREATE OR REPLACE VIEW public.view_disbur_cte3 AS
SELECT
    dc.nct_id,
    substring(dc.ui FOR 3) AS code,
    dc."condition",
    dc.cause_text,
    dc.cause_id,
    ic.chapter_code AS category_id,
    ic.group_name,
    dc.max_level
FROM view_disbur_cte2 AS dc
JOIN "DiseaseBurden".icd10_categories AS ic
    ON substring(dc.ui FOR 3) BETWEEN ic.start_code AND ic.end_code
WHERE ic."level" = 1
;
|
||||||
|
-- Burden rows narrowed to a single slice: both sexes, counts, DALYs, all ages.
CREATE OR REPLACE VIEW public.view_burdens_cte AS
SELECT *
FROM "DiseaseBurden".burdens AS b
WHERE b.age_id     = 22  -- all ages
  AND b.measure_id = 2   -- DALYs
  AND b.metric_id  = 1   -- number/count
  AND b.sex_id     = 3   -- both sexes
;
|
||||||
|
-- Pivot the burden slice so each (cause, year) row carries the value and 95%
-- bounds of all five SDI groups side by side. Each SDI group is one
-- location_id; the self-joins line them up on cause and year.
CREATE OR REPLACE VIEW public.view_burdens_cte2 AS
SELECT
    hi.cause_id,
    hi."year",
    -- high sdi
    hi.val           AS h_sdi_val,
    hi.upper_95      AS h_sdi_u95,
    hi.lower_95      AS h_sdi_l95,
    -- high-middle sdi
    hi_mid.val       AS hm_sdi_val,
    hi_mid.upper_95  AS hm_sdi_u95,
    hi_mid.lower_95  AS hm_sdi_l95,
    -- middle sdi
    mid.val          AS m_sdi_val,
    mid.upper_95     AS m_sdi_u95,
    mid.lower_95     AS m_sdi_l95,
    -- low-middle sdi
    lo_mid.val       AS lm_sdi_val,
    lo_mid.upper_95  AS lm_sdi_u95,
    lo_mid.lower_95  AS lm_sdi_l95,
    -- low sdi
    lo.val           AS l_sdi_val,
    lo.upper_95      AS l_sdi_u95,
    lo.lower_95      AS l_sdi_l95
FROM view_burdens_cte AS hi
JOIN view_burdens_cte AS hi_mid
    ON  hi_mid.cause_id    = hi.cause_id
    AND hi_mid."year"      = hi."year"
    AND hi_mid.location_id = 44634
JOIN view_burdens_cte AS mid
    ON  mid.cause_id    = hi.cause_id
    AND mid."year"      = hi."year"
    AND mid.location_id = 44639
JOIN view_burdens_cte AS lo_mid
    ON  lo_mid.cause_id    = hi.cause_id
    AND lo_mid."year"      = hi."year"
    AND lo_mid.location_id = 44636
JOIN view_burdens_cte AS lo
    ON  lo.cause_id    = hi.cause_id
    AND lo."year"      = hi."year"
    AND lo.location_id = 44637
WHERE hi.location_id = 44635
;
|
||||||
|
--drop view if exists public.formatted_data cascade;
|
||||||
|
-- Analysis dataset: one row per observed trial state (from view_cte) and
-- matched cause/category, with
--   * elapsed_duration: share of the planned duration
--     (start_date -> primary_completion_date) that had elapsed when the state
--     was first observed;
--   * n_brands: distinct applications/citations whose "min" date is on or
--     before that observation;
--   * the burden values of the matched cause for the observation year.
-- Change from the earlier version: the denominator of elapsed_duration is
-- wrapped in NULLIF(..., 0), so a snapshot whose primary_completion_date
-- equals its start_date yields NULL instead of aborting the whole view with
-- a division-by-zero error.
CREATE OR REPLACE VIEW public.formatted_data AS
SELECT
    cte.nct_id,
    cte.start_date,
    cte.enrollment AS current_enrollment,
    cte.enrollment_category,
    cte.overall_status AS current_status,
    cte.earliest_date_observed,
    extract(epoch FROM (cte.earliest_date_observed - cte.start_date))
        / nullif(extract(epoch FROM (cte.primary_completion_date - cte.start_date)), 0) AS elapsed_duration,
    count(DISTINCT mttmsd."application_number_or_citation") AS n_brands,
    dbc3.code,
    dbc3."condition",
    dbc3.cause_text,
    dbc3.cause_id,
    dbc3.category_id,
    dbc3.group_name,
    dbc3.max_level,
    b."year",
    -- high sdi
    b.h_sdi_val,
    b.h_sdi_u95,
    b.h_sdi_l95,
    -- high-middle sdi
    b.hm_sdi_val,
    b.hm_sdi_u95,
    b.hm_sdi_l95,
    -- middle sdi
    b.m_sdi_val,
    b.m_sdi_u95,
    b.m_sdi_l95,
    -- low-middle sdi
    b.lm_sdi_val,
    b.lm_sdi_u95,
    b.lm_sdi_l95,
    -- low sdi
    b.l_sdi_val,
    b.l_sdi_u95,
    b.l_sdi_l95
FROM view_cte AS cte
JOIN public.match_trial_to_marketing_start_date AS mttmsd
    ON cte.nct_id = mttmsd."nct_id"
JOIN view_disbur_cte3 AS dbc3
    ON dbc3.nct_id = cte.nct_id
JOIN view_burdens_cte2 AS b
    ON  b.cause_id = dbc3.cause_id
    AND extract(year FROM b."year") = extract(year FROM cte.earliest_date_observed)
WHERE
    mttmsd."min" <= cte.earliest_date_observed
GROUP BY
    cte.nct_id,
    cte.start_date,
    cte.enrollment,
    cte.enrollment_category,
    cte.overall_status,
    cte.earliest_date_observed,
    elapsed_duration,   -- output alias of the ratio computed above
    dbc3.code,
    dbc3."condition",
    dbc3.cause_text,
    dbc3.cause_id,
    dbc3.category_id,
    dbc3.group_name,
    dbc3.max_level,
    b.cause_id,
    b."year",
    -- high sdi
    b.h_sdi_val,
    b.h_sdi_u95,
    b.h_sdi_l95,
    -- high-middle sdi
    b.hm_sdi_val,
    b.hm_sdi_u95,
    b.hm_sdi_l95,
    -- middle sdi
    b.m_sdi_val,
    b.m_sdi_u95,
    b.m_sdi_l95,
    -- low-middle sdi
    b.lm_sdi_val,
    b.lm_sdi_u95,
    b.lm_sdi_l95,
    -- low sdi
    b.l_sdi_val,
    b.l_sdi_u95,
    b.l_sdi_l95
ORDER BY cte.nct_id, cte.earliest_date_observed
;--used this one 2023-04-05
|
||||||
|
--get the planned enrollment
|
||||||
|
/* Absolute difference, in days, between the trial's start date (from
 * ctgov.studies) and the submission_date of each snapshot version, for
 * trials that appear in trial_to_icd10.
 */
CREATE OR REPLACE VIEW public.time_between_submission_and_start_view AS
SELECT
    study.nct_id,
    study.start_date,
    snap."version",
    snap.submission_date,
    abs(extract(epoch FROM snap.submission_date - study.start_date)::float / (24*60*60)) AS start_deviance
FROM ctgov.studies AS study
JOIN history.trial_snapshots AS snap
    ON snap.nct_id = study.nct_id
WHERE study.nct_id IN (
    SELECT tti.nct_id
    FROM "DiseaseBurden".trial_to_icd10 AS tti
)
;
|
||||||
|
/* Rank each snapshot version of a trial by its proximity to the start date
 * (rownum 1 = smallest start_deviance) and expose the snapshot fields of
 * interest alongside the rank.
 * NOTE(review): versions with equal start_deviance are ranked in an
 * unspecified order because the window has no tie-breaker.
 */
CREATE OR REPLACE VIEW rank_proximity_to_start_time_view AS
SELECT
    gap.nct_id,
    gap."version",
    row_number() OVER (PARTITION BY gap.nct_id ORDER BY gap.start_deviance) AS rownum,
    gap.submission_date,
    gap.start_deviance,
    gap.start_date,
    snap.primary_completion_date,
    snap.primary_completion_date_category,
    snap.overall_status,
    snap.enrollment,
    snap.enrollment_category
FROM public.time_between_submission_and_start_view AS gap
JOIN history.trial_snapshots AS snap
    ON  snap.nct_id    = gap.nct_id
    AND snap."version" = gap."version"
;
|
||||||
|
/* For each trial, the rank (rownum) of the version closest to the start
 * date that has enrollment filled in.
 */
CREATE OR REPLACE VIEW enrollment_closest_to_start_view AS
SELECT
    ranked.nct_id,
    min(ranked.rownum) AS enrollment_source
FROM rank_proximity_to_start_time_view AS ranked
WHERE ranked.enrollment IS NOT NULL
GROUP BY
    ranked.nct_id
;
|
||||||
|
--drop view public.formatted_data_with_planned_enrollment ;
|
||||||
|
-- formatted_data extended with the trial's status in ctgov.studies and the
-- enrollment (plus its version) taken from the snapshot closest to the start
-- date that has enrollment filled in.
CREATE OR REPLACE VIEW formatted_data_with_planned_enrollment AS
SELECT
    f.*,
    study.overall_status AS final_status,
    planned."version",
    planned.enrollment AS planned_enrollment
FROM formatted_data AS f
JOIN ctgov.studies AS study
    ON study.nct_id = f.nct_id
JOIN enrollment_closest_to_start_view AS pick
    ON pick.nct_id = f.nct_id
JOIN rank_proximity_to_start_time_view AS planned
    ON  planned.nct_id = pick.nct_id
    AND planned.rownum = pick.enrollment_source
;
|
||||||
|
-- Inspect the final dataset. The terminating semicolon was missing, so this
-- statement ran into the count queries that follow.
SELECT *
FROM formatted_data_with_planned_enrollment;
|
||||||
|
|
||||||
|
-------------------GET COUNTS------------------
|
||||||
|
-- Distinct trials surviving at each stage of the pipeline; the trailing
-- comments record the counts seen when these were last run.
SELECT count(DISTINCT nct_id) FROM public.view_cte; --88
SELECT count(DISTINCT nct_id) FROM public.view_disbur_cte0; --130
SELECT count(DISTINCT nct_id) FROM public.view_trial_to_cause; --130
SELECT count(DISTINCT nct_id) FROM public.view_disbur_cte; --130
SELECT count(DISTINCT nct_id) FROM public.view_disbur_cte2; --130
SELECT count(DISTINCT nct_id) FROM public.view_disbur_cte3; --130
SELECT count(DISTINCT nct_id) FROM public.formatted_data; --48 probably because there are so many trials that don't fall into a GBD category/cause
SELECT count(DISTINCT nct_id) FROM public.time_between_submission_and_start_view; --1067
SELECT count(DISTINCT nct_id) FROM rank_proximity_to_start_time_view; --1067
SELECT count(DISTINCT nct_id) FROM enrollment_closest_to_start_view; --1067
SELECT count(DISTINCT nct_id) FROM formatted_data_with_planned_enrollment; --48

-- First and last stage again, side by side.
SELECT count(DISTINCT nct_id) FROM public.view_trial_to_cause; --130
SELECT count(DISTINCT nct_id) FROM formatted_data_with_planned_enrollment; --48
|
||||||
|
|
||||||
|
|
||||||
|
--get durations and count snapshots per trial per trial
|
||||||
|
WITH trial_duration AS (
    -- start -> primary completion span for every trial in download_status
    SELECT
        s.nct_id,
        s.start_date,
        s.primary_completion_date,
        s.overall_status,
        s.primary_completion_date - s.start_date AS duration
    FROM ctgov.studies AS s
    WHERE s.nct_id IN (SELECT ds.nct_id FROM http.download_status AS ds)
), rows_per_trial AS (
    -- rows each trial contributes to the final dataset
    SELECT
        fdwpe.nct_id,
        count(*) AS snapshot_count
    FROM formatted_data_with_planned_enrollment AS fdwpe
    GROUP BY fdwpe.nct_id
)
SELECT
    d.nct_id,
    d.overall_status,
    d.duration,
    n.snapshot_count
FROM trial_duration AS d
JOIN rows_per_trial AS n
    ON n.nct_id = d.nct_id
;
|
||||||
@ -0,0 +1,104 @@
|
|||||||
|
select * from "DiseaseBurden".icd10_to_cause itc ;
|
||||||
|
select * from "DiseaseBurden".cause c ;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
select c.id, count(distinct code)
|
||||||
|
from "DiseaseBurden".cause c
|
||||||
|
join "DiseaseBurden".icd10_to_cause itc
|
||||||
|
on c.cause = itc.cause_text
|
||||||
|
group by c.id
|
||||||
|
order by c.id
|
||||||
|
;
|
||||||
|
|
||||||
|
select tti.approved,count(distinct nct_id) from "DiseaseBurden".trial_to_icd10 tti
|
||||||
|
group by tti.approved;
|
||||||
|
|
||||||
|
select nct_id, "condition", ui
|
||||||
|
from "DiseaseBurden".trial_to_icd10 tti
|
||||||
|
where tti.approved = 'accepted';
|
||||||
|
|
||||||
|
-- IF EXISTS keeps the script runnable in a fresh session where the temp view was never created.
drop view if exists trial_to_cause;
|
||||||
|
|
||||||
|
---Link trials to their causes
|
||||||
|
|
||||||
|
create temp view trial_to_cause as
|
||||||
|
select tti.nct_id, tti.ui , tti."condition",itc.cause_text, ch.cause_id, ch."level"
|
||||||
|
from "DiseaseBurden".trial_to_icd10 tti
|
||||||
|
join "DiseaseBurden".icd10_to_cause itc
|
||||||
|
on replace(REPLACE(tti.ui,'-',''),'.','') = replace(REPLACE(itc.code ,'-',''),'.','')
|
||||||
|
join "DiseaseBurden".cause_hierarchy ch
|
||||||
|
on itc.cause_text = ch.cause_name
|
||||||
|
where
|
||||||
|
tti.approved = 'accepted'
|
||||||
|
order by nct_id
|
||||||
|
;
|
||||||
|
|
||||||
|
-- Per trial: number of rows in trial_to_cause plus the minimum and maximum "level".
-- GROUP BY nct_id already yields one row per trial, so DISTINCT is not needed.
select nct_id, count(*), min("level"), max("level")
from trial_to_cause ttc
group by nct_id
;
|
||||||
|
|
||||||
|
select nct_id,cause_text,cause_id from trial_to_cause
|
||||||
|
where level = 3
|
||||||
|
group by nct_id,cause_text,cause_id
|
||||||
|
order by cause_id
|
||||||
|
;
|
||||||
|
|
||||||
|
select cause_id,"condition",cause_text,count(distinct nct_id) as c
|
||||||
|
from trial_to_cause
|
||||||
|
where level >= 3
|
||||||
|
group by cause_id,"condition",cause_text
|
||||||
|
--having count(distinct nct_id) > 2
|
||||||
|
order by cause_id
|
||||||
|
;
|
||||||
|
|
||||||
|
with cte as (
|
||||||
|
select
|
||||||
|
nct_id,
|
||||||
|
max("level") as max_level
|
||||||
|
from trial_to_cause
|
||||||
|
group by nct_id
|
||||||
|
), cte2 as (
|
||||||
|
select
|
||||||
|
ttc.nct_id,
|
||||||
|
ttc.ui,
|
||||||
|
ttc."condition",
|
||||||
|
ttc.cause_text,
|
||||||
|
ttc.cause_id,
|
||||||
|
cte.max_level
|
||||||
|
from trial_to_cause ttc
|
||||||
|
join cte
|
||||||
|
on cte.nct_id=ttc.nct_id
|
||||||
|
where ttc."level"=cte.max_level
|
||||||
|
group by
|
||||||
|
ttc.nct_id,
|
||||||
|
ttc.ui,
|
||||||
|
ttc."condition",
|
||||||
|
ttc.cause_text,
|
||||||
|
ttc.cause_id,
|
||||||
|
cte.max_level
|
||||||
|
order by nct_id,ui
|
||||||
|
), cte3 as (
|
||||||
|
select
|
||||||
|
nct_id,
|
||||||
|
substring(cte2.ui for 3) as code,
|
||||||
|
cte2."condition",
|
||||||
|
cte2.cause_text,
|
||||||
|
cte2.cause_id,
|
||||||
|
ic.id as category_id,
|
||||||
|
ic.group_name
|
||||||
|
from cte2
|
||||||
|
join "DiseaseBurden".icd10_categories ic
|
||||||
|
on
|
||||||
|
substring(cte2.ui for 3) <= ic.end_code
|
||||||
|
and
|
||||||
|
substring(cte2.ui for 3) >= ic.start_code
|
||||||
|
)
|
||||||
|
select nct_id, cause_id,category_id
|
||||||
|
from cte3
|
||||||
|
group by nct_id, cause_id, category_id
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,83 @@
|
|||||||
|
|
||||||
|
drop view if exists public.match_trial_to_marketing_start_date;
|
||||||
|
DROP VIEW if exists public.match_trial_to_ndc11;
|
||||||
|
drop view if exists public.match_trials_to_bn_in;
|
||||||
|
|
||||||
|
drop view if exists history.match_drugs_to_trials;
|
||||||
|
DROP TABLE IF EXISTS history.trial_snapshots;
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS history.trial_snapshots
|
||||||
|
(
|
||||||
|
nct_id character varying(15) COLLATE pg_catalog."default" NOT NULL,
|
||||||
|
version integer NOT NULL,
|
||||||
|
submission_date timestamp without time zone,
|
||||||
|
primary_completion_date timestamp without time zone,
|
||||||
|
primary_completion_date_category history.updatable_catetories,
|
||||||
|
start_date timestamp without time zone,
|
||||||
|
start_date_category history.updatable_catetories,
|
||||||
|
completion_date timestamp without time zone,
|
||||||
|
completion_date_category history.updatable_catetories,
|
||||||
|
overall_status history.study_statuses,
|
||||||
|
enrollment integer,
|
||||||
|
enrollment_category history.updatable_catetories,
|
||||||
|
sponsor character varying COLLATE pg_catalog."default",
|
||||||
|
responsible_party character varying COLLATE pg_catalog."default",
|
||||||
|
CONSTRAINT trial_snapshots_pkey PRIMARY KEY (nct_id, version)
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
ALTER TABLE IF EXISTS history.trial_snapshots
|
||||||
|
OWNER to root;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW history.match_drugs_to_trials
|
||||||
|
AS SELECT bi.nct_id,
|
||||||
|
rp.rxcui,
|
||||||
|
rp.propvalue1
|
||||||
|
FROM ctgov.browse_interventions bi
|
||||||
|
JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
|
||||||
|
WHERE rp.propname::text = 'RxNorm Name'::text AND (bi.nct_id::text IN ( SELECT trial_snapshots.nct_id
|
||||||
|
FROM history.trial_snapshots));
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW public.match_trials_to_bn_in
|
||||||
|
AS WITH trialncts AS (
|
||||||
|
SELECT DISTINCT ts.nct_id
|
||||||
|
FROM history.trial_snapshots ts
|
||||||
|
)
|
||||||
|
SELECT bi.nct_id,
|
||||||
|
bi.downcase_mesh_term,
|
||||||
|
rr.tty2,
|
||||||
|
rr.rxcui2 AS bn_or_in_cui,
|
||||||
|
count(*) AS count
|
||||||
|
FROM ctgov.browse_interventions bi
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui
|
||||||
|
WHERE (bi.nct_id::text IN ( SELECT trialncts.nct_id
|
||||||
|
FROM trialncts)) AND bi.mesh_type::text = 'mesh-list'::text AND rp.propname::text = 'Active_ingredient_name'::text AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))
|
||||||
|
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
|
||||||
|
ORDER BY bi.nct_id;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW public.match_trial_to_ndc11
|
||||||
|
AS SELECT mttbi.nct_id,
|
||||||
|
ah.ndc,
|
||||||
|
count(*) AS count
|
||||||
|
FROM match_trials_to_bn_in mttbi
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON mttbi.bn_or_in_cui = rr.rxcui1
|
||||||
|
LEFT JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON rr.rxcui2 = ah.rxcui
|
||||||
|
WHERE rr.tty1 = 'BN'::bpchar AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar])) AND ah.sab::text = 'RXNORM'::text
|
||||||
|
GROUP BY mttbi.nct_id, ah.ndc
|
||||||
|
ORDER BY mttbi.nct_id, ah.ndc;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW public.match_trial_to_marketing_start_date
|
||||||
|
AS SELECT mttn.nct_id,
|
||||||
|
n.application_number_or_citation,
|
||||||
|
min(n.marketing_start_date) AS min
|
||||||
|
FROM match_trial_to_ndc11 mttn
|
||||||
|
JOIN spl.nsde n ON mttn.ndc = n.package_ndc11::bpchar
|
||||||
|
WHERE n.product_type::text = 'HUMAN PRESCRIPTION DRUG'::text AND (n.marketing_category::text = ANY (ARRAY['NDA'::character varying, 'ANDA'::character varying, 'BLA'::character varying, 'NDA authorized generic'::character varying, 'NDA AUTHORIZED GENERIC'::character varying]::text[]))
|
||||||
|
GROUP BY mttn.nct_id, n.application_number_or_citation
|
||||||
|
ORDER BY mttn.nct_id;
|
||||||
|
|
||||||
@ -0,0 +1,308 @@
|
|||||||
|
select * from formatted_data_with_planned_enrollment fdwpe
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
select * from formatted_data_mat fdm
|
||||||
|
;
|
||||||
|
|
||||||
|
-- Number of distinct conditions; terminated so it does not merge with the next query.
select count(distinct condition ) from formatted_data_mat fdm;
|
||||||
|
|
||||||
|
select nct_id, fdm.current_status , count(*)
|
||||||
|
from formatted_data_mat fdm
|
||||||
|
group by nct_id , fdm.current_status
|
||||||
|
order by nct_id
|
||||||
|
;
|
||||||
|
|
||||||
|
select * from formatted_data_mat fdm ;
|
||||||
|
|
||||||
|
|
||||||
|
-- group with trial split
|
||||||
|
with cte as (
|
||||||
|
select nct_id
|
||||||
|
from formatted_data_mat fdm
|
||||||
|
group by nct_id
|
||||||
|
having count(distinct current_status) > 1
|
||||||
|
order by nct_id
|
||||||
|
)
|
||||||
|
select
|
||||||
|
fdm.nct_id
|
||||||
|
, current_status
|
||||||
|
, earliest_date_observed
|
||||||
|
, elapsed_duration
|
||||||
|
, n_brands
|
||||||
|
, category_id
|
||||||
|
, h_sdi_val
|
||||||
|
, h_sdi_u95
|
||||||
|
, h_sdi_l95
|
||||||
|
, hm_sdi_val
|
||||||
|
, hm_sdi_u95
|
||||||
|
, hm_sdi_l95
|
||||||
|
, m_sdi_val
|
||||||
|
, m_sdi_u95
|
||||||
|
, m_sdi_l95
|
||||||
|
, lm_sdi_val
|
||||||
|
, lm_sdi_u95
|
||||||
|
, lm_sdi_l95
|
||||||
|
, l_sdi_val
|
||||||
|
, l_sdi_u95
|
||||||
|
, l_sdi_l95
|
||||||
|
from formatted_data_mat fdm
|
||||||
|
join cte on cte.nct_id = fdm.nct_id
|
||||||
|
group by
|
||||||
|
fdm.nct_id
|
||||||
|
, current_status
|
||||||
|
, earliest_date_observed
|
||||||
|
, elapsed_duration
|
||||||
|
, n_brands
|
||||||
|
, category_id
|
||||||
|
, h_sdi_val
|
||||||
|
, h_sdi_u95
|
||||||
|
, h_sdi_l95
|
||||||
|
, hm_sdi_val
|
||||||
|
, hm_sdi_u95
|
||||||
|
, hm_sdi_l95
|
||||||
|
, m_sdi_val
|
||||||
|
, m_sdi_u95
|
||||||
|
, m_sdi_l95
|
||||||
|
, lm_sdi_val
|
||||||
|
, lm_sdi_u95
|
||||||
|
, lm_sdi_l95
|
||||||
|
, l_sdi_val
|
||||||
|
, l_sdi_u95
|
||||||
|
, l_sdi_l95
|
||||||
|
order by nct_id , earliest_date_observed
|
||||||
|
;
|
||||||
|
|
||||||
|
-- The original statement had no table and no terminator.
-- NOTE(review): formatted_data_mat is assumed from the sibling query below; confirm.
select count(distinct category_id ) from formatted_data_mat fdm;
|
||||||
|
|
||||||
|
|
||||||
|
select distinct category_id from formatted_data_mat fdm
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- group with trial split
|
||||||
|
with cte as (
|
||||||
|
select nct_id
|
||||||
|
from formatted_data_mat fdm
|
||||||
|
group by nct_id
|
||||||
|
having count(distinct current_status) > 1
|
||||||
|
order by nct_id
|
||||||
|
)
|
||||||
|
select
|
||||||
|
fdm.nct_id
|
||||||
|
, current_status
|
||||||
|
, earliest_date_observed
|
||||||
|
, elapsed_duration
|
||||||
|
, n_brands
|
||||||
|
, category_id
|
||||||
|
, h_sdi_val
|
||||||
|
, h_sdi_u95
|
||||||
|
, h_sdi_l95
|
||||||
|
, hm_sdi_val
|
||||||
|
, hm_sdi_u95
|
||||||
|
, hm_sdi_l95
|
||||||
|
, m_sdi_val
|
||||||
|
, m_sdi_u95
|
||||||
|
, m_sdi_l95
|
||||||
|
, lm_sdi_val
|
||||||
|
, lm_sdi_u95
|
||||||
|
, lm_sdi_l95
|
||||||
|
, l_sdi_val
|
||||||
|
, l_sdi_u95
|
||||||
|
, l_sdi_l95
|
||||||
|
from formatted_data_mat fdm
|
||||||
|
join cte on cte.nct_id = fdm.nct_id
|
||||||
|
group by
|
||||||
|
fdm.nct_id
|
||||||
|
, current_status
|
||||||
|
, earliest_date_observed
|
||||||
|
, elapsed_duration
|
||||||
|
, n_brands
|
||||||
|
, category_id
|
||||||
|
, h_sdi_val
|
||||||
|
, h_sdi_u95
|
||||||
|
, h_sdi_l95
|
||||||
|
, hm_sdi_val
|
||||||
|
, hm_sdi_u95
|
||||||
|
, hm_sdi_l95
|
||||||
|
, m_sdi_val
|
||||||
|
, m_sdi_u95
|
||||||
|
, m_sdi_l95
|
||||||
|
, lm_sdi_val
|
||||||
|
, lm_sdi_u95
|
||||||
|
, lm_sdi_l95
|
||||||
|
, l_sdi_val
|
||||||
|
, l_sdi_u95
|
||||||
|
, l_sdi_l95
|
||||||
|
order by nct_id , earliest_date_observed
|
||||||
|
; --TODO: join to usp dc dataset
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
WITH trialncts AS (
|
||||||
|
SELECT DISTINCT ts.nct_id
|
||||||
|
FROM history.trial_snapshots ts
|
||||||
|
), nct_to_cui AS (
|
||||||
|
SELECT bi.nct_id,
|
||||||
|
bi.downcase_mesh_term,
|
||||||
|
rr.tty2,
|
||||||
|
rr.rxcui2 AS approved_drug_rxcui,
|
||||||
|
count(*) AS count
|
||||||
|
FROM ctgov.browse_interventions bi
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui
|
||||||
|
WHERE (bi.nct_id::text IN ( SELECT trialncts.nct_id
|
||||||
|
FROM trialncts)) AND bi.mesh_type::text = 'mesh-list'::text AND rp.propname::text = 'Active_ingredient_name'::text AND (rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text]))
|
||||||
|
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2
|
||||||
|
)
|
||||||
|
SELECT nct_to_cui.nct_id,
|
||||||
|
ud."USP Category",
|
||||||
|
ud."USP Class"
|
||||||
|
FROM nct_to_cui
|
||||||
|
JOIN "Formularies".usp_dc ud ON ud.rxcui::bpchar = nct_to_cui.approved_drug_rxcui
|
||||||
|
GROUP BY nct_to_cui.nct_id, ud."USP Category", ud."USP Class"
|
||||||
|
ORDER BY nct_to_cui.nct_id;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CREATE MATERIALIZED VIEW "Formularies".nct_to_brands_through_uspdc
|
||||||
|
AS
|
||||||
|
WITH trialncts AS (
|
||||||
|
SELECT DISTINCT ts.nct_id
|
||||||
|
FROM history.trial_snapshots ts
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
bi.nct_id,
|
||||||
|
count( distinct rr2.rxcui2 ) as brand_name_count
|
||||||
|
FROM ctgov.browse_interventions bi
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON bi.downcase_mesh_term::text = rp.propvalue1::text --match mesh terms to rxcui
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON rr.rxcui1 = rp.rxcui -- match rxcui to relations between rxcuis
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr2 ON rr.rxcui2 = rr2.rxcui1 -- match rxcui to relations between rxcuis
|
||||||
|
WHERE
|
||||||
|
(bi.nct_id::text IN (SELECT trialncts.nct_id FROM trialncts)) --check the nct_id is in our list
|
||||||
|
AND
|
||||||
|
bi.mesh_type::text = 'mesh-list'::text --we are only looking at mesh "list" rxcuis
|
||||||
|
AND rp.propname::text = 'Active_ingredient_name'::text --and we only care about active ingredients linked to \/\/\/\/\/
|
||||||
|
AND (rr.tty2::text = ANY (ARRAY['BPCK'::text, 'SCD'::text, 'SBD'::text, 'GPCK'::text])) --and we are linking from active ingredients ^^^^ to branded packs
|
||||||
|
AND (rr2.tty2::text = 'BN') --and from branded packs back to brand names
|
||||||
|
GROUP BY bi.nct_id --remove duplicates
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
select
|
||||||
|
fdqpe.nct_id
|
||||||
|
--,fdqpe.start_date
|
||||||
|
--,fdqpe.current_enrollment
|
||||||
|
--,fdqpe.enrollment_category
|
||||||
|
,fdqpe.current_status
|
||||||
|
,fdqpe.earliest_date_observed
|
||||||
|
,fdqpe.elapsed_duration
|
||||||
|
,fdqpe.n_brands as identical_brands
|
||||||
|
,ntbtu.brand_name_count
|
||||||
|
,fdqpe.category_id
|
||||||
|
,fdqpe.final_status
|
||||||
|
,fdqpe.h_sdi_val
|
||||||
|
--,fdqpe.h_sdi_u95
|
||||||
|
--,fdqpe.h_sdi_l95
|
||||||
|
,fdqpe.hm_sdi_val
|
||||||
|
--,fdqpe.hm_sdi_u95
|
||||||
|
--,fdqpe.hm_sdi_l95
|
||||||
|
,fdqpe.m_sdi_val
|
||||||
|
--,fdqpe.m_sdi_u95
|
||||||
|
--,fdqpe.m_sdi_l95
|
||||||
|
,fdqpe.lm_sdi_val
|
||||||
|
--,fdqpe.lm_sdi_u95
|
||||||
|
--,fdqpe.lm_sdi_l95
|
||||||
|
,fdqpe.l_sdi_val
|
||||||
|
--,fdqpe.l_sdi_u95
|
||||||
|
--,fdqpe.l_sdi_l95
|
||||||
|
from formatted_data_mat fdqpe
|
||||||
|
join "Formularies".nct_to_brands_through_uspdc ntbtu
|
||||||
|
on fdqpe.nct_id = ntbtu.nct_id
|
||||||
|
;
|
||||||
|
|
||||||
|
--example of multiple reopenings
|
||||||
|
select *
from formatted_data_mat fdm
where nct_id = 'NCT01239797'
;
|
||||||
|
|
||||||
|
--attempt to automatically find transition periods
|
||||||
|
-- For each trial, compare the smallest earliest_date_observed with status
-- 'Active, not recruiting' against the largest earliest_date_observed with any other status.
-- Plain aggregates with GROUP BY give exactly one row per trial in each CTE; the
-- previous window-function form kept one row per source row, so the join multiplied rows.
with cte1 as (
    select nct_id, min(earliest_date_observed) as earliest_closed_enrollment
    from formatted_data_mat fdm
    where current_status = 'Active, not recruiting'
    group by nct_id
), cte2 as (
    select nct_id, max(earliest_date_observed) as latest_open_enrollment
    from formatted_data_mat fdm
    where current_status != 'Active, not recruiting'
    group by nct_id
)
select
    cte1.nct_id
    ,cte1.earliest_closed_enrollment
    ,cte2.latest_open_enrollment
    ,cte1.earliest_closed_enrollment - cte2.latest_open_enrollment as transition_gap
from cte1
join cte2 on cte1.nct_id = cte2.nct_id
;
|
||||||
|
/*group by
|
||||||
|
cte1.nct_id
|
||||||
|
,cte1.earliest_closed_enrollment
|
||||||
|
,cte2.latest_open_enrollment
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Occasionally a study reopens enrollment.
|
||||||
|
* If that didn't happen, then I could just find the first enrollment matching X and/or last enrollment matching Y
|
||||||
|
* to get the transitions
|
||||||
|
* Instead I need to create shifts of statuses between snapshots, and then remove all of those that did not change.
|
||||||
|
*
|
||||||
|
* Better yet, just get the last shift to ANR.
|
||||||
|
* */
|
||||||
|
|
||||||
|
|
||||||
|
/* Take each entry and get the status from a lagged snapshot
|
||||||
|
* Then select each snapshot moving from previous_state to ANR
|
||||||
|
* and filter out everything except the last one.
|
||||||
|
* */
|
||||||
|
with cte as (
    -- pair every row with the status of the preceding row (by earliest_date_observed) of the same trial
    select
        nct_id
        ,lag(current_status, 1) over (partition by nct_id order by earliest_date_observed) as previous_status
        ,current_status
        ,earliest_date_observed as date_current
    from formatted_data_mat fdm
), cte2 as (
    -- Keep only the most recent change into 'Active, not recruiting' for each trial.
    -- The earlier version grouped by date_current as well, which turned max(date_current)
    -- into a no-op and returned every transition instead of the last one.
    -- DISTINCT ON keeps the first row per nct_id in ORDER BY order, i.e. the latest date.
    select distinct on (nct_id)
        nct_id
        ,previous_status
        ,current_status
        ,date_current as date_current_max
    from cte
    where
        previous_status != current_status
        and
        current_status = 'Active, not recruiting'
    order by
        nct_id
        ,date_current desc
)
select *
from formatted_data_mat fdm
join cte2
    on cte2.nct_id = fdm.nct_id
    and cte2.date_current_max = fdm.earliest_date_observed
; --join back into
|
||||||
|
|
||||||
@ -0,0 +1,35 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
set -x
|
||||||
|
|
||||||
|
# Uses
|
||||||
|
#
|
||||||
|
# Defaults
|
||||||
|
# Require at least the container name; the other arguments have defaults.
if [[ $# -lt 1 ]]; then
    echo "Usage: pg_export container_name [database_name] [username]"
    # `return` is only valid in a function or a sourced script. When the script is
    # executed directly it fails, so fall back to `exit` to actually stop here.
    return 1 2>/dev/null || exit 1
fi
|
||||||
|
|
||||||
|
CONTAINER=$1
|
||||||
|
DBNAME=${2:-aact_db}
|
||||||
|
USER=${3:-root}
|
||||||
|
|
||||||
|
#
|
||||||
|
# for sqlfile in ../export/export_data_*.sql; do
|
||||||
|
# if [[ -f "$sqlfile" ]]; then
|
||||||
|
# outfile="../export/output_$(date -I)_$(basename ${sqlfile%.sql}).sql"
|
||||||
|
# # podman exec -t "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f - < "$sqlfile" > "$outfile"
|
||||||
|
# # podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f "$sqlfile" > "$outfile"
|
||||||
|
# podman cp "$sqlfile" "$CONTAINER":/tmp/query.sql
|
||||||
|
# podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -t -A -f /tmp/query.sql > "$outfile"
|
||||||
|
# fi
|
||||||
|
# done
|
||||||
|
#
|
||||||
|
|
||||||
|
# Run every export_data_*.sql file inside the container and capture psql's output
# in a dated file next to the queries.
for sqlfile in ../export/export_data_*.sql; do
    # the -f test also skips the literal pattern when the glob matches nothing
    if [[ -f "$sqlfile" ]]; then
        # quote the expansion so paths containing spaces survive `basename`
        outfile="../export/output_$(date -I)_$(basename "${sqlfile%.sql}").sql"
        podman cp "$sqlfile" "$CONTAINER":/tmp/query.sql
        podman exec "$CONTAINER" psql -U "$USER" -d "$DBNAME" -f "/tmp/query.sql" > "$outfile"
    fi
done
|
||||||
|
|
||||||
@ -0,0 +1,30 @@
|
|||||||
|
/***************CREATE VIEWS*******************/
|
||||||
|
create or replace view
|
||||||
|
history.match_drugs_to_trials as
|
||||||
|
select nct_id, rxcui, propvalue1
|
||||||
|
from
|
||||||
|
ctgov.browse_interventions as bi
|
||||||
|
join
|
||||||
|
rxnorm_migrated.rxnorm_props as rp
|
||||||
|
on bi.downcase_mesh_term = rp.propvalue1
|
||||||
|
where
|
||||||
|
propname='RxNorm Name'
|
||||||
|
and
|
||||||
|
nct_id in (select nct_id from history.trial_snapshots)
|
||||||
|
;
|
||||||
|
|
||||||
|
|
||||||
|
/********************IN DEVELOPMENT*********************/
|
||||||
|
|
||||||
|
/* Get the count of brand names attached to each trial
|
||||||
|
* I should develop this into a view that matches trials to brands
|
||||||
|
* then create a view that gets the counts.
|
||||||
|
*/
|
||||||
|
select rxcui1,count(rxcui2) from rxnorm_migrated.rxnorm_relations rr
|
||||||
|
where
|
||||||
|
rxcui1 in (select rxcui from history.match_drugs_to_trials)
|
||||||
|
and
|
||||||
|
tty2 = 'BN'
|
||||||
|
group by rxcui1
|
||||||
|
order by count(rxcui2) desc
|
||||||
|
;
|
||||||
@ -0,0 +1,3 @@
|
|||||||
|
# TODO
|
||||||
|
|
||||||
|
Code up a data extraction tool that uses llama3 or a similar quality source to extract the data that I need from the extended aact_database
|
||||||
@ -1,44 +0,0 @@
|
|||||||
SELECT why_stopped FROM ctgov.studies
|
|
||||||
WHERE why_stopped IS NOT NULL
|
|
||||||
LIMIT 100;
|
|
||||||
|
|
||||||
SELECT study_type, count(*) from ctgov.studies
|
|
||||||
group by study_type;
|
|
||||||
|
|
||||||
SELECT is_fda_regulated_drug, count(*) from ctgov.studies
|
|
||||||
GROUP BY is_fda_regulated_drug;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
Note that there is a decent number of trials that have expanded access
|
|
||||||
*/
|
|
||||||
SELECT
|
|
||||||
study_type
|
|
||||||
, phase
|
|
||||||
, has_expanded_access
|
|
||||||
, has_dmc
|
|
||||||
, count(*)
|
|
||||||
FROM ctgov.studies
|
|
||||||
WHERE
|
|
||||||
is_fda_regulated_drug is true
|
|
||||||
AND
|
|
||||||
study_type = 'Interventional'
|
|
||||||
AND
|
|
||||||
start_date > date('2007-01-01')
|
|
||||||
group by study_type, phase, has_expanded_access, has_dmc;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
Find different mesh terms as assigned by clinicaltrials.gov
|
|
||||||
*/
|
|
||||||
select * from ctgov.browse_conditions
|
|
||||||
order by nct_id desc,mesh_type
|
|
||||||
limit 200;
|
|
||||||
|
|
||||||
select * from ctgov.browse_interventions
|
|
||||||
order by nct_id desc
|
|
||||||
limit 200;
|
|
||||||
|
|
||||||
@ -1,48 +0,0 @@
|
|||||||
import psycopg2 as psyco
|
|
||||||
import pandas as pd
|
|
||||||
import nltk
|
|
||||||
from nltk.corpus import stopwords
|
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
||||||
from sklearn.cluster import KMeans
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
def preporcess_text(text):
    """Return *text* reduced to a space-separated string of non-stopword tokens.

    The input is lowercased, every run of non-letter characters is replaced
    by a single space, the result is tokenized with NLTK, and English
    stopwords are dropped before the tokens are joined back together.
    """
    text = text.lower()
    text = re.sub("[^A-Za-z]+", " ", text)
    # make tokens
    tokens = nltk.word_tokenize(text)

    # remove stopwords; load the list once into a set rather than
    # re-reading it for every token
    english_stopwords = set(stopwords.words("english"))
    tokens = [w for w in tokens if w not in english_stopwords]

    # rejoin
    return " ".join(tokens).strip()
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Fetch a sample of non-null why_stopped texts, then cluster them.
    conn = psyco.connect(dbname="aact_db", user="analysis", host="localhost", password="test")
    try:
        curse = conn.cursor()
        try:
            curse.execute("SELECT why_stopped FROM ctgov.studies WHERE why_stopped IS NOT NULL LIMIT 2000;")
            results = curse.fetchall()
        finally:
            # release the cursor even when the query fails
            curse.close()
    finally:
        # release the connection even when the query fails
        conn.close()

    data = pd.DataFrame(results, columns=["corpus"])
    data["cleaned"] = data.corpus.apply(preporcess_text)

    # TF-IDF features over the cleaned text
    vectorizer = TfidfVectorizer(sublinear_tf=True)
    X = vectorizer.fit_transform(data.cleaned)

    # fixed random_state so the cluster assignment is reproducible
    kmeans = KMeans(n_clusters=10, random_state=11021585)
    kmeans.fit(X)

    data["cluster"] = kmeans.labels_

    # number of texts assigned to each cluster
    print(data.groupby(["cluster"])["cleaned"].count())
|
|
||||||
|
|
||||||
|
|
||||||
@ -1 +0,0 @@
|
|||||||
I believe this is for an ML classification of the reasons for termination.
|
|
||||||
@ -0,0 +1 @@
|
|||||||
|
backup/2023-09-06_aactdb_with_matches.sql.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
@ -0,0 +1,42 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
RESTORE_DUMP_GZ="${1:-aact_db_backup_20250107_133822.sql.gz}"
|
||||||
|
POSTGRES_USER=root
|
||||||
|
POSTGRES_PASSWORD=root
|
||||||
|
POSTGRES_DB=aact_db
|
||||||
|
|
||||||
|
CONTAINER_NAME="${POSTGRES_DB}-restored-$(date -I)"
|
||||||
|
|
||||||
|
#start container
|
||||||
|
# Abort when the container cannot be started (for example because the name is
# already in use); otherwise the readiness loop further down would wait forever.
podman run \
    -e POSTGRES_PASSWORD="${POSTGRES_PASSWORD}" \
    -e POSTGRES_USER="${POSTGRES_USER}" \
    -e POSTGRES_DB="${POSTGRES_DB}" \
    --name "${CONTAINER_NAME}" \
    --detach \
    --network research-network \
    --shm-size=512mb \
    --volume ./backup/:/backup/ \
    -p 5432:5432 \
    postgres:14-alpine || exit 1
|
||||||
|
|
||||||
|
|
||||||
|
sleep 10
|
||||||
|
|
||||||
|
# Function to check if PostgreSQL is ready
|
||||||
|
function check_postgres {
    # Succeeds only when psql inside the container can connect to the target
    # database and run '\q'; all output is discarded, so callers rely solely
    # on the exit status.
    podman exec -i "${CONTAINER_NAME}" psql -h localhost -U "${POSTGRES_USER}" -d "${POSTGRES_DB}" -c '\q' > /dev/null 2>&1
}
|
||||||
|
|
||||||
|
# Wait for PostgreSQL to be ready
|
||||||
|
until check_postgres; do
|
||||||
|
echo "Waiting for PostgreSQL to be ready..."
|
||||||
|
sleep 4
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "PostgreSQL is ready. Restoring the database..."
|
||||||
|
|
||||||
|
# Decompress the dump file and restore it to the database
|
||||||
|
podman exec -i "${CONTAINER_NAME}" sh -c "gunzip -c /backup/${RESTORE_DUMP_GZ} | psql -h localhost -U ${POSTGRES_USER} -d ${POSTGRES_DB}"
|
||||||
|
|
||||||
|
echo "Database restoration complete."
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,40 @@
|
|||||||
|
-- IF NOT EXISTS makes this re-runnable, matching the IF EXISTS / IF NOT EXISTS
-- guards on the table and sequence statements in this script.
CREATE SCHEMA IF NOT EXISTS spl AUTHORIZATION root;
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS spl.nsde;
|
||||||
|
|
||||||
|
CREATE SEQUENCE IF NOT EXISTS spl.nsde_id_seq
|
||||||
|
INCREMENT 1
|
||||||
|
START 1
|
||||||
|
MINVALUE 1
|
||||||
|
MAXVALUE 9223372036854775807
|
||||||
|
CACHE 1;
|
||||||
|
|
||||||
|
ALTER SEQUENCE spl.nsde_id_seq
|
||||||
|
OWNER TO root;
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS spl.nsde
|
||||||
|
(
|
||||||
|
id integer NOT NULL DEFAULT nextval('spl.nsde_id_seq'::regclass),
|
||||||
|
package_ndc11 character varying(11) COLLATE pg_catalog."default",
|
||||||
|
application_number_or_citation character varying(25) COLLATE pg_catalog."default",
|
||||||
|
package_ndc character varying(50) COLLATE pg_catalog."default",
|
||||||
|
proprietary_name character varying(500) COLLATE pg_catalog."default",
|
||||||
|
product_type character varying(90) COLLATE pg_catalog."default",
|
||||||
|
marketing_category character varying(160) COLLATE pg_catalog."default",
|
||||||
|
dosage_form character varying(155) COLLATE pg_catalog."default",
|
||||||
|
billing_unit character varying(35) COLLATE pg_catalog."default",
|
||||||
|
marketing_start_date date,
|
||||||
|
marketing_end_date date,
|
||||||
|
inactivation_date date,
|
||||||
|
reactivation_date date,
|
||||||
|
CONSTRAINT nsde_pkey PRIMARY KEY (id)
|
||||||
|
)
|
||||||
|
|
||||||
|
TABLESPACE pg_default;
|
||||||
|
|
||||||
|
ALTER TABLE IF EXISTS spl.nsde
|
||||||
|
OWNER to root;
|
||||||
|
|
||||||
|
-- if the table is dropped, the sequence is as well
|
||||||
|
ALTER SEQUENCE spl.nsde_id_seq
|
||||||
|
OWNED BY spl.nsde.id;
|
||||||
@ -0,0 +1,6 @@
|
|||||||
|
-- Create the schema that holds the migrated RxNorm tables.
|
||||||
|
CREATE SCHEMA rxnorm_migrated;
|
||||||
|
|
||||||
|
--Give root full privileges on every table that currently exists in the rxnorm_migrated schema
|
||||||
|
|
||||||
|
GRANT ALL ON ALL TABLES IN SCHEMA rxnorm_migrated TO root;
|
||||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,7 @@
|
|||||||
|
# Instructions:
|
||||||
|
Go to [RxNavInABox](https://lhncbc.nlm.nih.gov/RxNav/applications/RxNav-in-a-Box.html) and download the most recent version.
|
||||||
|
|
||||||
|
I have included the version I use.
|
||||||
|
|
||||||
|
Then unzip and run docker-compose.yaml
|
||||||
|
|
||||||
@ -0,0 +1,48 @@
|
|||||||
|
version: '3'
|
||||||
|
|
||||||
|
networks:
|
||||||
|
pharmaceutical_research: #because it helps to have a way to link specifically to this.
|
||||||
|
external: true
|
||||||
|
|
||||||
|
services:
|
||||||
|
aact_db:
|
||||||
|
image: postgres:14-alpine
|
||||||
|
restart: "no"
|
||||||
|
networks:
|
||||||
|
- pharmaceutical_research
|
||||||
|
container_name: aact_db
|
||||||
|
#restart: always #restart after crashes
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: ${POSTGRES_USER}
|
||||||
|
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||||
|
POSTGRES_DB: ${POSTGRES_DB}
|
||||||
|
VERSION: podman
|
||||||
|
ports:
|
||||||
|
- "5432:5432" #host:container
|
||||||
|
volumes: #host:container is the format.
|
||||||
|
# this is persistent storage for the database
|
||||||
|
- ./AACT_downloader/postgresql/:/var/lib/postgresql/
|
||||||
|
# this is the database dump to restore from
|
||||||
|
- ./AACT_downloader/aact_downloads/postgres_data.dmp:/mnt/host_data/postgres_data.dmp
|
||||||
|
# this is the folder containing entrypoint info.
|
||||||
|
- ./AACT_downloader/docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
|
||||||
|
shm_size: 512mb
|
||||||
|
|
||||||
|
|
||||||
|
rxnav-db:
|
||||||
|
image: mariadb:10.4
|
||||||
|
restart: "no"
|
||||||
|
ports:
|
||||||
|
- "3306:3306"
|
||||||
|
volumes:
|
||||||
|
- ./RxNav-In-a-box/rxnav-in-a-box-20230103/mysql:/docker-entrypoint-initdb.d:ro
|
||||||
|
- ./RxNav-In-a-box/rxnav_data:/var/lib/mysql
|
||||||
|
environment:
|
||||||
|
MYSQL_RANDOM_ROOT_PASSWORD: "yes"
|
||||||
|
MYSQL_USER: ${MYSQL_USER}
|
||||||
|
MYSQL_PASSWORD: ${MYSQL_PASSWORD}
|
||||||
|
VERSION: podman
|
||||||
|
env_file:
|
||||||
|
.env
|
||||||
|
networks:
|
||||||
|
- pharmaceutical_research
|
||||||
@ -0,0 +1,23 @@
|
|||||||
|
version: '3'
|
||||||
|
|
||||||
|
networks:
|
||||||
|
pharmaceutical_research: #because it helps to have a way to link specifically to this.
|
||||||
|
|
||||||
|
services:
|
||||||
|
aact_db:
|
||||||
|
image: postgres:14-alpine
|
||||||
|
networks:
|
||||||
|
- pharmaceutical_research
|
||||||
|
container_name: DrugCentral
|
||||||
|
#restart: always #restart after crashes
|
||||||
|
environment:
|
||||||
|
POSTGRES_PASSWORD: root
|
||||||
|
ports:
|
||||||
|
- "54320:5432" #host:container
|
||||||
|
volumes: #host:container is the format.
|
||||||
|
# this is persistent storage for the database
|
||||||
|
- ./db_store/:/var/lib/postgresql/
|
||||||
|
# this is the folder containing entrypoint info.
|
||||||
|
- ./docker-entrypoint-initdb.d/:/docker-entrypoint-initdb.d/
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
filename="drugcentral.dump.08222022.sql.gz"

# Stop if the init directory is missing so the dump is not written somewhere else.
cd ./docker-entrypoint-initdb.d/ || exit 1

# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page under the dump's name, which gzip would then fail to unpack.
curl --fail "https://unmtid-shinyapps.net/download/$filename" --output "$filename" || exit 1

gzip -d "$filename"
|
||||||
@ -1 +0,0 @@
|
|||||||
<mxfile host="Electron" modified="2022-09-19T21:58:15.288Z" agent="5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/16.5.1 Chrome/96.0.4664.110 Electron/16.0.7 Safari/537.36" etag="K1oYB1ahwdBUMmjzqr-S" version="16.5.1" type="device"><diagram id="-7mtYT5q5bNZQN0eJ9dG" name="Page-1">7Vxtb6M4EP41kfY+JOI1Lx/z0t5VykZVu7fX/XRywBBvASPjbJP99TcmJoSatLQJobeLVDV4MI49z3jm8dikY07DzZ8MxavP1MVBx9DcTcecdQxDH9kGfAjJdicZGvZO4DPiykq54J78xFKoSemauDgpVOSUBpzERaFDowg7vCBDjNGnYjWPBsVvjZGPFcG9gwJV+g9x+UqOwtZy+V+Y+Kvsm3VN3glRVlkKkhVy6dOByLzqmFNGKd9dhZspDoTyMr3snrs+cnffMYYjXuUBdGMFnuZynkxCf+V9vXPQ9661a+UHCtZywNOARATG/4URFCQ9n/6Q3efbTCfJEwkDFEFp4tGI38s7GpSdFQncOdrStehTwpHzmJUmK8rIT6iPArilgwBuMy4hN7VCjXvxpGyT4QTq3GYD1Z+JPqNNoeIcJTzrDQ0CFCdkmfZPPBgi5pNoQjmnoawUoCUOJvB1PqPryJ3SgDK4FdHdAEkQZKKOYboIDz0n7Tujj/jgTt8Z4qUHd6RGMeN4cxQqfW8AMHMwDTFnW6iSPSBNRs4ZPbO2p9wCdUvKVofWl1VE0ur9fdO5YcCFtI032Imt2Mli+uVmppgGjJkr2ilRpRShgPgRFAPsiceE0oTtjaU4JK4rWp4kMXJI5M/TajMrl9zJwQsRhce9IJ1iK3gQQwuTmJKIp8qwJ/AH6plqPbtjQ1+nUNbzMvyJ6oxPaQTdRySFCoMxPWFhUBNGOeJouTf1Sjgfn3Qq+BJtsyLYZl1Y9xWsbyIXYOGERtUAFxOlv+zbfXUCeZ5nOE4LfSn0/aahHyjQOzSMhVtMWuTrRH7UNPKWqUB/FTGIn6EYlqHdRxBIV5RXt4Pl0LZsrcQOhg5u7eCIHehG04YwVOzgnrO1w9cMuyC/ZdSFElzNBW9SzaElhzWTQ9soskOzeXY4Um3mqmWH73QQw4/NDrNxHC4FZmNoapx+aC3sdcDeODPMlqSHmQJJDVu8z49343xQV6f54SpQ64pkGsPYoywUOTkBeWsHZ7eD5vmgbiiGMCMJqAKDcLJmQrUtB7wwB+wOihzQrsoBR7WZibp+nI3n39LM/FrMOuOaIY7b5eOJ7mI3HT8uPdTVHQXyxuxhGyHeBHnz1FBNGC9m0+797bw7AxV0r+ZYJJGSnpO0e0mNpwvKQoWhldjLfhfq/AajpplvOBY0ckpd3DqJdzmJ/lvjQino9TkJNa8ITgIE2UKjBfysgJdEhcsCbqnrhs+IPWIOChczHcigT+X3tOCfF/yS7MGFwVezB+M4DvL0wWIdLjGDi0958vCP1hZqsIWyDMKFjUFNHR56glm7LKzfCKqSvvriv7pHdMtozAgG4i56ukBhS/1qwX7YNPaGSgXuNos0cdyuBi+7GrS04mqwar6otryhoeYNYcY4OBZHDMReovbp7mH6982sZQfvcg7GB88XGiUnkPf7ilrUBoV6cG88aWioOaBdTPg3RHEMWk96vGRLsQ0QNQeI/rMAoVf1EPt1xvlNRU0dPQ8RbOOsiduGiPe5isEHDxElx8swb88Z1YJ244Eha1jdHDBbwGsAvPGTRqa6AYBdH2ehHXS0oj6NUHCVSydpvMSuVE5eZ05pLOPtd8z5VoZ4tOZUBHkeZgQAdMa2D4eFb6IxQEcWZ1lo35W2BRxEB18OsDAeumYOrhDXgIr4+EUEzXIEGQ4
QJz+KPTk/Pkaj+GjN4TOqiM+oUXjU3XfwKR5hoTivXwJcem6/qOzMMTqgI8xeco0pzz1wS0WPN3vJUck3dOXDnf17sa86MPPIUkY2r/W0wcAqktZdqTIAsu1bMZiDKtTzEswVhPZdOAE0NZb9HnOqX3FK6Udek3kbpGPG0PaggrRWFfHsWJ3+bPVjyLfVc/h3TZ7XGEqOQwgdrUj0mH5iJs5aovR/iJOVEGIWJuKTwr8gq0iS3i8y5Y9EvAwnrWcNsj3GbJly2pzfW4BZ9CRZ+QI+Yfib+oRBVRp0qk84Lc6qq9Dqs3SFhI2FKZv+JefrkVPs+xBt6HZhXnU/fIi21MT04u66O/vSMSCImbpQ7QKWf6l2RJeoJ8azicS+lnmt4NpmLuv+0YxRMXbburqyLd337Ne1srVUai5AlabiIi5chXiRxmU0jtPXa5diQF/H6cyIA+TkwkR0CpyNjwPx6sXeBhVDa5MiVd7CfvNvcFz4zMzgNyUDGat7nQ0c8QaXYQNWw2StZwz7hxjpryCUlm4xIzB8QR92wg3hDwfXB2BDKW9JFBqFulHiZ6nEryy+/y95m/Uab9NG2qhI3E5cZ51C3MQ7ZPufS9tVz390zrz6Dw==</diagram></mxfile>
|
|
||||||
@ -0,0 +1,44 @@
|
|||||||
|
from flask import Flask
|
||||||
|
import os
|
||||||
|
from dotenv import dotenv_values
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
env_path = "../../containers/.env"
|
||||||
|
ENV = dotenv_values(env_path)
|
||||||
|
|
||||||
|
def create_app(test_config=None):
    """Build and configure the Flask application (application factory).

    Args:
        test_config: Optional mapping of configuration overrides. When given,
            it is applied on top of the defaults read from the ``.env`` file.

    Returns:
        The Flask application, with the database hooks initialised and the
        ``validation`` blueprint registered.
    """
    # create and configure the app
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        # NOTE(review): this secret key is committed to source control; it
        # should be rotated and loaded from the environment instead.
        SECRET_KEY='6e674d6e41b733270fd01c6257b3a1b4769eb80f3f773cd0fe8eff25f350fc1f',
        POSTGRES_DB=ENV["POSTGRES_DB"],
        POSTGRES_USER=ENV["POSTGRES_USER"],
        POSTGRES_HOST=ENV["POSTGRES_HOST"],
        POSTGRES_PORT=ENV["POSTGRES_PORT"],
        POSTGRES_PASSWORD=ENV["POSTGRES_PASSWORD"],
    )

    # The parameter used to be accepted but ignored; honour it so callers
    # (e.g. tests) can override the defaults above.
    if test_config is not None:
        app.config.from_mapping(test_config)

    # ensure the instance folder exists (best effort: failures are ignored)
    try:
        os.makedirs(app.instance_path)
    except OSError:
        pass

    # a simple page that says hello
    @app.route('/')
    def hello():
        return 'Hello, World!'

    from . import db_interface
    db_interface.init_database(app)

    from . import validation
    app.register_blueprint(validation.bp)

    return app
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,175 @@
|
|||||||
|
import psycopg2 as psyco
|
||||||
|
from psycopg2 import extras
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import click #used for cli commands. Not needed for what I am doing.
|
||||||
|
from flask import current_app, g
|
||||||
|
|
||||||
|
def get_db(**kwargs):
    """Return the Postgres connection stored on ``flask.g``, opening it on first use.

    Connection settings come from ``current_app.config``; any extra keyword
    arguments are forwarded to ``psycopg2.connect`` when the connection is
    first created.
    """
    if "db" in g:
        return g.db

    config = current_app.config
    g.db = psyco.connect(
        dbname=config["POSTGRES_DB"],
        user=config["POSTGRES_USER"],
        host=config["POSTGRES_HOST"],
        port=config["POSTGRES_PORT"],
        password=config["POSTGRES_PASSWORD"],
        **kwargs,
    )
    return g.db
|
||||||
|
|
||||||
|
def close_db(e=None):
    """Remove the connection from ``flask.g`` and close it, if one was opened.

    ``e`` is accepted but not used.
    """
    connection = g.pop('db', None)
    if connection is None:
        return
    connection.close()
|
||||||
|
|
||||||
|
def check_initialization(app):
    """Run a throw-away count query against ``"DiseaseBurden".trial_to_icd10``.

    The rows are discarded; the query exists only so that a problem with the
    connection or the table raises here. ``app`` is not used.
    """
    connection = get_db()
    with connection.cursor() as cursor:
        cursor.execute("select count(*) from \"DiseaseBurden\".trial_to_icd10")
        # just checking if everything is going to fail
        cursor.fetchall()
|
||||||
|
|
||||||
|
def init_database(app):
    """Register ``close_db`` as an application-context teardown callback on ``app``."""
    # The start-up sanity check is currently disabled:
    # check_initialization(app)
    register_teardown = app.teardown_appcontext
    register_teardown(close_db)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def select_remaing_trials_to_analyze(db_conn):
    """Return the distinct trial ids whose proposed matches are not yet reviewed.

    A trial is listed when at least one of its rows in
    ``"DiseaseBurden".trial_to_icd10`` has ``approved`` set to NULL.
    Rows are returned as fetched by the cursor, ordered by ``nct_id``.
    """
    query = '''
    select distinct nct_id
    from "DiseaseBurden".trial_to_icd10 tti
    where tti.approved is null
    order by nct_id
    ;
    '''
    with db_conn.cursor() as curse:
        curse.execute(query)
        pending = curse.fetchall()
    return pending
|
||||||
|
|
||||||
|
|
||||||
|
def select_analyzed_trials(db_conn):
    """Return reviewed trials together with their latest approval timestamp.

    Only rows marked ``accepted`` or ``rejected`` are considered. Each result
    row is ``(nct_id, max(approval_timestamp))``, most recent first.
    """
    query = '''
    select distinct nct_id, max(approval_timestamp)
    from "DiseaseBurden".trial_to_icd10 tti
    where tti.approved in ('accepted','rejected')
    group by nct_id
    order by max(approval_timestamp) desc
    ;
    '''
    with db_conn.cursor() as curse:
        curse.execute(query)
        reviewed = curse.fetchall()
    return reviewed
|
||||||
|
|
||||||
|
def select_unmatched_trials(db_conn):
    """Return the distinct trial ids that were marked as having no good match.

    A trial is listed when at least one of its rows in
    ``"DiseaseBurden".trial_to_icd10`` has ``approved = 'unmatched'``.
    Rows are returned as fetched by the cursor, ordered by ``nct_id``.
    """
    query = '''
    select distinct nct_id
    from "DiseaseBurden".trial_to_icd10 tti
    where tti.approved = 'unmatched'
    order by nct_id
    ;
    '''
    with db_conn.cursor() as curse:
        curse.execute(query)
        unmatched = curse.fetchall()
    return unmatched
|
||||||
|
|
||||||
|
|
||||||
|
def get_trial_conditions_and_proposed_matches(db_conn, nct_id):
    """Return every ``"DiseaseBurden".trial_to_icd10`` row for one trial.

    Args:
        db_conn: open database connection.
        nct_id: trial identifier, bound as a query parameter.

    Returns:
        All columns of the matching rows, as fetched by the cursor.
    """
    query = '''
    select *
    from "DiseaseBurden".trial_to_icd10 tti
    where nct_id = %s
    '''
    params = [nct_id]
    with db_conn.cursor() as curse:
        curse.execute(query, params)
        rows = curse.fetchall()
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def store_validation(db_conn, list_of_insert_data):
    """Write review decisions back to ``"DiseaseBurden".trial_to_icd10``.

    Args:
        db_conn: open database connection.
        list_of_insert_data: iterable of parameter sequences, each bound
            positionally as ``(approved, approval_timestamp, id)``.

    One UPDATE is executed per entry, followed by a single commit.
    """
    update_statement = """
    update "DiseaseBurden".trial_to_icd10
    set approved=%s, approval_timestamp=%s
    where id=%s
    ;
    """
    with db_conn.cursor() as curse:
        for decision in list_of_insert_data:
            curse.execute(update_statement, decision)
        db_conn.commit()
|
||||||
|
|
||||||
|
def get_trial_summary(db_conn,nct_id):
    """Collect the descriptive data shown on a trial's validation page.

    Three queries are run for ``nct_id``, in this order: titles and
    descriptions from ``ctgov.studies`` (left-joined to the brief summary and
    detailed description), keywords, and raw conditions.

    Returns:
        dict with keys ``"summary"``, ``"keywords"`` and ``"conditions"``,
        each holding the rows fetched for the corresponding query.
    """
    summary_query = """
    select
        s.nct_id,
        brief_title ,
        official_title ,
        bs.description as brief_description,
        dd.description as detailed_description
    from ctgov.studies s
    left join ctgov.brief_summaries bs
        on bs.nct_id = s.nct_id
    left join ctgov.detailed_descriptions dd
        on dd.nct_id = s.nct_id
    where s.nct_id = %s
    ;
    """
    conditions_query = """
    --conditions mentioned
    select * from ctgov.conditions c
    where c.nct_id = %s
    ;
    """
    keywords_query = """
    select nct_id ,downcase_name
    from ctgov.keywords k
    where k.nct_id = %s
    ;
    """
    collected = {}
    with db_conn.cursor() as cursor:
        cursor.execute(summary_query, [nct_id])
        collected["summary"] = cursor.fetchall()

        cursor.execute(keywords_query, [nct_id])
        collected["keywords"] = cursor.fetchall()

        cursor.execute(conditions_query, [nct_id])
        collected["conditions"] = cursor.fetchall()

    return collected
|
||||||
|
|
||||||
|
def get_list_icd10_codes(db_conn):
    """Return the distinct codes in ``"DiseaseBurden".icd10_to_cause`` as a flat list.

    The first column of every fetched row is extracted, in query order
    (sorted by ``code``).
    """
    query = """
    select distinct code
    from "DiseaseBurden".icd10_to_cause itc
    order by code;
    """
    with db_conn.cursor() as cursor:
        cursor.execute(query)
        rows = cursor.fetchall()

    flattened = []
    for row in rows:
        flattened.append(row[0])
    return flattened
|
||||||
|
|
||||||
|
def record_suggested_matches(db_conn, nct_id,condition,icd10_code):
    """Insert a hand-entered match into ``"DiseaseBurden".trial_to_icd10``.

    The row is stored with source ``'hand matched'``, state ``'accepted'`` and
    the current local time as approval timestamp, then committed.

    Args:
        db_conn: open database connection.
        nct_id: trial identifier.
        condition: condition name entered by the reviewer.
        icd10_code: code stored in the ``ui`` column.
    """
    insert_statement = """
    INSERT INTO "DiseaseBurden".trial_to_icd10
    (nct_id,"condition",ui,"source",approved,approval_timestamp)
    VALUES (%s,%s,%s,'hand matched','accepted',%s)
    ;
    """
    with db_conn.cursor() as cursor:
        row_values = [nct_id, condition, icd10_code, datetime.now()]
        cursor.execute(insert_statement, row_values)
        db_conn.commit()
|
||||||
@ -0,0 +1 @@
|
|||||||
|
#at some point I need to add a login or something.
|
||||||
@ -0,0 +1,25 @@
|
|||||||
|
<!doctype html>
|
||||||
|
<title>{% block title %}{% endblock %} - ClinicalTrialsProject</title>
|
||||||
|
<!--<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">-->
|
||||||
|
|
||||||
|
<nav>
|
||||||
|
<h1>Nav</h1>
|
||||||
|
<ul>
|
||||||
|
<li>
|
||||||
|
<a href="{{ url_for('validation.remaining') }}">Validation Home</a>
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
<a href="https://icd.who.int/browse10/2019/en">WHO ICD-10 Codes (2019)</a>
|
||||||
|
</li>
|
||||||
|
<li>
|
||||||
|
<a href="https://uts.nlm.nih.gov/uts/umls/home">UMLS Metathesaurus browser (requires login)</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
|
||||||
|
<section class="content">
|
||||||
|
<header>
|
||||||
|
{% block header %}{% endblock %}
|
||||||
|
</header>
|
||||||
|
{% block content %}{% endblock %}
|
||||||
|
</section>
|
||||||
@ -0,0 +1,49 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
|
{% block header %}
|
||||||
|
<h1>{% block title %} ICD-10 to Trial Conditions Validation {% endblock %}</h1>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
|
||||||
|
<h2>Trials to Validate</h2>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<th>Trials</th>
|
||||||
|
{% for trial in list_to_validate %}
|
||||||
|
<tr><td>
|
||||||
|
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||||
|
{{ trial [0] }}
|
||||||
|
</a>
|
||||||
|
</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<h2>Trials that have been Validated</h2>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<th>Trials Links</th>
|
||||||
|
{% for trial in validated_list %}
|
||||||
|
<tr><td>
|
||||||
|
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||||
|
{{ trial [0] }}
|
||||||
|
</a>
|
||||||
|
(Most recently updated {{trial[1]}})
|
||||||
|
</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
|
||||||
|
<h2>Trials that don't have a good match</h2>
|
||||||
|
|
||||||
|
<table>
|
||||||
|
<th>Trial Links</th>
|
||||||
|
{% for trial in unmatched_list %}
|
||||||
|
<tr><td>
|
||||||
|
<a href="{{ url_for('.validate_trial', nct_id=trial[0] ) }}">
|
||||||
|
{{ trial [0] }}
|
||||||
|
</a>
|
||||||
|
</td></tr>
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
@ -0,0 +1,95 @@
|
|||||||
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
|
{% block header %}
|
||||||
|
<h1> ICD-10 to Trial Conditions Validation: {{ nct_id }} </h1>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
|
||||||
|
<section class="summary">
|
||||||
|
<h3>Trial Summary</h3>
|
||||||
|
|
||||||
|
<div class="text_summary">
|
||||||
|
<ul>
|
||||||
|
<li>NCT: {{ summary_dats["summary"][0][0] }}</li>
|
||||||
|
<li>Brief Title: {{ summary_dats["summary"][0][1] }}</li>
|
||||||
|
<li>Long Title: {{ summary_dats["summary"][0][2] }}</li>
|
||||||
|
<li>Brief Description: {{ summary_dats["summary"][0][3] }}</li>
|
||||||
|
<li>Long Description: {{ summary_dats["summary"][0][4] }}</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="keywords">
|
||||||
|
<h4>Keywords</h4>
|
||||||
|
<ul>
|
||||||
|
{% for keyword in summary_dats["keywords"] %}
|
||||||
|
<li>
|
||||||
|
{{ keyword[1] }}
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="conditions">
|
||||||
|
<h4>Raw Conditions </h4>
|
||||||
|
<ul>
|
||||||
|
{% for condition in summary_dats["conditions"] %}
|
||||||
|
<li>
|
||||||
|
{{ condition[3] }}
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="proposed_conditions">
|
||||||
|
<h3>Proposed Conditions</h3>
|
||||||
|
<form method="post">
|
||||||
|
<table>
|
||||||
|
<tr>
|
||||||
|
<th>Approve</th>
|
||||||
|
<th>Condition (MeSH normalized)</th>
|
||||||
|
<th>Identifier</th>
|
||||||
|
<th>Source</th>
|
||||||
|
<th>Description</th>
|
||||||
|
<th>Source</th>
|
||||||
|
</tr>
|
||||||
|
{% for condition in condition_list %}
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td> <input type="checkbox" id="{{ condition[0] }}" name="{{condition[0]}}" value="accepted" {% if condition[8] == "accepted" %}checked{% endif %}> </td>
|
||||||
|
<td> {{condition[2]}} </td>
|
||||||
|
<td> {{condition[3]}} </td>
|
||||||
|
<td> {{condition[5]}} </td>
|
||||||
|
<td> {{condition[6]}} </td>
|
||||||
|
<td> {{condition[7]}} </td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
{% endfor %}
|
||||||
|
</table>
|
||||||
|
<input type="submit" name="submission" value="Submit approvals">
|
||||||
|
<br/>
|
||||||
|
<input type="submit" name="marked_unmatched" value="Mark unmatched">
|
||||||
|
</form>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="submit_alternate">
|
||||||
|
<h3>Submit Alternate Conditions</h3>
|
||||||
|
<!--For each listed condition, provide a spot to enter an ICD-10 code-->
|
||||||
|
<form method="post">
|
||||||
|
<label for="alternate_sub">Please enter the proposed code that appears to be the best match:</label>
|
||||||
|
<input name="alt_sub" id="alternate_sub">
|
||||||
|
<br/>
|
||||||
|
<label for="condition">
|
||||||
|
Please give a name to the condition you used to match this<br/>
|
||||||
|
Condition:
|
||||||
|
</label>
|
||||||
|
<input name="condition" id="condition">
|
||||||
|
<br/>
|
||||||
|
<input type="submit" name="alternate_submission" value="Submit alternate ICD-10 code">
|
||||||
|
</form>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="approved">
|
||||||
|
<!--TODO:This will list the already approved values-->
|
||||||
|
</section>
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
@ -0,0 +1,98 @@
|
|||||||
|
import functools
|
||||||
|
from flask import (Blueprint, flash, g, redirect, render_template, request, session, url_for)
|
||||||
|
from Icd10ConditionsMatching.db_interface import (
|
||||||
|
get_db,select_remaing_trials_to_analyze,
|
||||||
|
select_analyzed_trials,
|
||||||
|
select_unmatched_trials,
|
||||||
|
get_trial_conditions_and_proposed_matches,
|
||||||
|
store_validation,
|
||||||
|
get_trial_summary,
|
||||||
|
get_list_icd10_codes,
|
||||||
|
record_suggested_matches,
|
||||||
|
)
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
#### First Blueprint: Checking Data
|
||||||
|
bp = Blueprint("validation", __name__, url_prefix="/validation")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route("/",methods=["GET"])
def remaining():
    """Render the validation index: trials still to review, reviewed, and unmatched."""
    connection = get_db()

    return render_template(
        "validation_index.html",
        list_to_validate=select_remaing_trials_to_analyze(connection),
        validated_list=select_analyzed_trials(connection),
        unmatched_list=select_unmatched_trials(connection),
    )
|
||||||
|
|
||||||
|
|
||||||
|
@bp.route("/<nct_id>", methods=["GET","POST"])
def validate_trial(nct_id):
    """Show one trial's proposed ICD-10 matches (GET) or record a review (POST).

    A POST is dispatched on which submit control is present in the form:

    * ``submission`` -- each proposed match whose id appears in the form is
      stored as ``accepted``, every other one as ``rejected``.
    * ``marked_unmatched`` -- every proposed match is stored as ``unmatched``.
    * ``alternate_submission`` -- a hand-entered code is recorded for the
      trial; if it is not in the known code list it is still recorded, with
      a marker appended to the condition name, and a 422 page is returned.

    Args:
        nct_id: trial identifier taken from the URL.

    Returns:
        A rendered page, a redirect, or a ``(body, status)`` tuple. Status 400
        is returned for a POST that contains none of the expected controls.
    """
    # stdlib helper, imported locally so the module's import block is untouched
    from html import escape

    db_conn = get_db()
    condition_list = get_trial_conditions_and_proposed_matches(db_conn, nct_id)

    if request.method == "GET":
        summary_dats = get_trial_summary(db_conn, nct_id)
        return render_template(
            "validation_of_trial.html",
            nct_id=nct_id,
            condition_list=condition_list,
            summary_dats=summary_dats,
        )

    # From here on the request is a POST.
    match_ids = [condition[0] for condition in condition_list]

    if "submission" in request.form:
        # Presence of the checkbox decides the state; the submitted value is
        # deliberately not stored, so a crafted form cannot inject other states.
        list_of_insert_data = [
            (
                "accepted" if str(match_id) in request.form else "rejected",
                datetime.now(),
                match_id,
            )
            for match_id in match_ids
        ]
        store_validation(db_conn, list_of_insert_data)
        return redirect(url_for("validation.remaining"))

    if "marked_unmatched" in request.form:
        # if this was marked as "unmatched", store that for each entry.
        list_of_insert_data = [
            ("unmatched", datetime.now(), match_id) for match_id in match_ids
        ]
        store_validation(db_conn, list_of_insert_data)
        return redirect(url_for("validation.remaining"))

    if "alternate_submission" in request.form:
        # Normalise the entry: drop dots and right-pad with "-" to 7 chars.
        # NOTE(review): presumably this mirrors how codes are stored in
        # icd10_to_cause -- confirm against the loader.
        code = request.form["alt_sub"].strip().replace(".", '').ljust(7, "-")
        condition = request.form["condition"].strip()

        if code in get_list_icd10_codes(db_conn):
            record_suggested_matches(db_conn, nct_id, condition, code)
            return redirect(request.path)

        record_suggested_matches(db_conn, nct_id, condition + "| Code not in GBD list", code)
        # Both values come from the request, so escape them and quote the
        # attribute before echoing them back as HTML.
        return """
        Entered `{}`, which is not in the list of available ICD-10 codes. <a href="{}">Return to trial summary</a>
        """.format(escape(code.strip("-")), escape(request.path)), 422

    # None of the known submit controls was present; answer explicitly instead
    # of returning None (which Flask turns into a server error).
    return "Unrecognized form submission.", 400
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,13 @@
|
|||||||
|
from setuptools import setup
|
||||||
|
|
||||||
|
# Package metadata for the ICD-10 / trial-condition validation web app.
setup(
    name='Icd10ConditionsMatching',
    packages=['Icd10ConditionsMatching'],
    include_package_data=True,
    install_requires=[
        'flask',
        'psycopg2',
        # 'datetime' was removed from this list: the code imports the
        # standard-library ``datetime`` module, whereas the PyPI distribution
        # of that name is an unrelated third-party package.
        'python-dotenv',
    ],
)
|
||||||
@ -0,0 +1 @@
|
|||||||
|
waitress-serve --port=5000 --call 'Icd10ConditionsMatching:create_app'
|
||||||
@ -0,0 +1,11 @@
|
|||||||
|
from drugtools.env_setup import postgres_conn, mariadb_conn, ENV
|
||||||
|
|
||||||
|
# Show the environment the connections will be built from.
print(ENV)

# Smoke-test the Postgres connection with a small query.
with postgres_conn() as pconn:
    with pconn.cursor() as curse:
        curse.execute("select nct_id FROM ctgov.studies LIMIT 10;")
        postgres_rows = curse.fetchall()
        print(postgres_rows)

# Smoke-test the MariaDB connection the same way.
with mariadb_conn() as mconn:
    with mconn.cursor() as mcurse:
        mcurse.execute("select * FROM ALLNDC_HISTORY LIMIT 10;")
        mariadb_rows = mcurse.fetchall()
        print(mariadb_rows)
|
||||||
@ -0,0 +1,96 @@
|
|||||||
|
import json
|
||||||
|
from psycopg2.extras import execute_values
|
||||||
|
import datetime as dt
|
||||||
|
from drugtools.env_setup import postgres_conn, ENV
|
||||||
|
import requests
|
||||||
|
import zipfile
|
||||||
|
import io
|
||||||
|
|
||||||
|
URL_STEM = 'https://download.open.fda.gov/other/nsde/'
|
||||||
|
NUMBER_OF_NSDE_FILES = int(ENV["NUMBER_OF_NSDE_FILES"])
|
||||||
|
|
||||||
|
def filename_generator(max_num):
    """Yield the NSDE archive names for parts ``1`` through ``max_num``.

    Each name has the form ``other-nsde-XXXX-of-YYYY.json.zip``, where both
    numbers are zero-padded to at least four characters.
    """
    name_template = "other-nsde-{:0>4}-of-{:0>4}.json.zip"
    part = 1
    while part <= max_num:
        yield name_template.format(part, max_num)
        part += 1
|
||||||
|
|
||||||
|
def get_date(result,key):
    """Parse ``result[key]`` as a ``YYYYMMDD`` date.

    Returns:
        A ``datetime`` for a non-empty value, or ``None`` when the key is
        missing or its value is falsy.
    """
    raw_value = result.get(key)
    if not raw_value:
        return None
    return dt.datetime.strptime(raw_value, "%Y%m%d")
|
||||||
|
|
||||||
|
def build_values(result):
    """Turn one NSDE record into a 12-item tuple of column values.

    The first eight items are taken from the record unchanged (``None`` when
    absent); the last four are date fields converted with ``get_date``.
    """
    plain_fields = (
        "proprietary_name",
        "application_number_or_citation",
        "product_type",
        "package_ndc",
        "marketing_category",
        "package_ndc11",
        "dosage_form",
        "billing_unit",
    )
    date_fields = (
        "marketing_start_date",
        "marketing_end_date",
        "inactivation_date",
        "reactivation_date",
    )

    # adjust types: only the date fields need conversion
    plain_values = tuple(result.get(field) for field in plain_fields)
    date_values = tuple(get_date(result, field) for field in date_fields)
    return plain_values + date_values
|
||||||
|
|
||||||
|
def download_and_extract_zip(base_url,filename, timeout=60):
    """Download a zip archive and return the bytes of its first member.

    Args:
        base_url: URL prefix; ``filename`` is appended to it unchanged.
        filename: name of the archive to fetch.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The uncompressed content of the first archive member, or ``None`` if
        the archive has no members.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(base_url + filename, timeout=timeout)
    # Fail on an HTTP error here, rather than later when the error page
    # turns out not to be a zip file.
    response.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(response.content)) as the_zip:
        members = the_zip.infolist()
        if not members:
            return None
        # Only the first member is used.
        return the_zip.read(members[0])
|
||||||
|
|
||||||
|
def run():
    """Download each NSDE archive and bulk-insert its records into ``spl.nsde``.

    A fresh connection and cursor are opened for every archive; the archive's
    ``results`` list is converted with ``build_values`` and written with a
    single ``execute_values`` call.
    """
    insert_statement = """
    INSERT INTO spl.nsde (
        proprietary_name
        ,application_number_or_citation
        ,product_type
        ,package_ndc
        ,marketing_category
        ,package_ndc11
        ,dosage_form
        ,billing_unit
        ,marketing_start_date
        ,marketing_end_date
        ,inactivation_date
        ,reactivation_date
    )
    VALUES %s;
    """

    # It would be nice to replace filename_generator with something that
    # retrieves and unzips the files directly.
    for filename in filename_generator(NUMBER_OF_NSDE_FILES):
        with postgres_conn() as con:
            with con.cursor() as curse:
                print(filename)

                payload = download_and_extract_zip(URL_STEM, filename)
                records = json.loads(payload)["results"]

                rows = []
                for record in records:
                    rows.append(build_values(record))
                execute_values(curse, insert_statement, rows)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run()
|
||||||
@ -0,0 +1,43 @@
|
|||||||
|
import pymysql
|
||||||
|
import psycopg2 as psyco
|
||||||
|
from psycopg2.sql import SQL
|
||||||
|
from dotenv import dotenv_values
|
||||||
|
|
||||||
|
env_path = "../containers/.env"
|
||||||
|
ENV = dotenv_values(env_path)
|
||||||
|
|
||||||
|
def mariadb_conn(**kwargs):
    """Open a MariaDB connection using the ``MYSQL_*`` settings from ``ENV``.

    Extra keyword arguments are forwarded to ``pymysql.connect``.
    """
    settings = {
        "database": ENV["MYSQL_DB"],
        "user": ENV["MYSQL_USER"],
        "host": ENV["MYSQL_HOST"],
        # the port is stored as text in the env file; converted to int here
        "port": int(ENV["MYSQL_PORT"]),
        "password": ENV["MYSQL_PASSWORD"],
    }
    return pymysql.connect(**settings, **kwargs)
|
||||||
|
|
||||||
|
def postgres_conn(**kwargs):
    """Open a Postgres connection using the ``POSTGRES_*`` settings from ``ENV``.

    Extra keyword arguments are forwarded to ``psycopg2.connect``.
    """
    settings = {
        "dbname": ENV["POSTGRES_DB"],
        "user": ENV["POSTGRES_USER"],
        "host": ENV["POSTGRES_HOST"],
        "port": ENV["POSTGRES_PORT"],
        "password": ENV["POSTGRES_PASSWORD"],
    }
    return psyco.connect(**settings, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def get_tables_of_interest():
    """Return the comma-separated ``TABLES_OF_INTEREST`` setting as a list of strings."""
    raw_setting = ENV["TABLES_OF_INTEREST"]
    return raw_setting.split(",")
|
||||||
|
|
||||||
|
def postgres_table_delete_entries(schema,table):
    """Delete every row from ``schema.table`` in Postgres and commit.

    Args:
        schema: schema name; quoted as an SQL identifier.
        table: table name; quoted as an SQL identifier.
    """
    # Imported locally: the module only imports ``SQL`` from psycopg2.sql, so
    # the bare name ``Identifier`` used to raise NameError.
    from psycopg2.sql import Identifier

    # The keyword must match the ``{table}`` placeholder; it was previously
    # misspelled, which made ``format`` fail.
    delete_statement = SQL("delete from {schema}.{table}").format(
        schema=Identifier(schema),
        table=Identifier(table),
    )
    with postgres_conn() as con:
        with con.cursor() as curse:
            curse.execute(delete_statement)
        con.commit()
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,15 @@
|
|||||||
|
from .env_setup import postgres_conn
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def run():
    """Execute ``selected_trials.sql`` (located next to this module) against Postgres."""
    # resolve the SQL file relative to this module rather than the working directory
    script_path = Path(__file__).with_name("selected_trials.sql")
    with open(script_path, 'r') as handle:
        statement = handle.read()

    with postgres_conn() as connection, connection.cursor() as cursor:
        cursor.execute(statement)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run()
|
||||||
@ -0,0 +1,118 @@
|
|||||||
|
import psycopg2 as psyco
|
||||||
|
from psycopg2 import sql
|
||||||
|
from psycopg2 import extras
|
||||||
|
import pymysql
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
from .env_setup import postgres_conn, mariadb_conn, get_tables_of_interest
|
||||||
|
|
||||||
|
|
||||||
|
##############NOTE
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
mariadb --mariadb.connect--> incrementally fetched dict --psycopg2--> postgres
|
||||||
|
|
||||||
|
I will have the ability to reduce memory usage and simplify what I am doing.
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
|
||||||
|
############### GLOBALS
|
||||||
|
#these are hardcoded so they shouldn't require any updates
|
||||||
|
mschema="rxnorm_current"
|
||||||
|
pschema="rxnorm_migrated"
|
||||||
|
|
||||||
|
########FUNCTIONS#################
|
||||||
|
|
||||||
|
|
||||||
|
def convert_column(d):
    """Translate one MySQL column description into a Postgres column clause.

    Given the metadata about a column in mysql, make the portion of the
    ``create table`` statement that corresponds to that column in postgres.

    Args:
        d: mapping with the column's INFORMATION_SCHEMA fields. ``DATA_TYPE``,
            ``COLUMN_NAME`` and ``IS_NULLABLE`` are always read; the length or
            precision fields are read for the types that need them. The
            mapping is not modified.

    Returns:
        The column clause as a string, or ``None`` for data types that have no
        translation here (``enum``, ``text`` and any unrecognised type).
    """
    templates = {
        "varchar": "{COLUMN_NAME} character varying({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
        "char": "{COLUMN_NAME} character({CHARACTER_MAXIMUM_LENGTH}) COLLATE pg_catalog.\"default\" {IS_NULLABLE}",
        "tinyint": "{COLUMN_NAME} smallint {IS_NULLABLE}",
        "decimal": "{COLUMN_NAME} numeric({NUMERIC_PRECISION},{NUMERIC_SCALE}) {IS_NULLABLE}",
        "int": "{COLUMN_NAME} integer {IS_NULLABLE}",
    }

    template = templates.get(d["DATA_TYPE"])
    if template is None:
        # enum, text and unknown types: no translation available
        return None

    # Work on a copy so the caller's metadata keeps its original "YES"/"NO"
    # value; rewriting it in place made a second call drop NOT NULL.
    fields = dict(d)
    fields["IS_NULLABLE"] = "NOT NULL" if d["IS_NULLABLE"] == "NO" else ""
    return template.format(**fields)
|
||||||
|
|
||||||
|
def run():
    """Copy every table of interest from the MariaDB schema into Postgres.

    For each table returned by ``get_tables_of_interest()``:

    1. read its column metadata from MariaDB's INFORMATION_SCHEMA,
    2. create the equivalent table in the ``pschema`` schema if it is missing,
    3. bulk-insert all source rows in pages of 1000.

    Tables without rows are created but otherwise skipped.

    Raises:
        TypeError: if a source column has a data type that
            ``convert_column`` cannot translate.
    """
    #get & convert datatypes for each table of interest
    tables_of_interest = get_tables_of_interest()

    with mariadb_conn(cursorclass=pymysql.cursors.DictCursor) as mcon, postgres_conn() as pcon:
        with mcon.cursor() as mcurse, pcon.cursor(cursor_factory=extras.DictCursor) as pcurse:
            for table in tables_of_interest: #create equivalent table in postgres

                #get columns from mysql
                q = "SELECT * FROM INFORMATION_SCHEMA.columns WHERE TABLE_SCHEMA=%s and TABLE_NAME=%s;"
                mcurse.execute(q,[mschema,table])
                column_meta = mcurse.fetchall()

                #convert mysql column names and types to postgres column statements.
                columns = [convert_column(a) for a in column_meta]
                # Report untranslatable columns by name instead of letting the
                # join below fail on a None entry.
                unsupported = [
                    meta["COLUMN_NAME"]
                    for meta, converted in zip(column_meta, columns)
                    if converted is None
                ]
                if unsupported:
                    raise TypeError(
                        "cannot translate column type(s) for {}.{}: {}".format(
                            mschema, table, ", ".join(unsupported)
                        )
                    )
                #TODO make sure this uses psycopg columns correctly.
                column_sql = sql.SQL(",\n".join(columns))

                #build a header and footer
                header=sql.SQL("CREATE TABLE IF NOT EXISTS {}\n(").format(sql.Identifier(pschema,table))
                footer=sql.SQL(");")

                #Join the header, columns, and footer.
                create_table_statement = sql.SQL("\n").join([header,column_sql,footer])
                print(create_table_statement.as_string(pcon))

                #Create the table in postgres
                pcurse.execute(create_table_statement)
                pcon.commit()

                #Get the data from mysql
                mcurse.execute("SELECT * FROM {schema}.{table}".format(schema=mschema,table=table))
                #FIX setting up sql this^^^ way is improper.
                results = mcurse.fetchall()

                # An empty table has no first row to take the column names
                # from, and nothing to insert.
                if not results:
                    continue

                #build the insert statement template
                #get list of field names
                field_names = list(results[0])
                # Column names stay unquoted on purpose: the CREATE TABLE above
                # also emits them unquoted, and quoting only here would make
                # the two spellings disagree on case.
                column_list = [sql.SQL(x) for x in field_names]
                column_inserts = [sql.Placeholder(x) for x in field_names]
                #generate insert statement
                psql_insert = sql.SQL("INSERT INTO {table} ({columns}) VALUES %s ").format(
                    table=sql.Identifier(pschema,table)
                    ,columns=sql.SQL(",").join(column_list)
                )
                #Note that this^^^^ does not contain parentheses around the placeholder

                #Building the values template.
                #Note that it must include the parentheses so that the
                #VALUES portion is formatted correctly.
                template = sql.SQL(",").join(column_inserts)
                template = sql.Composed([
                    sql.SQL("(")
                    ,template
                    ,sql.SQL(")")
                ])

                #insert the data with page_size
                extras.execute_values(pcurse,psql_insert,argslist=results,template=template, page_size=1000)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run()
|
||||||
@ -0,0 +1,36 @@
|
|||||||
|
from drugtools.env_setup import ENV,postgres_conn
|
||||||
|
from psycopg2 import extras
|
||||||
|
from collections import namedtuple
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
# Pipe-separated "code|cause" files to load into "DiseaseBurden".icd10_to_cause.
FILES=[
    "../non-db_data_sources/GBD and ICD-10_(2019 version)/NONFATAL_cause2code.psv",
    "../non-db_data_sources/GBD and ICD-10_(2019 version)/COD_cause2code.psv"
]
SEP="|"

sql = """
INSERT INTO "DiseaseBurden".icd10_to_cause
(code,cause_text)
VALUES %s
"""

with postgres_conn() as pconn:
    with pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
        # Rows from all files are collected first and written in one call.
        entries = []
        for fpath in FILES:
            print(fpath)

            with open(fpath,"r") as fh:
                for line in tqdm(fh.readlines(),desc=fpath):
                    # each line must split into exactly two fields
                    code,cause = (part.strip() for part in line.split(SEP))
                    entries.append((code,cause))

        extras.execute_values(pcurse, sql , entries)
|
||||||
|
|
||||||
|
|
||||||
@ -0,0 +1,5 @@
|
|||||||
|
#!/bin/bash
# Delete locally stored container data.
# All paths are relative to the directory this script is invoked from.

containers="../containers"

# Remove the (non-hidden) contents of the RxNav-in-a-Box data directory;
# the directory itself is left in place.
rm -r "${containers}"/RxNav-In-a-box/rxnav_data/*

# Remove the AACT downloader's PostgreSQL data directory as a whole.
rm -r "${containers}/AACT_downloader/postgresql/data"
|
||||||
@ -0,0 +1,24 @@
|
|||||||
|
from drugtools import env_setup
|
||||||
|
from drugtools import historical_trial_selector as hts
|
||||||
|
from drugtools import historical_nct_downloader as hnd
|
||||||
|
from drugtools import historical_nct_extractor as hne
|
||||||
|
from drugtools import download_and_extract_nsde as daen
|
||||||
|
from drugtools import migrate_mysql2pgsql as mm2p
|
||||||
|
|
||||||
|
# Show the loaded environment so the operator can confirm it before any step runs.
print("Current Environment")
print(env_setup.ENV)

answer = input("Are you willing to continue with the current environmnet? y/[n]")

if answer not in ("Y", "y"):
    # Anything other than an explicit yes aborts without doing work.
    print("Please fix your .env file and try again")
else:
    # Trial selection and download are announced but currently disabled.
    print("SelectingTrials")
    # hts.run()
    print("downloading trials")
    # hnd.run()
    print("extracting trials")
    hne.run()
    # NOTE: the script exits here, so the two steps below are never reached.
    exit(0)
    daen.run()
    mm2p.run()
|
||||||
@ -0,0 +1,87 @@
|
|||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from drugtools.env_setup import ENV,postgres_conn
|
||||||
|
from psycopg2 import extras
|
||||||
|
from collections import namedtuple
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
# One row destined for the trial_to_icd10 table: the trial, the condition text
# that was searched, and the fields of a single search hit (None when no hit).
RecordStuff = namedtuple(
    "RecordStuff",
    ["nct_id", "condition", "ui", "uri", "rootSource", "name"],
)
|
||||||
|
|
||||||
|
class Requestor():
    """Small client for the UMLS (UTS) REST search endpoint.

    Holds an API key and attaches it to every search request.
    """

    def __init__(self,api_key):
        """Store *api_key*; it is sent as the ``apiKey`` query parameter."""
        self.key = api_key

    def search(self,search_term,inputType="sourceUi", returnIdType="code", addnl_terms=None):
        """Run one search against the ICD10 source vocabulary.

        Args:
            search_term: Value sent as the ``string`` query parameter.
            inputType: Value sent as the ``inputType`` query parameter.
            returnIdType: Value sent as the ``returnIdType`` query parameter.
            addnl_terms: Optional mapping of extra query parameters. Its
                entries override the defaults built here on key collision.

        Returns:
            The ``requests`` response object, unmodified. The HTTP status is
            not checked here.
        """
        query_terms = {
            "apiKey":self.key,
            "sabs":"ICD10",
            "string":search_term,
            "returnIdType":returnIdType,
            "inputType":inputType
        }
        # None (rather than a shared ``{}`` default) means "no extra terms".
        if addnl_terms:
            query_terms.update(addnl_terms)

        query = "https://uts-ws.nlm.nih.gov/rest/search/current/"

        return requests.get(query,params=query_terms)
|
||||||
|
|
||||||
|
|
||||||
|
r = Requestor(ENV.get("UMLS_API_KEY"))


with postgres_conn() as pconn, pconn.cursor(cursor_factory=extras.DictCursor) as pcurse:
    # MeSH-list-only variant of the condition query. It is not executed;
    # sql2 below is the query that actually runs.
    sql = """
    select nct_id, downcase_mesh_term
    from ctgov.browse_conditions bc
    where
    mesh_type = 'mesh-list'
    and
    nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    ;
    """
    # Conditions plus MeSH-list browse conditions, restricted to trials that
    # have at least one row in history.trial_snapshots.
    sql2 = """
    with cte as (
    /* Keywords added too much noise
    select nct_id,downcase_name
    from ctgov.keywords k
    where nct_id in (select distinct nct_id from history.trial_snapshots ts)
    union */
    select nct_id, downcase_name
    from ctgov.conditions c
    union
    select nct_id ,downcase_mesh_term as downcase_name
    from ctgov.browse_conditions bc
    where mesh_type = 'mesh-list'
    )
    select nct_id, downcase_name from cte
    where nct_id in (select distinct nct_id from history.trial_snapshots ts)
    order by nct_id
    """
    pcurse.execute(sql2)
    rows = pcurse.fetchall()

    entries = []

    for row in tqdm(rows,desc="Search MeSH terms"):
        nctid = row[0]
        condition = row[1]

        payload = r.search(condition).json()

        # Raise explicitly when the expected keys are missing. Passing an
        # Exception instance as the dict.get() default only returns it, which
        # then fails later with an unrelated AttributeError/TypeError.
        if 'result' not in payload:
            raise ValueError("No result entry in json")
        if 'results' not in payload['result']:
            raise ValueError("No results entry in json")
        results = payload['result']['results']

        if not results:
            # Keep a placeholder row so that conditions without any hit
            # are still recorded.
            entries.append(RecordStuff(nctid,condition,None,None,None,None))
        else:
            for entry in results:
                entries.append(RecordStuff(nctid, condition, entry["ui"], entry["uri"], entry["rootSource"], entry["name"]))

    # Named placeholders match the RecordStuff field names, so each record's
    # _asdict() can be passed directly as the parameter mapping.
    sql_insert = """
    INSERT INTO "DiseaseBurden".trial_to_icd10
    (nct_id, "condition", ui,uri,rootsource,"name","source",approved,approval_timestamp)
    VALUES
    (%(nct_id)s, %(condition)s, %(ui)s, %(uri)s, %(rootSource)s, %(name)s, 'UMLS API search', null,null)
    """
    for entry in tqdm(entries,desc="Inserting entries to DB"):
        pcurse.execute(sql_insert,entry._asdict())
|
||||||
@ -0,0 +1,6 @@
|
|||||||
|
-- Generate one CREATE MATERIALIZED VIEW statement per materialized view that
-- lives outside the system schemas.
--
-- Materialized views are listed in pg_matviews (pg_views only contains
-- ordinary views), and PostgreSQL has no OR REPLACE form of
-- CREATE MATERIALIZED VIEW. Identifiers go through quote_ident() so that
-- mixed-case names such as "DiseaseBurden" survive being re-parsed.
SELECT
    'CREATE MATERIALIZED VIEW ' || quote_ident(schemaname) || '.' || quote_ident(matviewname) || ' AS ' || definition
FROM pg_matviews
WHERE schemaname != 'pg_catalog'
  and schemaname != 'information_schema'
;
|
||||||
@ -0,0 +1,24 @@
|
|||||||
|
-- Generate one CREATE TABLE statement per table outside the system schemas.
--
-- Only column name, data type, character length and NOT NULL are emitted;
-- defaults, constraints and indexes are not part of the output.
-- NOTE(review): information_schema reports array and user-defined column
-- types as 'ARRAY' / 'USER-DEFINED', so such columns need manual correction.
SELECT
    -- quote_ident() keeps mixed-case names such as "DiseaseBurden" valid.
    'CREATE TABLE ' || quote_ident(schemaname) || '.' || quote_ident(tablename) || E'\n(\n' ||
    -- Order by ordinal_position so columns appear in table order;
    -- without an ORDER BY the aggregate order is unspecified.
    string_agg(column_definition, E',\n' ORDER BY ordinal_position) || E'\n);\n'
FROM (
    SELECT
        schemaname,
        tablename,
        c.ordinal_position,
        quote_ident(column_name::text) || ' ' || data_type ||
        CASE
            WHEN character_maximum_length IS NOT NULL THEN '(' || character_maximum_length || ')'
            ELSE ''
        END ||
        CASE
            WHEN is_nullable = 'NO' THEN ' NOT NULL'
            ELSE ''
        END as column_definition
    FROM pg_catalog.pg_tables t
    JOIN information_schema.columns c
        ON t.schemaname = c.table_schema
        AND t.tablename = c.table_name
    -- Every schema except the two system schemas is exported.
    WHERE schemaname != 'pg_catalog'
      and schemaname != 'information_schema'
) t
GROUP BY schemaname, tablename;
|
||||||
@ -0,0 +1,6 @@
|
|||||||
|
-- Generate one CREATE OR REPLACE VIEW statement per ordinary view outside
-- the system schemas.
--
-- Identifiers go through quote_ident() so that mixed-case or otherwise
-- special names remain valid when the generated statement is executed.
SELECT
    'CREATE OR REPLACE VIEW ' || quote_ident(schemaname) || '.' || quote_ident(viewname) || ' AS ' || definition
FROM pg_views
-- Every schema except the two system schemas is exported.
WHERE schemaname != 'pg_catalog'
  and schemaname != 'information_schema'
;
|
||||||
@ -0,0 +1,415 @@
|
|||||||
|
?column?
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_browse_conditions AS SELECT browse_conditions.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT browse_conditions.mesh_term), '|'::text) AS names +
|
||||||
|
FROM ctgov.browse_conditions +
|
||||||
|
GROUP BY browse_conditions.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_browse_interventions AS SELECT browse_interventions.nct_id, +
|
||||||
|
array_to_string(array_agg(browse_interventions.mesh_term), '|'::text) AS names +
|
||||||
|
FROM ctgov.browse_interventions +
|
||||||
|
GROUP BY browse_interventions.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_cities AS SELECT facilities.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT facilities.city), '|'::text) AS names +
|
||||||
|
FROM ctgov.facilities +
|
||||||
|
GROUP BY facilities.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_conditions AS SELECT conditions.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT conditions.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.conditions +
|
||||||
|
GROUP BY conditions.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_countries AS SELECT countries.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT countries.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.countries +
|
||||||
|
WHERE (countries.removed IS NOT TRUE) +
|
||||||
|
GROUP BY countries.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_design_outcomes AS SELECT design_outcomes.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||||
|
FROM ctgov.design_outcomes +
|
||||||
|
GROUP BY design_outcomes.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_facilities AS SELECT facilities.nct_id, +
|
||||||
|
array_to_string(array_agg(facilities.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.facilities +
|
||||||
|
GROUP BY facilities.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_group_types AS SELECT design_groups.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT design_groups.group_type), '|'::text) AS names +
|
||||||
|
FROM ctgov.design_groups +
|
||||||
|
GROUP BY design_groups.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_id_information AS SELECT id_information.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT id_information.id_value), '|'::text) AS names +
|
||||||
|
FROM ctgov.id_information +
|
||||||
|
GROUP BY id_information.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_intervention_types AS SELECT interventions.nct_id, +
|
||||||
|
array_to_string(array_agg(interventions.intervention_type), '|'::text) AS names +
|
||||||
|
FROM ctgov.interventions +
|
||||||
|
GROUP BY interventions.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_interventions AS SELECT interventions.nct_id, +
|
||||||
|
array_to_string(array_agg(interventions.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.interventions +
|
||||||
|
GROUP BY interventions.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_keywords AS SELECT keywords.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT keywords.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.keywords +
|
||||||
|
GROUP BY keywords.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_overall_official_affiliations AS SELECT overall_officials.nct_id, +
|
||||||
|
array_to_string(array_agg(overall_officials.affiliation), '|'::text) AS names +
|
||||||
|
FROM ctgov.overall_officials +
|
||||||
|
GROUP BY overall_officials.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_overall_officials AS SELECT overall_officials.nct_id, +
|
||||||
|
array_to_string(array_agg(overall_officials.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.overall_officials +
|
||||||
|
GROUP BY overall_officials.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_primary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||||
|
FROM ctgov.design_outcomes +
|
||||||
|
WHERE ((design_outcomes.outcome_type)::text = 'primary'::text) +
|
||||||
|
GROUP BY design_outcomes.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_secondary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||||
|
FROM ctgov.design_outcomes +
|
||||||
|
WHERE ((design_outcomes.outcome_type)::text = 'secondary'::text) +
|
||||||
|
GROUP BY design_outcomes.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_sponsors AS SELECT sponsors.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT sponsors.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.sponsors +
|
||||||
|
GROUP BY sponsors.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.all_states AS SELECT facilities.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT facilities.state), '|'::text) AS names +
|
||||||
|
FROM ctgov.facilities +
|
||||||
|
GROUP BY facilities.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.categories AS SELECT search_results.id, +
|
||||||
|
search_results.nct_id, +
|
||||||
|
search_results.name, +
|
||||||
|
search_results.created_at, +
|
||||||
|
search_results.updated_at, +
|
||||||
|
search_results."grouping", +
|
||||||
|
search_results.study_search_id +
|
||||||
|
FROM ctgov.search_results;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW ctgov.covid_19_studies AS SELECT s.nct_id, +
|
||||||
|
s.overall_status, +
|
||||||
|
s.study_type, +
|
||||||
|
s.official_title, +
|
||||||
|
s.acronym, +
|
||||||
|
s.phase, +
|
||||||
|
s.why_stopped, +
|
||||||
|
s.has_dmc, +
|
||||||
|
s.enrollment, +
|
||||||
|
s.is_fda_regulated_device, +
|
||||||
|
s.is_fda_regulated_drug, +
|
||||||
|
s.is_unapproved_device, +
|
||||||
|
s.has_expanded_access, +
|
||||||
|
s.study_first_submitted_date, +
|
||||||
|
s.last_update_posted_date, +
|
||||||
|
s.results_first_posted_date, +
|
||||||
|
s.start_date, +
|
||||||
|
s.primary_completion_date, +
|
||||||
|
s.completion_date, +
|
||||||
|
s.study_first_posted_date, +
|
||||||
|
cv.number_of_facilities, +
|
||||||
|
cv.has_single_facility, +
|
||||||
|
cv.nlm_download_date, +
|
||||||
|
s.number_of_arms, +
|
||||||
|
s.number_of_groups, +
|
||||||
|
sp.name AS lead_sponsor, +
|
||||||
|
aid.names AS other_ids, +
|
||||||
|
e.gender, +
|
||||||
|
e.gender_based, +
|
||||||
|
e.gender_description, +
|
||||||
|
e.population, +
|
||||||
|
e.minimum_age, +
|
||||||
|
e.maximum_age, +
|
||||||
|
e.criteria, +
|
||||||
|
e.healthy_volunteers, +
|
||||||
|
ak.names AS keywords, +
|
||||||
|
ai.names AS interventions, +
|
||||||
|
ac.names AS conditions, +
|
||||||
|
d.primary_purpose, +
|
||||||
|
d.allocation, +
|
||||||
|
d.observational_model, +
|
||||||
|
d.intervention_model, +
|
||||||
|
d.masking, +
|
||||||
|
d.subject_masked, +
|
||||||
|
d.caregiver_masked, +
|
||||||
|
d.investigator_masked, +
|
||||||
|
d.outcomes_assessor_masked, +
|
||||||
|
ado.names AS design_outcomes, +
|
||||||
|
bs.description AS brief_summary, +
|
||||||
|
dd.description AS detailed_description +
|
||||||
|
FROM (((((((((((ctgov.studies s +
|
||||||
|
FULL JOIN ctgov.all_conditions ac ON (((s.nct_id)::text = (ac.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.all_id_information aid ON (((s.nct_id)::text = (aid.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.all_design_outcomes ado ON (((s.nct_id)::text = (ado.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.all_keywords ak ON (((s.nct_id)::text = (ak.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.all_interventions ai ON (((s.nct_id)::text = (ai.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.sponsors sp ON (((s.nct_id)::text = (sp.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.calculated_values cv ON (((s.nct_id)::text = (cv.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.designs d ON (((s.nct_id)::text = (d.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.eligibilities e ON (((s.nct_id)::text = (e.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.brief_summaries bs ON (((s.nct_id)::text = (bs.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.detailed_descriptions dd ON (((s.nct_id)::text = (dd.nct_id)::text))) +
|
||||||
|
WHERE (((sp.lead_or_collaborator)::text = 'lead'::text) AND ((s.nct_id)::text IN ( SELECT search_results.nct_id +
|
||||||
|
FROM ctgov.search_results +
|
||||||
|
WHERE ((search_results.name)::text = 'covid-19'::text))));
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW history.match_drugs_to_trials AS SELECT bi.nct_id, +
|
||||||
|
rp.rxcui, +
|
||||||
|
rp.propvalue1 +
|
||||||
|
FROM (ctgov.browse_interventions bi +
|
||||||
|
JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||||
|
WHERE (((rp.propname)::text = 'RxNorm Name'::text) AND ((bi.nct_id)::text IN ( SELECT trial_snapshots.nct_id +
|
||||||
|
FROM history.trial_snapshots)));
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW http.most_recent_download_status AS SELECT t.nct_id, +
|
||||||
|
t.status, +
|
||||||
|
t.update_timestamp +
|
||||||
|
FROM ( SELECT download_status.id, +
|
||||||
|
download_status.nct_id, +
|
||||||
|
download_status.status, +
|
||||||
|
download_status.update_timestamp, +
|
||||||
|
row_number() OVER (PARTITION BY download_status.nct_id ORDER BY download_status.update_timestamp DESC) AS rn +
|
||||||
|
FROM http.download_status) t +
|
||||||
|
WHERE (t.rn = 1) +
|
||||||
|
ORDER BY t.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.time_between_submission_and_start_view AS SELECT s.nct_id, +
|
||||||
|
s.start_date, +
|
||||||
|
ts.version, +
|
||||||
|
ts.submission_date, +
|
||||||
|
abs(((EXTRACT(epoch FROM (ts.submission_date - (s.start_date)::timestamp without time zone)))::double precision / (((24 * 60) * 60))::double precision)) AS start_deviance +
|
||||||
|
FROM (ctgov.studies s +
|
||||||
|
JOIN history.trial_snapshots ts ON (((s.nct_id)::text = (ts.nct_id)::text))) +
|
||||||
|
WHERE ((s.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||||
|
FROM "DiseaseBurden".trial_to_icd10 tti));
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.rank_proximity_to_start_time_view AS SELECT cte.nct_id, +
|
||||||
|
cte.version, +
|
||||||
|
row_number() OVER (PARTITION BY cte.nct_id ORDER BY cte.start_deviance) AS rownum, +
|
||||||
|
cte.submission_date, +
|
||||||
|
cte.start_deviance, +
|
||||||
|
cte.start_date, +
|
||||||
|
ts.primary_completion_date, +
|
||||||
|
ts.primary_completion_date_category, +
|
||||||
|
ts.overall_status, +
|
||||||
|
ts.enrollment, +
|
||||||
|
ts.enrollment_category +
|
||||||
|
FROM (time_between_submission_and_start_view cte +
|
||||||
|
JOIN history.trial_snapshots ts ON ((((cte.nct_id)::text = (ts.nct_id)::text) AND (cte.version = ts.version))));
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.enrollment_closest_to_start_view AS SELECT cte2.nct_id, +
|
||||||
|
min(cte2.rownum) AS enrollment_source +
|
||||||
|
FROM rank_proximity_to_start_time_view cte2 +
|
||||||
|
WHERE (cte2.enrollment IS NOT NULL) +
|
||||||
|
GROUP BY cte2.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.match_trials_to_bn_in AS WITH trialncts AS ( +
|
||||||
|
SELECT DISTINCT ts.nct_id +
|
||||||
|
FROM history.trial_snapshots ts +
|
||||||
|
) +
|
||||||
|
SELECT bi.nct_id, +
|
||||||
|
bi.downcase_mesh_term, +
|
||||||
|
rr.tty2, +
|
||||||
|
rr.rxcui2 AS bn_or_in_cui, +
|
||||||
|
count(*) AS count +
|
||||||
|
FROM ((ctgov.browse_interventions bi +
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((rr.rxcui1 = rp.rxcui))) +
|
||||||
|
WHERE (((bi.nct_id)::text IN ( SELECT trialncts.nct_id +
|
||||||
|
FROM trialncts)) AND ((bi.mesh_type)::text = 'mesh-list'::text) AND ((rp.propname)::text = 'Active_ingredient_name'::text) AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))) +
|
||||||
|
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2 +
|
||||||
|
ORDER BY bi.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.match_trial_to_ndc11 AS SELECT mttbi.nct_id, +
|
||||||
|
ah.ndc, +
|
||||||
|
count(*) AS count +
|
||||||
|
FROM ((match_trials_to_bn_in mttbi +
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((mttbi.bn_or_in_cui = rr.rxcui1))) +
|
||||||
|
LEFT JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON ((rr.rxcui2 = ah.rxcui))) +
|
||||||
|
WHERE ((rr.tty1 = 'BN'::bpchar) AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar])) AND ((ah.sab)::text = 'RXNORM'::text)) +
|
||||||
|
GROUP BY mttbi.nct_id, ah.ndc +
|
||||||
|
ORDER BY mttbi.nct_id, ah.ndc;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.match_trial_to_marketing_start_date AS SELECT mttn.nct_id, +
|
||||||
|
n.application_number_or_citation, +
|
||||||
|
min(n.marketing_start_date) AS min +
|
||||||
|
FROM (match_trial_to_ndc11 mttn +
|
||||||
|
JOIN spl.nsde n ON ((mttn.ndc = (n.package_ndc11)::bpchar))) +
|
||||||
|
WHERE (((n.product_type)::text = 'HUMAN PRESCRIPTION DRUG'::text) AND ((n.marketing_category)::text = ANY (ARRAY[('NDA'::character varying)::text, ('ANDA'::character varying)::text, ('BLA'::character varying)::text, ('NDA authorized generic'::character varying)::text, ('NDA AUTHORIZED GENERIC'::character varying)::text]))) +
|
||||||
|
GROUP BY mttn.nct_id, n.application_number_or_citation +
|
||||||
|
ORDER BY mttn.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.view_burdens_cte AS SELECT b.measure_id, +
|
||||||
|
b.location_id, +
|
||||||
|
b.sex_id, +
|
||||||
|
b.age_id, +
|
||||||
|
b.cause_id, +
|
||||||
|
b.metric_id, +
|
||||||
|
b.year, +
|
||||||
|
b.val, +
|
||||||
|
b.upper_95, +
|
||||||
|
b.lower_95, +
|
||||||
|
b.key_column +
|
||||||
|
FROM "DiseaseBurden".burdens b +
|
||||||
|
WHERE ((b.sex_id = 3) AND (b.metric_id = 1) AND (b.measure_id = 2) AND (b.age_id = 22));
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.view_burdens_cte2 AS SELECT c1.cause_id, +
|
||||||
|
c1.year, +
|
||||||
|
c1.val AS h_sdi_val, +
|
||||||
|
c1.upper_95 AS h_sdi_u95, +
|
||||||
|
c1.lower_95 AS h_sdi_l95, +
|
||||||
|
c2.val AS hm_sdi_val, +
|
||||||
|
c2.upper_95 AS hm_sdi_u95, +
|
||||||
|
c2.lower_95 AS hm_sdi_l95, +
|
||||||
|
c3.val AS m_sdi_val, +
|
||||||
|
c3.upper_95 AS m_sdi_u95, +
|
||||||
|
c3.lower_95 AS m_sdi_l95, +
|
||||||
|
c4.val AS lm_sdi_val, +
|
||||||
|
c4.upper_95 AS lm_sdi_u95, +
|
||||||
|
c4.lower_95 AS lm_sdi_l95, +
|
||||||
|
c5.val AS l_sdi_val, +
|
||||||
|
c5.upper_95 AS l_sdi_u95, +
|
||||||
|
c5.lower_95 AS l_sdi_l95 +
|
||||||
|
FROM ((((view_burdens_cte c1 +
|
||||||
|
JOIN view_burdens_cte c2 ON (((c1.cause_id = c2.cause_id) AND (c1.year = c2.year)))) +
|
||||||
|
JOIN view_burdens_cte c3 ON (((c1.cause_id = c3.cause_id) AND (c1.year = c3.year)))) +
|
||||||
|
JOIN view_burdens_cte c4 ON (((c1.cause_id = c4.cause_id) AND (c1.year = c4.year)))) +
|
||||||
|
JOIN view_burdens_cte c5 ON (((c1.cause_id = c5.cause_id) AND (c1.year = c5.year)))) +
|
||||||
|
WHERE ((c1.location_id = 44635) AND (c2.location_id = 44634) AND (c3.location_id = 44639) AND (c4.location_id = 44636) AND (c5.location_id = 44637));
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.view_cte AS SELECT ts.nct_id, +
|
||||||
|
ts.primary_completion_date, +
|
||||||
|
ts.primary_completion_date_category, +
|
||||||
|
ts.enrollment, +
|
||||||
|
ts.start_date, +
|
||||||
|
ts.enrollment_category, +
|
||||||
|
ts.overall_status, +
|
||||||
|
min(ts.submission_date) AS earliest_date_observed +
|
||||||
|
FROM history.trial_snapshots ts +
|
||||||
|
WHERE (((ts.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||||
|
FROM "DiseaseBurden".trial_to_icd10 tti +
|
||||||
|
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type))) AND (ts.submission_date >= ts.start_date) AND (ts.overall_status <> ALL (ARRAY['Completed'::history.study_statuses, 'Terminated'::history.study_statuses]))) +
|
||||||
|
GROUP BY ts.nct_id, ts.primary_completion_date, ts.primary_completion_date_category, ts.start_date, ts.enrollment, ts.enrollment_category, ts.overall_status;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte0 AS SELECT tti.nct_id, +
|
||||||
|
tti.ui, +
|
||||||
|
tti.condition, +
|
||||||
|
itc.cause_text, +
|
||||||
|
ch.cause_id, +
|
||||||
|
ch.level +
|
||||||
|
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||||
|
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||||
|
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||||
|
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type);
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte AS SELECT view_disbur_cte0.nct_id, +
|
||||||
|
max(view_disbur_cte0.level) AS max_level +
|
||||||
|
FROM view_disbur_cte0 +
|
||||||
|
GROUP BY view_disbur_cte0.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.view_trial_to_cause AS SELECT tti.nct_id, +
|
||||||
|
tti.ui, +
|
||||||
|
tti.condition, +
|
||||||
|
itc.cause_text, +
|
||||||
|
ch.cause_id, +
|
||||||
|
ch.level +
|
||||||
|
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||||
|
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||||
|
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||||
|
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type) +
|
||||||
|
ORDER BY tti.nct_id;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte2 AS SELECT ttc.nct_id, +
|
||||||
|
ttc.ui, +
|
||||||
|
ttc.condition, +
|
||||||
|
ttc.cause_text, +
|
||||||
|
ttc.cause_id, +
|
||||||
|
disbur_cte.max_level +
|
||||||
|
FROM (view_trial_to_cause ttc +
|
||||||
|
JOIN view_disbur_cte disbur_cte ON (((disbur_cte.nct_id)::text = (ttc.nct_id)::text))) +
|
||||||
|
WHERE (ttc.level = disbur_cte.max_level) +
|
||||||
|
GROUP BY ttc.nct_id, ttc.ui, ttc.condition, ttc.cause_text, ttc.cause_id, disbur_cte.max_level +
|
||||||
|
ORDER BY ttc.nct_id, ttc.ui;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.view_disbur_cte3 AS SELECT disbur_cte2.nct_id, +
|
||||||
|
SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) AS code, +
|
||||||
|
disbur_cte2.condition, +
|
||||||
|
disbur_cte2.cause_text, +
|
||||||
|
disbur_cte2.cause_id, +
|
||||||
|
ic.chapter_code AS category_id, +
|
||||||
|
ic.group_name, +
|
||||||
|
disbur_cte2.max_level +
|
||||||
|
FROM (view_disbur_cte2 disbur_cte2 +
|
||||||
|
JOIN "DiseaseBurden".icd10_categories ic ON (((SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) <= (ic.end_code)::text) AND (SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) >= (ic.start_code)::text)))) +
|
||||||
|
WHERE (ic.level = 1);
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.formatted_data AS SELECT cte.nct_id, +
|
||||||
|
cte.start_date, +
|
||||||
|
cte.enrollment AS current_enrollment, +
|
||||||
|
cte.enrollment_category, +
|
||||||
|
cte.overall_status AS current_status, +
|
||||||
|
cte.earliest_date_observed, +
|
||||||
|
(EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))) AS elapsed_duration, +
|
||||||
|
count(DISTINCT mttmsd.application_number_or_citation) AS n_brands, +
|
||||||
|
dbc3.code, +
|
||||||
|
dbc3.condition, +
|
||||||
|
dbc3.cause_text, +
|
||||||
|
dbc3.cause_id, +
|
||||||
|
dbc3.category_id, +
|
||||||
|
dbc3.group_name, +
|
||||||
|
dbc3.max_level, +
|
||||||
|
b.year, +
|
||||||
|
b.h_sdi_val, +
|
||||||
|
b.h_sdi_u95, +
|
||||||
|
b.h_sdi_l95, +
|
||||||
|
b.hm_sdi_val, +
|
||||||
|
b.hm_sdi_u95, +
|
||||||
|
b.hm_sdi_l95, +
|
||||||
|
b.m_sdi_val, +
|
||||||
|
b.m_sdi_u95, +
|
||||||
|
b.m_sdi_l95, +
|
||||||
|
b.lm_sdi_val, +
|
||||||
|
b.lm_sdi_u95, +
|
||||||
|
b.lm_sdi_l95, +
|
||||||
|
b.l_sdi_val, +
|
||||||
|
b.l_sdi_u95, +
|
||||||
|
b.l_sdi_l95 +
|
||||||
|
FROM (((view_cte cte +
|
||||||
|
JOIN match_trial_to_marketing_start_date mttmsd ON (((cte.nct_id)::text = (mttmsd.nct_id)::text))) +
|
||||||
|
JOIN view_disbur_cte3 dbc3 ON (((dbc3.nct_id)::text = (cte.nct_id)::text))) +
|
||||||
|
JOIN view_burdens_cte2 b ON (((b.cause_id = dbc3.cause_id) AND (EXTRACT(year FROM b.year) = EXTRACT(year FROM cte.earliest_date_observed))))) +
|
||||||
|
WHERE (mttmsd.min <= cte.earliest_date_observed) +
|
||||||
|
GROUP BY cte.nct_id, cte.start_date, cte.enrollment, cte.enrollment_category, cte.overall_status, cte.earliest_date_observed, (EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))), dbc3.code, dbc3.condition, dbc3.cause_text, dbc3.cause_id, dbc3.category_id, dbc3.group_name, dbc3.max_level, b.cause_id, b.year, b.h_sdi_val, b.h_sdi_u95, b.h_sdi_l95, b.hm_sdi_val, b.hm_sdi_u95, b.hm_sdi_l95, b.m_sdi_val, b.m_sdi_u95, b.m_sdi_l95, b.lm_sdi_val, b.lm_sdi_u95, b.lm_sdi_l95, b.l_sdi_val, b.l_sdi_u95, b.l_sdi_l95+
|
||||||
|
ORDER BY cte.nct_id, cte.earliest_date_observed;
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.formatted_data_with_planned_enrollment AS SELECT f.nct_id, +
|
||||||
|
f.start_date, +
|
||||||
|
f.current_enrollment, +
|
||||||
|
f.enrollment_category, +
|
||||||
|
f.current_status, +
|
||||||
|
f.earliest_date_observed, +
|
||||||
|
f.elapsed_duration, +
|
||||||
|
f.n_brands, +
|
||||||
|
f.code, +
|
||||||
|
f.condition, +
|
||||||
|
f.cause_text, +
|
||||||
|
f.cause_id, +
|
||||||
|
f.category_id, +
|
||||||
|
f.group_name, +
|
||||||
|
f.max_level, +
|
||||||
|
f.year, +
|
||||||
|
f.h_sdi_val, +
|
||||||
|
f.h_sdi_u95, +
|
||||||
|
f.h_sdi_l95, +
|
||||||
|
f.hm_sdi_val, +
|
||||||
|
f.hm_sdi_u95, +
|
||||||
|
f.hm_sdi_l95, +
|
||||||
|
f.m_sdi_val, +
|
||||||
|
f.m_sdi_u95, +
|
||||||
|
f.m_sdi_l95, +
|
||||||
|
f.lm_sdi_val, +
|
||||||
|
f.lm_sdi_u95, +
|
||||||
|
f.lm_sdi_l95, +
|
||||||
|
f.l_sdi_val, +
|
||||||
|
f.l_sdi_u95, +
|
||||||
|
f.l_sdi_l95, +
|
||||||
|
s.overall_status AS final_status, +
|
||||||
|
c2a.version, +
|
||||||
|
c2a.enrollment AS planned_enrollment +
|
||||||
|
FROM (((formatted_data f +
|
||||||
|
JOIN ctgov.studies s ON (((f.nct_id)::text = (s.nct_id)::text))) +
|
||||||
|
JOIN enrollment_closest_to_start_view c3e ON (((c3e.nct_id)::text = (f.nct_id)::text))) +
|
||||||
|
JOIN rank_proximity_to_start_time_view c2a ON ((((c3e.nct_id)::text = (c2a.nct_id)::text) AND (c3e.enrollment_source = c2a.rownum))));
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW http.trials_to_download AS SELECT most_recent_download_status.nct_id +
|
||||||
|
FROM http.most_recent_download_status +
|
||||||
|
WHERE (most_recent_download_status.status = 'Of Interest'::http.history_download_status);
|
||||||
|
CREATE OR REPLACE MATERIALIZED VIEW public.primary_design_outcomes AS SELECT do2.id, +
|
||||||
|
do2.nct_id, +
|
||||||
|
do2.outcome_type, +
|
||||||
|
do2.measure, +
|
||||||
|
do2.time_frame, +
|
||||||
|
do2.population, +
|
||||||
|
do2.description +
|
||||||
|
FROM ctgov.design_outcomes do2 +
|
||||||
|
WHERE (((do2.outcome_type)::text = 'primary'::text) AND ((do2.nct_id)::text IN ( SELECT DISTINCT fd.nct_id +
|
||||||
|
FROM formatted_data fd)));
|
||||||
|
(40 rows)
|
||||||
|
|
||||||
@ -0,0 +1,920 @@
|
|||||||
|
?column?
|
||||||
|
-------------------------------------------------------
|
||||||
|
CREATE TABLE DiseaseBurden.age_group +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
age_group character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.burdens +
|
||||||
|
( +
|
||||||
|
measure_id integer NOT NULL, +
|
||||||
|
location_id integer NOT NULL, +
|
||||||
|
sex_id integer NOT NULL, +
|
||||||
|
age_id integer NOT NULL, +
|
||||||
|
cause_id integer NOT NULL, +
|
||||||
|
metric_id integer NOT NULL, +
|
||||||
|
year date NOT NULL, +
|
||||||
|
val double precision NOT NULL, +
|
||||||
|
upper_95 double precision NOT NULL, +
|
||||||
|
lower_95 double precision NOT NULL, +
|
||||||
|
key_column integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.cause +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
cause character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.cause_hierarchy +
|
||||||
|
( +
|
||||||
|
cause_id integer NOT NULL, +
|
||||||
|
cause_name character varying, +
|
||||||
|
parent_id integer NOT NULL, +
|
||||||
|
parent_nae character varying, +
|
||||||
|
level integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.icd10_categories +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
start_code character varying NOT NULL, +
|
||||||
|
end_code character varying NOT NULL, +
|
||||||
|
group_name character varying NOT NULL, +
|
||||||
|
level integer NOT NULL, +
|
||||||
|
chapter character varying NOT NULL, +
|
||||||
|
chapter_code integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.icd10_to_cause +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
code character varying NOT NULL, +
|
||||||
|
cause_text character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.location +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
location character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.measures +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
label character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.metric +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
metric_label character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.rei +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
rei_label character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.sex +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
sex character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE DiseaseBurden.trial_to_icd10 +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying NOT NULL, +
|
||||||
|
condition character varying NOT NULL, +
|
||||||
|
ui character varying, +
|
||||||
|
uri character varying, +
|
||||||
|
rootsource character varying, +
|
||||||
|
name character varying, +
|
||||||
|
source character varying, +
|
||||||
|
approved USER-DEFINED, +
|
||||||
|
approval_timestamp timestamp without time zone +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE Formularies.usp_dc_2023 +
|
||||||
|
( +
|
||||||
|
USP Class character varying(250), +
|
||||||
|
USP Pharmacotherapeutic Group character varying(250),+
|
||||||
|
API Concept character varying(250), +
|
||||||
|
rxcui character varying(15), +
|
||||||
|
tty character varying(10), +
|
||||||
|
Name character varying(256), +
|
||||||
|
Related BN character varying(250), +
|
||||||
|
Related DF character varying(25050), +
|
||||||
|
USP Category character varying(250) +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.active_storage_attachments +
|
||||||
|
( +
|
||||||
|
id bigint NOT NULL, +
|
||||||
|
name character varying NOT NULL, +
|
||||||
|
record_type character varying NOT NULL, +
|
||||||
|
record_id bigint NOT NULL, +
|
||||||
|
blob_id bigint NOT NULL, +
|
||||||
|
created_at timestamp without time zone NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.active_storage_blobs +
|
||||||
|
( +
|
||||||
|
metadata text, +
|
||||||
|
checksum character varying NOT NULL, +
|
||||||
|
byte_size bigint NOT NULL, +
|
||||||
|
created_at timestamp without time zone NOT NULL, +
|
||||||
|
id bigint NOT NULL, +
|
||||||
|
key character varying NOT NULL, +
|
||||||
|
filename character varying NOT NULL, +
|
||||||
|
content_type character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.baseline_counts +
|
||||||
|
( +
|
||||||
|
count integer, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
ctgov_group_code character varying, +
|
||||||
|
units character varying, +
|
||||||
|
scope character varying, +
|
||||||
|
result_group_id integer +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.baseline_measurements +
|
||||||
|
( +
|
||||||
|
param_value character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
result_group_id integer, +
|
||||||
|
ctgov_group_code character varying, +
|
||||||
|
classification character varying, +
|
||||||
|
category character varying, +
|
||||||
|
title character varying, +
|
||||||
|
description text, +
|
||||||
|
units character varying, +
|
||||||
|
param_type character varying, +
|
||||||
|
param_value_num numeric, +
|
||||||
|
dispersion_type character varying, +
|
||||||
|
dispersion_value character varying, +
|
||||||
|
dispersion_value_num numeric, +
|
||||||
|
dispersion_lower_limit numeric, +
|
||||||
|
dispersion_upper_limit numeric, +
|
||||||
|
explanation_of_na character varying, +
|
||||||
|
number_analyzed integer, +
|
||||||
|
number_analyzed_units character varying, +
|
||||||
|
population_description character varying, +
|
||||||
|
calculate_percentage character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.brief_summaries +
|
||||||
|
( +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
description text +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.browse_conditions +
|
||||||
|
( +
|
||||||
|
mesh_term character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
mesh_type character varying, +
|
||||||
|
downcase_mesh_term character varying, +
|
||||||
|
nct_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.browse_interventions +
|
||||||
|
( +
|
||||||
|
downcase_mesh_term character varying, +
|
||||||
|
mesh_term character varying, +
|
||||||
|
mesh_type character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.calculated_values +
|
||||||
|
( +
|
||||||
|
number_of_secondary_outcomes_to_measure integer, +
|
||||||
|
maximum_age_unit character varying, +
|
||||||
|
minimum_age_unit character varying, +
|
||||||
|
maximum_age_num integer, +
|
||||||
|
minimum_age_num integer, +
|
||||||
|
has_single_facility boolean, +
|
||||||
|
has_us_facility boolean, +
|
||||||
|
months_to_report_results integer, +
|
||||||
|
number_of_sae_subjects integer, +
|
||||||
|
were_results_reported boolean, +
|
||||||
|
registered_in_calendar_year integer, +
|
||||||
|
nlm_download_date date, +
|
||||||
|
actual_duration integer, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
number_of_facilities integer, +
|
||||||
|
number_of_nsae_subjects integer, +
|
||||||
|
number_of_other_outcomes_to_measure integer, +
|
||||||
|
number_of_primary_outcomes_to_measure integer +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.central_contacts +
|
||||||
|
( +
|
||||||
|
phone_extension character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
role character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
contact_type character varying, +
|
||||||
|
name character varying, +
|
||||||
|
phone character varying, +
|
||||||
|
email character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.conditions +
|
||||||
|
( +
|
||||||
|
downcase_name character varying, +
|
||||||
|
name character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.countries +
|
||||||
|
( +
|
||||||
|
name character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
removed boolean +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.design_group_interventions +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
design_group_id integer, +
|
||||||
|
intervention_id integer, +
|
||||||
|
nct_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.design_groups +
|
||||||
|
( +
|
||||||
|
group_type character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
title character varying, +
|
||||||
|
description text +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.design_outcomes +
|
||||||
|
( +
|
||||||
|
description text, +
|
||||||
|
measure text, +
|
||||||
|
outcome_type character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
time_frame text, +
|
||||||
|
population character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.designs +
|
||||||
|
( +
|
||||||
|
masking_description text, +
|
||||||
|
subject_masked boolean, +
|
||||||
|
caregiver_masked boolean, +
|
||||||
|
investigator_masked boolean, +
|
||||||
|
outcomes_assessor_masked boolean, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
allocation character varying, +
|
||||||
|
intervention_model character varying, +
|
||||||
|
observational_model character varying, +
|
||||||
|
primary_purpose character varying, +
|
||||||
|
time_perspective character varying, +
|
||||||
|
masking character varying, +
|
||||||
|
intervention_model_description text +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.detailed_descriptions +
|
||||||
|
( +
|
||||||
|
description text, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.documents +
|
||||||
|
( +
|
||||||
|
comment text, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
url character varying, +
|
||||||
|
document_type character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
document_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.drop_withdrawals +
|
||||||
|
( +
|
||||||
|
period character varying, +
|
||||||
|
reason character varying, +
|
||||||
|
count integer, +
|
||||||
|
ctgov_group_code character varying, +
|
||||||
|
result_group_id integer, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.eligibilities +
|
||||||
|
( +
|
||||||
|
older_adult boolean, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
sampling_method character varying, +
|
||||||
|
gender character varying, +
|
||||||
|
minimum_age character varying, +
|
||||||
|
maximum_age character varying, +
|
||||||
|
healthy_volunteers character varying, +
|
||||||
|
population text, +
|
||||||
|
criteria text, +
|
||||||
|
gender_description text, +
|
||||||
|
gender_based boolean, +
|
||||||
|
adult boolean, +
|
||||||
|
child boolean +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.facilities +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
status character varying, +
|
||||||
|
name character varying, +
|
||||||
|
city character varying, +
|
||||||
|
state character varying, +
|
||||||
|
zip character varying, +
|
||||||
|
country character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.facility_contacts +
|
||||||
|
( +
|
||||||
|
contact_type character varying, +
|
||||||
|
name character varying, +
|
||||||
|
email character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
phone character varying, +
|
||||||
|
phone_extension character varying, +
|
||||||
|
facility_id integer +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.facility_investigators +
|
||||||
|
( +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
facility_id integer, +
|
||||||
|
role character varying, +
|
||||||
|
name character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.file_records +
|
||||||
|
( +
|
||||||
|
url character varying, +
|
||||||
|
id bigint NOT NULL, +
|
||||||
|
filename character varying, +
|
||||||
|
file_size bigint, +
|
||||||
|
file_type character varying, +
|
||||||
|
created_at timestamp without time zone NOT NULL, +
|
||||||
|
updated_at timestamp without time zone NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.id_information +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
id_source character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id_link character varying, +
|
||||||
|
id_value character varying, +
|
||||||
|
id_type_description character varying, +
|
||||||
|
id_type character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.intervention_other_names +
|
||||||
|
( +
|
||||||
|
name character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
intervention_id integer, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.interventions +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
name character varying, +
|
||||||
|
intervention_type character varying, +
|
||||||
|
description text, +
|
||||||
|
nct_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.ipd_information_types +
|
||||||
|
( +
|
||||||
|
name character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.keywords +
|
||||||
|
( +
|
||||||
|
name character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
downcase_name character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.links +
|
||||||
|
( +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
description text, +
|
||||||
|
url character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.mesh_headings +
|
||||||
|
( +
|
||||||
|
qualifier character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
subcategory character varying, +
|
||||||
|
heading character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.mesh_terms +
|
||||||
|
( +
|
||||||
|
description character varying, +
|
||||||
|
tree_number character varying, +
|
||||||
|
qualifier character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
downcase_mesh_term character varying, +
|
||||||
|
mesh_term character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.milestones +
|
||||||
|
( +
|
||||||
|
count_units character varying, +
|
||||||
|
count integer, +
|
||||||
|
description text, +
|
||||||
|
period character varying, +
|
||||||
|
title character varying, +
|
||||||
|
ctgov_group_code character varying, +
|
||||||
|
result_group_id integer, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
milestone_description character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.outcome_analyses +
|
||||||
|
( +
|
||||||
|
other_analysis_description text, +
|
||||||
|
param_type character varying, +
|
||||||
|
non_inferiority_type character varying, +
|
||||||
|
outcome_id integer, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
param_value numeric, +
|
||||||
|
dispersion_type character varying, +
|
||||||
|
dispersion_value numeric, +
|
||||||
|
p_value_modifier character varying, +
|
||||||
|
p_value double precision, +
|
||||||
|
ci_n_sides character varying, +
|
||||||
|
ci_percent numeric, +
|
||||||
|
ci_lower_limit numeric, +
|
||||||
|
ci_upper_limit numeric, +
|
||||||
|
ci_upper_limit_na_comment character varying, +
|
||||||
|
p_value_description character varying, +
|
||||||
|
method character varying, +
|
||||||
|
method_description text, +
|
||||||
|
estimate_description text, +
|
||||||
|
groups_description text, +
|
||||||
|
non_inferiority_description text +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.outcome_analysis_groups +
|
||||||
|
( +
|
||||||
|
result_group_id integer, +
|
||||||
|
ctgov_group_code character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
outcome_analysis_id integer +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.outcome_counts +
|
||||||
|
( +
|
||||||
|
result_group_id integer, +
|
||||||
|
ctgov_group_code character varying, +
|
||||||
|
scope character varying, +
|
||||||
|
units character varying, +
|
||||||
|
count integer, +
|
||||||
|
outcome_id integer, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.outcome_measurements +
|
||||||
|
( +
|
||||||
|
result_group_id integer, +
|
||||||
|
ctgov_group_code character varying, +
|
||||||
|
classification character varying, +
|
||||||
|
category character varying, +
|
||||||
|
title character varying, +
|
||||||
|
description text, +
|
||||||
|
units character varying, +
|
||||||
|
param_type character varying, +
|
||||||
|
param_value character varying, +
|
||||||
|
param_value_num numeric, +
|
||||||
|
dispersion_type character varying, +
|
||||||
|
dispersion_value character varying, +
|
||||||
|
dispersion_value_num numeric, +
|
||||||
|
dispersion_lower_limit numeric, +
|
||||||
|
dispersion_upper_limit numeric, +
|
||||||
|
explanation_of_na text, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
outcome_id integer +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.outcomes +
|
||||||
|
( +
|
||||||
|
population text, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
outcome_type character varying, +
|
||||||
|
title text, +
|
||||||
|
description text, +
|
||||||
|
time_frame text, +
|
||||||
|
anticipated_posting_date date, +
|
||||||
|
anticipated_posting_month_year character varying, +
|
||||||
|
units character varying, +
|
||||||
|
units_analyzed character varying, +
|
||||||
|
dispersion_type character varying, +
|
||||||
|
param_type character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.overall_officials +
|
||||||
|
( +
|
||||||
|
name character varying, +
|
||||||
|
affiliation character varying, +
|
||||||
|
role character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.participant_flows +
|
||||||
|
( +
|
||||||
|
count_units integer, +
|
||||||
|
nct_id character varying, +
|
||||||
|
pre_assignment_details text, +
|
||||||
|
units_analyzed character varying, +
|
||||||
|
drop_withdraw_comment character varying, +
|
||||||
|
reason_comment character varying, +
|
||||||
|
recruitment_details text, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.pending_results +
|
||||||
|
( +
|
||||||
|
event_date_description character varying, +
|
||||||
|
event_date date, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
event character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.provided_documents +
|
||||||
|
( +
|
||||||
|
has_sap boolean, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
document_type character varying, +
|
||||||
|
has_protocol boolean, +
|
||||||
|
has_icf boolean, +
|
||||||
|
document_date date, +
|
||||||
|
url character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.reported_event_totals +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
updated_at timestamp without time zone NOT NULL, +
|
||||||
|
created_at timestamp without time zone NOT NULL, +
|
||||||
|
subjects_at_risk integer, +
|
||||||
|
subjects_affected integer, +
|
||||||
|
classification character varying NOT NULL, +
|
||||||
|
event_type character varying, +
|
||||||
|
ctgov_group_code character varying NOT NULL, +
|
||||||
|
nct_id character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.reported_events +
|
||||||
|
( +
|
||||||
|
vocab character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
result_group_id integer, +
|
||||||
|
ctgov_group_code character varying, +
|
||||||
|
time_frame text, +
|
||||||
|
event_type character varying, +
|
||||||
|
default_vocab character varying, +
|
||||||
|
default_assessment character varying, +
|
||||||
|
subjects_affected integer, +
|
||||||
|
subjects_at_risk integer, +
|
||||||
|
description text, +
|
||||||
|
event_count integer, +
|
||||||
|
organ_system character varying, +
|
||||||
|
adverse_event_term character varying, +
|
||||||
|
frequency_threshold integer, +
|
||||||
|
assessment character varying, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.responsible_parties +
|
||||||
|
( +
|
||||||
|
affiliation text, +
|
||||||
|
nct_id character varying, +
|
||||||
|
responsible_party_type character varying, +
|
||||||
|
name character varying, +
|
||||||
|
title character varying, +
|
||||||
|
organization character varying, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
old_name_title character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.result_agreements +
|
||||||
|
( +
|
||||||
|
other_details text, +
|
||||||
|
restrictive_agreement character varying, +
|
||||||
|
restriction_type character varying, +
|
||||||
|
agreement text, +
|
||||||
|
pi_employee character varying, +
|
||||||
|
nct_id character varying, +
|
||||||
|
id integer NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.result_contacts +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
organization character varying, +
|
||||||
|
name character varying, +
|
||||||
|
phone character varying, +
|
||||||
|
email character varying, +
|
||||||
|
extension character varying, +
|
||||||
|
nct_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.result_groups +
|
||||||
|
( +
|
||||||
|
result_type character varying, +
|
||||||
|
title character varying, +
|
||||||
|
description text, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
ctgov_group_code character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.retractions +
|
||||||
|
( +
|
||||||
|
pmid character varying, +
|
||||||
|
id bigint NOT NULL, +
|
||||||
|
nct_id character varying, +
|
||||||
|
source character varying, +
|
||||||
|
reference_id integer +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.search_results +
|
||||||
|
( +
|
||||||
|
created_at timestamp without time zone NOT NULL, +
|
||||||
|
nct_id character varying NOT NULL, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
updated_at timestamp without time zone NOT NULL, +
|
||||||
|
grouping character varying NOT NULL, +
|
||||||
|
study_search_id integer, +
|
||||||
|
name character varying NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.sponsors +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
name character varying, +
|
||||||
|
lead_or_collaborator character varying, +
|
||||||
|
agency_class character varying, +
|
||||||
|
nct_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.studies +
|
||||||
|
( +
|
||||||
|
phase character varying, +
|
||||||
|
delayed_posting character varying, +
|
||||||
|
source_class character varying, +
|
||||||
|
updated_at timestamp without time zone NOT NULL, +
|
||||||
|
created_at timestamp without time zone NOT NULL, +
|
||||||
|
plan_to_share_ipd_description character varying, +
|
||||||
|
plan_to_share_ipd character varying, +
|
||||||
|
ipd_url character varying, +
|
||||||
|
ipd_access_criteria character varying, +
|
||||||
|
ipd_time_frame character varying, +
|
||||||
|
biospec_description text, +
|
||||||
|
biospec_retention character varying, +
|
||||||
|
is_us_export boolean, +
|
||||||
|
is_ppsd boolean, +
|
||||||
|
is_unapproved_device boolean, +
|
||||||
|
is_fda_regulated_device boolean, +
|
||||||
|
is_fda_regulated_drug boolean, +
|
||||||
|
has_dmc boolean, +
|
||||||
|
expanded_access_type_treatment boolean, +
|
||||||
|
expanded_access_type_intermediate boolean, +
|
||||||
|
expanded_access_type_individual boolean, +
|
||||||
|
has_expanded_access boolean, +
|
||||||
|
why_stopped character varying, +
|
||||||
|
number_of_groups integer, +
|
||||||
|
number_of_arms integer, +
|
||||||
|
limitations_and_caveats character varying, +
|
||||||
|
source character varying, +
|
||||||
|
enrollment_type character varying, +
|
||||||
|
enrollment integer, +
|
||||||
|
expanded_access_nctid character varying, +
|
||||||
|
last_known_status character varying, +
|
||||||
|
overall_status character varying, +
|
||||||
|
official_title text, +
|
||||||
|
brief_title text, +
|
||||||
|
baseline_population text, +
|
||||||
|
acronym character varying, +
|
||||||
|
study_type character varying, +
|
||||||
|
target_duration character varying, +
|
||||||
|
results_first_submitted_date date, +
|
||||||
|
study_first_submitted_date date, +
|
||||||
|
nlm_download_date_description character varying, +
|
||||||
|
primary_completion_date date, +
|
||||||
|
nct_id character varying, +
|
||||||
|
primary_completion_date_type character varying, +
|
||||||
|
primary_completion_month_year character varying, +
|
||||||
|
completion_date date, +
|
||||||
|
completion_date_type character varying, +
|
||||||
|
completion_month_year character varying, +
|
||||||
|
verification_date date, +
|
||||||
|
verification_month_year character varying, +
|
||||||
|
start_date date, +
|
||||||
|
start_date_type character varying, +
|
||||||
|
start_month_year character varying, +
|
||||||
|
last_update_posted_date_type character varying, +
|
||||||
|
last_update_posted_date date, +
|
||||||
|
last_update_submitted_qc_date date, +
|
||||||
|
disposition_first_posted_date_type character varying,+
|
||||||
|
disposition_first_posted_date date, +
|
||||||
|
disposition_first_submitted_qc_date date, +
|
||||||
|
results_first_posted_date_type character varying, +
|
||||||
|
results_first_posted_date date, +
|
||||||
|
results_first_submitted_qc_date date, +
|
||||||
|
study_first_posted_date_type character varying, +
|
||||||
|
study_first_posted_date date, +
|
||||||
|
study_first_submitted_qc_date date, +
|
||||||
|
last_update_submitted_date date, +
|
||||||
|
disposition_first_submitted_date date, +
|
||||||
|
baseline_type_units_analyzed character varying, +
|
||||||
|
fdaaa801_violation boolean, +
|
||||||
|
expanded_access_status_for_nctid character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.study_records +
|
||||||
|
( +
|
||||||
|
nct_id character varying, +
|
||||||
|
sha character varying, +
|
||||||
|
created_at timestamp without time zone NOT NULL, +
|
||||||
|
updated_at timestamp without time zone NOT NULL, +
|
||||||
|
type character varying, +
|
||||||
|
content json, +
|
||||||
|
id bigint NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.study_references +
|
||||||
|
( +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
citation text, +
|
||||||
|
reference_type character varying, +
|
||||||
|
pmid character varying, +
|
||||||
|
nct_id character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.study_searches +
|
||||||
|
( +
|
||||||
|
query character varying NOT NULL, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
updated_at timestamp without time zone NOT NULL, +
|
||||||
|
created_at timestamp without time zone NOT NULL, +
|
||||||
|
beta_api boolean NOT NULL, +
|
||||||
|
name character varying NOT NULL, +
|
||||||
|
grouping character varying NOT NULL, +
|
||||||
|
save_tsv boolean NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE ctgov.verifiers +
|
||||||
|
( +
|
||||||
|
id bigint NOT NULL, +
|
||||||
|
created_at timestamp without time zone NOT NULL, +
|
||||||
|
source json, +
|
||||||
|
updated_at timestamp without time zone NOT NULL, +
|
||||||
|
load_event_id integer, +
|
||||||
|
last_run timestamp without time zone, +
|
||||||
|
differences json NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE history.trial_snapshots +
|
||||||
|
( +
|
||||||
|
completion_date timestamp without time zone, +
|
||||||
|
nct_id character varying(15) NOT NULL, +
|
||||||
|
version integer NOT NULL, +
|
||||||
|
submission_date timestamp without time zone, +
|
||||||
|
primary_completion_date timestamp without time zone, +
|
||||||
|
primary_completion_date_category USER-DEFINED, +
|
||||||
|
start_date timestamp without time zone, +
|
||||||
|
start_date_category USER-DEFINED, +
|
||||||
|
completion_date_category USER-DEFINED, +
|
||||||
|
overall_status USER-DEFINED, +
|
||||||
|
enrollment integer, +
|
||||||
|
enrollment_category USER-DEFINED, +
|
||||||
|
sponsor character varying, +
|
||||||
|
responsible_party character varying +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE http.download_status +
|
||||||
|
( +
|
||||||
|
status USER-DEFINED NOT NULL, +
|
||||||
|
nct_id character varying(15) NOT NULL, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
update_timestamp timestamp with time zone +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE http.responses +
|
||||||
|
( +
|
||||||
|
nct_id character varying(15), +
|
||||||
|
version_a smallint, +
|
||||||
|
version_b smallint, +
|
||||||
|
url character varying(255), +
|
||||||
|
response_code smallint, +
|
||||||
|
response_date timestamp with time zone, +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
html text +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE rxnorm_migrated.ALLNDC_HISTORY +
|
||||||
|
( +
|
||||||
|
sab character varying(10), +
|
||||||
|
ndc11_left9 character(9) NOT NULL, +
|
||||||
|
rowid integer NOT NULL, +
|
||||||
|
ndc character(13) NOT NULL, +
|
||||||
|
suppress character(1), +
|
||||||
|
edate character(6), +
|
||||||
|
sdate character(6), +
|
||||||
|
rxcui character(16) +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE rxnorm_migrated.ALLRXCUI_HISTORY +
|
||||||
|
( +
|
||||||
|
tty character varying(5), +
|
||||||
|
sts character(1), +
|
||||||
|
rxindb character(1), +
|
||||||
|
indb character(1), +
|
||||||
|
rowid integer NOT NULL, +
|
||||||
|
rxcui character(16) NOT NULL, +
|
||||||
|
sab character varying(20), +
|
||||||
|
str character varying(3000), +
|
||||||
|
sdate character(6), +
|
||||||
|
edate character(6) +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE rxnorm_migrated.rxnorm_props +
|
||||||
|
( +
|
||||||
|
rxcui character(8) NOT NULL, +
|
||||||
|
pres smallint NOT NULL, +
|
||||||
|
propvalue1 character varying(4000) NOT NULL, +
|
||||||
|
propname character varying(30) NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE rxnorm_migrated.rxnorm_relations +
|
||||||
|
( +
|
||||||
|
tty2 character(4) NOT NULL, +
|
||||||
|
rxcui1 character(8) NOT NULL, +
|
||||||
|
tty1 character(4) NOT NULL, +
|
||||||
|
cvf character(4) NOT NULL, +
|
||||||
|
rxcui2 character(8) NOT NULL +
|
||||||
|
); +
|
||||||
|
|
||||||
|
CREATE TABLE spl.nsde +
|
||||||
|
( +
|
||||||
|
proprietary_name character varying(500), +
|
||||||
|
package_ndc character varying(50), +
|
||||||
|
application_number_or_citation character varying(25),+
|
||||||
|
package_ndc11 character varying(11), +
|
||||||
|
id integer NOT NULL, +
|
||||||
|
reactivation_date date, +
|
||||||
|
inactivation_date date, +
|
||||||
|
marketing_start_date date, +
|
||||||
|
marketing_end_date date, +
|
||||||
|
billing_unit character varying(35), +
|
||||||
|
dosage_form character varying(155), +
|
||||||
|
marketing_category character varying(160), +
|
||||||
|
product_type character varying(90) +
|
||||||
|
); +
|
||||||
|
|
||||||
|
(76 rows)
|
||||||
|
|
||||||
@ -0,0 +1,415 @@
|
|||||||
|
?column?
|
||||||
|
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_browse_conditions AS SELECT browse_conditions.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT browse_conditions.mesh_term), '|'::text) AS names +
|
||||||
|
FROM ctgov.browse_conditions +
|
||||||
|
GROUP BY browse_conditions.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_browse_interventions AS SELECT browse_interventions.nct_id, +
|
||||||
|
array_to_string(array_agg(browse_interventions.mesh_term), '|'::text) AS names +
|
||||||
|
FROM ctgov.browse_interventions +
|
||||||
|
GROUP BY browse_interventions.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_cities AS SELECT facilities.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT facilities.city), '|'::text) AS names +
|
||||||
|
FROM ctgov.facilities +
|
||||||
|
GROUP BY facilities.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_conditions AS SELECT conditions.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT conditions.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.conditions +
|
||||||
|
GROUP BY conditions.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_countries AS SELECT countries.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT countries.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.countries +
|
||||||
|
WHERE (countries.removed IS NOT TRUE) +
|
||||||
|
GROUP BY countries.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_design_outcomes AS SELECT design_outcomes.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||||
|
FROM ctgov.design_outcomes +
|
||||||
|
GROUP BY design_outcomes.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_facilities AS SELECT facilities.nct_id, +
|
||||||
|
array_to_string(array_agg(facilities.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.facilities +
|
||||||
|
GROUP BY facilities.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_group_types AS SELECT design_groups.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT design_groups.group_type), '|'::text) AS names +
|
||||||
|
FROM ctgov.design_groups +
|
||||||
|
GROUP BY design_groups.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_id_information AS SELECT id_information.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT id_information.id_value), '|'::text) AS names +
|
||||||
|
FROM ctgov.id_information +
|
||||||
|
GROUP BY id_information.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_intervention_types AS SELECT interventions.nct_id, +
|
||||||
|
array_to_string(array_agg(interventions.intervention_type), '|'::text) AS names +
|
||||||
|
FROM ctgov.interventions +
|
||||||
|
GROUP BY interventions.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_interventions AS SELECT interventions.nct_id, +
|
||||||
|
array_to_string(array_agg(interventions.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.interventions +
|
||||||
|
GROUP BY interventions.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_keywords AS SELECT keywords.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT keywords.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.keywords +
|
||||||
|
GROUP BY keywords.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_overall_official_affiliations AS SELECT overall_officials.nct_id, +
|
||||||
|
array_to_string(array_agg(overall_officials.affiliation), '|'::text) AS names +
|
||||||
|
FROM ctgov.overall_officials +
|
||||||
|
GROUP BY overall_officials.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_overall_officials AS SELECT overall_officials.nct_id, +
|
||||||
|
array_to_string(array_agg(overall_officials.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.overall_officials +
|
||||||
|
GROUP BY overall_officials.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_primary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||||
|
FROM ctgov.design_outcomes +
|
||||||
|
WHERE ((design_outcomes.outcome_type)::text = 'primary'::text) +
|
||||||
|
GROUP BY design_outcomes.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_secondary_outcome_measures AS SELECT design_outcomes.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT design_outcomes.measure), '|'::text) AS names +
|
||||||
|
FROM ctgov.design_outcomes +
|
||||||
|
WHERE ((design_outcomes.outcome_type)::text = 'secondary'::text) +
|
||||||
|
GROUP BY design_outcomes.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_sponsors AS SELECT sponsors.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT sponsors.name), '|'::text) AS names +
|
||||||
|
FROM ctgov.sponsors +
|
||||||
|
GROUP BY sponsors.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.all_states AS SELECT facilities.nct_id, +
|
||||||
|
array_to_string(array_agg(DISTINCT facilities.state), '|'::text) AS names +
|
||||||
|
FROM ctgov.facilities +
|
||||||
|
GROUP BY facilities.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.categories AS SELECT search_results.id, +
|
||||||
|
search_results.nct_id, +
|
||||||
|
search_results.name, +
|
||||||
|
search_results.created_at, +
|
||||||
|
search_results.updated_at, +
|
||||||
|
search_results."grouping", +
|
||||||
|
search_results.study_search_id +
|
||||||
|
FROM ctgov.search_results;
|
||||||
|
CREATE OR REPLACE VIEW ctgov.covid_19_studies AS SELECT s.nct_id, +
|
||||||
|
s.overall_status, +
|
||||||
|
s.study_type, +
|
||||||
|
s.official_title, +
|
||||||
|
s.acronym, +
|
||||||
|
s.phase, +
|
||||||
|
s.why_stopped, +
|
||||||
|
s.has_dmc, +
|
||||||
|
s.enrollment, +
|
||||||
|
s.is_fda_regulated_device, +
|
||||||
|
s.is_fda_regulated_drug, +
|
||||||
|
s.is_unapproved_device, +
|
||||||
|
s.has_expanded_access, +
|
||||||
|
s.study_first_submitted_date, +
|
||||||
|
s.last_update_posted_date, +
|
||||||
|
s.results_first_posted_date, +
|
||||||
|
s.start_date, +
|
||||||
|
s.primary_completion_date, +
|
||||||
|
s.completion_date, +
|
||||||
|
s.study_first_posted_date, +
|
||||||
|
cv.number_of_facilities, +
|
||||||
|
cv.has_single_facility, +
|
||||||
|
cv.nlm_download_date, +
|
||||||
|
s.number_of_arms, +
|
||||||
|
s.number_of_groups, +
|
||||||
|
sp.name AS lead_sponsor, +
|
||||||
|
aid.names AS other_ids, +
|
||||||
|
e.gender, +
|
||||||
|
e.gender_based, +
|
||||||
|
e.gender_description, +
|
||||||
|
e.population, +
|
||||||
|
e.minimum_age, +
|
||||||
|
e.maximum_age, +
|
||||||
|
e.criteria, +
|
||||||
|
e.healthy_volunteers, +
|
||||||
|
ak.names AS keywords, +
|
||||||
|
ai.names AS interventions, +
|
||||||
|
ac.names AS conditions, +
|
||||||
|
d.primary_purpose, +
|
||||||
|
d.allocation, +
|
||||||
|
d.observational_model, +
|
||||||
|
d.intervention_model, +
|
||||||
|
d.masking, +
|
||||||
|
d.subject_masked, +
|
||||||
|
d.caregiver_masked, +
|
||||||
|
d.investigator_masked, +
|
||||||
|
d.outcomes_assessor_masked, +
|
||||||
|
ado.names AS design_outcomes, +
|
||||||
|
bs.description AS brief_summary, +
|
||||||
|
dd.description AS detailed_description +
|
||||||
|
FROM (((((((((((ctgov.studies s +
|
||||||
|
FULL JOIN ctgov.all_conditions ac ON (((s.nct_id)::text = (ac.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.all_id_information aid ON (((s.nct_id)::text = (aid.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.all_design_outcomes ado ON (((s.nct_id)::text = (ado.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.all_keywords ak ON (((s.nct_id)::text = (ak.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.all_interventions ai ON (((s.nct_id)::text = (ai.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.sponsors sp ON (((s.nct_id)::text = (sp.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.calculated_values cv ON (((s.nct_id)::text = (cv.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.designs d ON (((s.nct_id)::text = (d.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.eligibilities e ON (((s.nct_id)::text = (e.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.brief_summaries bs ON (((s.nct_id)::text = (bs.nct_id)::text))) +
|
||||||
|
FULL JOIN ctgov.detailed_descriptions dd ON (((s.nct_id)::text = (dd.nct_id)::text))) +
|
||||||
|
WHERE (((sp.lead_or_collaborator)::text = 'lead'::text) AND ((s.nct_id)::text IN ( SELECT search_results.nct_id +
|
||||||
|
FROM ctgov.search_results +
|
||||||
|
WHERE ((search_results.name)::text = 'covid-19'::text))));
|
||||||
|
CREATE OR REPLACE VIEW history.match_drugs_to_trials AS SELECT bi.nct_id, +
|
||||||
|
rp.rxcui, +
|
||||||
|
rp.propvalue1 +
|
||||||
|
FROM (ctgov.browse_interventions bi +
|
||||||
|
JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||||
|
WHERE (((rp.propname)::text = 'RxNorm Name'::text) AND ((bi.nct_id)::text IN ( SELECT trial_snapshots.nct_id +
|
||||||
|
FROM history.trial_snapshots)));
|
||||||
|
CREATE OR REPLACE VIEW http.most_recent_download_status AS SELECT t.nct_id, +
|
||||||
|
t.status, +
|
||||||
|
t.update_timestamp +
|
||||||
|
FROM ( SELECT download_status.id, +
|
||||||
|
download_status.nct_id, +
|
||||||
|
download_status.status, +
|
||||||
|
download_status.update_timestamp, +
|
||||||
|
row_number() OVER (PARTITION BY download_status.nct_id ORDER BY download_status.update_timestamp DESC) AS rn +
|
||||||
|
FROM http.download_status) t +
|
||||||
|
WHERE (t.rn = 1) +
|
||||||
|
ORDER BY t.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW public.time_between_submission_and_start_view AS SELECT s.nct_id, +
|
||||||
|
s.start_date, +
|
||||||
|
ts.version, +
|
||||||
|
ts.submission_date, +
|
||||||
|
abs(((EXTRACT(epoch FROM (ts.submission_date - (s.start_date)::timestamp without time zone)))::double precision / (((24 * 60) * 60))::double precision)) AS start_deviance +
|
||||||
|
FROM (ctgov.studies s +
|
||||||
|
JOIN history.trial_snapshots ts ON (((s.nct_id)::text = (ts.nct_id)::text))) +
|
||||||
|
WHERE ((s.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||||
|
FROM "DiseaseBurden".trial_to_icd10 tti));
|
||||||
|
CREATE OR REPLACE VIEW public.rank_proximity_to_start_time_view AS SELECT cte.nct_id, +
|
||||||
|
cte.version, +
|
||||||
|
row_number() OVER (PARTITION BY cte.nct_id ORDER BY cte.start_deviance) AS rownum, +
|
||||||
|
cte.submission_date, +
|
||||||
|
cte.start_deviance, +
|
||||||
|
cte.start_date, +
|
||||||
|
ts.primary_completion_date, +
|
||||||
|
ts.primary_completion_date_category, +
|
||||||
|
ts.overall_status, +
|
||||||
|
ts.enrollment, +
|
||||||
|
ts.enrollment_category +
|
||||||
|
FROM (time_between_submission_and_start_view cte +
|
||||||
|
JOIN history.trial_snapshots ts ON ((((cte.nct_id)::text = (ts.nct_id)::text) AND (cte.version = ts.version))));
|
||||||
|
CREATE OR REPLACE VIEW public.enrollment_closest_to_start_view AS SELECT cte2.nct_id, +
|
||||||
|
min(cte2.rownum) AS enrollment_source +
|
||||||
|
FROM rank_proximity_to_start_time_view cte2 +
|
||||||
|
WHERE (cte2.enrollment IS NOT NULL) +
|
||||||
|
GROUP BY cte2.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW public.match_trials_to_bn_in AS WITH trialncts AS ( +
|
||||||
|
SELECT DISTINCT ts.nct_id +
|
||||||
|
FROM history.trial_snapshots ts +
|
||||||
|
) +
|
||||||
|
SELECT bi.nct_id, +
|
||||||
|
bi.downcase_mesh_term, +
|
||||||
|
rr.tty2, +
|
||||||
|
rr.rxcui2 AS bn_or_in_cui, +
|
||||||
|
count(*) AS count +
|
||||||
|
FROM ((ctgov.browse_interventions bi +
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_props rp ON (((bi.downcase_mesh_term)::text = (rp.propvalue1)::text))) +
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((rr.rxcui1 = rp.rxcui))) +
|
||||||
|
WHERE (((bi.nct_id)::text IN ( SELECT trialncts.nct_id +
|
||||||
|
FROM trialncts)) AND ((bi.mesh_type)::text = 'mesh-list'::text) AND ((rp.propname)::text = 'Active_ingredient_name'::text) AND (rr.tty2 = ANY (ARRAY['BN'::bpchar, 'IN'::bpchar, 'MIN'::bpchar]))) +
|
||||||
|
GROUP BY bi.nct_id, bi.downcase_mesh_term, rr.tty2, rr.rxcui2 +
|
||||||
|
ORDER BY bi.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW public.match_trial_to_ndc11 AS SELECT mttbi.nct_id, +
|
||||||
|
ah.ndc, +
|
||||||
|
count(*) AS count +
|
||||||
|
FROM ((match_trials_to_bn_in mttbi +
|
||||||
|
LEFT JOIN rxnorm_migrated.rxnorm_relations rr ON ((mttbi.bn_or_in_cui = rr.rxcui1))) +
|
||||||
|
LEFT JOIN rxnorm_migrated."ALLNDC_HISTORY" ah ON ((rr.rxcui2 = ah.rxcui))) +
|
||||||
|
WHERE ((rr.tty1 = 'BN'::bpchar) AND (rr.tty2 = ANY (ARRAY['SBD'::bpchar, 'BPCK'::bpchar])) AND ((ah.sab)::text = 'RXNORM'::text)) +
|
||||||
|
GROUP BY mttbi.nct_id, ah.ndc +
|
||||||
|
ORDER BY mttbi.nct_id, ah.ndc;
|
||||||
|
CREATE OR REPLACE VIEW public.match_trial_to_marketing_start_date AS SELECT mttn.nct_id, +
|
||||||
|
n.application_number_or_citation, +
|
||||||
|
min(n.marketing_start_date) AS min +
|
||||||
|
FROM (match_trial_to_ndc11 mttn +
|
||||||
|
JOIN spl.nsde n ON ((mttn.ndc = (n.package_ndc11)::bpchar))) +
|
||||||
|
WHERE (((n.product_type)::text = 'HUMAN PRESCRIPTION DRUG'::text) AND ((n.marketing_category)::text = ANY (ARRAY[('NDA'::character varying)::text, ('ANDA'::character varying)::text, ('BLA'::character varying)::text, ('NDA authorized generic'::character varying)::text, ('NDA AUTHORIZED GENERIC'::character varying)::text]))) +
|
||||||
|
GROUP BY mttn.nct_id, n.application_number_or_citation +
|
||||||
|
ORDER BY mttn.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW public.view_burdens_cte AS SELECT b.measure_id, +
|
||||||
|
b.location_id, +
|
||||||
|
b.sex_id, +
|
||||||
|
b.age_id, +
|
||||||
|
b.cause_id, +
|
||||||
|
b.metric_id, +
|
||||||
|
b.year, +
|
||||||
|
b.val, +
|
||||||
|
b.upper_95, +
|
||||||
|
b.lower_95, +
|
||||||
|
b.key_column +
|
||||||
|
FROM "DiseaseBurden".burdens b +
|
||||||
|
WHERE ((b.sex_id = 3) AND (b.metric_id = 1) AND (b.measure_id = 2) AND (b.age_id = 22));
|
||||||
|
CREATE OR REPLACE VIEW public.view_burdens_cte2 AS SELECT c1.cause_id, +
|
||||||
|
c1.year, +
|
||||||
|
c1.val AS h_sdi_val, +
|
||||||
|
c1.upper_95 AS h_sdi_u95, +
|
||||||
|
c1.lower_95 AS h_sdi_l95, +
|
||||||
|
c2.val AS hm_sdi_val, +
|
||||||
|
c2.upper_95 AS hm_sdi_u95, +
|
||||||
|
c2.lower_95 AS hm_sdi_l95, +
|
||||||
|
c3.val AS m_sdi_val, +
|
||||||
|
c3.upper_95 AS m_sdi_u95, +
|
||||||
|
c3.lower_95 AS m_sdi_l95, +
|
||||||
|
c4.val AS lm_sdi_val, +
|
||||||
|
c4.upper_95 AS lm_sdi_u95, +
|
||||||
|
c4.lower_95 AS lm_sdi_l95, +
|
||||||
|
c5.val AS l_sdi_val, +
|
||||||
|
c5.upper_95 AS l_sdi_u95, +
|
||||||
|
c5.lower_95 AS l_sdi_l95 +
|
||||||
|
FROM ((((view_burdens_cte c1 +
|
||||||
|
JOIN view_burdens_cte c2 ON (((c1.cause_id = c2.cause_id) AND (c1.year = c2.year)))) +
|
||||||
|
JOIN view_burdens_cte c3 ON (((c1.cause_id = c3.cause_id) AND (c1.year = c3.year)))) +
|
||||||
|
JOIN view_burdens_cte c4 ON (((c1.cause_id = c4.cause_id) AND (c1.year = c4.year)))) +
|
||||||
|
JOIN view_burdens_cte c5 ON (((c1.cause_id = c5.cause_id) AND (c1.year = c5.year)))) +
|
||||||
|
WHERE ((c1.location_id = 44635) AND (c2.location_id = 44634) AND (c3.location_id = 44639) AND (c4.location_id = 44636) AND (c5.location_id = 44637));
|
||||||
|
CREATE OR REPLACE VIEW public.view_cte AS SELECT ts.nct_id, +
|
||||||
|
ts.primary_completion_date, +
|
||||||
|
ts.primary_completion_date_category, +
|
||||||
|
ts.enrollment, +
|
||||||
|
ts.start_date, +
|
||||||
|
ts.enrollment_category, +
|
||||||
|
ts.overall_status, +
|
||||||
|
min(ts.submission_date) AS earliest_date_observed +
|
||||||
|
FROM history.trial_snapshots ts +
|
||||||
|
WHERE (((ts.nct_id)::text IN ( SELECT DISTINCT tti.nct_id +
|
||||||
|
FROM "DiseaseBurden".trial_to_icd10 tti +
|
||||||
|
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type))) AND (ts.submission_date >= ts.start_date) AND (ts.overall_status <> ALL (ARRAY['Completed'::history.study_statuses, 'Terminated'::history.study_statuses]))) +
|
||||||
|
GROUP BY ts.nct_id, ts.primary_completion_date, ts.primary_completion_date_category, ts.start_date, ts.enrollment, ts.enrollment_category, ts.overall_status;
|
||||||
|
CREATE OR REPLACE VIEW public.view_disbur_cte0 AS SELECT tti.nct_id, +
|
||||||
|
tti.ui, +
|
||||||
|
tti.condition, +
|
||||||
|
itc.cause_text, +
|
||||||
|
ch.cause_id, +
|
||||||
|
ch.level +
|
||||||
|
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||||
|
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||||
|
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||||
|
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type);
|
||||||
|
CREATE OR REPLACE VIEW public.view_disbur_cte AS SELECT view_disbur_cte0.nct_id, +
|
||||||
|
max(view_disbur_cte0.level) AS max_level +
|
||||||
|
FROM view_disbur_cte0 +
|
||||||
|
GROUP BY view_disbur_cte0.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW public.view_trial_to_cause AS SELECT tti.nct_id, +
|
||||||
|
tti.ui, +
|
||||||
|
tti.condition, +
|
||||||
|
itc.cause_text, +
|
||||||
|
ch.cause_id, +
|
||||||
|
ch.level +
|
||||||
|
FROM (("DiseaseBurden".trial_to_icd10 tti +
|
||||||
|
JOIN "DiseaseBurden".icd10_to_cause itc ON ((replace(replace((tti.ui)::text, '-'::text, ''::text), '.'::text, ''::text) = replace(replace((itc.code)::text, '-'::text, ''::text), '.'::text, ''::text)))) +
|
||||||
|
JOIN "DiseaseBurden".cause_hierarchy ch ON (((itc.cause_text)::text = (ch.cause_name)::text))) +
|
||||||
|
WHERE (tti.approved = 'accepted'::"DiseaseBurden".validation_type) +
|
||||||
|
ORDER BY tti.nct_id;
|
||||||
|
CREATE OR REPLACE VIEW public.view_disbur_cte2 AS SELECT ttc.nct_id, +
|
||||||
|
ttc.ui, +
|
||||||
|
ttc.condition, +
|
||||||
|
ttc.cause_text, +
|
||||||
|
ttc.cause_id, +
|
||||||
|
disbur_cte.max_level +
|
||||||
|
FROM (view_trial_to_cause ttc +
|
||||||
|
JOIN view_disbur_cte disbur_cte ON (((disbur_cte.nct_id)::text = (ttc.nct_id)::text))) +
|
||||||
|
WHERE (ttc.level = disbur_cte.max_level) +
|
||||||
|
GROUP BY ttc.nct_id, ttc.ui, ttc.condition, ttc.cause_text, ttc.cause_id, disbur_cte.max_level +
|
||||||
|
ORDER BY ttc.nct_id, ttc.ui;
|
||||||
|
CREATE OR REPLACE VIEW public.view_disbur_cte3 AS SELECT disbur_cte2.nct_id, +
|
||||||
|
SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) AS code, +
|
||||||
|
disbur_cte2.condition, +
|
||||||
|
disbur_cte2.cause_text, +
|
||||||
|
disbur_cte2.cause_id, +
|
||||||
|
ic.chapter_code AS category_id, +
|
||||||
|
ic.group_name, +
|
||||||
|
disbur_cte2.max_level +
|
||||||
|
FROM (view_disbur_cte2 disbur_cte2 +
|
||||||
|
JOIN "DiseaseBurden".icd10_categories ic ON (((SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) <= (ic.end_code)::text) AND (SUBSTRING(disbur_cte2.ui FROM 1 FOR 3) >= (ic.start_code)::text)))) +
|
||||||
|
WHERE (ic.level = 1);
|
||||||
|
CREATE OR REPLACE VIEW public.formatted_data AS SELECT cte.nct_id, +
|
||||||
|
cte.start_date, +
|
||||||
|
cte.enrollment AS current_enrollment, +
|
||||||
|
cte.enrollment_category, +
|
||||||
|
cte.overall_status AS current_status, +
|
||||||
|
cte.earliest_date_observed, +
|
||||||
|
(EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))) AS elapsed_duration, +
|
||||||
|
count(DISTINCT mttmsd.application_number_or_citation) AS n_brands, +
|
||||||
|
dbc3.code, +
|
||||||
|
dbc3.condition, +
|
||||||
|
dbc3.cause_text, +
|
||||||
|
dbc3.cause_id, +
|
||||||
|
dbc3.category_id, +
|
||||||
|
dbc3.group_name, +
|
||||||
|
dbc3.max_level, +
|
||||||
|
b.year, +
|
||||||
|
b.h_sdi_val, +
|
||||||
|
b.h_sdi_u95, +
|
||||||
|
b.h_sdi_l95, +
|
||||||
|
b.hm_sdi_val, +
|
||||||
|
b.hm_sdi_u95, +
|
||||||
|
b.hm_sdi_l95, +
|
||||||
|
b.m_sdi_val, +
|
||||||
|
b.m_sdi_u95, +
|
||||||
|
b.m_sdi_l95, +
|
||||||
|
b.lm_sdi_val, +
|
||||||
|
b.lm_sdi_u95, +
|
||||||
|
b.lm_sdi_l95, +
|
||||||
|
b.l_sdi_val, +
|
||||||
|
b.l_sdi_u95, +
|
||||||
|
b.l_sdi_l95 +
|
||||||
|
FROM (((view_cte cte +
|
||||||
|
JOIN match_trial_to_marketing_start_date mttmsd ON (((cte.nct_id)::text = (mttmsd.nct_id)::text))) +
|
||||||
|
JOIN view_disbur_cte3 dbc3 ON (((dbc3.nct_id)::text = (cte.nct_id)::text))) +
|
||||||
|
JOIN view_burdens_cte2 b ON (((b.cause_id = dbc3.cause_id) AND (EXTRACT(year FROM b.year) = EXTRACT(year FROM cte.earliest_date_observed))))) +
|
||||||
|
WHERE (mttmsd.min <= cte.earliest_date_observed) +
|
||||||
|
GROUP BY cte.nct_id, cte.start_date, cte.enrollment, cte.enrollment_category, cte.overall_status, cte.earliest_date_observed, (EXTRACT(epoch FROM (cte.earliest_date_observed - cte.start_date)) / EXTRACT(epoch FROM (cte.primary_completion_date - cte.start_date))), dbc3.code, dbc3.condition, dbc3.cause_text, dbc3.cause_id, dbc3.category_id, dbc3.group_name, dbc3.max_level, b.cause_id, b.year, b.h_sdi_val, b.h_sdi_u95, b.h_sdi_l95, b.hm_sdi_val, b.hm_sdi_u95, b.hm_sdi_l95, b.m_sdi_val, b.m_sdi_u95, b.m_sdi_l95, b.lm_sdi_val, b.lm_sdi_u95, b.lm_sdi_l95, b.l_sdi_val, b.l_sdi_u95, b.l_sdi_l95+
|
||||||
|
ORDER BY cte.nct_id, cte.earliest_date_observed;
|
||||||
|
CREATE OR REPLACE VIEW public.formatted_data_with_planned_enrollment AS SELECT f.nct_id, +
|
||||||
|
f.start_date, +
|
||||||
|
f.current_enrollment, +
|
||||||
|
f.enrollment_category, +
|
||||||
|
f.current_status, +
|
||||||
|
f.earliest_date_observed, +
|
||||||
|
f.elapsed_duration, +
|
||||||
|
f.n_brands, +
|
||||||
|
f.code, +
|
||||||
|
f.condition, +
|
||||||
|
f.cause_text, +
|
||||||
|
f.cause_id, +
|
||||||
|
f.category_id, +
|
||||||
|
f.group_name, +
|
||||||
|
f.max_level, +
|
||||||
|
f.year, +
|
||||||
|
f.h_sdi_val, +
|
||||||
|
f.h_sdi_u95, +
|
||||||
|
f.h_sdi_l95, +
|
||||||
|
f.hm_sdi_val, +
|
||||||
|
f.hm_sdi_u95, +
|
||||||
|
f.hm_sdi_l95, +
|
||||||
|
f.m_sdi_val, +
|
||||||
|
f.m_sdi_u95, +
|
||||||
|
f.m_sdi_l95, +
|
||||||
|
f.lm_sdi_val, +
|
||||||
|
f.lm_sdi_u95, +
|
||||||
|
f.lm_sdi_l95, +
|
||||||
|
f.l_sdi_val, +
|
||||||
|
f.l_sdi_u95, +
|
||||||
|
f.l_sdi_l95, +
|
||||||
|
s.overall_status AS final_status, +
|
||||||
|
c2a.version, +
|
||||||
|
c2a.enrollment AS planned_enrollment +
|
||||||
|
FROM (((formatted_data f +
|
||||||
|
JOIN ctgov.studies s ON (((f.nct_id)::text = (s.nct_id)::text))) +
|
||||||
|
JOIN enrollment_closest_to_start_view c3e ON (((c3e.nct_id)::text = (f.nct_id)::text))) +
|
||||||
|
JOIN rank_proximity_to_start_time_view c2a ON ((((c3e.nct_id)::text = (c2a.nct_id)::text) AND (c3e.enrollment_source = c2a.rownum))));
|
||||||
|
CREATE OR REPLACE VIEW http.trials_to_download AS SELECT most_recent_download_status.nct_id +
|
||||||
|
FROM http.most_recent_download_status +
|
||||||
|
WHERE (most_recent_download_status.status = 'Of Interest'::http.history_download_status);
|
||||||
|
CREATE OR REPLACE VIEW public.primary_design_outcomes AS SELECT do2.id, +
|
||||||
|
do2.nct_id, +
|
||||||
|
do2.outcome_type, +
|
||||||
|
do2.measure, +
|
||||||
|
do2.time_frame, +
|
||||||
|
do2.population, +
|
||||||
|
do2.description +
|
||||||
|
FROM ctgov.design_outcomes do2 +
|
||||||
|
WHERE (((do2.outcome_type)::text = 'primary'::text) AND ((do2.nct_id)::text IN ( SELECT DISTINCT fd.nct_id +
|
||||||
|
FROM formatted_data fd)));
|
||||||
|
(40 rows)
|
||||||
|
|
||||||
@ -1,15 +0,0 @@
|
|||||||
# Description
|
|
||||||
# This program tests the ability to connect to the DB
|
|
||||||
#
|
|
||||||
|
|
||||||
import psycopg2 as psyco
|
|
||||||
|
|
||||||
conn = psyco.connect(dbname="aact_db", user="root", host="will-office", password="root")
|
|
||||||
|
|
||||||
curse = conn.cursor()
|
|
||||||
|
|
||||||
curse.execute("select nct_id FROM ctgov.studies LIMIT 10;")
|
|
||||||
print(curse.fetchall())
|
|
||||||
|
|
||||||
curse.close()
|
|
||||||
conn.close()
|
|
||||||
@ -1,20 +0,0 @@
|
|||||||
# File descriptions
|
|
||||||
|
|
||||||
db_connection.py
|
|
||||||
- is just a test file
|
|
||||||
- [ ] TODO: should be incorporated in a tests justfile recipe. maybe moved to a test location?
|
|
||||||
|
|
||||||
|
|
||||||
downloader_prep.sql
|
|
||||||
- contains sql to identify which trials are of interest.
|
|
||||||
- [ ] TODO: add into the automation routine somewhere.
|
|
||||||
|
|
||||||
downloader.py
|
|
||||||
- does the actual downloading
|
|
||||||
- set up to also act as a Python module if needed.
|
|
||||||
- [ ] TODO: there are quite a few things that need to be cleaned up or refactored.
|
|
||||||
|
|
||||||
./tests/download_tests.py
|
|
||||||
- downloads some test html values from clinicaltrials.gov
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,19 +0,0 @@
|
|||||||
import downloader as dldr
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
|
|
||||||
dbc = dldr.DBConnectionCreator(
|
|
||||||
dbname="aact_db"
|
|
||||||
,user="root"
|
|
||||||
,host="will-office"
|
|
||||||
,port=5432
|
|
||||||
,password="root")
|
|
||||||
|
|
||||||
with open('selected_trials.sql','r') as fh:
|
|
||||||
sqlfile = fh.read()
|
|
||||||
with dbc.new() as connection:
|
|
||||||
with connection.cursor() as curse:
|
|
||||||
curse.execute(sqlfile)
|
|
||||||
@ -0,0 +1,55 @@
|
|||||||
|
import ollama
|
||||||
|
|
||||||
|
import psycopg
|
||||||
|
from psycopg.rows import dict_row
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
|
def fetch_all_rows(conn_params: dict) -> List[Dict]:
    """Return every row of ``public.primary_design_outcomes`` as a dict.

    Args:
        conn_params: Keyword arguments forwarded to ``psycopg.connect``
            (for example dbname, user, password, host and port).

    Returns:
        The result of ``cursor.fetchall()``; because the connection is
        opened with the ``dict_row`` row factory, each row is a dict keyed
        by column name.

    Raises:
        Whatever ``psycopg.connect`` or ``cursor.execute`` raise; errors are
        not swallowed here.
    """
    # Select all rows from the view.
    sql_query = "SELECT * FROM public.primary_design_outcomes;"

    # The context managers release the cursor and the connection even when
    # the query raises; the earlier explicit close() calls were skipped on
    # any exception, leaking the connection.
    # NOTE(review): with psycopg 3 the connection block also commits on a
    # clean exit, which is harmless for this read-only query -- confirm.
    with psycopg.connect(**conn_params, row_factory=dict_row) as conn:
        with conn.cursor() as cursor:
            cursor.execute(sql_query)
            return cursor.fetchall()
|
||||||
|
|
||||||
|
# Example usage
|
||||||
|
conn_params = {
|
||||||
|
"dbname": "aact_db",
|
||||||
|
"user": "root",
|
||||||
|
"password": "root",
|
||||||
|
"host": "localhost",
|
||||||
|
"port": "5432"
|
||||||
|
}
|
||||||
|
|
||||||
|
outcome_description = '''
|
||||||
|
Measure: {measure}
|
||||||
|
Time Frame: {time_frame}
|
||||||
|
Description: {description}
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
#check for model
|
||||||
|
|
||||||
|
#get information
|
||||||
|
rows_dicts = fetch_all_rows(conn_params)
|
||||||
|
|
||||||
|
for row in rows_dicts[:3]:
|
||||||
|
text_data = outcome_description.format(**row)
|
||||||
|
r = ollama.generate(model='youainti/llama3.1-extractor:2024-08-28.2', prompt=text_data)
|
||||||
|
|
||||||
|
print(text_data)
|
||||||
|
print(r["response"])
|
||||||
|
|
||||||
@ -0,0 +1,31 @@
|
|||||||
|
FROM llama3.1
|
||||||
|
PARAMETER num_ctx 8192
|
||||||
|
PARAMETER seed 11021585
|
||||||
|
SYSTEM """
|
||||||
|
You are a Natural Language Processor, tasked with extracting data about outcome measures from textual tables.
|
||||||
|
You are to extract the longest observation time from the primary objectives for this clinical trial.
|
||||||
|
I need you to distinguish between trials that have a specified period during which they track participants
|
||||||
|
and those trials that don't.
|
||||||
|
|
||||||
|
Return results as JSON, with the format
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"longest_observation_scalar": <number>,
|
||||||
|
"longest_observation_unit": <string: minutes, hours, days, weeks, months, years>
|
||||||
|
}
|
||||||
|
```
|
||||||
|
Do not return any other commentary.
|
||||||
|
If the study does not have a specified end of observation, set the values to `null`.
|
||||||
|
If the text does not appear to be related to clinical trials, return `{ null }`
|
||||||
|
|
||||||
|
For example:
|
||||||
|
- 'baseline to week 3' should give: `{ "longest_observation_scalar": 3, "longest_observation_unit": "weeks" }`
|
||||||
|
- 'tracked 4 months' should give: `{ "longest_observation_scalar": 4, "longest_observation_unit": "months"}`
|
||||||
|
- 'randomization to 14 months' should give `{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}`
|
||||||
|
- 'After day 1 to week 48' should give `{ "longest_observation_scalar": 48, "longest_observation_unit": "weeks"}`
|
||||||
|
- 'randomization to 14 months' should give `{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}`
|
||||||
|
- 'baseline until death' should give: `{ "longest_observation_scalar": null, "longest_observation_unit": null }`
|
||||||
|
- 'progression free survival up to 4 years' should give: `{ "longest_observation_scalar": null, "longest_observation_unit": null }`
|
||||||
|
- 'the quick brown fox jumped over the lazy dog for one hour' should give: `{null}`
|
||||||
|
|
||||||
|
"""
|
||||||
|
@ -0,0 +1,19 @@
|
|||||||
|
### Template:
|
||||||
|
{
|
||||||
|
"longest_observation_scalar": "",
|
||||||
|
"longest_observation_unit": "",
|
||||||
|
}
|
||||||
|
### Examples:
|
||||||
|
|
||||||
|
|
||||||
|
### Text:
|
||||||
|
|
||||||
|
{ "longest_observation_scalar": 3, "longest_observation_unit": "weeks" }
|
||||||
|
{ "longest_observation_scalar": 4, "longest_observation_unit": "months"}
|
||||||
|
{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}
|
||||||
|
{ "longest_observation_scalar": 48, "longest_observation_unit": "weeks"}
|
||||||
|
{ "longest_observation_scalar": 14, "longest_observation_unit": "months"}
|
||||||
|
{ "longest_observation_scalar": null, "longest_observation_unit": null }
|
||||||
|
{ "longest_observation_scalar": null, "longest_observation_unit": null }
|
||||||
|
|
||||||
|
"""
|
||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
https://doi.org/10.6069/GHCW-8955
|
||||||
@ -0,0 +1 @@
|
|||||||
|
,dad,home-pc,03.04.2023 15:13,file:///home/dad/.config/libreoffice/4;
|
||||||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,365 @@
|
|||||||
|
Cause ID,Cause Name,Parent ID,Parent Name,Level,Cause Outline,Sort Order,YLL Only,YLD Only
|
||||||
|
294,All causes,294,All causes,0,Total,1,,
|
||||||
|
295,"Communicable, maternal, neonatal, and nutritional diseases",294,All causes,1,A,2,,
|
||||||
|
955,HIV/AIDS and sexually transmitted infections,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.1,3,,
|
||||||
|
298,HIV/AIDS,955,HIV/AIDS and sexually transmitted infections,3,A.1.1,4,,
|
||||||
|
948,HIV/AIDS - Drug-susceptible Tuberculosis,298,HIV/AIDS,4,A.1.1.1,5,,
|
||||||
|
949,HIV/AIDS - Multidrug-resistant Tuberculosis without extensive drug resistance,298,HIV/AIDS,4,A.1.1.2,6,,
|
||||||
|
950,HIV/AIDS - Extensively drug-resistant Tuberculosis,298,HIV/AIDS,4,A.1.1.3,7,,
|
||||||
|
300,HIV/AIDS resulting in other diseases,298,HIV/AIDS,4,A.1.1.4,8,,
|
||||||
|
393,Sexually transmitted infections excluding HIV,955,HIV/AIDS and sexually transmitted infections,3,A.1.2,9,,
|
||||||
|
394,Syphilis,393,Sexually transmitted infections excluding HIV,4,A.1.2.1,10,,
|
||||||
|
395,Chlamydial infection,393,Sexually transmitted infections excluding HIV,4,A.1.2.2,11,,
|
||||||
|
396,Gonococcal infection,393,Sexually transmitted infections excluding HIV,4,A.1.2.3,12,,
|
||||||
|
397,Trichomoniasis,393,Sexually transmitted infections excluding HIV,4,A.1.2.4,13,,X
|
||||||
|
398,Genital herpes,393,Sexually transmitted infections excluding HIV,4,A.1.2.5,14,,X
|
||||||
|
399,Other sexually transmitted infections,393,Sexually transmitted infections excluding HIV,4,A.1.2.6,15,,
|
||||||
|
956,Respiratory infections and tuberculosis,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.2,16,,
|
||||||
|
297,Tuberculosis,956,Respiratory infections and tuberculosis,3,A.2.1,17,,
|
||||||
|
954,Latent tuberculosis infection,297,Tuberculosis,4,A.2.1.1,18,,X
|
||||||
|
934,Drug-susceptible tuberculosis,297,Tuberculosis,4,A.2.1.2,19,,
|
||||||
|
946,Multidrug-resistant tuberculosis without extensive drug resistance,297,Tuberculosis,4,A.2.1.3,20,,
|
||||||
|
947,Extensively drug-resistant tuberculosis,297,Tuberculosis,4,A.2.1.4,21,,
|
||||||
|
322,Lower respiratory infections,956,Respiratory infections and tuberculosis,3,A.2.2,22,,
|
||||||
|
328,Upper respiratory infections,956,Respiratory infections and tuberculosis,3,A.2.3,23,,
|
||||||
|
329,Otitis media,956,Respiratory infections and tuberculosis,3,A.2.4,24,,
|
||||||
|
957,Enteric infections,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.3,25,,
|
||||||
|
302,Diarrheal diseases,957,Enteric infections,3,A.3.1,26,,
|
||||||
|
958,Typhoid and paratyphoid,957,Enteric infections,3,A.3.2,27,,
|
||||||
|
319,Typhoid fever,958,Typhoid and paratyphoid,4,A.3.2.1,28,,
|
||||||
|
320,Paratyphoid fever,958,Typhoid and paratyphoid,4,A.3.2.2,29,,
|
||||||
|
959,Invasive Non-typhoidal Salmonella (iNTS),957,Enteric infections,3,A.3.3,30,,
|
||||||
|
321,Other intestinal infectious diseases,957,Enteric infections,3,A.3.4,31,,
|
||||||
|
344,Neglected tropical diseases and malaria,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.4,32,,
|
||||||
|
345,Malaria,344,Neglected tropical diseases and malaria,3,A.4.1,33,,
|
||||||
|
346,Chagas disease,344,Neglected tropical diseases and malaria,3,A.4.2,34,,
|
||||||
|
347,Leishmaniasis,344,Neglected tropical diseases and malaria,3,A.4.3,35,,
|
||||||
|
348,Visceral leishmaniasis,347,Leishmaniasis,4,A.4.3.1,36,,
|
||||||
|
349,Cutaneous and mucocutaneous leishmaniasis,347,Leishmaniasis,4,A.4.3.2,37,,X
|
||||||
|
350,African trypanosomiasis,344,Neglected tropical diseases and malaria,3,A.4.4,38,,
|
||||||
|
351,Schistosomiasis,344,Neglected tropical diseases and malaria,3,A.4.5,39,,
|
||||||
|
352,Cysticercosis,344,Neglected tropical diseases and malaria,3,A.4.6,40,,
|
||||||
|
353,Cystic echinococcosis,344,Neglected tropical diseases and malaria,3,A.4.7,41,,
|
||||||
|
354,Lymphatic filariasis,344,Neglected tropical diseases and malaria,3,A.4.8,42,,X
|
||||||
|
355,Onchocerciasis,344,Neglected tropical diseases and malaria,3,A.4.9,43,,X
|
||||||
|
356,Trachoma,344,Neglected tropical diseases and malaria,3,A.4.10,44,,X
|
||||||
|
357,Dengue,344,Neglected tropical diseases and malaria,3,A.4.11,45,,
|
||||||
|
358,Yellow fever,344,Neglected tropical diseases and malaria,3,A.4.12,46,,
|
||||||
|
359,Rabies,344,Neglected tropical diseases and malaria,3,A.4.13,47,,
|
||||||
|
360,Intestinal nematode infections,344,Neglected tropical diseases and malaria,3,A.4.14,48,,
|
||||||
|
361,Ascariasis,360,Intestinal nematode infections,4,A.4.14.1,49,,
|
||||||
|
362,Trichuriasis,360,Intestinal nematode infections,4,A.4.14.2,50,,X
|
||||||
|
363,Hookworm disease,360,Intestinal nematode infections,4,A.4.14.3,51,,X
|
||||||
|
364,Food-borne trematodiases,344,Neglected tropical diseases and malaria,3,A.4.15,52,,X
|
||||||
|
405,Leprosy,344,Neglected tropical diseases and malaria,3,A.4.16,53,,X
|
||||||
|
843,Ebola,344,Neglected tropical diseases and malaria,3,A.4.17,54,,
|
||||||
|
935,Zika virus,344,Neglected tropical diseases and malaria,3,A.4.18,55,,
|
||||||
|
936,Guinea worm disease,344,Neglected tropical diseases and malaria,3,A.4.19,56,,X
|
||||||
|
365,Other neglected tropical diseases,344,Neglected tropical diseases and malaria,3,A.4.20,57,,
|
||||||
|
961,Other infectious diseases,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.5,58,,
|
||||||
|
332,Meningitis,961,Other infectious diseases,3,A.5.1,59,,
|
||||||
|
337,Encephalitis,961,Other infectious diseases,3,A.5.2,60,,
|
||||||
|
338,Diphtheria,961,Other infectious diseases,3,A.5.3,61,,
|
||||||
|
339,Whooping cough,961,Other infectious diseases,3,A.5.4,62,,
|
||||||
|
340,Tetanus,961,Other infectious diseases,3,A.5.5,63,,
|
||||||
|
341,Measles,961,Other infectious diseases,3,A.5.6,64,,
|
||||||
|
342,Varicella and herpes zoster,961,Other infectious diseases,3,A.5.7,65,,
|
||||||
|
400,Acute hepatitis,961,Other infectious diseases,3,A.5.8,66,,
|
||||||
|
401,Acute hepatitis A,400,Acute hepatitis,4,A.5.8.1,67,,
|
||||||
|
402,Acute hepatitis B,400,Acute hepatitis,4,A.5.8.2,68,,
|
||||||
|
403,Acute hepatitis C,400,Acute hepatitis,4,A.5.8.3,69,,
|
||||||
|
404,Acute hepatitis E,400,Acute hepatitis,4,A.5.8.4,70,,
|
||||||
|
408,Other unspecified infectious diseases,961,Other infectious diseases,3,A.5.9,71,,
|
||||||
|
962,Maternal and neonatal disorders,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.6,72,,
|
||||||
|
366,Maternal disorders,962,Maternal and neonatal disorders,3,A.6.1,73,,
|
||||||
|
367,Maternal hemorrhage,366,Maternal disorders,4,A.6.1.1,74,,
|
||||||
|
368,Maternal sepsis and other maternal infections,366,Maternal disorders,4,A.6.1.2,75,,
|
||||||
|
369,Maternal hypertensive disorders,366,Maternal disorders,4,A.6.1.3,76,,
|
||||||
|
370,Maternal obstructed labor and uterine rupture,366,Maternal disorders,4,A.6.1.4,77,,
|
||||||
|
995,Maternal abortion and miscarriage,366,Maternal disorders,4,A.6.1.5,78,,
|
||||||
|
374,Ectopic pregnancy,366,Maternal disorders,4,A.6.1.6,79,,
|
||||||
|
375,Indirect maternal deaths,366,Maternal disorders,4,A.6.1.7,80,X,
|
||||||
|
376,Late maternal deaths,366,Maternal disorders,4,A.6.1.8,81,X,
|
||||||
|
741,Maternal deaths aggravated by HIV/AIDS,366,Maternal disorders,4,A.6.1.9,82,X,
|
||||||
|
379,Other maternal disorders,366,Maternal disorders,4,A.6.1.10,83,,
|
||||||
|
380,Neonatal disorders,962,Maternal and neonatal disorders,3,A.6.2,84,,
|
||||||
|
381,Neonatal preterm birth,380,Neonatal disorders,4,A.6.2.1,85,,
|
||||||
|
382,Neonatal encephalopathy due to birth asphyxia and trauma,380,Neonatal disorders,4,A.6.2.2,86,,
|
||||||
|
383,Neonatal sepsis and other neonatal infections,380,Neonatal disorders,4,A.6.2.3,87,,
|
||||||
|
384,Hemolytic disease and other neonatal jaundice,380,Neonatal disorders,4,A.6.2.4,88,,
|
||||||
|
385,Other neonatal disorders,380,Neonatal disorders,4,A.6.2.5,89,,
|
||||||
|
386,Nutritional deficiencies,295,"Communicable, maternal, neonatal, and nutritional diseases",2,A.7,90,,
|
||||||
|
387,Protein-energy malnutrition,386,Nutritional deficiencies,3,A.7.1,91,,
|
||||||
|
388,Iodine deficiency,386,Nutritional deficiencies,3,A.7.2,92,,X
|
||||||
|
389,Vitamin A deficiency,386,Nutritional deficiencies,3,A.7.3,93,,X
|
||||||
|
390,Dietary iron deficiency,386,Nutritional deficiencies,3,A.7.4,94,,X
|
||||||
|
391,Other nutritional deficiencies,386,Nutritional deficiencies,3,A.7.5,95,,
|
||||||
|
409,Non-communicable diseases,294,All causes,1,B,96,,
|
||||||
|
410,Neoplasms,409,Non-communicable diseases,2,B.1,97,,
|
||||||
|
444,Lip and oral cavity cancer,410,Neoplasms,3,B.1.1,98,,
|
||||||
|
447,Nasopharynx cancer,410,Neoplasms,3,B.1.2,99,,
|
||||||
|
450,Other pharynx cancer,410,Neoplasms,3,B.1.3,100,,
|
||||||
|
411,Esophageal cancer,410,Neoplasms,3,B.1.4,101,,
|
||||||
|
414,Stomach cancer,410,Neoplasms,3,B.1.5,102,,
|
||||||
|
441,Colon and rectum cancer,410,Neoplasms,3,B.1.6,103,,
|
||||||
|
417,Liver cancer,410,Neoplasms,3,B.1.7,104,,
|
||||||
|
418,Liver cancer due to hepatitis B,417,Liver cancer,4,B.1.7.1,105,,
|
||||||
|
419,Liver cancer due to hepatitis C,417,Liver cancer,4,B.1.7.2,106,,
|
||||||
|
420,Liver cancer due to alcohol use,417,Liver cancer,4,B.1.7.3,107,,
|
||||||
|
996,Liver cancer due to NASH,417,Liver cancer,4,B.1.7.4,108,,
|
||||||
|
1021,Liver cancer due to other causes,417,Liver cancer,4,B.1.7.5,109,,
|
||||||
|
453,Gallbladder and biliary tract cancer,410,Neoplasms,3,B.1.8,110,,
|
||||||
|
456,Pancreatic cancer,410,Neoplasms,3,B.1.9,111,,
|
||||||
|
423,Larynx cancer,410,Neoplasms,3,B.1.10,112,,
|
||||||
|
426,"Tracheal, bronchus, and lung cancer",410,Neoplasms,3,B.1.11,113,,
|
||||||
|
459,Malignant skin melanoma,410,Neoplasms,3,B.1.12,114,,
|
||||||
|
462,Non-melanoma skin cancer,410,Neoplasms,3,B.1.13,115,,
|
||||||
|
849,Non-melanoma skin cancer (squamous-cell carcinoma),462,Non-melanoma skin cancer,4,B.1.13.1,116,,
|
||||||
|
850,Non-melanoma skin cancer (basal-cell carcinoma),462,Non-melanoma skin cancer,4,B.1.13.2,117,,X
|
||||||
|
429,Breast cancer,410,Neoplasms,3,B.1.14,118,,
|
||||||
|
432,Cervical cancer,410,Neoplasms,3,B.1.15,119,,
|
||||||
|
435,Uterine cancer,410,Neoplasms,3,B.1.16,120,,
|
||||||
|
465,Ovarian cancer,410,Neoplasms,3,B.1.17,121,,
|
||||||
|
438,Prostate cancer,410,Neoplasms,3,B.1.18,122,,
|
||||||
|
468,Testicular cancer,410,Neoplasms,3,B.1.19,123,,
|
||||||
|
471,Kidney cancer,410,Neoplasms,3,B.1.20,124,,
|
||||||
|
474,Bladder cancer,410,Neoplasms,3,B.1.21,125,,
|
||||||
|
477,Brain and central nervous system cancer,410,Neoplasms,3,B.1.22,126,,
|
||||||
|
480,Thyroid cancer,410,Neoplasms,3,B.1.23,127,,
|
||||||
|
483,Mesothelioma,410,Neoplasms,3,B.1.24,128,,
|
||||||
|
484,Hodgkin lymphoma,410,Neoplasms,3,B.1.25,129,,
|
||||||
|
485,Non-Hodgkin lymphoma,410,Neoplasms,3,B.1.26,130,,
|
||||||
|
486,Multiple myeloma,410,Neoplasms,3,B.1.27,131,,
|
||||||
|
487,Leukemia,410,Neoplasms,3,B.1.28,132,,
|
||||||
|
845,Acute lymphoid leukemia,487,Leukemia,4,B.1.28.1,133,,
|
||||||
|
846,Chronic lymphoid leukemia,487,Leukemia,4,B.1.28.2,134,,
|
||||||
|
847,Acute myeloid leukemia,487,Leukemia,4,B.1.28.3,135,,
|
||||||
|
848,Chronic myeloid leukemia,487,Leukemia,4,B.1.28.4,136,,
|
||||||
|
943,Other leukemia,487,Leukemia,4,B.1.28.5,137,,
|
||||||
|
1022,Other malignant neoplasms,410,Neoplasms,3,B.1.29,138,,
|
||||||
|
490,Other neoplasms,410,Neoplasms,3,B.1.30,139,,
|
||||||
|
964,"Myelodysplastic, myeloproliferative, and other hematopoietic neoplasms",490,Other neoplasms,4,B.1.30.1,140,,
|
||||||
|
965,Benign and in situ intestinal neoplasms,490,Other neoplasms,4,B.1.30.2,141,,X
|
||||||
|
966,Benign and in situ cervical and uterine neoplasms,490,Other neoplasms,4,B.1.30.3,142,,X
|
||||||
|
967,Other benign and in situ neoplasms,490,Other neoplasms,4,B.1.30.4,143,,X
|
||||||
|
491,Cardiovascular diseases,409,Non-communicable diseases,2,B.2,144,,
|
||||||
|
492,Rheumatic heart disease,491,Cardiovascular diseases,3,B.2.1,145,,
|
||||||
|
493,Ischemic heart disease,491,Cardiovascular diseases,3,B.2.2,146,,
|
||||||
|
494,Stroke,491,Cardiovascular diseases,3,B.2.3,147,,
|
||||||
|
495,Ischemic stroke,494,Stroke,4,B.2.3.1,148,,
|
||||||
|
496,Intracerebral hemorrhage,494,Stroke,4,B.2.3.2,149,,
|
||||||
|
497,Subarachnoid hemorrhage,494,Stroke,4,B.2.3.3,150,,
|
||||||
|
498,Hypertensive heart disease,491,Cardiovascular diseases,3,B.2.4,151,,
|
||||||
|
504,Non-rheumatic valvular heart disease,491,Cardiovascular diseases,3,B.2.5,152,,
|
||||||
|
968,Non-rheumatic calcific aortic valve disease,504,Non-rheumatic valvular heart disease,4,B.2.5.1,153,,
|
||||||
|
969,Non-rheumatic degenerative mitral valve disease,504,Non-rheumatic valvular heart disease,4,B.2.5.2,154,,
|
||||||
|
970,Other non-rheumatic valve diseases,504,Non-rheumatic valvular heart disease,4,B.2.5.3,155,,
|
||||||
|
499,Cardiomyopathy and myocarditis,491,Cardiovascular diseases,3,B.2.6,156,,
|
||||||
|
942,Myocarditis,499,Cardiomyopathy and myocarditis,4,B.2.6.1,157,,
|
||||||
|
938,Alcoholic cardiomyopathy,499,Cardiomyopathy and myocarditis,4,B.2.6.2,158,,
|
||||||
|
944,Other cardiomyopathy,499,Cardiomyopathy and myocarditis,4,B.2.6.3,159,,
|
||||||
|
500,Atrial fibrillation and flutter,491,Cardiovascular diseases,3,B.2.8,160,,
|
||||||
|
501,Aortic aneurysm,491,Cardiovascular diseases,3,B.2.9,161,X,
|
||||||
|
502,Peripheral artery disease,491,Cardiovascular diseases,3,B.2.10,162,,
|
||||||
|
503,Endocarditis,491,Cardiovascular diseases,3,B.2.11,163,,
|
||||||
|
1023,Other cardiovascular and circulatory diseases,491,Cardiovascular diseases,3,B.2.12,164,,
|
||||||
|
508,Chronic respiratory diseases,409,Non-communicable diseases,2,B.3,165,,
|
||||||
|
509,Chronic obstructive pulmonary disease,508,Chronic respiratory diseases,3,B.3.1,166,,
|
||||||
|
510,Pneumoconiosis,508,Chronic respiratory diseases,3,B.3.2,167,,
|
||||||
|
511,Silicosis,510,Pneumoconiosis,4,B.3.2.1,168,,
|
||||||
|
512,Asbestosis,510,Pneumoconiosis,4,B.3.2.2,169,,
|
||||||
|
513,Coal workers pneumoconiosis,510,Pneumoconiosis,4,B.3.2.3,170,,
|
||||||
|
514,Other pneumoconiosis,510,Pneumoconiosis,4,B.3.2.4,171,,
|
||||||
|
515,Asthma,508,Chronic respiratory diseases,3,B.3.3,172,,
|
||||||
|
516,Interstitial lung disease and pulmonary sarcoidosis,508,Chronic respiratory diseases,3,B.3.4,173,,
|
||||||
|
520,Other chronic respiratory diseases,508,Chronic respiratory diseases,3,B.3.5,174,,
|
||||||
|
526,Digestive diseases,409,Non-communicable diseases,2,B.4,175,,
|
||||||
|
521,Cirrhosis and other chronic liver diseases,526,Digestive diseases,3,B.4.1,176,,
|
||||||
|
522,Cirrhosis and other chronic liver diseases due to hepatitis B,521,Cirrhosis and other chronic liver diseases,4,B.4.1.1,177,,
|
||||||
|
523,Cirrhosis and other chronic liver diseases due to hepatitis C,521,Cirrhosis and other chronic liver diseases,4,B.4.1.2,178,,
|
||||||
|
524,Cirrhosis and other chronic liver diseases due to alcohol use,521,Cirrhosis and other chronic liver diseases,4,B.4.1.3,179,,
|
||||||
|
971,Cirrhosis and other chronic liver diseases due to NAFLD,521,Cirrhosis and other chronic liver diseases,4,B.4.1.4,180,,
|
||||||
|
525,Cirrhosis and other chronic liver diseases due to other causes,521,Cirrhosis and other chronic liver diseases,4,B.4.1.5,181,,
|
||||||
|
992,Upper digestive system diseases,526,Digestive diseases,3,B.4.2,182,,
|
||||||
|
527,Peptic ulcer disease,992,Upper digestive system diseases,4,B.4.2.1,183,,
|
||||||
|
528,Gastritis and duodenitis,992,Upper digestive system diseases,4,B.4.2.2,184,,
|
||||||
|
536,Gastroesophageal reflux disease,992,Upper digestive system diseases,4,B.4.2.3,185,,X
|
||||||
|
529,Appendicitis,526,Digestive diseases,3,B.4.3,186,,
|
||||||
|
530,Paralytic ileus and intestinal obstruction,526,Digestive diseases,3,B.4.4,187,,
|
||||||
|
531,"Inguinal, femoral, and abdominal hernia",526,Digestive diseases,3,B.4.5,188,,
|
||||||
|
532,Inflammatory bowel disease,526,Digestive diseases,3,B.4.6,189,,
|
||||||
|
533,Vascular intestinal disorders,526,Digestive diseases,3,B.4.7,190,,
|
||||||
|
534,Gallbladder and biliary diseases,526,Digestive diseases,3,B.4.8,191,,
|
||||||
|
535,Pancreatitis,526,Digestive diseases,3,B.4.9,192,,
|
||||||
|
541,Other digestive diseases,526,Digestive diseases,3,B.4.10,193,,
|
||||||
|
542,Neurological disorders,409,Non-communicable diseases,2,B.5,194,,
|
||||||
|
543,Alzheimer's disease and other dementias,542,Neurological disorders,3,B.5.1,195,,
|
||||||
|
544,Parkinson's disease,542,Neurological disorders,3,B.5.2,196,,
|
||||||
|
545,Idiopathic epilepsy,542,Neurological disorders,3,B.5.3,197,,
|
||||||
|
546,Multiple sclerosis,542,Neurological disorders,3,B.5.4,198,,
|
||||||
|
554,Motor neuron disease,542,Neurological disorders,3,B.5.5,199,,
|
||||||
|
972,Headache disorders,542,Neurological disorders,3,B.5.6,200,,X
|
||||||
|
547,Migraine,972,Headache disorders,4,B.5.6.1,201,,X
|
||||||
|
548,Tension-type headache,972,Headache disorders,4,B.5.6.2,202,,X
|
||||||
|
557,Other neurological disorders,542,Neurological disorders,3,B.5.7,203,,
|
||||||
|
558,Mental disorders,409,Non-communicable diseases,2,B.6,204,,
|
||||||
|
559,Schizophrenia,558,Mental disorders,3,B.6.1,205,,X
|
||||||
|
567,Depressive disorders,558,Mental disorders,3,B.6.2,206,,X
|
||||||
|
568,Major depressive disorder,567,Depressive disorders,4,B.6.2.1,207,,X
|
||||||
|
569,Dysthymia,567,Depressive disorders,4,B.6.2.2,208,,X
|
||||||
|
570,Bipolar disorder,558,Mental disorders,3,B.6.3,209,,X
|
||||||
|
571,Anxiety disorders,558,Mental disorders,3,B.6.4,210,,X
|
||||||
|
572,Eating disorders,558,Mental disorders,3,B.6.5,211,,
|
||||||
|
573,Anorexia nervosa,572,Eating disorders,4,B.6.5.1,212,,
|
||||||
|
574,Bulimia nervosa,572,Eating disorders,4,B.6.5.2,213,,
|
||||||
|
575,Autism spectrum disorders,558,Mental disorders,3,B.6.6,214,,X
|
||||||
|
578,Attention-deficit/hyperactivity disorder,558,Mental disorders,3,B.6.7,215,,X
|
||||||
|
579,Conduct disorder,558,Mental disorders,3,B.6.8,216,,X
|
||||||
|
582,Idiopathic developmental intellectual disability,558,Mental disorders,3,B.6.9,217,,X
|
||||||
|
585,Other mental disorders,558,Mental disorders,3,B.6.10,218,,X
|
||||||
|
973,Substance use disorders,409,Non-communicable diseases,2,B.7,219,,
|
||||||
|
560,Alcohol use disorders,973,Substance use disorders,3,B.7.1,220,,
|
||||||
|
561,Drug use disorders,973,Substance use disorders,3,B.7.2,221,,
|
||||||
|
562,Opioid use disorders,561,Drug use disorders,4,B.7.2.1,222,,
|
||||||
|
563,Cocaine use disorders,561,Drug use disorders,4,B.7.2.2,223,,
|
||||||
|
564,Amphetamine use disorders,561,Drug use disorders,4,B.7.2.3,224,,
|
||||||
|
565,Cannabis use disorders,561,Drug use disorders,4,B.7.2.4,225,,X
|
||||||
|
566,Other drug use disorders,561,Drug use disorders,4,B.7.2.5,226,,
|
||||||
|
974,Diabetes and kidney diseases,409,Non-communicable diseases,2,B.8,227,,
|
||||||
|
587,Diabetes mellitus,974,Diabetes and kidney diseases,3,B.8.1,228,,
|
||||||
|
975,Diabetes mellitus type 1,587,Diabetes mellitus,4,B.8.1.1,229,,
|
||||||
|
976,Diabetes mellitus type 2,587,Diabetes mellitus,4,B.8.1.2,230,,
|
||||||
|
589,Chronic kidney disease,974,Diabetes and kidney diseases,3,B.8.2,231,,
|
||||||
|
997,Chronic kidney disease due to diabetes mellitus type 1,589,Chronic kidney disease,4,B.8.2.1,232,,
|
||||||
|
998,Chronic kidney disease due to diabetes mellitus type 2,589,Chronic kidney disease,4,B.8.2.2,233,,
|
||||||
|
591,Chronic kidney disease due to hypertension,589,Chronic kidney disease,4,B.8.2.3,234,,
|
||||||
|
592,Chronic kidney disease due to glomerulonephritis,589,Chronic kidney disease,4,B.8.2.4,235,,
|
||||||
|
593,Chronic kidney disease due to other and unspecified causes,589,Chronic kidney disease,4,B.8.2.5,236,,
|
||||||
|
588,Acute glomerulonephritis,974,Diabetes and kidney diseases,3,B.8.3,237,,
|
||||||
|
653,Skin and subcutaneous diseases,409,Non-communicable diseases,2,B.9,238,,
|
||||||
|
654,Dermatitis,653,Skin and subcutaneous diseases,3,B.9.1,239,,X
|
||||||
|
977,Atopic dermatitis,654,Dermatitis,4,B.9.1.1,240,,X
|
||||||
|
978,Contact dermatitis,654,Dermatitis,4,B.9.1.2,241,,X
|
||||||
|
979,Seborrhoeic dermatitis,654,Dermatitis,4,B.9.1.3,242,,X
|
||||||
|
655,Psoriasis,653,Skin and subcutaneous diseases,3,B.9.2,243,,X
|
||||||
|
980,Bacterial skin diseases,653,Skin and subcutaneous diseases,3,B.9.3,244,,
|
||||||
|
656,Cellulitis,980,Bacterial skin diseases,4,B.9.3.1,245,,
|
||||||
|
657,Pyoderma,980,Bacterial skin diseases,4,B.9.3.2,246,,
|
||||||
|
658,Scabies,653,Skin and subcutaneous diseases,3,B.9.4,247,,X
|
||||||
|
659,Fungal skin diseases,653,Skin and subcutaneous diseases,3,B.9.5,248,,X
|
||||||
|
660,Viral skin diseases,653,Skin and subcutaneous diseases,3,B.9.6,249,,X
|
||||||
|
661,Acne vulgaris,653,Skin and subcutaneous diseases,3,B.9.7,250,,X
|
||||||
|
662,Alopecia areata,653,Skin and subcutaneous diseases,3,B.9.8,251,,X
|
||||||
|
663,Pruritus,653,Skin and subcutaneous diseases,3,B.9.9,252,,X
|
||||||
|
664,Urticaria,653,Skin and subcutaneous diseases,3,B.9.10,253,,X
|
||||||
|
665,Decubitus ulcer,653,Skin and subcutaneous diseases,3,B.9.11,254,,
|
||||||
|
668,Other skin and subcutaneous diseases,653,Skin and subcutaneous diseases,3,B.9.12,255,,
|
||||||
|
669,Sense organ diseases,409,Non-communicable diseases,2,B.10,256,,X
|
||||||
|
981,Blindness and vision loss,669,Sense organ diseases,3,B.10.1,257,,X
|
||||||
|
670,Glaucoma,981,Blindness and vision loss,4,B.10.1.1,258,,X
|
||||||
|
671,Cataract,981,Blindness and vision loss,4,B.10.1.2,259,,X
|
||||||
|
672,Age-related macular degeneration,981,Blindness and vision loss,4,B.10.1.3,260,,X
|
||||||
|
999,Refraction disorders,981,Blindness and vision loss,4,B.10.1.4,261,,X
|
||||||
|
1000,Near vision loss,981,Blindness and vision loss,4,B.10.1.5,262,,X
|
||||||
|
675,Other vision loss,981,Blindness and vision loss,4,B.10.1.6,263,,X
|
||||||
|
674,Age-related and other hearing loss,669,Sense organ diseases,3,B.10.2,264,,X
|
||||||
|
679,Other sense organ diseases,669,Sense organ diseases,3,B.10.3,265,,X
|
||||||
|
626,Musculoskeletal disorders,409,Non-communicable diseases,2,B.11,266,,
|
||||||
|
627,Rheumatoid arthritis,626,Musculoskeletal disorders,3,B.11.1,267,,
|
||||||
|
628,Osteoarthritis,626,Musculoskeletal disorders,3,B.11.2,268,,X
|
||||||
|
1014,Osteoarthritis hip,628,Osteoarthritis,4,B.11.2.1,269,,X
|
||||||
|
1015,Osteoarthritis knee,628,Osteoarthritis,4,B.11.2.2,270,,X
|
||||||
|
1016,Osteoarthritis hand,628,Osteoarthritis,4,B.11.2.3,271,,X
|
||||||
|
1017,Osteoarthritis other,628,Osteoarthritis,4,B.11.2.4,272,,X
|
||||||
|
630,Low back pain,626,Musculoskeletal disorders,3,B.11.3,273,,X
|
||||||
|
631,Neck pain,626,Musculoskeletal disorders,3,B.11.4,274,,X
|
||||||
|
632,Gout,626,Musculoskeletal disorders,3,B.11.5,275,,X
|
||||||
|
639,Other musculoskeletal disorders,626,Musculoskeletal disorders,3,B.11.6,276,,
|
||||||
|
640,Other non-communicable diseases,409,Non-communicable diseases,2,B.12,277,,
|
||||||
|
641,Congenital birth defects,640,Other non-communicable diseases,3,B.12.1,278,,
|
||||||
|
642,Neural tube defects,641,Congenital birth defects,4,B.12.1.1,279,,
|
||||||
|
643,Congenital heart anomalies,641,Congenital birth defects,4,B.12.1.2,280,,
|
||||||
|
644,Orofacial clefts,641,Congenital birth defects,4,B.12.1.3,281,,
|
||||||
|
645,Down syndrome,641,Congenital birth defects,4,B.12.1.4,282,,
|
||||||
|
646,Turner syndrome,641,Congenital birth defects,4,B.12.1.5,283,,X
|
||||||
|
647,Klinefelter syndrome,641,Congenital birth defects,4,B.12.1.6,284,,X
|
||||||
|
648,Other chromosomal abnormalities,641,Congenital birth defects,4,B.12.1.7,285,,
|
||||||
|
649,Congenital musculoskeletal and limb anomalies,641,Congenital birth defects,4,B.12.1.8,286,,
|
||||||
|
650,Urogenital congenital anomalies,641,Congenital birth defects,4,B.12.1.9,287,,
|
||||||
|
651,Digestive congenital anomalies,641,Congenital birth defects,4,B.12.1.10,288,,
|
||||||
|
652,Other congenital birth defects,641,Congenital birth defects,4,B.12.1.11,289,,
|
||||||
|
594,Urinary diseases and male infertility,640,Other non-communicable diseases,3,B.12.2,290,,
|
||||||
|
595,Urinary tract infections and interstitial nephritis,594,Urinary diseases and male infertility,4,B.12.2.1,291,,
|
||||||
|
596,Urolithiasis,594,Urinary diseases and male infertility,4,B.12.2.2,292,,
|
||||||
|
597,Benign prostatic hyperplasia,594,Urinary diseases and male infertility,4,B.12.2.3,293,,X
|
||||||
|
598,Male infertility,594,Urinary diseases and male infertility,4,B.12.2.4,294,,X
|
||||||
|
602,Other urinary diseases,594,Urinary diseases and male infertility,4,B.12.2.5,295,,
|
||||||
|
603,Gynecological diseases,640,Other non-communicable diseases,3,B.12.3,296,,
|
||||||
|
604,Uterine fibroids,603,Gynecological diseases,4,B.12.3.1,297,,
|
||||||
|
605,Polycystic ovarian syndrome,603,Gynecological diseases,4,B.12.3.2,298,,X
|
||||||
|
606,Female infertility,603,Gynecological diseases,4,B.12.3.3,299,,X
|
||||||
|
607,Endometriosis,603,Gynecological diseases,4,B.12.3.4,300,,
|
||||||
|
608,Genital prolapse,603,Gynecological diseases,4,B.12.3.5,301,,
|
||||||
|
609,Premenstrual syndrome,603,Gynecological diseases,4,B.12.3.6,302,,X
|
||||||
|
612,Other gynecological diseases,603,Gynecological diseases,4,B.12.3.7,303,,
|
||||||
|
613,Hemoglobinopathies and hemolytic anemias,640,Other non-communicable diseases,3,B.12.4,304,,
|
||||||
|
614,Thalassemias,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.1,305,,
|
||||||
|
837,Thalassemias trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.2,306,,X
|
||||||
|
615,Sickle cell disorders,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.3,307,,
|
||||||
|
838,Sickle cell trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.4,308,,X
|
||||||
|
616,G6PD deficiency,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.5,309,,
|
||||||
|
839,G6PD trait,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.6,310,,X
|
||||||
|
618,Other hemoglobinopathies and hemolytic anemias,613,Hemoglobinopathies and hemolytic anemias,4,B.12.4.7,311,,
|
||||||
|
619,"Endocrine, metabolic, blood, and immune disorders",640,Other non-communicable diseases,3,B.12.5,312,,
|
||||||
|
680,Oral disorders,640,Other non-communicable diseases,3,B.12.6,313,,X
|
||||||
|
681,Caries of deciduous teeth,680,Oral disorders,4,B.12.6.1,314,,X
|
||||||
|
682,Caries of permanent teeth,680,Oral disorders,4,B.12.6.2,315,,X
|
||||||
|
683,Periodontal diseases,680,Oral disorders,4,B.12.6.3,316,,X
|
||||||
|
684,Edentulism,680,Oral disorders,4,B.12.6.4,317,,X
|
||||||
|
685,Other oral disorders,680,Oral disorders,4,B.12.6.5,318,,X
|
||||||
|
686,Sudden infant death syndrome,640,Other non-communicable diseases,3,B.12.7,319,X,
|
||||||
|
687,Injuries,294,All causes,1,C,320,,
|
||||||
|
688,Transport injuries,687,Injuries,2,C.1,321,,
|
||||||
|
689,Road injuries,688,Transport injuries,3,C.1.1,322,,
|
||||||
|
690,Pedestrian road injuries,689,Road injuries,4,C.1.1.1,323,,
|
||||||
|
691,Cyclist road injuries,689,Road injuries,4,C.1.1.2,324,,
|
||||||
|
692,Motorcyclist road injuries,689,Road injuries,4,C.1.1.3,325,,
|
||||||
|
693,Motor vehicle road injuries,689,Road injuries,4,C.1.1.4,326,,
|
||||||
|
694,Other road injuries,689,Road injuries,4,C.1.1.5,327,,
|
||||||
|
695,Other transport injuries,688,Transport injuries,3,C.1.2,328,,
|
||||||
|
696,Unintentional injuries,687,Injuries,2,C.2,329,,
|
||||||
|
697,Falls,696,Unintentional injuries,3,C.2.1,330,,
|
||||||
|
698,Drowning,696,Unintentional injuries,3,C.2.2,331,,
|
||||||
|
699,"Fire, heat, and hot substances",696,Unintentional injuries,3,C.2.3,332,,
|
||||||
|
700,Poisonings,696,Unintentional injuries,3,C.2.4,333,,
|
||||||
|
701,Poisoning by carbon monoxide,700,Poisonings,4,C.2.4.1,334,,
|
||||||
|
703,Poisoning by other means,700,Poisonings,4,C.2.4.2,335,,
|
||||||
|
704,Exposure to mechanical forces,696,Unintentional injuries,3,C.2.5,336,,
|
||||||
|
705,Unintentional firearm injuries,704,Exposure to mechanical forces,4,C.2.5.1,337,,
|
||||||
|
707,Other exposure to mechanical forces,704,Exposure to mechanical forces,4,C.2.5.2,338,,
|
||||||
|
708,Adverse effects of medical treatment,696,Unintentional injuries,3,C.2.6,339,,
|
||||||
|
709,Animal contact,696,Unintentional injuries,3,C.2.7,340,,
|
||||||
|
710,Venomous animal contact,709,Animal contact,4,C.2.7.1,341,,
|
||||||
|
711,Non-venomous animal contact,709,Animal contact,4,C.2.7.2,342,,
|
||||||
|
712,Foreign body,696,Unintentional injuries,3,C.2.8,343,,
|
||||||
|
713,Pulmonary aspiration and foreign body in airway,712,Foreign body,4,C.2.8.1,344,,
|
||||||
|
714,Foreign body in eyes,712,Foreign body,4,C.2.8.2,345,,X
|
||||||
|
715,Foreign body in other body part,712,Foreign body,4,C.2.8.3,346,,
|
||||||
|
842,Environmental heat and cold exposure,696,Unintentional injuries,3,C.2.9,347,,
|
||||||
|
729,Exposure to forces of nature,696,Unintentional injuries,3,C.2.10,348,,
|
||||||
|
716,Other unintentional injuries,696,Unintentional injuries,3,C.2.11,349,,
|
||||||
|
717,Self-harm and interpersonal violence,687,Injuries,2,C.3,350,,
|
||||||
|
718,Self-harm,717,Self-harm and interpersonal violence,3,C.3.1,351,,
|
||||||
|
721,Self-harm by firearm,718,Self-harm,4,C.3.1.1,352,,
|
||||||
|
723,Self-harm by other specified means,718,Self-harm,4,C.3.1.2,353,,
|
||||||
|
724,Interpersonal violence,717,Self-harm and interpersonal violence,3,C.3.2,354,,
|
||||||
|
725,Physical violence by firearm,724,Interpersonal violence,4,C.3.2.1,355,,
|
||||||
|
726,Physical violence by sharp object,724,Interpersonal violence,4,C.3.2.2,356,,
|
||||||
|
941,Sexual violence,724,Interpersonal violence,4,C.3.2.3,357,,X
|
||||||
|
727,Physical violence by other means,724,Interpersonal violence,4,C.3.2.4,358,,
|
||||||
|
945,Conflict and terrorism,717,Self-harm and interpersonal violence,3,C.3.3,359,,
|
||||||
|
854,Executions and police conflict,717,Self-harm and interpersonal violence,3,C.3.4,360,,
|
||||||
|
1029,Total cancers,294,All causes,1,D,361,,
|
||||||
|
1026,Total burden related to hepatitis B,294,All causes,1,E,362,,
|
||||||
|
1027,Total burden related to hepatitis C,294,All causes,1,F,363,,
|
||||||
|
1028,Total burden related to Non-alcoholic fatty liver disease (NAFLD),294,All causes,1,G,364,,
|
||||||
|
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue