diff --git a/DockerContainers/ClinicalTrialHistory/Dockerfile b/AACT_downloader/ClinicalTrialHistory/Dockerfile similarity index 100% rename from DockerContainers/ClinicalTrialHistory/Dockerfile rename to AACT_downloader/ClinicalTrialHistory/Dockerfile diff --git a/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/020_HttpSchema.sql b/AACT_downloader/ClinicalTrialHistory/docker-entrypoint-initdb.d/020_HttpSchema.sql similarity index 100% rename from DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/020_HttpSchema.sql rename to AACT_downloader/ClinicalTrialHistory/docker-entrypoint-initdb.d/020_HttpSchema.sql diff --git a/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/030_HistoricalSchema.sql b/AACT_downloader/ClinicalTrialHistory/docker-entrypoint-initdb.d/030_HistoricalSchema.sql similarity index 100% rename from DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/030_HistoricalSchema.sql rename to AACT_downloader/ClinicalTrialHistory/docker-entrypoint-initdb.d/030_HistoricalSchema.sql diff --git a/DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/090_AnalysisViews.sql b/AACT_downloader/ClinicalTrialHistory/docker-entrypoint-initdb.d/090_AnalysisViews.sql similarity index 100% rename from DockerContainers/ClinicalTrialHistory/docker-entrypoint-initdb.d/090_AnalysisViews.sql rename to AACT_downloader/ClinicalTrialHistory/docker-entrypoint-initdb.d/090_AnalysisViews.sql diff --git a/DockerContainers/docker-compose.yaml b/AACT_downloader/docker-compose.yaml similarity index 100% rename from DockerContainers/docker-compose.yaml rename to AACT_downloader/docker-compose.yaml diff --git a/assorted/examining the db.sql b/assorted/examining the db.sql new file mode 100644 index 0000000..0c3ed1a --- /dev/null +++ b/assorted/examining the db.sql @@ -0,0 +1,44 @@ +SELECT why_stopped FROM ctgov.studies +WHERE why_stopped IS NOT NULL +LIMIT 100; + +SELECT study_type, count(*) from ctgov.studies +group by study_type; + +SELECT is_fda_regulated_drug, count(*) from ctgov.studies +GROUP BY is_fda_regulated_drug; + + + +/* +Note that there is a decent number of trials that have expanded access +*/ +SELECT + study_type + , phase + , has_expanded_access + , has_dmc + , count(*) +FROM ctgov.studies +WHERE + is_fda_regulated_drug is true + AND + study_type = 'Interventional' + AND + start_date > date('2007-01-01') +group by study_type, phase, has_expanded_access, has_dmc; + + + + +/* + Find different mesh terms as assigned by clinicaltrials.gov + */ +select * from ctgov.browse_conditions +order by nct_id desc,mesh_type +limit 200; + +select * from ctgov.browse_interventions +order by nct_id desc +limit 200; + diff --git a/DockerContainers/classifications/classify_terminations.py b/classifications/classify_terminations.py similarity index 90% rename from DockerContainers/classifications/classify_terminations.py rename to classifications/classify_terminations.py index d2a7702..7984463 100644 --- a/DockerContainers/classifications/classify_terminations.py +++ b/classifications/classify_terminations.py @@ -38,11 +38,11 @@ if __name__ == "__main__": X = vectorizer.fit_transform(data.cleaned) - kmeans = KMeans(n_clusters=3, random_state=11021585) + kmeans = KMeans(n_clusters=10, random_state=11021585) kmeans.fit(X) data["cluster"] = kmeans.labels_ - print(data.groupby(["cluster"]).count()) + print(data.groupby(["cluster"])["cleaned"].count()) \ No newline at end of file diff --git a/DockerContainers/downloader/db_connection.py b/downloader/db_connection.py similarity index 100% rename from DockerContainers/downloader/db_connection.py rename to downloader/db_connection.py