Reorganized folder system. Added an assorted section to help keep track of useful SQL, etc.

history-download
youainti 4 years ago
parent d923c262a4
commit d436a67457

@ -0,0 +1,44 @@
-- Sample up to 100 non-null why_stopped values from ctgov.studies.
-- There is no ORDER BY, so which 100 rows come back is unspecified.
SELECT
    why_stopped
FROM
    ctgov.studies
WHERE
    why_stopped IS NOT NULL
LIMIT 100;
-- Number of rows in ctgov.studies for each study_type value.
SELECT
    study_type,
    COUNT(*)
FROM
    ctgov.studies
GROUP BY
    study_type;
-- Number of rows in ctgov.studies for each is_fda_regulated_drug value.
SELECT
    is_fda_regulated_drug,
    COUNT(*)
FROM
    ctgov.studies
GROUP BY
    is_fda_regulated_drug;
/*
Count studies that are flagged is_fda_regulated_drug, have study_type
'Interventional', and have a start_date strictly after 2007-01-01, broken
down by study_type, phase, has_expanded_access, and has_dmc.

Note: a decent number of these trials have expanded access.
*/
SELECT
    study_type,
    phase,
    has_expanded_access,
    has_dmc,
    COUNT(*)
FROM
    ctgov.studies
WHERE
    is_fda_regulated_drug IS TRUE
    AND study_type = 'Interventional'
    AND start_date > date('2007-01-01')
GROUP BY
    study_type,
    phase,
    has_expanded_access,
    has_dmc;
/*
Look at the different MeSH terms as assigned by clinicaltrials.gov.

This statement returns the first 200 rows of ctgov.browse_conditions,
highest nct_id first, then by mesh_type within each nct_id.
*/
SELECT
    *
FROM
    ctgov.browse_conditions
ORDER BY
    nct_id DESC,
    mesh_type ASC
LIMIT 200;
-- First 200 rows of ctgov.browse_interventions, highest nct_id first.
SELECT
    *
FROM
    ctgov.browse_interventions
ORDER BY
    nct_id DESC
LIMIT 200;

@ -38,11 +38,11 @@ if __name__ == "__main__":
X = vectorizer.fit_transform(data.cleaned) X = vectorizer.fit_transform(data.cleaned)
kmeans = KMeans(n_clusters=3, random_state=11021585) kmeans = KMeans(n_clusters=10, random_state=11021585)
kmeans.fit(X) kmeans.fit(X)
data["cluster"] = kmeans.labels_ data["cluster"] = kmeans.labels_
print(data.groupby(["cluster"]).count()) print(data.groupby(["cluster"])["cleaned"].count())
Loading…
Cancel
Save