Saving current status.
parent
d8d00101fa
commit
71e87a9abe
@ -0,0 +1,16 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal",
|
||||
"justMyCode": true
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1,8 +1,127 @@
|
||||
from tokenize import String
|
||||
from bs4 import BeautifulSoup
|
||||
import abc
|
||||
import textprocessing as tp #cuz tp is important
|
||||
#requires Python 3.10
|
||||
|
||||
def extract_data_from_tr(tr) -> tuple[str, str, str]:
    """
    Extract the row label and the old/new values from an html data row.

    Planned behaviour (not implemented yet -- this stub currently returns
    None): take the record name from the first <td> and the data from the
    second <td>. The data is split into its old and new versions, and a copy
    of each is returned.

    Uses functionality from ./textprocessing.py (separated because it is
    important to test that functionality) to extract data from tags.

    The return annotation uses the builtin ``str``. ``tokenize.String`` is a
    regular-expression source string inside the tokenize module, not a type,
    and the plan below returns three values rather than two.
    """
    # Implementation plan:
    #   get list of cells
    #   for cell in cells
    #       if class_ == "rowLabel", extract text
    #       else parse out new and old text
    #   return triple: row_label, old, new
    pass
|
||||
|
||||
#superclasses
|
||||
class VersionData(abc.ABC):
    """
    Abstract base for the data extracted from one section of a trial version.

    This abstract class holds two types of data:
    - Data with a 1-to-1 relationship with the trial/version pair.
    - Data with a child relationship with the trial/version pair.

    Each subclass will return the 1-to-1 data for another system to add to the DB.
    This is so that a single record can be created in one go.
    Each subclass will load the child data to the database directly.
    """

    @abc.abstractmethod
    def version_fields(self):
        """
        Return data that should be included in a standard table
        related to version_x of the record.

        Open question: should it also return the columns?
        """
        pass

    @abc.abstractmethod
    def version_records(self, foreign_key, db_cursor):
        """
        Load data that needs to be held in auxiliary tables into the database.

        For example, the list of sponsors will need to be tracked separately
        from trial status.
        """
        pass
|
||||
|
||||
|
||||
class StudyStatusData(VersionData):
    """Study-status section of a trial version (constructor is still a stub)."""

    # NOTE(review): presumably the names of the 1-to-1 fields for this
    # section; they mirror the constructor parameters -- confirm.
    columns = [
        "primary_completion_date",
        "completion_date",
        "last_update_posted_date",
    ]

    def __init__(
        self,
        primary_completion_date,
        completion_date,
        last_update_posted_date,
    ) -> None:
        # Placeholder: the three dates are accepted but not stored yet.
        return None
|
||||
|
||||
def extract_study_statuses(study_status_form, version_a, version_b):
    """
    Extract data from a study_status form.

    Intended to return one or two StudyStatusData objects. Not implemented
    yet, so the call currently returns None.
    """
    return None
|
||||
|
||||
class SponsorCollaboratorsData(VersionData):
    """Sponsor/collaborator section of a trial version (still a stub)."""

    # No fields have been declared for this section yet.
    columns = []

    def __init__(self) -> None:
        # Placeholder: nothing to initialise yet.
        return None
|
||||
|
||||
|
||||
|
||||
def get_forms(soup):
|
||||
|
||||
data_list = []
|
||||
|
||||
#extract all forms
|
||||
for form in soup.body.find_all("form"):
|
||||
#Match forms against ID types
|
||||
if not "id" in form.attrs:
|
||||
continue
|
||||
|
||||
match form.attrs["id"]:
|
||||
case "form_StudyStatus":
|
||||
print("test successful 2")
|
||||
case "form_SponsorCollaborators":
|
||||
pass
|
||||
case "form_Oversight":
|
||||
pass
|
||||
case "form_StudyDescription":
|
||||
pass
|
||||
case "form_Conditions":
|
||||
pass
|
||||
case "form_StudyDesign":
|
||||
pass
|
||||
case "form_ArmsandInterventions":
|
||||
pass
|
||||
case "form_ProtocolOutcomeMeasures":
|
||||
pass
|
||||
case "form_Eligibility":
|
||||
pass
|
||||
case "form_ContactsLocations":
|
||||
pass
|
||||
case "form_IPDSharing":
|
||||
pass
|
||||
case "form_References":
|
||||
pass
|
||||
case "form_ParticipantFlow":
|
||||
pass
|
||||
case "form_BaselineCharacteristics":
|
||||
pass
|
||||
case "form_ROutcomeMeasures":
|
||||
pass
|
||||
case "form_AdverseEvents":
|
||||
pass
|
||||
case "form_LimitationsandCaveats":
|
||||
pass
|
||||
case "form_MoreInformation":
|
||||
pass
|
||||
case _:
|
||||
print(form.attrs["id"])
|
||||
|
||||
if __name__ == "__main__":
    # Parse the saved sample page with the lxml backend.
    with open("./NCT00658567.html") as html_file:
        soup = BeautifulSoup(html_file, "lxml")

    # Dump the parsed document, then run the form dispatcher over it.
    print(soup)
    get_forms(soup)
|
||||
@ -0,0 +1,62 @@
|
||||
/*
|
||||
Create schema history
|
||||
|
||||
|
||||
CREATE TABLE history.versions
|
||||
nct_id
|
||||
version
|
||||
--Study Status
|
||||
overall_status
|
||||
primary_completion_date
|
||||
completion_date
|
||||
last_update_submitted_date
|
||||
--SponsorCollaborators
|
||||
sponsor (multi?)
|
||||
collaborators (multi?)
|
||||
--Oversight
|
||||
fda_regulated_drug (ignore)
|
||||
fda_regulated_device (ignore)
|
||||
dmc (ignore)
|
||||
--StudyDescription
|
||||
summary
|
||||
detailed_description
|
||||
--Conditions
|
||||
Conditions
|
||||
Keywords
|
||||
--StudyDesign
|
||||
Study type
|
||||
Primary Purpose
|
||||
Study Phase
|
||||
Interventional Study Model
|
||||
Number of Arms
|
||||
Masking
|
||||
Allocation
|
||||
Enrollment
|
||||
--ArmsAndInterventions
|
||||
Arms (multiple) (Ignore)
|
||||
--ProtocolOutcomeMeasures
|
||||
--Eligibility
|
||||
--ContactsLocation
|
||||
--IPDSharing
|
||||
--References
|
||||
--ParticipantFlow
|
||||
--BaselineCharacteristics
|
||||
--ROutcomeMeasures
|
||||
--AdverseEvents
|
||||
--LimitationsAndCaveats
|
||||
--More Information
|
||||
|
||||
|
||||
CREATE TABLE history.collaborators
|
||||
nct_id
|
||||
version
|
||||
collaborator_name
|
||||
|
||||
CREATE TABLE history.locations
|
||||
nct_id
|
||||
version
|
||||
location name
|
||||
location contact info
|
||||
|
||||
CREATE TABLE history.arms
|
||||
*/
|
||||
@ -0,0 +1,121 @@
|
||||
from cgitb import html
|
||||
import re
|
||||
|
||||
form = """
|
||||
<tr>
|
||||
<td colspan="2">
|
||||
<form id="form_StudyStatus">
|
||||
<div class="w3-responsive">
|
||||
<fieldset class="entryReq" id="StudyStatus"
|
||||
style="margin:auto;margin-bottom:1em;padding-bottom:0.5em;width:98%;">
|
||||
<legend class="moduleLabel"> <img id="StudyStatusImg" class="toggleImage"
|
||||
onclick="toggleModule('StudyStatus');" src="html/images/collapse.png"
|
||||
alt='Open or close this module'>
|
||||
Study Status</legend>
|
||||
<div id="StudyStatusBody" class="moduleBody">
|
||||
<table class="indent1 moduleTable resultTable">
|
||||
<thead>
|
||||
<tr>
|
||||
<th style="width:210px;"></th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Record Verification:</td>
|
||||
<td>April <span class="drop_hilite">2008</span> <span class="add_hilite">2017</span> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Overall Status:</td>
|
||||
<td><span class="drop_hilite">Recruiting</span> <span class="add_hilite">Completed</span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Study Start:</td>
|
||||
<td>March 2008 </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Primary Completion:</td>
|
||||
<td> <span class="add_hilite">December 2009 [Actual]</span> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Study Completion:</td>
|
||||
<td>December 2009 [ <span class="drop_hilite">Anticipated</span> <span
|
||||
class="add_hilite">Actual</span>] </td>
|
||||
</tr>
|
||||
<tr style="border-bottom:1px solid lightgray">
|
||||
<td colspan="3"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">First Submitted:</td>
|
||||
<td>April 10, 2008 </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">First Submitted that<br />Met QC Criteria:</td>
|
||||
<td>April 10, 2008 </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">First Posted:</td>
|
||||
<td>April 15, 2008 [Estimate] </td>
|
||||
</tr>
|
||||
<tr style="border-bottom:1px solid lightgray">
|
||||
<td colspan="3"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Results First Submitted:</td>
|
||||
<td> <span class="add_hilite">February 6, 2014</span> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Results First Submitted that<br />Met QC
|
||||
Criteria:</td>
|
||||
<td> <span class="add_hilite">August 29, 2014</span> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Results First Posted:</td>
|
||||
<td> <span class="add_hilite">September 9, 2014 [Estimate]</span> </td>
|
||||
</tr>
|
||||
<tr style="border-bottom:1px solid lightgray">
|
||||
<td colspan="3"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Last Update Submitted that<br />Met QC Criteria:
|
||||
</td>
|
||||
<td>April <span class="drop_hilite">10</span> <span class="add_hilite">18</span>, <span
|
||||
class="drop_hilite">2008</span> <span class="add_hilite">2017</span> </td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Last Update Posted:</td>
|
||||
<td><span class="drop_hilite">April 15, 2008 [Estimate]</span> <span class="add_hilite">May 19,
|
||||
2017 [Actual]</span> </td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</fieldset>
|
||||
</div>
|
||||
</form>
|
||||
</td>
|
||||
</tr>
|
||||
"""
|
||||
|
||||
|
||||
entry1 = """
|
||||
<tr>
|
||||
<td class="rowLabel" style="min-width: 210px;">Record Verification:</td>
|
||||
<td>April <span class="drop_hilite">2008</span> <span class="add_hilite">2017</span> </td>
|
||||
</tr>
|
||||
"""
|
||||
|
||||
|
||||
# Patterns for turning the diff markup of a history page into plain text.
# Raw strings keep \s from being read as an (invalid) string escape.

# A "removed" span (value only present in the older version) plus at most one
# trailing whitespace character. ``\s+`` tolerates a tag wrapped across lines;
# the non-greedy ``.*?`` with DOTALL accepts multi-word / multi-line content
# such as "April 15, 2008 [Estimate]".
drop_old_re = re.compile(r'<span\s+class="drop_hilite">.*?</span>\s?', re.DOTALL)

# An "added" span (value only present in the newer version), same shape.
drop_new_re = re.compile(r'<span\s+class="add_hilite">.*?</span>\s?', re.DOTALL)

# Any remaining tag, whatever characters its attributes contain
# (e.g. '%', quotes or parentheses inside style/onclick values).
drop_tags_re = re.compile(r'<[^<>]+>')
|
||||
|
||||
|
||||
# Each pattern applied on its own to the single-row sample:
# added spans removed, removed spans removed, then all tags removed.
print(
    drop_new_re.sub("", entry1),
    drop_old_re.sub("", entry1),
    drop_tags_re.sub("", entry1),
    sep="\n",
)

# "Old version" view: drop the added spans first, then strip what markup is
# left -- once for the single row and once for the whole sample form.
print(
    drop_tags_re.sub("", drop_new_re.sub("", entry1)),
    drop_tags_re.sub("", drop_new_re.sub("", form)),
    sep="\n",
)
|
||||
Loading…
Reference in New Issue