|
|
|
|
@ -6,11 +6,30 @@ from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
|
from multiprocessing import Pool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_highest_version_number(response):
    """
    Navigate to the version table and extract the highest posted version.

    The version table is the ``<table>`` inside the first ``<fieldset>`` of
    the returned HTML. There are cases where the last row of that table IS
    NOT a version entry, so the rows are scanned from last to first, looking
    for a cell whose ``headers`` attribute starts with ``VersionNumber``.
    The first such cell found in the reversed scan is the last one occurring
    in the table, which is the one we need (assumes the table lists versions
    in ascending order -- TODO confirm against the site).

    Args:
        response: Object exposing the page HTML as ``response.text``
            (presumably a ``requests.Response`` -- confirm against caller).

    Returns:
        The version number as an ``int``, or ``None`` when no cell in the
        table carries the ``VersionNumber`` header.

    Raises:
        IndexError: If the page contains no ``<fieldset>``.
        AttributeError: If the first fieldset has no ``<table>``/``<tbody>``.
        ValueError: If the matching cell's text is not an integer.
    """
    soup = BeautifulSoup(response.text, features="lxml")

    # Get the version table rows.
    table_rows = soup.find_all("fieldset")[0].table.tbody.find_all("tr")

    for row in reversed(table_rows):
        # <td headers="VersionNumber">xx</td> is the cell that holds the version.
        for td in row.find_all("td"):
            # BeautifulSoup normally exposes ``headers`` as a list of tokens
            # (it is a multi-valued attribute); fall back to splitting when a
            # parser configuration hands it back as a plain string.
            headers = td.get("headers") or []
            if isinstance(headers, str):
                headers = headers.split()
            # Compare only the first token; the slice keeps an empty
            # ``headers=""`` attribute from raising IndexError.
            if headers[:1] == ["VersionNumber"]:
                return int(td.text)

    # No row carried version information.
    return None
|
|
|
|
|
|
|
|
|
|
def make_request(nct_id,version1,version2):
|
|
|
|
|
#create url
|
|
|
|
|
|