diff --git a/history_downloader/downloader.py b/history_downloader/downloader.py index 8738e67..ab4b363 100644 --- a/history_downloader/downloader.py +++ b/history_downloader/downloader.py @@ -6,14 +6,30 @@ from bs4 import BeautifulSoup from multiprocessing import Pool + def get_highest_version_number(response): """ - Extract the highest version currently available from the version number. + Navigate to the version table and extract the highest posted version. + + As there are cases where the last element in the table IS NOT a + version entry, this function iterates from the last row entry to the first, + looking for cells with the correct header, indicating + that it contains version information. + The last one occurring in the unreversed list is what we need. """ - #navigate to a specific part of the returned html and extract the highest posted version. + soup = BeautifulSoup(response.text, features="lxml") - version_value = soup.findChildren("fieldset")[0].table.tbody.findChildren("tr")[-1].td.text - return int(version_value) + #get version table rows + table_rows = soup.findChildren("fieldset")[0].table.tbody.findChildren("tr") + + for row in reversed(table_rows): + # if it is