# Parse a locally saved HTML page with Beautiful Soup and dump the parsed
# tree to stdout.
from bs4 import BeautifulSoup

if __name__ == "__main__":
    # Open in binary mode so decoding does not depend on the platform's
    # default locale; Beautiful Soup detects the document encoding from the
    # raw bytes it is given.
    with open("./NCT00658567.html", "rb") as fh:
        # The "lxml" parser requires the third-party lxml package.
        soup = BeautifulSoup(fh, "lxml")

    # The tree is fully built by the constructor, so the file handle is no
    # longer needed once the `with` block exits.
    print(soup)