{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "412f6c09-9f40-46e3-9642-720946c93ba8", "metadata": {}, "outputs": [], "source": [ "import pdfminer" ] }, { "cell_type": "code", "execution_count": 2, "id": "967257f2-dae8-4789-8306-539575d55f57", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20220506\n" ] } ], "source": [ "print(pdfminer.__version__)" ] }, { "cell_type": "code", "execution_count": 3, "id": "5f56326c-70c4-4e2c-a11b-75b32c231842", "metadata": {}, "outputs": [], "source": [ "from pdfminer.converter import TextConverter\n", "from pdfminer.layout import LAParams\n", "from pdfminer.pdfdocument import PDFDocument\n", "from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter\n", "from pdfminer.pdfpage import PDFPage\n", "from pdfminer.pdfparser import PDFParser\n", "\n", "from io import StringIO" ] }, { "cell_type": "code", "execution_count": 4, "id": "c6c619b1-f897-4c1f-892f-a294b8ab0338", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20220506\n" ] } ], "source": [ "print(pdfminer.__version__)" ] }, { "cell_type": "code", "execution_count": 5, "id": "7b4c4555-57cc-4109-a937-f26fea9afae3", "metadata": {}, "outputs": [], "source": [ "output_string = StringIO()\n", "\n", "with open(\"/home/will/research/ClinicalTrialsDataProcessing/Orangebook/Orangebooks/testprint4.pdf\", \"rb\") as file_handle:\n", " parser = PDFParser(file_handle)\n", " doc = PDFDocument(parser)\n", " \n", " rsrcmgr = PDFResourceManager()\n", " device = TextConverter(rsrcmgr, output_string, laparams=LAParams())\n", " interpreter = PDFPageInterpreter(rsrcmgr, device)\n", " \n", " for page in PDFPage.create_pages(doc):\n", " interpreter.process_page(page)\n", "\n", "pdfminer_lines = output_string.getvalue().splitlines()\n", "pdftotext_lines = [ln for ln in pdfminer_lines if ln]\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "7315468d-a918-4365-9008-661fca836344", "metadata": {}, "outputs": [], "source": [ "def get_digits(string):\n", " splat = string.split(\")\")\n", " num = splat[0]\n", " l = len(num)\n", " return int(num),l\n", "\n", "def token_generator(string):\n", " \n", " start = 0\n", " str_len = len(string)\n", " \n", " \n", " while start < str_len:\n", " substring = string[start:]\n", " \n", " #check for cid\n", " if (str_len - start > 6) and (substring[0:5] == \"(cid:\"):\n", " \n", " num,length = get_digits(substring[5:])\n", " start += length + 6\n", " yield num\n", " \n", " elif (str_len - start > 1):\n", " start += 1\n", " yield substring[0]\n", " else:\n", " start += 1\n", " yield substring\n", "\n", "class UnknownSymbol():\n", " def __init__(self, symbol):\n", " self.symbol = symbol\n", " \n", " def __repr__(self):\n", " return \"UnknownSymbol: {} of type {}\".format(self.symbol, type(self.symbol))\n", " \n", " def __str__(self):\n", " return \"\\uFFFD\"\n", "\n", "class Parser:\n", " def __init__(self, lookup_table):\n", " self._lookup_table = lookup_table\n", " \n", " def convert(self,symbol):\n", " try:\n", " return self._lookup_table[symbol]\n", " except:\n", " return UnknownSymbol(symbol)\n", " \n", " def convert_stream(self,token_stream):\n", " for token in token_stream:\n", " yield self.convert(token)\n", " \n", " def check_stream(self, pdftotext_lines):\n", " for entry in pdftotext_lines:\n", " arr = [x for x in ob2020.convert_stream(token_generator(entry))]\n", " try:\n", " print(\"\".join(arr))\n", " except:\n", " print(arr)" ] }, { "cell_type": "code", "execution_count": 7, "id": "826d0009-120e-4e04-b90b-6585308e69d3", "metadata": {}, "outputs": [], "source": [ "ob2020 = Parser({\n", " 23:\"4\"\n", " ,19:\"0\"\n", " ,\"7\":\"T\"\n", " ,\"+\":\"H\"\n", " ,3:\" \"\n", " ,\"(\":\"E\"\n", " ,\"’\":\"D\"\n", " ,\",\":\"I\"\n", " ,\"2\":\"O\"\n", " ,\"1\":\"N\"\n", " ,16:\"-\"\n", " ,21:\"2\"\n", " ,\"$\":\"A\"\n", " ,\"3\":\"P\"\n", " ,\"5\":\"R\"\n", " ,\"9\":\"V\"\n", " ,\"8\":\"U\"\n", " ,\"*\":\"G\"\n", " ,\"&\":\"C\"\n", " ,\"/\":\"L\"\n", " ,\"6\":\"S\"\n", " ,22:\"3\"\n", " ,20:\"1\"\n", " ,11:\"(\"\n", " ,\"R\":\"o\"\n", " ,\"I\":\"f\"\n", " ,24:\"5\"\n", " ,12:\")\"\n", " ,\" \":\"👨🏻‍🚀\"\n", " ,\"%\":\"B\"\n", " ,\")\":\"F\"\n", " ,30:\";\"\n", " ,\"0\":\"M\"\n", " ,\"4\":\"Q\"\n", " ,18:\"/\"\n", " ,26:\"7\"\n", " ,28:\"9\"\n", " ,\"D\":\"a\"\n", " ,\"U\":\"r\"\n", " ,15:\",\"\n", " ,27:\"8\"\n", " ,\"H\":\"e\"\n", " ,\"S\":\"p\"\n", " ,25:\"6\"\n", " ,\"=\":\"Z\"\n", " ,14:\"+\"\n", " ,4:\"!\"\n", " ,\"F\":\"c\"\n", " ,\";\":\"X\"\n", " ,\"<\":\"Y\"\n", " ,\"Y\":\"v\"\n", " ,\"-\":\"J\"\n", " ,\"X\":\"u\"\n", " ,\"Q\":\"n\"\n", " ,\"W\":\"t\"\n", " ,\"J\":\"g\"\n", " ,\".\":\"K\"\n", " ,\":\":\"W\"\n", " ,17:\".\"\n", " ,\"O\":\"l\"\n", " ,\"E\":\"b\"\n", " ,\"\\\\\":\"y\"\n", " ,8:\"%\"\n", " ,\"L\":\"i\"\n", " ,\"P\":\"m\"\n", " ,10:\"'\"\n", "})" ] }, { "cell_type": "code", "execution_count": 8, "id": "58327a77-029b-4c05-b410-45d1a539ad3e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "39TH EDITION - 2019 - APPROVED DRUG PRODUCT LIST👨🏻‍🚀👨🏻‍🚀\n", "PRESCRIPTION DRUG PRODUCT LIST \n", "['c', 'a', UnknownSymbol: A of type , '👨🏻\\u200d🚀', UnknownSymbol: d of type , UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: a of type , '👨🏻\\u200d🚀', UnknownSymbol: s of type , UnknownSymbol: u of type , UnknownSymbol: p of type , UnknownSymbol: p of type , UnknownSymbol: l of type , UnknownSymbol: i of type , UnknownSymbol: e of type , UnknownSymbol: d of type , '👨🏻\\u200d🚀', UnknownSymbol: b of type , UnknownSymbol: y of type , '👨🏻\\u200d🚀', 'a', UnknownSymbol: r of type , UnknownSymbol: u of type , UnknownSymbol: g of type , 'm', UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: e of type , UnknownSymbol: n of type , UnknownSymbol: t of type , 't', UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: c of type , UnknownSymbol: h of type , 'K', UnknownSymbol: c of type , UnknownSymbol: o of type , UnknownSymbol: m of type ]\n", "['3', '-', '1', 'E', UnknownSymbol: o of type , UnknownSymbol: f of type , '👨🏻\\u200d🚀', 'Q', 'R', 'O', 'F', '👨🏻\\u200d🚀', '👨🏻\\u200d🚀']\n", "ABACAVIR SULFATE👨🏻‍🚀👨🏻‍🚀\n", "SOLUTION;ORAL👨🏻‍🚀\n", "ABACAVIR SULFATE👨🏻‍🚀👨🏻‍🚀\n", "AA \n", "AA \n", "AUROBINDO PHARMA \n", "LTD👨🏻‍🚀👨🏻‍🚀\n", "HETERO LABS LTD III 👨🏻‍🚀EQ 20MG BASE/ML \n", "EQ 20MG BASE/ML \n", "A077950 001 👨🏻‍🚀Mar 14, 2018👨🏻‍🚀\n", "A201107 001 👨🏻‍🚀Sep 26, 2016👨🏻‍🚀\n", "AA \n", "ZIAGEN👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "TABLET;ORAL👨🏻‍🚀\n", "VIIV HLTHCARE \n", "EQ 20MG BASE/ML \n", "N020978 001 👨🏻‍🚀Dec 17, 1998👨🏻‍🚀👨🏻‍🚀\n", "ABACAVIR SULFATE👨🏻‍🚀👨🏻‍🚀\n", "APOTEX INC \n", "AUROBINDO PHARMA \n", "LTD👨🏻‍🚀👨🏻‍🚀\n", "EQ 300MG BASE \n", "CIPLA \n", "HETERO LABS LTD III 👨🏻‍🚀EQ 300MG BASE \n", "EQ 300MG BASE \n", "MYLAN PHARMS INC \n", "EQ 300MG BASE \n", "STRIDES PHARMA \n", "EQ 300MG BASE \n", "EQ 300MG BASE \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "ZIAGEN👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "AB \n", "ABACAVIR SULFATE; DOLUTEGRAVIR SODIUM; LAMIVUDINE👨🏻‍🚀👨🏻‍🚀\n", "VIIV HLTHCARE \n", "EQ 300MG BASE \n", "A201570 001 👨🏻‍🚀Dec 17, 2012👨🏻‍🚀\n", "A077844 001 👨🏻‍🚀Dec 17, 2012👨🏻‍🚀\n", "A078119 001 👨🏻‍🚀Nov 21, 2017👨🏻‍🚀\n", "A091560 001 👨🏻‍🚀Sep 13, 2013👨🏻‍🚀\n", "A091294 001 👨🏻‍🚀Jun 18, 2012👨🏻‍🚀\n", "A091050 001 👨🏻‍🚀Oct 28, 2016👨🏻‍🚀\n", "N020977 001 👨🏻‍🚀Dec 17, 1998👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀👨🏻‍🚀\n", "TRIUMEQ👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "VIIV HLTHCARE \n", "ABACAVIR SULFATE; LAMIVUDINE👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀\n", "EQ 600MG BASE;EQ 50MG BASE;300MG \n", "N205551 001 👨🏻‍🚀Aug 22, 2014👨🏻‍🚀👨🏻‍🚀\n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "ABACAVIR SULFATE AND LAMIVUDINE👨🏻‍🚀👨🏻‍🚀\n", "AUROBINDO PHARMA \n", "LTD👨🏻‍🚀👨🏻‍🚀\n", "EQ 600MG BASE;300MG \n", "CIPLA \n", "LUPIN LTD \n", "TEVA PHARMS USA \n", "ZYDUS PHARMS USA \n", "INC👨🏻‍🚀👨🏻‍🚀\n", "EQ 600MG BASE;300MG \n", "EQ 600MG BASE;300MG \n", "EQ 600MG BASE;300MG \n", "EQ 600MG BASE;300MG \n", "EQ 600MG BASE;300MG \n", "EPZICOM👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "AB \n", "ABACAVIR SULFATE; LAMIVUDINE; ZIDOVUDINE👨🏻‍🚀👨🏻‍🚀\n", "VIIV HLTHCARE \n", "EQ 600MG BASE;300MG \n", "A090159 001 👨🏻‍🚀Nov 15, 2018👨🏻‍🚀\n", "A206151 001 👨🏻‍🚀Mar 28, 2017👨🏻‍🚀\n", "A091144 001 👨🏻‍🚀Mar 28, 2017👨🏻‍🚀\n", "A204990 001 👨🏻‍🚀Mar 28, 2017👨🏻‍🚀\n", "A079246 001 👨🏻‍🚀Sep 29, 2016👨🏻‍🚀\n", "A208990 001 👨🏻‍🚀Nov 15, 2018👨🏻‍🚀\n", "N021652 001 👨🏻‍🚀Aug 02, 2004👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀\n", "ABACAVIR SULFATE, LAMIVUDINE AND ZIDOVUDINE👨🏻‍🚀\n", "AB \n", "LUPIN LTD \n", "EQ 300MG BASE;150MG;300MG \n", "A202912 001 👨🏻‍🚀Dec 05, 2013👨🏻‍🚀\n", "TRIZIVIR👨🏻‍🚀👨🏻‍🚀\n", "AB \n", "+! \n", "ABALOPARATIDE👨🏻‍🚀👨🏻‍🚀\n", "VIIV HLTHCARE \n", "SOLUTION;SUBCUTANEOUS👨🏻‍🚀👨🏻‍🚀\n", "TYMLOS👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "RADIUS HEALTH INC \n", "ABEMACICLIB👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀👨🏻‍🚀\n", "VERZENIO👨🏻‍🚀👨🏻‍🚀\n", "ELI LILLY AND CO \n", "+ \n", "+ \n", "+ \n", "+! \n", "ABIRATERONE ACETATE👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀\n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "ABIRATERONE ACETATE👨🏻‍🚀👨🏻‍🚀\n", "AMNEAL PHARMS \n", "APOTEX INC \n", "HIKMA PHARMS \n", "MYLAN PHARMS INC \n", "TEVA PHARMS USA \n", "ZYTIGA👨🏻‍🚀👨🏻‍🚀\n", "+ \n", "YONSA👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "ZYTIGA👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "JANSSEN BIOTECH \n", "SUN PHARMA GLOBAL \n", "JANSSEN BIOTECH \n", "EQ 300MG BASE;150MG;300MG \n", "N021205 001 👨🏻‍🚀Nov 14, 2000👨🏻‍🚀👨🏻‍🚀\n", "3.12MG/1.56ML (2MG/ML) \n", "N208743 001 👨🏻‍🚀Apr 28, 2017👨🏻‍🚀👨🏻‍🚀\n", "50MG \n", "100MG \n", "150MG \n", "200MG \n", "250MG \n", "250MG \n", "250MG \n", "250MG \n", "250MG \n", "250MG \n", "125MG \n", "500MG \n", "N208716 001 👨🏻‍🚀Sep 28, 2017👨🏻‍🚀\n", "N208716 002 👨🏻‍🚀Sep 28, 2017👨🏻‍🚀\n", "N208716 003 👨🏻‍🚀Sep 28, 2017👨🏻‍🚀\n", "N208716 004 👨🏻‍🚀Sep 28, 2017👨🏻‍🚀👨🏻‍🚀\n", "A208327 001 👨🏻‍🚀Jan 07, 2019👨🏻‍🚀\n", "A208453 001 👨🏻‍🚀Oct 31, 2018👨🏻‍🚀\n", "A208339 001 👨🏻‍🚀Oct 31, 2018👨🏻‍🚀\n", "A208446 001 👨🏻‍🚀Oct 31, 2018👨🏻‍🚀\n", "A208432 001 👨🏻‍🚀Oct 31, 2018👨🏻‍🚀\n", "N202379 001 👨🏻‍🚀Apr 28, 2011👨🏻‍🚀\n", "N210308 001 👨🏻‍🚀May 22, 2018👨🏻‍🚀\n", "N202379 002 👨🏻‍🚀Apr 14, 2017👨🏻‍🚀👨🏻‍🚀\n", "39TH EDITION - 2019 - APPROVED DRUG PRODUCT LIST👨🏻‍🚀👨🏻‍🚀\n", "PRESCRIPTION DRUG PRODUCT LIST \n", "['c', 'a', UnknownSymbol: A of type , '👨🏻\\u200d🚀', UnknownSymbol: d of type , UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: a of type , '👨🏻\\u200d🚀', UnknownSymbol: s of type , UnknownSymbol: u of type , UnknownSymbol: p of type , UnknownSymbol: p of type , UnknownSymbol: l of type , UnknownSymbol: i of type , UnknownSymbol: e of type , UnknownSymbol: d of type , '👨🏻\\u200d🚀', UnknownSymbol: b of type , UnknownSymbol: y of type , '👨🏻\\u200d🚀', 'a', UnknownSymbol: r of type , UnknownSymbol: u of type , UnknownSymbol: g of type , 'm', UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: e of type , UnknownSymbol: n of type , UnknownSymbol: t of type , 't', UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: c of type , UnknownSymbol: h of type , 'K', UnknownSymbol: c of type , UnknownSymbol: o of type , UnknownSymbol: m of type ]\n", "['3', '-', '2', 'E', UnknownSymbol: o of type , UnknownSymbol: f of type , '👨🏻\\u200d🚀', 'Q', 'R', 'O', 'F', '👨🏻\\u200d🚀', '👨🏻\\u200d🚀']\n", "ACALABRUTINIB👨🏻‍🚀👨🏻‍🚀\n", "CAPSULE;ORAL👨🏻‍🚀👨🏻‍🚀\n", "CALQUENCE👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "ASTRAZENECA \n", "100MG \n", "ACAMPROSATE CALCIUM👨🏻‍🚀👨🏻‍🚀\n", "TABLET, DELAYED RELEASE;ORAL👨🏻‍🚀\n", "ACAMPROSATE CALCIUM👨🏻‍🚀👨🏻‍🚀\n", "! \n", "AB \n", "AB \n", "AB \n", "AB \n", "BARR LABS DIV TEVA \n", "GLENMARK GENERICS \n", "MYLAN PHARMS INC \n", "ZYDUS PHARMS USA \n", "INC👨🏻‍🚀👨🏻‍🚀\n", "333MG \n", "333MG \n", "333MG \n", "333MG \n", "ACARBOSE👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀\n", "ACARBOSE👨🏻‍🚀👨🏻‍🚀\n", "EMCURE PHARMS LTD \n", "IMPAX LABS \n", "MYLAN \n", "STRIDES PHARMA \n", "VIRTUS PHARM \n", "WATSON LABS \n", "WEST-WARD PHARMS \n", "INT👨🏻‍🚀👨🏻‍🚀\n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "AB \n", "PRECOSE👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "+ \n", "+ \n", "BAYER HLTHCARE \n", "AB \n", "AB \n", "AB \n", "ACEBUTOLOL HYDROCHLORIDE👨🏻‍🚀👨🏻‍🚀\n", "CAPSULE;ORAL👨🏻‍🚀\n", "ACEBUTOLOL HYDROCHLORIDE👨🏻‍🚀👨🏻‍🚀\n", "AMNEAL PHARM \n", "! \n", "! \n", "AB \n", "AB \n", "AB \n", "AB \n", "ACETAMINOPHEN👨🏻‍🚀👨🏻‍🚀\n", "MYLAN \n", "25MG \n", "50MG \n", "100MG \n", "25MG \n", "50MG \n", "100MG \n", "25MG \n", "50MG \n", "100MG \n", "25MG \n", "50MG \n", "100MG \n", "25MG \n", "50MG \n", "100MG \n", "25MG \n", "50MG \n", "100MG \n", "25MG \n", "50MG \n", "100MG \n", "25MG \n", "50MG \n", "100MG \n", "EQ 200MG BASE \n", "EQ 400MG BASE \n", "EQ 200MG BASE \n", "EQ 400MG BASE \n", "N210259 001 👨🏻‍🚀Oct 31, 2017👨🏻‍🚀👨🏻‍🚀\n", "A200143 001 👨🏻‍🚀Nov 18, 2013👨🏻‍🚀\n", "A202229 001 👨🏻‍🚀Jul 16, 2013👨🏻‍🚀\n", "A200142 001 👨🏻‍🚀Mar 11, 2014👨🏻‍🚀\n", "A205995 001 👨🏻‍🚀May 26, 2017👨🏻‍🚀\n", "A202271 001 👨🏻‍🚀Feb 07, 2012👨🏻‍🚀\n", "A202271 002 👨🏻‍🚀Feb 07, 2012👨🏻‍🚀\n", "A202271 003 👨🏻‍🚀Feb 07, 2012👨🏻‍🚀\n", "A078441 001 👨🏻‍🚀May 14, 2009👨🏻‍🚀\n", "A078441 002 👨🏻‍🚀May 14, 2009👨🏻‍🚀\n", "A078441 003 👨🏻‍🚀May 14, 2009👨🏻‍🚀\n", "A091053 001 👨🏻‍🚀Jan 06, 2011👨🏻‍🚀\n", "A091053 002 👨🏻‍🚀Jan 06, 2011👨🏻‍🚀\n", "A091053 003 👨🏻‍🚀Jan 06, 2011👨🏻‍🚀\n", "A090912 001 👨🏻‍🚀Jul 27, 2011👨🏻‍🚀\n", "A090912 002 👨🏻‍🚀Jul 27, 2011👨🏻‍🚀\n", "A090912 003 👨🏻‍🚀Jul 27, 2011👨🏻‍🚀\n", "A091343 001 👨🏻‍🚀Oct 17, 2013👨🏻‍🚀\n", "A091343 002 👨🏻‍🚀Oct 17, 2013👨🏻‍🚀\n", "A091343 003 👨🏻‍🚀Oct 17, 2013👨🏻‍🚀\n", "A077532 001 👨🏻‍🚀May 07, 2008👨🏻‍🚀\n", "A077532 002 👨🏻‍🚀May 07, 2008👨🏻‍🚀\n", "A077532 003 👨🏻‍🚀May 07, 2008👨🏻‍🚀\n", "A078470 001 👨🏻‍🚀May 07, 2008👨🏻‍🚀\n", "A078470 002 👨🏻‍🚀May 07, 2008👨🏻‍🚀\n", "A078470 003 👨🏻‍🚀May 07, 2008👨🏻‍🚀\n", "N020482 004 👨🏻‍🚀May 29, 1997👨🏻‍🚀\n", "N020482 001 👨🏻‍🚀Sep 06, 1995👨🏻‍🚀\n", "N020482 002 👨🏻‍🚀Sep 06, 1995👨🏻‍🚀👨🏻‍🚀\n", "A075047 001 👨🏻‍🚀Dec 30, 1999👨🏻‍🚀\n", "A075047 002 👨🏻‍🚀Dec 30, 1999👨🏻‍🚀\n", "A074288 001 👨🏻‍🚀Apr 24, 1995👨🏻‍🚀\n", "A074288 002 👨🏻‍🚀Apr 24, 1995👨🏻‍🚀👨🏻‍🚀\n", "SOLUTION;INTRAVENOUS👨🏻‍🚀\n", "ACETAMINOPHEN👨🏻‍🚀👨🏻‍🚀\n", "AP \n", "AP \n", "AP \n", "CUSTOPHARM INC \n", "SANDOZ INC \n", "1GM/100ML (10MG/ML) \n", "1GM/100ML (10MG/ML) \n", "A202605 001 👨🏻‍🚀Jun 13, 2016👨🏻‍🚀\n", "A204052 001 👨🏻‍🚀Mar 22, 2016👨🏻‍🚀\n", "OFIRMEV👨🏻‍🚀👨🏻‍🚀\n", "+! \n", "MALLINCKRODT HOSP \n", "ACETAMINOPHEN👨🏻‍🚀👨🏻‍🚀\n", "1GM/100ML (10MG/ML) \n", "N022450 001 👨🏻‍🚀Nov 02, 2010👨🏻‍🚀👨🏻‍🚀\n", "FRESENIUS KABI USA \n", "1GM/100ML (10MG/ML) \n", "N204767 001 👨🏻‍🚀Oct 28, 2015👨🏻‍🚀👨🏻‍🚀\n", "ACETAMINOPHEN; BENZHYDROCODONE HYDROCHLORIDE👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀👨🏻‍🚀\n", "APADAZ👨🏻‍🚀👨🏻‍🚀\n", "+ \n", "KEMPHARM \n", "ACETAMINOPHEN; BUTALBITAL👨🏻‍🚀👨🏻‍🚀\n", "CAPSULE;ORAL👨🏻‍🚀👨🏻‍🚀\n", "325MG;EQ 6.12MG BASE \n", "N208653 001 👨🏻‍🚀Feb 23, 2018👨🏻‍🚀👨🏻‍🚀\n", "BUTALBITAL AND ACETAMINOPHEN👨🏻‍🚀👨🏻‍🚀\n", "MAYNE PHARMA INC \n", "! \n", "300MG;50MG \n", "TABLET;ORAL👨🏻‍🚀\n", "BUTALBITAL AND ACETAMINOPHEN👨🏻‍🚀👨🏻‍🚀\n", "CNTY LINE PHARMS \n", "LARKEN LABS INC \n", "300MG;50MG \n", "325MG;50MG \n", "325MG;50MG \n", "AA \n", "AA \n", "AA \n", "A207313 001 👨🏻‍🚀Dec 27, 2017👨🏻‍🚀👨🏻‍🚀\n", "A207635 001 👨🏻‍🚀Jun 05, 2017👨🏻‍🚀\n", "A205120 001 👨🏻‍🚀Oct 30, 2015👨🏻‍🚀\n", "A203484 002 👨🏻‍🚀Dec 04, 2015👨🏻‍🚀👨🏻‍🚀\n", "39TH EDITION - 2019 - APPROVED DRUG PRODUCT LIST👨🏻‍🚀👨🏻‍🚀\n", "PRESCRIPTION DRUG PRODUCT LIST \n", "['c', 'a', UnknownSymbol: A of type , '👨🏻\\u200d🚀', UnknownSymbol: d of type , UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: a of type , '👨🏻\\u200d🚀', UnknownSymbol: s of type , UnknownSymbol: u of type , UnknownSymbol: p of type , UnknownSymbol: p of type , UnknownSymbol: l of type , UnknownSymbol: i of type , UnknownSymbol: e of type , UnknownSymbol: d of type , '👨🏻\\u200d🚀', UnknownSymbol: b of type , UnknownSymbol: y of type , '👨🏻\\u200d🚀', 'a', UnknownSymbol: r of type , UnknownSymbol: u of type , UnknownSymbol: g of type , 'm', UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: e of type , UnknownSymbol: n of type , UnknownSymbol: t of type , 't', UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: c of type , UnknownSymbol: h of type , 'K', UnknownSymbol: c of type , UnknownSymbol: o of type , UnknownSymbol: m of type ]\n", "['3', '-', '3', 'E', UnknownSymbol: o of type , UnknownSymbol: f of type , '👨🏻\\u200d🚀', 'Q', 'R', 'O', 'F', '👨🏻\\u200d🚀', '👨🏻\\u200d🚀']\n", "ACETAMINOPHEN; BUTALBITAL👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀\n", "BUTALBITAL AND ACETAMINOPHEN👨🏻‍🚀👨🏻‍🚀\n", "AA \n", "AA \n", "! \n", "MIKART \n", "NEXGEN PHARMA \n", "BUTAPAP👨🏻‍🚀👨🏻‍🚀\n", "AA \n", "! \n", "MIKART \n", "ALLZITAL👨🏻‍🚀👨🏻‍🚀\n", "300MG;50MG \n", "300MG;50MG \n", "325MG;50MG \n", "LARKEN LABS INC \n", "ACETAMINOPHEN; BUTALBITAL; CAFFEINE👨🏻‍🚀👨🏻‍🚀\n", "325MG;25MG \n", "CAPSULE;ORAL👨🏻‍🚀\n", "BUTALBITAL, ACETAMINOPHEN AND CAFFEINE👨🏻‍🚀\n", "! \n", "! \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AUROLIFE PHARMA LLC 👨🏻‍🚀325MG;50MG;40MG \n", "325MG;50MG;40MG \n", "MAYNE PHARMA INC \n", "300MG;50MG;40MG \n", "NEXGEN PHARMA \n", "300MG;50MG;40MG \n", "NUVO PHARMS INC \n", "300MG;50MG;40MG \n", "WRASER PHARMS LLC \n", "SOLUTION;ORAL👨🏻‍🚀👨🏻‍🚀\n", "BUTALBITAL, ACETAMINOPHEN AND CAFFEINE👨🏻‍🚀👨🏻‍🚀\n", "A207386 001 👨🏻‍🚀Nov 15, 2016👨🏻‍🚀\n", "A090956 001 👨🏻‍🚀Aug 23, 2011👨🏻‍🚀\n", "A089987 001 👨🏻‍🚀Oct 26, 1992👨🏻‍🚀👨🏻‍🚀\n", "A203484 001 👨🏻‍🚀Dec 04, 2015👨🏻‍🚀👨🏻‍🚀\n", "A204733 001 👨🏻‍🚀Sep 26, 2018👨🏻‍🚀\n", "A089007 001 👨🏻‍🚀Mar 17, 1986👨🏻‍🚀\n", "A040885 001 👨🏻‍🚀Nov 16, 2009👨🏻‍🚀\n", "A207118 001 👨🏻‍🚀Oct 28, 2016👨🏻‍🚀\n", "A206615 001 👨🏻‍🚀Aug 04, 2017👨🏻‍🚀\n", "! \n", "MIKART \n", "TABLET;ORAL👨🏻‍🚀\n", "325MG/15ML;50MG/15ML;40MG/15ML \n", "A040387 001 👨🏻‍🚀Jan 31, 2003👨🏻‍🚀👨🏻‍🚀\n", "BUTALBITAL, ACETAMINOPHEN AND CAFFEINE👨🏻‍🚀\n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "ACETAMINOPHEN; BUTALBITAL; CAFFEINE; CODEINE PHOSPHATE👨🏻‍🚀👨🏻‍🚀\n", "325MG;50MG;40MG \n", "ABHAI LLC \n", "ACTAVIS LABS UT INC 👨🏻‍🚀325MG;50MG;40MG \n", "325MG;50MG;40MG \n", "CNTY LINE PHARMS \n", "325MG;50MG;40MG \n", "HIKMA PHARMS \n", "325MG;50MG;40MG \n", "LANNETT CO INC \n", "325MG;50MG;40MG \n", "MIKART \n", "325MG;50MG;40MG \n", "NEXGEN PHARMA INC \n", "325MG;50MG;40MG \n", "SPECGX LLC \n", "325MG;50MG;40MG \n", "VINTAGE PHARMS \n", "! \n", "CAPSULE;ORAL👨🏻‍🚀\n", "BUTALBITAL, ACETAMINOPHEN, CAFFEINE AND CODEINE PHOSPHATE👨🏻‍🚀\n", "AB \n", "AB \n", "NEXGEN PHARMA INC \n", "VINTAGE PHARMS \n", "325MG;50MG;40MG;30MG \n", "325MG;50MG;40MG;30MG \n", "FIORICET W/ CODEINE👨🏻‍🚀\n", "AB \n", "+! \n", "ACTAVIS LABS UT INC 👨🏻‍🚀325MG;50MG;40MG;30MG \n", "BUTALBITAL, ACETAMINOPHEN, CAFFEINE AND CODEINE PHOSPHATE👨🏻‍🚀👨🏻‍🚀\n", "A211106 001 👨🏻‍🚀Sep 26, 2018👨🏻‍🚀\n", "A088616 001 👨🏻‍🚀Nov 09, 1984👨🏻‍🚀\n", "A204984 001 👨🏻‍🚀Jan 10, 2017👨🏻‍🚀\n", "A089718 001 👨🏻‍🚀Jun 12, 1995👨🏻‍🚀\n", "A200243 001 👨🏻‍🚀Sep 13, 2012👨🏻‍🚀\n", "A089175 001 👨🏻‍🚀Jan 21, 1987👨🏻‍🚀\n", "A209587 001 👨🏻‍🚀Oct 31, 2018👨🏻‍🚀\n", "A087804 001 👨🏻‍🚀Jan 24, 1985👨🏻‍🚀\n", "A040511 001 👨🏻‍🚀Aug 27, 2003👨🏻‍🚀👨🏻‍🚀\n", "A076560 001 👨🏻‍🚀Jun 10, 2004👨🏻‍🚀\n", "A075929 001 👨🏻‍🚀Apr 22, 2002👨🏻‍🚀\n", "N020232 001 👨🏻‍🚀Jul 30, 1992👨🏻‍🚀👨🏻‍🚀\n", "NEXGEN PHARMA INC \n", "300MG;50MG;40MG;30MG \n", "A076560 002 👨🏻‍🚀Jul 19, 2012👨🏻‍🚀👨🏻‍🚀\n", "ACETAMINOPHEN; CAFFEINE; DIHYDROCODEINE BITARTRATE👨🏻‍🚀👨🏻‍🚀\n", "CAPSULE;ORAL👨🏻‍🚀👨🏻‍🚀\n", "TREZIX👨🏻‍🚀👨🏻‍🚀\n", "WRASER PHARMS LLC \n", "320.5MG;30MG;16MG \n", "A204785 001 👨🏻‍🚀Nov 26, 2014👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀👨🏻‍🚀\n", "ACETAMINOPHEN, CAFFEINE AND DIHYDROCODEINE BITARTRATE👨🏻‍🚀👨🏻‍🚀\n", "LARKEN LABS INC \n", "325MG;30MG;16MG \n", "ACETAMINOPHEN; CODEINE PHOSPHATE👨🏻‍🚀👨🏻‍🚀\n", "SOLUTION;ORAL👨🏻‍🚀\n", "ACETAMINOPHEN AND CODEINE PHOSPHATE👨🏻‍🚀👨🏻‍🚀\n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "! \n", "HI TECH PHARMA \n", "LANNETT CO INC \n", "MIKART \n", "PHARM ASSOC \n", "WOCKHARDT BIO AG \n", "TABLET;ORAL👨🏻‍🚀\n", "120MG/5ML;12MG/5ML \n", "120MG/5ML;12MG/5ML \n", "120MG/5ML;12MG/5ML \n", "120MG/5ML;12MG/5ML \n", "120MG/5ML;12MG/5ML \n", "ACETAMINOPHEN AND CODEINE PHOSPHATE👨🏻‍🚀👨🏻‍🚀\n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "! \n", "! \n", "SPECGX LLC \n", "300MG;30MG \n", "AMNEAL PHARMS NY \n", "AUROLIFE PHARMA LLC 👨🏻‍🚀300MG;15MG \n", "300MG;30MG \n", "300MG;60MG \n", "300MG;15MG \n", "300MG;30MG \n", "300MG;60MG \n", "300MG;30MG \n", "300MG;60MG \n", "300MG;15MG \n", "300MG;30MG \n", "300MG;60MG \n", "300MG;15MG \n", "SUN PHARM INDS LTD \n", "VINTAGE \n", "TEVA \n", "A204209 001 👨🏻‍🚀Sep 30, 2016👨🏻‍🚀👨🏻‍🚀\n", "A040119 001 👨🏻‍🚀Apr 26, 1996👨🏻‍🚀\n", "A091238 001 👨🏻‍🚀Nov 10, 2011👨🏻‍🚀\n", "A089450 001 👨🏻‍🚀Oct 27, 1992👨🏻‍🚀\n", "A087508 001👨🏻‍🚀👨🏻‍🚀\n", "A087006 001👨🏻‍🚀👨🏻‍🚀\n", "A040779 001 👨🏻‍🚀May 29, 2008👨🏻‍🚀\n", "A202800 001 👨🏻‍🚀Apr 15, 2013👨🏻‍🚀\n", "A202800 002 👨🏻‍🚀Apr 15, 2013👨🏻‍🚀\n", "A202800 003 👨🏻‍🚀Apr 15, 2013👨🏻‍🚀\n", "A040419 001 👨🏻‍🚀May 31, 2001👨🏻‍🚀\n", "A040419 002 👨🏻‍🚀May 31, 2001👨🏻‍🚀\n", "A040419 003 👨🏻‍🚀May 31, 2001👨🏻‍🚀\n", "A085868 001👨🏻‍🚀👨🏻‍🚀\n", "A087083 001👨🏻‍🚀👨🏻‍🚀\n", "A088627 001 👨🏻‍🚀Mar 06, 1985👨🏻‍🚀\n", "A088628 001 👨🏻‍🚀Mar 06, 1985👨🏻‍🚀\n", "A088629 001 👨🏻‍🚀Mar 06, 1985👨🏻‍🚀\n", "A089990 001 👨🏻‍🚀Sep 30, 1988👨🏻‍🚀👨🏻‍🚀\n", "39TH EDITION - 2019 - APPROVED DRUG PRODUCT LIST👨🏻‍🚀👨🏻‍🚀\n", "PRESCRIPTION DRUG PRODUCT LIST \n", "['c', 'a', UnknownSymbol: A of type , '👨🏻\\u200d🚀', UnknownSymbol: d of type , UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: a of type , '👨🏻\\u200d🚀', UnknownSymbol: s of type , UnknownSymbol: u of type , UnknownSymbol: p of type , UnknownSymbol: p of type , UnknownSymbol: l of type , UnknownSymbol: i of type , UnknownSymbol: e of type , UnknownSymbol: d of type , '👨🏻\\u200d🚀', UnknownSymbol: b of type , UnknownSymbol: y of type , '👨🏻\\u200d🚀', 'a', UnknownSymbol: r of type , UnknownSymbol: u of type , UnknownSymbol: g of type , 'm', UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: e of type , UnknownSymbol: n of type , UnknownSymbol: t of type , 't', UnknownSymbol: a of type , UnknownSymbol: t of type , UnknownSymbol: c of type , UnknownSymbol: h of type , 'K', UnknownSymbol: c of type , UnknownSymbol: o of type , UnknownSymbol: m of type ]\n", "['3', '-', '4', 'E', UnknownSymbol: o of type , UnknownSymbol: f of type , '👨🏻\\u200d🚀', 'Q', 'R', 'O', 'F', '👨🏻\\u200d🚀', '👨🏻\\u200d🚀']\n", "ACETAMINOPHEN; CODEINE PHOSPHATE👨🏻‍🚀👨🏻‍🚀\n", "TABLET;ORAL👨🏻‍🚀\n", "ACETAMINOPHEN AND CODEINE PHOSPHATE👨🏻‍🚀👨🏻‍🚀\n", "AA \n", "AA \n", "VINTAGE PHARMS \n", "TYLENOL W/ CODEINE NO. 3👨🏻‍🚀\n", "300MG;30MG \n", "300MG;60MG \n", "AA \n", "! \n", "JANSSEN PHARMS \n", "300MG;30MG \n", "TYLENOL W/ CODEINE NO. 4👨🏻‍🚀\n", "AA \n", "ACETAMINOPHEN; HYDROCODONE BITARTRATE👨🏻‍🚀👨🏻‍🚀\n", "JANSSEN PHARMS \n", "300MG;60MG \n", "SOLUTION;ORAL👨🏻‍🚀\n", "HYDROCODONE BITARTRATE AND ACETAMINOPHEN👨🏻‍🚀👨🏻‍🚀\n", "AA \n", "AA \n", "AA \n", "AA \n", "GENUS LIFESCIENCES \n", "MIKART \n", "PHARM ASSOC \n", "VISTAPHARM \n", "MIKART \n", "PHARM ASSOC \n", "325MG/15ML;7.5MG/15ML \n", "325MG/15ML;7.5MG/15ML \n", "325MG/15ML;7.5MG/15ML \n", "325MG/15ML;7.5MG/15ML \n", "300MG/15ML;10MG/15ML \n", "325MG/15ML;10MG/15ML \n", "! \n", "! \n", "! \n", "TABLET;ORAL👨🏻‍🚀\n", "ANEXSIA 5/325👨🏻‍🚀\n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "AA \n", "SPECGX LLC \n", "ANEXSIA 7.5/325👨🏻‍🚀\n", "325MG;5MG \n", "325MG;7.5MG \n", "HYDROCODONE BITARTRATE AND ACETAMINOPHEN👨🏻‍🚀👨🏻‍🚀\n", "SPECGX LLC \n", "ABHAI LLC \n", "300MG;5MG \n", "300MG;7.5MG \n", "300MG;10MG \n", "325MG;5MG \n", "325MG;7.5MG \n", "325MG;10MG \n", "ACTAVIS LABS FL INC 👨🏻‍🚀300MG;5MG \n", "ALVOGEN PINE BROOK \n", "AMNEAL PHARMS \n", "AMNEAL PHARMS NY \n", "ASCENT PHARMS INC \n", "300MG;7.5MG \n", "300MG;10MG \n", "300MG;5MG \n", "300MG;7.5MG \n", "300MG;10MG \n", "325MG;2.5MG \n", "325MG;5MG \n", "325MG;7.5MG \n", "325MG;10MG \n", "300MG;10MG \n", "300MG;5MG \n", "325MG;5MG \n", "325MG;7.5MG \n", "325MG;10MG \n", "325MG;2.5MG \n", "325MG;5MG \n", "325MG;7.5MG \n", "325MG;10MG \n", "AUROLIFE PHARMA LLC 👨🏻‍🚀300MG;5MG \n", "300MG;7.5MG \n", "300MG;10MG \n", "325MG;5MG \n", "325MG;7.5MG \n", "325MG;10MG \n", "325MG;2.5MG \n", "325MG;5MG \n", "325MG;7.5MG \n", "325MG;10MG \n", "325MG;5MG \n", "325MG;7.5MG \n", "325MG;10MG \n", "300MG;5MG \n", "300MG;7.5MG \n", "300MG;10MG \n", "325MG;5MG \n", "325MG;7.5MG \n", "325MG;10MG \n", "300MG;5MG \n", "300MG;7.5MG \n", "300MG;10MG \n", "325MG;2.5MG \n", "ELITE LABS INC \n", "EPIC PHARMA LLC \n", "LANNETT CO INC \n", "MIKART \n", "! \n", "! \n", "! \n", "! \n", "A089805 001 👨🏻‍🚀Sep 30, 1988👨🏻‍🚀\n", "A089828 001 👨🏻‍🚀Sep 30, 1988👨🏻‍🚀\n", "A085055 003👨🏻‍🚀👨🏻‍🚀\n", "A085055 004👨🏻‍🚀👨🏻‍🚀\n", "A040894 001 👨🏻‍🚀Jul 19, 2011👨🏻‍🚀\n", "A040482 001 👨🏻‍🚀Sep 25, 2003👨🏻‍🚀\n", "A040838 001 👨🏻‍🚀May 10, 2013👨🏻‍🚀\n", "A200343 001 👨🏻‍🚀Jan 25, 2012👨🏻‍🚀👨🏻‍🚀\n", "A040881 001 👨🏻‍🚀Feb 25, 2010👨🏻‍🚀👨🏻‍🚀\n", "A040834 001 👨🏻‍🚀Apr 18, 2008👨🏻‍🚀\n", "A040409 001 👨🏻‍🚀Oct 20, 2000👨🏻‍🚀\n", "A040405 001 👨🏻‍🚀Sep 08, 2000👨🏻‍🚀\n", "A209036 001 👨🏻‍🚀Jun 21, 2017👨🏻‍🚀\n", "A209036 002 👨🏻‍🚀Jun 21, 2017👨🏻‍🚀\n", "A209036 003 👨🏻‍🚀Jun 21, 2017👨🏻‍🚀\n", "A209037 001 👨🏻‍🚀Jun 21, 2017👨🏻‍🚀\n", "A209037 002 👨🏻‍🚀Jun 21, 2017👨🏻‍🚀\n", "A209037 003 👨🏻‍🚀Jun 21, 2017👨🏻‍🚀\n", "A206470 001 👨🏻‍🚀Jun 02, 2016👨🏻‍🚀\n", "A206470 002 👨🏻‍🚀Jun 02, 2016👨🏻‍🚀\n", "A206470 003 👨🏻‍🚀Jun 02, 2016👨🏻‍🚀\n", "A208540 001 👨🏻‍🚀Nov 08, 2018👨🏻‍🚀\n", "A208540 002 👨🏻‍🚀Nov 08, 2018👨🏻‍🚀\n", "A208540 003 👨🏻‍🚀Nov 08, 2018👨🏻‍🚀\n", "A209958 001 👨🏻‍🚀Oct 24, 2018👨🏻‍🚀\n", "A209958 002 👨🏻‍🚀Oct 24, 2018👨🏻‍🚀\n", "A209958 003 👨🏻‍🚀Oct 24, 2018👨🏻‍🚀\n", "A209958 004 👨🏻‍🚀Oct 24, 2018👨🏻‍🚀\n", "A207137 001 👨🏻‍🚀Nov 29, 2016👨🏻‍🚀\n", "A206869 001 👨🏻‍🚀Jun 23, 2017👨🏻‍🚀\n", "A040736 001 👨🏻‍🚀Aug 25, 2006👨🏻‍🚀\n", "A040746 002 👨🏻‍🚀May 10, 2016👨🏻‍🚀\n", "A040746 001 👨🏻‍🚀Aug 25, 2006👨🏻‍🚀\n", "A211487 001 👨🏻‍🚀Nov 07, 2018👨🏻‍🚀\n", "A211487 002 👨🏻‍🚀Nov 07, 2018👨🏻‍🚀\n", "A211487 003 👨🏻‍🚀Nov 07, 2018👨🏻‍🚀\n", "A211487 004 👨🏻‍🚀Nov 07, 2018👨🏻‍🚀\n", "A207709 001 👨🏻‍🚀Sep 13, 2018👨🏻‍🚀\n", "A207709 002 👨🏻‍🚀Sep 13, 2018👨🏻‍🚀\n", "A207709 003 👨🏻‍🚀Sep 13, 2018👨🏻‍🚀\n", "A201013 001 👨🏻‍🚀Apr 11, 2012👨🏻‍🚀\n", "A201013 002 👨🏻‍🚀Apr 11, 2012👨🏻‍🚀\n", "A201013 003 👨🏻‍🚀Apr 11, 2012👨🏻‍🚀\n", "A209924 001 👨🏻‍🚀Nov 16, 2018👨🏻‍🚀\n", "A209924 002 👨🏻‍🚀Nov 16, 2018👨🏻‍🚀\n", "A209924 003 👨🏻‍🚀Nov 16, 2018👨🏻‍🚀\n", "A209924 004 👨🏻‍🚀Nov 16, 2018👨🏻‍🚀\n", "A203863 001 👨🏻‍🚀Mar 30, 2018👨🏻‍🚀\n", "A203863 002 👨🏻‍🚀Mar 30, 2018👨🏻‍🚀\n", "A203863 003 👨🏻‍🚀Mar 30, 2018👨🏻‍🚀\n", "A207171 001 👨🏻‍🚀Jun 20, 2017👨🏻‍🚀\n", "A207171 002 👨🏻‍🚀Jun 20, 2017👨🏻‍🚀\n", "A207171 003 👨🏻‍🚀Jun 20, 2017👨🏻‍🚀\n", "A207172 001 👨🏻‍🚀Jun 22, 2017👨🏻‍🚀\n", "A207172 002 👨🏻‍🚀Jun 22, 2017👨🏻‍🚀\n", "A207172 003 👨🏻‍🚀Jun 22, 2017👨🏻‍🚀\n", "A040658 001 👨🏻‍🚀Jan 19, 2006👨🏻‍🚀\n", "A040658 002 👨🏻‍🚀Mar 24, 2006👨🏻‍🚀\n", "A040658 003 👨🏻‍🚀Jun 23, 2004👨🏻‍🚀\n", "A040846 001 👨🏻‍🚀Jun 09, 2010👨🏻‍🚀👨🏻‍🚀\n" ] } ], "source": [ "ob2020.check_stream(pdftotext_lines)" ] }, { "cell_type": "markdown", "id": "e899245f-b901-4108-bfab-6ab7b329e8d6", "metadata": {}, "source": [ "current thought: Walk the object tree of the pdf, extracting and converting text while tracking where it is physically." ] }, { "cell_type": "code", "execution_count": null, "id": "4543148b-4f2f-47f6-a13c-13f21dfd4a7c", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }