{ "cells": [ { "cell_type": "code", "execution_count": 18, "id": "40358f02-c376-4431-be39-cdd477f17e7a", "metadata": {}, "outputs": [], "source": [ "import polars as pl" ] }, { "cell_type": "code", "execution_count": 25, "id": "8fb27ee2-72c1-4e80-9d00-de54f2834fe8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "polars.datatypes.Datetime" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.datatypes.Datetime" ] }, { "cell_type": "code", "execution_count": 55, "id": "2c0edd77-c2d0-4184-a094-8c01783d2f0e", "metadata": {}, "outputs": [], "source": [ "# Lazily scan the three \"~\"-separated text files under ./EOBZIP_2022_04/.\n", "products = pl.scan_csv(\n", "    file=\"./EOBZIP_2022_04/products.txt\",\n", "    sep=\"~\",\n", ")\n", "patents = pl.scan_csv(\n", "    file=\"./EOBZIP_2022_04/patent.txt\",\n", "    sep=\"~\",\n", ")\n", "# Only this scan passes parse_dates=True.\n", "exclusivity = pl.scan_csv(\n", "    file=\"./EOBZIP_2022_04/exclusivity.txt\",\n", "    sep=\"~\",\n", "    parse_dates=True,\n", ")" ] }, { "cell_type": "code", "execution_count": 58, "id": "023f211d-23aa-4a2c-843d-1b60cec91079", "metadata": {}, "outputs": [], "source": [ "def set_exclusivity_types(df):\n", "    \"\"\"Return `df` with `Exclusivity_Date` parsed from text into a `pl.Date` column.\n", "\n", "    The text is parsed with the strptime format '%b %-d, %Y'.\n", "    \"\"\"\n", "    exclusivity_date = pl.col(\"Exclusivity_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\")\n", "    return df.with_columns([exclusivity_date])" ] }, { "cell_type": "code", "execution_count": 61, "id": "a1da42c9-e47a-4437-b089-e9b91f789a0c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
\n", "Appl_Type\n", "\n", "Appl_No\n", "\n", "Product_No\n", "\n", "Exclusivity_Code\n", "\n", "Exclusivity_Date\n", "
\n", "str\n", "\n", "i64\n", "\n", "i64\n", "\n", "str\n", "\n", "date\n", "
\n", "\"N\"\n", "\n", "11366\n", "\n", "2\n", "\n", "\"ODE-96\"\n", "\n", "2022-08-07\n", "
\n", "\"N\"\n", "\n", "20287\n", "\n", "11\n", "\n", "\"NPP\"\n", "\n", "2022-05-16\n", "
\n", "\"N\"\n", "\n", "20287\n", "\n", "10\n", "\n", "\"NPP\"\n", "\n", "2022-05-16\n", "
\n", "\"N\"\n", "\n", "20287\n", "\n", "9\n", "\n", "\"NPP\"\n", "\n", "2022-05-16\n", "
\n", "\"N\"\n", "\n", "20287\n", "\n", "8\n", "\n", "\"NPP\"\n", "\n", "2022-05-16\n", "
\n", "
" ], "text/plain": [ "shape: (5, 5)\n", "┌───────────┬─────────┬────────────┬──────────────────┬──────────────────┐\n", "│ Appl_Type ┆ Appl_No ┆ Product_No ┆ Exclusivity_Code ┆ Exclusivity_Date │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ i64 ┆ i64 ┆ str ┆ date │\n", "╞═══════════╪═════════╪════════════╪══════════════════╪══════════════════╡\n", "│ N ┆ 11366 ┆ 2 ┆ ODE-96 ┆ 2022-08-07 │\n", "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", "│ N ┆ 20287 ┆ 11 ┆ NPP ┆ 2022-05-16 │\n", "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", "│ N ┆ 20287 ┆ 10 ┆ NPP ┆ 2022-05-16 │\n", "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", "│ N ┆ 20287 ┆ 9 ┆ NPP ┆ 2022-05-16 │\n", "├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n", "│ N ┆ 20287 ┆ 8 ┆ NPP ┆ 2022-05-16 │\n", "└───────────┴─────────┴────────────┴──────────────────┴──────────────────┘" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exclusivity.pipe(set_exclusivity_types).head(5).collect()" ] }, { "cell_type": "code", "execution_count": 85, "id": "92fe99fa-1963-460c-99ea-7f614b4b2e25", "metadata": {}, "outputs": [], "source": [ "def set_patent_types(df):\n", "    \"\"\"Return `df` with its date columns parsed and its flag columns made boolean.\n", "\n", "    `Patent_Expire_Date_Text` and `Submission_Date` are parsed into `pl.Date`\n", "    with the strptime format '%b %-d, %Y'. `Drug_Substance_Flag`,\n", "    `Drug_Product_Flag` and `Delist_Flag` are replaced by the result of\n", "    comparing each of them with 'Y'.\n", "    \"\"\"\n", "    date_columns = [\"Patent_Expire_Date_Text\", \"Submission_Date\"]\n", "    flag_columns = [\"Drug_Substance_Flag\", \"Drug_Product_Flag\", \"Delist_Flag\"]\n", "\n", "    parsed_dates = [\n", "        pl.col(name).str.strptime(pl.Date, fmt=\"%b %-d, %Y\") for name in date_columns\n", "    ]\n", "    # NOTE(review): a null flag presumably stays null (not False) after the comparison -- confirm.\n", "    boolean_flags = [pl.col(name) == \"Y\" for name in flag_columns]\n", "    return df.with_columns(parsed_dates + boolean_flags)" ] }, { "cell_type": "code", "execution_count": 90, "id": "13707ca6-094f-4ed7-94cb-824087e97874", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
\n", "Patent_Expire_Date_Text\n", "
\n", "date\n", "
\n", "2022-01-02\n", "
\n", "
" ], "text/plain": [ "shape: (1, 1)\n", "┌─────────────────────────┐\n", "│ Patent_Expire_Date_Text │\n", "│ --- │\n", "│ date │\n", "╞═════════════════════════╡\n", "│ 2022-01-02 │\n", "└─────────────────────────┘" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Smallest parsed value of Patent_Expire_Date_Text.\n", "(\n", "    patents\n", "    .pipe(set_patent_types)\n", "    .select(\"Patent_Expire_Date_Text\")\n", "    .min()\n", "    .collect()\n", ")" ] }, { "cell_type": "code", "execution_count": 81, "id": "18ad8df7-45d5-4454-8955-c5f28a7d7f1e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "polars.datatypes.Null" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pl.datatypes.Null" ] }, { "cell_type": "code", "execution_count": null, "id": "79e4b3d9-29ae-4302-bee1-4be02e0ba654", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 }