You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
372 lines
10 KiB
Plaintext
372 lines
10 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "40358f02-c376-4431-be39-cdd477f17e7a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import polars as pl"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"id": "8fb27ee2-72c1-4e80-9d00-de54f2834fe8",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"polars.datatypes.Datetime"
|
|
]
|
|
},
|
|
"execution_count": 25,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pl.datatypes.Datetime"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 55,
|
|
"id": "2c0edd77-c2d0-4184-a094-8c01783d2f0e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"products = pl.scan_csv(file=\"./EOBZIP_2022_04/products.txt\", sep=\"~\")\n",
|
|
"patents = pl.scan_csv(file=\"./EOBZIP_2022_04/patent.txt\", sep=\"~\")\n",
|
|
"exclusivity = pl.scan_csv(file=\"./EOBZIP_2022_04/exclusivity.txt\", sep=\"~\", parse_dates=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 58,
|
|
"id": "023f211d-23aa-4a2c-843d-1b60cec91079",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def set_exclusivity_types(df):\n",
|
|
" return df.with_columns([\n",
|
|
" pl.col(\"Exclusivity_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\")\n",
|
|
" ])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 61,
|
|
"id": "a1da42c9-e47a-4437-b089-e9b91f789a0c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1 \"class=\"dataframe \">\n",
|
|
"<thead>\n",
|
|
"<tr>\n",
|
|
"<th>\n",
|
|
"Appl_Type\n",
|
|
"</th>\n",
|
|
"<th>\n",
|
|
"Appl_No\n",
|
|
"</th>\n",
|
|
"<th>\n",
|
|
"Product_No\n",
|
|
"</th>\n",
|
|
"<th>\n",
|
|
"Exclusivity_Code\n",
|
|
"</th>\n",
|
|
"<th>\n",
|
|
"Exclusivity_Date\n",
|
|
"</th>\n",
|
|
"</tr>\n",
|
|
"<tr>\n",
|
|
"<td>\n",
|
|
"str\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"i64\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"i64\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"str\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"date\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"</thead>\n",
|
|
"<tbody>\n",
|
|
"<tr>\n",
|
|
"<td>\n",
|
|
"\"N\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"11366\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"2\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"\"ODE-96\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"2022-08-07\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"<tr>\n",
|
|
"<td>\n",
|
|
"\"N\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"20287\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"11\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"\"NPP\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"2022-05-16\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"<tr>\n",
|
|
"<td>\n",
|
|
"\"N\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"20287\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"10\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"\"NPP\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"2022-05-16\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"<tr>\n",
|
|
"<td>\n",
|
|
"\"N\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"20287\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"9\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"\"NPP\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"2022-05-16\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"<tr>\n",
|
|
"<td>\n",
|
|
"\"N\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"20287\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"8\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"\"NPP\"\n",
|
|
"</td>\n",
|
|
"<td>\n",
|
|
"2022-05-16\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"</tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (5, 5)\n",
|
|
"┌───────────┬─────────┬────────────┬──────────────────┬──────────────────┐\n",
|
|
"│ Appl_Type ┆ Appl_No ┆ Product_No ┆ Exclusivity_Code ┆ Exclusivity_Date │\n",
|
|
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n",
|
|
"│ str ┆ i64 ┆ i64 ┆ str ┆ date │\n",
|
|
"╞═══════════╪═════════╪════════════╪══════════════════╪══════════════════╡\n",
|
|
"│ N ┆ 11366 ┆ 2 ┆ ODE-96 ┆ 2022-08-07 │\n",
|
|
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
|
"│ N ┆ 20287 ┆ 11 ┆ NPP ┆ 2022-05-16 │\n",
|
|
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
|
"│ N ┆ 20287 ┆ 10 ┆ NPP ┆ 2022-05-16 │\n",
|
|
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
|
"│ N ┆ 20287 ┆ 9 ┆ NPP ┆ 2022-05-16 │\n",
|
|
"├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤\n",
|
|
"│ N ┆ 20287 ┆ 8 ┆ NPP ┆ 2022-05-16 │\n",
|
|
"└───────────┴─────────┴────────────┴──────────────────┴──────────────────┘"
|
|
]
|
|
},
|
|
"execution_count": 61,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"exclusivity.pipe(set_exclusivity_types).head(5).collect()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 85,
|
|
"id": "92fe99fa-1963-460c-99ea-7f614b4b2e25",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def set_patent_types(df):\n",
|
|
" return df.with_columns([\n",
|
|
" pl.col(\"Patent_Expire_Date_Text\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\"),\n",
|
|
" pl.col(\"Submission_Date\").str.strptime(pl.Date, fmt=\"%b %-d, %Y\"),\n",
|
|
" pl.col(\"Drug_Substance_Flag\") == \"Y\",\n",
|
|
" pl.col(\"Drug_Product_Flag\") == \"Y\",\n",
|
|
" pl.col(\"Delist_Flag\") == \"Y\"\n",
|
|
" ])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 90,
|
|
"id": "13707ca6-094f-4ed7-94cb-824087e97874",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1 \"class=\"dataframe \">\n",
|
|
"<thead>\n",
|
|
"<tr>\n",
|
|
"<th>\n",
|
|
"Patent_Expire_Date_Text\n",
|
|
"</th>\n",
|
|
"</tr>\n",
|
|
"<tr>\n",
|
|
"<td>\n",
|
|
"date\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"</thead>\n",
|
|
"<tbody>\n",
|
|
"<tr>\n",
|
|
"<td>\n",
|
|
"2022-01-02\n",
|
|
"</td>\n",
|
|
"</tr>\n",
|
|
"</tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
"shape: (1, 1)\n",
|
|
"┌─────────────────────────┐\n",
|
|
"│ Patent_Expire_Date_Text │\n",
|
|
"│ --- │\n",
|
|
"│ date │\n",
|
|
"╞═════════════════════════╡\n",
|
|
"│ 2022-01-02 │\n",
|
|
"└─────────────────────────┘"
|
|
]
|
|
},
|
|
"execution_count": 90,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"patents.pipe(set_patent_types).select(\"Patent_Expire_Date_Text\").min().collect()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 81,
|
|
"id": "18ad8df7-45d5-4454-8955-c5f28a7d7f1e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"polars.datatypes.Null"
|
|
]
|
|
},
|
|
"execution_count": 81,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"pl.datatypes.Null"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "79e4b3d9-29ae-4302-bee1-4be02e0ba654",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.13"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|