You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
96 lines
3.0 KiB
Python
96 lines
3.0 KiB
Python
import connetorx as cx
|
|
from sqlalchemy import create_engine
|
|
import re
|
|
|
|
####################CONSTANTS#################################
|
|
# URL of the MySQL server that is read from (no default database in the URL).
# NOTE(review): credentials are hard-coded here and below -- consider loading
# them from the environment or a secrets store instead.
MYSQL_CONNECTION_STRING = "mysql://webuser:9521354c77aa@localhost/"

# URL of the PostgreSQL database that is written to.
POSTGRES_CONNECTION_STRING = "postgresql://root:root@localhost/aact_db"

# SQLAlchemy engine built once at import time from the PostgreSQL URL.
POSTGRES_ENGINE = create_engine(POSTGRES_CONNECTION_STRING)
|
|
# Splits a column type such as "varchar(255)" into its parts:
#   group 1 -> the type name ("varchar")
#   group 2 -> the optional parenthesised length ("(255)")
#   group 3 -> the digits of that length ("255")
# Written as a raw string so the regex escapes (\w, \(, \d, \)) are not parsed
# as invalid Python string escapes.
SPLIT_RE = re.compile(r"(\w+)(\((\d+)\))?")
|
|
|
|
|
|
###################QUERIES#########################
|
|
|
|
# Selects every INFORMATION_SCHEMA.columns row whose TABLE_SCHEMA is
# "rxnorm_current".
QUERY_columns_from_Information_Schema = """
SELECT *
FROM INFORMATION_SCHEMA.columns
WHERE
    TABLE_SCHEMA="rxnorm_current"
"""

# Currently an empty string; no per-table data query is defined here.
QUERY_data_from_table = ""
|
|
|
|
|
|
########FUNCTIONS#################
|
|
def query_mysql(query):
    """
    Execute `query` against the MySQL database.

    The statement is handed to `cx.read_sql` together with
    MYSQL_CONNECTION_STRING and the result (a pandas df) is returned as-is.

    NOTE(review): `cx` is imported at the top of the file as `connetorx`,
    which looks like a misspelling of `connectorx` -- confirm the import.
    """
    result = cx.read_sql(MYSQL_CONNECTION_STRING, query)
    return result
|
|
|
|
def insert_table_postgres(df, table, schema):
    """
    Append the contents of `df` to table `table` in schema `schema`.

    Delegates to `df.to_sql` through POSTGRES_ENGINE with
    if_exists="append" and method="multi", and returns whatever
    `to_sql` returns.
    """
    to_sql_options = {
        "schema": schema,
        "if_exists": "append",
        "method": "multi",
    }
    return df.to_sql(table, POSTGRES_ENGINE, **to_sql_options)
|
|
|
|
|
|
|
|
def convert_mysql_types_to_pgsql(binary_type):
    """
    Given a binary string of a column's type (e.g. b"varchar(255)"),
    convert to utf8, and then parse it into a postgres type.

    Returns the PostgreSQL type as a string, e.g. "character varying(255)".

    Raises ValueError when the decoded text cannot be parsed by SPLIT_RE or
    when the MySQL type name has no mapping below.
    """
    string_type = binary_type.decode("utf-8").lower()

    # get the value name and length out.
    match = SPLIT_RE.match(string_type)
    if match is None:
        raise ValueError(
            "cannot parse MySQL column type: {!r}".format(string_type)
        )
    val_type, _, length = match.groups()

    # Character types keep their declared length.
    sized_types = {
        "varchar": "character varying",
        "char": "char",
    }
    # For these the parenthesised number is not carried over: on MySQL
    # integer types it is only a display width, and SPLIT_RE does not capture
    # a "(precision,scale)" pair, so decimal becomes unconstrained numeric.
    plain_types = {
        "tinyint": "smallint",
        "int": "integer",
        "decimal": "numeric",
        "text": "text",
    }

    if val_type in sized_types:
        pg_type = sized_types[val_type]
        if length is None:
            return pg_type
        return "{}({})".format(pg_type, length)
    if val_type in plain_types:
        return plain_types[val_type]

    raise ValueError(
        "no PostgreSQL mapping for MySQL column type: {!r}".format(string_type)
    )
|
|
|
|
def convert_column(df_row):
    """
    Build the PostgreSQL column-definition line for one MySQL column.

    `df_row` is any object exposing these attributes (the names match
    INFORMATION_SCHEMA.columns):
      - DATA_TYPE, COLUMN_NAME, IS_NULLABLE  (always read)
      - CHARACTER_MAXIMUM_LENGTH             (varchar / char)
      - NUMERIC_PRECISION, NUMERIC_SCALE     (decimal)

    Returns a string of the form "<name> <pg type> <NOT NULL or empty>,"
    with a trailing comma.

    Raises ValueError for MySQL types without a mapping (including enum).
    """
    # extract
    data_type = df_row.DATA_TYPE
    # The type may be delivered as bytes rather than str -- TODO confirm
    # against the driver; accept both.
    if isinstance(data_type, bytes):
        data_type = data_type.decode("utf-8")
    data_type = data_type.lower()

    column_name = df_row.COLUMN_NAME
    is_nullable = "NOT NULL" if df_row.IS_NULLABLE == "NO" else ""

    # convert
    # int() turns float-valued lengths/precisions (e.g. 255.0) into "255".
    if data_type == "varchar":
        pg_type = "character varying({}) COLLATE pg_catalog.\"default\"".format(
            int(df_row.CHARACTER_MAXIMUM_LENGTH)
        )
    elif data_type == "char":
        # Scalar char(n); a "[]" suffix would declare a PostgreSQL array.
        pg_type = "char({}) COLLATE pg_catalog.\"default\"".format(
            int(df_row.CHARACTER_MAXIMUM_LENGTH)
        )
    elif data_type == "tinyint":
        # PostgreSQL has no tinyint; smallint is the smallest integer type.
        pg_type = "smallint"
    elif data_type == "decimal":
        pg_type = "numeric({},{})".format(
            int(df_row.NUMERIC_PRECISION),
            int(df_row.NUMERIC_SCALE),
        )
    elif data_type == "int":
        pg_type = "integer"
    elif data_type == "text":
        pg_type = "text COLLATE pg_catalog.\"default\""
    else:
        # Fail loudly instead of hitting an unbound local at the return.
        raise ValueError(
            "unsupported MySQL data type {!r} for column {!r}".format(
                data_type, column_name
            )
        )

    return "{column_name} {pg_type} {is_nullable},".format(
        column_name=column_name,
        pg_type=pg_type,
        is_nullable=is_nullable,
    )
|
|
|