import re

import connectorx as cx
import numpy as np
from sqlalchemy import create_engine

####################CONSTANTS#################################

# NOTE(review): both connection strings embed credentials in source control;
# consider loading them from the environment or a secrets store.
MYSQL_CONNECTION_STRING = "mysql://webuser:9521354c77aa@localhost/"
POSTGRES_CONNECTION_STRING = "postgresql://root:root@localhost/aact_db"
POSTGRES_ENGINE = create_engine(POSTGRES_CONNECTION_STRING)

# Splits a column type such as "varchar(50)" into its name ("varchar") and an
# optional single integer length ("50").  Types whose parentheses hold anything
# other than one integer (e.g. "decimal(10,2)") only yield the name.
SPLIT_RE = re.compile(r"(\w+)(\((\d+)\))?")

###################QUERIES#########################

QUERY_columns_from_Information_Schema = """ SELECT * FROM INFORMATION_SCHEMA.columns WHERE TABLE_SCHEMA="rxnorm_current" """

QUERY_data_from_table = ""

########FUNCTIONS#################


def query_mysql(query):
    """Run *query* against the MySQL database and return the result.

    The query is executed through ``connectorx.read_sql`` using
    ``MYSQL_CONNECTION_STRING``; with connectorx's default return type the
    result is a pandas DataFrame.
    """
    return cx.read_sql(MYSQL_CONNECTION_STRING, query)


def insert_table_postgres(df, table, schema):
    """Append the rows of *df* to ``schema.table`` in PostgreSQL.

    Uses ``DataFrame.to_sql`` on ``POSTGRES_ENGINE`` with
    ``if_exists="append"`` and multi-row ``INSERT`` statements
    (``method="multi"``).  Returns whatever ``to_sql`` returns.
    """
    return df.to_sql(
        table,
        POSTGRES_ENGINE,
        schema=schema,
        if_exists="append",
        method="multi",
    )


def _as_text(value):
    """Return *value* as ``str``, decoding it as UTF-8 if it is bytes."""
    if isinstance(value, (bytes, bytearray)):
        return value.decode("utf-8")
    return value


def convert_mysql_types_to_pgsql(binary_type):
    """Parse a MySQL column type into its type name and optional length.

    *binary_type* is the column type as UTF-8 bytes (a ``str`` is accepted
    too), e.g. ``b"varchar(50)"``.  The value is lower-cased and split with
    ``SPLIT_RE``.

    Returns a ``(type_name, length)`` tuple where *length* is an ``int`` or
    ``None`` when the type carries no single integer length.

    Raises ``ValueError`` if the value does not start with a type name.

    TODO: the translation of the MySQL type name into a PostgreSQL type is
    not implemented yet; only the parsed components are returned.
    """
    string_type = _as_text(binary_type).lower()

    # get the value name and length out.
    match = SPLIT_RE.match(string_type)
    if match is None:
        raise ValueError(
            "cannot parse MySQL column type: {!r}".format(string_type)
        )
    val_type, _, length = match.groups()
    return val_type, (int(length) if length is not None else None)


def _nullable_clause(df_row):
    """Return ``"NOT NULL"`` when the column forbids NULLs, else ``""``."""
    return "NOT NULL" if df_row.IS_NULLABLE == "NO" else ""


def convert_column(df_row):
    """Build the PostgreSQL column-definition fragment for one column.

    *df_row* is expected to expose the attributes ``DATA_TYPE``,
    ``COLUMN_NAME``, ``IS_NULLABLE`` and, depending on the type,
    ``CHARACTER_MAXIMUM_LENGTH`` or ``NUMERIC_PRECISION`` /
    ``NUMERIC_SCALE`` (presumably a row of the result of
    ``QUERY_columns_from_Information_Schema`` -- confirm against the caller).

    Returns the fragment as a string ending in a comma, e.g.
    ``'name character varying(50) COLLATE pg_catalog."default" NOT NULL,'``.

    Raises ``NotImplementedError`` for types without a conversion
    (currently including ``int``, ``enum`` and ``text``).
    """
    # DATA_TYPE may arrive as bytes depending on the driver (the sibling
    # convert_mysql_types_to_pgsql expects bytes), so normalise it first.
    data_type = _as_text(df_row.DATA_TYPE).lower()
    column_name = df_row.COLUMN_NAME
    is_nullable = _nullable_clause(df_row)

    if data_type == "varchar":
        # np.int64 turns a float-typed length (50.0) into "50" when formatted.
        data_length = np.int64(df_row.CHARACTER_MAXIMUM_LENGTH)
        return (
            f"{column_name} character varying({data_length}) "
            f'COLLATE pg_catalog."default" {is_nullable},'
        )

    if data_type == "char":
        data_length = np.int64(df_row.CHARACTER_MAXIMUM_LENGTH)
        # NOTE(review): "char(n)[]" declares a PostgreSQL *array* of char(n);
        # kept as in the original, but confirm an array column is intended.
        return (
            f"{column_name} char({data_length})[] "
            f'COLLATE pg_catalog."default" {is_nullable},'
        )

    if data_type == "tinyint":
        # PostgreSQL has no one-byte integer; smallint is the closest type.
        return f"{column_name} smallint {is_nullable},"

    if data_type == "decimal":
        precision = np.int64(df_row.NUMERIC_PRECISION)
        scale = np.int64(df_row.NUMERIC_SCALE)
        return f"{column_name} numeric({precision},{scale}) {is_nullable},"

    # "int", "enum" and "text" were placeholders in the original and ended in
    # an unbound-variable error; fail with an explicit message instead.
    raise NotImplementedError(
        "no PostgreSQL conversion implemented for MySQL type "
        "{!r} (column {!r})".format(data_type, column_name)
    )