AWS SDK for pandas

7 - Redshift, MySQL, PostgreSQL, SQL Server and Oracle

awswrangler’s Redshift, MySQL and PostgreSQL have two basic functions in common that try to follow Pandas conventions, but add more data type consistency.

[ ]:
# Install the optional modules first
!pip install 'awswrangler[redshift, postgres, mysql, sqlserver, oracle]'
[1]:
import pandas as pd

import awswrangler as wr

df = pd.DataFrame({"id": [1, 2], "name": ["foo", "boo"]})

Connect using the Glue Catalog Connections

[2]:
con_redshift = wr.redshift.connect("aws-sdk-pandas-redshift")
con_mysql = wr.mysql.connect("aws-sdk-pandas-mysql")
con_postgresql = wr.postgresql.connect("aws-sdk-pandas-postgresql")
con_sqlserver = wr.sqlserver.connect("aws-sdk-pandas-sqlserver")
con_oracle = wr.oracle.connect("aws-sdk-pandas-oracle")

Raw SQL queries (No Pandas)

[3]:
with con_redshift.cursor() as cursor:
    for row in cursor.execute("SELECT 1"):
        print(row)
[1]

Loading data to Database

[4]:
wr.redshift.to_sql(df, con_redshift, schema="public", table="tutorial", mode="overwrite")
wr.mysql.to_sql(df, con_mysql, schema="test", table="tutorial", mode="overwrite")
wr.postgresql.to_sql(df, con_postgresql, schema="public", table="tutorial", mode="overwrite")
wr.sqlserver.to_sql(df, con_sqlserver, schema="dbo", table="tutorial", mode="overwrite")
wr.oracle.to_sql(df, con_oracle, schema="test", table="tutorial", mode="overwrite")

Unloading data from Database

[5]:
wr.redshift.read_sql_query("SELECT * FROM public.tutorial", con=con_redshift)
wr.mysql.read_sql_query("SELECT * FROM test.tutorial", con=con_mysql)
wr.postgresql.read_sql_query("SELECT * FROM public.tutorial", con=con_postgresql)
wr.sqlserver.read_sql_query("SELECT * FROM dbo.tutorial", con=con_sqlserver)
wr.oracle.read_sql_query("SELECT * FROM test.tutorial", con=con_oracle)
[5]:
id name
0 1 foo
1 2 boo
[6]:
con_redshift.close()
con_mysql.close()
con_postgresql.close()
con_sqlserver.close()
con_oracle.close()