AWS SDK for pandas

7 - Redshift, MySQL, PostgreSQL, SQL Server and Oracle

awswrangler’s Redshift, MySQL and PostgreSQL have two basic functions in common that try to follow Pandas conventions, but add more data type consistency.

[ ]:
# Install the optional modules first
!pip install 'awswrangler[redshift, postgres, mysql, sqlserver, oracle]'
import pandas as pd

import awswrangler as wr

df = pd.DataFrame({"id": [1, 2], "name": ["foo", "boo"]})

Connect using the Glue Catalog Connections

con_redshift = wr.redshift.connect("aws-sdk-pandas-redshift")
con_mysql = wr.mysql.connect("aws-sdk-pandas-mysql")
con_postgresql = wr.postgresql.connect("aws-sdk-pandas-postgresql")
con_sqlserver = wr.sqlserver.connect("aws-sdk-pandas-sqlserver")
con_oracle ="aws-sdk-pandas-oracle")

Raw SQL queries (No Pandas)

with con_redshift.cursor() as cursor:
    for row in cursor.execute("SELECT 1"):

Loading data to Database

wr.redshift.to_sql(df, con_redshift, schema="public", table="tutorial", mode="overwrite")
wr.mysql.to_sql(df, con_mysql, schema="test", table="tutorial", mode="overwrite")
wr.postgresql.to_sql(df, con_postgresql, schema="public", table="tutorial", mode="overwrite")
wr.sqlserver.to_sql(df, con_sqlserver, schema="dbo", table="tutorial", mode="overwrite"), con_oracle, schema="test", table="tutorial", mode="overwrite")

Unloading data from Database

wr.redshift.read_sql_query("SELECT * FROM public.tutorial", con=con_redshift)
wr.mysql.read_sql_query("SELECT * FROM test.tutorial", con=con_mysql)
wr.postgresql.read_sql_query("SELECT * FROM public.tutorial", con=con_postgresql)
wr.sqlserver.read_sql_query("SELECT * FROM dbo.tutorial", con=con_sqlserver)"SELECT * FROM test.tutorial", con=con_oracle)
id name
0 1 foo
1 2 boo