#import dependencies
import pandas as pd
from sqlalchemy import create_engine
from config import db_password
from getpass import getpass
#File to Load Ontario Males 25-54 wages for 1997 to 2021
wages_male_25_data_to_load = "Raw Data CSV files/StatsCan_Male_25to54.csv"
# Create a DataFrame for Ontario Males 25-54 wages for 1997 to 2021
wages_male1_df = pd.read_csv(wages_male_25_data_to_load)
#display the data table for preview
wages_male1_df.head()
REF_DATE | GEO | DGUID | Wages | Type of work | National Occupational Classification (NOC) | Sex | Age group | UOM | UOM_ID | SCALAR_FACTOR | SCALAR_ID | VECTOR | COORDINATE | VALUE | STATUS | SYMBOL | TERMINATED | DECIMALS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2017 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665269 | 7.3.2.1.2.3 | 1250.33 | NaN | NaN | NaN | 2 |
1 | 2018 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665269 | 7.3.2.1.2.3 | 1283.03 | NaN | NaN | NaN | 2 |
2 | 2019 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665269 | 7.3.2.1.2.3 | 1314.85 | NaN | NaN | NaN | 2 |
3 | 2020 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665269 | 7.3.2.1.2.3 | 1388.65 | NaN | NaN | NaN | 2 |
4 | 2021 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665269 | 7.3.2.1.2.3 | 1427.36 | NaN | NaN | NaN | 2 |
#File to Load Ontario Males >55 wages for 1997 to 2021
wages_male_55_data_to_load = "Raw Data CSV files/StatsCan_Male_55Older.csv"
# Create a DataFrame for Ontario Males >55 wages for 1997 to 2021
wages_male2_df = pd.read_csv(wages_male_55_data_to_load)
#display the data table for preview
wages_male2_df.head()
REF_DATE | GEO | DGUID | Wages | Type of work | National Occupational Classification (NOC) | Sex | Age group | UOM | UOM_ID | SCALAR_FACTOR | SCALAR_ID | VECTOR | COORDINATE | VALUE | STATUS | SYMBOL | TERMINATED | DECIMALS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2017 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665270 | 7.3.2.1.2.4 | 1253.76 | NaN | NaN | NaN | 2 |
1 | 2018 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665270 | 7.3.2.1.2.4 | 1278.24 | NaN | NaN | NaN | 2 |
2 | 2019 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665270 | 7.3.2.1.2.4 | 1313.42 | NaN | NaN | NaN | 2 |
3 | 2020 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665270 | 7.3.2.1.2.4 | 1417.76 | NaN | NaN | NaN | 2 |
4 | 2021 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665270 | 7.3.2.1.2.4 | 1425.09 | NaN | NaN | NaN | 2 |
#File to Load Ontario Female 25-54 wages for 1997 to 2021
wages_female_25_data_to_load = "Raw Data CSV files/StatsCan_Wage_Female_25to54.csv"
# Create a DataFrame for Ontario Males 25-54 wages for 1997 to 2021
wages_female1_df = pd.read_csv(wages_female_25_data_to_load)
#display the data table for preview
wages_female1_df.head()
REF_DATE | GEO | DGUID | Wages | Type of work | National Occupational Classification (NOC) | Sex | Age group | UOM | UOM_ID | SCALAR_FACTOR | SCALAR_ID | VECTOR | COORDINATE | VALUE | STATUS | SYMBOL | TERMINATED | DECIMALS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2017 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665273 | 7.3.2.1.3.3 | 1058.00 | NaN | NaN | NaN | 2 |
1 | 2018 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665273 | 7.3.2.1.3.3 | 1092.20 | NaN | NaN | NaN | 2 |
2 | 2019 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665273 | 7.3.2.1.3.3 | 1139.61 | NaN | NaN | NaN | 2 |
3 | 2020 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665273 | 7.3.2.1.3.3 | 1205.10 | NaN | NaN | NaN | 2 |
4 | 2021 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665273 | 7.3.2.1.3.3 | 1234.24 | NaN | NaN | NaN | 2 |
#File to Load Ontario Female > 55 wages for 1997 to 2021
wages_female_55_data_to_load = "Raw Data CSV files/StatsCan_Wage_Female_55Older.csv"
# Create a DataFrame for Ontario Males >55 wages for 1997 to 2021
wages_female2_df = pd.read_csv(wages_female_55_data_to_load)
#display the data table for preview
wages_female2_df.head()
REF_DATE | GEO | DGUID | Wages | Type of work | National Occupational Classification (NOC) | Sex | Age group | UOM | UOM_ID | SCALAR_FACTOR | SCALAR_ID | VECTOR | COORDINATE | VALUE | STATUS | SYMBOL | TERMINATED | DECIMALS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2017 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665274 | 7.3.2.1.3.4 | 1038.15 | NaN | NaN | NaN | 2 |
1 | 2018 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665274 | 7.3.2.1.3.4 | 1064.90 | NaN | NaN | NaN | 2 |
2 | 2019 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665274 | 7.3.2.1.3.4 | 1100.28 | NaN | NaN | NaN | 2 |
3 | 2020 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665274 | 7.3.2.1.3.4 | 1154.73 | NaN | NaN | NaN | 2 |
4 | 2021 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Total employees, all occupations | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665274 | 7.3.2.1.3.4 | 1187.72 | NaN | NaN | NaN | 2 |
#Create Wage DataFrame - combine Male data
Ontario_Wages= wages_male1_df.append(wages_male2_df, ignore_index = True)
# Ontario_Wages.head()
Ontario_Wages.tail()
REF_DATE | GEO | DGUID | Wages | Type of work | National Occupational Classification (NOC) | Sex | Age group | UOM | UOM_ID | SCALAR_FACTOR | SCALAR_ID | VECTOR | COORDINATE | VALUE | STATUS | SYMBOL | TERMINATED | DECIMALS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
505 | 2017 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665870 | 7.3.2.51.2.4 | 712.27 | NaN | NaN | NaN | 2 |
506 | 2018 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665870 | 7.3.2.51.2.4 | 768.42 | NaN | NaN | NaN | 2 |
507 | 2019 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665870 | 7.3.2.51.2.4 | 783.01 | NaN | NaN | NaN | 2 |
508 | 2020 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665870 | 7.3.2.51.2.4 | 758.46 | NaN | NaN | NaN | 2 |
509 | 2021 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Males | 55 years and over | Current dollars | 75 | units | 0 | v1063665870 | 7.3.2.51.2.4 | 847.31 | NaN | NaN | NaN | 2 |
# Add female 25-54 data
Ontario_Wages= Ontario_Wages.append(wages_female1_df, ignore_index = True)
Ontario_Wages.tail()
REF_DATE | GEO | DGUID | Wages | Type of work | National Occupational Classification (NOC) | Sex | Age group | UOM | UOM_ID | SCALAR_FACTOR | SCALAR_ID | VECTOR | COORDINATE | VALUE | STATUS | SYMBOL | TERMINATED | DECIMALS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
760 | 2017 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665873 | 7.3.2.51.3.3 | 618.12 | NaN | NaN | NaN | 2 |
761 | 2018 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665873 | 7.3.2.51.3.3 | 665.37 | NaN | NaN | NaN | 2 |
762 | 2019 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665873 | 7.3.2.51.3.3 | 694.23 | NaN | NaN | NaN | 2 |
763 | 2020 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665873 | 7.3.2.51.3.3 | 668.01 | NaN | NaN | NaN | 2 |
764 | 2021 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 25 to 54 years | Current dollars | 75 | units | 0 | v1063665873 | 7.3.2.51.3.3 | 697.58 | NaN | NaN | NaN | 2 |
Ontario_Wages= Ontario_Wages.append(wages_female2_df, ignore_index = True)
Ontario_Wages.tail()
REF_DATE | GEO | DGUID | Wages | Type of work | National Occupational Classification (NOC) | Sex | Age group | UOM | UOM_ID | SCALAR_FACTOR | SCALAR_ID | VECTOR | COORDINATE | VALUE | STATUS | SYMBOL | TERMINATED | DECIMALS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1015 | 2017 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665874 | 7.3.2.51.3.4 | 612.71 | NaN | NaN | NaN | 2 |
1016 | 2018 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665874 | 7.3.2.51.3.4 | 657.11 | NaN | NaN | NaN | 2 |
1017 | 2019 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665874 | 7.3.2.51.3.4 | 667.23 | NaN | NaN | NaN | 2 |
1018 | 2020 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665874 | 7.3.2.51.3.4 | 697.39 | NaN | NaN | NaN | 2 |
1019 | 2021 | Ontario | 2016A000235 | Average weekly wage rate | Full-time employees | Labourers in processing, manufacturing and uti... | Females | 55 years and over | Current dollars | 75 | units | 0 | v1063665874 | 7.3.2.51.3.4 | 688.87 | NaN | NaN | NaN | 2 |
#review data types
Ontario_Wages.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1020 entries, 0 to 1019 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 REF_DATE 1020 non-null int64 1 GEO 1020 non-null object 2 DGUID 1020 non-null object 3 Wages 1020 non-null object 4 Type of work 1020 non-null object 5 National Occupational Classification (NOC) 1020 non-null object 6 Sex 1020 non-null object 7 Age group 1020 non-null object 8 UOM 1020 non-null object 9 UOM_ID 1020 non-null int64 10 SCALAR_FACTOR 1020 non-null object 11 SCALAR_ID 1020 non-null int64 12 VECTOR 1020 non-null object 13 COORDINATE 1020 non-null object 14 VALUE 924 non-null float64 15 STATUS 96 non-null object 16 SYMBOL 0 non-null float64 17 TERMINATED 0 non-null float64 18 DECIMALS 1020 non-null int64 dtypes: float64(3), int64(4), object(12) memory usage: 151.5+ KB
# Drop null value, not required columns Status, Symbol and Terminated
Ontario_Wages.drop(columns=['UOM','Type of work','DECIMALS','DGUID','UOM_ID','SCALAR_FACTOR', 'SCALAR_ID','VECTOR','COORDINATE','STATUS','SYMBOL','TERMINATED'], inplace=True)
Ontario_Wages.columns
Index(['REF_DATE', 'GEO', 'Wages', 'National Occupational Classification (NOC)', 'Sex', 'Age group', 'VALUE'], dtype='object')
#Review Datatypes
Ontario_Wages.dtypes
REF_DATE int64 GEO object Wages object National Occupational Classification (NOC) object Sex object Age group object VALUE float64 dtype: object
#Change Wages to numeric
# Use pd.to_numeric() method to convert the datatype of the Amount column
# df['Amount'] = pd.to_numeric(df['Amount'])
Ontario_Wages['VALUE'] = Ontario_Wages['VALUE'].astype(str). astype(float)
#find null values
for column in Ontario_Wages.columns:
print(f"Column {column} has {Ontario_Wages[column].isnull().sum()}null values")
Column REF_DATE has 0null values Column GEO has 0null values Column Wages has 0null values Column National Occupational Classification (NOC) has 0null values Column Sex has 0null values Column Age group has 0null values Column VALUE has 96null values
# #review rows with null values - gender
# Null_Wage=Ontario_Wages[Ontario_Wages['weekly_wage'].isnull()]
# gender_split=Null_Wage.groupby('gender').count()
# gender_split
# #review rows with null values - noc
# Null_Wage=Ontario_Wages[Ontario_Wages['weekly_wage'].isnull()]
# #need to figure out how to see gender by no for the nan values
# Rename the columns in the sunshine_data DataFrame.### changed to match 2020 data
Ontario_Wages.rename({'REF_DATE':'year',
'GEO':'province',
'Wages':'wage_frequency',
'National Occupational Classification (NOC)':'noc',
'Sex': 'gender',
'Age group':'age_group',
'VALUE':'weekly_wage'
}, axis='columns', inplace=True)
# Export to csv
Ontario_Wages.to_csv("Resources/Clean_Ontario_Wages.csv", index=False)
Ontario_Wages.head()
year | province | wage_frequency | noc | gender | age_group | weekly_wage | |
---|---|---|---|---|---|---|---|
0 | 2017 | Ontario | Average weekly wage rate | Total employees, all occupations | Males | 25 to 54 years | 1250.33 |
1 | 2018 | Ontario | Average weekly wage rate | Total employees, all occupations | Males | 25 to 54 years | 1283.03 |
2 | 2019 | Ontario | Average weekly wage rate | Total employees, all occupations | Males | 25 to 54 years | 1314.85 |
3 | 2020 | Ontario | Average weekly wage rate | Total employees, all occupations | Males | 25 to 54 years | 1388.65 |
4 | 2021 | Ontario | Average weekly wage rate | Total employees, all occupations | Males | 25 to 54 years | 1427.36 |
# print(Ontario_Wages.shape())
Ontario_Wages.dtypes
year int64 province object wage_frequency object noc object gender object age_group object weekly_wage float64 dtype: object
# review types of NOC
Ontario_Wages['noc'].unique()
array(['Total employees, all occupations', 'Management occupations [0]', 'Senior management occupations [00]', 'Specialized middle management occupations [01-05]', 'Middle management occupations in retail and wholesale trade and customer services [06]', 'Middle management occupations in trades, transportation, production and utilities [07-09]', 'Business, finance and administration occupations [1]', 'Professional occupations in business and finance [11]', 'Administrative and financial supervisors and administrative occupations [12]', 'Finance, insurance and related business administrative occupations [13]', 'Office support occupations [14]', 'Distribution, tracking and scheduling co-ordination occupations [15]', 'Natural and applied sciences and related occupations [2]', 'Professional occupations in natural and applied sciences [21]', 'Technical occupations related to natural and applied sciences [22]', 'Health occupations [3]', 'Professional occupations in nursing [30]', 'Professional occupations in health (except nursing) [31]', 'Technical occupations in health [32]', 'Assisting occupations in support of health services [34]', 'Occupations in education, law and social, community and government services [4]', 'Professional occupations in education services [40]', 'Professional occupations in law and social, community and government services [41]', 'Paraprofessional occupations in legal, social, community and education services [42]', 'Occupations in front-line public protection services [43]', 'Care providers and educational, legal and public protection support occupations [44]', 'Occupations in art, culture, recreation and sport [5]', 'Professional occupations in art and culture [51]', 'Technical occupations in art, culture, recreation and sport [52]', 'Sales and service occupations [6]', 'Retail sales supervisors and specialized sales occupations [62]', 'Service supervisors and specialized service occupations [63]', 'Sales representatives and salespersons - wholesale and retail trade [64]', 'Service representatives and other customer and personal services occupations [65]', 'Sales support occupations [66]', 'Service support and other service occupations, n.e.c. [67]', 'Trades, transport and equipment operators and related occupations [7]', 'Industrial, electrical and construction trades [72]', 'Maintenance and equipment operation trades [73]', 'Other installers, repairers and servicers and material handlers [74]', 'Transport and heavy equipment operation and related maintenance occupations [75]', 'Trades helpers, construction labourers and related occupations [76]', 'Natural resources, agriculture and related production occupations [8]', 'Supervisors and technical occupations in natural resources, agriculture and related production [82]', 'Workers in natural resources, agriculture and related production [84]', 'Harvesting, landscaping and natural resources labourers [86]', 'Occupations in manufacturing and utilities [9]', 'Processing, manufacturing and utilities supervisors and central control operators [92]', 'Processing and manufacturing machine operators and related production workers [94]', 'Assemblers in manufacturing [95]', 'Labourers in processing, manufacturing and utilities [96]'], dtype=object)
Ontario_Wages.shape
(1020, 7)
#Remove NOC where not on sunshine list, ie. labour, processing, constructio/trades
remove = ['Workers in natural resources, agriculture and related production [84]',
'Harvesting, landscaping and natural resources labourers [86]',
'Occupations in manufacturing and utilities [9]',
'Processing, manufacturing and utilities supervisors and central control operators [92]',
'Processing and manufacturing machine operators and related production workers [94]',
'Assemblers in manufacturing [95]','Trades, transport and equipment operators and related occupations [7]',
'Industrial, electrical and construction trades [72]',
'Maintenance and equipment operation trades [73]',
'Other installers, repairers and servicers and material handlers [74]',
'Transport and heavy equipment operation and related maintenance occupations [75]',
'Trades helpers, construction labourers and related occupations [76]',
'Natural resources, agriculture and related production occupations [8]',
'Sales and service occupations [6]',
'Retail sales supervisors and specialized sales occupations [62]',
'Distribution, tracking and scheduling co-ordination occupations [15]',
'Sales representatives and salespersons - wholesale and retail trade [64]',
'Sales support occupations [66]',
'Total employees',
'all occupations',
'Labourers in processing, manufacturing and utilities [96]']
Ontario_Wages=Ontario_Wages.drop(Ontario_Wages[Ontario_Wages["noc"].isin(remove)].index)
print(Ontario_Wages.shape)
(640, 7)
#File to Load Ontario Males 25-54 wages for 1997 to 2021
cpi_data_to_load = "Raw Data CSV files/OntarioCPI_databaseLoadingData.csv"
# Create a DataFrame for Ontario Males 25-54 wages for 1997 to 2021
cpi_df = pd.read_csv(cpi_data_to_load)
#display the data table for preview
cpi_df.tail()
REF_DATE | GEO | DGUID | Products and product groups | UOM | UOM_ID | SCALAR_FACTOR | SCALAR_ID | VECTOR | COORDINATE | VALUE | STATUS | SYMBOL | TERMINATED | DECIMALS | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
370 | 2016 | Ontario | 2016A000235 | Services | 2002=100 | 17 | units | 0 | v41694341 | 14.282 | 140.6 | NaN | NaN | NaN | 1 |
371 | 2017 | Ontario | 2016A000235 | Services | 2002=100 | 17 | units | 0 | v41694341 | 14.282 | 144.6 | NaN | NaN | NaN | 1 |
372 | 2018 | Ontario | 2016A000235 | Services | 2002=100 | 17 | units | 0 | v41694341 | 14.282 | 148.9 | NaN | NaN | NaN | 1 |
373 | 2019 | Ontario | 2016A000235 | Services | 2002=100 | 17 | units | 0 | v41694341 | 14.282 | 152.4 | NaN | NaN | NaN | 1 |
374 | 2020 | Ontario | 2016A000235 | Services | 2002=100 | 17 | units | 0 | v41694341 | 14.282 | 154.6 | NaN | NaN | NaN | 1 |
#Review data types
cpi_df.dtypes
REF_DATE int64 GEO object DGUID object Products and product groups object UOM object UOM_ID int64 SCALAR_FACTOR object SCALAR_ID int64 VECTOR object COORDINATE float64 VALUE float64 STATUS float64 SYMBOL float64 TERMINATED float64 DECIMALS int64 dtype: object
for column in cpi_df.columns: print(f"Column {column} has {cpi_df[column].isnull().sum()}null values")
# Drop null value, not required columns Status, Symbol and Terminated
cpi_df.drop(columns=['UOM','DECIMALS','DGUID','UOM_ID','SCALAR_FACTOR', 'SCALAR_ID','VECTOR','COORDINATE','STATUS','SYMBOL','TERMINATED'], inplace=True)
# Rename the columns in the sunshine_data DataFrame.### changed to match 2020 data
cpi_df.rename({'REF_DATE':'year',
'GEO':'province',
'Products and product groups':'product_groups',
'VALUE':'CPI'
}, axis='columns', inplace=True)
cpi_df.columns
Index(['year', 'province', 'product_groups', 'CPI'], dtype='object')
cpi_df.dtypes
year int64 province object product_groups object CPI float64 dtype: object
# Export to csv
cpi_df.to_csv("Resources/Clean_CPI.csv", index=False)
cpi_df.head()
year | province | product_groups | CPI | |
---|---|---|---|---|
0 | 1996 | Ontario | All-items | 88.2 |
1 | 1997 | Ontario | All-items | 89.8 |
2 | 1998 | Ontario | All-items | 90.6 |
3 | 1999 | Ontario | All-items | 92.4 |
4 | 2000 | Ontario | All-items | 95.1 |
# db_password = 'db_password'
db_password = getpass ('Enter in Password')
Enter in Password········
#connect to local server
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/Final_Project_SunshineList"
#Create database engine
engine = create_engine(db_string)
#Import Ontario Wages into SQL table
Ontario_Wages.to_sql(name='ontario_wage_table', con=engine)
#Import Ontario Wages into SQL table
cpi_df.to_sql(name='cpi_table', con=engine)