Background: I am trying to learn from a notebook written for the Kaggle House Price Prediction dataset.
I am trying to use a Pipeline to transform the numerical and categorical columns of a dataframe. It fails on my categorical column names, which are stored as a list in the variable categ_cols_names (shown below). The error says that the selected columns are not unique in the dataframe, and I'm not sure what that means.
# NOTE: 'OverallQual', 'GarageCars', 'FullBath' and 'YearBuilt' each appear
# twice in this list (once in their original position and again at the end).
categ_cols_names = ['MSZoning','Street','LotShape','LandContour','Utilities','LotConfig','LandSlope','Neighborhood','Condition1','Condition2','BldgType','HouseStyle','OverallQual','OverallCond','YearBuilt','YearRemodAdd','RoofStyle','RoofMatl','Exterior1st','Exterior2nd','MasVnrType','ExterQual','ExterCond','Foundation','BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinType2','Heating','HeatingQC','CentralAir','Electrical','BsmtFullBath','BsmtHalfBath','FullBath','HalfBath','BedroomAbvGr','KitchenAbvGr','KitchenQual','Functional','Fireplaces','GarageType','GarageYrBlt','GarageFinish','GarageCars','GarageQual','GarageCond','PavedDrive','MoSold','YrSold','SaleType','SaleCondition','OverallQual','GarageCars','FullBath','YearBuilt']
Below is my code:
# ColumnTransformer selects DataFrame columns by label and raises a ValueError
# ("Selected columns ... are not unique in dataframe") when a requested label
# occurs more than once in X_train.columns. Drop repeated labels up front so
# that every name derived below refers to exactly one column.
# NOTE(review): this keeps the FIRST occurrence of each duplicated label --
# confirm that is the copy you want (e.g. the string-typed vs. numeric one).
X_train = X_train.loc[:, ~X_train.columns.duplicated()].copy()

# Get numerical columns names
num_cols_names = X_train.columns[X_train.dtypes != object].to_list()
# Numerical columns with missing values
num_nan_cols = X_train[num_cols_names].columns[X_train[num_cols_names].isna().sum() > 0]
# Assign np.nan type to NaN values in numerical features
# in order to ensure detectability in posterior methods
X_train[num_nan_cols] = X_train[num_nan_cols].fillna(value = np.nan, axis = 1)
# Define pipeline for imputation, scaling and power-transforming
# of the numerical features
num_pipeline = Pipeline(steps = [
                                    ('Simple Imputer', SimpleImputer(strategy = 'median')),
                                    ('Robust Scaler', RobustScaler()),
                                    ('Power Transformer', PowerTransformer())
                                ]
                        )
# Get categorical columns names (unique, because X_train's labels now are)
categ_cols_names = X_train.columns[X_train.dtypes == object].to_list()
# Categorical columns with missing values
categ_nan_cols = X_train[categ_cols_names].columns[X_train[categ_cols_names].isna().sum() > 0]
# Assign np.nan type to NaN values in categorical features
# in order to ensure detectability in posterior methods
X_train[categ_nan_cols] = X_train[categ_nan_cols].fillna(value = np.nan, axis = 1)
# Define pipeline for imputation and encoding of the categorical features
categ_pipeline = Pipeline(steps = [
    ('Categorical Imputer', SimpleImputer(strategy = 'most_frequent')),
    ('One Hot Encoder', OneHotEncoder(drop = 'first'))
])
# Route each group of columns through its own pipeline; columns named in
# neither list are passed through unchanged, and sparse_threshold = 0 forces
# a dense output array.
ct = ColumnTransformer([
    ('Categorical Pipeline', categ_pipeline, categ_cols_names),
    ('Numerical Pipeline', num_pipeline, num_cols_names)],
                        remainder        = 'passthrough',
                        sparse_threshold = 0,
                        n_jobs           = -1)
pipe = Pipeline(steps = [('Column Transformer', ct)])
pipe.fit_transform(X_train)
The ValueError occurs on the .fit_transform() line:

Here is a sample of my X_train:
{'MSZoning': {0: 'RL', 1: 'RL', 2: 'RL', 3: 'RL', 4: 'RL'},
 'Street': {0: 'Pave', 1: 'Pave', 2: 'Pave', 3: 'Pave', 4: 'Pave'},
 'LotShape': {0: 'Reg', 1: 'Reg', 2: 'IR1', 3: 'IR1', 4: 'IR1'},
 'LandContour': {0: 'Lvl', 1: 'Lvl', 2: 'Lvl', 3: 'Lvl', 4: 'Lvl'},
 'Utilities': {0: 'AllPub',
  1: 'AllPub',
  2: 'AllPub',
  3: 'AllPub',
  4: 'AllPub'},
 'LotConfig': {0: 'Inside', 1: 'FR2', 2: 'Inside', 3: 'Corner', 4: 'FR2'},
 'LandSlope': {0: 'Gtl', 1: 'Gtl', 2: 'Gtl', 3: 'Gtl', 4: 'Gtl'},
 'Neighborhood': {0: 'CollgCr',
  1: 'Veenker',
  2: 'CollgCr',
  3: 'Crawfor',
  4: 'NoRidge'},
 'Condition1': {0: 'Norm', 1: 'Feedr', 2: 'Norm', 3: 'Norm', 4: 'Norm'},
 'Condition2': {0: 'Norm', 1: 'Norm', 2: 'Norm', 3: 'Norm', 4: 'Norm'},
 'BldgType': {0: '1Fam', 1: '1Fam', 2: '1Fam', 3: '1Fam', 4: '1Fam'},
 'HouseStyle': {0: '2Story',
  1: '1Story',
  2: '2Story',
  3: '2Story',
  4: '2Story'},
 'OverallQual': {0: '7', 1: '6', 2: '7', 3: '7', 4: '8'},
 'OverallCond': {0: '5', 1: '8', 2: '5', 3: '5', 4: '5'},
 'YearBuilt': {0: '2003', 1: '1976', 2: '2001', 3: '1915', 4: '2000'},
 'YearRemodAdd': {0: '2003', 1: '1976', 2: '2002', 3: '1970', 4: '2000'},
 'RoofStyle': {0: 'Gable', 1: 'Gable', 2: 'Gable', 3: 'Gable', 4: 'Gable'},
 'RoofMatl': {0: 'CompShg',
  1: 'CompShg',
  2: 'CompShg',
  3: 'CompShg',
  4: 'CompShg'},
 'Exterior1st': {0: 'VinylSd',
  1: 'MetalSd',
  2: 'VinylSd',
  3: 'Wd Sdng',
  4: 'VinylSd'},
 'Exterior2nd': {0: 'VinylSd',
  1: 'MetalSd',
  2: 'VinylSd',
  3: 'Wd Shng',
  4: 'VinylSd'},
 'MasVnrType': {0: 'BrkFace',
  1: 'None',
  2: 'BrkFace',
  3: 'None',
  4: 'BrkFace'},
 'ExterQual': {0: 'Gd', 1: 'TA', 2: 'Gd', 3: 'TA', 4: 'Gd'},
 'ExterCond': {0: 'TA', 1: 'TA', 2: 'TA', 3: 'TA', 4: 'TA'},
 'Foundation': {0: 'PConc', 1: 'CBlock', 2: 'PConc', 3: 'BrkTil', 4: 'PConc'},
 'BsmtQual': {0: 'Gd', 1: 'Gd', 2: 'Gd', 3: 'TA', 4: 'Gd'},
 'BsmtCond': {0: 'TA', 1: 'TA', 2: 'TA', 3: 'Gd', 4: 'TA'},
 'BsmtExposure': {0: 'No', 1: 'Gd', 2: 'Mn', 3: 'No', 4: 'Av'},
 'BsmtFinType1': {0: 'GLQ', 1: 'ALQ', 2: 'GLQ', 3: 'ALQ', 4: 'GLQ'},
 'BsmtFinType2': {0: 'Unf', 1: 'Unf', 2: 'Unf', 3: 'Unf', 4: 'Unf'},
 'Heating': {0: 'GasA', 1: 'GasA', 2: 'GasA', 3: 'GasA', 4: 'GasA'},
 'HeatingQC': {0: 'Ex', 1: 'Ex', 2: 'Ex', 3: 'Gd', 4: 'Ex'},
 'CentralAir': {0: 'Y', 1: 'Y', 2: 'Y', 3: 'Y', 4: 'Y'},
 'Electrical': {0: 'SBrkr', 1: 'SBrkr', 2: 'SBrkr', 3: 'SBrkr', 4: 'SBrkr'},
 'BsmtFullBath': {0: '1', 1: '0', 2: '1', 3: '1', 4: '1'},
 'BsmtHalfBath': {0: '0', 1: '1', 2: '0', 3: '0', 4: '0'},
 'FullBath': {0: '2', 1: '2', 2: '2', 3: '1', 4: '2'},
 'HalfBath': {0: '1', 1: '0', 2: '1', 3: '0', 4: '1'},
 'BedroomAbvGr': {0: '3', 1: '3', 2: '3', 3: '3', 4: '4'},
 'KitchenAbvGr': {0: '1', 1: '1', 2: '1', 3: '1', 4: '1'},
 'KitchenQual': {0: 'Gd', 1: 'TA', 2: 'Gd', 3: 'Gd', 4: 'Gd'},
 'Functional': {0: 'Typ', 1: 'Typ', 2: 'Typ', 3: 'Typ', 4: 'Typ'},
 'Fireplaces': {0: '0', 1: '1', 2: '1', 3: '1', 4: '1'},
 'GarageType': {0: 'Attchd',
  1: 'Attchd',
  2: 'Attchd',
  3: 'Detchd',
  4: 'Attchd'},
 'GarageYrBlt': {0: '2003.0',
  1: '1976.0',
  2: '2001.0',
  3: '1998.0',
  4: '2000.0'},
 'GarageFinish': {0: 'RFn', 1: 'RFn', 2: 'RFn', 3: 'Unf', 4: 'RFn'},
 'GarageCars': {0: '2', 1: '2', 2: '2', 3: '3', 4: '3'},
 'GarageQual': {0: 'TA', 1: 'TA', 2: 'TA', 3: 'TA', 4: 'TA'},
 'GarageCond': {0: 'TA', 1: 'TA', 2: 'TA', 3: 'TA', 4: 'TA'},
 'PavedDrive': {0: 'Y', 1: 'Y', 2: 'Y', 3: 'Y', 4: 'Y'},
 'MoSold': {0: '2', 1: '5', 2: '9', 3: '2', 4: '12'},
 'YrSold': {0: '2008', 1: '2007', 2: '2008', 3: '2006', 4: '2008'},
 'SaleType': {0: 'WD', 1: 'WD', 2: 'WD', 3: 'WD', 4: 'WD'},
 'SaleCondition': {0: 'Normal',
  1: 'Normal',
  2: 'Normal',
  3: 'Abnorml',
  4: 'Normal'},
 'GrLivArea': {0: 1710, 1: 1262, 2: 1786, 3: 1717, 4: 2198},
 'GarageArea': {0: 548, 1: 460, 2: 608, 3: 642, 4: 836},
 'TotalBsmtSF': {0: 856, 1: 1262, 2: 920, 3: 756, 4: 1145},
 '1stFlrSF': {0: 856, 1: 1262, 2: 920, 3: 961, 4: 1145},
 'TotRmsAbvGrd': {0: 8, 1: 6, 2: 6, 3: 7, 4: 9}}
 
    