I'm using R K-nearest Neighbors and am trying to find out the accuracy in the validation set and also the best choice of k.
I receive the following error message:
Error: data and reference should be factors with the same levels.
My code is:
t(t(names(bank.df)))[,1]               
[1,] "Age"              
[2,] "Experience"       
[3,] "Income"           
[4,] "Family"           
[5,] "CCAvg"            
[6,] "Education"        
[7,] "Mortgage"         
[8,] "SecuritiesAccount"
[9,] "CDAccount"        
[10,] "Online"           
[11,] "CreditCard"       
[12,] "PersonalLoan"     
train.index <- sample(row.names(bank.df), 0.6*dim(bank.df)[1])
valid.index <- setdiff(row.names(bank.df), train.index)
train.df <- bank.df [train.index, ]
valid.df <- bank.df [valid.index, ]
train.norm.df <- train.df
valid.norm.df <- valid.df
bank.norm.df <- bank.df
library(caret)
library(lattice)
library(ggplot2)
train.norm.df[, 1:12] <- predict(norm.values, train.df[, 1:12])
valid.norm.df[, 1:12] <- predict(norm.values, valid.df[, 1:12])
bank.norm.df[, 1:12] <- predict(norm.values, bank.df[, 1:12]
accuracy.df <- data.frame (k= seq(1,19,1), accuracy=rep(1,19))
library(FNN)
for (i in 1:19){  
    knn.pred <- knn(train.norm.df[, 1:12], valid.norm.df[, 1:12],
                  cl= train.norm.df [, 12], k=i)
     accuracy.df[i,2] <- confusionMatrix(knn.pred, valid.norm.df[, 12])$overall[1]
     }
 
    