I've written the code below to do some machine-learning work in R. I'm not entirely happy with parts of it, and I suspect it could be improved quite a bit. Two things I'm specifically interested in: how to deal with appending to vectors in the loop at the bottom, and whether I can combine the functions kf.knn, kf.nnet and kf.svm into one function with various arguments.
library(class)     # knn
library(nnet)      # nnet
library(epibasix)  # epiKappa
library(CVThresh)  # cvtype
library(e1071)     # svm
df <- read.table("semeion.data.data")
# Columns V257..V266 are a one-hot encoding of the digit labels 0..9;
# collapse them into a single digit column (42 is a sentinel for unlabelled rows).
df$digit <- rep(42, nrow(df))
for (d in 0:9) {
  df$digit[as.logical(df[[paste0("V", 257 + d)]])] <- d
}
data <- df[, c(1:256, 267)]  # 256 pixel columns plus the digit label
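As an aside, because the label columns are one-hot, the whole decoding block can collapse to a single line, assuming every row has exactly one flag set (max.col breaks ties arbitrarily, so the 42 sentinel is lost):

df$digit <- max.col(df[, 257:266]) - 1  # column offset of the 1 gives the digit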
# Kappa is only defined for a square confusion matrix, so coerce both
# factors onto the union of their levels before tabulating.
squareTable <- function(x, y) {
  x <- factor(x)
  y <- factor(y)
  commonLevels <- sort(unique(c(levels(x), levels(y))))
  x <- factor(x, levels = commonLevels)
  y <- factor(y, levels = commonLevels)
  table(x, y)
}
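A quick illustration with made-up vectors: a plain table of the pair below would be 3x2 because the predictions never contain a 2, but squareTable pads it to 3x3 so epiKappa can use it.

squareTable(c(0, 1, 2, 2), c(0, 1, 1, 1))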
kf.knn <- function(data, k, nearest)
{
  N <- nrow(data)
  data <- data[sample(1:N), ]  # shuffle before splitting into folds
  folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index
  total <- 0
  for (i in 1:k)
  {
    test <- data[folds.index[i, ], 1:256]
    test.labels <- data[folds.index[i, ], 257]
    rest <- as.vector(folds.index[-i, ])
    train <- data[rest, 1:256]
    train.labels <- data[rest, 257]
    knnpredict <- knn(train, test, train.labels, nearest)
    # squareTable keeps the confusion matrix square even when a class
    # is missing from the predictions, which epiKappa needs.
    t <- squareTable(test.labels, knnpredict)
    total <- total + epiKappa(t)$kappa
  }
  return(total / k)
}
kf.nnet <- function(data, k, hidden)
{
  N <- nrow(data)
  data <- data[sample(1:N), ]
  folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index
  total <- 0
  for (i in 1:k)  # was 2:k, which skipped a fold but still divided by k
  {
    test <- data[folds.index[i, ], 1:256]
    test.labels <- data[folds.index[i, ], 257]
    rest <- as.vector(folds.index[-i, ])
    train <- data[rest, 1:256]
    train$label <- as.factor(data[rest, 257])
    # nnet fits a softmax classifier automatically when the response is a
    # factor, so linout = TRUE was redundant.
    nnetwork <- nnet(label ~ ., data=train, size=hidden, MaxNWts=10000, maxit=200)
    nnetpredict <- predict(nnetwork, test, type="class")
    t <- squareTable(test.labels, nnetpredict)
    print(t)
    total <- total + epiKappa(t)$kappa
  }
  return(total / k)
}
kf.svm <- function(data, k)
{
  N <- nrow(data)
  data <- data[sample(1:N), ]
  folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index
  total <- 0
  for (i in 1:k)  # was 2:k, which skipped a fold but still divided by k
  {
    test <- data[folds.index[i, ], 1:256]
    test.labels <- data[folds.index[i, ], 257]
    rest <- as.vector(folds.index[-i, ])
    train <- data[rest, 1:256]
    train$label <- as.factor(data[rest, 257])
    # cross=2 only ran an internal accuracy estimate that was never used
    svmmodel <- svm(label ~ ., data=train, cost=8, gamma=0.001)
    svmpredict <- predict(svmmodel, test)
    t <- squareTable(test.labels, svmpredict)
    print(t)
    total <- total + epiKappa(t)$kappa
  }
  return(total / k)
}
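On the combining question, the direction I have in mind is a single driver that takes the per-fold model step as a function argument. A minimal sketch, assuming the fold bookkeeping above stays as-is; kf.cv and the fit.* helpers are made-up names:

kf.cv <- function(data, k, fit)
{
  N <- nrow(data)
  data <- data[sample(1:N), ]
  folds.index <- cvtype(N, cv.bsize=1, cv.kfold=k, FALSE)$cv.index
  kappas <- numeric(k)  # preallocated, no append() bookkeeping
  for (i in 1:k)
  {
    test <- data[folds.index[i, ], 1:256]
    test.labels <- data[folds.index[i, ], 257]
    rest <- as.vector(folds.index[-i, ])
    train <- data[rest, 1:256]
    train.labels <- data[rest, 257]
    pred <- fit(train, train.labels, test)
    kappas[i] <- epiKappa(squareTable(test.labels, pred))$kappa
  }
  mean(kappas)
}

# Each model then shrinks to a small closure that returns predicted labels:
fit.knn <- function(nearest) function(train, labels, test)
  knn(train, test, labels, nearest)
fit.nnet <- function(hidden) function(train, labels, test) {
  train$label <- as.factor(labels)
  predict(nnet(label ~ ., data=train, size=hidden,
               MaxNWts=10000, maxit=200), test, type="class")
}
fit.svm <- function() function(train, labels, test) {
  train$label <- as.factor(labels)
  predict(svm(label ~ ., data=train, cost=8, gamma=0.001), test)
}

Usage would then be kf.cv(data, 5, fit.knn(3)) or kf.cv(data, 5, fit.nnet(8)).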
# Sweep the number of hidden units, collecting one mean kappa per setting.
hidden.sizes <- 2:10
result <- numeric(length(hidden.sizes))  # preallocate instead of append()
for (j in seq_along(hidden.sizes))
{
  result[j] <- kf.nnet(data, 5, hidden.sizes[j])
  cat("!!!!!!!", hidden.sizes[j], result[j], "\n")
}
results <- data.frame(n = hidden.sizes, kappa = result)  # don't clobber df
plot(results$n, results$kappa)  # was df$res, a column that doesn't exist
print(results)
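Alternatively, the sweep loop can disappear entirely: sapply does the collecting, so there is no append() bookkeeping at all. A minimal sketch, assuming kf.nnet as defined above:

result <- sapply(2:10, function(h) kf.nnet(data, 5, h))
results <- data.frame(n = 2:10, kappa = result)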