In logistic regression, we can select the top variables based on their Wald chi-square values: the higher the value, the more important the predictor. In other words, we can run a univariate analysis of each independent variable and then pick the important predictors based on their Wald chi-square values.
# Method 2: read the data file
# Download the example data set used throughout this script. The response
# column is `admit`; every other column is treated as a predictor below.
# The original www.ats.ucla.edu host has been retired, so the file is read
# from its current UCLA location instead.
mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
#Run Logistic Regression
# Full model: `admit` regressed on all remaining columns of mydata at once.
mylogit <- glm(admit ~ ., data = mydata, family = "binomial")
#Create Logistic Regression Function
# Fit one single-predictor logistic regression per column of `df` and collect
# the coefficient tables.
#
# Arguments:
#   df     - data frame holding the response and every candidate predictor.
#   depvar - response column, given either as a bare name (admit) or as a
#            string ("admit").
#
# Returns an unnamed list with one element per column of `df` other than the
# response, in column order. Each element is the coefficient matrix from
# summary() of glm(depvar ~ column, family = "binomial"), i.e. an intercept
# row followed by the row(s) for that predictor.
unilogit <- function(df, depvar) {
  # substitute() yields a symbol for a bare name and the string itself for a
  # quoted name; deparse() only the former so "admit" does not become
  # "\"admit\"".
  depvar_expr <- substitute(depvar)
  depvar1 <- if (is.character(depvar_expr)) depvar_expr else deparse(depvar_expr)

  predictors <- names(df)[names(df) != depvar1]

  lapply(predictors, function(x) {
    # Back-quote column names that are not syntactic R names (spaces, leading
    # digits, reserved words) so the pasted formula still parses.
    term <- if (identical(make.names(x), x)) x else paste0("`", x, "`")
    fit <- glm(formula(paste(depvar1, "~", term)), data = df,
               family = "binomial")
    # Spell the component out in full instead of relying on `$` partial
    # matching.
    summary(fit)$coefficients
  })
}
#Run Function
# One coefficient matrix per predictor of `admit` in mydata.
univariate <- unilogit(mydata, admit)
#Merge all the coefficients
# Stack the per-model matrices into a single matrix (one intercept row plus
# the predictor row(s) for each model).
final <- do.call(rbind, univariate)
#Make the table formatable
# Expose the coefficient names as a regular column, then drop the intercepts
# so only predictor rows remain.
univList <- cbind(data.frame(Variable = row.names(final)), final)
FinalList <- subset(univList, Variable != "(Intercept)")
# The Wald chi-square of a single coefficient is its squared z statistic.
# Columns are addressed by name and extracted as vectors with [[ ]] rather
# than by position, so rank() receives a numeric vector, not a data frame.
FinalList[["Wald ChiSquare"]] <- FinalList[["z value"]]^2
# Rank 1 = largest Wald chi-square.
FinalList[["Rank"]] <- rank(-FinalList[["Wald ChiSquare"]])
FinalList <- FinalList[order(FinalList$Rank), ]
# Fit one single-predictor logistic regression per column of `df` and store
# the stacked coefficient tables in a global variable.
#
# Arguments:
#   df     - data frame holding the response and every candidate predictor.
#   depvar - response column, as a bare name (admit) or a string ("admit").
#   output - name of the variable to create in the global environment, as a
#            bare name (outtable) or a string ("outtable").
#
# Side effect: assigns a data frame to `output` in .GlobalEnv. It contains,
# for each column other than the response, the intercept and predictor rows of
# summary(glm(depvar ~ column, family = "binomial"))$coefficients. When `df`
# has no predictor columns an empty data frame is assigned.
#
# Returns the same data frame invisibly.
unilogit2 <- function(df, depvar, output) {
  # substitute() yields a symbol for a bare name and the string itself for a
  # quoted one; only the former needs deparse().
  as_name <- function(expr) if (is.character(expr)) expr else deparse(expr)
  depvar1 <- as_name(substitute(depvar))
  out <- as_name(substitute(output))

  predictors <- names(df)[names(df) != depvar1]

  # Build every table first and bind once, instead of growing a data frame
  # with rbind() inside a loop; lapply() is also safe for zero predictors,
  # unlike 1:length(x).
  tables <- lapply(predictors, function(x) {
    # Back-quote non-syntactic column names so the pasted formula parses.
    term <- if (identical(make.names(x), x)) x else paste0("`", x, "`")
    fit <- glm(formula(paste(depvar1, "~", term)), data = df,
               family = "binomial")
    data.frame(summary(fit)$coefficients)
  })

  result <- if (length(tables) > 0L) do.call(rbind, tables) else data.frame()

  # Publishing the result under a caller-chosen global name is this
  # function's documented interface, so the global assignment is kept.
  assign(out, result, envir = .GlobalEnv)
  invisible(result)
}
# Build the stacked coefficient table for every predictor of `admit` in mydata;
# unilogit2() stores it in the global variable `outtable`.
unilogit2(df = mydata, depvar = admit, output = outtable)
Is this for both continuous and categorical variables?
Can you please explain which variable has the most impact?