# 직업 데이터를 코드화하기 (encode the occupation data as numeric codes)
# Switch the working directory to the data folder so that the relative path
# passed to read.csv() below resolves there.
# NOTE(review): hard-coded absolute Windows path -- the script only runs on a
# machine that has this folder; confirm before sharing.
setwd("C:/challenge_data")
# Load the data set; the first line of the CSV supplies the column names.
data_set <- read.csv("Data_set.csv", header=TRUE)
# Show how often each MATE_OCCP_NAME_G value occurs in the loaded data.
table(data_set$MATE_OCCP_NAME_G)
# Convert occupation-group labels to numeric codes.
#
# Labels are compared exactly against the known groups listed below; the code
# of a label is its 1-based position in that list (1-17). Anything else -- an
# unrecognised label, an empty string or NA -- receives the catch-all code 18.
#
# Args:
#   row: character vector or factor of occupation labels. A single value
#        behaves as before; longer vectors are converted element-wise.
#
# Returns:
#   A numeric (double) vector with one code per element of `row`.
occp_name_to_code <- function(row) {
  # Order matters: position in this vector is the code that is returned.
  known_labels <- c(
    "1차산업 종사자",   # 1
    "2차산업 종사자",   # 2
    "3차산업 종사자",   # 3
    "*",                # 4
    "고소득 전문직",    # 5
    "공무원",           # 6
    "기업/단체 임원",   # 7
    "기타",             # 8
    "단순 노무직",      # 9
    "단순 사무직",      # 10
    "사무직",           # 11
    "예체능계 종사자",  # 12
    "운전직",           # 13
    "자영업",           # 14
    "전문직",           # 15
    "주부",             # 16
    "학생"              # 17
  )
  other_code <- 18

  # as.character() lets factor input be matched by label rather than by its
  # internal integer level.
  code <- match(as.character(row), known_labels)

  # match() gives NA both for unknown labels and for NA input; a scalar
  # `if (row == ...)` chain would stop with an error on NA instead.
  code[is.na(code)] <- other_code
  as.numeric(code)
}
# Replace the occupation labels in both occupation columns with their numeric
# codes, then show the distribution of the resulting codes.
#
# vapply() is used instead of sapply() + unlist(): it always returns a numeric
# vector (also for a zero-row data set, where sapply() gives list() and the
# following unlist() gives NULL, which would delete the column), and
# USE.NAMES = FALSE stops the original labels being carried along as names.
data_set$MATE_OCCP_NAME_G <- vapply(
  data_set$MATE_OCCP_NAME_G,
  occp_name_to_code,
  numeric(1),
  USE.NAMES = FALSE
)
table(data_set$MATE_OCCP_NAME_G)

data_set$OCCP_NAME_G <- vapply(
  data_set$OCCP_NAME_G,
  occp_name_to_code,
  numeric(1),
  USE.NAMES = FALSE
)
table(data_set$OCCP_NAME_G)
# LT1Y_PEOD_RATE 데이터를 코드화하기 (encode the LT1Y_PEOD_RATE data as numeric codes)
# Show how often each LT1Y_PEOD_RATE value occurs before it is recoded below.
table(data_set$LT1Y_PEOD_RATE)
# Convert LT1Y_PEOD_RATE band labels to numeric codes.
#
# The labels read "<number>미만" ("less than <number>") or "90이상"
# ("90 or more"). Each "미만" band is coded as the number in its label and
# "90이상" is coded as 100. Anything else -- an unrecognised label, an empty
# string or NA -- receives the code 0.
#
# Args:
#   row: character vector or factor of band labels. A single value behaves as
#        before; longer vectors are converted element-wise.
#
# Returns:
#   A numeric (double) vector with one code per element of `row`.
lt1y_peod_rate_to_code <- function(row) {
  # Parallel vectors: band_codes[i] is the code for band_labels[i].
  band_labels <- c(
    "10미만", "20미만", "30미만", "40미만",
    "50미만", "60미만", "90미만", "90이상"
  )
  band_codes <- c(10, 20, 30, 40, 50, 60, 90, 100)
  other_code <- 0

  # as.character() lets factor input be matched by label rather than by its
  # internal integer level.
  code <- band_codes[match(as.character(row), band_labels)]

  # Unmatched labels and NA input come back as NA from the lookup; a scalar
  # `if (row == ...)` chain would stop with an error on NA instead.
  code[is.na(code)] <- other_code
  code
}
# Replace the LT1Y_PEOD_RATE band labels with their numeric codes, then show
# the distribution of the resulting codes.
#
# vapply() is used instead of sapply() + unlist(): it always returns a numeric
# vector (also for a zero-row data set, where sapply() gives list() and the
# following unlist() gives NULL, which would delete the column), and
# USE.NAMES = FALSE stops the original labels being carried along as names.
data_set$LT1Y_PEOD_RATE <- vapply(
  data_set$LT1Y_PEOD_RATE,
  lt1y_peod_rate_to_code,
  numeric(1),
  USE.NAMES = FALSE
)
table(data_set$LT1Y_PEOD_RATE)
# 멤버십 데이터를 코드화하기 (encode the membership data as numeric codes)
# SKT membership grade to code
# Show how often each TEL_MBSP_GRAD value occurs before it is recoded below.
table(data_set$TEL_MBSP_GRAD)
# Convert membership-grade letters to numeric codes.
#
# "E" -> 1, "Q" -> 2, "R" -> 3, "W" -> 4 (exact, case-sensitive match).
# Anything else -- another letter, an empty string or NA -- receives the
# code 0.
#
# Args:
#   row: character vector or factor of grade letters. A single value behaves
#        as before; longer vectors are converted element-wise.
#
# Returns:
#   A numeric (double) vector with one code per element of `row`.
tel_mgsp_grad_to_code <- function(row) {
  # Order matters: position in this vector is the code that is returned.
  grade_letters <- c("E", "Q", "R", "W")
  other_code <- 0

  # as.character() lets factor input be matched by label rather than by its
  # internal integer level.
  code <- match(as.character(row), grade_letters)

  # match() gives NA both for unknown grades and for NA input; a scalar
  # `if (row == ...)` chain would stop with an error on NA instead.
  code[is.na(code)] <- other_code
  as.numeric(code)
}
# Replace the TEL_MBSP_GRAD grade letters with their numeric codes, then show
# the distribution of the resulting codes.
#
# vapply() is used instead of sapply() + unlist(): it always returns a numeric
# vector (also for a zero-row data set, where sapply() gives list() and the
# following unlist() gives NULL, which would delete the column), and
# USE.NAMES = FALSE stops the original labels being carried along as names.
data_set$TEL_MBSP_GRAD <- vapply(
  data_set$TEL_MBSP_GRAD,
  tel_mgsp_grad_to_code,
  numeric(1),
  USE.NAMES = FALSE
)
table(data_set$TEL_MBSP_GRAD)

# Show how often each CBPT_MBSP_YN value occurs before it is recoded below.
table(data_set$CBPT_MBSP_YN)
# Convert a Y/N flag to a numeric code.
#
# "Y" -> 1, "N" -> 0 (exact, case-sensitive match). Anything else -- another
# string, an empty string or NA -- receives the code 2.
#
# Args:
#   row: character vector or factor of flags. A single value behaves as
#        before; longer vectors are converted element-wise.
#
# Returns:
#   A numeric (double) vector with one code per element of `row`.
yn_to_10 <- function(row) {
  # Parallel vectors: flag_codes[i] is the code for flag_values[i].
  flag_values <- c("Y", "N")
  flag_codes <- c(1, 0)
  other_code <- 2

  # as.character() lets factor input be matched by label rather than by its
  # internal integer level.
  code <- flag_codes[match(as.character(row), flag_values)]

  # Unmatched values and NA input come back as NA from the lookup; a scalar
  # `if (row == ...)` chain would stop with an error on NA instead.
  code[is.na(code)] <- other_code
  code
}
# Replace the CBPT_MBSP_YN flags with their numeric codes, then show the
# distribution of the resulting codes.
#
# vapply() is used instead of sapply() + unlist(): it always returns a numeric
# vector (also for a zero-row data set, where sapply() gives list() and the
# following unlist() gives NULL, which would delete the column), and
# USE.NAMES = FALSE stops the original labels being carried along as names.
data_set$CBPT_MBSP_YN <- vapply(
  data_set$CBPT_MBSP_YN,
  yn_to_10,
  numeric(1),
  USE.NAMES = FALSE
)
table(data_set$CBPT_MBSP_YN)