자바전문가그룹

글번호 678
작성자 heojk
작성일 2017-06-07 11:47:41
제목 R_Preview.R
내용
R Preview
# ---- First look at the built-in iris data set ----
dim(iris)         # number of rows and columns
colnames(iris)    # variable (column) names
str(iris)         # compact structure: type and leading values of each column
attributes(iris)  # attributes attached to the data frame
summary(iris)     # per-column summary statistics (quartiles, mean, level counts)

# ---- Variance, covariance and correlation ----
var(iris[["Sepal.Length"]])                  # variance of Sepal.Length
with(iris, cov(Sepal.Length, Petal.Length))  # covariance of the two variables
with(iris, cor(Sepal.Length, Petal.Length))  # correlation of the two variables

# ---- Basic plots ----
# The argument expressions are written out in full on purpose: hist(), plot()
# and density() derive their default titles and axis labels from them.
pie(table(iris$Species))                    # pie chart of species counts
hist(iris$Sepal.Length)                     # histogram
plot(density(iris$Sepal.Length))            # kernel density estimate
plot(iris$Sepal.Length, iris$Sepal.Width)   # scatter plot
plot(iris)                                  # scatter plot matrix of all columns
pairs(iris)                                 # scatter plot matrix via pairs()
 
# Sepal.Width values from largest to smallest
sort(iris$Sepal.Width, decreasing = TRUE)
# Row positions that would put Sepal.Width into descending order
order(iris$Sepal.Width, decreasing = TRUE)
# Reorder the rows: Sepal.Length descending, ties broken by Sepal.Width ascending
temp <- iris[with(iris, order(-Sepal.Length, Sepal.Width)), ]
head(temp)
 
# ---- Combining data sets ----
rbind(iris[1, ], iris[7, ])  # stack rows 1 and 7 into a two-row data frame
# Append a column. The index is built from nrow(iris) so there is exactly one
# value per row, rather than a shorter vector being silently recycled.
head(cbind(iris, newVar = seq_len(nrow(iris))))
dataA <- data.frame(name = c("a", "b", "c"), age = c(22, 25, 30))
dataA
dataB <- data.frame(name = c("a", "c"), weight = c(80, 75))
dataB
# Join on the shared column `name`; only "a" and "c" occur in both inputs,
# so only their age and weight are returned.
merge(dataA, dataB)
# Keep every row from both inputs. "b" has no row in dataB, so its weight is
# NA while its age is kept. TRUE is spelled out because T can be reassigned.
merge(dataA, dataB, all = TRUE)
 
# ---- Taking subsets of a data set ----
head(iris, n = 10)  # first 10 rows
tail(iris, n = 10)  # last 10 rows
# Rows matching a single condition
subset(iris, Species == "setosa")
# Rows matching several conditions at once
subset(iris, Sepal.Length > 5 & Sepal.Length < 6)
# Keep only the named column
subset(iris, select = Sepal.Length)
# Keep everything except the named column
subset(iris, select = -Sepal.Length)
# Keep several columns; prefix the c(...) with - to drop them instead
subset(iris, select = c(Sepal.Length, Sepal.Width))
# Same selection with a logical mask; prefix the mask with ! to drop instead
iris[, names(iris) %in% c("Sepal.Length", "Sepal.Width")]
 
# ---- Aggregation ----
# Number of rows per species
table(iris$Species)
# Sum of Sepal.Length within each species
aggregate(x = iris$Sepal.Length, by = list(iris$Species), FUN = sum)
# Counting over two or more variables: accident type by claim reason in an
# insurance data set. Left commented out; data_claim is not created anywhere
# in the visible script.
# table(data_claim$ACCI_DVSN, data_claim$DMND_RESN_CODE)
 
# ---- The apply family ----
# Column means of the four measurement columns (MARGIN = 2 means "by column")
apply(iris[, 1:4], MARGIN = 2, FUN = mean)
# Same means, returned as a list with one element per column
lapply(iris[, 1:4], FUN = mean)
# Same means, simplified to a named vector
sapply(iris[, 1:4], FUN = mean)
# Round every Sepal.Length value to the nearest integer
sapply(iris$Sepal.Length, round)
# Mean Sepal.Length within each species
tapply(iris$Sepal.Length, INDEX = iris$Species, FUN = mean)
 
# ---- data.table ----
# Install data.table only when it is missing, so re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)
irisTable <- data.table(iris)  # convert the data.frame to a data.table

# Set a key, then extract rows by key value
setkey(irisTable, Species)  # create the key on Species
key(irisTable)              # show the key that was just set
tables()                    # overview of the data.tables that currently exist

# Querying irisTable
irisTable["setosa", ]              # rows whose key value is "setosa"
irisTable[2]                       # 2nd row
irisTable[, 2, with = FALSE]       # 2nd column
irisTable[, Sepal.Length]          # Sepal.Length as a plain vector
irisTable[, list(Sepal.Length)]    # Sepal.Length as a one-column data.table
irisTable[2:3, sum(Sepal.Length)]  # sum of Sepal.Length over rows 2 and 3
 
# Per-species mean of petal length and petal width.
# tapply version: the second column is the mean of Petal.Width, so both
# versions answer the same question.
cbind(tapply(iris$Petal.Length, iris$Species, mean), tapply(iris$Petal.Width, iris$Species, mean))
# data.table version
irisTable[, list(lengthAvg = mean(Petal.Length), widthAvg = mean(Petal.Width)), by = Species]

# Per-species mean and standard deviation of petal length.
# tapply version
cbind(tapply(iris$Petal.Length, iris$Species, mean), tapply(iris$Petal.Length, iris$Species, sd))
# data.table version
irisTable[, list(avg = mean(Petal.Length), sd = sd(Petal.Length)), by = Species]
첨부파일 R_Review.R (4,056byte)