Skip Headers

SAS  SPSS  R-PROJECT  S-PLUS  PROC-SQL

통 계
프 로 그 램 비 교

Go to Documentation Home
HOME
Go to Book List
PRO_home
Go to Table of Contents
연구회
Go to Index
자료실
Go to Master Index
R-PROJECT
Go to Feedback page
MAIL

Go to previous page
Previous
Go to next page
Next

11. Aggregating Or Summarizing 데이터.


1. SAS
2. SPSS
3. R-PROJECT
4. S-PLUS
5. PROC SQL

 


1. SAS

MAIN

 

* 각 성별에 대한 q1의 평균을 구한다.;

PROC SUMMARY DATA=BACK.mydata MEAN NWAY;

     CLASS GENDER;

     VAR q1;

     OUTPUT OUT=myAgg;

RUN;

PROC PRINT; RUN;

OBS    gender    _TYPE_    _FREQ_    _STAT_       q1

  1      f          1         4       N        4.00000

  2      f          1         4       MIN      1.00000

  3      f          1         4       MAX      3.00000

  4      f          1         4       MEAN     2.00000

  5      f          1         4       STD      0.81650

  6      m          1         4       N        4.00000

  7      m          1         4       MIN      4.00000

  8      m          1         4       MAX      5.00000

  9      m          1         4       MEAN     4.50000

 10      m          1         4       STD      0.57735

 

DATA myAgg;

 SET myAgg;

     WHERE _STAT_='MEAN';

     KEEP gender q1;

RUN;

PROC PRINT; RUN;

OBS    gender     q1

 1       f       2.0

 2       m       4.5

 

* workshop와 성별을 기준으로 q1의 평균을 구한다.;

PROC SUMMARY DATA=mydata MEAN NWAY;

     CLASS WORKSHOP GENDER;

     VAR Q1;

     OUTPUT OUT=myAgg;

RUN;

PROC PRINT; RUN;

OBS    workshop    gender    _TYPE_    _FREQ_    _STAT_       q1

  1        1         f          3         2       N        2.00000

  2        1         f          3         2       MIN      1.00000

  3        1         f          3         2       MAX      2.00000

  4        1         f          3         2       MEAN     1.50000

  5        1         f          3         2       STD      0.70711

  6        1         m          3         2       N        2.00000

  7        1         m          3         2       MIN      4.00000

  8        1         m          3         2       MAX      5.00000

  9        1         m          3         2       MEAN     4.50000

 10        1         m          3         2       STD      0.70711

 11        2         f          3         2       N        2.00000

 12        2         f          3         2       MIN      2.00000

 13        2         f          3         2       MAX      3.00000

 14        2         f          3         2       MEAN     2.50000

 15        2         f          3         2       STD      0.70711

 16        2         m          3         2       N        2.00000

 17        2         m          3         2       MIN      4.00000

 18        2         m          3         2       MAX      5.00000

 19        2         m          3         2       MEAN     4.50000

 20        2         m          3         2       STD      0.70711

 

* 평균을 선택한다.;

DATA myAgg;

 SET myAgg;

     WHERE _STAT_='MEAN';

     KEEP workshop gender q1;

     RENAME q1=meanQ1;

RUN;

PROC PRINT; RUN;

mean

OBS    workshop    gender     Q1

 1         1         f        1.5

 2         1         m        4.5

 3         2         f        2.5

 4         2         m        4.5

 

* mydata에 집계된 데이터를 Merge한다.;

PROC SORT DATA=BACK.mydata out=mydata;

     BY workshop gender;

RUN;

PROC SORT DATA=myAgg;

     BY workshop gender;

RUN;

DATA SASUSER.mydata2;

 MERGE mydata myAgg;

     BY workshop gender;

run;

PROC PRINT; RUN;

mean

OBS    id    workshop    gender    q1    q2    q3    q4     Q1

 1      1        1         f        1     1     5     1     1.5

 2      3        1         f        2     2     4     3     1.5

 3      5        1         m        4     5     2     4     4.5

 4      7        1         m        5     3     4     4     4.5

 5      2        2         f        2     1     4     1     2.5

 6      4        2         f        3     1     .     3     2.5

 7      6        2         m        5     4     5     5     4.5

 8      8        2         m        4     5     5     5     4.5



2. SPSS

MAIN

 

* SPSS Program for Aggregating/Summarizing Data.

* Get mean of q1 by gender.

GET FILE='C:\mydata.sav'.

AGGREGATE

  /OUTFILE='C:\myAgg.sav'

  /BREAK=gender

  /q1_mean = MEAN(q1).

GET FILE='C:\myAgg.sav'.

LIST.

EXECUTE.

 

* Get mean of q1 by workshop and gender.

GET FILE='C:\mydata.sav'.

AGGREGATE

  /OUTFILE='C:\myAgg.sav'

  /BREAK=workshop gender

  /q1_mean = MEAN(q1).

GET FILE='C:\myAgg.sav'.

LIST.

EXECUTE.

 

* Merge aggregated data back into mydata.

GET FILE='C:\mydata.sav'.

SORT CASES BY  workshop (A) gender (A) .

MATCH FILES /FILE=*

 /TABLE='C:\myAgg.sav'

 /BY workshop gender.

SAVE OUTFILE='C:\mydata.sav'.

EXECUTE.

 


3. R-PROJECT

MAIN

* Aggregating / Summarizing 데이터를 위한 R-Project 프로그램.

* [참 고]  RESHAPE 라이브러리

 

load(file="c:\\mydata.Rdata")

print(mydata)

attach(mydata)

 

# 필요한 패키지 로드. 필요시 사전에 인스톨 해야한다.

library(Hmisc)

library(reshape)

 

# Aggregate함수는 R-Project에서 만들어진 함수이다.

# Aggregate함수는 변수에 대하여 새 변수명을 생성한다.

# Gender는 단일 오브젝트임에 불구하고 List함수내에 들어가 있다.

myAgg<-aggregate(q1, by=list(gender), FUN=mean, na.rm=TRUE)

print(myAgg)

Group.1   x

1       f 2.0

2       m 4.5

 

# workshop과 gender를 기준으로 집계.

myAgg<-aggregate(q1, by=list(workshop,gender), FUN=mean, na.rm=TRUE)

print(myAgg)

Group.1 Group.2   x

1       1       f 1.5

2       2       f 2.5

3       1       m 4.5

4       2       m 4.5

 

# Summarize함수는 Hmisc라이브러리에 존재.

# Summarize함수는 원 변수명을 유지하고, 라벨 존재시 라벨 역시 유지한다.

library(Hmisc)

# 성별에 대하여.

myAgg<-summarize(q1, by=gender, FUN=mean, na.rm=TRUE)

print(myAgg)

gender  q1

1      f 2.0

2      m 4.5

 

# Workshop과 gender에 대하여 Summary

myAgg<-summarize(q1, by=llist(workshop,gender), FUN=mean, na.rm=TRUE)

print(myAgg)

workshop gender  q1

1        1      f 1.5

2        1      m 4.5

3        2      f 2.5

4        2      m 4.5

 

# Mydata에 집계된 값을 Merge한다.

# 먼저, q1변수명을 mean.q1으로 변경.

myAgg<-rename(myAgg, c(q1="mean.q1"))

print(myAgg)

 workshop gender mean.q1

1        1      f     1.5

2        1      m     4.5

3        2      f     2.5

4        2      m     4.5

 

# Mydata에 위의 값을 Merge한다.

mydata2<-merge(mydata,myAgg,by=c("workshop","gender") )

print(mydata2)

workshop gender q1 q2 q3 q4 mean.q1

1        1      f  1  1  5  1     1.5

2        1      f  2  2  4  3     1.5

3        1      m  4  5  2  4     4.5

4        1      m  5  3  4  4     4.5

5        2      f  2  1  4  1     2.5

6        2      f  3  1 NA  3     2.5

7        2      m  5  4  5  5     4.5

8        2      m  4  5  5  5     4.5

 


4. S-PLUS

MAIN

 

mydata<-read.table ("c:/data/mydata.csv",header=TRUE,

  sep=",",row.names="id")

print(mydata)

 

attach(mydata)

 

# Aggregate함수는 변수에 대하여 새 변수명을 생성한다.

# Gender는 단일 오브젝트임에 불구하고 List함수내에 들어가 있다.

myAgg<-aggregate(q1, by=list(gender), FUN=mean, na.rm=TRUE)

print(myAgg)

 Group.1   x

f       f 2.0

m       m 4.5

 

# workshop과 gender를 기준으로 집계.

myAgg<-aggregate(q1, by=list(workshop,gender), FUN=mean, na.rm=TRUE)

print(myAgg)

Group.1 Group.2   x

1       1       f 1.5

2       2       f 2.5

3       1       m 4.5

4       2       m 4.5

 

names(myAgg)[names(myAgg)=="Group.1"] <-"workshop"

names(myAgg)[names(myAgg)=="Group.2"] <-"gender"

names(myAgg)[names(myAgg)=="x"] <-"mean.q1"

print(myAgg)

workshop gender mean.q1

1        1      f     1.5

2        2      f     2.5

3        1      m     4.5

4        2      m     4.5

 

# Mydata에 집계된 데이터 Merge.

mydata2<-merge(mydata,myAgg,by=c("workshop","gender") )

print(mydata2)

workshop gender q1 q2 q3 q4 mean.q1

1        1      f  1  1  5  1     1.5

2        1      f  2  2  4  3     1.5

3        1      m  4  5  2  4     4.5

4        1      m  5  3  4  4     4.5

5        2      f  2  1  4  1     2.5

6        2      f  3  1 NA  3     2.5

7        2      m  5  4  5  5     4.5

8        2      m  4  5  5  5     4.5

 


5. PROC SQL

MAIN

proc sql;

  create table myAgg as

    select GENDER,

           MEAN(q1) as q1_mean

    from   BACK.mydata

    group by GENDER;

  select * from myAgg;

quit;

gender     q1_mean

 ------------------

 f                2

 m              4.5

 

proc sql;

  create table myAgg as

    select WORKSHOP,

           GENDER,

           MEAN(q1) as q1_mean

    from   BACK.mydata

    group by 1,2;

  select * from myAgg;

quit;

workshop  gender     q1_mean

 ----------------------------

        1  f              1.5

        1  m              4.5

        2  f              2.5

        2  m              4.5

 

* Sub-Query를 이용하여 평균을 가로 결합;

proc sql;

  create table myAgg as

    select a.*,b.q1_mean

    from   BACK.mydata a , ( select workshop,gender,

                                    mean(q1) as q1_mean

                             from   BACK.mydata

                             group by workshop,2 ) b

    where  a.workshop = b.workshop

    and    a.gender   = b.gender;

  select * from myAgg;

quit;

id  workshop  gender          q1        q2        q3        q4   q1_mean

------------------------------------------------------------------------

 1         1  f                1         1         5         1       1.5

 2         2  f                2         1         4         1       2.5

 3         1  f                2         2         4         3       1.5

 4         2  f                3         1         .         3       2.5

 5         1  m                4         5         2         4       4.5

 6         2  m                5         4         5         5       4.5

 7         1  m                5         3         4         4       4.5

 8         2  m                4         5         5         5       4.5