Skip Headers

MODEL

E-MINER PROGRAM (마이닝)

Go to Documentation Home
HOME
Go to Book List
Miner_home
Go to Table of Contents
연구회
Go to Index
자료실
Go to Master Index
SAS
Go to Feedback page
MAIL

Go to previous page
Previous
Go to next page
Next

2-2. 회귀나무(Regression Tree)


* 목표변수가 연속형인 경우는 목표변수의 평균(mean)과 표준편차(standard deviation)에 기초하여 마디의 분리가 일어나며, 이를 회귀나무(Regression Tree)라 한다.


 

 * 메타 샘플데이터 생성(Input data source);
%let DM_SEED = 12345;

data EMDATA.VIEW_CCM / view=EMDATA.VIEW_CCM;
 set EMSAMPLE.WAGES;
run;

data EMPROJ.SMP_VIYF /view=EMPROJ.SMP_VIYF;
 set EMSAMPLE.WAGES;
run;

/*
 * Data partition: split EMDATA.VIEW_CCM into a training table (TRN4FC47)
 * and a validation table (VALNCP5A) by sequential random sampling without
 * replacement.
 *
 * For record _n_ the test is
 *     (records not yet visited) * U(0,1) <= (training rows still needed)
 * where 1084 + 1 - _n_ counts the records left including the current one
 * and 759 - _c000001 counts the training rows still to be drawn, so exactly
 * 759 rows go to training when the input holds 1084 rows.
 * NOTE(review): 1084 and 759 are hard-coded; presumably 1084 is the row
 * count of EMSAMPLE.WAGES - confirm before reusing with other data.
 *
 * The random stream is seeded from the macro variable SEED so that editing
 * the %let below actually changes the partition (the literal used to be
 * repeated inside RANUNI, leaving the macro variable without effect).
 */
%let seed = 12345;

data EMDATA.TRN4FC47
     EMDATA.VALNCP5A;
    /* _c000001 / _c000002 are running row counters; keep them out of the outputs. */
    drop _c00:;
    set EMDATA.VIEW_CCM;

    if (1084 + 1 - _n_) * ranuni(&seed) <= (759 - _c000001) then do;
        _c000001 + 1;               /* sum statement: starts at 0 and is retained */
        output EMDATA.TRN4FC47;
    end;
    else do;
        _c000002 + 1;
        output EMDATA.VALNCP5A;
    end;
run;

/* Data view carrying the DMDB name: a pass-through view of the training partition. */
data EMDATA.dm_DGM00000 / view = EMDATA.dm_DGM00000;
    set EMDATA.TRN4FC47;
run;

/*
 * Fit the tree for the interval target LNWAGE on the training partition,
 * using EMDATA.VALNCP5A as validation data. The tree is written to
 * EMPROJ.TREEIE71; the remaining OUT* options request auxiliary output
 * data sets in the WORK library.
 */
proc split
    data          = EMDATA.TRN4FC47
    outtree       = EMPROJ.TREEIE71
    nodesample    = 759              /* NODESAMPLE - number of observations */
    splitsize     = 7                /* NODESAMPLE/100 */
    leafsize      = 5
    nrules        = 5
    validata      = EMDATA.VALNCP5A
    outafds       = afdsdata
    outimportance = IMPORTANCEdata
    outleaf       = leafdata
    outmatrix     = matrixdata
    outseq        = subtree
    outnodes      = NODESdata
    outrules      = RULESdata
    outtime       = timedata
;
    /* Inputs declared with a binary measurement level. */
    input SOUTH  NONWH HISPANIC GENDER   MARRIED MARRFEM UNION MANUF
          CONSTR MANAG SALES    CLERICAL SERVICE PROF    CPS85
          / level = binary;

    /* Inputs declared with an interval measurement level. */
    input EDUC EXPER EXPERSQ AGE / level = interval;

    /* Interval target: this is what makes the model a regression tree. */
    target LNWAGE / level = interval;
run;

/*
 * Second PROC SPLIT pass: reads the stored tree EMPROJ.TREEIE71 together with
 * the DMDB-named training view and the validation data, and writes the
 * resulting tree to EMPROJ.TREEO89Z.
 * NOTE(review): presumably this selects/refits the final subtree - confirm
 * against the PROC SPLIT documentation.
 */
proc split
    intree   = EMPROJ.TREEIE71
    data     = emdata.dm_DGM00000
    outtree  = EMPROJ.TREEO89Z
    validata = EMDATA.VALNCP5A
;
run;

/*
 * Score the training partition with the tree in EMPROJ.TREEO89Z.
 * OUT=_NULL_ suppresses the scored data set; only the OUTFIT= table
 * (EMPROJ.TNFTY39P) is kept, with the data tagged ROLE=TRAIN.
 */
proc split intree = EMPROJ.TREEO89Z;
    score
        data   = EMDATA.TRN4FC47
        out    = _NULL_
        outfit = EMPROJ.TNFTY39P
        role   = TRAIN
    ;
run;

/*
 * Score the validation partition with the tree in EMPROJ.TREEO89Z.
 * The scored rows go to EMPROJ._A00000X, restricted by KEEP= to the listed
 * score-variable prefixes, _WARN_ and the target LNWAGE; the OUTFIT= table
 * goes to EMPROJ.VLFT4E7M, with the data tagged ROLE=VALID.
 */
proc split intree = EMPROJ.TREEO89Z;
    score
        data   = EMDATA.VALNCP5A
        out    = EMPROJ._A00000X(keep=P_: D_: _WARN_
                                      EP_: BP_: CP_:
                                      EL_: BL_: CL_: IC_:
                                      LNWAGE)
        outfit = EMPROJ.VLFT4E7M
        role   = VALID
    ;
run;

/* ------------------------------------------------------------------ */
/* Copy only the first observation of the scored validation data.      */
data EMPROJ._000assx;
    set EMPROJ._A00000X (obs = 1);
run;

/*
 * Reduce the scored validation data to the target LNWAGE and the P_ columns.
 * Rows whose target formats to blank or '.' (missing) are deleted, except
 * that the last input row is never deleted by this test.
 * _Ftarget and _ncount are working variables; KEEP= leaves them out of the
 * output. %DMNORLEN is a macro defined outside this listing that supplies
 * the character length.
 */
data EMPROJ._A00000Y (keep = P_: LNWAGE);
    set EMPROJ._A00000X end = _lastobn;

    length _Ftarget $%DMNORLEN;
    /* Left-aligned, upper-cased BEST12. rendering of the target value. */
    _Ftarget = left(trim(upcase(put(LNWAGE, BEST12.))));

    if (not _lastobn) and (_Ftarget = '.' or _Ftarget = '') then delete;

    /* Running count of the rows that survive the delete above. */
    _ncount + 1;
run;

/* Copy only the first observation of the scored validation data. */
data EMPROJ.tscr;
    set EMPROJ._A00000X (obs = 1);
run;

/*
 * One pass over the actual/predicted pairs, producing two tables:
 *   EMPROJ._000tmp1 - every row where both LNWAGE and P_LNWAGE are present,
 *                     with the residual _err = LNWAGE - P_LNWAGE;
 *   EMPROJ._000tmp2 - a single row holding the running maximum and minimum
 *                     of the actual (_at_*), predicted (_pt_*) and residual
 *                     (_err_*) values, written at the last input row.
 */
data EMPROJ._000tmp1 (keep = LNWAGE P_LNWAGE _err)
     EMPROJ._000tmp2 (keep = _at_max _pt_max _err_max
                             _at_min _pt_min _err_min);
    set EMPROJ._A00000Y end = last_obn;

    /* Extremes are carried across rows; they start out missing. */
    retain _at_max _pt_max _err_max
           _at_min _pt_min _err_min;

    if LNWAGE ne . and P_LNWAGE ne . then do;
        _err = LNWAGE - P_LNWAGE;

        /* First usable pair: seed every extreme from the current row. */
        if _at_max = . or _pt_max = . then do;
            _at_max  = LNWAGE;
            _pt_max  = P_LNWAGE;
            _err_max = _err;
            _at_min  = LNWAGE;
            _pt_min  = P_LNWAGE;
            _err_min = _err;
        end;

        /* Widen the actual-value range. */
        if LNWAGE > _at_max then _at_max = LNWAGE;
        else if LNWAGE < _at_min then _at_min = LNWAGE;

        /* Widen the predicted-value range. */
        if P_LNWAGE > _pt_max then _pt_max = P_LNWAGE;
        else if P_LNWAGE < _pt_min then _pt_min = P_LNWAGE;

        /* Widen the residual range. */
        if _err > _err_max then _err_max = _err;
        else if _err < _err_min then _err_min = _err;

        output EMPROJ._000tmp1;
    end;

    /* Emit the summary row once, whether or not the last row was usable. */
    if last_obn then output EMPROJ._000tmp2;
run;

/*
 * Convert actual, predicted and residual values into bin numbers using
 *     bin = ceil((value - offset) * 20 / width)
 * with a result of 0 moved into bin 1.
 * Actual and predicted share offset 0.3567 and width 3.4388; the residual
 * uses offset -1.19349393939393 and width 3.30504700061842.
 * NOTE(review): the constants are presumably the minimum and range found in
 * EMPROJ._000tmp2 for this data - confirm before reusing with other data.
 * Both outputs receive every row: _000tp3a keeps (_at, _pt) and _000tp3e
 * keeps (_err, _pt).
 */
data EMPROJ._000tp3a (keep = _at _pt)
     EMPROJ._000tp3e (keep = _err _pt);
    set EMPROJ._000tmp1;

    /* Actual value -> bin. */
    _at = ceil((LNWAGE - 0.3567) * 20 / 3.4388);
    if _at = 0 then _at = 1;

    /* Predicted value -> bin. */
    _pt = ceil((P_LNWAGE - 0.3567) * 20 / 3.4388);
    if _pt = 0 then _pt = 1;

    /* Residual -> bin; the offset is negative, hence the subtraction of a negative. */
    _err = ceil((_err - (-1.19349393939393)) * 20 / 3.30504700061842);
    if _err = 0 then _err = 1;
run;

/* Cross-tabulate actual bin by predicted bin; counts go to EMPROJ._A00000O, nothing is printed. */
proc freq data = EMPROJ._000tp3a noprint;
    tables _at * _pt / out = EMPROJ._A00000O;
run;

/*
 * Build the full 20 x 20 grid of (_at, _pt) bin pairs with COUNT and PERCENT
 * set to zero; it serves as the master table for the UPDATE that follows, so
 * that bin pairs with no observations still appear.
 */
data EMPROJ._000tmp4;
    count   = 0;
    percent = 0;
    format _at _pt 2.;

    do _at = 1 to 20;
        do _pt = 1 to 20;
            output;
        end;
    end;
run;

/* Overlay the observed frequencies on the zero-filled grid, matching on (_at, _pt). */
data EMPROJ._A00000O;
    update EMPROJ._000tmp4
           EMPROJ._A00000O;
    by _at _pt;
run;

/*
 * Replace each bin number by the bin midpoint on the original scale:
 * 0.17194 is the bin width (3.4388 / 20) and 0.44267 is the midpoint of
 * bin 1 (0.3567 + 0.17194 / 2).
 */
data EMPROJ._A00000O;
    set EMPROJ._A00000O;
    format _at best8.1 _pt best8.1;

    _at = (_at - 1) * 0.17194 + (0.44267);
    _pt = (_pt - 1) * 0.17194 + (0.44267);
run;

/* Cross-tabulate residual bin by predicted bin; counts go to EMPROJ._A00000P, nothing is printed. */
proc freq data = EMPROJ._000tp3e noprint;
    tables _err * _pt / out = EMPROJ._A00000P;
run;

/*
 * Rebuild EMPROJ._000tmp4 as the full 20 x 20 grid of (_err, _pt) bin pairs
 * with COUNT and PERCENT set to zero; it serves as the master table for the
 * UPDATE that follows, so that empty bin pairs still appear.
 */
data EMPROJ._000tmp4;
    count   = 0;
    percent = 0;
    format _err _pt 2.;

    do _err = 1 to 20;
        do _pt = 1 to 20;
            output;
        end;
    end;
run;

/* Overlay the observed frequencies on the zero-filled grid, matching on (_err, _pt). */
data EMPROJ._A00000P;
    update EMPROJ._000tmp4
           EMPROJ._A00000P;
    by _err _pt;
run;

/*
 * Replace each bin number by the bin midpoint on the original scale.
 * Predicted: width 0.17194 (3.4388 / 20), bin-1 midpoint 0.44267.
 * Residual:  width 0.16525235003092 (3.30504700061842 / 20),
 *            bin-1 midpoint -1.11086776437847
 *            (-1.19349393939393 + 0.16525235003092 / 2).
 */
data EMPROJ._A00000P;
    set EMPROJ._A00000P;
    format _pt best8.1 _err best8.1;

    _pt  = (_pt - 1) * 0.17194 + (0.44267);
    _err = (_err - 1) * 0.16525235003092 + (-1.11086776437847);
run;


맨 위로 이동


Go to previous page
Previous
Go to next page
Next
2007년 백승민이 제작하였습니다.
(http://cafe.daum.net/statsas , http://statwith.pe.kr)
Go to Documentation Home
HOME
Go to Book List
Miner_home
Go to Table of Contents
연구회
Go to Index
자료실
Go to Master Index
SAS
Go to Feedback page
MAIL