Skip Headers

EXPLORE

E-MINER PROGRAM (마이닝)

Go to Documentation Home
HOME
Go to Book List
Miner_home
Go to Table of Contents
연구회
Go to Index
자료실
Go to Master Index
SAS
Go to Feedback page
MAIL

Go to previous page
Previous
Go to next page
Next

6-2. 카이제곱 통계량을 이용한 변수 선택(Variable Selection)


* 일반적으로 대용량 데이터에는 기존의 조사나 실험에 의한 자료에서 발생하는 변수의 수와는 비교할 수 없을 정도로 변수가 많다. 때로는 데이터의 용량이 컴퓨터의 처리 능력을 넘어서는 경우도 있고, 모형화에 시간이 너무 많이 소요되어 이를 기다릴 수 없는 경우도 있다. 이러한 경우 Variable Selection 노드는 쓸모 있는 변수들을 빠르게 선택하여 준다.


1. PROC DMSPLIT
2. 데이터 SCORING(메타 샘플데이터)
3. 데이터 SCORING(뷰 데이터)

1. PROC DMSPLIT

MAIN

 

/* Random-number seed made available to later steps as &DM_SEED. */
%let DM_SEED = 12345;

/* Expose EMSAMPLE.HMEQ, unchanged, as the DATA step view EMDATA.VIEW_6HE. */
data EMDATA.VIEW_6HE / view = EMDATA.VIEW_6HE;
    set EMSAMPLE.HMEQ;
run;

* 메타 샘플데이터 생성(Input data source);
/*
data SMPKOMRN(label="Sample of EMDATA.VIEW_P25.");
 set EMSAMPLE.HMEQ nobs = total;
     drop _sample_count_;
     if _sample_count_ < 2000 then do;
        if ranuni(12345) *(total + 1 - _N_ ) < = ( 2000 - _sample_count_) then do ;
           _sample_count_ + 1;
           output;
        end;
     end;
run;
quit;
*/


/*
 * Draw a random sample of at most 2000 rows from EMDATA.VIEW_6HE into
 * EMPROJ.SMP_VIGA in a single sequential pass.
 *
 * Each row is kept when U * (rows not yet read) <= (rows still wanted),
 * where U is a uniform random number, so the chance of keeping a row
 * rises as fewer input rows remain to fill the sample.
 *
 * Changes from the generated code:
 *   - the label now names the data set that is actually read (VIEW_6HE);
 *   - the seed comes from the DM_SEED macro variable (set to 12345 at the
 *     top of this program) instead of a second hard-coded copy;
 *   - the stray QUIT after RUN was removed (a DATA step ends at RUN).
 */
data EMPROJ.SMP_VIGA(label="Sample of EMDATA.VIEW_6HE.");
 set EMDATA.VIEW_6HE;
     drop _sample_count_;
     /* The sum statement below implicitly retains _sample_count_ and
        starts it at 0, so it counts the rows written so far. */
     if _sample_count_ < 2000 then do;
        /* 5960 is the hard-coded population size -- presumably the row
           count of EMSAMPLE.HMEQ; confirm if the source table changes.
           (5960 + 1 - _N_) is the number of rows not yet consumed,
           including the current one. */
        if ranuni(&DM_SEED) * (5960 + 1 - _N_) <= (2000 - _sample_count_) then do;
           _sample_count_ + 1;
           output;
        end;
     end;
run;

/*
 * Run PROC DMDB over EMDATA.VIEW_6HE and store its catalog as
 * EMPROJ.dm_DGM00000, the catalog the later DMSPLIT and SPLIT steps
 * reference through dmdbcat=.
 *   class  : BAD (descending order), REASON and JOB (ascending order)
 *   var    : the ten numeric inputs listed below
 *   target : BAD
 * NOTE(review): out=_null_ presumably suppresses the output data set so
 * that only the catalog is produced -- confirm against the DMDB docs.
 */
proc dmdb data = EMDATA.VIEW_6HE
          out = _null_
          dmdbcat= EMPROJ.dm_DGM00000
          normlen=32 maxlevel=513;
     class BAD(Desc) REASON(Asc) JOB(Asc);
     var LOAN MORTDUE VALUE YOJ DEROG
         DELINQ CLAGE NINQ CLNO DEBTINC;
     target BAD;
run;

/* Alias view: expose EMDATA.VIEW_6HE under the same member name as the
   DMDB catalog (dm_DGM00000). */
data EMDATA.dm_DGM00000 / view = EMDATA.dm_DGM00000;
    set EMDATA.VIEW_6HE;
run;

/*
 * Run PROC DMSPLIT on EMDATA.VIEW_6HE with target BAD, using the DMDB
 * catalog EMPROJ.dm_DGM00000; the outvars= data set is EMPROJ.OUTVMEFV.
 * NOTE(review): chisq=3.84 looks like the 5% critical value of a
 * chi-square with 1 degree of freedom, used as the minimum split
 * statistic -- confirm. The meaning of bins= and passes= should likewise
 * be checked against the procedure documentation.
 */
proc dmsplit data=EMDATA.VIEW_6HE
             dmdbcat=EMPROJ.dm_DGM00000
             bins=50 chisq=3.84 passes=6
             outvars = EMPROJ.OUTVMEFV;
     var LOAN MORTDUE VALUE REASON JOB
         YOJ DEROG DELINQ CLAGE NINQ
     CLNO DEBTINC;
     target BAD;
run;

/*
 * Run PROC SPLIT against the same DMDB catalog and write the tree to
 * EMPROJ.OUTTWA1U, requesting the largest subtree.
 * NOTE(review): indmsplit presumably tells SPLIT to reuse the result of
 * the preceding PROC DMSPLIT run instead of growing a new tree -- confirm.
 */
proc split dmdbcat=EMPROJ.dm_DGM00000
           indmsplit

           subtree=largest
           outtree=EMPROJ.OUTTWA1U;
run;


맨 위로 이동 맨 위로 이동


2. 데이터 SCORING(메타 샘플데이터)

MAIN

* 메타 샘플데이터;

/*
 * Scoring view over the sample EMPROJ.SMP_VIGA.
 *
 * Every input row is pushed through a hard-coded binary decision tree on
 * DELINQ, DEBTINC, VALUE, REASON, YOJ, CLAGE and DEROG. Each leaf stores a
 * value in _pval and jumps to the label T1MBZHEP, where the outputs are
 * derived:
 *   P_EVENT  = 1 - _pval  (the leaf value is flipped at the label)
 *   P_NEVENT = 1 - P_EVENT
 *   I_BAD    = '1' when P_EVENT ge 0.5, otherwise '0'
 *   _warn_   = 'U' when REASON holds a level the tree does not list; such
 *              rows receive the fixed fallback probabilities instead.
 *
 * %DMNORLEN and %DMNORMCP are macros that are not defined in this file --
 * presumably supplied by Enterprise Miner (the length of a normalized
 * level and a routine that normalizes a formatted value into _norm1);
 * confirm before running outside that environment.
 */
data EMPROJ.SMP_4VMK/view=EMPROJ.SMP_4VMK;
 set EMPROJ.SMP_VIGA;
     *-------------------------------------------------------------*;
     * TOOL : Variable Selection ;
     * TYPE : EXPLORE ;
     * NODE : Variable Selection [T1MBZHEP] ;
     *-------------------------------------------------------------*;

     length _warn_ $ 4;
     label _warn_ = "Warnings";
     length _norm1 $ %DMNORLEN;
     _norm1 = ' ';
     length _FORMAT $200;
     drop _FORMAT;
     length I_BAD $ 1;
     /*-- DMSPLIT TREE LOGIC --*/
     /* _k is the branch chosen at the current node (0 = first branch,
        1 = second branch). Every split names the branch that a missing
        value follows before it tests the cut-off. */
     _k = 0;
     /* Root split: DELINQ below 0.6 (or missing) versus 0.6 and above. */
     if DELINQ = . then _k = 0;
     else if DELINQ < 0.6 then _k = 0;
     else _k = 1;

     if _k = 0 then do;
        if DEBTINC = . then _k = 0;
        else if DEBTINC < 45.1377821000916 then _k = 0;
        else _k = 1;
        if _k = 0 then do;
           if DEBTINC = . then _k = 1;
           else if DEBTINC < 33.5383285500796 then _k = 0;
           else _k = 1;
           if _k = 0 then do;
              if VALUE = . then _k = 1;
              else if VALUE < 24958.18 then _k = 0;
              else _k = 1;
              if _k = 0 then do;
                 /* Only categorical split in the tree: REASON is formatted,
                    normalized by %DMNORMCP and matched against the listed
                    levels. Any other level sets the 'U' warning and leaves
                    the tree without assigning _pval. */
                 _FORMAT = put(REASON, $7.);
                 %DMNORMCP(_FORMAT, _norm1);
                 select(_norm1);
                   when('') _k = 0;
                   when('DEBTCON') _k = 1;
                   when('HOMEIMP') _k = 0;
                   otherwise do;
                     _warn_ = 'U';
                     goto T1MBZHEP;
                   end;
                 end;
                 if _k = 0 then do;
                    _pval = 0;
                    goto T1MBZHEP;
                 end;
                 else do;
                    _pval = 1;
                    goto T1MBZHEP;
                 end;
              end;
              else do;
                 if YOJ = . then _k = 1;
                 else if YOJ < 5.74 then _k = 0;
                 else _k = 1;
                 if _k = 0 then do;
                    _pval = 0.92821368948247;
                    goto T1MBZHEP;
                 end;
                 else do;
                    _pval = 0.9813829787234;
                    goto T1MBZHEP;
                 end;
              end;
           end;
           else do;
             if DEBTINC = . then _k = 0;
             else if DEBTINC < 34.0023066920801 then _k = 0;
             else _k = 1;
             if _k = 0 then do;
                if CLAGE = . then _k = 0;
                else if CLAGE < 186.917369751142 then _k = 0;
                else _k = 1;
                if _k = 0 then do;
                   _pval = 0.46215780998389;
                   goto T1MBZHEP;
                end;
                else do;
                   _pval = 0.74113475177304;
                   goto T1MBZHEP;
                end;
             end;
             else do;
                if DEROG = . then _k = 0;
                else if DEROG < 2 then _k = 0;
                else _k = 1;
                if _k = 0 then do;
                   _pval = 0.94378698224852;
                   goto T1MBZHEP;
                end;
                else do;
                   _pval = 0.63157894736842;
                   goto T1MBZHEP;
                end;
             end;
           end;
        end;
        else do;
           if CLAGE = . then _k = 0;
           else if CLAGE < 233.646712188928 then _k = 0;
           else _k = 1;
           if _k = 0 then do;
              _pval = 0;
              goto T1MBZHEP;
           end;
           else do;
              if YOJ = . then _k = 1;
              else if YOJ < 5.74 then _k = 0;
              else _k = 1;
              if _k = 0 then do;
                 _pval = 0;
                 goto T1MBZHEP;
              end;
              else do;
                 _pval = 1;
                 goto T1MBZHEP;
              end;
           end;
        end;
     end;
     else do;
        /* Second root branch (DELINQ at or above 0.6). */
        if DEBTINC = . then _k = 1;
        else if DEBTINC < 32.9705231315475 then _k = 0;
        else _k = 1;
        if _k = 0 then do;
           if DEBTINC = . then _k = 1;
           else if DEBTINC < 7.01370399865341 then _k = 0;
           else _k = 1;
           if _k = 0 then do;
              _pval = 0;
              goto T1MBZHEP;
           end;
           else do;
              if DEROG = . then _k = 0;
              else if DEROG < 1 then _k = 0;
              else _k = 1;
              if _k = 0 then do;
                 if DELINQ = . then _k = 0;
                 else if DELINQ < 5.208 then _k = 0;
                else _k = 1;
                if _k = 0 then do;
                   _pval = 0.90277777777777;
                   goto T1MBZHEP;
                end;
                else do;
                   _pval = 0;
                   goto T1MBZHEP;
                end;
             end;
             else do;
                if DELINQ = . then _k = 0;
                else if DELINQ < 2.04 then _k = 0;
                else _k = 1;
                if _k = 0 then do;
                   _pval = 0.77777777777777;
                   goto T1MBZHEP;
                end;
                else do;
                   _pval = 0;
                   goto T1MBZHEP;
                end;
             end;
          end;
       end;
       else do;
          if DEBTINC = . then _k = 0;
          else if DEBTINC < 36.3773556427399 then _k = 0;
          else _k = 1;
          if _k = 0 then do;
             if DEBTINC = . then _k = 0;
             else if DEBTINC < 33.7881629342337 then _k = 0;
             else _k = 1;
             if _k = 0 then do;
                if DEBTINC = . then _k = 1;
                else if DEBTINC < 33.7554573421262 then _k = 0;
                else _k = 1;
                if _k = 0 then do;
                   _pval = 0.85714285714285;
                   goto T1MBZHEP;
                end;
                else do;
                   _pval = 0.18162393162393;
                   goto T1MBZHEP;
                end;
             end;
             else do;
                if DELINQ = . then _k = 0;
                else if DELINQ < 4.056 then _k = 0;
                else _k = 1;
                if _k = 0 then do;
                   _pval = 0.84946236559139;
                   goto T1MBZHEP;
                end;
                else do;
                   _pval = 0;
                   goto T1MBZHEP;
                end;
             end;
          end;
          else do;
             if DEBTINC = . then _k = 0;
             else if DEBTINC < 43.0547473646769 then _k = 0;
             else _k = 1;
             if _k = 0 then do;
                if DELINQ = . then _k = 0;
                else if DELINQ < 4.056 then _k = 0;
                else _k = 1;
                if _k = 0 then do;
                   _pval = 0.85714285714285;
                   goto T1MBZHEP;
                end;
                else do;
                   _pval = 0;
                   goto T1MBZHEP;
                end;
             end;
             else do;
                if DEBTINC = . then _k = 0;
                else if DEBTINC < 46.2598953912067 then _k = 0;
                else _k = 1;
                if _k = 0 then do;
                   _pval = 0.42307692307692;
                   goto T1MBZHEP;
                end;
                else do;
                   _pval = 0;
                   goto T1MBZHEP;
                end;
             end;
          end;
       end;
     end;

     /* Every leaf jumps here. The leaf value is flipped (1 - _pval), with
        out-of-range values mapped to the opposite bound. On the 'U' path
        _pval was never assigned; a missing value compares below 0 in SAS,
        so it becomes 1 here, but that result is not used because the
        fallback branch below applies. */
     T1MBZHEP : /* terminal node */
       if _pval < 0 then _pval = 1;
       else if _pval > 1 then _pval = 0;
       else _pval = 1-_pval;
       /* binary target */
     
if _warn_ ne 'U' then do;
          P_EVENT = _pval;
          P_NEVENT = 1-P_EVENT;
          if P_EVENT ge 0.5 then I_BAD = '1';
          else I_BAD = '0';
       end;
       else do;
          /* Unrecognized REASON level: use fixed fallback probabilities --
             presumably the overall event rate of the training data;
             confirm. */
          P_EVENT = 0.1994966442953;
          P_NEVENT = 0.80050335570469;
          I_BAD = '0';
       end;
       drop _pval _k _norm1;
       label P_EVENT = "Predicted: EVENT for BAD";
       label P_NEVENT = "Predicted: NO EVENT for BAD";
       label I_BAD = "Into: BAD";
run;


맨 위로 이동 맨 위로 이동


3. 데이터 SCORING(뷰 데이터)

MAIN

/*
 * Scoring view over the full input view EMDATA.VIEW_6HE.
 *
 * Every input row is pushed through a hard-coded binary decision tree on
 * DELINQ, DEBTINC, VALUE, REASON, YOJ, CLAGE and DEROG. Each leaf stores a
 * value in _pval and jumps to the label T1MBZHEP, where the outputs are
 * derived:
 *   P_EVENT  = 1 - _pval  (the leaf value is flipped at the label)
 *   P_NEVENT = 1 - P_EVENT
 *   I_BAD    = '1' when P_EVENT ge 0.5, otherwise '0'
 *   _warn_   = 'U' when REASON holds a level the tree does not list; such
 *              rows receive the fixed fallback probabilities instead.
 *
 * %DMNORLEN and %DMNORMCP are macros that are not defined in this file --
 * presumably supplied by Enterprise Miner (the length of a normalized
 * level and a routine that normalizes a formatted value into _norm1);
 * confirm before running outside that environment.
 */
data EMDATA.VSELPF0L/view=EMDATA.VSELPF0L;
 set EMDATA.VIEW_6HE;
     *-------------------------------------------------------------*;
     * TOOL : Variable Selection ;
     * TYPE : EXPLORE ;
     * NODE : Variable Selection [T1MBZHEP] ;
     *-------------------------------------------------------------*;

     length _warn_ $ 4;
     label _warn_ = "Warnings";
     length _norm1 $ %DMNORLEN;
     _norm1 = ' ';
     length _FORMAT $200;
     drop _FORMAT;
     length I_BAD $ 1;

     /*-- DMSPLIT TREE LOGIC --*/
     /* _k is the branch chosen at the current node (0 = first branch,
        1 = second branch). Every split names the branch that a missing
        value follows before it tests the cut-off. */
     _k = 0;
     /* Root split: DELINQ below 0.6 (or missing) versus 0.6 and above. */
     if DELINQ = . then _k = 0;
     else if DELINQ < 0.6 then _k = 0;
     else _k = 1;

     if _k = 0 then do;
        if DEBTINC = . then _k = 0;
        else if DEBTINC < 45.1377821000916 then _k = 0;
        else _k = 1;
        if _k = 0 then do;
           if DEBTINC = . then _k = 1;
           else if DEBTINC < 33.5383285500796 then _k = 0;
           else _k = 1;
           if _k = 0 then do;
              if VALUE = . then _k = 1;
              else if VALUE < 24958.18 then _k = 0;
              else _k = 1;
              if _k = 0 then do;
                 /* Only categorical split in the tree: REASON is formatted,
                    normalized by %DMNORMCP and matched against the listed
                    levels. Any other level sets the 'U' warning and leaves
                    the tree without assigning _pval. */
                 _FORMAT = put(REASON, $7.);
                 %DMNORMCP(_FORMAT, _norm1);
                 select(_norm1);
                    when('') _k = 0;
                    when('DEBTCON') _k = 1;
                    when('HOMEIMP') _k = 0;
                    otherwise do;
                       _warn_ = 'U';
                       goto T1MBZHEP;
                    end;
                 end;
                 if _k = 0 then do;
                    _pval = 0;
                    goto T1MBZHEP;
                 end;
                 else do;
                    _pval = 1;
                    goto T1MBZHEP;
                 end;
              end;
              else do;
                 if YOJ = . then _k = 1;
                 else if YOJ < 5.74 then _k = 0;
                 else _k = 1;
                 if _k = 0 then do;
                    _pval = 0.92821368948247;
                    goto T1MBZHEP;
                 end;
                 else do;
                    _pval = 0.9813829787234;
                    goto T1MBZHEP;
                 end;
              end;
           end;
           else do;
              if DEBTINC = . then _k = 0;
              else if DEBTINC < 34.0023066920801 then _k = 0;
              else _k = 1;
              if _k = 0 then do;
                 if CLAGE = . then _k = 0;
                 else if CLAGE < 186.917369751142 then _k = 0;
                 else _k = 1;
                 if _k = 0 then do;
                    _pval = 0.46215780998389;
                    goto T1MBZHEP;
                 end;
                 else do;
                    _pval = 0.74113475177304;
                    goto T1MBZHEP;
                 end;
              end;
              else do;
                 if DEROG = . then _k = 0;
                 else if DEROG < 2 then _k = 0;
                 else _k = 1;
                 if _k = 0 then do;
                    _pval = 0.94378698224852;
                    goto T1MBZHEP;
                 end;
                 else do;
                    _pval = 0.63157894736842;
                    goto T1MBZHEP;
                 end;
              end;
           end;
        end;
        else do;
            if CLAGE = . then _k = 0;
            else if CLAGE < 233.646712188928 then _k = 0;
            else _k = 1;
            if _k = 0 then do;
               _pval = 0;
               goto T1MBZHEP;
            end;
            else do;
               if YOJ = . then _k = 1;
               else if YOJ < 5.74 then _k = 0;
               else _k = 1;
               if _k = 0 then do;
                  _pval = 0;
                  goto T1MBZHEP;
               end;
               else do;
                  _pval = 1;
                  goto T1MBZHEP;
               end;
            end;
         end;
      end;
      else do;
         /* Second root branch (DELINQ at or above 0.6). */
         if DEBTINC = . then _k = 1;
         else if DEBTINC < 32.9705231315475 then _k = 0;
         else _k = 1;

         if _k = 0 then do;
            if DEBTINC = . then _k = 1;
            else if DEBTINC < 7.01370399865341 then _k = 0;
            else _k = 1;
            if _k = 0 then do;
               _pval = 0;
               goto T1MBZHEP;
            end;
            else do;
               if DEROG = . then _k = 0;
               else if DEROG < 1 then _k = 0;
               else _k = 1;
               if _k = 0 then do;
                  if DELINQ = . then _k = 0;
                  else if DELINQ < 5.208 then _k = 0;
                  else _k = 1;
                  if _k = 0 then do;
                     _pval = 0.90277777777777;
                     goto T1MBZHEP;
                  end;
                  else do;
                     _pval = 0;
                     goto T1MBZHEP;
                  end;
               end;
               else do;
                  if DELINQ = . then _k = 0;
                  else if DELINQ < 2.04 then _k = 0;
                  else _k = 1;
                  if _k = 0 then do;
                     _pval = 0.77777777777777;
                     goto T1MBZHEP;
                  end;
                  else do;
                     _pval = 0;
                     goto T1MBZHEP;
                  end;
               end;
            end;
         end;
         else do;
            if DEBTINC = . then _k = 0;
            else if DEBTINC < 36.3773556427399 then _k = 0;
            else _k = 1;
            if _k = 0 then do;
               if DEBTINC = . then _k = 0;
               else if DEBTINC < 33.7881629342337 then _k = 0;
               else _k = 1;
               if _k = 0 then do;
                  if DEBTINC = . then _k = 1;
                  else if DEBTINC < 33.7554573421262 then _k = 0;
                  else _k = 1;
                  if _k = 0 then do;
                     _pval = 0.85714285714285;
                     goto T1MBZHEP;
                  end;
                  else do;
                     _pval = 0.18162393162393;
                     goto T1MBZHEP;
                  end;
               end;
               else do;
                  if DELINQ = . then _k = 0;
                  else if DELINQ < 4.056 then _k = 0;
                  else _k = 1;
                  if _k = 0 then do;
                     _pval = 0.84946236559139;
                     goto T1MBZHEP;
                  end;
                  else do;
                     _pval = 0;
                     goto T1MBZHEP;
                  end;
               end;
            end;
            else do;
               if DEBTINC = . then _k = 0;
               else if DEBTINC < 43.0547473646769 then _k = 0;
               else _k = 1;
               if _k = 0 then do;
                  if DELINQ = . then _k = 0;
                  else if DELINQ < 4.056 then _k = 0;
                  else _k = 1;
                  if _k = 0 then do;
                     _pval = 0.85714285714285;
                     goto T1MBZHEP;
                  end;
                  else do;
                     _pval = 0;
                     goto T1MBZHEP;
                  end;
               end;
               else do;
                  if DEBTINC = . then _k = 0;
                  else if DEBTINC < 46.2598953912067 then _k = 0;
                  else _k = 1;
                  if _k = 0 then do;
                     _pval = 0.42307692307692;
                     goto T1MBZHEP;
                  end;
                  else do;
                     _pval = 0;
                     goto T1MBZHEP;
                  end;
               end;
            end;
         end;
      end;
 

      /* Every leaf jumps here. The leaf value is flipped (1 - _pval), with
         out-of-range values mapped to the opposite bound. On the 'U' path
         _pval was never assigned; a missing value compares below 0 in SAS,
         so it becomes 1 here, but that result is not used because the
         fallback branch below applies. */
      T1MBZHEP : /* terminal node */
       
if _pval < 0 then _pval = 1;
          else if _pval > 1 then _pval = 0;
          else _pval = 1-_pval;

          /* binary target */
          if _warn_ ne 'U' then do;
             P_EVENT = _pval;
             P_NEVENT = 1-P_EVENT;
             if P_EVENT ge 0.5 then I_BAD = '1';
             else I_BAD = '0';
          end;
          else do;
             /* Unrecognized REASON level: use fixed fallback probabilities --
                presumably the overall event rate of the training data;
                confirm. */
             P_EVENT = 0.1994966442953;
             P_NEVENT = 0.80050335570469;
             I_BAD = '0';
          end;
          drop _pval _k _norm1;
          label P_EVENT = "Predicted: EVENT for BAD";
          label P_NEVENT = "Predicted: NO EVENT for BAD";
          label I_BAD = "Into: BAD";
run;


맨 위로 이동 맨 위로 이동