/* SAS Example 5b: Logistic Regression on HATCO data              */
/* Dependent Variable:    X11                                     */
/* Independent Variables: Factor Scores of Value and Image        */
/*                                                                */
/* Flow of this program:                                          */
/*   1. Read the HATCO data set and run a factor analysis on six  */
/*      of the perception variables (X5 excluded).                */
/*   2. Read the analysis sample, build the scores V1 and V2,     */
/*      fit a stepwise logistic regression of X11 on V1, V2, X5   */
/*      and classify each observation at a 0.5 cut-off.           */
/*   3. Read the holdout sample, build the same scores, apply     */
/*      the hard-coded parameter estimates and classify again.    */

OPTIONS PS=52 LS=78 NODATE PAGENO=1;

DATA PERCEPTION;
    INFILE 'A:/HATCO_SET.PRN';
    INPUT X1-X14;
    LABEL X1  = 'Delivery Speed'
          X2  = 'Price Level'
          X3  = 'Price Flexibility'
          X4  = 'Manufacturer Image'
          X5  = 'Service'
          X6  = 'Salesforce Image'
          X7  = 'Product Quality'
          X8  = 'Firm Size'
          X9  = 'Usage Level'
          X10 = 'Satisfaction Level'
          X11 = 'Specification Buying'
          X12 = 'Structure of Procurement'
          X13 = 'Type of Industry'
          X14 = 'Type of Buying Situation';
RUN;

/* If you recall our initial run of the factor analysis program on the
   HATCO dataset, we excluded X5 from the solution. We obtain our solution
   for the other six perception variables as follows. */

/* DATA= is named explicitly so the step does not depend on whichever
   data set happened to be created last. */
PROC FACTOR DATA=PERCEPTION CORR MSA SCREE ROTATE = VARIMAX;
    TITLE 'Factor Analysis on remaining six HATCO Perception Variables';
    VAR X1-X4 X6 X7;
RUN;

/* We will first perform a logistic regression on the analysis sample,
   after constructing factor scores. We should also examine X5, which was
   not part of our factor solution, but may add something to our
   discriminating function.

   NOTE: this DATA step reuses the name PERCEPTION, so it replaces the
   data set read from HATCO_SET.PRN above. */
DATA PERCEPTION;
    INFILE 'A:/HATCO2.TXT';
    INPUT X1-X14;
    LABEL X1  = 'Delivery Speed'
          X2  = 'Price Level'
          X3  = 'Price Flexibility'
          X4  = 'Manufacturer Image'
          X5  = 'Service'
          X6  = 'Salesforce Image'
          X7  = 'Product Quality'
          X8  = 'Firm Size'
          X9  = 'Usage Level'
          X10 = 'Satisfaction Level'
          X11 = 'Specification Buying'
          X12 = 'Structure of Procurement'
          X13 = 'Type of Industry'
          X14 = 'Type of Buying Situation';

    /* Reflected (10 - X) versions of the perception ratings used in the
       score formulas below. */
    X2T = 10 - X2;
    X3T = 10 - X3;
    X4T = 10 - X4;
    X6T = 10 - X6;
    X7T = 10 - X7;

    /* Weighted averages: each divisor is the sum of the six weights in
       its formula (3.19611 for V1, 2.51627 for V2).
       NOTE(review): the weights are presumably the rotated loadings from
       the PROC FACTOR run above - confirm against that output. */
    V1 = (0.78736*X1 + 0.71388*X2T + 0.80351*X3 + 0.10206*X4T
          + 0.02537*X6T + 0.76393*X7T)/3.19611;

    /* The X6 weight was previously typed as 0.933364; 0.93364 is the value
       for which the six weights add up to the divisor 2.51627. */
    V2 = (0.19414*X1 + 0.26557*X2 + 0.01058*X3T + 0.93334*X4
          + 0.93364*X6 + 0.17900*X7)/2.51627;
RUN;

/* Stepwise logistic regression of X11 on the two scores and X5.
   The predicted probability is written to OUTHAT as PRED. */
PROC LOGISTIC DATA=PERCEPTION DESCENDING;
    MODEL X11 = V1 V2 X5 / SELECTION=STEPWISE;
    OUTPUT OUT=OUTHAT P=PRED;
RUN;

/* We take the output file from the last step (OUTHAT), that contains our
   predicted probability variable (PRED). The variable CLASSIF will
   represent the group that each observation is ultimately classified
   into. */
DATA NEWANALY;
    SET OUTHAT;
    /* A missing PRED must stay unclassified: in SAS a missing value
       compares lower than any number, so a bare "PRED < .5" test would
       put it in group 0. A PRED of exactly .5 is assigned to group 1 so
       that every non-missing probability receives a class. */
    IF MISSING(PRED) THEN CLASSIF = .;
    ELSE IF PRED >= .5 THEN CLASSIF = 1;
    ELSE CLASSIF = 0;
RUN;

/* We can examine our observations to review any misclassifications */
PROC PRINT DATA=NEWANALY;
    VAR V1 V2 CLASSIF PRED;
RUN;

/* Finally (for the analysis sample) we can review the Classification
   Table */
PROC FREQ DATA=NEWANALY;
    TABLE X11*CLASSIF;
RUN;

/* We will now repeat some of the classification techniques on the holdout
   sample. This time though we need to calculate the relevant probabilities
   using the parameter estimates from our analysis sample */
DATA HOLDOUT;
    INFILE 'A:/HATCO3.TXT';
    INPUT X1-X14;
    LABEL X1  = 'Delivery Speed'
          X2  = 'Price Level'
          X3  = 'Price Flexibility'
          X4  = 'Manufacturer Image'
          X5  = 'Service'
          X6  = 'Salesforce Image'
          X7  = 'Product Quality'
          X8  = 'Firm Size'
          X9  = 'Usage Level'
          X10 = 'Satisfaction Level'
          X11 = 'Specification Buying'
          X12 = 'Structure of Procurement'
          X13 = 'Type of Industry'
          X14 = 'Type of Buying Situation';

    /* Same reflected variables and score formulas as the analysis
       sample, so that V1 and V2 are on the same scale as the fitted
       model. */
    X2T = 10 - X2;
    X3T = 10 - X3;
    X4T = 10 - X4;
    X6T = 10 - X6;
    X7T = 10 - X7;

    V1 = (0.78736*X1 + 0.71388*X2T + 0.80351*X3 + 0.10206*X4T
          + 0.02537*X6T + 0.76393*X7T)/3.19611;
    V2 = (0.19414*X1 + 0.26557*X2 + 0.01058*X3T + 0.93334*X4
          + 0.93364*X6 + 0.17900*X7)/2.51627;

    /* Logistic model applied by hand: Y is the odds, PRED the probability.
       NOTE(review): the intercept and slopes are hard-coded; they are
       presumably copied from the PROC LOGISTIC output for the analysis
       sample - re-check them whenever that model is refit. */
    Y = exp(-48.7117 + 6.8155*V1 + 3.2714*V2);
    PRED = Y/(1+Y);

    /* Same 0.5 cut-off rule as for the analysis sample; a missing PRED
       stays unclassified instead of falling into group 0. */
    IF MISSING(PRED) THEN CLASSIF = .;
    ELSE IF PRED >= .5 THEN CLASSIF = 1;
    ELSE CLASSIF = 0;
RUN;

PROC PRINT DATA=HOLDOUT;
    VAR V1 V2 CLASSIF PRED;
RUN;

PROC FREQ DATA=HOLDOUT;
    TABLE X11*CLASSIF;
RUN;