/* SAS Example 3a: Cluster Analysis on HATCO data */
/* Two Cluster Solution */

/* Listing layout: 52 lines per page, 78 columns per line, no date stamp,
   page numbering restarted at 1. */
OPTIONS PS=52 LS=78 NODATE PAGENO=1;

/* Read the fourteen HATCO variables X1-X14 from the raw file and attach
   descriptive labels to them. */
DATA PERCEPTION;
    INFILE 'A:/HATCO_SET.PRN';
    INPUT X1-X14;
    LABEL
        X1  = 'Delivery Speed'
        X2  = 'Price Level'
        X3  = 'Price Flexibility'
        X4  = 'Manufacturer Image'
        X5  = 'Service'
        X6  = 'Salesforce Image'
        X7  = 'Product Quality'
        X8  = 'Firm Size'
        X9  = 'Usage Level'
        X10 = 'Satisfaction Level'
        X11 = 'Specification Buying'
        X12 = 'Structure of Procurement'
        X13 = 'Type of Industry'
        X14 = 'Type of Buying Situation';
RUN;

/* Hierarchical clustering (Ward's method) on variables X1 to X7.
   The NONORM option reports the Between-Cluster Sums of Squares in raw
   form rather than normalized by the total sum of squares.
   OUT=SEED2 stores the tree information used by PROC TREE below.
   DATA= is given explicitly so the step does not depend on whichever
   data set happened to be created last. */
PROC CLUSTER DATA=PERCEPTION METHOD=WARD NONORM OUT=SEED2;
    VAR X1-X7;
RUN;

/* Print a dendrogram from the PROC CLUSTER output. We use this, together
   with the PROC CLUSTER results, to determine which cluster solutions
   are worth considering. */
PROC TREE DATA=SEED2 N=5;
RUN;

/* First FASTCLUS pass. FASTCLUS can produce an output data set holding a
   cluster membership variable (OUT=) as well as a separate output data
   set holding the cluster means (MEAN=). Here the cluster means are
   written to data set NEW and the memberships to data set TWOCLUS. */
PROC FASTCLUS DATA=PERCEPTION MAXCLUSTERS=2 MEAN=NEW OUT=TWOCLUS;
    VAR X1-X7;
RUN;

/* Second FASTCLUS pass: the cluster means generated in the previous step
   (data set NEW) are used as the initial seeds of this non-hierarchical
   run. DATA=PERCEPTION is required here: the previous step created both
   NEW and TWOCLUS, so relying on the implicit "last data set" could
   cluster the wrong table. OUT=TWOCLUS replaces the first-pass
   memberships with the final ones. */
PROC FASTCLUS DATA=PERCEPTION SEED=NEW MAXCLUSTERS=2 OUT=TWOCLUS;
    VAR X1-X7;
RUN;

/* Test for significant differences between the clusters on the
   clustering variables X1-X7 and on X9 and X10. */
PROC ANOVA DATA=TWOCLUS;
    CLASS CLUSTER;
    MODEL X1-X7 X9 X10 = CLUSTER;
RUN;
QUIT;  /* PROC ANOVA is interactive; QUIT ends the step explicitly. */

/* List the data set again so we can see which cluster each observation
   is in, if desired. */
PROC PRINT DATA=TWOCLUS;
    VAR CLUSTER X1-X14;
RUN;

/* Profile the clusters on additional variables that were not included in
   the clustering procedure. The variable list is parenthesized so that
   CLUSTER is crossed with every one of X8 and X11-X14, and a chi-square
   test is requested for each table. */
PROC FREQ DATA=TWOCLUS;
    TABLES CLUSTER*(X8 X11-X14) / CHISQ;
RUN;