**Box Plot (Turkey) Method of Identifying Outliers**

If a value is higher than the 1.5*IQR above the upper quartile (Q3), the value will be considered as outlier. Similarly, if a value is lower than the 1.5*IQR below the lower quartile (Q1), the value will be considered as outlier.

If distribution of a variable is skewed, cutoff of 1.5 times IQR might lead to a very high number of values to be assigned outliers.IQRis interquartile range. It measures dispersion or variation. IQR = Q3 -Q1.

**In this case, you can use cutoff of 3.****Imputing Values (Capping / Flooring)**

You can replace all the values that exist outside the following limit with the limit value.

Lower limit of acceptable range = Q1 - 3* (Q3-Q1)

Upper limit of acceptable range = Q3 + 3* (Q3-Q1)

**SAS Macro for Outlier Capping / Flooring**

%macro outliers(input=, vars=, output= );

%let Q1=;

%let Q3=;

%let varL=;

%let varH=;

%let n=%sysfunc(countw(&vars));

%do i= 1 %to &n;

%let val = %scan(&vars,&i);

%let Q1 = &Q1 &val._P25;

%let Q3 = &Q3 &val._P75;

%let varL = &varL &val.L;

%let varH = &varH &val.H;

%end;

/* Calculate the quartiles and inter-quartile range using proc univariate */

proc means data=&input nway noprint;

var &vars;

output out=temp P25= P75= / autoname;

run;

/* Extract the upper and lower limits into macro variables */

data temp;

set temp;

ID = 1;

array varb(&n) &Q1;

array varc(&n) &Q3;

array lower(&n) &varL;

array upper(&n) &varH;

do i = 1 to dim(varb);

lower(i) = varb(i) - 3 * (varc(i) - varb(i));

upper(i) = varc(i) + 3 * (varc(i) - varb(i));

end;

drop i _type_ _freq_;

run;

data temp1;

set &input;

ID = 1;

run;

data &output;

merge temp1 temp;

by ID;

array vars(&n) &vars;

array lower(&n) &varL;

array upper(&n) &varH;

do i = 1 to dim(vars);

if not missing(vars(i)) then do;

if vars(i) < lower(i) then vars(i) = lower(i);

if vars(i) > upper(i) then vars(i) = upper(i);

end;

end;

drop &Q1 &Q3 &varL &varH ID i;

run;

%mend;

%outliers(input=abcd, vars = a b c d, output= test);

So helpful! Thanks for the SAS code!

ReplyDelete