SAS Macro : Capping Outliers

Box Plot (Turkey) Method of Identifying Outliers

If a value is higher than the 1.5*IQR above the upper quartile (Q3), the value will be considered as outlier. Similarly, if a value is lower than the 1.5*IQR below the lower quartile (Q1), the value will be considered as outlier.
IQR is interquartile range. It measures dispersion or variation. IQR = Q3 -Q1.
If distribution of a variable is skewed, cutoff of 1.5 times IQR might lead to a very high number of values to be assigned outliers. In this case, you can use cutoff of 3.

Imputing Values (Capping / Flooring)

You can replace all the values that exist outside the following limit with the limit value.
Lower limit of acceptable range = Q1 - 3* (Q3-Q1)
Upper limit of acceptable range = Q3 + 3* (Q3-Q1)

SAS Macro for Outlier Capping / Flooring
options nomprint mlogic symbolgen;
%macro outliers(input=, vars=, output= );

%let Q1=;
%let Q3=;
%let varL=;
%let varH=;

%let n=%sysfunc(countw(&vars));
%do i= 1 %to &n;
%let val = %scan(&vars,&i);
%let Q1 = &Q1 &val._P25;
%let Q3 = &Q3 &val._P75;
%let varL = &varL &val.L;
%let varH = &varH &val.H;
%end;

/* Calculate the quartiles and inter-quartile range using proc univariate */
proc means data=&input nway noprint;
var &vars;
output out=temp P25= P75= / autoname;
run;

/* Extract the upper and lower limits into macro variables */
data temp;
set temp;
ID = 1;
array varb(&n) &Q1;
array varc(&n) &Q3;
array lower(&n) &varL;
array upper(&n) &varH;
do i = 1 to dim(varb);
lower(i) = varb(i) - 3 * (varc(i) - varb(i));
upper(i) = varc(i) + 3 * (varc(i) - varb(i));
end;
drop i _type_ _freq_;
run;

data temp1;
set &input;
ID = 1;
run;

data &output;
merge temp1 temp;
by ID;
array vars(&n) &vars;
array lower(&n) &varL;
array upper(&n) &varH;
do i = 1 to dim(vars);
if not missing(vars(i)) then do;
if vars(i) < lower(i) then vars(i) = lower(i);
if vars(i) > upper(i) then vars(i) = upper(i);
end;
end;
drop &Q1 &Q3 &varL &varH ID i;
run;
%mend;

%outliers(input=abcd, vars = a b c d, output= test);
Related Posts
About Author:

Deepanshu founded ListenData with a simple objective - Make analytics easy to understand and follow. He has over 10 years of experience in data science. During his tenure, he has worked with global clients in various domains like Banking, Insurance, Private Equity, Telecom and Human Resource.

1 Response to "SAS Macro : Capping Outliers"

Next → ← Prev
Love this Post? Spread the Word!
Share