Viewing contents of file '../idllib/astron/contrib/freudenreich/robust_linefit.pro'
FUNCTION ROBUST_LINEFIT,XIN,YIN,YFIT,SIG,SS, NUMIT=THIS_MANY, BISECT=TYPE, $
Bisquare_Limit=Bisquare_Limit, $
Close_Factor=Close_Factor
;+
; NAME:
; ROBUST_LINEFIT
;
; PURPOSE:
; An outlier-resistant two-variable linear regression. Either Y on X
; or, for the case in which there is no true independent variable, the
; bisecting line of Y vs X and X vs Y is calculated. No knowledge of
; the errors of the input points is assumed.
;
; CALLING SEQUENCE:
; COEFF = ROBUST_LINEFIT( X, Y, YFIT, SIG, COEF_SIG, [ BISECT = ,
; BiSquare_Limit = , Close_factor = , NumIT = ] )
;
; INPUTS:
; X = Independent variable vector, floating-point or double-precision
; Y = Dependent variable vector
;
; OUTPUTS:
; Function result = coefficient vector.
; If = 0.0 (scalar), no fit was possible.
; If vector has more than 2 elements (the last=0) then the fit is dubious.
;
; OPTIONAL OUTPUT PARAMETERS:
; YFIT = Vector of calculated y's
; SIG = The "standard deviation" of the fit's residuals. If BISECTOR
; is set, this will be smaller by ~ sqrt(2).
; COEF_SIG = The estimated standard deviations of the coefficients. If
; BISECTOR is set, however, this becomes the vector of fit
; residuals measured orthogonal to the line.
;
; OPTIONAL INPUT KEYWORDS:
; NUMIT = the number of iterations allowed. Default = 25
; BISECT if set, the bisector of the "Y vs X" and "X vs Y" fits is
; determined. The distance PERPENDICULAR to this line is used
; in calculating weights. This is better when the uncertainties
; in X and Y are comparable, so there is no true independent
; variable. Bisquare_Limit Limit used for calculation of
; bisquare weights. In units of outlier-resistant standard
; deviations. Default: 6.
; Smaller limit ==>more resistant, less efficient
; Close_Factor - Factor used to determine when the calculation has converged.
; Convergence if the computed standard deviation changes by less
; than Close_Factor * ( uncertainty of the std dev of a normal
; distribution ). Default: 0.03.
; SUBROUTINE CALLS:
; MED, to calculate the median
; ROB_CHECKFIT
; ROBUST_SIGMA, to calculate a robust analog to the std. deviation
;
; PROCEDURE:
; For the initial estimate, the data is sorted by X and broken into 2
; groups. A line is fitted to the x and y medians of each group.
; Bisquare ("Tukey's Biweight") weights are then calculated, using the
; a limit of 6 outlier-resistant standard deviations.
; This is done iteratively until the standard deviation changes by less
; than CLOSE_ENOUGH = CLOSE_FACTOR * {uncertainty of the standard
; deviation of a normal distribution}
;
; REVISION HISTORY:
; Written, H. Freudenreich, STX, 4/91.
; 4/13/93 to return more realistic SS's HF
; 2/94 --more error-checking, changed convergence criterion HF
; 5/94 --added BISECT option. HF.
; 8/94 --added Close_Factor and Bisquare_Limit options Jack Saba.
;-
ON_ERROR,2
IF N_ELEMENTS(THIS_MANY) GT 0 THEN ITMAX=THIS_MANY ELSE ITMAX=25
IF ( NOT KEYWORD_SET ( Close_Factor ) ) THEN Close_Factor = 0.03
DEL = 5.0E-07
EPS = 1.0E-20
N=N_ELEMENTS(XIN)
; First, shift X and Y to their centers of gravity:
X0=TOTAL(XIN)/N & Y0=TOTAL(YIN)/N
X = XIN-X0 & Y = YIN-Y0
CC=FLTARR(2)
SS=FLTARR(2)
SIG=0.
YFIT=YIN
BADFIT=0
NGOOD=N
; Make sure the independent variables are not all the same.
XRANGE=MAX(X)-MIN(X)
AVEX= (TOTAL(ABS(X))/N) > EPS
IF (XRANGE LT EPS) OR (XRANGE/AVEX LT DEL) THEN BEGIN
PRINT,'ROBUST_LINEFIT: Independent variables the same. No fit possible.'
RETURN,0.
ENDIF
; First guess:
LSQ=0
YP=Y
IF N GT 5 THEN BEGIN
; We divide the data into 2 groups and fit a line to their X and Y medians.
S=SORT(X) & U=X(S) & V=Y(S)
NHALF=N/2-1
X1=MED(U(0:NHALF)) & X2=MED(U(NHALF+1:N-1))
Y1=MED(V(0:NHALF)) & Y2=MED(V(NHALF+1:N-1))
IF ABS(X2-X1) LT EPS THEN BEGIN
; The X medians are too close. Select the end-points instead.
X1=U(0) & X2=U(N-1)
Y1=V(0) & Y2=V(N-1)
ENDIF
CC(1)=(Y2-Y1)/(X2-X1) & CC(0)=Y1-CC(1)*X1
YFIT=CC(0)+CC(1)*X
ISTAT=ROB_CHECKFIT(YP,YFIT,EPS,DEL, SIG,FRACDEV,NGOOD,W,S)
IF NGOOD LT 2 THEN LSQ=1
ENDIF
IF (LSQ EQ 1) OR (N LT 6) THEN BEGIN ; Try a least-squares fit
SX=TOTAL(X) & SY=TOTAL(Y) & SXY=TOTAL(X*Y) & SXX=TOTAL(X*X)
D=SXX-SX*SX
IF ABS(D) LT EPS THEN BEGIN
PRINT,'ROBUST_LINEFIT: No fit possible.'
RETURN,0.
ENDIF
YSLOP=(SXY-SX*SY)/D & YYINT=(SXX*SY-SX*SXY)/D
IF KEYWORD_SET(TYPE) THEN BEGIN
; Get the X vs Y line.
SYY=TOTAL(Y*Y)
D=SYY-SY*SY
IF ABS(D) LT EPS THEN BEGIN
PRINT,'ROBUST_LINEFIT: No fit possible.'
RETURN,0.
ENDIF
TSLOP=(SXY-SY*SX)/D & TYINT=(SYY*SX-SY*SXY)/D
; Now invert it to get the form Y=a+bX:
IF ABS(TSLOP) LT EPS THEN BEGIN
PRINT,'ROBUST_LINEFIT: No fit possible.'
RETURN,0.
ENDIF
XSLOP=1./TSLOP & XYINT=-TYINT/TSLOP
; Now calculate the equation of the bisector of the 2 lines:
IF YSLOP GT XSLOP THEN BEGIN
A1=YYINT & B1=YSLOP & R1=SQRT(1.+YSLOP^2)
A2=XYINT & B2=XSLOP & R2=SQRT(1.+XSLOP^2)
ENDIF ELSE BEGIN
A2=YYINT & B2=YSLOP & R2=SQRT(1.+YSLOP^2)
A1=XYINT & B1=XSLOP & R1=SQRT(1.+XSLOP^2)
ENDELSE
YINT=(R1*A2+R2*A1)/(R1+R2)
SLOP=(R1*B2+R2*B1)/(R1+R2)
; Now find the orthogonal distance to the line. Convert to normal
; coordinates.
R=SQRT(1.+SLOP^2) & IF YINT GT 0. THEN R=-R
U1=SLOP/R & U2=-1./R & U3=YINT/R
YP=U1*X+U2*Y+U3 ; = orthog. distance to line
YFIT=FLTARR(N) & YFIT(*)=0. ; to fool ROB_CHECKFIT
SS=YP
ENDIF ELSE BEGIN
SLOP=YSLOP & YINT=YYINT
YFIT = YINT+SLOP*X
ENDELSE
CC=[YINT,SLOP]
ISTAT=ROB_CHECKFIT(YP,YFIT,EPS,DEL, SIG,FRACDEV,NGOOD,W,S)
ENDIF
IF ISTAT EQ 0 THEN GOTO,AFTERFIT
IF NGOOD LT 2 THEN BEGIN
PRINT,'ROBUST_LINEFIT: Data Dangerously Weird. Fit Questionable.'
BADFIT=1
GOTO,AFTERFIT
ENDIF
; Now iterate until the solution converges:
SIG_1= (100.*SIG) < 1.0E20
CLOSE_ENOUGH = Close_Factor * SQRT(.5/(N-1)) > DEL
DIFF= 1.0E20
NIT = 0
WHILE( (DIFF GT CLOSE_ENOUGH) AND (NIT LT ITMAX) ) DO BEGIN
NIT=NIT+1
SIG_2=SIG_1
SIG_1=SIG
SX=TOTAL(W*X) & SY=TOTAL(W*Y) & SXY=TOTAL(W*X*Y) & SXX=TOTAL(W*X*X)
D=SXX-SX*SX
IF ABS(D) LT EPS THEN BEGIN
PRINT,'ROBUST_LINEFIT: No fit possible.'
RETURN,0.
ENDIF
YSLOP=(SXY-SX*SY)/D & YYINT=(SXX*SY-SX*SXY)/D
SLOP=YSLOP & YINT=YYINT
IF KEYWORD_SET(TYPE) THEN BEGIN
; Get the X vs Y line.
SYY=TOTAL(W*Y*Y)
D=SYY-SY*SY
IF ABS(D) LT EPS THEN BEGIN
PRINT,'ROBUST_LINEFIT: No fit possible.'
RETURN,0.
ENDIF
TSLOP=(SXY-SY*SX)/D & TYINT=(SYY*SX-SY*SXY)/D
; Now invert it to get the form Y=a+bX:
IF ABS(TSLOP) LT EPS THEN BEGIN
PRINT,'ROBUST_LINEFIT: No fit possible.'
RETURN,0.
ENDIF
XSLOP=1./TSLOP & XYINT=-TYINT/TSLOP
; Now calculate the equation of the bisector of the 2 lines:
IF YSLOP GT XSLOP THEN BEGIN
A1=YYINT & B1=YSLOP & R1=SQRT(1.+YSLOP^2)
A2=XYINT & B2=XSLOP & R2=SQRT(1.+XSLOP^2)
ENDIF ELSE BEGIN
A2=YYINT & B2=YSLOP & R2=SQRT(1.+YSLOP^2)
A1=XYINT & B1=XSLOP & R1=SQRT(1.+XSLOP^2)
ENDELSE
YINT=(R1*A2+R2*A1)/(R1+R2)
SLOP=(R1*B2+R2*B1)/(R1+R2)
R=SQRT(1.+SLOP^2) & IF YINT GT 0. THEN R=-R
U1=SLOP/R & U2=-1./R & U3=YINT/R
YP=U1*X+U2*Y+U3 ; = orthog distance to line
YFIT=FLTARR(N) & YFIT(*)=0.
SS=YP
ENDIF ELSE BEGIN
YFIT = YINT+SLOP*X
ENDELSE
CC=[YINT,SLOP]
ISTAT=ROB_CHECKFIT(YP,YFIT,EPS,DEL, SIG,FRACDEV,NGOOD,W,S, $
Bisquare_Limit=Bisquare_Limit )
IF ISTAT EQ 0 THEN GOTO,AFTERFIT
IF NGOOD LT 2 THEN BEGIN
PRINT,'ROBUST_LINEFIT: Data Dangerously Weird. Fit Questionable.'
BADFIT=1
GOTO,AFTERFIT
ENDIF
DIFF = (ABS(SIG_1-SIG)/SIG) < (ABS(SIG_2-SIG)/SIG)
ENDWHILE
AFTERFIT:
; Untranslate the coefficients
CC(0) = CC(0)+Y0-CC(1)*X0
IF N_PARAMS(0) GT 2 THEN YFIT = CC(0) + CC(1)*XIN
IF KEYWORD_SET(BISECT) THEN RETURN,CC
IF (N_PARAMS(0) GT 3) AND (SIG GT EPS) AND (NGOOD GT 2) THEN BEGIN
; Here we use an empirical formula to approximate the standard deviations
; of the coefficients. They are usually accurate to ~ 25%.
SX2=TOTAL(W*X*X)
UU=S*S
DEV=YIN-YFIT
Y0=TOTAL( W*DEV )
Q=WHERE(UU LE 1.0,COUNT)
DEN1 = ABS(TOTAL( (1.-UU(Q))*(1.-5.*UU(Q)) ))
SIG=ROBUST_SIGMA(DEV,/ZERO)
; Now empirically derived estimates of the uncertainties:
SS(0)=SIG/SQRT(DEN1)/1.105
SS(1)=SS(0)/SQRT(SX2)
; Take the X shift into account:
SS(0)=SQRT(SS(0)^2+X0*SS(1)^2)
ENDIF
IF BADFIT EQ 1 THEN CC=[CC,0.]
RETURN,CC
END