Viewing contents of file '../idllib/astron/contrib/freudenreich/fitxyerrs.pro'
FUNCTION TWOLINE,U,V,WX,WY

;+
; Part of FITXYERRS.
;-

EPS=1.0E-20
N=N_ELEMENTS(U)
IF WY GT WX THEN BEGIN
     S=SORT(U)
     TX=U(S)  & TY=V(S)
     X1=MED(TX(0:.5*N))  & X2=MED(TX(.5*N+1:N-1))
     Y1=MED(TY(0:.5*N))  & Y2=MED(TY(.5*N+1:N-1))
     IF ABS(X1-X2) LT EPS THEN BEGIN
         PRINT,'FITXYERRS: Initial Fit Failed. Range of X = 0.'
         RETURN,0.
     ENDIF
     SLOP=(Y1-Y2)/(X1-X2)
     YINT=Y1-SLOP*X1
ENDIF ELSE BEGIN            ; X on Y
     S=SORT(V)
     TX=V(S)  & TY=U(S)
     X1=MED(TX(0:.5*N))  & X2=MED(TX(.5*N+1:N-1))
     Y1=MED(TY(0:.5*N))  & Y2=MED(TY(.5*N+1:N-1))
     IF ABS(X1-X2) LT EPS THEN BEGIN
        PRINT,'FITXYERRS: Initial Fit Failed. Range of Y = 0.'
        RETURN,0.
     ENDIF
     TSLOP=(Y1-Y2)/(X1-X2)
     TYINT=Y1-TSLOP*X1
     IF ABS(TSLOP) LT EPS THEN TSLOP=EPS
     SLOP=1./TSLOP
     YINT=-TYINT/TSLOP
ENDELSE

RETURN,[YINT,SLOP]
END

FUNCTION FITXYERRS, Xin,Yin,EXin,EYin, N_SAMPLE,YinT,SLOP,SYinT,SSLOP,$
   NONROBUST=FITTYPE, TOLERANCE=SMALLENUF
;
;+
; NAME:
;	FITXYERRS
;
; PURPOSE:
;	Linear fit to data in which there are errors in both Y and X, and 
;	possibly outliers or points with true errors much larger than the 
;	measurement errors.   It is outlier-resistant but not as resistant as 
;	ROBUST_LINEFIT.   Uncertainties are calculated using the bootstrap 
;	method.   
;
; CALLING SEQUENCE:
;	Coeff = FITXYERRS( X, Y, Ex, Ey, N_sample, [ Yint, Slope, Sig_Yint, 
;				Sig_Slope, /NONROBUST, Tolerance = ] )
;
; INPUT ARGUMENT:
;	X = Input x vector
;	Y = Input y vector
;	Ex= vector of X measurement errors
;	Ey= the vector of Y measurement errors
;	N_SAMPLE = the number of bootstrap samples. 50 to 100 usually sufficient
;			If N_SAMPLE = 1, standard errors are not returned.
;
; OPTIONAL INPUT KEYWORDS:
;	NONROBUST -   If set, "robustness" weights are not calculated.
;	TOLERANCE -   When the scatter about the fitted line, weighted by
;		measurement errors, changes by less than this fraction,
;		the fit is considered to have converged. Default=.0001
;
; OUTPUT:
;	COEFF = array of coefficients. Dimensions=(NDEG+1,N_SAMPLE)
;		First dimension in the order A0, A1
;
; OPTIONAL OUTPUT ARGUMENT:
;	Yint = Y intercept (average of COEFF(0,*))
;	Slop = slope (average of (COEFF(1,*))
;	Sig_Yint = standard error of Y intercept (std. dev. of COEFF(0,*))
;	Sig_Slop = standard error of slope (std. dev. of COEFF(1,*))
;
; METHOD:  
;	Convert to standardized variables, calculate Y vs X and X vs Y fits,
;	take a weighted mean of the two lines, the weights being a function of
;	slope. The weights of the input points are functions of their 
;	uncertainties and perpendicular distance to the fitted line. This is 
;	an iterative procedure.   To the author's knowledge, neither this nor 
;	a similar method has ever been, or may ever be, published.
;
; WARNING:
;	At least 6 points must be input. It is better to have at least 10.
;	It is best to have a large number.
;	For large datasets, try it out with N_SAMPLE of 1 first to see how long 
;	it takes.
;
; SUBROUTINES CALLED:
;	XYFIT, MED (to calculate a median)
;
; REVSION HISTORY:
;	Written,  H.T. Freudenreich, HSTX, 6/94. 
;	Corrected serious bug in computation of the Y intercept 
;		H.T. Freudenreich/Landsman HSTX, 11/95
;	
;-

ON_ERROR,2

ITMAX =  20
EPS   = 1.0E-22

N=N_ELEMENTS(Xin)

IF N LT 6 THEN BEGIN
   PRINT,'FITXYERRS: Too few points! Returning 0'
   RETURN,0.
ENDIF

IF KEYWORD_SET(SMALLENUF) THEN CONVERGENCE=SMALLENUF ELSE CONVERGENCE=.0001

; Shift the coordinate system:
X0=TOTAL(Xin)/N  &  Y0=TOTAL(Yin)/N
X=Xin-X0         &  Y=Yin-Y0
EX=EXin          &  EY=EYin

R0=SYSTIME(1)-DOUBLE(7.)  ;=random number seed

; Now convert to carefree variables by dividing by the median measurement
; errors. (If the errors are heteroscedastic, they are not quite as carefree.)

XERR = MED(EX)+EPS  &  YERR = MED(EY)+EPS
X=X/XERR            &  Y=Y/YERR
EX=EX/XERR          &  EY=EY/YERR

; Now the errors are roughly the same (exactly, if the variances are fixed).
; The slope will determine the relative weights of the Y|X and X|Y regressions.
; Initially, we don't know the slope, and so we give the most weight to the
; variable with the largest range.
AX=MED(ABS(X))      &  AY=MED(ABS(Y))
WX=AY^2/(AX^2+AY^2) &  WY=1.-WX       ; = initial weights of X|Y and Y|X fits.

; Now perform the calculation N_SAMPLE times.

COEFF=FLTARR(2,N_SAMPLE)
NGOOD = -1

; Get some randomn number seeds:
R0=SYSTIME(1)*2.+1.
SEEDS=RANDOMU(R0,N_SAMPLE)*1.0E6+1.

FOR L=0,N_SAMPLE-1 DO BEGIN

  IF L EQ 0 THEN BEGIN 
;    First time, use the actual data:
     U=X  & V=Y  & Eu=Ex  & Ev=Ey
  ENDIF ELSE BEGIN
     R=SEEDS(L)
;    Uniform random numbers between 0 and N-1:
     PICK=RANDOMU(R,N)
     R=LONG(PICK*N)
     U=X(R) & V=Y(R) & Eu=Ex(R) & Ev=Ey(R)
  ENDELSE

; Fit Y vs X and X vs Y, iteratively:
  NUMIT=0
  MIDDIST=1.0E37

; For the initial fit, divide the data into 2 groups and calculate the line
; through them. 
  CC=TWOLINE(U,V,WX,WY)

; The "robustness" weights are functions of distance from the fitted line,
; so calculate that:
  R=SQRT(1.+CC(1)^2)  & IF CC(0) GT 0. THEN R=-R
  U1=CC(1)/R  &  U2=-1./R  &  U3=CC(0)/R 
  DIST=U1*U+U2*V+U3  ; = orthog. distance to line

; Now iterate:
  FITAGAIN:
  NUMIT=NUMIT+1

  IF N_ELEMENTS(CC) EQ 2 THEN BEGIN
;    The weights will be a product of a function of the measurement errors 
;    and, for outlier-resistance, a function of orthogonal distance to the line.
;    Measurement-error weights: For the Y on X fit, the weights should be 
;    1/(Ey^2+b^2 Ex^2), where b is the slope of Y vs X. For X on Y: 
;    1/(Ex^2+Ey^2/b^2). Since we are doing both, and want to treat X and Y 
;    symmetrically, take the weighted sum. The weights are functions of
;    slope. 
                                       SLOP=CC(1)^2 > 1.0E-18  ; slope^2
     DY= Ev^2 + CC(1)^2*Eu^2 + eps  &  DX= Eu^2 + Ev^2/SLOP + eps

;    The relative weights of the Y|X and X|Y fits:
     FY = 1./(1.+CC(1)^2)         &  FX = 1.-FY  

;    Now the "measurement error" weights:
     WS=  1./DY * FY + 1./DX * FX 

;    Note:
;    (When slope==>0,  WS==>1/Ey^2         (Ex is irrelevant)
;     When slope==>oo, WS==>1/Ex^2         (Ey is irrelevant)
;     When slope==>1,  WS==>1/(Ex^2+Ey^2)  (X and Y indistinguishable))

     IF KEYWORD_SET(FITTYPE) THEN BEGIN
;       Don't need to calculate robustness weights or check for convergence.
;       Fit once and move on to the next set, if there is one.
        W=WS/TOTAL(WS)
        CC=XYFIT(U,V,W,CCX,CCY,DIST,FX,FY) 
        GOTO, DONE
     ENDIF

;    From this calculate an "adjusted" distance (distance/measurement error)
;    to be used in the robustness weights. 
     DIST=ABS(DIST)/SQRT( DY*FY + DX*FX )   ; = adjusted distance
     DX=0 & DY=0

;    Points < 1 *(mid-averaged adjusted distance from the fit line) are 
;    given equal distance weights. Beyond 1 unit of distance the weights 
;    decrease as 1/distance^2. Beyond 4 units, weights are set to 0.0.
;
;    Why the mid-averaged distance? It is more sensitive than the median but 
;    has a lower breakdown point: 25% vs 50%.

     D=DIST(SORT(DIST))
     OLDMIDDIST=MIDDIST
     MIDDIST=AVG(D(.25*N:.75*N))/.70 ; = 1 sigma if Gaussian
     IF MIDDIST LT EPS THEN BEGIN
        PRINT,'FITXYERRS: Weird data. Fit attempt failed'
        CC=0.
        GOTO,DONE
     ENDIF
     Q=WHERE(DIST LT MIDDIST,COUNT) & IF COUNT GT 0 THEN DIST(Q)=MIDDIST
     W=WS/DIST^2
     Q=WHERE(DIST GT (4.*MIDDIST),COUNT) & IF COUNT GT 0 THEN W(Q)=0.
     SUMW = TOTAL(W)
     IF SUMW EQ 0. THEN BEGIN
        PRINT,'FITXYERRS: Weird data. Fit attempt failed'
        CC=0.
        GOTO,DONE
     ENDIF
     W=W/SUMW

     IF NUMIT GT 1 THEN BEGIN
        OLDCC=CC
        DIFF=ABS(OLDMIDDIST-MIDDIST)/MIDDIST
        IF DIFF LT CONVERGENCE THEN GOTO,DONE
        IF MIDDIST GT OLDMIDDIST THEN BEGIN
           CC=OLDCC
           GOTO,DONE
        ENDIF
     ENDIF 
     IF NUMIT EQ ITMAX THEN GOTO,DONE
     IF MIDDIST LT EPS THEN GOTO,DONE

;    Fit again:                            
     OLDCC=CC
     CC=XYFIT(U,V,W,CCX,CCY,DIST,FX,FY) 

     GOTO,FITAGAIN
  ENDIF

  DONE:
; If the fit was successful, store the coefficients:
  IF (N_ELEMENTS(CC) EQ 2) THEN BEGIN 
     NGOOD=NGOOD+1
     COEFF(*,NGOOD)=CC
  ENDIF
ENDFOR

NGOOD = NGOOD+1
IF NGOOD LT N_SAMPLE THEN BEGIN
   PRINT,'FITXYERRS: Some samples rejected.'
   COEFF=COEFF(*,0:NGOOD-1)
ENDIF
; Take out the scale factors:
COEFF(0,*)=COEFF(0,*)*YERR
COEFF(1,*)=COEFF(1,*)*YERR/XERR

; Shift X and Y back to the original coordinate system and calculate the
; y-intercept, slope and their standard errors.

 SLOP = TOTAL(COEFF(1,*))/NGOOD
 COEFF(0,*) = COEFF(0,*) + Y0 - COEFF(1,*)*X0    ;Bug corrected 11/95
 YINT = TOTAL(COEFF(0,*))/NGOOD

 IF NGOOD GT 2 THEN BEGIN
   SYINT = STDEV(COEFF(0,*))
   SSLOP = STDEV(COEFF(1,*))
 ENDIF ELSE BEGIN
   SYINT=0.
   SSLOP=0.
 ENDELSE

RETURN,COEFF
END