Viewing contents of file '../idllib/contrib/buie/goodpoly.pro'
;+
; NAME:
; goodpoly
; PURPOSE: (one line)
; Robust fitting of a polynomial to data.
; DESCRIPTION:
; This is a multi-pass fitting routine that fits a fixed order polynomial
; to the input data. After each pass, the scatter of the fit relative
; to the fitted line is computed. Each point is examined to see if it
; falls beyond THRESH sigma from the line. If is does, it is removed
; from the data and the fit is tried again. This will make up to two
; attempts to remove bad data.
; CATEGORY:
; Function fitting
; CALLING SEQUENCE:
; coeff = goodpoly(x,y,order,thresh,yfit,newx,newy)
; INPUTS:
; x - Input dataset, independant values.
; y - Input dataset, dependant values.
; order - Order of the polynomial fit (linear = 1).
; thresh - Sigma threshold for removing outliers.
; OPTIONAL INPUT PARAMETERS:
; KEYWORD PARAMETERS:
; BAD - Vector of flags, same length as x and y, that indicate bad values
; in the y vector. The default is that all points are considered
; good at the stars. If supplied, any additional bad values found
; will be marked as bad in this vector upon output. Also, any
; NaN values found in either the x or y vector will be trimmed
; from the data (and marked bad) prior to any processing.
; EXCLUDE - Number of points to exclude from initial pass fit. This number
; is rounded up to the next even number. Then EXCLUDE/2 of the
; highest and lowest points are removed before the first fit.
; If these numbers are reasonable, they will not be excluded
; in the second pass. This helps prevent biasing the first fit
; with the worst points in the array. Default is to not exclude
; any points on the first pass.
; MAX_VALUE - The maximum value to be fitted. If this keyword is provided,
; data values greater than MAX_VALUE are treated as missing
; and are not used in the fit at any pass.
; MIN_VALUE - The minimum value to be fitted. If this keyword is provided,
; data values greater than MIN_VALUE are treated as missing
; and are not used in the fit at any pass.
; SIGMA - Standard deviation of difference between good points and the
; fitted curve.
; OUTPUTS:
; yfit - Fitted values for y that match the input vector.
; newx - X values from input that were considered good.
; newy - Y values from input that were considered good.
; Return value is the set of polynomial coefficients.
; COMMON BLOCKS:
; SIDE EFFECTS:
; RESTRICTIONS:
; PROCEDURE:
; MODIFICATION HISTORY:
; Written 1991 Feb., Marc W. Buie, Lowell Observatory
; 93/11/12, MWB, Program fixed to return a computed y for all input x.
; 95/09/20, MWB, Added EXCLUDE keyword.
; 98/02/09, MWB, Added SIGMA keyword.
; 98/06/08, MWB, Added MIN/MAX_VALUE keywords.
; 98/08/12, MWB, Revamped some logic plus added direct support for badflags.
; The new version is probably a tad faster and more robust
; than the old version.
;-
function goodpoly,x,y,order,thresh,yfit,newx,newy, $
EXCLUDE=exclude,SIGMA=sigma,BAD=bad, $
MIN_VALUE=min_value,MAX_VALUE=max_value
if badpar(exclude, [0,1,2,3], 0,CALLER='goodpoly (EXCLUDE) ', $
DEFAULT=0) then return,0
if badpar(min_value,[0,1,2,3,4,5],0,CALLER='goodpoly (MIN_VALUE) ', $
type=minvtype) then return,0
if badpar(max_value,[0,1,2,3,4,5],0,CALLER='goodpoly (MAX_VALUE) ', $
type=maxvtype) then return,0
savebad = n_elements(bad) eq n_elements(y)
xx = x
yy = y
arlen=n_elements(xx)
;print,'arlen at start',arlen,total(bad)
; Take note of all points that start out good for later indexing into the
; input bad array. Also weed out the points that are known bad at the start.
if savebad then begin
z = where(bad eq 0,count)
IF count ne 0 and count ne arlen THEN BEGIN
xx = xx[z]
yy = yy[z]
ENDIF ELSE IF count eq 0 THEN BEGIN
print,'GOODPOLY: Error! All values are marked bad, nothing to fit.'
coeff=fltarr(order+1)
coeff[0]=0.0
return,coeff
ENDIF
; Save the indicies into the original array for all the good points.
; This will be needed later if any points are marked bad. This array
; should maintain the same length as the xx,yy arrays but will point
; back into the original arrays, x,y,bad
goodidx = z
endif
arlen=n_elements(xx)
; Filter out any NaNs
z = where(finite(xx) eq 0 or finite(yy) eq 0,count)
if count ne 0 and count ne arlen then begin
if savebad then bad[goodidx[z]]=1
z = where(finite(xx) eq 1 and finite(yy) eq 1)
xx = xx[z]
yy = yy[z]
if savebad then goodidx = goodidx[z]
endif else if count eq arlen then begin
print,'GOODPOLY: Error! All values are either NaNs or marked bad, nothing to fit.'
coeff=fltarr(order+1)
coeff[0]=0.0
return,coeff
endif
arlen=n_elements(xx)
; Need an inverted bad flag array for further processing.
if savebad then gflag = 1B-bad
; If min_value provide, exclude those values now
IF minvtype ne 0 THEN BEGIN
z = where(yy gt min_value,count)
IF count ne 0 and count ne arlen THEN BEGIN
xx = xx[z]
yy = yy[z]
if savebad then begin
gflag[goodidx[z]]=gflag[goodidx[z]]*2B
gflag = gflag/2B
goodidx = goodidx[z]
endif
ENDIF ELSE IF count eq 0 THEN BEGIN
print,'GOODPOLY: Error! All y values are below min_value, nothing to fit.'
coeff=fltarr(order+1)
coeff[0]=min_value
return,coeff
ENDIF
ENDIF
arlen=n_elements(xx)
; If max_value provide, exclude those values now
IF maxvtype ne 0 THEN BEGIN
z = where(yy lt max_value,count)
IF count ne 0 and count ne arlen THEN BEGIN
xx = xx[z]
yy = yy[z]
if savebad then begin
gflag[goodidx[z]]=gflag[goodidx[z]]*2B
gflag = gflag/2B
goodidx = goodidx[z]
endif
ENDIF ELSE IF count eq 0 THEN BEGIN
print,'GOODPOLY: Error! All y values are above max_value, nothing to fit.'
coeff=fltarr(order+1)
coeff[0]=max_value
return,coeff
ENDIF
ENDIF
arlen=n_elements(xx)
; Initial fit with all the data.
if exclude eq 0 or arlen le (exclude+order) then begin
coeff=poly_fit(xx,yy,order,yfit)
; chisq0=total((yy-yfit)^2)/(arlen-order)
flat = (yy-yfit)+(total(yfit)/arlen)
sigma = stdev(flat,mean)
; Initial fit excluding EXCLUDE extrema points before fit.
endif else begin
nex = ceil(exclude/2.0)
sidx=sort(yy)
sidx=sidx[nex:arlen-1-nex]
coeff=poly_fit(xx[sidx],yy[sidx],order)
yfit = poly(xx,coeff)
; chisq0=total((yy-yfit)^2)/(arlen-order)
flat = (yy-yfit)+(total(yfit)/arlen)
sigma = stdev(flat[sidx],mean)
endelse
;Remove all points beyond threshold sigma
good=where( abs(flat-mean) lt thresh*sigma,goodnum)
nbad = arlen-goodnum
if goodnum ne 0 and goodnum ne arlen then begin
xx=xx[good]
yy=yy[good]
if savebad then begin
gflag[goodidx[good]]=gflag[goodidx[good]]*2B
gflag = gflag/2B
goodidx = goodidx[good]
endif
arlen=goodnum
endif
if nbad ne 0 then begin
; Second pass fit with bad points removed (if needed).
coeff=poly_fit(xx,yy,order,yfit)
; chisq1=total((yy-yfit)^2)/(arlen-order)
flat = (yy-yfit)+(total(yfit)/arlen)
sigma = stdev(flat,mean)
;Remove all points beyond threshold sigma
good=where( abs(flat-mean) lt thresh*sigma,goodnum)
nbad = arlen-goodnum
if goodnum ne 0 and goodnum ne arlen then begin
xx=xx[good]
yy=yy[good]
if savebad then begin
gflag[goodidx[good]]=gflag[goodidx[good]]*2B
gflag = gflag/2B
goodidx = goodidx[good]
endif
arlen=goodnum
endif
endif
; Third pass fit with bad points removed.
if nbad ne 0 then begin
coeff=poly_fit(xx,yy,order,yfit)
; chisq2=total((yy-yfit)^2)/(arlen-order)
flat = (yy-yfit)+(total(yfit)/arlen)
sigma = stdev(flat,mean)
endif
newx=xx
newy=yy
yfit = poly(x,coeff)
if savebad then bad = 1B - gflag
return,coeff
end