Viewing contents of file '../idllib/astron/contrib/varosi/vlib/allpro/find_outliers.pro'
;+
; NAME:
;	find_outliers
;
; PURPOSE:
;	Find outlier (bad) pixels in an image and return array of subscripts.
;	Outliers are flagged if value exceeds N_SIGMA times local standard
;	deviation (assuming that good data pixels do not deviate that much).
;	This procedure uses the histogram of image with reverse indices
;	to home in on pixels that have values outside the distribution
;	of most pixel values, and so is faster and usually better than
;	function sigma_filter, except when bad pixel values are within the
;	distribution of good pixels.
;
; CALLING:
;	subscripts = find_outliers( image, box_width, N_SIGMA=# )
;
; INPUTS:
;	image = 2-D array (this input will be modified).
;
;	box_width = width of square box in which to test statistics of
;		outlier candidate pixels, units in # pixels (default=3).
;
; KEYWORDS:
;
;	N_SIGMA = number of standard deviations flagging outliers (float),
;			minimum = 1, default = 5.
;
;	STOP = integer, the checking for outliers will stop after this
;		number of candidates are found to be NOT outliers (default=9).
;
;	RADIUS = specify test box size with radius, so box_width = 2*radius+1.
;
;	/MEDIAN : causes the median of values in test box to be used as
;		the replacement value, and in estimation of standard deviation.
;
;	/NO_MIN_OUT : outlier pixels related to minimum extremes of image
;		are NOT checked or replaced. This takes precedence over /NO_MAX.
;
;	/NO_MAX_OUT : outlier pixels related to maximum extremes of image
;		are NOT checked or replaced.
;
;	/MONITOR : prints information about % of pixels replaced.
;
; OUTPUT:
;	image = the input array is modified by replacing outliers as found.
;
; KEYWORD OUTPUT:
;
;	N_FOUND = total # of pixels found to be outliers, optional output.
;
; NOTES:
;	For a flat gaussian noise image:
;		N_sigma = 2 finds about 5% of pixels to be outliers,
;		N_sigma = 3 finds about 1% of pixels to be outliers,
;		N_sigma = 4 finds 0.5% of pixels to be outliers, and so on.
;	Setting /MEDIAN causes less pixels to be replaced and at same time
;	is then more successful at replacing adjacent pairs of outlier pixels.
;	The algorithm is semi-iterative since the image is modified during
;	the processing, thus changing the local deviations in test boxes,
;	and this feature actually helps to remove adjacent outliers.
;	Increasing box_width (or RADIUS) will usually replace more pixels.
;	Setting STOP= larger values causes more pixels to be checked and so
;	takes longer time, and results in more pixels being replaced,
;	whereas STOP=0 will replace only the maximum and/or minimum pixels
;	(if they are outliers, and if they occur first in reverse indices).
;
; PROCEDURE:
;	Possible outliers are found using the reverse indices from the
;	histogram of image. Starting from the maximum bin and working down,
;	each outlier candidate is checked by finding the mean (or median) and
;	standard deviation of pixels in box centered at candidate pixel
;	of the image (excluding that pixel), and if the center value exceeds
;	specified number of standard deviations from the mean (or median),
;	the subscript is added to array of subscripts. The process is repeated
;	starting at the minimum bin and working to more positive values.
;	This method of using histograms is faster than function sigma_filter
;	for large images, and is more successful at finding outliers.
; HISTORY:
;	Written, 1997, Frank Varosi, HSTX @ NASA/GSFC
;	FV, 8/97, for case of int/Long image, histogram binsize must be >= 1.
;-

function find_outliers, image, box_width, N_SIGMA=Nsigma, N_FOUND=nfound, $
			MONITOR=monitor, RADIUS=radius, MEDIAN=mm, STOP=stop, $
			NO_MIN_OUT=no_min, NO_MAX_OUT=no_max, NON_ZERO=nonzero

	sim = size( image )
	npix = sim(sim(0)+2)
	imvtype = sim(sim(0)+1)

	if sim(0) NE 2 then begin
		print,"syntax:"
		print,"	subscripts = find_outliers( image, box_width, N_SIGMA=)"
		return,0
	   endif

	if N_elements( radius ) EQ 1 then  box_width = 2*radius+1  else begin
		if N_elements( box_width ) NE 1 then box_width=3
		box_width = 2*(fix( box_width )/2) + 1	;make sure width is odd.
	   endelse

	if (box_width LT 3) then return,0
	if N_elements( Nsigma ) NE 1 then Nsigma = 5
	Nsigma = Nsigma > 1

	minv = min( image, MAX=maxv )
	binsize = ( float( maxv ) - minv )/1000
	if (imvtype LE 3) then binsize = binsize > 1
	hx = histogram( image, BINS=binsize, REV=revix, MIN=minv, MAX=maxv )
	nbin = N_elements( hx )
	count = 0		;next, try to get better resolution on histogram

	while (hx(0) ne 1) and (hx(nbin-1) ne 1) and (count LT 4) do begin
		count = count+1
		nbin = 2*nbin
		binsize = ( float( maxv ) - minv )/nbin
		if (imvtype LE 3) then begin		;case of int/Long image.
			if( binsize LT 1 ) then count=4
			binsize = binsize > 1
		   endif
		hx = histogram( image, BIN=binsize, REV=revix,MIN=minv,MAX=maxv)
		nbin = N_elements( hx )
		if !DEBUG then begin
			help,nbin
			print,hx(0),hx(nbin-1)
		   endif
	 endwhile

	wp = where( (hx GT 0) and (hx LT 32000), nwh )
	kstart = [ nwh-1, 0 ]
	kincr = [ -1, 1 ]

	nx = sim(1)
	Lx = nx-1
	Ly = sim(2)-1
	brad = fix( box_width/2 )
	nfound = 0L
	if N_elements( stop ) ne 1 then stop = 9

;first pass is to start from max value and work down replacing outliers...
;second pass starts from min value and works up replacing outliers...

if keyword_set( no_min ) then begin
	pass0 = 0
	pass1 = 0
 endif else if keyword_set( no_max ) then begin
	pass0 = 1
	pass1 = 1
 endif else begin
	pass0 = 0
	pass1 = 1
  endelse

for pass = pass0, pass1 do begin

	k = kstart(pass)
	kinc = kincr(pass)
	nok = 0L
	if !DEBUG then help,k,kinc

	Repeat Begin

	  rk = revix( revix(wp(k)) : revix(wp(k)+1)-1 )
	  rx = rk MOD nx
	  ry = rk/nx
	  xmin = ( rx - brad ) > 0
	  xmax = ( rx + brad ) < Lx
	  ymin = ( ry - brad ) > 0
	  ymax = ( ry + brad ) < Ly
	  k = k + kinc

	  for i=0,N_elements( rk )-1 do begin

		p = rk(i)
		ibox = image(xmin(i):xmax(i),ymin(i):ymax(i))
		if keyword_set( nonzero ) then ibox = ibox( where( ibox ) )
		nbox = N_elements( ibox )-1

		if keyword_set( mm ) then mean = median( ibox ) $
				else mean = ( total( ibox ) - image(p) )/nbox

		varp = (image(p) - mean)^2
		vari = ( total( (ibox - mean)^2 ) - varp )/nbox

		if varp GE Nsigma*vari then begin
			if N_elements( wbad ) LE 0 then wbad=p else wbad=[wbad,p]
			nfound = nfound + 1
		 endif else nok = nok+1

	   endfor

	 endrep until (nok GE stop) or (k LT 0) or (k GE nwh)

	if !DEBUG then help,k,nok,nfound
   endfor

	if keyword_set( monitor ) then $
		print, nfound*100./npix, box_width, Nsigma, $
		FORM="(F6.2,' % of pixels found bad, box=',I1,', N_sigma=',F4.1)"

return, wbad
end