Viewing contents of file '../idllib/astron/contrib/freudenreich/halfagauss.pro'
FUNCTION HALFAGAUSS,DATA,PARM,X,Y, NOPLOT=NODOIT,STITLE=SUB
;
;+
; NAME:
; HALFAGAUSS
;
; PURPOSE:
; Histogramming a distribution that can be characterized by a Gaussian
; with one heavy tail and returns the parameters of the Gaussian. Draws
; the histogram, the Gaussian, and the smoothed residuals. If a plausible
; fit to a Gaussian cannot be made (typically, if the distribution has a
; sharp edge or insufficient data near the mode) the parameters of the
; Gaussian are estimated. In this case an asterisk precedes the label of
; mean and width drawn on the plot.
;
; CALLLING SEQUENCE:
; YFit = HALFAGAUSS( Data, Parm, X, Y, [ /NOPLOT, STITLE ] )
;
; INPUT:
; DATA = the data to histogram. Should be large. At least 500 points;
; 10,000 is better.
; OUTPUT:
; YFit - HALFAGAUSS returns the vector of Gaussian fit to the histogram.
; If the program fails, it will return a scalar, 0.0.
;
; OPTIONAL OUTPUT:
; PARM = the parameters of the Gaussian component of the distribution:
; [height,mode,sigma]
; X = the locations of bin-centers
; Y = the locations of the bin-means
;
; OPTIONAL INPUT KEYWORDS:
; NOPLOT - if set, a histogram is not drawn
; STITLE - subtitle, if desired
;
;SUBROUTINES NEEDED:
; HISTOGAUSS,AUTOHIST,ROBUST_SIGMA,ROBUST_POLY_FIT,ROB_CHECKFIT,MED,
; LOWESS, POLY4PEAK,FITAGAUSS and GAUSSFUNC.
;
; REVISION HISTORY:
; Written, H.T. Freudenreich, HSTX, 5/6/94
; 2/10/95: Changed location of labels. HF
;-
on_error,2
; Histogram:
HISTOGAUSS,DATA,P,U,V,/NOPLOT
N=N_ELEMENTS(U)
IF N LT 30 THEN BEGIN
PRINT,'HALFAGAUSS: Cannot Determine Distribution Shape Adequately'
RETURN,0.
ENDIF
DX=U(N/2)-U(N/2-1)
V(0)=V(1)
V(N-1)=V(N-2)
; Start 1 before the first non-zero bin...
Q=WHERE(V GT 0,COUNT)
I1=Q(0)
IF I1 GT 0 THEN I1=I1-1
; ...until 1 after the last non-zero bin:
I2=Q(COUNT-1)
IF I2 LT (N-1) THEN I2=I2+1
X=U(I1:I2)
Y=V(I1:I2)
; Smooth the data and determine the mode.
Q=WHERE( X LT P(1), COUNT1) & Q=WHERE( X GT P(1), COUNT2)
COUNT=COUNT1 < COUNT2
WINDOW=(COUNT/3) > 7
HELP,COUNT1,COUNT2,WINDOW
IF WINDOW MOD 2 EQ 0 THEN WINDOW=WINDOW+1
LY=ALOG(Y+1.)
Z=LOWESS(X,LY,WINDOW,2)
Q=WHERE(ABS(X-P(1)) GT (2.0*P(2)),COUNT)
IF COUNT GT 0 THEN Z(Q)=0.
ZMAX=MAX(Z)
Q=WHERE(Z EQ ZMAX, COUNT)
ZMAX=EXP(ZMAX)-1.
IF COUNT GT 1 THEN X0=AVG(X(Q)) ELSE X0=X(Q)
X0=X0(0)
; OK, now we have the mode--to the nearest bin. Fit a 4th degree polynomial
; to the peak to determine it more precisely.
YMAX=MAX( Y( Q(0)-1:Q(0)+1 ) )
TX=X(Q(0)-7:Q(0)+7) & TY=Z(Q(0)-7:Q(0)+7)
CC=robust_POLY_FIT(TX,TY,4,TFIT)
EXT=POLY4PEAK(CC)
MODE=EXT(0) & ZMAX2=EXP(EXT(1))-1.
IF ABS(MODE-X0) GT .1*P(2) THEN MODE=X0
IF ABS(ZMAX2-ZMAX)/ZMAX LT .1 THEN ZMAX=ZMAX2
; Now re-bin the data so that the mode falls at the edge
; of a bin. Bin data on the short side of the distribution,
; then reflect the histogram about the mode. Then fit.
XMEAN=AVG(DATA)
XMED=MED(DATA)
IF XMED LT XMEAN THEN BEGIN ; tail is on the right
RIGHT=1
Q=WHERE(X LE MODE,NB)
IF NB LT 20 THEN BEGIN ; we want at least 20 bins.
DX=DX*NB/20.
NB=20
ENDIF
TX=FLTARR(NB) & TY=FLTARR(NB)
X2=MODE
FOR I=NB-1,0,-1 DO BEGIN
X1=X2-DX
Q=WHERE((DATA LE X2) AND (DATA GT X1),COUNT)
TX(I)=X1+DX/2. & TY(I)=COUNT
X2=X2-DX
ENDFOR
M=2*NB
XX=FLTARR(M) & YY=XX
X1=TX & Y1=TY
Y2=REVERSE(Y1)
YY=[Y1,Y2]
XX(0:NB-1)=X1
FOR I=NB,M-1 DO XX(I)=XX(I-1)+DX
; Now estimate the width:
Q=WHERE(DATA LE MODE,COUNT) & D=DATA(Q)
S=SORT(D) & D=D(S)
I1=long(.2*COUNT+.5) & SIG1=(MODE-D(I1))/1.282
I2=long(.5*COUNT+.5) & SIG2=(MODE-D(I2))/0.674
I3=long(.8*COUNT+.5) & SIG3=(MODE-D(I3))/0.253
SIG=(SIG1+SIG2+SIG3)/3.
ENDIF ELSE BEGIN ; tail is on the left
RIGHT=0
Q=WHERE(X GE MODE,NB)
IF NB LT 20 THEN BEGIN
DX=DX*NB/20.
NB=20
ENDIF
TX=FLTARR(NB) & TY=FLTARR(NB)
X1=MODE
FOR I=0,NB-1 DO BEGIN
X2=X1+DX
Q=WHERE((DATA LT X2) AND (DATA GE X1),COUNT)
TX(I)=X1+DX/2. & TY(I)=COUNT
X1=X1+DX
ENDFOR
M=2*NB
XX=FLTARR(M) & YY=XX
X2=TX & Y2=TY
Y1=REVERSE(Y2)
YY=[Y1,Y2]
XX(NB:M-1)=X2
FOR I=NB-1,0,-1 DO XX(I)=XX(I+1)-DX
Q=WHERE(DATA GE MODE,COUNT) & D=DATA(Q)
S=SORT(D) & D=D(S)
I1=long(.2*COUNT+.5) & SIG1=-(MODE-D(I1))/1.282
I2=long(.5*COUNT+.5) & SIG2=-(MODE-D(I2))/0.674
I3=long(.8*COUNT+.5) & SIG3=-(MODE-D(I3))/0.253
SIG=(SIG1+SIG2+SIG3)/3.
ENDELSE
; Now fit:
PARM0=[zMAX,MODE,SIG]
PARM=parm0
YFIT=FITAGAUSS(XX,YY,PARM)
IF ABS(PARM(0)-ZMAX)/ZMAX GT .1 THEN BEGIN
PRINT,'HALFAGAUSS: Probable Error in Gaussian Fit. Using Estimate Instead'
PARM=PARM0
NOFIT=1
ENDIF ELSE NOFIT=0
YFIT=PARM(0)*EXP(-(X-PARM(1))^2/(2.*PARM(2)^2))
; Plot if so desired:
IF KEYWORD_SET(NODOIT) THEN RETURN,YFIT
WINDOW=(N/10)
IF WINDOW MOD 2 EQ 0 THEN WINDOW=WINDOW+1
Z=LOWESS(X,Y-YFIT,WINDOW,2) ; smooth the residuals
z=z>0.
IF KEYWORD_SET(SUB) THEN ST=SUB ELSE ST=' '
PLOT,X,Y,YRAN=[MIN(Y),MAX(Y)*1.15],PSYM=10,SUBTI=ST ; plot histogram
OPLOT,X,YFIT,line=2 ; over-plot Gaussian
OPLOT,X,Z,line=2 ; over-plot residuals
MU=STRING(PARM(1),'(F10.5)'); annotate
SI=STRING(PARM(2),'(F9.5)')
LABL='Gaussian Mn, Width='+mu+' '+si
IF NOFIT EQ 1 THEN LABL='*'+LABL
x1=x(0)
dy=(!y.crange(1)-!y.crange(0))/20.
y1=!y.crange(1)-dy
XYOUTS,X1,Y1,LABL,CHARSIZE=.9
TOTGAUSS=TOTAL(YY)
TOT=N_ELEMENTS(DATA)
CTOT=STRING(TOT,'(I7)')
CGAU=STRING(TOTGAUSS,'(I7)')
LABL='# Total, # in Gaussian='+ctot+' '+cgau
XYOUTS,X1,Y1-2*dy,LABL,CHARSIZE=.9
RETURN,YFIT
END