Viewing contents of file '../idllib/astron/contrib/freudenreich/halfagauss.pro'
FUNCTION HALFAGAUSS,DATA,PARM,X,Y, NOPLOT=NODOIT,STITLE=SUB
;
;+
; NAME:
;	HALFAGAUSS
;
; PURPOSE:
;	Histogramming a distribution that can be characterized by a Gaussian
;	with one heavy tail and returns the parameters of the Gaussian. Draws
;	the histogram, the Gaussian, and the smoothed residuals. If a plausible
;	fit to a Gaussian cannot be made (typically, if the distribution has a
;	sharp edge or insufficient data near the mode) the parameters of the
;	Gaussian are estimated. In this case an asterisk precedes the label of
;	mean and width drawn on the plot.
;
; CALLLING SEQUENCE: 
;	YFit = HALFAGAUSS( Data, Parm, X, Y, [ /NOPLOT, STITLE ] )
;
; INPUT:
;	DATA = the data to histogram. Should be large. At least 500 points;
;		10,000 is better.
; OUTPUT:
;	YFit - HALFAGAUSS returns the vector of Gaussian fit to the histogram. 
;		If the program fails, it will return a scalar, 0.0.
;
; OPTIONAL OUTPUT:
;	PARM = the parameters of the Gaussian component of the distribution:
;		[height,mode,sigma]
;	X = the locations of bin-centers
;	Y = the locations of the bin-means
;
; OPTIONAL INPUT KEYWORDS: 
;	NOPLOT - if set, a histogram is not drawn
;	STITLE - subtitle, if desired
;
;SUBROUTINES NEEDED: 
;	HISTOGAUSS,AUTOHIST,ROBUST_SIGMA,ROBUST_POLY_FIT,ROB_CHECKFIT,MED,
;	LOWESS, POLY4PEAK,FITAGAUSS and GAUSSFUNC.
;
; REVISION HISTORY:
;	Written, H.T. Freudenreich, HSTX, 5/6/94
;	2/10/95: Changed location of labels. HF
;-

on_error,2

; Histogram:
HISTOGAUSS,DATA,P,U,V,/NOPLOT
N=N_ELEMENTS(U)
IF N LT 30 THEN BEGIN
   PRINT,'HALFAGAUSS: Cannot Determine Distribution Shape Adequately'
   RETURN,0.
ENDIF
DX=U(N/2)-U(N/2-1)
V(0)=V(1)
V(N-1)=V(N-2)
; Start 1 before the first non-zero bin...
Q=WHERE(V GT 0,COUNT)
I1=Q(0)
IF I1 GT 0 THEN I1=I1-1
; ...until 1 after the last non-zero bin:
I2=Q(COUNT-1)
IF I2 LT (N-1) THEN I2=I2+1
X=U(I1:I2)
Y=V(I1:I2)

; Smooth the data and determine the mode.
Q=WHERE( X LT P(1), COUNT1) & Q=WHERE( X GT P(1), COUNT2)
COUNT=COUNT1 < COUNT2
WINDOW=(COUNT/3) > 7
HELP,COUNT1,COUNT2,WINDOW
IF WINDOW MOD 2 EQ 0 THEN WINDOW=WINDOW+1 
LY=ALOG(Y+1.)
Z=LOWESS(X,LY,WINDOW,2)
Q=WHERE(ABS(X-P(1)) GT (2.0*P(2)),COUNT)
IF COUNT GT 0 THEN Z(Q)=0.
ZMAX=MAX(Z)
Q=WHERE(Z EQ ZMAX, COUNT)
ZMAX=EXP(ZMAX)-1.
IF COUNT GT 1 THEN X0=AVG(X(Q)) ELSE X0=X(Q)
X0=X0(0)

; OK, now we have the mode--to the nearest bin. Fit a 4th degree polynomial 
; to the peak to determine it more precisely.
YMAX=MAX( Y( Q(0)-1:Q(0)+1 ) )
TX=X(Q(0)-7:Q(0)+7)  &  TY=Z(Q(0)-7:Q(0)+7)
CC=robust_POLY_FIT(TX,TY,4,TFIT)
EXT=POLY4PEAK(CC)
MODE=EXT(0)  & ZMAX2=EXP(EXT(1))-1.
IF ABS(MODE-X0) GT .1*P(2) THEN MODE=X0
IF ABS(ZMAX2-ZMAX)/ZMAX LT .1 THEN ZMAX=ZMAX2

; Now re-bin the data so that the mode falls at the edge
; of a bin. Bin data on the short side of the distribution,
; then reflect the histogram about the mode. Then fit.

XMEAN=AVG(DATA)
XMED=MED(DATA)
IF XMED LT XMEAN THEN BEGIN  ; tail is on the right
   RIGHT=1
   Q=WHERE(X LE MODE,NB)
   IF NB LT 20 THEN BEGIN ; we want at least 20 bins.
      DX=DX*NB/20.
      NB=20
   ENDIF
   TX=FLTARR(NB)  &  TY=FLTARR(NB)     
   X2=MODE  
   FOR I=NB-1,0,-1 DO BEGIN
     X1=X2-DX
     Q=WHERE((DATA LE X2) AND (DATA GT X1),COUNT)
     TX(I)=X1+DX/2.  &  TY(I)=COUNT
     X2=X2-DX
   ENDFOR
   M=2*NB
   XX=FLTARR(M)  &  YY=XX
   X1=TX         &  Y1=TY
   Y2=REVERSE(Y1)
   YY=[Y1,Y2]
   XX(0:NB-1)=X1
   FOR I=NB,M-1 DO XX(I)=XX(I-1)+DX
;  Now estimate the width:
   Q=WHERE(DATA LE MODE,COUNT) &  D=DATA(Q)
   S=SORT(D) &  D=D(S)
   I1=long(.2*COUNT+.5)  &  SIG1=(MODE-D(I1))/1.282
   I2=long(.5*COUNT+.5)  &  SIG2=(MODE-D(I2))/0.674
   I3=long(.8*COUNT+.5)  &  SIG3=(MODE-D(I3))/0.253
   SIG=(SIG1+SIG2+SIG3)/3.   
ENDIF ELSE BEGIN          ; tail is on the left
   RIGHT=0
   Q=WHERE(X GE MODE,NB)
   IF NB LT 20 THEN BEGIN
      DX=DX*NB/20.
      NB=20
   ENDIF
   TX=FLTARR(NB)  &  TY=FLTARR(NB)     
   X1=MODE  
   FOR I=0,NB-1 DO BEGIN
     X2=X1+DX
     Q=WHERE((DATA LT X2) AND (DATA GE X1),COUNT)
     TX(I)=X1+DX/2.  &  TY(I)=COUNT
     X1=X1+DX
   ENDFOR
   M=2*NB
   XX=FLTARR(M)  &  YY=XX
   X2=TX         &  Y2=TY
   Y1=REVERSE(Y2)
   YY=[Y1,Y2]
   XX(NB:M-1)=X2
   FOR I=NB-1,0,-1 DO XX(I)=XX(I+1)-DX
   Q=WHERE(DATA GE MODE,COUNT) &  D=DATA(Q)
   S=SORT(D) &  D=D(S)
   I1=long(.2*COUNT+.5)  &   SIG1=-(MODE-D(I1))/1.282
   I2=long(.5*COUNT+.5)  &   SIG2=-(MODE-D(I2))/0.674
   I3=long(.8*COUNT+.5)  &   SIG3=-(MODE-D(I3))/0.253
   SIG=(SIG1+SIG2+SIG3)/3.   
ENDELSE

; Now fit:
PARM0=[zMAX,MODE,SIG]
PARM=parm0 
YFIT=FITAGAUSS(XX,YY,PARM)
IF ABS(PARM(0)-ZMAX)/ZMAX GT .1 THEN BEGIN
   PRINT,'HALFAGAUSS: Probable Error in Gaussian Fit. Using Estimate Instead'
   PARM=PARM0
   NOFIT=1
ENDIF ELSE NOFIT=0

YFIT=PARM(0)*EXP(-(X-PARM(1))^2/(2.*PARM(2)^2))

; Plot if so desired:
IF KEYWORD_SET(NODOIT) THEN RETURN,YFIT

WINDOW=(N/10) 
IF WINDOW MOD 2 EQ 0 THEN WINDOW=WINDOW+1 
Z=LOWESS(X,Y-YFIT,WINDOW,2) ; smooth the residuals
z=z>0.
IF KEYWORD_SET(SUB) THEN ST=SUB ELSE ST=' '
PLOT,X,Y,YRAN=[MIN(Y),MAX(Y)*1.15],PSYM=10,SUBTI=ST   ; plot histogram
OPLOT,X,YFIT,line=2         ; over-plot Gaussian
OPLOT,X,Z,line=2            ; over-plot residuals
MU=STRING(PARM(1),'(F10.5)'); annotate
SI=STRING(PARM(2),'(F9.5)')
LABL='Gaussian Mn, Width='+mu+' '+si
IF NOFIT EQ 1 THEN LABL='*'+LABL

x1=x(0)
dy=(!y.crange(1)-!y.crange(0))/20. 
y1=!y.crange(1)-dy
XYOUTS,X1,Y1,LABL,CHARSIZE=.9

TOTGAUSS=TOTAL(YY)
TOT=N_ELEMENTS(DATA)
CTOT=STRING(TOT,'(I7)')
CGAU=STRING(TOTGAUSS,'(I7)')
LABL='# Total, # in Gaussian='+ctot+' '+cgau
XYOUTS,X1,Y1-2*dy,LABL,CHARSIZE=.9
RETURN,YFIT
END