Viewing contents of file '../idllib/astron/contrib/freudenreich/histogauss.pro'
PRO HISTOGAUSS,SAMPLE,A,XX,YY,GX,GY,NOPLOT=WHATEVER,NOFIT=SIMPL,CHARSIZE=CSIZE
;
;+
;NAME:
;	HISTOGAUSS
;
; PURPOSE:
;	Histograms data and overlays it with a Gaussian. Draws the mean, sigma,
;	and number of points on the plot.
;
; CALLING SEQUENCE:
;	HISTOGAUSS, Sample, A, XX, YY, GX, GY, [/NOPLOT, /NOFIT, CHARSIZE = ]
;
; INPUT:
;	SAMPLE = Vector to be histogrammed
;
; OUTPUT ARGUMENTS:
;	A = coefficients of the Gaussian fit: Height, mean, sigma
;		A(0)= the height of the Gaussian
;		A(1)= the mean
;		A(2)= the standard deviation
;		A(3)= the half-width of the 95% conf. interval 
;		A(4)= 1/(N-1)*total( (y-mean)/sigma)^2 ) = a measure of 
;			normality
;
;	Below: superceded. The formula is not entirely reliable.
;	A(4)= measure of the normality of the distribution. =1.0, perfectly
;       normal. If no more than a few hundred points are input, there are
;       formulae for the 90 and 95% confidence intervals of this quantity:
;       M=ALOG10(N-1) ; N = number of points
;       T90=ABS(.6376-1.1535*M+.1266*M^2)  ; = 90% confidence interval
;       IF N LT 50 THEN T95=ABS(-1.9065-2.5465*M+.5652*M^2) $
;                  ELSE T95=ABS( 0.7824-1.1021*M+.1021*M^2)   ;95% conf.
;       (From Martinez, J. and Iglewicz, I., 1981, Biometrika, 68, 331-333.)
;
;	XX = the x coordinates of the histogram bins (CENTER)
;	YY = the y coordinates of the histogram bins
;
; OPTIONAL INPUT KEYWORDS:
;	NOPLOT - If set, nothing is drawn
;	FITIT   If set, a Gaussian is actually fitted to the distribution.
;		By default, a Gaussian with the same mean and sigma is drawn; 
;		the height is the only free parameter.
;	CHARSIZE Size of the characters in the annotation. Default = 0.82.
;
; SUBROUTINE CALLS:
;	BIWEIGHT_MEAN, which determines the mean and std. dev.
;	AUTOHIST, which draws the histogram
;	FITAGAUSS, which does just that
;	MED, which calculates a median (called by AUTOHIST)
;
; REVISION HISTORY:
;	Written, H. Freudenreich, STX, 12/89
;	Modified for IDL Version 2, 1/91, HF
;	More quantities returned in A, 2/94, HF
;	Added NOPLOT keyword and print if Gaussian, 3/94
;	Stopped printing confidence limits on normality 3/31/94 HF
;	Added CHARSIZE keyword, changed annotation format, 8/94 HF
;	Simplified calculation of Gaussian height, 5/95 HF
;-

on_error,2

DATA=SAMPLE
N=N_ELEMENTS(DATA)

; First make sure that not everything is in the same bin. If most
; data = 0, reject zeroes. If they = some other value, complain and
; give up.
A=0.
DATA=DATA(SORT(DATA))  
N3=.75*N & N1=.25*N
IF DATA(N3) EQ DATA(N1) THEN BEGIN
   IF DATA(N/2) EQ 0. THEN BEGIN
      Q=WHERE(DATA NE 0.,NON0)
      IF (N-NON0) GT 15 THEN BEGIN
         PRINT,'AUTOHIST: Suppressing Zeroes!'
         DATA=DATA(Q)
         N=NON0
      ENDIF ELSE BEGIN 
         PRINT,'AUTOHIST: Too Few Non-0 Values!'
         RETURN
      ENDELSE
      Q=0
   ENDIF ELSE BEGIN
      PRINT,'AUTOHIST:  Too Many Identical Values: ',DATA(N/2)
      RETURN
   ENDELSE
ENDIF

A=FLTARR(5) 

; The "mean":
A(1)=BIWEIGHT_MEAN(DATA,S)
; The "standard deviation":
A(2)=S  
; The 95% confidence interval:
M=.7*(N-1)  ;appropriate for a biweighted mean
A(3)=ABS( STUDENT_T(.95,M) )*S/sqrt(n)

; A measure of the Gausianness:
A(4)=TOTAL((DATA-A(1))^2)/((N-1)*A(2)^2)
;Q=WHERE( ABS(DATA-A(1)) LT (5.*S), COUNT )   ; "robust I" unreliable
;ROB_I=TOTAL((DATA(Q)-A(1))^2)/((COUNT-1)*A(2)^2)
;PRINT,A(4),ROB_I

; Set bounds on the data:
U1=A(1)-5.*A(2)
U2=A(1)+5.*A(2)
Q=WHERE(DATA LT U1)
IF Q(0) GE 0 THEN DATA(Q) = U1
Q=WHERE(DATA GT U2)
IF Q(0) GE 0 THEN DATA(Q) = U2

; Draw the histogram
IF KEYWORD_SET(WHATEVER) THEN AUTOHIST,DATA,X,Y,XX,YY,/NOPLOT $
                         ELSE AUTOHIST,DATA,X,Y,XX,YY
; Check for error in AUTOHIST:
M =N_ELEMENTS(X)
MM=N_ELEMENTS(XX)
IF M LT 2 THEN BEGIN
   XX=0. & YY=0. & A=0.
   RETURN ; (AUTOHIST has already screamed)
ENDIF

; Calculate the height of the Gaussian:
Z = EXP(-.5*(X-A(1))^2/A(2)^2 )
XQ1=A(1)-1.3*A(2)
XQ2=A(1)+1.3*A(2)
QQ=WHERE((X GT XQ1) AND (X LT XQ2),COUNT)
IF COUNT GT 0 THEN HYTE=MED(Y(QQ)/Z(QQ)) ELSE BEGIN
   print,'HISTOGAUSS: Distribution too Weird!'
   HYTE=MAX(SMOOTH(Y,5))
ENDELSE
A(0)=HYTE

; Fit a Gaussian, unless the /NOFIT qualifier is present
IF NOT KEYWORD_SET(SIMPL) THEN BEGIN
   PARM=A(0:2)
   YFIT=FITAGAUSS(XX,YY,PARM)
   A(0:2)=PARM
ENDIF

; It the /NOPLOT qualifier is present, we're done.
IF KEYWORD_SET(WHATEVER) THEN RETURN

; Overplot the Gaussian, 
GX=FLTARR(200)
DU=(U2-U1)/199.
GX(0)=U1  &  FOR I=1,199 DO GX(I)=GX(I-1)+DU
Z=(GX-A(1))/A(2)
GY=A(0)*EXP(-Z^2/2. )
OPLOT,GX,GY

; Annotate. 
MEANST=STRING(A(1),'(F9.4)')
SIGST =STRING(A(2),'(F9.4)')
NUM=N_ELEMENTS(DATA)
NUMST =STRING(N,'(I6)')

IF KEYWORD_SET(CSIZE) THEN ANNOT=CSIZE ELSE ANNOT=.82
LABL='#, !7l!6, !7r!3='+numst+','+meanst+','+sigst 
X1=!x.crange(0)+(!x.crange(1)-!x.crange(0))/20. 
y1=!y.crange(1)-(!y.crange(1)-!y.crange(0))/23. 
XYOUTS,X1,Y1,LABL,CHARSIZE=ANNOT

;---commented out test on normality. not entirely reliable.
;; How about the normality of the distribution?
;IF (N LE 500) AND (A(4) LT 3.) THEN BEGIN
;   M=ALOG10(N-1) ; N = number of points
;   IF N LT 50 THEN T95=ABS(-1.9065-2.5465*M+.5652*M^2) $
;              ELSE T95=ABS( 0.7824-1.1021*M+.1021*M^2)   ;=95% conf.
;;   IF A(4) LT T95 THEN BEGIN
;;      Y4=.92*Y3
;;      XYOUTS,X1,Y4,'>95% Gauss',CHARSIZE=ANNOT
;;   ENDIF ELSE BEGIN
;;      T90=ABS(.6376-1.1535*M+.1266*M^2)  ; = 90% confidence interval
;;      IF A(4) LT T90 THEN XYOUTS,X1,.92*Y3,'>90% Gauss',CHARSIZE=ANNOT
;;   ENDELSE
;ENDIF

RETURN
END