Viewing contents of file '../idllib/contrib/harris/totalreg.pro'
FUNCTION totalreg,X,Y,M, YFIT = yfit, WEIGHT = weight, CHISQ = chisq, $
	SINGULAR = sing, VARIANCE = var, COVAR = covar, FUNCT = funct, $
	LAMBDA = lambda, MINIMISE = minimise, DIRECTION = direction
;+
; NAME:
;	TOTALREG
;
; PURPOSE:
;	Perform a general total least squares fit with optional error estimates
;
;	Based on the USER routine SVDFIT and accepts/outputs all the 
;	same parameters. As for SVDFIT, a user-supplied function or a built-in
;	polynomial is fit to the data.
; 
;	NOTE: Does not use SVD to solve the set of linear equations, rather it
;	uses matrix multiplication of the normal equations.
;
;	Total regression minimises the distance from each x,y point to the 
;	normal to the fitted curve, rather than the y distance. 
;	Solution based on "Matrix Computations" by G.H. Golub and C.F. Van Loan
;
; CATEGORY:
;	Curve fitting.
;
; CALLING SEQUENCE:
;	Result = TOTALREG(X, Y, M)
;
; INPUTS:
;	X:	"Independent" variable vector.  
;
;	Y:	"Dependent" variable vector. This vector should be same length 
;		as X.
;
;	M:	The number of coefficients in the fitting function.  For 
;		polynomials, M is equal to the degree of the polynomial + 1.
;
; OPTIONAL INPUTS:
;	WEIGHT:	A vector of weights for Y(i).  This vector should be the same
;		length as X and Y.
;
;		If this parameter is ommitted, 1 is assumed.  The error for 
;		each term is weighted by Weight(i) when computing the fit.  
;		Frequently, Weight(i) = 1./Sigma(i) where Sigma is the 
;		measurement error or standard deviation of Y(i).
;
;	FUNCT:	A string that contains the name of an optional user-supplied 
;		basis function with M coefficients. If omitted, polynomials
;		are used.
;
;		The function is called:
;			R = FUNCT(X,M)
;		where X is an N element vector, and the function value is an 
;		(N, M) array of the N inputs, evaluated with the M basis 
;		functions.  M is analogous to the degree of the polynomial +1 
;		if the basis function is polynomials.  For example, see the 
;		function COSINES, in the IDL User Library, which returns a 
;		basis function of:
;			R(i,j) = cos(j*x(i)).
;		For more examples, see Numerical Recipes, page 519.
;
;		The basis function for polynomials, is R(i,j) = x(i)^j.
;
;	LAMBDA: the coupling factor that defines the relative distribution of 
;		error between X and Y.
;
;     MINIMISE:	if set then Lambda will be iterated to minimise the Chi-square,
;		If Lambda passed then this will be its starting value,
;		otherwise a value of 1 is used.
;
;    DIRECTION: The initial direction for the change in lambda (only used if
;		MINIMISE is set)
;		
; OUTPUTS:
;	TOTALREG returns a vector of M coefficients.
;
; OPTIONAL OUTPUT PARAMETERS:
;
;	YFIT:	Vector of calculated Y's.
;
;	CHISQ:	Sum of squared errors (normal to the curve) multiplied by 
;		weights if weights are specified.
;
;	COVAR:	Var-Covariance matrix of the coefficients.
;
;    VARIANCE:	Sigma squared in estimate of each coeff(M).
;
;    SINGULAR:	The number of singular values returned.  This value should
;		be 0.  If not, the basis functions do not accurately
;		characterize the data.

;     LAMBDA:	The coupling factor that defines the relative distribution of 
;		error between A and b.
;
; COMMON BLOCKS:
;	None.
;
; SIDE EFFECTS:
;	
;
; MODIFICATION HISTORY:
;	SVDFIT was adapted from SVDFIT, from the book Numerical Recipes, Press,
;	et. al., Page 518.
;	minor error corrected April, 1992 (J.Murthy) 
;	Total Regression solution based on the relevent sections of the book
;	"Matrix Computations" by G.H. Golub and C.F. Van Loan
;	December, 1992 T.J.Harris
;
;-
	ON_ERROR,2		;RETURN TO CALLER IF ERROR
;	set variables
	THRESH = 1.0E-9		;Threshold used in editing singular values


; 	lambda is a coupling factor that defines the relative distribution of 
;	error between A and b
	if (not keyword_set(lambda)) then lambda = 1	 ;equally distributed

	XX = X*1.		;BE SURE X IS FLOATING OR DOUBLE
	YY = Y*1.		;BE SURE Y IS FLOATING OR DOUBLE
	N = N_ELEMENTS(XX) 	;SIZE
	IF N NE N_ELEMENTS(YY) THEN BEGIN ;SAME # OF DATA POINTS.
	  message, 'X and Y must have same # of elements.'
	  ENDIF

	sdx = stdev(XX,meanx)	&	sdy = stdev(YY,meany)
	scale = max(abs([XX,YY]))
	XX = XX/scale		&	YY = YY/scale 	;scale the data

	if n_elements(weight) ne 0 then begin
		if n_elements(weight) ne n then begin
		  message, 'Weights have wrong number of elements.'
		  endif
	  b = YY * weight	;Apply weights
	  endif else b = YY	;No weights
;

	; define coeff matrix, a, and the scaling, s
	if n_elements(funct) eq 0 then begin ;Use polynomial?
	  a = fltarr(n,m) & scl = fltarr(m,m)	;COEFF and scaling MATRICES
	  if n_elements(weight) ne 0 then tmp = float(weight) $
	  else tmp = replicate(1.,n)	;Weights are 1.
	  stmp = 1.
	  for i=0,m-1 do begin		;Make design matrix
		a(0,i) = tmp	&	scl(i,i) = stmp
		tmp = tmp * XX	&	stmp = stmp /scale
		endfor
	endif else begin		;Call user's function
		z = execute('a='+funct+'(XX,m)')
		if z ne 1 then begin
			message, 'Error calling user fcn: ' + funct
			endif
		if n_elements(weight) ne 0 then $
		  a = a * (weight # replicate(1.,m)) ;apply wts to A
	endelse

	svd,a,w,u,v			;Do the svd for a

	keep_w = double(w)

	w = keep_w
	ab = fltarr(n,m+1)	& ab(0,0) = a	& ab(*,m) = lambda*b
	svd,ab,wb			;Do the svd for a:b 
	;(lambda is a coupling factor that defines the relative distribution 
	;of error between A and b)

	;find the minimum singular values	
	; note that w is an n elements array of the singular values for A, 
	; which equals the diagonals of the analytic SVD array W
	mins = min(w)	&	minsn = min(wb)

	if (mins lt minsn) then begin
		message, 'Total least squares solution does not exist',/cont
	        covar = fltarr(m,m)
		var = fltarr(m) 
		coeff = fltarr(m)
		yfit = YY*0.0
		chisq = 99999.
		return,coeff
	endif

	good = where(w gt (max(w) * thresh), ng) ;Cutoff for sing values
	sing = m - ng		;# of singular values
	if sing ne 0 then begin
		message, 'Warning:' + strcompress(sing, /REMOVE) + $
			' singular values found.'
;modified J.M.
small=where(w le max(w)*thresh)
w(Small)=0
;
		if ng eq 0 then return,undefined
		endif
;modified J.M

; Solve the equation AX = b 
;
; This is the normal solution for linear least squares, 
; assuming all the error lies in the b (the y deviations are minimised)
;	X = inverse(transpose(A)#A) # transpose(A) # b
; or	X = V # [(1/w)#I] # (transpose(U) # b)	
; note that w is an n elements array of the singular values for A, 
; which equals the diagonals of the SVD array W
;
; can solve the equation AX = b by backsubstitution of the SVD arrays
;;;;;;;      svbksb,u,w,v,b,coeff
; for total regression we assume that the error is  distributed between the 
; A and b, thus must minimise the deviations normal to the curve
;	X = inverse(transpose(A)#A - minsn^2#I) # transpose(A) # b
; or	X = V # [1./(w*w - minsn*minsn)]*I # transpose(w#I) # (transpose(U)#b)

	; Use the SVD variables since we already have them.....
	;;j = dindgen(n)	& I = dblarr(n,n)	& I(j,j) = 1. ;identity matrix
	;;j = j*0+1	;make it all ones
	;;uu = i*0	&	uu(0,0) = u
	;;new_inv_w = 1./(w*w - minsn*minsn)#j*I(0:m-1,0:m-1) # transpose(j#w*I)
	;;coeff = v # new_inv_w # (transpose(uu) # b)
	;
	; ...OR...
	; simply use the normal equations for solutions
	j = dindgen(m)	& I = dblarr(m,m)	& I(j,j) = 1. ;identity matrix
	covar = invert(transpose(a)#a - minsn^2*I)
	coeff = covar # (transpose(a)#b)

	wt = fltarr(m)
	wt(good)=1./w(good)
	;;;NO.. wt(good)=new_inv_w(good)	;I think ??

	if n_elements(funct) eq 0 then yfit = poly(XX,coeff) $
	else begin 
		yfit = fltarr(n)
		for i=0,m-1 do yfit = yfit + coeff(i) * a(*,i) $
			/ weight	;corrected J.M.
	endelse

	;Compute first chisq
	chisq = (YY - yfit)
	if n_elements(weight) ne 0 then chisq = chisq * weight
	chisq = total(chisq^2) $
				/(1+transpose(coeff(1:*))#coeff(1:*)) 
				;/(1+transpose(coeff)#coeff)  
				;extra factor for the perp distance 
	chisq = chisq(0)	;ensure it is a scalar
	goodcoeff = coeff
	goodyfit = yfit

	if (keyword_set(minimise)) then loops = 0 else loops = 999
	cchisq = 0
	lastchisq = chisq
	inc = 2.
	if (not keyword_set(direction)) then direction = -1
	;lambda = lambda + inc
	lambdan = lambda 
	lambdad = 1.

	;;resetplt,/all
	;;plot,XX,YY,psym=8

	pdiff = (cchisq - chisq)/chisq

WHILE ((abs(pdiff) ge 0.00) and (loops lt 60)) DO BEGIN

	print,loops,lambda,chisq,cchisq
	if (direction gt 0) then lambdan = lambdan * inc $
	else lambdad = lambdad * inc
	lambda = lambdan / lambdad
	oplot,XX,yfit

	w = keep_w

	ab = fltarr(n,m+1)	& ab(0,0) = a	& ab(*,m) = lambda*b
	svd,ab,wb			;Do the svd for a:b 
	;(lambda is a coupling factor that defines the relative distribution 
	;of error between A and b)

	;find the minimum singular values	
	; note that w is an n elements array of the singular values for A, 
	; which equals the diagonals of the analytic SVD array W
	mins = min(w)	&	minsn = min(wb)

	if (mins lt minsn) then begin
		loops = 999
		goto, endd
		endif

	good = where(w gt (max(w) * thresh), ng) ;Cutoff for sing values
	sing = m - ng		;# of singular values
	if sing ne 0 then begin
		message, 'Warning:' + strcompress(sing, /REMOVE) + $
			' singular values found.'
		small=where(w le max(w)*thresh)
		w(Small)=0
		if ng eq 0 then return,undefined
		endif

	; Use the SVD variables since we already have them.....
	;;j = dindgen(n)	& I = dblarr(n,n)	& I(j,j) = 1. ;identity matrix
	;;j = j*0+1	;make it all ones
	;;uu = i*0	&	uu(0,0) = u
	;;new_inv_w = 1./(w*w - minsn*minsn)#j*I(0:m-1,0:m-1) # transpose(j#w*I)
	;;coeff = v # new_inv_w # (transpose(uu) # b)
	;
	; ...OR...
	; simply use the normal equations for solutions
	j = dindgen(m)	& I = dblarr(m,m)	& I(j,j) = 1. ;identity matrix
	covar = invert(transpose(a)#a - minsn^2*I)
	coeff = covar # (transpose(a)#b)

	wt = fltarr(m)
	wt(good)=1./w(good)

	if n_elements(funct) eq 0 then yfit = poly(XX,coeff) $
	else begin 
		yfit = fltarr(n)
		for i=0,m-1 do yfit = yfit + coeff(i) * a(*,i) $
			/ weight	;corrected J.M.
	endelse

	;Compute chisq
	cchisq = (YY - yfit)
	if n_elements(weight) ne 0 then cchisq = cchisq * weight
	cchisq = total(cchisq^2) $
				/(1+transpose(coeff(1:*))#coeff(1:*)) 
				; /(1+transpose(coeff)#coeff) 
				;extra factor for the perp distance 
	cchisq = cchisq(0)	;ensure it is a scalar

	pdiff = (cchisq - chisq)/chisq

	if (pdiff gt 0.05) then begin
		direction = -direction		;change the direction
	endif else begin
		chisq = lastchisq
		lastchisq = cchisq
		goodcoeff = coeff
		goodyfit = yfit
	endelse
	pdiff = (cchisq - chisq)/chisq
	loops = loops + 1
	print,loops,lambda,chisq,cchisq,direction

endd:
ENDWHILE

	coeff = goodcoeff
	yfit = goodyfit 

	wt = wt*wt		;Use squared w


	;UNDO THE EFFECTS OF SCALING THE DATA

	covar = invert((1./scl)#(transpose(a)#a - minsn^2*I)#(1./scl))
	; this is an un-normalised VAR-COV matrix for the fitted y values

	var = fltarr(m)         &       j = indgen(m)
        var(j) = covar(j,j)	;*sdy^2	;or not ?

	coeff = scl#coeff*scale
	yfit = yfit*scale

	;Compute the correct chisq
	chisq = (YY*scale - yfit)
	if n_elements(weight) ne 0 then chisq = chisq * weight
	chisq = total(chisq^2) $
				/(1+transpose(coeff(1:*))#coeff(1:*)) 
				;/(1+transpose(coeff)#coeff)  
				;extra factor for the perp distance 
	chisq = chisq(0)	;ensure it is a scalar

	return,coeff
end