; Viewing contents of file '../idllib/user_contrib/habbersett/nn_learn.pro'

;
;******************************* COPYRIGHT 1992 ********************************
;		The Regents of the University of California::
;	This software was produced under a U.S. Government contract
;	(w-7405-eng-36) by the Los Alamos National Laboratory, which is
;	operated by the University of California for the United States
;	Department of Energy. The U.S. Government is licensed to use,
;	reproduce, and distribute this software. Permission is granted
;	to the public to copy and use this software without charge,
;	provided that this notice and any statement of authorship are
;	reproduced on all copies. Neither the Government nor the
;	University makes any warranty, express or implied, or assumes
;	any liability or responsibility for the use of this software.
;*******************************************************************************
;+
; NAME:		nn_learn.pro
;
; PURPOSE:	Learning step to cluster data using neural network techniques.
;
; CATEGORY:	Data processing - cluster analysis
;
; CALLING SEQUENCE:	weights = nn_learn(data,max_val,[BLR=blr],[ELR=elr],
;	[N_EVENT=n_event],[N_PASS=n_pass],[N_PARAM=n_param],[N_CLUST=n_clust])
;
; INPUTS:	data = data set to be clustered, max_val = maximum range of each
;		parameter in the data set (to normalize the weights).
;
; OPTIONAL INPUT PARAMETERS:
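;	(all of the following are passed as keywords)
;	blr = beginning learning rate (default 0.5)
;	elr = ending learning rate (default 0.1)
;	n_pass = # of iterations of the learning pass (default 50)
;	n_param = # of parameters in data set - 1st dimension of data set
;	n_event = # of events in learning set - 2nd dimension of data set
;	n_clust = # of clusters to find - arbitrary (?) (default n_param + 1)
;
; KEYWORD PARAMETERS:	BLR, ELR, N_PASS, N_PARAM, N_EVENT, N_CLUST (see above)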
;
; OUTPUTS:	result = an (n_param x n_clust) array of weights describing the
;		cluster centers, or -1 if the inputs fail the error checks.
;
; OPTIONAL OUTPUT PARAMETERS:	none
;
; COMMON BLOCKS:	none
;
; SIDE EFFECTS: This approach has an inherent weakness in that it must be
;		set to find a specific number of clusters; It will find
;		that number of clusters in the data set - regardless.
;
; RESTRICTIONS:	This routine has not been rigorously tested on different types
;		of data. It "appears" to work on flow cytometry data.
;
; PROCEDURE:	A limited subset of a larger data set is presented to this
;		routine as a training set to condition the neural network. The
;		result is a set of weights which describe the centers of the
;		resolved clusters.
;
; MODIFICATION HISTORY:	Initial algorithm: Mark Naiver (Univ of Texas - Austin)
;	Date last modified ==>  1 March 93 : RCH [LANL]
;	Contact: Robb Habbersett (505/667-0296 or robb@big-geek.lanl.gov)
;-
;-------------------------------------------------------------------------------
  function NN_LEARN,data,max_val,blr=blr,elr=elr,n_pass=n_pass,n_param=n_param,$
		    n_clust=n_clust,n_event=n_event
;
;	Competitive ("winner-take-all") learning step.  Every event in the
;	training set is compared with all cluster centers (the weights); only
;	the closest center is moved toward the event, by a fraction equal to
;	the current learning rate.  The learning rate is reduced linearly
;	after every pass over the training set.
;
;	data    : 2-D array, indexed as data(parameter, event)
;	max_val : one value per parameter, used to scale the initial weights
;	blr,elr : beginning / ending learning rate   (defaults 0.5 / 0.1)
;	n_pass  : number of passes over the events    (default 50)
;	n_param : number of parameters                (default 1st dim of data)
;	n_event : number of events to use             (default 2nd dim of data)
;	n_clust : number of clusters to find          (default n_param + 1)
;
;	Returns the (n_param x n_clust) weight array, or -1 if an input check
;	fails.
;
	if n_elements(data) eq 0 then begin	;do some error checking
	  print,'No data received!?!'
	  return,-1
	  endif

	d_dimens = size(data)			;check data array dimensions
	if d_dimens(0) ne 2 then begin
	  print,'Data array must have two dimensions!'
	  return,-1
	  endif

;	check on optional parameters
	if n_elements(n_param) eq 0 then n_param = d_dimens(1)
	if n_elements(n_event) eq 0 then n_event = d_dimens(2)
	if n_elements(n_clust) eq 0 then n_clust = n_param +1
	if n_elements(n_pass) eq 0 then n_pass = 50
	if n_elements(blr) eq 0 then blr = 0.5
	if n_elements(elr) eq 0 then elr = 0.1
	if n_elements(max_val) ne n_param then begin
;	  double quotes are required here: the message contains an apostrophe
	  print,"Max_val doesn't fit data array!"
	  return, -1
	  endif

	learn_rate = blr			;initial 'learning rate'
;	force a floating-point division so that integer rates (e.g. blr=1,
;	elr=0) do not truncate the per-pass decrement to zero
	delta_learn = (blr - elr)/float(n_pass)
	one_row = replicate(1.,1,n_param)	;these arrays permit matrix...
	one_col = replicate(1.,1,n_clust)	;multiplication instead of loops
	weights = randomu(s,n_param,n_clust)	;random cluster weights

;	Keep track of how often each cluster "wins".  Long integers are used
;	because the counters accumulate over n_pass*n_event updates, which
;	easily exceeds the 16-bit integer range.
	count = lonarr(n_clust)

	if widget_info(/active) eq 1 then widget_control,/hourglass else $
	  print,'Clustering learning set - please wait!'

;	Normalize the weights: each cluster's random weights are scaled to
;	sum to 1 and then multiplied, parameter by parameter, by max_val.
	for i=0, n_clust-1 do $
	  weights(*,i) = (weights(*,i)/total(weights(*,i))) * max_val

	for j=1, n_pass do begin
;	  Sum of squared winning distances for this pass.  It is accumulated
;	  for diagnostic purposes only and is not reported by this routine.
	  lse_error = 0.
;	  0L makes the loop index a long so more than 32767 events can be used
	  for event = 0L, n_event-1 do begin
;	    vector from current event to all cluster centers(weights)
	    vector = data(*,event) # one_col - weights
;	    Distance from the current event to every cluster center
;	    (sum of absolute differences over the parameters)
	    distance = one_row # abs(vector)
;	    Winner is the cluster that is closest to the event.  MIN returns
;	    the subscript of the first minimum, so winner is always a single
;	    scalar index, even when several clusters are equally close.
	    min_dist = min(distance, winner)
;	    Update the count for the winning cluster
	    count(winner) = count(winner) + 1L
;	    Update the error value. (Distance squared)
	    lse_error = lse_error + min_dist ^ 2.
;	    Do not update ALL weights, just the winner's weights
	    weights(*,winner)=learn_rate * vector(*,winner) + weights(*,winner)
	    endfor			;endfor each event
	  learn_rate = learn_rate - delta_learn		;Update learning rate
	  endfor			;endfor each pass

	print,'Cluster counts:' & print, count
	return, weights
	end					;end NN_LEARN
;