OpenVMS Source Code Demos

ICSIS_HEALTH_CHECK

1000	%title "ICSIS_Health_Check_xxx.bas"
	%ident			     "Version_121.2"				! <<<---+--- keep in step with k_version$
	declare string constant k_version$ = "121.2"			,	! <<<---+ version shown in the start-up banner	&
				k_program$ = "ICSIS_Health_Check"	,	! program name shown in the start-up banner	&
				k_facility$= "Health Check"			! facility name (not referenced in the code visible here)
	!=========================================================================================================================
	! Title  : ICSIS_Health_Check_xxx.bas
	! Author : Neil S. Rieck (https://neilrieck.net/)
	! Purpose: To make sure certain tasks and batch jobs are present
	! Notes  : This program is run in several modes based upon logical: ICSIS$HEALTH_CHECK_MODE
	!		BATCH       - runs continually as a detached process (think: TRON)
	!		BRIEF       - run once from system admin's login.com
	!		DEBUG       - run once with extensive logging
	!		INTERACTIVE - run once from MANAGER's MENU
	!	 : NODE is extracted from lib$getsyi SYI$_NODENAME
	!	 : HOST is extracted from logical TCPIP$INET_HOST
	!	 : Our production HOST must always be KAWC96 so our partners can find us.
	!	   So use NOUN=HOST to test for all things which must be present on PROD
	!
	! Sample Config:
	!!	!================================================================
	!!	! sample config file
	!!	! supported SCOPE: DESTINATION, PROCESS, BATCHJOB
	!!	! supported NOUN : NODE, HOST
	!!	! supported ITEM : process names, batch job names, DECnet address
	!!	! notes:
	!!	! 1. the order of SCOPE and HOST/NODE is not important
	!!	! 2. Our production HOST must always be KAWC96 so our partners can find us.
	!!	!    So use NOUN=HOST to test for all things which must be present on PROD
	!!	! ---------------------------------------------------------------
	!	[ SCOPE=DESTINATION ]			<<<--- should appear zero or one times
	!	[ NODE=KAWC09,KAWC99 ]			<<<--- DECnet/SCSNAME node list for this scope
	!	14.950::				<<<--- means send alarms here
	!!	! -----------------------------------
	!	[ SCOPE=PROCESS ]			<<<--- prep to check for processes
	!	[ HOST=KAWC96 ]				<<<--- means TCP/IP host: kawc96.on.bell.ca
	!	ESPP_SERVER
	!!	! -----------------------------------
	!	[ SCOPE=BATCHJOB ]			<<<--- prep to check for batch jobs
	!	DAR_PIXI
	!	DAR_MONTHLY
	!!	! -----------------------------------
	!	[ NODE=KAWC96,KAWC09,KAWC90,KAWC09 ]	<<<-- means DECnet node: {list}
	!	[ SCOPE=PROCESS ]
	!	APACHE$WWW
	!	[ SCOPE=BATCHJOB ]
	!	ROTATE_APACHE_LOGS
	!	CSMIS_MIDNITE
	!!	!================================================================
	! Sample Output:
	!	============================================================
	!	-i-program     : ICSIS_Health_Check_119.2
	!	-i-starting at : 20150929.184129
	!	-i-current mode: INTERACTIVE
	!	-i-scs node    : KAWC09
	!	-i-tcpip host  : KAWC09
	!	-i-processing registry: csmis$dat:icsis_health_check.dat
	!	-i-alarm host  : LOCAL
	!	-i- 37 OUT OF SCOPE items detected in the health registry
	!	-i-  8 PROCESS      items loaded from the health registry
	!	-i-  4 BATCHJOB     items loaded from the health registry
	!	-i-115 REMARKED     lines found in the health registry
	!	-i- 14 SCOPE        lines found in the health registry
	!	-i-  0 REJECTED     lines found in the health registry
	!	-i-status      : all tests passed
	!	============================================================
	! History:
	!
	! Ver Who When   What
	! --- --- ------ ---------------------------------------------------------------------------------------------------------
	! 100 NSR 021011 1. original program (derived from ICSIS_WATCHDOG_42.BAS)
	! 101 NSR 021016 1. started to add code for QUE-JOB checking
	!         021017 2. finished adding code for QUE-JOB checking
	! 102 NSR 021018 1. started adding code for queue health
	! 103 NSR 021019 1. added alarm server feature									bf_103.1
	! 104 NSR 021021 1. added BRIEF mode (to simplify login tests)
	!     NSR 030114 2. formalized the logical names
	!     NSR 030213 3. added support for misc4
	! 105 NSR 030430 1. removed support for the registry_nodes$ array (we now only load items for the current node)
	!		 2. added code to prevent duplicate PROCESS or BATCH names from being loaded
	! 106 NSR 030604 1. added code to ignore print queues								bf_106.1
	!     NSR 030806 2. rewrote most of WCSM_Mbx_Send4_102
	!     NSR 030808 3. minor cleanup including removing 5 second delay after mail box notification
	! 107 NSR 030808 1. added code to detect when a queue has too many jobs in it
	!     NSR 030826 2. changed the queue job limit from 40 to 50
	!     NSR 040121 3. now ignore queue ICSIS$SPOOL_TOOL
	!     NSR 040128 4. expanded mapped string length from 30 to 50							bf_107.4
	! 108 NSR 040219 1. changed to a new type of alarm map								bf_108.1
	!		 2. removed the msg$ slice 'n dice
	!     NSR 060831 3. bug fix in error message (while testing VMS 8.2 on KAWC98)
	! 109 NSR 091016 1. small amount of renovation and annotation
	!		 2. started adding code to write health messages to a running log file
	!     NSR 091018 3. added a few logging lines (to trace a problem)
	!     NSR 091019 4. added support for d108_facility
	! 110 NSR 091019 1. started adding code to support the creation/deletion of logicals
	! 111 NSR 091020 1. more work on logical name creation/deletion
	!		 2. chasing a possible bug (killing WATCH_DOG doesn't always seem to work)
	!		 3. added code to support messages from KAWC99 to KAWC15
	!     NSR 091021 4. bug fixes
	!		 5. added code to send out a healthy message every hour
	!		 6. now differentiate between "too many batch jobs" and "too many batch jobs" times two		bf_111.6
	!		 7. enhanced the start-up messages
	!     NSR 091023 8. tweaked the hourly message
	!		 9. expanded the size of d108_text
	! 112 NSR 091026 1. changes to the hourly-good message
	! 113 NSR 120531 1. changes to the error display (overflowed display for "commented number of lines")
	!     NSR 120822 2. added "ATC" to the alarm list
	!     NSR 120823 3. added mbx_temp_misc to simplify the data-fill
	!		 4. changed mbx_temp_noun from "REPLY" to "HEALTH"
	!     NSR 120824 5. tweaks to learn about problems on node kawc99
	! 114 NSR 120825 1. renovated to only use starlet declarations
	!     NSR 130723 2. introduced a few optimizations
	! 115 NSR 131021 1. expanded a few mbx fields
	!		 2. removed mbx_temp_msg_type
	!     NSR 131030 3. now always open/reopen the file (this helps with log file rotation)
	! 116 NSR 140410 1. added a sanity check (required when the QUEUE MANAGER stops)
	!     NSR 140716 2. bug fix. Needed to upcase the translation of sys$input for kawc96 (I wonder what changed?)
	! 117 NSR 150218 1. annotations while searching for an intermittent bug
	! 118 NSR 150730 1. added a new feature for detecting problems on a new node
	! 119 NSR 151029 1. started adding code for a new alternate noun (HOST)
	!		 2. now determine host name via lib$getsyi
	!     NSR 151030 3. more work (good diversion just before a vacation)
	!		 4. ripped out LOGICAL set/clear
	! 120 NSR 160215 5. updated for post-cutover to Itanium (replaced node with host in some logic)
	! 121 NSR 160216 1. renovations for this brave new world :-)
	!		 2. now get alarm destination from the config file
	!=========================================================================================================================
    %let %alarmmbx=1								! enable mailbox messages
	option type = explicit							! cuz tricks are for kids
	set no prompt								!
	!
	on error goto trap							! "old school" error trapping
	!
	!	vms declarations
	!
	%include "starlet"      %from %library "sys$library:basic$starlet"	! system services
        %include "$ssdef"       %from %library "sys$library:basic$starlet"	! ss$
	%include "$syidef"      %from %library "sys$library:basic$starlet"	! syi$
	%include "$quidef"      %from %library "sys$library:basic$starlet"	! qui$
	%include "$jpidef"      %from %library "sys$library:basic$starlet"	! jpi$
	%include "$jbcmsgdef"   %from %library "sys$library:basic$starlet"	! jbc$
	%include "$iledef"      %from %library "sys$library:basic$starlet"	! ile3$ (Item List Entry 3 structures)
!~~~	%include "$iosbdef"     %from %library "sys$library:basic$starlet"	x iosb$ (iosb structures)
	%include "lib$routines"	%from %library "sys$library:basic$starlet"	! lib$
	%include "$libdef"	%from %library "sys$library:basic$starlet"	! eg. lib$_normal
	!
	!	I need this iosb to get around a limitation in the BASIC version of starlet
	!
	!	question : How did I know?
	!	answer   : Hacking
	!	reference: https://neilrieck.net/docs/openvms_notes_hacking_starlet.html
	!
	record my_iosb								! I/O status block: three overlaid views of the same storage
	    variant								! every case below maps the same bytes
		case								! vanilla
		    group one							!
			word		iosb$w_status				! status (word)
			word		iosb$w_bcnt				! count (word)
			long		iosb$l_dev_depend			! remaining longword
		    end group							!
		case								! used in sys$getqui
		    group two							!
			long		iosb$l_getxxi_status			! status as a longword (tested after sys$getquiw)
			long		iosb$l_reserved				!
		    end group							!
		case								! used to satisfy the compiler
		    group three							!
			basic$quadword	iosb$quad				! whole block as one item (this is what is passed "by ref")
		    end group							!
	    end variant								!
	end record								!
	!
	!	declare home brewed code
	!
	external string function formatted_dt_stamp14				!
	external string function WCSM_dt_stamp16				!
	external string function WCSM_dt_stamp					!
	external string function WCSM_trnlnm(string,string)			!
	external string function return_system_msg(long,long)			!
        external long	function wcsm_calc_diff_in_mins(string, string)		!
    %if %alarmmbx=1 %then							!
	external long   function WCSM_Mbx_Send4_102(string,long,long,string)	!
    %end %if									!
	!
	declare long constant	k_scope_reject		=  -1,			! scope after an unsupported [SCOPE=...] line	&
				k_scope_unused		=   0,			! don't start with 0 (cuz of MAT)		&
				k_scope_process		=   1,			! row of the item arrays for process names	&
				k_scope_batchjob	=   2,			!--+ row of the item arrays for batch jobs	&
				k_scope_last		=   2,			!--+-- last two must match			&
										!						&
				k_msg_text		=   1,			! k_msg_* are single-bit flags (1,2,4,8)	&
				k_msg_ident		=   2,			!						&
				k_msg_severity		=   4,			!						&
				k_msg_facilty		=   8,			!						&
				k_msg_all		=  15,			! 1+2+4+8					&
				k_msg_limited		=  11,			! all minus severity				&
										!						&
				k_job_limit%		=  50,			! for alarm limit				&
				k_array_size%		= 100			! initial/incremental value
	!
	!	health registry (config file) read at start-up
	!
	declare string	constant k_registry$	= "csmis$dat:icsis_health_check.dat"
	!
	declare long	rc%						,	! Return Code (system status)			&
			log_level%					,	! 0=errors 1=warnings 2=info 3=trace		&
			sleep_minutes%					,	! BATCH mode sleep between passes		&
			temp%						,	! scratch					&
			junk%						,	! scratch					&
			local%						,	!						&
			is_node_or_host					,	! switch: node=0, host=1			&
			one_time%					,	! 1 once the item tables were printed		&
			seed_pid%					,	! wildcard context for sys$getjpiw		&
			i%						,	! loop index					&
			save_i%						,	!						&
			current_scope%					,	! scope set by the last [SCOPE=...] line	&
			qui_context%					,	! context for sys$getquiw			&
			array_size%					,	! current upper bound of the arrays		&
			max_scope%					,	! first bound of the item arrays		&
			previous_bogey%					,	! count_bogey% of the previous pass		&
			count_bogey%					,	! problems found in this pass (-1=no pass yet)	&
			count_process%					,	! process names loaded				&
			count_batchjob%					,	! batch job names loaded			&
			count_reject%					,	! registry lines rejected			&
			count_remark%					,	! registry lines starting with "!"		&
			count_scopes%					,	! [SCOPE=] [NODE=] [HOST=] lines seen		&
			count_out_of_scope%				,	! items meant for another node/host		&
			count_duplicate%				,	! duplicate items ignored			&
			q_state%					,	!						&
			handler_error%					,	! ERR saved by the last handler (0=none)	&
			file108_open%					,	!						&
			log_only%					,	!						&
		string	node_name$					,	! from lib$getsyi SYI$_NODENAME			&
			host_name$					,	! from logical TCPIP$INET_HOST			&
			msg_dest$					,	!						&
			default_node$					,	! alarm destination ("" prints as LOCAL)	&
			junk$						,	! scratch					&
			healthy_msg_time$				,	! time of the last "healthy" message		&
			log_type$					,	!						&
			log_class$					,	! set to "P" before a process alarm		&
			temp$						,	! scratch					&
			msg$						,	! text for the log/alarm subroutines		&
			temp_que$					,	!						&
			temp_job$					,	!						&
			q_status$					,	!						&
			q_type$						,	!						&
			node_list$					,	! most recent [NODE=...] line			&
			host_list$					,	! usually only one				&
			pgm_mode$					,	! BATCH, BRIEF, DEBUG or INTERACTIVE		&
			my_input$						! translation of SYS$INPUT
	!
	!===============================================================================
	!	Initialize
	!===============================================================================
2000	margin #0, 132									! widen the output margin so log lines are not wrapped
	!
	!	start-up banner
	!
	print "============================================================"
	print "-i-program     : "+ k_program$ +"_"+ k_version$
	print "-i-starting at : "+ formatted_dt_stamp14
	!
	!	work out the run mode:
	!	  1. take it from the process-level logical ICSIS$HEALTH_CHECK_MODE
	!	  2. but a process whose SYS$INPUT is NL: is always treated as BATCH
	!	  3. and no value at all means INTERACTIVE
	!
	pgm_mode$ = edit$(WCSM_TrnLnm ("ICSIS$HEALTH_CHECK_MODE","LNM$PROCESS_TABLE"), 32+2)
	my_input$ = WCSM_TrnLnm ("SYS$INPUT","LNM$PROCESS_TABLE")
	if edit$(my_input$,32) = "NL:" then
	    pgm_mode$ = "BATCH"
	end if
	if pgm_mode$ = "" then
	    pgm_mode$ = "INTERACTIVE"
	end if
	!
	!	each mode starts with its own logging level
	!
	select pgm_mode$
	    case "BATCH", "INTERACTIVE"
		log_level% = 2								! -i- and -e-
	    case "BRIEF"
		log_level% = 0								! -e- only
	    case "DEBUG"
		log_level% = 3								! everything
	    case else
		print "-e-logical ICSIS$HEALTH_CHECK_MODE value of "; pgm_mode$; " is not supported"
		pgm_mode$ = "INTERACTIVE"						! fall back to the interactive settings
		log_level% = 2
	end select
	print "-i-program mode: "; pgm_mode$
	print "-i-log_level   :";log_level%
	!----------------------------------------------------------------------------------------------------
	!	<<< discover our system's node name and host name >>>
	!
	!	node name comes from lib$getsyi; give up (via fini_rc) if the call fails
	!
	rc% = lib$getsyi( syi$_nodename,,node_name$ )
	if (rc% and 7%) = 1 then
	    node_name$ = edit$(node_name$,32+2)						! upcase, no white space
	else
	    print "-e-lib$getsyi:";rc%
	    goto fini_rc
	end if
	if log_level% >= 2 then
	    print "-i-scs node    : "+ node_name$
	end if
	!
	!	host name comes from logical TCPIP$INET_HOST; an empty translation becomes "BLANK"
	!
	host_name$ = edit$(wcsm_trnlnm("TCPIP$INET_HOST","LNM$SYSTEM_TABLE"),32+2)
	if host_name$ = "" then
	    host_name$ = "BLANK"
	end if
	if log_level% >= 2 then
	    print "-i-tcpip host  : "+ host_name$
	end if
	!
	!	BATCH mode records the start-up through write_log_only_I
	!
	if pgm_mode$ = "BATCH" then
	    msg$ = "task: "+ k_program$ +"_"+ k_version$ +" started (from init)"
	    gosub write_log_only_I
	end if
	!----------------------------------------------------------------------------------------------------
	!
	!	set up some data arrays to load health registry parameters
	!
	array_size%	= k_array_size%							! set initial starting size
	max_scope%	= k_scope_last							!
	!
	!	reenter here (only when we need a bigger array)
	!
	restart_entry_pt:
	!
	!	Every pass over the registry must start from the same clean parser state.
	!	Without this, a restart (bigger arrays) would re-read the top of the file
	!	using the scope and node/host lists left over from the middle of the file.
	!
	current_scope%	= 0								! same as [SCOPE=DESTINATION] / nothing selected
	is_node_or_host	= 0								! switch=node
	node_list$	= ""								!
	host_list$	= ""								!
	default_node$	= ""								!
	!
	!	(re)dimension the work arrays
	!
	dim	string	items_names$(max_scope%,	array_size%)			! items
	dim	long	items_flags%(max_scope%,	array_size%)			! (found) flags
	!
	dim	string	queue_names$(			array_size%)			!
	dim	string	queue_state$(			array_size%)			!
	dim	long	queue_state%(			array_size%)			!
	dim	long	queue_jobs%(			array_size%)			!
	dim	string	queue_type$(			array_size%)			!
	!
	!	load the health registry data
	!
	!	The read loop below has no normal exit: it ends when an error (normally
	!	end-of-file) transfers control to the outer handler, which saves ERR in
	!	handler_error% for the test that follows the block.
	!
	when error in									!
	    !
	    !	make sure we don't have duplicate entries for this node
	    !	(a temporary indexed file; a second PUT of the same key fails)
	    !
	    map(no_dups)string d2_noun	= 50						!
	    !
	    open "csmis$dat:health_check_no_duplicate_names.dat" as #2			&
		,access modify								&
		,allow none								&
		,organization indexed							&
		,map no_dups								&
		,primary key d2_noun							&
		,temporary								!
	    !
	    print "-i-processing registry: "+ k_registry$	if log_level% >= 2	!
	    open k_registry$ for input as #1						&
		,access read								&
		,allow modify								!
	    !
	    count_process%	= 0							! init tallies
	    count_batchjob%	= 0							!	''
	    count_reject%	= 0							!	''
	    count_remark%	= 0							!	''
	    count_scopes%	= 0							!	''
	    count_out_of_scope%	= 0							!	''
	    count_duplicate%	= 0							!
	    !
	    !	now process registry data lines (one full pass; repeated only after an array resize)
	    !
	    while 1									!
		linput #1, junk$							! read from file
		junk$ = edit$(junk$,32+2)						! upcase, no white space
		!
		!	a blank line carries no information; skipping it here stops an
		!	empty name from being loaded as an item (and then reported missing)
		!
		iterate if junk$ = ""							!
		!
		if left$(junk$,1) = "!" then						! if a remarked line...
			count_remark% = count_remark% + 1				!
			iterate								!
		end if									!
		if pos(junk$,"[SCOPE=DESTINATION",1)=1 then				! SCOPE=DESTINATION
			current_scope%	= 0						!
			count_scopes% = count_scopes% + 1				!
			iterate								!
		end if									!
		if pos(junk$,"[SCOPE=PROCESS",1)=1 then					! SCOPE=PROCESS
			current_scope%	= k_scope_process				!
			count_scopes% = count_scopes% + 1				!
			iterate								!
		end if									!
		if pos(junk$,"[SCOPE=BATCHJOB",1)=1 then				! SCOPE=BATCHJOB
			current_scope%	= k_scope_batchjob				!
			count_scopes% = count_scopes% + 1				!
			iterate								!
		end if									!
		if pos(junk$,"[SCOPE=",1)=1 then					! SCOPE unsupported
			current_scope% = k_scope_reject					! items that follow will be rejected
			print "-e-error, unsupported directive ";junk$			!
			count_scopes% = count_scopes% + 1				!
			iterate								!
		end if									!
		if pos(junk$,"[NODE=",1)=1 then						! NODE label
			node_list$ = junk$						! keep the whole line; searched with pos()
			is_node_or_host = 0						! switch=node
			count_scopes% = count_scopes% + 1				!
			iterate								!
		end if									!
		if pos(junk$,"[HOST=",1)=1 then						! HOST label
			host_list$ = junk$						! keep the whole line; searched with pos()
			is_node_or_host = 1						! switch=host
			count_scopes% = count_scopes% + 1				!
			iterate								!
		end if									!
		if left$(junk$,1) = "["	then						! ignore unsupported labels
			print "-e-error, unsupported directive ";junk$			!
			count_reject% = count_reject% + 1				!
			iterate								!
		end if									!
		!
		!	anything else is an item belonging to the current scope; it is only
		!	loaded when the most recent NODE (or HOST) list names this system
		!
		select current_scope%							!
		    case 0								! only used to set the DESTINATION
			if ((pos(node_list$,node_name$,1) > 0) and (is_node_or_host = 0)) or	&
			   ((pos(host_list$,host_name$,1) > 0) and (is_node_or_host = 1))
			then								!
			    if	(len(junk$) >= 5)				and	&
				(pos(junk$,"::",1) = len(junk$)-1)
			    then							! looks like a DECnet address ending in "::"
				default_node$ = junk$					!
			    else							!
				print "-e-ignoring bad DECnet address: "+ junk$		!
				count_reject% = count_reject% + 1			!
			    end if							!
			else								!
			    count_out_of_scope% = count_out_of_scope% + 1		!
			end if								!
		    case k_scope_process						!
			if ((pos(node_list$,node_name$,1) > 0) and (is_node_or_host = 0)) or	&
			   ((pos(host_list$,host_name$,1) > 0) and (is_node_or_host = 1))
			then								!
			    when error in						! make sure we don't have duplicates
				d2_noun = "Process-"+ junk$				!
				put #2							!
				handler_error% = 0					! cool
			    use								!
				handler_error% = err					! oops
				count_duplicate% = count_duplicate% + 1			!
				print "-e-ignoring duplicate PROCESS entry: "+ junk$	!
			    end when							!
			    iterate if handler_error% <> 0				!
			    !
			    count_process% = count_process% + 1				! prep to insert
			    if count_process%  >= array_size%	then			! if too big
				print "-w-count_process:",count_process%
				print "   array_size   :",array_size%
				print "   so increasing the size"
				array_size% = array_size% + k_array_size%		! then adjust
				close #1, #2						! close explicitly rather than depend on the re-OPEN
				goto restart_entry_pt					! and restart
			    end if							!
			    items_names$(current_scope%, count_process% ) = junk$	!
			else								!
			    count_out_of_scope% = count_out_of_scope% + 1		!
			end if								!
		    case k_scope_batchjob						!
			if ((pos(node_list$,node_name$,1) > 0) and (is_node_or_host = 0)) or	&
			   ((pos(host_list$,host_name$,1) > 0) and (is_node_or_host = 1))
			then
			    when error in						! make sure we don't have duplicates
				d2_noun = "Batch-"+ junk$				!
				put #2							!
				handler_error% = 0					! cool
			    use								!
				handler_error% = err					! oops
				count_duplicate% = count_duplicate% + 1			!
				print "-e-ignoring duplicate BATCH entry: "+ junk$	!
			    end when							!
			    iterate if handler_error% <> 0				!
			    !
			    count_batchjob% = count_batchjob% + 1			! prep to insert
			    if count_batchjob%  >= array_size%	then			! if too big
				print "-w-count_batchjob:",count_batchjob%
				print "   array_size    :",array_size%
				print "   so increasing the size"
				array_size% = array_size% + k_array_size%		! then adjust
				close #1, #2						! close explicitly rather than depend on the re-OPEN
				goto restart_entry_pt					! and restart
			    end if							!
		 	    items_names$(current_scope%, count_batchjob% ) = junk$	!
			else								!
			    count_out_of_scope% = count_out_of_scope% + 1		!
			end if								!
		    case else								! k_scope_reject (or anything unexpected)
			count_reject% = count_reject% + 1				!
			iterate								!
		end select								!
	    next									!
	use										!
	    handler_error% = err							! oops
	end when									!
	!
	!	error 11 (end of file) is the normal way out of the read loop; anything
	!	else is reported, and is fatal only when nothing at all was loaded
	!
	select handler_error%								!
	    case 11									! end of file: nothing to report
	    case else									!
		print "-e-error: "+ str$(handler_error%) +" while processing the registry"
		if (count_process%  = 0)	and					&
		   (count_batchjob% = 0)						&
		then
		    print "-e-nothing to test so exiting"				!
		    goto fini								!
		end if									!
	end select									!
	!
	!	report what was loaded
	!
	if default_node$ = "" then
	    print "-i-alarm host  : LOCAL"
	else
	    print "-i-alarm host  : ";default_node$
	end if
    if log_level% >= 2 then								!
	!
	print "-i-"+ format$(count_out_of_scope%,"###")+" OUT OF SCOPE items detected in the health registry"
	!
	if count_process% = 0 then							! an empty table rates a warning
	    print "-w-";
	else
	    print "-i-";
	end if
	print	     format$(count_process%,"###")	+" PROCESS      items loaded from the health registry"
	!
	if count_batchjob% = 0 then							! an empty table rates a warning
	    print "-w-";
	else
	    print "-i-";
	end if
	print	     format$(count_batchjob%,"###")	+" BATCHJOB     items loaded from the health registry"
	!
	if (count_process% + count_batchjob% ) = 0 then
	    print "-e-                 Danger! Nothing To Do (config problem?)"
	end if
	!
	print "-i-"+ format$(count_remark%,"###")	+" REMARKED     lines found in the health registry"
	!
	print "-i-"+ format$(count_scopes%,"###")	+" SCOPE        lines found in the health registry"
    end if
	!
	!	rejected lines are always shown; a zero count only at log level 2 and up
	!
	if count_reject% = 0 then							!
	    print "-i-  0 REJECTED     lines found in the health registry"			if log_level% >= 2
	else										!
	    print "-e-"+ format$(count_reject%,"###") +" REJECTED     lines found in the health registry"
	end if										!
	close #1,#2									!
	!========================================================================================================================
	!	MAIN
	!========================================================================================================================
	main:										!
	!
	!	Main loop. Each pass: refresh the logging level and sleep time, clear the
	!	result arrays, run check_processes and check_batchjobs, then either
	!	log + sleep + repeat (BATCH) or report once and leave via fini (all other modes).
	!
	sleep_minutes%	= 0								! init to an illegal value
	count_bogey%	= -1								! init to an illegal value
	log_level%	= -1								! init to an illegal value
3000	while 1										! do this forever -------------------------
	    !
	    !	<<< support logging to the log file >>>
	    !
	    !	0 = errors	(and startup messages)
	    !	1 = warnings
	    !	2 = informationals
	    !	3 = trace	(and codes from iosb)
	    !
	    select pgm_mode$								!
		case "BATCH"								! level comes from a system logical (re-read every pass)
		    when error in							!
			junk$ = WCSM_TrnLnm ("ICSIS$HEALTH_CHECK_LOG","LNM$SYSTEM")	!
			temp% = integer(junk$)						!
		    use									!
			print "-e-logical ICSIS$HEALTH_CHECK_LOG has a bad assignment: "+ junk$
			temp% = 3							! assume full logging
		    end when								!
		case "DEBUG"								! level is typed in by the user
		    print "Debug level menu:"						!
		    print " 0 just errors (and startup messages)"			!
		    print " 1 also warnings      "					!
		    print " 2 also informationals"					!
		    print " 3 also trace + debug"					!
		    input "logging level? (0-3) "; junk$				!
		    when error in							!
			temp% = integer(junk$)						!
		    use									!
			temp% = 0							! assume none
		    end when								!
	        case else								! INTERACTIVE, BRIEF, etc.
		    log_level% = 0							! these modes always run at level 0 from here on
		    goto skip_logging_level						! skip COS logic	***--->>>
	    end select									!
	    !
	    select temp%								! anything outside 0-3 becomes 3
		case 0 to 3								!
		case else								!
		    temp% = 3								!
	    end select									!
	    !
	    if log_level% <> temp% then							!  if this is changing...
		log_level% = temp%							!
		print "-i-Logging level changing to "+ str$(log_level%) +" at "+ formatted_dt_stamp14
	    end if									!
	    skip_logging_level:								!			<<<---***
	    !
	    !	list the loaded items once (first pass that runs at level 2 or higher)
	    !
	    if log_level% >= 2 and one_time% = 0 then					!
		    one_time% = 1							!
		    print								!
		    print "-i-Process Table (one time)"					!
		    for i% = 1 to count_process%					!
			junk% = len(items_names$(k_scope_process, i%))			! (length is computed but not used here)
			print	items_names$(k_scope_process, i%)
		    next i%								!
		    !
		    print								!
		    print "-i-Batchjob Table (one time)"				!
		    for i% = 1 to count_batchjob%					!
			junk% = len(items_names$(k_scope_batchjob, i%))			! (length is computed but not used here)
			print	items_names$(k_scope_batchjob, i%)
		    next i%								!
		    print								!
	    end if									!
	    !
	    !	<<< support sleep >>>
	    !
	    !	sleep time comes from a system logical (re-read every pass); only 1-5 is accepted
	    !
	    when error in								!
		junk$ = WCSM_TrnLnm ("ICSIS$HEALTH_CHECK_SLEEP_MINS","LNM$SYSTEM")	!
		temp% = integer(junk$)							!
		select temp%								!
		    case 1 to 5 							!
		    case else								!
			print "-e-ICSIS$HEALTH_CHECK_SLEEP_MINS range error: "+ junk$	!
			temp% = 5							! default to 5 minutes
		end select								!
	    use										!
		print "-e-logical ICSIS$HEALTH_CHECK_SLEEP_MINS has a bad assignment: "+ junk$
		temp% = 5								! default to 5 minutes
	    end when									!
	    if sleep_minutes% <> temp% then						! if this is changing...
		sleep_minutes% = temp%							! then update sleep
		print "-i-Sleep_Minutes now set to "+ str$(sleep_minutes%) +" at "+ formatted_dt_Stamp14	&
											if log_level% >= 2
	    end if									!
	    !----------------------------------------------------------------------------------------------------
	    mat items_flags%		= zer						! initialize results array
	    mat queue_names$		= nul$						!
	    mat queue_state$		= nul$						!
	    mat queue_state%		= zer						!
	    mat queue_jobs%		= zer						!
	    mat queue_type$		= nul$						!
	    previous_bogey%		= count_bogey%					! save previous value for COS tests
	    count_bogey%		= 0						! init
	    !
	    gosub check_processes							! check processes
	    !
	    gosub check_batchjobs							! check queues
	    !
	    if pgm_mode$ = "BATCH" then							! BATCH
		!
		!	if we are going into an all clear situation, send a "healthy" message
		!
		if count_bogey% = 0 then						! if this pass was good
		    select previous_bogey%						!
			case 0								! previous pass was ok
			    junk$ = wcsm_dt_stamp					! snap shot of time
			    if WCSM_CALC_DIFF_IN_MINS(junk$,healthy_msg_time$) >= 60 then ! if 60 minutes since previous good msg
				healthy_msg_time$ = junk$				!
				msg$ = "state: all tests passed ("+ node_name$ +	&
				    " still appears healthy at: "+ formatted_dt_stamp14 +") - hourly update"
				gosub write_log_only_I					!
				healthy_msg_time$ = wcsm_dt_stamp			! remember this time
			    end if							!
			case > 0							! previous pass found problems
			    msg$ = "state: all tests passed ("+ node_name$ +" now appears healthy)"
			    gosub write_log_only_I					!
			    healthy_msg_time$ = wcsm_dt_stamp				! remember this time
			case -1								! first time thru
			    msg$ = "state: all tests passed ("+ node_name$ +" appears healthy on first pass)"
			    gosub write_log_only_I					!
			    healthy_msg_time$ = wcsm_dt_stamp				! remember this time
		    end select								!
		else									!
		    healthy_msg_time$ = ""						! zap this time
		end if									!
		!
		!	OK, sleep for a little while
		!
		print "-i-Starting sleep at "+ formatted_dt_stamp14	if log_level% >= 2
		sleep sleep_minutes% * 60						! convert to seconds
		print "-i-Woke up at "+ formatted_dt_stamp14	if log_level% >= 2	!
	    else									! INTERACTIVE, DEBUG, etc.
		if count_bogey% > 0 then						! if we had an error...
		    if count_bogey% = 1 then						! pick singular or plural wording
			junk$ = ""							!
			temp$ = "was"							!
		    else								!
			junk$ = "s"							!
			temp$ = "were"							!
		    end if								!
		    print "-e-error, "+str$(count_bogey%)+" error"+ junk$ +" "+ temp$+ " detected"
		    input "hit <enter> to continue..."; junk$				! then make him press a key
		else									!
		    print "-i-status           all tests passed"			!
		    sleep 1								!
		end if									!
		goto fini								! adios...
	    end if									! end if INTERACTIVE
	    !
4000	next										! Check whole system again
	!====================================================================================================
	!	scan all the system processes
	!====================================================================================================
5000	check_processes:								!
	!
	!	Subroutine (GOSUB ... RETURN).
	!	Walks every process on the system with a wildcard sys$getjpiw, flags each
	!	registry process name that is seen, then reports every registry name that
	!	was not seen (and, in BATCH mode, raises an alarm for it).
	!
	!	reads : items_names$(k_scope_process,*), count_process%, pgm_mode$, log_level%, node_name$
	!	writes: items_flags%(k_scope_process,*), count_bogey%, msg$, log_class$, rc%, i%, junk$
	!	exits : RETURN normally; GOTO fini_rc on an unexpected sys$getjpiw status
	!
	print "-i-Starting CHECK_PROCESSES at "+ formatted_dt_stamp14	if log_level% >= 2
	!
	!	stuff to support sys$getjpi
	!
	declare	my_iosb	IosbJpi								! IO Status block (for GetJpiW)
	declare	ile3	JpiLst(1)							! 0-1 items for call to sys$getjpi
	MAP(Jpi)	string	PrcNam	= 15						! process name
	Declare	long	PrcNam_RtnLn							! process name return length
	!
	!	item 0: return the process name into PrcNam (length into PrcNam_RtnLn)
	!
	JpiLst(0)::ile3$w_length	= len( PrcNam	)				!
	JpiLst(0)::ile3$w_code		= JPI$_PRCNAM					! Process Name
	JpiLst(0)::ile3$ps_bufaddr	= LOC( PrcNam		)			!
	JpiLst(0)::ile3$ps_retlen_addr	= LOC( PrcNam_RtnLn	)			!
	!
	JpiLst(1)::ile3$w_length	= 0						!
	JpiLst(1)::ile3$w_code		= JPI$C_ListEnd					! end of list
	!
	Seed_PID%			= -1						! Do a wildcard GETJPI
	print "-i-starting wildcard $GetJpiW" if log_level% >=2 			!
	while 1										! loop until we jump out
	    rc% = SYS$GetJpiW(	,Seed_PID%		by ref,,	&
				JpiLst()		by ref,		&
				IosbJpi::iosb$quad	by ref,,	)		! get process info
	    Select rc%									!
		case SS$_Normal								! got one; fall through to the book keeping
		case SS$_suspended							! can't be examined right now
		    iterate								! so move on to the next process
		case SS$_NoPriv								! not allowed to examine this process
		    iterate								! so move on (don't abort the whole scan)
		case SS$_NoMoreProc							!
		    print "-i-process scan is complete"	if log_level% >= 2		!
		    goto test_items_flags						!
		case else								!
		    print "-e-GetJpi: "+ str$( rc% )					! print rc	(for debug)
		    goto fini_rc							!
	    end select									!
	    !
	    !	we found a process so now do the associated book keeping
	    !
	    !	registry names were loaded with edit$(...,32+2) so the process name
	    !	must be normalized the same way or a name containing a space can never match
	    !
	    junk$ = left$( PrcNam, PrcNam_RtnLn)					!
	    junk$ = edit$( junk$ , 32+2)						! upcase, no white space
	    !
	    !	locate the current process name in the health registry array (it may not be there)
	    !
	    for i% = 1 to count_process%						!
		if items_names$(k_scope_process, i%) = junk$ then			!
		    items_flags%(k_scope_process, i%) = 1				! show it found
		    goto early_scan_proc_exit						! first match is enough
		end if									!
	    next i%									!
	    early_scan_proc_exit:							!
	next										!
	!
	!	<<< test process flags >>>
	!
	test_items_flags:								!
	for i% = 1 to count_process%							! scan the health registry list
	    if items_flags%(k_scope_process, i%) <> 1 then				! if this entry was not found...
		print "-e-time: "+ formatted_dt_stamp14 +" node: "+ node_name$ +						&
			" task: "+ items_names$(k_scope_process, i%) +" is missing"+ bel
		count_bogey% = count_bogey% + 1						! one more problem for this pass
		if pgm_mode$ = "BATCH" then						! only BATCH mode raises alarms
		    msg$ = "task: "+ items_names$(k_scope_process, i%) +" is missing"	!
		    log_class$ = "P"							! P/rogram
		    gosub send_alarm_error 						!				bf_103.1
		end if									!
	    end if									!
	next i%										!
        return										!
	!====================================================================================================
	!	<<< scan all the batch jobs >>>
	!
	!	check_batchjobs (gosub entry point)
	!
	!	This first part only prepares the wildcard queue scan which follows at line 6000:
	!	  1. declares the buffers which $GETQUI fills in (queue name, status, flags, job name)
	!	  2. cancels any $GETQUI operation left over from an earlier pass
	!	  3. builds the item list (QueLst) handed to QUI$_DISPLAY_QUEUE
	!	  4. sets qui_context% to -1 to request a new context
	!====================================================================================================
	check_batchjobs:								!
	print "-i-Starting CHECK_BATCHJOBS at "+ formatted_dt_stamp14	if log_level% >= 2
	!
	!	Storage for info returned by GetQui
	!
	MAP(QuiNam)	string	QueName		= 31%					! queue name
	Declare		long	QueName_RtnLn					,	! queue name return length	&
				QueStatus					,	! queue status			&
				QueFlags						! queue flags
	MAP(JobNam)	string	JobNam		= 31%					! job name
	Declare		long	JobNam_RtnLn						! job name return length
	MAP(QuiSrchNam)	string	SearchName	= 31%					! search string (input to $GETQUI)
	Declare		long	job_search_flags%					! search flags for the job scan
	!
	!	<<< initialize context from previous run >>>
	!
	!	note: don't do an error check here because it would fail if run just after a fresh login
	!	SYS$GETQUI [efn] ,func [,context] [,itmlst] [,iosb] [,astadr] [,astprm]
	!
	qui_context% = 0								!
	rc% = SYS$GetQuiW(	,QUI$_CANCEL_OPERATION	by value, qui_context% by ref,,,,)	! rc% is deliberately not tested (see note)
	!
	!	<<< search all queues on this system >>>
	!
	declare	my_iosb		IosbQue							! IO Status block (for GetQuiW)
	declare	ile3		QueLst(9)						! 0-9 possible items
	!
	Searchname			= "*"						! wildcard (scan all queues)
	!
	!	item 0 (input): the search name; SearchName is the 31-byte MAP string declared above
	!
	QueLst(0)::ile3$w_length	= len( Searchname	)			!
	QueLst(0)::ile3$w_code		= Qui$_SEARCH_NAME				!
	QueLst(0)::ile3$ps_bufaddr	= LOC( SearchName	)			!
	QueLst(0)::ile3$ps_retlen_addr	= 0						! cuz this is an input parameter
	!
	!	item 1 (output): queue name + returned length
	!
	QueLst(1)::ile3$w_length	= len( QueName 		)			!
	QueLst(1)::ile3$w_code		= Qui$_QUEUE_NAME				! we want to see: Queue Name
	QueLst(1)::ile3$ps_bufaddr	= LOC( QueName		)			!
	QueLst(1)::ile3$ps_retlen_addr	= LOC( QueName_RtnLn	)			!
	!
	!	item 2 (output): queue status bits (tested later against the qui$m_queue_xxx masks)
	!
	QueLst(2)::ile3$w_length	= 4						! 4 bytes
	QueLst(2)::ile3$w_code		= Qui$_QUEUE_STATUS				! we want to see: Queue Status
	QueLst(2)::ile3$ps_bufaddr	= LOC(QueStatus	)				!
	QueLst(2)::ile3$ps_retlen_addr	= 0						! returned length not requested
	!
	!	item 3 (output): queue flags (tested later against QUI$M_QUEUE_BATCH)
	!
	QueLst(3)::ile3$w_length	= 4						! 4 bytes
	QueLst(3)::ile3$w_code		= Qui$_QUEUE_FLAGS				! we want to see: Queue Flags
	QueLst(3)::ile3$ps_bufaddr	= LOC(QueFlags	)				!
	QueLst(3)::ile3$ps_retlen_addr	= 0						! returned length not requested
	!
	!	item 4: zero length + zero code terminates the list (entries 5-9 are not used here)
	!
	QueLst(4)::ile3$w_length	= 0						! end of list
	QueLst(4)::ile3$w_code		= 0						!
	!
	print "-i-starting wildcard $GetQuiW" if log_level% >=2				!
	qui_context% = -1								! request a new context
6000	while 1										! do this until we jump out
	    !
	    !	Outer loop : one pass per queue returned by the wildcard QUI$_DISPLAY_QUEUE
	    !	Inner loop : one pass per job returned by QUI$_DISPLAY_JOB for the current queue
	    !
	    !	exits : que_scan_exit	when the IOSB reports jbc$_nomoreque
	    !		fini_rc		on any fatal $GETQUI status (rc% holds the failing status)
	    !
	    !	SYS$GETQUI [efn] ,func [,context] [,itmlst] [,iosb] [,astadr] [,astprm]
	    !
	    rc% = SYS$GetQuiW(	,QUI$_DISPLAY_QUEUE	by value,	&
				qui_context%		by ref,		&
				QueLst()		by ref,		&
				IosbQue::iosb$quad 	by ref,,	)		! get queue info
	    Select rc%
		case SS$_Normal								! call was successful
		    select IosbQue::iosb$l_getxxi_status				! so test iosb$
			case jbc$_normal						! got a queue
			case jbc$_nomoreque						!
			    print "-i-queue scan (job) is complete"	if log_level% >= 2
			    goto que_scan_exit						!
			case else							!
			    print "-e-GetQui(que) iosb: "+ str$(IosbQue::iosb$l_getxxi_status)	+		&
				" msg: "+return_system_msg( IosbQue::iosb$l_getxxi_status, k_msg_limited)
			    print "-e-Teminating on fatal error"
			    rc% = IosbQue::iosb$l_getxxi_status				! rc% is SS$_Normal here so exit with the real failure
			    goto fini_rc						!oops				bf_116.1
		    end select								!
		case else								!
		    print "-e-GetQui(que): "+ str$(rc%)+" "+return_system_msg(rc%, k_msg_all)
		    print "-e-Teminating on fatal error"
		    goto fini_rc							!
	    end select									!
	    !
	    temp_que$ = left$( QueName, QueName_RtnLn)					!
	    temp_que$ = edit$( temp_que$ , 32%)						! upcase
	    if log_level% >= 3 then							!
		print "-d- queue name: ";temp_que$;" =============================="	!
		sleep 1									!
	    end if									!
	    q_status$ = ""								!
	    q_status$ = q_status$ + ",AUTOSTART INACTIVE"	if (QueStatus and qui$m_queue_autostart_inactive) <> 0
	    q_status$ = q_status$ + ",CLOSED"			if (QueStatus and qui$m_queue_closed		) <> 0
	    q_status$ = q_status$ + ",PAUSED"			if (QueStatus and qui$m_queue_paused		) <> 0
	    q_status$ = q_status$ + ",PAUSING"			if (QueStatus and qui$m_queue_pausing		) <> 0
	    q_status$ = q_status$ + ",STOPPING"			if (QueStatus and qui$m_queue_stopping		) <> 0
	    q_status$ = q_status$ + ",STOPPED"			if (QueStatus and qui$m_queue_stopped		) <> 0
	    q_status$ = q_status$ + ",STALLED"			if (QueStatus and qui$m_queue_stalled		) <> 0
	    q_status$ = q_status$ + ",UNAVAILABLE"		if (QueStatus and qui$m_queue_unavailable	) <> 0
	    !
	    !	we want to see one of these states
	    !
	    q_state%		= 0							! init
	    if (QueStatus and qui$m_queue_idle		) <> 0	then			!
		q_status$	= q_status$ + ",IDLE"					!
		q_state%	= 1							!
	    end if									!
	    if (QueStatus and qui$m_queue_available	) <> 0	then			!
		q_status$	= q_status$ + ",AVAILABLE"				!
		q_state%	= 1							!
	    end if									!
	    if (QueStatus and qui$m_queue_busy		) <> 0	then			!
		q_status$	= q_status$ + ",BUSY"					!
		q_state%	= 1							!
	    end if									!
	    !
	    !	classify the queue on every pass so q_type$ always describes the CURRENT queue
	    !	(it is tested in the job loop below and must never be left over from a previous queue)
	    !
	    if (QueFlags and QUI$M_QUEUE_BATCH) <> 0 then				!				bf_106.1
		q_type$ = "B"								! batch queue
	    else									!
		q_type$ = "P"								! print queue
	    end if									!
	    !
	    !	store queue info (name$, state$, state%)
	    !
	    !	save_i% = 0 means "no registry slot for this queue" (registry is full)
	    !
	    save_i%	= 0								! assume no slot
	    for i% = 1 to array_size%							!
		select queue_names$(i%)							!
		    case = ""								! this entry is blank
			queue_names$(i%) = temp_que$					!
			queue_state$(i%) = q_status$					!
			queue_state%(i%) = q_state%					!
			queue_type$(i%)	= q_type$					!
			save_i%		= i%						! save array insertion pointer
			goto	store_queue_info_exit					!
		    case = temp_que$							! this entry already exists
			save_i%		= i%						! save pointer to the existing entry
			goto	store_queue_info_exit					!
		    case else								! else keep scanning...
		end select								!
	    next i%									!
	    !
	    !	we only fall through to here when no blank or matching entry was found
	    !
	    print "-e-queue registry is full (array_size="+ str$(array_size%) +") so not tracking queue: "+ temp_que$
	    store_queue_info_exit:							!
	    !
	    !		<<< now prep to scan the jobs in this queue >>>
	    !
	    job_search_flags%	=	qui$m_search_wildcard	or			! wildcard				&
					qui$m_search_all_jobs				! all queued jobs (not just our own)
	    !
	    declare	my_iosb		IosbJob						! IO Status block (for GetQuiW)
	    declare	ile3		JobLst(2)					! 0-2 possible items
	    !
	    JobLst(0)::ile3$w_Length		= 4					! input parameter
	    JobLst(0)::ile3$w_Code		= Qui$_SEARCH_FLAGS			!
	    JobLst(0)::ile3$ps_BufAddr		= LOC( job_Search_Flags%)		!
	    JobLst(0)::ile3$ps_RetLen_Addr	= 0					!
	    !
	    JobLst(1)::ile3$w_Length		= len( JobNam 		)		! output
	    JobLst(1)::ile3$w_Code		= Qui$_JOB_NAME				! job name
	    JobLst(1)::ile3$ps_BufAddr		= LOC( JobNam		)		!
	    JobLst(1)::ile3$ps_RetLen_Addr	= LOC( JobNam_RtnLn	)		!
	    !
	    JobLst(2)::ile3$w_Length		= 0
	    JobLst(2)::ile3$w_Code		= 0					! end of list
	    !
	    !		<<< look for all jobs in the current queue >>>
	    !
	    while 1									!
	        rc% = SYS$GetQuiW(	,QUI$_DISPLAY_JOB	by value,	&
					qui_context%		by ref,		&
					JobLst()		by ref,		&
					IosbJob::iosb$quad 	by ref,,	)	! get job info
		Select rc%								! test return code
		    case SS$_Normal							! success
			select IosbJob::iosb$l_getxxi_status				! so test completion code
			    case jbc$_normal						! found a job entry so drop thru
			    case else							! no more jobs?
				if log_level% >= 3 then					!
				    print "-d- job-trace2a: queue>"; temp_que$; " queue-status>";q_status$;" queue-type>";q_type$
				    print "-d- job-trace2b: GetQui(job) iosb: "+ str$(IosbJob::iosb$l_getxxi_status)	+	&
					" msg: "+ return_system_msg(IosbJob::iosb$l_getxxi_status, k_msg_limited )
				    print "=== not-normal (no-more-jobs?) ==="		!
				    print						!
				end if							!
				goto no_more_jobs					!
			end select							!
		    case else								!
			print "-e-GetQui(job): "+ str$(rc%) +" msg: "+ return_system_msg(rc%, k_msg_all)
			goto fini_rc							! rc% already holds the failing status
		end select								!
		!
		!	now that we've got a queue entry to test...
		!
		temp_job$ = left$( JobNam, JobNam_RtnLn)				!
		temp_job$ = edit$( temp_job$ , 32)					! upcase
		!
		if log_level% >= 3 then							!
		    print "-d- job-trace1a: queue>"; temp_que$; " queue-status>";q_status$;				&
			" queue-type>";q_type$; " job>"; temp_job$
		    print "-d- job-trace1b: GetQui(job) iosb: "+ str$(IosbJob::iosb$l_getxxi_status)	+		&
			" msg: "+ return_system_msg(IosbJob::iosb$l_getxxi_status, k_msg_limited )
		    print "=== jbc$_normal ==="						!
		    print								!
		end if									!
		!
		!	sometimes a TCP/IP job will get stuck (in a running state) causing hundreds of other jobs to become blocked
		!
		!	(only count when this queue owns a registry slot; see save_i% above)
		!
		queue_jobs%(save_i%) = queue_jobs%(save_i%) + 1	if save_i% > 0		! remember the job count for later
		!
		!	if this is a batch queue, then locate the queue's job names in the health registry array
		!
		!	(note 1: if they're not being tracked via the HealthManager, then they won't be found there)
		!	(note 2: don't let a retained print job make us think that we've found the correct batch job)
		!
		goto early_scan_job_exit if q_type$ <> "B"				! if not a batch queue		bf_106.1
		for i% = 1% to count_batchjob%						!
		    if items_names$(k_scope_batchjob, i%) = temp_job$ then		!
			items_flags%(k_scope_batchjob, i%) = items_flags%(k_scope_batchjob, i%) + 1	! show it found
			goto early_scan_job_exit					!
		    end if								!
		next i%									!
		early_scan_job_exit:							!
	        !
	    next									!
	    no_more_jobs:								!
	next										! Go back for next queue
	que_scan_exit:									!
	!
	!	<<< test batchjob flags >>>
	!
7000	test_batchjob_flags:								!
	!
	!	walk the health registry list of batch jobs and report on the "found" count of each one
	!	  count = 1	: found exactly once, so nothing to report
	!	  count = 0	: missing (printed, counted, and alarmed when running in BATCH mode)
	!	  otherwise	: multiple entries (printed and counted only)
	!
	for i% = 1% to count_batchjob%							!
	    if items_flags%(k_scope_batchjob, i%) = 0 then				! wasn't found...
		print "-e-time: "+ formatted_dt_stamp14 +" node: "+ node_name$ +	&
		    " batch job: "+ items_names$(k_scope_batchjob, i%) +" is missing"+ bel
		count_bogey% = count_bogey% + 1						!
		if pgm_mode$ = "BATCH" then						!
		    msg$ = "batch job: "+ items_names$(k_scope_batchjob, i%) +" is missing"
		    log_class$ = "B"							! B/atch
		    gosub send_alarm_error						!				bf_103.1
		end if									!
	    else									!
		if items_flags%(k_scope_batchjob, i%) <> 1 then				! multiple copies...
		    print "-e-time: "+ formatted_dt_stamp14 +" node: "+ node_name$ +	&
			" batch job: "; items_names$(k_scope_batchjob, i%) +" has multiple entries"+bel
		    count_bogey% = count_bogey% + 1					!
		end if									!
	    end if									!
	next i%										!
	!
	!	<<< test queue state >>>
	!
8000	print "-i-queue scan (state) is complete"	if log_level% >= 2		!
	!
	!	walk the queue registry built during the scan; the first blank name ends the list
	!	  - queue ICSIS$SPOOL_TOOL is skipped						bf_107.3
	!	  - a queue with queue_state% = 0 is reported as "not ready"
	!	  - a queue holding k_job_limit% (or more) jobs is reported as "too many jobs"
	!	    (alarm is an error at twice the limit, otherwise a warning)		bf_111.6
	!
	for i% = 1% to array_size%							!
	    goto scan_queue_info_exit	if queue_names$(i%) = ""			! this entry is blank so we are done
	    if queue_names$(i%) <> "ICSIS$SPOOL_TOOL" then				! ignore this one queue
		!
		if queue_state%(i%) = 0% then						!
		    print "-e-time: "+ formatted_dt_stamp14 +" node: "+ node_name$ +	&
			" queue: "+ queue_names$(i%) +" is not ready"+ bel		!
		    count_bogey% = count_bogey% + 1					!
		    if pgm_mode$ = "BATCH" then						!
			msg$ = "queue: "+ queue_names$(i%) +" is not ready"		!
			log_class$ = "Q"						! Q/ueue
			gosub send_alarm_error						!				bf_103.1
		    end if								!
		end if									!
		!
		!	both limit tests are kept so the outcome is the same for any value of k_job_limit%
		!
		if (queue_jobs%(i%) >= (k_job_limit% * 2)) or (queue_jobs%(i%) >= k_job_limit%) then
		    print "-e-time: "+ formatted_dt_stamp14 +" node: "+ node_name$ +					&
			" queue: "+ queue_names$(i%) +" too many jobs ("+ str$(queue_jobs%(i%)) +")"+ bel
		    count_bogey% = count_bogey% + 1					!
		    if pgm_mode$ = "BATCH" then						!
			msg$ = "queue: "+ queue_names$(i%) +" too many jobs ("+ str$(queue_jobs%(i%)) +")"
			log_class$ = "Q"						! Q/ueue
			if queue_jobs%(i%) >= (k_job_limit% * 2) then			! if really high
			    gosub send_alarm_error					!				-e-
			else								! if too high
			    gosub send_alarm_warning					!				-w-
			end if								!
		    end if								!
		end if									!
	    end if									!
	next i%										!
	scan_queue_info_exit:								!
	!
        return										!

	!====================================================================================================
	!	event-log and alarm support routines									bf_103.1
	!
	!	open_msg_file_108 (gosub entry point)
	!	opens the health-event-log on channel 108 using the two included files below
	!	(a copy of both included files is listed at the bottom of this source file)
	!
	!	on failure: the error number, time and file name are printed, then we return to the caller
	!
	!	note: the log/alarm entry points which follow expect msg$ to be set up by their caller
	!====================================================================================================
	open_msg_file_108:							!
	%include "[.fil]icsis_health_event_log_100.rec"				! record declarations
	!
	local% = 0								!
	when error in								!
	    d108_chunk = ""							! blank the whole record
	    ! note: default_node$ might be blank or a DECnet address (see above logic)
	    %include "[.fil]icsis_health_event_log_100.opn"			! the open statement
	use									!
	    select default_node$						! local file or via DECnet?
		case = ""							!
		    print "-e-error: "+ str$(err) +" opening health-event-log on channel 108"
		case else							!
		    print "-e-error: "+ str$(err) +" opening health-event-log on channel 108 via DECnet ";default_node$
	    end select								!
	    print "-i-time : "+ formatted_dt_stamp14				!
	    print "-i-file : "+ default_node$ + k_fs_icsis_health_event_log$	!
	end when								!
	return									!
	!=======================================================================
	!	write to our web-viewable log file (and optionally raise an alarm)
	!
	! note: the contents of this file are displayed in my web-based viewer
	!
	! entry points (all reached via gosub; all leave via send_alarm_exit):
	!	write_log_only_I	log only, type I, class blank
	!	write_log_only_E	log only, type E, class blank
	!	send_alarm_warning	log + alarm, type W
	!	send_alarm_error	log + alarm, type E
	!
	! inputs:
	!	msg$		message text (must be set up by the caller)
	!	log_class$	B/P/Q (must be set up by the caller of send_alarm_xxx)
	!
	! side effects:
	!	channel 108 is opened on every call and closed again when
	!	default_node$ is not blank; msg$ is prefixed with "Health: "
	!	when the alarm mailbox code is compiled in (%alarmmbx=1)
	!=======================================================================
	!
	!	write log only (informational)
	!
	write_log_only_I:							!
	log_only%	= 1							!
	log_type$	= "I"							! I/nformational
	log_class$	= ""							! no class (not: B/P/Q)
	goto send_alarm_continue						!			***--->>>
	!
	!	write log only (error)
	!
	write_log_only_E:							!
	log_only%	= 1							!
	log_type$	= "E"							! E/rror
	log_class$	= ""							! no class (not: B/P/Q)
	goto send_alarm_continue						!			***--->>>
	!
	!	send alarm (and write log)
	!	entry: log_class$ should be defined as one of: B/P/Q
	!
9000	send_alarm_warning:							!
	log_type$ = "W"								!
	log_only% = 0								!
	goto send_alarm_continue						!			***--->>>
	!
	!	send alarm (and write log)
	!	entry: log_class$ should be defined as one of: B/P/Q
	!
	send_alarm_error:							!
	log_type$ = "E"								!
	log_only% = 0								!
	!
	!	write the message to our log file
	!
	send_alarm_continue:							!			<<<---***
	when error in								!
!~~~	    gosub open_msg_file_108	if file108_open% = 0			x open the file (only if necessary)
	    gosub open_msg_file_108						! always open/reopen the file
	    d108_time		= wcsm_dt_stamp16				!
	    d108_node		= node_name$					!
	    d108_facility	= k_facility$					!
	    d108_type		= log_type$					!
	    d108_class		= log_class$					!
	    d108_text		= msg$						!
	    print "-i-writing to log file" if log_level% >= 2			!
	    put #108								!
	use									!
	    print "-e-error: "+ str$(err) +" during put 108"			!
	    close #108								! this will force a reopen on the next attempt
	    file108_open% = 0							!
	end when								!
	if default_node$ <> "" then						! don't leave this DECnet connection nailed-up
	    close #108								!
	    file108_open% = 0							!
	end if									!
	if log_only% > 0 then							! log-only callers stop here (no alarm)
	    log_only% = 0							!
	    goto send_alarm_exit						!
	end if									!
	!
	!	now send to our system's alarm mailbox
	!
	!	VMS has an interprocess communication device known as a "mailbox" (has nothing to do with mail)
	!	In the UNIX world we might use the name "pipe"
	!
    %if %alarmmbx=1 %then							! if mailbox communications are desired
	!
	!	type:	0 create permanent (public ) mailbox (need privs)
	!		1 create temporary (private) mailbox
	!		2 bypass mailbox create and just do a qio (non-priv)
	!
	!	three overlaid views of the same 218 byte mailbox message
	!
	map(mbxTemp)string	mbx_temp_whole		= 218			!					bf_108.1
	map(mbxTemp)string	mbx_temp_noun		=  15,			!&
				mbx_temp_verb		=  15,			!&
				mbx_temp_sender		=  15,			!&
				mbx_temp_receive	=  15,			!&
				mbx_temp_misc1		=  15,			!&
				mbx_temp_misc2		=  15,			!&
				mbx_temp_misc3		=  15,			!&
				mbx_temp_misc4		=  15,			!&
				mbx_temp_misc5		=  15,			!&
				mbx_temp_misc6		=  15,			!&
				mbx_temp_misc7		=  15,			!&
				mbx_temp_misc8		=  15,			!&
				mbx_temp_tkt_serial	=   6,			!&
				mbx_temp_lst_type	=   2,			! eg. "01" = just 3 letter groups	bf_108.1 &
				mbx_temp_rcv_list	=  30			! eg. "CTL,ADM,ELG"			bf_108.1
	map(mbxTemp)string	mbx_temp_noun		=  15,			!&
				mbx_temp_verb		=  15,			!&
				mbx_temp_sender		=  15,			!&
				mbx_temp_receive	=  15,			!&
				mbx_temp_misc		= 120,			!&
				mbx_temp_tkt_serial	=   6,			!&
				mbx_temp_lst_type	=   2,			! eg. "01" = just 3 letter groups	bf_108.1 &
				mbx_temp_rcv_list	=  30			! eg. "CTL,ADM,ELG"			bf_108.1
	!
	mbx_temp_whole		= ""						! zap everything
	mbx_temp_lst_type	= "01"						! just 3 letter groups			bf_108.1
	mbx_temp_noun		= "HEALTH"					! use this routine in the alarm process
	mbx_temp_verb		= "LIST"					!
	mbx_temp_sender		= "SYS.SYS"					!
	mbx_temp_rcv_list	= "ATC,CTL,ADM,ELG"				!
	!
	msg$ = "Health: "+ msg$							! note: this changes the caller's msg$
	mbx_temp_misc	= msg$							!
	!
	!	CTL (our control center)
	!	ADM (the admin group)
	!	ELG (our off normal control center)
	!	ATC (also on the receive list above but not described in these notes)
	!
	print "-i-sending this text> "+ msg$ +" < to alarm server" if log_level% >= 2
	junk% = WCSM_Mbx_Send4_102(	"CSMIS$ALARM"				! mbx name			&
					,2%					! mbx_type%=skip create		&
					,7%					! qio_type%=qioW + now + no-rez	&
					,mbx_temp_whole+cr	)		!
	if (junk% and 7%)<>1% then						! low 3 bits <> 1 means the send failed
	    print "-e-WCSM_Mbx_Send4_102-rc: "+ str$(junk%)			! report the status returned by the send
	else									!
	    sleep 1								! no need to delay if function failed
	end if									!
   %end %if									!
	!
	send_alarm_exit:							!
	return									!
	!=======================================================================
	!	common BASIC error handler
	!
	!	prints the line number (erl), error number (err), error text
	!	(ert$) and a time stamp, then resumes at label fini which sets
	!	rc% to 1 and ends the program
	!
	!	NOTE(review): presumably installed by an ON ERROR GOTO statement
	!	located outside of this part of the file - confirm
	!=======================================================================
31000	trap:
	!
	!	one print statement so the four items are emitted as a single string
	!
	print	cr + lf + "Line = "+ str$(erl) + 				&
		cr + lf + "Error= "+ str$(err) + 				&
		cr + lf + "Text = "+ ert$(err) + 				&
		cr + lf + "At   = "+ formatted_dt_stamp14			!
	!
	resume fini								! fix stack + exit
	!
	!=======================================================================
	!	adios...
	!=======================================================================
	!
	!	rc% must be set up before this point
	!
	fini_rc:								! entry 1: caller has already loaded rc%
	goto fini_common							!
	!
	Fini:									! entry 2: exit with status 1
	rc% = 1									! VMS-s-
	!
	!	common exit path: in BATCH mode leave a record in the event log before ending
	!
	fini_common:
	if pgm_mode$ = "BATCH" then						! define in DCL script that runs this
	    msg$	= k_program$ +"_"+ k_version$ +" is exiting abnormally"	!
	    gosub write_log_only_E						!
	end if									!
	print "============================================================"	! the last line goes out (user will see it)
32000	end program rc%								!				<<<---***
	!
	!########################################################################################################################
	!
	!	<<< external functions >>>
	!
32100	!=======================================================================
	! title  : formatted_dt_stamp14
	! author : Neil Rieck
	! purpose: takes the string returned by WCSM_dt_stamp16 and returns
	!	   characters 1-8 + "." + characters 9-14 (ccyymmdd.hhmmss)
	!=======================================================================
	function string formatted_dt_stamp14
	option type = explicit							!
	external string function WCSM_dt_stamp16				!
	declare string stamp$							! raw stamp from WCSM_dt_stamp16
	stamp$ = WCSM_dt_stamp16						!
	formatted_dt_stamp14 = left$(stamp$, 8) +"."+ mid$(stamp$, 9, 6)	! ccyymmdd.hhmmss
	end function								!
	!
32110	!=======================================================================
	! title  : return_system_msg
	! author : Neil Rieck
	! created: 2002-10-16
	! purpose: returns system error text from an error number
	! params : msgid%	the status/condition value to translate
	!	   flags%	selects which message fields SYS$GETMSG returns
	! flags  :	k_msg_text	=  1
	!		k_msg_ident	=  2
	!		k_msg_severity	=  4
	!		k_msg_facilty	=  8
	! returns: the message text, or "-e-SYS$GETMSG rc: nnn" when the
	!	   SYS$GETMSG call itself fails
	! notes  : the buffer must be given an explicit length; a MAP string
	!	   declared without one is only 16 bytes long which truncated
	!	   every message to 16 characters
	!=======================================================================
	function string return_system_msg(long msgid%, long flags%)		!
	option type = explicit							!
	%include "starlet" %from %library "sys$library:basic$starlet"		! system services
	map(GetMsg)string temp$ = 256						! 256 bytes: room for a full message
	declare long	rc%		,&
		word	rtn_len%
	!
	!	SYS$GETMSG msgid ,msglen ,bufadr ,[flags] ,[outadr]
	!
	rc% = SYS$GETMSG(msgid% by value, rtn_len% by ref, temp$ by desc, flags% by value,)
	if ((rc% and 7%) = 1%) then						! low 3 bits = 1 means success
	    return_system_msg = left$(temp$,rtn_len%)				! only the bytes which were returned
	else									!
	    return_system_msg = "-e-SYS$GETMSG rc: "+str$(rc%)			!
	end if									!
	end function								!
	!
32120	%include "[.fun]wcsm_dt_stamp.fun"					!
        ! function string WCSM_DT_Stamp
	!
32130	%include "[.fun]wcsm_dt_stamp16.fun"					!
        ! function string WCSM_DT_Stamp16
	!
32140	%include "[.fun]wcsm_trnlnm.fun"					!
	!	function string WCSM_Trnlnm
	!
32150	%include "[.fun]WCSM_CRELNM.FUN"					!
        !       long wcsm_crelnm(string, string, string)
	!
	!=======================================================================
	!	title  : wcsm_mbx_send4
	!	purpose: sends text messages to the alarm server
	!=======================================================================
32170	%include "[.fun]wcsm_mbx_send4_102.fun"
	!
	!	function long function WCSM_Mbx_Send4_102 (string,long,long,string)
	!		(mailbox_name$, mailbox_type%, qio_mode%, Mailbox_msg$)
	!
32180	%include "[.fun]WCSM_CALC_DIFF_IN_MINS.fun"
	!
%let %enable=0									! not needed for production builds
%if  %enable=1 %then								!
	!
	!	this stuff was tacked-on here so the public could see the contents of 2 included files above
	!	(it is never compiled while %enable=0)
	!
	!	note: the file-spec constant is named k_fs_icsis_health_event_log$ in both parts of this
	!	      listing so that it agrees with the name referenced by open_msg_file_108
	!
	!=============================================================================================
	! Title  : [.fil]icsis_health_event_log_xxx.rec
	! Author : Neil Rieck
	! Purpose: to record events for the ICSIS Health manager
	!
	! ver who when   what
	! --- --- ------ -----------------------------------------------------------------------------
	! 100 NSR 091016 1. original work
	!     NSR 091023 2. expanded d108_text from 72 -> 90
	!=============================================================================================
	declare string constant									&
		k_fs_icsis_health_event_log$	= "csmis$dat:icsis_health_event_log_100.dat"
	!
	map (HealthEventLog)									&
		string	d108_time			= 16,	! 16 ccyymmddhhmmsstt	 	&
			d108_node			= 15,	! 31 kawc96			&
			d108_facility			= 15,	! 46 HealthCheck		&
			d108_type			=  1,	! 47 E/rror		 	&
			d108_class			=  1,	! 48 B/atch			&
			d108_text			= 90,	!138				&
			d108_align			=  0	!138
	map (HealthEventLog)									&
		string	d108_chunk			=138,	!138  the whole nine yards	&
			d108_align			=  0	!138
	!
	!==========================================================================================
	! Title  : [.fil]icsis_health_event_log_100.opn
	! Purpose: opens the ICSIS health event log on channel 108
	! Notes  : file108_open% is 0 on entry and only becomes 1 when the open statement succeeds
	!==========================================================================================
	open_file108:
	!
	file108_open% = 0							! show file closed
	!
	open ( default_node$ + k_fs_icsis_health_event_log$ ) as #108				&
		,access		modify								&
		,allow		modify								&
		,organization	indexed								&
		,map		HealthEventLog							&
		,primary key	( d108_time )		duplicates descending
	!
	file108_open% = 1							! show file open
	!
    %end %if