patch-2.4.20 linux-2.4.20/drivers/s390/s390mach.c

Next file: linux-2.4.20/drivers/s390/sysinfo.c
Previous file: linux-2.4.20/drivers/s390/s390io.c
Back to the patch index
Back to the overall index

diff -urN linux-2.4.19/drivers/s390/s390mach.c linux-2.4.20/drivers/s390/s390mach.c
@@ -22,6 +22,11 @@
 #include <asm/s390io.h>
 #include <asm/s390dyn.h>
 #include <asm/s390mach.h>
+#ifdef CONFIG_MACHCHK_WARNING
+#include <asm/signal.h>
+#endif
+
+extern void ctrl_alt_del(void);
 
 #define S390_MACHCHK_DEBUG
 
@@ -31,6 +36,9 @@
 static void        s390_enqueue_free_mchchk( mache_t *mchchk );
 static mache_t    *s390_dequeue_free_mchchk( void );
 static int         s390_collect_crw_info( void );
+#ifdef CONFIG_MACHCHK_WARNING
+static int         s390_post_warning( void );
+#endif
 
 static mache_t    *mchchk_queue_head = NULL;
 static mache_t    *mchchk_queue_tail = NULL;
@@ -41,6 +49,9 @@
 
 static struct semaphore s_sem;
 
+#ifdef CONFIG_MACHCHK_WARNING
+static int mchchk_wng_posted = 0;
+#endif
 
 /*
  * s390_init_machine_check
@@ -98,31 +109,50 @@
 	} /* endif */
 
 #ifdef S390_MACHCHK_DEBUG
-	printk( "init_mach : starting machine check handler\n");
+	printk( KERN_NOTICE "init_mach : starting machine check handler\n");
 #endif	
 
 	kernel_thread( s390_machine_check_handler, &s_sem, CLONE_FS | CLONE_FILES);
 
 	ctl_clear_bit( 14, 25 );  // disable damage MCH 	
+
+	ctl_set_bit( 14, 26 ); /* enable degradation MCH */
+	ctl_set_bit( 14, 27 ); /* enable system recovery MCH */
 #if 1
   	ctl_set_bit( 14, 28 );		// enable channel report MCH
 #endif
+#ifdef CONFIG_MACHCK_WARNING
+	ctl_set_bit( 14, 24);   /* enable warning MCH */
+#endif
 
 #ifdef S390_MACHCHK_DEBUG
-	printk( "init_mach : machine check buffer : head = %08X\n",
+	printk( KERN_DEBUG "init_mach : machine check buffer : head = %08X\n",
             (unsigned)&mchchk_queue_head);
-	printk( "init_mach : machine check buffer : tail = %08X\n",
+	printk( KERN_DEBUG "init_mach : machine check buffer : tail = %08X\n",
             (unsigned)&mchchk_queue_tail);
-	printk( "init_mach : machine check buffer : free = %08X\n",
+	printk( KERN_DEBUG "init_mach : machine check buffer : free = %08X\n",
             (unsigned)&mchchk_queue_free);
-	printk( "init_mach : CRW entry buffer anchor = %08X\n",
+	printk( KERN_DEBUG "init_mach : CRW entry buffer anchor = %08X\n",
             (unsigned)&crw_buffer_anchor);
-	printk( "init_mach : machine check handler ready\n");
+	printk( KERN_DEBUG "init_mach : machine check handler ready\n");
 #endif	
 
 	return;
 }
 
+static void s390_handle_damage(char * msg){
+
+	unsigned long caller = (unsigned long) __builtin_return_address(0);
+
+	printk(KERN_EMERG "%s\n", msg);
+#ifdef CONFIG_SMP
+	smp_send_stop();
+#endif
+	disabled_wait(caller);
+	return;
+}
+
+
 /*
  * s390_do_machine_check
  *
@@ -135,13 +165,31 @@
 	mcic_t   mcic;
 
 #ifdef S390_MACHCHK_DEBUG
-	printk( "s390_do_machine_check : starting ...\n");
+	printk( KERN_INFO "s390_do_machine_check : starting ...\n");
 #endif
 
 	memcpy( &mcic,
 	        &S390_lowcore.mcck_interruption_code,
 	        sizeof(__u64));
  		
+	if (mcic.mcc.mcd.sd) /* system damage */
+		s390_handle_damage("received system damage machine check\n");
+
+	if (mcic.mcc.mcd.pd) /* instruction processing damage */
+		s390_handle_damage("received instruction processing damage machine check\n");
+
+	if (mcic.mcc.mcd.se) /* storage error uncorrected */
+		s390_handle_damage("received storage error uncorrected machine check\n");
+
+	if (mcic.mcc.mcd.sc) /* storage error corrected */
+		printk(KERN_WARNING "received storage error corrected machine check\n");
+
+	if (mcic.mcc.mcd.ke) /* storage key-error uncorrected */
+		s390_handle_damage("received storage key-error uncorrected machine check\n");
+
+	if (mcic.mcc.mcd.ds && mcic.mcc.mcd.fa) /* storage degradation */
+		s390_handle_damage("received storage degradation machine check\n");
+
 	if ( mcic.mcc.mcd.cp )	// CRW pending ?
 	{
 		crw_count = s390_collect_crw_info();
@@ -153,9 +201,38 @@
 		} /* endif */
 
 	} /* endif */
+#ifdef CONFIG_MACHCHK_WARNING
+/*
+ * The warning may remain for a prolonged period on the bare iron.
+ * (actually till the machine is powered off, or until the problem is gone)
+ * So we just stop listening for the WARNING MCH and prevent continuously
+ * being interrupted.  One caveat is however, that we must do this per 
+ * processor and cannot use the smp version of ctl_clear_bit().
+ * On VM we only get one interrupt per virtally presented machinecheck.
+ * Though one suffices, we may get one interrupt per (virtual) processor. 
+ */
+	if ( mcic.mcc.mcd.w )	// WARNING pending ?
+	{
+		// Use single machine clear, as we cannot handle smp right now
+		__ctl_clear_bit( 14, 24 );	// Disable WARNING MCH
+
+		if ( ! mchchk_wng_posted )
+		{ 
+			mchchk_wng_posted = s390_post_warning();
+
+			if ( mchchk_wng_posted )
+			{
+				up( &s_sem );
+
+			} /* endif */
+
+		} /* endif */
+
+	} /* endif */
+#endif
 
 #ifdef S390_MACHCHK_DEBUG
-	printk( "s390_do_machine_check : done \n");
+	printk( KERN_INFO "s390_do_machine_check : done \n");
 #endif
 
 	return;
@@ -183,19 +260,19 @@
         sigfillset(&current->blocked);
 
 #ifdef S390_MACHCHK_DEBUG
-	printk( "mach_handler : ready\n");
+	printk( KERN_NOTICE "mach_handler : ready\n");
 #endif	
 
 	do {
 
 #ifdef S390_MACHCHK_DEBUG
-		printk( "mach_handler : waiting for wakeup\n");
+		printk( KERN_NOTICE "mach_handler : waiting for wakeup\n");
 #endif	
 
 		down_interruptible( sem );
 
 #ifdef S390_MACHCHK_DEBUG
-		printk( "\nmach_handler : wakeup ... \n");
+		printk( KERN_NOTICE "\nmach_handler : wakeup ... \n");
 #endif	
 		found = 0; /* init ... */
 
@@ -240,6 +317,16 @@
 
 			} /* endif */
 
+#ifdef CONFIG_MACHCHK_WARNING
+			if ( pmache->mcic.mcc.mcd.w )
+			{
+				ctrl_alt_del();		// shutdown NOW!
+#ifdef S390_MACHCHK_DEBUG
+			printk( KERN_DEBUG "mach_handler : kill -SIGPWR init\n");
+#endif
+			} /* endif */
+#endif
+
 			s390_enqueue_free_mchchk( pmache );
 		}
 		else
@@ -247,7 +334,7 @@
 
 			// unconditional surrender ...
 #ifdef S390_MACHCHK_DEBUG
-			printk( "mach_handler : nothing to do, sleeping\n");
+			printk( KERN_DEBUG "mach_handler : nothing to do, sleeping\n");
 #endif	
 
 		} /* endif */	
@@ -424,7 +511,7 @@
 	int      count  = 0;    /* CRW count */
 
 #ifdef S390_MACHCHK_DEBUG
-	printk( "crw_info : looking for CRWs ...\n");
+	printk( KERN_DEBUG "crw_info : looking for CRWs ...\n");
 #endif
 
 	do
@@ -436,7 +523,7 @@
 			count++;
 			
 #ifdef S390_MACHCHK_DEBUG
-			printk( "crw_info : CRW reports "
+			printk( KERN_DEBUG "crw_info : CRW reports "
 			        "slct=%d, oflw=%d, chn=%d, "
 			        "rsc=%X, anc=%d, erc=%X, "
 			        "rsid=%X\n",
@@ -508,7 +595,7 @@
 			if ( pccrw->crw.chn )
 			{
 #ifdef S390_MACHCHK_DEBUG
-				printk( "crw_info : "
+				printk( KERN_DEBUG "crw_info : "
 				        "chained CRWs pending ...\n\n");
 #endif
 				chain = 1;
@@ -532,3 +619,40 @@
 	return( count );
 }
 
+#ifdef CONFIG_MACHCHK_WARNING
+/*
+ * s390_post_warning
+ *
+ * Post a warning type machine check
+ *
+ * The function returns 1 when succesfull (panics otherwise)
+ */
+static int s390_post_warning( void )
+{
+	mache_t  *pmache = NULL; /* ptr to mchchk entry */
+
+	pmache = s390_dequeue_free_mchchk();
+
+	if ( pmache != NULL )
+	{
+		memset( pmache, '\0', sizeof(mache_t) );
+
+		pmache->mcic.mcc.mcd.w = 1;
+
+		s390_enqueue_mchchk( pmache );
+	}
+	else
+	{
+		panic( 	"post_warning : "
+			"unable to dequeue "
+			"free mchchk buffer" );
+	} /* endif */
+
+#ifdef S390_MACHCHK_DEBUG
+	printk( KERN_DEBUG "post_warning : 1 warning machine check posted\n");
+#endif
+
+	return ( 1 );
+}
+#endif
+

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)