[commit][2718] Occasionally bizarre interactions cause the heartbeat's interval timer to disable

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[commit][2718] Occasionally bizarre interactions cause the heartbeat's interval timer to disable

commits-3
 
Revision: 2718
Author:   eliot
Date:     2013-04-11 16:37:17 -0700 (Thu, 11 Apr 2013)
Log Message:
-----------
Occasionally bizarre interactions cause the heartbeat's interval timer to
disable.  For example, on CentOS Linux when using PAM to authenticate, a
failing authentication sequence disables the interval timer, for reasons
unknown (setting a breakpoint in setitimer doesn't show an actual call).
So a workaround is to check the timer as a side effect of
ioRelinquishProcessorForMicroseconds.

Modified Paths:
--------------
    branches/Cog/platforms/unix/vm/sqUnixITimerHeartbeat.c
    branches/Cog/platforms/unix/vm/sqUnixITimerTickerHeartbeat.c
    branches/Cog/platforms/unix/vm/sqUnixMain.c

Property Changed:
----------------
    branches/Cog/platforms/Cross/vm/sqSCCSVersion.h


Property changes on: branches/Cog/platforms/Cross/vm/sqSCCSVersion.h
___________________________________________________________________
Modified: checkindate
   - Wed Apr 10 17:15:30 PDT 2013
   + Thu Apr 11 16:36:31 PDT 2013

Modified: branches/Cog/platforms/unix/vm/sqUnixITimerHeartbeat.c
===================================================================
--- branches/Cog/platforms/unix/vm/sqUnixITimerHeartbeat.c 2013-04-11 00:17:11 UTC (rev 2717)
+++ branches/Cog/platforms/unix/vm/sqUnixITimerHeartbeat.c 2013-04-11 23:37:17 UTC (rev 2718)
@@ -385,13 +385,26 @@
 static stack_t signal_stack;
 #endif /* NEED_SIGALTSTACK */
 
+static void
+setIntervalTimer(long milliseconds)
+{
+ struct itimerval pulse;
+
+ pulse.it_interval.tv_sec = milliseconds / 1000;
+ pulse.it_interval.tv_usec = (milliseconds % 1000) * 1000;
+ pulse.it_value = pulse.it_interval;
+ if (setitimer(THE_ITIMER, &pulse, &pulse)) {
+ perror("ioInitHeartbeat setitimer");
+ exit(1);
+ }
+}
+
 void
 ioInitHeartbeat()
 {
 extern sqInt suppressHeartbeatFlag;
  int er;
  struct sigaction heartbeat_handler_action;
- struct itimerval pulse;
 
  if (suppressHeartbeatFlag) return;
 
@@ -427,30 +440,57 @@
  exit(1);
  }
 
- pulse.it_interval.tv_sec = beatMilliseconds / 1000;
- pulse.it_interval.tv_usec = (beatMilliseconds % 1000) * 1000;
- pulse.it_value = pulse.it_interval;
- if (setitimer(THE_ITIMER, &pulse, &pulse)) {
- perror("ioInitHeartbeat setitimer");
- exit(1);
- }
+ setIntervalTimer(beatMilliseconds);
 }
 
 void
 ioDisableHeartbeat() /* for debugging */
 {
- struct itimerval expire;
+ setIntervalTimer(0);
+}
 
- expire.it_interval.tv_sec =
- expire.it_interval.tv_usec = 0;
- expire.it_value = expire.it_interval;
- if (setitimer(THE_ITIMER, &expire, 0)) {
- perror("ioDisableHeartbeat setitimer");
- exit(1);
- }
+/* Occasionally bizarre interactions cause the heartbeat's interval timer to
+ * disable.  On CentOS linux when using PAM to authenticate, a failing authen-
+ * tication sequence disables the interval timer, for reasons unknown (setting
+ * a breakpoint in setitimer doesn't show an actual call).  So a work around is
+ * to check the timer as a side-effect of ioRelinquishProcessorForMicroseconds.
+ */
+void
+checkHeartStillBeats()
+{
+ struct itimerval hb_itimer;
+
+ if (getitimer(THE_ITIMER, &hb_itimer) < 0)
+ perror("getitimer");
+ else if (!hb_itimer.it_interval.tv_sec
+  && !hb_itimer.it_interval.tv_usec)
+ setIntervalTimer(beatMilliseconds);
 }
 
 void
+printHeartbeatTimer()
+{
+ struct itimerval hb_itimer;
+ struct sigaction hb_handler_action;
+
+ if (getitimer(THE_ITIMER, &hb_itimer) < 0)
+ perror("getitimer");
+ else
+ printf("heartbeat timer interval s %ld us %ld value s %ld us %ld\n",
+ hb_itimer.it_interval.tv_sec, hb_itimer.it_interval.tv_usec,
+ hb_itimer.it_value.tv_sec, hb_itimer.it_value.tv_usec);
+
+ if (sigaction(ITIMER_SIGNAL, 0, &hb_handler_action) < 0)
+ perror("sigaction");
+ else
+ printf("heartbeat signal handler %p (%s)\n",
+ hb_handler_action.sa_sigaction,
+ hb_handler_action.sa_sigaction == heartbeat_handler
+ ? "heartbeat_handler"
+ : "????");
+}
+
+void
 ioSetHeartbeatMilliseconds(int ms)
 {
  beatMilliseconds = ms;

Modified: branches/Cog/platforms/unix/vm/sqUnixITimerTickerHeartbeat.c
===================================================================
--- branches/Cog/platforms/unix/vm/sqUnixITimerTickerHeartbeat.c 2013-04-11 00:17:11 UTC (rev 2717)
+++ branches/Cog/platforms/unix/vm/sqUnixITimerTickerHeartbeat.c 2013-04-11 23:37:17 UTC (rev 2718)
@@ -515,6 +515,20 @@
 static stack_t signal_stack;
 #endif /* NEED_SIGALTSTACK */
 
+static void
+setIntervalTimer(long milliseconds)
+{
+ struct itimerval pulse;
+
+ pulse.it_interval.tv_sec = milliseconds / 1000;
+ pulse.it_interval.tv_usec = (milliseconds % 1000) * 1000;
+ pulse.it_value = pulse.it_interval;
+ if (setitimer(THE_ITIMER, &pulse, &pulse)) {
+ perror("ioInitHeartbeat setitimer");
+ exit(1);
+ }
+}
+
 void
 ioInitHeartbeat()
 {
@@ -522,7 +536,6 @@
  int er;
  struct timespec halfAMo;
  struct sigaction heartbeat_handler_action, ticker_handler_action;
- struct itimerval pulse;
 
  if (suppressHeartbeatFlag) return;
 
@@ -587,30 +600,57 @@
  exit(1);
  }
 
- pulse.it_interval.tv_sec = beatMilliseconds / 1000;
- pulse.it_interval.tv_usec = (beatMilliseconds % 1000) * 1000;
- pulse.it_value = pulse.it_interval;
- if (setitimer(THE_ITIMER, &pulse, &pulse)) {
- perror("ioInitHeartbeat setitimer");
- exit(1);
- }
+ setIntervalTimer(beatMilliseconds);
 }
 
 void
 ioDisableHeartbeat() /* for debugging */
 {
- struct itimerval expire;
+ setIntervalTimer(0);
+}
 
- expire.it_interval.tv_sec =
- expire.it_interval.tv_usec = 0;
- expire.it_value = expire.it_interval;
- if (setitimer(THE_ITIMER, &expire, 0)) {
- perror("ioDisableHeartbeat setitimer");
- exit(1);
- }
+/* Occasionally bizarre interactions cause the heartbeat's interval timer to
+ * disable.  On CentOS linux when using PAM to authenticate, a failing authen-
+ * tication sequence disables the interval timer, for reasons unknown (setting
+ * a breakpoint in setitimer doesn't show an actual call).  So a work around is
+ * to check the timer as a side-effect of ioRelinquishProcessorForMicroseconds.
+ */
+void
+checkHeartStillBeats()
+{
+ struct itimerval hb_itimer;
+
+ if (getitimer(THE_ITIMER, &hb_itimer) < 0)
+ perror("getitimer");
+ else if (!hb_itimer.it_interval.tv_sec
+  && !hb_itimer.it_interval.tv_usec)
+ setIntervalTimer(beatMilliseconds);
 }
 
 void
+printHeartbeatTimer()
+{
+ struct itimerval hb_itimer;
+ struct sigaction hb_handler_action;
+
+ if (getitimer(THE_ITIMER, &hb_itimer) < 0)
+ perror("getitimer");
+ else
+ printf("heartbeat timer interval s %ld us %ld value s %ld us %ld\n",
+ hb_itimer.it_interval.tv_sec, hb_itimer.it_interval.tv_usec,
+ hb_itimer.it_value.tv_sec, hb_itimer.it_value.tv_usec);
+
+ if (sigaction(ITIMER_SIGNAL, 0, &hb_handler_action) < 0)
+ perror("sigaction");
+ else
+ printf("heartbeat signal handler %p (%s)\n",
+ hb_handler_action.sa_sigaction,
+ hb_handler_action.sa_sigaction == heartbeat_handler
+ ? "heartbeat_handler"
+ : "????");
+}
+
+void
 ioSetHeartbeatMilliseconds(int ms)
 {
  beatMilliseconds = ms;

Modified: branches/Cog/platforms/unix/vm/sqUnixMain.c
===================================================================
--- branches/Cog/platforms/unix/vm/sqUnixMain.c 2013-04-11 00:17:11 UTC (rev 2717)
+++ branches/Cog/platforms/unix/vm/sqUnixMain.c 2013-04-11 23:37:17 UTC (rev 2718)
@@ -544,6 +544,11 @@
 #if STACKVM
 sqInt ioRelinquishProcessorForMicroseconds(sqInt us)
 {
+# if ITIMER_HEARTBEAT
+  extern void checkHeartStillBeats();
+
+  checkHeartStillBeats();
+# endif
   dpy->ioRelinquishProcessorForMicroseconds(us);
   return 0;
 }