From 12e46bad2c8d04ca817c69f6adb592ee1757504d Mon Sep 17 00:00:00 2001 From: David C Somayajulu Date: Wed, 19 Jul 2017 19:08:37 +0000 Subject: [PATCH] Raise the watchdog timer interval to 2 ticks, there by guaranteeing that it fires between 1ms and 2ms. ` Treat two consecutive occurrences of Heartbeat failures as a legitimate Heartbeat failure MFC after:3 days --- sys/dev/qlxgbe/ql_def.h | 2 +- sys/dev/qlxgbe/ql_hw.c | 14 +++++++++++--- sys/dev/qlxgbe/ql_hw.h | 1 + sys/dev/qlxgbe/ql_os.c | 2 +- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/sys/dev/qlxgbe/ql_def.h b/sys/dev/qlxgbe/ql_def.h index 83774a7d7d7..c03c5c974b3 100644 --- a/sys/dev/qlxgbe/ql_def.h +++ b/sys/dev/qlxgbe/ql_def.h @@ -105,7 +105,7 @@ struct qla_ivec { typedef struct qla_ivec qla_ivec_t; -#define QLA_WATCHDOG_CALLOUT_TICKS 1 +#define QLA_WATCHDOG_CALLOUT_TICKS 2 typedef struct _qla_tx_ring { qla_tx_buf_t tx_buf[NUM_TX_DESCRIPTORS]; diff --git a/sys/dev/qlxgbe/ql_hw.c b/sys/dev/qlxgbe/ql_hw.c index 8a8494ce5e7..877dbf30ccd 100644 --- a/sys/dev/qlxgbe/ql_hw.c +++ b/sys/dev/qlxgbe/ql_hw.c @@ -3366,7 +3366,7 @@ ql_hw_check_health(qla_host_t *ha) ha->hw.health_count++; - if (ha->hw.health_count < 1000) + if (ha->hw.health_count < 500) return 0; ha->hw.health_count = 0; @@ -3385,10 +3385,18 @@ ql_hw_check_health(qla_host_t *ha) if ((val != ha->hw.hbeat_value) && (!(QL_ERR_INJECT(ha, INJCT_HEARTBEAT_FAILURE)))) { ha->hw.hbeat_value = val; + ha->hw.hbeat_failure = 0; return 0; } - device_printf(ha->pci_dev, "%s: Heartbeat Failue [0x%08x]\n", - __func__, val); + + ha->hw.hbeat_failure++; + + if (ha->hw.hbeat_failure < 2) /* we ignore the first failure */ + return 0; + else + device_printf(ha->pci_dev, "%s: Heartbeat Failue [0x%08x]\n", + __func__, val); + return -1; } diff --git a/sys/dev/qlxgbe/ql_hw.h b/sys/dev/qlxgbe/ql_hw.h index 0c71dc2cfe6..6a4e7e9a11e 100644 --- a/sys/dev/qlxgbe/ql_hw.h +++ b/sys/dev/qlxgbe/ql_hw.h @@ -1671,6 +1671,7 @@ typedef struct _qla_hw { /* heart beat register value */ uint32_t hbeat_value; uint32_t health_count; + uint32_t hbeat_failure; uint32_t max_tx_segs; uint32_t min_lro_pkt_size; diff --git a/sys/dev/qlxgbe/ql_os.c b/sys/dev/qlxgbe/ql_os.c index d4a18270dd0..99d4d504ecc 100644 --- a/sys/dev/qlxgbe/ql_os.c +++ b/sys/dev/qlxgbe/ql_os.c @@ -276,7 +276,7 @@ qla_watchdog(void *arg) ha->qla_watchdog_paused = 1; } - ha->watchdog_ticks = ha->watchdog_ticks++ % 1000; + ha->watchdog_ticks = ha->watchdog_ticks++ % 500; callout_reset(&ha->tx_callout, QLA_WATCHDOG_CALLOUT_TICKS, qla_watchdog, ha); }