diff --git a/tests/sys/cddl/zfs/include/libgnop.kshlib b/tests/sys/cddl/zfs/include/libgnop.kshlib index 1e285bb9f02..2a3a7eeb45b 100644 --- a/tests/sys/cddl/zfs/include/libgnop.kshlib +++ b/tests/sys/cddl/zfs/include/libgnop.kshlib @@ -85,6 +85,12 @@ function destroy_gnop # Use "-f" so we can destroy a gnop with a consumer (like ZFS) gnop destroy -f ${disk}.nop + + # Wait for it to disappear + for i in `seq 5`; do + gnop status ${disk}.nop >/dev/null 2>/dev/null || break + sleep $i + done } # Destroy multiple gnop devices. Attempt to destroy them all, ignoring errors diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib b/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib index a14136677ba..5e79365438a 100644 --- a/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib +++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd.kshlib @@ -72,19 +72,32 @@ function corrupt_pool_vdev typeset pool=$1 typeset vdev=$2 typeset file=$3 + typeset -li start=0 + typeset -li now=0 + typeset -li timeout=60 # do some IO on the pool log_must $DD if=/dev/zero of=$file bs=1024k count=64 $FSYNC $file - # scribble on the underlying file to corrupt the vdev - log_must $DD if=/dev/urandom of=$vdev bs=1024k count=64 conv=notrunc + # ZFS rate limits checksum errors to about 20 per second. So in order + # to ensure that we reach zfsd's threshold, we must alternately + # scribble and scrub. + while (( "$now" - "$start" < "$timeout" )); do + # scribble on the underlying file to corrupt the vdev + log_must $DD if=/dev/urandom of=$vdev bs=1024k count=64 conv=notrunc - # Scrub the pool to detect the corruption - log_must $ZPOOL scrub $pool - wait_until_scrubbed $pool + # Scrub the pool to detect and repair the corruption + log_must $ZPOOL scrub $pool + wait_until_scrubbed $pool + now=`date +%s` + if [ "$start" -eq 0 ]; then + start=`date +%s` + fi + check_state "$pool" "$vdev" DEGRADED && return + $SLEEP 1 + done - # ZFSD can take up to 60 seconds to degrade an array in response to - # errors (though it's usually faster). 
- wait_for_pool_dev_state_change 60 $vdev DEGRADED + log_must $ZPOOL status "$pool" + log_fail "ERROR: Disk $vdev not marked as DEGRADED in $pool" } diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_autoreplace_002_pos.ksh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_autoreplace_002_pos.ksh index 64be3c05503..03cccf56683 100644 --- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_autoreplace_002_pos.ksh +++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_autoreplace_002_pos.ksh @@ -83,6 +83,7 @@ for keyword in "${MY_KEYWORDS[@]}" ; do log_must $ZPOOL set autoreplace=on $TESTPOOL log_must destroy_gnop $REMOVAL_DISK + log_must wait_for_pool_removal 20 log_must create_gnop $NEW_DISK $PHYSPATH verify_assertion destroy_pool "$TESTPOOL" diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_autoreplace_003_pos.ksh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_autoreplace_003_pos.ksh index 604071f5ef8..18cb36d20a7 100644 --- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_autoreplace_003_pos.ksh +++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_autoreplace_003_pos.ksh @@ -93,6 +93,7 @@ for keyword in "${MY_KEYWORDS[@]}" ; do log_must $ZPOOL set autoreplace=on $TESTPOOL log_must destroy_gnop $REMOVAL_DISK + log_must wait_for_pool_removal 20 log_must create_gnop $NEW_DISK $PHYSPATH verify_assertion destroy_pool "$TESTPOOL" diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_001_pos.ksh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_001_pos.ksh index 5dfc8e9a9d1..57233070a1b 100644 --- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_001_pos.ksh +++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_fault_001_pos.ksh @@ -27,6 +27,7 @@ # $FreeBSD$ . $STF_SUITE/include/libtest.kshlib +. $STF_SUITE/include/libgnop.kshlib ################################################################################ # @@ -39,8 +40,7 @@ # # # STRATEGY: -# 1. Create a storage pool. Only use the da driver (FreeBSD's SCSI disk -# driver) because it has a special interface for simulating IO errors. +# 1. Create a storage pool. Use gnop vdevs so we can inject I/O errors. # 2. 
Inject IO errors while doing IO to the pool. # 3. Verify that the vdev becomes FAULTED. # 4. ONLINE it and verify that it resilvers and joins the pool. @@ -57,65 +57,28 @@ verify_runnable "global" -function cleanup -{ - # Disable error injection, if still active - sysctl kern.cam.da.$TMPDISKNUM.error_inject=0 > /dev/null - - if poolexists $TESTPOOL; then - # We should not get here if the test passed. Print the output - # of zpool status to assist in debugging. - $ZPOOL status - # Clear out artificially generated errors and destroy the pool - $ZPOOL clear $TESTPOOL - destroy_pool $TESTPOOL - fi -} - log_assert "ZFS will fault a vdev that produces IO errors" -log_onexit cleanup ensure_zfsd_running -# Make sure that at least one of the disks is using the da driver, and use -# that disk for inject errors -typeset TMPDISK="" -for d in $DISKS -do - b=`basename $d` - if test ${b%%[0-9]*} == da - then - TMPDISK=$b - TMPDISKNUM=${b##da} - break - fi -done -if test -z $TMPDISK -then - log_unsupported "This test requires at least one disk to use the da driver" -fi +DISK0_NOP=${DISK0}.nop +DISK1_NOP=${DISK1}.nop +log_must create_gnops $DISK0 $DISK1 for type in "raidz" "mirror"; do log_note "Testing raid type $type" # Create a pool on the supplied disks - create_pool $TESTPOOL $type $DISKS + create_pool $TESTPOOL $type "$DISK0_NOP" "$DISK1_NOP" log_must $ZFS create $TESTPOOL/$TESTFS # Cause some IO errors writing to the pool while true; do - # Running zpool status after every dd operation is too slow. - # So we will run several dd's in a row before checking zpool - # status. 
sync between dd operations to ensure that the disk - # gets IO - for ((i=0; $i<64; i=$i+1)); do - sysctl kern.cam.da.$TMPDISKNUM.error_inject=1 > \ - /dev/null - $DD if=/dev/zero bs=128k count=1 >> \ - /$TESTPOOL/$TESTFS/$TESTFILE 2> /dev/null - $FSYNC /$TESTPOOL/$TESTFS/$TESTFILE - done + log_must gnop configure -e 5 -w 100 "$DISK1_NOP" + $DD if=/dev/zero bs=128k count=1 >> \ + /$TESTPOOL/$TESTFS/$TESTFILE 2> /dev/null + $FSYNC /$TESTPOOL/$TESTFS/$TESTFILE # Check to see if the pool is faulted yet $ZPOOL status $TESTPOOL | grep -q 'state: DEGRADED' if [ $? == 0 ] @@ -127,15 +90,9 @@ for type in "raidz" "mirror"; do - log_must check_state $TESTPOOL $TMPDISK "FAULTED" + log_must check_state $TESTPOOL "$DISK1_NOP" "FAULTED" - #find the failed disk guid - typeset FAILED_VDEV=`$ZPOOL status $TESTPOOL | - awk "/^[[:space:]]*$TMPDISK[[:space:]]*FAULTED/ {print \\$1}"` - - # Reattach the failed disk - $ZPOOL online $TESTPOOL $FAILED_VDEV > /dev/null - if [ $? != 0 ]; then - log_fail "Could not reattach $FAILED_VDEV" - fi + # Heal and reattach the failed disk + log_must gnop configure -w 0 "$DISK1_NOP" + log_must $ZPOOL online $TESTPOOL "$DISK1_NOP" # Verify that the pool resilvers and goes to the ONLINE state - for (( retries=60; $retries>0; retries=$retries+1 )) + for (( retries=60; $retries>0; retries=$retries-1 )) diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh index 21850af96f9..006eaeaef92 100644 --- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh +++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_replace_001_pos.ksh @@ -58,6 +58,7 @@ for type in "raidz" "mirror"; do # Disable the first disk. 
log_must destroy_gnop $REMOVAL_DISK + log_must wait_for_pool_removal 20 # Write out data to make sure we can do I/O after the disk failure log_must $DD if=/dev/zero of=$TESTDIR/$TESTFILE bs=1m count=1 diff --git a/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh b/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh index b15208973bf..4c43ba4e601 100755 --- a/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh +++ b/tests/sys/cddl/zfs/tests/zfsd/zfsd_test.sh @@ -30,12 +30,14 @@ atf_test_case zfsd_fault_001_pos cleanup zfsd_fault_001_pos_head() { atf_set "descr" "ZFS will fault a vdev that produces IO errors" - atf_set "require.progs" "ksh93 zfs zpool zfsd" + atf_set "require.progs" "ksh93 gnop zfs zpool zfsd" atf_set "timeout" 300 } zfsd_fault_001_pos_body() { . $(atf_get_srcdir)/../../include/default.cfg + . $(atf_get_srcdir)/../hotspare/hotspare.kshlib + . $(atf_get_srcdir)/../hotspare/hotspare.cfg . $(atf_get_srcdir)/zfsd.cfg verify_disk_count "$DISKS" 2 @@ -212,7 +214,7 @@ atf_test_case zfsd_hotspare_004_pos cleanup zfsd_hotspare_004_pos_head() { atf_set "descr" "Removing a disk from a pool results in the spare activating" - atf_set "require.progs" "ksh93 gnop zpool camcontrol zfsd" + atf_set "require.progs" "ksh93 gnop zpool" atf_set "timeout" 3600 } zfsd_hotspare_004_pos_body() @@ -303,7 +305,7 @@ atf_test_case zfsd_hotspare_007_pos cleanup zfsd_hotspare_007_pos_head() { atf_set "descr" "zfsd will swap failed drives at startup" - atf_set "require.progs" "ksh93 gnop zpool camcontrol zfsd" + atf_set "require.progs" "ksh93 gnop zpool" atf_set "timeout" 3600 } zfsd_hotspare_007_pos_body() @@ -364,7 +366,7 @@ atf_test_case zfsd_autoreplace_001_neg cleanup zfsd_autoreplace_001_neg_head() { atf_set "descr" "A pool without autoreplace set will not replace by physical path" - atf_set "require.progs" "ksh93 zpool camcontrol zfsd gnop" + atf_set "require.progs" "ksh93 zpool gnop" atf_set "timeout" 3600 } zfsd_autoreplace_001_neg_body() @@ -425,7 +427,7 @@ atf_test_case zfsd_autoreplace_003_pos 
cleanup zfsd_autoreplace_003_pos_head() { atf_set "descr" "A pool with autoreplace set will replace by physical path even if a spare is active" - atf_set "require.progs" "ksh93 zpool camcontrol zfsd gnop" + atf_set "require.progs" "ksh93 zpool gnop" atf_set "timeout" 3600 } zfsd_autoreplace_003_pos_body() @@ -456,7 +458,7 @@ atf_test_case zfsd_replace_001_pos cleanup zfsd_replace_001_pos_head() { atf_set "descr" "ZFSD will automatically replace a SAS disk that disappears and reappears in the same location, with the same devname" - atf_set "require.progs" "ksh93 zpool camcontrol zfsd zfs gnop" + atf_set "require.progs" "ksh93 zpool zfs gnop" } zfsd_replace_001_pos_body() { @@ -485,7 +487,7 @@ atf_test_case zfsd_replace_002_pos cleanup zfsd_replace_002_pos_head() { atf_set "descr" "zfsd will reactivate a pool after all disks are failed and reappeared" - atf_set "require.progs" "ksh93 zpool camcontrol zfsd zfs" + atf_set "require.progs" "ksh93 zpool zfs" } zfsd_replace_002_pos_body() { @@ -514,7 +516,7 @@ atf_test_case zfsd_replace_003_pos cleanup zfsd_replace_003_pos_head() { - atf_set "descr" "ZFSD will correctly replace disks that dissapear and reappear with different devnames" + atf_set "descr" "ZFSD will correctly replace disks that disappear and reappear with different devnames" - atf_set "require.progs" "ksh93 zpool camcontrol zfsd zfs gnop" + atf_set "require.progs" "ksh93 zpool zfs gnop" } zfsd_replace_003_pos_body() {