VAULT-36112: Better handling for Retry-After rate limit header. (#30887)

* round up

* round up, test, update backoff

* add external test

* changelog

* use released version of go-retryablehttp

* update api version of go-retryablehttp

* fix name
This commit is contained in:
miagilepner 2025-06-19 11:22:11 +02:00 committed by GitHub
parent bef4afdb44
commit 544edd58d6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 77 additions and 10 deletions

View file

@ -258,7 +258,7 @@ func DefaultConfig() *Config {
MinRetryWait: time.Millisecond * 1000,
MaxRetryWait: time.Millisecond * 1500,
MaxRetries: 2,
Backoff: retryablehttp.LinearJitterBackoff,
Backoff: retryablehttp.RateLimitLinearJitterBackoff,
}
transport := config.HttpClient.Transport.(*http.Transport)

View file

@ -17,7 +17,7 @@ require (
github.com/hashicorp/go-cleanhttp v0.5.2
github.com/hashicorp/go-hclog v1.6.3
github.com/hashicorp/go-multierror v1.1.1
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/hashicorp/go-retryablehttp v0.7.8
github.com/hashicorp/go-rootcerts v1.0.2
github.com/hashicorp/go-secure-stdlib/parseutil v0.1.6
github.com/hashicorp/go-secure-stdlib/strutil v0.1.2

View file

@ -25,8 +25,8 @@ github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVH
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/hashicorp/go-retryablehttp v0.7.8 h1:ylXZWnqa7Lhqpk0L1P1LzDtGcCR0rPVUrx/c8Unxc48=
github.com/hashicorp/go-retryablehttp v0.7.8/go.mod h1:rjiScheydd+CxvumBsIrFKlx3iS0jrZ7LvzFGFmuKbw=
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
github.com/hashicorp/go-secure-stdlib/parseutil v0.1.6 h1:om4Al8Oy7kCm/B86rLCLah4Dt5Aa0Fr5rYBG60OzwHQ=

6
changelog/30887.txt Normal file
View file

@ -0,0 +1,6 @@
```release-note:change
api: Update the default API client to check for the `Retry-After` header and, if it exists, wait for the specified duration before retrying the request.
```
```release-note:change
quotas/rate-limit: Round up the `Retry-After` value to the nearest second when calculating the retry delay.
```

2
go.mod
View file

@ -105,7 +105,7 @@ require (
github.com/hashicorp/go-pgmultiauth v1.0.0
github.com/hashicorp/go-plugin v1.6.1
github.com/hashicorp/go-raftchunking v0.6.3-0.20191002164813-7e9e8525653a
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/hashicorp/go-retryablehttp v0.7.8
github.com/hashicorp/go-rootcerts v1.0.2
github.com/hashicorp/go-secure-stdlib/awsutil v0.3.0
github.com/hashicorp/go-secure-stdlib/base62 v0.1.2

4
go.sum
View file

@ -1449,8 +1449,8 @@ github.com/hashicorp/go-plugin v1.6.1/go.mod h1:XPHFku2tFo3o3QKFgSYo+cghcUhw1NA1
github.com/hashicorp/go-raftchunking v0.6.3-0.20191002164813-7e9e8525653a h1:FmnBDwGwlTgugDGbVxwV8UavqSMACbGrUpfc98yFLR4=
github.com/hashicorp/go-raftchunking v0.6.3-0.20191002164813-7e9e8525653a/go.mod h1:xbXnmKqX9/+RhPkJ4zrEx4738HacP72aaUPlT2RZ4sU=
github.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/hashicorp/go-retryablehttp v0.7.8 h1:ylXZWnqa7Lhqpk0L1P1LzDtGcCR0rPVUrx/c8Unxc48=
github.com/hashicorp/go-retryablehttp v0.7.8/go.mod h1:rjiScheydd+CxvumBsIrFKlx3iS0jrZ7LvzFGFmuKbw=
github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc=
github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8=
github.com/hashicorp/go-secure-stdlib/awsutil v0.3.0 h1:I8bynUKMh9I7JdwtW9voJ0xmHvBpxQtLjrMFDYmhOxY=

View file

@ -6,6 +6,7 @@ package quotas
import (
"encoding/json"
"fmt"
"sync"
"testing"
"time"
@ -13,6 +14,7 @@ import (
"github.com/hashicorp/vault/builtin/credential/userpass"
"github.com/hashicorp/vault/builtin/logical/pki"
"github.com/hashicorp/vault/helper/constants"
"github.com/hashicorp/vault/helper/testhelpers"
"github.com/hashicorp/vault/helper/testhelpers/teststorage"
"github.com/hashicorp/vault/sdk/helper/testhelpers/schema"
"github.com/hashicorp/vault/sdk/logical"
@ -727,3 +729,40 @@ func TestQuotas_RateLimitQuota_GroupByConfig(t *testing.T) {
})
}
}
// TestQuotas_RateLimit_ZeroRetryRegression verifies that the rate limit response
// headers do not return a Retry-After value of 0.
func TestQuotas_RateLimit_ZeroRetryRegression(t *testing.T) {
conf, opts := teststorage.ClusterSetup(coreConfig, nil, nil)
cluster := vault.NewTestCluster(t, conf, opts)
cluster.Start()
defer cluster.Cleanup()
testhelpers.WaitForActiveNode(t, cluster)
client := cluster.Cores[0].Client
_, err := client.Logical().Write("sys/quotas/config", map[string]interface{}{
"enable_rate_limit_response_headers": true,
})
require.NoError(t, err)
_, err = client.Logical().Write("sys/quotas/rate-limit/root-rlq", map[string]interface{}{
"name": "root-rlq",
"rate": 1,
})
require.NoError(t, err)
failed := atomic.NewBool(false)
wg := sync.WaitGroup{}
client = client.WithResponseCallbacks(func(response *api.Response) {
if response.Header.Get("Retry-After") == "0" {
failed.Store(true)
}
})
for i := 0; i < 20; i++ {
wg.Add(1)
go func() {
defer wg.Done()
client.Logical().Read("sys/mounts")
}()
}
wg.Wait()
require.False(t, failed.Load())
}

View file

@ -350,7 +350,7 @@ func (rlq *RateLimitQuota) allow(ctx context.Context, req *Request) (Response, e
} else {
// deny the request and return early
resp.Allowed = false
retryAfter = strconv.Itoa(int(time.Until(blockedAt.Add(rlq.BlockInterval)).Seconds()))
retryAfter = rlq.retryAfterSeconds(time.Now(), blockedAt.Add(rlq.BlockInterval))
return resp, nil
}
}
@ -363,20 +363,24 @@ func (rlq *RateLimitQuota) allow(ctx context.Context, req *Request) (Response, e
resp.Allowed = allow
resp.Headers[httplimit.HeaderRateLimitLimit] = strconv.FormatUint(limit, 10)
resp.Headers[httplimit.HeaderRateLimitRemaining] = strconv.FormatUint(remaining, 10)
resp.Headers[httplimit.HeaderRateLimitReset] = strconv.Itoa(int(time.Until(time.Unix(0, int64(reset))).Seconds()))
resp.Headers[httplimit.HeaderRateLimitReset] = rlq.retryAfterSeconds(time.Now(), time.Unix(0, int64(reset)))
retryAfter = resp.Headers[httplimit.HeaderRateLimitReset]
// If the request is not allowed (i.e. rate limit threshold reached) and blocking
// is enabled, we add the client to the set of blocked clients.
if !resp.Allowed && rlq.purgeBlocked {
blockedAt := time.Now()
retryAfter = strconv.Itoa(int(time.Until(blockedAt.Add(rlq.BlockInterval)).Seconds()))
retryAfter = rlq.retryAfterSeconds(time.Now(), blockedAt.Add(rlq.BlockInterval))
rlq.blockedClients.Store(key, blockedAt)
}
return resp, nil
}
func (rlq *RateLimitQuota) retryAfterSeconds(now, waitUntil time.Time) string {
return strconv.Itoa(int(math.Ceil(waitUntil.Sub(now).Seconds())))
}
// close stops the current running client purge loop.
// It should be called with the write lock held.
func (rlq *RateLimitQuota) close(ctx context.Context) error {

View file

@ -234,3 +234,21 @@ func TestRateLimitQuota_Update(t *testing.T) {
require.Nil(t, quota.close(context.Background()))
require.Nil(t, quotaUpdate.close(context.Background()))
}
// TestRateLimitQuota_retryAfterSeconds tests the that retryAfterSeconds rounds
// up the number of seconds until the block ends
func TestRateLimitQuota_retryAfterSeconds(t *testing.T) {
quota := NewRateLimitQuota("quota1", "", "", "", "", GroupByIp, false, time.Second, 0, 10, 0)
now := time.Now()
t.Run("less than 1", func(t *testing.T) {
blockedUntil := time.Now().Add(200 * time.Millisecond)
retryAfter := quota.retryAfterSeconds(now, blockedUntil)
require.Equal(t, "1", retryAfter)
})
t.Run("more than 1", func(t *testing.T) {
blockedUntil := time.Now().Add(1300 * time.Millisecond)
retryAfter := quota.retryAfterSeconds(now, blockedUntil)
require.Equal(t, "2", retryAfter)
})
}