diff --git a/changelog/10514.txt b/changelog/10514.txt new file mode 100644 index 0000000000..60c2eb31bb --- /dev/null +++ b/changelog/10514.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core: add metrics for active entity count +``` diff --git a/vault/activity_log.go b/vault/activity_log.go index d9811d5a5b..026ccca9ff 100644 --- a/vault/activity_log.go +++ b/vault/activity_log.go @@ -1499,6 +1499,17 @@ func (a *ActivityLog) HandleTokenCreation(entry *logical.TokenEntry) { } } +func (a *ActivityLog) namespaceToLabel(ctx context.Context, nsID string) string { + ns, err := NamespaceByID(ctx, nsID, a.core) + if err != nil || ns == nil { + return fmt.Sprintf("deleted-%v", nsID) + } + if ns.Path == "" { + return "root" + } + return ns.Path +} + // goroutine to process the request in the intent log, creating precomputed queries. // We expect the return value won't be checked, so log errors as they occur // (but for unit testing having the error return should help.) @@ -1601,7 +1612,13 @@ func (a *ActivityLog) precomputedQueryWorker() error { byNamespace[nsID].Tokens += v } } + endTime := timeutil.EndOfMonth(time.Unix(lastMonth, 0).UTC()) + activePeriodStart := timeutil.MonthsPreviousTo(a.defaultReportMonths, endTime) + // If the computed start is earlier than the last entry in times, clamp to that entry so we report as much as we have in the window + if activePeriodStart.Before(times[len(times)-1]) { + activePeriodStart = times[len(times)-1] + } for _, startTime := range times { // Do not work back further than the current retention window, @@ -1627,12 +1644,33 @@ func (a *ActivityLog) precomputedQueryWorker() error { EndTime: endTime, Namespaces: make([]*activity.NamespaceRecord, 0, len(byNamespace)), } + for nsID, counts := range byNamespace { pq.Namespaces = append(pq.Namespaces, &activity.NamespaceRecord{ NamespaceID: nsID, Entities: uint64(len(counts.Entities)), NonEntityTokens: counts.Tokens, }) + + // If this is the most recent month, or the start of the reporting period, output + // a metric for each namespace. 
+ if startTime.Equal(times[0]) { + a.metrics.SetGaugeWithLabels( + []string{"identity", "entity", "active", "monthly"}, + float32(len(counts.Entities)), + []metricsutil.Label{ + {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, + }, + ) + } else if startTime.Equal(activePeriodStart) { + a.metrics.SetGaugeWithLabels( + []string{"identity", "entity", "active", "reporting_period"}, + float32(len(counts.Entities)), + []metricsutil.Label{ + {Name: "namespace", Value: a.namespaceToLabel(ctx, nsID)}, + }, + ) + } } err = a.queryStore.Put(ctx, pq) @@ -1641,7 +1679,7 @@ func (a *ActivityLog) precomputedQueryWorker() error { } } - // Delete the intent log + // delete the intent log a.view.Delete(ctx, activityIntentLogKey) a.logger.Info("finished computing queries", "month", endTime) @@ -1694,3 +1732,33 @@ func (a *ActivityLog) retentionWorker(currentTime time.Time, retentionMonths int return nil } + +// PartialMonthMetrics reports the number of active entities for the current (partial) month. +// We don't break this down by namespace because that would require going to storage (that information +// is not currently stored in memory.) 
+func (a *ActivityLog) PartialMonthMetrics(ctx context.Context) ([]metricsutil.GaugeLabelValues, error) { + a.fragmentLock.RLock() + defer a.fragmentLock.RUnlock() + if !a.enabled { + // Activity log is not enabled; return an empty list of gauge values + return []metricsutil.GaugeLabelValues{}, nil + } + count := len(a.activeEntities) + + return []metricsutil.GaugeLabelValues{ + { + Labels: []metricsutil.Label{}, + Value: float32(count), + }, + }, nil +} + +func (c *Core) activeEntityGaugeCollector(ctx context.Context) ([]metricsutil.GaugeLabelValues, error) { + c.stateLock.RLock() + a := c.activityLog + c.stateLock.RUnlock() + if a == nil { + return []metricsutil.GaugeLabelValues{}, nil + } + return a.PartialMonthMetrics(ctx) +} diff --git a/vault/activity_log_test.go b/vault/activity_log_test.go index fa1bf52095..866e57cbc6 100644 --- a/vault/activity_log_test.go +++ b/vault/activity_log_test.go @@ -1895,7 +1895,7 @@ func TestActivityLog_Precompute(t *testing.T) { october := timeutil.StartOfMonth(time.Date(2020, 10, 1, 0, 0, 0, 0, time.UTC)) november := timeutil.StartOfMonth(time.Date(2020, 11, 1, 0, 0, 0, 0, time.UTC)) - core, _, _ := TestCoreUnsealed(t) + core, _, _, sink := TestCoreUnsealedWithMetrics(t) a := core.activityLog ctx := namespace.RootContext(nil) @@ -2137,8 +2137,84 @@ func TestActivityLog_Precompute(t *testing.T) { for i := 0; i <= tc.ExpectedUpTo; i++ { checkPrecomputedQuery(i) } - } + + // Check metrics on the last precomputed query + // (otherwise we need a way to reset the in-memory metrics between test cases.) + + intervals := sink.Data() + // Test crossed an interval boundary, don't try to deal with it. 
+ if len(intervals) > 1 { + t.Skip("Detected interval crossing.") + } + expectedGauges := []struct { + Name string + NamespaceLabel string + Value float32 + }{ + // october values + { + "identity.entity.active.monthly", + "root", + 15.0, + }, + { + "identity.entity.active.monthly", + "deleted-bbbbb", // No namespace entry for this fake ID + 5.0, + }, + { + "identity.entity.active.monthly", + "deleted-ccccc", + 5.0, + }, + // august-september values + { + "identity.entity.active.reporting_period", + "root", + 20.0, + }, + { + "identity.entity.active.reporting_period", + "deleted-aaaaa", + 5.0, + }, + { + "identity.entity.active.reporting_period", + "deleted-bbbbb", + 10.0, + }, + { + "identity.entity.active.reporting_period", + "deleted-ccccc", + 5.0, + }, + } + for _, g := range expectedGauges { + found := false + for _, actual := range intervals[0].Gauges { + actualNamespaceLabel := "" + for _, l := range actual.Labels { + if l.Name == "namespace" { + actualNamespaceLabel = l.Value + break + } + } + if actual.Name == g.Name && actualNamespaceLabel == g.NamespaceLabel { + found = true + if actual.Value != g.Value { + t.Errorf("Mismatched value for %v %v %v != %v", + g.Name, g.NamespaceLabel, actual.Value, g.Value) + } + break + } + } + if !found { + t.Errorf("No gauge found for %v %v", + g.Name, g.NamespaceLabel) + } + } + } type BlockingInmemStorage struct { diff --git a/vault/core_metrics.go b/vault/core_metrics.go index c7d829566b..08bab26a1c 100644 --- a/vault/core_metrics.go +++ b/vault/core_metrics.go @@ -211,6 +211,12 @@ func (c *Core) emitMetrics(stopCh chan struct{}) { c.entityGaugeCollectorByMount, "", }, + { + []string{"identity", "entity", "active", "partial_month"}, + []metrics.Label{{"gauge", "identity_active_month"}}, + c.activeEntityGaugeCollector, + "", + }, } // Disable collection if configured, or if we're a performance standby diff --git a/website/content/docs/internals/telemetry.mdx b/website/content/docs/internals/telemetry.mdx index 
4d5ec7bc8a..6101e1c117 100644 --- a/website/content/docs/internals/telemetry.mdx +++ b/website/content/docs/internals/telemetry.mdx @@ -162,6 +162,9 @@ These metrics cover measurement of token, identity, and lease operations, and co | `vault.expire.register` | Time taken for register operations | ms | summary | | `vault.expire.register-auth` | Time taken for register authentication operations which create lease entries without lease ID | ms | summary | | `vault.identity.num_entities` | Number of identity entities stored in Vault | entities | gauge | +| `vault.identity.entity.active.monthly` (cluster, namespace) | Number of distinct entities that created a token during the past month, per namespace. Only available if client count is enabled. Reported at the start of each month. | entities | gauge | +| `vault.identity.entity.active.partial_month` (cluster) | Total number of distinct entities that created a token during the current month. Only available if client count is enabled. Reported periodically within each month. | entities | gauge | +| `vault.identity.entity.active.reporting_period` (cluster, namespace) | Number of distinct entities that created a token in the past N months, as defined by the client count default reporting period. Only available if client count is enabled. Reported at the start of each month. | entities | gauge | | `vault.identity.entity.alias.count` (cluster, namespace, auth_method, mount_point) | Number of identity entities aliases stored in Vault, grouped by the auth mount that created them. This gauge is computed every 10 minutes. | aliases | gauge | | `vault.identity.entity.count` (cluster, namespace) | Number of identity entities stored in Vault, grouped by namespace. | entities | gauge | | `vault.identity.entity.creation` (cluster, namespace, auth_method, mount_point) | Number of identity entities created, grouped by the auth mount that created them. | entities | counter |