// Copyright 2013 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package model import ( "encoding/json" "errors" "fmt" "regexp" "sort" "strconv" "strings" "unicode/utf8" dto "github.com/prometheus/client_model/go" "go.yaml.in/yaml/v2" "google.golang.org/protobuf/proto" ) var ( // NameValidationScheme determines the global default method of the name // validation to be used by all calls to IsValidMetricName() and LabelName // IsValid(). // // Deprecated: This variable should not be used and might be removed in the // far future. If you wish to stick to the legacy name validation use // `IsValidLegacyMetricName()` and `LabelName.IsValidLegacy()` methods // instead. This variable is here as an escape hatch for emergency cases, // given the recent change from `LegacyValidation` to `UTF8Validation`, e.g., // to delay UTF-8 migrations in time or aid in debugging unforeseen results of // the change. In such a case, a temporary assignment to `LegacyValidation` // value in the `init()` function in your main.go or so, could be considered. // // Historically we opted for a global variable for feature gating different // validation schemes in operations that were not otherwise easily adjustable // (e.g. Labels yaml unmarshaling). That could have been a mistake, a separate // Labels structure or package might have been a better choice. Given the // change was made and many upgraded the common already, we live this as-is // with this warning and learning for the future. NameValidationScheme = UTF8Validation // NameEscapingScheme defines the default way that names will be escaped when // presented to systems that do not support UTF-8 names. If the Content-Type // "escaping" term is specified, that will override this value. // NameEscapingScheme should not be set to the NoEscaping value. That string // is used in content negotiation to indicate that a system supports UTF-8 and // has that feature enabled. NameEscapingScheme = UnderscoreEscaping ) // ValidationScheme is a Go enum for determining how metric and label names will // be validated by this library. type ValidationScheme int const ( // UnsetValidation represents an undefined ValidationScheme. // Should not be used in practice. UnsetValidation ValidationScheme = iota // LegacyValidation is a setting that requires that all metric and label names // conform to the original Prometheus character requirements described by // MetricNameRE and LabelNameRE. LegacyValidation // UTF8Validation only requires that metric and label names be valid UTF-8 // strings. UTF8Validation ) var _ interface { yaml.Marshaler yaml.Unmarshaler json.Marshaler json.Unmarshaler fmt.Stringer } = new(ValidationScheme) // String returns the string representation of s. func (s ValidationScheme) String() string { switch s { case UnsetValidation: return "unset" case LegacyValidation: return "legacy" case UTF8Validation: return "utf8" default: panic(fmt.Errorf("unhandled ValidationScheme: %d", s)) } } // MarshalYAML implements the yaml.Marshaler interface. func (s ValidationScheme) MarshalYAML() (any, error) { switch s { case UnsetValidation: return "", nil case LegacyValidation, UTF8Validation: return s.String(), nil default: panic(fmt.Errorf("unhandled ValidationScheme: %d", s)) } } // UnmarshalYAML implements the yaml.Unmarshaler interface. func (s *ValidationScheme) UnmarshalYAML(unmarshal func(any) error) error { var scheme string if err := unmarshal(&scheme); err != nil { return err } return s.Set(scheme) } // MarshalJSON implements the json.Marshaler interface. func (s ValidationScheme) MarshalJSON() ([]byte, error) { switch s { case UnsetValidation: return json.Marshal("") case UTF8Validation, LegacyValidation: return json.Marshal(s.String()) default: return nil, fmt.Errorf("unhandled ValidationScheme: %d", s) } } // UnmarshalJSON implements the json.Unmarshaler interface. func (s *ValidationScheme) UnmarshalJSON(bytes []byte) error { var repr string if err := json.Unmarshal(bytes, &repr); err != nil { return err } return s.Set(repr) } // Set implements the pflag.Value interface. func (s *ValidationScheme) Set(text string) error { switch text { case "": // Don't change the value. case LegacyValidation.String(): *s = LegacyValidation case UTF8Validation.String(): *s = UTF8Validation default: return fmt.Errorf("unrecognized ValidationScheme: %q", text) } return nil } // IsValidMetricName returns whether metricName is valid according to s. func (s ValidationScheme) IsValidMetricName(metricName string) bool { switch s { case LegacyValidation: if len(metricName) == 0 { return false } for i, b := range metricName { if !isValidLegacyRune(b, i) { return false } } return true case UTF8Validation: if len(metricName) == 0 { return false } return utf8.ValidString(metricName) default: panic(fmt.Sprintf("Invalid name validation scheme requested: %s", s.String())) } } // IsValidLabelName returns whether labelName is valid according to s. func (s ValidationScheme) IsValidLabelName(labelName string) bool { switch s { case LegacyValidation: if len(labelName) == 0 { return false } for i, b := range labelName { // TODO: Apply De Morgan's law. Make sure there are tests for this. if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) { //nolint:staticcheck return false } } return true case UTF8Validation: if len(labelName) == 0 { return false } return utf8.ValidString(labelName) default: panic(fmt.Sprintf("Invalid name validation scheme requested: %s", s)) } } // Type implements the pflag.Value interface. func (ValidationScheme) Type() string { return "validationScheme" } type EscapingScheme int const ( // NoEscaping indicates that a name will not be escaped. Unescaped names that // do not conform to the legacy validity check will use a new exposition // format syntax that will be officially standardized in future versions. NoEscaping EscapingScheme = iota // UnderscoreEscaping replaces all legacy-invalid characters with underscores. UnderscoreEscaping // DotsEscaping is similar to UnderscoreEscaping, except that dots are // converted to `_dot_` and pre-existing underscores are converted to `__`. DotsEscaping // ValueEncodingEscaping prepends the name with `U__` and replaces all invalid // characters with the unicode value, surrounded by underscores. Single // underscores are replaced with double underscores. ValueEncodingEscaping ) const ( // EscapingKey is the key in an Accept or Content-Type header that defines how // metric and label names that do not conform to the legacy character // requirements should be escaped when being scraped by a legacy prometheus // system. If a system does not explicitly pass an escaping parameter in the // Accept header, the default NameEscapingScheme will be used. EscapingKey = "escaping" // Possible values for Escaping Key. AllowUTF8 = "allow-utf-8" // No escaping required. EscapeUnderscores = "underscores" EscapeDots = "dots" EscapeValues = "values" ) // MetricNameRE is a regular expression matching valid metric // names. Note that the IsValidMetricName function performs the same // check but faster than a match with this regular expression. var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`) // A Metric is similar to a LabelSet, but the key difference is that a Metric is // a singleton and refers to one and only one stream of samples. type Metric LabelSet // Equal compares the metrics. func (m Metric) Equal(o Metric) bool { return LabelSet(m).Equal(LabelSet(o)) } // Before compares the metrics' underlying label sets. func (m Metric) Before(o Metric) bool { return LabelSet(m).Before(LabelSet(o)) } // Clone returns a copy of the Metric. func (m Metric) Clone() Metric { clone := make(Metric, len(m)) for k, v := range m { clone[k] = v } return clone } func (m Metric) String() string { metricName, hasName := m[MetricNameLabel] numLabels := len(m) - 1 if !hasName { numLabels = len(m) } labelStrings := make([]string, 0, numLabels) for label, value := range m { if label != MetricNameLabel { labelStrings = append(labelStrings, fmt.Sprintf("%s=%q", label, value)) } } switch numLabels { case 0: if hasName { return string(metricName) } return "{}" default: sort.Strings(labelStrings) return fmt.Sprintf("%s{%s}", metricName, strings.Join(labelStrings, ", ")) } } // Fingerprint returns a Metric's Fingerprint. func (m Metric) Fingerprint() Fingerprint { return LabelSet(m).Fingerprint() } // FastFingerprint returns a Metric's Fingerprint calculated by a faster hashing // algorithm, which is, however, more susceptible to hash collisions. func (m Metric) FastFingerprint() Fingerprint { return LabelSet(m).FastFingerprint() } // IsValidMetricName returns true iff name matches the pattern of MetricNameRE // for legacy names, and iff it's valid UTF-8 if the UTF8Validation scheme is // selected. // // Deprecated: This function should not be used and might be removed in the future. // Use [ValidationScheme.IsValidMetricName] instead. func IsValidMetricName(n LabelValue) bool { return NameValidationScheme.IsValidMetricName(string(n)) } // IsValidLegacyMetricName is similar to IsValidMetricName but always uses the // legacy validation scheme regardless of the value of NameValidationScheme. // This function, however, does not use MetricNameRE for the check but a much // faster hardcoded implementation. // // Deprecated: This function should not be used and might be removed in the future. // Use [LegacyValidation.IsValidMetricName] instead. func IsValidLegacyMetricName(n string) bool { return LegacyValidation.IsValidMetricName(n) } // EscapeMetricFamily escapes the given metric names and labels with the given // escaping scheme. Returns a new object that uses the same pointers to fields // when possible and creates new escaped versions so as not to mutate the // input. func EscapeMetricFamily(v *dto.MetricFamily, scheme EscapingScheme) *dto.MetricFamily { if v == nil { return nil } if scheme == NoEscaping { return v } out := &dto.MetricFamily{ Help: v.Help, Type: v.Type, Unit: v.Unit, } // If the name is nil, copy as-is, don't try to escape. if v.Name == nil || IsValidLegacyMetricName(v.GetName()) { out.Name = v.Name } else { out.Name = proto.String(EscapeName(v.GetName(), scheme)) } for _, m := range v.Metric { if !metricNeedsEscaping(m) { out.Metric = append(out.Metric, m) continue } escaped := &dto.Metric{ Gauge: m.Gauge, Counter: m.Counter, Summary: m.Summary, Untyped: m.Untyped, Histogram: m.Histogram, TimestampMs: m.TimestampMs, } for _, l := range m.Label { if l.GetName() == MetricNameLabel { if l.Value == nil || IsValidLegacyMetricName(l.GetValue()) { escaped.Label = append(escaped.Label, l) continue } escaped.Label = append(escaped.Label, &dto.LabelPair{ Name: proto.String(MetricNameLabel), Value: proto.String(EscapeName(l.GetValue(), scheme)), }) continue } if l.Name == nil || IsValidLegacyMetricName(l.GetName()) { escaped.Label = append(escaped.Label, l) continue } escaped.Label = append(escaped.Label, &dto.LabelPair{ Name: proto.String(EscapeName(l.GetName(), scheme)), Value: l.Value, }) } out.Metric = append(out.Metric, escaped) } return out } func metricNeedsEscaping(m *dto.Metric) bool { for _, l := range m.Label { if l.GetName() == MetricNameLabel && !IsValidLegacyMetricName(l.GetValue()) { return true } if !IsValidLegacyMetricName(l.GetName()) { return true } } return false } // EscapeName escapes the incoming name according to the provided escaping // scheme. Depending on the rules of escaping, this may cause no change in the // string that is returned. (Especially NoEscaping, which by definition is a // noop). This function does not do any validation of the name. func EscapeName(name string, scheme EscapingScheme) string { if len(name) == 0 { return name } var escaped strings.Builder switch scheme { case NoEscaping: return name case UnderscoreEscaping: if IsValidLegacyMetricName(name) { return name } for i, b := range name { if isValidLegacyRune(b, i) { escaped.WriteRune(b) } else { escaped.WriteRune('_') } } return escaped.String() case DotsEscaping: // Do not early return for legacy valid names, we still escape underscores. for i, b := range name { switch { case b == '_': escaped.WriteString("__") case b == '.': escaped.WriteString("_dot_") case isValidLegacyRune(b, i): escaped.WriteRune(b) default: escaped.WriteString("__") } } return escaped.String() case ValueEncodingEscaping: if IsValidLegacyMetricName(name) { return name } escaped.WriteString("U__") for i, b := range name { switch { case b == '_': escaped.WriteString("__") case isValidLegacyRune(b, i): escaped.WriteRune(b) case !utf8.ValidRune(b): escaped.WriteString("_FFFD_") default: escaped.WriteRune('_') escaped.WriteString(strconv.FormatInt(int64(b), 16)) escaped.WriteRune('_') } } return escaped.String() default: panic(fmt.Sprintf("invalid escaping scheme %d", scheme)) } } // lower function taken from strconv.atoi. func lower(c byte) byte { return c | ('x' - 'X') } // UnescapeName unescapes the incoming name according to the provided escaping // scheme if possible. Some schemes are partially or totally non-roundtripable. // If any error is enountered, returns the original input. func UnescapeName(name string, scheme EscapingScheme) string { if len(name) == 0 { return name } switch scheme { case NoEscaping: return name case UnderscoreEscaping: // It is not possible to unescape from underscore replacement. return name case DotsEscaping: name = strings.ReplaceAll(name, "_dot_", ".") name = strings.ReplaceAll(name, "__", "_") return name case ValueEncodingEscaping: escapedName, found := strings.CutPrefix(name, "U__") if !found { return name } var unescaped strings.Builder TOP: for i := 0; i < len(escapedName); i++ { // All non-underscores are treated normally. if escapedName[i] != '_' { unescaped.WriteByte(escapedName[i]) continue } i++ if i >= len(escapedName) { return name } // A double underscore is a single underscore. if escapedName[i] == '_' { unescaped.WriteByte('_') continue } // We think we are in a UTF-8 code, process it. var utf8Val uint for j := 0; i < len(escapedName); j++ { // This is too many characters for a utf8 value based on the MaxRune // value of '\U0010FFFF'. if j >= 6 { return name } // Found a closing underscore, convert to a rune, check validity, and append. if escapedName[i] == '_' { utf8Rune := rune(utf8Val) if !utf8.ValidRune(utf8Rune) { return name } unescaped.WriteRune(utf8Rune) continue TOP } r := lower(escapedName[i]) utf8Val *= 16 switch { case r >= '0' && r <= '9': utf8Val += uint(r) - '0' case r >= 'a' && r <= 'f': utf8Val += uint(r) - 'a' + 10 default: return name } i++ } // Didn't find closing underscore, invalid. return name } return unescaped.String() default: panic(fmt.Sprintf("invalid escaping scheme %d", scheme)) } } func isValidLegacyRune(b rune, i int) bool { return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || b == ':' || (b >= '0' && b <= '9' && i > 0) } func (e EscapingScheme) String() string { switch e { case NoEscaping: return AllowUTF8 case UnderscoreEscaping: return EscapeUnderscores case DotsEscaping: return EscapeDots case ValueEncodingEscaping: return EscapeValues default: panic(fmt.Sprintf("unknown format scheme %d", e)) } } func ToEscapingScheme(s string) (EscapingScheme, error) { if s == "" { return NoEscaping, errors.New("got empty string instead of escaping scheme") } switch s { case AllowUTF8: return NoEscaping, nil case EscapeUnderscores: return UnderscoreEscaping, nil case EscapeDots: return DotsEscaping, nil case EscapeValues: return ValueEncodingEscaping, nil default: return NoEscaping, fmt.Errorf("unknown format scheme %s", s) } }