chore: go mod tidy / vendor / make deps

2025-10-02 08:25:31 +02:00
parent 1c10e64c58
commit d63a1c28ea
505 changed files with 34448 additions and 35285 deletions

vendor/github.com/clipperhouse/uax29/v2/LICENSE generated vendored Normal file (21 lines)

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2020 Matt Sherman
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


@@ -0,0 +1,82 @@
An implementation of grapheme cluster boundaries from [Unicode text segmentation](https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) (UAX 29), for Unicode version 15.0.0.
## Quick start
```
go get "github.com/clipperhouse/uax29/v2/graphemes"
```
```go
import "github.com/clipperhouse/uax29/v2/graphemes"
text := "Hello, 世界. Nice dog! 👍🐶"
tokens := graphemes.FromString(text)
for tokens.Next() { // Next() returns true until end of data
fmt.Println(tokens.Value()) // Do something with the current grapheme
}
```
[![Documentation](https://pkg.go.dev/badge/github.com/clipperhouse/uax29/v2/graphemes.svg)](https://pkg.go.dev/github.com/clipperhouse/uax29/v2/graphemes)
_A grapheme is a “single visible character”, which might be as simple as a single letter, or a complex emoji that consists of several Unicode code points._
## Conformance
We use the Unicode [test suite](https://unicode.org/reports/tr41/tr41-26.html#Tests29). Status:
![Go](https://github.com/clipperhouse/uax29/actions/workflows/gotest.yml/badge.svg)
## APIs
### If you have a `string`
```go
text := "Hello, 世界. Nice dog! 👍🐶"
tokens := graphemes.FromString(text)
for tokens.Next() { // Next() returns true until end of data
fmt.Println(tokens.Value()) // Do something with the current grapheme
}
```
### If you have an `io.Reader`
`FromReader` embeds a [`bufio.Scanner`](https://pkg.go.dev/bufio#Scanner), so just use those methods.
```go
r := getYourReader() // from a file or network maybe
tokens := graphemes.FromReader(r)
for tokens.Scan() { // Scan() returns true until error or EOF
fmt.Println(tokens.Text()) // Do something with the current grapheme
}
if tokens.Err() != nil { // Check the error
log.Fatal(tokens.Err())
}
```
### If you have a `[]byte`
```go
b := []byte("Hello, 世界. Nice dog! 👍🐶")
tokens := graphemes.FromBytes(b)
for tokens.Next() { // Next() returns true until end of data
fmt.Println(tokens.Value()) // Do something with the current grapheme
}
```
### Performance
On a Mac M2 laptop, we see around 200MB/s, or around 100 million graphemes per second. You should see ~constant memory, and no allocations.
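Those numbers will vary by machine; a rough sketch of a benchmark one could run locally to check throughput and allocations (not part of the vendored code, `BenchmarkGraphemes` is illustrative):
```go
package graphemes_test

import (
	"strings"
	"testing"

	"github.com/clipperhouse/uax29/v2/graphemes"
)

func BenchmarkGraphemes(b *testing.B) {
	text := strings.Repeat("Hello, 世界. Nice dog! 👍🐶 ", 1000)
	b.SetBytes(int64(len(text))) // report MB/s
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		tokens := graphemes.FromString(text)
		for tokens.Next() {
			_ = tokens.Value()
		}
	}
}
```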
### Invalid inputs
Invalid UTF-8 input is considered undefined behavior. We test to ensure that bad inputs will not cause pathological outcomes, such as a panic or infinite loop. Callers should expect “garbage-in, garbage-out”.
Your pipeline should probably include a call to [`utf8.Valid()`](https://pkg.go.dev/unicode/utf8#Valid).
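For illustration, a minimal sketch of that validation step (not part of the vendored README); `countGraphemes` is a hypothetical helper built only on the `FromBytes`/`Next` API shown above:
```go
package main

import (
	"fmt"
	"unicode/utf8"

	"github.com/clipperhouse/uax29/v2/graphemes"
)

// countGraphemes rejects invalid UTF-8 up front, then counts grapheme clusters.
func countGraphemes(b []byte) (int, error) {
	if !utf8.Valid(b) {
		return 0, fmt.Errorf("input is not valid UTF-8")
	}
	count := 0
	tokens := graphemes.FromBytes(b)
	for tokens.Next() {
		count++
	}
	return count, nil
}

func main() {
	n, err := countGraphemes([]byte("Hello, 世界. Nice dog! 👍🐶"))
	fmt.Println(n, err)
}
```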


@@ -0,0 +1,28 @@
package graphemes
import "github.com/clipperhouse/uax29/v2/internal/iterators"
type Iterator[T iterators.Stringish] struct {
*iterators.Iterator[T]
}
var (
splitFuncString = splitFunc[string]
splitFuncBytes = splitFunc[[]byte]
)
// FromString returns an iterator for the grapheme clusters in the input string.
// Iterate while Next() is true, and access the grapheme via Value().
func FromString(s string) Iterator[string] {
return Iterator[string]{
iterators.New(splitFuncString, s),
}
}
// FromBytes returns an iterator for the grapheme clusters in the input bytes.
// Iterate while Next() is true, and access the grapheme via Value().
func FromBytes(b []byte) Iterator[[]byte] {
return Iterator[[]byte]{
iterators.New(splitFuncBytes, b),
}
}
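
Because `Iterator[T]` embeds `*iterators.Iterator[T]`, the promoted `Start()` and `End()` methods (defined later in this diff) give the byte offsets of each grapheme. A small usage sketch, not part of the vendored file:
```go
package main

import (
	"fmt"

	"github.com/clipperhouse/uax29/v2/graphemes"
)

func main() {
	tokens := graphemes.FromString("Hi 👍🐶")
	for tokens.Next() {
		// Start/End are promoted from the embedded *iterators.Iterator
		fmt.Printf("%q spans bytes [%d:%d)\n", tokens.Value(), tokens.Start(), tokens.End())
	}
}
```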


@@ -0,0 +1,25 @@
// Package graphemes implements Unicode grapheme cluster boundaries: https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
package graphemes
import (
"bufio"
"io"
)
type Scanner struct {
*bufio.Scanner
}
// FromReader returns a Scanner, to split graphemes per
// https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
//
// It embeds a [bufio.Scanner], so you can use its methods.
//
// Iterate through graphemes by calling Scan() until false, then check Err().
func FromReader(r io.Reader) *Scanner {
sc := bufio.NewScanner(r)
sc.Split(SplitFunc)
return &Scanner{
Scanner: sc,
}
}


@@ -0,0 +1,174 @@
package graphemes
import (
"bufio"
"github.com/clipperhouse/uax29/v2/internal/iterators"
)
// is reports whether lookup intersects any of the given properties
func (lookup property) is(properties property) bool {
return (lookup & properties) != 0
}
const _Ignore = _Extend
// SplitFunc is a bufio.SplitFunc implementation of Unicode grapheme cluster segmentation, for use with bufio.Scanner.
//
// See https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
var SplitFunc bufio.SplitFunc = splitFunc[[]byte]
func splitFunc[T iterators.Stringish](data T, atEOF bool) (advance int, token T, err error) {
var empty T
if len(data) == 0 {
return 0, empty, nil
}
// These vars are stateful across loop iterations
var pos int
var lastExIgnore property = 0 // "last excluding ignored categories"
var lastLastExIgnore property = 0 // "last one before that"
var regionalIndicatorCount int
// Rules are usually of the form Cat1 × Cat2; "current" refers to the first property
// to the right of the ×, from which we look back or forward
current, w := lookup(data[pos:])
if w == 0 {
if !atEOF {
// Rune extends past current data, request more
return 0, empty, nil
}
pos = len(data)
return pos, data[:pos], nil
}
// https://unicode.org/reports/tr29/#GB1
// Start of text always advances
pos += w
for {
eot := pos == len(data) // "end of text"
if eot {
if !atEOF {
// Token extends past current data, request more
return 0, empty, nil
}
// https://unicode.org/reports/tr29/#GB2
break
}
/*
We've switched the evaluation order of GB1↓ and GB2↑. That's OK:
because we've checked len(data) at the top of this function,
sot and eot are mutually exclusive, so the order doesn't matter.
*/
// Rules are usually of the form Cat1 × Cat2; "current" refers to the first property
// to the right of the ×, from which we look back or forward
// Remember previous properties to avoid lookups/lookbacks
last := current
if !last.is(_Ignore) {
lastLastExIgnore = lastExIgnore
lastExIgnore = last
}
current, w = lookup(data[pos:])
if w == 0 {
if atEOF {
// Just return the bytes, we can't do anything with them
pos = len(data)
break
}
// Rune extends past current data, request more
return 0, empty, nil
}
// Optimization: no rule can possibly apply
if current|last == 0 { // i.e. both are zero
break
}
// https://unicode.org/reports/tr29/#GB3
if current.is(_LF) && last.is(_CR) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB4
// https://unicode.org/reports/tr29/#GB5
if (current | last).is(_Control | _CR | _LF) {
break
}
// https://unicode.org/reports/tr29/#GB6
if current.is(_L|_V|_LV|_LVT) && last.is(_L) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB7
if current.is(_V|_T) && last.is(_LV|_V) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB8
if current.is(_T) && last.is(_LVT|_T) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB9
if current.is(_Extend | _ZWJ) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB9a
if current.is(_SpacingMark) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB9b
if last.is(_Prepend) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB9c
// TODO(clipperhouse):
// It appears to be added in Unicode 15.1.0:
// https://unicode.org/versions/Unicode15.1.0/#Migration
// This package currently supports Unicode 15.0.0, so
// out of scope for now
// https://unicode.org/reports/tr29/#GB11
if current.is(_ExtendedPictographic) && last.is(_ZWJ) && lastLastExIgnore.is(_ExtendedPictographic) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB12
// https://unicode.org/reports/tr29/#GB13
if (current & last).is(_RegionalIndicator) {
regionalIndicatorCount++
odd := regionalIndicatorCount%2 == 1
if odd {
pos += w
continue
}
}
// If we fall through all the above rules, it's a grapheme cluster break
break
}
// Return token
return pos, data[:pos], nil
}
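
Since the exported `SplitFunc` is a `bufio.SplitFunc`, it can also be handed directly to a `bufio.Scanner`; a minimal sketch of that wiring, which is essentially what `FromReader` above already does:
```go
package main

import (
	"bufio"
	"fmt"
	"log"
	"strings"

	"github.com/clipperhouse/uax29/v2/graphemes"
)

func main() {
	sc := bufio.NewScanner(strings.NewReader("Hello, 世界 👍🐶"))
	sc.Split(graphemes.SplitFunc) // one token per grapheme cluster
	for sc.Scan() {
		fmt.Println(sc.Text())
	}
	if err := sc.Err(); err != nil {
		log.Fatal(err)
	}
}
```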

vendor/github.com/clipperhouse/uax29/v2/graphemes/trie.go generated vendored Normal file (1409 lines)

File diff suppressed because it is too large.


@@ -0,0 +1,85 @@
package iterators
type Stringish interface {
[]byte | string
}
type SplitFunc[T Stringish] func(T, bool) (int, T, error)
// Iterator is a generic iterator for words that are either []byte or string.
// Iterate while Next() is true, and access the word via Value().
type Iterator[T Stringish] struct {
split SplitFunc[T]
data T
start int
pos int
}
// New creates a new Iterator for the given data and SplitFunc.
func New[T Stringish](split SplitFunc[T], data T) *Iterator[T] {
return &Iterator[T]{
split: split,
data: data,
}
}
// SetText sets the text for the iterator to operate on, and resets all state.
func (iter *Iterator[T]) SetText(data T) {
iter.data = data
iter.start = 0
iter.pos = 0
}
// Split sets the SplitFunc for the Iterator.
func (iter *Iterator[T]) Split(split SplitFunc[T]) {
iter.split = split
}
// Next advances the iterator to the next token. It returns false when there
// are no remaining tokens or an error occurred.
func (iter *Iterator[T]) Next() bool {
if iter.pos == len(iter.data) {
return false
}
if iter.pos > len(iter.data) {
panic("SplitFunc advanced beyond the end of the data")
}
iter.start = iter.pos
advance, _, err := iter.split(iter.data[iter.pos:], true)
if err != nil {
panic(err)
}
if advance <= 0 {
panic("SplitFunc returned a zero or negative advance")
}
iter.pos += advance
if iter.pos > len(iter.data) {
panic("SplitFunc advanced beyond the end of the data")
}
return true
}
// Value returns the current token.
func (iter *Iterator[T]) Value() T {
return iter.data[iter.start:iter.pos]
}
// Start returns the byte position of the current token in the original data.
func (iter *Iterator[T]) Start() int {
return iter.start
}
// End returns the byte position after the current token in the original data.
func (iter *Iterator[T]) End() int {
return iter.pos
}
// Reset resets the iterator to the beginning of the data.
func (iter *Iterator[T]) Reset() {
iter.start = 0
iter.pos = 0
}
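
To make the `SplitFunc` contract concrete, here is a toy sketch written as if it were a test file in this package (the package is `internal`, so it cannot be imported from outside the module). `singleBytes` is a made-up split function that advances one byte per token:
```go
package iterators

import "fmt"

// singleBytes is a toy SplitFunc: it always advances exactly one byte.
// A real split function, like the graphemes one earlier in this diff,
// advances by whole grapheme clusters instead.
func singleBytes(data []byte, atEOF bool) (int, []byte, error) {
	if len(data) == 0 {
		return 0, nil, nil
	}
	return 1, data[:1], nil
}

func ExampleIterator() {
	iter := New(singleBytes, []byte("abc"))
	for iter.Next() {
		fmt.Printf("%q [%d:%d)\n", iter.Value(), iter.Start(), iter.End())
	}
	// Output:
	// "a" [0:1)
	// "b" [1:2)
	// "c" [2:3)
}
```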