vendor: github.com/mattn/go-runewidth v0.0.19

full diff: https://github.com/mattn/go-runewidth/compare/v0.0.17...v0.0.19

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Sebastiaan van Stijn
2025-11-04 14:28:14 +01:00
parent c01696ffde
commit 9bce085b13
18 changed files with 1980 additions and 2018 deletions

View File

@ -26,7 +26,7 @@ require (
github.com/google/go-cmp v0.7.0
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/google/uuid v1.6.0
github.com/mattn/go-runewidth v0.0.17
github.com/mattn/go-runewidth v0.0.19
github.com/moby/go-archive v0.1.0
github.com/moby/moby/api v1.52.0-beta.4
github.com/moby/moby/client v0.1.0-beta.3
@ -71,6 +71,7 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/clipperhouse/uax29/v2 v2.2.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect
github.com/docker/go-events v0.0.0-20250808211157-605354379745 // indirect
@ -92,7 +93,6 @@ require (
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect

View File

@ -29,6 +29,8 @@ github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK3
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/clipperhouse/uax29/v2 v2.2.0 h1:ChwIKnQN3kcZteTXMgb1wztSgaU+ZemkgWdohwgs8tY=
github.com/clipperhouse/uax29/v2 v2.2.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM=
github.com/cloudflare/cfssl v0.0.0-20180223231731-4e2dcbde5004/go.mod h1:yMWuSON2oQp+43nFtAV/uvKQIFpSPerB57DCt9t8sSA=
github.com/cloudflare/cfssl v1.6.4 h1:NMOvfrEjFfC63K3SGXgAnFdsgkmiq4kATme5BfcqrO8=
github.com/cloudflare/cfssl v1.6.4/go.mod h1:8b3CQMxfWPAeom3zBnGJ6sd+G1NkL5TXqmDXacb+1J0=
@ -157,8 +159,8 @@ github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+
github.com/lib/pq v1.9.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/magiconair/properties v1.5.3 h1:C8fxWnhYyME3n0klPOhVM7PtYUB3eV1W3DeFmN3j53Y=
github.com/magiconair/properties v1.5.3/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/mattn/go-runewidth v0.0.17 h1:78v8ZlW0bP43XfmAfPsdXcoNCelfMHsDmd/pkENfrjQ=
github.com/mattn/go-runewidth v0.0.17/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/mattn/go-sqlite3 v1.6.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
@ -243,8 +245,6 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT
github.com/prometheus/procfs v0.0.3/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019 Oliver Kuederle
Copyright (c) 2020 Matt Sherman
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -0,0 +1,82 @@
An implementation of grapheme cluster boundaries from [Unicode text segmentation](https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) (UAX 29), for Unicode version 15.0.0.
## Quick start
```
go get "github.com/clipperhouse/uax29/v2/graphemes"
```
```go
import "github.com/clipperhouse/uax29/v2/graphemes"
text := "Hello, 世界. Nice dog! 👍🐶"
tokens := graphemes.FromString(text)
for tokens.Next() { // Next() returns true until end of data
fmt.Println(tokens.Value()) // Do something with the current grapheme
}
```
[![Documentation](https://pkg.go.dev/badge/github.com/clipperhouse/uax29/v2/graphemes.svg)](https://pkg.go.dev/github.com/clipperhouse/uax29/v2/graphemes)
_A grapheme is a “single visible character”, which might be a simple as a single letter, or a complex emoji that consists of several Unicode code points._
## Conformance
We use the Unicode [test suite](https://unicode.org/reports/tr41/tr41-26.html#Tests29). Status:
![Go](https://github.com/clipperhouse/uax29/actions/workflows/gotest.yml/badge.svg)
## APIs
### If you have a `string`
```go
text := "Hello, 世界. Nice dog! 👍🐶"
tokens := graphemes.FromString(text)
for tokens.Next() { // Next() returns true until end of data
fmt.Println(tokens.Value()) // Do something with the current grapheme
}
```
### If you have an `io.Reader`
`FromReader` embeds a [`bufio.Scanner`](https://pkg.go.dev/bufio#Scanner), so just use those methods.
```go
r := getYourReader() // from a file or network maybe
tokens := graphemes.FromReader(r)
for tokens.Scan() { // Scan() returns true until error or EOF
fmt.Println(tokens.Text()) // Do something with the current grapheme
}
if tokens.Err() != nil { // Check the error
log.Fatal(tokens.Err())
}
```
### If you have a `[]byte`
```go
b := []byte("Hello, 世界. Nice dog! 👍🐶")
tokens := graphemes.FromBytes(b)
for tokens.Next() { // Next() returns true until end of data
fmt.Println(tokens.Value()) // Do something with the current grapheme
}
```
### Performance
On a Mac M2 laptop, we see around 200MB/s, or around 100 million graphemes per second. You should see ~constant memory, and no allocations.
### Invalid inputs
Invalid UTF-8 input is considered undefined behavior. We test to ensure that bad inputs will not cause pathological outcomes, such as a panic or infinite loop. Callers should expect “garbage-in, garbage-out”.
Your pipeline should probably include a call to [`utf8.Valid()`](https://pkg.go.dev/unicode/utf8#Valid).

View File

@ -0,0 +1,28 @@
package graphemes
import "github.com/clipperhouse/uax29/v2/internal/iterators"
type Iterator[T iterators.Stringish] struct {
*iterators.Iterator[T]
}
var (
splitFuncString = splitFunc[string]
splitFuncBytes = splitFunc[[]byte]
)
// FromString returns an iterator for the grapheme clusters in the input string.
// Iterate while Next() is true, and access the grapheme via Value().
func FromString(s string) Iterator[string] {
return Iterator[string]{
iterators.New(splitFuncString, s),
}
}
// FromBytes returns an iterator for the grapheme clusters in the input bytes.
// Iterate while Next() is true, and access the grapheme via Value().
func FromBytes(b []byte) Iterator[[]byte] {
return Iterator[[]byte]{
iterators.New(splitFuncBytes, b),
}
}

View File

@ -0,0 +1,25 @@
// Package graphemes implements Unicode grapheme cluster boundaries: https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
package graphemes
import (
"bufio"
"io"
)
type Scanner struct {
*bufio.Scanner
}
// FromReader returns a Scanner, to split graphemes per
// https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
//
// It embeds a [bufio.Scanner], so you can use its methods.
//
// Iterate through graphemes by calling Scan() until false, then check Err().
func FromReader(r io.Reader) *Scanner {
sc := bufio.NewScanner(r)
sc.Split(SplitFunc)
return &Scanner{
Scanner: sc,
}
}

View File

@ -0,0 +1,174 @@
package graphemes
import (
"bufio"
"github.com/clipperhouse/uax29/v2/internal/iterators"
)
// is determines if lookup intersects propert(ies)
func (lookup property) is(properties property) bool {
return (lookup & properties) != 0
}
const _Ignore = _Extend
// SplitFunc is a bufio.SplitFunc implementation of Unicode grapheme cluster segmentation, for use with bufio.Scanner.
//
// See https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
var SplitFunc bufio.SplitFunc = splitFunc[[]byte]
func splitFunc[T iterators.Stringish](data T, atEOF bool) (advance int, token T, err error) {
var empty T
if len(data) == 0 {
return 0, empty, nil
}
// These vars are stateful across loop iterations
var pos int
var lastExIgnore property = 0 // "last excluding ignored categories"
var lastLastExIgnore property = 0 // "last one before that"
var regionalIndicatorCount int
// Rules are usually of the form Cat1 × Cat2; "current" refers to the first property
// to the right of the ×, from which we look back or forward
current, w := lookup(data[pos:])
if w == 0 {
if !atEOF {
// Rune extends past current data, request more
return 0, empty, nil
}
pos = len(data)
return pos, data[:pos], nil
}
// https://unicode.org/reports/tr29/#GB1
// Start of text always advances
pos += w
for {
eot := pos == len(data) // "end of text"
if eot {
if !atEOF {
// Token extends past current data, request more
return 0, empty, nil
}
// https://unicode.org/reports/tr29/#GB2
break
}
/*
We've switched the evaluation order of GB1↓ and GB2↑. It's ok:
because we've checked for len(data) at the top of this function,
sot and eot are mutually exclusive, order doesn't matter.
*/
// Rules are usually of the form Cat1 × Cat2; "current" refers to the first property
// to the right of the ×, from which we look back or forward
// Remember previous properties to avoid lookups/lookbacks
last := current
if !last.is(_Ignore) {
lastLastExIgnore = lastExIgnore
lastExIgnore = last
}
current, w = lookup(data[pos:])
if w == 0 {
if atEOF {
// Just return the bytes, we can't do anything with them
pos = len(data)
break
}
// Rune extends past current data, request more
return 0, empty, nil
}
// Optimization: no rule can possibly apply
if current|last == 0 { // i.e. both are zero
break
}
// https://unicode.org/reports/tr29/#GB3
if current.is(_LF) && last.is(_CR) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB4
// https://unicode.org/reports/tr29/#GB5
if (current | last).is(_Control | _CR | _LF) {
break
}
// https://unicode.org/reports/tr29/#GB6
if current.is(_L|_V|_LV|_LVT) && last.is(_L) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB7
if current.is(_V|_T) && last.is(_LV|_V) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB8
if current.is(_T) && last.is(_LVT|_T) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB9
if current.is(_Extend | _ZWJ) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB9a
if current.is(_SpacingMark) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB9b
if last.is(_Prepend) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB9c
// TODO(clipperhouse):
// It appears to be added in Unicode 15.1.0:
// https://unicode.org/versions/Unicode15.1.0/#Migration
// This package currently supports Unicode 15.0.0, so
// out of scope for now
// https://unicode.org/reports/tr29/#GB11
if current.is(_ExtendedPictographic) && last.is(_ZWJ) && lastLastExIgnore.is(_ExtendedPictographic) {
pos += w
continue
}
// https://unicode.org/reports/tr29/#GB12
// https://unicode.org/reports/tr29/#GB13
if (current & last).is(_RegionalIndicator) {
regionalIndicatorCount++
odd := regionalIndicatorCount%2 == 1
if odd {
pos += w
continue
}
}
// If we fall through all the above rules, it's a grapheme cluster break
break
}
// Return token
return pos, data[:pos], nil
}

1409
vendor/github.com/clipperhouse/uax29/v2/graphemes/trie.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,85 @@
package iterators
type Stringish interface {
[]byte | string
}
type SplitFunc[T Stringish] func(T, bool) (int, T, error)
// Iterator is a generic iterator for words that are either []byte or string.
// Iterate while Next() is true, and access the word via Value().
type Iterator[T Stringish] struct {
split SplitFunc[T]
data T
start int
pos int
}
// New creates a new Iterator for the given data and SplitFunc.
func New[T Stringish](split SplitFunc[T], data T) *Iterator[T] {
return &Iterator[T]{
split: split,
data: data,
}
}
// SetText sets the text for the iterator to operate on, and resets all state.
func (iter *Iterator[T]) SetText(data T) {
iter.data = data
iter.start = 0
iter.pos = 0
}
// Split sets the SplitFunc for the Iterator.
func (iter *Iterator[T]) Split(split SplitFunc[T]) {
iter.split = split
}
// Next advances the iterator to the next token. It returns false when there
// are no remaining tokens or an error occurred.
func (iter *Iterator[T]) Next() bool {
if iter.pos == len(iter.data) {
return false
}
if iter.pos > len(iter.data) {
panic("SplitFunc advanced beyond the end of the data")
}
iter.start = iter.pos
advance, _, err := iter.split(iter.data[iter.pos:], true)
if err != nil {
panic(err)
}
if advance <= 0 {
panic("SplitFunc returned a zero or negative advance")
}
iter.pos += advance
if iter.pos > len(iter.data) {
panic("SplitFunc advanced beyond the end of the data")
}
return true
}
// Value returns the current token.
func (iter *Iterator[T]) Value() T {
return iter.data[iter.start:iter.pos]
}
// Start returns the byte position of the current token in the original data.
func (iter *Iterator[T]) Start() int {
return iter.start
}
// End returns the byte position after the current token in the original data.
func (iter *Iterator[T]) End() int {
return iter.pos
}
// Reset resets the iterator to the beginning of the data.
func (iter *Iterator[T]) Reset() {
iter.start = 0
iter.pos = 0
}

43
vendor/github.com/mattn/go-runewidth/benchstat.txt generated vendored Normal file
View File

@ -0,0 +1,43 @@
goos: darwin
goarch: arm64
pkg: github.com/mattn/go-runewidth
cpu: Apple M2
│ old.txt │ new.txt │
│ sec/op │ sec/op vs base │
String1WidthAll/regular-8 108.92m ± 0% 35.09m ± 3% -67.78% (p=0.002 n=6)
String1WidthAll/lut-8 93.97m ± 0% 18.70m ± 0% -80.10% (p=0.002 n=6)
String1Width768/regular-8 60.62µ ± 1% 11.54µ ± 0% -80.97% (p=0.002 n=6)
String1Width768/lut-8 60.66µ ± 1% 11.43µ ± 0% -81.16% (p=0.002 n=6)
String1WidthAllEastAsian/regular-8 115.13m ± 1% 40.79m ± 8% -64.57% (p=0.002 n=6)
String1WidthAllEastAsian/lut-8 93.65m ± 0% 18.70m ± 2% -80.03% (p=0.002 n=6)
String1Width768EastAsian/regular-8 75.32µ ± 0% 23.49µ ± 0% -68.82% (p=0.002 n=6)
String1Width768EastAsian/lut-8 60.76µ ± 0% 11.50µ ± 0% -81.07% (p=0.002 n=6)
geomean 2.562m 604.5µ -76.41%
│ old.txt │ new.txt │
│ B/op │ B/op vs base │
String1WidthAll/regular-8 106.3Mi ± 0% 0.0Mi ± 0% -100.00% (p=0.002 n=6)
String1WidthAll/lut-8 106.3Mi ± 0% 0.0Mi ± 0% -100.00% (p=0.002 n=6)
String1Width768/regular-8 75.00Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.002 n=6)
String1Width768/lut-8 75.00Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.002 n=6)
String1WidthAllEastAsian/regular-8 106.3Mi ± 0% 0.0Mi ± 0% -100.00% (p=0.002 n=6)
String1WidthAllEastAsian/lut-8 106.3Mi ± 0% 0.0Mi ± 0% -100.00% (p=0.002 n=6)
String1Width768EastAsian/regular-8 75.00Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.002 n=6)
String1Width768EastAsian/lut-8 75.00Ki ± 0% 0.00Ki ± 0% -100.00% (p=0.002 n=6)
geomean 2.790Mi ? ¹ ²
¹ summaries must be >0 to compute geomean
² ratios must be >0 to compute geomean
│ old.txt │ new.txt │
│ allocs/op │ allocs/op vs base │
String1WidthAll/regular-8 3.342M ± 0% 0.000M ± 0% -100.00% (p=0.002 n=6)
String1WidthAll/lut-8 3.342M ± 0% 0.000M ± 0% -100.00% (p=0.002 n=6)
String1Width768/regular-8 2.304k ± 0% 0.000k ± 0% -100.00% (p=0.002 n=6)
String1Width768/lut-8 2.304k ± 0% 0.000k ± 0% -100.00% (p=0.002 n=6)
String1WidthAllEastAsian/regular-8 3.342M ± 0% 0.000M ± 0% -100.00% (p=0.002 n=6)
String1WidthAllEastAsian/lut-8 3.342M ± 0% 0.000M ± 0% -100.00% (p=0.002 n=6)
String1Width768EastAsian/regular-8 2.304k ± 0% 0.000k ± 0% -100.00% (p=0.002 n=6)
String1Width768EastAsian/lut-8 2.304k ± 0% 0.000k ± 0% -100.00% (p=0.002 n=6)
geomean 87.75k ? ¹ ²
¹ summaries must be >0 to compute geomean
² ratios must be >0 to compute geomean

54
vendor/github.com/mattn/go-runewidth/new.txt generated vendored Normal file
View File

@ -0,0 +1,54 @@
goos: darwin
goarch: arm64
pkg: github.com/mattn/go-runewidth
cpu: Apple M2
BenchmarkString1WidthAll/regular-8 33 35033923 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/regular-8 33 34965112 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/regular-8 33 36307234 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/regular-8 33 35007705 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/regular-8 33 35154182 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/regular-8 34 35155400 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/lut-8 63 18688500 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/lut-8 63 18712474 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/lut-8 63 18700211 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/lut-8 62 18694179 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/lut-8 62 18708392 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAll/lut-8 63 18770608 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/regular-8 104137 11526 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/regular-8 103986 11540 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/regular-8 104079 11552 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/regular-8 103963 11530 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/regular-8 103714 11538 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/regular-8 104181 11537 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/lut-8 105150 11420 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/lut-8 104778 11423 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/lut-8 105069 11422 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/lut-8 105127 11475 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/lut-8 104742 11433 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768/lut-8 105163 11432 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 28 40723347 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 28 40790299 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 28 40801338 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 28 40798216 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 28 44135253 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 28 40779546 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 62 18694165 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 62 18685047 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 62 18689273 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 62 19150346 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 63 19126154 ns/op 0 B/op 0 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 62 18712619 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/regular-8 50775 23595 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/regular-8 51061 23563 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/regular-8 51057 23492 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/regular-8 51138 23445 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/regular-8 51195 23469 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/regular-8 51087 23482 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/lut-8 104559 11549 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/lut-8 104508 11483 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/lut-8 104296 11503 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/lut-8 104606 11485 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/lut-8 104588 11495 ns/op 0 B/op 0 allocs/op
BenchmarkString1Width768EastAsian/lut-8 104602 11518 ns/op 0 B/op 0 allocs/op
PASS
ok github.com/mattn/go-runewidth 64.455s

54
vendor/github.com/mattn/go-runewidth/old.txt generated vendored Normal file
View File

@ -0,0 +1,54 @@
goos: darwin
goarch: arm64
pkg: github.com/mattn/go-runewidth
cpu: Apple M2
BenchmarkString1WidthAll/regular-8 10 108559258 ns/op 111412145 B/op 3342342 allocs/op
BenchmarkString1WidthAll/regular-8 10 108968079 ns/op 111412364 B/op 3342343 allocs/op
BenchmarkString1WidthAll/regular-8 10 108890338 ns/op 111412388 B/op 3342344 allocs/op
BenchmarkString1WidthAll/regular-8 10 108940704 ns/op 111412584 B/op 3342346 allocs/op
BenchmarkString1WidthAll/regular-8 10 108632796 ns/op 111412348 B/op 3342343 allocs/op
BenchmarkString1WidthAll/regular-8 10 109354546 ns/op 111412777 B/op 3342343 allocs/op
BenchmarkString1WidthAll/lut-8 12 93844406 ns/op 111412569 B/op 3342345 allocs/op
BenchmarkString1WidthAll/lut-8 12 93991080 ns/op 111412512 B/op 3342344 allocs/op
BenchmarkString1WidthAll/lut-8 12 93980632 ns/op 111412413 B/op 3342343 allocs/op
BenchmarkString1WidthAll/lut-8 12 94004083 ns/op 111412396 B/op 3342343 allocs/op
BenchmarkString1WidthAll/lut-8 12 93959795 ns/op 111412445 B/op 3342343 allocs/op
BenchmarkString1WidthAll/lut-8 12 93846198 ns/op 111412556 B/op 3342345 allocs/op
BenchmarkString1Width768/regular-8 19785 60696 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/regular-8 19824 60520 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/regular-8 19832 60547 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/regular-8 19778 60543 ns/op 76800 B/op 2304 allocs/op
BenchmarkString1Width768/regular-8 19842 61142 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/regular-8 19780 60696 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/lut-8 19598 61161 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/lut-8 19731 60707 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/lut-8 19738 60626 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/lut-8 19764 60670 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/lut-8 19797 60642 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768/lut-8 19738 60608 ns/op 76800 B/op 2304 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 9 115080431 ns/op 111412458 B/op 3342345 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 9 114908880 ns/op 111412476 B/op 3342345 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 9 115077134 ns/op 111412540 B/op 3342345 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 9 115175292 ns/op 111412467 B/op 3342345 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 9 115792653 ns/op 111412362 B/op 3342344 allocs/op
BenchmarkString1WidthAllEastAsian/regular-8 9 115255417 ns/op 111412572 B/op 3342346 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 12 93761542 ns/op 111412538 B/op 3342345 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 12 94089990 ns/op 111412440 B/op 3342343 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 12 93721410 ns/op 111412514 B/op 3342344 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 12 93572951 ns/op 111412329 B/op 3342342 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 12 93536052 ns/op 111412206 B/op 3342341 allocs/op
BenchmarkString1WidthAllEastAsian/lut-8 12 93532365 ns/op 111412412 B/op 3342343 allocs/op
BenchmarkString1Width768EastAsian/regular-8 15904 75401 ns/op 76800 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/regular-8 15932 75449 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/regular-8 15944 75181 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/regular-8 15963 75311 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/regular-8 15879 75292 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/regular-8 15955 75334 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/lut-8 19692 60692 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/lut-8 19712 60699 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/lut-8 19741 60819 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/lut-8 19771 60653 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/lut-8 19737 61027 ns/op 76801 B/op 2304 allocs/op
BenchmarkString1Width768EastAsian/lut-8 19657 60820 ns/op 76801 B/op 2304 allocs/op
PASS
ok github.com/mattn/go-runewidth 76.165s

View File

@ -4,7 +4,7 @@ import (
"os"
"strings"
"github.com/rivo/uniseg"
"github.com/clipperhouse/uax29/v2/graphemes"
)
//go:generate go run script/generate.go
@ -64,6 +64,9 @@ func inTable(r rune, t table) bool {
if r < t[0].first {
return false
}
if r > t[len(t)-1].last {
return false
}
bot := 0
top := len(t) - 1
@ -175,10 +178,10 @@ func (c *Condition) CreateLUT() {
// StringWidth return width as you can see
func (c *Condition) StringWidth(s string) (width int) {
g := uniseg.NewGraphemes(s)
g := graphemes.FromString(s)
for g.Next() {
var chWidth int
for _, r := range g.Runes() {
for _, r := range g.Value() {
chWidth = c.RuneWidth(r)
if chWidth > 0 {
break // Our best guess at this point is to use the width of the first non-zero-width rune.
@ -197,17 +200,17 @@ func (c *Condition) Truncate(s string, w int, tail string) string {
w -= c.StringWidth(tail)
var width int
pos := len(s)
g := uniseg.NewGraphemes(s)
g := graphemes.FromString(s)
for g.Next() {
var chWidth int
for _, r := range g.Runes() {
for _, r := range g.Value() {
chWidth = c.RuneWidth(r)
if chWidth > 0 {
break // See StringWidth() for details.
}
}
if width+chWidth > w {
pos, _ = g.Positions()
pos = g.Start()
break
}
width += chWidth
@ -224,10 +227,10 @@ func (c *Condition) TruncateLeft(s string, w int, prefix string) string {
var width int
pos := len(s)
g := uniseg.NewGraphemes(s)
g := graphemes.FromString(s)
for g.Next() {
var chWidth int
for _, r := range g.Runes() {
for _, r := range g.Value() {
chWidth = c.RuneWidth(r)
if chWidth > 0 {
break // See StringWidth() for details.
@ -236,10 +239,10 @@ func (c *Condition) TruncateLeft(s string, w int, prefix string) string {
if width+chWidth > w {
if width < w {
_, pos = g.Positions()
pos = g.End()
prefix += strings.Repeat(" ", width+chWidth-w)
} else {
pos, _ = g.Positions()
pos = g.Start()
}
break

View File

@ -1,62 +0,0 @@
# Unicode Text Segmentation for Go
[![Godoc Reference](https://img.shields.io/badge/godoc-reference-blue.svg)](https://godoc.org/github.com/rivo/uniseg)
[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg)
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](http://unicode.org/reports/tr29/) (Unicode version 12.0.0).
At this point, only the determination of grapheme cluster boundaries is implemented.
## Background
In Go, [strings are read-only slices of bytes](https://blog.golang.org/strings). They can be turned into Unicode code points using the `for` loop or by casting: `[]rune(str)`. However, multiple code points may be combined into one user-perceived character or what the Unicode specification calls "grapheme cluster". Here are some examples:
|String|Bytes (UTF-8)|Code points (runes)|Grapheme clusters|
|-|-|-|-|
|Käse|6 bytes: `4b 61 cc 88 73 65`|5 code points: `4b 61 308 73 65`|4 clusters: `[4b],[61 308],[73],[65]`|
|🏳️‍🌈|14 bytes: `f0 9f 8f b3 ef b8 8f e2 80 8d f0 9f 8c 88`|4 code points: `1f3f3 fe0f 200d 1f308`|1 cluster: `[1f3f3 fe0f 200d 1f308]`|
|🇩🇪|8 bytes: `f0 9f 87 a9 f0 9f 87 aa`|2 code points: `1f1e9 1f1ea`|1 cluster: `[1f1e9 1f1ea]`|
This package provides a tool to iterate over these grapheme clusters. This may be used to determine the number of user-perceived characters, to split strings in their intended places, or to extract individual characters which form a unit.
## Installation
```bash
go get github.com/rivo/uniseg
```
## Basic Example
```go
package uniseg
import (
"fmt"
"github.com/rivo/uniseg"
)
func main() {
gr := uniseg.NewGraphemes("👍🏼!")
for gr.Next() {
fmt.Printf("%x ", gr.Runes())
}
// Output: [1f44d 1f3fc] [21]
}
```
## Documentation
Refer to https://godoc.org/github.com/rivo/uniseg for the package's documentation.
## Dependencies
This package does not depend on any packages outside the standard library.
## Your Feedback
Add your issue here on GitHub. Feel free to get in touch if you have any questions.
## Version
Version tags will be introduced once Golang modules are official. Consider this version 0.1.

View File

@ -1,8 +0,0 @@
/*
Package uniseg implements Unicode Text Segmentation according to Unicode
Standard Annex #29 (http://unicode.org/reports/tr29/).
At this point, only the determination of grapheme cluster boundaries is
implemented.
*/
package uniseg

View File

@ -1,268 +0,0 @@
package uniseg
import "unicode/utf8"
// The states of the grapheme cluster parser.
const (
grAny = iota
grCR
grControlLF
grL
grLVV
grLVTT
grPrepend
grExtendedPictographic
grExtendedPictographicZWJ
grRIOdd
grRIEven
)
// The grapheme cluster parser's breaking instructions.
const (
grNoBoundary = iota
grBoundary
)
// The grapheme cluster parser's state transitions. Maps (state, property) to
// (new state, breaking instruction, rule number). The breaking instruction
// always refers to the boundary between the last and next code point.
//
// This map is queried as follows:
//
// 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property.
// 3. Find any state + specific property.
// 4. If only (2) or (3) (but not both) was found, stop.
// 5. If both (2) and (3) were found, use state and breaking instruction from
// the transition with the lower rule number, prefer (3) if rule numbers
// are equal. Stop.
// 6. Assume grAny and grBoundary.
var grTransitions = map[[2]int][3]int{
// GB5
{grAny, prCR}: {grCR, grBoundary, 50},
{grAny, prLF}: {grControlLF, grBoundary, 50},
{grAny, prControl}: {grControlLF, grBoundary, 50},
// GB4
{grCR, prAny}: {grAny, grBoundary, 40},
{grControlLF, prAny}: {grAny, grBoundary, 40},
// GB3.
{grCR, prLF}: {grAny, grNoBoundary, 30},
// GB6.
{grAny, prL}: {grL, grBoundary, 9990},
{grL, prL}: {grL, grNoBoundary, 60},
{grL, prV}: {grLVV, grNoBoundary, 60},
{grL, prLV}: {grLVV, grNoBoundary, 60},
{grL, prLVT}: {grLVTT, grNoBoundary, 60},
// GB7.
{grAny, prLV}: {grLVV, grBoundary, 9990},
{grAny, prV}: {grLVV, grBoundary, 9990},
{grLVV, prV}: {grLVV, grNoBoundary, 70},
{grLVV, prT}: {grLVTT, grNoBoundary, 70},
// GB8.
{grAny, prLVT}: {grLVTT, grBoundary, 9990},
{grAny, prT}: {grLVTT, grBoundary, 9990},
{grLVTT, prT}: {grLVTT, grNoBoundary, 80},
// GB9.
{grAny, prExtend}: {grAny, grNoBoundary, 90},
{grAny, prZWJ}: {grAny, grNoBoundary, 90},
// GB9a.
{grAny, prSpacingMark}: {grAny, grNoBoundary, 91},
// GB9b.
{grAny, prPreprend}: {grPrepend, grBoundary, 9990},
{grPrepend, prAny}: {grAny, grNoBoundary, 92},
// GB11.
{grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990},
{grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110},
{grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110},
{grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110},
// GB12 / GB13.
{grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990},
{grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120},
{grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120},
}
// Graphemes implements an iterator over Unicode extended grapheme clusters,
// specified in the Unicode Standard Annex #29. Grapheme clusters correspond to
// "user-perceived characters". These characters often consist of multiple
// code points (e.g. the "woman kissing woman" emoji consists of 8 code points:
// woman + ZWJ + heavy black heart (2 code points) + ZWJ + kiss mark + ZWJ +
// woman) and the rules described in Annex #29 must be applied to group those
// code points into clusters perceived by the user as one character.
type Graphemes struct {
// The code points over which this class iterates.
codePoints []rune
// The (byte-based) indices of the code points into the original string plus
// len(original string). Thus, len(indices) = len(codePoints) + 1.
indices []int
// The current grapheme cluster to be returned. These are indices into
// codePoints/indices. If start == end, we either haven't started iterating
// yet (0) or the iteration has already completed (1).
start, end int
// The index of the next code point to be parsed.
pos int
// The current state of the code point parser.
state int
}
// NewGraphemes returns a new grapheme cluster iterator.
func NewGraphemes(s string) *Graphemes {
l := utf8.RuneCountInString(s)
codePoints := make([]rune, l)
indices := make([]int, l+1)
i := 0
for pos, r := range s {
codePoints[i] = r
indices[i] = pos
i++
}
indices[l] = len(s)
g := &Graphemes{
codePoints: codePoints,
indices: indices,
}
g.Next() // Parse ahead.
return g
}
// Next advances the iterator by one grapheme cluster and returns false if no
// clusters are left. This function must be called before the first cluster is
// accessed.
func (g *Graphemes) Next() bool {
g.start = g.end
// The state transition gives us a boundary instruction BEFORE the next code
// point so we always need to stay ahead by one code point.
// Parse the next code point.
for g.pos <= len(g.codePoints) {
// GB2.
if g.pos == len(g.codePoints) {
g.end = g.pos
g.pos++
break
}
// Determine the property of the next character.
nextProperty := property(g.codePoints[g.pos])
g.pos++
// Find the applicable transition.
var boundary bool
transition, ok := grTransitions[[2]int{g.state, nextProperty}]
if ok {
// We have a specific transition. We'll use it.
g.state = transition[0]
boundary = transition[1] == grBoundary
} else {
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := grTransitions[[2]int{g.state, prAny}]
transAnyState, okAnyState := grTransitions[[2]int{grAny, nextProperty}]
if okAnyProp && okAnyState {
// Both apply. We'll use a mix (see comments for grTransitions).
g.state = transAnyState[0]
boundary = transAnyState[1] == grBoundary
if transAnyProp[2] < transAnyState[2] {
g.state = transAnyProp[0]
boundary = transAnyProp[1] == grBoundary
}
} else if okAnyProp {
// We only have a specific state.
g.state = transAnyProp[0]
boundary = transAnyProp[1] == grBoundary
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
} else if okAnyState {
// We only have a specific property.
g.state = transAnyState[0]
boundary = transAnyState[1] == grBoundary
} else {
// No known transition. GB999: Any x Any.
g.state = grAny
boundary = true
}
}
// If we found a cluster boundary, let's stop here. The current cluster will
// be the one that just ended.
if g.pos-1 == 0 /* GB1 */ || boundary {
g.end = g.pos - 1
break
}
}
return g.start != g.end
}
// Runes returns a slice of runes (code points) which corresponds to the current
// grapheme cluster. If the iterator is already past the end or Next() has not
// yet been called, nil is returned.
func (g *Graphemes) Runes() []rune {
if g.start == g.end {
return nil
}
return g.codePoints[g.start:g.end]
}
// Str returns a substring of the original string which corresponds to the
// current grapheme cluster. If the iterator is already past the end or Next()
// has not yet been called, an empty string is returned.
func (g *Graphemes) Str() string {
if g.start == g.end {
return ""
}
return string(g.codePoints[g.start:g.end])
}
// Bytes returns a byte slice which corresponds to the current grapheme cluster.
// If the iterator is already past the end or Next() has not yet been called,
// nil is returned.
func (g *Graphemes) Bytes() []byte {
if g.start == g.end {
return nil
}
return []byte(string(g.codePoints[g.start:g.end]))
}
// Positions returns the interval of the current grapheme cluster as byte
// positions into the original string. The first returned value "from" indexes
// the first byte and the second returned value "to" indexes the first byte that
// is not included anymore, i.e. str[from:to] is the current grapheme cluster of
// the original string "str". If Next() has not yet been called, both values are
// 0. If the iterator is already past the end, both values are 1.
func (g *Graphemes) Positions() (int, int) {
return g.indices[g.start], g.indices[g.end]
}
// Reset puts the iterator into its initial state such that the next call to
// Next() sets it to the first grapheme cluster again.
func (g *Graphemes) Reset() {
g.start, g.end, g.pos, g.state = 0, 0, 0, grAny
g.Next() // Parse ahead again.
}
// GraphemeClusterCount returns the number of user-perceived characters
// (grapheme clusters) for the given string. To calculate this number, it
// iterates through the string using the Graphemes iterator.
func GraphemeClusterCount(s string) (n int) {
g := NewGraphemes(s)
for g.Next() {
n++
}
return
}

File diff suppressed because it is too large Load Diff

11
vendor/modules.txt vendored
View File

@ -21,6 +21,10 @@ github.com/cenkalti/backoff/v4
# github.com/cespare/xxhash/v2 v2.3.0
## explicit; go 1.11
github.com/cespare/xxhash/v2
# github.com/clipperhouse/uax29/v2 v2.2.0
## explicit; go 1.18
github.com/clipperhouse/uax29/v2/graphemes
github.com/clipperhouse/uax29/v2/internal/iterators
# github.com/containerd/errdefs v1.0.0
## explicit; go 1.20
github.com/containerd/errdefs
@ -154,8 +158,8 @@ github.com/klauspost/compress/internal/le
github.com/klauspost/compress/internal/snapref
github.com/klauspost/compress/zstd
github.com/klauspost/compress/zstd/internal/xxhash
# github.com/mattn/go-runewidth v0.0.17
## explicit; go 1.9
# github.com/mattn/go-runewidth v0.0.19
## explicit; go 1.20
github.com/mattn/go-runewidth
# github.com/miekg/pkcs11 v1.1.1
## explicit; go 1.12
@ -274,9 +278,6 @@ github.com/prometheus/common/model
github.com/prometheus/procfs
github.com/prometheus/procfs/internal/fs
github.com/prometheus/procfs/internal/util
# github.com/rivo/uniseg v0.2.0
## explicit; go 1.12
github.com/rivo/uniseg
# github.com/russross/blackfriday/v2 v2.1.0
## explicit
github.com/russross/blackfriday/v2