chore: make deps
This commit is contained in:
1
vendor/github.com/clipperhouse/displaywidth/.gitignore
generated
vendored
Normal file
1
vendor/github.com/clipperhouse/displaywidth/.gitignore
generated
vendored
Normal file
@ -0,0 +1 @@
|
||||
.DS_Store
|
||||
37
vendor/github.com/clipperhouse/displaywidth/AGENTS.md
generated
vendored
Normal file
37
vendor/github.com/clipperhouse/displaywidth/AGENTS.md
generated
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
The goals and overview of this package can be found in the README.md file,
|
||||
start by reading that.
|
||||
|
||||
The goal of this package is to determine the display (column) width of a
|
||||
string, UTF-8 bytes, or runes, as would happen in a monospace font, especially
|
||||
in a terminal.
|
||||
|
||||
When troubleshooting, write Go unit tests instead of executing debug scripts.
|
||||
The tests can return whatever logs or output you need. If those tests are
|
||||
only for temporary troubleshooting, clean up the tests after the debugging is
|
||||
done.
|
||||
|
||||
(Separate executable debugging scripts are messy, tend to have conflicting
|
||||
dependencies and are hard to cleanup.)
|
||||
|
||||
If you make changes to the trie generation in internal/gen, it can be invoked
|
||||
by running `go generate` from the top package directory.
|
||||
|
||||
## Pull Requests and branches
|
||||
|
||||
For PRs (pull requests), you can use the gh CLI tool to retrieve details,
|
||||
or post comments. Then, compare the current branch with main. Reviewing a PR
|
||||
and reviewing a branch are about the same, but the PR may add context.
|
||||
|
||||
Look for bugs. Think like GitHub Copilot or Cursor BugBot.
|
||||
|
||||
Offer to post a brief summary of the review to the PR, via the gh CLI tool.
|
||||
|
||||
## Comparisons to go-runewidth
|
||||
|
||||
We originally attempted to make this package compatible with go-runewidth.
|
||||
However, we found that there were too many differences in the handling of
|
||||
certain characters and properties.
|
||||
|
||||
We believe, preliminarily, that our choices are more correct and complete,
|
||||
by using more complete categories such as Unicode Cf (format) for zero-width
|
||||
and Mn (Nonspacing_Mark) for combining marks.
|
||||
49
vendor/github.com/clipperhouse/displaywidth/CHANGELOG.md
generated
vendored
Normal file
49
vendor/github.com/clipperhouse/displaywidth/CHANGELOG.md
generated
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
# Changelog
|
||||
|
||||
## [0.5.0]
|
||||
|
||||
[Compare](https://github.com/clipperhouse/displaywidth/compare/v0.4.1...v0.5.0)
|
||||
|
||||
### Added
|
||||
- Unicode 16 support
|
||||
- Improved emoji presentation handling per Unicode TR51
|
||||
|
||||
### Changed
|
||||
- Corrected VS15 (U+FE0E) handling: now preserves base character width (no-op) per Unicode TR51
|
||||
- Performance optimizations: reduced property lookups
|
||||
|
||||
### Fixed
|
||||
- VS15 variation selector now correctly preserves base character width instead of forcing width 1
|
||||
|
||||
## [0.4.1]
|
||||
|
||||
[Compare](https://github.com/clipperhouse/displaywidth/compare/v0.4.0...v0.4.1)
|
||||
|
||||
### Changed
|
||||
- Updated uax29 dependency
|
||||
- Improved flag handling
|
||||
|
||||
## [0.4.0]
|
||||
|
||||
[Compare](https://github.com/clipperhouse/displaywidth/compare/v0.3.1...v0.4.0)
|
||||
|
||||
### Added
|
||||
- Support for variation selectors (VS15, VS16) and regional indicator pairs (flags)
|
||||
|
||||
## [0.3.1]
|
||||
|
||||
[Compare](https://github.com/clipperhouse/displaywidth/compare/v0.3.0...v0.3.1)
|
||||
|
||||
### Added
|
||||
- Fuzz testing support
|
||||
|
||||
### Changed
|
||||
- Updated stringish dependency
|
||||
|
||||
## [0.3.0]
|
||||
|
||||
[Compare](https://github.com/clipperhouse/displaywidth/compare/v0.2.0...v0.3.0)
|
||||
|
||||
### Changed
|
||||
- Dropped compatibility with go-runewidth
|
||||
- Trie implementation cleanup
|
||||
21
vendor/github.com/clipperhouse/displaywidth/LICENSE
generated
vendored
Normal file
21
vendor/github.com/clipperhouse/displaywidth/LICENSE
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Matt Sherman
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
123
vendor/github.com/clipperhouse/displaywidth/README.md
generated
vendored
Normal file
123
vendor/github.com/clipperhouse/displaywidth/README.md
generated
vendored
Normal file
@ -0,0 +1,123 @@
|
||||
# displaywidth
|
||||
|
||||
A high-performance Go package for measuring the monospace display width of strings, UTF-8 bytes, and runes.
|
||||
|
||||
[](https://pkg.go.dev/github.com/clipperhouse/displaywidth)
|
||||
[](https://github.com/clipperhouse/displaywidth/actions/workflows/gotest.yml)
|
||||
[](https://github.com/clipperhouse/displaywidth/actions/workflows/gofuzz.yml)
|
||||
|
||||
## Install
|
||||
```bash
|
||||
go get github.com/clipperhouse/displaywidth
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/clipperhouse/displaywidth"
|
||||
)
|
||||
|
||||
func main() {
|
||||
width := displaywidth.String("Hello, 世界!")
|
||||
fmt.Println(width)
|
||||
|
||||
width = displaywidth.Bytes([]byte("🌍"))
|
||||
fmt.Println(width)
|
||||
|
||||
width = displaywidth.Rune('🌍')
|
||||
fmt.Println(width)
|
||||
}
|
||||
```
|
||||
|
||||
For most purposes, you should use the `String` or `Bytes` methods.
|
||||
|
||||
|
||||
### Options
|
||||
|
||||
You can specify East Asian Width settings. When false (default),
|
||||
[East Asian Ambiguous characters](https://www.unicode.org/reports/tr11/#Ambiguous)
|
||||
are treated as width 1. When true, East Asian Ambiguous characters are treated
|
||||
as width 2.
|
||||
|
||||
```go
|
||||
myOptions := displaywidth.Options{
|
||||
EastAsianWidth: true,
|
||||
}
|
||||
|
||||
width := myOptions.String("Hello, 世界!")
|
||||
fmt.Println(width)
|
||||
```
|
||||
|
||||
## Technical details
|
||||
|
||||
This package implements the Unicode East Asian Width standard
|
||||
([UAX #11](https://www.unicode.org/reports/tr11/)), and handles
|
||||
[version selectors](https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block)),
|
||||
and [regional indicator pairs](https://en.wikipedia.org/wiki/Regional_indicator_symbol)
|
||||
(flags). We implement [Unicode TR51](https://unicode.org/reports/tr51/).
|
||||
|
||||
`clipperhouse/displaywidth`, `mattn/go-runewidth`, and `rivo/uniseg` will
|
||||
give the same outputs for most real-world text. See extensive details in the
|
||||
[compatibility analysis](comparison/COMPATIBILITY_ANALYSIS.md).
|
||||
|
||||
If you wish to investigate the core logic, see the `lookupProperties` and `width`
|
||||
functions in [width.go](width.go#L135). The essential trie generation logic is in
|
||||
`buildPropertyBitmap` in [unicode.go](internal/gen/unicode.go#L317).
|
||||
|
||||
I (@clipperhouse) am keeping an eye on [emerging standards and test suites](https://www.jeffquast.com/post/state-of-terminal-emulation-2025/).
|
||||
|
||||
## Prior Art
|
||||
|
||||
[mattn/go-runewidth](https://github.com/mattn/go-runewidth)
|
||||
|
||||
[rivo/uniseg](https://github.com/rivo/uniseg)
|
||||
|
||||
[x/text/width](https://pkg.go.dev/golang.org/x/text/width)
|
||||
|
||||
[x/text/internal/triegen](https://pkg.go.dev/golang.org/x/text/internal/triegen)
|
||||
|
||||
## Benchmarks
|
||||
|
||||
```bash
|
||||
cd comparison
|
||||
go test -bench=. -benchmem
|
||||
```
|
||||
|
||||
```
|
||||
goos: darwin
|
||||
goarch: arm64
|
||||
pkg: github.com/clipperhouse/displaywidth/comparison
|
||||
cpu: Apple M2
|
||||
|
||||
BenchmarkString_Mixed/clipperhouse/displaywidth-8 10929 ns/op 154.36 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkString_Mixed/mattn/go-runewidth-8 14540 ns/op 116.02 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkString_Mixed/rivo/uniseg-8 19751 ns/op 85.41 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkString_EastAsian/clipperhouse/displaywidth-8 10885 ns/op 154.98 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkString_EastAsian/mattn/go-runewidth-8 23969 ns/op 70.38 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkString_EastAsian/rivo/uniseg-8 19852 ns/op 84.98 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkString_ASCII/clipperhouse/displaywidth-8 1103 ns/op 116.01 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkString_ASCII/mattn/go-runewidth-8 1166 ns/op 109.79 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkString_ASCII/rivo/uniseg-8 1584 ns/op 80.83 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkString_Emoji/clipperhouse/displaywidth-8 3108 ns/op 232.93 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkString_Emoji/mattn/go-runewidth-8 4802 ns/op 150.76 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkString_Emoji/rivo/uniseg-8 6607 ns/op 109.58 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkRune_Mixed/clipperhouse/displaywidth-8 3456 ns/op 488.20 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkRune_Mixed/mattn/go-runewidth-8 5400 ns/op 312.39 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkRune_EastAsian/clipperhouse/displaywidth-8 3475 ns/op 485.41 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkRune_EastAsian/mattn/go-runewidth-8 15701 ns/op 107.44 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkRune_ASCII/clipperhouse/displaywidth-8 257.0 ns/op 498.13 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkRune_ASCII/mattn/go-runewidth-8 266.4 ns/op 480.50 MB/s 0 B/op 0 allocs/op
|
||||
|
||||
BenchmarkRune_Emoji/clipperhouse/displaywidth-8 1384 ns/op 523.02 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkRune_Emoji/mattn/go-runewidth-8 2273 ns/op 318.45 MB/s 0 B/op 0 allocs/op
|
||||
```
|
||||
3
vendor/github.com/clipperhouse/displaywidth/gen.go
generated
vendored
Normal file
3
vendor/github.com/clipperhouse/displaywidth/gen.go
generated
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
package displaywidth
|
||||
|
||||
//go:generate go run -C internal/gen .
|
||||
1716
vendor/github.com/clipperhouse/displaywidth/trie.go
generated
vendored
Normal file
1716
vendor/github.com/clipperhouse/displaywidth/trie.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
210
vendor/github.com/clipperhouse/displaywidth/width.go
generated
vendored
Normal file
210
vendor/github.com/clipperhouse/displaywidth/width.go
generated
vendored
Normal file
@ -0,0 +1,210 @@
|
||||
package displaywidth
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/clipperhouse/stringish"
|
||||
"github.com/clipperhouse/uax29/v2/graphemes"
|
||||
)
|
||||
|
||||
// String calculates the display width of a string,
|
||||
// by iterating over grapheme clusters in the string
|
||||
// and summing their widths.
|
||||
func String(s string) int {
|
||||
return DefaultOptions.String(s)
|
||||
}
|
||||
|
||||
// Bytes calculates the display width of a []byte,
|
||||
// by iterating over grapheme clusters in the byte slice
|
||||
// and summing their widths.
|
||||
func Bytes(s []byte) int {
|
||||
return DefaultOptions.Bytes(s)
|
||||
}
|
||||
|
||||
// Rune calculates the display width of a rune. You
|
||||
// should almost certainly use [String] or [Bytes] for
|
||||
// most purposes.
|
||||
//
|
||||
// The smallest unit of display width is a grapheme
|
||||
// cluster, not a rune. Iterating over runes to measure
|
||||
// width is incorrect in most cases.
|
||||
func Rune(r rune) int {
|
||||
return DefaultOptions.Rune(r)
|
||||
}
|
||||
|
||||
// Options allows you to specify the treatment of ambiguous East Asian
|
||||
// characters. When EastAsianWidth is false (default), ambiguous East Asian
|
||||
// characters are treated as width 1. When EastAsianWidth is true, ambiguous
|
||||
// East Asian characters are treated as width 2.
|
||||
type Options struct {
|
||||
EastAsianWidth bool
|
||||
}
|
||||
|
||||
// DefaultOptions is the default options for the display width
|
||||
// calculation, which is EastAsianWidth: false.
|
||||
var DefaultOptions = Options{EastAsianWidth: false}
|
||||
|
||||
// String calculates the display width of a string,
|
||||
// for the given options, by iterating over grapheme clusters
|
||||
// and summing their widths.
|
||||
func (options Options) String(s string) int {
|
||||
if len(s) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
total := 0
|
||||
g := graphemes.FromString(s)
|
||||
for g.Next() {
|
||||
props := lookupProperties(g.Value())
|
||||
total += props.width(options)
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// Bytes calculates the display width of a []byte,
|
||||
// for the given options, by iterating over grapheme
|
||||
// clusters in the byte slice and summing their widths.
|
||||
func (options Options) Bytes(s []byte) int {
|
||||
if len(s) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
total := 0
|
||||
g := graphemes.FromBytes(s)
|
||||
for g.Next() {
|
||||
props := lookupProperties(g.Value())
|
||||
total += props.width(options)
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// Rune calculates the display width of a rune,
|
||||
// for the given options.
|
||||
//
|
||||
// The smallest unit of display width is a grapheme
|
||||
// cluster, not a rune. Iterating over runes to measure
|
||||
// width is incorrect in most cases.
|
||||
func (options Options) Rune(r rune) int {
|
||||
// Fast path for ASCII
|
||||
if r < utf8.RuneSelf {
|
||||
if isASCIIControl(byte(r)) {
|
||||
// Control (0x00-0x1F) and DEL (0x7F)
|
||||
return 0
|
||||
}
|
||||
// ASCII printable (0x20-0x7E)
|
||||
return 1
|
||||
}
|
||||
|
||||
// Surrogates (U+D800-U+DFFF) are invalid UTF-8 and have zero width
|
||||
// Other packages might turn them into the replacement character (U+FFFD)
|
||||
// in which case, we won't see it.
|
||||
if r >= 0xD800 && r <= 0xDFFF {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Stack-allocated to avoid heap allocation
|
||||
var buf [4]byte // UTF-8 is at most 4 bytes
|
||||
n := utf8.EncodeRune(buf[:], r)
|
||||
// Skip the grapheme iterator and directly lookup properties
|
||||
props := lookupProperties(buf[:n])
|
||||
return props.width(options)
|
||||
}
|
||||
|
||||
func isASCIIControl(b byte) bool {
|
||||
return b < 0x20 || b == 0x7F
|
||||
}
|
||||
|
||||
// isRIPrefix checks if the slice matches the Regional Indicator prefix
|
||||
// (F0 9F 87). It assumes len(s) >= 3.
|
||||
func isRIPrefix[T stringish.Interface](s T) bool {
|
||||
return s[0] == 0xF0 && s[1] == 0x9F && s[2] == 0x87
|
||||
}
|
||||
|
||||
// isVS16 checks if the slice matches VS16 (U+FE0F) UTF-8 encoding
|
||||
// (EF B8 8F). It assumes len(s) >= 3.
|
||||
func isVS16[T stringish.Interface](s T) bool {
|
||||
return s[0] == 0xEF && s[1] == 0xB8 && s[2] == 0x8F
|
||||
}
|
||||
|
||||
// lookupProperties returns the properties for the first character in a string
|
||||
func lookupProperties[T stringish.Interface](s T) property {
|
||||
l := len(s)
|
||||
|
||||
if l == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
b := s[0]
|
||||
if isASCIIControl(b) {
|
||||
return _Zero_Width
|
||||
}
|
||||
|
||||
if b < utf8.RuneSelf {
|
||||
// Check for variation selector after ASCII (e.g., keycap sequences like 1️⃣)
|
||||
if l >= 4 {
|
||||
// Subslice may help eliminate bounds checks
|
||||
vs := s[1:4]
|
||||
if isVS16(vs) {
|
||||
// VS16 requests emoji presentation (width 2)
|
||||
return _Emoji
|
||||
}
|
||||
// VS15 (0x8E) requests text presentation but does not affect width,
|
||||
// in my reading of Unicode TR51. Falls through to _Default.
|
||||
}
|
||||
return _Default
|
||||
}
|
||||
|
||||
// Regional indicator pair (flag)
|
||||
if l >= 8 {
|
||||
// Subslice may help eliminate bounds checks
|
||||
ri := s[:8]
|
||||
if isRIPrefix(ri[0:3]) {
|
||||
b3 := ri[3]
|
||||
if b3 >= 0xA6 && b3 <= 0xBF && isRIPrefix(ri[4:7]) {
|
||||
b7 := ri[7]
|
||||
if b7 >= 0xA6 && b7 <= 0xBF {
|
||||
return _Emoji
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
props, size := lookup(s)
|
||||
p := property(props)
|
||||
|
||||
// Variation Selectors
|
||||
if size > 0 && l >= size+3 {
|
||||
// Subslice may help eliminate bounds checks
|
||||
vs := s[size : size+3]
|
||||
if isVS16(vs) {
|
||||
// VS16 requests emoji presentation (width 2)
|
||||
return _Emoji
|
||||
}
|
||||
// VS15 (0x8E) requests text presentation but does not affect width,
|
||||
// in my reading of Unicode TR51. Falls through to return the base
|
||||
// character's property (p).
|
||||
}
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
const _Default property = 0
|
||||
|
||||
// a jump table of sorts, instead of a switch
|
||||
var widthTable = [5]int{
|
||||
_Default: 1,
|
||||
_Zero_Width: 0,
|
||||
_East_Asian_Wide: 2,
|
||||
_East_Asian_Ambiguous: 1,
|
||||
_Emoji: 2,
|
||||
}
|
||||
|
||||
// width determines the display width of a character based on its properties
|
||||
// and configuration options
|
||||
func (p property) width(options Options) int {
|
||||
if options.EastAsianWidth && p == _East_Asian_Ambiguous {
|
||||
return 2
|
||||
}
|
||||
|
||||
return widthTable[p]
|
||||
}
|
||||
2
vendor/github.com/clipperhouse/stringish/.gitignore
generated
vendored
Normal file
2
vendor/github.com/clipperhouse/stringish/.gitignore
generated
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
.DS_Store
|
||||
*.test
|
||||
21
vendor/github.com/clipperhouse/stringish/LICENSE
generated
vendored
Normal file
21
vendor/github.com/clipperhouse/stringish/LICENSE
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Matt Sherman
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
64
vendor/github.com/clipperhouse/stringish/README.md
generated
vendored
Normal file
64
vendor/github.com/clipperhouse/stringish/README.md
generated
vendored
Normal file
@ -0,0 +1,64 @@
|
||||
# stringish
|
||||
|
||||
A small Go module that provides a generic type constraint for “string-like”
|
||||
data, and a utf8 package that works with both strings and byte slices
|
||||
without conversions.
|
||||
|
||||
```go
|
||||
type Interface interface {
|
||||
~[]byte | ~string
|
||||
}
|
||||
```
|
||||
|
||||
[](https://pkg.go.dev/github.com/clipperhouse/stringish/utf8)
|
||||
[](https://github.com/clipperhouse/stringish/actions/workflows/gotest.yml)
|
||||
|
||||
## Install
|
||||
|
||||
```
|
||||
go get github.com/clipperhouse/stringish
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
```go
|
||||
import (
|
||||
"github.com/clipperhouse/stringish"
|
||||
"github.com/clipperhouse/stringish/utf8"
|
||||
)
|
||||
|
||||
s := "Hello, 世界"
|
||||
r, size := utf8.DecodeRune(s) // not DecodeRuneInString 🎉
|
||||
|
||||
b := []byte("Hello, 世界")
|
||||
r, size = utf8.DecodeRune(b) // same API!
|
||||
|
||||
func MyFoo[T stringish.Interface](s T) T {
|
||||
// pass a string or a []byte
|
||||
// iterate, slice, transform, whatever
|
||||
}
|
||||
```
|
||||
|
||||
## Motivation
|
||||
|
||||
Sometimes we want APIs to accept `string` or `[]byte` without having to convert
|
||||
between those types. That conversion usually allocates!
|
||||
|
||||
By implementing with `stringish.Interface`, we can have a single API, and
|
||||
single implementation for both types: one `Foo` instead of `Foo` and
|
||||
`FooString`.
|
||||
|
||||
We have converted the
|
||||
[`unicode/utf8` package](https://github.com/clipperhouse/stringish/blob/main/utf8/utf8.go)
|
||||
as an example -- note the absence of`*InString` funcs. We might look at `x/text`
|
||||
next.
|
||||
|
||||
## Used by
|
||||
|
||||
- clipperhouse/uax29: [stringish trie](https://github.com/clipperhouse/uax29/blob/master/graphemes/trie.go#L27), [stringish iterator](https://github.com/clipperhouse/uax29/blob/master/internal/iterators/iterator.go#L9), [stringish SplitFunc](https://github.com/clipperhouse/uax29/blob/master/graphemes/splitfunc.go#L21)
|
||||
|
||||
- [clipperhouse/displaywidth](https://github.com/clipperhouse/displaywidth)
|
||||
|
||||
## Prior discussion
|
||||
|
||||
- [Consideration of similar by the Go team](https://github.com/golang/go/issues/48643)
|
||||
5
vendor/github.com/clipperhouse/stringish/interface.go
generated
vendored
Normal file
5
vendor/github.com/clipperhouse/stringish/interface.go
generated
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
package stringish
|
||||
|
||||
type Interface interface {
|
||||
~[]byte | ~string
|
||||
}
|
||||
24
vendor/github.com/clipperhouse/uax29/v2/graphemes/README.md
generated
vendored
24
vendor/github.com/clipperhouse/uax29/v2/graphemes/README.md
generated
vendored
@ -1,5 +1,9 @@
|
||||
An implementation of grapheme cluster boundaries from [Unicode text segmentation](https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries) (UAX 29), for Unicode version 15.0.0.
|
||||
|
||||
[](https://pkg.go.dev/github.com/clipperhouse/uax29/v2/graphemes)
|
||||

|
||||

|
||||
|
||||
## Quick start
|
||||
|
||||
```
|
||||
@ -18,15 +22,14 @@ for tokens.Next() { // Next() returns true until end of data
|
||||
}
|
||||
```
|
||||
|
||||
[](https://pkg.go.dev/github.com/clipperhouse/uax29/v2/graphemes)
|
||||
|
||||
_A grapheme is a “single visible character”, which might be a simple as a single letter, or a complex emoji that consists of several Unicode code points._
|
||||
|
||||
## Conformance
|
||||
|
||||
We use the Unicode [test suite](https://unicode.org/reports/tr41/tr41-26.html#Tests29). Status:
|
||||
We use the Unicode [test suite](https://unicode.org/reports/tr41/tr41-26.html#Tests29).
|
||||
|
||||

|
||||

|
||||

|
||||
|
||||
## APIs
|
||||
|
||||
@ -71,9 +74,18 @@ for tokens.Next() { // Next() returns true until end of data
|
||||
}
|
||||
```
|
||||
|
||||
### Performance
|
||||
### Benchmarks
|
||||
|
||||
On a Mac M2 laptop, we see around 200MB/s, or around 100 million graphemes per second. You should see ~constant memory, and no allocations.
|
||||
On a Mac M2 laptop, we see around 200MB/s, or around 100 million graphemes per second, and no allocations.
|
||||
|
||||
```
|
||||
goos: darwin
|
||||
goarch: arm64
|
||||
pkg: github.com/clipperhouse/uax29/graphemes/comparative
|
||||
cpu: Apple M2
|
||||
BenchmarkGraphemes/clipperhouse/uax29-8 173805 ns/op 201.16 MB/s 0 B/op 0 allocs/op
|
||||
BenchmarkGraphemes/rivo/uniseg-8 2045128 ns/op 17.10 MB/s 0 B/op 0 allocs/op
|
||||
```
|
||||
|
||||
### Invalid inputs
|
||||
|
||||
|
||||
7
vendor/github.com/clipperhouse/uax29/v2/graphemes/iterator.go
generated
vendored
7
vendor/github.com/clipperhouse/uax29/v2/graphemes/iterator.go
generated
vendored
@ -1,8 +1,11 @@
|
||||
package graphemes
|
||||
|
||||
import "github.com/clipperhouse/uax29/v2/internal/iterators"
|
||||
import (
|
||||
"github.com/clipperhouse/stringish"
|
||||
"github.com/clipperhouse/uax29/v2/internal/iterators"
|
||||
)
|
||||
|
||||
type Iterator[T iterators.Stringish] struct {
|
||||
type Iterator[T stringish.Interface] struct {
|
||||
*iterators.Iterator[T]
|
||||
}
|
||||
|
||||
|
||||
4
vendor/github.com/clipperhouse/uax29/v2/graphemes/splitfunc.go
generated
vendored
4
vendor/github.com/clipperhouse/uax29/v2/graphemes/splitfunc.go
generated
vendored
@ -3,7 +3,7 @@ package graphemes
|
||||
import (
|
||||
"bufio"
|
||||
|
||||
"github.com/clipperhouse/uax29/v2/internal/iterators"
|
||||
"github.com/clipperhouse/stringish"
|
||||
)
|
||||
|
||||
// is determines if lookup intersects propert(ies)
|
||||
@ -18,7 +18,7 @@ const _Ignore = _Extend
|
||||
// See https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
|
||||
var SplitFunc bufio.SplitFunc = splitFunc[[]byte]
|
||||
|
||||
func splitFunc[T iterators.Stringish](data T, atEOF bool) (advance int, token T, err error) {
|
||||
func splitFunc[T stringish.Interface](data T, atEOF bool) (advance int, token T, err error) {
|
||||
var empty T
|
||||
if len(data) == 0 {
|
||||
return 0, empty, nil
|
||||
|
||||
6
vendor/github.com/clipperhouse/uax29/v2/graphemes/trie.go
generated
vendored
6
vendor/github.com/clipperhouse/uax29/v2/graphemes/trie.go
generated
vendored
@ -1,10 +1,10 @@
|
||||
package graphemes
|
||||
|
||||
import "github.com/clipperhouse/stringish"
|
||||
|
||||
// generated by github.com/clipperhouse/uax29/v2
|
||||
// from https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
|
||||
|
||||
import "github.com/clipperhouse/uax29/v2/internal/iterators"
|
||||
|
||||
type property uint16
|
||||
|
||||
const (
|
||||
@ -27,7 +27,7 @@ const (
|
||||
// lookup returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func lookup[T iterators.Stringish](s T) (v property, sz int) {
|
||||
func lookup[T stringish.Interface](s T) (v property, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
|
||||
27
vendor/github.com/clipperhouse/uax29/v2/internal/iterators/iterator.go
generated
vendored
27
vendor/github.com/clipperhouse/uax29/v2/internal/iterators/iterator.go
generated
vendored
@ -1,14 +1,12 @@
|
||||
package iterators
|
||||
|
||||
type Stringish interface {
|
||||
[]byte | string
|
||||
}
|
||||
import "github.com/clipperhouse/stringish"
|
||||
|
||||
type SplitFunc[T Stringish] func(T, bool) (int, T, error)
|
||||
type SplitFunc[T stringish.Interface] func(T, bool) (int, T, error)
|
||||
|
||||
// Iterator is a generic iterator for words that are either []byte or string.
|
||||
// Iterate while Next() is true, and access the word via Value().
|
||||
type Iterator[T Stringish] struct {
|
||||
type Iterator[T stringish.Interface] struct {
|
||||
split SplitFunc[T]
|
||||
data T
|
||||
start int
|
||||
@ -16,7 +14,7 @@ type Iterator[T Stringish] struct {
|
||||
}
|
||||
|
||||
// New creates a new Iterator for the given data and SplitFunc.
|
||||
func New[T Stringish](split SplitFunc[T], data T) *Iterator[T] {
|
||||
func New[T stringish.Interface](split SplitFunc[T], data T) *Iterator[T] {
|
||||
return &Iterator[T]{
|
||||
split: split,
|
||||
data: data,
|
||||
@ -83,3 +81,20 @@ func (iter *Iterator[T]) Reset() {
|
||||
iter.start = 0
|
||||
iter.pos = 0
|
||||
}
|
||||
|
||||
func (iter *Iterator[T]) First() T {
|
||||
if len(iter.data) == 0 {
|
||||
return iter.data
|
||||
}
|
||||
advance, _, err := iter.split(iter.data, true)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if advance <= 0 {
|
||||
panic("SplitFunc returned a zero or negative advance")
|
||||
}
|
||||
if advance > len(iter.data) {
|
||||
panic("SplitFunc advanced beyond the end of the data")
|
||||
}
|
||||
return iter.data[:advance]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user