This commit is contained in:
208
vendor/github.com/rivo/uniseg/properties.go
generated
vendored
Normal file
208
vendor/github.com/rivo/uniseg/properties.go
generated
vendored
Normal file
@ -0,0 +1,208 @@
|
||||
package uniseg
|
||||
|
||||
// The Unicode properties as used in the various parsers. Only the ones needed
|
||||
// in the context of this package are included.
|
||||
const (
|
||||
prXX = 0 // Same as prAny.
|
||||
prAny = iota // prAny must be 0.
|
||||
prPrepend // Grapheme properties must come first, to reduce the number of bits stored in the state vector.
|
||||
prCR
|
||||
prLF
|
||||
prControl
|
||||
prExtend
|
||||
prRegionalIndicator
|
||||
prSpacingMark
|
||||
prL
|
||||
prV
|
||||
prT
|
||||
prLV
|
||||
prLVT
|
||||
prZWJ
|
||||
prExtendedPictographic
|
||||
prNewline
|
||||
prWSegSpace
|
||||
prDoubleQuote
|
||||
prSingleQuote
|
||||
prMidNumLet
|
||||
prNumeric
|
||||
prMidLetter
|
||||
prMidNum
|
||||
prExtendNumLet
|
||||
prALetter
|
||||
prFormat
|
||||
prHebrewLetter
|
||||
prKatakana
|
||||
prSp
|
||||
prSTerm
|
||||
prClose
|
||||
prSContinue
|
||||
prATerm
|
||||
prUpper
|
||||
prLower
|
||||
prSep
|
||||
prOLetter
|
||||
prCM
|
||||
prBA
|
||||
prBK
|
||||
prSP
|
||||
prEX
|
||||
prQU
|
||||
prAL
|
||||
prPR
|
||||
prPO
|
||||
prOP
|
||||
prCP
|
||||
prIS
|
||||
prHY
|
||||
prSY
|
||||
prNU
|
||||
prCL
|
||||
prNL
|
||||
prGL
|
||||
prAI
|
||||
prBB
|
||||
prHL
|
||||
prSA
|
||||
prJL
|
||||
prJV
|
||||
prJT
|
||||
prNS
|
||||
prZW
|
||||
prB2
|
||||
prIN
|
||||
prWJ
|
||||
prID
|
||||
prEB
|
||||
prCJ
|
||||
prH2
|
||||
prH3
|
||||
prSG
|
||||
prCB
|
||||
prRI
|
||||
prEM
|
||||
prN
|
||||
prNa
|
||||
prA
|
||||
prW
|
||||
prH
|
||||
prF
|
||||
prEmojiPresentation
|
||||
)
|
||||
|
||||
// Unicode General Categories. Only the ones needed in the context of this
|
||||
// package are included.
|
||||
const (
|
||||
gcNone = iota // gcNone must be 0.
|
||||
gcCc
|
||||
gcZs
|
||||
gcPo
|
||||
gcSc
|
||||
gcPs
|
||||
gcPe
|
||||
gcSm
|
||||
gcPd
|
||||
gcNd
|
||||
gcLu
|
||||
gcSk
|
||||
gcPc
|
||||
gcLl
|
||||
gcSo
|
||||
gcLo
|
||||
gcPi
|
||||
gcCf
|
||||
gcNo
|
||||
gcPf
|
||||
gcLC
|
||||
gcLm
|
||||
gcMn
|
||||
gcMe
|
||||
gcMc
|
||||
gcNl
|
||||
gcZl
|
||||
gcZp
|
||||
gcCn
|
||||
gcCs
|
||||
gcCo
|
||||
)
|
||||
|
||||
// Special code points.
|
||||
const (
|
||||
vs15 = 0xfe0e // Variation Selector-15 (text presentation)
|
||||
vs16 = 0xfe0f // Variation Selector-16 (emoji presentation)
|
||||
)
|
||||
|
||||
// propertySearch performs a binary search on a property slice and returns the
|
||||
// entry whose range (start = first array element, end = second array element)
|
||||
// includes r, or an array of 0's if no such entry was found.
|
||||
func propertySearch[E interface{ [3]int | [4]int }](dictionary []E, r rune) (result E) {
|
||||
// Run a binary search.
|
||||
from := 0
|
||||
to := len(dictionary)
|
||||
for to > from {
|
||||
middle := (from + to) / 2
|
||||
cpRange := dictionary[middle]
|
||||
if int(r) < cpRange[0] {
|
||||
to = middle
|
||||
continue
|
||||
}
|
||||
if int(r) > cpRange[1] {
|
||||
from = middle + 1
|
||||
continue
|
||||
}
|
||||
return cpRange
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// property returns the Unicode property value (see constants above) of the
|
||||
// given code point.
|
||||
func property(dictionary [][3]int, r rune) int {
|
||||
return propertySearch(dictionary, r)[2]
|
||||
}
|
||||
|
||||
// propertyLineBreak returns the Unicode property value and General Category
|
||||
// (see constants above) of the given code point, as listed in the line break
|
||||
// code points table, while fast tracking ASCII digits and letters.
|
||||
func propertyLineBreak(r rune) (property, generalCategory int) {
|
||||
if r >= 'a' && r <= 'z' {
|
||||
return prAL, gcLl
|
||||
}
|
||||
if r >= 'A' && r <= 'Z' {
|
||||
return prAL, gcLu
|
||||
}
|
||||
if r >= '0' && r <= '9' {
|
||||
return prNU, gcNd
|
||||
}
|
||||
entry := propertySearch(lineBreakCodePoints, r)
|
||||
return entry[2], entry[3]
|
||||
}
|
||||
|
||||
// propertyGraphemes returns the Unicode grapheme cluster property value of the
|
||||
// given code point while fast tracking ASCII characters.
|
||||
func propertyGraphemes(r rune) int {
|
||||
if r >= 0x20 && r <= 0x7e {
|
||||
return prAny
|
||||
}
|
||||
if r == 0x0a {
|
||||
return prLF
|
||||
}
|
||||
if r == 0x0d {
|
||||
return prCR
|
||||
}
|
||||
if r >= 0 && r <= 0x1f || r == 0x7f {
|
||||
return prControl
|
||||
}
|
||||
return property(graphemeCodePoints, r)
|
||||
}
|
||||
|
||||
// propertyEastAsianWidth returns the Unicode East Asian Width property value of
|
||||
// the given code point while fast tracking ASCII characters.
|
||||
func propertyEastAsianWidth(r rune) int {
|
||||
if r >= 0x20 && r <= 0x7e {
|
||||
return prNa
|
||||
}
|
||||
if r >= 0 && r <= 0x1f || r == 0x7f {
|
||||
return prN
|
||||
}
|
||||
return property(eastAsianWidth, r)
|
||||
}
|
Reference in New Issue
Block a user