forked from toolshed/abra
We were running behind upstream and there were quite a few deprecations to update. The changes were mostly in the copied upstream package and appear to be minimal.
525 lines
15 KiB
Go
525 lines
15 KiB
Go
package ansi
|
||
|
||
import (
|
||
"unicode/utf8"
|
||
|
||
"github.com/charmbracelet/x/ansi/parser"
|
||
"github.com/mattn/go-runewidth"
|
||
"github.com/rivo/uniseg"
|
||
)
|
||
|
||
// State represents the state of the ANSI escape sequence parser used by
// [DecodeSequence].
type State = byte

// ANSI escape sequence states used by [DecodeSequence]. The zero value,
// NormalState, is the state a fresh decode starts from.
const (
	NormalState State = iota
	PrefixState
	ParamsState
	IntermedState
	EscapeState
	StringState
)
|
||
|
||
// DecodeSequence decodes the first ANSI escape sequence or a printable
|
||
// grapheme from the given data. It returns the sequence slice, the number of
|
||
// bytes read, the cell width for each sequence, and the new state.
|
||
//
|
||
// The cell width will always be 0 for control and escape sequences, 1 for
|
||
// ASCII printable characters, and the number of cells other Unicode characters
|
||
// occupy. It uses the uniseg package to calculate the width of Unicode
|
||
// graphemes and characters. This means it will always do grapheme clustering
|
||
// (mode 2027).
|
||
//
|
||
// Passing a non-nil [*Parser] as the last argument will allow the decoder to
|
||
// collect sequence parameters, data, and commands. The parser cmd will have
|
||
// the packed command value that contains intermediate and prefix characters.
|
||
// In the case of a OSC sequence, the cmd will be the OSC command number. Use
|
||
// [Cmd] and [Param] types to unpack command intermediates and prefixes as well
|
||
// as parameters.
|
||
//
|
||
// Zero [Cmd] means the CSI, DCS, or ESC sequence is invalid. Moreover, checking the
|
||
// validity of other data sequences, OSC, DCS, etc, will require checking for
|
||
// the returned sequence terminator bytes such as ST (ESC \\) and BEL).
|
||
//
|
||
// We store the command byte in [Cmd] in the most significant byte, the
|
||
// prefix byte in the next byte, and the intermediate byte in the least
|
||
// significant byte. This is done to avoid using a struct to store the command
|
||
// and its intermediates and prefixes. The command byte is always the least
|
||
// significant byte i.e. [Cmd & 0xff]. Use the [Cmd] type to unpack the
|
||
// command, intermediate, and prefix bytes. Note that we only collect the last
|
||
// prefix character and intermediate byte.
|
||
//
|
||
// The [p.Params] slice will contain the parameters of the sequence. Any
|
||
// sub-parameter will have the [parser.HasMoreFlag] set. Use the [Param] type
|
||
// to unpack the parameters.
|
||
//
|
||
// Example:
|
||
//
|
||
// var state byte // the initial state is always zero [NormalState]
|
||
// p := NewParser(32, 1024) // create a new parser with a 32 params buffer and 1024 data buffer (optional)
|
||
// input := []byte("\x1b[31mHello, World!\x1b[0m")
|
||
// for len(input) > 0 {
|
||
// seq, width, n, newState := DecodeSequence(input, state, p)
|
||
// log.Printf("seq: %q, width: %d", seq, width)
|
||
// state = newState
|
||
// input = input[n:]
|
||
// }
|
||
//
|
||
// This function treats the text as a sequence of grapheme clusters.
|
||
func DecodeSequence[T string | []byte](b T, state byte, p *Parser) (seq T, width int, n int, newState byte) {
|
||
return decodeSequence(GraphemeWidth, b, state, p)
|
||
}
|
||
|
||
// DecodeSequenceWc decodes the first ANSI escape sequence or a printable
|
||
// grapheme from the given data. It returns the sequence slice, the number of
|
||
// bytes read, the cell width for each sequence, and the new state.
|
||
//
|
||
// The cell width will always be 0 for control and escape sequences, 1 for
|
||
// ASCII printable characters, and the number of cells other Unicode characters
|
||
// occupy. It uses the uniseg package to calculate the width of Unicode
|
||
// graphemes and characters. This means it will always do grapheme clustering
|
||
// (mode 2027).
|
||
//
|
||
// Passing a non-nil [*Parser] as the last argument will allow the decoder to
|
||
// collect sequence parameters, data, and commands. The parser cmd will have
|
||
// the packed command value that contains intermediate and prefix characters.
|
||
// In the case of a OSC sequence, the cmd will be the OSC command number. Use
|
||
// [Cmd] and [Param] types to unpack command intermediates and prefixes as well
|
||
// as parameters.
|
||
//
|
||
// Zero [Cmd] means the CSI, DCS, or ESC sequence is invalid. Moreover, checking the
|
||
// validity of other data sequences, OSC, DCS, etc, will require checking for
|
||
// the returned sequence terminator bytes such as ST (ESC \\) and BEL).
|
||
//
|
||
// We store the command byte in [Cmd] in the most significant byte, the
|
||
// prefix byte in the next byte, and the intermediate byte in the least
|
||
// significant byte. This is done to avoid using a struct to store the command
|
||
// and its intermediates and prefixes. The command byte is always the least
|
||
// significant byte i.e. [Cmd & 0xff]. Use the [Cmd] type to unpack the
|
||
// command, intermediate, and prefix bytes. Note that we only collect the last
|
||
// prefix character and intermediate byte.
|
||
//
|
||
// The [p.Params] slice will contain the parameters of the sequence. Any
|
||
// sub-parameter will have the [parser.HasMoreFlag] set. Use the [Param] type
|
||
// to unpack the parameters.
|
||
//
|
||
// Example:
|
||
//
|
||
// var state byte // the initial state is always zero [NormalState]
|
||
// p := NewParser(32, 1024) // create a new parser with a 32 params buffer and 1024 data buffer (optional)
|
||
// input := []byte("\x1b[31mHello, World!\x1b[0m")
|
||
// for len(input) > 0 {
|
||
// seq, width, n, newState := DecodeSequenceWc(input, state, p)
|
||
// log.Printf("seq: %q, width: %d", seq, width)
|
||
// state = newState
|
||
// input = input[n:]
|
||
// }
|
||
//
|
||
// This function treats the text as a sequence of wide characters and runes.
|
||
func DecodeSequenceWc[T string | []byte](b T, state byte, p *Parser) (seq T, width int, n int, newState byte) {
|
||
return decodeSequence(WcWidth, b, state, p)
|
||
}
|
||
|
||
func decodeSequence[T string | []byte](m Method, b T, state State, p *Parser) (seq T, width int, n int, newState byte) {
|
||
for i := 0; i < len(b); i++ {
|
||
c := b[i]
|
||
|
||
switch state {
|
||
case NormalState:
|
||
switch c {
|
||
case ESC:
|
||
if p != nil {
|
||
if len(p.params) > 0 {
|
||
p.params[0] = parser.MissingParam
|
||
}
|
||
p.cmd = 0
|
||
p.paramsLen = 0
|
||
p.dataLen = 0
|
||
}
|
||
state = EscapeState
|
||
continue
|
||
case CSI, DCS:
|
||
if p != nil {
|
||
if len(p.params) > 0 {
|
||
p.params[0] = parser.MissingParam
|
||
}
|
||
p.cmd = 0
|
||
p.paramsLen = 0
|
||
p.dataLen = 0
|
||
}
|
||
state = PrefixState
|
||
continue
|
||
case OSC, APC, SOS, PM:
|
||
if p != nil {
|
||
p.cmd = parser.MissingCommand
|
||
p.dataLen = 0
|
||
}
|
||
state = StringState
|
||
continue
|
||
}
|
||
|
||
if p != nil {
|
||
p.dataLen = 0
|
||
p.paramsLen = 0
|
||
p.cmd = 0
|
||
}
|
||
if c > US && c < DEL {
|
||
// ASCII printable characters
|
||
return b[i : i+1], 1, 1, NormalState
|
||
}
|
||
|
||
if c <= US || c == DEL || c < 0xC0 {
|
||
// C0 & C1 control characters & DEL
|
||
return b[i : i+1], 0, 1, NormalState
|
||
}
|
||
|
||
if utf8.RuneStart(c) {
|
||
seq, _, width, _ = FirstGraphemeCluster(b, -1)
|
||
if m == WcWidth {
|
||
width = runewidth.StringWidth(string(seq))
|
||
}
|
||
i += len(seq)
|
||
return b[:i], width, i, NormalState
|
||
}
|
||
|
||
// Invalid UTF-8 sequence
|
||
return b[:i], 0, i, NormalState
|
||
case PrefixState:
|
||
if c >= '<' && c <= '?' {
|
||
if p != nil {
|
||
// We only collect the last prefix character.
|
||
p.cmd &^= 0xff << parser.PrefixShift
|
||
p.cmd |= int(c) << parser.PrefixShift
|
||
}
|
||
break
|
||
}
|
||
|
||
state = ParamsState
|
||
fallthrough
|
||
case ParamsState:
|
||
if c >= '0' && c <= '9' {
|
||
if p != nil {
|
||
if p.params[p.paramsLen] == parser.MissingParam {
|
||
p.params[p.paramsLen] = 0
|
||
}
|
||
|
||
p.params[p.paramsLen] *= 10
|
||
p.params[p.paramsLen] += int(c - '0')
|
||
}
|
||
break
|
||
}
|
||
|
||
if c == ':' {
|
||
if p != nil {
|
||
p.params[p.paramsLen] |= parser.HasMoreFlag
|
||
}
|
||
}
|
||
|
||
if c == ';' || c == ':' {
|
||
if p != nil {
|
||
p.paramsLen++
|
||
if p.paramsLen < len(p.params) {
|
||
p.params[p.paramsLen] = parser.MissingParam
|
||
}
|
||
}
|
||
break
|
||
}
|
||
|
||
state = IntermedState
|
||
fallthrough
|
||
case IntermedState:
|
||
if c >= ' ' && c <= '/' {
|
||
if p != nil {
|
||
p.cmd &^= 0xff << parser.IntermedShift
|
||
p.cmd |= int(c) << parser.IntermedShift
|
||
}
|
||
break
|
||
}
|
||
|
||
if p != nil {
|
||
// Increment the last parameter
|
||
if p.paramsLen > 0 && p.paramsLen < len(p.params)-1 ||
|
||
p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam {
|
||
p.paramsLen++
|
||
}
|
||
}
|
||
|
||
if c >= '@' && c <= '~' {
|
||
if p != nil {
|
||
p.cmd &^= 0xff
|
||
p.cmd |= int(c)
|
||
}
|
||
|
||
if HasDcsPrefix(b) {
|
||
// Continue to collect DCS data
|
||
if p != nil {
|
||
p.dataLen = 0
|
||
}
|
||
state = StringState
|
||
continue
|
||
}
|
||
|
||
return b[:i+1], 0, i + 1, NormalState
|
||
}
|
||
|
||
// Invalid CSI/DCS sequence
|
||
return b[:i], 0, i, NormalState
|
||
case EscapeState:
|
||
switch c {
|
||
case '[', 'P':
|
||
if p != nil {
|
||
if len(p.params) > 0 {
|
||
p.params[0] = parser.MissingParam
|
||
}
|
||
p.paramsLen = 0
|
||
p.cmd = 0
|
||
}
|
||
state = PrefixState
|
||
continue
|
||
case ']', 'X', '^', '_':
|
||
if p != nil {
|
||
p.cmd = parser.MissingCommand
|
||
p.dataLen = 0
|
||
}
|
||
state = StringState
|
||
continue
|
||
}
|
||
|
||
if c >= ' ' && c <= '/' {
|
||
if p != nil {
|
||
p.cmd &^= 0xff << parser.IntermedShift
|
||
p.cmd |= int(c) << parser.IntermedShift
|
||
}
|
||
continue
|
||
} else if c >= '0' && c <= '~' {
|
||
if p != nil {
|
||
p.cmd &^= 0xff
|
||
p.cmd |= int(c)
|
||
}
|
||
return b[:i+1], 0, i + 1, NormalState
|
||
}
|
||
|
||
// Invalid escape sequence
|
||
return b[:i], 0, i, NormalState
|
||
case StringState:
|
||
switch c {
|
||
case BEL:
|
||
if HasOscPrefix(b) {
|
||
parseOscCmd(p)
|
||
return b[:i+1], 0, i + 1, NormalState
|
||
}
|
||
case CAN, SUB:
|
||
if HasOscPrefix(b) {
|
||
// Ensure we parse the OSC command number
|
||
parseOscCmd(p)
|
||
}
|
||
|
||
// Cancel the sequence
|
||
return b[:i], 0, i, NormalState
|
||
case ST:
|
||
if HasOscPrefix(b) {
|
||
// Ensure we parse the OSC command number
|
||
parseOscCmd(p)
|
||
}
|
||
|
||
return b[:i+1], 0, i + 1, NormalState
|
||
case ESC:
|
||
if HasStPrefix(b[i:]) {
|
||
if HasOscPrefix(b) {
|
||
// Ensure we parse the OSC command number
|
||
parseOscCmd(p)
|
||
}
|
||
|
||
// End of string 7-bit (ST)
|
||
return b[:i+2], 0, i + 2, NormalState
|
||
}
|
||
|
||
// Otherwise, cancel the sequence
|
||
return b[:i], 0, i, NormalState
|
||
}
|
||
|
||
if p != nil && p.dataLen < len(p.data) {
|
||
p.data[p.dataLen] = c
|
||
p.dataLen++
|
||
|
||
// Parse the OSC command number
|
||
if c == ';' && HasOscPrefix(b) {
|
||
parseOscCmd(p)
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return b, 0, len(b), state
|
||
}
|
||
|
||
func parseOscCmd(p *Parser) {
|
||
if p == nil || p.cmd != parser.MissingCommand {
|
||
return
|
||
}
|
||
for j := 0; j < p.dataLen; j++ {
|
||
d := p.data[j]
|
||
if d < '0' || d > '9' {
|
||
break
|
||
}
|
||
if p.cmd == parser.MissingCommand {
|
||
p.cmd = 0
|
||
}
|
||
p.cmd *= 10
|
||
p.cmd += int(d - '0')
|
||
}
|
||
}
|
||
|
||
// Equal returns true if the given strings or byte slices have the same
// contents.
func Equal[T string | []byte](a, b T) bool {
	return string(a) == string(b)
}
|
||
|
||
// HasPrefix returns true if the given byte slice has prefix.
|
||
func HasPrefix[T string | []byte](b, prefix T) bool {
|
||
return len(b) >= len(prefix) && Equal(b[0:len(prefix)], prefix)
|
||
}
|
||
|
||
// HasSuffix returns true if the given byte slice has suffix.
|
||
func HasSuffix[T string | []byte](b, suffix T) bool {
|
||
return len(b) >= len(suffix) && Equal(b[len(b)-len(suffix):], suffix)
|
||
}
|
||
|
||
// HasCsiPrefix returns true if the given byte slice has a CSI prefix.
|
||
func HasCsiPrefix[T string | []byte](b T) bool {
|
||
return (len(b) > 0 && b[0] == CSI) ||
|
||
(len(b) > 1 && b[0] == ESC && b[1] == '[')
|
||
}
|
||
|
||
// HasOscPrefix returns true if the given byte slice has an OSC prefix.
|
||
func HasOscPrefix[T string | []byte](b T) bool {
|
||
return (len(b) > 0 && b[0] == OSC) ||
|
||
(len(b) > 1 && b[0] == ESC && b[1] == ']')
|
||
}
|
||
|
||
// HasApcPrefix returns true if the given byte slice has an APC prefix.
|
||
func HasApcPrefix[T string | []byte](b T) bool {
|
||
return (len(b) > 0 && b[0] == APC) ||
|
||
(len(b) > 1 && b[0] == ESC && b[1] == '_')
|
||
}
|
||
|
||
// HasDcsPrefix returns true if the given byte slice has a DCS prefix.
|
||
func HasDcsPrefix[T string | []byte](b T) bool {
|
||
return (len(b) > 0 && b[0] == DCS) ||
|
||
(len(b) > 1 && b[0] == ESC && b[1] == 'P')
|
||
}
|
||
|
||
// HasSosPrefix returns true if the given byte slice has a SOS prefix.
|
||
func HasSosPrefix[T string | []byte](b T) bool {
|
||
return (len(b) > 0 && b[0] == SOS) ||
|
||
(len(b) > 1 && b[0] == ESC && b[1] == 'X')
|
||
}
|
||
|
||
// HasPmPrefix returns true if the given byte slice has a PM prefix.
|
||
func HasPmPrefix[T string | []byte](b T) bool {
|
||
return (len(b) > 0 && b[0] == PM) ||
|
||
(len(b) > 1 && b[0] == ESC && b[1] == '^')
|
||
}
|
||
|
||
// HasStPrefix returns true if the given byte slice has a ST prefix.
|
||
func HasStPrefix[T string | []byte](b T) bool {
|
||
return (len(b) > 0 && b[0] == ST) ||
|
||
(len(b) > 1 && b[0] == ESC && b[1] == '\\')
|
||
}
|
||
|
||
// HasEscPrefix returns true if the given byte slice has an ESC prefix.
|
||
func HasEscPrefix[T string | []byte](b T) bool {
|
||
return len(b) > 0 && b[0] == ESC
|
||
}
|
||
|
||
// FirstGraphemeCluster returns the first grapheme cluster in the given string or byte slice.
|
||
// This is a syntactic sugar function that wraps
|
||
// uniseg.FirstGraphemeClusterInString and uniseg.FirstGraphemeCluster.
|
||
func FirstGraphemeCluster[T string | []byte](b T, state int) (T, T, int, int) {
|
||
switch b := any(b).(type) {
|
||
case string:
|
||
cluster, rest, width, newState := uniseg.FirstGraphemeClusterInString(b, state)
|
||
return T(cluster), T(rest), width, newState
|
||
case []byte:
|
||
cluster, rest, width, newState := uniseg.FirstGraphemeCluster(b, state)
|
||
return T(cluster), T(rest), width, newState
|
||
}
|
||
panic("unreachable")
|
||
}
|
||
|
||
// Cmd represents a sequence command. This is used to pack/unpack a sequence
// command with its intermediate and prefix characters. Those are commonly
// found in CSI and DCS sequences.
type Cmd int
|
||
|
||
// Prefix returns the unpacked prefix byte of the CSI sequence.
|
||
// This is always gonna be one of the following '<' '=' '>' '?' and in the
|
||
// range of 0x3C-0x3F.
|
||
// Zero is returned if the sequence does not have a prefix.
|
||
func (c Cmd) Prefix() byte {
|
||
return byte(parser.Prefix(int(c)))
|
||
}
|
||
|
||
// Intermediate returns the unpacked intermediate byte of the CSI sequence.
|
||
// An intermediate byte is in the range of 0x20-0x2F. This includes these
|
||
// characters from ' ', '!', '"', '#', '$', '%', '&', ”', '(', ')', '*', '+',
|
||
// ',', '-', '.', '/'.
|
||
// Zero is returned if the sequence does not have an intermediate byte.
|
||
func (c Cmd) Intermediate() byte {
|
||
return byte(parser.Intermediate(int(c)))
|
||
}
|
||
|
||
// Final returns the unpacked command byte of the CSI sequence.
|
||
func (c Cmd) Final() byte {
|
||
return byte(parser.Command(int(c)))
|
||
}
|
||
|
||
// Command packs a command with the given prefix, intermediate, and final. A
|
||
// zero byte means the sequence does not have a prefix or intermediate.
|
||
//
|
||
// Prefixes are in the range of 0x3C-0x3F that is one of `<=>?`.
|
||
//
|
||
// Intermediates are in the range of 0x20-0x2F that is anything in
|
||
// `!"#$%&'()*+,-./`.
|
||
//
|
||
// Final bytes are in the range of 0x40-0x7E that is anything in the range
|
||
// `@A–Z[\]^_`a–z{|}~`.
|
||
func Command(prefix, inter, final byte) (c int) {
|
||
c = int(final)
|
||
c |= int(prefix) << parser.PrefixShift
|
||
c |= int(inter) << parser.IntermedShift
|
||
return
|
||
}
|
||
|
||
// Param represents a sequence parameter. Sequence parameters with
// sub-parameters are packed with the HasMoreFlag set. This is used to unpack
// the parameters from CSI and DCS sequences.
type Param int
|
||
|
||
// Param returns the unpacked parameter at the given index.
|
||
// It returns the default value if the parameter is missing.
|
||
func (s Param) Param(def int) int {
|
||
p := int(s) & parser.ParamMask
|
||
if p == parser.MissingParam {
|
||
return def
|
||
}
|
||
return p
|
||
}
|
||
|
||
// HasMore unpacks the HasMoreFlag from the parameter.
|
||
func (s Param) HasMore() bool {
|
||
return s&parser.HasMoreFlag != 0
|
||
}
|
||
|
||
// Parameter packs an escape code parameter with the given parameter and
|
||
// whether this parameter has following sub-parameters.
|
||
func Parameter(p int, hasMore bool) (s int) {
|
||
s = p & parser.ParamMask
|
||
if hasMore {
|
||
s |= parser.HasMoreFlag
|
||
}
|
||
return
|
||
}
|