decentral1se 31fa9b1a7a
Some checks failed
continuous-integration/drone/push Build is failing
chore: make deps, go mod vendor
2024-12-02 01:45:06 +01:00

414 lines
11 KiB
Go

package ansi
import (
"unicode/utf8"
"unsafe"
"github.com/charmbracelet/x/ansi/parser"
)
// ParserDispatcher is the callback a [Parser] invokes for every [Sequence] it
// produces: printable runes, control codes, and completed ESC, CSI, DCS, OSC,
// SOS, PM, and APC sequences.
type ParserDispatcher func(Sequence)
// Parser represents a DEC ANSI compatible sequence parser.
//
// It uses a state machine to parse ANSI escape sequences and control
// characters. The parser is designed to be used with a terminal emulator or
// similar application that needs to parse ANSI escape sequences and control
// characters.
// See package [parser] for more information.
//
//go:generate go run ./gen.go
type Parser struct {
// dispatcher is the function called when a sequence is complete. It may be
// nil, in which case sequences are parsed but not reported.
dispatcher ParserDispatcher
// params contains the raw parameters of the sequence.
// These parameters are used when constructing CSI and DCS sequences.
params []int
// data contains the raw data of the sequence.
// This data is used when constructing OSC, DCS, SOS, PM, and APC sequences.
data []byte
// dataLen keeps track of the length of the data buffer.
// If dataLen is -1, the data buffer is unlimited and will grow as needed;
// the collected bytes are then exactly the contents of data.
// Otherwise, dataLen counts the bytes stored so far and is limited by the
// size of the data buffer.
dataLen int
// paramsLen keeps track of the number of parameters.
// This is limited by the size of the params buffer.
//
// This is also used when collecting UTF-8 runes to keep track of the
// number of rune bytes collected.
paramsLen int
// cmd contains the raw command along with the private marker and
// intermediate bytes of the sequence.
// The lowest byte contains the command byte, the next byte contains
// the private marker, and the next byte contains the intermediate byte.
//
// This is also used when collecting UTF-8 runes, treating it as a slice of
// 4 bytes with the first rune byte in the lowest 8 bits.
cmd int
// state is the current state of the parser.
state byte
}
// NewParser creates a [Parser] that reports sequences to d. The dispatcher is
// optional and may be nil.
//
// The parameters buffer is sized to parser.MaxParamsSize entries and the data
// buffer to 64KB. Use [Parser.SetParamsSize] and [Parser.SetDataSize] to
// change the size of the parameters and data buffer respectively.
func NewParser(d ParserDispatcher) *Parser {
	const defaultDataSize = 64 << 10 // 64KB data buffer

	p := &Parser{dispatcher: d}
	p.SetParamsSize(parser.MaxParamsSize)
	p.SetDataSize(defaultDataSize)
	return p
}
// SetDispatcher sets the dispatcher function to call when a sequence is
// complete. Passing nil disables dispatching; the parser still advances its
// state machine, but no sequence is reported.
func (p *Parser) SetDispatcher(d ParserDispatcher) {
p.dispatcher = d
}
// SetParamsSize sets the size of the parameters buffer, i.e. the maximum
// number of parameters stored for a sequence.
// This is used when constructing CSI and DCS sequences.
//
// The buffer is replaced by a freshly allocated, zeroed one. A negative size
// is treated as 0 rather than panicking, mirroring how [Parser.SetDataSize]
// tolerates non-positive sizes.
func (p *Parser) SetParamsSize(size int) {
	if size < 0 {
		size = 0
	}
	p.params = make([]int, size)
}
// SetDataSize sets the size of the data buffer.
// This is used when constructing OSC, DCS, SOS, PM, and APC sequences.
// If size is less than or equal to 0, the data buffer is unlimited and will
// grow as needed.
//
// The buffer is reallocated and the length counter is reset in both modes, so
// a parser that was previously unlimited becomes bounded again when a positive
// size is given, and a stale length can never exceed the new buffer.
func (p *Parser) SetDataSize(size int) {
	if size <= 0 {
		// Unlimited: dataLen == -1 marks the mode; bytes are appended to data.
		p.dataLen = -1
		p.data = make([]byte, 0)
		return
	}
	// Bounded: dataLen counts the bytes stored in the fixed-size buffer.
	p.dataLen = 0
	p.data = make([]byte, size)
}
// Params returns the list of parsed packed parameters.
//
// The returned slice aliases the parser's internal buffer (no copy is made),
// so it is only valid until the parser is advanced again. It returns nil when
// the parameters buffer is empty, and never exposes more elements than the
// buffer holds: paramsLen doubles as the UTF-8 byte counter and can
// temporarily exceed a very small buffer.
func (p *Parser) Params() []Parameter {
	if len(p.params) == 0 {
		// Taking &p.params[0] on an empty buffer would panic.
		return nil
	}
	n := p.paramsLen
	if n > len(p.params) {
		n = len(p.params)
	}
	// Reinterpret the []int storage as []Parameter without copying.
	// NOTE(review): relies on Parameter having int as its underlying type;
	// confirm against its declaration.
	return unsafe.Slice((*Parameter)(unsafe.Pointer(&p.params[0])), n)
}
// Param looks up the parameter at index i. When i addresses a collected
// parameter, it returns that parameter's value (or def if the parameter is
// missing) together with true. When i is out of bounds, it returns def and
// false.
func (p *Parser) Param(i, def int) (int, bool) {
	inRange := i >= 0 && i < p.paramsLen
	if inRange {
		value := Parameter(p.params[i])
		return value.Param(def), true
	}
	return def, false
}
// Cmd returns the packed command of the last dispatched sequence.
// The value is the parser's internal cmd field converted to a [Command]; it
// carries the command byte together with the private marker and intermediate
// byte collected for the sequence.
func (p *Parser) Cmd() Command {
return Command(p.cmd)
}
// Rune returns the last dispatched sequence as a rune.
//
// The UTF-8 bytes of the rune are packed into cmd by collectRune, with the
// first byte in the lowest 8 bits. They are unpacked with shifts rather than
// by reinterpreting the memory of cmd, so the result does not depend on the
// byte order of the host. If the first byte is not accepted by utf8ByteLen,
// or the collected bytes do not form a valid encoding, [utf8.RuneError] is
// returned.
func (p *Parser) Rune() rune {
	rw := utf8ByteLen(byte(p.cmd & 0xff))
	if rw == -1 {
		return utf8.RuneError
	}
	var buf [utf8.UTFMax]byte
	for i := range buf {
		buf[i] = byte(p.cmd >> (8 * i))
	}
	r, _ := utf8.DecodeRune(buf[:rw])
	return r
}
// Data returns the raw data of the last dispatched sequence.
//
// The returned slice aliases the parser's internal buffer and is only valid
// until the parser collects data again.
func (p *Parser) Data() []byte {
	if p.dataLen < 0 {
		// Unlimited mode: dataLen is the -1 sentinel and data itself holds
		// exactly the collected bytes, so slicing by dataLen would panic.
		return p.data
	}
	return p.data[:p.dataLen]
}
// Reset puts the parser back into the ground state and discards the
// parameters and command collected so far. The data buffer is left as is.
func (p *Parser) Reset() {
	p.state = parser.GroundState
	p.clear()
}
// clear zeroes the parameter count and the packed command, and marks the
// first parameter slot as missing when the parameters buffer has one.
func (p *Parser) clear() {
	p.paramsLen, p.cmd = 0, 0
	if len(p.params) == 0 {
		return
	}
	p.params[0] = parser.MissingParam
}
// State returns the current state of the parser, i.e. the state the state
// machine was left in by the most recent call to [Parser.Advance] or
// [Parser.Reset].
func (p *Parser) State() parser.State {
return p.state
}
// StateName returns the name of the current state, looked up in
// parser.StateNames.
func (p *Parser) StateName() string {
return parser.StateNames[p.state]
}
// Parse feeds every byte of b to the parser, in order, by calling
// [Parser.Advance] on each one.
//
// Deprecated: Loop over the buffer and call [Parser.Advance] instead.
func (p *Parser) Parse(b []byte) {
	for _, c := range b {
		p.Advance(c)
	}
}
// Advance feeds a single byte to the parser and returns the action the parser
// performed for it.
func (p *Parser) Advance(b byte) parser.Action {
	if p.state == parser.Utf8State {
		// In the middle of a multi-byte rune: UTF-8 is handled separately
		// from the state table.
		return p.advanceUtf8(b)
	}
	return p.advance(b)
}
// collectRune stores one UTF-8 byte of the rune being collected. Byte number
// paramsLen is written into the corresponding 8-bit lane of cmd (first byte in
// the lowest bits) and paramsLen is incremented. Bytes beyond utf8.UTFMax are
// silently dropped.
func (p *Parser) collectRune(b byte) {
	if idx := p.paramsLen; idx < utf8.UTFMax {
		off := idx * 8
		p.cmd = p.cmd&^(0xff<<off) | int(b)<<off
		p.paramsLen = idx + 1
	}
}
// dispatch hands s to the configured dispatcher. It does nothing when no
// dispatcher is set.
func (p *Parser) dispatch(s Sequence) {
	fn := p.dispatcher
	if fn == nil {
		return
	}
	fn(s)
}
// advanceUtf8 consumes one more byte of a multi-byte UTF-8 rune. It returns
// parser.CollectAction while the rune is still incomplete. Once the number of
// bytes announced by the lead byte has been collected, it dispatches the rune,
// returns the parser to the ground state, and reports parser.PrintAction.
func (p *Parser) advanceUtf8(b byte) parser.Action {
	p.collectRune(b)

	// The lead byte sits in the lowest 8 bits of cmd and tells us how many
	// bytes the rune needs.
	want := utf8ByteLen(byte(p.cmd & 0xff))
	switch {
	case want == -1:
		// The lead byte was accepted by the state machine, so an invalid one
		// here means the state machine itself is broken.
		panic("invalid rune") // unreachable
	case p.paramsLen < want:
		return parser.CollectAction
	}

	// All bytes are in: decode and report the rune.
	p.dispatch(Rune(p.Rune()))
	p.state = parser.GroundState
	p.paramsLen = 0
	return parser.PrintAction
}
// advance runs one step of the state table for byte b: it looks up the
// transition, performs the resulting action (plus any extra action the
// transition requires), moves to the new state, and returns the action from
// the table.
func (p *Parser) advance(b byte) parser.Action {
	next, action := parser.Table.Transition(p.state, b)

	if next != p.state {
		// Entering EscapeState does not clear the parser. For instance, a
		// sequence terminated by ST (\x1b\\ or \x9c) is dispatched and the
		// parser moves to EscapeState with the old parameters and command
		// still in place. So clear on the way out of EscapeState, before the
		// esc sequence is dispatched.
		if p.state == parser.EscapeState {
			p.performAction(parser.ClearAction, next, b)
		}

		// XXX: Special case. Going straight from DcsEntryState to
		// DcsStringState with a put means the data is not parameterized,
		// i.e. it does not follow the ECMA-48 § 5.4.1 string parameters
		// format, so data collection has to be started explicitly.
		enteringDcsString := p.state == parser.DcsEntryState && next == parser.DcsStringState
		if enteringDcsString && action == parser.PutAction {
			p.performAction(parser.StartAction, next, 0)
		}
	}

	if b == ESC && p.state == parser.EscapeState {
		// Two ESCs in a row: execute the byte instead of the table's action.
		p.performAction(parser.ExecuteAction, next, b)
	} else {
		p.performAction(action, next, b)
	}

	p.state = next
	return action
}
// parseStringCmd reads the run of ASCII digits at the start of the collected
// data as a decimal number and stores it in cmd. If the data does not start
// with a digit, cmd is left untouched.
func (p *Parser) parseStringCmd() {
	limit := p.dataLen
	if limit < 0 {
		// Unlimited buffer: the slice length is the amount of data collected.
		limit = len(p.data)
	}
	for i := 0; i < limit; i++ {
		// Byte arithmetic wraps, so anything outside '0'..'9' ends up above 9.
		digit := p.data[i] - '0'
		if digit > 9 {
			return
		}
		if p.cmd == parser.MissingCommand {
			// First digit: replace the "missing" marker with a real number.
			p.cmd = 0
		}
		p.cmd = p.cmd*10 + int(digit)
	}
}
// performAction applies a single state-machine action to the parser.
//
// action is the action to perform, state is the state the parser is about to
// enter (p.state still holds the state being left), and b is the input byte
// that triggered the action.
//
// For parser.DispatchAction the final byte of CSI and ESC sequences is folded
// into cmd even when no dispatcher is set, so that [Parser.Cmd] is complete
// for callers that drive the parser through the return value of
// [Parser.Advance] instead of a dispatcher.
func (p *Parser) performAction(action parser.Action, state parser.State, b byte) {
	switch action {
	case parser.IgnoreAction:
		// Nothing to do.

	case parser.ClearAction:
		p.clear()

	case parser.PrintAction:
		p.dispatch(Rune(b))

	case parser.ExecuteAction:
		p.dispatch(ControlCode(b))

	case parser.MarkerAction:
		// Collect private marker
		// we only store the last marker
		p.cmd &^= 0xff << parser.MarkerShift
		p.cmd |= int(b) << parser.MarkerShift

	case parser.CollectAction:
		if state == parser.Utf8State {
			// Lead byte of a multi-byte rune: reset the UTF-8 counter (it
			// shares paramsLen) and store the byte.
			p.paramsLen = 0
			p.collectRune(b)
		} else {
			// Collect intermediate bytes
			// we only store the last intermediate byte
			p.cmd &^= 0xff << parser.IntermedShift
			p.cmd |= int(b) << parser.IntermedShift
		}

	case parser.ParamAction:
		// Collect parameters; anything beyond the buffer size is dropped.
		if p.paramsLen >= len(p.params) {
			break
		}

		if b >= '0' && b <= '9' {
			if p.params[p.paramsLen] == parser.MissingParam {
				p.params[p.paramsLen] = 0
			}

			p.params[p.paramsLen] *= 10
			p.params[p.paramsLen] += int(b - '0')
		}

		if b == ':' {
			// A sub-parameter follows this one.
			p.params[p.paramsLen] |= parser.HasMoreFlag
		}

		if b == ';' || b == ':' {
			// Move on to the next slot and mark it as not yet given.
			p.paramsLen++
			if p.paramsLen < len(p.params) {
				p.params[p.paramsLen] = parser.MissingParam
			}
		}

	case parser.StartAction:
		// Start collecting data from scratch: truncate the growable buffer in
		// unlimited mode, otherwise reset the length counter.
		if p.dataLen < 0 && p.data != nil {
			p.data = p.data[:0]
		} else {
			p.dataLen = 0
		}
		if p.state >= parser.DcsEntryState && p.state <= parser.DcsStringState {
			// Collect the command byte for DCS
			p.cmd |= int(b)
		} else {
			p.cmd = parser.MissingCommand
		}

	case parser.PutAction:
		if p.state == parser.OscStringState && b == ';' && p.cmd == parser.MissingCommand {
			// The first ';' of an OSC string ends its numeric command.
			p.parseStringCmd()
		}

		if p.dataLen < 0 {
			// Unlimited buffer: grow as needed.
			p.data = append(p.data, b)
		} else if p.dataLen < len(p.data) {
			// Bounded buffer: bytes that do not fit are dropped.
			p.data[p.dataLen] = b
			p.dataLen++
		}

	case parser.DispatchAction:
		// Increment the last parameter
		if p.paramsLen > 0 && p.paramsLen < len(p.params)-1 ||
			p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam {
			p.paramsLen++
		}

		// Finalize cmd before looking at the dispatcher so that it is
		// complete whether or not a dispatcher is set.
		switch p.state {
		case parser.OscStringState:
			if p.cmd == parser.MissingCommand {
				// Ensure we have a command for OSC
				p.parseStringCmd()
			}
		case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState,
			parser.EscapeState, parser.EscapeIntermediateState:
			// The final byte is the command byte.
			p.cmd |= int(b)
		}

		if p.dispatcher == nil {
			// Nobody to report to; skip building the sequence value.
			return
		}

		data := p.data
		if p.dataLen >= 0 {
			data = data[:p.dataLen]
		}

		var seq Sequence
		switch p.state {
		case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
			seq = CsiSequence{Cmd: Command(p.cmd), Params: p.Params()}
		case parser.EscapeState, parser.EscapeIntermediateState:
			seq = EscSequence(p.cmd)
		case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
			seq = DcsSequence{Cmd: Command(p.cmd), Params: p.Params(), Data: data}
		case parser.OscStringState:
			seq = OscSequence{Cmd: p.cmd, Data: data}
		case parser.SosStringState:
			seq = SosSequence{Data: data}
		case parser.PmStringState:
			seq = PmSequence{Data: data}
		case parser.ApcStringState:
			seq = ApcSequence{Data: data}
		}

		p.dispatch(seq)
	}
}
// utf8ByteLen reports how many bytes a UTF-8 encoded rune occupies, judging
// only by its first byte b. It returns -1 when b cannot start a rune according
// to its bit pattern (continuation bytes 0x80-0xBF and bytes 0xF8-0xFF).
func utf8ByteLen(b byte) int {
	switch {
	case b < 0x80: // 0xxxxxxx
		return 1
	case b>>5 == 0b110: // 110xxxxx
		return 2
	case b>>4 == 0b1110: // 1110xxxx
		return 3
	case b>>3 == 0b11110: // 11110xxx
		return 4
	default:
		return -1
	}
}