0
0
forked from toolshed/abra
2024-08-04 11:06:58 +02:00

358 lines
9.5 KiB
Go

package ansi
import (
"unicode/utf8"
"github.com/charmbracelet/x/ansi/parser"
)
// ParserDispatcher is a function that dispatches a sequence.
type ParserDispatcher func(Sequence)
// Parser represents a DEC ANSI compatible sequence parser.
//
// It uses a state machine to parse ANSI escape sequences and control
// characters. The parser is designed to be used with a terminal emulator or
// similar application that needs to parse ANSI escape sequences and control
// characters.
// See package [parser] for more information.
//
//go:generate go run ./gen.go
type Parser struct {
// Params contains the raw parameters of the sequence.
// These parameters used when constructing CSI and DCS sequences.
Params []int
// Data contains the raw data of the sequence.
// These data used when constructing OSC, DCS, SOS, PM, and APC sequences.
Data []byte
// DataLen keeps track of the length of the data buffer.
// If DataLen is -1, the data buffer is unlimited and will grow as needed.
// Otherwise, DataLen is limited by the size of the Data buffer.
DataLen int
// ParamsLen keeps track of the number of parameters.
// This is limited by the size of the Params buffer.
ParamsLen int
// Cmd contains the raw command along with the private marker and
// intermediate bytes of the sequence.
// The first lower byte contains the command byte, the next byte contains
// the private marker, and the next byte contains the intermediate byte.
Cmd int
// RuneLen keeps track of the number of bytes collected for a UTF-8 rune.
RuneLen int
// RuneBuf contains the bytes collected for a UTF-8 rune.
RuneBuf [utf8.MaxRune]byte
// State is the current state of the parser.
State byte
}
// NewParser returns a new parser with the given sizes allocated.
// If dataSize is zero, the underlying data buffer will be unlimited and will
// grow as needed.
func NewParser(paramsSize, dataSize int) *Parser {
s := &Parser{
Params: make([]int, paramsSize),
Data: make([]byte, dataSize),
}
if dataSize <= 0 {
s.DataLen = -1
}
return s
}
// Reset resets the parser to its initial state.
func (p *Parser) Reset() {
p.clear()
p.State = parser.GroundState
}
// clear clears the parser parameters and command.
func (p *Parser) clear() {
if len(p.Params) > 0 {
p.Params[0] = parser.MissingParam
}
p.ParamsLen = 0
p.Cmd = 0
p.RuneLen = 0
}
// StateName returns the name of the current state.
func (p *Parser) StateName() string {
return parser.StateNames[p.State]
}
// Parse parses the given dispatcher and byte buffer.
func (p *Parser) Parse(dispatcher ParserDispatcher, b []byte) {
for i := 0; i < len(b); i++ {
p.Advance(dispatcher, b[i], i < len(b)-1)
}
}
// Advance advances the parser with the given dispatcher and byte.
func (p *Parser) Advance(dispatcher ParserDispatcher, b byte, more bool) parser.Action {
switch p.State {
case parser.Utf8State:
// We handle UTF-8 here.
return p.advanceUtf8(dispatcher, b)
default:
return p.advance(dispatcher, b, more)
}
}
func (p *Parser) collectRune(b byte) {
if p.RuneLen < utf8.UTFMax {
p.RuneBuf[p.RuneLen] = b
p.RuneLen++
}
}
func (p *Parser) advanceUtf8(dispatcher ParserDispatcher, b byte) parser.Action {
// Collect UTF-8 rune bytes.
p.collectRune(b)
rw := utf8ByteLen(p.RuneBuf[0])
if rw == -1 {
// We panic here because the first byte comes from the state machine,
// if this panics, it means there is a bug in the state machine!
panic("invalid rune") // unreachable
}
if p.RuneLen < rw {
return parser.NoneAction
}
// We have enough bytes to decode the rune
bts := p.RuneBuf[:rw]
r, _ := utf8.DecodeRune(bts)
if dispatcher != nil {
dispatcher(Rune(r))
}
p.State = parser.GroundState
p.RuneLen = 0
return parser.NoneAction
}
func (p *Parser) advance(d ParserDispatcher, b byte, more bool) parser.Action {
state, action := parser.Table.Transition(p.State, b)
// We need to clear the parser state if the state changes from EscapeState.
// This is because when we enter the EscapeState, we don't get a chance to
// clear the parser state. For example, when a sequence terminates with a
// ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to
// EscapeState. However, the parser state is not cleared in this case and
// we need to clear it here before dispatching the esc sequence.
if p.State != state {
switch p.State {
case parser.EscapeState:
p.performAction(d, parser.ClearAction, b)
}
if action == parser.PutAction &&
p.State == parser.DcsEntryState && state == parser.DcsStringState {
// XXX: This is a special case where we need to start collecting
// non-string parameterized data i.e. doesn't follow the ECMA-48 §
// 5.4.1 string parameters format.
p.performAction(d, parser.StartAction, 0)
}
}
// Handle special cases
switch {
case b == ESC && p.State == parser.EscapeState:
// Two ESCs in a row
p.performAction(d, parser.ExecuteAction, b)
if !more {
// Two ESCs at the end of the buffer
p.performAction(d, parser.ExecuteAction, b)
}
case b == ESC && !more:
// Last byte is an ESC
p.performAction(d, parser.ExecuteAction, b)
case p.State == parser.EscapeState && b == 'P' && !more:
// ESC P (DCS) at the end of the buffer
p.performAction(d, parser.DispatchAction, b)
case p.State == parser.EscapeState && b == 'X' && !more:
// ESC X (SOS) at the end of the buffer
p.performAction(d, parser.DispatchAction, b)
case p.State == parser.EscapeState && b == '[' && !more:
// ESC [ (CSI) at the end of the buffer
p.performAction(d, parser.DispatchAction, b)
case p.State == parser.EscapeState && b == ']' && !more:
// ESC ] (OSC) at the end of the buffer
p.performAction(d, parser.DispatchAction, b)
case p.State == parser.EscapeState && b == '^' && !more:
// ESC ^ (PM) at the end of the buffer
p.performAction(d, parser.DispatchAction, b)
case p.State == parser.EscapeState && b == '_' && !more:
// ESC _ (APC) at the end of the buffer
p.performAction(d, parser.DispatchAction, b)
default:
p.performAction(d, action, b)
}
p.State = state
return action
}
func (p *Parser) performAction(dispatcher ParserDispatcher, action parser.Action, b byte) {
switch action {
case parser.IgnoreAction:
break
case parser.ClearAction:
p.clear()
case parser.PrintAction:
if utf8ByteLen(b) > 1 {
p.collectRune(b)
} else if dispatcher != nil {
dispatcher(Rune(b))
}
case parser.ExecuteAction:
if dispatcher != nil {
dispatcher(ControlCode(b))
}
case parser.MarkerAction:
// Collect private marker
// we only store the last marker
p.Cmd &^= 0xff << parser.MarkerShift
p.Cmd |= int(b) << parser.MarkerShift
case parser.CollectAction:
// Collect intermediate bytes
// we only store the last intermediate byte
p.Cmd &^= 0xff << parser.IntermedShift
p.Cmd |= int(b) << parser.IntermedShift
case parser.ParamAction:
// Collect parameters
if p.ParamsLen >= len(p.Params) {
break
}
if b >= '0' && b <= '9' {
if p.Params[p.ParamsLen] == parser.MissingParam {
p.Params[p.ParamsLen] = 0
}
p.Params[p.ParamsLen] *= 10
p.Params[p.ParamsLen] += int(b - '0')
}
if b == ':' {
p.Params[p.ParamsLen] |= parser.HasMoreFlag
}
if b == ';' || b == ':' {
p.ParamsLen++
if p.ParamsLen < len(p.Params) {
p.Params[p.ParamsLen] = parser.MissingParam
}
}
case parser.StartAction:
if p.DataLen < 0 {
p.Data = make([]byte, 0)
} else {
p.DataLen = 0
}
if p.State >= parser.DcsEntryState && p.State <= parser.DcsStringState {
// Collect the command byte for DCS
p.Cmd |= int(b)
} else {
p.Cmd = parser.MissingCommand
}
case parser.PutAction:
switch p.State {
case parser.OscStringState:
if b == ';' && p.Cmd == parser.MissingCommand {
// Try to parse the command
datalen := len(p.Data)
if p.DataLen >= 0 {
datalen = p.DataLen
}
for i := 0; i < datalen; i++ {
d := p.Data[i]
if d < '0' || d > '9' {
break
}
if p.Cmd == parser.MissingCommand {
p.Cmd = 0
}
p.Cmd *= 10
p.Cmd += int(d - '0')
}
}
}
if p.DataLen < 0 {
p.Data = append(p.Data, b)
} else {
if p.DataLen < len(p.Data) {
p.Data[p.DataLen] = b
p.DataLen++
}
}
case parser.DispatchAction:
// Increment the last parameter
if p.ParamsLen > 0 && p.ParamsLen < len(p.Params)-1 ||
p.ParamsLen == 0 && len(p.Params) > 0 && p.Params[0] != parser.MissingParam {
p.ParamsLen++
}
if dispatcher == nil {
break
}
var seq Sequence
data := p.Data
if p.DataLen >= 0 {
data = data[:p.DataLen]
}
switch p.State {
case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
p.Cmd |= int(b)
seq = CsiSequence{Cmd: p.Cmd, Params: p.Params[:p.ParamsLen]}
case parser.EscapeState, parser.EscapeIntermediateState:
p.Cmd |= int(b)
seq = EscSequence(p.Cmd)
case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
seq = DcsSequence{Cmd: p.Cmd, Params: p.Params[:p.ParamsLen], Data: data}
case parser.OscStringState:
seq = OscSequence{Cmd: p.Cmd, Data: data}
case parser.SosStringState:
seq = SosSequence{Data: data}
case parser.PmStringState:
seq = PmSequence{Data: data}
case parser.ApcStringState:
seq = ApcSequence{Data: data}
}
dispatcher(seq)
}
}
func utf8ByteLen(b byte) int {
if b <= 0b0111_1111 { // 0x00-0x7F
return 1
} else if b >= 0b1100_0000 && b <= 0b1101_1111 { // 0xC0-0xDF
return 2
} else if b >= 0b1110_0000 && b <= 0b1110_1111 { // 0xE0-0xEF
return 3
} else if b >= 0b1111_0000 && b <= 0b1111_0111 { // 0xF0-0xF7
return 4
}
return -1
}