abra/vendor/github.com/charmbracelet/x/ansi/parser.go

package ansi

import (
	"unicode/utf8"
	"unsafe"

	"github.com/charmbracelet/x/ansi/parser"
)

// ParserDispatcher is the callback a [Parser] invokes with every sequence it
// dispatches.
type ParserDispatcher func(Sequence)

// Parser represents a DEC ANSI compatible sequence parser.
//
// It uses a state machine to parse ANSI escape sequences and control
// characters. The parser is designed to be used with a terminal emulator or
// similar application that needs to parse ANSI escape sequences and control
// characters.
// See package [parser] for more information.
//
//go:generate go run ./gen.go
type Parser struct {
	// dispatcher is the function called when a sequence is complete.
	// It may be nil, in which case nothing is reported.
	dispatcher ParserDispatcher

	// params contains the raw parameters of the sequence.
	// These parameters are used when constructing CSI and DCS sequences.
	params []int

	// data contains the raw data of the sequence.
	// This data is used when constructing OSC, DCS, SOS, PM, and APC
	// sequences.
	data []byte

	// dataLen keeps track of the length of the data buffer.
	// If dataLen is -1, the data buffer is unlimited and will grow as needed;
	// in that mode len(data) is the number of bytes collected.
	// Otherwise, dataLen is the number of bytes stored so far and is limited
	// by the size of the data buffer.
	dataLen int

	// paramsLen keeps track of the number of parameters.
	// This is limited by the size of the params buffer.
	//
	// This is also used when collecting UTF-8 runes to keep track of the
	// number of rune bytes collected.
	paramsLen int

	// cmd contains the raw command along with the private marker and
	// intermediate bytes of the sequence.
	// The lowest byte contains the command byte, the next byte contains
	// the private marker, and the next byte contains the intermediate byte.
	//
	// This is also used when collecting UTF-8 runes, treating it as a slice
	// of 4 bytes with the first rune byte stored in the lowest byte.
	cmd int

	// state is the current state of the parser.
	state byte
}

// NewParser creates a [Parser] that reports completed sequences to d. The
// dispatcher is optional: a nil d yields a parser that reports nothing.
//
// The parameters buffer is sized to [parser.MaxParamsSize] and the data buffer
// to 64KB. Call [Parser.SetParamsSize] and [Parser.SetDataSize] afterwards to
// pick different sizes.
func NewParser(d ParserDispatcher) *Parser {
	const defaultDataSize = 1024 * 64 // 64KB data buffer

	p := &Parser{}
	p.SetDispatcher(d)
	p.SetParamsSize(parser.MaxParamsSize)
	p.SetDataSize(defaultDataSize)
	return p
}

// SetDispatcher sets the dispatcher function to call when a sequence is
// complete. Passing nil removes the dispatcher; the parser then dispatches
// nothing.
func (p *Parser) SetDispatcher(d ParserDispatcher) {
	p.dispatcher = d
}

// SetParamsSize sets the size of the parameters buffer.
// This is used when constructing CSI and DCS sequences.
//
// A fresh buffer of size entries is allocated; the previous buffer and its
// contents are dropped. Parameters beyond the buffer size are not collected.
func (p *Parser) SetParamsSize(size int) {
	p.params = make([]int, size)
}

// SetDataSize sets the size of the data buffer.
// This is used when constructing OSC, DCS, SOS, PM, and APC sequences.
// If size is less than or equal to 0, the data buffer is unlimited and will
// grow as needed.
//
// A fresh buffer is always allocated, so any data collected so far is
// discarded and the stored length is reset to match.
func (p *Parser) SetDataSize(size int) {
	if size <= 0 {
		// Unlimited mode is signalled by dataLen == -1. The buffer must be
		// non-nil (but empty) so that it is reused via append.
		p.dataLen = -1
		p.data = make([]byte, 0)
		return
	}

	// Bounded mode: reset the length so that a parser previously switched to
	// unlimited mode (dataLen == -1), or one holding more bytes than the new
	// size, does not keep a stale length that no longer fits the buffer.
	p.dataLen = 0
	p.data = make([]byte, size)
}

// Params returns the list of parsed packed parameters.
//
// The returned slice aliases the parser's internal parameters buffer, so it is
// only valid until the parser collects the next sequence. It returns nil when
// the parameters buffer is empty.
func (p *Parser) Params() []Parameter {
	if len(p.params) == 0 {
		// Nothing to point at; taking &p.params[0] would panic.
		return nil
	}

	// paramsLen doubles as the UTF-8 byte counter, so it can briefly exceed a
	// small parameters buffer. Clamp it to keep the unsafe slice in bounds.
	n := p.paramsLen
	if n > len(p.params) {
		n = len(p.params)
	}

	// NOTE(review): this reinterprets []int as []Parameter and therefore
	// assumes Parameter has the same size and layout as int — confirm against
	// the Parameter declaration.
	return unsafe.Slice((*Parameter)(unsafe.Pointer(&p.params[0])), n)
}

// Param looks up the parameter at index i.
//
// When i addresses a collected parameter, the result is that parameter's value
// (or def if the parameter was left empty) together with true. For any index
// outside the collected parameters it reports def and false.
func (p *Parser) Param(i, def int) (int, bool) {
	if i >= 0 && i < p.paramsLen {
		return Parameter(p.params[i]).Param(def), true
	}
	return def, false
}

// Cmd returns the packed command of the last dispatched sequence, i.e. the
// command byte together with any private marker and intermediate byte the
// parser collected.
func (p *Parser) Cmd() Command {
	return Command(p.cmd)
}

// Rune returns the last dispatched sequence as a rune.
//
// The UTF-8 bytes of the rune are stored in cmd, first byte in the lowest
// eight bits. It returns [utf8.RuneError] when the first stored byte is not a
// valid UTF-8 leading byte or when the collected bytes do not decode.
func (p *Parser) Rune() rune {
	rw := utf8ByteLen(byte(p.cmd & 0xff))
	if rw == -1 {
		return utf8.RuneError
	}

	// Unpack the bytes by shifting rather than reinterpreting the memory of
	// cmd: the bytes are packed by shift, so reading them back through a
	// pointer would only yield the right order on little-endian machines.
	var buf [utf8.UTFMax]byte
	for i := 0; i < rw; i++ {
		buf[i] = byte(p.cmd >> (8 * i))
	}

	r, _ := utf8.DecodeRune(buf[:rw])
	return r
}

// Data returns the raw data of the last dispatched sequence.
//
// The returned slice aliases the parser's internal data buffer and is
// overwritten when the next string sequence is collected.
func (p *Parser) Data() []byte {
	if p.dataLen < 0 {
		// Unlimited mode: dataLen is the -1 sentinel and the slice length
		// itself tracks how many bytes were collected. Slicing with -1 would
		// panic.
		return p.data
	}
	return p.data[:p.dataLen]
}

// Reset puts the parser back into the ground state and discards the
// parameters and command collected so far.
func (p *Parser) Reset() {
	p.state = parser.GroundState
	p.clear()
}

// clear forgets the collected command and parameters. The first parameter
// slot, when there is one, is marked as missing so that a sequence without
// parameters is not mistaken for one carrying a value.
func (p *Parser) clear() {
	p.cmd = 0
	p.paramsLen = 0
	if len(p.params) > 0 {
		p.params[0] = parser.MissingParam
	}
}

// State returns the current state of the parser's state machine.
func (p *Parser) State() parser.State {
	return p.state
}

// StateName returns the name of the current state, looked up in
// [parser.StateNames].
func (p *Parser) StateName() string {
	return parser.StateNames[p.state]
}

// Parse feeds every byte of b to the parser, in order.
//
// Deprecated: Loop over the buffer and call [Parser.Advance] instead.
func (p *Parser) Parse(b []byte) {
	for _, c := range b {
		p.Advance(c)
	}
}

// Advance feeds a single byte to the parser and returns the action the parser
// performed for it.
func (p *Parser) Advance(b byte) parser.Action {
	// While a multi-byte UTF-8 rune is being collected the state table is
	// bypassed and the byte goes straight to the rune collector.
	if p.state == parser.Utf8State {
		return p.advanceUtf8(b)
	}
	return p.advance(b)
}

// collectRune stores b as the next byte of the UTF-8 rune being collected.
// Bytes are packed into cmd, eight bits each, starting at the lowest byte;
// paramsLen counts how many have been stored. Bytes beyond [utf8.UTFMax] are
// dropped.
func (p *Parser) collectRune(b byte) {
	if p.paramsLen < utf8.UTFMax {
		offset := 8 * p.paramsLen
		// Replace whatever occupied this byte slot with b.
		p.cmd = p.cmd&^(0xff<<offset) | int(b)<<offset
		p.paramsLen++
	}
}

// dispatch hands s to the configured dispatcher. It is a no-op when no
// dispatcher has been set.
func (p *Parser) dispatch(s Sequence) {
	if p.dispatcher == nil {
		return
	}
	p.dispatcher(s)
}

// advanceUtf8 consumes one byte while the parser is collecting a multi-byte
// UTF-8 rune. It returns [parser.CollectAction] until the number of bytes
// announced by the leading byte has been gathered, then dispatches the rune,
// returns to the ground state and reports [parser.PrintAction].
func (p *Parser) advanceUtf8(b byte) parser.Action {
	p.collectRune(b)

	// The leading byte sits in the lowest byte of cmd and tells us how many
	// bytes the rune needs.
	need := utf8ByteLen(byte(p.cmd & 0xff))
	switch {
	case need == -1:
		// We panic here because the first byte comes from the state machine,
		// if this panics, it means there is a bug in the state machine!
		panic("invalid rune") // unreachable
	case p.paramsLen < need:
		// Still waiting for more bytes of this rune.
		return parser.CollectAction
	}

	// All bytes are in: decode and report the rune before resetting, since
	// Rune reads the bytes out of cmd.
	p.dispatch(Rune(p.Rune()))

	p.paramsLen = 0
	p.state = parser.GroundState

	return parser.PrintAction
}

// advance runs one byte through the state table, performs the resulting
// action (plus any extra actions required by the special cases below) and
// moves the parser to the new state. It returns the action produced by the
// table.
func (p *Parser) advance(b byte) parser.Action {
	next, action := parser.Table.Transition(p.state, b)
	changed := p.state != next

	// Leaving EscapeState: the collected command and parameters have to be
	// cleared here because entering EscapeState gives no opportunity to do so.
	// For instance, a string terminated by ST (\x1b\\ or \x9c) dispatches the
	// pending sequence and moves to EscapeState without clearing, so stale
	// values would otherwise leak into the ESC sequence dispatched next.
	if changed && p.state == parser.EscapeState {
		p.performAction(parser.ClearAction, next, b)
	}

	// XXX: DCS data that starts right after the DCS introducer does not
	// follow the ECMA-48 § 5.4.1 string parameters format, so collection of
	// the data has to be started explicitly before the first byte is put.
	if changed && action == parser.PutAction &&
		p.state == parser.DcsEntryState && next == parser.DcsStringState {
		p.performAction(parser.StartAction, next, 0)
	}

	if b == ESC && p.state == parser.EscapeState {
		// Two ESCs in a row: execute the ESC rather than applying the table
		// action.
		p.performAction(parser.ExecuteAction, next, b)
	} else {
		p.performAction(action, next, b)
	}

	p.state = next

	return action
}

// parseStringCmd derives the numeric command from the leading decimal digits
// of the collected data and accumulates it into cmd. Parsing stops at the
// first byte that is not a digit; if the data does not start with a digit,
// cmd is left untouched.
func (p *Parser) parseStringCmd() {
	// In unlimited mode (negative dataLen) the slice length is the data
	// length; otherwise dataLen is.
	n := p.dataLen
	if n < 0 {
		n = len(p.data)
	}

	for i := 0; i < n; i++ {
		c := p.data[i]
		if c < '0' || c > '9' {
			return
		}
		// Start from zero on the first digit instead of the "missing"
		// sentinel.
		if p.cmd == parser.MissingCommand {
			p.cmd = 0
		}
		p.cmd = p.cmd*10 + int(c-'0')
	}
}

// performAction carries out a single state machine action for byte b.
//
// state is the state the parser is about to enter; p.state still holds the
// state the parser is leaving, because the caller only updates it after this
// method returns.
func (p *Parser) performAction(action parser.Action, state parser.State, b byte) {
	switch action {
	case parser.IgnoreAction:
		break

	case parser.ClearAction:
		p.clear()

	case parser.PrintAction:
		p.dispatch(Rune(b))

	case parser.ExecuteAction:
		p.dispatch(ControlCode(b))

	case parser.MarkerAction:
		// Collect private marker
		// we only store the last marker
		p.cmd &^= 0xff << parser.MarkerShift
		p.cmd |= int(b) << parser.MarkerShift

	case parser.CollectAction:
		if state == parser.Utf8State {
			// Reset the UTF-8 counter
			p.paramsLen = 0
			p.collectRune(b)
		} else {
			// Collect intermediate bytes
			// we only store the last intermediate byte
			p.cmd &^= 0xff << parser.IntermedShift
			p.cmd |= int(b) << parser.IntermedShift
		}

	case parser.ParamAction:
		// Collect parameters; anything beyond the buffer size is dropped.
		if p.paramsLen >= len(p.params) {
			break
		}

		if b >= '0' && b <= '9' {
			// The first digit replaces the "missing" sentinel.
			if p.params[p.paramsLen] == parser.MissingParam {
				p.params[p.paramsLen] = 0
			}

			p.params[p.paramsLen] *= 10
			p.params[p.paramsLen] += int(b - '0')
		}

		if b == ':' {
			// A colon marks the current parameter as having more
			// (sub-)parameters following it.
			p.params[p.paramsLen] |= parser.HasMoreFlag
		}

		if b == ';' || b == ':' {
			// Move on to the next slot and mark it as missing until a digit
			// arrives.
			p.paramsLen++
			if p.paramsLen < len(p.params) {
				p.params[p.paramsLen] = parser.MissingParam
			}
		}

	case parser.StartAction:
		// Empty the data buffer. In unlimited mode (negative dataLen) the
		// slice is truncated; otherwise the stored length is reset.
		if p.dataLen < 0 && p.data != nil {
			p.data = p.data[:0]
		} else {
			p.dataLen = 0
		}
		if p.state >= parser.DcsEntryState && p.state <= parser.DcsStringState {
			// Collect the command byte for DCS
			// NOTE(review): presumably relies on the DCS states being
			// numbered contiguously in package parser — confirm.
			p.cmd |= int(b)
		} else {
			p.cmd = parser.MissingCommand
		}

	case parser.PutAction:
		switch p.state {
		case parser.OscStringState:
			// The first ';' ends the numeric OSC command, so parse it as soon
			// as it is seen.
			if b == ';' && p.cmd == parser.MissingCommand {
				p.parseStringCmd()
			}
		}

		if p.dataLen < 0 {
			// Unlimited buffer: grow as needed.
			p.data = append(p.data, b)
		} else {
			// Bounded buffer: bytes that do not fit are dropped.
			if p.dataLen < len(p.data) {
				p.data[p.dataLen] = b
				p.dataLen++
			}
		}

	case parser.DispatchAction:
		// Count the parameter that was being collected when the sequence
		// ended. paramsLen only counts separators seen so far, so the slot it
		// points at is still uncounted. That slot exists whenever paramsLen
		// is below the buffer size; with no separators at all it is counted
		// only if it actually received a value.
		//
		// The upper bound is len(p.params), not len(p.params)-1: otherwise
		// the final parameter is dropped when a sequence exactly fills the
		// buffer.
		if p.paramsLen > 0 && p.paramsLen < len(p.params) ||
			p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam {
			p.paramsLen++
		}

		if p.state == parser.OscStringState && p.cmd == parser.MissingCommand {
			// Ensure we have a command for OSC
			p.parseStringCmd()
		}

		if p.dispatcher == nil {
			break
		}

		var seq Sequence
		// In bounded mode only the first dataLen bytes are valid.
		data := p.data
		if p.dataLen >= 0 {
			data = data[:p.dataLen]
		}
		// The sequence kind is determined by the state being left.
		switch p.state {
		case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
			p.cmd |= int(b)
			seq = CsiSequence{Cmd: Command(p.cmd), Params: p.Params()}
		case parser.EscapeState, parser.EscapeIntermediateState:
			p.cmd |= int(b)
			seq = EscSequence(p.cmd)
		case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
			seq = DcsSequence{Cmd: Command(p.cmd), Params: p.Params(), Data: data}
		case parser.OscStringState:
			seq = OscSequence{Cmd: p.cmd, Data: data}
		case parser.SosStringState:
			seq = SosSequence{Data: data}
		case parser.PmStringState:
			seq = PmSequence{Data: data}
		case parser.ApcStringState:
			seq = ApcSequence{Data: data}
		}

		p.dispatch(seq)
	}
}

// utf8ByteLen reports how many bytes a UTF-8 encoded rune occupies, judging by
// its leading byte b. It returns -1 when b cannot start a rune (continuation
// bytes 0x80-0xBF and bytes 0xF8-0xFF).
func utf8ByteLen(b byte) int {
	switch {
	case b&0x80 == 0x00: // 0xxxxxxx: 0x00-0x7F
		return 1
	case b&0xE0 == 0xC0: // 110xxxxx: 0xC0-0xDF
		return 2
	case b&0xF0 == 0xE0: // 1110xxxx: 0xE0-0xEF
		return 3
	case b&0xF8 == 0xF0: // 11110xxx: 0xF0-0xF7
		return 4
	default:
		return -1
	}
}