abra/vendor/github.com/charmbracelet/x/ansi/parser.go

package ansi

import (
	"unicode/utf8"

	"github.com/charmbracelet/x/ansi/parser"
)

// ParserDispatcher is a function that dispatches a sequence.
type ParserDispatcher func(Sequence)

// Parser represents a DEC ANSI compatible sequence parser.
//
// It uses a state machine to parse ANSI escape sequences and control
// characters. The parser is designed to be used with a terminal emulator or
// similar application that needs to parse ANSI escape sequences and control
// characters.
// See package [parser] for more information.
//
//go:generate go run ./gen.go
type Parser struct {
	// Params contains the raw parameters of the sequence.
	// These parameters used when constructing CSI and DCS sequences.
	Params []int

	// Data contains the raw data of the sequence.
	// These data used when constructing OSC, DCS, SOS, PM, and APC sequences.
	Data []byte

	// DataLen keeps track of the length of the data buffer.
	// If DataLen is -1, the data buffer is unlimited and will grow as needed.
	// Otherwise, DataLen is limited by the size of the Data buffer.
	DataLen int

	// ParamsLen keeps track of the number of parameters.
	// This is limited by the size of the Params buffer.
	ParamsLen int

	// Cmd contains the raw command along with the private marker and
	// intermediate bytes of the sequence.
	// The first lower byte contains the command byte, the next byte contains
	// the private marker, and the next byte contains the intermediate byte.
	Cmd int

	// RuneLen keeps track of the number of bytes collected for a UTF-8 rune.
	RuneLen int

	// RuneBuf contains the bytes collected for a UTF-8 rune.
	RuneBuf [utf8.MaxRune]byte

	// State is the current state of the parser.
	State byte
}

// NewParser returns a new parser with the given sizes allocated.
// If dataSize is zero, the underlying data buffer will be unlimited and will
// grow as needed.
func NewParser(paramsSize, dataSize int) *Parser {
	s := &Parser{
		Params: make([]int, paramsSize),
		Data:   make([]byte, dataSize),
	}
	if dataSize <= 0 {
		s.DataLen = -1
	}
	return s
}

// Reset resets the parser to its initial state.
func (p *Parser) Reset() {
	p.clear()
	p.State = parser.GroundState
}

// clear clears the parser parameters and command.
func (p *Parser) clear() {
	if len(p.Params) > 0 {
		p.Params[0] = parser.MissingParam
	}
	p.ParamsLen = 0
	p.Cmd = 0
	p.RuneLen = 0
}

// StateName returns the name of the current state.
func (p *Parser) StateName() string {
	return parser.StateNames[p.State]
}

// Parse parses the given dispatcher and byte buffer.
func (p *Parser) Parse(dispatcher ParserDispatcher, b []byte) {
	for i := 0; i < len(b); i++ {
		p.Advance(dispatcher, b[i], i < len(b)-1)
	}
}

// Advance advances the parser with the given dispatcher and byte.
func (p *Parser) Advance(dispatcher ParserDispatcher, b byte, more bool) parser.Action {
	switch p.State {
	case parser.Utf8State:
		// We handle UTF-8 here.
		return p.advanceUtf8(dispatcher, b)
	default:
		return p.advance(dispatcher, b, more)
	}
}

func (p *Parser) collectRune(b byte) {
	if p.RuneLen < utf8.UTFMax {
		p.RuneBuf[p.RuneLen] = b
		p.RuneLen++
	}
}

func (p *Parser) advanceUtf8(dispatcher ParserDispatcher, b byte) parser.Action {
	// Collect UTF-8 rune bytes.
	p.collectRune(b)
	rw := utf8ByteLen(p.RuneBuf[0])
	if rw == -1 {
		// We panic here because the first byte comes from the state machine,
		// if this panics, it means there is a bug in the state machine!
		panic("invalid rune") // unreachable
	}

	if p.RuneLen < rw {
		return parser.NoneAction
	}

	// We have enough bytes to decode the rune
	bts := p.RuneBuf[:rw]
	r, _ := utf8.DecodeRune(bts)
	if dispatcher != nil {
		dispatcher(Rune(r))
	}

	p.State = parser.GroundState
	p.RuneLen = 0

	return parser.NoneAction
}

func (p *Parser) advance(d ParserDispatcher, b byte, more bool) parser.Action {
	state, action := parser.Table.Transition(p.State, b)

	// We need to clear the parser state if the state changes from EscapeState.
	// This is because when we enter the EscapeState, we don't get a chance to
	// clear the parser state. For example, when a sequence terminates with a
	// ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to
	// EscapeState. However, the parser state is not cleared in this case and
	// we need to clear it here before dispatching the esc sequence.
	if p.State != state {
		switch p.State {
		case parser.EscapeState:
			p.performAction(d, parser.ClearAction, b)
		}
		if action == parser.PutAction &&
			p.State == parser.DcsEntryState && state == parser.DcsStringState {
			// XXX: This is a special case where we need to start collecting
			// non-string parameterized data i.e. doesn't follow the ECMA-48 §
			// 5.4.1 string parameters format.
			p.performAction(d, parser.StartAction, 0)
		}
	}

	// Handle special cases
	switch {
	case b == ESC && p.State == parser.EscapeState:
		// Two ESCs in a row
		p.performAction(d, parser.ExecuteAction, b)
		if !more {
			// Two ESCs at the end of the buffer
			p.performAction(d, parser.ExecuteAction, b)
		}
	case b == ESC && !more:
		// Last byte is an ESC
		p.performAction(d, parser.ExecuteAction, b)
	case p.State == parser.EscapeState && b == 'P' && !more:
		// ESC P (DCS) at the end of the buffer
		p.performAction(d, parser.DispatchAction, b)
	case p.State == parser.EscapeState && b == 'X' && !more:
		// ESC X (SOS) at the end of the buffer
		p.performAction(d, parser.DispatchAction, b)
	case p.State == parser.EscapeState && b == '[' && !more:
		// ESC [ (CSI) at the end of the buffer
		p.performAction(d, parser.DispatchAction, b)
	case p.State == parser.EscapeState && b == ']' && !more:
		// ESC ] (OSC) at the end of the buffer
		p.performAction(d, parser.DispatchAction, b)
	case p.State == parser.EscapeState && b == '^' && !more:
		// ESC ^ (PM) at the end of the buffer
		p.performAction(d, parser.DispatchAction, b)
	case p.State == parser.EscapeState && b == '_' && !more:
		// ESC _ (APC) at the end of the buffer
		p.performAction(d, parser.DispatchAction, b)
	default:
		p.performAction(d, action, b)
	}

	p.State = state

	return action
}

func (p *Parser) performAction(dispatcher ParserDispatcher, action parser.Action, b byte) {
	switch action {
	case parser.IgnoreAction:
		break

	case parser.ClearAction:
		p.clear()

	case parser.PrintAction:
		if utf8ByteLen(b) > 1 {
			p.collectRune(b)
		} else if dispatcher != nil {
			dispatcher(Rune(b))
		}

	case parser.ExecuteAction:
		if dispatcher != nil {
			dispatcher(ControlCode(b))
		}

	case parser.MarkerAction:
		// Collect private marker
		// we only store the last marker
		p.Cmd &^= 0xff << parser.MarkerShift
		p.Cmd |= int(b) << parser.MarkerShift

	case parser.CollectAction:
		// Collect intermediate bytes
		// we only store the last intermediate byte
		p.Cmd &^= 0xff << parser.IntermedShift
		p.Cmd |= int(b) << parser.IntermedShift

	case parser.ParamAction:
		// Collect parameters
		if p.ParamsLen >= len(p.Params) {
			break
		}

		if b >= '0' && b <= '9' {
			if p.Params[p.ParamsLen] == parser.MissingParam {
				p.Params[p.ParamsLen] = 0
			}

			p.Params[p.ParamsLen] *= 10
			p.Params[p.ParamsLen] += int(b - '0')
		}

		if b == ':' {
			p.Params[p.ParamsLen] |= parser.HasMoreFlag
		}

		if b == ';' || b == ':' {
			p.ParamsLen++
			if p.ParamsLen < len(p.Params) {
				p.Params[p.ParamsLen] = parser.MissingParam
			}
		}

	case parser.StartAction:
		if p.DataLen < 0 {
			p.Data = make([]byte, 0)
		} else {
			p.DataLen = 0
		}
		if p.State >= parser.DcsEntryState && p.State <= parser.DcsStringState {
			// Collect the command byte for DCS
			p.Cmd |= int(b)
		} else {
			p.Cmd = parser.MissingCommand
		}

	case parser.PutAction:
		switch p.State {
		case parser.OscStringState:
			if b == ';' && p.Cmd == parser.MissingCommand {
				// Try to parse the command
				datalen := len(p.Data)
				if p.DataLen >= 0 {
					datalen = p.DataLen
				}
				for i := 0; i < datalen; i++ {
					d := p.Data[i]
					if d < '0' || d > '9' {
						break
					}
					if p.Cmd == parser.MissingCommand {
						p.Cmd = 0
					}
					p.Cmd *= 10
					p.Cmd += int(d - '0')
				}
			}
		}

		if p.DataLen < 0 {
			p.Data = append(p.Data, b)
		} else {
			if p.DataLen < len(p.Data) {
				p.Data[p.DataLen] = b
				p.DataLen++
			}
		}

	case parser.DispatchAction:
		// Increment the last parameter
		if p.ParamsLen > 0 && p.ParamsLen < len(p.Params)-1 ||
			p.ParamsLen == 0 && len(p.Params) > 0 && p.Params[0] != parser.MissingParam {
			p.ParamsLen++
		}

		if dispatcher == nil {
			break
		}

		var seq Sequence
		data := p.Data
		if p.DataLen >= 0 {
			data = data[:p.DataLen]
		}
		switch p.State {
		case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
			p.Cmd |= int(b)
			seq = CsiSequence{Cmd: p.Cmd, Params: p.Params[:p.ParamsLen]}
		case parser.EscapeState, parser.EscapeIntermediateState:
			p.Cmd |= int(b)
			seq = EscSequence(p.Cmd)
		case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
			seq = DcsSequence{Cmd: p.Cmd, Params: p.Params[:p.ParamsLen], Data: data}
		case parser.OscStringState:
			seq = OscSequence{Cmd: p.Cmd, Data: data}
		case parser.SosStringState:
			seq = SosSequence{Data: data}
		case parser.PmStringState:
			seq = PmSequence{Data: data}
		case parser.ApcStringState:
			seq = ApcSequence{Data: data}
		}

		dispatcher(seq)
	}
}

func utf8ByteLen(b byte) int {
	if b <= 0b0111_1111 { // 0x00-0x7F
		return 1
	} else if b >= 0b1100_0000 && b <= 0b1101_1111 { // 0xC0-0xDF
		return 2
	} else if b >= 0b1110_0000 && b <= 0b1110_1111 { // 0xE0-0xEF
		return 3
	} else if b >= 0b1111_0000 && b <= 0b1111_0111 { // 0xF0-0xF7
		return 4
	}
	return -1
}