package ansi import ( "unicode/utf8" "unsafe" "github.com/charmbracelet/x/ansi/parser" ) // Parser represents a DEC ANSI compatible sequence parser. // // It uses a state machine to parse ANSI escape sequences and control // characters. The parser is designed to be used with a terminal emulator or // similar application that needs to parse ANSI escape sequences and control // characters. // See package [parser] for more information. // //go:generate go run ./gen.go type Parser struct { handler Handler // params contains the raw parameters of the sequence. // These parameters used when constructing CSI and DCS sequences. params []int // data contains the raw data of the sequence. // These data used when constructing OSC, DCS, SOS, PM, and APC sequences. data []byte // dataLen keeps track of the length of the data buffer. // If dataLen is -1, the data buffer is unlimited and will grow as needed. // Otherwise, dataLen is limited by the size of the data buffer. dataLen int // paramsLen keeps track of the number of parameters. // This is limited by the size of the params buffer. // // This is also used when collecting UTF-8 runes to keep track of the // number of rune bytes collected. paramsLen int // cmd contains the raw command along with the private prefix and // intermediate bytes of the sequence. // The first lower byte contains the command byte, the next byte contains // the private prefix, and the next byte contains the intermediate byte. // // This is also used when collecting UTF-8 runes treating it as a slice of // 4 bytes. cmd int // state is the current state of the parser. state byte } // NewParser returns a new parser with the default settings. // The [Parser] uses a default size of 32 for the parameters and 64KB for the // data buffer. Use [Parser.SetParamsSize] and [Parser.SetDataSize] to set the // size of the parameters and data buffer respectively. func NewParser() *Parser { p := new(Parser) p.SetParamsSize(parser.MaxParamsSize) p.SetDataSize(1024 * 64) // 64KB data buffer return p } // SetParamsSize sets the size of the parameters buffer. // This is used when constructing CSI and DCS sequences. func (p *Parser) SetParamsSize(size int) { p.params = make([]int, size) } // SetDataSize sets the size of the data buffer. // This is used when constructing OSC, DCS, SOS, PM, and APC sequences. // If size is less than or equal to 0, the data buffer is unlimited and will // grow as needed. func (p *Parser) SetDataSize(size int) { if size <= 0 { size = 0 p.dataLen = -1 } p.data = make([]byte, size) } // Params returns the list of parsed packed parameters. func (p *Parser) Params() Params { return unsafe.Slice((*Param)(unsafe.Pointer(&p.params[0])), p.paramsLen) } // Param returns the parameter at the given index and falls back to the default // value if the parameter is missing. If the index is out of bounds, it returns // the default value and false. func (p *Parser) Param(i, def int) (int, bool) { if i < 0 || i >= p.paramsLen { return def, false } return Param(p.params[i]).Param(def), true } // Command returns the packed command of the last dispatched sequence. Use // [Cmd] to unpack the command. func (p *Parser) Command() int { return p.cmd } // Rune returns the last dispatched sequence as a rune. func (p *Parser) Rune() rune { rw := utf8ByteLen(byte(p.cmd & 0xff)) if rw == -1 { return utf8.RuneError } r, _ := utf8.DecodeRune((*[utf8.UTFMax]byte)(unsafe.Pointer(&p.cmd))[:rw]) return r } // Control returns the last dispatched sequence as a control code. func (p *Parser) Control() byte { return byte(p.cmd & 0xff) } // Data returns the raw data of the last dispatched sequence. func (p *Parser) Data() []byte { return p.data[:p.dataLen] } // Reset resets the parser to its initial state. func (p *Parser) Reset() { p.clear() p.state = parser.GroundState } // clear clears the parser parameters and command. func (p *Parser) clear() { if len(p.params) > 0 { p.params[0] = parser.MissingParam } p.paramsLen = 0 p.cmd = 0 } // State returns the current state of the parser. func (p *Parser) State() parser.State { return p.state } // StateName returns the name of the current state. func (p *Parser) StateName() string { return parser.StateNames[p.state] } // Parse parses the given dispatcher and byte buffer. // Deprecated: Loop over the buffer and call [Parser.Advance] instead. func (p *Parser) Parse(b []byte) { for i := 0; i < len(b); i++ { p.Advance(b[i]) } } // Advance advances the parser using the given byte. It returns the action // performed by the parser. func (p *Parser) Advance(b byte) parser.Action { switch p.state { case parser.Utf8State: // We handle UTF-8 here. return p.advanceUtf8(b) default: return p.advance(b) } } func (p *Parser) collectRune(b byte) { if p.paramsLen >= utf8.UTFMax { return } shift := p.paramsLen * 8 p.cmd &^= 0xff << shift p.cmd |= int(b) << shift p.paramsLen++ } func (p *Parser) advanceUtf8(b byte) parser.Action { // Collect UTF-8 rune bytes. p.collectRune(b) rw := utf8ByteLen(byte(p.cmd & 0xff)) if rw == -1 { // We panic here because the first byte comes from the state machine, // if this panics, it means there is a bug in the state machine! panic("invalid rune") // unreachable } if p.paramsLen < rw { return parser.CollectAction } // We have enough bytes to decode the rune using unsafe if p.handler.Print != nil { p.handler.Print(p.Rune()) } p.state = parser.GroundState p.paramsLen = 0 return parser.PrintAction } func (p *Parser) advance(b byte) parser.Action { state, action := parser.Table.Transition(p.state, b) // We need to clear the parser state if the state changes from EscapeState. // This is because when we enter the EscapeState, we don't get a chance to // clear the parser state. For example, when a sequence terminates with a // ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to // EscapeState. However, the parser state is not cleared in this case and // we need to clear it here before dispatching the esc sequence. if p.state != state { if p.state == parser.EscapeState { p.performAction(parser.ClearAction, state, b) } if action == parser.PutAction && p.state == parser.DcsEntryState && state == parser.DcsStringState { // XXX: This is a special case where we need to start collecting // non-string parameterized data i.e. doesn't follow the ECMA-48 ยง // 5.4.1 string parameters format. p.performAction(parser.StartAction, state, 0) } } // Handle special cases switch { case b == ESC && p.state == parser.EscapeState: // Two ESCs in a row p.performAction(parser.ExecuteAction, state, b) default: p.performAction(action, state, b) } p.state = state return action } func (p *Parser) parseStringCmd() { // Try to parse the command datalen := len(p.data) if p.dataLen >= 0 { datalen = p.dataLen } for i := 0; i < datalen; i++ { d := p.data[i] if d < '0' || d > '9' { break } if p.cmd == parser.MissingCommand { p.cmd = 0 } p.cmd *= 10 p.cmd += int(d - '0') } } func (p *Parser) performAction(action parser.Action, state parser.State, b byte) { switch action { case parser.IgnoreAction: break case parser.ClearAction: p.clear() case parser.PrintAction: p.cmd = int(b) if p.handler.Print != nil { p.handler.Print(rune(b)) } case parser.ExecuteAction: p.cmd = int(b) if p.handler.Execute != nil { p.handler.Execute(b) } case parser.PrefixAction: // Collect private prefix // we only store the last prefix p.cmd &^= 0xff << parser.PrefixShift p.cmd |= int(b) << parser.PrefixShift case parser.CollectAction: if state == parser.Utf8State { // Reset the UTF-8 counter p.paramsLen = 0 p.collectRune(b) } else { // Collect intermediate bytes // we only store the last intermediate byte p.cmd &^= 0xff << parser.IntermedShift p.cmd |= int(b) << parser.IntermedShift } case parser.ParamAction: // Collect parameters if p.paramsLen >= len(p.params) { break } if b >= '0' && b <= '9' { if p.params[p.paramsLen] == parser.MissingParam { p.params[p.paramsLen] = 0 } p.params[p.paramsLen] *= 10 p.params[p.paramsLen] += int(b - '0') } if b == ':' { p.params[p.paramsLen] |= parser.HasMoreFlag } if b == ';' || b == ':' { p.paramsLen++ if p.paramsLen < len(p.params) { p.params[p.paramsLen] = parser.MissingParam } } case parser.StartAction: if p.dataLen < 0 && p.data != nil { p.data = p.data[:0] } else { p.dataLen = 0 } if p.state >= parser.DcsEntryState && p.state <= parser.DcsStringState { // Collect the command byte for DCS p.cmd |= int(b) } else { p.cmd = parser.MissingCommand } case parser.PutAction: switch p.state { case parser.OscStringState: if b == ';' && p.cmd == parser.MissingCommand { p.parseStringCmd() } } if p.dataLen < 0 { p.data = append(p.data, b) } else { if p.dataLen < len(p.data) { p.data[p.dataLen] = b p.dataLen++ } } case parser.DispatchAction: // Increment the last parameter if p.paramsLen > 0 && p.paramsLen < len(p.params)-1 || p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam { p.paramsLen++ } if p.state == parser.OscStringState && p.cmd == parser.MissingCommand { // Ensure we have a command for OSC p.parseStringCmd() } data := p.data if p.dataLen >= 0 { data = data[:p.dataLen] } switch p.state { case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState: p.cmd |= int(b) if p.handler.HandleCsi != nil { p.handler.HandleCsi(Cmd(p.cmd), p.Params()) } case parser.EscapeState, parser.EscapeIntermediateState: p.cmd |= int(b) if p.handler.HandleEsc != nil { p.handler.HandleEsc(Cmd(p.cmd)) } case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState: if p.handler.HandleDcs != nil { p.handler.HandleDcs(Cmd(p.cmd), p.Params(), data) } case parser.OscStringState: if p.handler.HandleOsc != nil { p.handler.HandleOsc(p.cmd, data) } case parser.SosStringState: if p.handler.HandleSos != nil { p.handler.HandleSos(data) } case parser.PmStringState: if p.handler.HandlePm != nil { p.handler.HandlePm(data) } case parser.ApcStringState: if p.handler.HandleApc != nil { p.handler.HandleApc(data) } } } } func utf8ByteLen(b byte) int { if b <= 0b0111_1111 { // 0x00-0x7F return 1 } else if b >= 0b1100_0000 && b <= 0b1101_1111 { // 0xC0-0xDF return 2 } else if b >= 0b1110_0000 && b <= 0b1110_1111 { // 0xE0-0xEF return 3 } else if b >= 0b1111_0000 && b <= 0b1111_0111 { // 0xF0-0xF7 return 4 } return -1 }