Merge pull request #25383 from tonistiigi/fix-restart-err
Fix possible race on container restart and restart error handling
Upstream-commit: c1e019585773f92b3300070283701811980a64c7
Component: engine
This commit is contained in:
@ -144,6 +144,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
||||
defer ctr.client.unlock(ctr.containerID)
|
||||
switch e.Type {
|
||||
case StateExit, StatePause, StateResume, StateOOM:
|
||||
var waitRestart chan error
|
||||
st := StateInfo{
|
||||
CommonStateInfo: CommonStateInfo{
|
||||
State: e.Type,
|
||||
@ -166,26 +167,7 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
||||
st.State = StateRestart
|
||||
ctr.restarting = true
|
||||
ctr.client.deleteContainer(e.Id)
|
||||
go func() {
|
||||
err := <-wait
|
||||
ctr.client.lock(ctr.containerID)
|
||||
defer ctr.client.unlock(ctr.containerID)
|
||||
ctr.restarting = false
|
||||
if err != nil {
|
||||
st.State = StateExit
|
||||
ctr.clean()
|
||||
ctr.client.q.append(e.Id, func() {
|
||||
if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
|
||||
logrus.Errorf("libcontainerd: %v", err)
|
||||
}
|
||||
})
|
||||
if err != restartmanager.ErrRestartCanceled {
|
||||
logrus.Errorf("libcontainerd: %v", err)
|
||||
}
|
||||
} else {
|
||||
ctr.start()
|
||||
}
|
||||
}()
|
||||
waitRestart = wait
|
||||
}
|
||||
}
|
||||
|
||||
@ -202,6 +184,32 @@ func (ctr *container) handleEvent(e *containerd.Event) error {
|
||||
if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
|
||||
logrus.Errorf("libcontainerd: backend.StateChanged(): %v", err)
|
||||
}
|
||||
if st.State == StateRestart {
|
||||
go func() {
|
||||
err := <-waitRestart
|
||||
ctr.client.lock(ctr.containerID)
|
||||
defer ctr.client.unlock(ctr.containerID)
|
||||
ctr.restarting = false
|
||||
if err == nil {
|
||||
if err = ctr.start(); err != nil {
|
||||
logrus.Errorf("libcontainerd: error restarting %v", err)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
st.State = StateExit
|
||||
ctr.clean()
|
||||
ctr.client.q.append(e.Id, func() {
|
||||
if err := ctr.client.backend.StateChanged(e.Id, st); err != nil {
|
||||
logrus.Errorf("libcontainerd: %v", err)
|
||||
}
|
||||
})
|
||||
if err != restartmanager.ErrRestartCanceled {
|
||||
logrus.Errorf("libcontainerd: %v", err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
if e.Type == StatePause || e.Type == StateResume {
|
||||
ctr.pauseMonitor.handle(e.Type)
|
||||
}
|
||||
|
||||
@ -195,6 +195,7 @@ func (ctr *container) waitProcessExitCode(process *process) int {
|
||||
// equivalent to (in the linux containerd world) where events come in for
|
||||
// state change notifications from containerd.
|
||||
func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) error {
|
||||
var waitRestart chan error
|
||||
logrus.Debugln("libcontainerd: waitExit() on pid", process.systemPid)
|
||||
|
||||
exitCode := ctr.waitProcessExitCode(process)
|
||||
@ -238,20 +239,7 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err
|
||||
} else if restart {
|
||||
si.State = StateRestart
|
||||
ctr.restarting = true
|
||||
go func() {
|
||||
err := <-wait
|
||||
ctr.restarting = false
|
||||
ctr.client.deleteContainer(ctr.friendlyName)
|
||||
if err != nil {
|
||||
si.State = StateExit
|
||||
if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
logrus.Error(err)
|
||||
} else {
|
||||
ctr.client.Create(ctr.containerID, ctr.ociSpec, ctr.options...)
|
||||
}
|
||||
}()
|
||||
waitRestart = wait
|
||||
}
|
||||
}
|
||||
|
||||
@ -267,6 +255,24 @@ func (ctr *container) waitExit(process *process, isFirstProcessToStart bool) err
|
||||
if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
if si.State == StateRestart {
|
||||
go func() {
|
||||
err := <-waitRestart
|
||||
ctr.restarting = false
|
||||
ctr.client.deleteContainer(ctr.friendlyName)
|
||||
if err == nil {
|
||||
if err = ctr.client.Create(ctr.containerID, ctr.ociSpec, ctr.options...); err != nil {
|
||||
logrus.Errorf("libcontainerd: error restarting %v", err)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
si.State = StateExit
|
||||
if err := ctr.client.backend.StateChanged(ctr.containerID, si); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
logrus.Debugf("libcontainerd: waitExit() completed OK, %+v", si)
|
||||
return nil
|
||||
|
||||
Reference in New Issue
Block a user