diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index b6ec440..3eb3b88 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -88,9 +88,11 @@ func (s *scheduler) processInstance(ctx context.Context, si *scheduledInstance) log.Printf("Error retrieving instance %s from storage. Got: %v\n", si.instance.Name, err) continue } + failed := false for _, task := range s.tasks { err := task.Run(ctx, si.instance) if err != nil { + failed = true log.Printf("Error running task %s on instance %s. Got: %v\n", task.Name(), si.instance.Name, err) // Since one task failed, we just assume something might be wrong with the instance, so we don't try to process the rest of the tasks. si.fails++ @@ -98,10 +100,12 @@ func (s *scheduler) processInstance(ctx context.Context, si *scheduledInstance) log.Printf("Instance %s has failed to execute tasks too many times. Giving up.\n", si.instance.Name) return } - continue + break } } - si.fails = 0 + if !failed { + si.fails = 0 + } case <-ctx.Done(): log.Printf("Processing tasks for instance %s has been canceled.\n", si.instance.Name) return