FIX AutoDL: 修复 exception 导致 worker 停止的问题 (fix: an exception no longer stops the worker loops)

This commit is contained in:
kyj@bowong.ai 2025-04-17 15:40:28 +08:00
parent 1dfab45d83
commit d0cc6f27b5
1 changed file with 9 additions and 6 deletions

View File

@@ -61,13 +61,16 @@ class Server:
def introspect_instance(self):
loguru.logger.info("Introspecting worker started || Scaledown Window: %ds" % self.instance_pool.scaledown_window)
while True:
self.instance_pool.introspection()
time.sleep(1)
try:
self.instance_pool.introspection()
time.sleep(1)
except:
traceback.print_exc()
def scaling_worker(self):
loguru.logger.info("Scaling worker started")
try:
while True:
while True:
try:
# 提交任务
self.instance_pool.scale_instance(self.waiting_queue.get_size()+self.running_pool.get_running_size(), disable_shrink=True)
for instance in self.instance_pool.instances:
@@ -104,8 +107,8 @@ class Server:
instance.active = False
instance.last_active_time = time.time()
time.sleep(0.5)
except:
traceback.print_exc()
except:
traceback.print_exc()
if __name__=="__main__":