"""Small client for the AutoDL sub-user HTTP API.

Lists rentable GPU machines, creates pay-as-you-go (payg) instances, polls
instance status, performs power/release operations, and starts a worker
script on a freshly created instance over SSH.
"""
import os
import time
from typing import Literal, Union, Any

import logger  # NOTE(review): no longer referenced here; kept in case other code relies on it.
import loguru
import paramiko
import requests

# SECURITY NOTE(review): a credential is committed in source. Supply it via the
# AUTODL_TOKEN environment variable instead; the literal below is kept only as
# a backward-compatible fallback and should be rotated and removed.
token = os.environ.get(
    "AUTODL_TOKEN",
    "eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1aWQiOjMzNjM5MSwidXVpZCI6ImU2MDU0ZGI4LTlhN2UtNDllNC1hNDQ1LWI4N2M1NGViMjU4ZCIsImlzX2FkbWluIjpmYWxzZSwiYmFja3N0YWdlX3JvbGUiOiIiLCJpc19zdXBlcl9hZG1pbiI6ZmFsc2UsInN1Yl9uYW1lIjoid3F5QGI4N2M1NGViMjU4ZCIsInRlbmFudCI6IiIsInVwayI6IiJ9.4MV4P1feiUmrrzFbtTQpNQjvYyezPdaLxRJ79y0VyRAxR0aS5NQJGJPxa-6wuqsgzY-E1rvf5S8FCY92ZnViFQ",
)
req_instance_page_size = 1500
chk_instance_page_size = 1500
instances = {}
LIM = 30  # Maximum number of status polls; total wait is LIM * 5s.

_API_BASE = "https://www.autodl.com/api/v1/sub_user"
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
)
_POLL_INTERVAL = 5  # Seconds slept between two status polls.
_REQUEST_TIMEOUT = 30  # Seconds before an HTTP request is abandoned.


def _post(path: str, payload: dict) -> Union[requests.Response, None]:
    """POST ``payload`` as JSON to ``_API_BASE + path``.

    The module-level ``token`` is read at call time, so reassigning it after
    import still takes effect.

    Returns:
        The response object, or ``None`` when the request could not be
        completed (connection error, timeout, ...); the failure is logged.
    """
    headers = {
        "Authorization": token,
        "User-Agent": _USER_AGENT,
        "Host": "www.autodl.com",
    }
    try:
        return requests.post(
            _API_BASE + path,
            json=payload,
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )
    except requests.RequestException as exc:
        loguru.logger.error("Request {} Failed: {}".format(path, exc))
        return None


def _is_rentable(machine: dict) -> bool:
    """Return True when a machine-list entry passes the rental filter."""
    if machine["health_status"] != 0 or machine["gpu_order_num"] <= 0:
        return False
    try:
        cuda_version = float(machine["highest_cuda_version"])
    except (TypeError, ValueError):
        # An unparsable CUDA version cannot be shown to satisfy the filter.
        return False
    return (
        cuda_version > 12.
        and machine["payg"] == True  # noqa: E712 - keep the original loose comparison
        and machine["rent_mode"] == ""
        and not machine["user_visible_limit"]
    )


def ssh_try(host, port, pwd):
    """Log in as root over SSH and launch the worker script in the background.

    Args:
        host: SSH host name or address.
        port: SSH port; converted with ``int()``.
        pwd: Password of the ``root`` account.

    Raises:
        Whatever paramiko raises when connecting or starting the command.
    """
    # Open the connection.
    trans = paramiko.Transport((host, int(port)))
    try:
        trans.connect(username="root", password=pwd)

        # Make the SSHClient reuse the transport opened above.
        ssh = paramiko.SSHClient()
        ssh._transport = trans
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.exec_command(
            '/bin/bash -lc "source /root/.bashrc && nohup python /root/AutoDL_pure_heygem.py > log.txt 2>&1 &"'
        )
    finally:
        # Close the connection even when connect/exec_command fails.
        trans.close()


def get_autodl_machines() -> Union[list, None]:
    """Fetch every page of the machine list and keep the rentable machines.

    Returns:
        A list of dicts (``machine_id``, ``region_name``, ``machine_alias``,
        ``gpu_name``, ``gpu_order_num``, ``gpu_number``, ``region_sign``)
        sorted by ``gpu_order_num`` in descending order, or ``None`` when any
        page request fails.
    """
    index = 1
    payload = {
        "charge_type": "payg",
        "region_sign": "",
        "gpu_type_name": [
            "RTX 4090", "RTX 4090D", "RTX 3090", "RTX 3080", "RTX 3080x2",
            "RTX 3080 Ti", "RTX 3060", "RTX A4000", "RTX 2080 Ti",
            "RTX 2080 Ti x2", "GTX 1080 Ti",
        ],
        "machine_tag_name": [],
        "gpu_idle_num": 1,
        "mount_net_disk": False,
        "instance_disk_size_order": "",
        "date_range": "",
        "date_from": "",
        "date_to": "",
        "page_index": index,
        "page_size": req_instance_page_size,
        "pay_price_order": "",
        "gpu_idle_type": "",
        "default_order": False,
        "region_sign_list": [
            "nm-B1", "nm-B2", "west-B", "west-C", "west-X",
            "bj-B1", "beijing-A", "beijing-B", "beijing-D", "beijing-E",
        ],
        "cpu_arch": ["x86"],
        "chip_corp": ["nvidia"],
        "machine_id": "",
    }
    loguru.logger.info("Req Machine index {}".format(index))
    rsp = _post("/user/machine/list", payload)
    if rsp is None or rsp.status_code != 200:
        loguru.logger.error("Get Machines Req Error")
        return None

    machine_list = rsp.json()
    loguru.logger.info("Machine Result Total {}".format(machine_list["data"]["result_total"]))
    max_page = machine_list["data"]["max_page"]
    while index < max_page:
        index += 1
        loguru.logger.info("Req Machine index {}/{}".format(index, max_page))
        payload["page_index"] = index
        rsp = _post("/user/machine/list", payload)
        if rsp is None or rsp.status_code != 200:
            loguru.logger.error("Get Machines Req Error")
            return None
        machine_list["data"]["list"].extend(rsp.json()["data"]["list"])

    machines = [
        {
            "machine_id": machine["machine_id"],
            "region_name": machine["region_name"],
            "machine_alias": machine["machine_alias"],
            "gpu_name": machine["gpu_name"],
            "gpu_order_num": machine["gpu_order_num"],
            "gpu_number": machine["gpu_number"],
            "region_sign": machine["region_sign"],
        }
        for machine in machine_list["data"]["list"]
        if _is_rentable(machine)
    ]
    return sorted(machines, key=lambda machine: machine["gpu_order_num"], reverse=True)


def payg(region_name: str, machine_id: str) -> tuple[Any, Any] | None:
    """Create a pay-as-you-go instance and start the worker once it is running.

    Args:
        region_name: Region name of the machine; its first two characters
            select the image to use.
        machine_id: Identifier of the machine to create the instance on.

    Returns:
        ``(instance_uuid, tensorboard_domain)`` on success; ``None`` when the
        region has no configured image, the create request fails, or the
        instance does not reach ``running`` within ``LIM`` polls.
    """
    region_image = {
        "西北": ["hub.kce.ksyun.com/autodl-image/miniconda:cuda11.8-cudnn8-devel-ubuntu20.04-py38", "image-232ac04d3b"],
        "内蒙": ["hub.kce.ksyun.com/autodl-image/miniconda:cuda11.8-cudnn8-devel-ubuntu20.04-py38", "image-06814c02d1"],
        "北京": ["hub.kce.ksyun.com/autodl-image/miniconda:cuda11.8-cudnn8-devel-ubuntu20.04-py38", "image-e5334cc4f3"],
    }
    image_info = region_image.get(region_name[:2])
    if image_info is None:
        loguru.logger.error("Create Payg Instance Error: No Image For Region %s" % region_name)
        return None
    image, private_image_uuid = image_info

    payload = {
        "instance_info": {
            "machine_id": machine_id,
            "charge_type": "payg",
            "req_gpu_amount": 1,
            "image": image,
            "private_image_uuid": private_image_uuid,
            "reproduction_uuid": "",
            "instance_name": "",
            "expand_data_disk": 0,
            "reproduction_id": 0,
        },
        "price_info": {
            "coupon_id_list": [],
            "machine_id": machine_id,
            "charge_type": "payg",
            "duration": 1,
            "num": 1,
            "expand_data_disk": 0,
        },
    }
    loguru.logger.info("Try Create Payg Container on Machine {}/{}".format(region_name, machine_id))
    rsp = _post("/order/instance/create/payg", payload)
    if rsp is None:
        return None
    if rsp.status_code != 200:
        loguru.logger.error("Create Payg Instance Error: Status Code[%s]" % rsp.status_code)
        return None

    j = rsp.json()
    if j["code"] != "Success":
        loguru.logger.error("Create Payg Instance Error: %s" % j['msg'])
        return None

    instance_uuid = j['data']
    for _ in range(LIM):
        time.sleep(_POLL_INTERVAL)
        info = check_status(instance_uuid)
        if info is None:
            # Not listed yet or the status request failed: poll again.
            continue
        status, host, port, pwd, domain = info
        if status == "running":
            ssh_try(host, port, pwd)
            loguru.logger.success("Create Payg Instance Success: %s" % instance_uuid)
            return instance_uuid, domain

    loguru.logger.error("Create Payg Instance Error: Wait for Created Timeout, Please Check!!! instance_uuid(%s)" % instance_uuid)
    return None


def instance_operate(instance_uuid: str, operation: Literal["power_off", "power_on", "release"]) -> bool:
    """Run ``operation`` on an instance and wait for the matching status.

    ``power_off`` waits for status ``shutdown`` and ``power_on`` for
    ``running``; ``release`` has no status to wait for and succeeds as soon as
    the API accepts the request.

    Returns:
        ``True`` on success, ``False`` on request failure, API error or
        timeout after ``LIM`` polls.
    """
    dest_dict = {
        "power_off": "shutdown",
        "power_on": "running",
    }
    rsp = _post("/instance/%s" % operation, {"instance_uuid": instance_uuid})
    if rsp is None:
        return False
    if rsp.status_code != 200:
        loguru.logger.error("Operate[%s] Instance Error: Status Code[%s]" % (operation, rsp.status_code))
        return False

    j = rsp.json()
    if j["code"] != "Success":
        loguru.logger.error("Operate[%s] Instance Error: %s" % (operation, j['msg']))
        return False

    target = dest_dict.get(operation)
    if target is None:
        loguru.logger.success("Operate[%s] Instance Success" % operation)
        return True

    for _ in range(LIM):
        time.sleep(_POLL_INTERVAL)
        info = check_status(instance_uuid)
        # A None result (request error / instance not listed) counts as "not there yet".
        if info is not None and info[0] == target:
            loguru.logger.success("Operate[%s] Instance Success" % operation)
            return True

    loguru.logger.error("Operate[%s] Instance Error: Timeout, Please Check!!! instance_uuid(%s)" % (operation, instance_uuid))
    return False


def check_status(instance_uuid: str) -> tuple[Any, Any, Any, Any, Any] | None:
    """Look an instance up in the account's instance list.

    Returns:
        ``(status, proxy_host, ssh_port, root_password, tensorboard_domain)``
        for the matching instance, or ``None`` when a request fails or the
        instance is not in the list.
    """
    index = 1
    payload = {
        "date_from": "",
        "date_to": "",
        "page_index": index,
        "page_size": chk_instance_page_size,
        "status": [],
        "charge_type": [],
    }
    rsp = _post("/instance", payload)
    if rsp is None or rsp.status_code != 200:
        loguru.logger.error("Get Instance Req Error")
        return None

    instance_list = rsp.json()
    while index < instance_list["data"]["max_page"]:
        # Advance to the next page; without this the loop never terminates.
        index += 1
        payload["page_index"] = index
        rsp = _post("/instance", payload)
        if rsp is None or rsp.status_code != 200:
            loguru.logger.error("Get Instance Req Error")
            return None
        instance_list["data"]["list"].extend(rsp.json()["data"]["list"])

    for item in instance_list["data"]["list"]:
        if item["uuid"] == instance_uuid:
            loguru.logger.info("Instance {} Status {}".format(instance_uuid, item["status"]))
            return (
                item["status"],
                item["proxy_host"],
                item["ssh_port"],
                item["root_password"],
                item["tensorboard_domain"],
            )
    loguru.logger.warning("Instance {} Not Found".format(instance_uuid))
    return None


if __name__ == "__main__":
    machines = get_autodl_machines()
    # Only the best-ranked machine is tried; `machines` is None on request failure.
    for m in machines or []:
        result = payg(m["region_name"], m["machine_id"])
        if result is not None:
            instance_uuid, domain = result
            print(instance_uuid, "https://{}".format(domain))
            # To clean up afterwards:
            #   instance_operate(instance_uuid, "power_off")
            #   instance_operate(instance_uuid, "release")
        break