diff --git a/src/cluster/rpa.py b/src/cluster/rpa.py index 943968c..0a789d8 100644 --- a/src/cluster/rpa.py +++ b/src/cluster/rpa.py @@ -105,6 +105,9 @@ async def check_live_status(webcast_id: str, page: Optional[Page] = None) -> boo # 监听网络响应 async def handle_response(custom_param, response): + """ + 有部分主播会移除商品,搞一个全局记录,类似page的部分返回值,初始化使用这个 + """ if "live.douyin.com/live/promotions/page" in response.url: print(f"Custom param: {custom_param}") print(f"Response URL: {response.url}") @@ -160,9 +163,6 @@ async def get_promotion_list_text(page: Page, webcast_id: str, max_duration: int break last_status_check = time.time() - # page.on("response", handle_response) - page.on("response", partial(handle_response, webcast_id)) - # Refresh the page to ensure updated innerText await reload_page(page) @@ -186,8 +186,14 @@ async def get_promotion_list_text(page: Page, webcast_id: str, max_duration: int continue current_product_id, current_product_img, current_product_name, last_live_time, start_time = await do_ended_product( - current_product_id, current_product_img, current_product_name, duration, last_live_time, - product_sessions, start_time) + current_product_id, + current_product_img, + current_product_name, + duration, + last_live_time, + product_sessions, + start_time, + page) await asyncio.sleep(3) # Poll every 3 seconds # except Exception as e: @@ -199,14 +205,25 @@ async def get_promotion_list_text(page: Page, webcast_id: str, max_duration: int # Finalize ongoing session if valid if start_time is not None and last_live_time is not None and current_product_name and current_product_img: - await do_finalized_product(current_product_id, current_product_img, current_product_name, last_live_time, - product_sessions, start_time) + await do_finalized_product( + current_product_id, + current_product_img, + current_product_name, + last_live_time, + product_sessions, + start_time, + page) return product_sessions -async def do_finalized_product(current_product_id, current_product_img, current_product_name, last_live_time, - product_sessions, start_time): +async def do_finalized_product(current_product_id:str, + current_product_img:str, + current_product_name:str, + last_live_time:int, + product_sessions:list[ProductSession], + start_time:int, + page:Page): duration = last_live_time - start_time if duration > 0: product_sessions.append(ProductSession( @@ -217,6 +234,8 @@ async def do_finalized_product(current_product_id, current_product_img, current_ product_id=current_product_id, product_img=current_product_img )) + # page.on("response", handle_response) + page.on("response", partial(handle_response, current_product_name)) logger.info( f"****** Finalized product session - Start: {start_time}, End: {last_live_time}, Duration: {duration}ms, Product: {current_product_name}") @@ -255,8 +274,14 @@ async def get_product_data(page, product_name_js, promotion_list_js): return product_id, product_img, product_name, text -async def do_ended_product(current_product_id, current_product_img, current_product_name, duration, last_live_time, - product_sessions, start_time): +async def do_ended_product(current_product_id:str, + current_product_img:str, + current_product_name:str, + duration:int, + last_live_time:int, + product_sessions:list[ProductSession], + start_time:int, + page:Page): product_sessions.append(ProductSession( start_time=start_time, end_time=last_live_time, @@ -265,6 +290,9 @@ async def do_ended_product(current_product_id, current_product_img, current_prod product_id=current_product_id, product_img=current_product_img )) + # page.on("response", handle_response) + page.on("response", partial(handle_response, current_product_name)) + logger.info( f"****** Ended product session - Start: {start_time}, End: {last_live_time}, Duration: {duration}ms, Product: {current_product_name}, ID: {current_product_id}") start_time = None @@ -406,13 +434,13 @@ with rpa_image.imports(): live_status = None logger.debug(f"Getting live status for webcast_id: {webcast_id}") try: - await page.wait_for_selector("[class='pip-anchor']", timeout=10 * 000) + await page.wait_for_selector("[class='pip-anchor']", timeout=3 * 000) await page.wait_for_function( """() => { const element = document.querySelector('[class="pip-anchor"]'); return element && element.innerText !== ""; }""", - timeout=10 * 000 + timeout=3 * 000 ) live_status_js = """document.querySelector('[class="pip-anchor"]')?.innerText || ''""" live_status_text: str = await page.evaluate(live_status_js) @@ -423,6 +451,7 @@ with rpa_image.imports(): except BaseException as e: logger.error(f"Failed to save JSON file: {str(e)}") + logger.info(f"Live status: {live_status}") return live_status