#Code Generated by Parser:12f331c7 Rule: 2025-01-05 11:46:40
def get_html(url, *, wait_seconds=10, headless=False, debug_path="debug.html"):
    """Load *url* in Chromium via Playwright and return the rendered HTML.

    The page is opened, given a fixed amount of time to finish rendering,
    and its current DOM is serialised with ``page.content()``. A copy of the
    HTML is also written to *debug_path* for later inspection.

    Args:
        url: Address of the page to load.
        wait_seconds: Seconds to wait after navigation before the HTML is
            captured. Defaults to 10.
        headless: Whether to launch Chromium without a visible window.
            Defaults to ``False``.
        debug_path: File the captured HTML is written to (UTF-8). Pass
            ``None`` to skip writing the file. Defaults to ``"debug.html"``.

    Returns:
        The page HTML as a string.
    """
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        try:
            page = browser.new_page()
            page.goto(url)
            # Playwright's docs advise wait_for_timeout() over time.sleep()
            # when using the sync API; the argument is in milliseconds.
            page.wait_for_timeout(wait_seconds * 1000)
            page_source = page.content()
        finally:
            # Close the browser even when navigation or capture fails.
            browser.close()

    if debug_path is not None:
        with open(debug_path, "w", encoding="utf-8") as file:
            file.write(page_source)
    return page_source
def extract_ListPrice(lxml_tree):
    """Return the list price found in a parsed product page.

    Takes the text of the first element matching
    ``span.product-price__compare span.money`` and converts it to a
    ``Decimal``.

    Args:
        lxml_tree: Parsed lxml HTML element to search.

    Returns:
        The price as a ``Decimal``, or ``None`` when no element matches or
        the matched text contains no digits.

    Raises:
        decimal.InvalidOperation: If the cleaned text is still not a valid
            number (for example when it contains several separate dots).
    """
    from decimal import Decimal
    import re

    from lxml.cssselect import CSSSelector

    selector = CSSSelector('span.product-price__compare span.money')
    matches = selector(lxml_tree)
    if not matches:
        return None

    price_text = matches[0].text_content()
    # Keep only digits and dots; this drops currency symbols, whitespace and
    # commas.
    # NOTE(review): a decimal comma ("12,99") would be read as 1299 - confirm
    # the site always uses a decimal point.
    cleaned_price = re.sub(r'[^\d.]', '', price_text)
    # Collapse repeated dots (as extract_SellPrice does) and drop trailing
    # punctuation dots so "12.99." still parses.
    cleaned_price = re.sub(r'\.+', '.', cleaned_price).rstrip('.')
    if not cleaned_price:
        # Nothing numeric in the element; Decimal('') would raise.
        return None
    return Decimal(cleaned_price)
def extract_SellPrice(lxml_tree):
    """Return the selling price found in a parsed product page.

    Takes the text of the first element matching either
    ``span.product-price__reduced span.money`` or
    ``span.theme-money.large-title span.money`` and converts it to a
    ``Decimal``.

    Args:
        lxml_tree: Parsed lxml HTML element to search.

    Returns:
        The price as a ``Decimal``, or ``None`` when no element matches or
        the matched text contains no digits.

    Raises:
        decimal.InvalidOperation: If the cleaned text is still not a valid
            number (for example when it contains several separate dots).
    """
    from decimal import Decimal
    import re

    from lxml.cssselect import CSSSelector

    selector = CSSSelector(
        'span.product-price__reduced span.money, '
        'span.theme-money.large-title span.money'
    )
    matches = selector(lxml_tree)
    if not matches:
        return None

    price_text = matches[0].text_content()
    # Keep only digits and dots; this drops currency symbols, whitespace and
    # commas.
    # NOTE(review): a decimal comma ("12,99") would be read as 1299 - confirm
    # the site always uses a decimal point.
    cleaned_price = re.sub(r'[^\d.]', '', price_text)
    # Collapse runs of dots into one and drop trailing punctuation dots so
    # "12.99." still parses.
    normalized_price = re.sub(r'\.+', '.', cleaned_price).rstrip('.')
    if not normalized_price:
        # Nothing numeric in the element; Decimal('') would raise.
        return None
    return Decimal(normalized_price)
def extract_ProductName(lxml_tree):
    """Return the product title found in a parsed product page.

    Uses the first element matching
    ``div.product-detail__title-area h1.product-detail__title``. Whitespace
    runs in the title are collapsed to single spaces.

    Args:
        lxml_tree: Parsed lxml HTML element to search.

    Returns:
        The cleaned title string, or ``None`` when no element matches or the
        title is empty.
    """
    import re

    from lxml.cssselect import CSSSelector

    selector = CSSSelector("div.product-detail__title-area h1.product-detail__title")
    matches = selector(lxml_tree)
    if not matches:
        return None

    heading = matches[0]
    raw_title = heading.text
    if raw_title is None or not raw_title.strip():
        # .text is None (or blank) when the heading starts with a child
        # element; fall back to the text of the whole subtree.
        raw_title = heading.text_content()

    title = re.sub(r'\s+', ' ', raw_title).strip()
    return title or None
def extract_TotalReview(lxml_tree):
    """Return the review count found in a parsed product page.

    Uses the first element matching ``div.loox-rating-label`` or
    ``li.cc-select__option span`` and joins every digit in its text into a
    single integer.

    Args:
        lxml_tree: Parsed lxml HTML element to search.

    Returns:
        The review count as an ``int``, or ``None`` when no element matches
        or the matched text contains no digits.
    """
    import re

    from lxml.cssselect import CSSSelector

    selector = CSSSelector("div.loox-rating-label, li.cc-select__option span")
    matches = selector(lxml_tree)
    if not matches:
        return None

    label = matches[0]
    raw_text = label.text or ''
    if not re.search(r'\d', raw_text):
        # The direct text is missing or has no number; the count may sit in a
        # child element, so look at the text of the whole subtree.
        raw_text = label.text_content()

    # All non-digits are removed, so "1,234 reviews" becomes 1234.
    # NOTE(review): a label that also shows a rating ("4.8 (123)") would be
    # merged into one number - confirm the label only contains the count.
    digits = re.sub(r'\D', '', raw_text)
    return int(digits) if digits else None
def extract_ProductImage(lxml_tree):
    """Return the product image URL found in a parsed product page.

    Reads the ``src`` attribute of the first element matching
    ``div.product-media img.rimage__image``.

    Args:
        lxml_tree: Parsed lxml HTML element to search.

    Returns:
        The ``src`` value with surrounding whitespace removed, or ``None``
        when no element matches or the attribute is missing or blank.
    """
    from lxml.cssselect import CSSSelector

    selector = CSSSelector("div.product-media img.rimage__image")
    matches = selector(lxml_tree)
    if not matches:
        return None

    # .get() returns None when the <img> has no src attribute; guard before
    # calling str methods on it.
    src = matches[0].get('src')
    if src is None:
        return None
    return src.strip() or None
if __name__ == '__main__':
    # Script entry point: fetch one product page, run every extractor on the
    # parsed HTML and print the collected fields as a dict.
    import lxml.html

    url = 'https://www.buykud.com/collections/buykud-best-sellers/products/5-pairs-cotton-women-casual-warm-socks'
    html = get_html(url)
    tree = lxml.html.fromstring(html)

    # Output key -> extractor, listed in the order the fields are produced.
    extractors = (
        ('ListPrice', extract_ListPrice),
        ('SellPrice', extract_SellPrice),
        ('ProductName', extract_ProductName),
        ('TotalReview', extract_TotalReview),
        ('ProductImage', extract_ProductImage),
    )
    result = {field: extract(tree) for field, extract in extractors}
    print(result)