#Code Generated by Parser:e665430d Rule: 2025-01-11 03:13:12
def get_html(url):
    """Load ``url`` in Chromium via Playwright and return the page's HTML.

    The browser is launched with a visible window (``headless=False``). After
    navigation the function pauses for a fixed 10 seconds before reading the
    page content (presumably to let client-side scripts finish rendering).
    A copy of the HTML is also written to ``debug.html`` in the current
    working directory.

    Args:
        url: Address passed to ``page.goto``.

    Returns:
        The markup returned by ``page.content()``.
    """
    import time
    from playwright.sync_api import sync_playwright

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            page = browser.new_page()
            page.goto(url)
            time.sleep(10)
            page_source = page.content()
        finally:
            # Close the browser even when navigation or content retrieval
            # fails, so a failed run does not leave a browser window behind.
            browser.close()

    # Keep a copy of the fetched markup on disk for debugging selectors.
    with open("debug.html", "w", encoding="utf-8") as file:
        file.write(page_source)
    return page_source
from lxml.cssselect import CSSSelector
from decimal import Decimal
import re
def extract_ListPrice(lxml_tree):
    """Return the list price from the first matching element, or ``None``.

    Only the first element matched by the CSS selector is inspected. The
    first ``$``-prefixed run of digits, dots and commas in its text is taken,
    everything except digits and dots is removed, and the remainder is
    converted to ``Decimal``.

    Args:
        lxml_tree: Parsed lxml document/element to search.

    Returns:
        The price as ``Decimal``, or ``None`` when no element matches or its
        text holds no ``$`` amount.
    """
    nodes = CSSSelector(".nowrap.items-center.inline-flex span.w_iUH7")(lxml_tree)
    if not nodes:
        return None

    found = re.search(r"\$([\d.,]+)", nodes[0].text_content())
    if found is None:
        return None

    # Drop thousands separators (anything that is not a digit or a dot).
    digits = re.sub(r"[^\d.]", "", found.group(1))
    return Decimal(digits)
def extract_SellPrice(lxml_tree):
    """Return the selling price from the first matching element, or ``None``.

    Only the first element matched by the CSS selector is inspected. Any
    doubled dot (``..``) in its text is collapsed to a single dot before the
    first ``$``-prefixed amount is extracted, stripped of everything except
    digits and dots, and converted to ``Decimal``.

    Args:
        lxml_tree: Parsed lxml document/element to search.

    Returns:
        The price as ``Decimal``, or ``None`` when no element matches or its
        text holds no ``$`` amount.
    """
    find_price_nodes = CSSSelector(".b.lh-copy.dark-gray.f1.mr2.green span.inline-flex.flex-column span, .b.lh-copy.dark-gray.f1.mr2 span.inline-flex.flex-column span")
    nodes = find_price_nodes(lxml_tree)
    if not nodes:
        return None

    # Collapse ".." into "." before looking for the amount.
    cleaned_text = re.sub(r'\.\.', '.', nodes[0].text_content())
    found = re.search(r"\$([\d.,]+)", cleaned_text)
    if found is None:
        return None

    digits = re.sub(r"[^\d.]", "", found.group(1))
    return Decimal(digits)
def extract_SellPrice2(lxml_tree):
    """Return the largest ``$`` amount in the first matching price element.

    Only the first element matched by the CSS selector is inspected. Doubled
    dots (``..``) in its text are collapsed, every ``$``-prefixed amount is
    collected, and the largest one is returned. When the element is missing,
    contains no usable amount, or anything goes wrong while reading it, the
    result of ``extract_SellPrice`` is returned instead.

    Args:
        lxml_tree: Parsed lxml document/element to search.

    Returns:
        The price as ``Decimal``, or whatever ``extract_SellPrice`` returns
        for the same tree when this selector yields no usable amount.
    """
    from decimal import InvalidOperation

    try:
        nodes = CSSSelector(".mr1.mr2-xl.b.black.green.lh-copy.f5.f4-l")(lxml_tree)
        if nodes:
            price_text = re.sub(r'\.\.', '.', nodes[0].text_content())
            amounts = []
            for raw in re.findall(r"\$([\d.,]+)", price_text):
                # Strip thousands separators BEFORE converting: Decimal
                # rejects strings such as "1,299.00", which previously made
                # every comma-formatted price fall through to the fallback.
                try:
                    amounts.append(Decimal(re.sub(r"[^\d.]", "", raw)))
                except InvalidOperation:
                    # Skip a malformed candidate (e.g. "1.2.3") but keep the
                    # remaining amounts.
                    continue
            if amounts:
                return max(amounts)
    except Exception:
        # Best-effort: any unexpected failure with this selector defers to
        # the alternative extractor rather than aborting the scrape.
        return extract_SellPrice(lxml_tree)
    return extract_SellPrice(lxml_tree)
def extract_Discount(lxml_tree):
    """Return the first ``$`` amount found in the first matching element.

    Only the first element matched by ``span.w_iUH7 div`` is inspected.

    Args:
        lxml_tree: Parsed lxml document/element to search.

    Returns:
        The amount as the raw captured string (digits, dots and commas,
        without the ``$`` sign), or ``None`` when no element matches or its
        text holds no ``$`` amount.
    """
    from lxml.cssselect import CSSSelector
    import re

    nodes = CSSSelector('span.w_iUH7 div')(lxml_tree)
    if not nodes:
        return None

    found = re.search(r'\$([\d.,]+)', nodes[0].text_content())
    return found.group(1) if found else None
def extract_ProductName(lxml_tree):
    """Return the stripped text of the first ``h1#main-title`` element.

    Args:
        lxml_tree: Parsed lxml document/element to search.

    Returns:
        The heading text with surrounding whitespace removed, or ``None``
        when no such element exists.
    """
    from lxml.cssselect import CSSSelector

    headings = CSSSelector('h1#main-title')(lxml_tree)
    if not headings:
        return None

    title_text = headings[0].text_content()
    return title_text.strip()
def extract_Availability(lxml_tree):
    """Report whether any ``div.mt1 div.f7`` element exists in the tree.

    NOTE(review): the code only tests for the element's presence; what that
    presence means for stock status should be confirmed against the page.

    Args:
        lxml_tree: Parsed lxml document/element to search.

    Returns:
        ``True`` when at least one element matches, otherwise ``False``.
    """
    from lxml.cssselect import CSSSelector

    find_marker = CSSSelector('div.mt1 div.f7')
    return True if find_marker(lxml_tree) else False
def extract_ProductImage(lxml_tree):
    """Return the ``src`` attribute of the first ``img.db`` element.

    Args:
        lxml_tree: Parsed lxml document/element to search.

    Returns:
        The attribute value, or ``None`` when no element matches or the
        first match has no ``src`` attribute.
    """
    from lxml.cssselect import CSSSelector

    images = CSSSelector('img.db')(lxml_tree)
    if not images:
        return None

    first_image = images[0]
    return first_image.get('src')
def extract_AverageReview(lxml_tree):
    """Return the parenthesised number in the first matching element.

    Only the first element matched by ``div.gray span.f7`` is inspected; the
    first ``(<digits and dots>)`` group in its text is converted to ``float``.

    Args:
        lxml_tree: Parsed lxml document/element to search.

    Returns:
        The number as ``float``, or ``None`` when no element matches or its
        text holds no parenthesised number.
    """
    from lxml.cssselect import CSSSelector
    import re

    nodes = CSSSelector('div.gray span.f7')(lxml_tree)
    if not nodes:
        return None

    found = re.search(r'\(([\d.]+)\)', nodes[0].text_content())
    if found is None:
        return None
    return float(found.group(1))
if __name__ == '__main__':
    # Script entry point: fetch one page, parse it, run every extractor and
    # print the collected fields.
    import lxml.html

    url = '<URL>'
    html = get_html(url)
    tree = lxml.html.fromstring(html)

    # Dict displays evaluate their entries top to bottom, so the extractors
    # run in the listed order.
    result = {
        'ListPrice': extract_ListPrice(tree),
        'SellPrice': extract_SellPrice(tree),
        'SellPrice2': extract_SellPrice2(tree),
        'Discount': extract_Discount(tree),
        'ProductName': extract_ProductName(tree),
        'Availability': extract_Availability(tree),
        'ProductImage': extract_ProductImage(tree),
        'AverageReview': extract_AverageReview(tree),
    }
    print(result)