#Code Generated by Parser:c2b9311d Rule: 2025-01-11 03:13:12
def get_html(url):
import time
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
browser = p.chromium.launch(headless=False)
page = browser.new_page()
page.goto(url)
time.sleep(10)
page_source = page.content()
browser.close()
with open("debug.html", "w", encoding="utf-8") as file:
file.write(page_source)
return page_source
from decimal import Decimal
import re
from lxml.cssselect import CSSSelector
def extract_SellPrice(lxml_tree):
selector = CSSSelector('#corePriceDisplay_desktop_feature_div .a-price .a-price-whole')
price_whole = ''.join(selector(lxml_tree)[0].text_content().strip())
selector = CSSSelector('#corePriceDisplay_desktop_feature_div .a-price .a-price-fraction')
price_fraction = ''.join(selector(lxml_tree)[0].text_content().strip())
price = price_whole + '.' + price_fraction
price_cleaned = re.sub(r'\.\.+', '.', price)
return Decimal(price_cleaned)
def extract_Location(lxml_tree):
from lxml.cssselect import CSSSelector
import re
selector = CSSSelector("span#glow-ingress-line2")
elements = selector(lxml_tree)
return re.sub(r'\s+', ' ', elements[0].text_content()).strip() if elements else None
def extract_ProductName(lxml_tree):
from lxml.cssselect import CSSSelector
import re
selector = CSSSelector("span#productTitle")
elements = selector(lxml_tree)
return re.sub(r'\s+', ' ', elements[0].text_content()).strip() if elements else None
def extract_TotalReview(lxml_tree):
from lxml.cssselect import CSSSelector
import re
selector = CSSSelector("span#acrCustomerReviewText")
elements = selector(lxml_tree)
if elements:
match = re.search(r'\d+', elements[0].text_content())
return int(match.group(0)) if match else None
return None
def extract_Availability(lxml_tree):
from lxml.cssselect import CSSSelector
selector = CSSSelector("div#availability span.a-size-medium.a-color-success")
elements = selector(lxml_tree)
return bool(elements)
def extract_ProductImage(lxml_tree):
from lxml.cssselect import CSSSelector
selector = CSSSelector("img#landingImage")
elements = selector(lxml_tree)
return elements[0].get("src") if elements else None
def extract_AverageReview(lxml_tree):
from lxml.cssselect import CSSSelector
import re
selector = CSSSelector("span#acrPopover span.a-size-base.a-color-base")
elements = selector(lxml_tree)
if elements:
match = re.search(r'\d+(\.\d+)?', elements[0].text_content())
return float(match.group(0)) if match else None
return None
if __name__ == '__main__':
import lxml.html
url='<URL>'
html=get_html(url)
tree = lxml.html.fromstring(html)
result={}
result['SellPrice']=extract_SellPrice(tree)
result['Location']=extract_Location(tree)
result['ProductName']=extract_ProductName(tree)
result['TotalReview']=extract_TotalReview(tree)
result['Availability']=extract_Availability(tree)
result['ProductImage']=extract_ProductImage(tree)
result['AverageReview']=extract_AverageReview(tree)
print(result)