AI Agent for Generating Web Scraper Parsing Code.

Parser No	cadc0c30	Created By:	coparser@2025-01-04 15:11:46	Status:	code_gen
Prompt	Create python to extract product detail
Description	www.walmart.com Plus product Chevron Vest Big Women's Size Puffer Chill Quilted walmart
Training Cases:	TC-204 https://www.walmart.com/ip/Big-Chill-Women-s-Plus-Size-Chevron-Quilted-Puffer-Vest/253995296 completed TC-205 https://www.walmart.com/ip/Powerful-Suction-Handheld-Vacuum-Cleaner-Cordless-Blowing-Portable-Car-Pet-Hair-Cleaner-Quick-Charge-Lightweight-Washable-HEPA-Filter-Car-Home/6242356661 completed
Buy me a Coffee:
Action:

Source Code:



         
#Code Generated by Parser:cadc0c30 Rule: 2025-01-11 03:13:12
  
      
def get_html(url):
    """Load *url* in a Chromium browser and return the page markup.

    The browser is launched with ``headless=False``. After navigating,
    the function pauses for a fixed ten seconds before capturing the
    page content. The captured markup is also written to ``debug.html``
    (UTF-8) in the current working directory before being returned.
    """
    from time import sleep
    from playwright.sync_api import sync_playwright

    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch(headless=False)
        tab = chromium.new_page()
        tab.goto(url)
        # Fixed pause before the snapshot; presumably gives client-side
        # rendering time to finish -- confirm before shortening.
        sleep(10)
        markup = tab.content()
        chromium.close()

        # Keep an on-disk copy of exactly what was captured.
        with open("debug.html", "w", encoding="utf-8") as dump:
            dump.write(markup)

        return markup

def extract_ListPrice(lxml_tree):
    """Return the list price found in the buy box as a ``Decimal``.

    The first element matching
    ``div.buy-box-container .nowrap span.w_iUH7`` is inspected and the
    first ``$``-prefixed amount in its text is parsed.

    Args:
        lxml_tree: Parsed lxml element/tree to search.

    Returns:
        The amount as a ``Decimal``, or ``None`` when no element matches
        or its text contains no dollar amount.
    """
    from lxml.cssselect import CSSSelector
    from decimal import Decimal
    import re

    nodes = CSSSelector('div.buy-box-container .nowrap span.w_iUH7')(lxml_tree)
    if not nodes:
        return None

    text = nodes[0].text_content()
    # Accept comma-grouped thousands ("$1,299.00"); matching plain digits
    # only would stop at the comma and yield 1 instead of 1299.00.
    match = re.search(r'\$(\d{1,3}(?:,\d{3})+|\d+)(\.\d+)?', text)
    if match is None:
        return None

    whole = match.group(1).replace(',', '')
    fraction = match.group(2) or ''
    return Decimal(whole + fraction)

def extract_SellPrice(lxml_tree):
    """Return the selling price found in the buy box as a ``Decimal``.

    The first element matching
    ``div.buy-box-container span.b.lh-copy.dark-gray.f1.mr2
    span.inline-flex span`` is inspected and the first ``$``-prefixed
    amount in its text is parsed.

    Args:
        lxml_tree: Parsed lxml element/tree to search.

    Returns:
        The amount as a ``Decimal``, or ``None`` when no element matches
        or its text contains no dollar amount.
    """
    from lxml.cssselect import CSSSelector
    from decimal import Decimal
    import re

    selector = CSSSelector(
        'div.buy-box-container span.b.lh-copy.dark-gray.f1.mr2 span.inline-flex span'
    )
    nodes = selector(lxml_tree)
    if not nodes:
        return None

    text = nodes[0].text_content()
    # Collapse runs of dots into one; presumably the concatenated text of
    # the price pieces can contain a doubled decimal point -- confirm.
    text = re.sub(r'\.{2,}', '.', text)
    # Accept comma-grouped thousands ("$1,299.00"); matching plain digits
    # only would stop at the comma and yield 1 instead of 1299.00.
    match = re.search(r'\$(\d{1,3}(?:,\d{3})+|\d+)(\.\d+)?', text)
    if match is None:
        return None

    whole = match.group(1).replace(',', '')
    fraction = match.group(2) or ''
    return Decimal(whole + fraction)

def extract_ProductName(lxml_tree):
    """Return the product title from ``h1#main-title``.

    Leading/trailing whitespace is removed and internal whitespace runs
    are collapsed to single spaces.

    Args:
        lxml_tree: Parsed lxml element/tree to search.

    Returns:
        The normalised title string, or ``None`` when no
        ``h1#main-title`` element is present.
    """
    from lxml.cssselect import CSSSelector
    import re

    nodes = CSSSelector('h1#main-title')(lxml_tree)
    if not nodes:
        return None

    heading = nodes[0]
    raw = heading.text
    # ``.text`` is None (or blank) when the heading starts with a child
    # element; fall back to the full text content instead of crashing on
    # ``None.strip()``.
    if raw is None or not raw.strip():
        raw = heading.text_content()
    return re.sub(r'\s+', ' ', raw.strip())

def extract_TotalReview(lxml_tree):
    """Return the review count read from the first ``a.inline-button``.

    The first integer in the link's text is parsed; comma-grouped
    thousands such as ``1,234`` are understood.

    Args:
        lxml_tree: Parsed lxml element/tree to search.

    Returns:
        The count as an ``int``, or ``None`` when no element matches or
        its text contains no digits.
    """
    from lxml.cssselect import CSSSelector
    import re

    nodes = CSSSelector('a.inline-button')(lxml_tree)
    if not nodes:
        return None

    link = nodes[0]
    text = link.text
    # ``.text`` is None (or blank) when the link starts with a child
    # element; use the full text content rather than passing None to
    # ``re.search``.
    if text is None or not text.strip():
        text = link.text_content()

    # Allow thousands separators so "1,234 reviews" is 1234, not 1.
    match = re.search(r'(\d{1,3}(?:,\d{3})+|\d+)', text)
    if match is None:
        return None
    return int(match.group(1).replace(',', ''))

def extract_Availability(lxml_tree):
    """Report whether the tree contains a ``div.buy-box-container``.

    This is purely a presence check on that element: ``True`` when at
    least one match exists, ``False`` otherwise.
    """
    from lxml.cssselect import CSSSelector

    buy_boxes = CSSSelector('div.buy-box-container')(lxml_tree)
    return bool(buy_boxes)

def extract_ProductImage(lxml_tree):
    """Return the ``src`` attribute of the first ``img.db`` element.

    Returns ``None`` when no such image exists (or when the first match
    carries no ``src`` attribute).
    """
    from lxml.cssselect import CSSSelector

    images = CSSSelector('img.db')(lxml_tree)
    if not images:
        return None
    first_image = images[0]
    return first_image.get('src')

def extract_AverageReview(lxml_tree):
    """Return the average rating read from the first ``div.gray span.f7``.

    The first decimal number (digits, a dot, digits) in the element's
    text is parsed.

    Args:
        lxml_tree: Parsed lxml element/tree to search.

    Returns:
        The rating as a ``float``, or ``None`` when no element matches or
        its text contains no decimal number.
    """
    from lxml.cssselect import CSSSelector
    import re

    nodes = CSSSelector('div.gray span.f7')(lxml_tree)
    if not nodes:
        return None

    node = nodes[0]
    text = node.text
    # ``.text`` is None (or blank) when the span starts with a child
    # element; use the full text content rather than passing None to
    # ``re.search``.
    if text is None or not text.strip():
        text = node.text_content()

    match = re.search(r'(\d+\.\d+)', text)
    if match is None:
        return None
    return float(match.group(1))

if __name__ == '__main__':
    # Script entry point: fetch one sample product page, run every
    # extractor against it, and print the collected fields.
    from lxml.html import fromstring

    url = 'https://www.walmart.com/ip/Powerful-Suction-Handheld-Vacuum-Cleaner-Cordless-Blowing-Portable-Car-Pet-Hair-Cleaner-Quick-Charge-Lightweight-Washable-HEPA-Filter-Car-Home/6242356661'
    html = get_html(url)
    tree = fromstring(html)

    # Dict literals evaluate in source order, so the extractors run in
    # the same sequence as the keys are listed.
    result = {
        'ListPrice': extract_ListPrice(tree),
        'SellPrice': extract_SellPrice(tree),
        'ProductName': extract_ProductName(tree),
        'TotalReview': extract_TotalReview(tree),
        'Availability': extract_Availability(tree),
        'ProductImage': extract_ProductImage(tree),
        'AverageReview': extract_AverageReview(tree),
    }

    print(result)

Co.Parser

Source Code: Copy

Source Code: