0

I have a Python webscraper that pulls a specific value every 1 second. The target website it AJAXed, so I'm not hitting it with too many requests.

This is the Python code:

import time
import logging
import sys
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Suppress all unnecessary logs
logging.basicConfig(level=logging.CRITICAL)  # Suppress logs lower than CRITICAL
logging.getLogger("selenium").setLevel(logging.CRITICAL)  # Suppress Selenium logs

# Specify the path to your chromedriver
chromedriver_path = 'C:/GoldScraper/chromedriver.exe'

# Set up Chrome options
options = Options()
options.add_argument("--headless")  # Run in headless mode
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--disable-gpu")
options.add_argument("start-maximized")
options.add_argument("--log-level=3")  # Suppress logs

# Suppress WebGL and other errors
options.add_argument("--disable-software-rasterizer")
options.add_argument("--disable-extensions")
options.add_argument("--disable-logging")

# Create a Service object
service = Service(chromedriver_path)
driver = webdriver.Chrome(service=service, options=options)

# Navigate to the website
driver.get('https://www.cnbc.com/quotes/XAU=')

# Print waiting message once
print("Waiting for price element...")

try:
    # Wait for the gold price element to be visible
    gold_price_element = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.CLASS_NAME, "QuoteStrip-lastPrice"))
    )

    print("Price element found. Fetching prices...")

    while True:
        # Extract and strip commas, convert to float, and format to 2 decimal places
        gold_price = round(float(gold_price_element.text.replace(",", "")), 2)  # Remove commas, convert to float, and round to 2 decimal places
        print(f"{gold_price:.2f}")  # Print with 2 decimal places
        sys.stdout.flush()  # Ensure output is immediately available to VBA

        # Wait for 1 second before next fetch
        time.sleep(1)


except KeyboardInterrupt:
    print("\nScript stopped by user.")

except Exception as e:
    print(f"Error extracting gold price: {e}")
    sys.stdout.flush()  # Ensure error messages are flushed too

finally:
    # Ensure the browser closes
    driver.quit()
    print("Driver closed.")

I need to have the VBA macro capture the gold_price variable each second and display the value in a named cell called "Live_AU". This is the VBA code I have come up with:

Sub StartScraping()
    Dim objShell As Object
    Dim pythonScriptPath As String
    Dim pythonExePath As String
    Dim price As String
    
    ' Define the path to your Python executable and script
    pythonExePath = "C:\python313\python.exe"  ' Adjust this if needed
    pythonScriptPath = "C:\GoldScraper\goldscraper.py"  ' Adjust this if needed
    
    ' Run the Python script and capture the output
    Set objShell = CreateObject("WScript.Shell")
    
    ' Capture the output from the Python script (output redirected to text)
    Dim output As String
    output = objShell.Exec(pythonExePath & " " & pythonScriptPath).StdOut.ReadAll  ' Use ReadAll to capture everything
    
    ' Debug: Show the price output from Python
    MsgBox "Python Output: " & output  ' Display the output for debugging
    
    ' Check if price is valid and update the Live_AU cell
    If IsNumeric(output) Then
        Range("Live_AU").Value = output
    Else
        MsgBox "No valid output from Python. Output: " & output  ' Show the captured output if invalid
    End If
End Sub

This returns nothing or an empty string, I can't tell. Can anyone help debug this for me so that the correct value is displayed in the required Excel cell?

1 Answer 1

1

Your implementation is more complex than it needs to be providing you're using an up-to-date version of selenium (currently 4.30.0)

Furthermore, the key to success here is to refresh the driver after each delay (sleep)

from selenium.webdriver import Chrome
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver import ChromeOptions
import time

URL = "https://www.cnbc.com/quotes/XAU="

options = ChromeOptions()
options.add_argument("--headless=new")

def etext(e: WebElement) -> str:
    if e:
        if t := e.text.strip():
            return t
        if (p := e.get_property("textContent")) and isinstance(p, str):
            return p.strip()
    return ""

with Chrome(options) as driver:
    driver.get(URL)
    wait = WebDriverWait(driver, 10)
    ec = EC.presence_of_element_located
    sel = (By.CSS_SELECTOR, "span.QuoteStrip-lastPrice")
    while True:
        try:
            span = wait.until(ec(sel))
            gold_price = float(etext(span).replace(",", ""))
            print(f"{gold_price:.2f}")
            time.sleep(1)
            driver.refresh()
        except KeyboardInterrupt:
            print("\nScript stopped by user.")
            break
        except Exception as e:
            print(f"Error extracting gold price: {e}")
            break
1
  • Your code is more concise, but it doesn't return anything to the Excel worksheet and its refresh rate is greater than once per second. I'm trying to capture 1-second chart data and need the script to capture the AJAX change on the webpage instead of reloading the browser driver each iteration. I don't want to have the site block too many requests. My main concern is to get the data scraped into Excel every second and right now, Excel isn't getting anything from the Python code error check: MsgBox "No valid output from Python. Output: " & output
    – Matteo
    Commented Apr 7 at 15:46

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.