How can I save the results of my web scraping to an Excel file?
I've been trying to find a way for a while. Also, are there any ideas for how I could select other pages that I want to fetch?
This is my code:
from urllib.request import urlopen as req
from openpyxl import Workbook
from bs4 import BeautifulSoup as soup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from urllib.request import urlopen
import time
import requests
def checkproduct(url):
    opt = webdriver.ChromeOptions()
    opt.add_argument('headless')
    driver = webdriver.Chrome()
    #driver = webdriver.Chrome(options=opt)
    driver.get(url)
    time.sleep(1)
    driver.execute_script("window.scrollTo(0, 400);")
    time.sleep(1)
    driver.execute_script("window.scrollTo(0, 1200);")
    time.sleep(1)
    driver.execute_script("window.scrollTo(0, 3000);")
    time.sleep(1)
    page_html = driver.page_source
    data = soup(page_html, 'html.parser')
    allproduct = data.findAll('div', {'class': 'c16H9d'})
    product_title = allproduct[0].text
    product_url = 'https:' + allproduct[0].a['href']
    list_title = []
    list_url = []
    list_price = []
    list_image = []
    for pd in allproduct:
        pd_title = pd.text
        pd_url = 'https:' + pd.a['href']
        list_title.append(pd_title)
        list_url.append('https:' + pd.a['href'])
    allprice = data.findAll('span', {'class': 'c13VH6'})
    for pc in allprice:
        pc_price = pc.text
        pc_price = pc_price.replace('฿', '')
        pc_price = pc_price.replace(',', '')
        list_price.append(float(pc_price))
    allimages = data.findAll('img', {'class': 'c1ZEkM'})
    for productimages in allimages:
        productimages_url = productimages['src']
        list_image.append(productimages_url)
    print(list_title)
    print(list_url)
    print(pc_price)
    print(list_image)
    driver.close()
    return (list_title, list_price, list_url, list_image)
base_url = "https://www.lazada.co.th/shop-smart-tv?pages="
n = 2
for i in range(1, n+1):
    response = base_url + "%d" % i
    url = response
    print(url)
    checkproduct(url)
    print('_________________________')
laptop = checkproduct(url)
excelfile = Workbook()
row = excelfile.active
header = ['Product','Price','URL','Images']
row.append(header)
for i,j,k,l in zip(laptop[0],laptop[1],laptop[2],laptop[3]):
    row.append([i,j,k,l])
#row['A45'] = 'ถูกสุด'
#row['A46'] = 'แพงสุด'
#min_price = min(laptop[1])
#find = laptop[1].index(min_price)
#row['B45'] = laptop[0][find]
#row['C45'] = laptop[1][find]
#row['D45'] = laptop[2][find]
#max_price = max(laptop[1])
#find = laptop[1].index(max_price)
#row['B46'] = laptop[0][find]
#row['C46'] = laptop[1][find]
#row['D46'] = laptop[2][find]
excelfile.save('Lazada_Product2.xlsx')
print('Done')
With this loop, only one page's set of data ends up in the Excel file. What do I need to change so that it extracts more, or extracts more than one page?
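I'm guessing the loop needs to collect every page's lists before the workbook is written, roughly like the sketch below (untested; it just reuses the checkproduct function above with the same openpyxl calls, and the accumulator list names are my own):

from openpyxl import Workbook

# collect every page's results first, then write the workbook once at the end
all_titles, all_prices, all_urls, all_images = [], [], [], []

base_url = "https://www.lazada.co.th/shop-smart-tv?pages="
n = 2
for i in range(1, n + 1):
    url = base_url + "%d" % i
    print(url)
    titles, prices, urls, images = checkproduct(url)  # one page per call
    all_titles.extend(titles)
    all_prices.extend(prices)
    all_urls.extend(urls)
    all_images.extend(images)
    print('_________________________')

excelfile = Workbook()
sheet = excelfile.active
sheet.append(['Product', 'Price', 'URL', 'Images'])
for title, price, link, image in zip(all_titles, all_prices, all_urls, all_images):
    sheet.append([title, price, link, image])
excelfile.save('Lazada_Product2.xlsx')
print('Done')

Would that be the right direction, or is there a better way?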