We’ve got some code provided by Akash D., who is working on scraping ticketmaster.co.uk. He automates the browser (Chrome as well as Edge) using Selenium with Python. Rotating authenticated proxies are used to stay undetected. Still, the site is protected by Distil Networks.
How do you perform HTTP proxy authentication with ChromeDriver in Selenium?
See the code below:
"""Start Chrome through an authenticated HTTP proxy.

Chrome has no command-line switch for proxy credentials, so this script
generates a tiny extension on the fly: it pins the proxy server via
``chrome.proxy`` and answers the proxy's auth challenge via
``chrome.webRequest.onAuthRequired``.
"""
import json
import os
import zipfile

from selenium import webdriver

PROXY_HOST = 'x.botproxy.net'  # rotating proxy
PROXY_PORT = 8080
PROXY_USER = 'proxy-user'
PROXY_PASS = 'proxy-password'

# Extension manifest: requests the proxy and blocking-webRequest permissions
# that background.js relies on.
manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
}
"""

# Extension background script. The string values are emitted with json.dumps
# so that quotes or backslashes in the host/credentials become valid, escaped
# JavaScript string literals instead of breaking the script.
background_js = """
var config = {
    mode: "fixed_servers",
    rules: {
        singleProxy: {
            scheme: "http",
            host: %s,
            port: %d
        },
        bypassList: ["localhost"]
    }
};

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: %s,
            password: %s
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
);
""" % (
    json.dumps(PROXY_HOST),
    int(PROXY_PORT),
    json.dumps(PROXY_USER),
    json.dumps(PROXY_PASS),
)


def get_chromedriver(use_proxy=False, user_agent=None):
    """Create a Chrome WebDriver, optionally proxied and with a custom user agent.

    Args:
        use_proxy: When true, write ``proxy_auth_plugin.zip`` to the current
            working directory and load it as an extension so that traffic
            goes through the configured proxy.
        user_agent: Optional user-agent string passed to Chrome.

    Returns:
        The started ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()``.
    """
    path = os.path.dirname(os.path.abspath(__file__))
    chrome_options = webdriver.ChromeOptions()

    if use_proxy:
        pluginfile = 'proxy_auth_plugin.zip'
        with zipfile.ZipFile(pluginfile, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        chrome_options.add_extension(pluginfile)

    if user_agent:
        chrome_options.add_argument('--user-agent=%s' % user_agent)

    # The chromedriver binary is expected next to this script.
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    # NOTE(review): the positional driver path is the Selenium 3 / early
    # Selenium 4 calling convention - confirm against the installed version.
    driver = webdriver.Chrome(
        os.path.join(path, 'chromedriver'),
        options=chrome_options)
    return driver


def main():
    """Open an IP-echo page through the proxy to check which address is seen."""
    driver = get_chromedriver(use_proxy=True)
    # Alternative check page: https://www.google.com/search?q=my+ip+address
    driver.get('https://httpbin.org/ip')


if __name__ == '__main__':
    main()
The whole code
Here is the whole code for scraping ticketmaster.co.uk using Selenium with Python.
"""Selenium bot that opens a Ticketmaster UK event page through a rotating proxy.

A proxy is picked at random from ``proxies`` and applied through a generated
browser extension that also answers the proxy's authentication challenge.
"""
import json
import random
import zipfile
from time import sleep

import undetected_chromedriver as uc
from msedge.selenium_tools import Edge
from msedge.selenium_tools import EdgeOptions
from selenium import webdriver
from selenium.common.exceptions import (
    ElementNotInteractableException,
    NoSuchElementException,
    SessionNotCreatedException,
    TimeoutException,
)
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait

USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
)
EVENT_URL = (
    "https://www.ticketmaster.co.uk/"
    "freedom-starring-joe-mcelderry-portsmouth-05-10-2022/event/1F005B56BB598D3B"
)
PLUGIN_FILE = 'proxy_auth_plugin.zip'
COOKIE_BUTTON_XPATH = '//*[@id="onetrust-accept-btn-handler"]'

# Extension manifest: requests the proxy and blocking-webRequest permissions
# that the generated background.js relies on.
manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
}
"""

# Template for the extension's background script; filled in by
# tm_bot.get_background_js with JSON-encoded (i.e. safely quoted) values.
_BACKGROUND_JS_TEMPLATE = """
var config = {
    mode: "fixed_servers",
    rules: {
        singleProxy: {
            scheme: "http",
            host: %s,
            port: %d
        },
        bypassList: ["foobar.com"]
    }
};

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: %s,
            password: %s
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
);
"""

# add your proxies here, one "host:port:user:password" string per entry
proxies = [

]


class tm_bot():
    """Browser session that loads the event page through an authenticated proxy."""

    def __init__(self):
        """Start Chrome, apply the anti-detection tweaks and open the event page."""
        self.driver = self.get_chrome_driver()
        # Register the navigator.webdriver patch for every new document.
        # Running it once with execute_script would only affect the current
        # (blank) page and be lost as soon as the event page is loaded.
        self.driver.execute_cdp_cmd(
            'Page.addScriptToEvaluateOnNewDocument',
            {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"})
        self.driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": USER_AGENT})
        print('excuting script')
        print(self.driver.execute_script("return navigator.userAgent;"))
        # NOTE(review): fixed pause before the first navigation; the reason is
        # not visible in this file - confirm whether it is still needed.
        sleep(2)
        self.driver.get(EVENT_URL)

    def get_background_js(self, proxy):
        """Build the extension's background.js for one proxy.

        Args:
            proxy: Proxy description in ``host:port:user:password`` form.
                Only the first three colons separate fields, so the password
                itself may contain ``:``.

        Returns:
            The JavaScript source as a string (also stored on
            ``self.background_js``).

        Raises:
            ValueError: If ``proxy`` does not have four fields or the port is
                not an integer.
        """
        parts = proxy.split(":", 3)
        if len(parts) != 4:
            raise ValueError(
                "proxy must look like 'host:port:user:password', got %d field(s)" % len(parts))
        self.prox = parts
        self.PROXY_HOST = parts[0]
        self.PROXY_PORT = int(parts[1])
        self.PROXY_USER = parts[2]
        self.PROXY_PASS = parts[3]
        # json.dumps produces quoted, escaped literals, so quotes or
        # backslashes in the credentials cannot break the script.
        self.background_js = _BACKGROUND_JS_TEMPLATE % (
            json.dumps(self.PROXY_HOST),
            self.PROXY_PORT,
            json.dumps(self.PROXY_USER),
            json.dumps(self.PROXY_PASS),
        )
        return self.background_js

    def _choose_proxy(self):
        """Return a random entry of ``proxies``; fail clearly if the list is empty."""
        if not proxies:
            raise RuntimeError(
                "no proxies configured: add 'host:port:user:password' entries to `proxies`")
        return random.choice(proxies)

    def _write_proxy_plugin(self, proxy):
        """Write the proxy-auth extension zip for ``proxy`` and return its path."""
        with zipfile.ZipFile(PLUGIN_FILE, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", self.get_background_js(proxy))
        return PLUGIN_FILE

    def get_edge_driver(self, use_proxy=True):
        """Start Chromium-based Edge, optionally through the proxy extension.

        Args:
            use_proxy: When true, load the generated proxy-auth extension. The
                proxy already stored on ``self.PROXY`` is reused; if none has
                been chosen yet, one is picked at random.

        Returns:
            The started Edge driver (also stored on ``self.driver``).

        Raises:
            RuntimeError: If a proxy is needed but ``proxies`` is empty.
        """
        self.edge_options = EdgeOptions()
        self.edge_options.use_chromium = True
        self.edge_options.add_argument('--disable-gpu')
        self.edge_options.add_argument("start-maximized")
        self.edge_options.add_argument("disable-infobars")
        self.edge_options.add_experimental_option("detach", True)
        self.edge_options.add_argument('--DBUS_SESSION_BUS_ADDRESS=/dev/null')
        self.edge_options.add_argument('--disable-blink-features=AutomationControlled')
        # The switch the driver adds by default is "enable-automation";
        # excluding "disable-automation" had no effect.
        self.edge_options.add_experimental_option("excludeSwitches", ["enable-automation"])
        self.edge_options.add_experimental_option('useAutomationExtension', True)
        # Chromium's switch is --user-agent (hyphen); --user_agent is ignored.
        self.edge_options.add_argument('--user-agent=' + USER_AGENT)

        if use_proxy:
            print("using proxy")
            # This method may be called before get_chrome_driver, in which
            # case no proxy has been selected yet.
            if getattr(self, "PROXY", None) is None:
                self.PROXY = self._choose_proxy()
            self.edge_options.add_extension(self._write_proxy_plugin(self.PROXY))

        self.driver = Edge(executable_path='msedgedriver.exe', options=self.edge_options)
        return self.driver

    def get_chrome_driver(self, use_proxy=True):
        """Start Chrome, optionally through a randomly chosen proxy.

        Args:
            use_proxy: When true, pick a random proxy from ``proxies`` and
                load the generated proxy-auth extension.

        Returns:
            The started Chrome driver (also stored on ``self.driver``).

        Raises:
            RuntimeError: If ``use_proxy`` is true but ``proxies`` is empty.
        """
        # The attribute keeps its historical name (edge_options) even though
        # it holds ChromeOptions here.
        self.edge_options = webdriver.ChromeOptions()
        self.edge_options.add_argument('--no-sandbox')
        self.edge_options.add_argument('start-maximized')
        self.edge_options.add_argument('--disable-infobars')
        self.edge_options.add_argument('--disable-dev-shm-usage')
        self.edge_options.add_argument('--disable-browser-side-navigation')
        self.edge_options.add_argument('--disable-gpu')
        self.edge_options.add_argument('--disable-blink-features=AutomationControlled')
        self.edge_options.add_experimental_option("useAutomationExtension", False)

        if use_proxy:
            self.PROXY = self._choose_proxy()
            print(self.PROXY)
            print("using proxy")
            # Actually load the extension; writing the zip alone does not
            # route any traffic through the proxy.
            self.edge_options.add_extension(self._write_proxy_plugin(self.PROXY))

        self.driver = webdriver.Chrome(options=self.edge_options)
        return self.driver

    def clear_cookies(self, timeout=10):
        """Accept the OneTrust cookie banner if it shows up.

        Args:
            timeout: Maximum number of seconds to wait for the accept button
                to become clickable.

        Returns:
            True if the button was found and clicked, False if it did not
            appear within ``timeout`` seconds.
        """
        try:
            accept_button = WebDriverWait(self.driver, timeout).until(
                ec.element_to_be_clickable((By.XPATH, COOKIE_BUTTON_XPATH)))
        except TimeoutException:
            # No banner (already accepted or never rendered): nothing to do.
            return False
        accept_button.click()
        return True


if __name__ == "__main__":
    test = tm_bot()
    test.clear_cookies()
One reply on “Scrape ‘Ticketmaster’ using Selenium with Python”
doesn’t work, it says pardon – error