scrapy 如何发送请求到Cloudflare保护的网站?

l2osamch  于 2022-11-23  发布在  其他
关注(0)|答案(1)|浏览(222)

我尝试向某个网站发送请求,但收到了503 status code。该网站似乎受Cloudflare保护。是否可以使用python-requests library向受Cloudflare保护的网站发送请求?我已随请求一并发送了cookies和headers,但仍无法通过。
下面是我的代码。

import requests

# Cookies sent with the request, passed to requests.get via `cookies=`.
# NOTE(review): these look like values captured from a live browser session
# (several embed the timestamp 1656593179) and have presumably expired —
# confirm before reuse.
# The duplicated 'aperture-be-commit-id' entry was removed; both copies held
# the same value, so the resulting dict is unchanged.
cookies = {
    'SSPV_C': 'BPwAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAA',
    'locale': 'en',
    'cookieID': '390778282271656143963365',
    'uui': '800.606.6969|',
    'cartId': '42951851197',
    'mapp': '0',
    '__cfruid': '7f4badea550ab7327454d1e2bac7cdec7c0701cf-1656593179',
    '__cf_bm': 'gn_yevoOR3SIcU9B8oDZQ.r_e9300kA61vY264Hls64-1656593179-0-AaRJibHSFeN0Z7jkQTvjq+HQMc3lRPlhM850slZTqy7uy5QzXhmRx3M6rxlwk78kIU+zC8Vb7eDsPpuhdnNOhAkil4ZdBSaZW4pRvSMX53Xd',
    'SSLB_C': '0',
    'SSID_C': 'CQD72x0AAAAAAABbwLZiApVDB1vAtmICAAAAAAAAAAAAHJu9YgANyA',
    'SSSC_C': '333.G7113084158674703618.2|0.0',
    'SSRT_C': 'HJu9YgAAAA',
    'lpi': 'cat=2,cur=USD,app=D,lang=E,view=L,lgdin=N,cache=release-WEB-20220629v10-BHJ-DVB31150-11,ipp=24,view=L,sort=BS,priv=N,state=',
    'sessionKey': 'f6fbd948-2fed-41f9-bcf3-7defa626f36a',
    'dpi': 'cat=2,cur=USD,app=D,lang=E,view=L,lgdin=N,cache=release-WEB-20220629v10-BHJ-DVB31150-11',
    'utkn': '97655ab6781ce66340f0d2aa809c3f68',
    'build': '20220629v10-20220629v10',
    'aperture-be-commit-id': 'n/a',
    'JSESSIONID': 'SUq0pea5K0bNUyQEJscyjUnJFvvEGjW7!622981770',
    'sshc': '61f5b3f36d4907c548b3efc82cfcecd9',
    '_pxhd': '53bIlMthB4XG3X644UXFOgn-jRSXY56BvM49fjHfOdSg53A7NqKSOXYc0jBByweKQ4NgEZR/R61UG9ouHxGSUw==:Kfr40D-EuMhLJ4qxdatLAMna184C2zbBIJV3xlOVy2hTdUEI3sN3kCGBQV73oDxdiOoVZAKilYlJZn--t492StGQHTm21i-GiwB5xxziLd8=',
    'cf-colo': 'KHI',
    'TS0188dba5': '01ec39615f5a1331c083e7ac7ff7f2895322c069326ea3e7a0fb426c2906479f8fdba41c2cbebcf1669847d2488313d23495cf506ce0991eb9af796b9032458b1a715a28e71e7a31b64b6791644a6f092364bff1d8e79d027277b851adf5faa365dd8e2609',
    'TS01d628c4': '01ec39615f8b9833712bd8ae68ec8c0798bd1df2e408a949a17c3772b8419cc7bbfe911b2b2798bd33f09b9e2fa7d6837ec5814f8ca97bd51f8eccc8779214eac7cd387b8f1f1d5097bca3b926c8d264dd80d59d7e4879197618d3a0ef6777bdb5902263106d9d95ac8fd7d92cd8458f02fb7c1409230f71f6b3a638107bbd8a73aa1629da3456ce69fd32f210cf1826979006e713',
    'TopBarCart': '0|0',
    'dlc': '%43%4D%5F%4D%4D%43%3D%7C%54%59%50%45%44%56%41%4C%55%45%3D%7C%45%4D%4C%45%3D%7C%55%4E%42%49%3D%6E%75%6C%6C%7C%4C%4F%4E%47%3D%37%30%2E%30%30%30%30%30%7C%4C%41%54%3D%33%30%2E%30%30%30%30%30%7C',
    'app_cookie': '1656593927',
    'TS01e1f1fd': '01ec39615fa2aeefd67a3c8e74158e94069993ea3308a949a17c3772b8419cc7bbfe911b2b55db8474b97fd606a862d187b6fdf539dfd177a32a93169e75a1c8599fc7428443914075f1081235d9564cc0fc8b69460d7a08aef755b5c296a42cf6b735f4953465ca238a6965b0625b2de8e4934e04',
    'forterToken': 'a1ba6a2e88e74edb91df3bcf567bdd45_1656593924573_588_dUAL43-mnts-ants_13ck',
}

# Request headers sent with the request, passed to requests.get via
# `headers=`. The sec-ch-ua and user-agent values describe Chrome 103 on
# Windows.
headers = {
    'authority': 'www.bhphotovideo.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'en-PK,en;q=0.9,ur-PK;q=0.8,ur;q=0.7,en-GB;q=0.6,en-US;q=0.5,sv;q=0.4,it;q=0.3',
    'cache-control': 'no-cache',
    # No raw 'cookie' header is set here: the cookies are supplied through the
    # `cookies=` argument of the request instead.
    # Note: Requests sorts cookies= alphabetically.
    'pragma': 'no-cache',
    'referer': 'https://www.bhphotovideo.com/c/buy/Notebooks/ci/6782/N/4110474287',
    'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
}

# Fetch the product page, sending the cookies and headers defined above in
# this script. An explicit timeout is set because requests otherwise waits
# indefinitely when the server stalls.
response = requests.get(
    'https://www.bhphotovideo.com/c/product/1663923-REG/lenovo_82h801ekus_ip3_15itl6_i3_1115g4_8gb.html',
    cookies=cookies,
    headers=headers,
    timeout=30,
)
# Printing the Response object shows its status code, e.g. <Response [503]>.
print(response)
bvpmtnay

bvpmtnay1#

你可以使用cloudscraper来绕过Cloudflare的保护:

from bs4 import BeautifulSoup
import cloudscraper
# Product page to fetch.
url = 'https://www.bhphotovideo.com/c/product/1663923-REG/lenovo_82h801ekus_ip3_15itl6_i3_1115g4_8gb.html'

# Create the cloudscraper session used in place of plain requests.
# NOTE(review): `delay` and the 'custom' browser entry are cloudscraper
# options (presumably challenge wait time and a custom User-Agent) — confirm
# against the cloudscraper documentation.
scraper = cloudscraper.create_scraper(
    delay=10,
    browser={'custom': 'ScraperBot/1.0'},
)

# Issue the GET through the scraper and print the Response object.
response = scraper.get(url)
print(response)

输出:

<Response [200]>

cloudscraper

相关问题