我有一段抓取沃尔玛（Walmart）价格的代码。对于每个产品（共 300 个），它会依次输入约 100 个邮政编码，并抓取对应的价格。我用 3 个产品网址和 2 个邮政编码做了测试，应该得到 6 条数据。在不使用代理服务的情况下，代码运行良好，输出如下：
43819800,041167412213,10003,3520,19.96,2022-07-05 14:06:47
43819800,041167412213,48104,5472,19.96,2022-07-05 14:06:47
224749468,300450206909,10003,3520,42.47,2022-07-05 14:06:49
224749468,300450206909,48104,5472,42.47,2022-07-05 14:06:50
14053317,681131187091,10003,3520,2.52,2022-07-05 14:06:51
14053317,681131187091,48104,5472,2.52,2022-07-05 14:06:52
我后来意识到，如果要对大量产品运行这段代码，就需要使用代理服务，所以我购买了一个。但是通过代理服务运行代码时，大部分数据都会丢失。
我猜无法获得全部数据的原因是 cookie 的问题：cookie 会过期，而我不知道如何生成一个新的、有效的 cookie。有人知道抓取沃尔玛时该如何处理 cookie 问题吗？非常感谢任何帮助！
下面是我的代码
import logging
from urllib.parse import urlsplit

_log = logging.getLogger(__name__)

_GRAPHQL_URL = "https://www.walmart.com/orchestra/home/graphql"
_REQUEST_TIMEOUT = 10  # seconds, applied to every HTTP call
_COOKIE_DOMAIN = ".walmart.com"

# Cookies captured from a browser session, one "name=value" entry per cookie.
# They are only the *starting* state of the session's cookie jar: any
# Set-Cookie sent back by the server replaces the matching entry, so later
# requests use refreshed values instead of replaying these forever.
# NOTE(review): these values are a snapshot and will go stale; re-capture them
# (or drop them) if the very first request is already rejected.
_SEED_COOKIES = (
    "_pxvid=10811607-f238-11ec-a720-4e756c594d76",
    "ACID=2263b9c6-4e5a-44ce-a9da-05e028a9b8c7",
    "hasACID=true",
    "vtc=ShJZRQkr5ADHxW2cc6mpW0",
    "TBV=7",
    "adblocked=false",
    "locGuestData=eyJpbnRlbnQiOiJTSElQUElORyIsImlzRXhwbGljaXQiOmZhbHNlLCJzdG9yZUludGVudCI6IlBJQ0tVUCIsIm1lcmdlRmxhZyI6ZmFsc2UsImlzRGVmYXVsdGVkIjpmYWxzZSwicGlja3VwIjp7Im5vZGVJZCI6IjMwODEiLCJ0aW1lc3RhbXAiOjE2NTU5MDgzNjMwNTF9LCJwb3N0YWxDb2RlIjp7InRpbWVzdGFtcCI6MTY1NTkwODM2MzA1MSwiYmFzZSI6Ijk1ODI5In0sInZhbGlkYXRlS2V5IjoicHJvZDp2MjoyMjYzYjljNi00ZTVhLTQ0Y2UtYTlkYS0wNWUwMjhhOWI4YzcifQ%3D%3D",
    "tb_sw_supported=false",
    "auth=MTAyOTYyMDE4fNO1docV1h53scwTVZ09zkq8CMnyd0xQJ2B%2BWcsOyT6LROWRUsmVs%2Bp%2BqdGXlTGcs43hwU%2BCNtvvfjRjs7lCjIvrK3NOp%2FSayADm%2FauT3or57dQr8nIRizNO0Go9X0h5767wuZloTfhm7Wk2KcjygsAEeU%2BeKCMhfP9XV060SY%2FgcNaenrudSCZlEFJXNSGF6XDQTWwNYV8JcXQmjWW2gCuRvXtkjUhxyHY9czMQpG4UMk70P8glgOEpLOprhDfMM%2FFHGZ2dCNmxWrdkwqEKrhrUOgaJ2pqn5A3SHetSvGPUoDKun8p%2FM%2BS69xpJ5GAkRAvQ9UuySW7l7kgiPOgyF60jwwfDPWGSAbjbNQ7pWssaQQ2kgt4PeOLZzshCiRmgr%2B51HN4wOouRjTjluv08HZE5WBBdZBCyKnCQAR7o6eg%3D",
    "assortmentStoreId=3081",
    "hasLocData=1",
    "TB_Latency_Tracker_100=1",
    "TB_Navigation_Preload_01=1",
    "TB_SFOU-100=",
    "bstc=bdAHA-WWcfXk0PMnP-uT0o",
    "mobileweb=0",
    "xpa=3Fi1g|3_gkh|3pRU7|4NCWH|55b29|5_9FA|DAwQd|Ecx7k|EjkLl|FYe-R|Hv6FZ|LTD5Y|LguYm|NbUbl|NoJl6|O1c3v|OuwKl|Pgtnl|Q-bGe|TMjj7|V0SkO|VAuQw|_hSAz|cL8HI|ccWng|cfVAR|duBe9|eEnay|eWARP|hGNr-|hPI48|hqy5q|jUi64|kFqfr|kLRY3|lQHtM|rdfjX|wGrec|zCylr",
    "xpm=1%2B1656425217%2BShJZRQkr5ADHxW2cc6mpW0~%2B0",
    "exp-ck=3_gkh24NCWH155b2925_9FA1DAwQd3Ecx7k1EjkLl1FYe-R1NbUbl1O1c3v1OuwKl1Pgtnl1V0SkO1_hSAz1cL8HI1ccWng2eEnay2eWARP1hGNr-1jUi641kFqfr1kLRY31lQHtM1",
    "helpgql=1",
    "_pxhd=f5c62b38667d2415146fc6f1cf93cdcc3327afc72fb672b6c09d362415e56f38:10811607-f238-11ec-a720-4e756c594d76",
    "ak_bmsc=1BE44332F7F4F1A06C35DB50CE7A244A~000000000000000000000000000000~YAAQLcMTArBBZZeBAQAArhKjqhBsrms6EgHZfDiLKYf+xjOEF+/vrnzQPmoENr4GabQK+uGCmEd636jjgHyE5IXaf6eZM80f5m5gykIdVmdAcFD78W+uLxPV7zlVbetRHR4yQ3osOU9yTbAIxm5And82l8zx1c+OLZZDAn6cC8CKtnboNmXITB0mT+0BxrSMWr6FUUHNgN0BjPwCEW0NqdGPe6o57ttnUoJELTEeXnUdiAZB784srPtFJgZmxk8F6jADpYMaSrqGlGv+Uh2jVAHJRIYeQU3kcxbocc7/nJrGHHFJn0lbXOoltW7Qz8AFPIgHhHWP9/2COrQo/EWwZm/88zqCi9/l7kD7dtgabe1ICq/CgDz4rl41QKOe+lZYQVf9uqF0CrTfea/anOjVdEHEXZvaGq+803mSqffDitzIXTNDym8PnPeMFa4WJTqCF/apIJiT2rvLKPRgn2CsOlxCTkHlAuCUJG8JYWYGloDso0ULfjd/xEhxhe6CCy6HYwJ3E8fkVF8i5dG04U+o/URP05RiYZc8F+F1dMl91Q==",
    "bm_sv=84EAF547030A90DB5BCD4679165B254D~YAAQLcMTAvZBZZeBAQAAthWjqhAc7AUJuL6t1BdztQNVwhWMVE5DVAXWjtGVZh8Xm0VgJ47u+ho0b3dkGGOwoGGOwbooZSpjcjxOOiEXIzTZgWRkQQeyTz5OB8ixRgE2Fqg1vQiCeOZkQWRmu+zj+P9ZwuNPusS4/dIStSfzYA4sDRQ6KPwQMqsMLFvPhDxjiR/ByNKugRT//CgOWQgb12FlDL5o/kJUQaRsUA00Dc9zKt/XUexTgCG91S5atV9iXQ==~1",
    (
        "locDataV3=eyJpc0RlZmF1bHRlZCI6ZmFsc2UsImlzRXhwbGljaXQiOmZhbHNlLCJpbnRlbnQiOiJTSElQUElORyIsInBpY2t1cCI6W3siYnVJZCI6IjAiLCJub2RlSWQiOiIzMDgxIiwiZGlzcGxheU5hbWUiOiJTYWNyYW1lbnRvIFN1cGVyY2VudGVyIiwibm9kZVR5cGUiOiJTVE9SRSIsImFkZHJlc3MiOnsicG9zdGFsQ29kZSI6Ijk1ODI5IiwiYWRkcmVzc0xpbmUxIjoiODkxNSBHZXJiZXIgUm9hZCIsImNpdHkiOiJTYWNyYW1lbnRvIiwic3RhdGUiOiJDQSIsImNvdW50cnkiOiJVUyIsInBvc3RhbENvZGU5IjoiOTU4MjktMDAwMCJ9LCJnZW9Qb2ludCI6eyJsYXRpdHVkZSI6MzguNDgyNjc3LCJsb25naXR1ZGUiOi0xMjEuMzY5MDI2fSwiaXNHbGFzc0VuYWJsZWQiOnRydWUsInNjaGVkdWxlZEVuYWJsZWQiOnRydWUsInVuU2NoZWR1bGVkRW5hYmxlZCI6dHJ1ZSwiaHViTm9kZUlkIjoiMzA4MSIsInN0b3JlSHJzIjoiMDY6MDAtMjM6MDAiLCJzdXBwb3J0ZWRBY2Nlc3NUeXBlcyI6WyJQSUNLVVBfQ1VSQlNJREUiLCJQSUNLVVBfSU5TVE9SRSJdfV0sInNoaXBwaW5nQWRkcmVzcyI6eyJsYXRpdHVkZSI6MzguNDc0NCwibG9uZ2l0dWRlIjotMTIxLjM0MzcsInBvc3RhbENvZGUiOiI5NTgyOSIsImNpdHkiOiJTYWNyYW1lbnRvIiwic3RhdGUiOiJDQSIsImNvdW50cnlDb2RlIjoiVVNBIiwiZ2lmdEFkZHJlc3MiOmZhbHNlfSwiYXNzb3J0bWVudCI6eyJub2RlSWQiOiIzMDgxIiwiZGlzcGxheU5hbWUiOiJTYWNyYW1lbnRvIFN1cGVyY2VudGVyIiwiYWNjZXNzUG9pbnRzIjpudWxsLCJzdXBwb3J0ZWRBY2Nlc"
        "3NUeXBlcyI6W10sImludGVudCI6IlBJQ0tVUCIsInNjaGVkdWxlRW5hYmxlZCI6ZmFsc2V9LCJkZWxpdmVyeSI6eyJidUlkIjoiMCIsIm5vZGVJZCI6IjMwODEiLCJkaXNwbGF5TmFtZSI6IlNhY3JhbWVudG8gU3VwZXJjZW50ZXIiLCJub2RlVHlwZSI6IlNUT1JFIiwiYWRkcmVzcyI6eyJwb3N0YWxDb2RlIjoiOTU4MjkiLCJhZGRyZXNzTGluZTEiOiI4OTE1IEdlcmJlciBSb2FkIiwiY2l0eSI6IlNhY3JhbWVudG8iLCJzdGF0ZSI6IkNBIiwiY291bnRyeSI6IlVTIiwicG9zdGFsQ29kZTkiOiI5NTgyOS0wMDAwIn0sImdlb1BvaW50Ijp7ImxhdGl0dWRlIjozOC40ODI2NzcsImxvbmdpdHVkZSI6LTEyMS4zNjkwMjZ9LCJpc0dsYXNzRW5hYmxlZCI6dHJ1ZSwic2NoZWR1bGVkRW5hYmxlZCI6dHJ1ZSwidW5TY2hlZHVsZWRFbmFibGVkIjp0cnVlLCJhY2Nlc3NQb2ludHMiOlt7ImFjY2Vzc1R5cGUiOiJERUxJVkVSWV9BRERSRVNTIn1dLCJodWJOb2RlSWQiOiIzMDgxIiwiaXNFeHByZXNzRGVsaXZlcnlPbmx5IjpmYWxzZSwic3VwcG9ydGVkQWNjZXNzVHlwZXMiOlsiREVMSVZFUllfQUREUkVTUyJdfSwiaW5zdG9yZSI6ZmFsc2UsInJlZnJlc2hBdCI6MTY1NjQ0NjgyMjk5NCwidmFsaWRhdGVLZXkiOiJwcm9kOnYyOjIyNjNiOWM2LTRlNWEtNDRjZS1hOWRhLTA1ZTAyOGE5YjhjNyJ9"
    ),
    "_px3=fd4a806b205916413bf99a01e942ff9336232e851610f563e729bd8270721edd:bOVkbtqNZf6CXjv41nbl5RGLFNxuANcsgSOgoqttHpCpEzg2Mto0wjrxDFfh6zUSiA5wDDm5rTHKfX2lAiPg0Q==:1000:zDPkfwX/OSrZ75Ggjs1Krpm4L6f17sXBDANaE4TV+9j6Y6dnGPRddnxuV+8zV6iiq/iJexlqrtw3brpn59WivGDsHGwucjfO5cRyfNrUryok4xbUwr1yK/iAyP1t4vdvf8bS4jGOBM9xp8zMe44W7tOveajOsuF64IAfP4GPGNBGPUZkIE3I+bocrKQJA7sdD12/BOw6goT1VCddGHvnEQ==",
    "QuantumMetricSessionID=18cfa4319b8ef1e79540e53bae1b9f4a",
    "QuantumMetricUserID=86906ef3aba513b1593763543a679f7b",
    "xptwg=3560776434:CF9364D4F670D8:21A39D6:281D1B4C:2C081154:A12BB2D4:",
    "TS01b0be75=01538efd7cb337960d25ee6309ddbe2d9b73d42de4db21f151df666ad7ab7cde6a6e48b30e01e0ae9a71bdd492f4d3cefe3367d67f",
    "TS013ed49a=01538efd7cb337960d25ee6309ddbe2d9b73d42de4db21f151df666ad7ab7cde6a6e48b30e01e0ae9a71bdd492f4d3cefe3367d67f",
    "akavpau_p2=1656426003~id=7229bf055a987cf4ec0baf62877b3b53",
    "_astc=f75dda122e22d06cf00905ef84d586f8",
    "pxcts=f6d5b40e-f6eb-11ec-b18e-5746686f6a76",
    "_pxff_cfp=1",
)


def _seed_cookie_jar(session):
    """Load the captured browser cookies into *session*'s cookie jar."""
    for pair in _SEED_COOKIES:
        # Split on the first '=' only: several values end in base64 '=' padding.
        name, _, value = pair.partition("=")
        session.cookies.set(name, value, domain=_COOKIE_DOMAIN, path="/")


def _item_id_from_url(url):
    """Return the last path segment of a product URL, ignoring query and fragment."""
    return urlsplit(url).path.rstrip("/").rsplit("/", 1)[-1]


def _dig(data, *path):
    """Follow *path* (dict keys / list indexes) into *data*; None if a step is missing."""
    for step in path:
        try:
            data = data[step]
        except (KeyError, IndexError, TypeError):
            return None
    return data


def _store_lookup_request(zip_code):
    """Build the (headers, JSON payload) pair for the ``nearByNodes`` store lookup."""
    payload = json.dumps({
        "query": "......",
        "variables": {
            "input": {
                "postalCode": str(zip_code),
                "accessTypes": [
                    "PICKUP_INSTORE",
                    "PICKUP_CURBSIDE",
                    "PICKUP_SPOKE",
                    "PICKUP_POPUP"
                ],
                "nodeTypes": [
                    "STORE",
                    "PICKUP_SPOKE",
                    "PICKUP_POPUP"
                ],
                "latitude": None,
                "longitude": None,
                "radius": None
            },
            "checkItemAvailability": False,
            "checkWeeklyReservation": False,
            "enableStoreSelectorMarketplacePickup": False
        }
    })
    headers = {
        'authority': 'www.walmart.com',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'x-o-segment': 'oaoh',
        'x-o-correlation-id': 'Tt33HoVZ_Pqtlie1ABII1nfekFaSEtbRQPSc',
        'device_profile_ref_id': '-f6R8qf8Vd3gwky1UOzoEwW_XoTeRKqppMfK',
        'x-latency-trace': '1',
        'wm_mp': 'true',
        'wm_page_url': 'https://www.walmart.com/ip/Allegra-Adult-24HR-Gelcaps-24-Ct-180-mg-Allergy-Relief/43819800',
        'x-o-platform-version': 'main-1.2.0-3a465c',
        'x-o-gql-query': 'query nearByNodes',
        'x-o-bu': 'WALMART-US',
        'x-apollo-operation-name': 'nearByNodes',
        'traceparent': 'Tt33HoVZ_Pqtlie1ABII1nfekFaSEtbRQPSc',
        'x-o-mart': 'B2C',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
        'x-o-platform': 'rweb',
        'content-type': 'application/json',
        'accept': 'application/json',
        'x-enable-server-timing': '1',
        'x-o-ccm': 'server',
        'wm_qos.correlation_id': 'Tt33HoVZ_Pqtlie1ABII1nfekFaSEtbRQPSc',
        'origin': 'https://www.walmart.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.walmart.com/ip/Allegra-Adult-24HR-Gelcaps-24-Ct-180-mg-Allergy-Relief/43819800',
        'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
    }
    return headers, payload


def _item_request(item_id):
    """Build the (headers, JSON payload) pair for the ``ItemById`` product query."""
    payload = json.dumps({
        "query": "...",
        "variables": {
            "channel": "WWW",
            "pageType": "ItemPageGlobal",
            "tenant": "WM_GLASS",
            "version": "v1",
            "itemId": str(item_id),
            "layout": [
                "itemDesktop"
            ],
            "tempo": {
                "targeting": "%7B%22userState%22%3A%22loggedIn%22%7D",
                "params": [
                    {
                        "key": "expoVars",
                        "value": "expoVariationValue"
                    },
                    {
                        "key": "expoVars",
                        "value": "expoVariationValue2"
                    }
                ]
            },
            "p13N": {
                "reqId": "zAKgORT4feGddLyly5nLXRCH16egOJ0JwL4x",
                "pageId": str(item_id),
                "modules": [
                    {
                        "moduleType": "PersonalizedLabels",
                        "moduleId": "234-sdfsfvns-sdfdskvl"
                    }
                ],
                "userClientInfo": {
                    "ipAddress": "IP=0:0:0:0:0:0:0:1-0:0:0:0:0:0:0:1",
                    "isZipLocated": True,
                    "callType": "CLIENT",
                    "deviceType": "desktop"
                },
                "userReqInfo": {
                    "refererContext": {
                        "source": "itempage"
                    },
                    "pageUrl": "/ip/Equate-Maximum-Strength-Severe-Allergy-Plus-Sinus-Headache-Caplets-20-Count/14053317"
                }
            },
            "p13nCls": {
                "pageId": str(item_id),
                "userClientInfo": {
                    "ipAddress": "IP=0:0:0:0:0:0:0:1-0:0:0:0:0:0:0:1",
                    "isZipLocated": True,
                    "deviceType": "desktop",
                    "callType": "CLIENT"
                },
                "userReqInfo": {
                    "refererContext": {
                        "source": "itempage"
                    }
                },
                "p13NCallType": "ATF"
            },
            "fetchBuyBoxAd": True,
            "fetchSkyline": True,
            "fetchIdml": True,
            "fetchReviews": True,
            "fetchFitment": True,
            "fetchSEO": True,
            "fetchP13N": True,
            "fetchAffirm": True,
            "fetchMarquee": True,
            "fetchSpCarousel": True,
            "fetchBrandBox": True,
            "fetchDiscounts": False,
            "enableItemIbotta": True
        }
    })
    headers = {
        'authority': 'www.walmart.com',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="99", "Google Chrome";v="99"',
        'x-o-correlation-id': 'EtyZZHcTSBCiVMxl4i44a6h_Gzqf1F-MkpXd',
        'x-o-item-id': str(item_id),
        'device_profile_ref_id': '-1fXNwg-2wCxSXoXmzk8jg4T_XgDMMHhHTMN',
        'x-latency-trace': '1',
        'wm_mp': 'true',
        'wm_page_url': 'https://www.walmart.com/ip/Equate-Maximum-Strength-Severe-Allergy-Plus-Sinus-Headache-Caplets-20-Count/14053317',
        'x-o-platform-version': 'main-1.3.0-e51fc3',
        'x-o-segment': 'oaoh',
        'calltype': 'CLIENT',
        'x-o-gql-query': 'query ItemById',
        'x-o-bu': 'WALMART-US',
        'x-apollo-operation-name': 'ItemById',
        'ip-referer': '',
        'sec-ch-ua-platform': '"Linux"',
        'traceparent': 'EtyZZHcTSBCiVMxl4i44a6h_Gzqf1F-MkpXd',
        'x-o-mart': 'B2C',
        'sec-ch-ua-mobile': '?0',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36',
        'x-o-platform': 'rweb',
        'content-type': 'application/json',
        'accept': 'application/json',
        'is-variant-fetch': 'false',
        'x-enable-server-timing': '1',
        'x-o-ccm': 'server',
        'wm_qos.correlation_id': 'EtyZZHcTSBCiVMxl4i44a6h_Gzqf1F-MkpXd',
        'origin': 'https://www.walmart.com',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': 'https://www.walmart.com/ip/Equate-Maximum-Strength-Severe-Allergy-Plus-Sinus-Headache-Caplets-20-Count/14053317',
        'accept-language': 'en-US,en;q=0.9',
    }
    return headers, payload


def _post_json(session, url, headers, payload, proxies):
    """POST *payload* and return the decoded JSON body, or None after logging a failure."""
    try:
        response = session.post(url, headers=headers, data=payload,
                                proxies=proxies, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        _log.warning("request to %s failed: %s", url, exc)
        return None
    try:
        return response.json()
    except ValueError:
        # A non-JSON body usually means an error or challenge page; the status
        # code is the most useful clue when rows go missing.
        _log.warning("non-JSON response from %s (HTTP %s)", url, response.status_code)
        return None


def _scrape_prices(session, url_list, zip_code_list, results, csv_path, csv_list, utc_tz):
    """Fill *results* and append CSV rows for every (url, zip code) pair."""
    # Proxying is disabled, as in the original; put the proxy service's URLs
    # here to route traffic through it.
    proxies = {"http": None, "https": None}
    store_ids = {}  # zip code -> store id; only successful lookups are remembered
    attempt = 0
    for url in url_list:
        for zip_code in zip_code_list:
            attempt += 1
            results[attempt] = {}  # stays empty if this pair fails

            item_id = _item_id_from_url(url) if isinstance(url, str) else ""
            if not item_id:
                _log.warning("cannot extract an item id from %r", url)
                continue

            # The nearest store depends only on the zip code, so it is looked
            # up once per zip rather than once per (url, zip) pair.
            store_id = store_ids.get(zip_code)
            if store_id is None:
                headers, payload = _store_lookup_request(zip_code)
                content = _post_json(session, _GRAPHQL_URL, headers, payload, proxies)
                store_id = _dig(content, 'data', 'nearByNodes', 'nodes', 0, 'id')
                if store_id is None:
                    _log.warning("no store found for zip code %s", zip_code)
                    continue
                store_ids[zip_code] = store_id

            # NOTE(review): this request carries neither the zip code nor the
            # store id, so the returned price presumably reflects whatever store
            # the cookies select -- confirm before relying on per-zip prices.
            headers, payload = _item_request(item_id)
            content = _post_json(session, _GRAPHQL_URL + "/ip/" + item_id,
                                 headers, payload, proxies)
            if content is None:
                continue

            pruduct_time = datetime.datetime.now(tz=utc_tz).strftime("%Y-%m-%d %H:%M:%S")
            product_price = _dig(content, 'data', 'product', 'priceInfo', 'currentPrice', 'price')
            product_gtin = _dig(content, 'data', 'product', 'upc')
            if product_price is None:
                _log.warning("no price in response for item %s (zip %s)", item_id, zip_code)

            # Key names (including the 'pruduct_time' spelling) are kept as-is
            # because downstream consumers of the JSON may depend on them.
            results[attempt] = {
                'page_num': item_id,
                'product_gtin': product_gtin,
                'store_id': store_id,
                'product_price': product_price,
                'zip_code': zip_code,
                'pruduct_time': pruduct_time
            }
            try:
                # Appending row by row keeps already-scraped data on disk if a
                # later request crashes the run.
                with open(csv_path, "a", encoding='utf-8') as fw2:
                    fw2.write(f'{item_id},{product_gtin},{zip_code},{store_id},{product_price},{pruduct_time}\n')
            except OSError:
                _log.exception("could not append to %s", csv_path)
                continue
            if csv_path not in csv_list:
                csv_list.append(csv_path)


def main(url_list, zip_code_list, ip_list, _now, save_dict, num, csv_list, utc_tz):
    """Scrape the price of every product URL for every zip code.

    All requests go through one ``requests.Session`` whose cookie jar is seeded
    from a captured browser cookie, so cookies refreshed by the server are used
    on later requests. Failures are logged instead of silently discarded.

    Args:
        url_list: Product page URLs; the item id is the last path segment.
        zip_code_list: Zip codes to look up the nearest store for.
        ip_list: Unused; kept so existing callers keep working.
        _now: Datetime used to name the output files.
        save_dict: Shared dict; ``save_dict[num]`` receives this call's results.
        num: Worker number, used as the output file prefix and ``save_dict`` key.
        csv_list: Shared list; the CSV path is appended once a row is written.
        utc_tz: Timezone passed to ``datetime.datetime.now`` for row timestamps.

    Returns:
        Dict mapping a 1-based attempt counter to the scraped record, or to an
        empty dict when that (url, zip code) pair failed.
    """
    # Workers may run before anything else has created the directory.
    os.makedirs("backup", exist_ok=True)
    output_json_file = f'backup/{num}_' + _now.strftime("%Y%m%d_%H%M.json")
    output_csv_file = f'backup/{num}_' + _now.strftime("%Y%m%d_%H%M.csv")

    _dict = {}
    if url_list and zip_code_list:
        with requests.Session() as session:
            _seed_cookie_jar(session)
            _scrape_prices(session, url_list, zip_code_list, _dict,
                           output_csv_file, csv_list, utc_tz)

    with open(output_json_file, "w", encoding='utf-8') as fw3:
        fw3.write(json.dumps(_dict))
    save_dict[num] = _dict
    return _dict
if __name__ == '__main__':
    # Make sure both working directories exist before any worker writes to them.
    for folder in ('output', 'backup'):
        if not os.path.exists(folder):
            os.makedirs(folder)

    utc_tz = pytz.timezone('UTC')
    _now = datetime.datetime.now(tz=utc_tz)

    ip_list = ['45.142.28.83:8094', '45.137.60.112:6640']
    url_list = [
        'https://www.walmart.com/ip/Allegra-Adult-24HR-Gelcaps-24-Ct-180-mg-Allergy-Relief/43819800',
        'https://www.walmart.com/ip/Zyrtec-24-Hour-Allergy-Relief-Tablets-with-10-mg-Cetirizine-HCl-90-ct/224749468?athbdg=L1600',
        'https://www.walmart.com/ip/Equate-Maximum-Strength-Severe-Allergy-Plus-Sinus-Headache-Caplets-20-Count/14053317',
    ]
    zip_code_list = [10003, 48104]

    start_time = time.perf_counter()
    save_dict = {}
    param_list = []
    csv_list = []

    # Each worker receives a contiguous slice of `one_thread_url_num` URLs.
    thread_num = 1
    one_thread_url_num = 3
    pool = threadpool.ThreadPool(thread_num)

    for i in range(thread_num):
        worker_no = i + 1
        save_dict[worker_no] = {}
        start_url_num = i * one_thread_url_num
        end_url_num = start_url_num + one_thread_url_num
        reaches_end = end_url_num >= len(url_list)
        if reaches_end:
            # This slice runs to the end of the list.
            chunk = url_list[start_url_num:]
        else:
            chunk = url_list[start_url_num:end_url_num]
        worker_args = [chunk, zip_code_list, ip_list, _now, save_dict, worker_no,
                       csv_list, utc_tz]
        param_list.append((worker_args, None))
        print("start_url_num", start_url_num,
              end_url_num if reaches_end else len(url_list))

    # Queue one request per worker and block until all of them have finished.
    tasks = threadpool.makeRequests(main, param_list)
    for task in tasks:
        pool.putRequest(task)
    pool.wait()
2条答案
按热度按时间myss37ts1#
我不确定这是否有帮助，但他们使用了大量的 cookie。以下是我的做法，不过你需要检查一下它在使用你的代理时是否仍然有效：
2uluyalo2#
要抓取沃尔玛，您也可以使用第三方 API SerpApi 提供的 Walmart Search Engine Results API 作为替代方案。这是一个提供免费套餐的付费 API。
它将绕过来自Google和其他搜索引擎的屏蔽(包括CAPTCHA),并且不需要创建解析器和维护它。
每家商店都有自己的 `Location cookies`。您可以在“抓取沃尔玛特定商店的搜索结果”这篇博客文章中了解更多信息。您可以使用 JSON list of supported Walmart Stores 获取任意一家商店的信息（共 4,640 家）：
为了收集所有页面的信息，需要使用分页；在我们的例子中使用 `while` 循环来实现：可在在线 IDE 中查看代码。
输出示例:
有一篇SerpApi Demo Project: Walmart Coffee Exploratory Data Analysis博客文章展示了使用分页(在每个商店内部)从500家商店中提取咖啡列表数据。
免责声明：我为 SerpApi 工作。