Scrapy intermittent error "RequestGenerationFailed"

siotufzp, asked on 2022-11-09 in Other

In a PyCharm venv, I used PyInstaller to build a GUI that calls Scrapy via Popen from another file. Run from the terminal, the Popen call launched Scrapy and scraped successfully. After packaging, the GUI opened, but Popen's stderr reported that scrapy was not found. An issue I opened on GitHub helped me discover that PyInstaller was using the user-site packages rather than the venv's. I worked around it by installing Scrapy for the user as well; after rebuilding with PyInstaller, the GUI invoked Scrapy. I still don't know why PyInstaller didn't use the venv packages.
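A quick way to see which installation is actually being resolved is to print the interpreter and the imported scrapy module from each environment; a minimal diagnostic sketch:

import sys
import scrapy

# Compare the output when run with the venv's python and with the
# user/system python: the paths show which site-packages is in use.
print(sys.executable)   # interpreter currently running
print(scrapy.__file__)  # where scrapy was imported from

(Running PyInstaller as python -m PyInstaller main.py from inside the activated venv is the usual way to make it bundle the venv's packages rather than the user site's.)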
But now I'm hitting another error:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/twisted/internet/defer.py", line 62, in run
    return f(*args,**kwargs)
  File "/usr/local/lib/python3.6/dist-packages/scrapy/core/downloader/middleware.py", line 49, in process_request
    return (yield download_func(request=request, spider=spider))
twisted.web._newclient.RequestGenerationFailed: [<twisted.python.failure.Failure builtins.AttributeError: __enter__>]
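
For reference, AttributeError: __enter__ is what Python raises (on the 3.6 interpreter shown in the traceback; newer versions raise TypeError instead) when an object that does not implement the context-manager protocol is used in a with statement, so something inside Twisted's request pipeline is being handed a non-context-manager. A minimal snippet producing the same error:

class NotAContextManager:
    pass

# A `with` target must define __enter__/__exit__; this line raises
# AttributeError: __enter__, the same failure wrapped above.
with NotAContextManager():
    pass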

This happens for every request. I get the error even when I run Scrapy as the user (outside the venv) in a terminal; run from a terminal inside the venv, everything works fine.
Also, is this even the ideal way to package with PyInstaller, i.e. installing the packages for the user so that PyInstaller can pick them up?
GUI code:

import tkinter as tk
from tkinter import messagebox as tkms
from tkinter import ttk
import shlex
from subprocess import Popen
import json

def get_url():
    # printing the Entry url to a file
    pass  # placeholder; body not shown in the original post

harvest = None

def watch():
    global harvest
    if harvest:
        if harvest.poll() != None:
            # Update your progressbar to finished.
            progress_bar.stop()
            # if harvest finishes OK then show confirmation message otherwise show error.
            if harvest.returncode == 0:
                mes = tkms.showinfo(title='progress', message='Scraping Done')
                if mes == 'ok':
                    root.destroy()
            else:
                tkms.showinfo(title='Error', message=f'harvest returncode == {harvest.returncode}')
            harvest = None
        else:
            # indicate that process is running.
            progress_bar.grid()
            progress_bar.start(10)
            root.after(100, watch)

def scrape():
    global harvest
    command_line = shlex.split('scrapy runspider ./scrape.py')
    with open('stdout.txt', 'wb') as out, open('stderr.txt', 'wb') as err:
        harvest = Popen(command_line, stdout=out, stderr=err)
    watch()

root = tk.Tk()
root.title("Title")

url = tk.StringVar(root)

entry1 = tk.Entry(root, width=90, textvariable=url)
entry1.grid(row=0, column=0, columnspan=3)

my_button = tk.Button(root, text="Process", command=lambda: [get_url(), scrape()])
my_button.grid(row=2, column=2)

progress_bar = ttk.Progressbar(root, orient=tk.HORIZONTAL, length=300, mode='indeterminate')
progress_bar.grid(row=3, column=2)
progress_bar.grid_forget()

root.mainloop()

And the spider, scrape.py:

import scrapy
import json

class ImgSpider(scrapy.Spider):
    name = 'img'

    #allowed_domains = [user_domain]
    start_urls = ['xyz']

    def parse(self, response):
        title = response.css('img::attr(alt)').getall()
        links = response.css('img::attr(src)').getall()

        with open('../images/urls.txt', 'w') as f:
            for i in title:
                f.write(i)
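
Note that '../images/urls.txt' resolves against the working directory of the scrapy subprocess, not against the spider file, so the open() can fail depending on where the GUI was launched from. A sketch that anchors the output next to the spider instead (the images location is an assumption carried over from the post):

from pathlib import Path

def write_titles(titles):
    # Resolve ../images relative to this file rather than the process
    # CWD (the "images" directory name is assumed from the post).
    out_dir = Path(__file__).resolve().parent.parent / "images"
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / "urls.txt", "w") as f:
        for t in titles:
            f.write(t)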


Answer 1, by qfe3c7zg:

I was able to get it working.
Steps to reproduce...
Create a new directory, set up a fresh Python virtual environment in it, update pip, and install scrapy and pyinstaller into the venv.
Create two Python scripts in the new directory... mine are main.py and scrape.py.

main.py

import tkinter as tk
from tkinter import messagebox as tkms
from tkinter import ttk
import shlex
import os
import scrapy
from subprocess import Popen
import json

def get_path(name):
    return os.path.join(os.path.dirname(__file__),name).replace("\\","/")

harvest = None

def watch():
    global harvest
    if harvest:
        if harvest.poll() != None:
            # Update your progressbar to finished.
            progress_bar.stop()
            # if harvest finishes OK then show confirmation message otherwise show error.
            if harvest.returncode == 0:
                mes = tkms.showinfo(title='progress', message='Scraping Done')
                if mes == 'ok':
                    root.destroy()
            else:
                tkms.showinfo(title='Error', message=f'harvest returncode == {harvest.returncode}')
            harvest = None
        else:
            # indicate that process is running.
            progress_bar.grid()
            progress_bar.start(10)
            root.after(100, watch)

def scrape():
    global harvest
    command_line = shlex.split('scrapy runspider ' + get_path('scrape.py'))
    with open ('stdout.txt', 'wb') as out, open('stderr.txt', 'wb') as err:
        harvest = Popen(command_line, stdout=out, stderr=err)
    watch()

root = tk.Tk()
root.title("Title")

url = tk.StringVar(root)

entry1 = tk.Entry(root, width=90, textvariable=url)
entry1.grid(row=0, column=0, columnspan=3)

my_button = tk.Button(root, text="Process", command=scrape)
my_button.grid(row=2, column=2)

progress_bar = ttk.Progressbar(root, orient=tk.HORIZONTAL, length=300, mode='indeterminate')
progress_bar.grid(row=3, column=2)
progress_bar.grid_forget()

root.mainloop()

scrape.py

import scrapy
import os

class ImgSpider(scrapy.Spider):
    name = 'img'

    #allowed_domains = [user_domain]
    start_urls = ['https://www.bbc.com/news/in_pictures']  # i just used this for testing.

    def parse(self, response):
        title = response.css('img::attr(alt)').getall()
        links = response.css('img::attr(src)').getall()

        if not os.path.exists('./images'):
            os.makedirs('./images')
        with open('./images/urls.txt', 'w') as f:
            for i in title:
                f.write(i)
        yield {"title": title, "links": links}

Then run pyinstaller -F main.py, which will generate a main.spec file. Open that file and make the following changes to it.

main.spec


# -*- mode: python ; coding: utf-8 -*-

block_cipher = None
import os

scrape = "scrape.py"
imagesdir = "images"  

a = Analysis(
    ['main.py'],
    pathex=[],
    binaries=[],
    datas=[(scrape,'.'), (imagesdir,'.')],  # add these lines
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.zipfiles,
    a.datas,
    [],
    name='main',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,  # Once you have confirmed it is working you can set this to false
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)

Then, once all of that is done, go back to your terminal, run pyinstaller main.spec, and Bob's your uncle...
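
One caveat: in a one-file build, bundled datas such as scrape.py are unpacked at runtime into a temporary directory exposed as sys._MEIPASS, and whether __file__ points there depends on the PyInstaller version. A helper along these lines is commonly used to cover both the frozen and unfrozen cases (a sketch, not taken from the answer):

import os
import sys

def resource_path(name):
    # Frozen one-file app: bundled files are unpacked to sys._MEIPASS.
    # Unfrozen: fall back to the directory of this source file.
    base = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
    return os.path.join(base, name)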

Update

main.py -
Actually, I just removed the shlex part and made the path to scrape.py relative to the main.py file path.

import tkinter as tk
from tkinter import messagebox as tkms
from tkinter import ttk
from subprocess import Popen
import json
import os

def get_url():
    print('Getting URL...')
    data = url.get()
    if not os.path.exists('./data'):
        os.makedirs('./data')
    with open('./data/url.json', 'w') as f:
        json.dump(data, f)

harvest = None

def watch():
    global harvest
    print('watch started')
    if harvest:
        if harvest.poll() != None:
            print('progress bar ends')
            # Update your progressbar to finished.
            progress_bar.stop()
            #if harvest finishes OK then show confirmation message otherwise show error.
            if harvest.returncode == 0:
                mes = tkms.showinfo(title='progress', message='Scraping Done')
                if mes == 'ok':
                    root.destroy()
            else:
                tkms.showinfo(title='Error', message=f'harvest returncode == {harvest.returncode}')

            # Maybe report harvest.returncode?
            print(f'harvest return code if Poll !None =--######==== {harvest.returncode}')
            print(f'harvest poll =--######==== {harvest.poll}')
            # Re-schedule `watch` to be called again after 0.1 s.
            harvest = None

        else:
            # indicate that process is running.
            print('progress bar starts')
            progress_bar.grid()
            progress_bar.start(10)
            print(f'harvest return code =--######==== {harvest.returncode}')
            root.after(100, watch)

def scrape():
    global harvest
    scrapefile = os.path.join(os.path.dirname(__file__),'scrape.py')
    # harvest = Popen(command_line)
    with open ('stdout.txt', 'wb') as out, open('stderr.txt', 'wb') as err:
        # harvest = Popen('scrapy runspider ./scrape.py', stdout=out, stderr=err, shell=True)
        harvest = Popen(["python3", scrapefile], stdout=out, stderr=err)
    print('harvesting started')
    watch()

root = tk.Tk()
root.title("Title")

url = tk.StringVar(root)

entry1 = tk.Entry(root, width=90, textvariable=url)
entry1.grid(row=0, column=0, columnspan=3)

my_button = tk.Button(root, text="Process", command=lambda: [get_url(), scrape()])
my_button.grid(row=2, column=2)

progress_bar = ttk.Progressbar(root, orient=tk.HORIZONTAL, length=300, mode='indeterminate')
progress_bar.grid(row=3, column=2)
progress_bar.grid_forget()

root.mainloop()
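
One thing to keep in mind with this version: Popen(["python3", scrapefile]) needs a python3 with Scrapy installed on the target machine's PATH, because inside a frozen app sys.executable points at the bundled executable itself, not at a Python interpreter. A fail-fast check could look like this (the error message wording is illustrative):

import shutil

# Abort early if no python3 interpreter is available on PATH.
if shutil.which("python3") is None:
    raise RuntimeError("python3 not found on PATH; cannot launch the spider")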

main.spec


# -*- mode: python ; coding: utf-8 -*-

block_cipher = None
a = Analysis(['main.py'], pathex=[], binaries=[],
    datas=[('scrape.py','.')],   # <------- this is the only change that I made
    hiddenimports=[], hookspath=[],
    hooksconfig={}, runtime_hooks=[], excludes=[],
    win_no_prefer_redirects=False, win_private_assemblies=False,
    cipher=block_cipher, noarchive=False,)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(pyz, a.scripts, a.binaries, a.zipfiles, a.datas, [],
    name='main', debug=False, bootloader_ignore_signals=False, strip=False,
    upx=True, upx_exclude=[], runtime_tmpdir=None, console=False,
    disable_windowed_traceback=False, argv_emulation=False, target_arch=None,
    codesign_identity=None, entitlements_file=None,)

I did not make any changes to scrape.py.
