scrapy 出现 Twisted 错误“RequestGenerationFailed”（请求生成失败）

在 PyCharm 的 venv 中，我用 pyinstaller 打包了一个 GUI，它通过 Popen 调用 scrapy 来运行一个爬虫文件。在终端中运行时，Popen 能够调用 scrapy，并且成功完成了抓取。打包之后，GUI 可以打开，但 Popen 的 stderr 提示找不到 scrapy（scrapy not found）。我在 GitHub 上提的一个 issue 帮助我发现：pyinstaller 使用的是用户级（user）安装的包，而不是 venv 中的包。我通过在用户级环境中安装 scrapy 解决了这个问题，重新用 pyinstaller 打包后，生成的 GUI 可以调用 scrapy 了。不过我仍然不明白为什么 pyinstaller 不使用 venv 中的包。
但现在我又遇到了一个新的错误：

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/twisted/internet/defer.py", line 62, in run
    return f(*args,**kwargs)
  File "/usr/local/lib/python3.6/dist-packages/scrapy/core/downloader/middleware.py", line 49, in process_request
    return (yield download_func(request=request, spider=spider))
twisted.web._newclient.RequestGenerationFailed: [<twisted.python.failure.Failure builtins.AttributeError: __enter__>]

中的每一个
即使我以用户身份（在 venv 之外）在终端中运行 Scrapy，也会得到这个错误。而在终端中进入 venv 后运行，一切正常。
另外，这是用 pyinstaller 打包的理想方式吗？也就是说，必须把软件包安装到用户级环境中，pyinstaller 才能找到它们吗？
GUI 代码

import tkinter as tk
from tkinter import messagebox as tkms
from tkinter import ttk
import shlex
from subprocess import Popen
import json

def get_url():
    """Save the URL typed into the entry box to ``./data/url.json``.

    Reads the module-level ``url`` StringVar and dumps its current value as
    JSON. The ``./data`` directory is created on demand.
    """
    import os  # local import: this snippet's import header does not include os

    # printing Entry url to a file
    os.makedirs('./data', exist_ok=True)
    with open('./data/url.json', 'w') as f:
        json.dump(url.get(), f)

harvest = None  # handle of the running scrapy subprocess; None while idle


def watch():
    """Poll the scrapy subprocess and keep the progress bar in sync.

    Does nothing when no subprocess is being tracked. While the process is
    still running, the progress bar is shown and ``watch`` re-schedules itself
    through ``root.after``. Once the process has exited, the bar is stopped,
    the outcome is reported in a message box and ``harvest`` is reset.
    """
    global harvest
    if not harvest:
        return

    if harvest.poll() is None:
        # indicate that process is running, then check again in 100 ms.
        progress_bar.grid()
        progress_bar.start(10)
        root.after(100, watch)
        return

    # Update your progressbar to finished.
    progress_bar.stop()
    # if harvest finishes OK then show confirmation message otherwise show error.
    if harvest.returncode == 0:
        mes = tkms.showinfo(title='progress', message='Scraping Done')
        if mes == 'ok':
            root.destroy()
    else:
        tkms.showinfo(title='Error', message=f'harvest returncode == {harvest.returncode}')

    # The finished process has been reported; stop tracking it.
    harvest = None

def scrape():
    """Start the spider in a subprocess and begin monitoring it.

    The child's stdout is redirected to ``stdout.txt`` and its stderr to a
    file named ``stderr`` in the current working directory. The resulting
    ``Popen`` handle is stored in the module-level ``harvest`` so that
    ``watch`` can poll it.
    """
    global harvest
    argv = shlex.split('scrapy runspider ./scrape.py')
    with open('stdout.txt', 'wb') as stdout_log:
        with open('stderr', 'wb') as stderr_log:
            # This process's handles are closed when the with blocks end.
            harvest = Popen(argv, stdout=stdout_log, stderr=stderr_log)
    watch()

# Build the main window.
root = tk.Tk()
root.title("Title")

# Holds the text typed into the entry box.
url = tk.StringVar(root)

entry1 = tk.Entry(root, width=90, textvariable=url)
entry1.grid(row=0, column=0, columnspan=3)

# The list inside the lambda is only a way to call both functions in order
# from a single expression; its value is discarded.
my_button = tk.Button(root, text="Process", command=lambda: [get_url(), scrape()])
my_button.grid(row=2, column=2)

progress_bar = ttk.Progressbar(root, orient=tk.HORIZONTAL, length=300, mode='indeterminate')
progress_bar.grid(row=3, column=2)
# Hide the bar until a scrape is running. grid_remove() keeps the row/column
# options, so the bare progress_bar.grid() call in watch() puts it back in the
# same cell; grid_forget() would discard them.
progress_bar.grid_remove()

root.mainloop()

格式
可复现的代码

import scrapy
import json

class ImgSpider(scrapy.Spider):
    """Spider that records the ``alt`` text of every ``<img>`` it finds."""

    name = 'img'

    #allowed_domains = [user_domain]
    start_urls = ['xyz']

    def parse(self, response):
        """Write the image alt texts found in *response* to a text file.

        The output goes to ``../images/urls.txt`` (relative to the current
        working directory), one alt text per line.
        """
        title = response.css('img::attr(alt)').getall()
        # NOTE(review): the src URLs are extracted but never used, although
        # the output file is called urls.txt -- confirm which list is wanted.
        links = response.css('img::attr(src)').getall()

        # The with block closes the file; no explicit close is needed.
        with open('../images/urls.txt', 'w') as f:
            f.writelines(f'{alt}\n' for alt in title)

格式

我能让它工作。
重现步骤...
新建一个目录并创建一个新的 Python 虚拟环境，更新 pip，然后在该虚拟环境中用 pip 安装 scrapy 和 pyinstaller。
在新目录中创建两个python脚本...我的脚本是main.py和scrape.py

main.py

import tkinter as tk
from tkinter import messagebox as tkms
from tkinter import ttk
import shlex
import os
import scrapy
from subprocess import Popen
import json

def get_path(name):
    """Return the path of *name* next to this file, using forward slashes.

    The result is the directory of ``__file__`` joined with *name*, with every
    backslash replaced by ``/``.
    """
    base_dir = os.path.dirname(__file__)
    joined = os.path.join(base_dir, name)
    return "/".join(joined.split("\\"))

harvest = None  # handle of the running scrapy subprocess; None while idle


def watch():
    """Poll the scrapy subprocess and keep the progress bar in sync.

    Does nothing when no subprocess is being tracked. While the process is
    still running, the progress bar is shown and ``watch`` re-schedules itself
    through ``root.after``. Once the process has exited, the bar is stopped,
    the outcome is reported in a message box and ``harvest`` is reset.
    """
    global harvest
    if not harvest:
        return

    if harvest.poll() is None:
        # indicate that process is running, then check again in 100 ms.
        progress_bar.grid()
        progress_bar.start(10)
        root.after(100, watch)
        return

    # Update your progressbar to finished.
    progress_bar.stop()
    # if harvest finishes OK then show confirmation message otherwise show error.
    if harvest.returncode == 0:
        mes = tkms.showinfo(title='progress', message='Scraping Done')
        if mes == 'ok':
            root.destroy()
    else:
        tkms.showinfo(title='Error', message=f'harvest returncode == {harvest.returncode}')

    # Only forget the process once it has finished and been reported;
    # clearing it earlier would stop the polling loop from seeing it exit.
    harvest = None

def scrape():
    """Run ``scrapy runspider`` on the bundled spider and start monitoring it.

    The child's stdout and stderr are redirected to ``stdout.txt`` and
    ``stderr.txt`` in the current working directory. The ``Popen`` handle is
    stored in the module-level ``harvest`` so that ``watch`` can poll it.
    """
    global harvest
    # Build the argument list directly: the spider path comes from get_path()
    # and may contain spaces, which shlex.split() on a joined string would
    # break into several arguments.
    command_line = ['scrapy', 'runspider', get_path('scrape.py')]
    with open('stdout.txt', 'wb') as out, open('stderr.txt', 'wb') as err:
        harvest = Popen(command_line, stdout=out, stderr=err)
    watch()

# Build the main window.
root = tk.Tk()
root.title("Title")

# Holds the text typed into the entry box.
url = tk.StringVar(root)

entry1 = tk.Entry(root, width=90, textvariable=url)
entry1.grid(row=0, column=0, columnspan=3)

my_button = tk.Button(root, text="Process", command=scrape)
my_button.grid(row=2, column=2)

progress_bar = ttk.Progressbar(root, orient=tk.HORIZONTAL, length=300, mode='indeterminate')
progress_bar.grid(row=3, column=2)
# Hide the bar until a scrape is running. grid_remove() keeps the row/column
# options, so the bare progress_bar.grid() call in watch() puts it back in the
# same cell; grid_forget() would discard them.
progress_bar.grid_remove()

root.mainloop()

scrape.py

import scrapy
import os

class ImgSpider(scrapy.Spider):
    """Spider that collects the alt text and source URL of every ``<img>``."""

    name = 'img'

    #allowed_domains = [user_domain]
    start_urls = ['https://www.bbc.com/news/in_pictures']  # i just used this for testing.

    def parse(self, response):
        """Save the image alt texts to disk and yield the scraped data.

        The alt texts are written to ``./images/urls.txt`` (relative to the
        current working directory), one per line. A single item holding both
        the alt texts and the ``src`` URLs is then yielded.
        """
        title = response.css('img::attr(alt)').getall()
        links = response.css('img::attr(src)').getall()

        # exist_ok makes this safe whether or not the directory is already
        # there, without a separate existence check.
        os.makedirs('./images', exist_ok=True)
        # The with block closes the file; no explicit close is needed.
        with open('./images/urls.txt', 'w') as f:
            f.writelines(f'{alt}\n' for alt in title)
        yield {"title": title, "links": links}

然后运行pyinstaller -F main.py，它将生成一个main.spec文件。打开该文件并对该文件进行这些更改。

main.spec


# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build specification for main.py.
# Analysis, PYZ and EXE are not imported here; PyInstaller supplies them when
# it executes the spec file.

block_cipher = None
import os

# Extra, non-Python inputs that must be shipped next to the application.
scrape = "scrape.py"
imagesdir = "images"  # NOTE(review): presumably has to exist at build time -- confirm

a = Analysis(
    ['main.py'],
    pathex=[],
    binaries=[],
    # add these entries: each is a (source, destination folder in bundle) pair.
    # NOTE(review): with destination '.', the contents of the images folder
    # presumably land in the bundle root rather than in images/ -- confirm.
    datas=[(scrape,'.'), (imagesdir,'.')],  # add these lines
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False,
)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)

# Scripts, binaries and data files are all passed to EXE directly.
exe = EXE(
    pyz,
    a.scripts,
    a.binaries,
    a.zipfiles,
    a.datas,
    [],
    name='main',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    # Keep the console window while testing so errors stay visible.
    console=True,  # Once you have confirmed it is working you can set this to false
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)

完成以上步骤后，回到终端运行 pyinstaller main.spec，就大功告成了（Bob's your uncle）……

更新

main.py -
实际上，我只是删除了 shlex 部分，并使 scrape.py 的路径相对于 main.py 文件所在的路径。

import tkinter as tk
from tkinter import messagebox as tkms
from tkinter import ttk
from subprocess import Popen
import json
import os

def get_url():
    """Save the URL typed into the entry box to ``./data/url.json``.

    Reads the module-level ``url`` StringVar and dumps its current value as
    JSON. The ``./data`` directory is created on demand.
    """
    print('Getting URL...')
    data = url.get()
    # exist_ok makes this safe whether or not the directory is already there,
    # without a separate existence check.
    os.makedirs('./data', exist_ok=True)
    with open('./data/url.json', 'w') as f:
        json.dump(data, f)

harvest = None  # handle of the running subprocess; None while idle


def watch():
    """Poll the scraping subprocess and keep the progress bar in sync.

    Does nothing (apart from a debug print) when no subprocess is being
    tracked. While the process is still running, the progress bar is shown
    and ``watch`` re-schedules itself through ``root.after``. Once the process
    has exited, the bar is stopped, the outcome is reported in a message box
    and ``harvest`` is reset.
    """
    global harvest
    print('watch started')
    if harvest:
        if harvest.poll() is not None:
            print('progress bar ends')
            # Update your progressbar to finished.
            progress_bar.stop()
            # if harvest finishes OK then show confirmation message otherwise show error.
            if harvest.returncode == 0:
                mes = tkms.showinfo(title='progress', message='Scraping Done')
                if mes == 'ok':
                    root.destroy()
            else:
                tkms.showinfo(title='Error', message=f'harvest returncode == {harvest.returncode}')

            # Debug output: final exit status of the finished process.
            print(f'harvest return code if Poll !None =--######==== {harvest.returncode}')
            # poll must be called; formatting the bare attribute would print
            # the bound method object instead of the status.
            print(f'harvest poll =--######==== {harvest.poll()}')
            # The finished process has been reported; stop tracking it.
            harvest = None

        else:
            # indicate that process is running.
            print('progress bar starts')
            progress_bar.grid()
            progress_bar.start(10)
            print(f'harvest return code =--######==== {harvest.returncode}')
            # Re-schedule `watch` to be called again after 0.1 s.
            root.after(100, watch)

def scrape():
    """Run ``scrape.py`` in a subprocess and start monitoring it.

    The script is looked up next to this file. The child's stdout and stderr
    are redirected to ``stdout.txt`` and ``stderr.txt`` in the current working
    directory, and the ``Popen`` handle is stored in the module-level
    ``harvest`` so that ``watch`` can poll it.
    """
    global harvest
    scrapefile = os.path.join(os.path.dirname(__file__), 'scrape.py')
    # The with block closes both log files in this process on exit, so no
    # explicit close() calls are needed.
    with open('stdout.txt', 'wb') as out, open('stderr.txt', 'wb') as err:
        # NOTE(review): this runs scrape.py as a plain script with whatever
        # "python3" is on PATH; presumably scrape.py starts the crawl itself
        # when executed -- confirm.
        harvest = Popen(["python3", scrapefile], stdout=out, stderr=err)
    print('harvesting started')
    watch()

# Build the main window.
root = tk.Tk()
root.title("Title")

# Holds the text typed into the entry box; read by get_url().
url = tk.StringVar(root)

entry1 = tk.Entry(root, width=90, textvariable=url)
entry1.grid(row=0, column=0, columnspan=3)

# The list inside the lambda is only a way to call both functions in order
# from a single expression; its value is discarded.
my_button = tk.Button(root, text="Process", command=lambda: [get_url(), scrape()])
my_button.grid(row=2, column=2)

progress_bar = ttk.Progressbar(root, orient=tk.HORIZONTAL, length=300, mode='indeterminate')
progress_bar.grid(row=3, column=2)
# Hide the bar until a scrape is running. grid_remove() keeps the row/column
# options, so the bare progress_bar.grid() call in watch() puts it back in the
# same cell; grid_forget() would discard them.
progress_bar.grid_remove()

root.mainloop()

main.spec


# -*- mode: python ; coding: utf-8 -*-
# PyInstaller build specification for main.py.
# Analysis, PYZ and EXE are not imported here; PyInstaller supplies them when
# it executes the spec file.

block_cipher = None
a = Analysis(['main.py'], pathex=[], binaries=[],
    # (source, destination folder in bundle): ship scrape.py in the bundle root.
    datas=[('scrape.py','.')],   # <------- this is the only change that I made
    hiddenimports=[], hookspath=[],
    hooksconfig={}, runtime_hooks=[], excludes=[],
    win_no_prefer_redirects=False, win_private_assemblies=False,
    cipher=block_cipher, noarchive=False,)
pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
# Scripts, binaries and data files are all passed to EXE directly;
# console=False builds the app without a console window.
exe = EXE(pyz, a.scripts, a.binaries, a.zipfiles, a.datas, [],
    name='main', debug=False, bootloader_ignore_signals=False, strip=False,
    upx=True, upx_exclude=[], runtime_tmpdir=None, console=False,
    disable_windowed_traceback=False, argv_emulation=False, target_arch=None,
    codesign_identity=None, entitlements_file=None,)

我没有对 scrape.py 做任何更改。

scrapy 出现 Twisted 错误“RequestGenerationFailed”（请求生成失败）

1条答案

更新

相关问题

热门标签

最新问答