在C++中使用zlib创建zip归档文件时,遇到解压缩时出现“error invalid compressed data to inflate”的问题

46scxncf  于 2023-07-01  发布在  其他
关注(0)|答案(1)|浏览(796)

我的目标是只使用zlib从零开始创建一个zip压缩包。我已经取得了不少进展:当我不压缩数据并设置相应的头标志时,我可以成功创建归档文件。但是,当我把压缩后的数据写入归档文件并尝试用unzip解压时,它会报错“invalid compressed data to inflate”。
我已经尝试从创建的存档中读取文件并解压缩文件数据,以检查压缩是否确实损坏。不过,我可以解压缩所有的文件数据。此测试部分位于create函数的底部。
我也尝试过用其他工具甚至是在线zip解压工具来解压存档,但它们都失败了。
目前这个例子不支持ZIP 64,因为我想先让它工作,然后从那里扩展。
我知道这不是一个完全可复现的例子,但我无法把所有必要的代码都贴出来,而我确实被这段代码卡住了。不过,所有未贴出但已实现的代码都经过了彻底的测试。Array类的工作方式与std::vector类似。String类的工作方式也很像std::string。File类只是FILE*的包装器。而Compression类只是使用zlib压缩数据。这些类中的每一个都经过了彻底的测试,并且工作正常。SICE是static inline constexpr的简写。
这是用于压缩归档文件的结构。

struct Zip {

// Private.
private:
    
    // ---------------------------------------------------------
    // Structs.
    
    struct Entry {
        Path    sub_path;
        String  data;
        String  compressed;
    };
    
    struct FileHeader {
        SICE uint32_t   signature = 0x04034b50;
        uint16_t        version = 20;
        uint16_t        general_flag = 0;
        uint16_t        compression_method = 8; // deflated.
        uint16_t        mod_time = 0;
        uint16_t        mod_date = 0;
        uint32_t        crc = 0; // TO ASSIGN.
        uint32_t        compressed_len; // TO ASSIGN.
        uint32_t        uncompressed_len; // TO ASSIGN.
        uint16_t        name_len; // TO ASSIGN.
        uint16_t        extra_field_len = 0;
        
        // Equals (for debugging).
        constexpr friend
        bool    operator ==(const FileHeader& x, const FileHeader& y) {
            return
            // x.signature == y.signature &&
            x.version == y.version &&
            x.general_flag == y.general_flag &&
            x.compression_method == y.compression_method &&
            x.mod_time == y.mod_time &&
            x.mod_date == y.mod_date &&
            x.crc == y.crc &&
            x.compressed_len == y.compressed_len &&
            x.uncompressed_len == y.uncompressed_len &&
            x.name_len == y.name_len &&
            x.extra_field_len == y.extra_field_len;
        }
            
        // Dump to pipe (for debugging).
        constexpr friend
        auto&   operator <<(Pipe& pipe, const FileHeader& obj) {
            return pipe << "FileHeader(\n"
            "    signature: " << obj.signature << ",\n" <<
            "    version: " << obj.version << ",\n" <<
            "    general_flag: " << obj.general_flag << ",\n" <<
            "    compression_method: " << obj.compression_method << ",\n" <<
            "    mod_time: " << obj.mod_time << ",\n" <<
            "    mod_date: " << obj.mod_date << ",\n" <<
            "    crc: " << obj.crc << ",\n" <<
            "    compressed_len: " << obj.compressed_len << ",\n" <<
            "    uncompressed_len: " << obj.uncompressed_len << ",\n" <<
            "    name_len: " << obj.name_len << ",\n" <<
            "    extra_field_len: " << obj.extra_field_len <<
            ")";
        }
    };
    
    struct CentralDirHeader {
        SICE uint32_t   signature = 0x02014b50;
        uint16_t        made_version = 45;
        uint16_t        version = 20;
        uint16_t        general_flag = 0;
        uint16_t        compression_method = 8; // deflated.
        uint16_t        mod_time = 0;
        uint16_t        mod_date = 0;
        uint32_t        crc = 0; // TO ASSIGN.
        uint32_t        compressed_len; // TO ASSIGN.
        uint32_t        uncompressed_len; // TO ASSIGN.
        uint16_t        name_len; // TO ASSIGN.
        uint16_t        extra_field_len = 0;
        uint16_t        comment_len = 0;
        uint16_t        disk = 0;
        uint16_t        internal_file_attr = 0;
        uint32_t        external_file_attr = 0;
        uint32_t        relative_offset = 0;
    };
    
    struct EndOfCentralDir {
        SICE uint32_t   signature = 0x06054b50;
        uint16_t        disk = 0;
        uint16_t        start_central_disk = 0;
        uint16_t        start_disk_entries; // TO ASSIGN.
        uint16_t        entries; // TO ASSIGN.
        uint32_t        central_size; // TO ASSIGN.
        uint32_t        relative_offset; // TO ASSIGN
        uint16_t        comment_len = 0;
    };

    // ---------------------------------------------------------
    // Attributes.

    Path    m_path;
    
    // ---------------------------------------------------------
    // Private functions.
    
    // Compute CRC-32 checksum.
    static
    uint32_t compute_crc32(const char* data, const ullong& len) {
        uLong crc = crc32(0L, Z_NULL, 0);
        crc = crc32(crc, (uchar*) data, len);
        return (uint32_t) crc;
    }

    // Write local file header for a file entry
    void    write_file_header(File& archive, const Entry& entry, const FileHeader& header) const {
        
        // Write header.
        archive.write((char*) &header.signature, sizeof(header.signature));
        archive.write((char*) &header.version, sizeof(header.version));
        archive.write((char*) &header.general_flag, sizeof(header.general_flag));
        archive.write((char*) &header.compression_method, sizeof(header.compression_method));
        archive.write((char*) &header.mod_time, sizeof(header.mod_time));
        archive.write((char*) &header.mod_date, sizeof(header.mod_date));
        archive.write((char*) &header.crc, sizeof(header.crc));
        archive.write((char*) &header.compressed_len, sizeof(header.compressed_len));
        archive.write((char*) &header.uncompressed_len, sizeof(header.uncompressed_len));
        archive.write((char*) &header.name_len, sizeof(header.name_len));
        archive.write((char*) &header.extra_field_len, sizeof(header.extra_field_len));
        
        // File name
        archive.write(entry.sub_path.c_str(), entry.sub_path.len());
        
        // Flush to file.
        archive.flush();
        
    }

    // Write central directory file header for a file entry
    void    write_central_dir_header(File& archive, const Entry& entry, const CentralDirHeader& header) const {
        
        // Write header.
        archive.write((char*) &header.signature, sizeof(header.signature));
        archive.write((char*) &header.made_version, sizeof(header.made_version));
        archive.write((char*) &header.version, sizeof(header.version));
        archive.write((char*) &header.general_flag, sizeof(header.general_flag));
        archive.write((char*) &header.compression_method, sizeof(header.compression_method));
        archive.write((char*) &header.mod_time, sizeof(header.mod_time));
        archive.write((char*) &header.mod_date, sizeof(header.mod_date));
        archive.write((char*) &header.crc, sizeof(header.crc));
        archive.write((char*) &header.compressed_len, sizeof(header.compressed_len));
        archive.write((char*) &header.uncompressed_len, sizeof(header.uncompressed_len));
        archive.write((char*) &header.name_len, sizeof(header.name_len));
        archive.write((char*) &header.extra_field_len, sizeof(header.extra_field_len));
        archive.write((char*) &header.comment_len, sizeof(header.comment_len));
        archive.write((char*) &header.disk, sizeof(header.disk));
        archive.write((char*) &header.internal_file_attr, sizeof(header.internal_file_attr));
        archive.write((char*) &header.external_file_attr, sizeof(header.external_file_attr));
        archive.write((char*) &header.relative_offset, sizeof(header.relative_offset));

        // File name
        archive.write(entry.sub_path.c_str(), entry.sub_path.len());
        
        // Flush to file.
        archive.flush();
        
    }

    // Write end of central directory record
    void    write_end_of_central_dir(File& archive, const EndOfCentralDir& header) const {
        
        // Write header.
        archive.write((char*) &header.signature, sizeof(header.signature));
        archive.write((char*) &header.disk, sizeof(header.disk));
        archive.write((char*) &header.start_central_disk, sizeof(header.start_central_disk));
        archive.write((char*) &header.start_disk_entries, sizeof(header.start_disk_entries));
        archive.write((char*) &header.entries, sizeof(header.entries));
        archive.write((char*) &header.central_size, sizeof(header.central_size));
        archive.write((char*) &header.relative_offset, sizeof(header.relative_offset));
        archive.write((char*) &header.comment_len, sizeof(header.comment_len));
        
        // Flush to file.
        archive.flush();
        
    }
    
// Public.
public:
    // ---------------------------------------------------------
    // Constructor.

    // Default constructor.
    constexpr
    Zip() = default;
    
    // Constructor from path.
    constexpr
    Zip(const Path& path) :
    m_path(path) {}

    // Copy constructor.
    constexpr
    Zip(const Zip& obj) :
    m_path(obj.m_path) {}

    // Move constructor.
    constexpr
    Zip(Zip&& obj) :
    m_path(move(obj.m_path)) {}

    // ---------------------------------------------------------
    // Functions.

    // Compress data.
    /*  @docs {
     *  @title: Create
     *  @description:
     *      Create a zip archive from a file or directory.
     *  @parameter: {
     *      @name: _source
     *      @description: The source file or directory.
     *  }
     *  @usage:
     *      vlib::Zip zip("/tmp/zip.archive");
     *      zip.create("/tmp/dir/");
     } */
    void    create(const Path& _source) const {
        
        // Vars.
        Path                    source = _source; // Make non const for certain funcs
        File                    output (m_path);
        Compression             compression (Z_BEST_COMPRESSION);
        Array<Entry>            entries;
        Array<FileHeader>       file_headers;
        Array<CentralDirHeader> central_dir_headers;
        EndOfCentralDir         end_of_central_dir;
        
        // Remove & check.
        if (m_path.exists()) {
            m_path.remove();
        }
        if (!source.exists()) {
            throw exceptions::FileNotFoundError("File \"", source, "\" does not exist.");
        }
        
        // Open output file.
        output.close();
        output.open();
        
        // Path is a file.
        if (source.is_file()) {
            throw exceptions::CreateError("TODO.");
        }
        
        // Path is a dir.
        else {
            
            // Vars.
            
            // Create entries.
            const ullong slice = source.len() + 1;
            
            for (auto& path: source.paths()) {
                
                // Skip.
                if (path.is_dir()) {
                    continue;
                }
                
                // Vars.
                String sub_path = path.slice(slice);
                String data = path.load();
                String compressed = compression.compress(data);
                uint32_t crc = compute_crc32(data.data(), data.len());
                
                // Append.
                file_headers.append(FileHeader {
                    .crc = (uint32_t) crc,
                    .compressed_len = (uint32_t) compressed.len(),
                    .uncompressed_len = (uint32_t) data.len(),
                    .name_len = (uint16_t) sub_path.len(),
                });
                central_dir_headers.append(CentralDirHeader {
                    .crc = (uint32_t) crc,
                    .compressed_len = (uint32_t) compressed.len(),
                    .uncompressed_len = (uint32_t) data.len(),
                    .name_len = (uint16_t) sub_path.len(),
                });
                entries.append(Entry {
                    .sub_path = move(sub_path),
                    .data = move(data),
                    .compressed = move(compressed),
                });
                
            }
            
            // Write files.
            for (auto& index: entries.indexes()) {
                central_dir_headers[index].relative_offset = ftell(output.file());
                write_file_header(output, entries[index], file_headers[index]);
                output.write(entries[index].compressed.data(), entries[index].compressed.len());
                // output.write(entries[index].data.data(), entries[index].data.len());
            }
            
            // Write central dir.
            end_of_central_dir.relative_offset = ftell(output.file());
            for (auto& index: entries.indexes()) {
                write_central_dir_header(output, entries[index], central_dir_headers[index]);
            }
            end_of_central_dir.start_disk_entries = entries.len();
            end_of_central_dir.entries = entries.len();
            end_of_central_dir.central_size = ftell(output.file()) - end_of_central_dir.relative_offset;
            write_end_of_central_dir(output, end_of_central_dir);
            
        }
        
        // Close.
        output.close();
        
        // Read output to check if the written data can be decompressed.
        // Which works.
        String data = Path::load(m_path);
        ullong pos = 0, file_header_index = 0;
        constexpr uint signature_len = sizeof(uint32_t);
        while (pos + signature_len <= data.len()) {
            const uint32_t signature = *((uint32_t*) &data[pos]);
            
            // File header.
            if (signature == FileHeader::signature) {
                print("Found file header at ", pos, ".");
                
                FileHeader header;
                pos += sizeof(uint32_t); // skip signature.
                
                header.version = *((uint16_t*) &data[pos]);
                pos += sizeof(uint16_t);
                
                header.general_flag = *((uint16_t*) &data[pos]);
                pos += sizeof(uint16_t);
                
                header.compression_method = *((uint16_t*) &data[pos]);
                pos += sizeof(uint16_t);
                
                header.mod_time = *((uint16_t*) &data[pos]);
                pos += sizeof(uint16_t);
                
                header.mod_date = *((uint16_t*) &data[pos]);
                pos += sizeof(uint16_t);
                
                header.crc = *((uint32_t*) &data[pos]);
                pos += sizeof(uint32_t);
                
                header.compressed_len = *((uint32_t*) &data[pos]);
                pos += sizeof(uint32_t);
                
                header.uncompressed_len = *((uint32_t*) &data[pos]);
                pos += sizeof(uint32_t);
                
                header.name_len = *((uint16_t*) &data[pos]);
                pos += sizeof(uint16_t);
                
                header.extra_field_len = *((uint16_t*) &data[pos]);
                pos += sizeof(uint16_t);
                
                String sub_path (&data[pos], header.name_len);
                pos += header.name_len;
                
                String compressed (&data[pos], header.compressed_len);
                compression.m_level = header.compression_method;
                String raw = compression.decompress(compressed);
                
                // They are all equal the original.
                print("HEADER EQ = ", header == file_headers[file_header_index]);
                print("SUBPATH EQ = ", sub_path == entries[file_header_index].sub_path);
                print("DATA EQ = ", raw == entries[file_header_index].data);
                ++file_header_index;
            }
            
            // Central dir header.
            else if (signature == CentralDirHeader::signature) {
                print("Found central dir header at ", pos, ".");
            }
            
            // End of central dir header.
            else if (signature == EndOfCentralDir::signature) {
                print("Found end of central dir header at ", pos, ".");
            }
            pos += 1;
        }
        
    }

};

zip归档是用以下代码创建的。

// Bind a Zip object to the path of the archive, then pack the files of the
// source directory into the archive at that path.
Zip zip("/tmp/archive.zip");
zip.create("/tmp/dir/");

编辑。

这是Zip使用的压缩函数。m_level是zlib的压缩级别,默认为Z_BEST_COMPRESSION。m_limit是允许压缩的最大字节数。

// Compress a buffer with zlib's deflate.
//
// Parameters:
//   data:        pointer to the bytes to compress.
//   len:         number of bytes to compress.
//   window_bits: zlib's windowBits, which also selects the container format:
//                  -8 to -15         raw deflate (required for zip entries),
//                   8 to 15          zlib,
//                  (8 to 15) + 16    gzip.
//                Defaults to 15 + 16 (gzip), the format this function
//                always produced before the parameter existed.
//
// Returns the compressed bytes, or an empty String when "len" is 0.
//
// Throws a LimitError when "len" exceeds m_limit or m_max, and a
// DeflateError when zlib can not be initialized or does not finish the stream.
String  compress(const char* data, const ullong& len, const int& window_bits = 15 + 16) const {
    if (len == 0) { return String(); }

    // Verify if len input will fit into uint, type used for zlib's avail_in
    if (len > m_limit) {
        throw exceptions::LimitError(max_len_err);
    }

    // Verify length.
    if (m_max != -1 && len > (Length) m_max) {
        throw exceptions::LimitError(max_len_err);
    }

    z_stream deflate_s;
    deflate_s.zalloc = Z_NULL;
    deflate_s.zfree = Z_NULL;
    deflate_s.opaque = Z_NULL;
    deflate_s.avail_in = 0;
    deflate_s.next_in = Z_NULL;

    constexpr int mem_level = 8;
    // The memory requirements for deflate are (in bytes):
    // (1 << (window_bits+2)) +  (1 << (mem_level+9))
    // with a default value of 8 for mem_level and a window size of 15 bits
    // this is 128Kb

    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wold-style-cast"
    if (deflateInit2(&deflate_s, m_level, Z_DEFLATED, window_bits, mem_level, Z_DEFAULT_STRATEGY) != Z_OK) {
        throw exceptions::DeflateError(deflate_err);
    }
    #pragma GCC diagnostic pop

    deflate_s.next_in = reinterpret_cast<z_const Bytef*>((char*) data);
    deflate_s.avail_in = static_cast<uint>(len);

    // All input is offered at once with Z_FINISH, deflate returns Z_OK for as
    // long as it needs more output space and Z_STREAM_END when it is done.
    String output;
    const Length increase = len / 2 + 1024;
    int status;
    do {
        output.expand(increase);

        // There is no way we see that "increase" would not fit in an uint,
        // hence we use static cast here to avoid -Wshorten-64-to-32 error
        deflate_s.avail_out = static_cast<uint>(increase);
        deflate_s.next_out = reinterpret_cast<Bytef*>((output.data() + output.len()));

        status = deflate(&deflate_s, Z_FINISH);
        output.len() += (increase - deflate_s.avail_out);
    } while (status == Z_OK);

    // Always release the stream, also when deflate failed.
    deflateEnd(&deflate_s);
    if (status != Z_STREAM_END) {
        throw exceptions::DeflateError(deflate_err);
    }
    return output;
}

我使用的是macOS 13.4和unzip 6.00(我也尝试过较新的版本,但问题依旧)。
下面是一些zipinfo / unzip日志。

$ unzip -t ../archive.zip
Archive:  ../archive.zip
    testing: install                 
  error:  invalid compressed data to inflate
    testing: README.md               
  error:  invalid compressed data to inflate
At least one error was detected in ../archive.zip.
$ zipinfo ../archive.zip
Archive:  ../archive.zip
Zip file size: 1142 bytes, number of entries: 2
-rw----     4.5 fat     1242 b- defN 80-000-00 00:00 install
-rw----     4.5 fat      917 b- defN 80-000-00 00:00 README.md
2 files, 2159 bytes uncompressed, 936 bytes compressed:  56.6%
$ zipinfo -v ../archive.zip
Archive:  ../archive.zip
There is no zipfile comment.

End-of-central-directory record:
-------------------------------

  Zip archive file size:                      1142 (0000000000000476h)
  Actual end-cent-dir record offset:          1120 (0000000000000460h)
  Expected end-cent-dir record offset:        1120 (0000000000000460h)
  (based on the length of the central directory and its expected offset)

  This zipfile constitutes the sole disk of a single-part archive; its
  central directory contains 2 entries.
  The central directory is 108 (000000000000006Ch) bytes long,
  and its (expected) offset in bytes from the beginning of the zipfile
  is 1012 (00000000000003F4h).

Central directory entry #1:
---------------------------

  install

  offset of local header from start of archive:   0
                                                  (0000000000000000h) bytes
  file system or operating system of origin:      MS-DOS, OS/2 or NT FAT
  version of encoding software:                   4.5
  minimum file system compatibility required:     MS-DOS, OS/2 or NT FAT
  minimum software version required to extract:   2.0
  compression method:                             deflated
  compression sub-type (deflation):               normal
  file security status:                           not encrypted
  extended local header:                          no
  file last modified on (DOS date/time):          1980 000 0 00:00:00
  32-bit CRC value (hex):                         440cf502
  compressed size:                                424 bytes
  uncompressed size:                              1242 bytes
  length of filename:                             7 characters
  length of extra field:                          0 bytes
  length of file comment:                         0 characters
  disk number on which file begins:               disk 1
  apparent file type:                             binary
  non-MSDOS external file attributes:             000000 hex
  MS-DOS file attributes (00 hex):                none

  There is no file comment.

Central directory entry #2:
---------------------------

  README.md

  offset of local header from start of archive:   461
                                                  (00000000000001CDh) bytes
  file system or operating system of origin:      MS-DOS, OS/2 or NT FAT
  version of encoding software:                   4.5
  minimum file system compatibility required:     MS-DOS, OS/2 or NT FAT
  minimum software version required to extract:   2.0
  compression method:                             deflated
  compression sub-type (deflation):               normal
  file security status:                           not encrypted
  extended local header:                          no
  file last modified on (DOS date/time):          1980 000 0 00:00:00
  32-bit CRC value (hex):                         1db2d91a
  compressed size:                                512 bytes
  uncompressed size:                              917 bytes
  length of filename:                             9 characters
  length of extra field:                          0 bytes
  length of file comment:                         0 characters
  disk number on which file begins:               disk 1
  apparent file type:                             binary
  non-MSDOS external file attributes:             000000 hex
  MS-DOS file attributes (00 hex):                none

  There is no file comment.
kx1ctssn

kx1ctssn1#

您需要对zip条目使用raw deflate(不带zlib或gzip头尾的原始deflate流)。将window_bits设置为-15。
您应该能够处理大于unsigned int最大值的输入大小。您已经有了一个用于多个deflate()调用的循环,所以您只需要更新avail_in,并且只在提供最后一个输入时使用Z_FINISH。类似于:

// Sketch of feeding an input larger than UINT_MAX to deflate() in chunks.
// The "..." lines stand for code that is elided in this fragment.
...
    deflate_s.avail_in = 0;
    do {
        // Hand the next chunk to zlib once the previous one has been consumed,
        // avail_in is an unsigned int so a chunk is at most UINT_MAX bytes.
        if (deflate_s.avail_in == 0) {
            deflate_s.avail_in = len > UINT_MAX ? UINT_MAX : (uint)len;
            len -= deflate_s.avail_in;
        }
        ...
        // Z_FINISH is only passed once no input is left to hand over.
        deflate(&deflate_s, len ? Z_NO_FLUSH : Z_FINISH);
        ...
    // NOTE(review): as written the loop stops as soon as "len" reaches 0, even
    // when the output buffer was filled completely; this looks like it should
    // be "len || deflate_s.avail_out == 0" - confirm against the elided code.
    } while (len && deflate_s.avail_out == 0);
    ...

这样就不再需要m_max了。

相关问题