java 持久化sha256哈希对象?

3duebb1j  于 2023-05-21  发布在  Java
关注(0)|答案(2)|浏览(110)

我需要一个 Python/C/C++/Java 实现,它可以暂停散列计算,并将当前进度保存到文件中,以便稍后从该文件恢复并继续计算。
无论用上面列出的哪种语言编写,它都应该能在 Python 中正常使用。最好能与“hashlib”配合使用,但这不是必需的。此外,如果已经有现成的实现,给出一个链接就足够了。

下面的例子大致说明了你的实现应该达到的效果。

import hashlib
# hashpersist is the module being asked for; the calls below show the
# wanted API.
import hashpersist #THIS IS NEEDED.

# Hash the first part of the message, then save the in-progress hash
# object to a file.
sha256 = hashlib.sha256("Hello ")
hashpersist.save_state(sha256, open('test_file', 'w'))

# Later: rebuild the hash object from the file and feed it the rest of
# the message.
sha256_recovered = hashpersist.load_state(open('test_file', 'r'))
sha256_recovered.update("World")
print sha256_recovered.hexdigest()

其输出应该与使用标准 SHA-256 函数直接对“Hello World”进行散列的结果相同。

a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e
6mzjoqzu

6mzjoqzu1#

事实证明,重写 hashlib 使其可恢复比我想象的要容易,至少 SHA-256 部分是这样。我花了一些时间摆弄调用 OpenSSL 加密库的 C 代码,但后来意识到并不需要那些东西,直接使用 ctypes 就可以了。

rehash.py

#! /usr/bin/env python

''' A resumable implementation of SHA-256 using ctypes with the OpenSSL crypto library

    Written by PM 2Ring 2014.11.13
'''

import os
from ctypes import *

# Sizes taken from OpenSSL's <openssl/sha.h>.
SHA_LBLOCK = 16             # number of words in the context's data buffer
SHA256_DIGEST_LENGTH = 32   # digest size in bytes

class SHA256_CTX(Structure):
    ''' ctypes mirror of OpenSSL's SHA256_CTX struct.

        OpenSSL declares every word of this struct as SHA_LONG, which is
        `unsigned int`, so the fields must be c_uint. c_long is 8 bytes
        on LP64 platforms (64-bit Linux / macOS), which made the Python
        view of the struct disagree with the layout OpenSSL writes.
        With c_uint the struct is 112 bytes on every platform.
    '''
    _fields_ = [
        ("h", c_uint * 8),
        ("Nl", c_uint),
        ("Nh", c_uint),
        ("data", c_uint * SHA_LBLOCK),
        ("num", c_uint),
        ("md_len", c_uint)
    ]

# Byte-array type with one element per digest byte; digest() passes an
# instance of it to SHA256_Final() as the output buffer.
HashBuffType = c_ubyte * SHA256_DIGEST_LENGTH

# Load the OpenSSL library once, at import time: libeay32.dll on Windows
# (os.name == "nt"), libssl.so everywhere else.
# Alternative for non-Windows systems: cdll.LoadLibrary("libcrypto.so")
crypto = cdll.LoadLibrary("libeay32.dll" if os.name == "nt" else "libssl.so")

class sha256(object):
    ''' Resumable SHA-256 hash object built on OpenSSL's SHA256_* routines.

        Provides update(), digest(), hexdigest(), copy() and digest_size
        in the style of hashlib.sha256. The whole hashing state is held
        in `self.ctx` (a SHA256_CTX), so saving and restoring that
        context is enough to resume hashing later.
    '''
    digest_size = SHA256_DIGEST_LENGTH

    def __init__(self, datastr=None):
        ''' Start a new hash; if `datastr` is non-empty, hash it at once. '''
        self.ctx = SHA256_CTX()
        crypto.SHA256_Init(byref(self.ctx))
        if datastr:
            self.update(datastr)

    def update(self, datastr):
        ''' Feed the byte string `datastr` into the hash. '''
        # SHA256_Update() declares its length parameter as size_t, so the
        # length is passed as c_size_t; a c_int cannot represent lengths
        # of 2 GiB and above.
        crypto.SHA256_Update(byref(self.ctx), datastr, c_size_t(len(datastr)))

    def _copy_ctx(self):
        ''' Return an independent copy of the current context. '''
        ctx = SHA256_CTX()
        # Assigning through the pointer copies the structure contents.
        pointer(ctx)[0] = self.ctx
        return ctx

    def copy(self):
        ''' Return a new sha256 object with the same state as this one. '''
        other = sha256()
        other.ctx = self._copy_ctx()
        return other

    def digest(self):
        ''' Return the digest of the data fed so far as a byte string.

            More data may still be passed to update() afterwards.
        '''
        # SHA256_Final() clears the context it is handed, so finalise a
        # throw-away copy; self.ctx is never touched and hashing can go on.
        ctx = self._copy_ctx()
        hashbuff = HashBuffType()
        crypto.SHA256_Final(hashbuff, byref(ctx))
        return str(bytearray(hashbuff))

    def hexdigest(self):
        ''' Return the digest as a string of hexadecimal digits. '''
        return self.digest().encode('hex')

#Tests
def main():
    ''' Self-test: print digests produced by this module's sha256, then
        the matching digests from hashlib.sha256, so that the two
        listings can be compared by eye.
    '''
    import hashlib
    import cPickle

    data = ("Nobody expects ", "the spammish ", "imposition!")
    first, second, third = data
    whole = ''.join(data)

    print("rehash\n")

    # One-shot hash of the complete message
    sha_whole = sha256(whole)
    print(sha_whole.hexdigest())
    print(repr(sha_whole.digest()))
    print("digest size = %s" % sha_whole.digest_size)
    print("")

    # Incremental hash, first piece only
    sha_part = sha256()
    sha_part.update(first)
    print(sha_part.hexdigest())

    # Round-trip the partially fed object through pickle
    pickled = cPickle.dumps(sha_part, -1)
    print("Pickle length: %s" % len(pickled))
    sha_restored = cPickle.loads(pickled)

    sha_restored.update(second)
    print(sha_restored.hexdigest())

    # Take a copy (which, unlike a hashlib copy, could be pickled); the
    # copy itself is not inspected any further.
    sha_clone = sha_restored.copy()

    sha_restored.update(third)
    print(sha_restored.hexdigest())

    # Reference values from hashlib.sha256()
    print("\nhashlib\n")

    ref_whole = hashlib.sha256(whole)
    print(ref_whole.hexdigest())
    print(repr(ref_whole.digest()))
    print("digest size = %s" % ref_whole.digest_size)
    print("")

    ref_part = hashlib.sha256(first)
    print(ref_part.hexdigest())

    ref_part.update(second)
    print(ref_part.hexdigest())

    # hashlib objects can be copied, but the copy can NOT be pickled
    ref_clone = ref_part.copy()
    ref_clone.update(third)
    print(ref_clone.hexdigest())

if __name__ == '__main__':
    main()

resumable_SHA-256.py

#! /usr/bin/env python

''' Resumable SHA-256 hash for large files using the OpenSSL crypto library

    The hashing process may be interrupted by Control-C (SIGINT) or SIGTERM.
    When a signal is received, hashing continues until the end of the
    current chunk, then the current file position, total file size, and
    the sha object is saved to a file. The name of this file is formed by
    appending '.hash' to the name of the file being hashed.

    Just re-run the program to resume hashing. The '.hash' file will be deleted
    once hashing is completed.

    Written by PM 2Ring 2014.11.14
'''

import cPickle as pickle
import os
import signal
import sys

import rehash

# Stop request flag; the signal handler sets it to True.
quit = False

# Files are read in 64 kB blocks, 256 blocks to a chunk. The stop flag
# is only checked between chunks.
blocksize = 64 * 1024
blocksperchunk = 256

chunksize = blocksperchunk * blocksize

def handler(signum, frame):
    ''' Signal handler: report the signal number and raise the
        module-level `quit` flag so the hashing loop can stop cleanly.
        `frame` is accepted for the signal-handler signature but unused.
    '''
    global quit
    print("\nGot signal %d, cleaning up." % signum)
    quit = True

def do_hash(fname, filesize):
    ''' Hash the file `fname`, resuming from '<fname>.hash' if it exists.

        fname:    path of the file to hash.
        filesize: current size of that file in bytes; it must equal the
                  size recorded in an existing '.hash' state file.

        Hashing proceeds in chunks of `blocksperchunk` blocks and stops
        early, at a chunk boundary, once the module-level `quit` flag is
        set. If the end of the file was not reached, the position, file
        size and hash object are pickled to '<fname>.hash'; otherwise
        any existing state file is removed.

        Returns (finished, pos, hexdigest): whether the end of the file
        was reached, the number of bytes hashed so far, and the digest
        of those bytes.

        Exits the program with status 1 if the recorded size differs
        from `filesize`.
    '''
    hashname = fname + '.hash'
    if os.path.exists(hashname):
        # NOTE(security): unpickling can execute arbitrary code. Only
        # resume from '.hash' files written by this program.
        with open(hashname, 'rb') as f:
            pos, fsize, sha = pickle.load(f)
        if fsize != filesize:
            print("Error: file size of '%s' doesn't match size recorded in '%s'" % (fname, hashname))
            print("%d != %d. Aborting" % (fsize, filesize))
            # sys.exit() rather than the site-provided exit() helper,
            # which is not guaranteed to exist (e.g. under `python -S`).
            sys.exit(1)
    else:
        pos, fsize, sha = 0, filesize, rehash.sha256()

    finished = False
    with open(fname, 'rb') as f:
        f.seek(pos)
        while not (quit or finished):
            for _ in xrange(blocksperchunk):
                block = f.read(blocksize)
                if not block:
                    finished = True
                    break
                sha.update(block)
                # Count the bytes actually read, so that a short final
                # block does not push pos past the end of the file.
                pos += len(block)

            # An empty file has fsize == 0; report it as fully done
            # instead of dividing by zero.
            percent = 100.0 * pos / fsize if fsize else 100.0
            sys.stderr.write(" %6.2f%% of %d\r" % (percent, fsize))

    # Decide on `finished`, not on `quit`: a signal that arrives while
    # the last chunk is being read must not turn a complete hash into
    # an "incomplete" result with a leftover state file.
    if not finished:
        with open(hashname, 'wb') as f:
            pickle.dump((pos, fsize, sha), f, -1)
    elif os.path.exists(hashname):
        os.remove(hashname)

    return finished, pos, sha.hexdigest()

def main():
    ''' Command-line entry point.

        Expects exactly one argument, the name of the file to hash.
        Installs `handler` for SIGINT and SIGTERM, runs do_hash() and
        prints either the final digest or a progress report.
    '''
    if len(sys.argv) != 2:
        print("Resumable SHA-256 hash of a file.")
        print("Usage:\npython %s filename\n" % sys.argv[0])
        exit(1)

    target = sys.argv[1]
    filesize = os.path.getsize(target)

    # Both signals request the same clean stop
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, handler)

    finished, pos, hexdigest = do_hash(target, filesize)
    if not finished:
        print("sha-256 hash of '%s' incomplete" % target)
        print("%s" % hexdigest)
        print("%d / %d bytes processed." % (pos, filesize))
        return

    print("%s  %s" % (hexdigest, target))

if __name__ == '__main__':
    main()

demo

import rehash
import pickle

# Hash the first part of the message and snapshot the raw context
partial = rehash.sha256("Hello ")
saved_ctx = pickle.dumps(partial.ctx)

# A fresh object takes over the saved context and carries on from there
resumed = rehash.sha256()
resumed.ctx = pickle.loads(saved_ctx)
resumed.update("World")
print(resumed.hexdigest())

输出

a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e

编辑

我刚刚做了一个小改动,使 rehash 也能在 Windows 上工作,不过我只在 WinXP 上测试过。libeay32.dll 可以放在当前目录中,也可以放在系统库搜索路径中的某个位置,例如 WINDOWS\system32。我那相当古老(而且基本不用)的 XP 系统找不到这个 .dll,尽管 OpenOffice 和 Avira 都在使用它。所以我就把它从 Avira 的文件夹复制到了 system32,现在它可以完美工作了。:)

e4yzc0pl

e4yzc0pl2#

一个纯 Python 实现,支持将散列状态导出为 Python dict 以及从 dict 导入:https://pypi.org/project/sha256bit/
演示:

>>> from sha256bit import sha256bit
>>> h1 = sha256bit("a".encode())
>>> state = h1.export_state()
>>> h2 = sha256bit.import_state(state)
>>> h2.update("bc".encode())
>>> h2.hexdigest()
'ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad'

state是一个可以持久化的常规python dict。

相关问题