Node.js mmap-object 模块与内存使用问题

mxg2im7a  于 2023-03-01  发布在  Node.js
关注(0)|答案(1)|浏览(210)

我有一个很大的(ASCII)文本文件,我想把它作为内存映射文件存储在Node.js中。我使用mmap-object模块来实现这个目的,但是在创建新的mmap-object实例时,我很难弄清楚应该如何分配合适的内存。
文本文件是一个条目列表,每一个条目用换行符分隔,我想把每一行作为共享对象的一个属性存储,下面是使用mmap-object创建新共享对象的方法签名:

new Create(path, [file_size], [initial_bucket_count], [max_file_size]);

我知道文本文件中的行数,所以我将该值用于initial_bucket_count参数,但是,我不确定file_size参数使用什么值。
这就是我当前使用mmap-object创建共享对象的方式:

// Positional arguments follow the Create(path, [file_size], [initial_bucket_count], ...) signature:
// file_size is passed as twice textFileSizeInKByte, initial_bucket_count as the number of lines.
const sharedObject = new Shared.Create(filePath, textFileSizeInKByte * 2, linesCount);

对于文本文件中的每一行,我尝试用两种方式存储行内容:

// Zero-based index of the current line; used as the property key on sharedObject.
let lineIndex = 0;

// `rl` is presumably a readline interface over the text file — confirm against the caller.
for await (const line of rl) {

    // Option 1: Store the line as an ASCII-encoded buffer
    sharedObject[lineIndex] = Buffer.from(line, "ascii");

    // Option 2: Store the line as a regular string
    // NOTE: the two options are alternatives; as written, this assignment
    // replaces the value Option 1 just stored under the same key.
    sharedObject[lineIndex] = line;

    lineIndex++;
}

然而,在这两种情况下,内存映射文件最终都比原始文本文件大得多。我理解会有一些开销,但是对于大约220 MB的文本文件,内存使用量可能高达文件大小的两倍;对于大约2.5 GB的较大文本文件,内存使用量可能高达文件大小的五倍。
有人能帮助我理解为什么内存使用量比文件大小高得多,以及如何减少内存Map文件使用的内存量吗?

xt0899hw

xt0899hw1#

你能解释一下你的意思吗?对非随机访问的文件做内存映射是非常少见的,所以也许你的意思是:从一个文本文件中读取数据,放入共享内存中的另一种数据结构。
如果是这样,考虑一个具有精心选择的数据结构的段管理器:

// The memory-mapped file that backs the shared data, and the manager that allocates inside it.
using Segment = bip::managed_mapped_file;
using Mgr     = Segment::segment_manager;

// Allocator that draws from the segment; the scoped adaptor passes the allocator
// on to elements that are themselves allocator-aware.
template <typename Elem>
using Alloc = bc::scoped_allocator_adaptor<bip::allocator<Elem, Mgr>>;

// Flat (sorted, contiguous) map whose storage comes from the segment.
template <typename Key, typename Mapped, typename Compare = std::less<Key>>
using Map = bc::flat_map<Key, Mapped, Compare, Alloc<std::pair<Key const, Mapped>>>;

// Character string whose buffer is allocated from the segment.
using String = bc::basic_string<char, std::char_traits<char>, Alloc<char>>;

// String-to-string dictionary stored in the segment.
using Dict = Map<String, String>;

// Forward declarations of the helpers used by main.
bool load_from_textfile(std::string const& fname, Dict& into);
void dump_table(Dict const& d);

int main() {
    Segment mapping(bip::open_or_create, "mapped.file", 30ull << 30);
    auto&   dict = *mapping.find_or_construct<Dict>("shared_table")(mapping.get_segment_manager());
    dict.emplace("Hello world", "Bye");
    load_from_textfile("input.txt", dict);
    dump_table(dict);
}

这里,我们将使用libfmt实现转储:

#include <fmt/ranges.h>
// Let libfmt format any boost::container string by reusing the std::string_view formatter.
template <typename... Params>
struct fmt::formatter<bc::basic_string<Params...>> : fmt::formatter<std::string_view> {};

// Prints every entry of the dictionary on its own " - " bullet line under a heading.
void dump_table(Dict const& d) {
    // Alternative one-line form: fmt::print("shared table: {}\n", d);
    fmt::print("shared table:\n - {}\n", fmt::join(d.begin(), d.end(), "\n - "));
}

我们读取如下的文本文件(input.txt):

tumid=peyote titbit bemusing Alpert
"obsesses contains = equals sign"=Leila
epicures
midterm
pirates
pads
bobcat
cognac
docket        = abrade synchs councilwomen
epilepsy      = dawdled AC capitulated
freelances    = plenteous sloops above
watchers      = disproving tiaras unbinding
drudging      = scamps unbearable hydrology
temporized    = dart custodian dissimulating
Wilcox        = coopered initiated overprinting
Hazlitt       = amphibious soulful opts
chiropractics = month invocations Laue
Fm            = coin slime conspirator
Goya          = slender included embroiled
taxidermist   = Buckingham spates hairlines
sadists       = perfidious gibe Leiden
godless       = welted assiduous negation
spacier       = snorkels airfoil noisier
robocalled    = Glover snuffers magnified
Kirsten       = spotters Rojas lounging
creepy        = asseverating phased overflows
vertebrates   = propensities somewhats cinchonas
meaningless   = tinsel bewitching photojournalist
halogens      = routing runes necklines
alligator     = anus avid

完整的代码清单如下:

#include <boost/container/flat_map.hpp>
#include <boost/container/scoped_allocator.hpp>
#include <boost/container/string.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/managed_mapped_file.hpp>
namespace bip = boost::interprocess;
namespace bc  = boost::container;

// The memory-mapped file that backs the shared data, and the manager that allocates inside it.
using Segment = bip::managed_mapped_file;
using Mgr     = Segment::segment_manager;

// Allocator that draws from the segment; the scoped adaptor passes the allocator
// on to elements that are themselves allocator-aware.
template <typename Elem>
using Alloc = bc::scoped_allocator_adaptor<bip::allocator<Elem, Mgr>>;

// Flat (sorted, contiguous) map whose storage comes from the segment.
template <typename Key, typename Mapped, typename Compare = std::less<Key>>
using Map = bc::flat_map<Key, Mapped, Compare, Alloc<std::pair<Key const, Mapped>>>;

// Character string whose buffer is allocated from the segment.
using String = bc::basic_string<char, std::char_traits<char>, Alloc<char>>;

// String-to-string dictionary stored in the segment.
using Dict = Map<String, String>;

// Forward declarations of the helpers used by main.
bool load_from_textfile(std::string const& fname, Dict& into);
void dump_table(Dict const& d);

int main() {
    Segment mapping(bip::open_or_create, "mapped.file", 30ull << 30);
    auto&   dict = *mapping.find_or_construct<Dict>("shared_table")(mapping.get_segment_manager());
    dict.emplace("Hello world", "Bye");
    load_from_textfile("input.txt", dict);
    dump_table(dict);
}

#include <fmt/ranges.h>
// Let libfmt format any boost::container string by reusing the std::string_view formatter.
template <typename... Params>
struct fmt::formatter<bc::basic_string<Params...>> : fmt::formatter<std::string_view> {};

// Prints every entry of the dictionary on its own " - " bullet line under a heading.
void dump_table(Dict const& d) {
    // Alternative one-line form: fmt::print("shared table: {}\n", d);
    fmt::print("shared table:\n - {}\n", fmt::join(d.begin(), d.end(), "\n - "));
}

#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/interprocess/file_mapping.hpp>
#include <boost/spirit/home/x3.hpp>

/// Parses a `key[=value]` text file and inserts every entry into `into`.
///
/// The file is memory-mapped read-only and parsed in place with Spirit X3.
/// Each line holds a key, optionally followed by `=` and a value that runs to
/// the end of the line. A key is either a double-quoted string (backslash
/// escapes the next character, so it may contain `=`) or a run of characters
/// other than `=`, CR and LF. Blanks are skipped outside the lexeme parts.
/// Lines without `=` yield an empty value.
///
/// @param fname path of the text file to read
/// @param into  dictionary receiving the entries (via `emplace`)
/// @return true when the whole input matched the grammar; entries parsed
///         before a failure have already been inserted when false is returned
///
/// NOTE(review): mapping a zero-length file presumably throws from
/// `bip::mapped_region` — confirm if empty inputs must be supported.
bool load_from_textfile(std::string const& fname, Dict& into) {
    namespace x3 = boost::spirit::x3;
    using boost::fusion::at_c;

    bip::file_mapping  text(fname.c_str(), bip::read_only);
    bip::mapped_region region(text, bip::read_only);
    auto       f = static_cast<char const*>(region.get_address());
    auto const l = f + region.get_size();

    // Wraps an expression in a rule so that its attribute is a std::string.
    auto str   = [](auto expr) { return x3::rule<struct _, std::string>{} = expr; };
    auto key   = str(x3::lexeme['"' >> *('\\' >> x3::char_ | ~x3::char_('"')) >> '"'] | +~x3::char_("=\r\n"));
    auto value = str(-('=' >> x3::lexeme[*(x3::char_ - x3::eol)]));
    auto line  = key >> value;

    auto insert = [&into](auto& ctx) {
        auto& entry = x3::_attr(ctx);
        into.emplace(at_c<0>(entry), at_c<1>(entry));
    };

    // Lines are separated by one or more line breaks, and leading/trailing
    // line breaks are accepted. Without this, a file that ends with a newline
    // (or contains a blank line) is reported as a parse failure: the list
    // parser backtracks before the final eol, so eoi would not match.
    auto const lines = *x3::eol >> (line[insert] % +x3::eol) >> *x3::eol >> x3::eoi;

    return x3::phrase_parse(f, l, lines, x3::blank);
}

输出:

shared table:
 - ("Fm", "coin slime conspirator")
 - ("Goya", "slender included embroiled")
 - ("Hazlitt", "amphibious soulful opts")
 - ("Hello world", "Bye")
 - ("Kirsten", "spotters Rojas lounging")
 - ("Wilcox", "coopered initiated overprinting")
 - ("alligator", "anus avid")
 - ("bobcat", "")
 - ("chiropractics", "month invocations Laue")
 - ("cognac", "")
 - ("creepy", "asseverating phased overflows")
 - ("docket", "abrade synchs councilwomen")
 - ("drudging", "scamps unbearable hydrology")
 - ("epicures", "")
 - ("epilepsy", "dawdled AC capitulated")
 - ("freelances", "plenteous sloops above")
 - ("godless", "welted assiduous negation")
 - ("halogens", "routing runes necklines")
 - ("meaningless", "tinsel bewitching photojournalist")
 - ("midterm", "")
 - ("obsesses contains = equals sign", "Leila")
 - ("pads", "")
 - ("pirates", "")
 - ("robocalled", "Glover snuffers magnified")
 - ("sadists", "perfidious gibe Leiden")
 - ("spacier", "snorkels airfoil noisier")
 - ("taxidermist", "Buckingham spates hairlines")
 - ("temporized", "dart custodian dissimulating")
 - ("tumid", "peyote titbit bemusing Alpert")
 - ("vertebrates", "propensities somewhats cinchonas")
 - ("watchers", "disproving tiaras unbinding")

相关问题