erlang 在Map中合并嵌套值的正确方法?

vlurs2pr  于 2022-12-08  发布在  Erlang
关注(0)|答案(2)|浏览(164)

Given the following:

M1 = #{ "Robert" => #{"Scott" => #{}} },
M2 = #{ "Robert" => #{"Adams" => #{}} }

Merged should be:

M3 = #{ "Robert" => #{ "Scott" => #{}, "Adams" => #{} } }

Now if we merge in the following:

M4 = #{ "William" => #{ "Robert" => #{ "Scott" => #{} }}}
M5 = #{ "William" => #{ "Robert" => #{ "Fitzgerald" => #{} }}}

We should get the following:

M6 = #{ "Robert" => #{ "Scott" => #{}, "Adams" => #{} },
        "William" => #{ "Robert" => #{ "Fitzgerald" => #{}, "Scott" => #{} }}}

I had the idea of iterating: get each level's keys and iterate over them, checking whether they're the same and merging the maps if they are not; then check whether the value is a map or not — if it is not, stop and merge, otherwise call the function again. The problem I'm having is that the function keeps crashing. Is there a better way to do this?
This is the code I have so far:

% Entry point of the attempt: reads the key lists of both maps and passes
% M1's keys to do_merge/3, which walks over them one by one.
% The return value is whatever do_merge/3 returns.
merger(M1, M2) ->
   M1_Keys = maps:keys(M1),
   % NOTE(review): M2_Keys is bound but never used afterwards.
   M2_Keys = maps:keys(M2),
   do_merge(M1, M2, M1_Keys).

% Walks the key list one element at a time. For each element it prints
% whether M1 and M2 have the same keys, then recurses on the rest of the list.
% NOTE(review): nothing is accumulated along the way - M1 and M2 are passed on
% unchanged and the final clause returns the atom ok, so the caller receives
% ok instead of a merged map.
do_merge(M1, M2, [Head|Tail]) ->
   % Compares the complete key sets of M1 and M2; Head is not used here.
   Check = check_if_same(M1, M2),
   io:fwrite("Check is: ~p\n", [Check]),
   case Check of 
     {ok, true} -> 
       io:fwrite("true\n");
     {ok, false} ->
       io:fwrite("false\n")
   end,
   do_merge(M1, M2, Tail);
% Earlier (disabled) attempt at merging the values stored under Head:
%   P1 = maps:get(Head, M1),
%   P2 = maps:get(Head, M2),
%   P3 = maps:merge(P1, P2),
%   M4 = maps:update(Head, P3, M1),
%   io:fwrite("~p \n", [M4]),
%   do_merge(M1, M2, Tail);

% Base case: no keys left. Returns ok, not a map.
do_merge(M1, M2, []) -> 
   ok.
% Returns {ok, true} when the sorted key lists of both maps compare equal
% (with ==), and {ok, false} otherwise. Values are not looked at.
check_if_same(M1, M2) ->
   SortedKeys = fun(Map) -> lists:sort(maps:keys(Map)) end,
   Keys1 = SortedKeys(M1),
   Keys2 = SortedKeys(M2),
   {ok, Keys1 == Keys2}.

However, it crashes with the following error:

$erlc *.erl
helloworld.erl:10: Warning: variable 'M2_Keys' is unused
helloworld.erl:13: Warning: variable 'Head' is unused
helloworld.erl:30: Warning: variable 'M1' is unused
helloworld.erl:30: Warning: variable 'M2' is unused
$erl -noshell -s helloworld start -s init stop
Check is: {ok,true}
true
{"init terminating in do_boot",{{badmap,ok},[{maps,keys,[ok],[]},{helloworld,merger,2,[{file,"helloworld.erl"},{line,10}]},{init,start_em,1,[]},{init,do_boot,3,[]}]}}
init terminating in do_boot ()

Crash dump is being written to: erl_crash.dump...done
yvfmudvl

yvfmudvl1#

As I answered in a previous post, I can't see why you get this result. I need more information: how you start the shell, the command you type, and the complete result.
Unfortunately, I do not have enough time to go into details and comment on your code, so I put here some code that does what you want; if I can, I'll add comments later:

-module (merger).

-compile(export_all).

% yourType = maps(Key :: term() => Value :: yourType()) | #{}.

% merge operation:
%   get all keys from 2 inputs
%   if a key belongs to one input only, insert key => value in the result
%   if a key belongs to 2 inputs, insert key => merge(Value1,value2) in the result
%   
% lets write this

% Deep-merges two nested maps: every key found in either input is handed to
% do_merge/4, which adds the entry for that key to the result map.
merger(M1, M2) ->
   % usort sorts the concatenated key lists and removes duplicates
   AllKeys = lists:usort(maps:keys(M1) ++ maps:keys(M2)),
   AddKey = fun(Key, Result) -> do_merge(Key, M1, M2, Result) end,
   lists:foldl(AddKey, #{}, AllKeys).

% Adds the entry for Key to Acc. When Key is in both maps the two values are
% merged recursively with merger/2; when it is in only one map that map's
% value is taken as is.
do_merge(Key, M1, M2, Acc) ->
   Merged =
      case {maps:is_key(Key, M1), maps:is_key(Key, M2)} of
         {true, true} -> merger(maps:get(Key, M1), maps:get(Key, M2));
         {true, false} -> maps:get(Key, M1);
         {false, true} -> maps:get(Key, M2)
      end,
   Acc#{Key => Merged}.

% Merges the four example maps from the question from left to right with
% merger/2 and returns the final map.
test() ->
    First = #{ "Robert" => #{"Scott" => #{}} },
    Others = [#{ "Robert" => #{"Adams" => #{}} },
              #{ "William" => #{ "Robert" => #{ "Scott" => #{} }}},
              #{ "William" => #{ "Robert" => #{ "Fitzgerald" => #{} }}}],
    lists:foldl(fun(Next, Merged) -> merger(Merged, Next) end, First, Others).

Which gives in the shell:

1> c(merger).    
merger.erl:3: Warning: export_all flag enabled - all functions will be exported
{ok,merger}
2> merger:test().
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" =>
      #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}
3>

[EDIT]

Here is a commented version with 2 methods for the merge

-module (merger).

-compile(export_all).

% yourType = maps(Key :: term() => Value :: yourType()) | #{}.

% This first version sticks to the description in natural language
% merge operation:
%   get all keys from 2 inputs
%   if a key belongs to one input only, insert key => value in the result
%   if a key belongs to 2 inputs, insert key => merge(Value1,value2) in the result
%   
% let's write this

merger(M1, M2) ->
   % Key list of both inputs combined; lists:usort/1 sorts it and drops repeated keys.
   AllKeys = lists:usort(maps:keys(M1) ++ maps:keys(M2)),
   % lists:foldl/3 calls this fun once per key, threading the result map through
   % as the accumulator.
   % https://erlang.org/doc/man/lists.html#foldl-3
   AddKey = fun(Key, Result) -> do_merge(Key, M1, M2, Result) end,
   % The accumulator starts as the empty map #{}.
   lists:foldl(AddKey, #{}, AllKeys).

% This function is the direct translation of the description above.
do_merge(Key, M1, M2, Acc) ->
   % A case expression evaluates to the value of the branch that matched,
   % so it can be bound to a variable directly.
   Merged =
      case {maps:is_key(Key, M1), maps:is_key(Key, M2)} of
         % Key in both inputs: merge the two values recursively
         {true, true} -> merger(maps:get(Key, M1), maps:get(Key, M2));
         % Key in one input only: take that value unchanged
         {true, false} -> maps:get(Key, M1);
         {false, true} -> maps:get(Key, M2)
      end,
   Acc#{Key => Merged}.

% the previous algorithm does a lot of useless operations: extract and combine the key lists, unique sort
% and uses 3 maps to build the result.
% a more efficient method is to break the symmetry of M1 and M2, and consider that you merge M2 into M1,
% so M1 is the initial value of the algorithm.
% then, rather than extract the keys from M2, it is more direct to use the maps:fold function.
% https://erlang.org/doc/man/maps.html#fold-3
% now the merge operation is :
%   insert {key, Value} in the accumulator.
%       If the key already exists in the accumulator, then the new value is the merge of the accumulator value and of the parameter value,
%       If not then simply put Key,Value in the accumulator

% fold will call do_merge2 with each Key and Value from M2, the result of previous operations
% and the Value for Key in the accumulator (undefined if Key does not exist in the accumulator).
% The initial value is M1.
merger2(M1,M2) ->
    % For every Key/Value pair of M2, look up what the accumulator currently
    % holds under Key (undefined when absent) and let do_merge2/4 decide.
    MergeEntry =
        fun(Key, Value, AccIn) ->
            Current = maps:get(Key, AccIn, undefined),
            do_merge2(Key, Value, AccIn, Current)
        end,
    maps:fold(MergeEntry, M1, M2).

% In the parameters I have added the result of maps:get/3: it returns either the Value if the key exists,
% or a default value (here: undefined) if it does not exist. This allows using pattern matching (the more Erlang way) rather than a case or if statement.
% Fourth argument undefined: the accumulator has no entry for Key yet,
% so the incoming value is stored unchanged.
do_merge2(Key, Incoming, Acc, undefined) ->
    Acc#{Key => Incoming};
% Otherwise the accumulator already holds Existing under Key:
% store the recursive merge of the two values.
do_merge2(Key, Incoming, Acc, Existing) ->
    Acc#{Key => merger2(Incoming, Existing)}.

% The nice thing is now the whole code needs only 3 function declarations containing 1 line of code each.
% It is pretty neat, maybe less easy to start with.

% For the test, I now pass the merger function name to use as a parameter
 % Applies the given 2-arity merge fun from left to right over the four
 % example maps from the question and returns the final result.
 test(Merger) ->
     First = #{ "Robert" => #{"Scott" => #{}} },
     Others = [#{ "Robert" => #{"Adams" => #{}}},
               #{ "William" => #{ "Robert" => #{ "Scott" => #{}}}},
               #{ "William" => #{ "Robert" => #{ "Fitzgerald" => #{}}}}],
     lists:foldl(fun(Next, Merged) -> Merger(Merged, Next) end, First, Others).

 % Runs test/1 once with each merge implementation and prints both results.
 test1() ->
     Runs = [{"using merger  :~n~p~n~n", fun merger:merger/2},
             {"using merger2 :~n~p~n~n", fun merger:merger2/2}],
     lists:foreach(fun({Format, Merger}) ->
                           io:format(Format, [test(Merger)])
                   end, Runs).

In the shell, it gives:

$ erl
Erlang/OTP 22 [erts-10.6] [64-bit] [smp:8:8] [ds:8:8:10] [async-threads:1]

Eshell V10.6  (abort with ^G)
1> c(merger).           
merger.erl:3: Warning: export_all flag enabled - all functions will be exported
{ok,merger}
2> merger:test(fun merger:merger/2). 
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" =>
      #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}
3> merger:test(fun merger:merger2/2).
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" =>
      #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}
4>

or invoked from PowerShell window:

PS C:\git\test_area\src> erlc merger.erl
merger.erl:3: Warning: export_all flag enabled - all functions will be exported
PS C:\git\test_area\src> erl -noshell -s merger test1 -s init stop
using merger  :
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" => #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}

using merger2 :
#{"Robert" => #{"Adams" => #{},"Scott" => #{}},
  "William" => #{"Robert" => #{"Fitzgerald" => #{},"Scott" => #{}}}}

PS C:\git\test_area\src>

For the reason why you get a crash dump, I have to guess (you do not provide the start function :o). I think that you do a test like mine, which combines several evaluations. The problem in this case is that at the end of the recursion, for the first evaluation (R1 = Merger(#{ "Robert" => #{"Scott" => #{}} },#{ "Robert" => #{"Adams" => #{}}}) in my case), you get the return value ok ( do_merge(M1, M2, []) -> ok in your code). This result is then reused for the next evaluation, and the program fails on the invocation of maps:keys(ok), saying that it got a badmap: ok.

lvmkulzt

lvmkulzt2#

do_merge总是返回ok(基本递归情况)。
这里有两个解决方案,第一个更易读,但我会选择第二个

% Recursively merges two terms.
%   - When both are maps, the result contains every key of either map. A key
%     present in both maps gets the deep merge of its two values.
%   - In every other case the second argument (Override) wins.
deep_map_merge(M1, M2) when is_map(M1), is_map(M2) ->
    % Merge both as if they had no common keys (values from M2 win for now)
    FlatMerge = maps:merge(M1, M2),
    % Keys present in both maps. maps:is_key/2 uses the map's own (exact) key
    % matching, so 1 and 1.0 stay distinct keys and the maps:get/2 calls below
    % cannot fail with badkey. It also avoids scanning both key lists pairwise.
    CommonKeys = [K || K <- maps:keys(M1), maps:is_key(K, M2)],
    % Replace each common key's value with the deep merge of both values
    lists:foldl(fun(K, MergeAcc) ->
                        MergeAcc#{K => deep_map_merge(maps:get(K, M1), maps:get(K, M2))}
                end, FlatMerge, CommonKeys);
deep_map_merge(_, Override) ->
    Override.

% Recursively merges M2 into M1. Starting from M1, every entry of M2 is added;
% when the key is already present, the existing value and the new one are
% merged recursively. If either argument is not a map, the second one wins.
deep_map_merge2(M1, M2) when is_map(M1), is_map(M2) ->
    AddEntry =
        fun(Key, Incoming, Merged) ->
                case maps:find(Key, Merged) of
                    {ok, Existing} ->
                        maps:put(Key, deep_map_merge2(Existing, Incoming), Merged);
                    error ->
                        maps:put(Key, Incoming, Merged)
                end
        end,
    maps:fold(AddEntry, M1, M2);
deep_map_merge2(_, Override) ->
    Override.

相关问题