💾 Archived View for gemini.ucant.org › notes › tmp-misfin-parser.txt captured on 2024-08-18 at 17:14:48.

View Raw

More Information

⬅️ Previous capture (2024-02-05)





% Kind of header.  `env_recipient` is synthesised from the request line;
% `sender` ("<"), `recipient` (":") and `timestamp` ("@") come from header lines.
-type header_tag() :: env_recipient | recipient | sender | timestamp.
% A parsed header: {Tag, Data, Blurb}.
% NOTE(review): blurb() is not defined in the visible part of the file; the
% parser only ever stores `no_blurb` or a binary in that position -- confirm
% against the actual definition.
-type header() :: {header_tag(), binary(), blurb()}.

% Successful low-level parse: the headers in the order they appeared, then the body.
-type misfin_b_result() :: {misfin_b, [header()], binary()}.

% Everything request/1 can return: a successful parse, or an atom naming the
% reason parsing stopped.  `incomplete` means the input ended part-way through
% the request line or a header line.
-type result() ::
        misfin_b_result() |
        empty |
        no_mailbox |
        wrong_protocol |
        missing_footer |
        missing_body |
        incomplete.

% Parse a raw Misfin request.
%
% Returns:
%   {ok, Request}   - the input parsed; Request is built by full_request/2
%   incomplete      - the input ended part-way through the request
%   {error, Reason} - any other outcome of request/1, tagged as an error
-spec parse_request(Req :: binary()) ->
          {ok, misfin_request()} | incomplete | {error, atom()}.
parse_request(Req) ->
    case request(Req) of
        {misfin_b, Headers, Body} ->
            {ok, full_request(Headers, Body)};
        incomplete ->
            % passed through untagged, as the spec promises
            incomplete;
        Reason ->
            {error, Reason}
    end.

% Entry point of the low-level parser.
%
% Returns `empty` for an empty binary and `wrong_protocol` for anything that
% does not start with "misfin://"; otherwise hands the remainder to mailbox/2.
-spec request(Req :: binary()) -> result().
request(<<"">>) ->
    empty;
request(<<"misfin://", Rest/binary>>) ->
    mailbox(Rest, <<"">>);
request(Req) when is_binary(Req) ->
    wrong_protocol.

% Collect the mailbox part of the recipient address, byte by byte, up to "@".
%
% Frag is the unconsumed input, Acc the mailbox bytes collected so far.
% Returns `no_mailbox` if "@" arrives before any mailbox byte, `incomplete`
% if the input ends before "@" is seen, otherwise continues with host/3.
-spec mailbox(Frag :: binary(),
              Acc :: binary()) -> result().
mailbox(<<"">>, _Acc) ->
    % Covers both "misfin://" and "misfin://user": without this clause an
    % input ending in the middle of the mailbox raised function_clause.
    incomplete;
mailbox(<<"@", _Rest/binary>>, <<"">>) ->
    no_mailbox;
mailbox(<<"@", Rest/binary>>, Mailbox) ->
    host(Rest, <<"">>, {mailbox, Mailbox});
mailbox(<<Byte:8, Rest/binary>>, Acc) ->
    mailbox(Rest, <<Acc/binary, Byte>>).

% Collect the host part of the recipient address, up to the first space.
%
% Frag is the unconsumed input, Acc the host bytes collected so far, and
% Mailbox the mailbox already parsed by mailbox/2.
% Returns `incomplete` if the input ends, or a space arrives, before any host
% byte; `missing_body` if the input ends right after the host.  On a space
% after a non-empty host it records "Mailbox@Host" as the env_recipient
% header and continues with headers/3.
-spec host(Frag :: binary(),
           Acc :: binary(),
           Mailbox :: {mailbox, binary()})
          -> result().
host(<<"">>, <<"">>, _) ->
    incomplete;
host(<<"">>, _Acc, _) ->
    missing_body;
host(<<" ", _Rest/binary>>, <<"">>, _) ->
    incomplete;
host(<<" ", Rest/binary>>, Host, {mailbox, Mailbox}) ->
    Rec = <<Mailbox/bytes, "@", Host/bytes>>,
    Hdr = {env_recipient, Rec, no_blurb},
    headers(Rest, no_header, [Hdr]);
host(<<Byte:8, Rest/binary>>, Acc, Mailbox) ->
    host(Rest, <<Acc/binary, Byte>>, Mailbox).

% We assume, for now, that all the headers come at the top of the message,
% since the Misfin C proposal makes this mandatory.
%
% A header line starts with one of:
%   "<" - sender
%   ":" - recipient
%   "@" - timestamp
% followed by optional blanks (space or tab), the data, optionally more blanks
% and a blurb, and finally CRLF.  The first line that does not start with one
% of those characters begins the body.
%
% Acc is `no_header` between header lines, otherwise the header being built.
% Headers is accumulated newest-first; body/3 reverses it.
%
% Returns `incomplete` if the input ends inside a header line, otherwise
% whatever body/3 returns.
-spec headers(Frag :: binary(),
              Acc :: no_header | header(),
              Headers :: [header()]) ->
          result().

% start of a header line: the first byte selects the tag
headers(<<"<", Rest/binary>>, no_header, Headers) ->
    headers(Rest, {sender, <<"">>, no_blurb}, Headers);
headers(<<":", Rest/binary>>, no_header, Headers) ->
    headers(Rest, {recipient, <<"">>, no_blurb}, Headers);
headers(<<"@", Rest/binary>>, no_header, Headers) ->
    headers(Rest, {timestamp, <<"">>, no_blurb}, Headers);

% blanks before the data are skipped
headers(<<Blank:8, Rest/binary>>, {_, <<"">>, _} = Header, Headers)
  when Blank =:= $\s; Blank =:= $\t ->
    headers(Rest, Header, Headers);

% CRLF finishes the current header
headers(<<"\r\n", Rest/binary>>, {_, _, _} = Header, Headers) ->
    headers(Rest, no_header, [Header | Headers]);

% the first blank after the data opens a (still empty) blurb
headers(<<Blank:8, Rest/binary>>, {Tag, Data, no_blurb}, Headers)
  when Blank =:= $\s; Blank =:= $\t ->
    headers(Rest, {Tag, Data, <<"">>}, Headers);

% further blanks before the blurb text are skipped
headers(<<Blank:8, Rest/binary>>, {_, _, <<"">>} = Header, Headers)
  when Blank =:= $\s; Blank =:= $\t ->
    headers(Rest, Header, Headers);

% any other byte extends the data until a blurb has been opened ...
headers(<<Byte:8, Rest/binary>>, {Tag, Data, no_blurb}, Headers) ->
    headers(Rest, {Tag, <<Data/binary, Byte>>, no_blurb}, Headers);

% ... and the blurb afterwards (blanks inside the blurb text are kept)
headers(<<Byte:8, Rest/binary>>, {Tag, Data, Blurb}, Headers) ->
    headers(Rest, {Tag, Data, <<Blurb/binary, Byte>>}, Headers);

headers(<<"">>, {_, _, _}, _Headers) ->
    incomplete; % header does not end in CRLF?

% not inside a header and no header marker: the rest is the body
headers(Rest, no_header, Headers) ->
    body(Rest, <<"">>, Headers).

% Collect the message body, which must be terminated by a final CRLF.
%
% Rest is the unconsumed input, Acc the body bytes collected so far, and
% Headers the headers parsed so far, newest first.
% Returns {misfin_b, Headers, Body} with the headers put back into input order
% once exactly "\r\n" remains; a CRLF followed by more input is kept as part
% of the body.  Returns `missing_footer` if the input runs out first.
-spec body(Rest :: binary(),
           Acc :: binary(),
           Headers :: [header()]) ->
          result().
body(<<"\r\n">>, Acc, Headers) ->
    {misfin_b, lists:reverse(Headers), Acc};
body(<<"">>, _, _) ->
    missing_footer;
body(<<Byte:8, Rest/binary>>, Acc, Headers) ->
    body(Rest, <<Acc/binary, Byte>>, Headers).

%% TESTS %%

% Table-driven test generator for request/1.
% Each entry is {Expected, Input}; one ?_assertEqual is produced per entry.
heterogeneous_test_() ->
    Tests = [
             {empty,          <<"">>},
             {wrong_protocol, <<"foo://">>},
             {incomplete,     <<"misfin://">>},
             {incomplete,     <<"misfin://username@">>},
             {incomplete,     <<"misfin://username@ ">>},
             {no_mailbox,     <<"misfin://@foo.com s y zar\r\n">>},
             {missing_footer, <<"misfin://foo@bar.com ">>},
             {missing_body,   <<"misfin://martin@misfin.ucant.org">>},
             {incomplete,     <<"misfin://u@h.io < billg@ms.com">>},
             {missing_footer, <<"misfin://username@host.com rest">>},

             % full request: envelope recipient, a sender with a blurb,
             % a recipient with a blurb, then the body
             {{misfin_b,
               [{env_recipient,<<"martin@misfin.ucant.org">>,no_blurb},
                {sender,<<"foo@bar.com">>,<<"Foo man  choo">>},
                {recipient,<<"martin@no.ucant.org">>,<<"Mk">>}],
               <<"the message">>},
              <<"misfin://martin@misfin.ucant.org <   foo@bar.com\t Foo man  choo\r\n:martin@no.ucant.org Mk\r\nthe message\r\n">>}
            ],

    [?_assertEqual(Expected, request(Input)) ||
        {Expected, Input} <- Tests].

% Given a list `List` of 3-tuples, return `{V1, V2}` for every `{K, V1, V2}`
% whose first element is exactly `K`, preserving list order.  Elements of any
% other shape are ignored.
%
% The filter uses a fresh variable and `=:=` because a variable in a
% comprehension generator pattern would shadow `K` instead of matching it.
-spec matching(K :: term(), List :: [term()]) -> [{term(), term()}].
matching(K, List) ->
    [{V1, V2} || {Key, V1, V2} <- List, Key =:= K].

% Build the final request value from the parsed headers and the body.
%
% Headers are grouped by tag with matching/2.  Raw timestamp headers are run
% through iso8601:parse/1 and only results of the form {Date, Time} are kept.
%
% NOTE(review): several lines of this function appear to be missing from this
% capture -- the return type of the spec, the `catch` and `end` of the `try`,
% and the opening and closing of the term that holds the five fields at the
% bottom.  As shown it does not compile; restore those lines from the
% original before relying on it.
-spec full_request(Headers :: [header()],
                   Body :: binary()) ->
full_request(Headers, Body) ->
    ERs = matching(env_recipient, Headers),
    Recipients = matching(recipient, Headers),
    Senders = matching(sender, Headers),
    RawTimestamps = matching(timestamp, Headers),

    % exactly one envelope recipient is required (badmatch otherwise);
    % its blurb is discarded
    [{ER, _}] = ERs,

    % the blurb of each timestamp header is ignored
    Timestamps = lists:filtermap(
                   fun({Timestamp, _}) ->
                           try iso8601:parse(Timestamp) of
                               {Date, Time} -> {true, {Date, Time}}
                               error:badarg -> false
                   end, RawTimestamps),

       envelope_recipient = ER,
       recipients = Recipients,
       senders = Senders,
       timestamps = Timestamps,
       body = Body