# [Erlang]用erlang实现binary_to_term

abv123456789 分享于 2015-06-04

erlang提供了两个函数用于erlang数据与二进制之间的转换：term_to_binary 把erlang数据转成二进制数据，binary_to_term 则把二进制数据还原为原始的erlang数据。这两个函数都是用C实现的，效率很高，可用于数据的序列化和反序列化，也可以直接用作网络封包协议。本文主要探讨这种二进制协议数据的组织形式，并用erlang语言实现binary_to_term。

## 前言

1. 1> term_to_binary(a).
2. <<131,100,0,1,97>>
3. 2> term_to_binary(1).
4. <<131,97,1>>
5. 3> term_to_binary({a,b,c}).
6. <<131,104,3,100,0,1,97,100,0,1,98,100,0,1,99>>
7. 4> binary_to_term(v(1)).
8. a
9. 5> binary_to_term(v(2)).
10. 1
11. 6> binary_to_term(v(3)).
12. {a,b,c}

## 初识binary数据

### erlang长整型

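SMALL_BIG_EXT

| 1   | 1 | 1    | n               |
|-----|---|------|-----------------|
| 110 | n | Sign | d(0) ... d(n-1) |

Table 10.24: SMALL_BIG_EXT layout (first row: field size in bytes)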

Bignums are stored in unary form with a Sign byte that is 0 if the bignum is positive and 1 if it is negative. The digits are stored with the least significant byte (LSB) first. To calculate the integer, the following formula can be used:

$$B = 256, \qquad \text{value} = d_0 B^0 + d_1 B^1 + d_2 B^2 + \dots + d_{n-1} B^{n-1}$$

### erlang原子

10.7 ATOM_EXT

| 1   | 2   | Len      |
|-----|-----|----------|
| 100 | Len | AtomName |

Table 10.13: ATOM_EXT layout (first row: field size in bytes)

An atom is stored with a 2-byte unsigned length in big-endian order, followed by Len 8-bit Latin-1 characters that form the AtomName. Note: The maximum allowed value for Len is 255.

erlang原子会转成字符串，所以，太长会影响二进制封包大小。

http://www.ctiforum.com/news/guandian/332694.html 《FreeSWITCH: VoIP实战》： 使用Erlang建立IVR实现复杂业务逻辑 2012-08-24 11:00:29   作者：杜金房

### erlang浮点数

10.6 FLOAT_EXT

| 1  | 31           |
|----|--------------|
| 99 | Float String |

Table 10.12: FLOAT_EXT layout (first row: field size in bytes)

A float is stored in string format. The format used in sprintf to format the float is "%.20e" (there are more bytes allocated than necessary). To unpack the float, use sscanf with format "%lf".

10.26 NEW_FLOAT_EXT

| 1  | 8          |
|----|------------|
| 70 | IEEE float |

Table 10.32: NEW_FLOAT_EXT layout (first row: field size in bytes)

A float is stored as 8 bytes in big-endian IEEE format.

This term is used in minor version 1 of the external format.

[plain]  view plain copy
1. Eshell V5.9.1 (abort with ^G)
2. 1> term_to_binary(19.2).
3. <<131,99,49,46,57,49,57,57,57,57,57,57,57,57,57,57,57,57,
4.   57,57,48,48,48,48,101,43,48,48,49,...>>
[plain]  view plain copy
1. Eshell V6.2 (abort with ^G)
2. 1> term_to_binary(19.2).
3. <<131,70,64,51,51,51,51,51,51,51>>

### erlang列表

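10.16 LIST_EXT

| 1   | 4      |          |      |
|-----|--------|----------|------|
| 108 | Length | Elements | Tail |

Table 10.22: LIST_EXT layout (first row: field size in bytes; Elements and Tail are variable-sized)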

Length is the number of elements that follow in the Elements section. Tail is the final tail of the list; it is NIL_EXT for a proper list, but may be any type if the list is improper (for instance [a|b]).

-module(test).
-compile(export_all).

%% @doc Encode Term with term_to_binary/1 and print every leaf value found
%% in the encoded binary. Returns whatever binary_to_data/1 returns.
term_to_data(Term) ->
    Bin = term_to_binary(Term),
    binary_to_data(Bin).

%% @doc Check the leading version byte (131) and walk the payload.
%% Returns error when the binary does not start with 131.
binary_to_data(<<131, Bin/binary>>) ->
    binary_to_data1(Bin);
binary_to_data(_) ->
    error.

%% Tags of the external term format that this walker understands.
-define(NEW_FLOAT_EXT, 70).
-define(SMALL_INTEGER_EXT, 97).
-define(INTEGER_EXT, 98).
-define(FLOAT_EXT, 99).
-define(ATOM_EXT, 100).
-define(SMALL_TUPLE_EXT, 104).
-define(LARGE_TUPLE_EXT, 105).
-define(NIL_EXT, 106).
-define(STRING_EXT, 107).
-define(LIST_EXT, 108).
-define(BINARY_EXT, 109).
-define(SMALL_BIG_EXT, 110).
-define(LARGE_BIG_EXT, 111).
-define(SMALL_ATOM_EXT, 115).
-define(ATOM_UTF8_EXT, 118).
-define(SMALL_ATOM_UTF8_EXT, 119).

%% @doc Walk the payload one encoded value at a time and print each leaf
%% with msg/2. Tuple and list headers are skipped, so the output is a flat
%% sequence of leaves. Returns ok, also for an unknown tag (after printing
%% the remaining bytes).
%%
%% NOTE: atoms are created with binary_to_atom/2; do not feed untrusted
%% input to this function, because atoms are never garbage collected.
binary_to_data1(<<?LARGE_TUPLE_EXT, _Arity:32, Bin/binary>>) ->
    binary_to_data1(Bin);
binary_to_data1(<<?SMALL_TUPLE_EXT, _Arity:8, Bin/binary>>) ->
    binary_to_data1(Bin);
binary_to_data1(<<?SMALL_INTEGER_EXT, Int:8, Bin/binary>>) ->
    msg(int, Int),
    binary_to_data1(Bin);
%% INTEGER_EXT is a signed 32-bit big-endian integer.
binary_to_data1(<<?INTEGER_EXT, Int:32/signed, Bin/binary>>) ->
    msg(int2, Int),
    binary_to_data1(Bin);
binary_to_data1(<<?FLOAT_EXT, Field:31/binary, Bin/binary>>) ->
    %% The 31-byte field is NUL padded; parse only the text in front of it.
    [Text|_] = binary:split(Field, <<0>>),
    msg(float, erlang:binary_to_float(Text)),
    binary_to_data1(Bin);
binary_to_data1(<<?NEW_FLOAT_EXT, Float:64/float, Bin/binary>>) ->
    msg(float2, Float),
    binary_to_data1(Bin);
binary_to_data1(<<?SMALL_BIG_EXT, N:8, Sign:8, Bin:N/binary, Rest/binary>>) ->
    {N, Abs} = gen_small_big(Bin),
    msg(big, apply_sign(Sign, Abs)),
    binary_to_data1(Rest);
binary_to_data1(<<?LARGE_BIG_EXT, N:32, Sign:8, Bin:N/binary, Rest/binary>>) ->
    {N, Abs} = gen_small_big(Bin),
    msg(big2, apply_sign(Sign, Abs)),
    binary_to_data1(Rest);
binary_to_data1(<<?ATOM_EXT, Len:16, Bin:Len/binary, Rest/binary>>) ->
    msg(atom, erlang:binary_to_atom(Bin, latin1)),
    binary_to_data1(Rest);
binary_to_data1(<<?SMALL_ATOM_EXT, Len:8, Bin:Len/binary, Rest/binary>>) ->
    msg(atom2, erlang:binary_to_atom(Bin, latin1)),
    binary_to_data1(Rest);
binary_to_data1(<<?ATOM_UTF8_EXT, Len:16, Bin:Len/binary, Rest/binary>>) ->
    msg(atom_utf8, erlang:binary_to_atom(Bin, utf8)),
    binary_to_data1(Rest);
binary_to_data1(<<?SMALL_ATOM_UTF8_EXT, Len:8, Bin:Len/binary, Rest/binary>>) ->
    msg(atom2_utf8, erlang:binary_to_atom(Bin, utf8)),
    binary_to_data1(Rest);
binary_to_data1(<<?STRING_EXT, Len:16, Bin:Len/binary, Rest/binary>>) ->
    msg(string, Bin),
    binary_to_data1(Rest);
binary_to_data1(<<?BINARY_EXT, Len:32, Bin:Len/binary, Rest/binary>>) ->
    msg(binary, Bin),
    binary_to_data1(Rest);
binary_to_data1(<<?LIST_EXT, _Length:32, Bin/binary>>) ->
    binary_to_data1(Bin);
binary_to_data1(<<?NIL_EXT, Rest/binary>>) ->
    binary_to_data1(Rest);
binary_to_data1(<<>>) ->
    ok;
binary_to_data1(Bin) ->
    msg(unknown, Bin).

%% @doc Apply the bignum sign byte: 0 keeps the magnitude, any other value
%% negates it.
apply_sign(0, Abs) ->
    Abs;
apply_sign(_Sign, Abs) ->
    -Abs.

%% @doc Interpret Bin as an unsigned integer whose least significant byte
%% comes first. Returns {ByteCount, Value}.
gen_small_big(<<Bin/binary>>) ->
    gen_small_big(Bin, 0, 0).

gen_small_big(<<>>, Number, Index) ->
    {Index, Number};
gen_small_big(<<Num:8, Rest/binary>>, Number, Index) ->
    %% Byte number Index contributes Num * 256^Index.
    gen_small_big(Rest, Number + (Num bsl (Index * 8)), Index + 1).

%% @doc Print "Type Data" on one line and return ok.
msg(Type, Data) ->
    io:format("~w ~w~n", [Type, Data]),
    ok.

1. 7> c(test).
2. {ok,test}
3. 8> test:term_to_data({a,1,"abc"}).
4. atom a
5. int 1
6. string <<97,98,99>>
7. ok
8. 9> term_to_binary(11111111111111).
9. <<131,110,6,0,199,177,212,1,27,10>>
10. 10> test:binary_to_data(term_to_binary(11111111111111)).
11. big 11111111111111
12. ok

## 实现binary_to_term

-module(test).
-compile(export_all).

%% @doc Round-trip helper: encode Term with term_to_binary/1 and decode the
%% result again with the pure-Erlang decoder in this module.
term_to_data(Term) ->
    Bin = term_to_binary(Term),
    binary_to_data(Bin).

%% @doc Decode a binary in the external term format. The binary must start
%% with the version byte 131; otherwise error is returned.
binary_to_data(<<131, Bin/binary>>) ->
    binary_to_data1(Bin, [], []);
binary_to_data(_) ->
    error.

%% Tags of the external term format that this decoder understands.
-define(NEW_FLOAT_EXT, 70).
-define(SMALL_INTEGER_EXT, 97).
-define(INTEGER_EXT, 98).
-define(FLOAT_EXT, 99).
-define(ATOM_EXT, 100).
-define(SMALL_TUPLE_EXT, 104).
-define(LARGE_TUPLE_EXT, 105).
-define(NIL_EXT, 106).
-define(STRING_EXT, 107).
-define(LIST_EXT, 108).
-define(BINARY_EXT, 109).
-define(SMALL_BIG_EXT, 110).
-define(LARGE_BIG_EXT, 111).
-define(SMALL_ATOM_EXT, 115).
-define(ATOM_UTF8_EXT, 118).
-define(SMALL_ATOM_UTF8_EXT, 119).

%% @doc Push one decoded value onto the stack and close every open block
%% that this value completes.
%%
%% DataList is a stack of decoded values, most recent first.
%% SizeList is a stack of open blocks {Type, Size, Index}: Type is tuple or
%% list, Size is the number of values the block needs, Index is how many it
%% has received so far.
%% Returns {DataList1, SizeList1}.
%%
%% Data is always pushed as a value. The atom undefined is therefore
%% decoded like any other atom and no longer acts as a "nothing to push"
%% marker.
binary_to_data2(DataList, SizeList, Data) ->
    close_blocks([Data|DataList], SizeList).

%% Count the value on top of the stack for the innermost open block. When
%% that block becomes full, replace its values with the assembled term and
%% count that term for the enclosing block.
close_blocks(DataList, [{Type, Size, Index}|Outer]) ->
    Filled = Index + 1,
    case Filled =:= Size of
        true ->
            {Items, Below} = split_list(Type, DataList, Size, []),
            close_blocks(gen_data_block(Type, Items, Below), Outer);
        false ->
            {DataList, [{Type, Size, Filled}|Outer]}
    end;
close_blocks(DataList, SizeList) ->
    {DataList, SizeList}.

%% @doc Pop the Size most recent values of a block off the stack and return
%% {Items, RestOfStack} with Items in their original order.
%%
%% For a list block the value on top of the stack is the list tail. It
%% seeds the accumulator, so a proper list ends in [] and an improper list
%% keeps its tail; empty lists that are ordinary elements are kept. The
%% type is switched to list_items afterwards so that only the first value
%% is treated as the tail.
split_list(list, [Tail|Stack], Size, []) when Size > 0 ->
    split_list(list_items, Stack, Size - 1, Tail);
split_list(_Type, [], _Size, List) ->
    {List, []};
split_list(_Type, Stack, 0, List) ->
    {List, Stack};
split_list(Type, [Data|Stack], Size, List) ->
    split_list(Type, Stack, Size - 1, [Data|List]).

%% @doc Build the finished term of a block and push it onto DataList.
gen_data_block(tuple, List, DataList) ->
    [list_to_tuple(List)|DataList];
gen_data_block(list, List, DataList) ->
    [List|DataList].

%% @doc Decode the payload one encoded value at a time.
%% Returns the decoded term. Returns error for an empty payload or an
%% unknown tag (the remaining bytes are printed first). If several values
%% remain on the stack at the end, they are returned as a list.
%%
%% NOTE: atoms are created with binary_to_atom/2; do not feed untrusted
%% input to this function, because atoms are never garbage collected.
binary_to_data1(<<?LARGE_TUPLE_EXT, Arity:32, Bin/binary>>, DataList, SizeList) ->
    open_tuple(Arity, Bin, DataList, SizeList);
binary_to_data1(<<?SMALL_TUPLE_EXT, Arity:8, Bin/binary>>, DataList, SizeList) ->
    open_tuple(Arity, Bin, DataList, SizeList);
binary_to_data1(<<?SMALL_INTEGER_EXT, Int:8, Bin/binary>>, DataList, SizeList) ->
    push(Int, Bin, DataList, SizeList);
%% INTEGER_EXT is a signed 32-bit big-endian integer.
binary_to_data1(<<?INTEGER_EXT, Int:32/signed, Bin/binary>>, DataList, SizeList) ->
    push(Int, Bin, DataList, SizeList);
binary_to_data1(<<?FLOAT_EXT, Field:31/binary, Bin/binary>>, DataList, SizeList) ->
    %% The 31-byte field is NUL padded; parse only the text in front of it.
    [Text|_] = binary:split(Field, <<0>>),
    push(erlang:binary_to_float(Text), Bin, DataList, SizeList);
binary_to_data1(<<?NEW_FLOAT_EXT, Float:64/float, Bin/binary>>, DataList, SizeList) ->
    push(Float, Bin, DataList, SizeList);
binary_to_data1(<<?SMALL_BIG_EXT, N:8, Sign:8, Bin:N/binary, Rest/binary>>, DataList, SizeList) ->
    {N, Abs} = gen_small_big(Bin),
    push(apply_sign(Sign, Abs), Rest, DataList, SizeList);
binary_to_data1(<<?LARGE_BIG_EXT, N:32, Sign:8, Bin:N/binary, Rest/binary>>, DataList, SizeList) ->
    {N, Abs} = gen_small_big(Bin),
    push(apply_sign(Sign, Abs), Rest, DataList, SizeList);
binary_to_data1(<<?ATOM_EXT, Len:16, Bin:Len/binary, Rest/binary>>, DataList, SizeList) ->
    push(erlang:binary_to_atom(Bin, latin1), Rest, DataList, SizeList);
binary_to_data1(<<?SMALL_ATOM_EXT, Len:8, Bin:Len/binary, Rest/binary>>, DataList, SizeList) ->
    push(erlang:binary_to_atom(Bin, latin1), Rest, DataList, SizeList);
binary_to_data1(<<?ATOM_UTF8_EXT, Len:16, Bin:Len/binary, Rest/binary>>, DataList, SizeList) ->
    push(erlang:binary_to_atom(Bin, utf8), Rest, DataList, SizeList);
binary_to_data1(<<?SMALL_ATOM_UTF8_EXT, Len:8, Bin:Len/binary, Rest/binary>>, DataList, SizeList) ->
    push(erlang:binary_to_atom(Bin, utf8), Rest, DataList, SizeList);
binary_to_data1(<<?STRING_EXT, Len:16, Bin:Len/binary, Rest/binary>>, DataList, SizeList) ->
    push(binary_to_list(Bin), Rest, DataList, SizeList);
binary_to_data1(<<?BINARY_EXT, Len:32, Bin:Len/binary, Rest/binary>>, DataList, SizeList) ->
    push(Bin, Rest, DataList, SizeList);
binary_to_data1(<<?LIST_EXT, Length:32, Bin/binary>>, DataList, SizeList) ->
    %% A list block needs Length elements plus one tail value.
    binary_to_data1(Bin, DataList, [{list, Length + 1, 0}|SizeList]);
binary_to_data1(<<?NIL_EXT, Rest/binary>>, DataList, SizeList) ->
    push([], Rest, DataList, SizeList);
binary_to_data1(<<>>, DataList, _SizeList) ->
    case lists:reverse(DataList) of
        [Data] -> Data;
        [] -> error;
        Several -> Several
    end;
binary_to_data1(Bin, _DataList, _SizeList) ->
    msg(unknown, Bin),
    error.

%% Record a decoded leaf value and continue with the rest of the payload.
push(Value, Rest, DataList, SizeList) ->
    {DataList1, SizeList1} = binary_to_data2(DataList, SizeList, Value),
    binary_to_data1(Rest, DataList1, SizeList1).

%% Start a tuple block. A tuple of arity 0 has no elements that could
%% close it later, so it is pushed as the finished value {} right away.
open_tuple(0, Rest, DataList, SizeList) ->
    push({}, Rest, DataList, SizeList);
open_tuple(Arity, Rest, DataList, SizeList) ->
    binary_to_data1(Rest, DataList, [{tuple, Arity, 0}|SizeList]).

%% @doc Apply the bignum sign byte: 0 keeps the magnitude, any other value
%% negates it.
apply_sign(0, Abs) ->
    Abs;
apply_sign(_Sign, Abs) ->
    -Abs.

%% @doc Interpret Bin as an unsigned integer whose least significant byte
%% comes first. Returns {ByteCount, Value}.
gen_small_big(<<Bin/binary>>) ->
    gen_small_big(Bin, 0, 0).

gen_small_big(<<>>, Number, Index) ->
    {Index, Number};
gen_small_big(<<Num:8, Rest/binary>>, Number, Index) ->
    %% Byte number Index contributes Num * 256^Index.
    gen_small_big(Rest, Number + (Num bsl (Index * 8)), Index + 1).

%% @doc Print "Type Data" on one line and return ok.
msg(Type, Data) ->
    io:format("~w ~w~n", [Type, Data]),
    ok.

1. 16> c(test).
2. {ok,test}
3. 17> test:term_to_data({a,b,c}).
4. {a,b,c}
5. 18> test:term_to_data([]).
6. []
7. 19> test:term_to_data({a,b,{c,{d,{e,"TTT"}},f,[g,h,i],[j]}}).
8. {a,b,{c,{d,{e,"TTT"}},f,[g,h,i],[j]}}

erlang提供了两个函数用于erlang数据与二进制的转换，term_to_binary 把erlang数据转成一个二进制数据， binary_to_term 则是 把 二进制数据转为原始的erlang数据。这两个函数都是c实现的，效率

×
• 登录
• 注册

×