Filename | /usr/local/lib/perl5/site_perl/mach/5.24/Razor2/String.pm |
Statements | Executed 82 statements in 12.9ms |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
1 | 1 | 1 | 11.0ms | 12.2ms | BEGIN@7 | Razor2::String::
1 | 1 | 1 | 4.33ms | 4.84ms | BEGIN@5 | Razor2::String::
1 | 1 | 1 | 742µs | 742µs | BEGIN@6 | Razor2::String::
1 | 1 | 1 | 727µs | 1.88ms | BEGIN@4 | Razor2::String::
1 | 1 | 1 | 560µs | 560µs | BEGIN@31 | Razor2::String::
1 | 1 | 1 | 28µs | 231µs | BEGIN@12 | Razor2::String::
0 | 0 | 0 | 0s | 0s | base64tohex | Razor2::String::
0 | 0 | 0 | 0s | 0s | clean_body | Razor2::String::
0 | 0 | 0 | 0s | 0s | debugobj | Razor2::String::
0 | 0 | 0 | 0s | 0s | dump_entity | Razor2::String::
0 | 0 | 0 | 0s | 0s | escape_smtp_terminator | Razor2::String::
0 | 0 | 0 | 0s | 0s | findsimilar | Razor2::String::
0 | 0 | 0 | 0s | 0s | fisher_yates_shuffle | Razor2::String::
0 | 0 | 0 | 0s | 0s | from_batched_query | Razor2::String::
0 | 0 | 0 | 0s | 0s | hash2hexbits | Razor2::String::
0 | 0 | 0 | 0s | 0s | hash2str | Razor2::String::
0 | 0 | 0 | 0s | 0s | hex_dump | Razor2::String::
0 | 0 | 0 | 0s | 0s | hexbits2hash | Razor2::String::
0 | 0 | 0 | 0s | 0s | hextobase64 | Razor2::String::
0 | 0 | 0 | 0s | 0s | hmac2_sha1 | Razor2::String::
0 | 0 | 0 | 0s | 0s | hmac3_sha1 | Razor2::String::
0 | 0 | 0 | 0s | 0s | hmac_sha1 | Razor2::String::
0 | 0 | 0 | 0s | 0s | makesis | Razor2::String::
0 | 0 | 0 | 0s | 0s | makesis_nue | Razor2::String::
0 | 0 | 0 | 0s | 0s | parsesis | Razor2::String::
0 | 0 | 0 | 0s | 0s | parsesis_nue | Razor2::String::
0 | 0 | 0 | 0s | 0s | prep_mail | Razor2::String::
0 | 0 | 0 | 0s | 0s | prep_part | Razor2::String::
0 | 0 | 0 | 0s | 0s | printb64table | Razor2::String::
0 | 0 | 0 | 0s | 0s | randstr | Razor2::String::
0 | 0 | 0 | 0s | 0s | round | Razor2::String::
0 | 0 | 0 | 0s | 0s | split_mime | Razor2::String::
0 | 0 | 0 | 0s | 0s | str2hash | Razor2::String::
0 | 0 | 0 | 0s | 0s | to_batched_query | Razor2::String::
0 | 0 | 0 | 0s | 0s | unescape_smtp_terminator | Razor2::String::
0 | 0 | 0 | 0s | 0s | xor_key | Razor2::String::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # $Id: String.pm,v 1.48 2005/06/13 21:09:59 vipul Exp $ | ||||
2 | package Razor2::String; | ||||
3 | |||||
4 | 2 | 350µs | 2 | 2.08ms | # spent 1.88ms (727µs+1.15) within Razor2::String::BEGIN@4 which was called:
# once (727µs+1.15ms) by Razor2::Client::Agent::BEGIN@18 at line 4 # spent 1.88ms making 1 call to Razor2::String::BEGIN@4
# spent 201µs making 1 call to Exporter::import |
5 | 2 | 284µs | 2 | 5.05ms | # spent 4.84ms (4.33+516µs) within Razor2::String::BEGIN@5 which was called:
# once (4.33ms+516µs) by Razor2::Client::Agent::BEGIN@18 at line 5 # spent 4.84ms making 1 call to Razor2::String::BEGIN@5
# spent 206µs making 1 call to Exporter::import |
6 | 2 | 803µs | 1 | 742µs | # spent 742µs within Razor2::String::BEGIN@6 which was called:
# once (742µs+0s) by Razor2::Client::Agent::BEGIN@18 at line 6 # spent 742µs making 1 call to Razor2::String::BEGIN@6 |
7 | 2 | 426µs | 2 | 12.3ms | # spent 12.2ms (11.0+1.21) within Razor2::String::BEGIN@7 which was called:
# once (11.0ms+1.21ms) by Razor2::Client::Agent::BEGIN@18 at line 7 # spent 12.2ms making 1 call to Razor2::String::BEGIN@7
# spent 123µs making 1 call to Exporter::import |
8 | |||||
9 | #use MIME::Parser; | ||||
10 | |||||
11 | 1 | 2µs | require Exporter; | ||
12 | 2 | 303µs | 2 | 434µs | # spent 231µs (28+203) within Razor2::String::BEGIN@12 which was called:
# once (28µs+203µs) by Razor2::Client::Agent::BEGIN@18 at line 12 # spent 231µs making 1 call to Razor2::String::BEGIN@12
# spent 203µs making 1 call to vars::import |
13 | 1 | 20µs | @ISA = qw(Exporter); | ||
14 | |||||
15 | 1 | 6µs | @EXPORT = qw( hmac_sha1 xor_key | ||
16 | from_batched_query | ||||
17 | to_batched_query findsimilar debugobj | ||||
18 | makesis parsesis makesis_nue parsesis_nue | ||||
19 | hextobase64 base64tohex | ||||
20 | randstr round | ||||
21 | hex_dump prep_mail | ||||
22 | prehash printb64table | ||||
23 | hexbits2hash hmac2_sha1 | ||||
24 | fisher_yates_shuffle | ||||
25 | ); | ||||
26 | |||||
27 | |||||
28 | # Same as the alphabet from RFC 1521, except s:/:_: and s:+:-: | ||||
29 | 1 | 2µs | my %b64table; | ||
30 | |||||
31 | # spent 560µs within Razor2::String::BEGIN@31 which was called:
# once (560µs+0s) by Razor2::Client::Agent::BEGIN@18 at line 42 | ||||
32 | # ASCII | ||||
33 | # 33-126 printable chars | ||||
34 | # 48-57 numbers | ||||
35 | # 65-90 uppercase alpha | ||||
36 | # 97-122 lowercase alpha | ||||
37 | 27 | 220µs | foreach (0..25) { $b64table{$_} = chr($_ + 65); } | ||
38 | 27 | 219µs | foreach (26..51) { $b64table{$_} = chr($_ + 71); } | ||
39 | 11 | 101µs | foreach (52..61) { $b64table{$_} = chr($_ - 4 ); } | ||
40 | 1 | 2µs | $b64table{62} = "-"; | ||
41 | 1 | 22µs | $b64table{63} = "_"; | ||
42 | 1 | 10.1ms | 1 | 560µs | } # spent 560µs making 1 call to Razor2::String::BEGIN@31 |
43 | |||||
44 | |||||
45 | sub printb64table { | ||||
46 | foreach (0..63) { | ||||
47 | print "$_ = $b64table{$_}\n"; | ||||
48 | } | ||||
49 | } | ||||
50 | |||||
51 | |||||
52 | sub hmac_sha1 { | ||||
53 | my $text = shift; | ||||
54 | my $iv1 = shift; | ||||
55 | my $iv2 = shift; | ||||
56 | my ($b64, $hex) = hmac2_sha1($text, $iv1, $iv2); | ||||
57 | return $b64; | ||||
58 | } | ||||
59 | |||||
60 | |||||
61 | # taken in part from RFC 2104 | ||||
62 | # http://www.cs.ucsd.edu/users/mihir/papers/hmac.html | ||||
63 | |||||
64 | sub hmac2_sha1 { | ||||
65 | my $text = shift; | ||||
66 | my $iv1 = shift; | ||||
67 | my $iv2 = shift; | ||||
68 | |||||
69 | return unless $text && $iv1 && $iv2; | ||||
70 | die "no ref's allowed" if ref($text); | ||||
71 | |||||
72 | my $ctx = Digest::SHA1->new; | ||||
73 | $ctx->add($iv2); | ||||
74 | $ctx->add($text); | ||||
75 | my $digest = $ctx->hexdigest; | ||||
76 | |||||
77 | $ctx = Digest::SHA1->new; | ||||
78 | $ctx->add($iv1); | ||||
79 | $ctx->add($digest); | ||||
80 | $digest = $ctx->hexdigest; | ||||
81 | |||||
82 | return (hextobase64($digest), $digest); | ||||
83 | } | ||||
84 | |||||
85 | |||||
86 | sub hmac3_sha1 { | ||||
87 | my $text = shift; | ||||
88 | my $iv1 = shift; | ||||
89 | my $iv2 = shift; | ||||
90 | |||||
91 | return unless $text && $iv1 && $iv2; | ||||
92 | die "no ref's allowed" if ref($text); | ||||
93 | |||||
94 | my $digest = $text; | ||||
95 | $digest = sha1_hex($iv1 . $digest); | ||||
96 | $digest = sha1_hex($iv2 . $digest); | ||||
97 | return (hextobase64($digest), $digest); | ||||
98 | } | ||||
99 | |||||
100 | |||||
101 | # part of RFC 2104 - see hmac_sha1() | ||||
102 | |||||
103 | sub xor_key { | ||||
104 | my $key = shift; | ||||
105 | |||||
106 | # key length should never be > 64 chars; | ||||
107 | # | ||||
108 | # dont need this ... see Bitwise String Operators | ||||
109 | # $enc .= '\0' x (64 - length($pass)); | ||||
110 | |||||
111 | my $iv1 = "\x36" x 64 ^ $key; | ||||
112 | my $iv2 = "\x5C" x 64 ^ $key; | ||||
113 | |||||
114 | return ($iv1, $iv2); | ||||
115 | } | ||||
116 | |||||
117 | |||||
118 | # converts a string where each char is a hex (4-bit) value | ||||
119 | # to a string where each char is a base64 (6-bit) value | ||||
120 | |||||
121 | sub hextobase64 { | ||||
122 | |||||
123 | my $hs = shift; | ||||
124 | |||||
125 | my @b64s; | ||||
126 | my $i = 0; | ||||
127 | |||||
128 | while ($i < length($hs)) { | ||||
129 | |||||
130 | # process 3 hex char chunks at a time | ||||
131 | my $hex3 = substr $hs, $i, 3; | ||||
132 | $i += 3; | ||||
133 | |||||
134 | my $bv = pack "h3", $hex3; | ||||
135 | my $cur = 0; | ||||
136 | foreach (0..5) { my $bt = vec($bv,$_,1); $cur += $bt; $cur *= 2; } | ||||
137 | push @b64s, $cur/2; $cur = 0; | ||||
138 | foreach (6..11) { my $bt = vec($bv,$_,1); $cur += $bt; $cur *= 2; } | ||||
139 | push @b64s, $cur/2; | ||||
140 | #foreach (0..15) { my $bt = vec($bv,$_,1); print "$_=$bt, cur=$cur\n"; } | ||||
141 | #print " -- hex=$hex3; @b64s\n"; | ||||
142 | } | ||||
143 | |||||
144 | my $bs = ""; | ||||
145 | foreach (@b64s) { $bs .= $b64table{$_}; } | ||||
146 | |||||
147 | # print "b64=$bs; hex=". base64tohex($bs) ."\n"; | ||||
148 | |||||
149 | # Fixme - change encoding so 1 hex char ==> 1 b64 char | ||||
150 | # 64-char hex string ==> 44-char b64 string. truncate to 43. | ||||
151 | # 40-char hex string ==> 28-char b64 string. truncate to 27. | ||||
152 | # $bs = substr($bs, 0, 43) if (length $bs == 44) && (substr($bs, -1) eq '0'); | ||||
153 | # $bs = substr($bs, 0, 27) if (length $bs == 28) && (substr($bs, -1) eq '0'); | ||||
154 | |||||
155 | return $bs; | ||||
156 | |||||
157 | } | ||||
158 | |||||
159 | |||||
160 | # converts a string where each char is a base64 (6-bit) value | ||||
161 | # to a string where each char is a hex (4-bit) value | ||||
162 | |||||
163 | sub base64tohex { | ||||
164 | |||||
165 | my $bs = shift; | ||||
166 | my @b64s; | ||||
167 | my $hexstr; | ||||
168 | |||||
169 | # convert string to list of numbers base 10 | ||||
170 | foreach my $chr (split '', $bs) { | ||||
171 | foreach (keys %b64table) { | ||||
172 | push @b64s, $_ if $b64table{$_} eq $chr; | ||||
173 | } | ||||
174 | } | ||||
175 | |||||
176 | while (@b64s) { | ||||
177 | my $bv = ""; vec($bv,0,16) = 0; | ||||
178 | my $a = shift @b64s; | ||||
179 | foreach (0..5) {my $i=5-$_; my $bt=$a%2; vec($bv,$i,1) = $bt; $a = int($a/2); } | ||||
180 | $a = shift @b64s; | ||||
181 | foreach (6..11) {my $i=17-$_;my $bt=$a%2; vec($bv,$i,1) = $bt; $a = int($a/2); } | ||||
182 | $hexstr .= unpack "h3", $bv; | ||||
183 | } | ||||
184 | |||||
185 | # print "hexstr=$hexstr; @b64s\n"; | ||||
186 | |||||
187 | # | ||||
188 | # NOTE on padding | ||||
189 | # if we pad 4 0-bits, we need to know that there wasn't an actual 0 | ||||
190 | # on the input string (hexstr). | ||||
191 | # | ||||
192 | # since padding 4 0's is more common than having the last hex | ||||
193 | # be a 0, we could append a special char indicating last 4 0 bits | ||||
194 | # were not padding 0's. | ||||
195 | # | ||||
196 | # But, we will customize these functions for razor2's needs. | ||||
197 | # 64-char hex string ==> 43-char b64 string ==> 66-char hex. truncate. | ||||
198 | # 40-char hex string ==> 27-char b64 string ==> 42-char hex. truncate. | ||||
199 | # 15-char hex string ==> 10-char b64 string ==> 15-char hex. ok. | ||||
200 | # | ||||
201 | # 20-byte hex string is 40 chars | ||||
202 | # $hexstr = substr($hexstr, 0, 20) if (length $hexstr == 21) && (substr($hexstr, -1) eq '0'); | ||||
203 | # $hexstr = substr($hexstr, 0, 40) if (length $hexstr == 42) && (substr($hexstr, -2) eq '00'); | ||||
204 | # $hexstr = substr($hexstr, 0, 64) if (length $hexstr == 66) && (substr($hexstr, -2) eq '00'); | ||||
205 | |||||
206 | $hexstr = substr($hexstr, 0, 40) if (length($hexstr) == 42); | ||||
207 | $hexstr = substr($hexstr, 0, 64) if (length($hexstr) == 66); | ||||
208 | return $hexstr; | ||||
209 | |||||
210 | } | ||||
211 | |||||
212 | |||||
213 | # can be called 2 ways | ||||
214 | # - makesis(%hash) aka makesis( p => 0, cf => 95 ) | ||||
215 | # - makesis($hashref) aka makesis({p => 0, cf => 95}) | ||||
216 | |||||
217 | sub makesis { | ||||
218 | my $first = shift; | ||||
219 | my $data; | ||||
220 | if (ref($first) eq 'HASH') { | ||||
221 | $data = $first; | ||||
222 | } else { | ||||
223 | $data = {$first, @_}; | ||||
224 | } | ||||
225 | my $sis = ''; | ||||
226 | foreach (sort keys %$data) { | ||||
227 | $sis .= "$_=" . (exists $data->{$_} ? uri_escape($data->{$_}) : '') . '&'; | ||||
228 | } | ||||
229 | |||||
230 | # This is 10x faster than the equivalent regex version. | ||||
231 | return substr($sis, 0, length($sis)-1) . "\r\n"; | ||||
232 | } | ||||
233 | |||||
234 | |||||
235 | sub parsesis { | ||||
236 | |||||
237 | my $query = $_[1] || {}; | ||||
238 | my $wantref = 1 if $_[1]; | ||||
239 | |||||
240 | # Parse the query. | ||||
241 | |||||
242 | $_[0] =~ s/\n$//; # SIS shouldn't have this! | ||||
243 | $_[0] =~ s/\r$//; # SIS shouldn't have this! | ||||
244 | |||||
245 | my @pairs = split /\&/, $_[0]; | ||||
246 | |||||
247 | for (@pairs) { | ||||
248 | my ($key, $value) = split /=/, $_; | ||||
249 | $query->{$key} = defined $value ? uri_unescape($value) : ''; | ||||
250 | } | ||||
251 | |||||
252 | return $query if $wantref; | ||||
253 | return %$query; | ||||
254 | } | ||||
255 | |||||
256 | |||||
257 | # version of makesis that doesn't to uri escaping | ||||
258 | # for things we know don't require escaping | ||||
259 | |||||
260 | # can be called 2 ways | ||||
261 | # - makesis(%hash) aka makesis( p => 0, cf => 95 ) | ||||
262 | # - makesis($hashref) aka makesis({p => 0, cf => 95}) | ||||
263 | |||||
264 | sub makesis_nue { | ||||
265 | my $first = shift; | ||||
266 | my $data; | ||||
267 | if (ref($first) eq 'HASH') { | ||||
268 | $data = $first; | ||||
269 | } else { | ||||
270 | $data = {$first, @_}; | ||||
271 | } | ||||
272 | my $sis = ''; | ||||
273 | foreach (sort keys %$data) { | ||||
274 | $sis .= "$_="; | ||||
275 | $sis .= $data->{$_} if exists($data->{$_}); | ||||
276 | $sis .= '&'; | ||||
277 | } | ||||
278 | |||||
279 | # This is 10x faster than the equivalent regex version. | ||||
280 | return substr($sis, 0, length($sis)-1) . "\r\n"; | ||||
281 | } | ||||
282 | |||||
283 | |||||
284 | sub parsesis_nue { | ||||
285 | |||||
286 | my $query = $_[1] || {}; | ||||
287 | my $wantref = 1 if $_[1]; | ||||
288 | |||||
289 | # Parse the query. | ||||
290 | $_[0] =~ s/\r\n$//; | ||||
291 | my @pairs = split /\&/, $_[0]; | ||||
292 | |||||
293 | for (@pairs) { | ||||
294 | my ($key, $value) = split /=/, $_; | ||||
295 | $query->{$key} = $value; | ||||
296 | } | ||||
297 | |||||
298 | return $query if $wantref; | ||||
299 | return %$query; | ||||
300 | } | ||||
301 | |||||
302 | |||||
303 | sub to_batched_query { | ||||
304 | my ($queries, $bql, $bqs, $novar) = @_; | ||||
305 | my @bqueries; | ||||
306 | |||||
307 | # Breaks up queries into batches, where batches are limited to: | ||||
308 | # - at most $bql lines long --OR-- | ||||
309 | # - at most $bqs kb in size | ||||
310 | # if bqs or bql == 0 or undef, no limit. | ||||
311 | # | ||||
312 | # fixme - optimization for aggregator: | ||||
313 | # sort, so all checks are together, all reports together, etc. | ||||
314 | # problem is user will want to maintain array order | ||||
315 | |||||
316 | # $queries is array ref of either: | ||||
317 | # strings - sis, ready to go | ||||
318 | # hash ref - need to create sis | ||||
319 | # my $q = ref($queries->[0]) eq 'HASH' ? makesis_batch($queries) : $queries; | ||||
320 | |||||
321 | # for right now, we'll just assume hash ref | ||||
322 | return unless ref($queries->[0]) eq 'HASH'; | ||||
323 | |||||
324 | my $last; | ||||
325 | my $line; | ||||
326 | my $linecnt = 0; | ||||
327 | my $batchmode = 0; | ||||
328 | foreach my $cur (@$queries) { | ||||
329 | |||||
330 | # my $dobj = debugobj($cur); print "dbg-doing obj: $dobj\n"; | ||||
331 | |||||
332 | # | ||||
333 | # handle cases where we submit email blob (message = * ) | ||||
334 | # | ||||
335 | if (exists $cur->{message}) { | ||||
336 | my $msg = $cur->{message}; | ||||
337 | delete $cur->{message}; | ||||
338 | $line = "-". makesis($cur); | ||||
339 | $cur->{message} = $msg; | ||||
340 | $line =~ s/\r\n$//s; | ||||
341 | $line .= "&message=*\r\n$msg\r\n.\r\n"; | ||||
342 | push @bqueries, $line; | ||||
343 | next; | ||||
344 | } | ||||
345 | |||||
346 | unless ($last) { | ||||
347 | # | ||||
348 | # start beginning of new batch | ||||
349 | # | ||||
350 | $last = $cur; | ||||
351 | next; | ||||
352 | } | ||||
353 | unless ($batchmode) { | ||||
354 | # | ||||
355 | # line after beginning of new batch | ||||
356 | # if similar, start variable batchmode. | ||||
357 | # if not, start batchmode without variables | ||||
358 | # | ||||
359 | my ($both, $diff) = findsimilar($last, $cur); | ||||
360 | if ($diff && !$novar) { | ||||
361 | $batchmode = 2; | ||||
362 | $line = "-". makesis_nue($both); | ||||
363 | # fixme - we might want to uri_escape() | ||||
364 | # but everything should be alphanum or our uri-safe base64 | ||||
365 | $line .= join(",", map "$last->{$_}", @$diff) ."\r\n"; | ||||
366 | $line .= join(",", map "$cur->{$_}", @$diff) ."\r\n"; | ||||
367 | $last = $both; # last is now 'template' | ||||
368 | $linecnt = 2; | ||||
369 | } else { | ||||
370 | $batchmode = 1; | ||||
371 | $line = "-". makesis($last); | ||||
372 | $line .= makesis_nue($cur); | ||||
373 | $linecnt = 2; | ||||
374 | } | ||||
375 | next; | ||||
376 | } else { | ||||
377 | # | ||||
378 | # We're in batchmode. | ||||
379 | # end if batch maxed out (bqs or bql reached) | ||||
380 | # end if batchmode with variables and cur doesn't match | ||||
381 | # end batch | ||||
382 | # | ||||
383 | my ($both, $diff) = findsimilar($last, $cur) if ($batchmode == 2); | ||||
384 | if ( ($bqs && (length($line) > ($bqs*1024))) || | ||||
385 | ($bql && ($linecnt >= $bql)) || | ||||
386 | ($batchmode == 2 && !$diff) ) { | ||||
387 | $batchmode = 0; | ||||
388 | $line .= ".\r\n"; | ||||
389 | push @bqueries, $line; | ||||
390 | $last = $cur; | ||||
391 | } else { | ||||
392 | # | ||||
393 | # fixme - we might go passed bqs by a little bit. prolly ok. | ||||
394 | # | ||||
395 | if ($batchmode == 2) { | ||||
396 | $line .= join(",", map "$cur->{$_}", @$diff) ."\r\n"; | ||||
397 | } else { | ||||
398 | $line .= makesis_nue($cur); | ||||
399 | } | ||||
400 | $linecnt++; | ||||
401 | } | ||||
402 | } | ||||
403 | } | ||||
404 | if ($batchmode) { | ||||
405 | $line .= ".\r\n"; | ||||
406 | push @bqueries, $line; | ||||
407 | } elsif ($last) { | ||||
408 | $line = makesis($last); | ||||
409 | push @bqueries, $line; | ||||
410 | } | ||||
411 | |||||
412 | return \@bqueries; | ||||
413 | } | ||||
414 | |||||
415 | |||||
416 | # compares keys in hash ref's a & b | ||||
417 | # | ||||
418 | # return | ||||
419 | # if both hashes have different keys | ||||
420 | # | ||||
421 | # return (1) | ||||
422 | # if both hashes have same keys and values, | ||||
423 | # | ||||
424 | # returns 2 refs | ||||
425 | # if both hashes have same keys but different values | ||||
426 | # - first is hash, copy of a & b where vals are same. | ||||
427 | # where vals are diff, keys are copied with val = '?' | ||||
428 | # - second is list contains keys where values are different | ||||
429 | |||||
430 | sub findsimilar { | ||||
431 | my ($a, $b) = @_; | ||||
432 | my @diffvalues = (); | ||||
433 | my %samevalues = (); | ||||
434 | |||||
435 | foreach (sort keys %$a) { | ||||
436 | return unless exists $b->{$_}; | ||||
437 | if ($b->{$_} eq $a->{$_}) { | ||||
438 | $samevalues{$_} = $a->{$_}; | ||||
439 | } else { | ||||
440 | $samevalues{$_} = "?"; | ||||
441 | push @diffvalues, $_; | ||||
442 | } | ||||
443 | } | ||||
444 | foreach (sort keys %$b) { | ||||
445 | return unless exists $a->{$_}; | ||||
446 | } | ||||
447 | # if too hashes are exactly the same, not sure. | ||||
448 | # treat as if they are totally different. | ||||
449 | return (1) unless scalar(@diffvalues) > 0; | ||||
450 | |||||
451 | return (\%samevalues, \@diffvalues); | ||||
452 | } | ||||
453 | |||||
454 | sub from_batched_query { | ||||
455 | |||||
456 | my ($queries) = @_; | ||||
457 | my @queries; | ||||
458 | |||||
459 | my ($fq, $rq) = $queries =~ m:^\-(.*?)\r\n(.*)$:sm; | ||||
460 | |||||
461 | unless ($fq && $rq) { | ||||
462 | # allow from_batched_query to handle non-batches | ||||
463 | $fq = $queries; | ||||
464 | $rq = ""; | ||||
465 | } | ||||
466 | |||||
467 | if ($fq =~ m:\?:) { | ||||
468 | |||||
469 | my %template_query = (); | ||||
470 | my @seq = (); | ||||
471 | my @pairs = split /\&/, $fq; | ||||
472 | for (@pairs) { | ||||
473 | my ($key, $value) = split /=/, $_; | ||||
474 | if ($value eq "?") { | ||||
475 | push @seq, $key; | ||||
476 | } else { | ||||
477 | $template_query{$key} = $value ? uri_unescape($value) : ''; | ||||
478 | } | ||||
479 | } | ||||
480 | |||||
481 | for (split /\r\n/, $rq) { | ||||
482 | my @values = split /,/, $_; | ||||
483 | my %foo = %template_query; | ||||
484 | @foo{@seq} = @values; | ||||
485 | push @queries, \%foo; | ||||
486 | } | ||||
487 | |||||
488 | return undef unless @queries; | ||||
489 | |||||
490 | } elsif ($fq =~ m:\*:) { | ||||
491 | |||||
492 | my %query = parsesis($fq); | ||||
493 | for (keys %query) { | ||||
494 | if ($query{$_} eq "*") { | ||||
495 | $query{$_} = $rq; | ||||
496 | last; | ||||
497 | } | ||||
498 | } | ||||
499 | push @queries, \%query; | ||||
500 | |||||
501 | } else { | ||||
502 | |||||
503 | |||||
504 | # Don't split $queries. Use $fq and $rq instead since | ||||
505 | # $fq is already normalized. | ||||
506 | |||||
507 | my %q = parsesis($fq); | ||||
508 | push @queries, \%q; | ||||
509 | for (split /\r\n/, $rq) { | ||||
510 | my %q = parsesis($_); | ||||
511 | push @queries, \%q; | ||||
512 | } | ||||
513 | |||||
514 | } | ||||
515 | |||||
516 | return \@queries; | ||||
517 | |||||
518 | } | ||||
519 | |||||
520 | |||||
521 | sub randstr { | ||||
522 | |||||
523 | my $size = shift; | ||||
524 | my $alphanum = shift; | ||||
525 | my $str; | ||||
526 | |||||
527 | $alphanum = 1 if !defined($alphanum); | ||||
528 | |||||
529 | # ASCII | ||||
530 | # 33-126 printable chars | ||||
531 | # 48-57 numbers | ||||
532 | # 65-90 uppercase alpha | ||||
533 | # 97-122 lowercase alpha | ||||
534 | |||||
535 | while ($size--) { | ||||
536 | if ($alphanum) { | ||||
537 | $str .= $b64table{ int(rand 64) }; | ||||
538 | } else { | ||||
539 | $str .= chr(int(rand 94) + 33); | ||||
540 | } | ||||
541 | } | ||||
542 | |||||
543 | return $str; | ||||
544 | |||||
545 | } | ||||
546 | |||||
547 | |||||
548 | sub escape_smtp_terminator { | ||||
549 | |||||
550 | my ($textref) = @_; | ||||
551 | $$textref =~ s/\r\n\./\r\n\.\./gm | ||||
552 | |||||
553 | } | ||||
554 | |||||
555 | |||||
556 | sub unescape_smtp_terminator { | ||||
557 | |||||
558 | my ($textref) = @_; | ||||
559 | $$textref =~ s/\r\n\.\./\r\n\./gm; | ||||
560 | |||||
561 | } | ||||
562 | |||||
563 | |||||
564 | sub hex_dump { | ||||
565 | my $string = shift; | ||||
566 | |||||
567 | for (split //, $string) { | ||||
568 | print ord($_) . " "; | ||||
569 | } | ||||
570 | print "\n"; | ||||
571 | } | ||||
572 | |||||
- - | |||||
575 | sub hash2str { | ||||
576 | |||||
577 | my $href = shift; | ||||
578 | my %hash = %$href; | ||||
579 | my ($str, $key); | ||||
580 | |||||
581 | for $key ( keys %hash ) { | ||||
582 | my $tstr; | ||||
583 | if ( ref $hash{$key} eq 'ARRAY' ) { | ||||
584 | for ( @{ $hash{ $key }} ) { $tstr .= escape( $_ ) . "," } $str =~ s/,$//; | ||||
585 | } elsif ( !(ref $hash{$key}) ) { | ||||
586 | $tstr .= escape ( $hash{$key} ); | ||||
587 | } | ||||
588 | if ( $tstr ) { $str .= "$key:$tstr&" } | ||||
589 | } | ||||
590 | |||||
591 | $str =~ s/&$//; return $str; | ||||
592 | |||||
593 | } | ||||
594 | |||||
595 | |||||
596 | sub str2hash { | ||||
597 | |||||
598 | my $str = shift; | ||||
599 | my %hash; | ||||
600 | my @pairs = split /(?<!\\)&/, $str; | ||||
601 | |||||
602 | for ( @pairs ) { | ||||
603 | my ( $key, $data ) = split /(?<!\\):/, $_, 2; | ||||
604 | if ( $data =~ /(?<!\\),/ ) { | ||||
605 | my @list = split /(?<!\\),/, $data; | ||||
606 | for ( @list ) { $_ = unescape ( $_ ) }; | ||||
607 | $hash{$key} = [@list]; | ||||
608 | } else { $hash{$key} = unescape ( $data ) } | ||||
609 | } | ||||
610 | |||||
611 | return \%hash; | ||||
612 | |||||
613 | } | ||||
614 | |||||
615 | # | ||||
616 | # If body of an email has mime attachments, the headers | ||||
617 | # will indicate this. likewise, each mime attachment | ||||
618 | # could also have nested mime attachments with headers that | ||||
619 | # must indicate this. standard recursion. | ||||
620 | # | ||||
621 | # However, all 'leaf node' attachments don't have to have | ||||
622 | # headers based on RFC xxx. They must be created before | ||||
623 | # sending to razor servers. | ||||
624 | # | ||||
625 | # | ||||
626 | # Example of mail with nested MIME attachments: | ||||
627 | # | ||||
628 | # level 1 level 2 level 3 .... | ||||
629 | # -------------------------------------- | ||||
630 | # * - Header 1 | ||||
631 | # * - Body 1 | ||||
632 | # * -- A -- mime-header 2A | ||||
633 | # | mime-body 2A | ||||
634 | # | | | ||||
635 | # | * ---- a --- mime-header 3a | ||||
636 | # | | mime-body 3a | ||||
637 | # | | | ||||
638 | # | * -----b --- mime-header 3b | ||||
639 | # | mime-body 3b | ||||
640 | # * -- B -- mime-body 2B | ||||
641 | # | | ||||
642 | # * -- C -- mime-header 2C | ||||
643 | # | | ||||
644 | # * ---- c -- mime-body 3c | ||||
645 | # | | ||||
646 | # * ---- d -- mime-header 3d | ||||
647 | # mime-body 3d | ||||
648 | # | ||||
649 | # should be reported as | ||||
650 | # | ||||
651 | # Header 1 \r\n | ||||
652 | # part 1 = p(header 3a, body 3a) \r\n | ||||
653 | # part 2 = p(header 3b, body 3b) \r\n | ||||
654 | # part 3 = p(<generated header 2B>, body 2B) \r\n | ||||
655 | # part 4 = p(<generated header 3c>, body 3c) \r\n | ||||
656 | # part 5 = p(header 3d, body 3d) \r\n | ||||
657 | # .\r\n | ||||
658 | # | ||||
659 | # Notes: | ||||
660 | # - Order of parts does not matter. | ||||
661 | # | ||||
662 | # - Each part is processed by prep_mail, p(), before report/check | ||||
663 | # | ||||
664 | # - Except for original Header everything but leaf nodes | ||||
665 | # are discarded. In the above example, | ||||
666 | # | ||||
667 | # Body 1, header 2A, header 2C - are discarded | ||||
668 | # | ||||
669 | # | ||||
670 | # Detailed Explanation: | ||||
671 | # | ||||
672 | # Header 1 says 'Content-Type: multipart' with boundary definition | ||||
673 | # Based on the Boundary, Body 1 is split into A, B, C. | ||||
674 | # | ||||
675 | # A is analyzed, has headers which also say 'Content-Type: multipart' | ||||
676 | # with a different boundary, and it is split into 3a, 3b. 2A is what | ||||
677 | # appears between header 2a and first boundary, so its ignored. | ||||
678 | # 3a and 3b both have header info, so they are sent thru prep_mail | ||||
679 | # and reported/checked | ||||
680 | # | ||||
681 | # <generated header 2B> is based on Header 1 to determine content | ||||
682 | # type. if unknown, dummy header is added, | ||||
683 | # and both are reported as a body part | ||||
684 | # | ||||
685 | # C is analyzed, has headers which also say 'Content-Type: multipart' | ||||
686 | # with a different boundary, and it is split into 3c, 3d. | ||||
687 | # | ||||
688 | # <generated header 3c> is based on header 2c to determine content | ||||
689 | # type. if unknown, dummy header is added, | ||||
690 | # and both are reported as a body part | ||||
691 | # | ||||
692 | # 3d has header info, so header+body are sent thru prep_mail | ||||
693 | # and reported/checked | ||||
694 | # | ||||
695 | # | ||||
696 | # | ||||
697 | # prep_mail() basically truncates msgs that are too big and/or | ||||
698 | # base64 encodes binaries or 8-bit msgs. | ||||
699 | # | ||||
700 | |||||
701 | |||||
702 | # Split mime splits up multi-part mime mails. | ||||
703 | # | ||||
704 | # returns array of parts, where each part is | ||||
705 | # headers\n\nbody | ||||
706 | # | ||||
707 | # headers will only contain X-Razor2 and Content- headers | ||||
708 | # | ||||
709 | # If not a mime mail, and the headers do not have any | ||||
710 | # Content-* headers, then the only headers will be X-Razor2 ones | ||||
711 | # (perhaps create Content-Type in da future?) | ||||
712 | # | ||||
713 | # body can be blank. nuked in prep_part | ||||
714 | # | ||||
715 | sub split_mime { | ||||
716 | |||||
717 | my ($mailref, $ver, $recursive, $debug ) = @_; | ||||
718 | |||||
719 | return unless ref($mailref); | ||||
720 | |||||
721 | # mime-bodies must have header or initial blank lines. | ||||
722 | # | ||||
723 | my ($hdr, $body) = split /\n\r*\n/, $$mailref, 2; | ||||
724 | my $no_valid_mime_hdr = 0; | ||||
725 | |||||
726 | unless ($body) { | ||||
727 | |||||
728 | # no blank lines, definately no header, so no nested mimes | ||||
729 | |||||
730 | print "split_mime: no blank lines\n" if $debug > 1; | ||||
731 | $no_valid_mime_hdr = 1; | ||||
732 | } | ||||
733 | # fixme - handle attachments? i.e. if header has this | ||||
734 | # Content-Disposition: attachment | ||||
735 | # than body is mail, we could recursively call ourselves | ||||
736 | # again with body (check body for hdrs first?) | ||||
737 | |||||
738 | # Make sure $hdr is really a hdr | ||||
739 | # | ||||
740 | # Details: If mime part is not RFC compliant, it could just | ||||
741 | # be a body with blank lines. hdr could have just matched part | ||||
742 | # of the body. | ||||
743 | # | ||||
744 | # valid mime header is determined by existance of 'Content-Type' | ||||
745 | # If we're not recursive, we don't check orig_headers, we assume its ok. | ||||
746 | # not sure if this is the best way ... | ||||
747 | # | ||||
748 | |||||
749 | if ($recursive && ($hdr !~ /^Content-Type:/i)) { | ||||
750 | $no_valid_mime_hdr = 1; | ||||
751 | print "uh-oh, bad mime-body len=". length($$mailref) .":\n$$mailref\n" if $debug; | ||||
752 | #print "split_mime: recur=($recursive)\n"; | ||||
753 | } | ||||
754 | |||||
755 | if ($no_valid_mime_hdr) { | ||||
756 | # | ||||
757 | # create dummy header and return it | ||||
758 | # | ||||
759 | # $ver should be '1' or client name + version | ||||
760 | my $mimepart = "X-Razor2-Agent: $ver\n"; | ||||
761 | my $hrdlen = length($mimepart); | ||||
762 | |||||
763 | # if it has initial blank line, hurray for rfc compliance | ||||
764 | if ($$mailref =~ /^\n/) { | ||||
765 | $mimepart .= $$mailref; | ||||
766 | } else { | ||||
767 | $mimepart .= "\n". $$mailref; | ||||
768 | } | ||||
769 | print "split_mime: returning total_len=". length($mimepart) ."; hdrs=". | ||||
770 | $hdrlen .", body=". length($$mailref) ."\n" if $debug; | ||||
771 | return (\$mimepart); | ||||
772 | } | ||||
773 | |||||
774 | # | ||||
775 | # Now we split mailref into hdr and body | ||||
776 | # check hdr for nested mime (boundary) | ||||
777 | # | ||||
778 | |||||
779 | my $orig_hdr = $hdr; | ||||
780 | $hdr =~ s/\n\s+//sg; # merge multi-line headers | ||||
781 | # nuke everything but X-Razor2 and Content-* headers | ||||
782 | my $trimmed_hdr = ""; | ||||
783 | foreach (split '\n',$hdr) { | ||||
784 | /^Content-/i and $trimmed_hdr .= "$_\n"; | ||||
785 | /^X-Razor2/i and $trimmed_hdr .= "$_\n"; | ||||
786 | } | ||||
787 | |||||
788 | my $boundary = ""; | ||||
789 | |||||
790 | if ($trimmed_hdr =~ /Content-Type: multipart.+boundary=("[^"]+"|\S+)/ig) { | ||||
791 | $boundary = $1; | ||||
792 | } | ||||
793 | |||||
794 | if ($boundary eq "") { | ||||
795 | # | ||||
796 | # valid mime hdr, but no nested mime. | ||||
797 | # add razor hdr and return. | ||||
798 | # | ||||
799 | print "split_mime: valid_mime_hdr [len=". length($orig_hdr) | ||||
800 | ."], but no nested mime\n$orig_hdr\n" if $debug > 1; | ||||
801 | $trimmed_hdr = "X-Razor2-Agent: $ver\n" . $trimmed_hdr; | ||||
802 | my $mimepart = "$trimmed_hdr\n$body"; | ||||
803 | print "split_mime: returning total=". length($mimepart) ."; hdrs=". | ||||
804 | length($trimmed_hdr) .", body=". length($body) ."\n" if $debug; | ||||
805 | return (\$mimepart); | ||||
806 | } | ||||
807 | $boundary = $1 if $boundary =~ /^"(.*)"$/; | ||||
808 | |||||
809 | # At this point, we know body has mime parts. | ||||
810 | # | ||||
811 | my @mimeparts; | ||||
812 | |||||
813 | # | ||||
814 | # According to RFC 1341 | ||||
815 | # http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html | ||||
816 | # | ||||
817 | # mimes are separated by \n--boundary\n | ||||
818 | # and are followed immediately by header, blank line, body; | ||||
819 | # or blank line and body. | ||||
820 | # | ||||
821 | # if no header in mime part, default content type for mime body is | ||||
822 | # based on header where 'Content-Type: multipart*' was defined, where | ||||
823 | # multipart/digest --> message/rfc822 | ||||
824 | # multipart/* --> text/plain | ||||
825 | # perhaps we should add a header if none present? | ||||
826 | # | ||||
827 | # if a body contains mimes, the 'preable', or stuff before | ||||
828 | # the first boundary, and the 'epilogue', the stuff after the | ||||
829 | # last boudary, are to be ignored. | ||||
830 | # | ||||
831 | # NOTE: We split up multiparts, but content-type's can also be | ||||
832 | # nested. i.e, a header of 'Content-Type: message' can have a body | ||||
833 | # of 'Content-Type: image' | ||||
834 | # | ||||
835 | $body =~ s/\n\Q--$boundary--\E.*$//sg; # trash last boundary and epilogue | ||||
836 | if ($body =~ /^\Q--$boundary\E\r*\n/) { | ||||
837 | # bug in some mails, make it RFC compliant | ||||
838 | # now our split will work correctly | ||||
839 | print "bad mime body [len=". length($body) ."], not doing \\n--boundary, fixed tho.\n" if $debug > 1; | ||||
840 | $body = "garbage\n$body"; | ||||
841 | } | ||||
842 | my @tmpparts; | ||||
843 | unless ($body =~ /\Q--$boundary\E/) { | ||||
844 | # Sometimes there's a boundary in the headers | ||||
845 | # but none in email. In such cases, we'll | ||||
846 | # treat the entire body as a part. | ||||
847 | push @tmpparts, "garbage"; | ||||
848 | push @tmpparts, $body; | ||||
849 | } else { | ||||
850 | @tmpparts = split /\n\Q--$boundary\E\r*\n/, $body; | ||||
851 | } | ||||
852 | shift @tmpparts; # trash everything up to the first boundary; | ||||
853 | foreach (@tmpparts) { | ||||
854 | # perhaps we should add a header based on default content-type? | ||||
855 | unless (/\S/s) { | ||||
856 | print "skipping body part containing only whitespace [len=". length($_) ."]\n" if $debug; | ||||
857 | next; | ||||
858 | } | ||||
859 | print "boundary: ". $recursive . "$boundary\n" if $debug > 1; | ||||
860 | push @mimeparts, split_mime(\$_, $ver, " ". $recursive, $debug); | ||||
861 | } | ||||
862 | print "Saweeet!!! Boundary (". scalar(@mimeparts) ."): $boundary\n" if defined($boundary) && ($debug > 1); | ||||
863 | |||||
864 | return @mimeparts; | ||||
865 | } | ||||
866 | |||||
867 | # mailref is not modified by this sub | ||||
868 | # | ||||
869 | sub prep_part { | ||||
870 | my ($mailref, $maxheader, $maxbody) = @_; | ||||
871 | #print "[". length($$mailref) ."] maxsize=$maxheader + $maxbody\n"; | ||||
872 | |||||
873 | my ($hdr, $body) = split /\n\r*\n/, $$mailref, 2; | ||||
874 | $hdr .= "\n"; # put newline back on last header line | ||||
875 | |||||
876 | unless ($body) { | ||||
877 | # | ||||
878 | # fixme - this should not happen. | ||||
879 | # if it does, split_mime needs work | ||||
880 | # | ||||
881 | # print "prep_part got F**KED-up mimepart [len=". length($$mailref) ."]\n$$mailref\n"; | ||||
882 | return; # body is empty | ||||
883 | } | ||||
884 | |||||
885 | # fixme - are these the best chars to check for binary? | ||||
886 | my $is_binary = ($hdr =~ /^Content-Type-Encoding: 8-bit/) || | ||||
887 | ($body =~ /([\x00-\x1f|\x7f-\xff])/ and $1 !~ /[\r\n\t]/); | ||||
888 | |||||
889 | my $enBase64 = new Razor2::Preproc::enBase64; | ||||
890 | $is_binary = $enBase64->isit($mailref); | ||||
891 | $enBase64->doit(\$body) if $is_binary; | ||||
892 | |||||
893 | $body =~ s/\r+\n/\n/sg; # outlook sometimes does \r\r\n | ||||
894 | $hdr =~ s/\r+\n/\n/sg; | ||||
895 | |||||
896 | if ((my $len = length($body)) > $maxbody) { | ||||
897 | $body = substr $body, 0, $maxbody; | ||||
898 | substr($body, -2) = "==" if $is_binary; | ||||
899 | |||||
900 | $hdr = "X-Razor2-Origlen-Body: $len\n" . $hdr; | ||||
901 | #print "maxbody=$maxbody body went from $len to ". length($body) ."\n"; | ||||
902 | } | ||||
903 | |||||
904 | if ((my $len = length($hdr)) > $maxheader) { | ||||
905 | $hdr = "X-Razor2-Origlen-Header: $len\n" . $hdr; | ||||
906 | if (length($hdr) > $maxheader) { | ||||
907 | $hdr = substr $hdr, 0, $maxheader; | ||||
908 | $hdr =~ s/([^\n]+)$//s; # remove last, incomplete line | ||||
909 | } | ||||
910 | # print "maxhdr=$maxheader header went from $len to ". length($hdr) ."\n"; | ||||
911 | } | ||||
912 | |||||
913 | my $dude = "$hdr\n$body"; | ||||
914 | |||||
915 | return $mailref if $dude eq $$mailref; # this happens majority of the time | ||||
916 | return \$dude; | ||||
917 | } | ||||
918 | |||||
- - | |||||
921 | # NOTE: Important function! | ||||
922 | # *must* be kept in sync with server and all clients | ||||
923 | # same holds true for prep_part() | ||||
924 | # | ||||
925 | # This is the preprocessing done on a mail before sent over network | ||||
926 | # | ||||
927 | sub prep_mail { | ||||
928 | my ($mailref, $report_headers, $maxheader, $maxbody, $maxorighdr, $versionstring, $debug) = @_; | ||||
929 | return unless ref($mailref); | ||||
930 | |||||
931 | print " prep_mail: orig=". length($$mailref) ."\n" if ($debug > 1); | ||||
932 | |||||
933 | my ($orig_hdr) = split /\n\r*\n/, $$mailref, 2; | ||||
934 | $orig_hdr .= "\n"; # put newline back on last header line | ||||
935 | |||||
936 | my $ver = $versionstring || 1; | ||||
937 | my @mimeparts = split_mime($mailref, $ver, 0, $debug); | ||||
938 | |||||
939 | my @mimeparts_prep; | ||||
940 | foreach (@mimeparts) { | ||||
941 | push @mimeparts_prep, prep_part($_, $maxheader, $maxbody); | ||||
942 | } | ||||
943 | unless ($report_headers) { | ||||
944 | my $hdr = "X-Razor2-Headers-Suppressed: 1\n"; | ||||
945 | foreach (split '\n',$orig_hdr) { | ||||
946 | /^Content-/i and $hdr .= "$_\n"; | ||||
947 | /^X-Razor2/i and $hdr .= "$_\n"; | ||||
948 | } | ||||
949 | $orig_hdr = $hdr; | ||||
950 | } | ||||
951 | |||||
952 | if ((my $len = length($orig_hdr)) > $maxorighdr) { | ||||
953 | $hdr = "X-Razor2-Origlen-Header: $len\n" . $orig_hdr; | ||||
954 | if (length($hdr) > $maxorighdr) { | ||||
955 | $hdr = substr $hdr, 0, $maxorighdr; | ||||
956 | $hdr =~ s/([^\n]+)$//s; # remove last, incomplete line | ||||
957 | } | ||||
958 | #print "max=$maxorighdr orig_header went from $len to ". length($hdr) ."\n"; | ||||
959 | $orig_hdr = $hdr; | ||||
960 | } | ||||
961 | |||||
962 | if ($debug > 1) { | ||||
963 | print "**** prep_mail done: headers=". length($orig_hdr); | ||||
964 | foreach (0..$#mimeparts_prep) { | ||||
965 | print "\n**** mail $_ [". length(${$mimeparts_prep[$_]}) ."] ". substr(${$mimeparts_prep[$_]} ,0,40); | ||||
966 | } | ||||
967 | print "\n\n"; | ||||
968 | } | ||||
969 | |||||
970 | return (\$orig_hdr, @mimeparts_prep); | ||||
971 | } | ||||
972 | |||||
- - | |||||
975 | # from MIME::Parser | ||||
976 | #my $parser = new MIME::Parser; | ||||
977 | #my $entity = $parser->parse($body); | ||||
978 | # foreach (dump_entity($entity)) | ||||
979 | sub dump_entity { | ||||
980 | my $ent = shift; | ||||
981 | my @parts = $ent->parts; | ||||
982 | |||||
983 | if (@parts) { # multipart... | ||||
984 | map { dump_entity($_) } @parts; | ||||
985 | } else { # single part... | ||||
986 | return ( $ent->body ); # return text blob | ||||
987 | print " Part: ", $ent->bodyhandle->path, " (", scalar($ent->head->mime_type), ")\n"; | ||||
988 | } | ||||
989 | } | ||||
990 | |||||
991 | |||||
992 | # input: hex string ("2D") | ||||
993 | # output: hash ref or array containg bits that are set | ||||
994 | # 2D == (1, 3, 4, 6) | ||||
995 | |||||
996 | sub hexbits2hash { | ||||
997 | |||||
998 | my $hex = shift; | ||||
999 | my %h; | ||||
1000 | for (0..31) { if (hex($hex) & (2**31)>>(31-$_)) { $h{$_+1} = 1 } } | ||||
1001 | return wantarray ? (sort keys %h) : \%h; | ||||
1002 | |||||
1003 | } | ||||
1004 | |||||
1005 | |||||
1006 | # input: hash ref, array ref, or array containg bits that are set | ||||
1007 | # output: hex string ("2D") | ||||
1008 | # 2D == (4, 8, 32) | ||||
1009 | |||||
1010 | sub hash2hexbits { | ||||
1011 | my @bits = @_; | ||||
1012 | |||||
1013 | @bits = @{$bits[0]} if ref($bits[0]) eq 'ARRAY'; | ||||
1014 | @bits = (sort keys %{$bits[0]}) if ref($bits[0]) eq 'HASH'; | ||||
1015 | |||||
1016 | my @all; | ||||
1017 | my $i = 1; | ||||
1018 | foreach (sort {$a <=> $b} @bits) { | ||||
1019 | while (1) { | ||||
1020 | push @all, 1 if $_ == $i; | ||||
1021 | last if $_ == $i; | ||||
1022 | push @all, 0; | ||||
1023 | $i++; | ||||
1024 | } | ||||
1025 | } | ||||
1026 | my $bs = join '', reverse @all; | ||||
1027 | # fixme needs testing | ||||
1028 | my $hex = (unpack "H*", pack "B*", join '', reverse @all); | ||||
1029 | |||||
1030 | return $hex | ||||
1031 | } | ||||
1032 | |||||
1033 | # for debugging - dumps a obj to a string | ||||
1034 | sub debugobj { | ||||
1035 | my ($obj, $prefix, $maxwidth) = @_; | ||||
1036 | |||||
1037 | $maxwidth ||= 70; | ||||
1038 | return if (defined($prefix) && length($prefix) > $maxwidth); | ||||
1039 | |||||
1040 | my $line = ""; | ||||
1041 | $prefix .= " "x4; | ||||
1042 | |||||
1043 | if (my $r = ref($obj)) { | ||||
1044 | if ($r eq 'HASH') { | ||||
1045 | $line = "$r - $obj,". scalar(keys %$obj) ." keys\n"; | ||||
1046 | foreach (sort keys %$obj) { | ||||
1047 | $line .= "$prefix$_ => ". debugobj($obj->{$_}, $prefix); | ||||
1048 | } | ||||
1049 | $line .= $prefix ."[empty]\n" unless (keys %$obj); | ||||
1050 | } elsif ($r eq 'ARRAY') { | ||||
1051 | $line = "$r - $obj,". scalar(@$obj) ." items\n"; | ||||
1052 | foreach (@$obj) { | ||||
1053 | $line .= $prefix . debugobj($_, $prefix); | ||||
1054 | } | ||||
1055 | $line .= $prefix ."[empty]\n" unless (@$obj); | ||||
1056 | } elsif ($r eq 'REF') { | ||||
1057 | $line = "$r - $obj\n"; | ||||
1058 | $line .= $prefix . debugobj($$obj, $prefix); | ||||
1059 | } elsif ($r eq 'SCALAR') { | ||||
1060 | $line = "$r - $obj\n"; | ||||
1061 | $line .= $prefix . debugobj($$obj, $prefix); | ||||
1062 | } | ||||
1063 | } else { | ||||
1064 | if (defined $obj) { | ||||
1065 | $line = $1 if substr($obj, 0, $maxwidth-length($prefix)) =~ /^([^\n]+)/; | ||||
1066 | $line = "[length=". length($obj) ."] ". $line | ||||
1067 | if (length($line) ne length($obj)); | ||||
1068 | } else { | ||||
1069 | $line = "[empty]"; | ||||
1070 | } | ||||
1071 | $line .= "\n"; | ||||
1072 | } | ||||
1073 | return $line; | ||||
1074 | } | ||||
1075 | |||||
1076 | |||||
1077 | sub clean_body { | ||||
1078 | |||||
1079 | my ($self, $bodyref) = @_; | ||||
1080 | |||||
1081 | my $hasheaders = 1; | ||||
1082 | |||||
1083 | if ($self->{preprocs}->{deBase64}->isit($bodyref)) { | ||||
1084 | $self->{preprocs}->{deBase64}->doit($bodyref); | ||||
1085 | $hasheaders = 0; | ||||
1086 | } | ||||
1087 | |||||
1088 | if ($self->{preprocs}->{deQP}->isit($bodyref)) { | ||||
1089 | $self->{preprocs}->{deQP}->doit($bodyref); | ||||
1090 | $hasheaders = 0; | ||||
1091 | } | ||||
1092 | |||||
1093 | if ($self->{preprocs}->{deHTML}->isit($bodyref)) { | ||||
1094 | $self->{preprocs}->{deHTML}->doit($bodyref); | ||||
1095 | } | ||||
1096 | |||||
1097 | if ($hasheaders) { | ||||
1098 | $$bodyref =~ s/^.*?\n\n//s; | ||||
1099 | } | ||||
1100 | |||||
1101 | |||||
1102 | } | ||||
1103 | |||||
1104 | |||||
1105 | sub round { | ||||
1106 | |||||
1107 | my $float = shift; | ||||
1108 | return sprintf("%.0f", $float); | ||||
1109 | |||||
1110 | } | ||||
1111 | |||||
- - | |||||
1114 | sub fisher_yates_shuffle { | ||||
1115 | my $deck = shift; # $deck is a reference to an array | ||||
1116 | my $i = @$deck; | ||||
1117 | while ($i--) { | ||||
1118 | my $j = int rand ($i+1); | ||||
1119 | @$deck[$i,$j] = @$deck[$j,$i]; | ||||
1120 | } | ||||
1121 | } | ||||
1122 | |||||
1123 | |||||
1124 | 1 | 22µs | 1; | ||
1125 |