← Index
NYTProf Performance Profile   « line view »
For /usr/local/bin/sa-learn
  Run on Sun Nov 5 02:36:06 2017
Reported on Sun Nov 5 02:56:20 2017

Filename/usr/local/lib/perl5/site_perl/mach/5.24/Razor2/String.pm
StatementsExecuted 82 statements in 12.9ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
11111.0ms12.2msRazor2::String::::BEGIN@7Razor2::String::BEGIN@7
1114.33ms4.84msRazor2::String::::BEGIN@5Razor2::String::BEGIN@5
111742µs742µsRazor2::String::::BEGIN@6Razor2::String::BEGIN@6
111727µs1.88msRazor2::String::::BEGIN@4Razor2::String::BEGIN@4
111560µs560µsRazor2::String::::BEGIN@31Razor2::String::BEGIN@31
11128µs231µsRazor2::String::::BEGIN@12Razor2::String::BEGIN@12
0000s0sRazor2::String::::base64tohexRazor2::String::base64tohex
0000s0sRazor2::String::::clean_bodyRazor2::String::clean_body
0000s0sRazor2::String::::debugobjRazor2::String::debugobj
0000s0sRazor2::String::::dump_entityRazor2::String::dump_entity
0000s0sRazor2::String::::escape_smtp_terminatorRazor2::String::escape_smtp_terminator
0000s0sRazor2::String::::findsimilarRazor2::String::findsimilar
0000s0sRazor2::String::::fisher_yates_shuffleRazor2::String::fisher_yates_shuffle
0000s0sRazor2::String::::from_batched_queryRazor2::String::from_batched_query
0000s0sRazor2::String::::hash2hexbitsRazor2::String::hash2hexbits
0000s0sRazor2::String::::hash2strRazor2::String::hash2str
0000s0sRazor2::String::::hex_dumpRazor2::String::hex_dump
0000s0sRazor2::String::::hexbits2hashRazor2::String::hexbits2hash
0000s0sRazor2::String::::hextobase64Razor2::String::hextobase64
0000s0sRazor2::String::::hmac2_sha1Razor2::String::hmac2_sha1
0000s0sRazor2::String::::hmac3_sha1Razor2::String::hmac3_sha1
0000s0sRazor2::String::::hmac_sha1Razor2::String::hmac_sha1
0000s0sRazor2::String::::makesisRazor2::String::makesis
0000s0sRazor2::String::::makesis_nueRazor2::String::makesis_nue
0000s0sRazor2::String::::parsesisRazor2::String::parsesis
0000s0sRazor2::String::::parsesis_nueRazor2::String::parsesis_nue
0000s0sRazor2::String::::prep_mailRazor2::String::prep_mail
0000s0sRazor2::String::::prep_partRazor2::String::prep_part
0000s0sRazor2::String::::printb64tableRazor2::String::printb64table
0000s0sRazor2::String::::randstrRazor2::String::randstr
0000s0sRazor2::String::::roundRazor2::String::round
0000s0sRazor2::String::::split_mimeRazor2::String::split_mime
0000s0sRazor2::String::::str2hashRazor2::String::str2hash
0000s0sRazor2::String::::to_batched_queryRazor2::String::to_batched_query
0000s0sRazor2::String::::unescape_smtp_terminatorRazor2::String::unescape_smtp_terminator
0000s0sRazor2::String::::xor_keyRazor2::String::xor_key
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1# $Id: String.pm,v 1.48 2005/06/13 21:09:59 vipul Exp $
2package Razor2::String;
3
42350µs22.08ms
# spent 1.88ms (727µs+1.15) within Razor2::String::BEGIN@4 which was called: # once (727µs+1.15ms) by Razor2::Client::Agent::BEGIN@18 at line 4
use Digest::SHA1 qw(sha1_hex);
# spent 1.88ms making 1 call to Razor2::String::BEGIN@4 # spent 201µs making 1 call to Exporter::import
52284µs25.05ms
# spent 4.84ms (4.33+516µs) within Razor2::String::BEGIN@5 which was called: # once (4.33ms+516µs) by Razor2::Client::Agent::BEGIN@18 at line 5
use URI::Escape;
# spent 4.84ms making 1 call to Razor2::String::BEGIN@5 # spent 206µs making 1 call to Exporter::import
62803µs1742µs
# spent 742µs within Razor2::String::BEGIN@6 which was called: # once (742µs+0s) by Razor2::Client::Agent::BEGIN@18 at line 6
use Razor2::Preproc::enBase64;
# spent 742µs making 1 call to Razor2::String::BEGIN@6
72426µs212.3ms
# spent 12.2ms (11.0+1.21) within Razor2::String::BEGIN@7 which was called: # once (11.0ms+1.21ms) by Razor2::Client::Agent::BEGIN@18 at line 7
use Data::Dumper;
# spent 12.2ms making 1 call to Razor2::String::BEGIN@7 # spent 123µs making 1 call to Exporter::import
8
9#use MIME::Parser;
10
1112µsrequire Exporter;
122303µs2434µs
# spent 231µs (28+203) within Razor2::String::BEGIN@12 which was called: # once (28µs+203µs) by Razor2::Client::Agent::BEGIN@18 at line 12
use vars qw ( @ISA $VERSION @EXPORT );
# spent 231µs making 1 call to Razor2::String::BEGIN@12 # spent 203µs making 1 call to vars::import
13120µs@ISA = qw(Exporter);
14
1516µs@EXPORT = qw( hmac_sha1 xor_key
16 from_batched_query
17 to_batched_query findsimilar debugobj
18 makesis parsesis makesis_nue parsesis_nue
19 hextobase64 base64tohex
20 randstr round
21 hex_dump prep_mail
22 prehash printb64table
23 hexbits2hash hmac2_sha1
24 fisher_yates_shuffle
25 );
26
27
28# Same as the alphabet from RFC 1521, except s:/:_: and s:+:-:
2912µsmy %b64table;
30
31
# spent 560µs within Razor2::String::BEGIN@31 which was called: # once (560µs+0s) by Razor2::Client::Agent::BEGIN@18 at line 42
BEGIN {
32 # ASCII
33 # 33-126 printable chars
34 # 48-57 numbers
35 # 65-90 uppercase alpha
36 # 97-122 lowercase alpha
3727220µs foreach (0..25) { $b64table{$_} = chr($_ + 65); }
3827219µs foreach (26..51) { $b64table{$_} = chr($_ + 71); }
3911101µs foreach (52..61) { $b64table{$_} = chr($_ - 4 ); }
4012µs $b64table{62} = "-";
41122µs $b64table{63} = "_";
42110.1ms1560µs}
# spent 560µs making 1 call to Razor2::String::BEGIN@31
43
44
45sub printb64table {
46 foreach (0..63) {
47 print "$_ = $b64table{$_}\n";
48 }
49}
50
51
52sub hmac_sha1 {
53 my $text = shift;
54 my $iv1 = shift;
55 my $iv2 = shift;
56 my ($b64, $hex) = hmac2_sha1($text, $iv1, $iv2);
57 return $b64;
58}
59
60
61# taken in part from RFC 2104
62# http://www.cs.ucsd.edu/users/mihir/papers/hmac.html
63
64sub hmac2_sha1 {
65 my $text = shift;
66 my $iv1 = shift;
67 my $iv2 = shift;
68
69 return unless $text && $iv1 && $iv2;
70 die "no ref's allowed" if ref($text);
71
72 my $ctx = Digest::SHA1->new;
73 $ctx->add($iv2);
74 $ctx->add($text);
75 my $digest = $ctx->hexdigest;
76
77 $ctx = Digest::SHA1->new;
78 $ctx->add($iv1);
79 $ctx->add($digest);
80 $digest = $ctx->hexdigest;
81
82 return (hextobase64($digest), $digest);
83}
84
85
86sub hmac3_sha1 {
87 my $text = shift;
88 my $iv1 = shift;
89 my $iv2 = shift;
90
91 return unless $text && $iv1 && $iv2;
92 die "no ref's allowed" if ref($text);
93
94 my $digest = $text;
95 $digest = sha1_hex($iv1 . $digest);
96 $digest = sha1_hex($iv2 . $digest);
97 return (hextobase64($digest), $digest);
98}
99
100
101# part of RFC 2104 - see hmac_sha1()
102
103sub xor_key {
104 my $key = shift;
105
106 # key length should never be > 64 chars;
107 #
108 # dont need this ... see Bitwise String Operators
109 # $enc .= '\0' x (64 - length($pass));
110
111 my $iv1 = "\x36" x 64 ^ $key;
112 my $iv2 = "\x5C" x 64 ^ $key;
113
114 return ($iv1, $iv2);
115}
116
117
118# converts a string where each char is a hex (4-bit) value
119# to a string where each char is a base64 (6-bit) value
120
121sub hextobase64 {
122
123 my $hs = shift;
124
125 my @b64s;
126 my $i = 0;
127
128 while ($i < length($hs)) {
129
130 # process 3 hex char chunks at a time
131 my $hex3 = substr $hs, $i, 3;
132 $i += 3;
133
134 my $bv = pack "h3", $hex3;
135 my $cur = 0;
136 foreach (0..5) { my $bt = vec($bv,$_,1); $cur += $bt; $cur *= 2; }
137 push @b64s, $cur/2; $cur = 0;
138 foreach (6..11) { my $bt = vec($bv,$_,1); $cur += $bt; $cur *= 2; }
139 push @b64s, $cur/2;
140 #foreach (0..15) { my $bt = vec($bv,$_,1); print "$_=$bt, cur=$cur\n"; }
141 #print " -- hex=$hex3; @b64s\n";
142 }
143
144 my $bs = "";
145 foreach (@b64s) { $bs .= $b64table{$_}; }
146
147 # print "b64=$bs; hex=". base64tohex($bs) ."\n";
148
149 # Fixme - change encoding so 1 hex char ==> 1 b64 char
150 # 64-char hex string ==> 44-char b64 string. truncate to 43.
151 # 40-char hex string ==> 28-char b64 string. truncate to 27.
152 # $bs = substr($bs, 0, 43) if (length $bs == 44) && (substr($bs, -1) eq '0');
153 # $bs = substr($bs, 0, 27) if (length $bs == 28) && (substr($bs, -1) eq '0');
154
155 return $bs;
156
157}
158
159
160# converts a string where each char is a base64 (6-bit) value
161# to a string where each char is a hex (4-bit) value
162
163sub base64tohex {
164
165 my $bs = shift;
166 my @b64s;
167 my $hexstr;
168
169 # convert string to list of numbers base 10
170 foreach my $chr (split '', $bs) {
171 foreach (keys %b64table) {
172 push @b64s, $_ if $b64table{$_} eq $chr;
173 }
174 }
175
176 while (@b64s) {
177 my $bv = ""; vec($bv,0,16) = 0;
178 my $a = shift @b64s;
179 foreach (0..5) {my $i=5-$_; my $bt=$a%2; vec($bv,$i,1) = $bt; $a = int($a/2); }
180 $a = shift @b64s;
181 foreach (6..11) {my $i=17-$_;my $bt=$a%2; vec($bv,$i,1) = $bt; $a = int($a/2); }
182 $hexstr .= unpack "h3", $bv;
183 }
184
185 # print "hexstr=$hexstr; @b64s\n";
186
187 #
188 # NOTE on padding
189 # if we pad 4 0-bits, we need to know that there wasn't an actual 0
190 # on the input string (hexstr).
191 #
192 # since padding 4 0's is more common than having the last hex
193 # be a 0, we could append a special char indicating last 4 0 bits
194 # were not padding 0's.
195 #
196 # But, we will customize these functions for razor2's needs.
197 # 64-char hex string ==> 43-char b64 string ==> 66-char hex. truncate.
198 # 40-char hex string ==> 27-char b64 string ==> 42-char hex. truncate.
199 # 15-char hex string ==> 10-char b64 string ==> 15-char hex. ok.
200 #
201 # 20-byte hex string is 40 chars
202 # $hexstr = substr($hexstr, 0, 20) if (length $hexstr == 21) && (substr($hexstr, -1) eq '0');
203 # $hexstr = substr($hexstr, 0, 40) if (length $hexstr == 42) && (substr($hexstr, -2) eq '00');
204 # $hexstr = substr($hexstr, 0, 64) if (length $hexstr == 66) && (substr($hexstr, -2) eq '00');
205
206 $hexstr = substr($hexstr, 0, 40) if (length($hexstr) == 42);
207 $hexstr = substr($hexstr, 0, 64) if (length($hexstr) == 66);
208 return $hexstr;
209
210}
211
212
213# can be called 2 ways
214# - makesis(%hash) aka makesis( p => 0, cf => 95 )
215# - makesis($hashref) aka makesis({p => 0, cf => 95})
216
217sub makesis {
218 my $first = shift;
219 my $data;
220 if (ref($first) eq 'HASH') {
221 $data = $first;
222 } else {
223 $data = {$first, @_};
224 }
225 my $sis = '';
226 foreach (sort keys %$data) {
227 $sis .= "$_=" . (exists $data->{$_} ? uri_escape($data->{$_}) : '') . '&';
228 }
229
230 # This is 10x faster than the equivalent regex version.
231 return substr($sis, 0, length($sis)-1) . "\r\n";
232}
233
234
235sub parsesis {
236
237 my $query = $_[1] || {};
238 my $wantref = 1 if $_[1];
239
240 # Parse the query.
241
242 $_[0] =~ s/\n$//; # SIS shouldn't have this!
243 $_[0] =~ s/\r$//; # SIS shouldn't have this!
244
245 my @pairs = split /\&/, $_[0];
246
247 for (@pairs) {
248 my ($key, $value) = split /=/, $_;
249 $query->{$key} = defined $value ? uri_unescape($value) : '';
250 }
251
252 return $query if $wantref;
253 return %$query;
254}
255
256
257# version of makesis that doesn't to uri escaping
258# for things we know don't require escaping
259
260# can be called 2 ways
261# - makesis(%hash) aka makesis( p => 0, cf => 95 )
262# - makesis($hashref) aka makesis({p => 0, cf => 95})
263
264sub makesis_nue {
265 my $first = shift;
266 my $data;
267 if (ref($first) eq 'HASH') {
268 $data = $first;
269 } else {
270 $data = {$first, @_};
271 }
272 my $sis = '';
273 foreach (sort keys %$data) {
274 $sis .= "$_=";
275 $sis .= $data->{$_} if exists($data->{$_});
276 $sis .= '&';
277 }
278
279 # This is 10x faster than the equivalent regex version.
280 return substr($sis, 0, length($sis)-1) . "\r\n";
281}
282
283
284sub parsesis_nue {
285
286 my $query = $_[1] || {};
287 my $wantref = 1 if $_[1];
288
289 # Parse the query.
290 $_[0] =~ s/\r\n$//;
291 my @pairs = split /\&/, $_[0];
292
293 for (@pairs) {
294 my ($key, $value) = split /=/, $_;
295 $query->{$key} = $value;
296 }
297
298 return $query if $wantref;
299 return %$query;
300}
301
302
303sub to_batched_query {
304 my ($queries, $bql, $bqs, $novar) = @_;
305 my @bqueries;
306
307 # Breaks up queries into batches, where batches are limited to:
308 # - at most $bql lines long --OR--
309 # - at most $bqs kb in size
310 # if bqs or bql == 0 or undef, no limit.
311 #
312 # fixme - optimization for aggregator:
313 # sort, so all checks are together, all reports together, etc.
314 # problem is user will want to maintain array order
315
316 # $queries is array ref of either:
317 # strings - sis, ready to go
318 # hash ref - need to create sis
319 # my $q = ref($queries->[0]) eq 'HASH' ? makesis_batch($queries) : $queries;
320
321 # for right now, we'll just assume hash ref
322 return unless ref($queries->[0]) eq 'HASH';
323
324 my $last;
325 my $line;
326 my $linecnt = 0;
327 my $batchmode = 0;
328 foreach my $cur (@$queries) {
329
330 # my $dobj = debugobj($cur); print "dbg-doing obj: $dobj\n";
331
332 #
333 # handle cases where we submit email blob (message = * )
334 #
335 if (exists $cur->{message}) {
336 my $msg = $cur->{message};
337 delete $cur->{message};
338 $line = "-". makesis($cur);
339 $cur->{message} = $msg;
340 $line =~ s/\r\n$//s;
341 $line .= "&message=*\r\n$msg\r\n.\r\n";
342 push @bqueries, $line;
343 next;
344 }
345
346 unless ($last) {
347 #
348 # start beginning of new batch
349 #
350 $last = $cur;
351 next;
352 }
353 unless ($batchmode) {
354 #
355 # line after beginning of new batch
356 # if similar, start variable batchmode.
357 # if not, start batchmode without variables
358 #
359 my ($both, $diff) = findsimilar($last, $cur);
360 if ($diff && !$novar) {
361 $batchmode = 2;
362 $line = "-". makesis_nue($both);
363 # fixme - we might want to uri_escape()
364 # but everything should be alphanum or our uri-safe base64
365 $line .= join(",", map "$last->{$_}", @$diff) ."\r\n";
366 $line .= join(",", map "$cur->{$_}", @$diff) ."\r\n";
367 $last = $both; # last is now 'template'
368 $linecnt = 2;
369 } else {
370 $batchmode = 1;
371 $line = "-". makesis($last);
372 $line .= makesis_nue($cur);
373 $linecnt = 2;
374 }
375 next;
376 } else {
377 #
378 # We're in batchmode.
379 # end if batch maxed out (bqs or bql reached)
380 # end if batchmode with variables and cur doesn't match
381 # end batch
382 #
383 my ($both, $diff) = findsimilar($last, $cur) if ($batchmode == 2);
384 if ( ($bqs && (length($line) > ($bqs*1024))) ||
385 ($bql && ($linecnt >= $bql)) ||
386 ($batchmode == 2 && !$diff) ) {
387 $batchmode = 0;
388 $line .= ".\r\n";
389 push @bqueries, $line;
390 $last = $cur;
391 } else {
392 #
393 # fixme - we might go passed bqs by a little bit. prolly ok.
394 #
395 if ($batchmode == 2) {
396 $line .= join(",", map "$cur->{$_}", @$diff) ."\r\n";
397 } else {
398 $line .= makesis_nue($cur);
399 }
400 $linecnt++;
401 }
402 }
403 }
404 if ($batchmode) {
405 $line .= ".\r\n";
406 push @bqueries, $line;
407 } elsif ($last) {
408 $line = makesis($last);
409 push @bqueries, $line;
410 }
411
412 return \@bqueries;
413}
414
415
416# compares keys in hash ref's a & b
417#
418# return
419# if both hashes have different keys
420#
421# return (1)
422# if both hashes have same keys and values,
423#
424# returns 2 refs
425# if both hashes have same keys but different values
426# - first is hash, copy of a & b where vals are same.
427# where vals are diff, keys are copied with val = '?'
428# - second is list contains keys where values are different
429
430sub findsimilar {
431 my ($a, $b) = @_;
432 my @diffvalues = ();
433 my %samevalues = ();
434
435 foreach (sort keys %$a) {
436 return unless exists $b->{$_};
437 if ($b->{$_} eq $a->{$_}) {
438 $samevalues{$_} = $a->{$_};
439 } else {
440 $samevalues{$_} = "?";
441 push @diffvalues, $_;
442 }
443 }
444 foreach (sort keys %$b) {
445 return unless exists $a->{$_};
446 }
447 # if too hashes are exactly the same, not sure.
448 # treat as if they are totally different.
449 return (1) unless scalar(@diffvalues) > 0;
450
451 return (\%samevalues, \@diffvalues);
452}
453
454sub from_batched_query {
455
456 my ($queries) = @_;
457 my @queries;
458
459 my ($fq, $rq) = $queries =~ m:^\-(.*?)\r\n(.*)$:sm;
460
461 unless ($fq && $rq) {
462 # allow from_batched_query to handle non-batches
463 $fq = $queries;
464 $rq = "";
465 }
466
467 if ($fq =~ m:\?:) {
468
469 my %template_query = ();
470 my @seq = ();
471 my @pairs = split /\&/, $fq;
472 for (@pairs) {
473 my ($key, $value) = split /=/, $_;
474 if ($value eq "?") {
475 push @seq, $key;
476 } else {
477 $template_query{$key} = $value ? uri_unescape($value) : '';
478 }
479 }
480
481 for (split /\r\n/, $rq) {
482 my @values = split /,/, $_;
483 my %foo = %template_query;
484 @foo{@seq} = @values;
485 push @queries, \%foo;
486 }
487
488 return undef unless @queries;
489
490 } elsif ($fq =~ m:\*:) {
491
492 my %query = parsesis($fq);
493 for (keys %query) {
494 if ($query{$_} eq "*") {
495 $query{$_} = $rq;
496 last;
497 }
498 }
499 push @queries, \%query;
500
501 } else {
502
503
504 # Don't split $queries. Use $fq and $rq instead since
505 # $fq is already normalized.
506
507 my %q = parsesis($fq);
508 push @queries, \%q;
509 for (split /\r\n/, $rq) {
510 my %q = parsesis($_);
511 push @queries, \%q;
512 }
513
514 }
515
516 return \@queries;
517
518}
519
520
521sub randstr {
522
523 my $size = shift;
524 my $alphanum = shift;
525 my $str;
526
527 $alphanum = 1 if !defined($alphanum);
528
529 # ASCII
530 # 33-126 printable chars
531 # 48-57 numbers
532 # 65-90 uppercase alpha
533 # 97-122 lowercase alpha
534
535 while ($size--) {
536 if ($alphanum) {
537 $str .= $b64table{ int(rand 64) };
538 } else {
539 $str .= chr(int(rand 94) + 33);
540 }
541 }
542
543 return $str;
544
545}
546
547
548sub escape_smtp_terminator {
549
550 my ($textref) = @_;
551 $$textref =~ s/\r\n\./\r\n\.\./gm
552
553}
554
555
556sub unescape_smtp_terminator {
557
558 my ($textref) = @_;
559 $$textref =~ s/\r\n\.\./\r\n\./gm;
560
561}
562
563
564sub hex_dump {
565 my $string = shift;
566
567 for (split //, $string) {
568 print ord($_) . " ";
569 }
570 print "\n";
571}
572
- -
575sub hash2str {
576
577 my $href = shift;
578 my %hash = %$href;
579 my ($str, $key);
580
581 for $key ( keys %hash ) {
582 my $tstr;
583 if ( ref $hash{$key} eq 'ARRAY' ) {
584 for ( @{ $hash{ $key }} ) { $tstr .= escape( $_ ) . "," } $str =~ s/,$//;
585 } elsif ( !(ref $hash{$key}) ) {
586 $tstr .= escape ( $hash{$key} );
587 }
588 if ( $tstr ) { $str .= "$key:$tstr&" }
589 }
590
591 $str =~ s/&$//; return $str;
592
593}
594
595
596sub str2hash {
597
598 my $str = shift;
599 my %hash;
600 my @pairs = split /(?<!\\)&/, $str;
601
602 for ( @pairs ) {
603 my ( $key, $data ) = split /(?<!\\):/, $_, 2;
604 if ( $data =~ /(?<!\\),/ ) {
605 my @list = split /(?<!\\),/, $data;
606 for ( @list ) { $_ = unescape ( $_ ) };
607 $hash{$key} = [@list];
608 } else { $hash{$key} = unescape ( $data ) }
609 }
610
611 return \%hash;
612
613}
614
615#
616# If body of an email has mime attachments, the headers
617# will indicate this. likewise, each mime attachment
618# could also have nested mime attachments with headers that
619# must indicate this. standard recursion.
620#
621# However, all 'leaf node' attachments don't have to have
622# headers based on RFC xxx. They must be created before
623# sending to razor servers.
624#
625#
626# Example of mail with nested MIME attachments:
627#
628# level 1 level 2 level 3 ....
629# --------------------------------------
630# * - Header 1
631# * - Body 1
632# * -- A -- mime-header 2A
633# | mime-body 2A
634# | |
635# | * ---- a --- mime-header 3a
636# | | mime-body 3a
637# | |
638# | * -----b --- mime-header 3b
639# | mime-body 3b
640# * -- B -- mime-body 2B
641# |
642# * -- C -- mime-header 2C
643# |
644# * ---- c -- mime-body 3c
645# |
646# * ---- d -- mime-header 3d
647# mime-body 3d
648#
649# should be reported as
650#
651# Header 1 \r\n
652# part 1 = p(header 3a, body 3a) \r\n
653# part 2 = p(header 3b, body 3b) \r\n
654# part 3 = p(<generated header 2B>, body 2B) \r\n
655# part 4 = p(<generated header 3c>, body 3c) \r\n
656# part 5 = p(header 3d, body 3d) \r\n
657# .\r\n
658#
659# Notes:
660# - Order of parts does not matter.
661#
662# - Each part is processed by prep_mail, p(), before report/check
663#
664# - Except for original Header everything but leaf nodes
665# are discarded. In the above example,
666#
667# Body 1, header 2A, header 2C - are discarded
668#
669#
670# Detailed Explanation:
671#
672# Header 1 says 'Content-Type: multipart' with boundary definition
673# Based on the Boundary, Body 1 is split into A, B, C.
674#
675# A is analyzed, has headers which also say 'Content-Type: multipart'
676# with a different boundary, and it is split into 3a, 3b. 2A is what
677# appears between header 2a and first boundary, so its ignored.
678# 3a and 3b both have header info, so they are sent thru prep_mail
679# and reported/checked
680#
681# <generated header 2B> is based on Header 1 to determine content
682# type. if unknown, dummy header is added,
683# and both are reported as a body part
684#
685# C is analyzed, has headers which also say 'Content-Type: multipart'
686# with a different boundary, and it is split into 3c, 3d.
687#
688# <generated header 3c> is based on header 2c to determine content
689# type. if unknown, dummy header is added,
690# and both are reported as a body part
691#
692# 3d has header info, so header+body are sent thru prep_mail
693# and reported/checked
694#
695#
696#
697# prep_mail() basically truncates msgs that are too big and/or
698# base64 encodes binaries or 8-bit msgs.
699#
700
701
702# Split mime splits up multi-part mime mails.
703#
704# returns array of parts, where each part is
705# headers\n\nbody
706#
707# headers will only contain X-Razor2 and Content- headers
708#
709# If not a mime mail, and the headers do not have any
710# Content-* headers, then the only headers will be X-Razor2 ones
711# (perhaps create Content-Type in da future?)
712#
713# body can be blank. nuked in prep_part
714#
715sub split_mime {
716
717 my ($mailref, $ver, $recursive, $debug ) = @_;
718
719 return unless ref($mailref);
720
721 # mime-bodies must have header or initial blank lines.
722 #
723 my ($hdr, $body) = split /\n\r*\n/, $$mailref, 2;
724 my $no_valid_mime_hdr = 0;
725
726 unless ($body) {
727
728 # no blank lines, definately no header, so no nested mimes
729
730 print "split_mime: no blank lines\n" if $debug > 1;
731 $no_valid_mime_hdr = 1;
732 }
733 # fixme - handle attachments? i.e. if header has this
734 # Content-Disposition: attachment
735 # than body is mail, we could recursively call ourselves
736 # again with body (check body for hdrs first?)
737
738 # Make sure $hdr is really a hdr
739 #
740 # Details: If mime part is not RFC compliant, it could just
741 # be a body with blank lines. hdr could have just matched part
742 # of the body.
743 #
744 # valid mime header is determined by existance of 'Content-Type'
745 # If we're not recursive, we don't check orig_headers, we assume its ok.
746 # not sure if this is the best way ...
747 #
748
749 if ($recursive && ($hdr !~ /^Content-Type:/i)) {
750 $no_valid_mime_hdr = 1;
751 print "uh-oh, bad mime-body len=". length($$mailref) .":\n$$mailref\n" if $debug;
752 #print "split_mime: recur=($recursive)\n";
753 }
754
755 if ($no_valid_mime_hdr) {
756 #
757 # create dummy header and return it
758 #
759 # $ver should be '1' or client name + version
760 my $mimepart = "X-Razor2-Agent: $ver\n";
761 my $hrdlen = length($mimepart);
762
763 # if it has initial blank line, hurray for rfc compliance
764 if ($$mailref =~ /^\n/) {
765 $mimepart .= $$mailref;
766 } else {
767 $mimepart .= "\n". $$mailref;
768 }
769 print "split_mime: returning total_len=". length($mimepart) ."; hdrs=".
770 $hdrlen .", body=". length($$mailref) ."\n" if $debug;
771 return (\$mimepart);
772 }
773
774 #
775 # Now we split mailref into hdr and body
776 # check hdr for nested mime (boundary)
777 #
778
779 my $orig_hdr = $hdr;
780 $hdr =~ s/\n\s+//sg; # merge multi-line headers
781 # nuke everything but X-Razor2 and Content-* headers
782 my $trimmed_hdr = "";
783 foreach (split '\n',$hdr) {
784 /^Content-/i and $trimmed_hdr .= "$_\n";
785 /^X-Razor2/i and $trimmed_hdr .= "$_\n";
786 }
787
788 my $boundary = "";
789
790 if ($trimmed_hdr =~ /Content-Type: multipart.+boundary=("[^"]+"|\S+)/ig) {
791 $boundary = $1;
792 }
793
794 if ($boundary eq "") {
795 #
796 # valid mime hdr, but no nested mime.
797 # add razor hdr and return.
798 #
799 print "split_mime: valid_mime_hdr [len=". length($orig_hdr)
800 ."], but no nested mime\n$orig_hdr\n" if $debug > 1;
801 $trimmed_hdr = "X-Razor2-Agent: $ver\n" . $trimmed_hdr;
802 my $mimepart = "$trimmed_hdr\n$body";
803 print "split_mime: returning total=". length($mimepart) ."; hdrs=".
804 length($trimmed_hdr) .", body=". length($body) ."\n" if $debug;
805 return (\$mimepart);
806 }
807 $boundary = $1 if $boundary =~ /^"(.*)"$/;
808
809 # At this point, we know body has mime parts.
810 #
811 my @mimeparts;
812
813 #
814 # According to RFC 1341
815 # http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
816 #
817 # mimes are separated by \n--boundary\n
818 # and are followed immediately by header, blank line, body;
819 # or blank line and body.
820 #
821 # if no header in mime part, default content type for mime body is
822 # based on header where 'Content-Type: multipart*' was defined, where
823 # multipart/digest --> message/rfc822
824 # multipart/* --> text/plain
825 # perhaps we should add a header if none present?
826 #
827 # if a body contains mimes, the 'preable', or stuff before
828 # the first boundary, and the 'epilogue', the stuff after the
829 # last boudary, are to be ignored.
830 #
831 # NOTE: We split up multiparts, but content-type's can also be
832 # nested. i.e, a header of 'Content-Type: message' can have a body
833 # of 'Content-Type: image'
834 #
835 $body =~ s/\n\Q--$boundary--\E.*$//sg; # trash last boundary and epilogue
836 if ($body =~ /^\Q--$boundary\E\r*\n/) {
837 # bug in some mails, make it RFC compliant
838 # now our split will work correctly
839 print "bad mime body [len=". length($body) ."], not doing \\n--boundary, fixed tho.\n" if $debug > 1;
840 $body = "garbage\n$body";
841 }
842 my @tmpparts;
843 unless ($body =~ /\Q--$boundary\E/) {
844 # Sometimes there's a boundary in the headers
845 # but none in email. In such cases, we'll
846 # treat the entire body as a part.
847 push @tmpparts, "garbage";
848 push @tmpparts, $body;
849 } else {
850 @tmpparts = split /\n\Q--$boundary\E\r*\n/, $body;
851 }
852 shift @tmpparts; # trash everything up to the first boundary;
853 foreach (@tmpparts) {
854 # perhaps we should add a header based on default content-type?
855 unless (/\S/s) {
856 print "skipping body part containing only whitespace [len=". length($_) ."]\n" if $debug;
857 next;
858 }
859 print "boundary: ". $recursive . "$boundary\n" if $debug > 1;
860 push @mimeparts, split_mime(\$_, $ver, " ". $recursive, $debug);
861 }
862 print "Saweeet!!! Boundary (". scalar(@mimeparts) ."): $boundary\n" if defined($boundary) && ($debug > 1);
863
864 return @mimeparts;
865}
866
867# mailref is not modified by this sub
868#
869sub prep_part {
870 my ($mailref, $maxheader, $maxbody) = @_;
871 #print "[". length($$mailref) ."] maxsize=$maxheader + $maxbody\n";
872
873 my ($hdr, $body) = split /\n\r*\n/, $$mailref, 2;
874 $hdr .= "\n"; # put newline back on last header line
875
876 unless ($body) {
877 #
878 # fixme - this should not happen.
879 # if it does, split_mime needs work
880 #
881 # print "prep_part got F**KED-up mimepart [len=". length($$mailref) ."]\n$$mailref\n";
882 return; # body is empty
883 }
884
885 # fixme - are these the best chars to check for binary?
886 my $is_binary = ($hdr =~ /^Content-Type-Encoding: 8-bit/) ||
887 ($body =~ /([\x00-\x1f|\x7f-\xff])/ and $1 !~ /[\r\n\t]/);
888
889 my $enBase64 = new Razor2::Preproc::enBase64;
890 $is_binary = $enBase64->isit($mailref);
891 $enBase64->doit(\$body) if $is_binary;
892
893 $body =~ s/\r+\n/\n/sg; # outlook sometimes does \r\r\n
894 $hdr =~ s/\r+\n/\n/sg;
895
896 if ((my $len = length($body)) > $maxbody) {
897 $body = substr $body, 0, $maxbody;
898 substr($body, -2) = "==" if $is_binary;
899
900 $hdr = "X-Razor2-Origlen-Body: $len\n" . $hdr;
901 #print "maxbody=$maxbody body went from $len to ". length($body) ."\n";
902 }
903
904 if ((my $len = length($hdr)) > $maxheader) {
905 $hdr = "X-Razor2-Origlen-Header: $len\n" . $hdr;
906 if (length($hdr) > $maxheader) {
907 $hdr = substr $hdr, 0, $maxheader;
908 $hdr =~ s/([^\n]+)$//s; # remove last, incomplete line
909 }
910 # print "maxhdr=$maxheader header went from $len to ". length($hdr) ."\n";
911 }
912
913 my $dude = "$hdr\n$body";
914
915 return $mailref if $dude eq $$mailref; # this happens majority of the time
916 return \$dude;
917}
918
- -
921# NOTE: Important function!
922# *must* be kept in sync with server and all clients
923# same holds true for prep_part()
924#
925# This is the preprocessing done on a mail before sent over network
926#
927sub prep_mail {
928 my ($mailref, $report_headers, $maxheader, $maxbody, $maxorighdr, $versionstring, $debug) = @_;
929 return unless ref($mailref);
930
931 print " prep_mail: orig=". length($$mailref) ."\n" if ($debug > 1);
932
933 my ($orig_hdr) = split /\n\r*\n/, $$mailref, 2;
934 $orig_hdr .= "\n"; # put newline back on last header line
935
936 my $ver = $versionstring || 1;
937 my @mimeparts = split_mime($mailref, $ver, 0, $debug);
938
939 my @mimeparts_prep;
940 foreach (@mimeparts) {
941 push @mimeparts_prep, prep_part($_, $maxheader, $maxbody);
942 }
943 unless ($report_headers) {
944 my $hdr = "X-Razor2-Headers-Suppressed: 1\n";
945 foreach (split '\n',$orig_hdr) {
946 /^Content-/i and $hdr .= "$_\n";
947 /^X-Razor2/i and $hdr .= "$_\n";
948 }
949 $orig_hdr = $hdr;
950 }
951
952 if ((my $len = length($orig_hdr)) > $maxorighdr) {
953 $hdr = "X-Razor2-Origlen-Header: $len\n" . $orig_hdr;
954 if (length($hdr) > $maxorighdr) {
955 $hdr = substr $hdr, 0, $maxorighdr;
956 $hdr =~ s/([^\n]+)$//s; # remove last, incomplete line
957 }
958 #print "max=$maxorighdr orig_header went from $len to ". length($hdr) ."\n";
959 $orig_hdr = $hdr;
960 }
961
962 if ($debug > 1) {
963 print "**** prep_mail done: headers=". length($orig_hdr);
964 foreach (0..$#mimeparts_prep) {
965 print "\n**** mail $_ [". length(${$mimeparts_prep[$_]}) ."] ". substr(${$mimeparts_prep[$_]} ,0,40);
966 }
967 print "\n\n";
968 }
969
970 return (\$orig_hdr, @mimeparts_prep);
971}
972
- -
975# from MIME::Parser
976 #my $parser = new MIME::Parser;
977 #my $entity = $parser->parse($body);
978 # foreach (dump_entity($entity))
979sub dump_entity {
980 my $ent = shift;
981 my @parts = $ent->parts;
982
983 if (@parts) { # multipart...
984 map { dump_entity($_) } @parts;
985 } else { # single part...
986 return ( $ent->body ); # return text blob
987print " Part: ", $ent->bodyhandle->path, " (", scalar($ent->head->mime_type), ")\n";
988 }
989}
990
991
992# input: hex string ("2D")
993# output: hash ref or array containg bits that are set
994# 2D == (1, 3, 4, 6)
995
996sub hexbits2hash {
997
998 my $hex = shift;
999 my %h;
1000 for (0..31) { if (hex($hex) & (2**31)>>(31-$_)) { $h{$_+1} = 1 } }
1001 return wantarray ? (sort keys %h) : \%h;
1002
1003}
1004
1005
1006# input: hash ref, array ref, or array containg bits that are set
1007# output: hex string ("2D")
1008# 2D == (4, 8, 32)
1009
1010sub hash2hexbits {
1011 my @bits = @_;
1012
1013 @bits = @{$bits[0]} if ref($bits[0]) eq 'ARRAY';
1014 @bits = (sort keys %{$bits[0]}) if ref($bits[0]) eq 'HASH';
1015
1016 my @all;
1017 my $i = 1;
1018 foreach (sort {$a <=> $b} @bits) {
1019 while (1) {
1020 push @all, 1 if $_ == $i;
1021 last if $_ == $i;
1022 push @all, 0;
1023 $i++;
1024 }
1025 }
1026 my $bs = join '', reverse @all;
1027 # fixme needs testing
1028 my $hex = (unpack "H*", pack "B*", join '', reverse @all);
1029
1030 return $hex
1031}
1032
1033# for debugging - dumps a obj to a string
1034sub debugobj {
1035 my ($obj, $prefix, $maxwidth) = @_;
1036
1037 $maxwidth ||= 70;
1038 return if (defined($prefix) && length($prefix) > $maxwidth);
1039
1040 my $line = "";
1041 $prefix .= " "x4;
1042
1043 if (my $r = ref($obj)) {
1044 if ($r eq 'HASH') {
1045 $line = "$r - $obj,". scalar(keys %$obj) ." keys\n";
1046 foreach (sort keys %$obj) {
1047 $line .= "$prefix$_ => ". debugobj($obj->{$_}, $prefix);
1048 }
1049 $line .= $prefix ."[empty]\n" unless (keys %$obj);
1050 } elsif ($r eq 'ARRAY') {
1051 $line = "$r - $obj,". scalar(@$obj) ." items\n";
1052 foreach (@$obj) {
1053 $line .= $prefix . debugobj($_, $prefix);
1054 }
1055 $line .= $prefix ."[empty]\n" unless (@$obj);
1056 } elsif ($r eq 'REF') {
1057 $line = "$r - $obj\n";
1058 $line .= $prefix . debugobj($$obj, $prefix);
1059 } elsif ($r eq 'SCALAR') {
1060 $line = "$r - $obj\n";
1061 $line .= $prefix . debugobj($$obj, $prefix);
1062 }
1063 } else {
1064 if (defined $obj) {
1065 $line = $1 if substr($obj, 0, $maxwidth-length($prefix)) =~ /^([^\n]+)/;
1066 $line = "[length=". length($obj) ."] ". $line
1067 if (length($line) ne length($obj));
1068 } else {
1069 $line = "[empty]";
1070 }
1071 $line .= "\n";
1072 }
1073 return $line;
1074}
1075
1076
1077sub clean_body {
1078
1079 my ($self, $bodyref) = @_;
1080
1081 my $hasheaders = 1;
1082
1083 if ($self->{preprocs}->{deBase64}->isit($bodyref)) {
1084 $self->{preprocs}->{deBase64}->doit($bodyref);
1085 $hasheaders = 0;
1086 }
1087
1088 if ($self->{preprocs}->{deQP}->isit($bodyref)) {
1089 $self->{preprocs}->{deQP}->doit($bodyref);
1090 $hasheaders = 0;
1091 }
1092
1093 if ($self->{preprocs}->{deHTML}->isit($bodyref)) {
1094 $self->{preprocs}->{deHTML}->doit($bodyref);
1095 }
1096
1097 if ($hasheaders) {
1098 $$bodyref =~ s/^.*?\n\n//s;
1099 }
1100
1101
1102}
1103
1104
1105sub round {
1106
1107 my $float = shift;
1108 return sprintf("%.0f", $float);
1109
1110}
1111
- -
1114sub fisher_yates_shuffle {
1115 my $deck = shift; # $deck is a reference to an array
1116 my $i = @$deck;
1117 while ($i--) {
1118 my $j = int rand ($i+1);
1119 @$deck[$i,$j] = @$deck[$j,$i];
1120 }
1121}
1122
1123
1124122µs1;
1125