Profile of Razor2/String.pm

Filename	/usr/local/lib/perl5/site_perl/mach/5.24/Razor2/String.pm
Statements	Executed 82 statements in 12.9ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	11.0ms	12.2ms	Razor2::String::::BEGIN@7Razor2::String::BEGIN@7
1	1	1	4.33ms	4.84ms	Razor2::String::::BEGIN@5Razor2::String::BEGIN@5
1	1	1	742µs	742µs	Razor2::String::::BEGIN@6Razor2::String::BEGIN@6
1	1	1	727µs	1.88ms	Razor2::String::::BEGIN@4Razor2::String::BEGIN@4
1	1	1	560µs	560µs	Razor2::String::::BEGIN@31Razor2::String::BEGIN@31
1	1	1	28µs	231µs	Razor2::String::::BEGIN@12Razor2::String::BEGIN@12
0	0	0	0s	0s	Razor2::String::::base64tohexRazor2::String::base64tohex
0	0	0	0s	0s	Razor2::String::::clean_bodyRazor2::String::clean_body
0	0	0	0s	0s	Razor2::String::::debugobjRazor2::String::debugobj
0	0	0	0s	0s	Razor2::String::::dump_entityRazor2::String::dump_entity
0	0	0	0s	0s	Razor2::String::escape_smtp_terminator
0	0	0	0s	0s	Razor2::String::::findsimilarRazor2::String::findsimilar
0	0	0	0s	0s	Razor2::String::::fisher_yates_shuffleRazor2::String::fisher_yates_shuffle
0	0	0	0s	0s	Razor2::String::::from_batched_queryRazor2::String::from_batched_query
0	0	0	0s	0s	Razor2::String::::hash2hexbitsRazor2::String::hash2hexbits
0	0	0	0s	0s	Razor2::String::::hash2strRazor2::String::hash2str
0	0	0	0s	0s	Razor2::String::::hex_dumpRazor2::String::hex_dump
0	0	0	0s	0s	Razor2::String::::hexbits2hashRazor2::String::hexbits2hash
0	0	0	0s	0s	Razor2::String::::hextobase64Razor2::String::hextobase64
0	0	0	0s	0s	Razor2::String::::hmac2_sha1Razor2::String::hmac2_sha1
0	0	0	0s	0s	Razor2::String::::hmac3_sha1Razor2::String::hmac3_sha1
0	0	0	0s	0s	Razor2::String::::hmac_sha1Razor2::String::hmac_sha1
0	0	0	0s	0s	Razor2::String::::makesisRazor2::String::makesis
0	0	0	0s	0s	Razor2::String::::makesis_nueRazor2::String::makesis_nue
0	0	0	0s	0s	Razor2::String::::parsesisRazor2::String::parsesis
0	0	0	0s	0s	Razor2::String::::parsesis_nueRazor2::String::parsesis_nue
0	0	0	0s	0s	Razor2::String::::prep_mailRazor2::String::prep_mail
0	0	0	0s	0s	Razor2::String::::prep_partRazor2::String::prep_part
0	0	0	0s	0s	Razor2::String::::printb64tableRazor2::String::printb64table
0	0	0	0s	0s	Razor2::String::::randstrRazor2::String::randstr
0	0	0	0s	0s	Razor2::String::::roundRazor2::String::round
0	0	0	0s	0s	Razor2::String::::split_mimeRazor2::String::split_mime
0	0	0	0s	0s	Razor2::String::::str2hashRazor2::String::str2hash
0	0	0	0s	0s	Razor2::String::::to_batched_queryRazor2::String::to_batched_query
0	0	0	0s	0s	Razor2::String::unescape_smtp_terminator
0	0	0	0s	0s	Razor2::String::::xor_keyRazor2::String::xor_key

Call graph for these subroutines as a Graphviz dot language file.

Line	State ments	Time on line	Calls	Time in subs	Code
1					# $Id: String.pm,v 1.48 2005/06/13 21:09:59 vipul Exp $
2					package Razor2::String;
3
4	2	350µs	2	2.08ms	# spent 1.88ms (727µs+1.15) within Razor2::String::BEGIN@4 which was called: # once (727µs+1.15ms) by Razor2::Client::Agent::BEGIN@18 at line 4 use Digest::SHA1 qw(sha1_hex); # spent 1.88ms making 1 call to Razor2::String::BEGIN@4 # spent 201µs making 1 call to Exporter::import
5	2	284µs	2	5.05ms	# spent 4.84ms (4.33+516µs) within Razor2::String::BEGIN@5 which was called: # once (4.33ms+516µs) by Razor2::Client::Agent::BEGIN@18 at line 5 use URI::Escape; # spent 4.84ms making 1 call to Razor2::String::BEGIN@5 # spent 206µs making 1 call to Exporter::import
6	2	803µs	1	742µs	# spent 742µs within Razor2::String::BEGIN@6 which was called: # once (742µs+0s) by Razor2::Client::Agent::BEGIN@18 at line 6 use Razor2::Preproc::enBase64; # spent 742µs making 1 call to Razor2::String::BEGIN@6
7	2	426µs	2	12.3ms	# spent 12.2ms (11.0+1.21) within Razor2::String::BEGIN@7 which was called: # once (11.0ms+1.21ms) by Razor2::Client::Agent::BEGIN@18 at line 7 use Data::Dumper; # spent 12.2ms making 1 call to Razor2::String::BEGIN@7 # spent 123µs making 1 call to Exporter::import
8
9					#use MIME::Parser;
10
11	1	2µs			require Exporter;
12	2	303µs	2	434µs	# spent 231µs (28+203) within Razor2::String::BEGIN@12 which was called: # once (28µs+203µs) by Razor2::Client::Agent::BEGIN@18 at line 12 use vars qw ( @ISA $VERSION @EXPORT ); # spent 231µs making 1 call to Razor2::String::BEGIN@12 # spent 203µs making 1 call to vars::import
13	1	20µs			@ISA = qw(Exporter);
14
15	1	6µs			@EXPORT = qw( hmac_sha1 xor_key
16					from_batched_query
17					to_batched_query findsimilar debugobj
18					makesis parsesis makesis_nue parsesis_nue
19					hextobase64 base64tohex
20					randstr round
21					hex_dump prep_mail
22					prehash printb64table
23					hexbits2hash hmac2_sha1
24					fisher_yates_shuffle
25					);
26
27
28					# Same as the alphabet from RFC 1521, except s:/:_: and s:+:-:
29	1	2µs			my %b64table;
30
31					# spent 560µs within Razor2::String::BEGIN@31 which was called: # once (560µs+0s) by Razor2::Client::Agent::BEGIN@18 at line 42 BEGIN {
32					# ASCII
33					# 33-126 printable chars
34					# 48-57 numbers
35					# 65-90 uppercase alpha
36					# 97-122 lowercase alpha
37	27	220µs			foreach (0..25) { $b64table{$_} = chr($_ + 65); }
38	27	219µs			foreach (26..51) { $b64table{$_} = chr($_ + 71); }
39	11	101µs			foreach (52..61) { $b64table{$_} = chr($_ - 4 ); }
40	1	2µs			$b64table{62} = "-";
41	1	22µs			$b64table{63} = "_";
42	1	10.1ms	1	560µs	} # spent 560µs making 1 call to Razor2::String::BEGIN@31
43
44
45					sub printb64table {
46					foreach (0..63) {
47					print "$_ = $b64table{$_}\n";
48					}
49					}
50
51
52					sub hmac_sha1 {
53					my $text = shift;
54					my $iv1 = shift;
55					my $iv2 = shift;
56					my ($b64, $hex) = hmac2_sha1($text, $iv1, $iv2);
57					return $b64;
58					}
59
60
61					# taken in part from RFC 2104
62					# http://www.cs.ucsd.edu/users/mihir/papers/hmac.html
63
64					sub hmac2_sha1 {
65					my $text = shift;
66					my $iv1 = shift;
67					my $iv2 = shift;
68
69					return unless $text && $iv1 && $iv2;
70					die "no ref's allowed" if ref($text);
71
72					my $ctx = Digest::SHA1->new;
73					$ctx->add($iv2);
74					$ctx->add($text);
75					my $digest = $ctx->hexdigest;
76
77					$ctx = Digest::SHA1->new;
78					$ctx->add($iv1);
79					$ctx->add($digest);
80					$digest = $ctx->hexdigest;
81
82					return (hextobase64($digest), $digest);
83					}
84
85
86					sub hmac3_sha1 {
87					my $text = shift;
88					my $iv1 = shift;
89					my $iv2 = shift;
90
91					return unless $text && $iv1 && $iv2;
92					die "no ref's allowed" if ref($text);
93
94					my $digest = $text;
95					$digest = sha1_hex($iv1 . $digest);
96					$digest = sha1_hex($iv2 . $digest);
97					return (hextobase64($digest), $digest);
98					}
99
100
101					# part of RFC 2104 - see hmac_sha1()
102
103					sub xor_key {
104					my $key = shift;
105
106					# key length should never be > 64 chars;
107					#
108					# dont need this ... see Bitwise String Operators
109					# $enc .= '\0' x (64 - length($pass));
110
111					my $iv1 = "\x36" x 64 ^ $key;
112					my $iv2 = "\x5C" x 64 ^ $key;
113
114					return ($iv1, $iv2);
115					}
116
117
118					# converts a string where each char is a hex (4-bit) value
119					# to a string where each char is a base64 (6-bit) value
120
121					sub hextobase64 {
122
123					my $hs = shift;
124
125					my @b64s;
126					my $i = 0;
127
128					while ($i < length($hs)) {
129
130					# process 3 hex char chunks at a time
131					my $hex3 = substr $hs, $i, 3;
132					$i += 3;
133
134					my $bv = pack "h3", $hex3;
135					my $cur = 0;
136					foreach (0..5) { my $bt = vec($bv,$_,1); $cur += $bt; $cur *= 2; }
137					push @b64s, $cur/2; $cur = 0;
138					foreach (6..11) { my $bt = vec($bv,$_,1); $cur += $bt; $cur *= 2; }
139					push @b64s, $cur/2;
140					#foreach (0..15) { my $bt = vec($bv,$_,1); print "$_=$bt, cur=$cur\n"; }
141					#print " -- hex=$hex3; @b64s\n";
142					}
143
144					my $bs = "";
145					foreach (@b64s) { $bs .= $b64table{$_}; }
146
147					# print "b64=$bs; hex=". base64tohex($bs) ."\n";
148
149					# Fixme - change encoding so 1 hex char ==> 1 b64 char
150					# 64-char hex string ==> 44-char b64 string. truncate to 43.
151					# 40-char hex string ==> 28-char b64 string. truncate to 27.
152					# $bs = substr($bs, 0, 43) if (length $bs == 44) && (substr($bs, -1) eq '0');
153					# $bs = substr($bs, 0, 27) if (length $bs == 28) && (substr($bs, -1) eq '0');
154
155					return $bs;
156
157					}
158
159
160					# converts a string where each char is a base64 (6-bit) value
161					# to a string where each char is a hex (4-bit) value
162
163					sub base64tohex {
164
165					my $bs = shift;
166					my @b64s;
167					my $hexstr;
168
169					# convert string to list of numbers base 10
170					foreach my $chr (split '', $bs) {
171					foreach (keys %b64table) {
172					push @b64s, $_ if $b64table{$_} eq $chr;
173					}
174					}
175
176					while (@b64s) {
177					my $bv = ""; vec($bv,0,16) = 0;
178					my $a = shift @b64s;
179					foreach (0..5) {my $i=5-$_; my $bt=$a%2; vec($bv,$i,1) = $bt; $a = int($a/2); }
180					$a = shift @b64s;
181					foreach (6..11) {my $i=17-$_;my $bt=$a%2; vec($bv,$i,1) = $bt; $a = int($a/2); }
182					$hexstr .= unpack "h3", $bv;
183					}
184
185					# print "hexstr=$hexstr; @b64s\n";
186
187					#
188					# NOTE on padding
189					# if we pad 4 0-bits, we need to know that there wasn't an actual 0
190					# on the input string (hexstr).
191					#
192					# since padding 4 0's is more common than having the last hex
193					# be a 0, we could append a special char indicating last 4 0 bits
194					# were not padding 0's.
195					#
196					# But, we will customize these functions for razor2's needs.
197					# 64-char hex string ==> 43-char b64 string ==> 66-char hex. truncate.
198					# 40-char hex string ==> 27-char b64 string ==> 42-char hex. truncate.
199					# 15-char hex string ==> 10-char b64 string ==> 15-char hex. ok.
200					#
201					# 20-byte hex string is 40 chars
202					# $hexstr = substr($hexstr, 0, 20) if (length $hexstr == 21) && (substr($hexstr, -1) eq '0');
203					# $hexstr = substr($hexstr, 0, 40) if (length $hexstr == 42) && (substr($hexstr, -2) eq '00');
204					# $hexstr = substr($hexstr, 0, 64) if (length $hexstr == 66) && (substr($hexstr, -2) eq '00');
205
206					$hexstr = substr($hexstr, 0, 40) if (length($hexstr) == 42);
207					$hexstr = substr($hexstr, 0, 64) if (length($hexstr) == 66);
208					return $hexstr;
209
210					}
211
212
213					# can be called 2 ways
214					# - makesis(%hash) aka makesis( p => 0, cf => 95 )
215					# - makesis($hashref) aka makesis({p => 0, cf => 95})
216
217					sub makesis {
218					my $first = shift;
219					my $data;
220					if (ref($first) eq 'HASH') {
221					$data = $first;
222					} else {
223					$data = {$first, @_};
224					}
225					my $sis = '';
226					foreach (sort keys %$data) {
227					$sis .= "$_=" . (exists $data->{$_} ? uri_escape($data->{$_}) : '') . '&';
228					}
229
230					# This is 10x faster than the equivalent regex version.
231					return substr($sis, 0, length($sis)-1) . "\r\n";
232					}
233
234
235					sub parsesis {
236
237					my $query = $_[1] \|\| {};
238					my $wantref = 1 if $_[1];
239
240					# Parse the query.
241
242					$_[0] =~ s/\n$//; # SIS shouldn't have this!
243					$_[0] =~ s/\r$//; # SIS shouldn't have this!
244
245					my @pairs = split /\&/, $_[0];
246
247					for (@pairs) {
248					my ($key, $value) = split /=/, $_;
249					$query->{$key} = defined $value ? uri_unescape($value) : '';
250					}
251
252					return $query if $wantref;
253					return %$query;
254					}
255
256
257					# version of makesis that doesn't to uri escaping
258					# for things we know don't require escaping
259
260					# can be called 2 ways
261					# - makesis(%hash) aka makesis( p => 0, cf => 95 )
262					# - makesis($hashref) aka makesis({p => 0, cf => 95})
263
264					sub makesis_nue {
265					my $first = shift;
266					my $data;
267					if (ref($first) eq 'HASH') {
268					$data = $first;
269					} else {
270					$data = {$first, @_};
271					}
272					my $sis = '';
273					foreach (sort keys %$data) {
274					$sis .= "$_=";
275					$sis .= $data->{$_} if exists($data->{$_});
276					$sis .= '&';
277					}
278
279					# This is 10x faster than the equivalent regex version.
280					return substr($sis, 0, length($sis)-1) . "\r\n";
281					}
282
283
284					sub parsesis_nue {
285
286					my $query = $_[1] \|\| {};
287					my $wantref = 1 if $_[1];
288
289					# Parse the query.
290					$_[0] =~ s/\r\n$//;
291					my @pairs = split /\&/, $_[0];
292
293					for (@pairs) {
294					my ($key, $value) = split /=/, $_;
295					$query->{$key} = $value;
296					}
297
298					return $query if $wantref;
299					return %$query;
300					}
301
302
303					sub to_batched_query {
304					my ($queries, $bql, $bqs, $novar) = @_;
305					my @bqueries;
306
307					# Breaks up queries into batches, where batches are limited to:
308					# - at most $bql lines long --OR--
309					# - at most $bqs kb in size
310					# if bqs or bql == 0 or undef, no limit.
311					#
312					# fixme - optimization for aggregator:
313					# sort, so all checks are together, all reports together, etc.
314					# problem is user will want to maintain array order
315
316					# $queries is array ref of either:
317					# strings - sis, ready to go
318					# hash ref - need to create sis
319					# my $q = ref($queries->[0]) eq 'HASH' ? makesis_batch($queries) : $queries;
320
321					# for right now, we'll just assume hash ref
322					return unless ref($queries->[0]) eq 'HASH';
323
324					my $last;
325					my $line;
326					my $linecnt = 0;
327					my $batchmode = 0;
328					foreach my $cur (@$queries) {
329
330					# my $dobj = debugobj($cur); print "dbg-doing obj: $dobj\n";
331
332					#
333					# handle cases where we submit email blob (message = * )
334					#
335					if (exists $cur->{message}) {
336					my $msg = $cur->{message};
337					delete $cur->{message};
338					$line = "-". makesis($cur);
339					$cur->{message} = $msg;
340					$line =~ s/\r\n$//s;
341					$line .= "&message=*\r\n$msg\r\n.\r\n";
342					push @bqueries, $line;
343					next;
344					}
345
346					unless ($last) {
347					#
348					# start beginning of new batch
349					#
350					$last = $cur;
351					next;
352					}
353					unless ($batchmode) {
354					#
355					# line after beginning of new batch
356					# if similar, start variable batchmode.
357					# if not, start batchmode without variables
358					#
359					my ($both, $diff) = findsimilar($last, $cur);
360					if ($diff && !$novar) {
361					$batchmode = 2;
362					$line = "-". makesis_nue($both);
363					# fixme - we might want to uri_escape()
364					# but everything should be alphanum or our uri-safe base64
365					$line .= join(",", map "$last->{$_}", @$diff) ."\r\n";
366					$line .= join(",", map "$cur->{$_}", @$diff) ."\r\n";
367					$last = $both; # last is now 'template'
368					$linecnt = 2;
369					} else {
370					$batchmode = 1;
371					$line = "-". makesis($last);
372					$line .= makesis_nue($cur);
373					$linecnt = 2;
374					}
375					next;
376					} else {
377					#
378					# We're in batchmode.
379					# end if batch maxed out (bqs or bql reached)
380					# end if batchmode with variables and cur doesn't match
381					# end batch
382					#
383					my ($both, $diff) = findsimilar($last, $cur) if ($batchmode == 2);
384					if ( ($bqs && (length($line) > ($bqs*1024))) \|\|
385					($bql && ($linecnt >= $bql)) \|\|
386					($batchmode == 2 && !$diff) ) {
387					$batchmode = 0;
388					$line .= ".\r\n";
389					push @bqueries, $line;
390					$last = $cur;
391					} else {
392					#
393					# fixme - we might go passed bqs by a little bit. prolly ok.
394					#
395					if ($batchmode == 2) {
396					$line .= join(",", map "$cur->{$_}", @$diff) ."\r\n";
397					} else {
398					$line .= makesis_nue($cur);
399					}
400					$linecnt++;
401					}
402					}
403					}
404					if ($batchmode) {
405					$line .= ".\r\n";
406					push @bqueries, $line;
407					} elsif ($last) {
408					$line = makesis($last);
409					push @bqueries, $line;
410					}
411
412					return \@bqueries;
413					}
414
415
416					# compares keys in hash ref's a & b
417					#
418					# return
419					# if both hashes have different keys
420					#
421					# return (1)
422					# if both hashes have same keys and values,
423					#
424					# returns 2 refs
425					# if both hashes have same keys but different values
426					# - first is hash, copy of a & b where vals are same.
427					# where vals are diff, keys are copied with val = '?'
428					# - second is list contains keys where values are different
429
430					sub findsimilar {
431					my ($a, $b) = @_;
432					my @diffvalues = ();
433					my %samevalues = ();
434
435					foreach (sort keys %$a) {
436					return unless exists $b->{$_};
437					if ($b->{$_} eq $a->{$_}) {
438					$samevalues{$_} = $a->{$_};
439					} else {
440					$samevalues{$_} = "?";
441					push @diffvalues, $_;
442					}
443					}
444					foreach (sort keys %$b) {
445					return unless exists $a->{$_};
446					}
447					# if too hashes are exactly the same, not sure.
448					# treat as if they are totally different.
449					return (1) unless scalar(@diffvalues) > 0;
450
451					return (\%samevalues, \@diffvalues);
452					}
453
454					sub from_batched_query {
455
456					my ($queries) = @_;
457					my @queries;
458
459					my ($fq, $rq) = $queries =~ m:^\-(.?)\r\n(.)$:sm;
460
461					unless ($fq && $rq) {
462					# allow from_batched_query to handle non-batches
463					$fq = $queries;
464					$rq = "";
465					}
466
467					if ($fq =~ m:\?:) {
468
469					my %template_query = ();
470					my @seq = ();
471					my @pairs = split /\&/, $fq;
472					for (@pairs) {
473					my ($key, $value) = split /=/, $_;
474					if ($value eq "?") {
475					push @seq, $key;
476					} else {
477					$template_query{$key} = $value ? uri_unescape($value) : '';
478					}
479					}
480
481					for (split /\r\n/, $rq) {
482					my @values = split /,/, $_;
483					my %foo = %template_query;
484					@foo{@seq} = @values;
485					push @queries, \%foo;
486					}
487
488					return undef unless @queries;
489
490					} elsif ($fq =~ m:\*:) {
491
492					my %query = parsesis($fq);
493					for (keys %query) {
494					if ($query{$_} eq "*") {
495					$query{$_} = $rq;
496					last;
497					}
498					}
499					push @queries, \%query;
500
501					} else {
502
503
504					# Don't split $queries. Use $fq and $rq instead since
505					# $fq is already normalized.
506
507					my %q = parsesis($fq);
508					push @queries, \%q;
509					for (split /\r\n/, $rq) {
510					my %q = parsesis($_);
511					push @queries, \%q;
512					}
513
514					}
515
516					return \@queries;
517
518					}
519
520
521					sub randstr {
522
523					my $size = shift;
524					my $alphanum = shift;
525					my $str;
526
527					$alphanum = 1 if !defined($alphanum);
528
529					# ASCII
530					# 33-126 printable chars
531					# 48-57 numbers
532					# 65-90 uppercase alpha
533					# 97-122 lowercase alpha
534
535					while ($size--) {
536					if ($alphanum) {
537					$str .= $b64table{ int(rand 64) };
538					} else {
539					$str .= chr(int(rand 94) + 33);
540					}
541					}
542
543					return $str;
544
545					}
546
547
548					sub escape_smtp_terminator {
549
550					my ($textref) = @_;
551					$$textref =~ s/\r\n\./\r\n\.\./gm
552
553					}
554
555
556					sub unescape_smtp_terminator {
557
558					my ($textref) = @_;
559					$$textref =~ s/\r\n\.\./\r\n\./gm;
560
561					}
562
563
564					sub hex_dump {
565					my $string = shift;
566
567					for (split //, $string) {
568					print ord($_) . " ";
569					}
570					print "\n";
571					}
572
- -
575					sub hash2str {
576
577					my $href = shift;
578					my %hash = %$href;
579					my ($str, $key);
580
581					for $key ( keys %hash ) {
582					my $tstr;
583					if ( ref $hash{$key} eq 'ARRAY' ) {
584					for ( @{ $hash{ $key }} ) { $tstr .= escape( $_ ) . "," } $str =~ s/,$//;
585					} elsif ( !(ref $hash{$key}) ) {
586					$tstr .= escape ( $hash{$key} );
587					}
588					if ( $tstr ) { $str .= "$key:$tstr&" }
589					}
590
591					$str =~ s/&$//; return $str;
592
593					}
594
595
596					sub str2hash {
597
598					my $str = shift;
599					my %hash;
600					my @pairs = split /(?<!\\)&/, $str;
601
602					for ( @pairs ) {
603					my ( $key, $data ) = split /(?<!\\):/, $_, 2;
604					if ( $data =~ /(?<!\\),/ ) {
605					my @list = split /(?<!\\),/, $data;
606					for ( @list ) { $_ = unescape ( $_ ) };
607					$hash{$key} = [@list];
608					} else { $hash{$key} = unescape ( $data ) }
609					}
610
611					return \%hash;
612
613					}
614
615					#
616					# If body of an email has mime attachments, the headers
617					# will indicate this. likewise, each mime attachment
618					# could also have nested mime attachments with headers that
619					# must indicate this. standard recursion.
620					#
621					# However, all 'leaf node' attachments don't have to have
622					# headers based on RFC xxx. They must be created before
623					# sending to razor servers.
624					#
625					#
626					# Example of mail with nested MIME attachments:
627					#
628					# level 1 level 2 level 3 ....
629					# --------------------------------------
630					# * - Header 1
631					# * - Body 1
632					# * -- A -- mime-header 2A
633					# \| mime-body 2A
634					# \| \|
635					# \| * ---- a --- mime-header 3a
636					# \| \| mime-body 3a
637					# \| \|
638					# \| * -----b --- mime-header 3b
639					# \| mime-body 3b
640					# * -- B -- mime-body 2B
641					# \|
642					# * -- C -- mime-header 2C
643					# \|
644					# * ---- c -- mime-body 3c
645					# \|
646					# * ---- d -- mime-header 3d
647					# mime-body 3d
648					#
649					# should be reported as
650					#
651					# Header 1 \r\n
652					# part 1 = p(header 3a, body 3a) \r\n
653					# part 2 = p(header 3b, body 3b) \r\n
654					# part 3 = p(<generated header 2B>, body 2B) \r\n
655					# part 4 = p(<generated header 3c>, body 3c) \r\n
656					# part 5 = p(header 3d, body 3d) \r\n
657					# .\r\n
658					#
659					# Notes:
660					# - Order of parts does not matter.
661					#
662					# - Each part is processed by prep_mail, p(), before report/check
663					#
664					# - Except for original Header everything but leaf nodes
665					# are discarded. In the above example,
666					#
667					# Body 1, header 2A, header 2C - are discarded
668					#
669					#
670					# Detailed Explanation:
671					#
672					# Header 1 says 'Content-Type: multipart' with boundary definition
673					# Based on the Boundary, Body 1 is split into A, B, C.
674					#
675					# A is analyzed, has headers which also say 'Content-Type: multipart'
676					# with a different boundary, and it is split into 3a, 3b. 2A is what
677					# appears between header 2a and first boundary, so its ignored.
678					# 3a and 3b both have header info, so they are sent thru prep_mail
679					# and reported/checked
680					#
681					# <generated header 2B> is based on Header 1 to determine content
682					# type. if unknown, dummy header is added,
683					# and both are reported as a body part
684					#
685					# C is analyzed, has headers which also say 'Content-Type: multipart'
686					# with a different boundary, and it is split into 3c, 3d.
687					#
688					# <generated header 3c> is based on header 2c to determine content
689					# type. if unknown, dummy header is added,
690					# and both are reported as a body part
691					#
692					# 3d has header info, so header+body are sent thru prep_mail
693					# and reported/checked
694					#
695					#
696					#
697					# prep_mail() basically truncates msgs that are too big and/or
698					# base64 encodes binaries or 8-bit msgs.
699					#
700
701
702					# Split mime splits up multi-part mime mails.
703					#
704					# returns array of parts, where each part is
705					# headers\n\nbody
706					#
707					# headers will only contain X-Razor2 and Content- headers
708					#
709					# If not a mime mail, and the headers do not have any
710					# Content-* headers, then the only headers will be X-Razor2 ones
711					# (perhaps create Content-Type in da future?)
712					#
713					# body can be blank. nuked in prep_part
714					#
715					sub split_mime {
716
717					my ($mailref, $ver, $recursive, $debug ) = @_;
718
719					return unless ref($mailref);
720
721					# mime-bodies must have header or initial blank lines.
722					#
723					my ($hdr, $body) = split /\n\r*\n/, $$mailref, 2;
724					my $no_valid_mime_hdr = 0;
725
726					unless ($body) {
727
728					# no blank lines, definately no header, so no nested mimes
729
730					print "split_mime: no blank lines\n" if $debug > 1;
731					$no_valid_mime_hdr = 1;
732					}
733					# fixme - handle attachments? i.e. if header has this
734					# Content-Disposition: attachment
735					# than body is mail, we could recursively call ourselves
736					# again with body (check body for hdrs first?)
737
738					# Make sure $hdr is really a hdr
739					#
740					# Details: If mime part is not RFC compliant, it could just
741					# be a body with blank lines. hdr could have just matched part
742					# of the body.
743					#
744					# valid mime header is determined by existance of 'Content-Type'
745					# If we're not recursive, we don't check orig_headers, we assume its ok.
746					# not sure if this is the best way ...
747					#
748
749					if ($recursive && ($hdr !~ /^Content-Type:/i)) {
750					$no_valid_mime_hdr = 1;
751					print "uh-oh, bad mime-body len=". length($$mailref) .":\n$$mailref\n" if $debug;
752					#print "split_mime: recur=($recursive)\n";
753					}
754
755					if ($no_valid_mime_hdr) {
756					#
757					# create dummy header and return it
758					#
759					# $ver should be '1' or client name + version
760					my $mimepart = "X-Razor2-Agent: $ver\n";
761					my $hrdlen = length($mimepart);
762
763					# if it has initial blank line, hurray for rfc compliance
764					if ($$mailref =~ /^\n/) {
765					$mimepart .= $$mailref;
766					} else {
767					$mimepart .= "\n". $$mailref;
768					}
769					print "split_mime: returning total_len=". length($mimepart) ."; hdrs=".
770					$hdrlen .", body=". length($$mailref) ."\n" if $debug;
771					return (\$mimepart);
772					}
773
774					#
775					# Now we split mailref into hdr and body
776					# check hdr for nested mime (boundary)
777					#
778
779					my $orig_hdr = $hdr;
780					$hdr =~ s/\n\s+//sg; # merge multi-line headers
781					# nuke everything but X-Razor2 and Content-* headers
782					my $trimmed_hdr = "";
783					foreach (split '\n',$hdr) {
784					/^Content-/i and $trimmed_hdr .= "$_\n";
785					/^X-Razor2/i and $trimmed_hdr .= "$_\n";
786					}
787
788					my $boundary = "";
789
790					if ($trimmed_hdr =~ /Content-Type: multipart.+boundary=("[^"]+"\|\S+)/ig) {
791					$boundary = $1;
792					}
793
794					if ($boundary eq "") {
795					#
796					# valid mime hdr, but no nested mime.
797					# add razor hdr and return.
798					#
799					print "split_mime: valid_mime_hdr [len=". length($orig_hdr)
800					."], but no nested mime\n$orig_hdr\n" if $debug > 1;
801					$trimmed_hdr = "X-Razor2-Agent: $ver\n" . $trimmed_hdr;
802					my $mimepart = "$trimmed_hdr\n$body";
803					print "split_mime: returning total=". length($mimepart) ."; hdrs=".
804					length($trimmed_hdr) .", body=". length($body) ."\n" if $debug;
805					return (\$mimepart);
806					}
807					$boundary = $1 if $boundary =~ /^"(.*)"$/;
808
809					# At this point, we know body has mime parts.
810					#
811					my @mimeparts;
812
813					#
814					# According to RFC 1341
815					# http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
816					#
817					# mimes are separated by \n--boundary\n
818					# and are followed immediately by header, blank line, body;
819					# or blank line and body.
820					#
821					# if no header in mime part, default content type for mime body is
822					# based on header where 'Content-Type: multipart*' was defined, where
823					# multipart/digest --> message/rfc822
824					# multipart/* --> text/plain
825					# perhaps we should add a header if none present?
826					#
827					# if a body contains mimes, the 'preable', or stuff before
828					# the first boundary, and the 'epilogue', the stuff after the
829					# last boudary, are to be ignored.
830					#
831					# NOTE: We split up multiparts, but content-type's can also be
832					# nested. i.e, a header of 'Content-Type: message' can have a body
833					# of 'Content-Type: image'
834					#
835					$body =~ s/\n\Q--$boundary--\E.*$//sg; # trash last boundary and epilogue
836					if ($body =~ /^\Q--$boundary\E\r*\n/) {
837					# bug in some mails, make it RFC compliant
838					# now our split will work correctly
839					print "bad mime body [len=". length($body) ."], not doing \\n--boundary, fixed tho.\n" if $debug > 1;
840					$body = "garbage\n$body";
841					}
842					my @tmpparts;
843					unless ($body =~ /\Q--$boundary\E/) {
844					# Sometimes there's a boundary in the headers
845					# but none in email. In such cases, we'll
846					# treat the entire body as a part.
847					push @tmpparts, "garbage";
848					push @tmpparts, $body;
849					} else {
850					@tmpparts = split /\n\Q--$boundary\E\r*\n/, $body;
851					}
852					shift @tmpparts; # trash everything up to the first boundary;
853					foreach (@tmpparts) {
854					# perhaps we should add a header based on default content-type?
855					unless (/\S/s) {
856					print "skipping body part containing only whitespace [len=". length($_) ."]\n" if $debug;
857					next;
858					}
859					print "boundary: ". $recursive . "$boundary\n" if $debug > 1;
860					push @mimeparts, split_mime(\$_, $ver, " ". $recursive, $debug);
861					}
862					print "Saweeet!!! Boundary (". scalar(@mimeparts) ."): $boundary\n" if defined($boundary) && ($debug > 1);
863
864					return @mimeparts;
865					}
866
867					# mailref is not modified by this sub
868					#
869					sub prep_part {
870					my ($mailref, $maxheader, $maxbody) = @_;
871					#print "[". length($$mailref) ."] maxsize=$maxheader + $maxbody\n";
872
873					my ($hdr, $body) = split /\n\r*\n/, $$mailref, 2;
874					$hdr .= "\n"; # put newline back on last header line
875
876					unless ($body) {
877					#
878					# fixme - this should not happen.
879					# if it does, split_mime needs work
880					#
881					# print "prep_part got F**KED-up mimepart [len=". length($$mailref) ."]\n$$mailref\n";
882					return; # body is empty
883					}
884
885					# fixme - are these the best chars to check for binary?
886					my $is_binary = ($hdr =~ /^Content-Type-Encoding: 8-bit/) \|\|
887					($body =~ /([\x00-\x1f\|\x7f-\xff])/ and $1 !~ /[\r\n\t]/);
888
889					my $enBase64 = new Razor2::Preproc::enBase64;
890					$is_binary = $enBase64->isit($mailref);
891					$enBase64->doit(\$body) if $is_binary;
892
893					$body =~ s/\r+\n/\n/sg; # outlook sometimes does \r\r\n
894					$hdr =~ s/\r+\n/\n/sg;
895
896					if ((my $len = length($body)) > $maxbody) {
897					$body = substr $body, 0, $maxbody;
898					substr($body, -2) = "==" if $is_binary;
899
900					$hdr = "X-Razor2-Origlen-Body: $len\n" . $hdr;
901					#print "maxbody=$maxbody body went from $len to ". length($body) ."\n";
902					}
903
904					if ((my $len = length($hdr)) > $maxheader) {
905					$hdr = "X-Razor2-Origlen-Header: $len\n" . $hdr;
906					if (length($hdr) > $maxheader) {
907					$hdr = substr $hdr, 0, $maxheader;
908					$hdr =~ s/([^\n]+)$//s; # remove last, incomplete line
909					}
910					# print "maxhdr=$maxheader header went from $len to ". length($hdr) ."\n";
911					}
912
913					my $dude = "$hdr\n$body";
914
915					return $mailref if $dude eq $$mailref; # this happens majority of the time
916					return \$dude;
917					}
918
- -
921					# NOTE: Important function!
922					# must be kept in sync with server and all clients
923					# same holds true for prep_part()
924					#
925					# This is the preprocessing done on a mail before sent over network
926					#
927					sub prep_mail {
928					my ($mailref, $report_headers, $maxheader, $maxbody, $maxorighdr, $versionstring, $debug) = @_;
929					return unless ref($mailref);
930
931					print " prep_mail: orig=". length($$mailref) ."\n" if ($debug > 1);
932
933					my ($orig_hdr) = split /\n\r*\n/, $$mailref, 2;
934					$orig_hdr .= "\n"; # put newline back on last header line
935
936					my $ver = $versionstring \|\| 1;
937					my @mimeparts = split_mime($mailref, $ver, 0, $debug);
938
939					my @mimeparts_prep;
940					foreach (@mimeparts) {
941					push @mimeparts_prep, prep_part($_, $maxheader, $maxbody);
942					}
943					unless ($report_headers) {
944					my $hdr = "X-Razor2-Headers-Suppressed: 1\n";
945					foreach (split '\n',$orig_hdr) {
946					/^Content-/i and $hdr .= "$_\n";
947					/^X-Razor2/i and $hdr .= "$_\n";
948					}
949					$orig_hdr = $hdr;
950					}
951
952					if ((my $len = length($orig_hdr)) > $maxorighdr) {
953					$hdr = "X-Razor2-Origlen-Header: $len\n" . $orig_hdr;
954					if (length($hdr) > $maxorighdr) {
955					$hdr = substr $hdr, 0, $maxorighdr;
956					$hdr =~ s/([^\n]+)$//s; # remove last, incomplete line
957					}
958					#print "max=$maxorighdr orig_header went from $len to ". length($hdr) ."\n";
959					$orig_hdr = $hdr;
960					}
961
962					if ($debug > 1) {
963					print "**** prep_mail done: headers=". length($orig_hdr);
964					foreach (0..$#mimeparts_prep) {
965					print "\n**** mail $_ [". length(${$mimeparts_prep[$_]}) ."] ". substr(${$mimeparts_prep[$_]} ,0,40);
966					}
967					print "\n\n";
968					}
969
970					return (\$orig_hdr, @mimeparts_prep);
971					}
972
- -
975					# from MIME::Parser
976					#my $parser = new MIME::Parser;
977					#my $entity = $parser->parse($body);
978					# foreach (dump_entity($entity))
979					sub dump_entity {
980					my $ent = shift;
981					my @parts = $ent->parts;
982
983					if (@parts) { # multipart...
984					map { dump_entity($_) } @parts;
985					} else { # single part...
986					return ( $ent->body ); # return text blob
987					print " Part: ", $ent->bodyhandle->path, " (", scalar($ent->head->mime_type), ")\n";
988					}
989					}
990
991
992					# input: hex string ("2D")
993					# output: hash ref or array containg bits that are set
994					# 2D == (1, 3, 4, 6)
995
996					sub hexbits2hash {
997
998					my $hex = shift;
999					my %h;
1000					for (0..31) { if (hex($hex) & (2**31)>>(31-$_)) { $h{$_+1} = 1 } }
1001					return wantarray ? (sort keys %h) : \%h;
1002
1003					}
1004
1005
1006					# input: hash ref, array ref, or array containg bits that are set
1007					# output: hex string ("2D")
1008					# 2D == (4, 8, 32)
1009
1010					sub hash2hexbits {
1011					my @bits = @_;
1012
1013					@bits = @{$bits[0]} if ref($bits[0]) eq 'ARRAY';
1014					@bits = (sort keys %{$bits[0]}) if ref($bits[0]) eq 'HASH';
1015
1016					my @all;
1017					my $i = 1;
1018					foreach (sort {$a <=> $b} @bits) {
1019					while (1) {
1020					push @all, 1 if $_ == $i;
1021					last if $_ == $i;
1022					push @all, 0;
1023					$i++;
1024					}
1025					}
1026					my $bs = join '', reverse @all;
1027					# fixme needs testing
1028					my $hex = (unpack "H", pack "B", join '', reverse @all);
1029
1030					return $hex
1031					}
1032
1033					# for debugging - dumps a obj to a string
1034					sub debugobj {
1035					my ($obj, $prefix, $maxwidth) = @_;
1036
1037					$maxwidth \|\|= 70;
1038					return if (defined($prefix) && length($prefix) > $maxwidth);
1039
1040					my $line = "";
1041					$prefix .= " "x4;
1042
1043					if (my $r = ref($obj)) {
1044					if ($r eq 'HASH') {
1045					$line = "$r - $obj,". scalar(keys %$obj) ." keys\n";
1046					foreach (sort keys %$obj) {
1047					$line .= "$prefix$_ => ". debugobj($obj->{$_}, $prefix);
1048					}
1049					$line .= $prefix ."[empty]\n" unless (keys %$obj);
1050					} elsif ($r eq 'ARRAY') {
1051					$line = "$r - $obj,". scalar(@$obj) ." items\n";
1052					foreach (@$obj) {
1053					$line .= $prefix . debugobj($_, $prefix);
1054					}
1055					$line .= $prefix ."[empty]\n" unless (@$obj);
1056					} elsif ($r eq 'REF') {
1057					$line = "$r - $obj\n";
1058					$line .= $prefix . debugobj($$obj, $prefix);
1059					} elsif ($r eq 'SCALAR') {
1060					$line = "$r - $obj\n";
1061					$line .= $prefix . debugobj($$obj, $prefix);
1062					}
1063					} else {
1064					if (defined $obj) {
1065					$line = $1 if substr($obj, 0, $maxwidth-length($prefix)) =~ /^([^\n]+)/;
1066					$line = "[length=". length($obj) ."] ". $line
1067					if (length($line) ne length($obj));
1068					} else {
1069					$line = "[empty]";
1070					}
1071					$line .= "\n";
1072					}
1073					return $line;
1074					}
1075
1076
1077					sub clean_body {
1078
1079					my ($self, $bodyref) = @_;
1080
1081					my $hasheaders = 1;
1082
1083					if ($self->{preprocs}->{deBase64}->isit($bodyref)) {
1084					$self->{preprocs}->{deBase64}->doit($bodyref);
1085					$hasheaders = 0;
1086					}
1087
1088					if ($self->{preprocs}->{deQP}->isit($bodyref)) {
1089					$self->{preprocs}->{deQP}->doit($bodyref);
1090					$hasheaders = 0;
1091					}
1092
1093					if ($self->{preprocs}->{deHTML}->isit($bodyref)) {
1094					$self->{preprocs}->{deHTML}->doit($bodyref);
1095					}
1096
1097					if ($hasheaders) {
1098					$$bodyref =~ s/^.*?\n\n//s;
1099					}
1100
1101
1102					}
1103
1104
1105					sub round {
1106
1107					my $float = shift;
1108					return sprintf("%.0f", $float);
1109
1110					}
1111
- -
1114					sub fisher_yates_shuffle {
1115					my $deck = shift; # $deck is a reference to an array
1116					my $i = @$deck;
1117					while ($i--) {
1118					my $j = int rand ($i+1);
1119					@$deck[$i,$j] = @$deck[$j,$i];
1120					}
1121					}
1122
1123
1124	1	22µs			1;
1125