Filename | /usr/local/lib/perl5/site_perl/Mail/SpamAssassin/RegistryBoundaries.pm |
Statements | Executed 358917 statements in 2.43s |
Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
---|---|---|---|---|---|
7730 | 4 | 1 | 963ms | 2.38s | uri_to_domain | Mail::SpamAssassin::RegistryBoundaries::
7730 | 1 | 1 | 695ms | 741ms | split_domain | Mail::SpamAssassin::RegistryBoundaries::
7730 | 1 | 1 | 240ms | 297ms | is_domain_valid | Mail::SpamAssassin::RegistryBoundaries::
54046 | 7 | 1 | 194ms | 194ms | CORE:subst (opcode) | Mail::SpamAssassin::RegistryBoundaries::
7730 | 1 | 1 | 180ms | 920ms | trim_domain | Mail::SpamAssassin::RegistryBoundaries::
38650 | 5 | 1 | 106ms | 106ms | CORE:match (opcode) | Mail::SpamAssassin::RegistryBoundaries::
1 | 1 | 1 | 4.72ms | 4.72ms | CORE:regcomp (opcode) | Mail::SpamAssassin::RegistryBoundaries::
1 | 1 | 1 | 468µs | 5.19ms | new | Mail::SpamAssassin::RegistryBoundaries::
1 | 1 | 1 | 53µs | 63µs | BEGIN@28 | Mail::SpamAssassin::RegistryBoundaries::
1 | 1 | 1 | 33µs | 38µs | BEGIN@30 | Mail::SpamAssassin::RegistryBoundaries::
1 | 1 | 1 | 32µs | 100µs | BEGIN@31 | Mail::SpamAssassin::RegistryBoundaries::
1 | 1 | 1 | 26µs | 96µs | BEGIN@34 | Mail::SpamAssassin::RegistryBoundaries::
1 | 1 | 1 | 23µs | 62µs | BEGIN@29 | Mail::SpamAssassin::RegistryBoundaries::
1 | 1 | 1 | 8µs | 8µs | CORE:qr (opcode) | Mail::SpamAssassin::RegistryBoundaries::
Line | State ments |
Time on line |
Calls | Time in subs |
Code |
---|---|---|---|---|---|
1 | # The (extremely complex) rules for domain delegation. | ||||
2 | |||||
3 | # <@LICENSE> | ||||
4 | # Licensed to the Apache Software Foundation (ASF) under one or more | ||||
5 | # contributor license agreements. See the NOTICE file distributed with | ||||
6 | # this work for additional information regarding copyright ownership. | ||||
7 | # The ASF licenses this file to you under the Apache License, Version 2.0 | ||||
8 | # (the "License"); you may not use this file except in compliance with | ||||
9 | # the License. You may obtain a copy of the License at: | ||||
10 | # | ||||
11 | # http://www.apache.org/licenses/LICENSE-2.0 | ||||
12 | # | ||||
13 | # Unless required by applicable law or agreed to in writing, software | ||||
14 | # distributed under the License is distributed on an "AS IS" BASIS, | ||||
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
16 | # See the License for the specific language governing permissions and | ||||
17 | # limitations under the License. | ||||
18 | # </@LICENSE> | ||||
19 | |||||
20 | =head1 NAME | ||||
21 | |||||
22 | Mail::SpamAssassin::RegistryBoundaries - domain delegation rules | ||||
23 | |||||
24 | =cut | ||||
25 | |||||
26 | package Mail::SpamAssassin::RegistryBoundaries; | ||||
27 | |||||
28 | 2 | 68µs | 2 | 74µs | # spent 63µs (53+10) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@28 which was called:
# once (53µs+10µs) by Mail::SpamAssassin::BEGIN@78 at line 28 # spent 63µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@28
# spent 10µs making 1 call to strict::import |
29 | 2 | 76µs | 2 | 100µs | # spent 62µs (23+39) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@29 which was called:
# once (23µs+39µs) by Mail::SpamAssassin::BEGIN@78 at line 29 # spent 62µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@29
# spent 39µs making 1 call to warnings::import |
30 | 2 | 89µs | 2 | 43µs | # spent 38µs (33+5) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@30 which was called:
# once (33µs+5µs) by Mail::SpamAssassin::BEGIN@78 at line 30 # spent 38µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@30
# spent 5µs making 1 call to bytes::import |
31 | 2 | 97µs | 2 | 168µs | # spent 100µs (32+68) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@31 which was called:
# once (32µs+68µs) by Mail::SpamAssassin::BEGIN@78 at line 31 # spent 100µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@31
# spent 68µs making 1 call to re::import |
32 | |||||
33 | 1 | 13µs | our @ISA = qw(); | ||
34 | 2 | 1.72ms | 2 | 166µs | # spent 96µs (26+70) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@34 which was called:
# once (26µs+70µs) by Mail::SpamAssassin::BEGIN@78 at line 34 # spent 96µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@34
# spent 70µs making 1 call to vars::import |
35 | |||||
36 | # called from SpamAssassin->init() to create $self->{util_rb} | ||||
37 | # spent 5.19ms (468µs+4.73) within Mail::SpamAssassin::RegistryBoundaries::new which was called:
# once (468µs+4.73ms) by Mail::SpamAssassin::new at line 432 of Mail/SpamAssassin.pm | ||||
38 | 1 | 2µs | my $class = shift; | ||
39 | 1 | 2µs | $class = ref($class) || $class; | ||
40 | |||||
41 | 1 | 2µs | my ($main) = @_; | ||
42 | my $self = { | ||||
43 | 'main' => $main, | ||||
44 | 'conf' => $main->{conf}, | ||||
45 | 1 | 6µs | }; | ||
46 | 1 | 2µs | bless ($self, $class); | ||
47 | |||||
48 | # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc | ||||
49 | 1 | 111µs | if ($self->{conf}->{valid_tlds}) { | ||
50 | 2 | 301µs | my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}}); | ||
51 | # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing | ||||
52 | 1 | 4.76ms | 2 | 4.73ms | $self->{valid_tlds_re} = qr/(?:$tlds)/i; # spent 4.72ms making 1 call to Mail::SpamAssassin::RegistryBoundaries::CORE:regcomp
# spent 8µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::CORE:qr |
53 | } | ||||
54 | else { | ||||
55 | # Failsafe in case no tlds defined, we don't want this to match everything.. | ||||
56 | $self->{valid_tlds_re} = qr/no_tlds_defined/; | ||||
57 | } | ||||
58 | |||||
59 | 1 | 11µs | $self; | ||
60 | } | ||||
61 | |||||
62 | # This is required because the .us domain is nuts. See split_domain. | ||||
63 | 1 | 5µs | foreach (qw/ | ||
64 | ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi | ||||
65 | mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa pr ri sc sd tn tx ut va vi | ||||
66 | vt wa wi wv wy | ||||
67 | /) { | ||||
68 | 54 | 306µs | $US_STATES{$_} = 1; | ||
69 | } | ||||
70 | |||||
71 | ########################################################################### | ||||
72 | |||||
73 | =head1 METHODS | ||||
74 | |||||
75 | =over 4 | ||||
76 | |||||
77 | =item ($hostname, $domain) = split_domain ($fqdn) | ||||
78 | |||||
79 | Cut a fully-qualified hostname into the hostname part and the domain | ||||
80 | part, splitting at the DNS registry boundary. | ||||
81 | |||||
82 | Examples: | ||||
83 | |||||
84 | "www.foo.com" => ( "www", "foo.com" ) | ||||
85 | "www.foo.co.uk" => ( "www", "foo.co.uk" ) | ||||
86 | |||||
87 | =cut | ||||
88 | |||||
89 | # spent 741ms (695+45.7) within Mail::SpamAssassin::RegistryBoundaries::split_domain which was called 7730 times, avg 96µs/call:
# 7730 times (695ms+45.7ms) by Mail::SpamAssassin::RegistryBoundaries::trim_domain at line 171, avg 96µs/call | ||||
90 | 7730 | 13.1ms | my $self = shift; | ||
91 | 7730 | 18.8ms | my $domain = lc shift; | ||
92 | |||||
93 | 7730 | 14.1ms | my $hostname = ''; | ||
94 | |||||
95 | 7730 | 27.6ms | if (defined $domain && $domain ne '') { | ||
96 | # www..spamassassin.org -> www.spamassassin.org | ||||
97 | 7698 | 25.0ms | $domain =~ tr/././s; | ||
98 | |||||
99 | # leading/trailing dots | ||||
100 | 7698 | 114ms | 7698 | 14.6ms | $domain =~ s/^\.+//; # spent 14.6ms making 7698 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call |
101 | 7698 | 86.7ms | 7698 | 31.1ms | $domain =~ s/\.+$//; # spent 31.1ms making 7698 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 4µs/call |
102 | |||||
103 | # Split scalar domain into components | ||||
104 | 7698 | 38.7ms | my @domparts = split(/\./, $domain); | ||
105 | 7698 | 12.2ms | my @hostname; | ||
106 | |||||
107 | 7698 | 35.7ms | while (@domparts > 1) { # go until we find the TLD | ||
108 | 15044 | 71.1ms | if (@domparts == 4) { | ||
109 | 926 | 1.75ms | if ($domparts[3] eq 'us' && | ||
110 | (($domparts[0] eq 'pvt' && $domparts[1] eq 'k12') || | ||||
111 | ($domparts[0] =~ /^c[io]$/))) | ||||
112 | { | ||||
113 | # http://www.neustar.us/policies/docs/rfc_1480.txt | ||||
114 | # "Fire-Dept.CI.Los-Angeles.CA.US" | ||||
115 | # "<school-name>.PVT.K12.<state>.US" | ||||
116 | last if ($US_STATES{$domparts[2]}); | ||||
117 | } | ||||
118 | } | ||||
119 | elsif (@domparts == 3) { | ||||
120 | # http://www.neustar.us/policies/docs/rfc_1480.txt | ||||
121 | # demon.co.uk | ||||
122 | # esc.edu.ar | ||||
123 | # [^\.]+\.${US_STATES}\.us | ||||
124 | 6426 | 22.2ms | if ($domparts[2] eq 'us') { | ||
125 | 2 | 7µs | last if ($US_STATES{$domparts[1]}); | ||
126 | } | ||||
127 | else { | ||||
128 | 6424 | 16.7ms | my $temp = join(".", @domparts); | ||
129 | 6424 | 29.4ms | last if ($self->{conf}->{three_level_domains}{$temp}); | ||
130 | } | ||||
131 | } | ||||
132 | elsif (@domparts == 2) { | ||||
133 | # co.uk, etc. | ||||
134 | 7674 | 17.5ms | my $temp = join(".", @domparts); | ||
135 | 7674 | 24.4ms | last if ($self->{conf}->{two_level_domains}{$temp}); | ||
136 | } | ||||
137 | 14974 | 64.8ms | push(@hostname, shift @domparts); | ||
138 | } | ||||
139 | |||||
140 | # Look for a sub-delegated TLD | ||||
141 | # use @domparts to skip trying to match on TLDs that can't possibly | ||||
142 | # match, but keep in mind that the hostname can be blank, so 4TLD needs 4, | ||||
143 | # 3TLD needs 3, 2TLD needs 2 ... | ||||
144 | # | ||||
145 | 7698 | 25.6ms | unshift @domparts, pop @hostname if @hostname; | ||
146 | 7698 | 16.2ms | $domain = join(".", @domparts); | ||
147 | 7698 | 25.8ms | $hostname = join(".", @hostname); | ||
148 | } | ||||
149 | |||||
150 | 7730 | 89.1ms | ($hostname, $domain); | ||
151 | } | ||||
152 | |||||
153 | ########################################################################### | ||||
154 | |||||
155 | =item $domain = trim_domain($fqdn) | ||||
156 | |||||
157 | Cut a fully-qualified hostname into the hostname part and the domain | ||||
158 | part, returning just the domain. | ||||
159 | |||||
160 | Examples: | ||||
161 | |||||
162 | "www.foo.com" => "foo.com" | ||||
163 | "www.foo.co.uk" => "foo.co.uk" | ||||
164 | |||||
165 | =cut | ||||
166 | |||||
167 | # spent 920ms (180+741) within Mail::SpamAssassin::RegistryBoundaries::trim_domain which was called 7730 times, avg 119µs/call:
# 7730 times (180ms+741ms) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 230, avg 119µs/call | ||||
168 | 7730 | 13.6ms | my $self = shift; | ||
169 | 7730 | 14.2ms | my $domain = shift; | ||
170 | |||||
171 | 7730 | 65.4ms | 7730 | 741ms | my ($host, $dom) = $self->split_domain($domain); # spent 741ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::split_domain, avg 96µs/call |
172 | 7730 | 78.6ms | return $dom; | ||
173 | } | ||||
174 | |||||
175 | ########################################################################### | ||||
176 | |||||
177 | =item $ok = is_domain_valid($dom) | ||||
178 | |||||
179 | Return C<1> if the domain is valid, C<undef> otherwise. A valid domain | ||||
180 | (a) does not contain whitespace, (b) contains at least one dot, and (c) | ||||
181 | uses a valid TLD or ccTLD. | ||||
182 | |||||
183 | =back | ||||
184 | |||||
185 | =cut | ||||
186 | |||||
187 | # spent 297ms (240+56.9) within Mail::SpamAssassin::RegistryBoundaries::is_domain_valid which was called 7730 times, avg 38µs/call:
# 7730 times (240ms+56.9ms) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 233, avg 38µs/call | ||||
188 | 7730 | 13.7ms | my $self = shift; | ||
189 | 7730 | 19.5ms | my $dom = lc shift; | ||
190 | |||||
191 | # domains don't have whitespace | ||||
192 | 7730 | 92.4ms | 7730 | 19.2ms | return 0 if ($dom =~ /\s/); # spent 19.2ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call |
193 | |||||
194 | # ensure it ends in a known-valid TLD, and has at least 1 dot | ||||
195 | 7730 | 93.7ms | 7730 | 37.7ms | return 0 unless ($dom =~ /\.([^.]+)$/); # spent 37.7ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 5µs/call |
196 | 7674 | 25.5ms | return 0 unless ($self->{conf}->{valid_tlds}{$1}); | ||
197 | |||||
198 | 7668 | 114ms | return 1; # nah, it's ok. | ||
199 | } | ||||
200 | |||||
201 | # | ||||
202 | |||||
203 | # spent 2.38s (963ms+1.41) within Mail::SpamAssassin::RegistryBoundaries::uri_to_domain which was called 7730 times, avg 308µs/call:
# 4467 times (525ms+808ms) by Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list at line 2276 of Mail/SpamAssassin/PerMsgStatus.pm, avg 298µs/call
# 1954 times (255ms+384ms) by Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list at line 2423 of Mail/SpamAssassin/PerMsgStatus.pm, avg 327µs/call
# 949 times (149ms+163ms) by Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list at line 2317 of Mail/SpamAssassin/PerMsgStatus.pm, avg 329µs/call
# 360 times (34.2ms+60.3ms) by Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list at line 2412 of Mail/SpamAssassin/PerMsgStatus.pm, avg 262µs/call | ||||
204 | 7730 | 14.4ms | my $self = shift; | ||
205 | 7730 | 24.2ms | my $uri = lc shift; | ||
206 | |||||
207 | # Javascript is not going to help us, so return. | ||||
208 | 7730 | 94.4ms | 7730 | 16.0ms | return if ($uri =~ /^javascript:/); # spent 16.0ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call |
209 | |||||
210 | 7730 | 121ms | 7730 | 17.2ms | $uri =~ s{\#.*$}{}gs; # drop fragment # spent 17.2ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call |
211 | 7730 | 135ms | 7730 | 57.1ms | $uri =~ s{^[a-z]+:/{0,2}}{}gs; # drop the protocol # spent 57.1ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 7µs/call |
212 | 7730 | 124ms | 7730 | 21.0ms | $uri =~ s{^[^/]*\@}{}gs; # username/passwd # spent 21.0ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 3µs/call |
213 | |||||
214 | # strip path and CGI params. note: bug 4213 shows that "&" should | ||||
215 | # *not* be likewise stripped here -- it's permitted in hostnames by | ||||
216 | # some common MUAs! | ||||
217 | 7730 | 123ms | 7730 | 38.1ms | $uri =~ s{[/?].*$}{}gs; # spent 38.1ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 5µs/call |
218 | |||||
219 | 7730 | 79.9ms | 7730 | 15.3ms | $uri =~ s{:\d*$}{}gs; # port, bug 4191: sometimes the # is missing # spent 15.3ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call |
220 | |||||
221 | # skip undecoded URIs if the encoded bits shouldn't be. | ||||
222 | # we'll see the decoded version as well. see url_encode() | ||||
223 | 7730 | 71.1ms | 7730 | 14.8ms | return if $uri =~ /\%(?:2[1-9a-f]|[3-6][0-9a-f]|7[0-9a-e])/; # spent 14.8ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call |
224 | |||||
225 | 7730 | 14.8ms | my $host = $uri; # unstripped/full domain name | ||
226 | |||||
227 | # keep IPs intact | ||||
228 | 7730 | 98.1ms | 7730 | 17.9ms | if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) { # spent 17.9ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call |
229 | # get rid of hostname part of domain, understanding delegation | ||||
230 | 7730 | 50.8ms | 7730 | 920ms | $uri = $self->trim_domain($uri); # spent 920ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::trim_domain, avg 119µs/call |
231 | |||||
232 | # ignore invalid domains | ||||
233 | 7730 | 59.3ms | 7730 | 297ms | return unless ($self->is_domain_valid($uri)); # spent 297ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::is_domain_valid, avg 38µs/call |
234 | } | ||||
235 | |||||
236 | # $uri is now the domain only, optionally return unstripped host name | ||||
237 | 7668 | 89.8ms | return !wantarray ? $uri : ($uri, $host); | ||
238 | } | ||||
239 | |||||
240 | 1 | 52µs | 1; | ||
241 | |||||
# spent 106ms within Mail::SpamAssassin::RegistryBoundaries::CORE:match which was called 38650 times, avg 3µs/call:
# 7730 times (37.7ms+0s) by Mail::SpamAssassin::RegistryBoundaries::is_domain_valid at line 195, avg 5µs/call
# 7730 times (19.2ms+0s) by Mail::SpamAssassin::RegistryBoundaries::is_domain_valid at line 192, avg 2µs/call
# 7730 times (17.9ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 228, avg 2µs/call
# 7730 times (16.0ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 208, avg 2µs/call
# 7730 times (14.8ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 223, avg 2µs/call | |||||
# spent 8µs within Mail::SpamAssassin::RegistryBoundaries::CORE:qr which was called:
# once (8µs+0s) by Mail::SpamAssassin::RegistryBoundaries::new at line 52 | |||||
# spent 4.72ms within Mail::SpamAssassin::RegistryBoundaries::CORE:regcomp which was called:
# once (4.72ms+0s) by Mail::SpamAssassin::RegistryBoundaries::new at line 52 | |||||
# spent 194ms within Mail::SpamAssassin::RegistryBoundaries::CORE:subst which was called 54046 times, avg 4µs/call:
# 7730 times (57.1ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 211, avg 7µs/call
# 7730 times (38.1ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 217, avg 5µs/call
# 7730 times (21.0ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 212, avg 3µs/call
# 7730 times (17.2ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 210, avg 2µs/call
# 7730 times (15.3ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 219, avg 2µs/call
# 7698 times (31.1ms+0s) by Mail::SpamAssassin::RegistryBoundaries::split_domain at line 101, avg 4µs/call
# 7698 times (14.6ms+0s) by Mail::SpamAssassin::RegistryBoundaries::split_domain at line 100, avg 2µs/call |