← Index
NYTProf Performance Profile   « line view »
For /usr/local/bin/sa-learn
  Run on Sun Nov 5 03:09:29 2017
Reported on Mon Nov 6 13:20:47 2017

Filename/usr/local/lib/perl5/site_perl/Mail/SpamAssassin/RegistryBoundaries.pm
StatementsExecuted 358917 statements in 2.56s
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
773041925ms2.52sMail::SpamAssassin::RegistryBoundaries::::uri_to_domainMail::SpamAssassin::RegistryBoundaries::uri_to_domain
773011789ms842msMail::SpamAssassin::RegistryBoundaries::::split_domainMail::SpamAssassin::RegistryBoundaries::split_domain
773011300ms355msMail::SpamAssassin::RegistryBoundaries::::is_domain_validMail::SpamAssassin::RegistryBoundaries::is_domain_valid
773011204ms1.05sMail::SpamAssassin::RegistryBoundaries::::trim_domainMail::SpamAssassin::RegistryBoundaries::trim_domain
5404671203ms203msMail::SpamAssassin::RegistryBoundaries::::CORE:substMail::SpamAssassin::RegistryBoundaries::CORE:subst (opcode)
3865051103ms103msMail::SpamAssassin::RegistryBoundaries::::CORE:matchMail::SpamAssassin::RegistryBoundaries::CORE:match (opcode)
1114.04ms4.04msMail::SpamAssassin::RegistryBoundaries::::CORE:regcompMail::SpamAssassin::RegistryBoundaries::CORE:regcomp (opcode)
111472µs4.52msMail::SpamAssassin::RegistryBoundaries::::newMail::SpamAssassin::RegistryBoundaries::new
11151µs62µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@28Mail::SpamAssassin::RegistryBoundaries::BEGIN@28
11125µs81µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@31Mail::SpamAssassin::RegistryBoundaries::BEGIN@31
11121µs51µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@29Mail::SpamAssassin::RegistryBoundaries::BEGIN@29
11121µs26µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@30Mail::SpamAssassin::RegistryBoundaries::BEGIN@30
11120µs85µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@34Mail::SpamAssassin::RegistryBoundaries::BEGIN@34
1118µs8µsMail::SpamAssassin::RegistryBoundaries::::CORE:qrMail::SpamAssassin::RegistryBoundaries::CORE:qr (opcode)
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1# The (extremely complex) rules for domain delegation.
2
3# <@LICENSE>
4# Licensed to the Apache Software Foundation (ASF) under one or more
5# contributor license agreements. See the NOTICE file distributed with
6# this work for additional information regarding copyright ownership.
7# The ASF licenses this file to you under the Apache License, Version 2.0
8# (the "License"); you may not use this file except in compliance with
9# the License. You may obtain a copy of the License at:
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18# </@LICENSE>
19
20=head1 NAME
21
22Mail::SpamAssassin::RegistryBoundaries - domain delegation rules
23
24=cut
25
26package Mail::SpamAssassin::RegistryBoundaries;
27
28259µs272µs
# spent 62µs (51+10) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@28 which was called: # once (51µs+10µs) by Mail::SpamAssassin::BEGIN@78 at line 28
use strict;
# spent 62µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@28 # spent 10µs making 1 call to strict::import
29253µs281µs
# spent 51µs (21+30) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@29 which was called: # once (21µs+30µs) by Mail::SpamAssassin::BEGIN@78 at line 29
use warnings;
# spent 51µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@29 # spent 30µs making 1 call to warnings::import
30266µs231µs
# spent 26µs (21+5) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@30 which was called: # once (21µs+5µs) by Mail::SpamAssassin::BEGIN@78 at line 30
use bytes;
# spent 26µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@30 # spent 5µs making 1 call to bytes::import
31286µs2137µs
# spent 81µs (25+56) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@31 which was called: # once (25µs+56µs) by Mail::SpamAssassin::BEGIN@78 at line 31
use re 'taint';
# spent 81µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@31 # spent 56µs making 1 call to re::import
32
3318µsour @ISA = qw();
3421.55ms2151µs
# spent 85µs (20+66) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@34 which was called: # once (20µs+66µs) by Mail::SpamAssassin::BEGIN@78 at line 34
use vars qw(%US_STATES);
# spent 85µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@34 # spent 66µs making 1 call to vars::import
35
36# called from SpamAssassin->init() to create $self->{util_rb}
37
# spent 4.52ms (472µs+4.04) within Mail::SpamAssassin::RegistryBoundaries::new which was called: # once (472µs+4.04ms) by Mail::SpamAssassin::new at line 432 of Mail/SpamAssassin.pm
sub new {
3812µs my $class = shift;
3912µs $class = ref($class) || $class;
40
4112µs my ($main) = @_;
42 my $self = {
43 'main' => $main,
44 'conf' => $main->{conf},
4515µs };
4612µs bless ($self, $class);
47
48 # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
491100µs if ($self->{conf}->{valid_tlds}) {
502319µs my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});
51 # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
5214.07ms24.04ms $self->{valid_tlds_re} = qr/(?:$tlds)/i;
53 }
54 else {
55 # Failsafe in case no tlds defined, we don't want this to match everything..
56 $self->{valid_tlds_re} = qr/no_tlds_defined/;
57 }
58
59111µs $self;
60}
61
62# This is required because the .us domain is nuts. See split_domain.
6316µsforeach (qw/
64 ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi
65 mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa pr ri sc sd tn tx ut va vi
66 vt wa wi wv wy
67 /) {
6854208µs $US_STATES{$_} = 1;
69}
70
71###########################################################################
72
73=head1 METHODS
74
75=over 4
76
77=item ($hostname, $domain) = split_domain ($fqdn)
78
79Cut a fully-qualified hostname into the hostname part and the domain
80part, splitting at the DNS registry boundary.
81
82Examples:
83
84 "www.foo.com" => ( "www", "foo.com" )
85 "www.foo.co.uk" => ( "www", "foo.co.uk" )
86
87=cut
88
89
# spent 842ms (789+53.6) within Mail::SpamAssassin::RegistryBoundaries::split_domain which was called 7730 times, avg 109µs/call: # 7730 times (789ms+53.6ms) by Mail::SpamAssassin::RegistryBoundaries::trim_domain at line 171, avg 109µs/call
sub split_domain {
90773013.4ms my $self = shift;
91773019.1ms my $domain = lc shift;
92
93773014.2ms my $hostname = '';
94
95773027.8ms if (defined $domain && $domain ne '') {
96 # www..spamassassin.org -> www.spamassassin.org
97769825.7ms $domain =~ tr/././s;
98
99 # leading/trailing dots
100769887.9ms769814.7ms $domain =~ s/^\.+//;
# spent 14.7ms making 7698 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call
1017698136ms769838.9ms $domain =~ s/\.+$//;
# spent 38.9ms making 7698 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 5µs/call
102
103 # Split scalar domain into components
104769849.8ms my @domparts = split(/\./, $domain);
105769812.4ms my @hostname;
106
107769836.1ms while (@domparts > 1) { # go until we find the TLD
1081504462.4ms if (@domparts == 4) {
1099261.73ms if ($domparts[3] eq 'us' &&
110 (($domparts[0] eq 'pvt' && $domparts[1] eq 'k12') ||
111 ($domparts[0] =~ /^c[io]$/)))
112 {
113 # http://www.neustar.us/policies/docs/rfc_1480.txt
114 # "Fire-Dept.CI.Los-Angeles.CA.US"
115 # "<school-name>.PVT.K12.<state>.US"
116 last if ($US_STATES{$domparts[2]});
117 }
118 }
119 elsif (@domparts == 3) {
120 # http://www.neustar.us/policies/docs/rfc_1480.txt
121 # demon.co.uk
122 # esc.edu.ar
123 # [^\.]+\.${US_STATES}\.us
124642622.4ms if ($domparts[2] eq 'us') {
12529µs last if ($US_STATES{$domparts[1]});
126 }
127 else {
128642417.3ms my $temp = join(".", @domparts);
129642451.2ms last if ($self->{conf}->{three_level_domains}{$temp});
130 }
131 }
132 elsif (@domparts == 2) {
133 # co.uk, etc.
134767417.5ms my $temp = join(".", @domparts);
135767428.7ms last if ($self->{conf}->{two_level_domains}{$temp});
136 }
13714974105ms push(@hostname, shift @domparts);
138 }
139
140 # Look for a sub-delegated TLD
141 # use @domparts to skip trying to match on TLDs that can't possibly
142 # match, but keep in mind that the hostname can be blank, so 4TLD needs 4,
143 # 3TLD needs 3, 2TLD needs 2 ...
144 #
145769826.4ms unshift @domparts, pop @hostname if @hostname;
146769816.5ms $domain = join(".", @domparts);
147769828.3ms $hostname = join(".", @hostname);
148 }
149
150773088.7ms ($hostname, $domain);
151}
152
153###########################################################################
154
155=item $domain = trim_domain($fqdn)
156
157Cut a fully-qualified hostname into the hostname part and the domain
158part, returning just the domain.
159
160Examples:
161
162 "www.foo.com" => "foo.com"
163 "www.foo.co.uk" => "foo.co.uk"
164
165=cut
166
167
# spent 1.05s (204ms+842ms) within Mail::SpamAssassin::RegistryBoundaries::trim_domain which was called 7730 times, avg 135µs/call: # 7730 times (204ms+842ms) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 230, avg 135µs/call
sub trim_domain {
168773013.9ms my $self = shift;
169773014.4ms my $domain = shift;
170
171773053.9ms7730842ms my ($host, $dom) = $self->split_domain($domain);
# spent 842ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::split_domain, avg 109µs/call
1727730133ms return $dom;
173}
174
175###########################################################################
176
177=item $ok = is_domain_valid($dom)
178
179Return C<1> if the domain is valid, C<undef> otherwise. A valid domain
180(a) does not contain whitespace, (b) contains at least one dot, and (c)
181uses a valid TLD or ccTLD.
182
183=back
184
185=cut
186
187
# spent 355ms (300+54.6) within Mail::SpamAssassin::RegistryBoundaries::is_domain_valid which was called 7730 times, avg 46µs/call: # 7730 times (300ms+54.6ms) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 233, avg 46µs/call
sub is_domain_valid {
188773014.0ms my $self = shift;
189773019.4ms my $dom = lc shift;
190
191 # domains don't have whitespace
192773090.4ms773019.3ms return 0 if ($dom =~ /\s/);
# spent 19.3ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call
193
194 # ensure it ends in a known-valid TLD, and has at least 1 dot
1957730129ms773035.3ms return 0 unless ($dom =~ /\.([^.]+)$/);
# spent 35.3ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 5µs/call
196767425.4ms return 0 unless ($self->{conf}->{valid_tlds}{$1});
197
1987668104ms return 1; # nah, it's ok.
199}
200
201#
202
203
# spent 2.52s (925ms+1.60) within Mail::SpamAssassin::RegistryBoundaries::uri_to_domain which was called 7730 times, avg 327µs/call: # 4467 times (508ms+935ms) by Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list at line 2276 of Mail/SpamAssassin/PerMsgStatus.pm, avg 323µs/call # 1954 times (263ms+409ms) by Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list at line 2423 of Mail/SpamAssassin/PerMsgStatus.pm, avg 344µs/call # 949 times (97.8ms+191ms) by Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list at line 2317 of Mail/SpamAssassin/PerMsgStatus.pm, avg 305µs/call # 360 times (56.3ms+64.6ms) by Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list at line 2412 of Mail/SpamAssassin/PerMsgStatus.pm, avg 336µs/call
sub uri_to_domain {
204773014.4ms my $self = shift;
205773023.6ms my $uri = lc shift;
206
207 # Javascript is not going to help us, so return.
2087730110ms773016.0ms return if ($uri =~ /^javascript:/);
# spent 16.0ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call
209
210773066.6ms773016.9ms $uri =~ s{\#.*$}{}gs; # drop fragment
# spent 16.9ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call
2117730122ms773057.3ms $uri =~ s{^[a-z]+:/{0,2}}{}gs; # drop the protocol
# spent 57.3ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 7µs/call
212773094.9ms773020.8ms $uri =~ s{^[^/]*\@}{}gs; # username/passwd
# spent 20.8ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 3µs/call
213
214 # strip path and CGI params. note: bug 4213 shows that "&" should
215 # *not* be likewise stripped here -- it's permitted in hostnames by
216 # some common MUAs!
2177730119ms773039.0ms $uri =~ s{[/?].*$}{}gs;
# spent 39.0ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 5µs/call
218
219773078.9ms773015.2ms $uri =~ s{:\d*$}{}gs; # port, bug 4191: sometimes the # is missing
# spent 15.2ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call
220
221 # skip undecoded URIs if the encoded bits shouldn't be.
222 # we'll see the decoded version as well. see url_encode()
2237730128ms773015.0ms return if $uri =~ /\%(?:2[1-9a-f]|[3-6][0-9a-f]|7[0-9a-e])/;
# spent 15.0ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call
224
225773014.9ms my $host = $uri; # unstripped/full domain name
226
227 # keep IPs intact
228773087.4ms773017.8ms if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) {
# spent 17.8ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call
229 # get rid of hostname part of domain, understanding delegation
230773051.8ms77301.05s $uri = $self->trim_domain($uri);
# spent 1.05s making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::trim_domain, avg 135µs/call
231
232 # ignore invalid domains
233773069.0ms7730355ms return unless ($self->is_domain_valid($uri));
# spent 355ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::is_domain_valid, avg 46µs/call
234 }
235
236 # $uri is now the domain only, optionally return unstripped host name
237766885.2ms return !wantarray ? $uri : ($uri, $host);
238}
239
240122µs1;
241
 
# spent 103ms within Mail::SpamAssassin::RegistryBoundaries::CORE:match which was called 38650 times, avg 3µs/call: # 7730 times (35.3ms+0s) by Mail::SpamAssassin::RegistryBoundaries::is_domain_valid at line 195, avg 5µs/call # 7730 times (19.3ms+0s) by Mail::SpamAssassin::RegistryBoundaries::is_domain_valid at line 192, avg 2µs/call # 7730 times (17.8ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 228, avg 2µs/call # 7730 times (16.0ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 208, avg 2µs/call # 7730 times (15.0ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 223, avg 2µs/call
sub Mail::SpamAssassin::RegistryBoundaries::CORE:match; # opcode
# spent 8µs within Mail::SpamAssassin::RegistryBoundaries::CORE:qr which was called: # once (8µs+0s) by Mail::SpamAssassin::RegistryBoundaries::new at line 52
sub Mail::SpamAssassin::RegistryBoundaries::CORE:qr; # opcode
# spent 4.04ms within Mail::SpamAssassin::RegistryBoundaries::CORE:regcomp which was called: # once (4.04ms+0s) by Mail::SpamAssassin::RegistryBoundaries::new at line 52
sub Mail::SpamAssassin::RegistryBoundaries::CORE:regcomp; # opcode
# spent 203ms within Mail::SpamAssassin::RegistryBoundaries::CORE:subst which was called 54046 times, avg 4µs/call: # 7730 times (57.3ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 211, avg 7µs/call # 7730 times (39.0ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 217, avg 5µs/call # 7730 times (20.8ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 212, avg 3µs/call # 7730 times (16.9ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 210, avg 2µs/call # 7730 times (15.2ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 219, avg 2µs/call # 7698 times (38.9ms+0s) by Mail::SpamAssassin::RegistryBoundaries::split_domain at line 101, avg 5µs/call # 7698 times (14.7ms+0s) by Mail::SpamAssassin::RegistryBoundaries::split_domain at line 100, avg 2µs/call
sub Mail::SpamAssassin::RegistryBoundaries::CORE:subst; # opcode