← Index
NYTProf Performance Profile   « line view »
For /usr/local/bin/sa-learn
  Run on Sun Nov 5 02:36:06 2017
Reported on Sun Nov 5 02:56:20 2017

Filename/usr/local/lib/perl5/site_perl/Mail/SpamAssassin/RegistryBoundaries.pm
StatementsExecuted 358917 statements in 2.52s
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
773041974ms2.46sMail::SpamAssassin::RegistryBoundaries::::uri_to_domainMail::SpamAssassin::RegistryBoundaries::uri_to_domain
773011715ms767msMail::SpamAssassin::RegistryBoundaries::::split_domainMail::SpamAssassin::RegistryBoundaries::split_domain
773011290ms347msMail::SpamAssassin::RegistryBoundaries::::is_domain_validMail::SpamAssassin::RegistryBoundaries::is_domain_valid
5404671205ms205msMail::SpamAssassin::RegistryBoundaries::::CORE:substMail::SpamAssassin::RegistryBoundaries::CORE:subst (opcode)
773011172ms939msMail::SpamAssassin::RegistryBoundaries::::trim_domainMail::SpamAssassin::RegistryBoundaries::trim_domain
3865051106ms106msMail::SpamAssassin::RegistryBoundaries::::CORE:matchMail::SpamAssassin::RegistryBoundaries::CORE:match (opcode)
1114.16ms4.16msMail::SpamAssassin::RegistryBoundaries::::CORE:regcompMail::SpamAssassin::RegistryBoundaries::CORE:regcomp (opcode)
111456µs4.62msMail::SpamAssassin::RegistryBoundaries::::newMail::SpamAssassin::RegistryBoundaries::new
11151µs61µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@28Mail::SpamAssassin::RegistryBoundaries::BEGIN@28
11133µs100µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@31Mail::SpamAssassin::RegistryBoundaries::BEGIN@31
11128µs60µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@29Mail::SpamAssassin::RegistryBoundaries::BEGIN@29
11126µs32µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@30Mail::SpamAssassin::RegistryBoundaries::BEGIN@30
11126µs100µsMail::SpamAssassin::RegistryBoundaries::::BEGIN@34Mail::SpamAssassin::RegistryBoundaries::BEGIN@34
1117µs7µsMail::SpamAssassin::RegistryBoundaries::::CORE:qrMail::SpamAssassin::RegistryBoundaries::CORE:qr (opcode)
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1# The (extremely complex) rules for domain delegation.
2
3# <@LICENSE>
4# Licensed to the Apache Software Foundation (ASF) under one or more
5# contributor license agreements. See the NOTICE file distributed with
6# this work for additional information regarding copyright ownership.
7# The ASF licenses this file to you under the Apache License, Version 2.0
8# (the "License"); you may not use this file except in compliance with
9# the License. You may obtain a copy of the License at:
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing, software
14# distributed under the License is distributed on an "AS IS" BASIS,
15# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16# See the License for the specific language governing permissions and
17# limitations under the License.
18# </@LICENSE>
19
20=head1 NAME
21
22Mail::SpamAssassin::RegistryBoundaries - domain delegation rules
23
24=cut
25
26package Mail::SpamAssassin::RegistryBoundaries;
27
28270µs271µs
# spent 61µs (51+10) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@28 which was called: # once (51µs+10µs) by Mail::SpamAssassin::BEGIN@78 at line 28
use strict;
# spent 61µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@28 # spent 10µs making 1 call to strict::import
29263µs293µs
# spent 60µs (28+33) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@29 which was called: # once (28µs+33µs) by Mail::SpamAssassin::BEGIN@78 at line 29
use warnings;
# spent 60µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@29 # spent 33µs making 1 call to warnings::import
30290µs237µs
# spent 32µs (26+6) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@30 which was called: # once (26µs+6µs) by Mail::SpamAssassin::BEGIN@78 at line 30
use bytes;
# spent 32µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@30 # spent 6µs making 1 call to bytes::import
312101µs2166µs
# spent 100µs (33+66) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@31 which was called: # once (33µs+66µs) by Mail::SpamAssassin::BEGIN@78 at line 31
use re 'taint';
# spent 100µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@31 # spent 66µs making 1 call to re::import
32
3318µsour @ISA = qw();
3421.69ms2174µs
# spent 100µs (26+74) within Mail::SpamAssassin::RegistryBoundaries::BEGIN@34 which was called: # once (26µs+74µs) by Mail::SpamAssassin::BEGIN@78 at line 34
use vars qw(%US_STATES);
# spent 100µs making 1 call to Mail::SpamAssassin::RegistryBoundaries::BEGIN@34 # spent 74µs making 1 call to vars::import
35
36# called from SpamAssassin->init() to create $self->{util_rb}
37
# spent 4.62ms (456µs+4.17) within Mail::SpamAssassin::RegistryBoundaries::new which was called: # once (456µs+4.17ms) by Mail::SpamAssassin::new at line 432 of Mail/SpamAssassin.pm
sub new {
3813µs my $class = shift;
3912µs $class = ref($class) || $class;
40
4112µs my ($main) = @_;
42 my $self = {
43 'main' => $main,
44 'conf' => $main->{conf},
4516µs };
4612µs bless ($self, $class);
47
48 # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc
491100µs if ($self->{conf}->{valid_tlds}) {
502301µs my $tlds = join('|', keys %{$self->{conf}->{valid_tlds}});
51 # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing
5214.20ms24.17ms $self->{valid_tlds_re} = qr/(?:$tlds)/i;
53 }
54 else {
55 # Failsafe in case no tlds defined, we don't want this to match everything..
56 $self->{valid_tlds_re} = qr/no_tlds_defined/;
57 }
58
59111µs $self;
60}
61
62# This is required because the .us domain is nuts. See split_domain.
6318µsforeach (qw/
64 ak al ar az ca co ct dc de fl ga gu hi ia id il in ks ky la ma md me mi
65 mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa pr ri sc sd tn tx ut va vi
66 vt wa wi wv wy
67 /) {
6854313µs $US_STATES{$_} = 1;
69}
70
71###########################################################################
72
73=head1 METHODS
74
75=over 4
76
77=item ($hostname, $domain) = split_domain ($fqdn)
78
79Cut a fully-qualified hostname into the hostname part and the domain
80part, splitting at the DNS registry boundary.
81
82Examples:
83
84 "www.foo.com" => ( "www", "foo.com" )
85 "www.foo.co.uk" => ( "www", "foo.co.uk" )
86
87=cut
88
89
# spent 767ms (715+51.9) within Mail::SpamAssassin::RegistryBoundaries::split_domain which was called 7730 times, avg 99µs/call: # 7730 times (715ms+51.9ms) by Mail::SpamAssassin::RegistryBoundaries::trim_domain at line 171, avg 99µs/call
sub split_domain {
90773013.2ms my $self = shift;
91773019.6ms my $domain = lc shift;
92
93773014.0ms my $hostname = '';
94
95773027.6ms if (defined $domain && $domain ne '') {
96 # www..spamassassin.org -> www.spamassassin.org
97769827.9ms $domain =~ tr/././s;
98
99 # leading/trailing dots
100769882.1ms769814.8ms $domain =~ s/^\.+//;
# spent 14.8ms making 7698 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call
1017698120ms769837.1ms $domain =~ s/\.+$//;
# spent 37.1ms making 7698 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 5µs/call
102
103 # Split scalar domain into components
104769844.4ms my @domparts = split(/\./, $domain);
105769812.3ms my @hostname;
106
107769837.6ms while (@domparts > 1) { # go until we find the TLD
1081504466.3ms if (@domparts == 4) {
1099261.74ms if ($domparts[3] eq 'us' &&
110 (($domparts[0] eq 'pvt' && $domparts[1] eq 'k12') ||
111 ($domparts[0] =~ /^c[io]$/)))
112 {
113 # http://www.neustar.us/policies/docs/rfc_1480.txt
114 # "Fire-Dept.CI.Los-Angeles.CA.US"
115 # "<school-name>.PVT.K12.<state>.US"
116 last if ($US_STATES{$domparts[2]});
117 }
118 }
119 elsif (@domparts == 3) {
120 # http://www.neustar.us/policies/docs/rfc_1480.txt
121 # demon.co.uk
122 # esc.edu.ar
123 # [^\.]+\.${US_STATES}\.us
124642622.5ms if ($domparts[2] eq 'us') {
12527µs last if ($US_STATES{$domparts[1]});
126 }
127 else {
128642416.9ms my $temp = join(".", @domparts);
129642437.6ms last if ($self->{conf}->{three_level_domains}{$temp});
130 }
131 }
132 elsif (@domparts == 2) {
133 # co.uk, etc.
134767417.4ms my $temp = join(".", @domparts);
135767424.4ms last if ($self->{conf}->{two_level_domains}{$temp});
136 }
1371497471.5ms push(@hostname, shift @domparts);
138 }
139
140 # Look for a sub-delegated TLD
141 # use @domparts to skip trying to match on TLDs that can't possibly
142 # match, but keep in mind that the hostname can be blank, so 4TLD needs 4,
143 # 3TLD needs 3, 2TLD needs 2 ...
144 #
145769827.4ms unshift @domparts, pop @hostname if @hostname;
146769816.6ms $domain = join(".", @domparts);
147769825.8ms $hostname = join(".", @hostname);
148 }
149
150773093.3ms ($hostname, $domain);
151}
152
153###########################################################################
154
155=item $domain = trim_domain($fqdn)
156
157Cut a fully-qualified hostname into the hostname part and the domain
158part, returning just the domain.
159
160Examples:
161
162 "www.foo.com" => "foo.com"
163 "www.foo.co.uk" => "foo.co.uk"
164
165=cut
166
167
# spent 939ms (172+767) within Mail::SpamAssassin::RegistryBoundaries::trim_domain which was called 7730 times, avg 122µs/call: # 7730 times (172ms+767ms) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 230, avg 122µs/call
sub trim_domain {
168773013.8ms my $self = shift;
169773014.2ms my $domain = shift;
170
171773053.3ms7730767ms my ($host, $dom) = $self->split_domain($domain);
# spent 767ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::split_domain, avg 99µs/call
172773074.4ms return $dom;
173}
174
175###########################################################################
176
177=item $ok = is_domain_valid($dom)
178
179Return C<1> if the domain is valid, C<undef> otherwise. A valid domain
180(a) does not contain whitespace, (b) contains at least one dot, and (c)
181uses a valid TLD or ccTLD.
182
183=back
184
185=cut
186
187
# spent 347ms (290+56.7) within Mail::SpamAssassin::RegistryBoundaries::is_domain_valid which was called 7730 times, avg 45µs/call: # 7730 times (290ms+56.7ms) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 233, avg 45µs/call
sub is_domain_valid {
188773019.2ms my $self = shift;
189773019.7ms my $dom = lc shift;
190
191 # domains don't have whitespace
1927730109ms773021.1ms return 0 if ($dom =~ /\s/);
# spent 21.1ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 3µs/call
193
194 # ensure it ends in a known-valid TLD, and has at least 1 dot
1957730113ms773035.6ms return 0 unless ($dom =~ /\.([^.]+)$/);
# spent 35.6ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 5µs/call
196767425.7ms return 0 unless ($self->{conf}->{valid_tlds}{$1});
197
198766890.1ms return 1; # nah, it's ok.
199}
200
201#
202
203
# spent 2.46s (974ms+1.49) within Mail::SpamAssassin::RegistryBoundaries::uri_to_domain which was called 7730 times, avg 319µs/call: # 4467 times (533ms+847ms) by Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list at line 2276 of Mail/SpamAssassin/PerMsgStatus.pm, avg 309µs/call # 1954 times (262ms+399ms) by Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list at line 2423 of Mail/SpamAssassin/PerMsgStatus.pm, avg 338µs/call # 949 times (137ms+170ms) by Mail::SpamAssassin::PerMsgStatus::get_uri_detail_list at line 2317 of Mail/SpamAssassin/PerMsgStatus.pm, avg 323µs/call # 360 times (42.0ms+72.7ms) by Mail::SpamAssassin::PerMsgStatus::_get_parsed_uri_list at line 2412 of Mail/SpamAssassin/PerMsgStatus.pm, avg 319µs/call
sub uri_to_domain {
204773014.4ms my $self = shift;
205773024.1ms my $uri = lc shift;
206
207 # Javascript is not going to help us, so return.
2087730133ms773016.1ms return if ($uri =~ /^javascript:/);
# spent 16.1ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call
209
210773099.1ms773017.2ms $uri =~ s{\#.*$}{}gs; # drop fragment
# spent 17.2ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call
2117730147ms773058.2ms $uri =~ s{^[a-z]+:/{0,2}}{}gs; # drop the protocol
# spent 58.2ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 8µs/call
212773084.0ms773020.9ms $uri =~ s{^[^/]*\@}{}gs; # username/passwd
# spent 20.9ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 3µs/call
213
214 # strip path and CGI params. note: bug 4213 shows that "&" should
215 # *not* be likewise stripped here -- it's permitted in hostnames by
216 # some common MUAs!
2177730136ms773042.0ms $uri =~ s{[/?].*$}{}gs;
# spent 42.0ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 5µs/call
218
2197730112ms773015.2ms $uri =~ s{:\d*$}{}gs; # port, bug 4191: sometimes the # is missing
# spent 15.2ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:subst, avg 2µs/call
220
221 # skip undecoded URIs if the encoded bits shouldn't be.
222 # we'll see the decoded version as well. see url_encode()
223773088.9ms773014.9ms return if $uri =~ /\%(?:2[1-9a-f]|[3-6][0-9a-f]|7[0-9a-e])/;
# spent 14.9ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call
224
225773014.6ms my $host = $uri; # unstripped/full domain name
226
227 # keep IPs intact
228773095.9ms773018.2ms if ($uri !~ /^\d+\.\d+\.\d+\.\d+$/) {
# spent 18.2ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::CORE:match, avg 2µs/call
229 # get rid of hostname part of domain, understanding delegation
230773050.8ms7730939ms $uri = $self->trim_domain($uri);
# spent 939ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::trim_domain, avg 122µs/call
231
232 # ignore invalid domains
233773061.5ms7730347ms return unless ($self->is_domain_valid($uri));
# spent 347ms making 7730 calls to Mail::SpamAssassin::RegistryBoundaries::is_domain_valid, avg 45µs/call
234 }
235
236 # $uri is now the domain only, optionally return unstripped host name
2377668102ms return !wantarray ? $uri : ($uri, $host);
238}
239
240139µs1;
241
 
# spent 106ms within Mail::SpamAssassin::RegistryBoundaries::CORE:match which was called 38650 times, avg 3µs/call: # 7730 times (35.6ms+0s) by Mail::SpamAssassin::RegistryBoundaries::is_domain_valid at line 195, avg 5µs/call # 7730 times (21.1ms+0s) by Mail::SpamAssassin::RegistryBoundaries::is_domain_valid at line 192, avg 3µs/call # 7730 times (18.2ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 228, avg 2µs/call # 7730 times (16.1ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 208, avg 2µs/call # 7730 times (14.9ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 223, avg 2µs/call
sub Mail::SpamAssassin::RegistryBoundaries::CORE:match; # opcode
# spent 7µs within Mail::SpamAssassin::RegistryBoundaries::CORE:qr which was called: # once (7µs+0s) by Mail::SpamAssassin::RegistryBoundaries::new at line 52
sub Mail::SpamAssassin::RegistryBoundaries::CORE:qr; # opcode
# spent 4.16ms within Mail::SpamAssassin::RegistryBoundaries::CORE:regcomp which was called: # once (4.16ms+0s) by Mail::SpamAssassin::RegistryBoundaries::new at line 52
sub Mail::SpamAssassin::RegistryBoundaries::CORE:regcomp; # opcode
# spent 205ms within Mail::SpamAssassin::RegistryBoundaries::CORE:subst which was called 54046 times, avg 4µs/call: # 7730 times (58.2ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 211, avg 8µs/call # 7730 times (42.0ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 217, avg 5µs/call # 7730 times (20.9ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 212, avg 3µs/call # 7730 times (17.2ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 210, avg 2µs/call # 7730 times (15.2ms+0s) by Mail::SpamAssassin::RegistryBoundaries::uri_to_domain at line 219, avg 2µs/call # 7698 times (37.1ms+0s) by Mail::SpamAssassin::RegistryBoundaries::split_domain at line 101, avg 5µs/call # 7698 times (14.8ms+0s) by Mail::SpamAssassin::RegistryBoundaries::split_domain at line 100, avg 2µs/call
sub Mail::SpamAssassin::RegistryBoundaries::CORE:subst; # opcode