Profile of URI/Escape.pm

Filename	/usr/local/lib/perl5/site_perl/URI/Escape.pm
Statements	Executed 272 statements in 4.28ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	95µs	112µs	URI::Escape::::BEGIN@3URI::Escape::BEGIN@3
1	1	1	50µs	99µs	URI::Escape::::BEGIN@140URI::Escape::BEGIN@140
1	1	1	27µs	58µs	URI::Escape::::BEGIN@4URI::Escape::BEGIN@4
1	1	1	24µs	24µs	URI::Escape::::BEGIN@146URI::Escape::BEGIN@146
2	1	1	15µs	15µs	URI::Escape::::CORE:qrURI::Escape::CORE:qr (opcode)
0	0	0	0s	0s	URI::Escape::::_fail_hiURI::Escape::_fail_hi
0	0	0	0s	0s	URI::Escape::::escape_charURI::Escape::escape_char
0	0	0	0s	0s	URI::Escape::::uri_escapeURI::Escape::uri_escape
0	0	0	0s	0s	URI::Escape::::uri_escape_utf8URI::Escape::uri_escape_utf8
0	0	0	0s	0s	URI::Escape::::uri_unescapeURI::Escape::uri_unescape

Call graph for these subroutines as a Graphviz dot language file.

Line	Statements	Time on line	Calls	Time in subs	Code
1					package URI::Escape;
2
3	2	64µs	2	129µs	# spent 112µs (95+17) within URI::Escape::BEGIN@3 which was called: # once (95µs+17µs) by Razor2::String::BEGIN@5 at line 3 use strict; # spent 112µs making 1 call to URI::Escape::BEGIN@3 # spent 17µs making 1 call to strict::import
4	2	312µs	2	90µs	# spent 58µs (27+31) within URI::Escape::BEGIN@4 which was called: # once (27µs+31µs) by Razor2::String::BEGIN@5 at line 4 use warnings; # spent 58µs making 1 call to URI::Escape::BEGIN@4 # spent 31µs making 1 call to warnings::import
5
6					=head1 NAME
7
8					URI::Escape - Percent-encode and percent-decode unsafe characters
9
10					=head1 SYNOPSIS
11
12					use URI::Escape;
13					$safe = uri_escape("10% is enough\n");
14					$verysafe = uri_escape("foo", "\0-\377");
15					$str = uri_unescape($safe);
16
17					=head1 DESCRIPTION
18
19					This module provides functions to percent-encode and percent-decode URI strings as
20					defined by RFC 3986. Percent-encoding URIs is informally called "URI escaping".
21					This is the terminology used by this module, which predates the formalization of the
22					terms by the RFC by several years.
23
24					A URI consists of a restricted set of characters. The restricted set
25					of characters consists of digits, letters, and a few graphic symbols
26					chosen from those common to most of the character encodings and input
27					facilities available to Internet users. They are made up of the
28					"unreserved" and "reserved" character sets as defined in RFC 3986.
29
30					unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
31					reserved = ":" / "/" / "?" / "#" / "[" / "]" / "@"
32					"!" / "$" / "&" / "'" / "(" / ")"
33					/ "*" / "+" / "," / ";" / "="
34
35					In addition, any byte (octet) can be represented in a URI by an escape
36					sequence: a triplet consisting of the character "%" followed by two
37					hexadecimal digits. A byte can also be represented directly by a
38					character, using the US-ASCII character for that octet.
39
40					Some of the characters are I<reserved> for use as delimiters or as
41					part of certain URI components. These must be escaped if they are to
42					be treated as ordinary data. Read RFC 3986 for further details.
43
44					The functions provided (and exported by default) from this module are:
45
46					=over 4
47
48					=item uri_escape( $string )
49
50					=item uri_escape( $string, $unsafe )
51
52					Replaces each unsafe character in the $string with the corresponding
53					escape sequence and returns the result. The $string argument should
54					be a string of bytes. The uri_escape() function will croak if given
55					characters with code above 255. Use uri_escape_utf8() if you know you
56					have such chars and/or want chars in the 128 .. 255 range treated as
57					UTF-8.
58
59					The uri_escape() function takes an optional second argument that
60					overrides the set of characters that are to be escaped. The set is
61					specified as a string that can be used in a regular expression
62					character class (between [ ]). E.g.:
63
64					"\x00-\x1f\x7f-\xff" # all control and hi-bit characters
65					"a-z" # all lower case characters
66					"^A-Za-z" # everything not a letter
67
68					The default set of characters to be escaped is everything that is
69					I<not> part of the C<unreserved> character class shown above, so the
70					reserved characters are escaped as well. I.e. the default is:
71
72					"^A-Za-z0-9\-\._~"
73
74					=item uri_escape_utf8( $string )
75
76					=item uri_escape_utf8( $string, $unsafe )
77
78					Works like uri_escape(), but will encode chars as UTF-8 before
79					escaping them. This makes this function able to deal with characters
80					with code above 255 in $string. Note that chars in the 128 .. 255
81					range will be escaped differently by this function compared to what
82					uri_escape() would. For chars in the 0 .. 127 range there is no
83					difference.
84
85					Equivalent to:
86
87					utf8::encode($string);
88					my $uri = uri_escape($string);
89
90					Note: JavaScript has a function called escape() that produces the
91					sequence "%uXXXX" for chars in the 256 .. 65535 range. This function
92					has really nothing to do with URI escaping but some folks got confused
93					since it "does the right thing" in the 0 .. 255 range. Because of
94					this you sometimes see "URIs" with these kinds of escapes. The
95					JavaScript encodeURIComponent() function is similar to uri_escape_utf8().
96
97					=item uri_unescape($string,...)
98
99					Returns a string with each %XX sequence replaced with the actual byte
100					(octet).
101
102					This does the same as:
103
104					$string =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
105
106					but does not modify the string in-place as this RE would. Using the
107					uri_unescape() function instead of the RE might make the code look
108					cleaner and is a few characters less to type.
109
110					In a simple benchmark test I did, calling the function (instead of
111					using the inline RE above) was something like 40% slower when a few
112					chars were unescaped, and something like 700% slower when none were. If
113					you are going to unescape a lot of times it might be a good idea to
114					inline the RE.
115
116					If the uri_unescape() function is passed multiple strings, then each
117					one is returned unescaped.
118
119					=back
120
121					The module can also export the C<%escapes> hash, which contains the
122					mapping from all 256 bytes to the corresponding escape codes. Lookup
123					in this hash is faster than evaluating C<sprintf("%%%02X", ord($byte))>
124					each time.
125
126					=head1 SEE ALSO
127
128					L<URI>
129
130
131					=head1 COPYRIGHT
132
133					Copyright 1995-2004 Gisle Aas.
134
135					This program is free software; you can redistribute it and/or modify
136					it under the same terms as Perl itself.
137
138					=cut
139
140	3	194µs	3	148µs	# spent 99µs (50+49) within URI::Escape::BEGIN@140 which was called: # once (50µs+49µs) by Razor2::String::BEGIN@5 at line 140 use Exporter 5.57 'import'; # spent 99µs making 1 call to URI::Escape::BEGIN@140 # spent 30µs making 1 call to Exporter::import # spent 19µs making 1 call to version::_VERSION
141					our %escapes; # byte => "%XX" escape map, filled by the 0..255 loop further down
142	1	4µs			our @EXPORT = qw(uri_escape uri_unescape uri_escape_utf8); # the functions exported by default
143	1	2µs			our @EXPORT_OK = qw(%escapes); # the escape map is exported only on request
144	1	2µs			our $VERSION = "3.31";
145
146	2	1.13ms	1	24µs	# spent 24µs within URI::Escape::BEGIN@146 which was called: # once (24µs+0s) by Razor2::String::BEGIN@5 at line 146 use Carp (); # spent 24µs making 1 call to URI::Escape::BEGIN@146
147
148					# Build a char->hex map: each of the 256 byte values maps to its "%XX" escape.
149	1	10µs			for (0..255) {
150	256	2.47ms			$escapes{chr($_)} = sprintf("%%%02X", $_); # "%%" emits a literal percent sign, %02X two upper-case hex digits
151					}
152
153	1	5µs			my %subst; # compiled patterns: cache used by uri_escape(), keyed by the caller-supplied $patn string
154					# %Unsafe holds precompiled negated character classes. RFC3986 is the default used by uri_escape() when no $patn is given; RFC2732 is not referenced by any code visible in this listing.
155	1	54µs	2	15µs	my %Unsafe = ( # spent 15µs making 2 calls to URI::Escape::CORE:qr, avg 8µs/call
156					RFC2732 => qr/[^A-Za-z0-9\-_.!~*'()]/,
157					RFC3986 => qr/[^A-Za-z0-9\-\._~]/,
158					);
159
# uri_escape( $string [, $unsafe] )
#
# Return a copy of $string in which every character matched by the unsafe
# set has been replaced by its "%XX" escape taken from %escapes.
#
#   $text - the string to escape; undef is passed straight back as undef.
#   $patn - optional: the contents of a regular expression character class
#           (the text that goes between "[" and "]"), e.g. "^A-Za-z".
#           When omitted, the RFC 3986 default in %Unsafe is used.
#
# Croaks via _fail_hi() when a matched character has no entry in %escapes,
# and croaks with "uri_escape: ..." when $patn is not a valid class.
sub uri_escape {
    my ($text, $patn) = @_;

    # Deliberately "return undef" rather than a bare return: existing
    # callers receive exactly one value even when called in list context.
    return undef unless defined $text;

    my $unsafe = $Unsafe{RFC3986};
    if (defined $patn) {
        # Compile the caller's class once and cache it under its source
        # text.  qr// treats $patn purely as pattern text, so nothing in it
        # is interpolated or executed as Perl code (which is what happened
        # when the pattern was pasted into a string-eval'ed sub), and "/"
        # needs no escaping.
        unless (exists $subst{$patn}) {
            my $compiled;
            eval { $compiled = qr/[$patn]/; 1 }
                or Carp::croak("uri_escape: $@");
            $subst{$patn} = $compiled;
        }
        $unsafe = $subst{$patn};
    }

    $text =~ s/($unsafe)/$escapes{$1} || _fail_hi($1)/ge;
    return $text;
}
176
# Croak on behalf of uri_escape() when asked to escape a character that has
# no entry in the byte escape table.  The message carries the character's
# code point in \x{...} notation.  Never returns.
sub _fail_hi {
    my ($offender) = @_;
    my $message = sprintf "Can't escape \\x{%04X}, try uri_escape_utf8() instead", ord($offender);
    Carp::croak($message);
}
181
# uri_escape_utf8( $string [, $unsafe] )
#
# Encode a private copy of $string as UTF-8 and hand it, together with any
# remaining arguments, to uri_escape().  An undefined $string yields undef.
sub uri_escape_utf8 {
    my ($chars, @rest) = @_;

    if (!defined $chars) {
        return undef;
    }

    # Works on the local copy only; the caller's string is left untouched.
    utf8::encode($chars);

    return uri_escape($chars, @rest);
}
188
# uri_unescape( $string, ... )
#
# Replace every "%XX" sequence (two hex digits, either case) with the
# character whose code is XX.  The arguments are copied, never modified.
#
# Returns:
#   - list context with two or more arguments: the unescaped copy of each
#     argument, in order;
#   - otherwise: the unescaped copy of the first argument only.
# Undefined arguments are passed through as undef.
sub uri_unescape {
    # Note from RFC1630:  "Sequences which start with a percent sign
    # but are not followed by two hexadecimal characters are reserved
    # for future extension" -- such sequences are left as they are.
    my ($first, @others) = @_;

    if (@others && wantarray) {
        # not executed for the common case of a single argument
        my @decoded = ($first, @others);    # need to copy
        for my $item (@decoded) {
            # Same guard as the single-argument path below: skip undef
            # instead of raising an "uninitialized value" warning.
            next unless defined $item;
            $item =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
        }
        return @decoded;
    }

    $first =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg if defined $first;
    return $first;
}
205
# XXX FIXME escape_char is buggy as it assigns meaning to the string's storage format.
#
# Return the concatenated "%XX" escapes of every character of the first
# argument, looked up in %URI::Escape::escapes.  When the argument carries
# the UTF-8 flag, a copy of it is encoded as UTF-8 first, so each resulting
# byte is escaped separately.
sub escape_char {
    # Old versions of utf8::is_utf8() didn't properly handle magical vars (e.g. $1).
    # Reading the argument once here forces the fetch before the flag is tested.
    my $dummy = substr($_[0], 0, 0);

    my $octets = $_[0];
    utf8::encode($octets) if utf8::is_utf8($_[0]);

    return join '', map { $URI::Escape::escapes{$_} } split //, $octets;
}
220
221	1	25µs			1;

					# spent 15µs within URI::Escape::CORE:qr which was called 2 times, avg 8µs/call: # 2 times (15µs+0s) by Razor2::String::BEGIN@5 at line 155, avg 8µs/call sub URI::Escape::CORE:qr; # opcode