Profile of Pod/Simple/BlackBox.pm

Filename	/usr/local/lib/perl5/5.24/Pod/Simple/BlackBox.pm
Statements	Executed 23 statements in 22.6ms

Subroutines
Calls	P	F	Exclusive Time	Inclusive Time	Subroutine
1	1	1	46µs	52µs	Pod::Simple::BlackBox::::BEGIN@21Pod::Simple::BlackBox::BEGIN@21
1	1	1	31µs	40µs	Pod::Simple::BlackBox::::BEGIN@1282Pod::Simple::BlackBox::BEGIN@1282
1	1	1	27µs	33µs	Pod::Simple::BlackBox::::BEGIN@22Pod::Simple::BlackBox::BEGIN@22
1	1	1	20µs	98µs	Pod::Simple::BlackBox::::BEGIN@24Pod::Simple::BlackBox::BEGIN@24
1	1	1	12µs	12µs	Pod::Simple::BlackBox::::BEGIN@27Pod::Simple::BlackBox::BEGIN@27
1	1	1	10µs	10µs	Pod::Simple::BlackBox::::BEGIN@23Pod::Simple::BlackBox::BEGIN@23
1	1	1	7µs	7µs	Pod::Simple::BlackBox::::CORE:qrPod::Simple::BlackBox::CORE:qr (opcode)
0	0	0	0s	0s	Pod::Simple::BlackBox::_closers_for_all_curr_open
0	0	0	0s	0s	Pod::Simple::BlackBox::_dump_curr_open
0	0	0	0s	0s	Pod::Simple::BlackBox::::_gen_errataPod::Simple::BlackBox::_gen_errata
0	0	0	0s	0s	Pod::Simple::BlackBox::_handle_encoding_line
0	0	0	0s	0s	Pod::Simple::BlackBox::_handle_encoding_second_level
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_DataPod::Simple::BlackBox::_ponder_Data
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_PlainPod::Simple::BlackBox::_ponder_Plain
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_Verbatim
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_backPod::Simple::BlackBox::_ponder_back
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_beginPod::Simple::BlackBox::_ponder_begin
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_doc_end
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_endPod::Simple::BlackBox::_ponder_end
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_forPod::Simple::BlackBox::_ponder_for
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_itemPod::Simple::BlackBox::_ponder_item
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_overPod::Simple::BlackBox::_ponder_over
0	0	0	0s	0s	Pod::Simple::BlackBox::_ponder_paragraph_buffer
0	0	0	0s	0s	Pod::Simple::BlackBox::::_ponder_podPod::Simple::BlackBox::_ponder_pod
0	0	0	0s	0s	Pod::Simple::BlackBox::::_stringify_lolPod::Simple::BlackBox::_stringify_lol
0	0	0	0s	0s	Pod::Simple::BlackBox::_traverse_treelet_bit
0	0	0	0s	0s	Pod::Simple::BlackBox::_treelet_from_formatting_codes
0	0	0	0s	0s	Pod::Simple::BlackBox::_verbatim_format
0	0	0	0s	0s	Pod::Simple::BlackBox::::parse_linePod::Simple::BlackBox::parse_line
0	0	0	0s	0s	Pod::Simple::BlackBox::::parse_linesPod::Simple::BlackBox::parse_lines
0	0	0	0s	0s	Pod::Simple::BlackBox::::prettyPod::Simple::BlackBox::pretty
0	0	0	0s	0s	Pod::Simple::BlackBox::::reinitPod::Simple::BlackBox::reinit
0	0	0	0s	0s	Pod::Simple::BlackBox::::stringify_lolPod::Simple::BlackBox::stringify_lol
0	0	0	0s	0s	Pod::Simple::BlackBox::text_content_of_treelet

Call graph for these subroutines as a Graphviz dot language file.

Line	State ments	Time on line	Calls	Time in subs	Code
1					package Pod::Simple::BlackBox;
2					#
3					# "What's in the box?" "Pain."
4					#
5					###########################################################################
6					#
7					# This is where all the scary things happen: parsing lines into
8					# paragraphs; and then into directives, verbatims, and then also
9					# turning formatting sequences into treelets.
10					#
11					# Are you really sure you want to read this code?
12					#
13					#-----------------------------------------------------------------------------
14					#
15					# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
16					# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
17					# to call the proper callbacks on the treelets.
18					#
19					# Every node in a treelet is a ['name', {attrhash}, ...children...]
20
21	2	67µs	2	57µs	# spent 52µs (46+5) within Pod::Simple::BlackBox::BEGIN@21 which was called: # once (46µs+5µs) by Pod::Simple::LinkSection::BEGIN@9 at line 21 use integer; # vroom! # spent 52µs making 1 call to Pod::Simple::BlackBox::BEGIN@21 # spent 5µs making 1 call to integer::import
22	2	60µs	2	39µs	# spent 33µs (27+6) within Pod::Simple::BlackBox::BEGIN@22 which was called: # once (27µs+6µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22 use strict; # spent 33µs making 1 call to Pod::Simple::BlackBox::BEGIN@22 # spent 6µs making 1 call to strict::import
23	2	82µs	1	10µs	# spent 10µs within Pod::Simple::BlackBox::BEGIN@23 which was called: # once (10µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 23 use Carp (); # spent 10µs making 1 call to Pod::Simple::BlackBox::BEGIN@23
24	2	138µs	2	175µs	# spent 98µs (20+78) within Pod::Simple::BlackBox::BEGIN@24 which was called: # once (20µs+78µs) by Pod::Simple::LinkSection::BEGIN@9 at line 24 use vars qw($VERSION ); # spent 98µs making 1 call to Pod::Simple::BlackBox::BEGIN@24 # spent 78µs making 1 call to vars::import
25	1	2µs			$VERSION = '3.32';
26					#use constant DEBUG => 7;
27					# spent 12µs within Pod::Simple::BlackBox::BEGIN@27 which was called: # once (12µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 30 BEGIN {
28	1	2µs			require Pod::Simple;
29	1	10µs			*DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
30	1	12.8ms	1	12µs	} # spent 12µs making 1 call to Pod::Simple::BlackBox::BEGIN@27
31
32					# Matches a character iff the character will have a different meaning
33					# if we choose CP1252 vs UTF-8 if there is no =encoding line.
34					# This is broken for early Perls on non-ASCII platforms.
35	1	54µs			my $non_ascii_re = eval "qr/[[:^ascii:]]/"; # spent 29µs executing statements in string eval
36	1	2µs			$non_ascii_re = qr/[\x80-\xFF]/ if ! defined $non_ascii_re;
37
38	1	1µs			my $utf8_bom;
39	1	24µs			if (($] ge 5.007_003)) {
40	1	2µs			$utf8_bom = "\x{FEFF}";
41	1	26µs	1	8µs	utf8::encode($utf8_bom); # spent 8µs making 1 call to utf8::encode
42					} else {
43					$utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls.
44					}
45
46					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
47
48					sub parse_line { shift->parse_lines(@_) } # alias
49
50					# - - - Turn back now! Run away! - - -
51
52					sub parse_lines { # Usage: $parser->parse_lines(@lines)
53					# an undef means end-of-stream
54					my $self = shift;
55
56					my $code_handler = $self->{'code_handler'};
57					my $cut_handler = $self->{'cut_handler'};
58					my $wl_handler = $self->{'whiteline_handler'};
59					$self->{'line_count'} \|\|= 0;
60
61					my $scratch;
62
63					DEBUG > 4 and
64					print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n";
65
66					DEBUG > 5 and
67					print STDERR "# About to parse lines: ",
68					join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
69
70					my $paras = ($self->{'paras'} \|\|= []);
71					# paragraph buffer. Because we need to defer processing of =over
72					# directives and verbatim paragraphs. We call _ponder_paragraph_buffer
73					# to process this.
74
75					$self->{'pod_para_count'} \|\|= 0;
76
77					my $line;
78					foreach my $source_line (@_) {
79					if( $self->{'source_dead'} ) {
80					DEBUG > 4 and print STDERR "# Source is dead.\n";
81					last;
82					}
83
84					unless( defined $source_line ) {
85					DEBUG > 4 and print STDERR "# Undef-line seen.\n";
86
87					push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
88					push @$paras, $paras->[-1], $paras->[-1];
89					# So that it definitely fills the buffer.
90					$self->{'source_dead'} = 1;
91					$self->_ponder_paragraph_buffer;
92					next;
93					}
94
95
96					if( $self->{'line_count'}++ ) {
97					($line = $source_line) =~ tr/\n\r//d;
98					# If we don't have two vars, we'll end up with that there
99					# tr/// modding the (potentially read-only) original source line!
100
101					} else {
102					DEBUG > 2 and print STDERR "First line: [$source_line]\n";
103
104					if( ($line = $source_line) =~ s/^$utf8_bom//s ) {
105					DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
106					$self->_handle_encoding_line( "=encoding utf8" );
107					delete $self->{'_processed_encoding'};
108					$line =~ tr/\n\r//d;
109
110					} elsif( $line =~ s/^\xFE\xFF//s ) {
111					DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
112					$self->scream(
113					$self->{'line_count'},
114					"UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
115					);
116					splice @_;
117					push @_, undef;
118					next;
119
120					# TODO: implement somehow?
121
122					} elsif( $line =~ s/^\xFF\xFE//s ) {
123					DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
124					$self->scream(
125					$self->{'line_count'},
126					"UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
127					);
128					splice @_;
129					push @_, undef;
130					next;
131
132					# TODO: implement somehow?
133
134					} else {
135					DEBUG > 2 and print STDERR "First line is BOM-less.\n";
136					($line = $source_line) =~ tr/\n\r//d;
137					}
138					}
139
140					if(!$self->{'parse_characters'} && !$self->{'encoding'}
141					&& ($self->{'in_pod'} \|\| $line =~ /^=/s)
142					&& $line =~ /$non_ascii_re/
143					) {
144
145					my $encoding;
146
147					# No =encoding line, and we are at the first line in the input that
148					# contains a non-ascii byte, that is one whose meaning varies depending
149					# on whether the file is encoded in UTF-8 or CP1252, which are the two
150					# possibilities permitted by the pod spec. (ASCII is assumed if the
151					# file only contains ASCII bytes.) In order to process this line, we
152					# need to figure out what encoding we will use for the file.
153					#
154					# Strictly speaking ISO 8859-1 (Latin 1) refers to the code points
155					# 160-255, but it is used here, as it often colloquially is, to refer to
156					# the complete set of code points 0-255, including ASCII (0-127), the C1
157					# controls (128-159), and strict Latin 1 (160-255).
158					#
159					# CP1252 is effectively a superset of Latin 1, because it differs only
160					# from colloquial 8859-1 in the C1 controls, which are very unlikely to
161					# actually be present in 8859-1 files, so can be used for other purposes
162					# without conflict. CP 1252 uses most of them for graphic characters.
163					#
164					# Note that all ASCII-range bytes represent their corresponding code
165					# points in CP1252 and UTF-8. In ASCII platform UTF-8 all other code
166					# points require multiple (non-ASCII) bytes to represent. (A separate
167					# paragraph for EBCDIC is below.) The multi-byte representation is
168					# quite structured. If we find an isolated byte that requires multiple
169					# bytes to represent in UTF-8, we know that the encoding is not UTF-8.
170					# If we find a sequence of bytes that violates the UTF-8 structure, we
171					# also can presume the encoding isn't UTF-8, and hence must be 1252.
172					#
173					# But there are ambiguous cases where we could guess wrong. If so, the
174					# user will end up having to supply an =encoding line. We use all
175					# readily available information to improve our chances of guessing
176					# right. The odds of something not being UTF-8, but still passing a
177					# UTF-8 validity test go down very rapidly with increasing length of the
178					# sequence. Therefore we look at all the maximal length non-ascii
179					# sequences on the line. If any of the sequences can't be UTF-8, we
180					# quit there and choose CP1252. If all could be UTF-8, we guess UTF-8.
181					#
182					# On EBCDIC platforms, the situation is somewhat different. In
183					# UTF-EBCDIC, not only do ASCII-range bytes represent their code points,
184					# but so do the bytes that are for the C1 controls. Recall that these
185					# correspond to the unused portion of 8859-1 that 1252 mostly takes
186					# over. That means that there are fewer code points that are
187					# represented by multi-bytes. But, note that the these controls are
188					# very unlikely to be in pod text. So if we encounter one of them, it
189					# means that it is quite likely CP1252 and not UTF-8. The net result is
190					# the same code below is used for both platforms.
191					while ($line =~ m/($non_ascii_re+)/g) {
192					my $non_ascii_seq = $1;
193
194					if (length $non_ascii_seq == 1) {
195					$encoding = 'CP1252';
196					goto guessed;
197					} elsif ($] ge 5.007_003) {
198
199					# On Perls that have this function, we can see if the sequence is
200					# valid UTF-8 or not.
201					if (! utf8::decode($non_ascii_seq)) {
202					$encoding = 'CP1252';
203					goto guessed;
204					}
205					} elsif (ord("A") == 65) { # An early Perl, ASCII platform
206
207					# Without utf8::decode, it's a lot harder to do a rigorous check
208					# (though some early releases had a different function that
209					# accomplished the same thing). Since these are ancient Perls, not
210					# likely to be in use today, we take the easy way out, and look at
211					# just the first two bytes of the sequence to see if they are the
212					# start of a UTF-8 character. In ASCII UTF-8, continuation bytes
213					# must be between 0x80 and 0xBF. Start bytes can range from 0xC2
214					# through 0xFF, but anything above 0xF4 is not Unicode, and hence
215					# extremely unlikely to be in a pod.
216					if ($non_ascii_seq !~ /^[\xC2-\xF4][\x80-\xBF]/) {
217					$encoding = 'CP1252';
218					goto guessed;
219					}
220
221					# We don't bother doing anything special for EBCDIC on early Perls.
222					# If there is a solitary variant, CP1252 will be chosen; otherwise
223					# UTF-8.
224					}
225					} # End of loop through all variant sequences on the line
226
227					# All sequences in the line could be UTF-8. Guess that.
228					$encoding = 'UTF-8';
229
230					guessed:
231					$self->_handle_encoding_line( "=encoding $encoding" );
232					delete $self->{'_processed_encoding'};
233					$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
234
235					my ($word) = $line =~ /(\S$non_ascii_re\S)/;
236
237					$self->whine(
238					$self->{'line_count'},
239					"Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
240					);
241					}
242
243					DEBUG > 5 and print STDERR "# Parsing line: [$line]\n";
244
245					if(!$self->{'in_pod'}) {
246					if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s\|$)/s) {
247					if($1 eq 'cut') {
248					$self->scream(
249					$self->{'line_count'},
250					"=cut found outside a pod block. Skipping to next block."
251					);
252
253					## Before there were errata sections in the world, it was
254					## least-pessimal to abort processing the file. But now we can
255					## just barrel on thru (but still not start a pod block).
256					#splice @_;
257					#push @_, undef;
258
259					next;
260					} else {
261					$self->{'in_pod'} = $self->{'start_of_pod_block'}
262					= $self->{'last_was_blank'} = 1;
263					# And fall thru to the pod-mode block further down
264					}
265					} else {
266					DEBUG > 5 and print STDERR "# It's a code-line.\n";
267					$code_handler->(map $_, $line, $self->{'line_count'}, $self)
268					if $code_handler;
269					# Note: this may cause code to be processed out of order relative
270					# to pods, but in order relative to cuts.
271
272					# Note also that we haven't yet applied the transcoding to $line
273					# by time we call $code_handler!
274
275					if( $line =~ m/^#\sline\s+(\d+)\s(?:\s"([^"]+)")?\s*$/ ) {
276					# That RE is from perlsyn, section "Plain Old Comments (Not!)",
277					#$fname = $2 if defined $2;
278					#DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n";
279					DEBUG > 1 and print STDERR "# Setting nextline to $1\n";
280					$self->{'line_count'} = $1 - 1;
281					}
282
283					next;
284					}
285					}
286
287					# . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
288					# Else we're in pod mode:
289
290					# Apply any necessary transcoding:
291					$self->{'_transcoder'} && $self->{'_transcoder'}->($line);
292
293					# HERE WE CATCH =encoding EARLY!
294					if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
295					next if $self->parse_characters; # Ignore this line
296					$line = $self->_handle_encoding_line( $line );
297					}
298
299					if($line =~ m/^=cut/s) {
300					# here ends the pod block, and therefore the previous pod para
301					DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n";
302					$self->{'in_pod'} = 0;
303					# ++$self->{'pod_para_count'};
304					$self->_ponder_paragraph_buffer();
305					# by now it's safe to consider the previous paragraph as done.
306					$cut_handler->(map $_, $line, $self->{'line_count'}, $self)
307					if $cut_handler;
308
309					# TODO: add to docs: Note: this may cause cuts to be processed out
310					# of order relative to pods, but in order relative to code.
311
312					} elsif($line =~ m/^(\s*)$/s) { # it's a blank line
313					if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
314					$wl_handler->(map $_, $line, $self->{'line_count'}, $self)
315					if $wl_handler;
316					}
317
318					if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
319					DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n";
320					push @{$paras->[-1]}, $line;
321					} # otherwise it's not interesting
322
323					if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
324					DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n";
325					}
326
327					$self->{'last_was_blank'} = 1;
328
329					} elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
330
331					if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9])(?:\s+\|$)(.)/s) {
332					# THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
333					my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
334					# Note that in "=head1 foo", the WS is lost.
335					# Example: ['=head1', {'start_line' => 123}, ' foo']
336
337					++$self->{'pod_para_count'};
338
339					$self->_ponder_paragraph_buffer();
340					# by now it's safe to consider the previous paragraph as done.
341
342					push @$paras, $new; # the new incipient paragraph
343					DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
344
345					} elsif($line =~ m/^\s/s) {
346
347					if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
348					DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n";
349					push @{$paras->[-1]}, $line;
350					} else {
351					++$self->{'pod_para_count'};
352					$self->_ponder_paragraph_buffer();
353					# by now it's safe to consider the previous paragraph as done.
354					DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n";
355					push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
356					}
357					} else {
358					++$self->{'pod_para_count'};
359					$self->_ponder_paragraph_buffer();
360					# by now it's safe to consider the previous paragraph as done.
361					push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
362					DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n";
363					}
364					$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
365
366					} else {
367					# It's a non-blank line /continuing/ the current para
368					if(@$paras) {
369					DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n";
370					push @{$paras->[-1]}, $line;
371					} else {
372					# Unexpected case!
373					die "Continuing a paragraph but \@\$paras is empty?";
374					}
375					$self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
376					}
377
378					} # ends the big while loop
379
380					DEBUG > 1 and print STDERR (pretty(@$paras), "\n");
381					return $self;
382					}
383
384					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
385
386					sub _handle_encoding_line {
387					my($self, $line) = @_;
388
389					return if $self->parse_characters;
390
391					# The point of this routine is to set $self->{'_transcoder'} as indicated.
392
393					return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
394					DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $1\"\n";
395
396					my $e = $1;
397					my $orig = $e;
398					push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
399
400					my $enc_error;
401
402					# Cf. perldoc Encode and perldoc Encode::Supported
403
404					require Pod::Simple::Transcode;
405
406					if( $self->{'encoding'} ) {
407					my $norm_current = $self->{'encoding'};
408					my $norm_e = $e;
409					foreach my $that ($norm_current, $norm_e) {
410					$that = lc($that);
411					$that =~ s/[-_]//g;
412					}
413					if($norm_current eq $norm_e) {
414					DEBUG > 1 and print STDERR "The '=encoding $orig' line is ",
415					"redundant. ($norm_current eq $norm_e). Ignoring.\n";
416					$enc_error = '';
417					# But that doesn't necessarily mean that the earlier one went okay
418					} else {
419					$enc_error = "Encoding is already set to " . $self->{'encoding'};
420					DEBUG > 1 and print STDERR $enc_error;
421					}
422					} elsif (
423					# OK, let's turn on the encoding
424					do {
425					DEBUG > 1 and print STDERR " Setting encoding to $e\n";
426					$self->{'encoding'} = $e;
427					1;
428					}
429					and $e eq 'HACKRAW'
430					) {
431					DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n";
432
433					} elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
434
435					die($enc_error = "WHAT? _transcoder is already set?!")
436					if $self->{'_transcoder'}; # should never happen
437					require Pod::Simple::Transcode;
438					$self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
439					eval {
440					my @x = ('', "abc", "123");
441					$self->{'_transcoder'}->(@x);
442					};
443					$@ && die( $enc_error =
444					"Really unexpected error setting up encoding $e: $@\nAborting"
445					);
446					$self->{'detected_encoding'} = $e;
447
448					} else {
449					my @supported = Pod::Simple::Transcode::->all_encodings;
450
451					# Note unsupported, and complain
452					DEBUG and print STDERR " Encoding [$e] is unsupported.",
453					"\nSupporteds: @supported\n";
454					my $suggestion = '';
455
456					# Look for a near match:
457					my $norm = lc($e);
458					$norm =~ tr[-_][]d;
459					my $n;
460					foreach my $enc (@supported) {
461					$n = lc($enc);
462					$n =~ tr[-_][]d;
463					next unless $n eq $norm;
464					$suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
465					last;
466					}
467					my $encmodver = Pod::Simple::Transcode::->encmodver;
468					$enc_error = join '' =>
469					"This document probably does not appear as it should, because its ",
470					"\"=encoding $e\" line calls for an unsupported encoding.",
471					$suggestion, " [$encmodver\'s supported encodings are: @supported]"
472					;
473
474					$self->scream( $self->{'line_count'}, $enc_error );
475					}
476					push @{ $self->{'encoding_command_statuses'} }, $enc_error;
477					if (defined($self->{'_processed_encoding'})) {
478					# Double declaration.
479					$self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives');
480					}
481					$self->{'_processed_encoding'} = $orig;
482
483					return $line;
484					}
485
486					# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
487
488					sub _handle_encoding_second_level {
489					# By time this is called, the encoding (if well formed) will already
490					# have been acted one.
491					my($self, $para) = @_;
492					my @x = @$para;
493					my $content = join ' ', splice @x, 2;
494					$content =~ s/^\s+//s;
495					$content =~ s/\s+$//s;
496
497					DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n";
498
499					if (defined($self->{'_processed_encoding'})) {
500					#if($content ne $self->{'_processed_encoding'}) {
501					# Could it happen?
502					#}
503					delete $self->{'_processed_encoding'};
504					# It's already been handled. Check for errors.
505					if(! $self->{'encoding_command_statuses'} ) {
506					DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n";
507					} elsif( $self->{'encoding_command_statuses'}[-1] ) {
508					$self->whine( $para->[1]{'start_line'},
509					sprintf "Couldn't do %s: %s",
510					$self->{'encoding_command_reqs' }[-1],
511					$self->{'encoding_command_statuses'}[-1],
512					);
513					} else {
514					DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n";
515					}
516
517					} else {
518					# Otherwise it's a syntax error
519					$self->whine( $para->[1]{'start_line'},
520					"Invalid =encoding syntax: $content"
521					);
522					}
523
524					return;
525					}
526
527					#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
528
529					{
530	1	2µs			my $m = -321; # magic line number
531
532					sub _gen_errata {
533					my $self = $_[0];
534					# Return 0 or more fake-o paragraphs explaining the accumulated
535					# errors on this document.
536
537					return() unless $self->{'errata'} and keys %{$self->{'errata'}};
538
539					my @out;
540
541					foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
542					push @out,
543					['=item', {'start_line' => $m}, "Around line $line:"],
544					map( ['~Para', {'start_line' => $m, '~cooked' => 1},
545					#['~Top', {'start_line' => $m},
546					$_
547					#]
548					],
549					@{$self->{'errata'}{$line}}
550					)
551					;
552					}
553
554					# TODO: report of unknown entities? unrenderable characters?
555
556					unshift @out,
557					['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
558					['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
559					"Hey! ",
560					['B', {},
561					'The above document had some coding errors, which are explained below:'
562					]
563					],
564					['=over', {'start_line' => $m, 'errata' => 1}, ''],
565					;
566
567					push @out,
568					['=back', {'start_line' => $m, 'errata' => 1}, ''],
569					;
570
571					DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n";
572
573					return @out;
574					}
575
576					}
577
578					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
579
580					##############################################################################
581					##
582					## stop reading now stop reading now stop reading now stop reading now stop
583					##
584					## HERE IT BECOMES REALLY SCARY
585					##
586					## stop reading now stop reading now stop reading now stop reading now stop
587					##
588					##############################################################################
589
590	1	4µs			sub _ponder_paragraph_buffer {
591
592					# Para-token types as found in the buffer.
593					# ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
594					# =over, =back, =item
595					# and the null =pod (to be complained about if over one line)
596					#
597					# "~data" paragraphs are something we generate at this level, depending on
598					# a currently open =over region
599
600					# Events fired: Begin and end for:
601					# directivename (like head1 .. head4), item, extend,
602					# for (from =begin...=end, =for),
603					# over-bullet, over-number, over-text, over-block,
604					# item-bullet, item-number, item-text,
605					# Document,
606					# Data, Para, Verbatim
607					# B, C, longdirname (TODO -- wha?), etc. for all directives
608					#
609
610					my $self = $_[0];
611					my $paras;
612					return unless @{$paras = $self->{'paras'}};
613					my $curr_open = ($self->{'curr_open'} \|\|= []);
614
615					my $scratch;
616
617					DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n";
618
619					# We have something in our buffer. So apparently the document has started.
620					unless($self->{'doc_has_started'}) {
621					$self->{'doc_has_started'} = 1;
622
623					my $starting_contentless;
624					$starting_contentless =
625					(
626					!@$curr_open
627					and @$paras and ! grep $_->[0] ne '~end', @$paras
628					# i.e., if the paras is all ~ends
629					)
630					;
631					DEBUG and print STDERR "# Starting ",
632					$starting_contentless ? 'contentless' : 'contentful',
633					" document\n"
634					;
635
636					$self->_handle_element_start(
637					($scratch = 'Document'),
638					{
639					'start_line' => $paras->[0][1]{'start_line'},
640					$starting_contentless ? ( 'contentless' => 1 ) : (),
641					},
642					);
643					}
644
645					my($para, $para_type);
646					while(@$paras) {
647					last if @$paras == 1 and
648					( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
649					or $paras->[0][0] eq '=item' )
650					;
651					# Those're the three kinds of paragraphs that require lookahead.
652					# Actually, an "=item Foo" inside an <over type=text> region
653					# and any =item inside an <over type=block> region (rare)
654					# don't require any lookahead, but all others (bullets
655					# and numbers) do.
656
657					# TODO: whinge about many kinds of directives in non-resolving =for regions?
658					# TODO: many? like what? =head1 etc?
659
660					$para = shift @$paras;
661					$para_type = $para->[0];
662
663					DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (",
664					$self->_dump_curr_open(), ")\n";
665
666					if($para_type eq '=for') {
667					next if $self->_ponder_for($para,$curr_open,$paras);
668
669					} elsif($para_type eq '=begin') {
670					next if $self->_ponder_begin($para,$curr_open,$paras);
671
672					} elsif($para_type eq '=end') {
673					next if $self->_ponder_end($para,$curr_open,$paras);
674
675					} elsif($para_type eq '~end') { # The virtual end-document signal
676					next if $self->_ponder_doc_end($para,$curr_open,$paras);
677					}
678
679
680					# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
681					#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
682					if(grep $_->[1]{'~ignore'}, @$curr_open) {
683					DEBUG > 1 and
684					print STDERR "Skipping $para_type paragraph because in ignore mode.\n";
685					next;
686					}
687					#~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
688					# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
689
690					if($para_type eq '=pod') {
691					$self->_ponder_pod($para,$curr_open,$paras);
692
693					} elsif($para_type eq '=over') {
694					next if $self->_ponder_over($para,$curr_open,$paras);
695
696					} elsif($para_type eq '=back') {
697					next if $self->_ponder_back($para,$curr_open,$paras);
698
699					} else {
700
701					# All non-magical codes!!!
702
703					# Here we start using $para_type for our own twisted purposes, to
704					# mean how it should get treated, not as what the element name
705					# should be.
706
707					DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n";
708
709					my $i;
710
711					# Enforce some =headN discipline
712					if($para_type =~ m/^=head\d$/s
713					and ! $self->{'accept_heads_anywhere'}
714					and @$curr_open
715					and $curr_open->[-1][0] eq '=over'
716					) {
717					DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n";
718					$self->whine(
719					$para->[1]{'start_line'},
720					"You forgot a '=back' before '$para_type'"
721					);
722					unshift @$paras, ['=back', {}, ''], $para; # close the =over
723					next;
724					}
725
726
727					if($para_type eq '=item') {
728
729					my $over;
730					unless(@$curr_open and
731					$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
732					$self->whine(
733					$para->[1]{'start_line'},
734					"'=item' outside of any '=over'"
735					);
736					unshift @$paras,
737					['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
738					$para
739					;
740					next;
741					}
742
743
744					my $over_type = $over->[1]{'~type'};
745
746					if(!$over_type) {
747					# Shouldn't happen1
748					die "Typeless over in stack, starting at line "
749					. $over->[1]{'start_line'};
750
751					} elsif($over_type eq 'block') {
752					unless($curr_open->[-1][1]{'~bitched_about'}) {
753					$curr_open->[-1][1]{'~bitched_about'} = 1;
754					$self->whine(
755					$curr_open->[-1][1]{'start_line'},
756					"You can't have =items (as at line "
757					. $para->[1]{'start_line'}
758					. ") unless the first thing after the =over is an =item"
759					);
760					}
761					# Just turn it into a paragraph and reconsider it
762					$para->[0] = '~Para';
763					unshift @$paras, $para;
764					next;
765
766					} elsif($over_type eq 'text') {
767					my $item_type = $self->_get_item_type($para);
768					# That kills the content of the item if it's a number or bullet.
769					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
770
771					if($item_type eq 'text') {
772					# Nothing special needs doing for 'text'
773					} elsif($item_type eq 'number' or $item_type eq 'bullet') {
774					$self->whine(
775					$para->[1]{'start_line'},
776					"Expected text after =item, not a $item_type"
777					);
778					# Undo our clobbering:
779					push @$para, $para->[1]{'~orig_content'};
780					delete $para->[1]{'number'};
781					# Only a PROPER item-number element is allowed
782					# to have a number attribute.
783					} else {
784					die "Unhandled item type $item_type"; # should never happen
785					}
786
787					# =item-text thingies don't need any assimilation, it seems.
788
789					} elsif($over_type eq 'number') {
790					my $item_type = $self->_get_item_type($para);
791					# That kills the content of the item if it's a number or bullet.
792					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
793
794					my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
795
796					if($item_type eq 'bullet') {
797					# Hm, it's not numeric. Correct for this.
798					$para->[1]{'number'} = $expected_value;
799					$self->whine(
800					$para->[1]{'start_line'},
801					"Expected '=item $expected_value'"
802					);
803					push @$para, $para->[1]{'~orig_content'};
804					# restore the bullet, blocking the assimilation of next para
805
806					} elsif($item_type eq 'text') {
807					# Hm, it's not numeric. Correct for this.
808					$para->[1]{'number'} = $expected_value;
809					$self->whine(
810					$para->[1]{'start_line'},
811					"Expected '=item $expected_value'"
812					);
813					# Text content will still be there and will block next ~Para
814
815					} elsif($item_type ne 'number') {
816					die "Unknown item type $item_type"; # should never happen
817
818					} elsif($expected_value == $para->[1]{'number'}) {
819					DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
820
821					} else {
822					DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
823					" instead of the expected value of $expected_value\n";
824					$self->whine(
825					$para->[1]{'start_line'},
826					"You have '=item " . $para->[1]{'number'} .
827					"' instead of the expected '=item $expected_value'"
828					);
829					$para->[1]{'number'} = $expected_value; # correcting!!
830					}
831
832					if(@$para == 2) {
833					# For the cases where we /didn't/ push to @$para
834					if($paras->[0][0] eq '~Para') {
835					DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
836					push @$para, splice @{shift @$paras},2;
837					} else {
838					DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
839					push @$para, ''; # Just so it's not contentless
840					}
841					}
842
843
844					} elsif($over_type eq 'bullet') {
845					my $item_type = $self->_get_item_type($para);
846					# That kills the content of the item if it's a number or bullet.
847					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
848
849					if($item_type eq 'bullet') {
850					# as expected!
851
852					if( $para->[1]{'~_freaky_para_hack'} ) {
853					DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
854					push @$para, delete $para->[1]{'~_freaky_para_hack'};
855					}
856
857					} elsif($item_type eq 'number') {
858					$self->whine(
859					$para->[1]{'start_line'},
860					"Expected '=item *'"
861					);
862					push @$para, $para->[1]{'~orig_content'};
863					# and block assimilation of the next paragraph
864					delete $para->[1]{'number'};
865					# Only a PROPER item-number element is allowed
866					# to have a number attribute.
867					} elsif($item_type eq 'text') {
868					$self->whine(
869					$para->[1]{'start_line'},
870					"Expected '=item *'"
871					);
872					# But doesn't need processing. But it'll block assimilation
873					# of the next para.
874					} else {
875					die "Unhandled item type $item_type"; # should never happen
876					}
877
878					if(@$para == 2) {
879					# For the cases where we /didn't/ push to @$para
880					if($paras->[0][0] eq '~Para') {
881					DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
882					push @$para, splice @{shift @$paras},2;
883					} else {
884					DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
885					push @$para, ''; # Just so it's not contentless
886					}
887					}
888
889					} else {
890					die "Unhandled =over type \"$over_type\"?";
891					# Shouldn't happen!
892					}
893
894					$para_type = 'Plain';
895					$para->[0] .= '-' . $over_type;
896					# Whew. Now fall thru and process it.
897
898
899					} elsif($para_type eq '=extend') {
900					# Well, might as well implement it here.
901					$self->_ponder_extend($para);
902					next; # and skip
903					} elsif($para_type eq '=encoding') {
904					# Not actually acted on here, but we catch errors here.
905					$self->_handle_encoding_second_level($para);
906					next unless $self->keep_encoding_directive;
907					$para_type = 'Plain';
908					} elsif($para_type eq '~Verbatim') {
909					$para->[0] = 'Verbatim';
910					$para_type = '?Verbatim';
911					} elsif($para_type eq '~Para') {
912					$para->[0] = 'Para';
913					$para_type = '?Plain';
914					} elsif($para_type eq 'Data') {
915					$para->[0] = 'Data';
916					$para_type = '?Data';
917					} elsif( $para_type =~ s/^=//s
918					and defined( $para_type = $self->{'accept_directives'}{$para_type} )
919					) {
920					DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n";
921					} else {
922					# An unknown directive!
923					DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n",
924					$para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
925					;
926					$self->whine(
927					$para->[1]{'start_line'},
928					"Unknown directive: $para->[0]"
929					);
930
931					# And maybe treat it as text instead of just letting it go?
932					next;
933					}
934
935					if($para_type =~ s/^\?//s) {
936					if(! @$curr_open) { # usual case
937					DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n";
938					} else {
939					my @fors = grep $_->[0] eq '=for', @$curr_open;
940					DEBUG > 1 and print STDERR "Containing fors: ",
941					join(',', map $_->[1]{'target'}, @fors), "\n";
942
943					if(! @fors) {
944					DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n";
945
946					#} elsif(grep $_->[1]{'~resolve'}, @fors) {
947					#} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
948					} elsif( $fors[-1][1]{'~resolve'} ) {
949					# Look to the immediately containing for
950
951					if($para_type eq 'Data') {
952					DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
953					$para->[0] = 'Para';
954					$para_type = 'Plain';
955					} else {
956					DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
957					}
958					} else {
959					DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
960					$para->[0] = $para_type = 'Data';
961					}
962					}
963					}
964
965					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
966					if($para_type eq 'Plain') {
967					$self->_ponder_Plain($para);
968					} elsif($para_type eq 'Verbatim') {
969					$self->_ponder_Verbatim($para);
970					} elsif($para_type eq 'Data') {
971					$self->_ponder_Data($para);
972					} else {
973					die "\$para type is $para_type -- how did that happen?";
974					# Shouldn't happen.
975					}
976
977					#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
978					$para->[0] =~ s/^[~=]//s;
979
980					DEBUG and print STDERR "\n", pretty($para), "\n";
981
982					# traverse the treelet (which might well be just one string scalar)
983					$self->{'content_seen'} \|\|= 1;
984					$self->_traverse_treelet_bit(@$para);
985					}
986					}
987
988					return;
989					}
990
991					###########################################################################
992					# The sub-ponderers...
993
- -
996					sub _ponder_for {
997					my ($self,$para,$curr_open,$paras) = @_;
998
999					# Fake it out as a begin/end
1000					my $target;
1001
1002					if(grep $_->[1]{'~ignore'}, @$curr_open) {
1003					DEBUG > 1 and print STDERR "Ignoring ignorable =for\n";
1004					return 1;
1005					}
1006
1007					for(my $i = 2; $i < @$para; ++$i) {
1008					if($para->[$i] =~ s/^\s(\S+)\s//s) {
1009					$target = $1;
1010					last;
1011					}
1012					}
1013					unless(defined $target) {
1014					$self->whine(
1015					$para->[1]{'start_line'},
1016					"=for without a target?"
1017					);
1018					return 1;
1019					}
1020					DEBUG > 1 and
1021					print STDERR "Faking out a =for $target as a =begin $target / =end $target\n";
1022
1023					$para->[0] = 'Data';
1024
1025					unshift @$paras,
1026					['=begin',
1027					{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
1028					$target,
1029					],
1030					$para,
1031					['=end',
1032					{'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
1033					$target,
1034					],
1035					;
1036
1037					return 1;
1038					}
1039
1040					sub _ponder_begin {
1041					my ($self,$para,$curr_open,$paras) = @_;
1042					my $content = join ' ', splice @$para, 2;
1043					$content =~ s/^\s+//s;
1044					$content =~ s/\s+$//s;
1045					unless(length($content)) {
1046					$self->whine(
1047					$para->[1]{'start_line'},
1048					"=begin without a target?"
1049					);
1050					DEBUG and print STDERR "Ignoring targetless =begin\n";
1051					return 1;
1052					}
1053
1054					my ($target, $title) = $content =~ m/^(\S+)\s(.)$/;
1055					$para->[1]{'title'} = $title if ($title);
1056					$para->[1]{'target'} = $target; # without any ':'
1057					$content = $target; # strip off the title
1058
1059					$content =~ s/^:!/!:/s;
1060					my $neg; # whether this is a negation-match
1061					$neg = 1 if $content =~ s/^!//s;
1062					my $to_resolve; # whether to process formatting codes
1063					$to_resolve = 1 if $content =~ s/^://s;
1064
1065					my $dont_ignore; # whether this target matches us
1066
1067					foreach my $target_name (
1068					split(',', $content, -1),
1069					$neg ? () : '*'
1070					) {
1071					DEBUG > 2 and
1072					print STDERR " Considering whether =begin $content matches $target_name\n";
1073					next unless $self->{'accept_targets'}{$target_name};
1074
1075					DEBUG > 2 and
1076					print STDERR " It DOES match the acceptable target $target_name!\n";
1077					$to_resolve = 1
1078					if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
1079					$dont_ignore = 1;
1080					$para->[1]{'target_matching'} = $target_name;
1081					last; # stop looking at other target names
1082					}
1083
1084					if($neg) {
1085					if( $dont_ignore ) {
1086					$dont_ignore = '';
1087					delete $para->[1]{'target_matching'};
1088					DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n";
1089					} else {
1090					$dont_ignore = 1;
1091					$para->[1]{'target_matching'} = '!';
1092					DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n";
1093					}
1094					}
1095
1096					$para->[0] = '=for'; # Just what we happen to call these, internally
1097					$para->[1]{'~really'} \|\|= '=begin';
1098					$para->[1]{'~ignore'} = (! $dont_ignore) \|\| 0;
1099					$para->[1]{'~resolve'} = $to_resolve \|\| 0;
1100
1101					DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '',
1102					"ignore contents of this region\n";
1103					DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ",
1104					($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
1105					DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n";
1106
1107					push @$curr_open, $para;
1108					if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
1109					DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n";
1110					} else {
1111					$self->{'content_seen'} \|\|= 1;
1112					$self->_handle_element_start((my $scratch='for'), $para->[1]);
1113					}
1114
1115					return 1;
1116					}
1117
1118					sub _ponder_end {
1119					my ($self,$para,$curr_open,$paras) = @_;
1120					my $content = join ' ', splice @$para, 2;
1121					$content =~ s/^\s+//s;
1122					$content =~ s/\s+$//s;
1123					DEBUG and print STDERR "Ogling '=end $content' directive\n";
1124
1125					unless(length($content)) {
1126					$self->whine(
1127					$para->[1]{'start_line'},
1128					"'=end' without a target?" . (
1129					( @$curr_open and $curr_open->[-1][0] eq '=for' )
1130					? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
1131					: ''
1132					)
1133					);
1134					DEBUG and print STDERR "Ignoring targetless =end\n";
1135					return 1;
1136					}
1137
1138					unless($content =~ m/^\S+$/) { # i.e., unless it's one word
1139					$self->whine(
1140					$para->[1]{'start_line'},
1141					"'=end $content' is invalid. (Stack: "
1142					. $self->_dump_curr_open() . ')'
1143					);
1144					DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
1145					return 1;
1146					}
1147
1148					unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1149					$self->whine(
1150					$para->[1]{'start_line'},
1151					"=end $content without matching =begin. (Stack: "
1152					. $self->_dump_curr_open() . ')'
1153					);
1154					DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
1155					return 1;
1156					}
1157
1158					unless($content eq $curr_open->[-1][1]{'target'}) {
1159					$self->whine(
1160					$para->[1]{'start_line'},
1161					"=end $content doesn't match =begin "
1162					. $curr_open->[-1][1]{'target'}
1163					. ". (Stack: "
1164					. $self->_dump_curr_open() . ')'
1165					);
1166					DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1167					return 1;
1168					}
1169
1170					# Else it's okay to close...
1171					if(grep $_->[1]{'~ignore'}, @$curr_open) {
1172					DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n";
1173					# And that may be because of this to-be-closed =for region, or some
1174					# other one, but it doesn't matter.
1175					} else {
1176					$curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1177					# what's that for?
1178
1179					$self->{'content_seen'} \|\|= 1;
1180					$self->_handle_element_end( my $scratch = 'for', $para->[1]);
1181					}
1182					DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1183					pop @$curr_open;
1184
1185					return 1;
1186					}
1187
1188					sub _ponder_doc_end {
1189					my ($self,$para,$curr_open,$paras) = @_;
1190					if(@$curr_open) { # Deal with things left open
1191					DEBUG and print STDERR "Stack is nonempty at end-document: (",
1192					$self->_dump_curr_open(), ")\n";
1193
1194					DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n";
1195					unshift @$paras, $self->_closers_for_all_curr_open;
1196					# Make sure there is exactly one ~end in the parastack, at the end:
1197					@$paras = grep $_->[0] ne '~end', @$paras;
1198					push @$paras, $para, $para;
1199					# We need two -- once for the next cycle where we
1200					# generate errata, and then another to be at the end
1201					# when that loop back around to process the errata.
1202					return 1;
1203
1204					} else {
1205					DEBUG and print STDERR "Okay, stack is empty now.\n";
1206					}
1207
1208					# Try generating errata section, if applicable
1209					unless($self->{'~tried_gen_errata'}) {
1210					$self->{'~tried_gen_errata'} = 1;
1211					my @extras = $self->_gen_errata();
1212					if(@extras) {
1213					unshift @$paras, @extras;
1214					DEBUG and print STDERR "Generated errata... relooping...\n";
1215					return 1; # I.e., loop around again to process these fake-o paragraphs
1216					}
1217					}
1218
1219					splice @$paras; # Well, that's that for this paragraph buffer.
1220					DEBUG and print STDERR "Throwing end-document event.\n";
1221
1222					$self->_handle_element_end( my $scratch = 'Document' );
1223					return 1; # Hasta la byebye
1224					}
1225
1226					sub _ponder_pod {
1227					my ($self,$para,$curr_open,$paras) = @_;
1228					$self->whine(
1229					$para->[1]{'start_line'},
1230					"=pod directives shouldn't be over one line long! Ignoring all "
1231					. (@$para - 2) . " lines of content"
1232					) if @$para > 3;
1233
1234					# Content ignored unless 'pod_handler' is set
1235					if (my $pod_handler = $self->{'pod_handler'}) {
1236					my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2];
1237					$line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output
1238					$pod_handler->($line, $line_num, $self);
1239					}
1240
1241					# The surrounding methods set content_seen, so let us remain consistent.
1242					# I do not know why it was not here before -- should it not be here?
1243					# $self->{'content_seen'} \|\|= 1;
1244
1245					return;
1246					}
1247
1248					sub _ponder_over {
1249					my ($self,$para,$curr_open,$paras) = @_;
1250					return 1 unless @$paras;
1251					my $list_type;
1252
1253					if($paras->[0][0] eq '=item') { # most common case
1254					$list_type = $self->_get_initial_item_type($paras->[0]);
1255
1256					} elsif($paras->[0][0] eq '=back') {
1257					# Ignore empty lists by default
1258					if ($self->{'parse_empty_lists'}) {
1259					$list_type = 'empty';
1260					} else {
1261					shift @$paras;
1262					return 1;
1263					}
1264					} elsif($paras->[0][0] eq '~end') {
1265					$self->whine(
1266					$para->[1]{'start_line'},
1267					"=over is the last thing in the document?!"
1268					);
1269					return 1; # But feh, ignore it.
1270					} else {
1271					$list_type = 'block';
1272					}
1273					$para->[1]{'~type'} = $list_type;
1274					push @$curr_open, $para;
1275					# yes, we reuse the paragraph as a stack item
1276
1277					my $content = join ' ', splice @$para, 2;
1278					my $overness;
1279					if($content =~ m/^\s*$/s) {
1280					$para->[1]{'indent'} = 4;
1281					} elsif($content =~ m/^\s((?:\d\.)?\d+)\s*$/s) {
1282	2	9.29ms	2	49µs	# spent 40µs (31+9) within Pod::Simple::BlackBox::BEGIN@1282 which was called: # once (31µs+9µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1282 no integer; # spent 40µs making 1 call to Pod::Simple::BlackBox::BEGIN@1282 # spent 9µs making 1 call to integer::unimport
1283					$para->[1]{'indent'} = $1;
1284					if($1 == 0) {
1285					$self->whine(
1286					$para->[1]{'start_line'},
1287					"Can't have a 0 in =over $content"
1288					);
1289					$para->[1]{'indent'} = 4;
1290					}
1291					} else {
1292					$self->whine(
1293					$para->[1]{'start_line'},
1294					"=over should be: '=over' or '=over positive_number'"
1295					);
1296					$para->[1]{'indent'} = 4;
1297					}
1298					DEBUG > 1 and print STDERR "=over found of type $list_type\n";
1299
1300					$self->{'content_seen'} \|\|= 1;
1301					$self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1302
1303					return;
1304					}
1305
1306					sub _ponder_back {
1307					my ($self,$para,$curr_open,$paras) = @_;
1308					# TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1309
1310					my $content = join ' ', splice @$para, 2;
1311					if($content =~ m/\S/) {
1312					$self->whine(
1313					$para->[1]{'start_line'},
1314					"=back doesn't take any parameters, but you said =back $content"
1315					);
1316					}
1317
1318					if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1319					DEBUG > 1 and print STDERR "=back happily closes matching =over\n";
1320					# Expected case: we're closing the most recently opened thing
1321					#my $over = pop @$curr_open;
1322					$self->{'content_seen'} \|\|= 1;
1323					$self->_handle_element_end( my $scratch =
1324					'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1]
1325					);
1326					} else {
1327					DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (",
1328					join(', ', map $_->[0], @$curr_open), ").\n";
1329					$self->whine(
1330					$para->[1]{'start_line'},
1331					'=back without =over'
1332					);
1333					return 1; # and ignore it
1334					}
1335					}
1336
1337					sub _ponder_item {
1338					my ($self,$para,$curr_open,$paras) = @_;
1339					my $over;
1340					unless(@$curr_open and
1341					$over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
1342					$self->whine(
1343					$para->[1]{'start_line'},
1344					"'=item' outside of any '=over'"
1345					);
1346					unshift @$paras,
1347					['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1348					$para
1349					;
1350					return 1;
1351					}
1352
1353
1354					my $over_type = $over->[1]{'~type'};
1355
1356					if(!$over_type) {
1357					# Shouldn't happen1
1358					die "Typeless over in stack, starting at line "
1359					. $over->[1]{'start_line'};
1360
1361					} elsif($over_type eq 'block') {
1362					unless($curr_open->[-1][1]{'~bitched_about'}) {
1363					$curr_open->[-1][1]{'~bitched_about'} = 1;
1364					$self->whine(
1365					$curr_open->[-1][1]{'start_line'},
1366					"You can't have =items (as at line "
1367					. $para->[1]{'start_line'}
1368					. ") unless the first thing after the =over is an =item"
1369					);
1370					}
1371					# Just turn it into a paragraph and reconsider it
1372					$para->[0] = '~Para';
1373					unshift @$paras, $para;
1374					return 1;
1375
1376					} elsif($over_type eq 'text') {
1377					my $item_type = $self->_get_item_type($para);
1378					# That kills the content of the item if it's a number or bullet.
1379					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1380
1381					if($item_type eq 'text') {
1382					# Nothing special needs doing for 'text'
1383					} elsif($item_type eq 'number' or $item_type eq 'bullet') {
1384					$self->whine(
1385					$para->[1]{'start_line'},
1386					"Expected text after =item, not a $item_type"
1387					);
1388					# Undo our clobbering:
1389					push @$para, $para->[1]{'~orig_content'};
1390					delete $para->[1]{'number'};
1391					# Only a PROPER item-number element is allowed
1392					# to have a number attribute.
1393					} else {
1394					die "Unhandled item type $item_type"; # should never happen
1395					}
1396
1397					# =item-text thingies don't need any assimilation, it seems.
1398
1399					} elsif($over_type eq 'number') {
1400					my $item_type = $self->_get_item_type($para);
1401					# That kills the content of the item if it's a number or bullet.
1402					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1403
1404					my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1405
1406					if($item_type eq 'bullet') {
1407					# Hm, it's not numeric. Correct for this.
1408					$para->[1]{'number'} = $expected_value;
1409					$self->whine(
1410					$para->[1]{'start_line'},
1411					"Expected '=item $expected_value'"
1412					);
1413					push @$para, $para->[1]{'~orig_content'};
1414					# restore the bullet, blocking the assimilation of next para
1415
1416					} elsif($item_type eq 'text') {
1417					# Hm, it's not numeric. Correct for this.
1418					$para->[1]{'number'} = $expected_value;
1419					$self->whine(
1420					$para->[1]{'start_line'},
1421					"Expected '=item $expected_value'"
1422					);
1423					# Text content will still be there and will block next ~Para
1424
1425					} elsif($item_type ne 'number') {
1426					die "Unknown item type $item_type"; # should never happen
1427
1428					} elsif($expected_value == $para->[1]{'number'}) {
1429					DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
1430
1431					} else {
1432					DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
1433					" instead of the expected value of $expected_value\n";
1434					$self->whine(
1435					$para->[1]{'start_line'},
1436					"You have '=item " . $para->[1]{'number'} .
1437					"' instead of the expected '=item $expected_value'"
1438					);
1439					$para->[1]{'number'} = $expected_value; # correcting!!
1440					}
1441
1442					if(@$para == 2) {
1443					# For the cases where we /didn't/ push to @$para
1444					if($paras->[0][0] eq '~Para') {
1445					DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
1446					push @$para, splice @{shift @$paras},2;
1447					} else {
1448					DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
1449					push @$para, ''; # Just so it's not contentless
1450					}
1451					}
1452
1453
1454					} elsif($over_type eq 'bullet') {
1455					my $item_type = $self->_get_item_type($para);
1456					# That kills the content of the item if it's a number or bullet.
1457					DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1458
1459					if($item_type eq 'bullet') {
1460					# as expected!
1461
1462					if( $para->[1]{'~_freaky_para_hack'} ) {
1463					DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
1464					push @$para, delete $para->[1]{'~_freaky_para_hack'};
1465					}
1466
1467					} elsif($item_type eq 'number') {
1468					$self->whine(
1469					$para->[1]{'start_line'},
1470					"Expected '=item *'"
1471					);
1472					push @$para, $para->[1]{'~orig_content'};
1473					# and block assimilation of the next paragraph
1474					delete $para->[1]{'number'};
1475					# Only a PROPER item-number element is allowed
1476					# to have a number attribute.
1477					} elsif($item_type eq 'text') {
1478					$self->whine(
1479					$para->[1]{'start_line'},
1480					"Expected '=item *'"
1481					);
1482					# But doesn't need processing. But it'll block assimilation
1483					# of the next para.
1484					} else {
1485					die "Unhandled item type $item_type"; # should never happen
1486					}
1487
1488					if(@$para == 2) {
1489					# For the cases where we /didn't/ push to @$para
1490					if($paras->[0][0] eq '~Para') {
1491					DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
1492					push @$para, splice @{shift @$paras},2;
1493					} else {
1494					DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
1495					push @$para, ''; # Just so it's not contentless
1496					}
1497					}
1498
1499					} else {
1500					die "Unhandled =over type \"$over_type\"?";
1501					# Shouldn't happen!
1502					}
1503					$para->[0] .= '-' . $over_type;
1504
1505					return;
1506					}
1507
1508					sub _ponder_Plain {
1509					my ($self,$para) = @_;
1510					DEBUG and print STDERR " giving plain treatment...\n";
1511					unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1512					or $para->[1]{'~cooked'}
1513					) {
1514					push @$para,
1515					@{$self->_make_treelet(
1516					join("\n", splice(@$para, 2)),
1517					$para->[1]{'start_line'}
1518					)};
1519					}
1520					# Empty paragraphs don't need a treelet for any reason I can see.
1521					# And precooked paragraphs already have a treelet.
1522					return;
1523					}
1524
1525					sub _ponder_Verbatim {
1526					my ($self,$para) = @_;
1527					DEBUG and print STDERR " giving verbatim treatment...\n";
1528
1529					$para->[1]{'xml:space'} = 'preserve';
1530
1531					my $indent = $self->strip_verbatim_indent;
1532					if ($indent && ref $indent eq 'CODE') {
1533					my @shifted = (shift @{$para}, shift @{$para});
1534					$indent = $indent->($para);
1535					unshift @{$para}, @shifted;
1536					}
1537
1538					for(my $i = 2; $i < @$para; $i++) {
1539					foreach my $line ($para->[$i]) { # just for aliasing
1540					# Strip indentation.
1541					$line =~ s/^\Q$indent// if $indent
1542					&& !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
1543					while( $line =~
1544					# Sort of adapted from Text::Tabs -- yes, it's hardwired in that
1545					# tabs are at every EIGHTH column. For portability, it has to be
1546					# one setting everywhere, and 8th wins.
1547					s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
1548					) {}
1549
1550					# TODO: whinge about (or otherwise treat) unindented or overlong lines
1551
1552					}
1553					}
1554
1555					# Now the VerbatimFormatted hoodoo...
1556					if( $self->{'accept_codes'} and
1557					$self->{'accept_codes'}{'VerbatimFormatted'}
1558					) {
1559					while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1560					# Kill any number of terminal newlines
1561					$self->_verbatim_format($para);
1562					} elsif ($self->{'codes_in_verbatim'}) {
1563					push @$para,
1564					@{$self->_make_treelet(
1565					join("\n", splice(@$para, 2)),
1566					$para->[1]{'start_line'}, $para->[1]{'xml:space'}
1567					)};
1568					$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1569					} else {
1570					push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1571					$para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1572					}
1573					return;
1574					}
1575
1576					sub _ponder_Data {
1577					my ($self,$para) = @_;
1578					DEBUG and print STDERR " giving data treatment...\n";
1579					$para->[1]{'xml:space'} = 'preserve';
1580					push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1581					return;
1582					}
1583
- -
1587					###########################################################################
1588
1589					sub _traverse_treelet_bit { # for use only by the routine above
1590					my($self, $name) = splice @_,0,2;
1591
1592					my $scratch;
1593					$self->_handle_element_start(($scratch=$name), shift @_);
1594
1595					while (@_) {
1596					my $x = shift;
1597					if (ref($x)) {
1598					&_traverse_treelet_bit($self, @$x);
1599					} else {
1600					$x .= shift while @_ && !ref($_[0]);
1601					$self->_handle_text($x);
1602					}
1603					}
1604
1605					$self->_handle_element_end($scratch=$name);
1606					return;
1607					}
1608
1609					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1610
1611					sub _closers_for_all_curr_open {
1612					my $self = $_[0];
1613					my @closers;
1614					foreach my $still_open (@{ $self->{'curr_open'} \|\| return }) {
1615					my @copy = @$still_open;
1616					$copy[1] = {%{ $copy[1] }};
1617					#$copy[1]{'start_line'} = -1;
1618					if($copy[0] eq '=for') {
1619					$copy[0] = '=end';
1620					} elsif($copy[0] eq '=over') {
1621					$self->whine(
1622					$still_open->[1]{start_line} ,
1623					"=over without closing =back"
1624					);
1625
1626					$copy[0] = '=back';
1627					} else {
1628					die "I don't know how to auto-close an open $copy[0] region";
1629					}
1630
1631					unless( @copy > 2 ) {
1632					push @copy, $copy[1]{'target'};
1633					$copy[-1] = '' unless defined $copy[-1];
1634					# since =over's don't have targets
1635					}
1636
1637					$copy[1]{'fake-closer'} = 1;
1638
1639					DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n";
1640					unshift @closers, \@copy;
1641					}
1642					return @closers;
1643					}
1644
1645					#--------------------------------------------------------------------------
1646
1647					sub _verbatim_format {
1648					my($it, $p) = @_;
1649
1650					my $formatting;
1651
1652					for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1653					DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n";
1654					$p->[$i] .= "\n";
1655					# Unlike with simple Verbatim blocks, we don't end up just doing
1656					# a join("\n", ...) on the contents, so we have to append a
1657					# newline to ever line, and then nix the last one later.
1658					}
1659
1660					if( DEBUG > 4 ) {
1661					print STDERR "<<\n";
1662					for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1663					print STDERR "_verbatim_format $i: $p->[$i]";
1664					}
1665					print STDERR ">>\n";
1666					}
1667
1668					for(my $i = $#$p; $i > 2; $i--) {
1669					# work backwards over the lines, except the first (#2)
1670
1671					#next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
1672					# and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1673					# look at a formatty line preceding a nonformatty one
1674					DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n";
1675					if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
1676					DEBUG > 5 and print STDERR " It's a formatty line. ",
1677					"Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1678
1679					if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1680					DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n";
1681					next;
1682					} else {
1683					DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n";
1684					}
1685					} else {
1686					DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n";
1687					next;
1688					}
1689
1690					# A formatty line has to have #: in the first two columns, and uses
1691					# "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1692					# Example:
1693					# What do you want? i like pie. [or whatever]
1694					# #:^^^^^^^^^^^^^^^^^ /////////////
1695
1696
1697					DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1698
1699					$formatting = ' ' . $1;
1700					$formatting =~ s/\s+$//s; # nix trailing whitespace
1701					unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1702					splice @$p,$i,1; # remove this line
1703					$i--; # don't consider next line
1704					next;
1705					}
1706
1707					if( length($formatting) >= length($p->[$i-1]) ) {
1708					$formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1709					} else {
1710					$formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1711					}
1712					# Make $formatting and the previous line be exactly the same length,
1713					# with $formatting having a " " as the last character.
1714
1715					DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n";
1716
1717
1718					my @new_line;
1719					while( $formatting =~ m{\G(( +)\|(\^+)\|(\/+)\|(\%+))}g ) {
1720					#print STDERR "Format matches $1\n";
1721
1722					if($2) {
1723					#print STDERR "SKIPPING <$2>\n";
1724					push @new_line,
1725					substr($p->[$i-1], pos($formatting)-length($1), length($1));
1726					} else {
1727					#print STDERR "SNARING $+\n";
1728					push @new_line, [
1729					(
1730					$3 ? 'VerbatimB' :
1731					$4 ? 'VerbatimI' :
1732					$5 ? 'VerbatimBI' : die("Should never get called")
1733					), {},
1734					substr($p->[$i-1], pos($formatting)-length($1), length($1))
1735					];
1736					#print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1737					}
1738					}
1739					my @nixed =
1740					splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1741					DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n";
1742
1743					DEBUG > 6 and print STDERR "New version of the above line is these tokens (",
1744					scalar(@new_line), "):",
1745					map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1746					$i--; # So the next line we scrutinize is the line before the one
1747					# that we just went and formatted
1748					}
1749
1750					$p->[0] = 'VerbatimFormatted';
1751
1752					# Collapse adjacent text nodes, just for kicks.
1753					for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
1754					if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
1755					DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
1756					$p->[$i] .= splice @$p, $i+1, 1; # merge
1757					--$i; # and back up
1758					}
1759					}
1760
1761					# Now look for the last text token, and remove the terminal newline
1762					for( my $i = $#$p; $i >= 2; $i-- ) {
1763					# work backwards over the tokens, even the first
1764					if( !ref($p->[$i]) ) {
1765					if($p->[$i] =~ s/\n$//s) {
1766					DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
1767					} else {
1768					DEBUG > 5 and print STDERR
1769					"No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
1770					}
1771					last; # we only want the next one
1772					}
1773					}
1774
1775					return;
1776					}
1777
1778
1779					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1780
1781
1782					sub _treelet_from_formatting_codes {
1783					# Given a paragraph, returns a treelet. Full of scary tokenizing code.
1784					# Like [ '~Top', {'start_line' => $start_line},
1785					# "I like ",
1786					# [ 'B', {}, "pie" ],
1787					# "!"
1788					# ]
1789
1790					my($self, $para, $start_line, $preserve_space) = @_;
1791
1792					my $treelet = ['~Top', {'start_line' => $start_line},];
1793
1794					unless ($preserve_space \|\| $self->{'preserve_whitespace'}) {
1795					$para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1796					$para =~ s/ $//;
1797					$para =~ s/^ //;
1798					}
1799
1800					# Only apparent problem the above code is that N<< >> turns into
1801					# N<< >>. But then, word wrapping does that too! So don't do that!
1802
1803					my @stack;
1804					my @lineage = ($treelet);
1805					my $raw = ''; # raw content of L<> fcode before splitting/processing
1806					# XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
1807					# into just 1 ' '. Is this the regex's doing or 'raw's?
1808					my $inL = 0;
1809
1810					DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n";
1811
1812					# Here begins our frightening tokenizer RE. The following regex matches
1813					# text in four main parts:
1814					#
1815					# * Start-codes. The first alternative matches C< or C<<, the latter
1816					# followed by some whitespace. $1 will hold the entire start code
1817					# (including any space following a multiple-angle-bracket delimiter),
1818					# and $2 will hold only the additional brackets past the first in a
1819					# multiple-bracket delimiter. length($2) + 1 will be the number of
1820					# closing brackets we have to find.
1821					#
1822					# * Closing brackets. Match some amount of whitespace followed by
1823					# multiple close brackets. The logic to see if this closes anything
1824					# is down below. Note that in order to parse C<< >> correctly, we
1825					# have to use look-behind (?<=\s\s), since the match of the starting
1826					# code will have consumed the whitespace.
1827					#
1828					# * A single closing bracket, to close a simple code like C<>.
1829					#
1830					# * Something that isn't a start or end code. We have to be careful
1831					# about accepting whitespace, since perlpodspec says that any whitespace
1832					# before a multiple-bracket closing delimiter should be ignored.
1833					#
1834					while($para =~
1835					m/\G
1836					(?:
1837					# Match starting codes, including the whitespace following a
1838					# multiple-delimiter start code. $1 gets the whole start code and
1839					# $2 gets all but one of the <s in the multiple-bracket case.
1840					([A-Z]<(?:(<+)\s+)?)
1841					\|
1842					# Match multiple-bracket end codes. $3 gets the whitespace that
1843					# should be discarded before an end bracket but kept in other cases
1844					# and $4 gets the end brackets themselves.
1845					(\s+\|(?<=\s\s))(>{2,})
1846					\|
1847					(\s?>) # $5: simple end-codes
1848					\|
1849					( # $6: stuff containing no start-codes or end-codes
1850					(?:
1851					[^A-Z\s>]
1852					\|
1853					(?:
1854					[A-Z](?!<)
1855					)
1856					\|
1857					# whitespace is ok, but we don't want to eat the whitespace before
1858					# a multiple-bracket end code.
1859					# NOTE: we may still have problems with e.g. S<< >>
1860					(?:
1861					\s(?!\s*>{2,})
1862					)
1863					)+
1864					)
1865					)
1866					/xgo
1867					) {
1868					DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n";
1869					if(defined $1) {
1870					if(defined $2) {
1871					DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n";
1872					push @stack, length($2) + 1;
1873					# length of the necessary complex end-code string
1874					} else {
1875					DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n";
1876					push @stack, 0; # signal that we're looking for simple
1877					}
1878					push @lineage, [ substr($1,0,1), {}, ]; # new node object
1879					push @{ $lineage[-2] }, $lineage[-1];
1880					if ('L' eq substr($1,0,1)) {
1881					$raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
1882					$inL = 1;
1883					} else {
1884					$raw .= $1 if $inL;
1885					}
1886
1887					} elsif(defined $4) {
1888					DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n";
1889					# This is where it gets messy...
1890					if(! @stack) {
1891					# We saw " >>>>" but needed nothing. This is ALL just stuff then.
1892					DEBUG > 4 and print STDERR " But it's really just stuff.\n";
1893					push @{ $lineage[-1] }, $3, $4;
1894					next;
1895					} elsif(!$stack[-1]) {
1896					# We saw " >>>>" but needed only ">". Back pos up.
1897					DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n";
1898					push @{ $lineage[-1] }, $3; # That was a for-real space, too.
1899					pos($para) = pos($para) - length($4) + 1;
1900					} elsif($stack[-1] == length($4)) {
1901					# We found " >>>>", and it was exactly what we needed. Commonest case.
1902					DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n";
1903					} elsif($stack[-1] < length($4)) {
1904					# We saw " >>>>" but needed only " >>". Back pos up.
1905					DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n";
1906					pos($para) = pos($para) - length($4) + $stack[-1];
1907					} else {
1908					# We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
1909					DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n";
1910					push @{ $lineage[-1] }, $3, $4;
1911					next;
1912					}
1913					#print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
1914
1915					push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1916					# Keep the element from being childless
1917
1918					pop @stack;
1919					pop @lineage;
1920
1921					unless (@stack) { # not in an L if there are no open fcodes
1922					$inL = 0;
1923					if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1924					$lineage[-1][-1][1]{'raw'} = $raw
1925					}
1926					}
1927					$raw .= $3.$4 if $inL;
1928
1929					} elsif(defined $5) {
1930					DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n";
1931
1932					if(@stack and ! $stack[-1]) {
1933					# We're indeed expecting a simple end-code
1934					DEBUG > 4 and print STDERR " It's indeed an end-code.\n";
1935
1936					if(length($5) == 2) { # There was a space there: " >"
1937					push @{ $lineage[-1] }, ' ';
1938					} elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1939					push @{ $lineage[-1] }, ''; # keep it from being really childless
1940					}
1941
1942					pop @stack;
1943					pop @lineage;
1944					} else {
1945					DEBUG > 4 and print STDERR " It's just stuff.\n";
1946					push @{ $lineage[-1] }, $5;
1947					}
1948
1949					unless (@stack) { # not in an L if there are no open fcodes
1950					$inL = 0;
1951					if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1952					$lineage[-1][-1][1]{'raw'} = $raw
1953					}
1954					}
1955					$raw .= $5 if $inL;
1956
1957					} elsif(defined $6) {
1958					DEBUG > 3 and print STDERR "Found stuff \"$6\"\n";
1959					push @{ $lineage[-1] }, $6;
1960					$raw .= $6 if $inL;
1961					# XXX does not capture multiplace whitespaces -- 'raw' ends up with
1962					# at most 1 leading/trailing whitespace, why not all of it?
1963
1964					} else {
1965					# should never ever ever ever happen
1966					DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n";
1967					die "SPORK 512512!";
1968					}
1969					}
1970
1971					if(@stack) { # Uhoh, some sequences weren't closed.
1972					my $x= "...";
1973					while(@stack) {
1974					push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1975					# Hmmmmm!
1976
1977					my $code = (pop @lineage)->[0];
1978					my $ender_length = pop @stack;
1979					if($ender_length) {
1980					--$ender_length;
1981					$x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1982					} else {
1983					$x = $code . "<$x>";
1984					}
1985					}
1986					DEBUG > 1 and print STDERR "Unterminated $x sequence\n";
1987					$self->whine($start_line,
1988					"Unterminated $x sequence",
1989					);
1990					}
1991
1992					return $treelet;
1993					}
1994
1995					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1996
1997					sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
1998					return stringify_lol($_[1]);
1999					}
2000
2001					sub stringify_lol { # function: stringify_lol($lol)
2002					my $string_form = '';
2003					_stringify_lol( $_[0] => \$string_form );
2004					return $string_form;
2005					}
2006
2007					sub _stringify_lol { # the real recursor
2008					my($lol, $to) = @_;
2009					for(my $i = 2; $i < @$lol; ++$i) {
2010					if( ref($lol->[$i] \|\| '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
2011					_stringify_lol( $lol->[$i], $to); # recurse!
2012					} else {
2013					$$to .= $lol->[$i];
2014					}
2015					}
2016					return;
2017					}
2018
2019					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2020
2021					sub _dump_curr_open { # return a string representation of the stack
2022					my $curr_open = $_[0]{'curr_open'};
2023
2024					return '[empty]' unless @$curr_open;
2025					return join '; ',
2026					map {;
2027					($_->[0] eq '=for')
2028					? ( ($_->[1]{'~really'} \|\| '=over')
2029					. ' ' . $_->[1]{'target'})
2030					: $_->[0]
2031					}
2032					@$curr_open
2033					;
2034					}
2035
2036					###########################################################################
2037	1	19µs			my %pretty_form = (
2038					"\a" => '\a', # ding!
2039					"\b" => '\b', # BS
2040					"\e" => '\e', # ESC
2041					"\f" => '\f', # FF
2042					"\t" => '\t', # tab
2043					"\cm" => '\cm',
2044					"\cj" => '\cj',
2045					"\n" => '\n', # probably overrides one of either \cm or \cj
2046					'"' => '\"',
2047					'\\' => '\\\\',
2048					'$' => '\\$',
2049					'@' => '\\@',
2050					'%' => '\\%',
2051					'#' => '\\#',
2052					);
2053
2054					sub pretty { # adopted from Class::Classless
2055					# Not the most brilliant routine, but passable.
2056					# Don't give it a cyclic data structure!
2057					my @stuff = @_; # copy
2058					my $x;
2059					my $out =
2060					# join ",\n" .
2061					join ", ",
2062					map {;
2063					if(!defined($_)) {
2064					"undef";
2065					} elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
2066					$x = "[ " . pretty(@$_) . " ]" ;
2067					$x;
2068					} elsif(ref($_) eq 'SCALAR') {
2069					$x = "\\" . pretty($$_) ;
2070					$x;
2071					} elsif(ref($_) eq 'HASH') {
2072					my $hr = $_;
2073					$x = "{" . join(", ",
2074					map(pretty($_) . '=>' . pretty($hr->{$_}),
2075					sort keys %$hr ) ) . "}" ;
2076					$x;
2077					} elsif(!length($_)) { q{''} # empty string
2078					} elsif(
2079					$_ eq '0' # very common case
2080					or(
2081					m/^-?(?:[123456789]\d*\|0)(?:\.\d+)?$/s
2082					and $_ ne '-0' # the strange case that RE lets thru
2083					)
2084					) { $_;
2085					} else {
2086					# Yes, explicitly name every character desired. There are shorcuts one
2087					# could make, but I (Karl Williamson) was afraid that some Perl
2088					# releases would have bugs in some of them. For example [A-Z] works
2089					# even on EBCDIC platforms to match exactly the 26 uppercase English
2090					# letters, but I don't know if it has always worked without bugs. It
2091					# seemed safest just to list the characters.
2092					# s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
2093					s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{\|}~])>
2094					<$pretty_form{$1} \|\| '\\x{'.sprintf("%x", ord($1)).'}'>eg;
2095					#<$pretty_form{$1} \|\| '\\x'.(unpack("H2",$1))>eg;
2096					qq{"$_"};
2097					}
2098					} @stuff;
2099					# $out =~ s/\n */ /g if length($out) < 75;
2100					return $out;
2101					}
2102
2103					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2104
2105					# A rather unsubtle method of blowing away all the state information
2106					# from a parser object so it can be reused. Provided as a utility for
2107					# backward compatibility in Pod::Man, etc. but not recommended for
2108					# general use.
2109
2110					sub reinit {
2111					my $self = shift;
2112					foreach (qw(source_dead source_filename doc_has_started
2113					start_of_pod_block content_seen last_was_blank paras curr_open
2114					line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen
2115					Title)) {
2116
2117					delete $self->{$_};
2118					}
2119					}
2120
2121					#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2122	1	23µs			1;
2123

					# spent 7µs within Pod::Simple::BlackBox::CORE:qr which was called: # once (7µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 1 of (eval 4)[Pod/Simple/BlackBox.pm:35] sub Pod::Simple::BlackBox::CORE:qr; # opcode