← Index
NYTProf Performance Profile   « line view »
For /usr/local/bin/sa-learn
  Run on Sun Nov 5 03:09:29 2017
Reported on Mon Nov 6 13:20:45 2017

Filename/usr/local/lib/perl5/5.24/Pod/Simple/BlackBox.pm
StatementsExecuted 23 statements in 20.3ms
Subroutines
Calls P F Exclusive
Time
Inclusive
Time
Subroutine
11139µs46µsPod::Simple::BlackBox::::BEGIN@21Pod::Simple::BlackBox::BEGIN@21
11134µs43µsPod::Simple::BlackBox::::BEGIN@1282Pod::Simple::BlackBox::BEGIN@1282
11119µs25µsPod::Simple::BlackBox::::BEGIN@22Pod::Simple::BlackBox::BEGIN@22
11118µs73µsPod::Simple::BlackBox::::BEGIN@24Pod::Simple::BlackBox::BEGIN@24
11116µs16µsPod::Simple::BlackBox::::BEGIN@27Pod::Simple::BlackBox::BEGIN@27
11110µs10µsPod::Simple::BlackBox::::BEGIN@23Pod::Simple::BlackBox::BEGIN@23
1117µs7µsPod::Simple::BlackBox::::CORE:qrPod::Simple::BlackBox::CORE:qr (opcode)
0000s0sPod::Simple::BlackBox::::_closers_for_all_curr_openPod::Simple::BlackBox::_closers_for_all_curr_open
0000s0sPod::Simple::BlackBox::::_dump_curr_openPod::Simple::BlackBox::_dump_curr_open
0000s0sPod::Simple::BlackBox::::_gen_errataPod::Simple::BlackBox::_gen_errata
0000s0sPod::Simple::BlackBox::::_handle_encoding_linePod::Simple::BlackBox::_handle_encoding_line
0000s0sPod::Simple::BlackBox::::_handle_encoding_second_levelPod::Simple::BlackBox::_handle_encoding_second_level
0000s0sPod::Simple::BlackBox::::_ponder_DataPod::Simple::BlackBox::_ponder_Data
0000s0sPod::Simple::BlackBox::::_ponder_PlainPod::Simple::BlackBox::_ponder_Plain
0000s0sPod::Simple::BlackBox::::_ponder_VerbatimPod::Simple::BlackBox::_ponder_Verbatim
0000s0sPod::Simple::BlackBox::::_ponder_backPod::Simple::BlackBox::_ponder_back
0000s0sPod::Simple::BlackBox::::_ponder_beginPod::Simple::BlackBox::_ponder_begin
0000s0sPod::Simple::BlackBox::::_ponder_doc_endPod::Simple::BlackBox::_ponder_doc_end
0000s0sPod::Simple::BlackBox::::_ponder_endPod::Simple::BlackBox::_ponder_end
0000s0sPod::Simple::BlackBox::::_ponder_forPod::Simple::BlackBox::_ponder_for
0000s0sPod::Simple::BlackBox::::_ponder_itemPod::Simple::BlackBox::_ponder_item
0000s0sPod::Simple::BlackBox::::_ponder_overPod::Simple::BlackBox::_ponder_over
0000s0sPod::Simple::BlackBox::::_ponder_paragraph_bufferPod::Simple::BlackBox::_ponder_paragraph_buffer
0000s0sPod::Simple::BlackBox::::_ponder_podPod::Simple::BlackBox::_ponder_pod
0000s0sPod::Simple::BlackBox::::_stringify_lolPod::Simple::BlackBox::_stringify_lol
0000s0sPod::Simple::BlackBox::::_traverse_treelet_bitPod::Simple::BlackBox::_traverse_treelet_bit
0000s0sPod::Simple::BlackBox::::_treelet_from_formatting_codesPod::Simple::BlackBox::_treelet_from_formatting_codes
0000s0sPod::Simple::BlackBox::::_verbatim_formatPod::Simple::BlackBox::_verbatim_format
0000s0sPod::Simple::BlackBox::::parse_linePod::Simple::BlackBox::parse_line
0000s0sPod::Simple::BlackBox::::parse_linesPod::Simple::BlackBox::parse_lines
0000s0sPod::Simple::BlackBox::::prettyPod::Simple::BlackBox::pretty
0000s0sPod::Simple::BlackBox::::reinitPod::Simple::BlackBox::reinit
0000s0sPod::Simple::BlackBox::::stringify_lolPod::Simple::BlackBox::stringify_lol
0000s0sPod::Simple::BlackBox::::text_content_of_treeletPod::Simple::BlackBox::text_content_of_treelet
Call graph for these subroutines as a Graphviz dot language file.
Line State
ments
Time
on line
Calls Time
in subs
Code
1package Pod::Simple::BlackBox;
2#
3# "What's in the box?" "Pain."
4#
5###########################################################################
6#
7# This is where all the scary things happen: parsing lines into
8# paragraphs; and then into directives, verbatims, and then also
9# turning formatting sequences into treelets.
10#
11# Are you really sure you want to read this code?
12#
13#-----------------------------------------------------------------------------
14#
15# The basic work of this module Pod::Simple::BlackBox is doing the dirty work
16# of parsing Pod into treelets (generally one per non-verbatim paragraph), and
17# to call the proper callbacks on the treelets.
18#
19# Every node in a treelet is a ['name', {attrhash}, ...children...]
20
21256µs253µs
# spent 46µs (39+7) within Pod::Simple::BlackBox::BEGIN@21 which was called: # once (39µs+7µs) by Pod::Simple::LinkSection::BEGIN@9 at line 21
use integer; # vroom!
# spent 46µs making 1 call to Pod::Simple::BlackBox::BEGIN@21 # spent 7µs making 1 call to integer::import
22251µs231µs
# spent 25µs (19+6) within Pod::Simple::BlackBox::BEGIN@22 which was called: # once (19µs+6µs) by Pod::Simple::LinkSection::BEGIN@9 at line 22
use strict;
# spent 25µs making 1 call to Pod::Simple::BlackBox::BEGIN@22 # spent 6µs making 1 call to strict::import
23250µs110µs
# spent 10µs within Pod::Simple::BlackBox::BEGIN@23 which was called: # once (10µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 23
use Carp ();
# spent 10µs making 1 call to Pod::Simple::BlackBox::BEGIN@23
242118µs2129µs
# spent 73µs (18+56) within Pod::Simple::BlackBox::BEGIN@24 which was called: # once (18µs+56µs) by Pod::Simple::LinkSection::BEGIN@9 at line 24
use vars qw($VERSION );
# spent 73µs making 1 call to Pod::Simple::BlackBox::BEGIN@24 # spent 56µs making 1 call to vars::import
2512µs$VERSION = '3.32';
26#use constant DEBUG => 7;
27
# spent 16µs within Pod::Simple::BlackBox::BEGIN@27 which was called: # once (16µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 30
BEGIN {
2812µs require Pod::Simple;
29111µs *DEBUG = \&Pod::Simple::DEBUG unless defined &DEBUG
30111.7ms116µs}
# spent 16µs making 1 call to Pod::Simple::BlackBox::BEGIN@27
31
32# Matches a character iff the character will have a different meaning
33# if we choose CP1252 vs UTF-8 if there is no =encoding line.
34# This is broken for early Perls on non-ASCII platforms.
35156µsmy $non_ascii_re = eval "qr/[[:^ascii:]]/";
# spent 30µs executing statements in string eval
3612µs$non_ascii_re = qr/[\x80-\xFF]/ if ! defined $non_ascii_re;
37
3811µsmy $utf8_bom;
39125µsif (($] ge 5.007_003)) {
4012µs $utf8_bom = "\x{FEFF}";
41120µs18µs utf8::encode($utf8_bom);
# spent 8µs making 1 call to utf8::encode
42} else {
43 $utf8_bom = "\xEF\xBB\xBF"; # No EBCDIC BOM detection for early Perls.
44}
45
46#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
47
48sub parse_line { shift->parse_lines(@_) } # alias
49
50# - - - Turn back now! Run away! - - -
51
52sub parse_lines { # Usage: $parser->parse_lines(@lines)
53 # an undef means end-of-stream
54 my $self = shift;
55
56 my $code_handler = $self->{'code_handler'};
57 my $cut_handler = $self->{'cut_handler'};
58 my $wl_handler = $self->{'whiteline_handler'};
59 $self->{'line_count'} ||= 0;
60
61 my $scratch;
62
63 DEBUG > 4 and
64 print STDERR "# Parsing starting at line ", $self->{'line_count'}, ".\n";
65
66 DEBUG > 5 and
67 print STDERR "# About to parse lines: ",
68 join(' ', map defined($_) ? "[$_]" : "EOF", @_), "\n";
69
70 my $paras = ($self->{'paras'} ||= []);
71 # paragraph buffer. Because we need to defer processing of =over
72 # directives and verbatim paragraphs. We call _ponder_paragraph_buffer
73 # to process this.
74
75 $self->{'pod_para_count'} ||= 0;
76
77 my $line;
78 foreach my $source_line (@_) {
79 if( $self->{'source_dead'} ) {
80 DEBUG > 4 and print STDERR "# Source is dead.\n";
81 last;
82 }
83
84 unless( defined $source_line ) {
85 DEBUG > 4 and print STDERR "# Undef-line seen.\n";
86
87 push @$paras, ['~end', {'start_line' => $self->{'line_count'}}];
88 push @$paras, $paras->[-1], $paras->[-1];
89 # So that it definitely fills the buffer.
90 $self->{'source_dead'} = 1;
91 $self->_ponder_paragraph_buffer;
92 next;
93 }
94
95
96 if( $self->{'line_count'}++ ) {
97 ($line = $source_line) =~ tr/\n\r//d;
98 # If we don't have two vars, we'll end up with that there
99 # tr/// modding the (potentially read-only) original source line!
100
101 } else {
102 DEBUG > 2 and print STDERR "First line: [$source_line]\n";
103
104 if( ($line = $source_line) =~ s/^$utf8_bom//s ) {
105 DEBUG and print STDERR "UTF-8 BOM seen. Faking a '=encoding utf8'.\n";
106 $self->_handle_encoding_line( "=encoding utf8" );
107 delete $self->{'_processed_encoding'};
108 $line =~ tr/\n\r//d;
109
110 } elsif( $line =~ s/^\xFE\xFF//s ) {
111 DEBUG and print STDERR "Big-endian UTF-16 BOM seen. Aborting parsing.\n";
112 $self->scream(
113 $self->{'line_count'},
114 "UTF16-BE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
115 );
116 splice @_;
117 push @_, undef;
118 next;
119
120 # TODO: implement somehow?
121
122 } elsif( $line =~ s/^\xFF\xFE//s ) {
123 DEBUG and print STDERR "Little-endian UTF-16 BOM seen. Aborting parsing.\n";
124 $self->scream(
125 $self->{'line_count'},
126 "UTF16-LE Byte Encoding Mark found; but Pod::Simple v$Pod::Simple::VERSION doesn't implement UTF16 yet."
127 );
128 splice @_;
129 push @_, undef;
130 next;
131
132 # TODO: implement somehow?
133
134 } else {
135 DEBUG > 2 and print STDERR "First line is BOM-less.\n";
136 ($line = $source_line) =~ tr/\n\r//d;
137 }
138 }
139
140 if(!$self->{'parse_characters'} && !$self->{'encoding'}
141 && ($self->{'in_pod'} || $line =~ /^=/s)
142 && $line =~ /$non_ascii_re/
143 ) {
144
145 my $encoding;
146
147 # No =encoding line, and we are at the first line in the input that
148 # contains a non-ascii byte, that is one whose meaning varies depending
149 # on whether the file is encoded in UTF-8 or CP1252, which are the two
150 # possibilities permitted by the pod spec. (ASCII is assumed if the
151 # file only contains ASCII bytes.) In order to process this line, we
152 # need to figure out what encoding we will use for the file.
153 #
154 # Strictly speaking ISO 8859-1 (Latin 1) refers to the code points
155 # 160-255, but it is used here, as it often colloquially is, to refer to
156 # the complete set of code points 0-255, including ASCII (0-127), the C1
157 # controls (128-159), and strict Latin 1 (160-255).
158 #
159 # CP1252 is effectively a superset of Latin 1, because it differs only
160 # from colloquial 8859-1 in the C1 controls, which are very unlikely to
161 # actually be present in 8859-1 files, so can be used for other purposes
162 # without conflict. CP 1252 uses most of them for graphic characters.
163 #
164 # Note that all ASCII-range bytes represent their corresponding code
165 # points in CP1252 and UTF-8. In ASCII platform UTF-8 all other code
166 # points require multiple (non-ASCII) bytes to represent. (A separate
167 # paragraph for EBCDIC is below.) The multi-byte representation is
168 # quite structured. If we find an isolated byte that requires multiple
169 # bytes to represent in UTF-8, we know that the encoding is not UTF-8.
170 # If we find a sequence of bytes that violates the UTF-8 structure, we
171 # also can presume the encoding isn't UTF-8, and hence must be 1252.
172 #
173 # But there are ambiguous cases where we could guess wrong. If so, the
174 # user will end up having to supply an =encoding line. We use all
175 # readily available information to improve our chances of guessing
176 # right. The odds of something not being UTF-8, but still passing a
177 # UTF-8 validity test go down very rapidly with increasing length of the
178 # sequence. Therefore we look at all the maximal length non-ascii
179 # sequences on the line. If any of the sequences can't be UTF-8, we
180 # quit there and choose CP1252. If all could be UTF-8, we guess UTF-8.
181 #
182 # On EBCDIC platforms, the situation is somewhat different. In
183 # UTF-EBCDIC, not only do ASCII-range bytes represent their code points,
184 # but so do the bytes that are for the C1 controls. Recall that these
185 # correspond to the unused portion of 8859-1 that 1252 mostly takes
186 # over. That means that there are fewer code points that are
187 # represented by multi-bytes. But, note that the these controls are
188 # very unlikely to be in pod text. So if we encounter one of them, it
189 # means that it is quite likely CP1252 and not UTF-8. The net result is
190 # the same code below is used for both platforms.
191 while ($line =~ m/($non_ascii_re+)/g) {
192 my $non_ascii_seq = $1;
193
194 if (length $non_ascii_seq == 1) {
195 $encoding = 'CP1252';
196 goto guessed;
197 } elsif ($] ge 5.007_003) {
198
199 # On Perls that have this function, we can see if the sequence is
200 # valid UTF-8 or not.
201 if (! utf8::decode($non_ascii_seq)) {
202 $encoding = 'CP1252';
203 goto guessed;
204 }
205 } elsif (ord("A") == 65) { # An early Perl, ASCII platform
206
207 # Without utf8::decode, it's a lot harder to do a rigorous check
208 # (though some early releases had a different function that
209 # accomplished the same thing). Since these are ancient Perls, not
210 # likely to be in use today, we take the easy way out, and look at
211 # just the first two bytes of the sequence to see if they are the
212 # start of a UTF-8 character. In ASCII UTF-8, continuation bytes
213 # must be between 0x80 and 0xBF. Start bytes can range from 0xC2
214 # through 0xFF, but anything above 0xF4 is not Unicode, and hence
215 # extremely unlikely to be in a pod.
216 if ($non_ascii_seq !~ /^[\xC2-\xF4][\x80-\xBF]/) {
217 $encoding = 'CP1252';
218 goto guessed;
219 }
220
221 # We don't bother doing anything special for EBCDIC on early Perls.
222 # If there is a solitary variant, CP1252 will be chosen; otherwise
223 # UTF-8.
224 }
225 } # End of loop through all variant sequences on the line
226
227 # All sequences in the line could be UTF-8. Guess that.
228 $encoding = 'UTF-8';
229
230 guessed:
231 $self->_handle_encoding_line( "=encoding $encoding" );
232 delete $self->{'_processed_encoding'};
233 $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
234
235 my ($word) = $line =~ /(\S*$non_ascii_re\S*)/;
236
237 $self->whine(
238 $self->{'line_count'},
239 "Non-ASCII character seen before =encoding in '$word'. Assuming $encoding"
240 );
241 }
242
243 DEBUG > 5 and print STDERR "# Parsing line: [$line]\n";
244
245 if(!$self->{'in_pod'}) {
246 if($line =~ m/^=([a-zA-Z][a-zA-Z0-9]*)(?:\s|$)/s) {
247 if($1 eq 'cut') {
248 $self->scream(
249 $self->{'line_count'},
250 "=cut found outside a pod block. Skipping to next block."
251 );
252
253 ## Before there were errata sections in the world, it was
254 ## least-pessimal to abort processing the file. But now we can
255 ## just barrel on thru (but still not start a pod block).
256 #splice @_;
257 #push @_, undef;
258
259 next;
260 } else {
261 $self->{'in_pod'} = $self->{'start_of_pod_block'}
262 = $self->{'last_was_blank'} = 1;
263 # And fall thru to the pod-mode block further down
264 }
265 } else {
266 DEBUG > 5 and print STDERR "# It's a code-line.\n";
267 $code_handler->(map $_, $line, $self->{'line_count'}, $self)
268 if $code_handler;
269 # Note: this may cause code to be processed out of order relative
270 # to pods, but in order relative to cuts.
271
272 # Note also that we haven't yet applied the transcoding to $line
273 # by time we call $code_handler!
274
275 if( $line =~ m/^#\s*line\s+(\d+)\s*(?:\s"([^"]+)")?\s*$/ ) {
276 # That RE is from perlsyn, section "Plain Old Comments (Not!)",
277 #$fname = $2 if defined $2;
278 #DEBUG > 1 and defined $2 and print STDERR "# Setting fname to \"$fname\"\n";
279 DEBUG > 1 and print STDERR "# Setting nextline to $1\n";
280 $self->{'line_count'} = $1 - 1;
281 }
282
283 next;
284 }
285 }
286
287 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
288 # Else we're in pod mode:
289
290 # Apply any necessary transcoding:
291 $self->{'_transcoder'} && $self->{'_transcoder'}->($line);
292
293 # HERE WE CATCH =encoding EARLY!
294 if( $line =~ m/^=encoding\s+\S+\s*$/s ) {
295 next if $self->parse_characters; # Ignore this line
296 $line = $self->_handle_encoding_line( $line );
297 }
298
299 if($line =~ m/^=cut/s) {
300 # here ends the pod block, and therefore the previous pod para
301 DEBUG > 1 and print STDERR "Noting =cut at line ${$self}{'line_count'}\n";
302 $self->{'in_pod'} = 0;
303 # ++$self->{'pod_para_count'};
304 $self->_ponder_paragraph_buffer();
305 # by now it's safe to consider the previous paragraph as done.
306 $cut_handler->(map $_, $line, $self->{'line_count'}, $self)
307 if $cut_handler;
308
309 # TODO: add to docs: Note: this may cause cuts to be processed out
310 # of order relative to pods, but in order relative to code.
311
312 } elsif($line =~ m/^(\s*)$/s) { # it's a blank line
313 if (defined $1 and $1 =~ /[^\S\r\n]/) { # it's a white line
314 $wl_handler->(map $_, $line, $self->{'line_count'}, $self)
315 if $wl_handler;
316 }
317
318 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
319 DEBUG > 1 and print STDERR "Saving blank line at line ${$self}{'line_count'}\n";
320 push @{$paras->[-1]}, $line;
321 } # otherwise it's not interesting
322
323 if(!$self->{'start_of_pod_block'} and !$self->{'last_was_blank'}) {
324 DEBUG > 1 and print STDERR "Noting para ends with blank line at ${$self}{'line_count'}\n";
325 }
326
327 $self->{'last_was_blank'} = 1;
328
329 } elsif($self->{'last_was_blank'}) { # A non-blank line starting a new para...
330
331 if($line =~ m/^(=[a-zA-Z][a-zA-Z0-9]*)(?:\s+|$)(.*)/s) {
332 # THIS IS THE ONE PLACE WHERE WE CONSTRUCT NEW DIRECTIVE OBJECTS
333 my $new = [$1, {'start_line' => $self->{'line_count'}}, $2];
334 # Note that in "=head1 foo", the WS is lost.
335 # Example: ['=head1', {'start_line' => 123}, ' foo']
336
337 ++$self->{'pod_para_count'};
338
339 $self->_ponder_paragraph_buffer();
340 # by now it's safe to consider the previous paragraph as done.
341
342 push @$paras, $new; # the new incipient paragraph
343 DEBUG > 1 and print STDERR "Starting new ${$paras}[-1][0] para at line ${$self}{'line_count'}\n";
344
345 } elsif($line =~ m/^\s/s) {
346
347 if(!$self->{'start_of_pod_block'} and @$paras and $paras->[-1][0] eq '~Verbatim') {
348 DEBUG > 1 and print STDERR "Resuming verbatim para at line ${$self}{'line_count'}\n";
349 push @{$paras->[-1]}, $line;
350 } else {
351 ++$self->{'pod_para_count'};
352 $self->_ponder_paragraph_buffer();
353 # by now it's safe to consider the previous paragraph as done.
354 DEBUG > 1 and print STDERR "Starting verbatim para at line ${$self}{'line_count'}\n";
355 push @$paras, ['~Verbatim', {'start_line' => $self->{'line_count'}}, $line];
356 }
357 } else {
358 ++$self->{'pod_para_count'};
359 $self->_ponder_paragraph_buffer();
360 # by now it's safe to consider the previous paragraph as done.
361 push @$paras, ['~Para', {'start_line' => $self->{'line_count'}}, $line];
362 DEBUG > 1 and print STDERR "Starting plain para at line ${$self}{'line_count'}\n";
363 }
364 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
365
366 } else {
367 # It's a non-blank line /continuing/ the current para
368 if(@$paras) {
369 DEBUG > 2 and print STDERR "Line ${$self}{'line_count'} continues current paragraph\n";
370 push @{$paras->[-1]}, $line;
371 } else {
372 # Unexpected case!
373 die "Continuing a paragraph but \@\$paras is empty?";
374 }
375 $self->{'last_was_blank'} = $self->{'start_of_pod_block'} = 0;
376 }
377
378 } # ends the big while loop
379
380 DEBUG > 1 and print STDERR (pretty(@$paras), "\n");
381 return $self;
382}
383
384#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
385
386sub _handle_encoding_line {
387 my($self, $line) = @_;
388
389 return if $self->parse_characters;
390
391 # The point of this routine is to set $self->{'_transcoder'} as indicated.
392
393 return $line unless $line =~ m/^=encoding\s+(\S+)\s*$/s;
394 DEBUG > 1 and print STDERR "Found an encoding line \"=encoding $1\"\n";
395
396 my $e = $1;
397 my $orig = $e;
398 push @{ $self->{'encoding_command_reqs'} }, "=encoding $orig";
399
400 my $enc_error;
401
402 # Cf. perldoc Encode and perldoc Encode::Supported
403
404 require Pod::Simple::Transcode;
405
406 if( $self->{'encoding'} ) {
407 my $norm_current = $self->{'encoding'};
408 my $norm_e = $e;
409 foreach my $that ($norm_current, $norm_e) {
410 $that = lc($that);
411 $that =~ s/[-_]//g;
412 }
413 if($norm_current eq $norm_e) {
414 DEBUG > 1 and print STDERR "The '=encoding $orig' line is ",
415 "redundant. ($norm_current eq $norm_e). Ignoring.\n";
416 $enc_error = '';
417 # But that doesn't necessarily mean that the earlier one went okay
418 } else {
419 $enc_error = "Encoding is already set to " . $self->{'encoding'};
420 DEBUG > 1 and print STDERR $enc_error;
421 }
422 } elsif (
423 # OK, let's turn on the encoding
424 do {
425 DEBUG > 1 and print STDERR " Setting encoding to $e\n";
426 $self->{'encoding'} = $e;
427 1;
428 }
429 and $e eq 'HACKRAW'
430 ) {
431 DEBUG and print STDERR " Putting in HACKRAW (no-op) encoding mode.\n";
432
433 } elsif( Pod::Simple::Transcode::->encoding_is_available($e) ) {
434
435 die($enc_error = "WHAT? _transcoder is already set?!")
436 if $self->{'_transcoder'}; # should never happen
437 require Pod::Simple::Transcode;
438 $self->{'_transcoder'} = Pod::Simple::Transcode::->make_transcoder($e);
439 eval {
440 my @x = ('', "abc", "123");
441 $self->{'_transcoder'}->(@x);
442 };
443 $@ && die( $enc_error =
444 "Really unexpected error setting up encoding $e: $@\nAborting"
445 );
446 $self->{'detected_encoding'} = $e;
447
448 } else {
449 my @supported = Pod::Simple::Transcode::->all_encodings;
450
451 # Note unsupported, and complain
452 DEBUG and print STDERR " Encoding [$e] is unsupported.",
453 "\nSupporteds: @supported\n";
454 my $suggestion = '';
455
456 # Look for a near match:
457 my $norm = lc($e);
458 $norm =~ tr[-_][]d;
459 my $n;
460 foreach my $enc (@supported) {
461 $n = lc($enc);
462 $n =~ tr[-_][]d;
463 next unless $n eq $norm;
464 $suggestion = " (Maybe \"$e\" should be \"$enc\"?)";
465 last;
466 }
467 my $encmodver = Pod::Simple::Transcode::->encmodver;
468 $enc_error = join '' =>
469 "This document probably does not appear as it should, because its ",
470 "\"=encoding $e\" line calls for an unsupported encoding.",
471 $suggestion, " [$encmodver\'s supported encodings are: @supported]"
472 ;
473
474 $self->scream( $self->{'line_count'}, $enc_error );
475 }
476 push @{ $self->{'encoding_command_statuses'} }, $enc_error;
477 if (defined($self->{'_processed_encoding'})) {
478 # Double declaration.
479 $self->scream( $self->{'line_count'}, 'Cannot have multiple =encoding directives');
480 }
481 $self->{'_processed_encoding'} = $orig;
482
483 return $line;
484}
485
486# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
487
488sub _handle_encoding_second_level {
489 # By time this is called, the encoding (if well formed) will already
490 # have been acted one.
491 my($self, $para) = @_;
492 my @x = @$para;
493 my $content = join ' ', splice @x, 2;
494 $content =~ s/^\s+//s;
495 $content =~ s/\s+$//s;
496
497 DEBUG > 2 and print STDERR "Ogling encoding directive: =encoding $content\n";
498
499 if (defined($self->{'_processed_encoding'})) {
500 #if($content ne $self->{'_processed_encoding'}) {
501 # Could it happen?
502 #}
503 delete $self->{'_processed_encoding'};
504 # It's already been handled. Check for errors.
505 if(! $self->{'encoding_command_statuses'} ) {
506 DEBUG > 2 and print STDERR " CRAZY ERROR: It wasn't really handled?!\n";
507 } elsif( $self->{'encoding_command_statuses'}[-1] ) {
508 $self->whine( $para->[1]{'start_line'},
509 sprintf "Couldn't do %s: %s",
510 $self->{'encoding_command_reqs' }[-1],
511 $self->{'encoding_command_statuses'}[-1],
512 );
513 } else {
514 DEBUG > 2 and print STDERR " (Yup, it was successfully handled already.)\n";
515 }
516
517 } else {
518 # Otherwise it's a syntax error
519 $self->whine( $para->[1]{'start_line'},
520 "Invalid =encoding syntax: $content"
521 );
522 }
523
524 return;
525}
526
527#~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`~`
528
529{
53012µsmy $m = -321; # magic line number
531
532sub _gen_errata {
533 my $self = $_[0];
534 # Return 0 or more fake-o paragraphs explaining the accumulated
535 # errors on this document.
536
537 return() unless $self->{'errata'} and keys %{$self->{'errata'}};
538
539 my @out;
540
541 foreach my $line (sort {$a <=> $b} keys %{$self->{'errata'}}) {
542 push @out,
543 ['=item', {'start_line' => $m}, "Around line $line:"],
544 map( ['~Para', {'start_line' => $m, '~cooked' => 1},
545 #['~Top', {'start_line' => $m},
546 $_
547 #]
548 ],
549 @{$self->{'errata'}{$line}}
550 )
551 ;
552 }
553
554 # TODO: report of unknown entities? unrenderable characters?
555
556 unshift @out,
557 ['=head1', {'start_line' => $m, 'errata' => 1}, 'POD ERRORS'],
558 ['~Para', {'start_line' => $m, '~cooked' => 1, 'errata' => 1},
559 "Hey! ",
560 ['B', {},
561 'The above document had some coding errors, which are explained below:'
562 ]
563 ],
564 ['=over', {'start_line' => $m, 'errata' => 1}, ''],
565 ;
566
567 push @out,
568 ['=back', {'start_line' => $m, 'errata' => 1}, ''],
569 ;
570
571 DEBUG and print STDERR "\n<<\n", pretty(\@out), "\n>>\n\n";
572
573 return @out;
574}
575
576}
577
578#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
579
580##############################################################################
581##
582## stop reading now stop reading now stop reading now stop reading now stop
583##
584## HERE IT BECOMES REALLY SCARY
585##
586## stop reading now stop reading now stop reading now stop reading now stop
587##
588##############################################################################
589
59013µssub _ponder_paragraph_buffer {
591
592 # Para-token types as found in the buffer.
593 # ~Verbatim, ~Para, ~end, =head1..4, =for, =begin, =end,
594 # =over, =back, =item
595 # and the null =pod (to be complained about if over one line)
596 #
597 # "~data" paragraphs are something we generate at this level, depending on
598 # a currently open =over region
599
600 # Events fired: Begin and end for:
601 # directivename (like head1 .. head4), item, extend,
602 # for (from =begin...=end, =for),
603 # over-bullet, over-number, over-text, over-block,
604 # item-bullet, item-number, item-text,
605 # Document,
606 # Data, Para, Verbatim
607 # B, C, longdirname (TODO -- wha?), etc. for all directives
608 #
609
610 my $self = $_[0];
611 my $paras;
612 return unless @{$paras = $self->{'paras'}};
613 my $curr_open = ($self->{'curr_open'} ||= []);
614
615 my $scratch;
616
617 DEBUG > 10 and print STDERR "# Paragraph buffer: <<", pretty($paras), ">>\n";
618
619 # We have something in our buffer. So apparently the document has started.
620 unless($self->{'doc_has_started'}) {
621 $self->{'doc_has_started'} = 1;
622
623 my $starting_contentless;
624 $starting_contentless =
625 (
626 !@$curr_open
627 and @$paras and ! grep $_->[0] ne '~end', @$paras
628 # i.e., if the paras is all ~ends
629 )
630 ;
631 DEBUG and print STDERR "# Starting ",
632 $starting_contentless ? 'contentless' : 'contentful',
633 " document\n"
634 ;
635
636 $self->_handle_element_start(
637 ($scratch = 'Document'),
638 {
639 'start_line' => $paras->[0][1]{'start_line'},
640 $starting_contentless ? ( 'contentless' => 1 ) : (),
641 },
642 );
643 }
644
645 my($para, $para_type);
646 while(@$paras) {
647 last if @$paras == 1 and
648 ( $paras->[0][0] eq '=over' or $paras->[0][0] eq '~Verbatim'
649 or $paras->[0][0] eq '=item' )
650 ;
651 # Those're the three kinds of paragraphs that require lookahead.
652 # Actually, an "=item Foo" inside an <over type=text> region
653 # and any =item inside an <over type=block> region (rare)
654 # don't require any lookahead, but all others (bullets
655 # and numbers) do.
656
657# TODO: whinge about many kinds of directives in non-resolving =for regions?
658# TODO: many? like what? =head1 etc?
659
660 $para = shift @$paras;
661 $para_type = $para->[0];
662
663 DEBUG > 1 and print STDERR "Pondering a $para_type paragraph, given the stack: (",
664 $self->_dump_curr_open(), ")\n";
665
666 if($para_type eq '=for') {
667 next if $self->_ponder_for($para,$curr_open,$paras);
668
669 } elsif($para_type eq '=begin') {
670 next if $self->_ponder_begin($para,$curr_open,$paras);
671
672 } elsif($para_type eq '=end') {
673 next if $self->_ponder_end($para,$curr_open,$paras);
674
675 } elsif($para_type eq '~end') { # The virtual end-document signal
676 next if $self->_ponder_doc_end($para,$curr_open,$paras);
677 }
678
679
680 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
681 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
682 if(grep $_->[1]{'~ignore'}, @$curr_open) {
683 DEBUG > 1 and
684 print STDERR "Skipping $para_type paragraph because in ignore mode.\n";
685 next;
686 }
687 #~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
688 # ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
689
690 if($para_type eq '=pod') {
691 $self->_ponder_pod($para,$curr_open,$paras);
692
693 } elsif($para_type eq '=over') {
694 next if $self->_ponder_over($para,$curr_open,$paras);
695
696 } elsif($para_type eq '=back') {
697 next if $self->_ponder_back($para,$curr_open,$paras);
698
699 } else {
700
701 # All non-magical codes!!!
702
703 # Here we start using $para_type for our own twisted purposes, to
704 # mean how it should get treated, not as what the element name
705 # should be.
706
707 DEBUG > 1 and print STDERR "Pondering non-magical $para_type\n";
708
709 my $i;
710
711 # Enforce some =headN discipline
712 if($para_type =~ m/^=head\d$/s
713 and ! $self->{'accept_heads_anywhere'}
714 and @$curr_open
715 and $curr_open->[-1][0] eq '=over'
716 ) {
717 DEBUG > 2 and print STDERR "'=$para_type' inside an '=over'!\n";
718 $self->whine(
719 $para->[1]{'start_line'},
720 "You forgot a '=back' before '$para_type'"
721 );
722 unshift @$paras, ['=back', {}, ''], $para; # close the =over
723 next;
724 }
725
726
727 if($para_type eq '=item') {
728
729 my $over;
730 unless(@$curr_open and
731 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
732 $self->whine(
733 $para->[1]{'start_line'},
734 "'=item' outside of any '=over'"
735 );
736 unshift @$paras,
737 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
738 $para
739 ;
740 next;
741 }
742
743
744 my $over_type = $over->[1]{'~type'};
745
746 if(!$over_type) {
747 # Shouldn't happen1
748 die "Typeless over in stack, starting at line "
749 . $over->[1]{'start_line'};
750
751 } elsif($over_type eq 'block') {
752 unless($curr_open->[-1][1]{'~bitched_about'}) {
753 $curr_open->[-1][1]{'~bitched_about'} = 1;
754 $self->whine(
755 $curr_open->[-1][1]{'start_line'},
756 "You can't have =items (as at line "
757 . $para->[1]{'start_line'}
758 . ") unless the first thing after the =over is an =item"
759 );
760 }
761 # Just turn it into a paragraph and reconsider it
762 $para->[0] = '~Para';
763 unshift @$paras, $para;
764 next;
765
766 } elsif($over_type eq 'text') {
767 my $item_type = $self->_get_item_type($para);
768 # That kills the content of the item if it's a number or bullet.
769 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
770
771 if($item_type eq 'text') {
772 # Nothing special needs doing for 'text'
773 } elsif($item_type eq 'number' or $item_type eq 'bullet') {
774 $self->whine(
775 $para->[1]{'start_line'},
776 "Expected text after =item, not a $item_type"
777 );
778 # Undo our clobbering:
779 push @$para, $para->[1]{'~orig_content'};
780 delete $para->[1]{'number'};
781 # Only a PROPER item-number element is allowed
782 # to have a number attribute.
783 } else {
784 die "Unhandled item type $item_type"; # should never happen
785 }
786
787 # =item-text thingies don't need any assimilation, it seems.
788
789 } elsif($over_type eq 'number') {
790 my $item_type = $self->_get_item_type($para);
791 # That kills the content of the item if it's a number or bullet.
792 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
793
794 my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
795
796 if($item_type eq 'bullet') {
797 # Hm, it's not numeric. Correct for this.
798 $para->[1]{'number'} = $expected_value;
799 $self->whine(
800 $para->[1]{'start_line'},
801 "Expected '=item $expected_value'"
802 );
803 push @$para, $para->[1]{'~orig_content'};
804 # restore the bullet, blocking the assimilation of next para
805
806 } elsif($item_type eq 'text') {
807 # Hm, it's not numeric. Correct for this.
808 $para->[1]{'number'} = $expected_value;
809 $self->whine(
810 $para->[1]{'start_line'},
811 "Expected '=item $expected_value'"
812 );
813 # Text content will still be there and will block next ~Para
814
815 } elsif($item_type ne 'number') {
816 die "Unknown item type $item_type"; # should never happen
817
818 } elsif($expected_value == $para->[1]{'number'}) {
819 DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
820
821 } else {
822 DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
823 " instead of the expected value of $expected_value\n";
824 $self->whine(
825 $para->[1]{'start_line'},
826 "You have '=item " . $para->[1]{'number'} .
827 "' instead of the expected '=item $expected_value'"
828 );
829 $para->[1]{'number'} = $expected_value; # correcting!!
830 }
831
832 if(@$para == 2) {
833 # For the cases where we /didn't/ push to @$para
834 if($paras->[0][0] eq '~Para') {
835 DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
836 push @$para, splice @{shift @$paras},2;
837 } else {
838 DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
839 push @$para, ''; # Just so it's not contentless
840 }
841 }
842
843
844 } elsif($over_type eq 'bullet') {
845 my $item_type = $self->_get_item_type($para);
846 # That kills the content of the item if it's a number or bullet.
847 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
848
849 if($item_type eq 'bullet') {
850 # as expected!
851
852 if( $para->[1]{'~_freaky_para_hack'} ) {
853 DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
854 push @$para, delete $para->[1]{'~_freaky_para_hack'};
855 }
856
857 } elsif($item_type eq 'number') {
858 $self->whine(
859 $para->[1]{'start_line'},
860 "Expected '=item *'"
861 );
862 push @$para, $para->[1]{'~orig_content'};
863 # and block assimilation of the next paragraph
864 delete $para->[1]{'number'};
865 # Only a PROPER item-number element is allowed
866 # to have a number attribute.
867 } elsif($item_type eq 'text') {
868 $self->whine(
869 $para->[1]{'start_line'},
870 "Expected '=item *'"
871 );
872 # But doesn't need processing. But it'll block assimilation
873 # of the next para.
874 } else {
875 die "Unhandled item type $item_type"; # should never happen
876 }
877
878 if(@$para == 2) {
879 # For the cases where we /didn't/ push to @$para
880 if($paras->[0][0] eq '~Para') {
881 DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
882 push @$para, splice @{shift @$paras},2;
883 } else {
884 DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
885 push @$para, ''; # Just so it's not contentless
886 }
887 }
888
889 } else {
890 die "Unhandled =over type \"$over_type\"?";
891 # Shouldn't happen!
892 }
893
894 $para_type = 'Plain';
895 $para->[0] .= '-' . $over_type;
896 # Whew. Now fall thru and process it.
897
898
899 } elsif($para_type eq '=extend') {
900 # Well, might as well implement it here.
901 $self->_ponder_extend($para);
902 next; # and skip
903 } elsif($para_type eq '=encoding') {
904 # Not actually acted on here, but we catch errors here.
905 $self->_handle_encoding_second_level($para);
906 next unless $self->keep_encoding_directive;
907 $para_type = 'Plain';
908 } elsif($para_type eq '~Verbatim') {
909 $para->[0] = 'Verbatim';
910 $para_type = '?Verbatim';
911 } elsif($para_type eq '~Para') {
912 $para->[0] = 'Para';
913 $para_type = '?Plain';
914 } elsif($para_type eq 'Data') {
915 $para->[0] = 'Data';
916 $para_type = '?Data';
917 } elsif( $para_type =~ s/^=//s
918 and defined( $para_type = $self->{'accept_directives'}{$para_type} )
919 ) {
920 DEBUG > 1 and print STDERR " Pondering known directive ${$para}[0] as $para_type\n";
921 } else {
922 # An unknown directive!
923 DEBUG > 1 and printf STDERR "Unhandled directive %s (Handled: %s)\n",
924 $para->[0], join(' ', sort keys %{$self->{'accept_directives'}} )
925 ;
926 $self->whine(
927 $para->[1]{'start_line'},
928 "Unknown directive: $para->[0]"
929 );
930
931 # And maybe treat it as text instead of just letting it go?
932 next;
933 }
934
935 if($para_type =~ s/^\?//s) {
936 if(! @$curr_open) { # usual case
937 DEBUG and print STDERR "Treating $para_type paragraph as such because stack is empty.\n";
938 } else {
939 my @fors = grep $_->[0] eq '=for', @$curr_open;
940 DEBUG > 1 and print STDERR "Containing fors: ",
941 join(',', map $_->[1]{'target'}, @fors), "\n";
942
943 if(! @fors) {
944 DEBUG and print STDERR "Treating $para_type paragraph as such because stack has no =for's\n";
945
946 #} elsif(grep $_->[1]{'~resolve'}, @fors) {
947 #} elsif(not grep !$_->[1]{'~resolve'}, @fors) {
948 } elsif( $fors[-1][1]{'~resolve'} ) {
949 # Look to the immediately containing for
950
951 if($para_type eq 'Data') {
952 DEBUG and print STDERR "Treating Data paragraph as Plain/Verbatim because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
953 $para->[0] = 'Para';
954 $para_type = 'Plain';
955 } else {
956 DEBUG and print STDERR "Treating $para_type paragraph as such because the containing =for ($fors[-1][1]{'target'}) is a resolver\n";
957 }
958 } else {
959 DEBUG and print STDERR "Treating $para_type paragraph as Data because the containing =for ($fors[-1][1]{'target'}) is a non-resolver\n";
960 $para->[0] = $para_type = 'Data';
961 }
962 }
963 }
964
965 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
966 if($para_type eq 'Plain') {
967 $self->_ponder_Plain($para);
968 } elsif($para_type eq 'Verbatim') {
969 $self->_ponder_Verbatim($para);
970 } elsif($para_type eq 'Data') {
971 $self->_ponder_Data($para);
972 } else {
973 die "\$para type is $para_type -- how did that happen?";
974 # Shouldn't happen.
975 }
976
977 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
978 $para->[0] =~ s/^[~=]//s;
979
980 DEBUG and print STDERR "\n", pretty($para), "\n";
981
982 # traverse the treelet (which might well be just one string scalar)
983 $self->{'content_seen'} ||= 1;
984 $self->_traverse_treelet_bit(@$para);
985 }
986 }
987
988 return;
989}
990
991###########################################################################
992# The sub-ponderers...
993
- -
996sub _ponder_for {
997 my ($self,$para,$curr_open,$paras) = @_;
998
999 # Fake it out as a begin/end
1000 my $target;
1001
1002 if(grep $_->[1]{'~ignore'}, @$curr_open) {
1003 DEBUG > 1 and print STDERR "Ignoring ignorable =for\n";
1004 return 1;
1005 }
1006
1007 for(my $i = 2; $i < @$para; ++$i) {
1008 if($para->[$i] =~ s/^\s*(\S+)\s*//s) {
1009 $target = $1;
1010 last;
1011 }
1012 }
1013 unless(defined $target) {
1014 $self->whine(
1015 $para->[1]{'start_line'},
1016 "=for without a target?"
1017 );
1018 return 1;
1019 }
1020 DEBUG > 1 and
1021 print STDERR "Faking out a =for $target as a =begin $target / =end $target\n";
1022
1023 $para->[0] = 'Data';
1024
1025 unshift @$paras,
1026 ['=begin',
1027 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
1028 $target,
1029 ],
1030 $para,
1031 ['=end',
1032 {'start_line' => $para->[1]{'start_line'}, '~really' => '=for'},
1033 $target,
1034 ],
1035 ;
1036
1037 return 1;
1038}
1039
1040sub _ponder_begin {
1041 my ($self,$para,$curr_open,$paras) = @_;
1042 my $content = join ' ', splice @$para, 2;
1043 $content =~ s/^\s+//s;
1044 $content =~ s/\s+$//s;
1045 unless(length($content)) {
1046 $self->whine(
1047 $para->[1]{'start_line'},
1048 "=begin without a target?"
1049 );
1050 DEBUG and print STDERR "Ignoring targetless =begin\n";
1051 return 1;
1052 }
1053
1054 my ($target, $title) = $content =~ m/^(\S+)\s*(.*)$/;
1055 $para->[1]{'title'} = $title if ($title);
1056 $para->[1]{'target'} = $target; # without any ':'
1057 $content = $target; # strip off the title
1058
1059 $content =~ s/^:!/!:/s;
1060 my $neg; # whether this is a negation-match
1061 $neg = 1 if $content =~ s/^!//s;
1062 my $to_resolve; # whether to process formatting codes
1063 $to_resolve = 1 if $content =~ s/^://s;
1064
1065 my $dont_ignore; # whether this target matches us
1066
1067 foreach my $target_name (
1068 split(',', $content, -1),
1069 $neg ? () : '*'
1070 ) {
1071 DEBUG > 2 and
1072 print STDERR " Considering whether =begin $content matches $target_name\n";
1073 next unless $self->{'accept_targets'}{$target_name};
1074
1075 DEBUG > 2 and
1076 print STDERR " It DOES match the acceptable target $target_name!\n";
1077 $to_resolve = 1
1078 if $self->{'accept_targets'}{$target_name} eq 'force_resolve';
1079 $dont_ignore = 1;
1080 $para->[1]{'target_matching'} = $target_name;
1081 last; # stop looking at other target names
1082 }
1083
1084 if($neg) {
1085 if( $dont_ignore ) {
1086 $dont_ignore = '';
1087 delete $para->[1]{'target_matching'};
1088 DEBUG > 2 and print STDERR " But the leading ! means that this is a NON-match!\n";
1089 } else {
1090 $dont_ignore = 1;
1091 $para->[1]{'target_matching'} = '!';
1092 DEBUG > 2 and print STDERR " But the leading ! means that this IS a match!\n";
1093 }
1094 }
1095
1096 $para->[0] = '=for'; # Just what we happen to call these, internally
1097 $para->[1]{'~really'} ||= '=begin';
1098 $para->[1]{'~ignore'} = (! $dont_ignore) || 0;
1099 $para->[1]{'~resolve'} = $to_resolve || 0;
1100
1101 DEBUG > 1 and print STDERR " Making note to ", $dont_ignore ? 'not ' : '',
1102 "ignore contents of this region\n";
1103 DEBUG > 1 and $dont_ignore and print STDERR " Making note to treat contents as ",
1104 ($to_resolve ? 'verbatim/plain' : 'data'), " paragraphs\n";
1105 DEBUG > 1 and print STDERR " (Stack now: ", $self->_dump_curr_open(), ")\n";
1106
1107 push @$curr_open, $para;
1108 if(!$dont_ignore or scalar grep $_->[1]{'~ignore'}, @$curr_open) {
1109 DEBUG > 1 and print STDERR "Ignoring ignorable =begin\n";
1110 } else {
1111 $self->{'content_seen'} ||= 1;
1112 $self->_handle_element_start((my $scratch='for'), $para->[1]);
1113 }
1114
1115 return 1;
1116}
1117
1118sub _ponder_end {
1119 my ($self,$para,$curr_open,$paras) = @_;
1120 my $content = join ' ', splice @$para, 2;
1121 $content =~ s/^\s+//s;
1122 $content =~ s/\s+$//s;
1123 DEBUG and print STDERR "Ogling '=end $content' directive\n";
1124
1125 unless(length($content)) {
1126 $self->whine(
1127 $para->[1]{'start_line'},
1128 "'=end' without a target?" . (
1129 ( @$curr_open and $curr_open->[-1][0] eq '=for' )
1130 ? ( " (Should be \"=end " . $curr_open->[-1][1]{'target'} . '")' )
1131 : ''
1132 )
1133 );
1134 DEBUG and print STDERR "Ignoring targetless =end\n";
1135 return 1;
1136 }
1137
1138 unless($content =~ m/^\S+$/) { # i.e., unless it's one word
1139 $self->whine(
1140 $para->[1]{'start_line'},
1141 "'=end $content' is invalid. (Stack: "
1142 . $self->_dump_curr_open() . ')'
1143 );
1144 DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
1145 return 1;
1146 }
1147
1148 unless(@$curr_open and $curr_open->[-1][0] eq '=for') {
1149 $self->whine(
1150 $para->[1]{'start_line'},
1151 "=end $content without matching =begin. (Stack: "
1152 . $self->_dump_curr_open() . ')'
1153 );
1154 DEBUG and print STDERR "Ignoring mistargetted =end $content\n";
1155 return 1;
1156 }
1157
1158 unless($content eq $curr_open->[-1][1]{'target'}) {
1159 $self->whine(
1160 $para->[1]{'start_line'},
1161 "=end $content doesn't match =begin "
1162 . $curr_open->[-1][1]{'target'}
1163 . ". (Stack: "
1164 . $self->_dump_curr_open() . ')'
1165 );
1166 DEBUG and print STDERR "Ignoring mistargetted =end $content at line $para->[1]{'start_line'}\n";
1167 return 1;
1168 }
1169
1170 # Else it's okay to close...
1171 if(grep $_->[1]{'~ignore'}, @$curr_open) {
1172 DEBUG > 1 and print STDERR "Not firing any event for this =end $content because in an ignored region\n";
1173 # And that may be because of this to-be-closed =for region, or some
1174 # other one, but it doesn't matter.
1175 } else {
1176 $curr_open->[-1][1]{'start_line'} = $para->[1]{'start_line'};
1177 # what's that for?
1178
1179 $self->{'content_seen'} ||= 1;
1180 $self->_handle_element_end( my $scratch = 'for', $para->[1]);
1181 }
1182 DEBUG > 1 and print STDERR "Popping $curr_open->[-1][0] $curr_open->[-1][1]{'target'} because of =end $content\n";
1183 pop @$curr_open;
1184
1185 return 1;
1186}
1187
1188sub _ponder_doc_end {
1189 my ($self,$para,$curr_open,$paras) = @_;
1190 if(@$curr_open) { # Deal with things left open
1191 DEBUG and print STDERR "Stack is nonempty at end-document: (",
1192 $self->_dump_curr_open(), ")\n";
1193
1194 DEBUG > 9 and print STDERR "Stack: ", pretty($curr_open), "\n";
1195 unshift @$paras, $self->_closers_for_all_curr_open;
1196 # Make sure there is exactly one ~end in the parastack, at the end:
1197 @$paras = grep $_->[0] ne '~end', @$paras;
1198 push @$paras, $para, $para;
1199 # We need two -- once for the next cycle where we
1200 # generate errata, and then another to be at the end
1201 # when that loop back around to process the errata.
1202 return 1;
1203
1204 } else {
1205 DEBUG and print STDERR "Okay, stack is empty now.\n";
1206 }
1207
1208 # Try generating errata section, if applicable
1209 unless($self->{'~tried_gen_errata'}) {
1210 $self->{'~tried_gen_errata'} = 1;
1211 my @extras = $self->_gen_errata();
1212 if(@extras) {
1213 unshift @$paras, @extras;
1214 DEBUG and print STDERR "Generated errata... relooping...\n";
1215 return 1; # I.e., loop around again to process these fake-o paragraphs
1216 }
1217 }
1218
1219 splice @$paras; # Well, that's that for this paragraph buffer.
1220 DEBUG and print STDERR "Throwing end-document event.\n";
1221
1222 $self->_handle_element_end( my $scratch = 'Document' );
1223 return 1; # Hasta la byebye
1224}
1225
1226sub _ponder_pod {
1227 my ($self,$para,$curr_open,$paras) = @_;
1228 $self->whine(
1229 $para->[1]{'start_line'},
1230 "=pod directives shouldn't be over one line long! Ignoring all "
1231 . (@$para - 2) . " lines of content"
1232 ) if @$para > 3;
1233
1234 # Content ignored unless 'pod_handler' is set
1235 if (my $pod_handler = $self->{'pod_handler'}) {
1236 my ($line_num, $line) = map $_, $para->[1]{'start_line'}, $para->[2];
1237 $line = $line eq '' ? "=pod" : "=pod $line"; # imitate cut_handler output
1238 $pod_handler->($line, $line_num, $self);
1239 }
1240
1241 # The surrounding methods set content_seen, so let us remain consistent.
1242 # I do not know why it was not here before -- should it not be here?
1243 # $self->{'content_seen'} ||= 1;
1244
1245 return;
1246}
1247
1248sub _ponder_over {
1249 my ($self,$para,$curr_open,$paras) = @_;
1250 return 1 unless @$paras;
1251 my $list_type;
1252
1253 if($paras->[0][0] eq '=item') { # most common case
1254 $list_type = $self->_get_initial_item_type($paras->[0]);
1255
1256 } elsif($paras->[0][0] eq '=back') {
1257 # Ignore empty lists by default
1258 if ($self->{'parse_empty_lists'}) {
1259 $list_type = 'empty';
1260 } else {
1261 shift @$paras;
1262 return 1;
1263 }
1264 } elsif($paras->[0][0] eq '~end') {
1265 $self->whine(
1266 $para->[1]{'start_line'},
1267 "=over is the last thing in the document?!"
1268 );
1269 return 1; # But feh, ignore it.
1270 } else {
1271 $list_type = 'block';
1272 }
1273 $para->[1]{'~type'} = $list_type;
1274 push @$curr_open, $para;
1275 # yes, we reuse the paragraph as a stack item
1276
1277 my $content = join ' ', splice @$para, 2;
1278 my $overness;
1279 if($content =~ m/^\s*$/s) {
1280 $para->[1]{'indent'} = 4;
1281 } elsif($content =~ m/^\s*((?:\d*\.)?\d+)\s*$/s) {
128228.21ms252µs
# spent 43µs (34+10) within Pod::Simple::BlackBox::BEGIN@1282 which was called: # once (34µs+10µs) by Pod::Simple::LinkSection::BEGIN@9 at line 1282
no integer;
# spent 43µs making 1 call to Pod::Simple::BlackBox::BEGIN@1282 # spent 10µs making 1 call to integer::unimport
1283 $para->[1]{'indent'} = $1;
1284 if($1 == 0) {
1285 $self->whine(
1286 $para->[1]{'start_line'},
1287 "Can't have a 0 in =over $content"
1288 );
1289 $para->[1]{'indent'} = 4;
1290 }
1291 } else {
1292 $self->whine(
1293 $para->[1]{'start_line'},
1294 "=over should be: '=over' or '=over positive_number'"
1295 );
1296 $para->[1]{'indent'} = 4;
1297 }
1298 DEBUG > 1 and print STDERR "=over found of type $list_type\n";
1299
1300 $self->{'content_seen'} ||= 1;
1301 $self->_handle_element_start((my $scratch = 'over-' . $list_type), $para->[1]);
1302
1303 return;
1304}
1305
1306sub _ponder_back {
1307 my ($self,$para,$curr_open,$paras) = @_;
1308 # TODO: fire off </item-number> or </item-bullet> or </item-text> ??
1309
1310 my $content = join ' ', splice @$para, 2;
1311 if($content =~ m/\S/) {
1312 $self->whine(
1313 $para->[1]{'start_line'},
1314 "=back doesn't take any parameters, but you said =back $content"
1315 );
1316 }
1317
1318 if(@$curr_open and $curr_open->[-1][0] eq '=over') {
1319 DEBUG > 1 and print STDERR "=back happily closes matching =over\n";
1320 # Expected case: we're closing the most recently opened thing
1321 #my $over = pop @$curr_open;
1322 $self->{'content_seen'} ||= 1;
1323 $self->_handle_element_end( my $scratch =
1324 'over-' . ( (pop @$curr_open)->[1]{'~type'} ), $para->[1]
1325 );
1326 } else {
1327 DEBUG > 1 and print STDERR "=back found without a matching =over. Stack: (",
1328 join(', ', map $_->[0], @$curr_open), ").\n";
1329 $self->whine(
1330 $para->[1]{'start_line'},
1331 '=back without =over'
1332 );
1333 return 1; # and ignore it
1334 }
1335}
1336
1337sub _ponder_item {
1338 my ($self,$para,$curr_open,$paras) = @_;
1339 my $over;
1340 unless(@$curr_open and
1341 $over = (grep { $_->[0] eq '=over' } @$curr_open)[-1]) {
1342 $self->whine(
1343 $para->[1]{'start_line'},
1344 "'=item' outside of any '=over'"
1345 );
1346 unshift @$paras,
1347 ['=over', {'start_line' => $para->[1]{'start_line'}}, ''],
1348 $para
1349 ;
1350 return 1;
1351 }
1352
1353
1354 my $over_type = $over->[1]{'~type'};
1355
1356 if(!$over_type) {
1357 # Shouldn't happen1
1358 die "Typeless over in stack, starting at line "
1359 . $over->[1]{'start_line'};
1360
1361 } elsif($over_type eq 'block') {
1362 unless($curr_open->[-1][1]{'~bitched_about'}) {
1363 $curr_open->[-1][1]{'~bitched_about'} = 1;
1364 $self->whine(
1365 $curr_open->[-1][1]{'start_line'},
1366 "You can't have =items (as at line "
1367 . $para->[1]{'start_line'}
1368 . ") unless the first thing after the =over is an =item"
1369 );
1370 }
1371 # Just turn it into a paragraph and reconsider it
1372 $para->[0] = '~Para';
1373 unshift @$paras, $para;
1374 return 1;
1375
1376 } elsif($over_type eq 'text') {
1377 my $item_type = $self->_get_item_type($para);
1378 # That kills the content of the item if it's a number or bullet.
1379 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1380
1381 if($item_type eq 'text') {
1382 # Nothing special needs doing for 'text'
1383 } elsif($item_type eq 'number' or $item_type eq 'bullet') {
1384 $self->whine(
1385 $para->[1]{'start_line'},
1386 "Expected text after =item, not a $item_type"
1387 );
1388 # Undo our clobbering:
1389 push @$para, $para->[1]{'~orig_content'};
1390 delete $para->[1]{'number'};
1391 # Only a PROPER item-number element is allowed
1392 # to have a number attribute.
1393 } else {
1394 die "Unhandled item type $item_type"; # should never happen
1395 }
1396
1397 # =item-text thingies don't need any assimilation, it seems.
1398
1399 } elsif($over_type eq 'number') {
1400 my $item_type = $self->_get_item_type($para);
1401 # That kills the content of the item if it's a number or bullet.
1402 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1403
1404 my $expected_value = ++ $curr_open->[-1][1]{'~counter'};
1405
1406 if($item_type eq 'bullet') {
1407 # Hm, it's not numeric. Correct for this.
1408 $para->[1]{'number'} = $expected_value;
1409 $self->whine(
1410 $para->[1]{'start_line'},
1411 "Expected '=item $expected_value'"
1412 );
1413 push @$para, $para->[1]{'~orig_content'};
1414 # restore the bullet, blocking the assimilation of next para
1415
1416 } elsif($item_type eq 'text') {
1417 # Hm, it's not numeric. Correct for this.
1418 $para->[1]{'number'} = $expected_value;
1419 $self->whine(
1420 $para->[1]{'start_line'},
1421 "Expected '=item $expected_value'"
1422 );
1423 # Text content will still be there and will block next ~Para
1424
1425 } elsif($item_type ne 'number') {
1426 die "Unknown item type $item_type"; # should never happen
1427
1428 } elsif($expected_value == $para->[1]{'number'}) {
1429 DEBUG > 1 and print STDERR " Numeric item has the expected value of $expected_value\n";
1430
1431 } else {
1432 DEBUG > 1 and print STDERR " Numeric item has ", $para->[1]{'number'},
1433 " instead of the expected value of $expected_value\n";
1434 $self->whine(
1435 $para->[1]{'start_line'},
1436 "You have '=item " . $para->[1]{'number'} .
1437 "' instead of the expected '=item $expected_value'"
1438 );
1439 $para->[1]{'number'} = $expected_value; # correcting!!
1440 }
1441
1442 if(@$para == 2) {
1443 # For the cases where we /didn't/ push to @$para
1444 if($paras->[0][0] eq '~Para') {
1445 DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
1446 push @$para, splice @{shift @$paras},2;
1447 } else {
1448 DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
1449 push @$para, ''; # Just so it's not contentless
1450 }
1451 }
1452
1453
1454 } elsif($over_type eq 'bullet') {
1455 my $item_type = $self->_get_item_type($para);
1456 # That kills the content of the item if it's a number or bullet.
1457 DEBUG and print STDERR " Item is of type ", $para->[0], " under $over_type\n";
1458
1459 if($item_type eq 'bullet') {
1460 # as expected!
1461
1462 if( $para->[1]{'~_freaky_para_hack'} ) {
1463 DEBUG and print STDERR "Accomodating '=item * Foo' tolerance hack.\n";
1464 push @$para, delete $para->[1]{'~_freaky_para_hack'};
1465 }
1466
1467 } elsif($item_type eq 'number') {
1468 $self->whine(
1469 $para->[1]{'start_line'},
1470 "Expected '=item *'"
1471 );
1472 push @$para, $para->[1]{'~orig_content'};
1473 # and block assimilation of the next paragraph
1474 delete $para->[1]{'number'};
1475 # Only a PROPER item-number element is allowed
1476 # to have a number attribute.
1477 } elsif($item_type eq 'text') {
1478 $self->whine(
1479 $para->[1]{'start_line'},
1480 "Expected '=item *'"
1481 );
1482 # But doesn't need processing. But it'll block assimilation
1483 # of the next para.
1484 } else {
1485 die "Unhandled item type $item_type"; # should never happen
1486 }
1487
1488 if(@$para == 2) {
1489 # For the cases where we /didn't/ push to @$para
1490 if($paras->[0][0] eq '~Para') {
1491 DEBUG and print STDERR "Assimilating following ~Para content into $over_type item\n";
1492 push @$para, splice @{shift @$paras},2;
1493 } else {
1494 DEBUG and print STDERR "Can't assimilate following ", $paras->[0][0], "\n";
1495 push @$para, ''; # Just so it's not contentless
1496 }
1497 }
1498
1499 } else {
1500 die "Unhandled =over type \"$over_type\"?";
1501 # Shouldn't happen!
1502 }
1503 $para->[0] .= '-' . $over_type;
1504
1505 return;
1506}
1507
1508sub _ponder_Plain {
1509 my ($self,$para) = @_;
1510 DEBUG and print STDERR " giving plain treatment...\n";
1511 unless( @$para == 2 or ( @$para == 3 and $para->[2] eq '' )
1512 or $para->[1]{'~cooked'}
1513 ) {
1514 push @$para,
1515 @{$self->_make_treelet(
1516 join("\n", splice(@$para, 2)),
1517 $para->[1]{'start_line'}
1518 )};
1519 }
1520 # Empty paragraphs don't need a treelet for any reason I can see.
1521 # And precooked paragraphs already have a treelet.
1522 return;
1523}
1524
1525sub _ponder_Verbatim {
1526 my ($self,$para) = @_;
1527 DEBUG and print STDERR " giving verbatim treatment...\n";
1528
1529 $para->[1]{'xml:space'} = 'preserve';
1530
1531 my $indent = $self->strip_verbatim_indent;
1532 if ($indent && ref $indent eq 'CODE') {
1533 my @shifted = (shift @{$para}, shift @{$para});
1534 $indent = $indent->($para);
1535 unshift @{$para}, @shifted;
1536 }
1537
1538 for(my $i = 2; $i < @$para; $i++) {
1539 foreach my $line ($para->[$i]) { # just for aliasing
1540 # Strip indentation.
1541 $line =~ s/^\Q$indent// if $indent
1542 && !($self->{accept_codes} && $self->{accept_codes}{VerbatimFormatted});
1543 while( $line =~
1544 # Sort of adapted from Text::Tabs -- yes, it's hardwired in that
1545 # tabs are at every EIGHTH column. For portability, it has to be
1546 # one setting everywhere, and 8th wins.
1547 s/^([^\t]*)(\t+)/$1.(" " x ((length($2)<<3)-(length($1)&7)))/e
1548 ) {}
1549
1550 # TODO: whinge about (or otherwise treat) unindented or overlong lines
1551
1552 }
1553 }
1554
1555 # Now the VerbatimFormatted hoodoo...
1556 if( $self->{'accept_codes'} and
1557 $self->{'accept_codes'}{'VerbatimFormatted'}
1558 ) {
1559 while(@$para > 3 and $para->[-1] !~ m/\S/) { pop @$para }
1560 # Kill any number of terminal newlines
1561 $self->_verbatim_format($para);
1562 } elsif ($self->{'codes_in_verbatim'}) {
1563 push @$para,
1564 @{$self->_make_treelet(
1565 join("\n", splice(@$para, 2)),
1566 $para->[1]{'start_line'}, $para->[1]{'xml:space'}
1567 )};
1568 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1569 } else {
1570 push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1571 $para->[-1] =~ s/\n+$//s; # Kill any number of terminal newlines
1572 }
1573 return;
1574}
1575
1576sub _ponder_Data {
1577 my ($self,$para) = @_;
1578 DEBUG and print STDERR " giving data treatment...\n";
1579 $para->[1]{'xml:space'} = 'preserve';
1580 push @$para, join "\n", splice(@$para, 2) if @$para > 3;
1581 return;
1582}
1583
- -
1587###########################################################################
1588
1589sub _traverse_treelet_bit { # for use only by the routine above
1590 my($self, $name) = splice @_,0,2;
1591
1592 my $scratch;
1593 $self->_handle_element_start(($scratch=$name), shift @_);
1594
1595 while (@_) {
1596 my $x = shift;
1597 if (ref($x)) {
1598 &_traverse_treelet_bit($self, @$x);
1599 } else {
1600 $x .= shift while @_ && !ref($_[0]);
1601 $self->_handle_text($x);
1602 }
1603 }
1604
1605 $self->_handle_element_end($scratch=$name);
1606 return;
1607}
1608
1609#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1610
1611sub _closers_for_all_curr_open {
1612 my $self = $_[0];
1613 my @closers;
1614 foreach my $still_open (@{ $self->{'curr_open'} || return }) {
1615 my @copy = @$still_open;
1616 $copy[1] = {%{ $copy[1] }};
1617 #$copy[1]{'start_line'} = -1;
1618 if($copy[0] eq '=for') {
1619 $copy[0] = '=end';
1620 } elsif($copy[0] eq '=over') {
1621 $self->whine(
1622 $still_open->[1]{start_line} ,
1623 "=over without closing =back"
1624 );
1625
1626 $copy[0] = '=back';
1627 } else {
1628 die "I don't know how to auto-close an open $copy[0] region";
1629 }
1630
1631 unless( @copy > 2 ) {
1632 push @copy, $copy[1]{'target'};
1633 $copy[-1] = '' unless defined $copy[-1];
1634 # since =over's don't have targets
1635 }
1636
1637 $copy[1]{'fake-closer'} = 1;
1638
1639 DEBUG and print STDERR "Queuing up fake-o event: ", pretty(\@copy), "\n";
1640 unshift @closers, \@copy;
1641 }
1642 return @closers;
1643}
1644
1645#--------------------------------------------------------------------------
1646
1647sub _verbatim_format {
1648 my($it, $p) = @_;
1649
1650 my $formatting;
1651
1652 for(my $i = 2; $i < @$p; $i++) { # work backwards over the lines
1653 DEBUG and print STDERR "_verbatim_format appends a newline to $i: $p->[$i]\n";
1654 $p->[$i] .= "\n";
1655 # Unlike with simple Verbatim blocks, we don't end up just doing
1656 # a join("\n", ...) on the contents, so we have to append a
1657 # newline to ever line, and then nix the last one later.
1658 }
1659
1660 if( DEBUG > 4 ) {
1661 print STDERR "<<\n";
1662 for(my $i = $#$p; $i >= 2; $i--) { # work backwards over the lines
1663 print STDERR "_verbatim_format $i: $p->[$i]";
1664 }
1665 print STDERR ">>\n";
1666 }
1667
1668 for(my $i = $#$p; $i > 2; $i--) {
1669 # work backwards over the lines, except the first (#2)
1670
1671 #next unless $p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s
1672 # and $p->[$i-1] !~ m{^#:[ \^\/\%]*\n?$}s;
1673 # look at a formatty line preceding a nonformatty one
1674 DEBUG > 5 and print STDERR "Scrutinizing line $i: $$p[$i]\n";
1675 if($p->[$i] =~ m{^#:([ \^\/\%]*)\n?$}s) {
1676 DEBUG > 5 and print STDERR " It's a formatty line. ",
1677 "Peeking at previous line ", $i-1, ": $$p[$i-1]: \n";
1678
1679 if( $p->[$i-1] =~ m{^#:[ \^\/\%]*\n?$}s ) {
1680 DEBUG > 5 and print STDERR " Previous line is formatty! Skipping this one.\n";
1681 next;
1682 } else {
1683 DEBUG > 5 and print STDERR " Previous line is non-formatty! Yay!\n";
1684 }
1685 } else {
1686 DEBUG > 5 and print STDERR " It's not a formatty line. Ignoring\n";
1687 next;
1688 }
1689
1690 # A formatty line has to have #: in the first two columns, and uses
1691 # "^" to mean bold, "/" to mean underline, and "%" to mean bold italic.
1692 # Example:
1693 # What do you want? i like pie. [or whatever]
1694 # #:^^^^^^^^^^^^^^^^^ /////////////
1695
1696
1697 DEBUG > 4 and print STDERR "_verbatim_format considers:\n<$p->[$i-1]>\n<$p->[$i]>\n";
1698
1699 $formatting = ' ' . $1;
1700 $formatting =~ s/\s+$//s; # nix trailing whitespace
1701 unless(length $formatting and $p->[$i-1] =~ m/\S/) { # no-op
1702 splice @$p,$i,1; # remove this line
1703 $i--; # don't consider next line
1704 next;
1705 }
1706
1707 if( length($formatting) >= length($p->[$i-1]) ) {
1708 $formatting = substr($formatting, 0, length($p->[$i-1]) - 1) . ' ';
1709 } else {
1710 $formatting .= ' ' x (length($p->[$i-1]) - length($formatting));
1711 }
1712 # Make $formatting and the previous line be exactly the same length,
1713 # with $formatting having a " " as the last character.
1714
1715 DEBUG > 4 and print STDERR "Formatting <$formatting> on <", $p->[$i-1], ">\n";
1716
1717
1718 my @new_line;
1719 while( $formatting =~ m{\G(( +)|(\^+)|(\/+)|(\%+))}g ) {
1720 #print STDERR "Format matches $1\n";
1721
1722 if($2) {
1723 #print STDERR "SKIPPING <$2>\n";
1724 push @new_line,
1725 substr($p->[$i-1], pos($formatting)-length($1), length($1));
1726 } else {
1727 #print STDERR "SNARING $+\n";
1728 push @new_line, [
1729 (
1730 $3 ? 'VerbatimB' :
1731 $4 ? 'VerbatimI' :
1732 $5 ? 'VerbatimBI' : die("Should never get called")
1733 ), {},
1734 substr($p->[$i-1], pos($formatting)-length($1), length($1))
1735 ];
1736 #print STDERR "Formatting <$new_line[-1][-1]> as $new_line[-1][0]\n";
1737 }
1738 }
1739 my @nixed =
1740 splice @$p, $i-1, 2, @new_line; # replace myself and the next line
1741 DEBUG > 10 and print STDERR "Nixed count: ", scalar(@nixed), "\n";
1742
1743 DEBUG > 6 and print STDERR "New version of the above line is these tokens (",
1744 scalar(@new_line), "):",
1745 map( ref($_)?"<@$_> ":"<$_>", @new_line ), "\n";
1746 $i--; # So the next line we scrutinize is the line before the one
1747 # that we just went and formatted
1748 }
1749
1750 $p->[0] = 'VerbatimFormatted';
1751
1752 # Collapse adjacent text nodes, just for kicks.
1753 for( my $i = 2; $i > $#$p; $i++ ) { # work forwards over the tokens except for the last
1754 if( !ref($p->[$i]) and !ref($p->[$i + 1]) ) {
1755 DEBUG > 5 and print STDERR "_verbatim_format merges {$p->[$i]} and {$p->[$i+1]}\n";
1756 $p->[$i] .= splice @$p, $i+1, 1; # merge
1757 --$i; # and back up
1758 }
1759 }
1760
1761 # Now look for the last text token, and remove the terminal newline
1762 for( my $i = $#$p; $i >= 2; $i-- ) {
1763 # work backwards over the tokens, even the first
1764 if( !ref($p->[$i]) ) {
1765 if($p->[$i] =~ s/\n$//s) {
1766 DEBUG > 5 and print STDERR "_verbatim_format killed the terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]}\n";
1767 } else {
1768 DEBUG > 5 and print STDERR
1769 "No terminal newline on #$i: {$p->[$i]}, after {$p->[$i-1]} !?\n";
1770 }
1771 last; # we only want the next one
1772 }
1773 }
1774
1775 return;
1776}
1777
1778
1779#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1780
1781
1782sub _treelet_from_formatting_codes {
1783 # Given a paragraph, returns a treelet. Full of scary tokenizing code.
1784 # Like [ '~Top', {'start_line' => $start_line},
1785 # "I like ",
1786 # [ 'B', {}, "pie" ],
1787 # "!"
1788 # ]
1789
1790 my($self, $para, $start_line, $preserve_space) = @_;
1791
1792 my $treelet = ['~Top', {'start_line' => $start_line},];
1793
1794 unless ($preserve_space || $self->{'preserve_whitespace'}) {
1795 $para =~ s/\s+/ /g; # collapse and trim all whitespace first.
1796 $para =~ s/ $//;
1797 $para =~ s/^ //;
1798 }
1799
1800 # Only apparent problem the above code is that N<< >> turns into
1801 # N<< >>. But then, word wrapping does that too! So don't do that!
1802
1803 my @stack;
1804 my @lineage = ($treelet);
1805 my $raw = ''; # raw content of L<> fcode before splitting/processing
1806 # XXX 'raw' is not 100% accurate: all surrounding whitespace is condensed
1807 # into just 1 ' '. Is this the regex's doing or 'raw's?
1808 my $inL = 0;
1809
1810 DEBUG > 4 and print STDERR "Paragraph:\n$para\n\n";
1811
1812 # Here begins our frightening tokenizer RE. The following regex matches
1813 # text in four main parts:
1814 #
1815 # * Start-codes. The first alternative matches C< or C<<, the latter
1816 # followed by some whitespace. $1 will hold the entire start code
1817 # (including any space following a multiple-angle-bracket delimiter),
1818 # and $2 will hold only the additional brackets past the first in a
1819 # multiple-bracket delimiter. length($2) + 1 will be the number of
1820 # closing brackets we have to find.
1821 #
1822 # * Closing brackets. Match some amount of whitespace followed by
1823 # multiple close brackets. The logic to see if this closes anything
1824 # is down below. Note that in order to parse C<< >> correctly, we
1825 # have to use look-behind (?<=\s\s), since the match of the starting
1826 # code will have consumed the whitespace.
1827 #
1828 # * A single closing bracket, to close a simple code like C<>.
1829 #
1830 # * Something that isn't a start or end code. We have to be careful
1831 # about accepting whitespace, since perlpodspec says that any whitespace
1832 # before a multiple-bracket closing delimiter should be ignored.
1833 #
1834 while($para =~
1835 m/\G
1836 (?:
1837 # Match starting codes, including the whitespace following a
1838 # multiple-delimiter start code. $1 gets the whole start code and
1839 # $2 gets all but one of the <s in the multiple-bracket case.
1840 ([A-Z]<(?:(<+)\s+)?)
1841 |
1842 # Match multiple-bracket end codes. $3 gets the whitespace that
1843 # should be discarded before an end bracket but kept in other cases
1844 # and $4 gets the end brackets themselves.
1845 (\s+|(?<=\s\s))(>{2,})
1846 |
1847 (\s?>) # $5: simple end-codes
1848 |
1849 ( # $6: stuff containing no start-codes or end-codes
1850 (?:
1851 [^A-Z\s>]
1852 |
1853 (?:
1854 [A-Z](?!<)
1855 )
1856 |
1857 # whitespace is ok, but we don't want to eat the whitespace before
1858 # a multiple-bracket end code.
1859 # NOTE: we may still have problems with e.g. S<< >>
1860 (?:
1861 \s(?!\s*>{2,})
1862 )
1863 )+
1864 )
1865 )
1866 /xgo
1867 ) {
1868 DEBUG > 4 and print STDERR "\nParagraphic tokenstack = (@stack)\n";
1869 if(defined $1) {
1870 if(defined $2) {
1871 DEBUG > 3 and print STDERR "Found complex start-text code \"$1\"\n";
1872 push @stack, length($2) + 1;
1873 # length of the necessary complex end-code string
1874 } else {
1875 DEBUG > 3 and print STDERR "Found simple start-text code \"$1\"\n";
1876 push @stack, 0; # signal that we're looking for simple
1877 }
1878 push @lineage, [ substr($1,0,1), {}, ]; # new node object
1879 push @{ $lineage[-2] }, $lineage[-1];
1880 if ('L' eq substr($1,0,1)) {
1881 $raw = $inL ? $raw.$1 : ''; # reset raw content accumulator
1882 $inL = 1;
1883 } else {
1884 $raw .= $1 if $inL;
1885 }
1886
1887 } elsif(defined $4) {
1888 DEBUG > 3 and print STDERR "Found apparent complex end-text code \"$3$4\"\n";
1889 # This is where it gets messy...
1890 if(! @stack) {
1891 # We saw " >>>>" but needed nothing. This is ALL just stuff then.
1892 DEBUG > 4 and print STDERR " But it's really just stuff.\n";
1893 push @{ $lineage[-1] }, $3, $4;
1894 next;
1895 } elsif(!$stack[-1]) {
1896 # We saw " >>>>" but needed only ">". Back pos up.
1897 DEBUG > 4 and print STDERR " And that's more than we needed to close simple.\n";
1898 push @{ $lineage[-1] }, $3; # That was a for-real space, too.
1899 pos($para) = pos($para) - length($4) + 1;
1900 } elsif($stack[-1] == length($4)) {
1901 # We found " >>>>", and it was exactly what we needed. Commonest case.
1902 DEBUG > 4 and print STDERR " And that's exactly what we needed to close complex.\n";
1903 } elsif($stack[-1] < length($4)) {
1904 # We saw " >>>>" but needed only " >>". Back pos up.
1905 DEBUG > 4 and print STDERR " And that's more than we needed to close complex.\n";
1906 pos($para) = pos($para) - length($4) + $stack[-1];
1907 } else {
1908 # We saw " >>>>" but needed " >>>>>>". So this is all just stuff!
1909 DEBUG > 4 and print STDERR " But it's really just stuff, because we needed more.\n";
1910 push @{ $lineage[-1] }, $3, $4;
1911 next;
1912 }
1913 #print STDERR "\nHOOBOY ", scalar(@{$lineage[-1]}), "!!!\n";
1914
1915 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1916 # Keep the element from being childless
1917
1918 pop @stack;
1919 pop @lineage;
1920
1921 unless (@stack) { # not in an L if there are no open fcodes
1922 $inL = 0;
1923 if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1924 $lineage[-1][-1][1]{'raw'} = $raw
1925 }
1926 }
1927 $raw .= $3.$4 if $inL;
1928
1929 } elsif(defined $5) {
1930 DEBUG > 3 and print STDERR "Found apparent simple end-text code \"$5\"\n";
1931
1932 if(@stack and ! $stack[-1]) {
1933 # We're indeed expecting a simple end-code
1934 DEBUG > 4 and print STDERR " It's indeed an end-code.\n";
1935
1936 if(length($5) == 2) { # There was a space there: " >"
1937 push @{ $lineage[-1] }, ' ';
1938 } elsif( 2 == @{ $lineage[-1] } ) { # Closing a childless element
1939 push @{ $lineage[-1] }, ''; # keep it from being really childless
1940 }
1941
1942 pop @stack;
1943 pop @lineage;
1944 } else {
1945 DEBUG > 4 and print STDERR " It's just stuff.\n";
1946 push @{ $lineage[-1] }, $5;
1947 }
1948
1949 unless (@stack) { # not in an L if there are no open fcodes
1950 $inL = 0;
1951 if (ref $lineage[-1][-1] && $lineage[-1][-1][0] eq 'L') {
1952 $lineage[-1][-1][1]{'raw'} = $raw
1953 }
1954 }
1955 $raw .= $5 if $inL;
1956
1957 } elsif(defined $6) {
1958 DEBUG > 3 and print STDERR "Found stuff \"$6\"\n";
1959 push @{ $lineage[-1] }, $6;
1960 $raw .= $6 if $inL;
1961 # XXX does not capture multiplace whitespaces -- 'raw' ends up with
1962 # at most 1 leading/trailing whitespace, why not all of it?
1963
1964 } else {
1965 # should never ever ever ever happen
1966 DEBUG and print STDERR "AYYAYAAAAA at line ", __LINE__, "\n";
1967 die "SPORK 512512!";
1968 }
1969 }
1970
1971 if(@stack) { # Uhoh, some sequences weren't closed.
1972 my $x= "...";
1973 while(@stack) {
1974 push @{ $lineage[-1] }, '' if 2 == @{ $lineage[-1] };
1975 # Hmmmmm!
1976
1977 my $code = (pop @lineage)->[0];
1978 my $ender_length = pop @stack;
1979 if($ender_length) {
1980 --$ender_length;
1981 $x = $code . ("<" x $ender_length) . " $x " . (">" x $ender_length);
1982 } else {
1983 $x = $code . "<$x>";
1984 }
1985 }
1986 DEBUG > 1 and print STDERR "Unterminated $x sequence\n";
1987 $self->whine($start_line,
1988 "Unterminated $x sequence",
1989 );
1990 }
1991
1992 return $treelet;
1993}
1994
1995#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
1996
1997sub text_content_of_treelet { # method: $parser->text_content_of_treelet($lol)
1998 return stringify_lol($_[1]);
1999}
2000
2001sub stringify_lol { # function: stringify_lol($lol)
2002 my $string_form = '';
2003 _stringify_lol( $_[0] => \$string_form );
2004 return $string_form;
2005}
2006
2007sub _stringify_lol { # the real recursor
2008 my($lol, $to) = @_;
2009 for(my $i = 2; $i < @$lol; ++$i) {
2010 if( ref($lol->[$i] || '') and UNIVERSAL::isa($lol->[$i], 'ARRAY') ) {
2011 _stringify_lol( $lol->[$i], $to); # recurse!
2012 } else {
2013 $$to .= $lol->[$i];
2014 }
2015 }
2016 return;
2017}
2018
2019#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2020
2021sub _dump_curr_open { # return a string representation of the stack
2022 my $curr_open = $_[0]{'curr_open'};
2023
2024 return '[empty]' unless @$curr_open;
2025 return join '; ',
2026 map {;
2027 ($_->[0] eq '=for')
2028 ? ( ($_->[1]{'~really'} || '=over')
2029 . ' ' . $_->[1]{'target'})
2030 : $_->[0]
2031 }
2032 @$curr_open
2033 ;
2034}
2035
2036###########################################################################
2037117µsmy %pretty_form = (
2038 "\a" => '\a', # ding!
2039 "\b" => '\b', # BS
2040 "\e" => '\e', # ESC
2041 "\f" => '\f', # FF
2042 "\t" => '\t', # tab
2043 "\cm" => '\cm',
2044 "\cj" => '\cj',
2045 "\n" => '\n', # probably overrides one of either \cm or \cj
2046 '"' => '\"',
2047 '\\' => '\\\\',
2048 '$' => '\\$',
2049 '@' => '\\@',
2050 '%' => '\\%',
2051 '#' => '\\#',
2052);
2053
2054sub pretty { # adopted from Class::Classless
2055 # Not the most brilliant routine, but passable.
2056 # Don't give it a cyclic data structure!
2057 my @stuff = @_; # copy
2058 my $x;
2059 my $out =
2060 # join ",\n" .
2061 join ", ",
2062 map {;
2063 if(!defined($_)) {
2064 "undef";
2065 } elsif(ref($_) eq 'ARRAY' or ref($_) eq 'Pod::Simple::LinkSection') {
2066 $x = "[ " . pretty(@$_) . " ]" ;
2067 $x;
2068 } elsif(ref($_) eq 'SCALAR') {
2069 $x = "\\" . pretty($$_) ;
2070 $x;
2071 } elsif(ref($_) eq 'HASH') {
2072 my $hr = $_;
2073 $x = "{" . join(", ",
2074 map(pretty($_) . '=>' . pretty($hr->{$_}),
2075 sort keys %$hr ) ) . "}" ;
2076 $x;
2077 } elsif(!length($_)) { q{''} # empty string
2078 } elsif(
2079 $_ eq '0' # very common case
2080 or(
2081 m/^-?(?:[123456789]\d*|0)(?:\.\d+)?$/s
2082 and $_ ne '-0' # the strange case that RE lets thru
2083 )
2084 ) { $_;
2085 } else {
2086 # Yes, explicitly name every character desired. There are shorcuts one
2087 # could make, but I (Karl Williamson) was afraid that some Perl
2088 # releases would have bugs in some of them. For example [A-Z] works
2089 # even on EBCDIC platforms to match exactly the 26 uppercase English
2090 # letters, but I don't know if it has always worked without bugs. It
2091 # seemed safest just to list the characters.
2092 # s<([^\x20\x21\x23\x27-\x3F\x41-\x5B\x5D-\x7E])>
2093 s<([^ !#'()*+,\-./0123456789:;\<=\>?ABCDEFGHIJKLMNOPQRSTUVWXYZ\[\]^_`abcdefghijklmnopqrstuvwxyz{|}~])>
2094 <$pretty_form{$1} || '\\x{'.sprintf("%x", ord($1)).'}'>eg;
2095 #<$pretty_form{$1} || '\\x'.(unpack("H2",$1))>eg;
2096 qq{"$_"};
2097 }
2098 } @stuff;
2099 # $out =~ s/\n */ /g if length($out) < 75;
2100 return $out;
2101}
2102
2103#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2104
2105# A rather unsubtle method of blowing away all the state information
2106# from a parser object so it can be reused. Provided as a utility for
2107# backward compatibility in Pod::Man, etc. but not recommended for
2108# general use.
2109
2110sub reinit {
2111 my $self = shift;
2112 foreach (qw(source_dead source_filename doc_has_started
2113start_of_pod_block content_seen last_was_blank paras curr_open
2114line_count pod_para_count in_pod ~tried_gen_errata all_errata errata errors_seen
2115Title)) {
2116
2117 delete $self->{$_};
2118 }
2119}
2120
2121#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2122122µs1;
2123
 
# spent 7µs within Pod::Simple::BlackBox::CORE:qr which was called: # once (7µs+0s) by Pod::Simple::LinkSection::BEGIN@9 at line 1 of (eval 4)[Pod/Simple/BlackBox.pm:35]
sub Pod::Simple::BlackBox::CORE:qr; # opcode