<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>http://70.231.62.181/index.php?action=history&amp;feed=atom&amp;title=MyWiki%3AWikiProject_Astronomical_objects%2FStub_processing%2Ftools%2Fstub_triage_perl_script</id>
	<title>MyWiki:WikiProject Astronomical objects/Stub processing/tools/stub triage perl script - Revision history</title>
	<link rel="self" type="application/atom+xml" href="http://70.231.62.181/index.php?action=history&amp;feed=atom&amp;title=MyWiki%3AWikiProject_Astronomical_objects%2FStub_processing%2Ftools%2Fstub_triage_perl_script"/>
	<link rel="alternate" type="text/html" href="http://70.231.62.181/index.php?title=MyWiki:WikiProject_Astronomical_objects/Stub_processing/tools/stub_triage_perl_script&amp;action=history"/>
	<updated>2026-04-30T03:11:03Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.45.1</generator>
	<entry>
		<id>http://70.231.62.181/index.php?title=MyWiki:WikiProject_Astronomical_objects/Stub_processing/tools/stub_triage_perl_script&amp;diff=4649497&amp;oldid=prev</id>
		<title>imported&gt;Christopher Thomas: Uploaded stub triage script. Use it for good and not evil.</title>
		<link rel="alternate" type="text/html" href="http://70.231.62.181/index.php?title=MyWiki:WikiProject_Astronomical_objects/Stub_processing/tools/stub_triage_perl_script&amp;diff=4649497&amp;oldid=prev"/>
		<updated>2012-01-22T19:27:11Z</updated>

		<summary type="html">&lt;p&gt;Uploaded stub triage script. Use it for good and not evil.&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;&amp;#039;&amp;#039;Originally written by [[User:Christopher Thomas]]. By posting it here, I recognize and acknowledge its release under appropriate Wikipedia licenses. --[[User:Christopher Thomas|Christopher Thomas]] ([[User talk:Christopher Thomas|talk]]) 19:27, 22 January 2012 (UTC)&amp;#039;&amp;#039;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;syntaxhighlight lang=&amp;quot;perl&amp;quot;&amp;gt;&lt;br /&gt;
#!/usr/bin/perl&lt;br /&gt;
#&lt;br /&gt;
# Stub Triage Script - Triage Processing&lt;br /&gt;
# Written by Christopher Thomas per WT:ASTRO thread discussion.&lt;br /&gt;
#&lt;br /&gt;
# Usage:  ProcessStubs.pl &amp;lt;page list&amp;gt; &amp;lt;output filename&amp;gt;&lt;br /&gt;
#&lt;br /&gt;
# This script examines a series of wikipedia pages and produces a &lt;br /&gt;
# wiki-markup table listing the entries and indicating which are stubs.&lt;br /&gt;
# Auxiliary information (presence of references, infoboxes) is also &lt;br /&gt;
# included.&lt;br /&gt;
#&lt;br /&gt;
# This is intended to be made more flexible in the future. Right now, all &lt;br /&gt;
# examination is hard-coded.&lt;br /&gt;
#&lt;br /&gt;
# The page list must be in the format produced by GetStubList.pl.&lt;br /&gt;
#&lt;br /&gt;
# Long lists are split into many smaller tables, all collapsible.&lt;br /&gt;
#&lt;br /&gt;
# This script worked in January 2012. Wiki changes may break it later!&lt;br /&gt;
#&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Includes&lt;br /&gt;
#&lt;br /&gt;
&lt;br /&gt;
use strict;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Constants&lt;br /&gt;
#&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# Various magic values.&lt;br /&gt;
&lt;br /&gt;
# Max number of entries per table.&lt;br /&gt;
my ($tsize);&lt;br /&gt;
$tsize = 100;&lt;br /&gt;
&lt;br /&gt;
# Character count threshold for being &amp;quot;short&amp;quot;.&lt;br /&gt;
# FIXME - This is a very mushy boundary!&lt;br /&gt;
my ($shortsize);&lt;br /&gt;
$shortsize = 400;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Functions&lt;br /&gt;
#&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# Displays a help screen.&lt;br /&gt;
# No arguments.&lt;br /&gt;
# No return value.&lt;br /&gt;
&lt;br /&gt;
sub PrintHelp&lt;br /&gt;
{&lt;br /&gt;
  print &amp;lt;&amp;lt; &amp;quot;Endofblock&amp;quot;&lt;br /&gt;
&lt;br /&gt;
Stub Triage Script - Triage Processing&lt;br /&gt;
Written by Christopher Thomas per WT:ASTRO thread discussion.&lt;br /&gt;
&lt;br /&gt;
Usage:  ProcessStubs.pl &amp;lt;page list&amp;gt; &amp;lt;output filename&amp;gt;&lt;br /&gt;
&lt;br /&gt;
This script examines a series of wikipedia pages and produces a &lt;br /&gt;
wiki-markup table listing the entries and indicating which are stubs.&lt;br /&gt;
Auxiliary information (presence of references, infoboxes) is also &lt;br /&gt;
included.&lt;br /&gt;
&lt;br /&gt;
This is intended to be made more flexible in the future. Right now, all &lt;br /&gt;
examination is hard-coded.&lt;br /&gt;
&lt;br /&gt;
The page list must be in the format produced by GetStubList.pl.&lt;br /&gt;
&lt;br /&gt;
Long lists are split into many smaller tables, all collapsible.&lt;br /&gt;
&lt;br /&gt;
This script worked in January 2012. Wiki changes may break it later!&lt;br /&gt;
&lt;br /&gt;
Endofblock&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# Constructs a table header.&lt;br /&gt;
# FIXME - Lots of magic in here.&lt;br /&gt;
# Arg 0 is the index of the first entry in the table.&lt;br /&gt;
# Returns the string to be emitted.&lt;br /&gt;
&lt;br /&gt;
sub MakeTableHeader&lt;br /&gt;
{&lt;br /&gt;
  my ($nidx);&lt;br /&gt;
  my ($result);&lt;br /&gt;
&lt;br /&gt;
  # Process args.&lt;br /&gt;
&lt;br /&gt;
  $nidx = $_[0];&lt;br /&gt;
&lt;br /&gt;
  if (!(defined $nidx))&lt;br /&gt;
  {&lt;br /&gt;
    print &amp;quot;### [MakeTableHeader]  Bad arguments.\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    # Pick something to emit.&lt;br /&gt;
    $nidx = &amp;#039;-bogus-&amp;#039;;&lt;br /&gt;
  }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
  $result = &amp;quot;\n&amp;quot;.&amp;#039;{| class=&amp;quot;wikitable collapsible collapsed&amp;quot;&amp;#039;.&amp;quot;\n&amp;quot;&lt;br /&gt;
    . &amp;quot;|-\n&amp;quot;&lt;br /&gt;
    . &amp;quot;! colspan=7 | Stubs starting from item $nidx\n&amp;quot;&lt;br /&gt;
    . &amp;quot;|-\n&amp;quot;&lt;br /&gt;
    . &amp;quot;! Reviewed\n&amp;quot;&lt;br /&gt;
    . &amp;quot;! Article\n&amp;quot;&lt;br /&gt;
    . &amp;quot;! Length\n&amp;quot;&lt;br /&gt;
    . &amp;quot;! Refs\n&amp;quot;&lt;br /&gt;
    . &amp;quot;! ExLinks\n&amp;quot;&lt;br /&gt;
    . &amp;quot;! Infobox\n&amp;quot;&lt;br /&gt;
    . &amp;quot;! JPL\n&amp;quot;&lt;br /&gt;
    ;&lt;br /&gt;
&lt;br /&gt;
  # Done.&lt;br /&gt;
  return $result;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# Constructs a table footer.&lt;br /&gt;
# No arguments.&lt;br /&gt;
# Returns the string to be emitted.&lt;br /&gt;
&lt;br /&gt;
sub MakeTableFooter&lt;br /&gt;
{&lt;br /&gt;
  my ($result);&lt;br /&gt;
&lt;br /&gt;
  $result = &amp;quot;|}\n\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
  return $result;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# Builds a statistics line for a given article URL.&lt;br /&gt;
# FIXME - Lots of magic in here.&lt;br /&gt;
# Arg 0 is the URL/label pair string from the page list.&lt;br /&gt;
# Returns the string to be emitted for this table row.&lt;br /&gt;
&lt;br /&gt;
sub MakeStatLineForURL&lt;br /&gt;
{&lt;br /&gt;
  my ($nstring, $result);&lt;br /&gt;
  my ($url, $name);&lt;br /&gt;
  my ($pstats_p);&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
  $nstring = $_[0];&lt;br /&gt;
  $result = &amp;quot;&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
  if (!(defined $nstring))&lt;br /&gt;
  {&lt;br /&gt;
    print &amp;quot;### [MakeStatLineForURL]  Bad arguments.\n&amp;quot;;&lt;br /&gt;
  }&lt;br /&gt;
  elsif (!($nstring =~ m/^(\S*)\s+(.*\S)/))&lt;br /&gt;
  {&lt;br /&gt;
    print &amp;quot;### [MakeStatLineForURL]  Unable to parse URL/label string.\n&amp;quot;;&lt;br /&gt;
  }&lt;br /&gt;
  else&lt;br /&gt;
  {&lt;br /&gt;
    #&lt;br /&gt;
    # Process name and URL.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    $url = $1;&lt;br /&gt;
    $name = $2;&lt;br /&gt;
&lt;br /&gt;
    # Complete the URL. It starts with &amp;quot;/wiki&amp;quot; now.&lt;br /&gt;
    $url = &amp;#039;http://en.wikipedia.org&amp;#039; . $url;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    # FIXME - Non-English characters will be mangled, so extract the&lt;br /&gt;
    # true name from the URL if possible.&lt;br /&gt;
    if ($url =~ m/wikipedia\.org\/wiki\/(.*\S)/)&lt;br /&gt;
    {&lt;br /&gt;
      $name = $1;&lt;br /&gt;
      $name =~ s/_/\ /g;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
# FIXME - Diagnostics&lt;br /&gt;
#print &amp;quot;Name \&amp;quot;$name\&amp;quot;, URL \&amp;quot;$url\&amp;quot;.\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    #&lt;br /&gt;
    # Get information hash for this page.&lt;br /&gt;
&lt;br /&gt;
    $pstats_p = {};&lt;br /&gt;
&lt;br /&gt;
    ComputePageStats($url, $pstats_p);&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    #&lt;br /&gt;
    # Emit table row.&lt;br /&gt;
&lt;br /&gt;
    $result = &amp;quot;|-\n&amp;quot;&lt;br /&gt;
      . &amp;#039;| &amp;lt;!-- Add &amp;quot;tick&amp;quot;, &amp;quot;cross&amp;quot;, or other template here. --&amp;gt;&amp;#039;.&amp;quot;\n&amp;quot;&lt;br /&gt;
      . &amp;#039;| {{article|&amp;#039; . $name . &amp;#039;}}&amp;#039; . &amp;quot;\n&amp;quot;&lt;br /&gt;
      . &amp;#039;| &amp;#039; . $$pstats_p{length} . &amp;quot;\n&amp;quot;&lt;br /&gt;
      . &amp;#039;| &amp;#039; . $$pstats_p{refcount} . &amp;quot;\n&amp;quot;&lt;br /&gt;
      . &amp;#039;| &amp;#039; . $$pstats_p{excount} . &amp;quot;\n&amp;quot;&lt;br /&gt;
      . &amp;#039;| &amp;#039; . $$pstats_p{hasinfo} . &amp;quot;\n&amp;quot;&lt;br /&gt;
      . &amp;#039;| &amp;#039; . $$pstats_p{jpl} . &amp;quot;\n&amp;quot;&lt;br /&gt;
  }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
  return $result;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# Fetches Wikipedia markup source for a given page URL.&lt;br /&gt;
# Arg 0 is the URL used to view the page.&lt;br /&gt;
# Arg 1 points to an array to store source in.&lt;br /&gt;
# No return value.&lt;br /&gt;
&lt;br /&gt;
sub FetchWikiSource&lt;br /&gt;
{&lt;br /&gt;
  my ($url, $src_p);&lt;br /&gt;
  my (@rawdata, $ridx, $sidx, $thisline);&lt;br /&gt;
  my ($insource, $done);&lt;br /&gt;
&lt;br /&gt;
  $url = $_[0];&lt;br /&gt;
  $src_p = $_[1];&lt;br /&gt;
&lt;br /&gt;
  if (!( (defined $url) &amp;amp;&amp;amp; (defined $src_p) ))&lt;br /&gt;
  {&lt;br /&gt;
    print &amp;quot;### [FetchWikiSource]  Bad arguments!\n&amp;quot;;&lt;br /&gt;
  }&lt;br /&gt;
  else&lt;br /&gt;
  {&lt;br /&gt;
    # No matter what, delay so that we don&amp;#039;t hammer the wiki.&lt;br /&gt;
    sleep(1);&lt;br /&gt;
&lt;br /&gt;
    # Initialize.&lt;br /&gt;
    @rawdata = ();&lt;br /&gt;
&lt;br /&gt;
    # Turn this into an &amp;quot;edit page&amp;quot; URL, and fetch it.&lt;br /&gt;
&lt;br /&gt;
    if ($url =~ m/wiki\/(\S+)/)&lt;br /&gt;
    {&lt;br /&gt;
      $url = &amp;#039;http://en.wikipedia.org/w/index.php?title=&amp;#039;&lt;br /&gt;
        . $1 . &amp;#039;&amp;amp;action=edit&amp;#039;;&lt;br /&gt;
&lt;br /&gt;
      # FIXME - Doing this the messy but easy way.&lt;br /&gt;
      @rawdata = `lynx --source \&amp;quot;$url\&amp;quot;`;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    # We now have either a blank array (on failure) or a raw html array.&lt;br /&gt;
    # Scan for useful information.&lt;br /&gt;
&lt;br /&gt;
    $insource = 0;&lt;br /&gt;
    $done = 0;&lt;br /&gt;
    $sidx = 0;&lt;br /&gt;
    @$src_p = ();&lt;br /&gt;
&lt;br /&gt;
    for ($ridx = 0;&lt;br /&gt;
      (!$done) &amp;amp;&amp;amp; (defined ($thisline = $rawdata[$ridx]));&lt;br /&gt;
      $ridx++)&lt;br /&gt;
    {&lt;br /&gt;
      if ($insource)&lt;br /&gt;
      {&lt;br /&gt;
        # Looking for the end of the wiki markup textarea.&lt;br /&gt;
        # Saving everything in the meantime.&lt;br /&gt;
&lt;br /&gt;
        if ($thisline =~ m/^(.*)\&amp;lt;\/textarea/i)&lt;br /&gt;
        {&lt;br /&gt;
          $thisline = $1;&lt;br /&gt;
          $insource = 0;&lt;br /&gt;
          $done = 1;&lt;br /&gt;
&lt;br /&gt;
          if ($thisline =~ m/\S/)&lt;br /&gt;
          {&lt;br /&gt;
            $$src_p[$sidx] = $thisline;&lt;br /&gt;
            $sidx++;&lt;br /&gt;
          }&lt;br /&gt;
        }&lt;br /&gt;
        # FIXME - Force sanity.&lt;br /&gt;
        elsif ($thisline =~ m/\&amp;lt;\/textarea/i)&lt;br /&gt;
        {&lt;br /&gt;
          $insource = 0;&lt;br /&gt;
          $done = 1;&lt;br /&gt;
&lt;br /&gt;
          print &amp;quot;### Un-caught end of text area (shouldn&amp;#039;t happen).\n&amp;quot;;&lt;br /&gt;
        }&lt;br /&gt;
        else&lt;br /&gt;
        {&lt;br /&gt;
          $$src_p[$sidx] = $thisline;&lt;br /&gt;
          $sidx++;&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
      else&lt;br /&gt;
      {&lt;br /&gt;
        # Looking for the wiki markup textarea.&lt;br /&gt;
&lt;br /&gt;
        if ($thisline =~ m/\&amp;lt;textarea .* name=\&amp;quot;wpTextbox1\&amp;quot;\&amp;gt;(.*)/i)&lt;br /&gt;
        {&lt;br /&gt;
          $thisline = $1;&lt;br /&gt;
          $insource = 1;&lt;br /&gt;
&lt;br /&gt;
          if ($thisline =~ m/\S/)&lt;br /&gt;
          {&lt;br /&gt;
            $$src_p[$sidx] = $thisline;&lt;br /&gt;
            $sidx++;&lt;br /&gt;
          }&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
    }&lt;br /&gt;
  }&lt;br /&gt;
&lt;br /&gt;
  # Done.&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
# Fetches a wikipedia page and computes its stub-related statistics.&lt;br /&gt;
# FIXME - Lots of magic in here.&lt;br /&gt;
# Arg 0 is the URL to fetch (complete).&lt;br /&gt;
# Arg 1 points to a hash to store statistics in.&lt;br /&gt;
# No return value.&lt;br /&gt;
&lt;br /&gt;
sub ComputePageStats&lt;br /&gt;
{&lt;br /&gt;
  my ($url, $stats_p);&lt;br /&gt;
  my (@pagedata, $thisline, $lidx);&lt;br /&gt;
  my ($state);&lt;br /&gt;
  my ($charcount, $refcount, $excount, $hasinfo, $jplurl);&lt;br /&gt;
&lt;br /&gt;
  $url = $_[0];&lt;br /&gt;
  $stats_p = $_[1];&lt;br /&gt;
&lt;br /&gt;
  if (!( (defined $url) &amp;amp;&amp;amp; (defined $stats_p) ))&lt;br /&gt;
  {&lt;br /&gt;
    print &amp;quot;### [ComputePageStats]  Bad arguments.\n&amp;quot;;&lt;br /&gt;
  }&lt;br /&gt;
  else&lt;br /&gt;
  {&lt;br /&gt;
# FIXME - Diagnostics.&lt;br /&gt;
print &amp;quot;Fetching \&amp;quot;$url\&amp;quot;.\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
    # Fetch wikipedia markup source for this page.&lt;br /&gt;
    @pagedata = ();&lt;br /&gt;
    FetchWikiSource($url, \@pagedata);&lt;br /&gt;
&lt;br /&gt;
    # Initialize stats.&lt;br /&gt;
    $charcount = 0;&lt;br /&gt;
    $refcount = 0;&lt;br /&gt;
    $excount = 0;&lt;br /&gt;
    $hasinfo = 0;&lt;br /&gt;
    $jplurl = undef;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    #&lt;br /&gt;
    # Crawl through the page, updating statistics.&lt;br /&gt;
&lt;br /&gt;
    # FIXME - This is really fragile!&lt;br /&gt;
    # Among other things, it&amp;#039;ll choke on nested infoboxes and&lt;br /&gt;
    # templates or links that are split across lines.&lt;br /&gt;
&lt;br /&gt;
    # Fortunately, the mass-created articles tend to be well-formed.&lt;br /&gt;
&lt;br /&gt;
    $state = &amp;#039;top&amp;#039;;&lt;br /&gt;
&lt;br /&gt;
    for ($lidx = 0;&lt;br /&gt;
      ($state ne &amp;#039;done&amp;#039;) &amp;amp;&amp;amp; (defined ($thisline = $pagedata[$lidx]));&lt;br /&gt;
      $lidx++)&lt;br /&gt;
    {&lt;br /&gt;
      # No matter what state we&amp;#039;re in, flag JPL URLs.&lt;br /&gt;
      # We have to do this before eating templates, as they&amp;#039;re often&lt;br /&gt;
      # within {{cite}} templates.&lt;br /&gt;
&lt;br /&gt;
      if ($thisline =~ m/(http:\/\/ssd\.jpl\.nasa\.gov\S+)/i)&lt;br /&gt;
      {&lt;br /&gt;
        # FIXME - Overwrite any previous JPL URLs.&lt;br /&gt;
        $jplurl = $1;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
        # Clip pipes or end brackets.&lt;br /&gt;
&lt;br /&gt;
        if ($jplurl =~ m/(.*?)\|/)&lt;br /&gt;
        {&lt;br /&gt;
          $jplurl = $1;&lt;br /&gt;
        }&lt;br /&gt;
&lt;br /&gt;
        if ($jplurl =~ m/(.*?)\]/)&lt;br /&gt;
        {&lt;br /&gt;
          $jplurl = $1;&lt;br /&gt;
        }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
        # URL should be trimmed now.&lt;br /&gt;
      }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
      # FIXME - Eat any single-line template.&lt;br /&gt;
      # There are way too many of these, and they break infobox&lt;br /&gt;
      # recognition.&lt;br /&gt;
&lt;br /&gt;
      while ($thisline =~ m/(.*)(\{\{[^{]+\}\})(.*)/)&lt;br /&gt;
      {&lt;br /&gt;
        $thisline = $1 . $3;&lt;br /&gt;
&lt;br /&gt;
# FIXME - Diagnostics.&lt;br /&gt;
#print &amp;quot;Pruning \&amp;quot;$2\&amp;quot;.\n&amp;quot;;&lt;br /&gt;
      }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
      # Take action depending on state.&lt;br /&gt;
&lt;br /&gt;
      if (&amp;#039;top&amp;#039; eq $state)&lt;br /&gt;
      {&lt;br /&gt;
        # At the top level.&lt;br /&gt;
        # We&amp;#039;re either seeing content, or the start of a different type&lt;br /&gt;
        # of section.&lt;br /&gt;
&lt;br /&gt;
        if ($thisline =~ m/\{\{infobox/i)&lt;br /&gt;
        {&lt;br /&gt;
          $hasinfo = 1;&lt;br /&gt;
&lt;br /&gt;
          $state = &amp;#039;infobox&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/==\s*references/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;refs&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/==\s*see also/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;also&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/==\s*external links/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;links&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/\[\[Category\:/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;done&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        else&lt;br /&gt;
        {&lt;br /&gt;
          # This seems to be content.&lt;br /&gt;
&lt;br /&gt;
# FIXME - Emit content, for debugging.&lt;br /&gt;
#print &amp;quot;-- $thisline&amp;quot;; # Already has a newline.&lt;br /&gt;
&lt;br /&gt;
          # Count characters.&lt;br /&gt;
          if ($thisline =~ m/(\S.*\S)/)&lt;br /&gt;
          {&lt;br /&gt;
            $charcount += length($1);&lt;br /&gt;
          }&lt;br /&gt;
&lt;br /&gt;
          # Make note of references.&lt;br /&gt;
          # Count close-ref tags to get a more accurate count (at most one per line).&lt;br /&gt;
          # FIXME - HTML source seems to turn &amp;lt; into &amp;quot;&amp;amp;lt;&amp;quot;.&lt;br /&gt;
          if ($thisline =~ m/\/ref\&amp;gt;/i)&lt;br /&gt;
          {&lt;br /&gt;
            $refcount++;&lt;br /&gt;
          }&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
      elsif (&amp;#039;infobox&amp;#039; eq $state)&lt;br /&gt;
      {&lt;br /&gt;
        # We don&amp;#039;t care what&amp;#039;s in the infobox; just when it ends.&lt;br /&gt;
&lt;br /&gt;
        if ($thisline =~ m/\}\}/)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;top&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
      elsif (&amp;#039;refs&amp;#039; eq $state)&lt;br /&gt;
      {&lt;br /&gt;
        # We don&amp;#039;t care what&amp;#039;s in the references section.&lt;br /&gt;
        # It should just be a &amp;quot;{{reflist}}&amp;quot; template.&lt;br /&gt;
&lt;br /&gt;
        if ($thisline =~ m/==\s*external links/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;links&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/==\s*see also/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;also&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/\[\[Category\:/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;done&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
      elsif (&amp;#039;also&amp;#039; eq $state)&lt;br /&gt;
      {&lt;br /&gt;
        # We don&amp;#039;t care what&amp;#039;s in the &amp;quot;see also&amp;quot; section.&lt;br /&gt;
        # In theory it&amp;#039;s content, in practice it bloats the stats.&lt;br /&gt;
&lt;br /&gt;
        if ($thisline =~ m/==\s*external links/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;links&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/==\s*references/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;refs&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/\[\[Category\:/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;done&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
      elsif (&amp;#039;links&amp;#039; eq $state)&lt;br /&gt;
      {&lt;br /&gt;
        # Look for URLs in this section.&lt;br /&gt;
&lt;br /&gt;
        if ($thisline =~ m/==\s*references/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;refs&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/==\s*see also/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;also&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/\[\[Category\:/i)&lt;br /&gt;
        {&lt;br /&gt;
          $state = &amp;#039;done&amp;#039;;&lt;br /&gt;
        }&lt;br /&gt;
        elsif ($thisline =~ m/\[http/i)&lt;br /&gt;
        {&lt;br /&gt;
          $excount++;&lt;br /&gt;
        }&lt;br /&gt;
      }&lt;br /&gt;
      else&lt;br /&gt;
      {&lt;br /&gt;
        print &amp;quot;### [ComputePageStats]  Bogus state \&amp;quot;$state\&amp;quot;.\n&amp;quot;;&lt;br /&gt;
        $state = &amp;#039;done&amp;#039;;&lt;br /&gt;
      }&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    #&lt;br /&gt;
    # Save statistics.&lt;br /&gt;
&lt;br /&gt;
    $$stats_p{length} = &amp;#039;{{tick}}&amp;#039; . $charcount;&lt;br /&gt;
    if ($charcount &amp;lt;= $shortsize)&lt;br /&gt;
    {&lt;br /&gt;
       $$stats_p{length} = &amp;#039;{{warnsign|&amp;#039; . $charcount . &amp;#039;}}&amp;#039;;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    $$stats_p{refcount} = $refcount;&lt;br /&gt;
    $$stats_p{excount} = $excount;&lt;br /&gt;
&lt;br /&gt;
    $$stats_p{hasinfo} = &amp;#039;N&amp;#039;;&lt;br /&gt;
    if ($hasinfo)&lt;br /&gt;
    {&lt;br /&gt;
      $$stats_p{hasinfo} = &amp;#039;Y&amp;#039;;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
    $$stats_p{jpl} = &amp;#039;{{cross}}&amp;#039;;&lt;br /&gt;
    if (defined $jplurl)&lt;br /&gt;
    {&lt;br /&gt;
      $$stats_p{jpl} = &amp;#039;[&amp;#039;. $jplurl . &amp;#039;]&amp;#039;;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    # Done.&lt;br /&gt;
  }&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# Main Program&lt;br /&gt;
#&lt;br /&gt;
&lt;br /&gt;
my ($lname, $oname);&lt;br /&gt;
my ($thisname, $ncount, $nidx, $intable);&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
$lname = $ARGV[0];&lt;br /&gt;
$oname = $ARGV[1];&lt;br /&gt;
&lt;br /&gt;
if ( (!(defined $lname)) || (!(defined $oname)) || (defined $ARGV[2]) )&lt;br /&gt;
{&lt;br /&gt;
  PrintHelp();&lt;br /&gt;
}&lt;br /&gt;
elsif (!open(NFILE, &amp;quot;&amp;lt;$lname&amp;quot;))&lt;br /&gt;
{&lt;br /&gt;
  print &amp;quot;### Unable to read from \&amp;quot;$lname\&amp;quot;.\n&amp;quot;;&lt;br /&gt;
}&lt;br /&gt;
else&lt;br /&gt;
{&lt;br /&gt;
  if (!open(OFILE, &amp;quot;&amp;gt;$oname&amp;quot;))&lt;br /&gt;
  {&lt;br /&gt;
    print &amp;quot;### Unable to write to \&amp;quot;$oname\&amp;quot;.\n&amp;quot;;&lt;br /&gt;
  }&lt;br /&gt;
  else&lt;br /&gt;
  {&lt;br /&gt;
    # Walk through the names file, processing pages.&lt;br /&gt;
&lt;br /&gt;
    $ncount = 0;&lt;br /&gt;
    $intable = 0;&lt;br /&gt;
&lt;br /&gt;
    while (defined ($thisname = &amp;lt;NFILE&amp;gt;))&lt;br /&gt;
    {&lt;br /&gt;
      $ncount++;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
      # Emit this line.&lt;br /&gt;
      # Start a new table if necessary.&lt;br /&gt;
&lt;br /&gt;
      if (!$intable)&lt;br /&gt;
      {&lt;br /&gt;
        # Diagnostics.&lt;br /&gt;
        print &amp;quot;-- Starting table at entry $ncount.\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
        print OFILE MakeTableHeader($ncount);&lt;br /&gt;
&lt;br /&gt;
        $intable = 1;&lt;br /&gt;
      }&lt;br /&gt;
&lt;br /&gt;
      print OFILE MakeStatLineForURL($thisname);&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
      # End the table if it&amp;#039;s reached the size limit.&lt;br /&gt;
&lt;br /&gt;
      $nidx = $ncount % $tsize;&lt;br /&gt;
&lt;br /&gt;
      if (0 == $nidx)&lt;br /&gt;
      {&lt;br /&gt;
        # Sanity.&lt;br /&gt;
        if (!$intable)&lt;br /&gt;
        {&lt;br /&gt;
          print &amp;quot;### Ending a table we didn&amp;#039;t start? (count = $ncount)\n&amp;quot;;&lt;br /&gt;
        }&lt;br /&gt;
&lt;br /&gt;
        print OFILE MakeTableFooter();&lt;br /&gt;
&lt;br /&gt;
        $intable = 0;&lt;br /&gt;
      }&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    # We&amp;#039;ve finished processing the names list.&lt;br /&gt;
    # Print a footer if we have to.&lt;br /&gt;
&lt;br /&gt;
    if ($intable)&lt;br /&gt;
    {&lt;br /&gt;
      print OFILE MakeTableFooter();&lt;br /&gt;
&lt;br /&gt;
      $intable = 0;&lt;br /&gt;
    }&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    # Diagnostics.&lt;br /&gt;
    print &amp;quot;-- Done.\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
    # Close the output file no matter what.&lt;br /&gt;
    close(OFILE);&lt;br /&gt;
  }&lt;br /&gt;
&lt;br /&gt;
  # Close the names file no matter what.&lt;br /&gt;
  close(NFILE);&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
#&lt;br /&gt;
# This is the end of the file.&lt;br /&gt;
#&lt;br /&gt;
&amp;lt;/syntaxhighlight&amp;gt;&lt;/div&gt;</summary>
		<author><name>imported&gt;Christopher Thomas</name></author>
	</entry>
</feed>