#!/usr/local/bin/perl
#
#  file: process-patent
#  auth: Brad Burdick
#  desc: post-process Patent data file
#
#  usage 'process-patent [-a] [-b basedir] [input_file ...]'
#
##########################################################################
#  Copyright (c) 1994, 1995 Internet Multicasting Service
#
#  The Patent APS/Full-Text Dissemination processing software ("software")
#  was developed by the Internet Multicasting Service and may 
#  be used for academic, research, government, and internal business
#  purposes without charge.  You may not resell this code or include it
#  in a product that you are selling without prior permission of the
#  Internet Multicasting Service.
#
#  This software is provided ``as is'', without express or implied
#  warranty, and with no support nor obligation to assist in its
#  use, correction, modification or enhancement.  We assume no liability
#  with respect to the infringement of copyrights, trade secrets, or any
#  patents, and are not responsible for consequential damages.  Proper
#  use of the software is entirely the responsibility of the user.
##########################################################################

# Shell-bootstrap idiom: because of the trailing "if 0", Perl itself never
# executes this statement.  If the file is instead handed to a Bourne-style
# shell, the shell runs the first line and re-executes the script under
# /usr/bin/perl with the -s switch, passing the original arguments through.
# NOTE(review): the #! line names /usr/local/bin/perl while this line names
# /usr/bin/perl -- confirm which interpreter is intended.
eval 'exec /usr/bin/perl -s $0 ${1+"$@"}'
  if 0;

# who am i?  ($prog is $0 with everything up to the last '/' removed; it is
# used as the prefix of diagnostic messages)
($prog = $0) =~ s#.*/##;

# allow local libraries
push(@INC, '/usr/local/ims/lib');

# for processing command line options
require 'getopts.pl';

# Patent description info
require 'patent-desc.pl';

# Patent utility routines
require 'patent-util.pl';

# process command line options, if any
#   -a          select ASCII processing (sets $opt_a)
#   -b basedir  override the base data directory (sets $opt_b)
# Getopts() returns false when it met a switch it does not know; stop with
# a usage message instead of carrying on with a misread command line.
&Getopts('ab:') ||
	die "usage: $prog [-a] [-b basedir] [input_file ...]\n";

# what type of processing?  (-a is the only type handled by this script)
$do_ascii = defined($opt_a);

if (! $do_ascii) {
	die "$prog: no processing type specified!  Exiting.\n";
}

# base data directory (-b wins over the built-in default)
$basedir = defined($opt_b) ? $opt_b : "/in/patent";

# where to place normal submissions
# NOTE(review): not referenced again in this file; presumably read as a
# global by the routines in patent-util.pl -- confirm before removing.
$workdir = "$basedir/work";

# patent text: the output document, one element per output line
@patent = ();

# output file name (filled in from the WKU field while reading)
$outfile = '';

# take data from stdin if no file provided; "<&STDIN" is understood by the
# two-argument open() used when the input files are read
if ($#ARGV < 0) {
	push(@ARGV, "<&STDIN");
}

#
#  main processing loop
#
# For every input file: read it line by line, turn each recognised group
# header, tagged field and formatted text block into display lines collected
# in @patent, then write the document out and re-sign it.
#
# Each @patent_desc entry is split on ':' into group, tag, description and
# tab count.  Lines beginning with five blanks are continuation lines and
# are copied through unchanged.
foreach $file (@ARGV) {
	local($curr_group);     # group id of the section currently being read

	# two-argument open on purpose: the stdin fallback is spelled "<&STDIN"
	open(IN, "$file") || die "$prog: unable to open $file: $!\n";

LINE: while (defined($line = <IN>)) {
		# format_text() returns undef once it has hit EOF and we "redo LINE"
		# with that value; fall through to the loop test so the loop ends
		next unless defined($line);

		# strip the line terminator only; chop() would eat a real character
		# from a final line that has no trailing newline
		$line =~ s/\n$//;

		# continuation line: does not depend on any description entry, so
		# test it once here instead of inside the DESC loop
		if ($line =~ /^     /) {
			push(@patent, $line);
			next LINE;
		}

		# find matching description
DESC:	for (@patent_desc) {
			local($desc, $group, $tag, $ntabs);

			($group, $tag, $desc, $ntabs) = split(/:/);
			if ($line =~ /^$group/) {
				# group header: blank separator, then "GROUP  description"
				push(@patent, "");
				push(@patent, "$group  $desc");
				$curr_group = $group;
				last DESC;
			} elsif ($group eq 'Format' && $line =~ /^$tag /) { # handle format tags
				# format_text() consumes the continuation lines itself and
				# returns the first line it could not use (newline intact)
				$line = &format_text($line, $tag, *IN);
				# restart loop with last $line input
				redo LINE;
			} elsif ($tag && $line =~ /^$tag  /) {  # spaces are important in / /
				local($value);

				# a tag only counts inside the group it is described for
				next unless ($group eq $curr_group);

				# get text for tag
				$value = substr($line, 5);

				# leading space(s) is/are important for the classification
				# tags; squeeze them for everything else
				if ($line !~ /^(OCL|XCL|UCL|DCL) /) {
					$value =~ s/^\s+//;
				}

				# use patent number as file name for now
				if ($tag eq 'WKU') {
					$outfile = $value = substr($value, 0, 8);
				}
				push(@patent, sprintf("%s     %s:%s%s", $tag, $desc, "\t" x $ntabs,
				  $value));
				last DESC;
			}
		}
	}

	close(IN);

	&write_patent($outfile, *patent);

	# resign the modified document.  The list form of system() keeps the
	# data-derived file name away from the shell, and a failure is reported
	# rather than silently ignored.
	if ($outfile eq '') {
		warn "$prog: no WKU (patent number) found in $file; not signing\n";
	} elsif (system('/usr/local/ims/bin/sign-doc', $outfile) != 0) {
		warn "$prog: sign-doc failed for $outfile (status $?)\n";
	}

	# start the next input file with a clean slate
	@patent = ();
	$outfile = '';
}

exit 0;


#
# format paragraph/equation/table text
#
# Arguments (positional):
#   $line  the already newline-stripped input line that carries the format tag
#   $tag   the format tag found at the start of $line
#   *IN    typeglob of the input file handle to read continuation lines from
#
# Appends the formatted text to the global @patent: a blank line, the first
# line of the block, then every following continuation line (a line that
# starts with five blanks).  FNT blocks are wrapped in "Begin Footnote" /
# "End Footnote" markers.
#
# Returns the first line read that is NOT a continuation line, with its
# newline still attached, or undef when the input ran out.
sub format_text {
	local($line) = shift;       # original input line
	local($tag) = shift;        # format tag
	local(*IN) = shift;         # input file handle
	local($indent) = 0;         # left justified by default
	local($value);

	# get first line of text
	if ($tag eq 'EQU' || $tag eq 'TBL' || $tag eq 'TBL3') {
		# equations and tables keep the whole line, tag included
		$value = $line;
	} else {
		# ignore format tag for most cases
		$value = substr($line, 5);
	}

	# blank line before paragraph
	push(@patent, "");
	if ($tag eq 'FNT') {
		push(@patent, "Begin Footnote");
	}

	# process format info
	if ($tag eq 'PAC') {
		# Patent data uses 80 column record...
		$indent = int( ((80 - length($value)) / 2) ) - 1;  # PAC == centered text
		# an over-long line must not produce a negative indent: -1 in
		# particular is the "do not reformat" marker tested below
		$indent = 0 if ($indent < 0);
	} else {
		# a tag without an %indentation entry is left justified
		$indent = defined($indentation{$tag}) ? $indentation{$tag} : 0;
	}

	if ($tag eq 'PAR') {
		# special default indentation
		push(@patent, "     $value");
	} else {
		# -1 for PAR# reformat tags, has no effect on other tags
		# (a zero or negative repeat count yields the empty string)
		push(@patent, ' ' x ($indent-1) . $value);
	}

	# blank group id code is a continuation line; stop at EOF or at the
	# first line that is something else
	while (defined($line = <IN>) && $line =~ /^     /) {
		# strip the terminator only, so an unterminated last line is kept whole
		$line =~ s/\n$//;
		if ($indent != -1) {  # reformatting needed?
			$line =~ s/^\s+//;
		}
		push(@patent, ' ' x $indent . $line);
	}

	if ($tag eq 'FNT') {
		push(@patent, "End Footnote");
	}

	# return last line read
	$line;
}

