#!/usr/bin/perl
#
#  file: extract-patent
#  auth: Brad Burdick
#  desc: split Patent data file into separate patent files
#
#  usage 'extract-patent [-b basedir] [input_file ...]'
#
##########################################################################
#  Copyright (c) 1994, 1995 Internet Multicasting Service
#
#  The Patent APS/Full-Text Dissemination processing software ("software")
#  was developed by the Internet Multicasting Service and may 
#  be used for academic, research, government, and internal business
#  purposes without charge.  You may not resell this code or include it
#  in a product that you are selling without prior permission of the
#  Internet Multicasting Service.
#
#  This software is provided ``as is'', without express or implied
#  warranty, and with no support nor obligation to assist in its
#  use, correction, modification or enhancement.  We assume no liability
#  with respect to the infringement of copyrights, trade secrets, or any
#  patents, and are not responsible for consequential damages.  Proper
#  use of the software is entirely the responsibility of the user.
##########################################################################

# Shell bootstrap: if this file is interpreted by a Bourne-style shell
# instead of perl, the shell executes the exec below and restarts the
# script under perl.  Perl itself never runs it because of the "if 0".
# The switch is -S (locate the script via PATH), not -s: -s turns on
# perl's own switch parsing, which would remove -b from @ARGV before
# Getopts gets to see it.
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
  if 0;

# basename of the running script; used as the prefix of diagnostics
$prog = $0;
$prog =~ s#.*/##;

# allow local libraries
push(@INC, '/usr/local/ims/lib');

# for processing command line options
require 'getopts.pl';

# Patent description info
require 'patent-desc.pl';

# Patent utility routines
require 'patent-util.pl';

# process command line options, if any; -b <dir> is the only switch and
# arrives in $opt_b.  A false return from Getopts means the switches could
# not be parsed, so stop with the usage line rather than guess.
&Getopts('b:')
	|| die "usage: $prog [-b basedir] [input_file ...]\n";

# base data directory: -b overrides the built-in default
$basedir = defined($opt_b) ? $opt_b : "/in/patent";

# where to place normal submissions
$workdir = "$basedir/work";

# create whichever of the two directories is missing, parent first.
# -d rather than -e: a plain file of that name is no use as a directory,
# and a failed mkdir is reported here instead of surfacing later as an
# unexplained write failure.
foreach $dir ($basedir, $workdir) {
	next if -d $dir;
	mkdir($dir, 0775)
		|| die "$prog: unable to create $dir: $!\n";
}

# lines collected so far for the patent currently being read
@patent = ();

# name of the file the current patent goes to; empty until it is known
$outfile = '';

# with no input named on the command line, read standard input instead
@ARGV = ("<&STDIN") unless @ARGV;

#
#  main processing loop
#
#  Each input is read line by line.  A line starting with PATN opens a new
#  patent and flushes the one collected so far; a line starting with WKU
#  supplies the patent number from which the output file name is built.
#  @patent is handed to write_patent as a typeglob, so it has to remain a
#  package variable.
#
foreach $file (@ARGV) {
	# two-argument open on purpose: the stdin default "<&STDIN" is a
	# dup specification, not a file name
	open(IN, $file) || die "$prog: unable to open $file: $!\n";

	while ($line = <IN>) {
		# chomp strips only a trailing newline; chop would also eat the
		# last character of a final line that lacks one
		chomp($line);

		if ($line =~ /^PATN/) {
			# start of a new patent: write out the previous one, if any
			if ($outfile) {
				&write_patent($outfile, *patent);

				@patent = ();
				$outfile = '';
			}
		} elsif ($line =~ /^WKU/) {
			# use patent number as file name for now: the first eight
			# characters following "WKU" and two whitespace characters.
			# With /e the replacement is Perl code, so the capture must
			# be written $1; \1 there is a reference to the constant 1.
			($outfile = $line) =~ s/^WKU\s\s(.*)/substr($1, 0, 8)/e;
			$outfile .= ".nc";
		}

		push(@patent, $line);
	}

	close(IN);

	# flush the last patent of this input
	if ($outfile) {
		&write_patent($outfile, *patent);
	} elsif (@patent) {
		warn "$prog: $file: no WKU line found, ",
			scalar(@patent), " line(s) not written\n";
	}

	# start the next input from a clean slate; a leftover $outfile would
	# make the next file's first PATN line rewrite the patent just
	# written, using an unrelated buffer
	@patent = ();
	$outfile = '';
}

exit 0;

