#!/usr/local/bin/perl
#
#  file: process-index
#  auth: Brad Burdick
#  desc: post-process SEC EDGAR SGML index file creating specified index
#        file type
#
#  usage: process-index [-D YYMMDD] [-c] [-f] [-m] [-d datadir] [input_file]
#
##########################################################################
#  Copyright (c) 1994, 1995 Internet Multicasting Service
#
#  The SEC EDGAR Level 1 Dissemination processing software ("software")
#  was developed by the Internet Multicasting Service and may 
#  be used for academic, research, government, and internal business
#  purposes without charge.  You may not resell this code or include it
#  in a product that you are selling without prior permission of the
#  Internet Multicasting Service.
#
#  This software is provided ``as is'', without express or implied
#  warranty, and with no support nor obligation to assist in its
#  use, correction, modification or enhancement.  We assume no liability
#  with respect to the infringement of copyrights, trade secrets, or any
#  patents, and are not responsible for consequential damages.  Proper
#  use of the software is entirely the responsibility of the user.
##########################################################################

# Fallback for systems that do not honor the "#!" line: a shell that ends
# up interpreting this file executes the exec below and re-runs the script
# under perl; perl itself never evaluates it because of the "if 0".
# The switch must be -S (locate the script via PATH), not -s: -s enables
# perl's own rudimentary switch parsing, which would strip -c/-f/-m/-D/-d
# from @ARGV before &Getopts gets to see them.
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
  if 0;

# program name with any leading directory stripped (used in diagnostics)
$prog = $0;
$prog =~ s#.*/##;

# local library directory, searched after the standard ones
push(@INC, '/usr/local/ims/lib');

# command line option processing
require 'getopts.pl';

# header values for index files
require 'index-hdr.pl';

# Edgar date manipulation routines
require 'edgar-date.pl';

# miscellaneous support routines
require 'edgar-util.pl';

# process command line options, if any
#   -D YYMMDD  date stamp to use instead of the current EDGAR date
#   -c         build the company index
#   -f         build the form type index
#   -m         master index (otherwise daily index)
#   -d dir     base directory for the index files
&Getopts('D:cd:fm');

# what type of processing?
$do_company = defined($opt_c);
$do_formtype = defined($opt_f);

# true if master index, otherwise assume daily index
$do_master = defined($opt_m);

# where to place index files
$datadir = defined($opt_d) ? "$opt_d" : "/ftp/edgar";
&makepath($datadir, 0755);

# date stamp for file name
$date = defined($opt_D) ? "$opt_D" : "";

# data file's date
# NOTE(review): presumably a localtime-style list (the month is adjusted
# by +1 below) -- confirm against edgar-date.pl
@today = &edgar_date;

# date stamp used in file name; always exactly six digits (YYMMDD).
# The year is reduced modulo 100 so that a "years since 1900" value of
# 100 or more (or a four digit year) cannot widen the stamp, which the
# substr() based date parsing later in this script relies on.
$date = sprintf("%02d%02d%02d", $today[5] % 100, $today[4]+1, $today[3])
	unless $date;

# where to place daily index file(s)
$daily = "daily-index";

# where to place full index file(s)
$full = "full-index";

# message of the day
$motd =
"\n\n" .
"ATTENTION:  Second quarter index data has been archived to the\n" .
"            following subdirectory:  edgar/full-index/1995/QTR2/.\n\n" .
"            First quarter index data has been archived to the\n" .
"            following subdirectory:  edgar/full-index/1995/QTR1/.\n";

# archive command lines
$compress = "/bin/compress -c";
$sit = "/usr/local/bin/sit -u -C edgar -o";
$zip = "/usr/local/bin/zip -b /tmp -jlq -u -9";

# file containing _no daily index file_ message
$nofile = '/usr/local/ims/lib/no-daily';

# take data from stdin if no file provided
# ("<&STDIN" is handed to the two argument open() in the main loop,
# which duplicates the standard input handle)
if ($#ARGV < 0) {
	push(@ARGV, "<&STDIN");
}

# name of the company index: one fixed file for the master index,
# a date stamped file for the daily index
if ($do_company) {
	$compfile = $do_master
		? "$datadir/$full/company.idx"
		: "$datadir/$daily/company.$date.idx";
}

# name of the form type index, chosen the same way
if ($do_formtype) {
	$formfile = $do_master
		? "$datadir/$full/form.idx"
		: "$datadir/$daily/form.$date.idx";
}


#
#  main processing loop
#
foreach $file (@ARGV) {
	# two argument open on purpose: the default "<&STDIN" entry placed in
	# @ARGV when no file is given needs its mode characters interpreted
	open(IN, "$file") || die "$prog: error getting input from $file: $!\n";

	while ($line = <IN>) {
		# only records beginning with a digit are index entries
		# (the first field is the CIK); skip everything else
		next unless ($line =~ /^[0-9]/);

		# strip the line terminator only; a plain chop() would eat the
		# last data character of a final line that lacks a newline
		$line =~ s/\n$//;

		# record layout: cik|company name|form type|date filed|file name
		($cik, $cname, $type, $filedate, $filename) = split(/\|/, $line);

		# fixed width lines; the leading column is what sort() keys on later
		if ($do_company) {
			push(@company, sprintf("%-60.60s  %-10.10s  %-10.10s  %-10.10s  %-47.47s",
			                       $cname, $type, $cik, $filedate, $filename));
		}
		if ($do_formtype) {
			push(@form, sprintf("%-10.10s  %-60.60s  %-10.10s  %-10.10s  %-47.47s",
			                    $type, $cname, $cik, $filedate, $filename));
		}
	}

	close(IN);
}

#
# company index: write the sorted index file, then either build the
# archive files (master index) or mail the file out (daily index)
#
if ($do_company) {
	# entries start with the company name, so a plain string sort orders
	# the index by company
	local(@sortedindex) = sort(@company);
	local(@header);

	if ($do_master) {
		@header = @master_hdr;
	} else {
		@header = @daily_hdr;
	}

	if (!open(COMPANY, ">$compfile")) {
		# without the index file there is nothing to archive or mail;
		# carrying on would only process a stale or missing file
		warn "$prog: unable to open $compfile: $!\n";
	} else {
		&process_hdr('COMPANY', *header);
		print COMPANY join("\n", @header), "\n";

		# print message of the day if available
		print COMPANY $motd if ($do_master && $motd);

		print COMPANY sprintf("\n%-60.60s  %-10.10s  %-10.10s  %-10.10s  %-10.10s\n",
		                      'Company Name', 'Form Type', 'CIK', 'Date Filed',
		                      'File Name');
		print COMPANY "-" x 141, "\n";
		print COMPANY join("\n", @sortedindex), "\n";

		# buffered write errors only show up when the handle is closed
		close(COMPANY) || warn "$prog: error closing $compfile: $!\n";

		if ($do_master) {
			# create archive data files.  "cd" is the portable shell
			# builtin, and "&&" keeps the archive commands from running
			# in the wrong directory when the cd fails.  All file names
			# are relative to the new directory so that a relative
			# -d argument keeps working after the cd.
			system("cd $datadir/$full && $compress company.idx > company.Z");
			system("cd $datadir/$full && $sit company.sit company.idx");
			system("cd $datadir/$full && $zip company.zip company.idx");
		} else {
			# mail command for daily index file
			local($mail) = "/usr/ucb/mail";
			local($rcpt) = "edgar-daily@town.hall.org";

			# expand the two digit year of the stamp with a fixed pivot:
			# 69-99 -> 19xx, 00-68 -> 20xx
			local($yy) = int(substr($date, 0, 2));
			local($year) = ($yy < 69 ? 2000 : 1900) + $yy;

			local($subj) = "EDGAR daily index for ";
			$subj .= sprintf("%s %02d, %4d", $Months[int(substr($date, 2, 2))],
			                 substr($date, 4, 2), $year);

			if ((-s $compfile) < 100000) {
				system("$mail -s \"$subj\" $rcpt < $compfile");
			} else {
				# send notice of file too big to normal daily list
				local($path, $notice);

				($path = $compfile) =~ s#/ftp##;
				$notice = "$path -- Index too big";
				system("$mail -s \"$notice\" $rcpt < $nofile");
			}

			# edgar-daily-big always gets the index, under the regular subject
			$rcpt = "edgar-daily-big@town.hall.org";
			system("$mail -s \"$subj\" $rcpt < $compfile");
		}
	}
}

#
# form type index: write the sorted index file and, for the master
# index, build the archive files
#
if ($do_formtype) {
	# entries start with the form type, so a plain string sort orders
	# the index by form type
	local(@sortedindex) = sort(@form);
	local(@header);

	if ($do_master) {
		@header = @master_hdr;
	} else {
		@header = @daily_hdr;
	}

	if (!open(FORM, ">$formfile")) {
		# without the index file there is nothing to archive;
		# carrying on would only process a stale or missing file
		warn "$prog: unable to open $formfile: $!\n";
	} else {
		&process_hdr('FORM', *header);
		print FORM join("\n", @header), "\n";

		# print message of the day if available
		print FORM $motd if ($do_master && $motd);

		print FORM sprintf("\n%-10.10s  %-60.60s  %-10.10s  %-10.10s  %-10.10s\n",
		                   'Form Type', 'Company Name', 'CIK', 'Date Filed',
		                   'File Name');
		print FORM "-" x 141, "\n";
		print FORM join("\n", @sortedindex), "\n";

		# buffered write errors only show up when the handle is closed
		close(FORM) || warn "$prog: error closing $formfile: $!\n";

		if ($do_master) {
			# create archive data files.  "cd" is the portable shell
			# builtin, and "&&" keeps the archive commands from running
			# in the wrong directory when the cd fails.  All file names
			# are relative to the new directory so that a relative
			# -d argument keeps working after the cd.
			system("cd $datadir/$full && $compress form.idx > form.Z");
			system("cd $datadir/$full && $sit form.sit form.idx");
			system("cd $datadir/$full && $zip form.zip form.idx");
		}
	}
}

exit 0;


#
# process index header
#   expects fixed format from header - see lib/index-hdr.pl
#
#   arguments:
#     $hdr_type  'COMPANY' or 'FORM'; any other value only gets the
#                date line filled in
#     *header    typeglob of the header array, modified in place
#
#   fills the first "%s" of three header lines:
#     [0]  index description ("by Company Name" / "by Form Type")
#     [1]  date taken from the global $date stamp (YYMMDD)
#     [6]  index file name relative to the data directory
#
#   reads the globals $date, @Months, $do_master, $full and $daily
#
sub process_hdr {
	local($hdr_type) = shift;
	local(*header) = shift;

	# expand the two digit year of the stamp with a fixed pivot
	# (69-99 -> 19xx, 00-68 -> 20xx) instead of assuming 19xx
	local($yy) = int(substr($date, 0, 2));
	local($year) = ($yy < 69 ? 2000 : 1900) + $yy;

	# NOTE(review): @Months is indexed with the 1-based month number here;
	# confirm that its definition has a placeholder at index 0
	local($recv) = sprintf("%s %02d, %04d", $Months[int(substr($date, 2, 2))],
	                 substr($date, 4, 2), $year);

	# data file's date
	$header[1] =~ s/%s/$recv/;
	if ($hdr_type eq 'COMPANY') {
		$header[0] =~ s|%s|by Company Name|;
		if ($do_master) {
			$header[6] =~ s|%s|$full/company.idx|;
		} else {
			$header[6] =~ s|%s|$daily/company.$date.idx|;
		}
	} elsif ($hdr_type eq 'FORM') {
		$header[0] =~ s|%s|by Form Type|;
		if ($do_master) {
			$header[6] =~ s|%s|$full/form.idx|;
		} else {
			$header[6] =~ s|%s|$daily/form.$date.idx|;
		}
	}
}

