#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if $running_under_some_shell;

=head1 NAME

adsauthor - get BibTeX library entries or abstracts from ADS

=head1 SYNOPSIS

adsauthor "de Wijn, A.G."

=head1 DESCRIPTION

Queries the ADS database and (re)builds a BibTeX database for the specified author.

=head1 ARGUMENTS

=over 4

=item B<--help>

Print a usage message.

=item B<--man>

List the full documentation.

=item B<--verbose>

Be verbose.

=item B<--terse>

Be terse.

=item B<--update>

Update a BibTeX library or abstracts file from ADS.

=item B<--type>

Specify type of data to return.  Valid types are: bibtex (default), abstracts.

=item B<--sloppy>

Don't match author name exactly.

=item B<--nomangle>

Don't touch the ADS output; implies --nosanitize.

=item B<--nosanitize>

Don't sanitze ADS.

=item B<--maxpapers>

Maximum number of paper to return (default: 1000).

=item B<--output>

Specify the output file.  Output goes to STDOUT if not specified.

=item B<--debug>

Turn on debug messages.  Implies --verbose.

=back

=cut

use strict;

use Pod::Usage;
use Getopt::Long;
use IO::File;
use File::Copy;
use POSIX qw(tmpnam);
use Astro::ADS::Query;

use vars qw/$VERSION/;
$VERSION = (qw$Revision: 1.9$)[1];
$|=1;

my ($help, $man, $version, $update, $data_type, %opt);

sub queryads {
	my ($query,$result,@papers,$bibcodes,$base_url,$url,$ua,$request,$reply,$rawbuffer);

	print "Author is " . $opt{author}->[0] . ".\n" if $opt{debug};

	$query = new Astro::ADS::Query(Authors => $opt{author});
	$query->url("adsabs.harvard.edu");

	if($opt{type} =~ /^abs/) {
		print "Returning plain text abstracts.\n" if $opt{debug};
		$opt{type} = 'abstract';
		$data_type = "PLAINTEXT";
	} elsif($opt{type} eq '' or $opt{type} =~ /^bib/) {
		print "Returning BibTeX entries.\n" if $opt{debug};
		$opt{type} = 'bibtex';
		$data_type = "BIBTEX";
	} else {
		print "Unrecognized type '$opt{type}', using default.\n";
		$opt{type} = 'bibtex';
		$data_type = "BIBTEX";
	}

	if(!$opt{sloppy}) {
		$query->exactauthor("YES");
		print "Matching author name exactly.\n" if $opt{debug};
	}

	if($opt{maxpapers}) {
		$query->maxpapers($opt{maxpapers});
		print "Returning up to " . $opt{maxpapers} . " papers.\n" if $opt{debug};
	} else {
		$query->maxpapers(1000);
	}

	print "Connecting to ADS... " if $opt{verbose};
	$result = $query->querydb();
	@papers = $result->papers;

	if(scalar(@papers) eq 0) {
		die "Query returned no papers.\n";
	}
	print "Received " . scalar(@papers) . " papers... " if $opt{verbose};

	while(int((scalar(@papers)-1)/250) > 0) {
		$bibcodes = "";
		for(my $j=0; $j < 250; $j++) {
			$bibcodes = (pop(@papers))->bibcode() . ";" . $bibcodes;
		}
		$bibcodes =~ s/&/%26/g;

		$base_url = $query->url();
		$url = "http://$base_url/cgi-bin/nph-bib_query?bibcode=$bibcodes&data_type=$data_type";

		$ua = new LWP::UserAgent(timeout => 30); 
		$ua->agent("Astro::ADS script");
		$ua->env_proxy();

		$request = new HTTP::Request('GET', $url);
		$reply = $ua->request($request);
		if(${$reply}{"_rc"} eq 200) {
			$rawbuffer = ${$reply}{"_content"} . $rawbuffer;
			$rawbuffer =~ s#(.*?\n){5}##; # remove header
		} else {
			die "Failed to fetch entries.\n";
		}
	}

	$bibcodes = "";
	while(scalar(@papers)) {
		$bibcodes = (pop(@papers))->bibcode() . ";" . $bibcodes;
	}
	$bibcodes =~ s/&/%26/g;

	$base_url = $query->url();
	$url = "http://$base_url/cgi-bin/nph-bib_query?bibcode=$bibcodes&data_type=$data_type";

	$ua = new LWP::UserAgent(timeout => 30); 
	$ua->agent("Astro::ADS script");
	$ua->env_proxy();

	$request = new HTTP::Request('GET', $url);
	$reply = $ua->request($request);
	if(${$reply}{"_rc"} eq 200) {
		$rawbuffer = ${$reply}{"_content"} . $rawbuffer;
	} else {
		die "Failed to fetch entries.\n";
	}

	print "Succes.\n" if $opt{verbose};

	if(!$opt{nomangle} eq '') {
		return $rawbuffer . "\n";
	} else {
		return mangle($rawbuffer, $opt{author}->[0]) . "\n";
	}
}

sub mangle {
	my ($buffer, $author) = @_;
	my $extra = $opt{type};
	my $retbuffer;

	if(!$opt{sloppy} eq '') {
		$extra .= ' sloppy';
	}
	if(!$opt{nosanitize} eq '') {
		$extra .= ' nosanitize';
	}
	if($opt{maxpapers}) {
		$extra .= " maxpapers=$opt{maxpapers}";
	}

	# replace header
	print "Replacing header.\n" if $opt{debug};
	$buffer =~ s#(.*?\n){5}##;
	$buffer = "adsauthor \"$author\" $extra\n\n" . $buffer;

	if(!$opt{nosanitize}) {
		print "Sanitizing ADS.\n" if $opt{debug};
		$retbuffer = sanitize($buffer, $opt{author}->[0]);
	} else {
		$retbuffer = $buffer;
	}

	return $retbuffer;
}

sub sanitize {
	my ($buffer,$author) = @_;

	# no + in page numbers
	$buffer =~ s#(pages = {.*)-\+},#\1},#g;

	# no whitespace before punctuation
	$buffer =~ s#([A-Za-z]) ([!?.,;:])#\1\2#g;

	# no $\{$ or $\}$ anywhere, thanks.
	$buffer =~ s#\$\\{\$##g;
	$buffer =~ s#\$\\}\$##g;

	# #s must be escaped
	$buffer =~ s/#/\\#/g;

	# names; some of these may already be fixed by ADS themselves.
	$buffer =~ s#{Brue?ckner}#{Br{\\"{u}}ckner}#g;
	$buffer =~ s#{Bue?nte}#{B{\\"{u}}nte}#g;
	$buffer =~ s#{Bueno}, J.~T.#{Trujillo Bueno}, J.#g;
	$buffer =~ s#{Dame}#{Dam{\\'{e}}}#g;
	$buffer =~ s#{Del Toro Iniesta}#{del Toro Iniesta}#g;
	$buffer =~ s#{de Pontieu}#{De Pontieu}#g;
	$buffer =~ s#{de Wijn}#{De Wijn}#g;
	$buffer =~ s#{Duebner}#{Deubner}#g;
	$buffer =~ s#{Edlen}#{Edl{\\'e}n}#g;
	$buffer =~ s#{Froe?hlich}#{Fr{\\"{o}}hlich}#g;
	$buffer =~ s#{Garcia}#{Garc{\\'{\\i}}a}#g;
	$buffer =~ s#{Goeran}#{G{\\"o}ran}#g;
	$buffer =~ s#{Hubeny}#{Huben{\\'{y}}}#g;
	$buffer =~ s#{Ines}#{In{\\'{e}}s}#g;
	$buffer =~ s#{Jesus}#{Jes{\\'{u}}s}#g;
	$buffer =~ s#{Knoe?lker}#{Kn{\\"{o}}lker}#g;
	$buffer =~ s#{Loe?fdahl}#{L{\\"o}fdahl}#g;
	$buffer =~ s#{Lopez}#{L{\\'{o}}pez}#g;
	$buffer =~ s#{Lue?he}#{L{\\"{u}}he}#g;
	$buffer =~ s#{Martinez}#{Mart{\\'{\\i}}nez}#g;
	$buffer =~ s#{Martinez Pillet}#{Mart{\\'{\\i}}nez Pillet}#g;
	$buffer =~ s#{Mueller}#{M{\\"{u}}ller}#g;
	$buffer =~ s#{Palle}#{Pall{\\'{e}}}#g;
	$buffer =~ s#{Ramon}#{Ram{\\'{o}}n}#g;
	$buffer =~ s#{Rodriguez}#{Rodrigu{\\'{e}}z}#g;
	$buffer =~ s#{Rueedi}#{R{\\"{u}}edi}#g;
	$buffer =~ s#{Sanchez}#{S{\\'{a}}nchez}#g;
	$buffer =~ s#{Schroe?ter}#{Schr{\\"{o}}ter}#g;
	$buffer =~ s#{Schryver}#{Schrijver}#g;
	$buffer =~ s#{Schue?ssler}#{Sch{\\"{u}}ssler}#g;
	$buffer =~ s#{Socas Navarro}#{Socas-Navarro}#g;
	$buffer =~ s#{Trujillo-?[Bb]ueno}#{Trujillo Bueno}#g;
	$buffer =~ s#{Uexkue?ll}#{Uexk{\\"{u}}ll}#g;
	$buffer =~ s#{Uitenbroeck}#{Uitenbroek}#g;
	$buffer =~ s#{Vazquez}#{V{\\'{a}}zquez}#g;
	$buffer =~ s#{Woe?hl}#{W{\\"{o}}hl}#g;
	$buffer =~ s#{Zwann}#{Zwaan}#g;

	return $buffer;
}

GetOptions(
	"debug"        => \$opt{debug},
	"help"         => \$help,
	"man"          => \$man,
	"maxpapers=i"  => \$opt{maxpapers},
	"nomangle"     => \$opt{nomangle},
	"nosanitize"   => \$opt{nosanitize},
	"output=s"     => \$opt{output},
	"sloppy"       => \$opt{sloppy},
	"type=s"       => \$opt{type},
	"update"       => \$update,
	"verbose"      => \$opt{verbose},
	"terse"        => \$opt{terse},
	""             => \$opt{doprint},
) or die "Command line parsing failed!\n";
$opt{author} = [];

pod2usage(-verbose => 1)  if ($help);
pod2usage(-verbose => 2)  if ($man);

$opt{verbose} = 1 if($opt{debug});
$opt{terse} = 1 if($opt{verbose});

print "adsauthor version $VERSION by Alfred de Wijn (dwijn\@iluvatar.eu.org)\n" if $opt{verbose};

if(!$update) {
	my ($buffer, $tmpfile, $fh);

	$opt{author}->[0] = shift;
	if($opt{author}->[0] eq '') {
		die "No author specified.\n";
	}

	$buffer = queryads();

	if(!$opt{output} eq '') {
		# open tmpfile
		do {
			$tmpfile = tmpnam();
		} until $fh = IO::File->new($tmpfile, O_RDWR|O_CREAT|O_EXCL);
		END {
			if(stat($tmpfile)) {
				unlink($tmpfile) or die "Couldn't unlink temporary file: $!\n"; }
		}
		$fh->autoflush(1);
		print $fh $buffer or die "Couldn't write to temporary file: $!\n";
		close($fh);
		# rename to final destination
		move($tmpfile, $opt{output})
			or die "Couldn't rename temporary file: $!\n";
	} else {
		$opt{doprint} = 1;
	}

	if($opt{doprint}) {
		print $buffer;
	}
} else { # update
	my ($data_type, @directives, $dir, @list, $item, $buffer, $tmpfile, $fh);

	$opt{file}->[0] = shift;
	if($opt{file}->[0] eq '') {
		die "No file specified.\n";
	}

	# open tmpfile
	do {
		$tmpfile = tmpnam();
	} until $fh = IO::File->new($tmpfile, O_RDWR|O_CREAT|O_EXCL);
	END {
		if(stat($tmpfile)) {
			unlink($tmpfile) or die "Couldn't unlink temporary file: $!\n"; }
	}
	$fh->autoflush(1);

	# extract adsauthor directives
	open(TAP, "< $opt{file}->[0]")
		or die "Couldn't open $opt{file}->[0] for reading: $!\n";
	while(<TAP>) {
		if(/adsauthor verbatim begin/) {
			print $fh $_ or die "Couldn't write to temporary file: $!\n";
			VERB: while(<TAP>) {
				print $fh $_
				   	or die "Couldn't write to temporary file: $!\n";
				last VERB if /adsauthor verbatim end/;
			}
 			if(eof) { die "Couldn't process $opt{file}->[0]: open-ended verbatim block found!\n"; }
			print $fh "\n";
		} elsif(/adsauthor/) {
			push @directives, $_;
		}
	}
	close(TAP);

	if($opt{terse}) {
		print "Updating " . $opt{file}->[0] . ", " . scalar(@directives) . " ADS quer";
		if(scalar(@directives) > 1) {
			print "ies.\n";
		} else {
			print "y.\n";
		}
	}

	while(scalar(@directives) > 0) {
		$dir = shift @directives;
		($opt{author}->[0] = $dir) =~ s/.*"(.*)".*\n/\1/; 
		$dir =~ s/.*" //;
		@list = split / /, $dir;
		chomp @list;
		$opt{type} = shift @list;
		while(@list > 0) {
			$item = shift @list;
			if ($item eq 'sloppy') {
				$opt{sloppy} = 1;
			}
			if ($item eq 'nosanitize') {
				$opt{nosanitize} = 1;
			}
			if ($item =~ /^maxpapers/) {
				($opt{maxpapers} = $item) =~ s/maxpapers=//;
			}
		}
		$buffer .= queryads();
	}

	print $fh $buffer or die "Couldn't write to temporary file: $!\n";
	close($fh);

	# rename to final destination
	move($tmpfile, $opt{file}->[0])
		or die "Couldn't rename temporary file: $!\n";
}

=head1 BUGS

This code only has features.

=head1 AUTHORS

Alfred de Wijn E<lt>dwijn@iluvatar.eu.orgE<gt>

=head1 COPYRIGHT

Copyright (C) 2006-2008 Alfred de Wijn. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

