#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if $running_under_some_shell;

=head1 NAME

hadsauthor - get H-index from ADS

=head1 SYNOPSIS

hadsauthor "de Wijn, A. G." "de Wijn, Alfred G." "de Wijn, Alfred"

=head1 DESCRIPTION

Queries the ADS database and attempts to calculate the H-index of the specified author.

=head1 ARGUMENTS

=over 4

=item B<--help>

Print a usage message.

=item B<--man>

List the full documentation.

=item B<--sloppy>

Don't match author name exactly.

=item B<--verbose>

Be verbose.

=item B<--terse>

Be terse.

=item B<--debug>

Turn on debug messages.  Implies --verbose.

=back

=cut

use strict;

use Pod::Usage;
use Getopt::Long;
use POSIX qw(tmpnam);
use Astro::ADS::Query;

# Package-global version string.
use vars qw/$VERSION/;
# The qw list splits the Revision keyword string on whitespace; element 1 is
# the bare revision number.
$VERSION = (qw$Revision: 1.0$)[1];
# Unbuffer STDOUT so that progress messages printed without a trailing
# newline show up immediately.
$|=1;

# File-scoped state shared by the subroutines and the main program below:
#   $help, $man  set by the --help / --man switches
#   %opt         remaining command-line switches (sloppy, verbose, terse, debug)
#   @author      author name(s) taken from the command line
#   @cites       citation counts, filled in by parselist()
#   $hindex      the computed Hirsch index
# NOTE(review): $version does not appear to be used anywhere in the visible file.
my ($help, $man, $version, %opt, $arg, @author, @cites, $hindex);

# Fetch the raw citation-count listing for one batch of papers.
#
# Arguments: the user agent to issue the request with, the URL of the ADS
# nph-abs_connect script, and the papers in the batch (objects providing a
# bibcode() method).
# Returns the body of the reply as a string.
# Dies with "Failed to fetch entries." unless the HTTP status is 200.
sub _fetchcites {
	my ($ua, $url, @batch) = @_;

	# The bibcodes go out as one ';'-terminated list.  A literal '&' in a
	# bibcode would end the query parameter early, so it is percent-encoded.
	my $bibcodes = join '', map { $_->bibcode() . ";" } @batch;
	$bibcodes =~ s/&/%26/g;

	my $reply = $ua->get("$url?bibcode=$bibcodes&data_type=Custom&format=%25c");

	# Use the response accessors rather than the object's private hash keys.
	die "Failed to fetch entries.\n" unless $reply->code == 200;

	return $reply->content;
}

# Query ADS for the papers of the author(s) in @author and collect their
# citation counts into @cites (via parselist()).
#
# Reads @author and %opt (sloppy, verbose, debug); takes no arguments and
# returns nothing.
# Dies if the query yields no papers or if a citation request fails.
# NOTE(review): LWP::UserAgent is not loaded by this file itself; presumably
# Astro::ADS::Query pulls it in -- confirm.
sub queryads {
	my $batchsize = 100;	# papers per citation request

	print "Author is " . (join ', ', @author) . ".\n" if $opt{debug};

	my $query = Astro::ADS::Query->new(Authors => \@author);
	$query->url("adsabs.harvard.edu");

	if(!$opt{sloppy}) {
		$query->exactauthor("YES");
		print "Matching author name exactly.\n" if $opt{debug};
	}

	$query->maxpapers(1000);

	print "Connecting to ADS... " if $opt{verbose};
	my $result = $query->querydb();
	my @papers = $result->papers;

	die "Query returned no papers.\n" unless @papers;
	print "Found " . scalar(@papers) . " papers... " if $opt{verbose};

	my $url = "http://" . $query->url() . "/cgi-bin/nph-abs_connect";

	my $ua = LWP::UserAgent->new(timeout => 30);
	$ua->agent("Astro::ADS script");
	$ua->env_proxy();

	# Ask for the citation counts one batch at a time.
	my @chunks;
	while(@papers) {
		my $chunk = _fetchcites($ua, $url, splice(@papers, 0, $batchsize));

		# Every reply starts with a 5-line header.  parselist() strips exactly
		# one header from the front of the buffer, so only the first reply
		# keeps its header.
		$chunk =~ s/(.*?\n){5}// if @chunks;

		push @chunks, $chunk;
	}

	# Join with a newline so that the last count of one reply can never run
	# into the first count of the next.
	my $rawbuffer = join "\n", @chunks;

	print "Succes.\n" if $opt{verbose};

	print $rawbuffer if $opt{debug};

	parselist($rawbuffer);

	print join ' ', @cites, "\n" if $opt{debug};

	return;
}

# Extract citation counts from a raw ADS reply and append them to @cites.
#
# Takes the reply text as its only argument.  The first five lines are
# treated as a header and discarded; every run of digits in the remainder is
# appended to @cites, in order of appearance.
sub parselist {
	my ($text) = @_;

	$text =~ s/(.*?\n){5}//; # drop the header lines
	$text =~ s/\s*\n\n/\n/g; # squeeze out empty lines

	# A global match in list context yields each digit run exactly once.
	push @cites, $text =~ /(\d+)/g;

	return;
}

# Sort comparator that orders numbers from largest to smallest.
# Meant to be used as "sort numerically LIST".
sub numerically {
	return $b <=> $a;
}

# Compute the Hirsch index from the citation counts in @cites.
#
# The counts are ranked from most to least cited; the index is the number of
# leading entries whose count exceeds their zero-based rank.  With --debug
# the ranked counts are printed first.
# Returns the index as an integer (0 when @cites is empty).
sub hindex {
	my @ranked = sort numerically @cites;
	print join " ", @ranked, "\n" if $opt{debug};

	my $h = 0;
	for my $count (@ranked) {
		# Stop at the first paper that has no more citations than its rank.
		last unless $count > $h;
		$h++;
	}
	return $h;
}

# Main program: parse the command line, query ADS, and report the H-index.
GetOptions(
	"help"         => \$help,
	"man"          => \$man,
	"sloppy"       => \$opt{sloppy},
	"verbose"      => \$opt{verbose},
	"terse"        => \$opt{terse},
	"debug"        => \$opt{debug},
) or die "Command line parsing failed!\n";

pod2usage(-verbose => 1)  if ($help);
pod2usage(-verbose => 2)  if ($man);

# --debug implies --verbose.
$opt{verbose} = 1 if($opt{debug});
# NOTE(review): --verbose also switches --terse on, yet nothing in the visible
# code reads $opt{terse}; confirm what --terse is meant to do.
$opt{terse} = 1 if($opt{verbose});

print "hadsauthor version $VERSION by Alfred de Wijn (dwijn\@iluvatar.eu.org)\n" if $opt{verbose};

# Every remaining argument is an author name.  The names are selected by
# presence rather than by truth, so an argument such as "0" cannot end the
# list early and hide the names that follow it.  Empty arguments are skipped.
if(!@ARGV or $ARGV[0] eq '') {
	die "No author specified.\n";
}
@author = grep { $_ ne '' } @ARGV;

queryads();

# The Hirsch ratio below divides by the number of parsed citation counts;
# stop with a clear message instead of an "Illegal division by zero".
die "No citation counts found in the ADS reply.\n" unless @cites;

$hindex = hindex();

printf "%s: %d papers, Hirsch index %d, Hirsch ratio %1.3f.\n", (join " & ", @author), scalar(@cites), $hindex, $hindex/scalar(@cites);

=head1 BUGS

This code only has features.

=head1 CAVEATS

I cannot guarantee the accuracy of the results of hadsauthor.  hadsauthor queries ADS to find papers by the specified author, then uses that list to find the number of citations to each paper.  While it is likely that ADS will list those papers that are cited often, the number of citations may be wrong.  Also, some authors have published using multiple names, or multiple authors may share the same name.  These factors all introduce inaccuracies.

=head1 AUTHORS

Alfred de Wijn E<lt>dwijn@iluvatar.eu.orgE<gt>

=head1 COPYRIGHT

Copyright (C) 2008 Alfred de Wijn. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

