#!/usr/bin/perl eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if $running_under_some_shell; =head1 NAME adsauthor - get BibTeX library entries or abstracts from ADS =head1 SYNOPSIS adsauthor "de Wijn, A.G." =head1 DESCRIPTION Queries the ADS database and (re)builds a BibTeX database for the specified author. =head1 ARGUMENTS =over 4 =item B<--help> Print a usage message. =item B<--man> List the full documentation. =item B<--verbose> Be verbose. =item B<--terse> Be terse. =item B<--update> Update a BibTeX library or abstracts file from ADS. =item B<--type> Specify type of data to return. Valid types are: bibtex (default), abstracts. =item B<--sloppy> Don't match author name exactly. =item B<--nomangle> Don't touch the ADS output; implies --nosanitize. =item B<--nosanitize> Don't sanitze ADS. =item B<--maxpapers> Maximum number of paper to return (default: 1000). =item B<--output> Specify the output file. Output goes to STDOUT if not specified. =item B<--debug> Turn on debug messages. Implies --verbose. =back =cut use strict; use Pod::Usage; use Getopt::Long; use IO::File; use File::Copy; use POSIX qw(tmpnam); use Astro::ADS::Query; use vars qw/$VERSION/; $VERSION = (qw$Revision: 1.9$)[1]; $|=1; my ($help, $man, $version, $update, $data_type, %opt); sub queryads { my ($query,$result,@papers,$bibcodes,$base_url,$url,$ua,$request,$reply,$rawbuffer); print "Author is " . $opt{author}->[0] . ".\n" if $opt{debug}; $query = new Astro::ADS::Query(Authors => $opt{author}); $query->url("adsabs.harvard.edu"); if($opt{type} =~ /^abs/) { print "Returning plain text abstracts.\n" if $opt{debug}; $opt{type} = 'abstract'; $data_type = "PLAINTEXT"; } elsif($opt{type} eq '' or $opt{type} =~ /^bib/) { print "Returning BibTeX entries.\n" if $opt{debug}; $opt{type} = 'bibtex'; $data_type = "BIBTEX"; } else { print "Unrecognized type '$opt{type}', using default.\n"; $opt{type} = 'bibtex'; $data_type = "BIBTEX"; } if(!$opt{sloppy}) { $query->exactauthor("YES"); print "Matching author name exactly.\n" if $opt{debug}; } if($opt{maxpapers}) { $query->maxpapers($opt{maxpapers}); print "Returning up to " . $opt{maxpapers} . " papers.\n" if $opt{debug}; } else { $query->maxpapers(1000); } print "Connecting to ADS... " if $opt{verbose}; $result = $query->querydb(); @papers = $result->papers; if(scalar(@papers) eq 0) { die "Query returned no papers.\n"; } print "Received " . scalar(@papers) . " papers... " if $opt{verbose}; while(int((scalar(@papers)-1)/250) > 0) { $bibcodes = ""; for(my $j=0; $j < 250; $j++) { $bibcodes = (pop(@papers))->bibcode() . ";" . $bibcodes; } $bibcodes =~ s/&/%26/g; $base_url = $query->url(); $url = "http://$base_url/cgi-bin/nph-bib_query?bibcode=$bibcodes&data_type=$data_type"; $ua = new LWP::UserAgent(timeout => 30); $ua->agent("Astro::ADS script"); $ua->env_proxy(); $request = new HTTP::Request('GET', $url); $reply = $ua->request($request); if(${$reply}{"_rc"} eq 200) { $rawbuffer = ${$reply}{"_content"} . $rawbuffer; $rawbuffer =~ s#(.*?\n){5}##; # remove header } else { die "Failed to fetch entries.\n"; } } $bibcodes = ""; while(scalar(@papers)) { $bibcodes = (pop(@papers))->bibcode() . ";" . $bibcodes; } $bibcodes =~ s/&/%26/g; $base_url = $query->url(); $url = "http://$base_url/cgi-bin/nph-bib_query?bibcode=$bibcodes&data_type=$data_type"; $ua = new LWP::UserAgent(timeout => 30); $ua->agent("Astro::ADS script"); $ua->env_proxy(); $request = new HTTP::Request('GET', $url); $reply = $ua->request($request); if(${$reply}{"_rc"} eq 200) { $rawbuffer = ${$reply}{"_content"} . $rawbuffer; } else { die "Failed to fetch entries.\n"; } print "Succes.\n" if $opt{verbose}; if(!$opt{nomangle} eq '') { return $rawbuffer . "\n"; } else { return mangle($rawbuffer, $opt{author}->[0]) . "\n"; } } sub mangle { my ($buffer, $author) = @_; my $extra = $opt{type}; my $retbuffer; if(!$opt{sloppy} eq '') { $extra .= ' sloppy'; } if(!$opt{nosanitize} eq '') { $extra .= ' nosanitize'; } if($opt{maxpapers}) { $extra .= " maxpapers=$opt{maxpapers}"; } # replace header print "Replacing header.\n" if $opt{debug}; $buffer =~ s#(.*?\n){5}##; $buffer = "adsauthor \"$author\" $extra\n\n" . $buffer; if(!$opt{nosanitize}) { print "Sanitizing ADS.\n" if $opt{debug}; $retbuffer = sanitize($buffer, $opt{author}->[0]); } else { $retbuffer = $buffer; } return $retbuffer; } sub sanitize { my ($buffer,$author) = @_; # no + in page numbers $buffer =~ s#(pages = {.*)-\+},#\1},#g; # no whitespace before punctuation $buffer =~ s#([A-Za-z]) ([!?.,;:])#\1\2#g; # no $\{$ or $\}$ anywhere, thanks. $buffer =~ s#\$\\{\$##g; $buffer =~ s#\$\\}\$##g; # #s must be escaped $buffer =~ s/#/\\#/g; # names; some of these may already be fixed by ADS themselves. $buffer =~ s#{Brue?ckner}#{Br{\\"{u}}ckner}#g; $buffer =~ s#{Bue?nte}#{B{\\"{u}}nte}#g; $buffer =~ s#{Bueno}, J.~T.#{Trujillo Bueno}, J.#g; $buffer =~ s#{Dame}#{Dam{\\'{e}}}#g; $buffer =~ s#{Del Toro Iniesta}#{del Toro Iniesta}#g; $buffer =~ s#{de Pontieu}#{De Pontieu}#g; $buffer =~ s#{de Wijn}#{De Wijn}#g; $buffer =~ s#{Duebner}#{Deubner}#g; $buffer =~ s#{Edlen}#{Edl{\\'e}n}#g; $buffer =~ s#{Froe?hlich}#{Fr{\\"{o}}hlich}#g; $buffer =~ s#{Garcia}#{Garc{\\'{\\i}}a}#g; $buffer =~ s#{Goeran}#{G{\\"o}ran}#g; $buffer =~ s#{Hubeny}#{Huben{\\'{y}}}#g; $buffer =~ s#{Ines}#{In{\\'{e}}s}#g; $buffer =~ s#{Jesus}#{Jes{\\'{u}}s}#g; $buffer =~ s#{Knoe?lker}#{Kn{\\"{o}}lker}#g; $buffer =~ s#{Loe?fdahl}#{L{\\"o}fdahl}#g; $buffer =~ s#{Lopez}#{L{\\'{o}}pez}#g; $buffer =~ s#{Lue?he}#{L{\\"{u}}he}#g; $buffer =~ s#{Martinez}#{Mart{\\'{\\i}}nez}#g; $buffer =~ s#{Martinez Pillet}#{Mart{\\'{\\i}}nez Pillet}#g; $buffer =~ s#{Mueller}#{M{\\"{u}}ller}#g; $buffer =~ s#{Palle}#{Pall{\\'{e}}}#g; $buffer =~ s#{Ramon}#{Ram{\\'{o}}n}#g; $buffer =~ s#{Rodriguez}#{Rodrigu{\\'{e}}z}#g; $buffer =~ s#{Rueedi}#{R{\\"{u}}edi}#g; $buffer =~ s#{Sanchez}#{S{\\'{a}}nchez}#g; $buffer =~ s#{Schroe?ter}#{Schr{\\"{o}}ter}#g; $buffer =~ s#{Schryver}#{Schrijver}#g; $buffer =~ s#{Schue?ssler}#{Sch{\\"{u}}ssler}#g; $buffer =~ s#{Socas Navarro}#{Socas-Navarro}#g; $buffer =~ s#{Trujillo-?[Bb]ueno}#{Trujillo Bueno}#g; $buffer =~ s#{Uexkue?ll}#{Uexk{\\"{u}}ll}#g; $buffer =~ s#{Uitenbroeck}#{Uitenbroek}#g; $buffer =~ s#{Vazquez}#{V{\\'{a}}zquez}#g; $buffer =~ s#{Woe?hl}#{W{\\"{o}}hl}#g; $buffer =~ s#{Zwann}#{Zwaan}#g; return $buffer; } GetOptions( "debug" => \$opt{debug}, "help" => \$help, "man" => \$man, "maxpapers=i" => \$opt{maxpapers}, "nomangle" => \$opt{nomangle}, "nosanitize" => \$opt{nosanitize}, "output=s" => \$opt{output}, "sloppy" => \$opt{sloppy}, "type=s" => \$opt{type}, "update" => \$update, "verbose" => \$opt{verbose}, "terse" => \$opt{terse}, "" => \$opt{doprint}, ) or die "Command line parsing failed!\n"; $opt{author} = []; pod2usage(-verbose => 1) if ($help); pod2usage(-verbose => 2) if ($man); $opt{verbose} = 1 if($opt{debug}); $opt{terse} = 1 if($opt{verbose}); print "adsauthor version $VERSION by Alfred de Wijn (dwijn\@iluvatar.eu.org)\n" if $opt{verbose}; if(!$update) { my ($buffer, $tmpfile, $fh); $opt{author}->[0] = shift; if($opt{author}->[0] eq '') { die "No author specified.\n"; } $buffer = queryads(); if(!$opt{output} eq '') { # open tmpfile do { $tmpfile = tmpnam(); } until $fh = IO::File->new($tmpfile, O_RDWR|O_CREAT|O_EXCL); END { if(stat($tmpfile)) { unlink($tmpfile) or die "Couldn't unlink temporary file: $!\n"; } } $fh->autoflush(1); print $fh $buffer or die "Couldn't write to temporary file: $!\n"; close($fh); # rename to final destination move($tmpfile, $opt{output}) or die "Couldn't rename temporary file: $!\n"; } else { $opt{doprint} = 1; } if($opt{doprint}) { print $buffer; } } else { # update my ($data_type, @directives, $dir, @list, $item, $buffer, $tmpfile, $fh); $opt{file}->[0] = shift; if($opt{file}->[0] eq '') { die "No file specified.\n"; } # open tmpfile do { $tmpfile = tmpnam(); } until $fh = IO::File->new($tmpfile, O_RDWR|O_CREAT|O_EXCL); END { if(stat($tmpfile)) { unlink($tmpfile) or die "Couldn't unlink temporary file: $!\n"; } } $fh->autoflush(1); # extract adsauthor directives open(TAP, "< $opt{file}->[0]") or die "Couldn't open $opt{file}->[0] for reading: $!\n"; while() { if(/adsauthor verbatim begin/) { print $fh $_ or die "Couldn't write to temporary file: $!\n"; VERB: while() { print $fh $_ or die "Couldn't write to temporary file: $!\n"; last VERB if /adsauthor verbatim end/; } if(eof) { die "Couldn't process $opt{file}->[0]: open-ended verbatim block found!\n"; } print $fh "\n"; } elsif(/adsauthor/) { push @directives, $_; } } close(TAP); if($opt{terse}) { print "Updating " . $opt{file}->[0] . ", " . scalar(@directives) . " ADS quer"; if(scalar(@directives) > 1) { print "ies.\n"; } else { print "y.\n"; } } while(scalar(@directives) > 0) { $dir = shift @directives; ($opt{author}->[0] = $dir) =~ s/.*"(.*)".*\n/\1/; $dir =~ s/.*" //; @list = split / /, $dir; chomp @list; $opt{type} = shift @list; while(@list > 0) { $item = shift @list; if ($item eq 'sloppy') { $opt{sloppy} = 1; } if ($item eq 'nosanitize') { $opt{nosanitize} = 1; } if ($item =~ /^maxpapers/) { ($opt{maxpapers} = $item) =~ s/maxpapers=//; } } $buffer .= queryads(); } print $fh $buffer or die "Couldn't write to temporary file: $!\n"; close($fh); # rename to final destination move($tmpfile, $opt{file}->[0]) or die "Couldn't rename temporary file: $!\n"; } =head1 BUGS This code only has features. =head1 AUTHORS Alfred de Wijn Edwijn@iluvatar.eu.orgE =head1 COPYRIGHT Copyright (C) 2006-2008 Alfred de Wijn. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut