[EP-tech] Re: RoMEO autocomplete

On 10/04/14 18:05, Gilles Fournié wrote:

We would like to add an autocompletion for the journal fields.

As explained in the Wiki page
we have downloaded and used the file found at

The solution works well.

But we realized that the file romeo_journals.autocomplete is old. Its
date is Jan 19, 2009. Of course, we checked its content and we noticed
that journals we can find on the Sherpa/RoMEO website are not in the
file. So, it seems it has not been updated for a while.

Does anybody know if there is a plan to update it, or whether we can find
another one elsewhere?

I have two files for you, which relate to the wiki page http://wiki.eprints.org/w/Adding_an_Auto-Completer_to_a_non-workflow_page

update_romeo_publishers lives in eprints/~~/bin and should be run daily - it creates a set of lookup files

get_journals lives in eprints/~~/cgi and is the bit that does the AJAX response stuff, using the files created above


use strict;
use HTTP::Request;
use LWP::UserAgent;
use XML::Twig;
use File::Slurp;
use EPrints;

# Directory holding the lookup files used by this script.
my $varpath = $EPrints::SystemSettings::conf->{base_path}.'/var/romeopub';
my $mapfile = "$varpath/map.txt";

# Publisher name => publisher id, loaded from the map file.  Every useful
# line of that file has the form "<digits> <publisher name>".
my $pub_map = {};
open( my $map_fh, '<', $mapfile ) or die "can't read mapfile: $mapfile: $!";
while ( my $line = <$map_fh> ) {
	chomp $line;
	# Skip lines that do not match, so that $1/$2 left over from an
	# earlier successful match are never stored by mistake.
	next unless $line =~ m/^(\d+) (.*)/;
	$pub_map->{$2} = $1;
}
close $map_fh;

# Some global variables #
# Journal title => HTML fragment for the autocompleter.
my $journal_data = {};

# Various subroutines #
# Decode a URL-encoded (form-style) string.
#   $url - the encoded text
# Returns the decoded text: every '+' becomes a space and every %XX escape
# becomes the byte with that hexadecimal value.  An undefined argument is
# handed back unchanged.
sub urldecode{
  my ($url) = @_;
  return $url unless defined $url;
  # '+' has to be translated before the %XX escapes are expanded, otherwise
  # a literal plus sign that arrived as %2B would become a space as well.
  $url =~ tr/+/ /;
  $url =~ s/%([0-9a-f][0-9a-f])/pack("C",hex($1))/egi;
  return $url;
}

# XML::Twig's routine for dealing with a journal entry
#   $twig    - the XML::Twig object driving the parse (not used here)
#   $journal - the <journal> element being handled
# Builds the <li> fragment for the journal and stores it in $journal_data,
# keyed by the journal title.
sub process_journal {
  my ( $twig, $journal ) = @_;

  # get the components; an element that is absent yields an empty string
  my %field;
  foreach my $tag (qw( jtitle zetocpub romeopub issn )) {
    my $child = $journal->first_child($tag);
    $field{$tag} = $child ? urldecode( $child->text ) : "";
  }
  my $title = $field{jtitle};
  my $issn  = $field{issn};

  # prefer the RoMEO publisher name and fall back to the Zetoc one
  my $publisher = length( $field{romeopub} ) ? $field{romeopub} : $field{zetocpub};

  # default policy block, used when there is no stored file for the publisher
  my $conditions = qq(<div class='romeo_message'><div class='romeo_yellow_content'><table width="100%"><tbody><tr><td><img class='romeo_message_icon' src='/style/images/alert.png' alt='Archiving of pre- and post-prints is not as straight forward as it could be.'></td><td><div class='publishers'><div style='float:right; padding:5px; width: 10em'><img src='http://www.sherpa.ac.uk/images/romeotiny.jpg' alt='SHERPA/RoMEO logo' title='Record data from the SHERPA/RoMEO database' /><p style='font-size:75%;'>SHERPA/Romeo is a project that categorises publisher policies on OA archiving.</p></div> <p>The publishers conditions are not defined.</p></div></td></tr></table></div></div>);
  my $pubid = $pub_map->{$publisher};
  if( defined $pubid ) {
    my $fn = $varpath."/".$pubid.".xhtml";
    # keep the default text if the file is missing or unreadable
    $conditions = read_file($fn) if -r $fn;
  }

  # build a lump of html based on the components
  my $html = "<li>$title";
  $html .= "<br />published by $publisher" if length $publisher;
  $html .= "<ul>";
  $html .= "<li id='for:value:component:_publication'>$title</li>";
  $html .= "<li id='for:value:component:_publisher'>$publisher</li>";
  $html .= "<li id='for:value:component:_issn'>$issn</li>";
  $html .= "<li id='for:block:absolute:publisher_policy'>$conditions</li>";
  $html .= "</ul></li>\n";

  # save the html
  $journal_data->{$title} = $html;

} ## end process_journal

# get a list of journals that match the query
#   $journal - the (partial) journal title typed by the user
# Returns a fragment of html: a comment when no title was supplied or
# nothing matched, a "keep typing" list for fewer than three characters,
# otherwise a <ul class='journals'> holding one entry per matching journal.
# NOTE(review): the API key is embedded in the query URL below.
sub get_journals {
  my $journal = shift;

  if (!$journal) {
    return "<!-- No journal name supplied -->\n";
  }

  return ("<ul><li>keep typing....</li></ul>") if (length($journal) < 3);

  # percent-encode everything that is not a letter or a digit
  $journal =~ s/([^a-z0-9])/sprintf("%%%02X",ord($1))/ige;
  my $query = "http://www.sherpa.ac.uk/romeo/api29.php?qtype=starts&jtitle=$journal&ak=hC0DitNXMJA";

  my $request = HTTP::Request->new( GET => "$query" );

  my $ua = LWP::UserAgent->new();
  my $response = $ua->request($request);

  # forget any matches left over from an earlier call
  %{$journal_data} = ();

  if ($response->is_success) {
    my $twig = XML::Twig->new(
                       'keep_encoding' => 1,
                       'TwigRoots' => { 'journals' => 1 },
                       'TwigHandlers' => { 'journal' => \&process_journal, }
                     );
    # safe_parse returns false and sets $@ instead of dying on bad XML;
    # process_journal fills $journal_data while the document is parsed
    $twig->safe_parse( $response->content() )
      or warn "RoMEO response could not be parsed: $@";
  } else {
    warn "RoMEO query failed: ".$response->status_line."\n";
  }

  my @html = ();
  if (!scalar keys %{$journal_data}) {
    push @html, "<!-- no matches -->";
    return (join "\n", @html);
  }

  push @html, "<ul class='journals'>\n";
  foreach my $title (sort keys %{$journal_data}) {
    push @html, "$journal_data->{$title}\n";
  } ## end of  foreach my $title (sort keys %{$journal_data})
  push @html, "</ul>\n";
  return (join "\n", @html);
} ## end get_journals

my $session = EPrints::Session->new();

# we need to send an initial XML declaration
print <<END;
<?xml version="1.0" encoding="UTF-8" ?>

END

# then we send the fragment of html for the autocompleter
# (a request without a "q" parameter is treated as an empty query)
my $q = $session->param( "q" );
$q = defined $q ? lc $q : "";
print get_journals( $q );
#print STDERR get_journals( $q );


#!/home/cpan/bin/perl -w -I/home/oarj/eprints/perl_lib

use strict;
use utf8;
use HTTP::Request;
use LWP::UserAgent;
use XML::Twig;
use EPrints::SystemSettings;

use Data::Dumper;

# Two file-level stores, each starting out as an empty hash reference.
my ( $publisher_data, $pub_map ) = ( {}, {} );

# Ask the RoMEO API for all publisher records and pull out their ids.
# Returns the list of numeric ids found in the response (at most one per
# line of the response body).
# Dies when the request fails, or when fewer than 100 ids come back.
sub get_romeo_pub_ids
{
  my $query = "http://www.sherpa.ac.uk/romeo/api.php?all=yes&ak=<your_key>";

  my $request = HTTP::Request->new( GET => "$query" );

  my $ua = LWP::UserAgent->new();
  my $response = $ua->request($request);
  # report the real cause rather than the generic "not enough pubids"
  die "RoMEO request failed: ".$response->status_line."\n"
    unless $response->is_success;

  my @pubids = ();
  foreach my $line (split /\n/, $response->content()) {
     if( $line =~ m/id="([0-9]+)"/ ) { push @pubids, $1; }
  }
  warn ("pubids: ".scalar(@pubids)."\n");
  if( scalar(@pubids) < 100 ) { die "urk, not enough pubids"; }
  return @pubids;
}

# Wording shown for each archiving permission, keyed first by the kind of
# print ('pre' or 'post') and then by the permission value.
my %depositing = (
      'pre' => {
        'can' => 'It permits archiving of preprints',
        'cannot' => 'It prohibits archiving of preprints',
        'restricted' => 'It permits OA archiving of preprints subject to restrictions (see below)',
        'unclear' => 'Its policy on OA archiving of preprints is unclear.  Please check the publisher policy (see link below)'
      },
      'post' => {
        'can' => 'It permits archiving of postprints',
        'cannot' => 'It prohibits archiving of postprints',
        'restricted' => 'It permits OA archiving of postprints subject to restrictions (see below)',
        'unclear' => 'Its policy on OA archiving of postprints is unclear.  Please check the publisher policy (see link below)'
      },
);

# Various subroutines #

# Build the html describing the archiving policy for one kind of print.
#   $which      - 'pre' or 'post'
#   $print_twig - element holding the "${which}archiving" child and,
#                 optionally, a "${which}restrictions" child
# Returns a <dl> fragment, or an empty string when the element or its
# permission is missing.
sub process_prints {
  my ($which, $print_twig) = @_;

  return "" unless $print_twig;
  my $archiving = $print_twig->first_child("${which}archiving");
  my $permission = $archiving ? $archiving->text : "";
  return "" unless $permission;

  my $holder = $print_twig->first_child("${which}restrictions");
  my @restrictions = $holder ? $holder->children : ();

  # a permission value with no wording of its own is reported as unclear
  my $wording = exists $depositing{$which} ? $depositing{$which}{$permission} : undef;
  $wording = $depositing{$which}{'unclear'}
    if !defined $wording && exists $depositing{$which};
  $wording = "" unless defined $wording;

  my $text = "<dl><dt>".$wording."</dt>\n";
  if (scalar @restrictions) {
    $text .= "<dd>The publisher defines the following restriction:\n<ul>\n";
    foreach my $restriction (@restrictions) {
      $text .= "<li>".$restriction->text."</li>\n";
    } ## end of foreach restriction
    $text .= "</ul>\n</dd>\n";
  } ## end of if scalar restrictions
  $text .= "</dl>\n";
  return $text;
}

# XML::Twig's routine for dealing with a publisher entry
#   $twig      - the XML::Twig object driving the parse (not used here)
#   $publisher - the <publisher> element being handled
# Records publisher name => id in $pub_map and stores the policy html for
# the publisher in $publisher_data, keyed by publisher name.
sub process_publisher {
  my ( $twig, $publisher ) = @_;

  # text of an optional child element, or undef when the child is absent
  my $text_of = sub {
    my ( $parent, $tag ) = @_;
    my $child = $parent->first_child($tag);
    return $child ? $child->text : undef;
  };
  # children of an optional container element, or the empty list
  my $children_of = sub {
    my ( $parent, $tag ) = @_;
    my $child = $parent->first_child($tag);
    return $child ? $child->children : ();
  };

  # get the components
  my $name = $publisher->first_child('name')->text;
  my $pubid = $publisher->{att}->{id};

  $pub_map->{$name} = $pubid;
  my $homeurl = $text_of->( $publisher, 'homeurl' );
  my $romeocolour = $text_of->( $publisher, 'romeocolour' );
  $romeocolour = "" unless defined $romeocolour;
  my $copyright = $text_of->( $publisher, 'copyright' );
  #my $alias = $text_of->( $publisher, 'alias' );

  my @conditions = $children_of->( $publisher, 'conditions' );
  my @mandates = $children_of->( $publisher, 'mandates' );

  # build a lump of html based on the data returned.
  my $html = "<div class='romeo_message'>";
  if ($romeocolour eq 'green') {
    $html .= "<div class='romeo_green_content'><table style='width:100%; border:1px solid blue;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/good.png' alt='Archiving of pre- and post-prints is permitted'>";
  } elsif ($romeocolour eq 'red') {
    $html .= "<div class='romeo_orange_content'><table style='width:100%; border:1px solid red;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/warning.png' alt='Archiving of pre- and post-prints is not permitted'>";
  } else {
    $html .= "<div class='romeo_yellow_content'><table style='width:100%; border:1px solid yellow;'><tbody><tr><td><img class='romeo_message_icon' src='/style/images/alert.png' alt='Archiving of pre- and post-prints is not as straight forward as it could be.'>";
  }
  $html .= "</td><td><div class='publishers'>";
  $html .= "<div style='float:right; padding:5px; width: 10em'><img src='http://www.sherpa.ac.uk/images/romeotiny.jpg' alt='SHERPA/RoMEO logo' title='Record data from the SHERPA/RoMEO database' /><p style='font-size:75%;'>SHERPA/Romeo is a project that categorises publisher policies on OA archiving.</p></div>\n";
  $html .= "<p>This journal is published by ";
  if ($homeurl) {
    $html .= "<a href='$homeurl' title='Link to the publishers home page. NOTE: this will open a new window.' target='_new'>$name</a>.";
  } else {
    $html .= $name.".";
  }
  #$html .= "<br />(this publisher is also known as $alias)" if ($alias);
  $html .= "<br />\nAccording to the Sherpa/Romeo database, the following conditions apply:</p><dl>";

  $html .= "<dd>".process_prints('pre', $publisher->first_child('preprints'))."</dd>\n";
  $html .= "<dd>".process_prints('post', $publisher->first_child('postprints'))."</dd>\n";
  $html .= "</dl>\n";

  # if we have any general conditions, we need to add them to the data-set
  if (scalar @conditions) {
    $html .= "<p>The publisher also defines the following general conditions</p>\n<dl>\n";
    foreach my $condition (@conditions) {
      $html .= "<dd>".$condition->text."</dd>\n";
    } ## end of foreach condition
    $html .= "</dl>\n";
  } ## end of if conditions

  if (scalar @mandates) {
    $html .= "<p>Juliet has records on the following mandates:</p>\n<dl>\n";
    foreach my $mandate (@mandates) {
      my $funder = $mandate->first_child('funder');
      next unless $funder;
      my $julieturl = $text_of->( $funder, 'julieturl' );
      $julieturl = "" unless defined $julieturl;
      my $fundername = $text_of->( $funder, 'fundername' );
      $fundername = "" unless defined $fundername;
      my $acronym = $text_of->( $funder, 'funderacronym' );

      # the same pop-up link is used with and without an acronym
      my $link = "<a title='Opens new window.' href='$julieturl' onclick=\"dialog = window.open('$julieturl','dialogwindow','directories=no,menubar=no,scrollbars=yes,taskbar=no,resizable=yes,location=no,status=no,toolbar=no;');dialog.focus(); return false\">".$fundername."</a>";
      if ( defined $acronym ) {
        $html .= "<dd>".$link." (".$acronym.")</dd>\n";
      } else {
        $html .= "<dd>".$link."</dd>\n";
      }
    } ## end of foreach mandate
    $html .= "</dl>\n";
  } ## end of if mandates

  # only needed by the disabled copyright paragraph below
  if ($copyright) {
    $copyright =~ s/&lt;/</g;
    $copyright =~ s/&gt;/>/g;
  }
#  $html .= "<p>The publisher has given $copyright for their copyright references.</p>\n";
  $html .= "</div>\n</td></tr></table>\n</div></div>";

  # save the html
  $publisher_data->{$name} = $html;

} ## end process_publisher

# fetch the RoMEO record for one publisher and render it as html
#   $pubid - the RoMEO publisher id to look up
# Returns the html for every publisher found in the response, joined with
# newlines; an empty string when nothing could be fetched or parsed; or an
# html comment when no id was supplied.
# NOTE(review): the API key is embedded in the query URL below.
sub get_publisher {
  my $pubid = shift;
  my @html = ();

  if ($pubid) {
    my $query = "http://www.sherpa.ac.uk/romeo/api29.php?id=$pubid&ak=hC0DitNXMJA";

    my $request = HTTP::Request->new( GET => "$query" );

    my $ua = LWP::UserAgent->new();
    my $response = $ua->request($request);

    # start from a clean slate; process_publisher fills this in again
    $publisher_data = {};
    if ($response->is_success) {
      my $twig = XML::Twig->new(
                         'keep_encoding' => 1,
                         'TwigRoots' => { 'publishers' => 1 },
                         'TwigHandlers' => { 'publisher' => \&process_publisher, }
                       );
      # safe_parse returns false and sets $@ instead of dying on bad XML
      $twig->safe_parse( $response->content() )
        or warn "RoMEO record $pubid could not be parsed: $@";
    } else {
      warn "RoMEO request for publisher $pubid failed: ".$response->status_line."\n";
    }

    foreach my $name (sort keys %{$publisher_data}) {
      push @html, "$publisher_data->{$name}\n";
    } ## end of  foreach my $name (sort keys %{$publisher_data})
  } else {
    push @html, "<!-- No pubid name supplied -->\n";
  }

  return (join "\n", @html);

} ## end get_publisher

# Directory that receives one <id>.xhtml file per publisher plus map.txt.
my $path = $EPrints::SystemSettings::conf->{base_path}.'/var/romeopub';
if ( !-d $path ) {
  mkdir $path or die "failed to create $path: $!";
}

foreach my $pub_id (  get_romeo_pub_ids() )
{
  my $html = get_publisher($pub_id);
  # leave any existing file alone when nothing usable came back
  if ( $html eq "" ) {
    warn "no data for publisher $pub_id, keeping any existing file\n";
    next;
  }
  my $file = "$path/$pub_id.xhtml";
  open( my $pubinfo, '>', $file ) or die "failed to write $file: $!";
  print {$pubinfo} $html;
  # buffered write errors only show up when the handle is closed
  close $pubinfo or die "failed to write $file: $!";
}
use Data::Dumper;

# one "<id> <publisher name>" line per publisher
my $map_file = "$path/map.txt";
open( my $pubmap, '>', $map_file ) or die "failed to write $map_file: $!";
foreach my $name ( sort keys %$pub_map )
{
	print {$pubmap} "$pub_map->{$name} $name\n";
}
close $pubmap or die "failed to write $map_file: $!";