EPrints Technical Mailing List Archive

See the EPrints wiki for instructions on how to join this mailing list and related information.

Message: #03267


< Previous (by date) | Next (by date) > | < Previous (in thread) | Next (in thread) > | Messages - Most Recent First | Threads - Most Recent First

[EP-tech] Re: Importing several items from an XML file


I've attached it.

Lizz

On 15/07/2014 18:42, Sebastien Francois wrote:
Code? :-)

Or: where does your import code reside (in which function/method)?

Seb.

On 15/07/14 17:00, Lizz Jennings wrote:
Hello all,

I'm writing an import plugin - I've managed to get it to match the right
fields, and single records import fine (this is through the command line
at present).  However, although it iterates through each of the
records, each record just overwrites the previous one. So if I put the
return statement inside the foreach, I get the first record, and if I
put it at the end, I get the second record.

How can I make it make a list of items to import?

Lizz


*** Options: http://mailman.ecs.soton.ac.uk/mailman/listinfo/eprints-tech
*** Archive: http://www.eprints.org/tech.php/
*** EPrints community wiki: http://wiki.eprints.org/
*** EPrints developers Forum: http://forum.eprints.org/

=head1 NAME

EPrints::Plugin::Import::batb::PureImport

=cut
package EPrints::Plugin::Import::batb::PureImport;

use strict;

use EPrints::Plugin::Import::DefaultXML;

our @ISA = qw/ EPrints::Plugin::Import::DefaultXML /;

# Constructor: builds the plugin through the parent class and then fills in
# the display name, visibility and the kinds of object this plugin produces.
sub new
{
	my( $class, %params ) = @_;

	my $self = $class->SUPER::new( %params );

	# Scalar attributes set in one hash-slice assignment.
	@{$self}{ qw( name visible ) } = ( "Pure Import", "all" );

	$self->{produce} = [ qw( list/eprint dataobj/eprint ) ];

	return $self;
}

# Returns the qualified name of the root element this plugin expects.
# Both arguments are accepted but not consulted.
sub top_level_tag
{
	my( $plugin, $dataset ) = @_;

	my $root_tag = "dataset:GetDataSetsResponse";

	return $root_tag;
}

# Returns the name of the handler class used by this plugin.
sub handler_class
{
	return "EPrints::Plugin::Import::DefaultXML::DOMHandler";
}

# XML namespaces of the elements this plugin reads.
my %PURE_NS = (
	core    => "http://atira.dk/schemas/pure4/model/core/stable",
	dataset => "http://atira.dk/schemas/pure4/model/template/dataset/stable",
	person  => "http://atira.dk/schemas/pure4/model/template/abstractperson/stable",
);

# _first_ns( $node, $ns, $tag )
#
# Returns the first element named $tag in namespace $ns found via
# $node->getElementsByTagNameNS, or undef when $node is undef or nothing
# matches. Accepting an undef $node lets lookups be chained without dying
# on a missing intermediate element.
sub _first_ns
{
	my( $node, $ns, $tag ) = @_;

	# Explicit undef (not a bare return) so the result is always one value,
	# even when this call is used in list context.
	return undef unless defined $node;

	my $first = $node->getElementsByTagNameNS( $ns, $tag )->item(0);

	return $first;
}

# _pad2( $value )
#
# Prefixes a single-character value with "0" (e.g. "7" -> "07"); any other
# value is returned unchanged.
sub _pad2
{
	my( $value ) = @_;

	return length( $value ) == 1 ? "0" . $value : $value;
}

# _pure_date_to_string( $plugin, $date_node )
#
# Builds "YYYY", "YYYY-MM" or "YYYY-MM-DD" from the core:year, core:month and
# core:day children of $date_node. The day is only used when a month is
# present. Returns undef when $date_node is undef or has no year.
sub _pure_date_to_string
{
	my( $plugin, $date_node ) = @_;

	my $year = _first_ns( $date_node, $PURE_NS{core}, "year" );
	return undef unless defined $year;

	my $date = $plugin->xml_to_text( $year );

	my $month = _first_ns( $date_node, $PURE_NS{core}, "month" );
	return $date unless defined $month;
	$date .= "-" . _pad2( $plugin->xml_to_text( $month ) );

	my $day = _first_ns( $date_node, $PURE_NS{core}, "day" );
	return $date unless defined $day;
	$date .= "-" . _pad2( $plugin->xml_to_text( $day ) );

	return $date;
}

# _pure_item_to_epdata( $plugin, $pureitem )
#
# Converts one "content" element into a fresh epdata hash reference holding
# title, date/date_type, creators_name, type and eprint_status. Every call
# starts from an empty hash, so values from one record can never leak into
# another. Elements that are absent are skipped rather than treated as fatal.
sub _pure_item_to_epdata
{
	my( $plugin, $pureitem ) = @_;

	my $epdata = {};

	my $title_node = _first_ns( $pureitem, $PURE_NS{dataset}, "title" );
	my $title = _first_ns( $title_node, $PURE_NS{core}, "localizedString" );
	$epdata->{title} = $plugin->xml_to_text( $title ) if defined $title;

	my $pubdate = _first_ns( $pureitem, $PURE_NS{dataset}, "dateMadeAvailable" );
	my $date = _pure_date_to_string( $plugin, $pubdate );
	if( defined $date )
	{
		$epdata->{date} = $date;
		$epdata->{date_type} = "published";
	}

	my $creatorlist = _first_ns( $pureitem, $PURE_NS{dataset}, "persons" );
	if( defined $creatorlist )
	{
		foreach my $creator ( $creatorlist->getElementsByTagNameNS( $PURE_NS{dataset}, "dataSetPersonAssociation" ) )
		{
			# Only associations whose role URI marks a creator are imported;
			# an association without a role is ignored.
			my $role_node = _first_ns( $creator, $PURE_NS{person}, "personRole" );
			my $role_uri = _first_ns( $role_node, $PURE_NS{core}, "uri" );
			next unless defined $role_uri;

			my $role = $plugin->xml_to_text( $role_uri );
			next unless defined $role;
			next unless $role eq "/dk/atira/pure/dataset/roles/dataset/creator";

			my $name_node = _first_ns( $creator, $PURE_NS{person}, "name" );
			my $lastname = _first_ns( $name_node, $PURE_NS{core}, "lastName" );
			my $forename = _first_ns( $name_node, $PURE_NS{core}, "firstName" );

			my $name = {};
			$name->{family} = $plugin->xml_to_text( $lastname ) if defined $lastname;
			$name->{given} = $plugin->xml_to_text( $forename ) if defined $forename;

			# A creator with neither name part carries no information.
			push @{ $epdata->{creators_name} }, $name if %$name;
		}
	}

	$epdata->{type} = "data_collection";
	$epdata->{eprint_status} = "inbox";

	return $epdata;
}

# xml_to_epdata( $plugin, $dataset, $xml )
#
# Converts the first "content" record under the first core:result element of
# $xml into an epdata hash reference. $dataset is accepted but not consulted.
#
# Returns the epdata hash reference, or nothing (undef in scalar context)
# when $xml is undef or contains no result/content element.
#
# Only ONE record is returned per call, because the return value is a single
# epdata hash. Any further "content" elements under the same result are not
# converted here.
# NOTE(review): to import every record, _pure_item_to_epdata would have to be
# called once per "content" element from whichever hook in the parent class is
# able to create several data objects per call - confirm against DefaultXML
# before changing this.
sub xml_to_epdata
{
	my( $plugin, $dataset, $xml ) = @_;

	my $puredatasets = _first_ns( $xml, $PURE_NS{core}, "result" );
	return unless defined $puredatasets;

	my $pureitem = _first_ns( $puredatasets, $PURE_NS{core}, "content" );
	return unless defined $pureitem;

	return _pure_item_to_epdata( $plugin, $pureitem );
}

1;