Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Script for updating eprint records with ORCIDs #1

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions bin/update_eprint_orcids
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#!/usr/bin/perl -w

use FindBin;
use lib "$FindBin::Bin/../../../perl_lib";

######################################################################
#
#
######################################################################

=pod

=head1 NAME

B<update_eprint_orcids> - Update ORCID subfields of creators, editors or any other fields specified by $c->{orcid}->{eprint_fields}.

=head1 SYNOPSIS

B<update_eprint_orcids> I<repository_id> [B<options>]

=head1 DESCRIPTION

Update ORCID subfields of creators, editors or any other fields specified by C<< $c->{orcid}->{eprint_fields} >>.

This script will search the repository for eprints that have fields listed in C<< $c->{orcid}->{eprint_fields} >> with their ID subfield set but not their ORCID. It will then copy the ORCID across from the matching user record.

=head1 ARGUMENTS

=over 8

=item B<repository_id>

The ID of the EPrint repository to search.

=back

=head1 OPTIONS

=over 8

=item B<--check>

Just check for which eprints and fields will be updated.

=item B<--contributor-id> I<contributor_id>

Only update eprint ORCIDs for the single user whose matching user field (email by default) equals I<contributor_id>.

=item B<--contributor-id-field> I<contributor_id_field>

Use CONTRIBUTOR_contributor_id_field rather than CONTRIBUTOR_id.

=item B<--help>

Print a brief help message and exit.

=item B<--man>

Print the full manual page and then exit.

=item B<--quiet>

Be vewwy vewwy quiet. This option will suppress all output unless an error occurs.

=item B<--user-field> I<user_field>

Match contributors against the specified user field rather than email.

=item B<--verbose>

Explain in detail what is going on. May be repeated for greater effect.

=back


=cut


use EPrints;
use Getopt::Long;
use Pod::Usage;
use strict;

# Command-line state. $noise, $check, $contributor_id_opt,
# $contributor_id_field and $fieldmap are read by the rest of the script.
my $verbose = 0;
my $quiet = 0;
my $help = 0;
my $man = 0;
my $check = 0;
my $contributor_id_opt;
my $contributor_id_field_opt;
my $user_field_opt;

# "permute" allows options to be mixed with the positional repository id.
Getopt::Long::Configure("permute");

GetOptions(
    'help|?' => \$help,
    'man' => \$man,
    'verbose+' => \$verbose,
    'silent' => \$quiet,
    'quiet' => \$quiet,
    'check' => \$check,
    'contributor-id=s' => \$contributor_id_opt,
    'contributor-id-field=s' => \$contributor_id_field_opt,
    # Accept both --user-field and --user_field so either spelling works.
    'user-field|user_field=s' => \$user_field_opt,
) || pod2usage( 2 );
pod2usage( 1 ) if $help;
pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;
# Exactly one positional argument (the repository id) is required.
pod2usage( 2 ) if( scalar @ARGV != 1 );

# Noise level: 0 when quiet, 1 by default, 1 + number of --verbose flags
# otherwise. --verbose takes precedence over --quiet when both are given.
our $noise = 1;
$noise = 0 if( $quiet );
$noise = 1+$verbose if( $verbose );

# Suffix appended to each configured eprint field name to get the identifier
# subfield that is searched ("_id" unless --contributor-id-field is given).
my $contributor_id_field = "_id";
$contributor_id_field = "_" . $contributor_id_field_opt if defined $contributor_id_field_opt;

# Name of the user field whose value is matched against the contributor
# identifier ("email" unless --user-field is given).
my $fieldmap = 'email';
$fieldmap = $user_field_opt if defined $user_field_opt;

# Turn on autoflush for the currently selected output handle (STDOUT) so
# progress output appears immediately, even without a trailing newline.
$| = 1;

# The single positional argument names the repository to open.
# NOTE(review): the leading 1 is passed through unchanged; presumably it
# selects command-line (non-CGI) mode - confirm against EPrints::Session.
my( $repoid ) = $ARGV[0];
my $session = EPrints::Session->new( 1, $repoid, $noise );
unless( defined $session )
{
    print STDERR "Failed to load repository: $repoid\n";
    exit 1;
}

# Select the users whose ORCIDs should be copied onto eprints: either the
# user(s) matching --contributor-id on the chosen user field, or every user.
my $user_ds = $session->get_repository->get_dataset( "user" );

# The user field name can come from the command line, so verify it exists
# before it is used for searching or for reading values from user records.
my $user_match_field = $user_ds->get_field( $fieldmap );
if( !defined $user_match_field )
{
    print STDERR "Failed to find user field $fieldmap\n";
    exit 1;
}

my $users;
if( defined $contributor_id_opt )
{
    my $users_searchexp = EPrints::Search->new(
        session => $session,
        dataset => $user_ds );
    $users_searchexp->add_field( $user_match_field, $contributor_id_opt );
    $users = $users_searchexp->perform_search;
    if( !$users->count() )
    {
        print STDERR "Failed to find user with $fieldmap $contributor_id_opt\n";
        exit 1;
    }
}
else
{
    # No specific contributor requested: process the whole user dataset.
    $users = $user_ds->search();
}

# Number of contributor entries that received (or, with --check, would
# receive) an ORCID.
my $count = 0;

# Hash key of the identifier subfield inside each contributor entry.
# $contributor_id_field carries a leading underscore ("_id" by default, or
# "_<name>" with --contributor-id-field), so strip it to get the key. This
# keeps the per-entry comparison in step with the subfield that is searched.
( my $id_subfield = $contributor_id_field ) =~ s/^_//;

# For every selected user with a well-formed ORCID, find archive eprints whose
# configured contributor fields reference that user and fill in any missing
# ORCID on the matching contributor entry.
$users->map( sub {

    my( $session, $user_dataset, $user ) = @_;

    # Only users with a syntactically valid ORCID iD are worth processing.
    # \A and \z anchor the whole value, so a trailing newline is rejected.
    my $user_orcid = $user->get_value( 'orcid' );
    return unless EPrints::Utils::is_set( $user_orcid )
        && $user_orcid =~ m/\A[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]\z/;

    # Without an identifier there is nothing to match contributors against.
    my $user_id = $user->get_value( $fieldmap );
    return unless EPrints::Utils::is_set( $user_id );

    my $contributor_fields = $session->config( 'orcid', 'eprint_fields' );
    if( !defined $contributor_fields )
    {
        print STDERR "No eprint fields configured in orcid eprint_fields\n";
        exit 1;
    }

    my $eprint_ds = $session->get_repository->get_dataset( "archive" );
    foreach my $contributor_field ( @{ $contributor_fields } )
    {
        my $contributor_ds_field = $eprint_ds->get_field( $contributor_field . $contributor_id_field );
        if( !defined $contributor_ds_field )
        {
            print STDERR "Failed to find contributor field ".$contributor_field.$contributor_id_field."\n";
            exit 1;
        }

        # Search archive eprints whose identifier subfield equals this user's
        # identifier.
        my $contributors_searchexp = EPrints::Search->new(
            session => $session,
            dataset => $eprint_ds );
        $contributors_searchexp->add_field(
            fields => [ $contributor_ds_field ],
            value => $user_id,
            match => "EQ" );

        my $eprints = $contributors_searchexp->perform_search;
        my $params = {
            'user_id' => $user_id,
            'user_orcid' => $user_orcid,
            'contributor_field' => $contributor_field,
        };
        $eprints->map( sub {
            my( $session, $eprint_dataset, $eprint, $params ) = @_;

            my $update = 0;
            my $contributors = $eprint->get_value( $params->{contributor_field} );
            foreach my $contributor ( @{ $contributors } )
            {
                # Skip entries that are not this user (case-insensitive match
                # on the identifier subfield).
                next unless EPrints::Utils::is_set( $contributor )
                    && EPrints::Utils::is_set( $contributor->{$id_subfield} )
                    && lc( $contributor->{$id_subfield} ) eq lc( $params->{user_id} );

                # Never overwrite an ORCID that is already present.
                unless( EPrints::Utils::is_set( $contributor->{orcid} ) )
                {
                    print "eprint[".$eprint->get_id."] setting ".$params->{contributor_field}." orcid for ".$params->{user_id}." to ".$params->{user_orcid}."\n" if $noise > 1;
                    $contributor->{orcid} = $params->{user_orcid};
                    $update = 1;
                    $count++;
                }

                # Only the first entry matching this user is considered.
                last;
            }

            # Persist the change unless this is a --check dry run.
            if( $update && !$check )
            {
                $eprint->set_value( $params->{contributor_field}, $contributors );
                $eprint->commit;
            }
        }, $params );
    }
} );

# Report the total unless running quietly. With --check nothing has been
# committed, so the wording says what would happen rather than what did.
if( $noise > 0 )
{
    if( $check )
    {
        print "A total of $count contributor ORCIDs would be updated.\n";
    }
    else
    {
        print "A total of $count contributor ORCIDs have been updated.\n";
    }
}

$session->terminate();
exit;