From 53b87793b2fb21e864ac3cacb1dc35c0ad36a885 Mon Sep 17 00:00:00 2001
From: David R Newman
Date: Wed, 31 Aug 2022 15:54:30 +0100
Subject: [PATCH 1/2] Script for updating eprint records with ORCIDs

---
Review notes (text between the "---" line and the diff is not part of the
commit message):
 * Both --user-field and --user_field are accepted.
 * --contributor-id-field now also selects the sub-field compared on each
   contributor entry, not only the eprint field that is searched.
 * With --check the summary says "would be updated".
 * The blob ids on the "index" lines were not regenerated for this revision.

 bin/update_eprint_orcids | 258 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 258 insertions(+)
 create mode 100755 bin/update_eprint_orcids

diff --git a/bin/update_eprint_orcids b/bin/update_eprint_orcids
new file mode 100755
index 0000000..b6d8db5
--- /dev/null
+++ b/bin/update_eprint_orcids
@@ -0,0 +1,258 @@
+#!/usr/bin/perl -w
+
+use FindBin;
+use lib "$FindBin::Bin/../../../perl_lib";
+
+######################################################################
+# update_eprint_orcids: copy ORCIDs from user records onto the matching
+# contributor entries (creators, editors, ...) of eprints in the archive.
+######################################################################
+
+=pod
+
+=head1 NAME
+
+B<update_eprint_orcids> - Update ORCID subfields of creators, editors or any other fields specified by $c->{orcid}->{eprint_fields}.
+
+=head1 SYNOPSIS
+
+B<update_eprint_orcids> - I<repository_id> I<user_field> [B<options>]
+
+=head1 DESCRIPTION
+
+Update ORCID subfields of creators, editors or any other fields specified by $c->{orcid}->{eprint_fields}.
+
+This script will search the repository for eprints that have fields listed in $c->{orcid}->{eprint_fields} that have their ID field set but not their ORCID. It will then take the map across the ORCID field from the user record.
+
+=head1 ARGUMENTS
+
+=over 8
+
+=item B<repository_id>
+
+The ID of the EPrint repository to search.
+
+=item B<user_field>
+
+The user field that maps to creators. editors. etc. ID.
+
+=back
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--check>
+
+Just check for which eprints and fields will be updated
+
+=item B<--contributor-id> I<contributor_id>
+
+Only updste eprint ORCIDs for a specific contributor_id
+
+=item B<--contributor-id-field> I<contributor_id_field>
+
+Use CONTRIBUTOR_contributor_id_field rather than CONTRIBUTOR_id
+
+Only updste eprint ORCIDs for a specific contributor_id
+
+=item B<--help>
+
+Print a brief help message and exit.
+
+=item B<--man>
+
+Print the full manual page and then exit.
+
+=item B<--quiet>
+
+Be vewwy vewwy quiet. This option will suppress all output unless an error occurs.
+
+=item B<--user-field> I<user_field>
+
+Use specified user_field rather than email
+
+=item B<--verbose>
+
+Explain in detail what is going on. May be repeated for greater effect.
+
+=back
+
+
+=cut
+
+
+use EPrints;
+use Getopt::Long;
+use Pod::Usage;
+use strict;
+
+my $verbose = 0;
+my $quiet = 0;
+my $help = 0;
+my $man = 0;
+my $check = 0;
+my $contributor_id_opt;
+my $contributor_id_field_opt;
+my $user_field_opt;
+
+Getopt::Long::Configure("permute");
+
+# Both --user-field and --user_field are accepted for the user field option.
+GetOptions(
+    'help|?' => \$help,
+    'man' => \$man,
+    'verbose+' => \$verbose,
+    'silent' => \$quiet,
+    'quiet' => \$quiet,
+    'check' => \$check,
+    'contributor-id=s' => \$contributor_id_opt,
+    'contributor-id-field=s' => \$contributor_id_field_opt,
+    'user-field|user_field=s' => \$user_field_opt,
+) || pod2usage( 2 );
+pod2usage( 1 ) if $help;
+pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;
+pod2usage( 2 ) if( scalar @ARGV != 1 );
+
+# Noise level handed to the session: 1 by default, 0 with --quiet, and 1 plus
+# the number of --verbose flags when --verbose is given (which takes precedence).
+our $noise = 1;
+$noise = 0 if( $quiet );
+$noise = 1+$verbose if( $verbose );
+
+# Sub-field of a contributor entry that identifies the person ("id" unless
+# --contributor-id-field is given). The eprint field that is searched is the
+# contributor field name followed by "_" and this sub-field.
+my $contributor_id_subfield = "id";
+$contributor_id_subfield = $contributor_id_field_opt if defined $contributor_id_field_opt;
+my $contributor_id_field = "_" . $contributor_id_subfield;
+
+# User field whose value is matched against the contributor ID sub-field.
+my $fieldmap = 'email';
+$fieldmap = $user_field_opt if defined $user_field_opt;
+
+# Set STDOUT to auto flush (without needing a \n)
+$|=1;
+
+my $repoid = $ARGV[0];
+my $session = EPrints::Session->new( 1 , $repoid , $noise );
+if( !defined $session )
+{
+    print STDERR "Failed to load repository: $repoid\n";
+    exit 1;
+}
+
+my $user_ds = $session->get_repository->get_dataset( "user" );
+
+# Stop with a clear message when the mapping field is not a user field.
+my $user_ds_field = $user_ds->get_field( $fieldmap );
+if( !defined $user_ds_field )
+{
+    print STDERR "Failed to find user field $fieldmap\n";
+    exit 1;
+}
+
+# Contributor fields to process; nothing can be done without them.
+my $contributor_fields = $session->config( 'orcid', 'eprint_fields' );
+if( !EPrints::Utils::is_set( $contributor_fields ) )
+{
+    print STDERR "No contributor fields configured in orcid eprint_fields\n";
+    exit 1;
+}
+
+my $users;
+if ( defined $contributor_id_opt )
+{
+    # Restrict the run to users whose mapping field matches --contributor-id.
+    my $users_searchexp = EPrints::Search->new(
+        session=>$session,
+        dataset=>$user_ds );
+    $users_searchexp->add_field( $user_ds_field, $contributor_id_opt );
+    $users = $users_searchexp->perform_search;
+    if ( ! $users->count() )
+    {
+        print STDERR "Failed to find user with $fieldmap $contributor_id_opt\n";
+        exit 1;
+    }
+}
+else {
+    $users = $user_ds->search();
+}
+
+my $count = 0;
+
+$users->map( sub {
+
+    my( $session, $user_dataset, $user ) = @_;
+
+    # Skip users without a well-formed ORCID.
+    my $user_orcid = $user->get_value( 'orcid' );
+    return unless EPrints::Utils::is_set( $user_orcid ) && $user_orcid =~ m/^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$/;
+
+    # Skip users with no value to match on; it could not identify a contributor.
+    my $user_id = $user->get_value( $fieldmap );
+    return unless EPrints::Utils::is_set( $user_id );
+
+    my $eprint_ds = $session->get_repository->get_dataset( "archive" );
+    foreach my $contributor_field ( @{ $contributor_fields } )
+    {
+        my $contributors_searchexp = EPrints::Search->new(
+            session=>$session,
+            dataset=>$eprint_ds );
+
+        my $contributor_ds_field = $eprint_ds->get_field( $contributor_field . $contributor_id_field );
+        if ( !defined $contributor_ds_field )
+        {
+            print STDERR "Failed to find contributor field ".$contributor_field.$contributor_id_field."\n";
+            exit 1;
+        }
+        $contributors_searchexp->add_field( fields => [ $contributor_ds_field ], value => $user_id, match => "EQ" );
+
+        my $eprints = $contributors_searchexp->perform_search;
+        my $params = {
+            'user_id' => $user_id,
+            'user_orcid' => $user_orcid,
+            'contributor_field' => $contributor_field,
+            'id_subfield' => $contributor_id_subfield,
+        };
+        $eprints->map( sub {
+            my( $session, $eprint_dataset, $eprint, $params ) = @_;
+
+            my $update = 0;
+            my $contributors = $eprint->get_value( $params->{contributor_field} );
+            foreach my $contributor ( @{ $contributors } )
+            {
+                next unless EPrints::Utils::is_set( $contributor );
+
+                # Compare on the same sub-field the search used, ignoring case.
+                my $contributor_id = $contributor->{ $params->{id_subfield} };
+                next unless EPrints::Utils::is_set( $contributor_id ) && lc($contributor_id) eq lc($params->{user_id});
+
+                unless ( EPrints::Utils::is_set( $contributor->{orcid} ) )
+                {
+                    print "eprint[".$eprint->get_id."] setting ".$params->{contributor_field}." orcid for ".$params->{user_id}." to ".$params->{user_orcid}."\n" if $noise > 1;
+                    $contributor->{orcid} = $params->{user_orcid};
+                    $update = 1;
+                    $count++;
+                }
+                # Only the first entry matching this user is considered.
+                last;
+            }
+
+            # Write back once per eprint, and never with --check.
+            if( $update && !$check )
+            {
+                $eprint->set_value( $params->{contributor_field}, $contributors );
+                $eprint->commit;
+            }
+        }, $params);
+    }
+});
+
+# With --check nothing is written, so report what would change instead.
+my $outcome = $check ? "would be updated" : "have been updated";
+print "A total of $count contributor ORCIDs $outcome.\n" if $noise > 0;
+
+$session->terminate();
+exit;
+

From 37a286b382946fd5a2f4389a3547ce08246510c1 Mon Sep 17 00:00:00 2001
From: David R Newman
Date: Wed, 31 Aug 2022 16:09:24 +0100
Subject: [PATCH 2/2] Tidies up POD/help text

---
 bin/update_eprint_orcids | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/bin/update_eprint_orcids b/bin/update_eprint_orcids
index b6d8db5..55ff8ec 100755
--- a/bin/update_eprint_orcids
+++ b/bin/update_eprint_orcids
@@ -16,13 +16,13 @@ B<update_eprint_orcids> - Update ORCID subfields of creators, editors or any oth
 
 =head1 SYNOPSIS
 
-B<update_eprint_orcids> - I<repository_id> I<user_field> [B<options>]
+B<update_eprint_orcids> I<repository_id> [B<options>]
 
 =head1 DESCRIPTION
 
-Update ORCID subfields of creators, editors or any other fields specified by $c->{orcid}->{eprint_fields}.
+Update ORCID subfields of creators, editors or any other fields specified by C<< $c->{orcid}->{eprint_fields} >>.
 
-This script will search the repository for eprints that have fields listed in $c->{orcid}->{eprint_fields} that have their ID field set but not their ORCID. It will then take the map across the ORCID field from the user record.
+This script will search the repository for eprints that have fields listed in C<< $c->{orcid}->{eprint_fields} >> that have their ID field set but not their ORCID. It will then copy the ORCID across from the matching user record.
 
 =head1 ARGUMENTS
 
@@ -32,10 +32,6 @@ This script will search the repository for eprints that have fields listed in $c
 
 The ID of the EPrint repository to search.
 
-=item B<user_field>
-
-The user field that maps to creators. editors. etc. ID.
-
 =back
 
 =head1 OPTIONS
@@ -44,17 +40,15 @@ The user field that maps to creators. editors. etc. ID.
 
 =item B<--check>
 
-Just check for which eprints and fields will be updated
+Just report which eprints and fields would be updated, without changing anything.
 
 =item B<--contributor-id> I<contributor_id>
 
-Only updste eprint ORCIDs for a specific contributor_id
+Only update eprint ORCIDs for a specific contributor_id.
 
 =item B<--contributor-id-field> I<contributor_id_field>
 
-Use CONTRIBUTOR_contributor_id_field rather than CONTRIBUTOR_id
-
-Only updste eprint ORCIDs for a specific contributor_id
+Use CONTRIBUTOR_contributor_id_field rather than CONTRIBUTOR_id.
 
 =item B<--help>
 
@@ -70,7 +64,7 @@ Be vewwy vewwy quiet. This option will suppress all output unless an error occur
 
 =item B<--user-field> I<user_field>
 
-Use specified user_field rather than email
+Use specified user_field rather than email.
 
 =item B<--verbose>
 