Skip to content

Commit

Permalink
#1 Nicer representation of titles with non-ASCII in the path
Browse files Browse the repository at this point in the history
  • Loading branch information
EPrints committed Aug 23, 2023
1 parent 5864ab0 commit 847a735
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions plugins/EPrints/DataObj/Page.pm
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package EPrints::DataObj::Page;
use EPrints;
use EPrints::DataObj;
use EPrints::DataObj::RichDataObj;
use Text::Unidecode;

# This package inherits its behaviour from EPrints::DataObj::RichDataObj.
@ISA = ( 'EPrints::DataObj::RichDataObj' );

Expand Down Expand Up @@ -64,8 +65,17 @@ sub tidy_path
# Turns a free-text path (e.g. a page title) into a lower-case,
# dash-separated ASCII string.
#
# Takes the raw path as its only argument and returns the tidied string,
# which is empty when nothing usable remains.
my( $path ) = @_;

# keep at most the first 100 characters; '.' does not match a newline, so
# the capture also stops at the first line break
my ( $tidy ) = defined $path ? ( $path =~ /(^.{1,100})/ ) : ();

# no match (e.g. an empty path) leaves $tidy undefined - fall back to an
# empty string so the steps below do not operate on undef
$tidy = '' unless defined $tidy;

# converts non-ASCII characters into their nearest equivalent, e.g. stripping accents
$tidy = unidecode( $tidy );

# drop everything except spaces, ASCII letters, digits and dashes,
# then turn each remaining space into a dash
$tidy =~ s/[^ a-zA-Z0-9-]+//g;
$tidy =~ s/ /-/g;

# the steps above can leave runs of dashes - collapse each run, however
# long, to a single dash (a plain s/--/-/g only halves a run in one pass)
$tidy =~ s/-{2,}/-/g;

# remove a trailing dash; NOTE: a leading dash is deliberately left as-is,
# matching the previous behaviour
$tidy =~ s/-$//;

$tidy = lc( $tidy );

return $tidy;
Expand Down

0 comments on commit 847a735

Please sign in to comment.