Skip to content

Commit

Permalink
Add custom model loading (#2)
Browse files Browse the repository at this point in the history
* Add custom model loading

* Improve edge case condition checks

* Use __all__ for imports
  • Loading branch information
asajatovic committed Aug 8, 2019
1 parent 781dcb5 commit e683079
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 20 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

setuptools.setup(
name="spacy-udpipe",
version="0.0.1",
version="0.0.2",
description="Use fast UDPipe models directly in spaCy",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
5 changes: 4 additions & 1 deletion spacy_udpipe/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
from .language import UDPipeLanguage, UDPipeModel, load
from .language import UDPipeLanguage, UDPipeModel, load, load_from_path
from .util import download

__all__ = ["UDPipeLanguage", "UDPipeModel",
"load", "load_from_path", "download"]
56 changes: 38 additions & 18 deletions spacy_udpipe/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,27 @@ def load(lang):
mimics spacy.load.
lang (unicode): ISO 639-1 language code or shorthand UDPipe model name.
RETURNS (spacy.language.Language): The UDPipeLanguage object.
RETURNS (spacy.language.Language): The UDPipeLanguage object.
"""
model = UDPipeModel(lang)
nlp = UDPipeLanguage(model)
return nlp


def load_from_path(lang, path, meta=None):
    """Build a UDPipeLanguage pipeline around a custom UDPipe model that is
    read from the location given by the path argument.

    lang (unicode): ISO 639-1 language code.
    path (unicode): Path to the UDPipe model.
    meta (dict): Meta-information about the UDPipe model.
    RETURNS (spacy.language.Language): The UDPipeLanguage object.
    """
    # All three arguments are forwarded positionally to UDPipeModel; the
    # resulting model is the only argument given to UDPipeLanguage.
    return UDPipeLanguage(UDPipeModel(lang, path, meta))


class UDPipeLanguage(Language):

def __init__(self, udpipe_model, meta=None, **kwargs):
Expand Down Expand Up @@ -93,7 +107,7 @@ def __call__(self, text):
udpipe_sents = self.model(text) if text else [Sentence()]
text = " ".join(s.getText() for s in udpipe_sents)
tokens, heads = self.get_tokens_with_heads(udpipe_sents)
if not len(tokens):
if not tokens:
return Doc(self.vocab)

words = []
Expand Down Expand Up @@ -186,32 +200,38 @@ def check_aligned(self, text, tokens):

class UDPipeModel:

def __init__(self, lang):
def __init__(self, lang, path=None, meta=None):
"""Load UDPipe model for given language.
lang (unicode): ISO 639-1 language code or shorthand UDPipe model name.
path (unicode): Path to UDPipe model.
meta (dict): Meta-information about the UDPipe model.
RETURNS (UDPipeModel): Language specific UDPipeModel.
"""
path = get_path(lang)
if path is None:
path = get_path(lang)
self.model = Model.load(path)
if not self.model:
if self.model is None:
msg = "Cannot load UDPipe model from " \
"file '{}'".format(path)
raise Exception(msg)
self._lang = lang.split('-')[0]
self._meta = {'authors': ("Milan Straka, "
"Jana Straková"),
'description': "UDPipe pretrained model.",
'email': '[email protected]',
'lang': 'udpipe_' + self._lang,
'license': 'CC BY-NC-SA 4.0',
'name': path.split('/')[-1],
'parent_package': 'spacy_udpipe',
'pipeline': 'Tokenizer, POS Tagger, Lemmatizer, Parser',
'source': 'Universal Dependencies 2.4',
'url': 'http://ufal.mff.cuni.cz/udpipe',
'version': '1.2.0'
}
if meta is None:
self._meta = {'authors': ("Milan Straka, "
"Jana Straková"),
'description': "UDPipe pretrained model.",
'email': '[email protected]',
'lang': 'udpipe_' + self._lang,
'license': 'CC BY-NC-SA 4.0',
'name': path.split('/')[-1],
'parent_package': 'spacy_udpipe',
'pipeline': 'Tokenizer, POS Tagger, Lemmatizer, Parser',
'source': 'Universal Dependencies 2.4',
'url': 'http://ufal.mff.cuni.cz/udpipe',
'version': '1.2.0'
}
else:
self._meta = meta

def __call__(self, text):
"""Tokenize, tag and parse the text and return it in an UDPipe
Expand Down

0 comments on commit e683079

Please sign in to comment.