From 32b838cfecdcfe9c11c8b67de3c7e4a1879802a2 Mon Sep 17 00:00:00 2001 From: Daniel Bradburn Date: Mon, 8 May 2017 07:18:21 +0200 Subject: [PATCH 1/3] Made most of the simple mechanical changes needed for python3 support using __future__ and six --- quepy/dot_generation.py | 7 ++++--- quepy/encodingpolicy.py | 5 +++-- quepy/expression.py | 3 ++- quepy/mql_generation.py | 5 +++-- quepy/nltktagger.py | 9 ++++++--- quepy/quepyapp.py | 5 +++-- quepy/sparql_generation.py | 7 ++++--- quepy/tagger.py | 4 ++-- tests/random_expression.py | 9 +++++---- tests/test_dot_generation.py | 10 ++++++---- tests/test_dsl.py | 17 +++++++++-------- tests/test_expressions.py | 13 +++++++------ tests/test_mql_generation.py | 11 ++++++----- tests/test_quepyapp.py | 6 +++--- tests/test_sparql_generation.py | 9 +++++---- tests/test_tagger.py | 15 ++++++++------- tests/testapp/__init__.py | 2 +- 17 files changed, 77 insertions(+), 60 deletions(-) diff --git a/quepy/dot_generation.py b/quepy/dot_generation.py index e812990..b1dafc3 100644 --- a/quepy/dot_generation.py +++ b/quepy/dot_generation.py @@ -5,13 +5,14 @@ """ import random +import six from quepy.expression import isnode from quepy.dsl import IsRelatedTo, HasKeyword from quepy.encodingpolicy import assert_valid_encoding def escape(x, add_quotes=True): - x = unicode(x) + x = six.text_type(x) x = x.replace(u" ", u"_") x = x.replace(u"\n", u"") x = x.replace(u"\00", u"") @@ -29,13 +30,13 @@ def adapt(x): if isnode(x): x = u"x{}".format(x) return x - if isinstance(x, basestring): + if isinstance(x, six.string_types): assert_valid_encoding(x) x = escape(x) if x.startswith(u"\""): return x return u'"{}"'.format(x) - return unicode(x) + return six.text_type(x) def expression_to_dot(e): diff --git a/quepy/encodingpolicy.py b/quepy/encodingpolicy.py index a415f59..70ef20a 100644 --- a/quepy/encodingpolicy.py +++ b/quepy/encodingpolicy.py @@ -12,6 +12,7 @@ """ import logging +import six from quepy import settings logger = logging.getLogger("quepy.encodingpolicy") @@ -25,7 +26,7 @@ def encoding_flexible_conversion(string, complain=False): converting a string that had to be on the right encoding. """ - if isinstance(string, unicode): + if isinstance(string, six.text_type): return string try: ustring = string.decode(settings.DEFAULT_ENCODING) @@ -44,5 +45,5 @@ def assert_valid_encoding(string): ValueError exception. """ - if not isinstance(string, unicode): + if not isinstance(string, six.text_type): raise ValueError(u"Argument must be unicode") diff --git a/quepy/expression.py b/quepy/expression.py index 0f32310..dddfba2 100644 --- a/quepy/expression.py +++ b/quepy/expression.py @@ -90,6 +90,7 @@ from collections import defaultdict from copy import deepcopy +import six def isnode(x): @@ -174,7 +175,7 @@ def iter_nodes(self): """ Iterates the indexes (the unique identifiers) of the Expression nodes. """ - return xrange(len(self.nodes)) + return six.moves.xrange(len(self.nodes)) def iter_edges(self, node): """ diff --git a/quepy/mql_generation.py b/quepy/mql_generation.py index 97b3bd7..0531df6 100644 --- a/quepy/mql_generation.py +++ b/quepy/mql_generation.py @@ -2,6 +2,7 @@ import re import json +import six from quepy.dsl import IsRelatedTo from quepy.expression import isnode from quepy.encodingpolicy import encoding_flexible_conversion @@ -25,13 +26,13 @@ def safely_to_unicode(x): Given an "edge" (a relation) or "a data" from an `Expression` graph transform it into a unicode string fitted for insertion into a MQL query. """ - if isinstance(x, unicode): + if isinstance(x, six.text_type): return x if isinstance(x, str): return encoding_flexible_conversion(x) if isinstance(x, IsRelatedTo): return u"/type/reflect/any_master" - return unicode(x) # FIXME: Any object is unicode-able, this is error prone + return six.text_type(x) # FIXME: Any object is unicode-able, this is error prone def to_bidirected_graph(e): diff --git a/quepy/nltktagger.py b/quepy/nltktagger.py index 8c9149d..8693252 100644 --- a/quepy/nltktagger.py +++ b/quepy/nltktagger.py @@ -15,6 +15,7 @@ # - "maxent_treebank_pos_tagger" in Models # - "wordnet" in Corpora +import six import nltk from quepy.tagger import Word from quepy.encodingpolicy import assert_valid_encoding @@ -25,7 +26,7 @@ def penn_to_morphy_tag(tag): assert_valid_encoding(tag) - for penn, morphy in _penn_to_morphy_tag.iteritems(): + for penn, morphy in six.iteritems(_penn_to_morphy_tag): if tag.startswith(penn): return morphy return None @@ -62,12 +63,14 @@ def run_nltktagger(string, nltk_data_path=None): word = Word(token) # Eliminates stuff like JJ|CC # decode ascii because they are the penn-like POS tags (are ascii). - word.pos = pos.split("|")[0].decode("ascii") + word_pos = pos.split("|")[0] + do_decode = isinstance(word_pos, six.binary_type) + word.pos = word_pos.decode("ascii") if do_decode else word_pos mtag = penn_to_morphy_tag(word.pos) # Nice shooting, son. What's your name? lemma = wordnet.morphy(word.token, pos=mtag) - if isinstance(lemma, str): + if isinstance(lemma, six.binary_type): # In this case lemma is example-based, because if it's rule based # the result should be unicode (input was unicode). # Since english is ascii the decoding is ok. diff --git a/quepy/quepyapp.py b/quepy/quepyapp.py index e3187d0..d676ff4 100644 --- a/quepy/quepyapp.py +++ b/quepy/quepyapp.py @@ -14,6 +14,7 @@ import logging from importlib import import_module from types import ModuleType +import six from quepy import settings from quepy import generation @@ -35,10 +36,10 @@ def install(app_name): } modules = {} - for module_name, module_path in module_paths.iteritems(): + for module_name, module_path in six.iteritems(module_paths): try: modules[module_name] = import_module(module_path.format(app_name)) - except ImportError, error: + except ImportError as error: message = u"Error importing {0!r}: {1}" raise ImportError(message.format(module_name, error)) diff --git a/quepy/sparql_generation.py b/quepy/sparql_generation.py index 3b1a218..5a168e1 100644 --- a/quepy/sparql_generation.py +++ b/quepy/sparql_generation.py @@ -4,6 +4,7 @@ Sparql generation code. """ +import six from quepy import settings from quepy.dsl import IsRelatedTo from quepy.expression import isnode @@ -13,7 +14,7 @@ def escape(string): - string = unicode(string) + string = six.text_type(string) string = string.replace("\n", "") string = string.replace("\r", "") string = string.replace("\t", "") @@ -29,12 +30,12 @@ def adapt(x): if isnode(x): x = u"?x{}".format(x) return x - if isinstance(x, basestring): + if isinstance(x, six.string_types): assert_valid_encoding(x) if x.startswith(u"\"") or ":" in x: return x return u'"{}"'.format(x) - return unicode(x) + return six.text_type(x) def expression_to_sparql(e, full=False): diff --git a/quepy/tagger.py b/quepy/tagger.py index 557e093..6ff93ad 100644 --- a/quepy/tagger.py +++ b/quepy/tagger.py @@ -8,7 +8,7 @@ # Gonzalo Garcia Berrotaran import logging - +import six from quepy import settings from quepy.encodingpolicy import assert_valid_encoding @@ -50,7 +50,7 @@ def __unicode__(self): return u"|".join(str(x) for x in attrs) def __repr__(self): - return unicode(self) + return six.text_type(self) def get_tagger(): diff --git a/tests/random_expression.py b/tests/random_expression.py index d223a07..e74deff 100644 --- a/tests/random_expression.py +++ b/tests/random_expression.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import random +import six from quepy.expression import Expression @@ -9,18 +10,18 @@ def random_data(only_ascii=False): while first or 1 / 20.0 < random.random(): first = False if only_ascii: - c = unichr(random.randint(33, 126)) + c = six.unichr(random.randint(33, 126)) data.append(c) continue x = random.random() if 0.1 > x: c = random.choice(u" ./\n") elif 0.50 > x: - c = unichr(random.randint(65, 122)) + c = six.unichr(random.randint(65, 122)) elif 0.85 > x: - c = unichr(random.randint(0, 127)) + c = six.unichr(random.randint(0, 127)) else: - c = unichr(random.randint(0, 65535)) + c = six.unichr(random.randint(0, 65535)) data.append(c) return u"".join(data) diff --git a/tests/test_dot_generation.py b/tests/test_dot_generation.py index 2b8d68f..f3f745a 100644 --- a/tests/test_dot_generation.py +++ b/tests/test_dot_generation.py @@ -7,7 +7,9 @@ # Authors: Rafael Carrascosa # Gonzalo Garcia Berrotaran +from __future__ import print_function import unittest +import six import tempfile import subprocess from random_expression import random_expression @@ -38,8 +40,8 @@ class X(FixedRelation): class TestDotGeneration(unittest.TestCase): def _standard_check(self, s, e): - self.assertIsInstance(s, unicode) - vs = [u"x{}".format(i) for i in xrange(len(e))] + self.assertIsInstance(s, six.text_type) + vs = [u"x{}".format(i) for i in six.moves.xrange(len(e))] for var in vs: self.assertIn(var, s) @@ -66,7 +68,7 @@ def test_dot_stress(self): dot_file = tempfile.NamedTemporaryFile() cmdline = "dot %s" % dot_file.name msg = "dot returned error code {}, check {} input file." - for _ in xrange(100): + for _ in six.moves.xrange(100): expression = random_expression() _, dot_string = expression_to_dot(expression) with open(dot_file.name, "w") as filehandler: @@ -76,7 +78,7 @@ def test_dot_stress(self): retcode = subprocess.call(cmdline.split(), stdout=tempfile.TemporaryFile()) except OSError: - print "Warning: the program 'dot' was not found, tests skipped" + print("Warning: the program 'dot' was not found, tests skipped") return if retcode != 0: dot_file.delete = False diff --git a/tests/test_dsl.py b/tests/test_dsl.py index 792fc91..3fbc86e 100644 --- a/tests/test_dsl.py +++ b/tests/test_dsl.py @@ -8,6 +8,7 @@ # Gonzalo Garcia Berrotaran import unittest +import six from quepy.expression import Expression from quepy.dsl import HasKeyword, FixedRelation, FixedType, \ FixedDataRelation @@ -39,9 +40,9 @@ class MyFixedType(FixedType): edges = list(fixedinstance.iter_edges(head)) self.assertEqual(len(edges), 1) - self.assertIsInstance(edges[0][0], unicode) + self.assertIsInstance(edges[0][0], six.text_type) self.assertEqual(edges[0][0], u"rdf:type") - self.assertIsInstance(edges[0][1], unicode) + self.assertIsInstance(edges[0][1], six.text_type) self.assertEqual(edges[0][1], u"uranium:blowtorch") def test_fixed_data_relation(self): @@ -54,9 +55,9 @@ class MyFixedDataRelation(FixedDataRelation): edges = list(fixedinstance.iter_edges(head)) self.assertEqual(len(edges), 1) - self.assertIsInstance(edges[0][0], unicode) + self.assertIsInstance(edges[0][0], six.text_type) self.assertEqual(edges[0][0], u"uranium:blowtorch") - self.assertIsInstance(edges[0][1], unicode) + self.assertIsInstance(edges[0][1], six.text_type) self.assertEqual(edges[0][1], u"soplete") def test_has_keyword(self): @@ -67,9 +68,9 @@ def test_has_keyword(self): head = keywordinstance.get_head() edges = list(keywordinstance.iter_edges(head)) self.assertEqual(len(edges), 1) - self.assertIsInstance(edges[0][0], unicode) + self.assertIsInstance(edges[0][0], six.text_type) self.assertEqual(edges[0][0], u"uranium:keyword") - self.assertIsInstance(edges[0][1], unicode) + self.assertIsInstance(edges[0][1], six.text_type) self.assertEqual(edges[0][1], u'soplete') # With language @@ -79,7 +80,7 @@ def test_has_keyword(self): head = keywordinstance.get_head() edges = list(keywordinstance.iter_edges(head)) self.assertEqual(len(edges), 1) - self.assertIsInstance(edges[0][1], unicode) + self.assertIsInstance(edges[0][1], six.text_type) self.assertEqual(edges[0][1], u'"soplete"@en') # With sanitize @@ -89,7 +90,7 @@ def test_has_keyword(self): head = keywordinstance.get_head() edges = list(keywordinstance.iter_edges(head)) self.assertEqual(len(edges), 1) - self.assertIsInstance(edges[0][1], unicode) + self.assertIsInstance(edges[0][1], six.text_type) self.assertEqual(edges[0][1], u'"SOPLETE"@en') diff --git a/tests/test_expressions.py b/tests/test_expressions.py index 639d000..3935525 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -13,6 +13,7 @@ """ import unittest +import six from quepy.expression import Expression, isnode @@ -35,7 +36,7 @@ def make_canonical_expression(e): if isnode(child): child = canon[child] childs.append((label, child)) - childs.sort() + childs.sort(key=str) canon[node] = tuple(childs) return canon[e.get_head()] @@ -145,7 +146,7 @@ def setUp(self): other.add_data(0, "1") other.add_data(2, "3") other.decapitate("iuju") - for _ in xrange(5): + for _ in six.moves.xrange(5): self.e.decapitate("nouu") self.e += other @@ -237,14 +238,14 @@ def setUp(self): other = Expression() other.decapitate("onelevel") self.a = Expression() - for _ in xrange(5): + for _ in six.moves.xrange(5): self.a.decapitate("step") self.a += other other = Expression() other.decapitate("onelevel", reverse=True) self.b = Expression() - for _ in xrange(5): + for _ in six.moves.xrange(5): self.b.decapitate("step") self.b += other @@ -255,7 +256,7 @@ def setUp(self): other.add_data(0, "data") other.decapitate("onelevel") self.a = Expression() - for _ in xrange(5): + for _ in six.moves.xrange(5): self.a.decapitate("step") self.a += other @@ -263,7 +264,7 @@ def setUp(self): other.add_data(0, "data") other.decapitate("onelevel", reverse=True) self.b = Expression() - for _ in xrange(5): + for _ in six.moves.xrange(5): self.b.decapitate("step") self.b += other diff --git a/tests/test_mql_generation.py b/tests/test_mql_generation.py index d437ce1..51da8fd 100644 --- a/tests/test_mql_generation.py +++ b/tests/test_mql_generation.py @@ -10,6 +10,7 @@ import json from random import seed import unittest +import six from random_expression import random_expression from quepy.mql_generation import generate_mql @@ -34,16 +35,16 @@ def _valid_mql_query(self, query): if isinstance(x, list): self.assertIsInstance(x[0], dict) self.assertEqual(len(x), 1) - for key, value in x[0].iteritems(): - self.assertIsInstance(key, unicode) + for key, value in six.iteritems(x[0]): + self.assertIsInstance(key, six.text_type) q.append(value) else: - self.assertIsInstance(x, unicode) + self.assertIsInstance(x, six.text_type) def _valid_target_for_query(self, target, query): self.assertIsInstance(target, list) for entry in target: - self.assertIsInstance(entry, unicode) + self.assertIsInstance(entry, six.text_type) x = self._get_json(query) if x is None: return @@ -58,7 +59,7 @@ def _valid_target_for_query(self, target, query): def test_mql_stress(self): seed("playadito vs amanda... 3 focas") - for _ in xrange(100): + for _ in six.moves.xrange(100): expression = random_expression() target, mql = generate_mql(expression) self._valid_mql_query(mql) diff --git a/tests/test_quepyapp.py b/tests/test_quepyapp.py index 7beb106..b29ad7c 100644 --- a/tests/test_quepyapp.py +++ b/tests/test_quepyapp.py @@ -13,7 +13,7 @@ """ import unittest - +import six import quepy @@ -26,8 +26,8 @@ def test_get_query_types(self): question = "What is this?" target, query, userdata = self.app.get_query(question) - self.assertIsInstance(target, unicode) - self.assertIsInstance(query, unicode) + self.assertIsInstance(target, six.text_type) + self.assertIsInstance(query, six.text_type) def test_get_user_data(self): question = "user data" diff --git a/tests/test_sparql_generation.py b/tests/test_sparql_generation.py index 836f7f2..d8d74ed 100644 --- a/tests/test_sparql_generation.py +++ b/tests/test_sparql_generation.py @@ -9,6 +9,7 @@ import re import unittest +import six from random_expression import random_expression from random import seed from quepy.sparql_generation import expression_to_sparql @@ -42,8 +43,8 @@ class TestSparqlGeneration(unittest.TestCase): re.DOTALL) def _standard_check(self, s, e): - self.assertIsInstance(s, unicode) - vs = [u"x{}".format(i) for i in xrange(len(e))] + self.assertIsInstance(s, six.text_type) + vs = [u"x{}".format(i) for i in six.moves.xrange(len(e))] for var in vs: self.assertIn(var, s) @@ -67,7 +68,7 @@ def test_sparql_takes_unicode(self): @unittest.skip("should be fixed") def test_sparql_ascii_stress(self): seed("sacala dunga dunga dunga") - for _ in xrange(100): + for _ in six.moves.xrange(100): expression = random_expression(only_ascii=True) _, s = expression_to_sparql(expression) self._standard_check(s, expression) @@ -75,7 +76,7 @@ def test_sparql_ascii_stress(self): def test_sparql_stress(self): seed("sacala dunga dunga dunga") - for _ in xrange(100): + for _ in six.moves.xrange(100): expression = random_expression() try: _, s = expression_to_sparql(expression) diff --git a/tests/test_tagger.py b/tests/test_tagger.py index 39be54a..08ba79a 100644 --- a/tests/test_tagger.py +++ b/tests/test_tagger.py @@ -13,24 +13,25 @@ """ import unittest +import six from quepy import tagger class TestTagger(unittest.TestCase): def test_tagset_unicode(self): for tag in tagger.PENN_TAGSET: - self.assertIsInstance(tag, unicode) + self.assertIsInstance(tag, six.text_type) def test_word_encoding(self): word = tagger.Word(token=u"æßđħłłþłłł@æµß", lemma=u"ŧłþłßæ#¶ŋħ~#~@", pos=u"øĸŋøħþ€ĸłþ€øæ«»¢") - self.assertIsInstance(word.token, unicode) + self.assertIsInstance(word.token, six.text_type) self.assertEqual(word.token, u"æßđħłłþłłł@æµß") - self.assertIsInstance(word.lemma, unicode) + self.assertIsInstance(word.lemma, six.text_type) self.assertEqual(word.lemma, u"ŧłþłßæ#¶ŋħ~#~@") - self.assertIsInstance(word.pos, unicode) + self.assertIsInstance(word.pos, six.text_type) self.assertEqual(word.pos, u"øĸŋøħþ€ĸłþ€øæ«»¢") def test_word_wrong_encoding(self): @@ -49,11 +50,11 @@ def test_word_attrib_set(self): word.lemma = u"ŧłþłßæ#¶ŋħ~#~@" word.pos = u"øĸŋøħþ€ĸłþ€øæ«»¢" - self.assertIsInstance(word.token, unicode) + self.assertIsInstance(word.token, six.text_type) self.assertEqual(word.token, u"æßđħłłþłłł@æµß") - self.assertIsInstance(word.lemma, unicode) + self.assertIsInstance(word.lemma, six.text_type) self.assertEqual(word.lemma, u"ŧłþłßæ#¶ŋħ~#~@") - self.assertIsInstance(word.pos, unicode) + self.assertIsInstance(word.pos, six.text_type) self.assertEqual(word.pos, u"øĸŋøħþ€ĸłþ€øæ«»¢") def test_word_wrong_attrib_set(self): diff --git a/tests/testapp/__init__.py b/tests/testapp/__init__.py index 03df946..4fef00e 100644 --- a/tests/testapp/__init__.py +++ b/tests/testapp/__init__.py @@ -12,4 +12,4 @@ Init for testapp quepy. """ -from basic import * \ No newline at end of file +from testapp.basic import * \ No newline at end of file From 9647c917aa75a11c9be72a49d63ac31975af58a0 Mon Sep 17 00:00:00 2001 From: Daniel Bradburn Date: Mon, 8 May 2017 20:21:07 +0200 Subject: [PATCH 2/3] Fixed failing tests in test_dot_generation due to random_expression generating single codepoints from surrogate pairs which cannot be encoded in utf-8. Python 2 was more lax about this --- test.sh | 0 tests/random_expression.py | 5 ++++- tests/test_dot_generation.py | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 test.sh diff --git a/test.sh b/test.sh new file mode 100644 index 0000000..e69de29 diff --git a/tests/random_expression.py b/tests/random_expression.py index e74deff..f154dbe 100644 --- a/tests/random_expression.py +++ b/tests/random_expression.py @@ -21,7 +21,10 @@ def random_data(only_ascii=False): elif 0.85 > x: c = six.unichr(random.randint(0, 127)) else: - c = six.unichr(random.randint(0, 65535)) + blacklist = [six.unichr(x) for x in range(0xd800, 0xdfff + 1)] + c = blacklist[0] + while c in blacklist: + c = six.unichr(random.randint(0, 65535)) data.append(c) return u"".join(data) diff --git a/tests/test_dot_generation.py b/tests/test_dot_generation.py index f3f745a..378a105 100644 --- a/tests/test_dot_generation.py +++ b/tests/test_dot_generation.py @@ -68,10 +68,10 @@ def test_dot_stress(self): dot_file = tempfile.NamedTemporaryFile() cmdline = "dot %s" % dot_file.name msg = "dot returned error code {}, check {} input file." - for _ in six.moves.xrange(100): + for i in six.moves.xrange(100): expression = random_expression() _, dot_string = expression_to_dot(expression) - with open(dot_file.name, "w") as filehandler: + with open(dot_file.name, "wb") as filehandler: filehandler.write(dot_string.encode("utf-8")) try: From 3ace241ced39a5c5acb5df4bd3c6bbbae67bd8e6 Mon Sep 17 00:00:00 2001 From: Daniel Bradburn Date: Mon, 8 May 2017 20:42:17 +0200 Subject: [PATCH 3/3] Skipped a number of tests that checked that an error is raised when ascii input is supplied. This is something which is ambiguous in python 2, however in python 3 the split between text and bytes is much more explicit --- tests/random_expression.py | 2 ++ tests/test_dot_generation.py | 7 +++++-- tests/test_nltktagger.py | 3 +++ tests/test_sparql_generation.py | 3 +++ tests/test_tagger.py | 4 ++++ 5 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/random_expression.py b/tests/random_expression.py index f154dbe..f5ed665 100644 --- a/tests/random_expression.py +++ b/tests/random_expression.py @@ -38,6 +38,8 @@ def random_relation(only_ascii=False): class UnicodeableDummy(object): def __unicode__(self): return data + def __str__(self): + return data return UnicodeableDummy() diff --git a/tests/test_dot_generation.py b/tests/test_dot_generation.py index 378a105..8f5d729 100644 --- a/tests/test_dot_generation.py +++ b/tests/test_dot_generation.py @@ -8,10 +8,11 @@ # Gonzalo Garcia Berrotaran from __future__ import print_function -import unittest -import six +import sys import tempfile import subprocess +import unittest +import six from random_expression import random_expression from random import seed from quepy.dot_generation import expression_to_dot @@ -51,12 +52,14 @@ def test_dot_takes_unicode(self): _, s = expression_to_dot(e) self._standard_check(s, e) + @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3') def test_dot_takes_fails_ascii1(self): e = gen_fixedtype("a") e += gen_datarel("b", "c") e = gen_fixedrelation("d", e) self.assertRaises(ValueError, expression_to_dot, e) + @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3') def test_dot_takes_fails_ascii2(self): e = gen_fixedtype("·̣─@łæßð~¶½") e += gen_datarel("tµŧurułej€", "←ðßðæßđæßæđßŋŋæ @~~·ŋŋ·¶·ŋ“¶¬@@") diff --git a/tests/test_nltktagger.py b/tests/test_nltktagger.py index e45e272..53ac2d1 100644 --- a/tests/test_nltktagger.py +++ b/tests/test_nltktagger.py @@ -12,12 +12,14 @@ Tests for nltktagger. """ +import sys import unittest from quepy import nltktagger from quepy.tagger import Word class TestNLTKTagger(unittest.TestCase): + def test_word_output(self): output = nltktagger.run_nltktagger(u"this is a test case «¢ðßæŋħħ") @@ -25,6 +27,7 @@ def test_word_output(self): for word in output: self.assertIsInstance(word, Word) + @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3') def tests_wrong_input(self): self.assertRaises(ValueError, nltktagger.run_nltktagger, "this is not unicode") diff --git a/tests/test_sparql_generation.py b/tests/test_sparql_generation.py index d8d74ed..f4919f6 100644 --- a/tests/test_sparql_generation.py +++ b/tests/test_sparql_generation.py @@ -7,6 +7,7 @@ # Authors: Rafael Carrascosa # Gonzalo Garcia Berrotaran +import sys import re import unittest import six @@ -87,12 +88,14 @@ def test_sparql_stress(self): self._standard_check(s, expression) self._sparql_check(s) + @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3') def test_sparql_takes_fails_ascii1(self): e = gen_fixedtype("a") e += gen_datarel("b", "c") e = gen_fixedrelation("d", e) self.assertRaises(ValueError, expression_to_sparql, e) + @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3') def test_sparql_takes_fails_ascii2(self): e = gen_fixedtype("·̣─@łæßð~¶½") e += gen_datarel("tµŧurułej€", "←ðßðæßđæßæđßŋŋæ @~~·ŋŋ·¶·ŋ“¶¬@@") diff --git a/tests/test_tagger.py b/tests/test_tagger.py index 08ba79a..5360c2b 100644 --- a/tests/test_tagger.py +++ b/tests/test_tagger.py @@ -12,8 +12,10 @@ Tests for tagger. """ +import sys import unittest import six + from quepy import tagger @@ -34,6 +36,7 @@ def test_word_encoding(self): self.assertIsInstance(word.pos, six.text_type) self.assertEqual(word.pos, u"øĸŋøħþ€ĸłþ€øæ«»¢") + @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3') def test_word_wrong_encoding(self): # Token not unicode self.assertRaises(ValueError, tagger.Word, "æßđħłłþłłł@æµß", @@ -57,6 +60,7 @@ def test_word_attrib_set(self): self.assertIsInstance(word.pos, six.text_type) self.assertEqual(word.pos, u"øĸŋøħþ€ĸłþ€øæ«»¢") + @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3') def test_word_wrong_attrib_set(self): word = tagger.Word(u"æßđħłłþłłł@æµß")