Py3 issue45 #46

Open · wants to merge 3 commits into base: develop
7 changes: 4 additions & 3 deletions quepy/dot_generation.py
@@ -5,13 +5,14 @@
 """
 
 import random
+import six
 from quepy.expression import isnode
 from quepy.dsl import IsRelatedTo, HasKeyword
 from quepy.encodingpolicy import assert_valid_encoding
 
 
 def escape(x, add_quotes=True):
-    x = unicode(x)
+    x = six.text_type(x)
     x = x.replace(u" ", u"_")
     x = x.replace(u"\n", u"")
     x = x.replace(u"\00", u"")
@@ -29,13 +30,13 @@ def adapt(x):
     if isnode(x):
         x = u"x{}".format(x)
         return x
-    if isinstance(x, basestring):
+    if isinstance(x, six.string_types):
         assert_valid_encoding(x)
         x = escape(x)
         if x.startswith(u"\""):
             return x
         return u'"{}"'.format(x)
-    return unicode(x)
+    return six.text_type(x)
 
 
 def expression_to_dot(e):
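
A note on the pattern above: six.text_type is unicode on Python 2 and str on Python 3, while six.string_types is (basestring,) on Python 2 and (str,) on Python 3, so adapt() keeps the same behaviour on both interpreters. A minimal sketch of the two aliases (illustrative only, not part of the patch; assumes six is installed):

    # Illustrative only: what the six aliases resolve to on each interpreter.
    import sys
    import six

    value = six.text_type(42)                   # u"42" on Python 2, "42" on Python 3
    assert isinstance(value, six.text_type)
    assert isinstance(value, six.string_types)  # (basestring,) on py2, (str,) on py3
    assert isinstance(u"caf\u00e9", six.text_type)
    print("six aliases checked on Python %d" % sys.version_info[0])
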
5 changes: 3 additions & 2 deletions quepy/encodingpolicy.py
@@ -12,6 +12,7 @@
 """
 
 import logging
+import six
 from quepy import settings
 logger = logging.getLogger("quepy.encodingpolicy")
 
@@ -25,7 +26,7 @@ def encoding_flexible_conversion(string, complain=False):
     converting a string that had to be on the right encoding.
     """
 
-    if isinstance(string, unicode):
+    if isinstance(string, six.text_type):
         return string
     try:
         ustring = string.decode(settings.DEFAULT_ENCODING)
@@ -44,5 +45,5 @@ def assert_valid_encoding(string):
     ValueError exception.
     """
 
-    if not isinstance(string, unicode):
+    if not isinstance(string, six.text_type):
         raise ValueError(u"Argument must be unicode")
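
The conversion above only changes the type check because byte strings expose .decode on both Python 2 and Python 3. A small standalone sketch of the same dispatch (assumes six and UTF-8 input; DEFAULT_ENCODING stands in for quepy's settings.DEFAULT_ENCODING):

    # Sketch of the text/bytes dispatch used by encoding_flexible_conversion.
    import six

    DEFAULT_ENCODING = "utf-8"  # stand-in for settings.DEFAULT_ENCODING

    def to_text(value):
        if isinstance(value, six.text_type):
            return value                        # already text, pass through
        return value.decode(DEFAULT_ENCODING)   # bytes: decode on py2 and py3 alike

    assert to_text(u"caf\u00e9") == u"caf\u00e9"
    assert to_text(b"caf\xc3\xa9") == u"caf\u00e9"
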
3 changes: 2 additions & 1 deletion quepy/expression.py
@@ -90,6 +90,7 @@
 
 from collections import defaultdict
 from copy import deepcopy
+import six
 
 
 def isnode(x):
@@ -174,7 +175,7 @@ def iter_nodes(self):
         """
         Iterates the indexes (the unique identifiers) of the Expression nodes.
         """
-        return xrange(len(self.nodes))
+        return six.moves.xrange(len(self.nodes))
 
     def iter_edges(self, node):
         """
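
six.moves.xrange used above resolves to xrange on Python 2 and to range on Python 3, so iter_nodes still returns a lazy sequence of node indexes rather than a materialized list. A quick sketch, outside the patch:

    # six.moves.xrange: xrange on Python 2, range on Python 3; both are lazy.
    from six.moves import xrange

    nodes = ["a", "b", "c"]
    indexes = xrange(len(nodes))
    assert list(indexes) == [0, 1, 2]
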
5 changes: 3 additions & 2 deletions quepy/mql_generation.py
@@ -2,6 +2,7 @@
 
 import re
 import json
+import six
 from quepy.dsl import IsRelatedTo
 from quepy.expression import isnode
 from quepy.encodingpolicy import encoding_flexible_conversion
@@ -25,13 +26,13 @@ def safely_to_unicode(x):
     Given an "edge" (a relation) or "a data" from an `Expression` graph
     transform it into a unicode string fitted for insertion into a MQL query.
     """
-    if isinstance(x, unicode):
+    if isinstance(x, six.text_type):
         return x
     if isinstance(x, str):
         return encoding_flexible_conversion(x)
     if isinstance(x, IsRelatedTo):
         return u"/type/reflect/any_master"
-    return unicode(x) # FIXME: Any object is unicode-able, this is error prone
+    return six.text_type(x) # FIXME: Any object is unicode-able, this is error prone
 
 
 def to_bidirected_graph(e):
9 changes: 6 additions & 3 deletions quepy/nltktagger.py
@@ -15,6 +15,7 @@
 # - "maxent_treebank_pos_tagger" in Models
 # - "wordnet" in Corpora
 
+import six
 import nltk
 from quepy.tagger import Word
 from quepy.encodingpolicy import assert_valid_encoding
@@ -25,7 +26,7 @@
 def penn_to_morphy_tag(tag):
     assert_valid_encoding(tag)
 
-    for penn, morphy in _penn_to_morphy_tag.iteritems():
+    for penn, morphy in six.iteritems(_penn_to_morphy_tag):
         if tag.startswith(penn):
             return morphy
     return None
@@ -62,12 +63,14 @@ def run_nltktagger(string, nltk_data_path=None):
         word = Word(token)
         # Eliminates stuff like JJ|CC
         # decode ascii because they are the penn-like POS tags (are ascii).
-        word.pos = pos.split("|")[0].decode("ascii")
+        word_pos = pos.split("|")[0]
+        do_decode = isinstance(word_pos, six.binary_type)
+        word.pos = word_pos.decode("ascii") if do_decode else word_pos
 
         mtag = penn_to_morphy_tag(word.pos)
         # Nice shooting, son. What's your name?
         lemma = wordnet.morphy(word.token, pos=mtag)
-        if isinstance(lemma, str):
+        if isinstance(lemma, six.binary_type):
             # In this case lemma is example-based, because if it's rule based
             # the result should be unicode (input was unicode).
             # Since english is ascii the decoding is ok.
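
The new word_pos handling exists because pos.split("|")[0] yields bytes on Python 2 (which must be decoded) but text on Python 3 (where .decode would fail), so the decode is made conditional on six.binary_type. A standalone sketch of that guard, using a hypothetical tag value:

    # Decode the POS tag only while it is still a byte string (the Python 2 case).
    import six

    pos = "NN|JJ"                 # hypothetical tagger output; bytes on py2, text on py3
    word_pos = pos.split("|")[0]  # drop composite tags such as "JJ|CC"
    if isinstance(word_pos, six.binary_type):
        word_pos = word_pos.decode("ascii")  # Penn tags are plain ASCII
    assert word_pos == u"NN"
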
5 changes: 3 additions & 2 deletions quepy/quepyapp.py
@@ -14,6 +14,7 @@
 import logging
 from importlib import import_module
 from types import ModuleType
+import six
 
 from quepy import settings
 from quepy import generation
@@ -35,10 +36,10 @@ def install(app_name):
     }
     modules = {}
 
-    for module_name, module_path in module_paths.iteritems():
+    for module_name, module_path in six.iteritems(module_paths):
         try:
             modules[module_name] = import_module(module_path.format(app_name))
-        except ImportError, error:
+        except ImportError as error:
             message = u"Error importing {0!r}: {1}"
             raise ImportError(message.format(module_name, error))
 
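
Two Python 3 incompatibilities meet in this hunk: dict.iteritems() no longer exists, and the comma form of except was removed, hence six.iteritems() and except ... as .... A small sketch of both, with a made-up module map rather than quepy's real one:

    # Sketch of the import loop with a made-up module map (not quepy's real paths).
    from importlib import import_module
    import six

    module_paths = {"json_mod": "json", "missing": "no_such_module_{}"}
    modules = {}
    for module_name, module_path in six.iteritems(module_paths):
        try:
            modules[module_name] = import_module(module_path.format("demo"))
        except ImportError as error:  # py3-compatible exception syntax
            print(u"Error importing {0!r}: {1}".format(module_name, error))
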
7 changes: 4 additions & 3 deletions quepy/sparql_generation.py
@@ -4,6 +4,7 @@
 Sparql generation code.
 """
 
+import six
 from quepy import settings
 from quepy.dsl import IsRelatedTo
 from quepy.expression import isnode
@@ -13,7 +14,7 @@
 
 
 def escape(string):
-    string = unicode(string)
+    string = six.text_type(string)
     string = string.replace("\n", "")
     string = string.replace("\r", "")
     string = string.replace("\t", "")
@@ -29,12 +30,12 @@ def adapt(x):
     if isnode(x):
         x = u"?x{}".format(x)
         return x
-    if isinstance(x, basestring):
+    if isinstance(x, six.string_types):
         assert_valid_encoding(x)
         if x.startswith(u"\"") or ":" in x:
             return x
         return u'"{}"'.format(x)
-    return unicode(x)
+    return six.text_type(x)
 
 
 def expression_to_sparql(e, full=False):
4 changes: 2 additions & 2 deletions quepy/tagger.py
@@ -8,7 +8,7 @@
 # Gonzalo Garcia Berrotaran <[email protected]>
 
 import logging
-
+import six
 from quepy import settings
 from quepy.encodingpolicy import assert_valid_encoding
 
@@ -50,7 +50,7 @@ def __unicode__(self):
         return u"|".join(str(x) for x in attrs)
 
     def __repr__(self):
-        return unicode(self)
+        return six.text_type(self)
 
 
 def get_tagger():
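
For classes like Word that define __unicode__, six also ships the python_2_unicode_compatible decorator: the class defines a single text-returning __str__, and on Python 2 the decorator adds the matching __unicode__ and a byte-string __str__. A toy sketch of that alternative (not how the patch does it):

    # Alternative six pattern for text-returning classes (toy class, not quepy's Word).
    import six

    @six.python_2_unicode_compatible
    class Tag(object):
        def __init__(self, token, pos):
            self.token = token
            self.pos = pos

        def __str__(self):  # return text; six wires up __unicode__ on Python 2
            return u"{}|{}".format(self.token, self.pos)

    print(six.text_type(Tag(u"dog", u"NN")))  # "dog|NN" on both interpreters
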
Empty file added test.sh
Empty file.
14 changes: 10 additions & 4 deletions tests/random_expression.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 import random
+import six
 from quepy.expression import Expression
 
 
@@ -9,18 +10,21 @@ def random_data(only_ascii=False):
     while first or 1 / 20.0 < random.random():
         first = False
         if only_ascii:
-            c = unichr(random.randint(33, 126))
+            c = six.unichr(random.randint(33, 126))
             data.append(c)
             continue
         x = random.random()
         if 0.1 > x:
             c = random.choice(u" ./\n")
         elif 0.50 > x:
-            c = unichr(random.randint(65, 122))
+            c = six.unichr(random.randint(65, 122))
         elif 0.85 > x:
-            c = unichr(random.randint(0, 127))
+            c = six.unichr(random.randint(0, 127))
         else:
-            c = unichr(random.randint(0, 65535))
+            blacklist = [six.unichr(x) for x in range(0xd800, 0xdfff + 1)]
+            c = blacklist[0]
+            while c in blacklist:
+                c = six.unichr(random.randint(0, 65535))
         data.append(c)
     return u"".join(data)
 
@@ -34,6 +38,8 @@ def random_relation(only_ascii=False):
     class UnicodeableDummy(object):
         def __unicode__(self):
             return data
+        def __str__(self):
+            return data
     return UnicodeableDummy()
 
 
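
The blacklist added above matters because code points U+D800 through U+DFFF are UTF-16 surrogates, and Python 3 refuses to encode a lone surrogate to UTF-8; the generator therefore re-rolls until it lands outside that range. A leaner sketch of the same idea, using a set purely for illustration:

    # Re-roll random BMP code points until one falls outside the surrogate block.
    import random
    import six

    SURROGATES = frozenset(range(0xd800, 0xdfff + 1))

    def random_bmp_char():
        code = random.randint(0, 65535)
        while code in SURROGATES:  # a lone surrogate would break UTF-8 encoding on py3
            code = random.randint(0, 65535)
        return six.unichr(code)

    sample = u"".join(random_bmp_char() for _ in range(20))
    sample.encode("utf-8")  # safe: no surrogates present
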
17 changes: 11 additions & 6 deletions tests/test_dot_generation.py
@@ -7,9 +7,12 @@
 # Authors: Rafael Carrascosa <[email protected]>
 # Gonzalo Garcia Berrotaran <[email protected]>
 
-import unittest
+from __future__ import print_function
+import sys
 import tempfile
 import subprocess
+import unittest
+import six
 from random_expression import random_expression
 from random import seed
 from quepy.dot_generation import expression_to_dot
@@ -38,8 +41,8 @@
 class TestDotGeneration(unittest.TestCase):
 
     def _standard_check(self, s, e):
-        self.assertIsInstance(s, unicode)
-        vs = [u"x{}".format(i) for i in xrange(len(e))]
+        self.assertIsInstance(s, six.text_type)
+        vs = [u"x{}".format(i) for i in six.moves.xrange(len(e))]
         for var in vs:
             self.assertIn(var, s)
 
@@ -49,12 +52,14 @@ def test_dot_takes_unicode(self):
         _, s = expression_to_dot(e)
         self._standard_check(s, e)
 
+    @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3')
     def test_dot_takes_fails_ascii1(self):
         e = gen_fixedtype("a")
         e += gen_datarel("b", "c")
         e = gen_fixedrelation("d", e)
         self.assertRaises(ValueError, expression_to_dot, e)
 
+    @unittest.skipIf(sys.version_info[0] > 2, 'less relevant in py3')
     def test_dot_takes_fails_ascii2(self):
         e = gen_fixedtype("·̣─@łæßð~¶½")
         e += gen_datarel("tµŧurułej€", "←ðßðæßđæßæđßŋŋæ @~~·ŋŋ·¶·ŋ“¶¬@@")
@@ -66,17 +71,17 @@ def test_dot_stress(self):
         dot_file = tempfile.NamedTemporaryFile()
         cmdline = "dot %s" % dot_file.name
         msg = "dot returned error code {}, check {} input file."
-        for _ in xrange(100):
+        for i in six.moves.xrange(100):
             expression = random_expression()
             _, dot_string = expression_to_dot(expression)
-            with open(dot_file.name, "w") as filehandler:
+            with open(dot_file.name, "wb") as filehandler:
                 filehandler.write(dot_string.encode("utf-8"))
 
             try:
                 retcode = subprocess.call(cmdline.split(),
                                           stdout=tempfile.TemporaryFile())
             except OSError:
-                print "Warning: the program 'dot' was not found, tests skipped"
+                print("Warning: the program 'dot' was not found, tests skipped")
                 return
             if retcode != 0:
                 dot_file.delete = False
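
Opening the dot file in "wb" matters because dot_string.encode("utf-8") produces bytes, and Python 3 raises TypeError when bytes are written to a text-mode handle (Python 2 accepted either). A minimal sketch of that write path with a stand-in string:

    # Bytes must be written through a binary-mode handle to work on Python 3.
    import os
    import tempfile

    dot_string = u"digraph {\n  x0 -> x1;\n}\n"  # stand-in for expression_to_dot output
    fd, path = tempfile.mkstemp(suffix=".dot")
    os.close(fd)
    with open(path, "wb") as filehandler:
        filehandler.write(dot_string.encode("utf-8"))
    with open(path, "rb") as filehandler:
        assert filehandler.read().decode("utf-8") == dot_string
    os.remove(path)
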
17 changes: 9 additions & 8 deletions tests/test_dsl.py
@@ -8,6 +8,7 @@
 # Gonzalo Garcia Berrotaran <[email protected]>
 
 import unittest
+import six
 from quepy.expression import Expression
 from quepy.dsl import HasKeyword, FixedRelation, FixedType, \
     FixedDataRelation
@@ -39,9 +40,9 @@ class MyFixedType(FixedType):
         edges = list(fixedinstance.iter_edges(head))
 
         self.assertEqual(len(edges), 1)
-        self.assertIsInstance(edges[0][0], unicode)
+        self.assertIsInstance(edges[0][0], six.text_type)
         self.assertEqual(edges[0][0], u"rdf:type")
-        self.assertIsInstance(edges[0][1], unicode)
+        self.assertIsInstance(edges[0][1], six.text_type)
         self.assertEqual(edges[0][1], u"uranium:blowtorch")
 
     def test_fixed_data_relation(self):
@@ -54,9 +55,9 @@ class MyFixedDataRelation(FixedDataRelation):
         edges = list(fixedinstance.iter_edges(head))
 
         self.assertEqual(len(edges), 1)
-        self.assertIsInstance(edges[0][0], unicode)
+        self.assertIsInstance(edges[0][0], six.text_type)
         self.assertEqual(edges[0][0], u"uranium:blowtorch")
-        self.assertIsInstance(edges[0][1], unicode)
+        self.assertIsInstance(edges[0][1], six.text_type)
         self.assertEqual(edges[0][1], u"soplete")
 
     def test_has_keyword(self):
@@ -67,9 +68,9 @@ def test_has_keyword(self):
         head = keywordinstance.get_head()
         edges = list(keywordinstance.iter_edges(head))
         self.assertEqual(len(edges), 1)
-        self.assertIsInstance(edges[0][0], unicode)
+        self.assertIsInstance(edges[0][0], six.text_type)
         self.assertEqual(edges[0][0], u"uranium:keyword")
-        self.assertIsInstance(edges[0][1], unicode)
+        self.assertIsInstance(edges[0][1], six.text_type)
         self.assertEqual(edges[0][1], u'soplete')
 
         # With language
@@ -79,7 +80,7 @@ def test_has_keyword(self):
         head = keywordinstance.get_head()
         edges = list(keywordinstance.iter_edges(head))
         self.assertEqual(len(edges), 1)
-        self.assertIsInstance(edges[0][1], unicode)
+        self.assertIsInstance(edges[0][1], six.text_type)
         self.assertEqual(edges[0][1], u'"soplete"@en')
 
         # With sanitize
@@ -89,7 +90,7 @@ def test_has_keyword(self):
         head = keywordinstance.get_head()
         edges = list(keywordinstance.iter_edges(head))
         self.assertEqual(len(edges), 1)
-        self.assertIsInstance(edges[0][1], unicode)
+        self.assertIsInstance(edges[0][1], six.text_type)
         self.assertEqual(edges[0][1], u'"SOPLETE"@en')
 
 