Merge remote-tracking branch 'origin/master' into number_parsing

90ff917c · Pierre Penninckx · 80aeb570 · 5bf309db · 90ff917c · 90ff917c
Commit 90ff917c authored 8 years ago by Pierre Penninckx
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,8 +3,8 @@ install: "pip install -r requirements.txt"
 python:
    - "2.6"
    - "2.7"
-    - "3.2"
-    - "3.3"
+    - "3.4"
+    - "3.5"
 script: "py.test tests"
 notifications:
  irc: "chat.freenode.net#baron"
--- a/CHANGELOG
+++ b/CHANGELOG
 Changelog
 =========
  
+0.6.2 (unreleased)
+------------------
+
+- fix race condition when generating parser cache file
+- make all user-facing errors inherit from the same BaronError class
+- fix: dotted_name and float_exponant_complex were missing from
+  nodes_rendering_order
+- group formatting at start of file or preceded by space with comment
+
+0.6.1 (2015-01-31)
+------------------
+
+- fix: the string was having a greedy behavior on grouping the string tokens
+  surrounding it (for string chains), this ends up creating an inconsistency in
+  the way string was grouped in general
+- fix: better number parsing handling, everything isn't fixed yet
+- make all (expected) errors inherit from the same BaronError class
+- fix: parsing fails correctly if a quoted string is not closed
+
 0.6 (2014-12-11)
 ----------------
  
@@ -37,7 +56,7 @@ Changelog
  
 - path becomes a simple list and is easier to deal with
 - bounding box allows you to know the left most and right most position
-  of a node see https://baron.readthedocs.org/en/latest/#bounding-box
+  of a node see https://baron.readthedocs.io/en/latest/#bounding-box
 - redbaron is classified as supporting python3
  https://github.com/Psycojoker/baron/pull/51
 - ensure than when a key is a string, it's empty value is an empty string and
@@ -67,20 +86,20 @@ Changelog
 0.2 (2014-06-11)
 ----------------
  
- Baron now provides documentation on https://baron.readthedocs.org
+- Baron now provides documentation on https://baron.readthedocs.io
 - feature: baron now run in python3 (*but* doesn't implement the full python3
  grammar yet) by Pierre Penninckx https://github.com/ibizaman
 - feature: drop the usage of ast.py to find print_function, this allow any
  version of python to parse any other version of python also by Pierre
  Penninckx
 - fix: rare bug where a comment end up being confused as an indentation level
- 2 new helpers: show_file and show_node, see https://baron.readthedocs.org/en/latest/#show-file
-  and https://baron.readthedocs.org/en/latest/#show-node
+- 2 new helpers: show_file and show_node, see https://baron.readthedocs.io/en/latest/#show-file
+  and https://baron.readthedocs.io/en/latest/#show-node
 - new dictionary that provides the informations on how to render a FST node:
-  nodes_rendering_order see https://baron.readthedocs.org/en/latest/#rendering-the-fst
- new utilities to find a node, see https://baron.readthedocs.org/en/latest/#locate-a-node
+  nodes_rendering_order see https://baron.readthedocs.io/en/latest/#rendering-the-fst
+- new utilities to find a node, see https://baron.readthedocs.io/en/latest/#locate-a-node
 - new generic class that provide templates to work on the FST see
-  https://baron.readthedocs.org/en/latest/#rendering-the-fst
+  https://baron.readthedocs.io/en/latest/#rendering-the-fst
  
 0.1.3 (2014-04-13)
 ------------------

--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ source_code_string == dumps(fst)
 ```
  
 Except if you want to do low level things, **use
-[RedBaron](https://github.com/Psycojoker/redbaron) instead of using Baron
+[RedBaron](https://github.com/PyCQA/redbaron) instead of using Baron
 directly**. Think of Baron as the "bytecode of python source code" and RedBaron
 as some sort of usable layer on top of it.
  
@@ -32,7 +32,7 @@ useful for you, read the [« Why is this important? » section](#why-is-this-i
 Documentation
 =============
  
-Baron documentation is available on [Read The Docs](http://baron.readthedocs.org/en/latest/).
+Baron documentation is available on [Read The Docs](http://baron.readthedocs.io/en/latest/).
  
 Why is this important?
 ======================
@@ -113,7 +113,12 @@ Run either `py.test tests/` or `nosetests` in the baron directory.
 Community
 =========
  
-You can reach us on [irc.freenode.net#baron](https://webchat.freenode.net/?channels=%23baron).
+You can reach us on [irc.freenode.net#baron](https://webchat.freenode.net/?channels=%23baron) or [irc.freenode.net##python-code-quality](https://webchat.freenode.net/?channels=%23%23python-code-quality).
+
+Code of Conduct
+===============
+
+As a member of [PyCQA](https://github.com/PyCQA), Baron follows its [Code of Conduct](http://meta.pycqa.org/en/latest/code-of-conduct.html).
  
 Misc
 ====

--- a/baron/__init__.py
+++ b/baron/__init__.py
@@ -2,8 +2,8 @@ from . import grouper
 from . import spliter
 from .baron import parse, tokenize
 from .dumper import dumps
-from .render import nodes_rendering_order
-from .parser import ParsingError
 from .inner_formatting_grouper import GroupingError, UnExpectedFormattingToken
+from .parser import ParsingError
+from .render import nodes_rendering_order
 from .spliter import UntreatedError
 from .utils import BaronError
--- a/baron/baron.py
+++ b/baron/baron.py
-from ast import parse as python_ast_parse
-
 from .spliter import split
 from .grouper import group
 from .tokenizer import tokenize as _tokenize

--- a/baron/formatting_grouper.py
+++ b/baron/formatting_grouper.py
-from .utils import FlexibleIterator
+from .utils import FlexibleIterator, BaronError
  
-class UnExpectedSpaceToken(Exception):
+class UnExpectedSpaceToken(BaronError):
    pass
  
 PRIORITY_ORDER = (
@@ -57,19 +57,22 @@ BOTH = (
    "COMMA",
    "FOR",
    "COLON",
+    "BACKQUOTE",
+)
+
+STRING = (
    "STRING",
    "RAW_STRING",
    "UNICODE_STRING",
    "UNICODE_RAW_STRING",
    "BINARY_STRING",
    "BINARY_RAW_STRING",
-    "BACKQUOTE",
 )
  
 GROUP_SPACE_BEFORE = BOTH + (
    "RIGHT_PARENTHESIS",
    "COMMENT",
-)
+) + STRING
  
 GROUP_SPACE_AFTER = BOTH + (
    "FROM",
@@ -107,24 +110,27 @@ def group(sequence):
  
 def group_generator(sequence):
    iterator = FlexibleIterator(sequence)
-    current = None, None
-    while True:
-        if iterator.end():
-            return
-
+    while not iterator.end():
        current = next(iterator)
  
        if current is None:
            return
  
-        if current[0] in ("SPACE", "COMMENT") and iterator.show_next() and iterator.show_next()[0] in GROUP_SPACE_BEFORE:
+        if current[0] in ("SPACE") and iterator.show_next() and iterator.show_next()[0] in GROUP_SPACE_BEFORE:
            new_current = next(iterator)
            current = (new_current[0], new_current[1], [current])
  
-        if current[0] in GROUP_SPACE_AFTER and\
-            (iterator.show_next() and iterator.show_next()[0] in ("SPACE", "COMMENT")) and\
+        if current[0] in GROUP_SPACE_AFTER + STRING and\
+            (iterator.show_next() and iterator.show_next()[0] in ("SPACE")) and\
            (not iterator.show_next(2) or (iterator.show_next(2) and not less_prioritary_than(current[0], iterator.show_next(2)[0]))):
+
+            # do not be greedy when you are grouping on strings
+            if current[0] in STRING and iterator.show_next(2) and iterator.show_next(2)[0] in GROUP_SPACE_BEFORE:
+                yield current
+                continue
+
            after_space = next(iterator)
            current = (current[0], current[1], current[2] if len(current) > 2 else [], [after_space])
  
+
        yield current
--- a/baron/indentation_marker.py
+++ b/baron/indentation_marker.py
@@ -24,14 +24,20 @@ def mark_indentation(sequence):
    return list(mark_indentation_generator(sequence))
  
  
+def transform_tabs_to_spaces(string):
+    return string.replace("\t", " "*8)
+
+
 def get_space(node):
-    if len(node) < 3:
-        sys.stdout.write("WARNING")
-        return None
-    if len(node[3]) == 0:
-        sys.stdout.write("WARNING")
+    """ Return space formatting information of node.
+
+    If the node does not have a third formatting item - like in
+    a ('ENDL', '\n') node - then we return None as a flag value. This is
+    maybe not the best behavior but it seems to work for now.
+    """
+    if len(node) < 3 or len(node[3]) == 0:
        return None
-    return node[3][0][1].replace("	", " "*8)
+    return transform_tabs_to_spaces(node[3][0][1])
  
  
 def mark_indentation_generator(sequence):
@@ -52,7 +58,6 @@ def mark_indentation_generator(sequence):
                yield ('DEDENT', '')
                indentations.pop()
  
-        #sys.stdout.write(current, iterator.show_next())
        if current[0] == "COLON" and iterator.show_next()[0] == "ENDL":
            if iterator.show_next(2)[0] not in ("ENDL",):
                indentations.append(get_space(iterator.show_next()))
@@ -74,10 +79,27 @@ def mark_indentation_generator(sequence):
        if indentations and current[0] == "ENDL" and (len(current) != 4 or get_space(current) != indentations[-1]) and iterator.show_next()[0] != "ENDL":
            new_indent = get_space(current) if len(current) == 4 else ""
            yield current
-            while indentations and indentations[-1] > new_indent:
+            while indentations and string_is_bigger(indentations[-1], new_indent):
                indentations.pop()
                yield ('DEDENT', '')
            yield next(iterator)
            continue
  
        yield current
+
+
+def string_is_bigger(s1, s2):
+    """ Return s1 > s2 by taking into account None values.
+
+    None is always smaller than any string.
+
+    None > "string" works in python2 but not in python3. This function
+    makes it work in python3 too.
+    """
+    if s1 is None:
+        return False
+    elif s2 is None:
+        return True
+    else:
+        return s1 > s2
+
--- a/baron/inner_formatting_grouper.py
+++ b/baron/inner_formatting_grouper.py
 from .utils import FlexibleIterator, BaronError
  
+
 class UnExpectedFormattingToken(BaronError):
        pass
  

--- a/baron/parser.py
+++ b/baron/parser.py
+import errno
 import os
 import json
 import stat
@@ -74,9 +75,14 @@ class BaronParserGenerator(ParserGenerator):
                    table = LRTable.from_cache(g, data)
        if table is None:
            table = LRTable.from_grammar(g)
-            fd = os.open(cache_file, os.O_RDWR | os.O_CREAT | os.O_EXCL, 0o0600)
-            with os.fdopen(fd, "w") as f:
-                json.dump(self.serialize_table(table), f)
+            try:
+                fd = os.open(cache_file, os.O_RDWR | os.O_CREAT | os.O_EXCL, 0o0600)
+            except OSError as e:
+                if e.errno != errno.EEXIST:
+                    raise
+            else:
+                with os.fdopen(fd, "w") as f:
+                    json.dump(self.serialize_table(table), f)
        # meh :(
        #if table.sr_conflicts:
            #warnings.warn(

--- a/baron/render.py
+++ b/baron/render.py
@@ -124,6 +124,7 @@ nodes_rendering_order = {
        "float_exponant":    [("string", "value", True)],
        "left_parenthesis":  [("string", "value", True)],
        "right_parenthesis": [("string", "value", True)],
+        "float_exponant_complex":    [("string", "value", True)],
  
        "break":             [("string", "type", True)],
        "continue":          [("string", "type", True)],

--- a/baron/spliter.py
+++ b/baron/spliter.py
@@ -12,10 +12,12 @@ class UntreatedError(BaronError):
  
 def split_generator(sequence):
    iterator = FlexibleIterator(sequence)
-    while True:
-        if iterator.end():
-            return
  
+    # Pay attention that if a next() call fails, a StopIteration error
+    # is raised. This coincidentally is the same error used by python to
+    # understand that a function using yield has finished processing.
+    # It's not a bad thing, but it must be kept in mind.
+    while not iterator.end():
        not_found = True
  
        if iterator.next_in("#"):
@@ -30,15 +32,25 @@ def split_generator(sequence):
                result += next(iterator)
                result += next(iterator)
                result += iterator.grab_string(lambda iterator: not iterator.next_starts_with(section * 3))
-                result += next(iterator)
-                result += next(iterator)
-                result += next(iterator)
+                # This next() call can fail if no closing quote exists. We
+                # still want to yield so we catch it.
+                try:
+                    result += next(iterator)
+                    result += next(iterator)
+                    result += next(iterator)
+                except StopIteration:
+                    pass
                yield result
            elif iterator.next_in(section):
                not_found = False
                result = next(iterator)
                result += iterator.grab_string(lambda iterator: iterator.show_next() not in section)
-                result += next(iterator)
+                # This next() call can fail if no closing quote exists. We
+                # still want to yield so we catch it.
+                try:
+                    result += next(iterator)
+                except StopIteration:
+                    pass
                yield result
  
        for section in (string.ascii_letters + "_" + "1234567890", " \t"):

--- a/baron/tokenizer.py
+++ b/baron/tokenizer.py
 import re
+from .utils import BaronError
  
  
-class UnknowItem(Exception):
+class UnknowItem(BaronError):
    pass
  
 KEYWORDS = ("and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", "exec", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "not", "or", "pass", "print", "raise", "return", "try", "while", "with", "yield")

--- a/baron/utils.py
+++ b/baron/utils.py
@@ -33,12 +33,12 @@ class FlexibleIterator():
        return self.sequence[self.position + 1: self.position + 1 + size_of_choice] == sentence
  
    def next_in(self, choice):
-        if self.position + 1 == len(self.sequence):
+        if self.position + 1 >= len(self.sequence):
            return False
        return self.sequence[self.position + 1] in choice
  
    def show_next(self, at=1):
-        if self.position + at == len(self.sequence):
+        if self.position + at >= len(self.sequence):
            return None
        return self.sequence[self.position + at]
  

--- a/docs/advanced.rst
+++ b/docs/advanced.rst
@@ -36,18 +36,14 @@ Let's first see the difference between the two functions:
    from baron.path import position_to_node, position_to_path
    from baron.helpers import show_node
  
-    some_code = """\
-    from baron import parse
-    from baron.helpers import show_node
-    fst = parse("a = 1")
-    show_node(fst)
-    """
+    some_code = """from baron import parse\nfrom baron.helpers import show_node\nfst = parse("a = 1")\nshow_node(fst)"""
+    print some_code
  
    tree = parse(some_code)
  
-    node = position_to_node(tree, 3, 8)
+    node = position_to_node(tree, (3, 8))
    show_node(node)
-    path = position_to_path(tree, 3, 8)
+    path = position_to_path(tree, (3, 8))
    path
  
 The first one gives the node and the second one the node's path in the
@@ -79,19 +75,19 @@ a :file:`funcdef` node:
  
    fst = parse("a(1)")
  
-    position_to_path(fst, 1, 1)
-    position_to_path(fst, 1, 2)
-    position_to_path(fst, 1, 3)
-    position_to_path(fst, 1, 4)
+    position_to_path(fst, (1, 1))
+    position_to_path(fst, (1, 2))
+    position_to_path(fst, (1, 3))
+    position_to_path(fst, (1, 4))
  
 By the way, out of bound positions are handled gracefully:
  
 .. ipython:: python
  
-    print(position_to_node(fst, -1, 1))
-    print(position_to_node(fst, 1, 0))
-    print(position_to_node(fst, 1, 5))
-    print(position_to_node(fst, 2, 4))
+    print(position_to_node(fst, (-1, 1)))
+    print(position_to_node(fst, (1, 0)))
+    print(position_to_node(fst, (1, 5)))
+    print(position_to_node(fst, (2, 4)))
  
  
 Bounding Box
@@ -148,4 +144,3 @@ As you can see, the major difference between the two functions is that
 :file:`(1, 1)` since it considers you want the bounding box of the whole
 node while :file:`path_to_bounding_box` takes the location of the node
 in the fst into account.
-
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -13,12 +13,12 @@ comments, formatting), a FST keeps everything and guarantees the
 operation :file:`fst_to_code(code_to_fst(source_code)) == source_code`.
  
 If you want to understand why this is important, read this:
-https://github.com/psycojoker/baron#why-is-this-important
+https://github.com/PyCQA/baron#why-is-this-important
  
 Github (code, bug tracker, etc.)
 --------------------------------
  
-https://github.com/psycojoker/baron
+https://github.com/PyCQA/baron
  
 Installation
 ------------
@@ -31,7 +31,7 @@ RedBaron
 --------
  
 There is a good chance that you'll want to use `RedBaron
-<https://redbaron.readthedocs.org>`_ instead of using Baron directly.
+<https://redbaron.readthedocs.io>`_ instead of using Baron directly.
 Think of Baron as the "bytecode of python source code" and RedBaron as
 some sort of usable layer on top of it, a bit like dom/jQuery or
 html/Beautifulsoup.

--- a/setup.py
+++ b/setup.py
@@ -12,12 +12,12 @@ except ImportError:
  
  
 setup(name='baron',
-      version='0.6',
+      version='0.6.2',
      description='Full Syntax Tree for python to make writing refactoring code a realist task',
      author='Laurent Peuch',
      long_description=read_md("README.md") + "\n\n" + open("CHANGELOG", "r").read(),
      author_email='cortex@worlddomination.be',
-      url='https://github.com/Psycojoker/baron',
+      url='https://github.com/PyCQA/baron',
      install_requires=['rply'],
      packages=['baron'],
      license='lgplv3+',

--- a/tests/test_baron.py
+++ b/tests/test_baron.py
 # -*- coding: utf-8 -*-
  
-from baron import parse, ParsingError, UnExpectedFormattingToken, GroupingError, UntreatedError
+from baron import (parse, BaronError, ParsingError,
+                   UnExpectedFormattingToken, GroupingError,
+                   UntreatedError)
 import pytest
  
  
@@ -11,18 +13,48 @@ def test_dummy_parse():
 def test_error_parsing_error():
    with pytest.raises(ParsingError):
        parse("(")
+    with pytest.raises(BaronError):
+        parse("(")
  
  
 def test_error_unexpected_formatting():
    with pytest.raises(UnExpectedFormattingToken):
        parse("   a\nb")
+    with pytest.raises(BaronError):
+        parse("   a\nb")
  
  
 def test_error_grouping():
    with pytest.raises(GroupingError):
        parse("   (a\n b")
+    with pytest.raises(BaronError):
+        parse("   (a\n b")
  
  
 def test_error_untreated_error():
    with pytest.raises(UntreatedError):
        parse("?")
+    with pytest.raises(BaronError):
+        parse("?")
+
+
+def test_missing_quote_yields_error():
+    with pytest.raises(UntreatedError):
+        parse("'")
+    with pytest.raises(UntreatedError):
+        parse("'\n")
+    with pytest.raises(BaronError):
+        parse("'\n")
+
+
+def test_error_bad_indentation():
+    """ Regression test case
+
+    It shows a discrepancy between python2 and python3 in string
+    comparisons with None.
+    """
+    with pytest.raises(ParsingError):
+        parse("def fun():\nass")
+    with pytest.raises(BaronError):
+        parse("def fun():\nass")
+
--- a/tests/test_formatting_grouper.py
+++ b/tests/test_formatting_grouper.py
@@ -2897,3 +2897,65 @@ def test_strings():
        ], [
            (i, 'dummy', [('SPACE', ' ')], [('SPACE', ' ')]),
        ])
+
+
+def test_inconsistancy_on_space_grouping():
+    group([
+        ('LEFT_PARENTHESIS', '('),
+        ('SPACE', ' '),
+        ('INT', '1'),
+        ('SPACE', ' '),
+        ('RIGHT_PARENTHESIS', ')'),
+    ], [
+        ('LEFT_PARENTHESIS', '(', [], [('SPACE', ' ')]),
+        ('INT', '1'),
+        ('RIGHT_PARENTHESIS', ')', [('SPACE', ' ')]),
+    ])
+
+    group([
+        ('LEFT_PARENTHESIS', '('),
+        ('SPACE', ' '),
+        ('STRING', '"a"'),
+        ('SPACE', ' '),
+        ('RIGHT_PARENTHESIS', ')'),
+    ], [
+        ('LEFT_PARENTHESIS', '(', [], [('SPACE', ' ')]),
+        ('STRING', '"a"'),
+        ('RIGHT_PARENTHESIS', ')', [('SPACE', ' ')]),
+    ])
+
+
+def test_space_before_comment():
+    group([
+        ('ENDL', '\n'),
+        ('SPACE', ' '),
+        ('COMMENT', '# hello'),
+        ('ENDL', '\n'),
+        ('IMPORT', 'import'),
+        ('SPACE', ' '),
+        ('NAME', 're'),
+        ('ENDL', '\n'),
+        ('COMMENT', '# hi'),
+        ('ENDL', '\n'),
+        ('IMPORT', 'import'),
+        ('SPACE', ' '),
+        ('NAME', 'sys'),
+        ('ENDL', '\n'),
+        ('ENDMARKER', ''),
+    ], [
+        ('ENDL', '\n', [], [('SPACE', ' ')]),
+        ('COMMENT', '# hello'),
+        ('ENDL', '\n'),
+        ('IMPORT', 'import', [], [('SPACE', ' ')]),
+        ('NAME', 're'),
+        ('ENDL', '\n'),
+        ('COMMENT', '# hi'),
+        ('ENDL', '\n'),
+        ('IMPORT', 'import', [], [('SPACE', ' ')]),
+        ('NAME', 'sys'),
+        ('ENDL', '\n'),
+        ('ENDMARKER', '')
+    ]
+
+    )
+
--- a/tests/test_spliter.py
+++ b/tests/test_spliter.py
@@ -2,8 +2,9 @@
 # -*- coding:Utf-8 -*-
  
  
-from baron.spliter import split
+from baron.spliter import split, UntreatedError
 from baron.utils import python_version
+import pytest
  
  
 def test_empty():
@@ -321,6 +322,20 @@ def test_multi_string_other_quotes():
    assert split('"""pouet pouet"""') == ['"""pouet pouet"""']
  
  
+def test_missing_quote_yields_error():
+    with pytest.raises(UntreatedError):
+        split("'")
+
+    with pytest.raises(UntreatedError):
+        split("'''")
+
+    with pytest.raises(UntreatedError):
+        split('"')
+
+    with pytest.raises(UntreatedError):
+        split('"""')
+
+
 def test_escape():
    assert split("\\\\") == ["\\", "\\"]