Commit 7026ff59 authored by frmdstryr's avatar frmdstryr Committed by Sven Brauch
Browse files

WIP: Use c-api parser for python 3.10

parent c5c4478d
......@@ -43,7 +43,7 @@ add_definitions( -DTRANSLATION_DOMAIN=\"kdevpython\" )
# CMake looks for exactly the specified version first and ignores newer versions.
# To avoid that, start looking for the newest supported version and work down.
set(Python_ADDITIONAL_VERSIONS 3.9 3.8 3.7 3.6 3.5 3.4)
set(Python_ADDITIONAL_VERSIONS 3.10 3.9 3.8 3.7 3.6 3.5 3.4)
foreach(_PYTHON_V ${Python_ADDITIONAL_VERSIONS})
find_package(PythonInterp ${_PYTHON_V})
if ( PYTHONINTERP_FOUND )
......
......@@ -5,6 +5,7 @@ set(parser_STAT_SRCS
astdefaultvisitor.cpp
astvisitor.cpp
astbuilder.cpp
asttransformer.cpp
cythonsyntaxremover.cpp
rangefixvisitor.cpp
)
......
# PEGEN parser
Generate the parser with:
```
python -m pegen python.gram -o parser.py
```
......@@ -319,6 +319,7 @@ public:
ExpressionAst* iterator;
QList<Ast*> body;
QList<Ast*> orelse;
bool async;
};
class KDEVPYTHONPARSER_EXPORT WhileAst : public StatementAst {
......@@ -349,6 +350,7 @@ public:
WithAst(Ast* parent);
QList<Ast*> body;
QList<WithItemAst*> items;
bool async;
};
class KDEVPYTHONPARSER_EXPORT RaiseAst : public StatementAst {
......
......@@ -15,19 +15,19 @@
#include <memory>
#include "python_header.h"
#include "asttransformer.h"
#include "astdefaultvisitor.h"
#include "cythonsyntaxremover.h"
#include "rangefixvisitor.h"
#include <QStandardPaths>
#include <QDebug>
#include "parserdebug.h"
using namespace KDevelop;
extern grammar _PyParser_Grammar;
namespace Python
{
#include "generated.h"
QMutex AstBuilder::pyInitLock;
......@@ -52,27 +52,74 @@ QString PyUnicodeObjectToQString(PyObject* obj) {
Q_UNREACHABLE();
}
namespace {
struct PythonInitializer : private QMutexLocker {
PythonInitializer(QMutex& pyInitLock):
QMutexLocker(&pyInitLock), arena(nullptr)
class PyObjectRef {
public:
PyObjectRef(PyObject* py_obj): obj(py_obj) {}
PyObject* get() {return obj;}
~PyObjectRef() {
Py_XDECREF(obj);
obj = nullptr;
}
PyObject* obj = nullptr;
};
struct PythonParser : private QMutexLocker
{
PyObject* m_parser_mod = nullptr;
PyObject* m_parse_func = nullptr;
// Passes the mutex to the QMutexLocker base, initialises the embedded
// interpreter and looks up the `parse` attribute of the Python `ast` module.
// NOTE(review): a failed import or lookup is only caught by Q_ASSERT;
// presumably nothing guards it in builds where Q_ASSERT is disabled - confirm.
PythonParser(QMutex& lock): QMutexLocker(&lock)
{
Py_InitializeEx(0);
Q_ASSERT(Py_IsInitialized());
Py_InitializeEx(0);
Q_ASSERT(Py_IsInitialized());
//addSupportDirToSysPath();
// Import the parse function. This is intentionally a separate module
// to allow other parsers to be hooked in without needing to re-compile.
m_parser_mod = PyImport_ImportModule("ast");
Q_ASSERT(m_parser_mod); // parser import error
m_parse_func = PyObject_GetAttrString(m_parser_mod, "parse");
Q_ASSERT(m_parse_func); // parser function renamed?
}
void addSupportDirToSysPath() const
{
QFileInfo parserFile = QStandardPaths::locate(QStandardPaths::GenericDataLocation, "kdevpythonsupport/kdevparser.py");
QString supportDir = parserFile.absoluteDir().path();
Q_ASSERT(supportDir.size());
PyObjectRef sys = PyImport_ImportModule("sys");
if (!sys.get()) return;
PyObjectRef path = PyObject_GetAttrString(sys.get(), "path");
if (!path.get()) return;
PyObjectRef append = PyObject_GetAttrString(path.get(), "append");
if (!append.get()) return;
PyObjectRef arg = PyUnicode_FromString(supportDir.toUtf8().data());
if (!arg.get()) return;
PyObjectRef r = PyObject_CallOneArg(append.get(), arg.get());
}
arena = PyArena_New();
Q_ASSERT(arena); // out of memory
// Calls m_parse_func as parse(source, filename, "exec") and returns the
// resulting Python object (the ast.Module when m_parse_func is ast.parse).
// Returns nullptr when the argument objects cannot be created or the call
// itself fails; in both cases the Python error indicator is left set by the
// failing C-API call.
// NOTE: The caller must DECREF the result
PyObject* parse(QString const &source, QString const &filename) const
{
    PyObject* args = PyTuple_New(3);
    if (!args) {
        return nullptr;
    }

    PyObject* pySource = PyUnicode_FromString(source.toUtf8().data());
    PyObject* pyFilename = PyUnicode_FromString(filename.toUtf8().data());
    PyObject* pyMode = PyUnicode_FromString("exec");

    // PyTuple_SET_ITEM steals the references, so from here on the tuple owns
    // the strings. Null slots are tolerated when the tuple is released.
    PyTuple_SET_ITEM(args, 0, pySource);
    PyTuple_SET_ITEM(args, 1, pyFilename);
    PyTuple_SET_ITEM(args, 2, pyMode);

    // Never hand a partially filled tuple to the interpreter.
    PyObject* result = nullptr;
    if (pySource && pyFilename && pyMode) {
        result = PyObject_CallObject(m_parse_func, args);
    }

    Py_DECREF(args);
    return result;
}
~PythonInitializer()
// Frees the arena when one is set. If the interpreter is still initialised,
// drops the references to the parse function and its module and then
// finalises the interpreter.
// NOTE(review): the `arena` lines look like leftovers of the earlier
// PyArena-based initializer - confirm they still belong here.
~PythonParser()
{
if (arena)
PyArena_Free(arena);
if (Py_IsInitialized())
{
// Release our own references before the interpreter is shut down.
Py_XDECREF(m_parse_func);
Py_XDECREF(m_parser_mod);
Py_Finalize();
}
}
PyArena* arena;
};
}
CodeAst::Ptr AstBuilder::parse(const QUrl& filename, QString &contents)
{
......@@ -82,17 +129,7 @@ CodeAst::Ptr AstBuilder::parse(const QUrl& filename, QString &contents)
contents.append('\n');
PythonInitializer pyIniter(pyInitLock);
PyArena* arena = pyIniter.arena;
#if PYTHON_VERSION >= QT_VERSION_CHECK(3, 8, 0)
PyCompilerFlags flags;
flags.cf_flags = PyCF_SOURCE_IS_UTF8 | PyCF_IGNORE_COOKIE | PyCF_ONLY_AST;
flags.cf_feature_version = PYTHON_VERSION_MINOR;
#else
PyCompilerFlags flags = {PyCF_SOURCE_IS_UTF8 | PyCF_IGNORE_COOKIE};
#endif
PythonParser py_parser(pyInitLock);
CythonSyntaxRemover cythonSyntaxRemover;
if (filename.fileName().endsWith(".pyx", Qt::CaseInsensitive)) {
......@@ -100,7 +137,8 @@ CodeAst::Ptr AstBuilder::parse(const QUrl& filename, QString &contents)
contents = cythonSyntaxRemover.stripCythonSyntax(contents);
}
mod_ty syntaxtree = PyParser_ASTFromString(contents.toUtf8().data(), "<kdev-editor-contents>", file_input, &flags, arena);
PyObject* syntaxtree = py_parser.parse(contents, filename.fileName());
if ( ! syntaxtree ) {
qCDebug(KDEV_PYTHON_PARSER) << " ====< parse error, trying to fix";
......@@ -207,11 +245,14 @@ CodeAst::Ptr AstBuilder::parse(const QUrl& filename, QString &contents)
}
}
syntaxtree = PyParser_ASTFromString(contents.toUtf8(), "<kdev-editor-contents>", file_input, &flags, arena);
syntaxtree = py_parser.parse(contents, filename.fileName());
// 3rd try: discard everything after the last non-empty line, but only until the next block start
currentLineBeginning = qMin(contents.length() - 1, currentLineBeginning);
errline = qMax(0, qMin(indents.length()-1, errline));
if ( ! syntaxtree ) {
PyErr_Fetch(&exception, &value, &backtrace);
qCDebug(KDEV_PYTHON_PARSER) << "Error objects: " << exception << value << backtrace;
qCWarning(KDEV_PYTHON_PARSER) << "Discarding parts of the code to be parsed because of previous errors";
qCDebug(KDEV_PYTHON_PARSER) << indents;
int indentAtError = indents.at(errline);
......@@ -249,22 +290,23 @@ CodeAst::Ptr AstBuilder::parse(const QUrl& filename, QString &contents)
if ( c.isSpace() && atLineBeginning ) currentIndent += 1;
}
qCDebug(KDEV_PYTHON_PARSER) << "This is what is left: " << contents;
syntaxtree = PyParser_ASTFromString(contents.toUtf8(), "<kdev-editor-contents>", file_input, &flags, arena);
syntaxtree = py_parser.parse(contents, filename.fileName());
}
if ( ! syntaxtree ) {
return CodeAst::Ptr(); // everything fails, so we abort.
}
}
qCDebug(KDEV_PYTHON_PARSER) << "Got syntax tree from python parser:" << syntaxtree->kind << Module_kind;
QString kind = PyUnicodeObjectToQString(PyObject_Repr(syntaxtree));
qCDebug(KDEV_PYTHON_PARSER) << "Got syntax tree from python parser:" << kind;
PythonAstTransformer t;
AstTransformer t;
t.run(syntaxtree, filename.fileName().replace(".py", ""));
Py_DECREF(syntaxtree);
RangeFixVisitor fixVisitor(contents);
fixVisitor.visitNode(t.ast);
cythonSyntaxRemover.fixAstRanges(t.ast);
return CodeAst::Ptr(t.ast);
}
......
......@@ -11,6 +11,7 @@
#include "ast.h"
#include "parserexport.h"
#include <QMutex>
#include <QUrl>
#include "astdefaultvisitor.h"
......
This diff is collapsed.
#pragma once
#include "kdevpythonversion.h"
#include "python_grammar.h"
namespace Python
{
/**
 * Converts a Python-side syntax tree, handed over as a PyObject, into the
 * plugin's own Ast node tree.
 *
 * Usage: call run() once, then read the result from `ast`. `ast` stays null
 * until run() has produced a module node.
 */
class AstTransformer {
public:
    // Root of the converted tree; null until run() succeeds.
    CodeAst* ast = nullptr;
    Grammar grammar;

    AstTransformer() {}

    // Converts `syntaxtree` through visitModuleNode() and names the resulting
    // module `moduleName`. If no module node comes back, `ast` is left null
    // instead of being dereferenced.
    void run(PyObject *syntaxtree, QString moduleName) {
        ast = static_cast<CodeAst*>(visitModuleNode(syntaxtree, nullptr));
        if ( ! ast ) {
            return;
        }
        ast->name = new Identifier(moduleName);
    }

    // Shift lines by some fixed amount
    inline int tline(int line) {
        if ( line == -99999 ) {
            // don't touch the marker
            return -99999;
        }
        return line - 1;
    }

    // NOTE(review): as written here the body calls getattr<T> with the same
    // arguments, i.e. itself, unless an explicit specialisation for T is
    // provided elsewhere - confirm one exists for every T that is used.
    template <class T>
    T getattr(PyObject *obj, const char *attr) const {
        return getattr<T>(obj, attr);
    }

public:
    // One visitor per node category. Presumably each returns the plugin node
    // built for `node` with `parent` as its parent - defined outside this header.
    Ast* visitNode(PyObject* node, Ast* parent);
    template<typename K> QList<K*> visitNodeList(PyObject* node, Ast* parent);
    Ast* visitModuleNode(PyObject* node, Ast* parent);
    Ast* visitStmtNode(PyObject* node, Ast* parent);
    Ast* visitExprNode(PyObject* node, Ast* parent);
    Ast* visitComprehensionNode(PyObject* node, Ast* parent);
    Ast* visitExceptHandlerNode(PyObject* node, Ast* parent);
    Ast* visitArgumentsNode(PyObject* node, Ast* parent);
    Ast* visitArgNode(PyObject* node, Ast* parent);
    Ast* visitKeywordNode(PyObject* node, Ast* parent);
    Ast* visitAliasNode(PyObject* node, Ast* parent);
    Ast* visitWithItemNode(PyObject* node, Ast* parent);
    Ast* visitSliceNode(PyObject* node, Ast* parent);
};
} // namespace Python
#!/usr/bin/env python
# SPDX-FileCopyrightText: 2014 Sven Brauch <svenbrauch@gmail.com>
# SPDX-License-Identifier: GPL-2.0-or-later
# Transforms a conversion definition file (.sdef) into C++ code. To be copied over manually. :)
# sdef example line:
# RULE_FOR _stmt;KIND Expr_kind;ACTIONS create|ExpressionAst set|value->ExpressionAst,value;CODE;;
import sys
contents = open('python39.sdef').read().replace("\n", "").split(';;')
# C++ template for a visitNode() overload that switches on node->kind.
# %{RULE_FOR}, %{SWITCH_LINES} and %{APPENDIX} are substituted when the
# functions are printed at the end of the script.
func_structure = '''
Ast* visitNode(%{RULE_FOR}* node) {
if ( ! node ) return nullptr;
bool ranges_copied = false; Q_UNUSED(ranges_copied);
Ast* result = nullptr;
switch ( node->kind ) {
%{SWITCH_LINES}
default:
qWarning() << "Unsupported %{RULE_FOR} AST type: " << node->kind;
Q_ASSERT(false);
}
%{APPENDIX}
if ( result && result->astType == Ast::NameAstType ) {
NameAst* r = static_cast<NameAst*>(result);
r->startCol = r->identifier->startCol;
r->endCol = r->identifier->endCol;
r->startLine = r->identifier->startLine;
r->endLine = r->identifier->endLine;
}
return result;
}
'''
# C++ template for rule types whose KIND is 'any': no switch, the actions are
# emitted directly and the created node `v` is returned.
simple_func_structure = '''
Ast* visitNode(%{RULE_FOR}* node) {
bool ranges_copied = false; Q_UNUSED(ranges_copied);
if ( ! node ) return nullptr;
%{SWITCH_LINES}
return v;
}
'''
# One `case` of the switch in func_structure.
switch_line = ''' case %{KIND}: {
%{ACTIONS}
break;
}'''
# Emitted for the 'create' action.
create_ast_line = ''' %{AST_TYPE}* v = new %{AST_TYPE}(parent());'''
# Templates for the 'set' action. The *_any variants are selected when the
# rule's KIND is 'any' and read from `node->` instead of `node->v.<Kind>.`.
# '~' operator: wrap a Python string object into an Identifier.
create_identifier_line = ''' v->%{TARGET} = node->v.%{KIND_W/O_SUFFIX}.%{VALUE} ? new Python::Identifier(PyUnicodeObjectToQString(node->v.%{KIND_W/O_SUFFIX}.%{VALUE})) : nullptr;'''
# '-' operator: visit a single child node.
set_attribute_line = ''' nodeStack.push(v); v->%{TARGET} = static_cast<%{AST_TYPE}*>(visitNode(node->v.%{KIND_W/O_SUFFIX}.%{VALUE})); nodeStack.pop();'''
# '=' operator: visit a list of child nodes.
resolve_list_line = ''' nodeStack.push(v); v->%{TARGET} = visitNodeList<%{PYTHON_AST_TYPE}, %{AST_TYPE}>(node->v.%{KIND_W/O_SUFFIX}.%{VALUE}); nodeStack.pop();'''
create_identifier_line_any = ''' v->%{TARGET} = node->%{VALUE} ? new Python::Identifier(PyUnicodeObjectToQString(node->%{VALUE})) : nullptr;'''
set_attribute_line_any = ''' nodeStack.push(v); v->%{TARGET} = static_cast<%{AST_TYPE}*>(visitNode(node->%{VALUE})); nodeStack.pop();'''
resolve_list_line_any = ''' nodeStack.push(v); v->%{TARGET} = visitNodeList<%{PYTHON_AST_TYPE}, %{AST_TYPE}>(node->%{VALUE}); nodeStack.pop();'''
# ':' operator: plain assignment.
direct_assignment_line = ''' v->%{TARGET} = node->v.%{KIND_W/O_SUFFIX}.%{VALUE};'''
direct_assignment_line_any = ''' v->%{TARGET} = node->v.%{VALUE};'''
# '*' operator: cast to an ExpressionAst enum type.
cast_operator_line = ''' v->%{TARGET} = (ExpressionAst::%{AST_TYPE}) node->v.%{KIND_W/O_SUFFIX}.%{VALUE};'''
# '$' operator: convert a Python string object to a QString.
resolve_string = ''' v->%{TARGET} = PyUnicodeObjectToQString(node->v.%{KIND_W/O_SUFFIX}.%{VALUE});'''
# '+' operator: copy a field of `node` unchanged.
assign_mindless = ''' v->%{TARGET} = node->%{VALUE};'''
# 'l' operator: copy a line number through tline().
assign_linetransform = ''' v->%{TARGET} = tline(node->%{VALUE} - 1);'''
# '_' operator: map the None/False/True singletons to NameConstantAst values.
singleton_convert_line = ''' v->%{TARGET} = node->v.NameConstant.value == Py_None ? NameConstantAst::None : node->v.NameConstant.value == Py_False ? NameConstantAst::False : NameConstantAst::True;'''
# '#' operator: copy a sequence of operators, casting each element.
resolve_oplist_block = '''
for ( int _i = 0; _i < node->v.%{KIND_W/O_SUFFIX}.%{VALUE}->size; _i++ ) {
v->%{TARGET}.append((ExpressionAst::%{AST_TYPE}) node->v.%{KIND_W/O_SUFFIX}.%{VALUE}->elements[_i]);
}
'''
# '=' operator with AST type 'Identifier': copy a sequence of names.
resolve_identifier_block = '''
for ( int _i = 0; _i < node->v.%{KIND_W/O_SUFFIX}.%{VALUE}->size; _i++ ) {
Python::Identifier* id = new Python::Identifier(PyUnicodeObjectToQString(
static_cast<PyObject*>(node->v.%{KIND_W/O_SUFFIX}.%{VALUE}->elements[_i])
));
v->%{TARGET}.append(id);
}
'''
# Appended after a '~' identifier assignment for _expr, _stmt, _excepthandler
# and _arg rules: derives the identifier's and the node's ranges from `node`.
copy_ident_ranges = '''
if ( v->%{TARGET} ) {
v->%{TARGET}->startCol = node->col_offset; v->startCol = v->%{TARGET}->startCol;
v->%{TARGET}->startLine = tline(node->lineno - 1); v->startLine = v->%{TARGET}->startLine;
v->%{TARGET}->endCol = node->col_offset + v->%{TARGET}->value.length() - 1; v->endCol = v->%{TARGET}->endCol;
v->%{TARGET}->endLine = tline(node->lineno - 1); v->endLine = v->%{TARGET}->endLine;
ranges_copied = true;
}'''
# results: rule type -> list of generated C++ snippets.
results = dict()
# does_match_any: rule type -> whether the simple (switch-less) template is used.
does_match_any = dict()
def pluginAstToPythonAstType(plugintypestr):
    """Return the Python AST struct name for a plugin AST class name.

    Unknown names produce a warning on stderr and the marker '<ERROR>'.
    """
    type_table = (
        ('ExpressionAst', '_expr'),
        ('StatementAst', '_stmt'),
        ('NameAst', '_expr'),
        ('ExceptionHandlerAst', '_excepthandler'),
        ('ComprehensionAst', '_comprehension'),
        ('KeywordAst', '_keyword'),
        ('ArgumentsAst', '_arguments'),
        ('AliasAst', '_alias'),
        ('SliceAst', '_slice'),
        ('Ast', '_stmt'),  # not sure about this
        ('GeneratorExpressionAst', '_expr'),
        ('ArgAst', '_arg'),
        ('WithItemAst', '_withitem'),
    )
    for plugin_name, python_name in type_table:
        if plugintypestr == plugin_name:
            return python_name
    sys.stderr.write("W: Could not decode name %s\n" % plugintypestr)
    return '<ERROR>'
# Translate every sdef rule into a C++ snippet and collect the snippets per
# rule type in `results`.
#
# A rule looks like
#   RULE_FOR <type>;KIND <kind>;ACTIONS <action> ...[;BEFORE x.y|SINCE x.y][;CODE <c++>]
# where each action is `create|<AstType>` or `set|<target><op>><args>`.
for rule in contents:
    outline = rule.split(';')
    command = outline[0]
    if command[:7] == 'COMMENT' or command == '':
        continue
    elif command[:7] != 'RULE_FO':
        raise SyntaxError('Invalid syntax in sdef file, line: ' + rule)
    rule_for = outline[0].split(' ')[1]
    kind = outline[1].split(' ')[1]
    kind_wo_suffix = kind.replace('_kind', '')
    # Decided once per rule. The previous code set a variable named `any`
    # only while handling a 'set' action, so a rule without one recorded a
    # stale value or the (always truthy) builtin `any`.
    is_any = kind == 'any'
    actions = outline[2].split(' ')[1:]
    code = None
    since_version = None
    before_version = None
    if len(outline) > 3:
        if outline[3].startswith('BEFORE'):
            before_version = [int(n) for n in outline[3][7:].split('.')]
        elif outline[3].startswith('SINCE'):
            since_version = [int(n) for n in outline[3][6:].split('.')]
        elif outline[3].startswith('CODE'):
            code = ' '.join(';'.join(outline[3:]).split('CODE')[1:]) + ";"
        else:
            raise SyntaxError('Invalid syntax in sdef file, line: ' + rule)
    if len(outline) > 4 and outline[4].startswith('CODE'):
        code = ' '.join(';'.join(outline[4:]).split('CODE')[1:]) + ";"
    if rule_for not in results:
        results[rule_for] = list()
    current_actions = list()
    created_v = False
    for action in actions:
        command = action.split('|')[0]
        try:
            arguments = action.split('|')[1]
        except IndexError:
            # Not of the form command|arguments; nothing to generate.
            continue
        if command == 'set':
            s = arguments.split('>')
            commandType = s[0][-1]  # -, ~, =, : , *, #
            target = s[0][:-1]
            s = s[1].split(',')
            raw = False
            # commands with one argument
            if commandType in ['~', ':', '$', '+', 'l', '_']:
                if commandType == '_':
                    raw = singleton_convert_line
                if commandType == ':':
                    raw = direct_assignment_line if not is_any else direct_assignment_line_any
                if commandType == '~':
                    raw = create_identifier_line if not is_any else create_identifier_line_any
                    if rule_for in ['_expr', '_stmt', '_excepthandler', '_arg']:
                        raw += copy_ident_ranges
                if commandType == '$':
                    raw = resolve_string
                if commandType == '+':
                    raw = assign_mindless
                if commandType == 'l':
                    raw = assign_linetransform
                value = s[0]
            # commands with two arguments
            else:
                astType = s[0]
                try:
                    value = s[1]
                except IndexError:
                    raise SyntaxError('Missing argument for operator ' + commandType + ' in rule: ' + rule)
                if commandType == '=':
                    if astType == 'Identifier':
                        raw = resolve_identifier_block
                    else:
                        raw = resolve_list_line if not is_any else resolve_list_line_any
                if commandType == '-':
                    raw = set_attribute_line if not is_any else set_attribute_line_any
                if commandType == '*':
                    raw = cast_operator_line
                if commandType == '#':
                    raw = resolve_oplist_block
            if raw:
                # For one-argument commands astType still holds the value
                # from the most recent 'create' or two-argument action.
                command = raw.replace('%{AST_TYPE}', astType).replace('%{TARGET}', target) \
                    .replace('%{PYTHON_AST_TYPE}', pluginAstToPythonAstType(astType)) \
                    .replace('%{KIND_W/O_SUFFIX}', kind_wo_suffix).replace('%{VALUE}', value)
            else:
                command = '<MISSING RAW>'
            current_actions.append(command)
        elif command == 'create':
            astType = arguments
            current_actions.append(create_ast_line.replace('%{AST_TYPE}', astType))
            created_v = True
    if code:
        current_actions.append(code)
    current_actions = "\n".join(current_actions)
    if is_any:
        current_stmt = current_actions
    else:
        if created_v:
            current_actions += "\n result = v;"
        current_stmt = switch_line.replace('%{KIND}', kind).replace('%{ACTIONS}', current_actions)
    # Wrap version-specific rules in a preprocessor guard.
    if before_version:
        version_cpp_if = ("#if PYTHON_VERSION < QT_VERSION_CHECK(%d, %d, 0)\n"
                          % (before_version[0], before_version[1]))
        current_stmt = version_cpp_if + current_stmt + "\n#endif"
    if since_version:
        version_cpp_if = ("#if PYTHON_VERSION >= QT_VERSION_CHECK(%d, %d, 0)\n"
                          % (since_version[0], since_version[1]))
        current_stmt = version_cpp_if + current_stmt + "\n#endif"
    results[rule_for].append(current_stmt)
    does_match_any[rule_for] = is_any
# Emit the fixed preamble of the generated header: the includes and the
# opening of class PythonAstTransformer with run(), tline(), the node stack
# and the visitNodeList() helper. The per-type visitNode() overloads are
# printed after this.
print('''/* This code is generated by conversiongenerator.py.
* I do not recommend editing it.
* To update, run: python2 conversionGenerator.py > generated.h
*/
#include <QStack>
#include "kdevpythonversion.h"
class PythonAstTransformer {
public:
CodeAst* ast;
void run(mod_ty syntaxtree, QString moduleName) {
ast = new CodeAst();
ast->name = new Identifier(moduleName);
nodeStack.push(ast);
ast->body = visitNodeList<_stmt, Ast>(syntaxtree->v.Module.body);
nodeStack.pop();
Q_ASSERT(nodeStack.isEmpty());
}
// Shift lines by some fixed amount
inline int tline(int line) {
if ( line == -99999 ) {
// don't touch the marker
return -99999;
}
return line;
};
private:
QStack<Ast*> nodeStack;
Ast* parent() {
return nodeStack.top();
}
template<typename T, typename K> QList<K*> visitNodeList(asdl_seq* node) {
QList<K*> nodelist;
if ( ! node ) return nodelist;
for ( int i=0; i < node->size; i++ ) {
T* currentNode = static_cast<T*>(node->elements[i]);
Ast* result = visitNode(currentNode);
K* transformedNode = static_cast<K*>(result);
nodelist.append(transformedNode);
}
return nodelist;
}
''')
# Print one visitNode() overload per rule type, in sorted order of the type
# name, followed by the closing of the generated class.
# NOTE(review): the indentation of this loop is not visible here; presumably
# both `appendix` assignments belong to the `_expr`/`_stmt` branch - confirm.
for index, lines in sorted(results.items()):
current_switch_lines = "\n".join(lines)
appendix = ''
# Expression and statement nodes get range information copied from `node`.
if index == '_expr' or index == '_stmt':
appendix = '''
if ( ! result ) return nullptr;
if ( ! ranges_copied ) {
result->startCol = node->col_offset;
result->endCol = node->col_offset;
result->startLine = tline(node->lineno - 1);
result->endLine = tline(node->lineno - 1);
result->hasUsefulRangeInformation = true;
}
else {
result->hasUsefulRangeInformation = true;
}
'''
appendix += '''
// Walk through the tree and set proper end columns and lines, as the python parser sadly does not do this for us
if ( result->hasUsefulRangeInformation ) {
Ast* parent = result->parent;
while ( parent ) {
if ( parent->endLine < result->endLine ) {
parent->endLine = result->endLine;
parent->endCol = result->endCol;
}
if ( ! parent->hasUsefulRangeInformation && parent->startLine == -99999 ) {
parent->startLine = result->startLine;
parent->startCol = result->startCol;
}
parent = parent->parent;
}
}
'''
# Rule types with KIND 'any' use the switch-less template.
if not does_match_any[index]:
func = func_structure.replace('%{RULE_FOR}', index).replace('%{SWITCH_LINES}', current_switch_lines).replace('%{APPENDIX}', appendix)
else:
func = simple_func_structure.replace('%{RULE_FOR}', index).replace('%{SWITCH_LINES}', current_switch_lines)
# The _slice overload is only compiled for Python versions before 3.9.
if index == '_slice':
func = "#if PYTHON_VERSION < QT_VERSION_CHECK(3, 9, 0)\n" + func + "\n#endif\n"
print(func)
# Close the generated class.
print('''};
/*
* End generated code
*/
''')