Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion from_cpython/Include/dictobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ struct _dictobject {
#endif
typedef struct {
PyObject_HEAD;
char _filler[SIZEOF_UNORDEREDMAP];
char _filler[24];
} PyDictObject;

// Pyston change: these are no longer static objects:
Expand Down
18 changes: 18 additions & 0 deletions microbenchmarks/string_key_ubench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Test the speed of using strings as dictionary keys.
# We internally optimize many things to not use dictionaries,
# but there are some times that we can't.

def f():
d = {'a':1}
for i in xrange(3000000):
d['a']
d['a']
d['a']
d['a']
d['a']
d['a']
d['a']
d['a']
d['a']
d['a']
f()
17 changes: 1 addition & 16 deletions src/asm_writing/assembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -712,22 +712,7 @@ void Assembler::cmp(Register reg1, Register reg2) {
}

void Assembler::cmp(Register reg, Immediate imm) {
int64_t val = imm.val;
assert((-1L << 31) <= val && val < (1L << 31) - 1);

int reg_idx = reg.regnum;

int rex = REX_W;
if (reg_idx >= 8) {
rex |= REX_B;
reg_idx -= 8;
}
assert(0 <= reg_idx && reg_idx < 8);

emitRex(rex);
emitByte(0x81);
emitModRM(0b11, 7, reg_idx);
emitInt(val, 4);
emitArith(imm, reg, OPCODE_CMP);
}

void Assembler::cmp(Indirect mem, Immediate imm) {
Expand Down
2 changes: 1 addition & 1 deletion src/asm_writing/assembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class Assembler {
uint8_t* addr;
bool failed; // if the rewrite failed at the assembly-generation level for some reason

static const uint8_t OPCODE_ADD = 0b000, OPCODE_SUB = 0b101;
static const uint8_t OPCODE_ADD = 0b000, OPCODE_SUB = 0b101, OPCODE_CMP = 0b111;
static const uint8_t REX_B = 1, REX_X = 2, REX_R = 4, REX_W = 8;

#ifndef NDEBUG
Expand Down
27 changes: 27 additions & 0 deletions src/asm_writing/rewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "codegen/unwinding.h"
#include "core/common.h"
#include "core/stats.h"
#include "runtime/types.h"

namespace pyston {

Expand Down Expand Up @@ -1522,6 +1523,32 @@ void Rewriter::_allocateAndCopyPlus1(RewriterVar* result, RewriterVar* first_ele
assertConsistent();
}

void Rewriter::checkAndThrowCAPIException(RewriterVar* r, int64_t exc_val) {
STAT_TIMER(t0, "us_timer_rewriter", 10);

addAction([=]() { this->_checkAndThrowCAPIException(r, exc_val); }, { r }, ActionType::MUTATION);
}

void Rewriter::_checkAndThrowCAPIException(RewriterVar* r, int64_t exc_val) {
assembler->comment("_checkAndThrowCAPIException");

assembler::Register var_reg = r->getInReg();
if (exc_val == 0)
assembler->test(var_reg, var_reg);
else
assembler->cmp(var_reg, assembler::Immediate(exc_val));

{
assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO);
assembler->mov(assembler::Immediate((void*)throwCAPIException), assembler::R11);
assembler->callq(assembler::R11);
}

r->bumpUse();

assertConsistent();
}

assembler::Indirect Rewriter::indirectFor(Location l) {
assert(l.type == Location::Scratch || l.type == Location::Stack);

Expand Down
4 changes: 4 additions & 0 deletions src/asm_writing/rewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ class Rewriter : public ICSlotRewrite::CommitHook {
int _allocate(RewriterVar* result, int n);
void _allocateAndCopy(RewriterVar* result, RewriterVar* array, int n);
void _allocateAndCopyPlus1(RewriterVar* result, RewriterVar* first_elem, RewriterVar* rest, int n_rest);
void _checkAndThrowCAPIException(RewriterVar* r, int64_t exc_val);

// The public versions of these are in RewriterVar
void _addGuard(RewriterVar* var, RewriterVar* val_constant);
Expand Down Expand Up @@ -514,6 +515,9 @@ class Rewriter : public ICSlotRewrite::CommitHook {
RewriterVar* allocateAndCopy(RewriterVar* array, int n);
RewriterVar* allocateAndCopyPlus1(RewriterVar* first_elem, RewriterVar* rest, int n_rest);

// This emits `if (r == exc_val) throwCAPIException()`
void checkAndThrowCAPIException(RewriterVar* r, int64_t exc_val = 0);

void abort();
void commit();
void commitReturning(RewriterVar* rtn);
Expand Down
36 changes: 34 additions & 2 deletions src/capi/typeobject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,29 @@ extern "C" void conservativeGCHandler(GCVisitor* v, Box* b) noexcept {
v->visitPotentialRange((void* const*)b, (void* const*)((char*)b + b->cls->tp_basicsize));
}

extern "C" void conservativeAndBasesGCHandler(GCVisitor* v, Box* b) noexcept {
// TODO: this function is expensive. We should try to make sure it doesn't get used
// that often, or to come up with a better approach.

// Call all the custom gc handlers defined anywhere in the hierarchy:
assert(PyTuple_CheckExact(b->cls->tp_mro));
for (auto c : *static_cast<BoxedTuple*>(b->cls->tp_mro)) {
if (!PyType_Check(c))
continue;

auto gc_visit = static_cast<BoxedClass*>(c)->gc_visit;

// Skip conservativeGCHandler since it's slow, and skip conservativeAndBasesGCHandler since
// it would cause an infinite loop:
if (gc_visit == conservativeGCHandler || gc_visit == conservativeAndBasesGCHandler)
continue;

gc_visit(v, b);
}

conservativeGCHandler(v, b);
}

static int check_num_args(PyObject* ob, int n) noexcept {
if (!PyTuple_CheckExact(ob)) {
PyErr_SetString(PyExc_SystemError, "PyArg_UnpackTuple() argument list is not a tuple");
Expand Down Expand Up @@ -3224,7 +3247,7 @@ static Box* tppProxyToTpCall(Box* self, CallRewriteArgs* rewrite_args, ArgPassSp
rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)self->cls->tp_call, rewrite_args->obj,
rewrite_args->arg1, rewrite_args->arg2);
if (S == CXX)
rewrite_args->rewriter->call(true, (void*)checkAndThrowCAPIException);
rewrite_args->rewriter->checkAndThrowCAPIException(rewrite_args->out_rtn);
rewrite_args->out_success = true;
}

Expand Down Expand Up @@ -3323,7 +3346,16 @@ extern "C" int PyType_Ready(PyTypeObject* cls) noexcept {
if (cls->tp_alloc == &PystonType_GenericAlloc)
cls->tp_alloc = &PyType_GenericAlloc;

cls->gc_visit = &conservativeGCHandler;
// If an extension class visits from a Pyston class that does custom visiting,
// the base class needs to call the parent's visit function in case it visits
// non-inline data. There's not an easy way to put in a function pointer here
// that defers to a specific class's gc_visit, even if it's a base class, since
// the gc_visit could get inherited by subclasses. For now just use an expensive
// function, conservativeAndBasesGCHandler
if (base->gc_visit != object_cls->gc_visit && base->gc_visit != &conservativeGCHandler)
cls->gc_visit = &conservativeAndBasesGCHandler;
else
cls->gc_visit = &conservativeGCHandler;
cls->is_user_defined = true;


Expand Down
2 changes: 1 addition & 1 deletion src/codegen/ast_interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1508,7 +1508,7 @@ Value ASTInterpreter::visit_name(AST_Name* node) {
assert(getSymVRegMap()[node->id] == node->vreg);
Box* val = vregs[node->vreg];
if (val) {
assert(gc::isValidGCObject(val));
ASSERT(gc::isValidGCObject(val), "%s is %p", node->id.c_str(), val);
v.o = val;
return v;
}
Expand Down
18 changes: 5 additions & 13 deletions src/gc/collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,18 +271,15 @@ bool isValidGCObject(void* p) {
GCAllocation* al = global_heap.getAllocationFromInteriorPointer(p);
if (!al)
return false;
return al->user_data == p && (al->kind_id == GCKind::CONSERVATIVE_PYTHON || al->kind_id == GCKind::PYTHON);
return al->user_data == p && al->kind_id == GCKind::PYTHON;
}

void registerPythonObject(Box* b) {
assert(isValidGCMemory(b));
auto al = GCAllocation::fromUserData(b);

if (al->kind_id == GCKind::CONSERVATIVE) {
al->kind_id = GCKind::CONSERVATIVE_PYTHON;
} else {
assert(al->kind_id == GCKind::PYTHON);
}
assert(al->kind_id == GCKind::CONSERVATIVE || al->kind_id == GCKind::PYTHON);
al->kind_id = GCKind::PYTHON;

assert(b->cls);
if (hasOrderedFinalizer(b->cls)) {
Expand Down Expand Up @@ -386,7 +383,7 @@ static __attribute__((always_inline)) void visitByGCKind(void* p, GCVisitor& vis
GCKind kind_id = al->kind_id;
if (kind_id == GCKind::UNTRACKED) {
// Nothing to do here.
} else if (kind_id == GCKind::CONSERVATIVE || kind_id == GCKind::CONSERVATIVE_PYTHON) {
} else if (kind_id == GCKind::CONSERVATIVE) {
uint32_t bytes = al->kind_data;
visitor.visitPotentialRange((void**)p, (void**)((char*)p + bytes));
} else if (kind_id == GCKind::PRECISE) {
Expand Down Expand Up @@ -521,7 +518,7 @@ static void graphTraversalMarking(TraceStack& stack, GCVisitor& visitor) {
GCAllocation* al = GCAllocation::fromUserData(p);

#if TRACE_GC_MARKING
if (al->kind_id == GCKind::PYTHON || al->kind_id == GCKind::CONSERVATIVE_PYTHON)
if (al->kind_id == GCKind::PYTHON)
GC_TRACE_LOG("Looking at %s object %p\n", static_cast<Box*>(p)->cls->tp_name, p);
else
GC_TRACE_LOG("Looking at non-python allocation %p\n", p);
Expand Down Expand Up @@ -685,11 +682,6 @@ static void markPhase() {
// pending finalization list.
orderFinalizers();

#if TRACE_GC_MARKING
fclose(trace_fp);
trace_fp = NULL;
#endif

#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
Expand Down
4 changes: 0 additions & 4 deletions src/gc/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,6 @@ enum class GCKind : uint8_t {
// because it contains pointers into our heap or our heap points to these
// objects. These objects inherit from GCAllocatedRuntime.
RUNTIME = 5,

// A Python object where we don't have a way to visit precisely with a GC
// handler function. These are usually Python objects allocated in C extensions.
CONSERVATIVE_PYTHON = 6,
};

extern "C" void* gc_alloc(size_t nbytes, GCKind kind);
Expand Down
18 changes: 3 additions & 15 deletions src/gc/heap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ __attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>*
VALGRIND_ENABLE_ERROR_REPORTING;
#endif

if (alloc_kind == GCKind::PYTHON || alloc_kind == GCKind::CONSERVATIVE_PYTHON) {
if (alloc_kind == GCKind::PYTHON) {
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
Expand All @@ -186,8 +186,7 @@ __attribute__((always_inline)) bool _doFree(GCAllocation* al, std::vector<Box*>*
return false;
}

ASSERT(!hasOrderedFinalizer(b->cls) || hasFinalized(al) || alloc_kind == GCKind::CONSERVATIVE_PYTHON, "%s",
getTypeName(b));
ASSERT(!hasOrderedFinalizer(b->cls) || hasFinalized(al), "%s", getTypeName(b));

if (b->cls->tp_dealloc != dealloc_null && b->cls->has_safe_tp_dealloc) {
gc_safe_destructors.log();
Expand Down Expand Up @@ -273,17 +272,6 @@ void addStatistic(HeapStatistics* stats, GCAllocation* al, int nbytes) {
} else if (al->kind_id == GCKind::CONSERVATIVE) {
stats->conservative.nallocs++;
stats->conservative.nbytes += nbytes;
} else if (al->kind_id == GCKind::CONSERVATIVE_PYTHON) {
stats->conservative_python.nallocs++;
stats->conservative_python.nbytes += nbytes;

if (stats->collect_cls_stats) {
Box* b = (Box*)al->user_data;
auto& t = stats->by_cls[b->cls];

t.nallocs++;
t.nbytes += nbytes;
}
} else if (al->kind_id == GCKind::UNTRACKED) {
stats->untracked.nallocs++;
stats->untracked.nbytes += nbytes;
Expand Down Expand Up @@ -509,7 +497,7 @@ SmallArena::Block** SmallArena::_freeChain(Block** head, std::vector<Box*>& weak
clearMark(al);
} else {
if (_doFree(al, &weakly_referenced)) {
GC_TRACE_LOG("freeing %p\n", al->user_data);
// GC_TRACE_LOG("freeing %p\n", al->user_data);
b->isfree.set(atom_idx);
#ifndef NDEBUG
memset(al->user_data, 0xbb, b->size - sizeof(GCAllocation));
Expand Down
6 changes: 5 additions & 1 deletion src/runtime/capi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,10 @@ void throwCAPIException() {
}

void checkAndThrowCAPIException() {
// Log these since these are expensive and usually avoidable:
static StatCounter num_checkAndThrowCAPIException("num_checkAndThrowCAPIException");
num_checkAndThrowCAPIException.log();

Box* _type = cur_thread_state.curexc_type;
if (!_type)
assert(!cur_thread_state.curexc_value);
Expand Down Expand Up @@ -1572,7 +1576,7 @@ Box* BoxedCApiFunction::tppCall(Box* _self, CallRewriteArgs* rewrite_args, ArgPa
}

if (rewrite_args) {
rewrite_args->rewriter->call(false, (void*)checkAndThrowCAPIException);
rewrite_args->rewriter->checkAndThrowCAPIException(rewrite_args->out_rtn);
rewrite_args->out_success = true;
}

Expand Down
6 changes: 3 additions & 3 deletions src/runtime/descr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ Box* BoxedMethodDescriptor::tppCall(Box* _self, CallRewriteArgs* rewrite_args, A
throwCAPIException();

if (rewrite_args) {
rewrite_args->rewriter->call(false, (void*)checkAndThrowCAPIException);
rewrite_args->rewriter->checkAndThrowCAPIException(rewrite_args->out_rtn);
rewrite_args->out_success = true;
}

Expand Down Expand Up @@ -565,7 +565,7 @@ Box* BoxedWrapperObject::tppCall(Box* _self, CallRewriteArgs* rewrite_args, ArgP
rewrite_args->out_rtn = rewriter->call(
true, (void*)wk, r_obj, rewrite_args->arg1,
rewriter->loadConst((intptr_t)self->descr->wrapped, Location::forArg(2)), rewrite_args->arg2);
rewriter->call(false, (void*)checkAndThrowCAPIException);
rewrite_args->rewriter->checkAndThrowCAPIException(rewrite_args->out_rtn);
rewrite_args->out_success = true;
}
} else if (flags == PyWrapperFlag_PYSTON || flags == 0) {
Expand All @@ -577,7 +577,7 @@ Box* BoxedWrapperObject::tppCall(Box* _self, CallRewriteArgs* rewrite_args, ArgP
rewrite_args->out_rtn
= rewriter->call(true, (void*)wrapper, r_obj, rewrite_args->arg1,
rewriter->loadConst((intptr_t)self->descr->wrapped, Location::forArg(2)));
rewriter->call(false, (void*)checkAndThrowCAPIException);
rewrite_args->rewriter->checkAndThrowCAPIException(rewrite_args->out_rtn);
rewrite_args->out_success = true;
}
} else {
Expand Down
16 changes: 6 additions & 10 deletions src/runtime/dict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ Box* dictSetdefault(BoxedDict* self, Box* k, Box* v) {
if (it != self->d.end())
return it->second;

self->d.insert(it, std::make_pair(k, v));
self->d.insert(std::make_pair(k, v));
return v;
}

Expand Down Expand Up @@ -689,14 +689,10 @@ void BoxedDict::gcHandler(GCVisitor* v, Box* b) {

BoxedDict* d = (BoxedDict*)b;

// This feels like a cludge, but we need to find anything that
// the unordered_map might have allocated.
// Another way to handle this would be to rt_alloc the unordered_map
// as well, though that incurs extra memory dereferences which would
// be nice to avoid.
void** start = (void**)&d->d;
void** end = start + (sizeof(d->d) / 8);
v->visitPotentialRange(start, end);
for (auto p : d->d) {
v->visit(p.first);
v->visit(p.second);
}
}

void BoxedDictIterator::gcHandler(GCVisitor* v, Box* b) {
Expand All @@ -708,7 +704,7 @@ void BoxedDictIterator::gcHandler(GCVisitor* v, Box* b) {
}

void BoxedDictView::gcHandler(GCVisitor* v, Box* b) {
assert(b->cls == dict_items_cls);
assert(b->cls == dict_items_cls || b->cls == dict_values_cls || b->cls == dict_keys_cls);
Box::gcHandler(v, b);

BoxedDictView* view = static_cast<BoxedDictView*>(b);
Expand Down
Loading