Skip to content

Commit cc40732

Browse files
TeeVee99 and Tejaswini V authored
BUG: rank with object dtype and small values #62036 (#62227)
Co-authored-by: Tejaswini V <[email protected]>
1 parent 4afee55 commit cc40732

File tree

3 files changed

+15
-14
lines changed

3 files changed

+15
-14
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,6 +1158,7 @@ Other
11581158
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
11591159
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
11601160
- Deprecated the keyword ``check_datetimelike_compat`` in :meth:`testing.assert_frame_equal` and :meth:`testing.assert_series_equal` (:issue:`55638`)
1161+
- Fixed bug in :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
11611162
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
11621163
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
11631164
-

pandas/_libs/algos.pyx

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
cimport cython
22
from cython cimport Py_ssize_t
33
from libc.math cimport (
4-
fabs,
54
sqrt,
65
)
76
from libc.stdlib cimport (
@@ -72,13 +71,6 @@ tiebreakers = {
7271
}
7372

7473

75-
cdef bint are_diff(object left, object right):
76-
try:
77-
return fabs(left - right) > FP_ERR
78-
except TypeError:
79-
return left != right
80-
81-
8274
class Infinity:
8375
"""
8476
Provide a positive Infinity comparison method for ranking.
@@ -1135,12 +1127,8 @@ cdef void rank_sorted_1d(
11351127
dups += 1
11361128
sum_ranks += i - grp_start + 1
11371129

1138-
if numeric_object_t is object:
1139-
next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1140-
masked_vals[sort_indexer[i+1]])
1141-
else:
1142-
next_val_diff = at_end or (masked_vals[sort_indexer[i]]
1143-
!= masked_vals[sort_indexer[i+1]])
1130+
next_val_diff = at_end or (masked_vals[sort_indexer[i]]
1131+
!= masked_vals[sort_indexer[i+1]])
11441132

11451133
# We'll need this check later anyway to determine group size, so just
11461134
# compute it here since shortcircuiting won't help

pandas/tests/test_algos.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1819,6 +1819,18 @@ def test_uint64_overflow(self, dtype):
18191819
s = Series([1, 2**63], dtype=dtype)
18201820
tm.assert_numpy_array_equal(algos.rank(s), exp)
18211821

1822+
@pytest.mark.parametrize("method", ["average", "min", "max"])
1823+
def test_rank_tiny_values(self, method):
1824+
# GH62036: regression test for ranking with tiny float values
1825+
exp = np.array([4.0, 1.0, 3.0, np.nan, 2.0], dtype=np.float64)
1826+
s = Series(
1827+
[5.4954145e29, -9.791984e-21, 9.3715776e-26, pd.NA, 1.8790257e-28],
1828+
dtype="Float64",
1829+
)
1830+
s = s.astype(object)
1831+
result = algos.rank(s, method=method)
1832+
tm.assert_numpy_array_equal(result, exp)
1833+
18221834
def test_too_many_ndims(self):
18231835
arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
18241836
msg = "Array with ndim > 2 are not supported"

0 commit comments

Comments
 (0)