Skip to content

Commit 65d7da1

Browse files
authored
Removes pyquickhelper as a dependency (#33)
* Removes pyquickhelper as a dependency
* add assert
* assert
* ut
* badge
* fix
1 parent 2d32be2 commit 65d7da1

18 files changed

+419
-224
lines changed

Diff for: .github/workflows/check-urls.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,5 @@ jobs:
4343
timeout: 2
4444
retry_count# : 2
4545
# exclude_urls: https://hal.archives-ouvertes.fr/hal-00990252/document
46-
# exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c,https://dev.azure.com/
46+
exclude_patterns: https://circleci.com/gh/sdpython/pandas_streaming/
4747
# force_pass : true

Diff for: .local.jenkins.lin.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ virtualenv:
99

1010
install:
1111
- $PYINT -m pip install --upgrade pip
12-
- $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/
12+
- $PYINT -m pip install --upgrade --no-cache-dir --no-deps --index http://localhost:8067/simple/ jyquickhelper pandas_streaming --extra-index-url=https://pypi.python.org/simple/
1313
- $PYINT -m pip install -r requirements.txt
1414
- $PYINT -m pip install -r requirements-dev.txt
1515
- $PYINT --version

Diff for: README.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ pandas-streaming: streaming API over pandas
1212
:target: https://ci.appveyor.com/project/sdpython/pandas-streaming
1313
:alt: Build Status Windows
1414

15-
.. image:: https://circleci.com/gh/sdpython/pandas_streaming/tree/main.svg?style=svg
16-
:target: https://circleci.com/gh/sdpython/pandas_streaming/tree/main
15+
.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
16+
:target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
1717

1818
.. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
1919
:target: https://dev.azure.com/xavierdupre3/pandas_streaming/

Diff for: _doc/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161

6262
# The following is used by sphinx.ext.linkcode to provide links to github
6363
linkcode_resolve = make_linkcode_resolve(
64-
"pandas_streaming",
64+
"pandas-streaming",
6565
(
6666
"https://github.com/sdpython/pandas-streaming/"
6767
"blob/{revision}/{package}/"

Diff for: _doc/index.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ pandas-streaming: streaming API over pandas
1616
:target: https://ci.appveyor.com/project/sdpython/pandas-streaming
1717
:alt: Build Status Windows
1818

19-
.. image:: https://circleci.com/gh/sdpython/pandas_streaming/tree/main.svg?style=svg
20-
:target: https://circleci.com/gh/sdpython/pandas_streaming/tree/main
19+
.. image:: https://dl.circleci.com/status-badge/img/gh/sdpython/pandas-streaming/tree/main.svg?style=svg
20+
:target: https://dl.circleci.com/status-badge/redirect/gh/sdpython/pandas-streaming/tree/main
2121

2222
.. image:: https://dev.azure.com/xavierdupre3/pandas_streaming/_apis/build/status/sdpython.pandas_streaming
2323
:target: https://dev.azure.com/xavierdupre3/pandas_streaming/

Diff for: _unittests/ut_df/test_connex_split.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import unittest
22
import pandas
3-
from pyquickhelper.pycode import ExtTestCase
3+
from pandas_streaming.ext_test_case import ExtTestCase
44
from pandas_streaming.df import (
55
dataframe_shuffle,
66
train_test_split_weights,

Diff for: _unittests/ut_df/test_connex_split_big.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import unittest
44
from collections import Counter
55
import pandas
6-
from pyquickhelper.pycode import ExtTestCase
6+
from pandas_streaming.ext_test_case import ExtTestCase
77
from pandas_streaming.df import train_test_connex_split
88

99

Diff for: _unittests/ut_df/test_connex_split_cat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import unittest
44
from collections import Counter
55
import pandas
6-
from pyquickhelper.pycode import ExtTestCase
6+
from pandas_streaming.ext_test_case import ExtTestCase
77
from pandas_streaming.df import train_test_apart_stratify
88

99

Diff for: _unittests/ut_df/test_dataframe_helpers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import unittest
33
import numpy
44
import pandas
5-
from pyquickhelper.pycode import ExtTestCase
5+
from pandas_streaming.ext_test_case import ExtTestCase
66
from pandas_streaming.df import dataframe_hash_columns
77

88

Diff for: _unittests/ut_df/test_dataframe_helpers_simple.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import unittest
22
import pandas
33
import numpy
4-
from pyquickhelper.pycode import ExtTestCase
4+
from pandas_streaming.ext_test_case import ExtTestCase
55
from pandas_streaming.df import dataframe_unfold
66
from pandas_streaming.df.dataframe_helpers import hash_int, hash_str, hash_float
77

Diff for: _unittests/ut_df/test_dataframe_io.py

+32-31
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import os
2+
import tempfile
23
import unittest
34
import io
45
import zipfile
56
import numpy
67
import pandas
7-
from pyquickhelper.pycode import ExtTestCase, get_temp_folder
8+
from pandas_streaming.ext_test_case import ExtTestCase
89
from pandas_streaming.df import to_zip, read_zip
910

1011

@@ -20,43 +21,43 @@ def test_zip_dataframe(self):
2021
]
2122
)
2223

23-
temp = get_temp_folder(__file__, "temp_zip")
24-
name = os.path.join(temp, "df.zip")
25-
to_zip(df, name, encoding="utf-8", index=False)
26-
df2 = read_zip(name, encoding="utf-8")
27-
self.assertEqualDataFrame(df, df2)
24+
with tempfile.TemporaryDirectory() as temp:
25+
name = os.path.join(temp, "df.zip")
26+
to_zip(df, name, encoding="utf-8", index=False)
27+
df2 = read_zip(name, encoding="utf-8")
28+
self.assertEqualDataFrame(df, df2)
2829

29-
st = io.BytesIO()
30-
zp = zipfile.ZipFile(st, "w")
31-
to_zip(df, zp, encoding="utf-8", index=False)
32-
zp.close()
30+
st = io.BytesIO()
31+
zp = zipfile.ZipFile(st, "w")
32+
to_zip(df, zp, encoding="utf-8", index=False)
33+
zp.close()
3334

34-
st = io.BytesIO(st.getvalue())
35-
zp = zipfile.ZipFile(st, "r")
36-
df3 = read_zip(zp, encoding="utf-8")
37-
zp.close()
38-
self.assertEqualDataFrame(df, df3)
35+
st = io.BytesIO(st.getvalue())
36+
zp = zipfile.ZipFile(st, "r")
37+
df3 = read_zip(zp, encoding="utf-8")
38+
zp.close()
39+
self.assertEqualDataFrame(df, df3)
3940

4041
def test_zip_numpy(self):
4142
df = numpy.zeros((3, 4))
4243
df[2, 3] = 1
4344

44-
temp = get_temp_folder(__file__, "temp_zip")
45-
name = os.path.join(temp, "df.zip")
46-
to_zip(df, name, "arr.npy")
47-
df2 = read_zip(name, "arr.npy")
48-
self.assertEqualArray(df, df2)
49-
50-
st = io.BytesIO()
51-
zp = zipfile.ZipFile(st, "w")
52-
to_zip(df, zp, "arr.npy")
53-
zp.close()
54-
55-
st = io.BytesIO(st.getvalue())
56-
zp = zipfile.ZipFile(st, "r")
57-
df3 = read_zip(zp, "arr.npy")
58-
zp.close()
59-
self.assertEqualArray(df, df3)
45+
with tempfile.TemporaryDirectory() as temp:
46+
name = os.path.join(temp, "df.zip")
47+
to_zip(df, name, "arr.npy")
48+
df2 = read_zip(name, "arr.npy")
49+
self.assertEqualArray(df, df2)
50+
51+
st = io.BytesIO()
52+
zp = zipfile.ZipFile(st, "w")
53+
to_zip(df, zp, "arr.npy")
54+
zp.close()
55+
56+
st = io.BytesIO(st.getvalue())
57+
zp = zipfile.ZipFile(st, "r")
58+
df3 = read_zip(zp, "arr.npy")
59+
zp.close()
60+
self.assertEqualArray(df, df3)
6061

6162

6263
if __name__ == "__main__":

Diff for: _unittests/ut_df/test_dataframe_io_helpers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from io import StringIO, BytesIO
33
from json import loads
44
import pandas
5-
from pyquickhelper.pycode import ExtTestCase
5+
from pandas_streaming.ext_test_case import ExtTestCase
66
from pandas_streaming.df.dataframe_io_helpers import (
77
enumerate_json_items,
88
JsonPerRowsStream,

Diff for: _unittests/ut_df/test_dataframe_sort.py

+86-85
Original file line numberDiff line numberDiff line change
@@ -1,104 +1,105 @@
11
import os
2+
import tempfile
23
import unittest
34
import pandas
4-
from pyquickhelper.pycode import ExtTestCase, get_temp_folder
5+
from pandas_streaming.ext_test_case import ExtTestCase
56
from pandas_streaming.df import StreamingDataFrame
67

78

89
class TestDataFrameSort(ExtTestCase):
910
def test_sort_values(self):
10-
temp = get_temp_folder(__file__, "temp_sort_values")
11-
name = os.path.join(temp, "_data_")
12-
df = pandas.DataFrame(
13-
[
14-
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
15-
dict(a=5, b="f", c=5.7, ind="a2", ai=2),
16-
dict(a=4, b="g", ind="a3", ai=3),
17-
dict(a=8, b="h", c=5.9, ai=4),
18-
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
19-
]
20-
)
21-
sdf = StreamingDataFrame.read_df(df, chunksize=2)
22-
sorted_df = df.sort_values(by="a")
23-
res = sdf.sort_values(by="a", temp_file=name)
24-
res_df = res.to_df()
25-
self.assertEqualDataFrame(sorted_df, res_df)
11+
with tempfile.TemporaryDirectory() as temp:
12+
name = os.path.join(temp, "_data_")
13+
df = pandas.DataFrame(
14+
[
15+
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
16+
dict(a=5, b="f", c=5.7, ind="a2", ai=2),
17+
dict(a=4, b="g", ind="a3", ai=3),
18+
dict(a=8, b="h", c=5.9, ai=4),
19+
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
20+
]
21+
)
22+
sdf = StreamingDataFrame.read_df(df, chunksize=2)
23+
sorted_df = df.sort_values(by="a")
24+
res = sdf.sort_values(by="a", temp_file=name)
25+
res_df = res.to_df()
26+
self.assertEqualDataFrame(sorted_df, res_df)
2627

2728
def test_sort_values_twice(self):
28-
temp = get_temp_folder(__file__, "temp_sort_values_twice")
29-
name = os.path.join(temp, "_data_")
30-
df = pandas.DataFrame(
31-
[
32-
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
33-
dict(a=5, b="f", c=5.7, ind="a2", ai=2),
34-
dict(a=4, b="g", ind="a3", ai=3),
35-
dict(a=8, b="h", c=5.9, ai=4),
36-
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
37-
]
38-
)
39-
sdf = StreamingDataFrame.read_df(df, chunksize=2)
40-
sorted_df = df.sort_values(by="a")
41-
res = sdf.sort_values(by="a", temp_file=name)
42-
res_df = res.to_df()
43-
self.assertEqualDataFrame(sorted_df, res_df)
44-
res_df = res.to_df()
45-
self.assertEqualDataFrame(sorted_df, res_df)
29+
with tempfile.TemporaryDirectory() as temp:
30+
name = os.path.join(temp, "_data_")
31+
df = pandas.DataFrame(
32+
[
33+
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
34+
dict(a=5, b="f", c=5.7, ind="a2", ai=2),
35+
dict(a=4, b="g", ind="a3", ai=3),
36+
dict(a=8, b="h", c=5.9, ai=4),
37+
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
38+
]
39+
)
40+
sdf = StreamingDataFrame.read_df(df, chunksize=2)
41+
sorted_df = df.sort_values(by="a")
42+
res = sdf.sort_values(by="a", temp_file=name)
43+
res_df = res.to_df()
44+
self.assertEqualDataFrame(sorted_df, res_df)
45+
res_df = res.to_df()
46+
self.assertEqualDataFrame(sorted_df, res_df)
4647

4748
def test_sort_values_reverse(self):
48-
temp = get_temp_folder(__file__, "temp_sort_values_reverse")
49-
name = os.path.join(temp, "_data_")
50-
df = pandas.DataFrame(
51-
[
52-
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
53-
dict(a=5, b="f", c=5.7, ind="a2", ai=2),
54-
dict(a=4, b="g", ind="a3", ai=3),
55-
dict(a=8, b="h", c=5.9, ai=4),
56-
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
57-
]
58-
)
59-
sdf = StreamingDataFrame.read_df(df, chunksize=2)
60-
sorted_df = df.sort_values(by="a", ascending=False)
61-
res = sdf.sort_values(by="a", temp_file=name, ascending=False)
62-
res_df = res.to_df()
63-
self.assertEqualDataFrame(sorted_df, res_df)
49+
with tempfile.TemporaryDirectory() as temp:
50+
name = os.path.join(temp, "_data_")
51+
df = pandas.DataFrame(
52+
[
53+
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
54+
dict(a=5, b="f", c=5.7, ind="a2", ai=2),
55+
dict(a=4, b="g", ind="a3", ai=3),
56+
dict(a=8, b="h", c=5.9, ai=4),
57+
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
58+
]
59+
)
60+
sdf = StreamingDataFrame.read_df(df, chunksize=2)
61+
sorted_df = df.sort_values(by="a", ascending=False)
62+
res = sdf.sort_values(by="a", temp_file=name, ascending=False)
63+
res_df = res.to_df()
64+
self.assertEqualDataFrame(sorted_df, res_df)
6465

6566
def test_sort_values_nan_last(self):
66-
temp = get_temp_folder(__file__, "temp_sort_values_nan_last")
67-
name = os.path.join(temp, "_data_")
68-
df = pandas.DataFrame(
69-
[
70-
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
71-
dict(b="f", c=5.7, ind="a2", ai=2),
72-
dict(b="f", c=5.8, ind="a2", ai=2),
73-
dict(a=4, b="g", ind="a3", ai=3),
74-
dict(a=8, b="h", c=5.9, ai=4),
75-
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
76-
]
77-
)
78-
sdf = StreamingDataFrame.read_df(df, chunksize=2)
79-
sorted_df = df.sort_values(by="a", na_position="last")
80-
res = sdf.sort_values(by="a", temp_file=name, na_position="last")
81-
res_df = res.to_df()
82-
self.assertEqualDataFrame(sorted_df, res_df)
67+
with tempfile.TemporaryDirectory() as temp:
68+
name = os.path.join(temp, "_data_")
69+
df = pandas.DataFrame(
70+
[
71+
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
72+
dict(b="f", c=5.7, ind="a2", ai=2),
73+
dict(b="f", c=5.8, ind="a2", ai=2),
74+
dict(a=4, b="g", ind="a3", ai=3),
75+
dict(a=8, b="h", c=5.9, ai=4),
76+
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
77+
]
78+
)
79+
sdf = StreamingDataFrame.read_df(df, chunksize=2)
80+
sorted_df = df.sort_values(by="a", na_position="last")
81+
res = sdf.sort_values(by="a", temp_file=name, na_position="last")
82+
res_df = res.to_df()
83+
self.assertEqualDataFrame(sorted_df, res_df)
8384

8485
def test_sort_values_nan_first(self):
85-
temp = get_temp_folder(__file__, "temp_sort_values_nan_first")
86-
name = os.path.join(temp, "_data_")
87-
df = pandas.DataFrame(
88-
[
89-
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
90-
dict(b="f", c=5.7, ind="a2", ai=2),
91-
dict(b="f", c=5.8, ind="a2", ai=2),
92-
dict(a=4, b="g", ind="a3", ai=3),
93-
dict(a=8, b="h", c=5.9, ai=4),
94-
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
95-
]
96-
)
97-
sdf = StreamingDataFrame.read_df(df, chunksize=2)
98-
sorted_df = df.sort_values(by="a", na_position="first")
99-
res = sdf.sort_values(by="a", temp_file=name, na_position="first")
100-
res_df = res.to_df()
101-
self.assertEqualDataFrame(sorted_df, res_df)
86+
with tempfile.TemporaryDirectory() as temp:
87+
name = os.path.join(temp, "_data_")
88+
df = pandas.DataFrame(
89+
[
90+
dict(a=1, b="eé", c=5.6, ind="a1", ai=1),
91+
dict(b="f", c=5.7, ind="a2", ai=2),
92+
dict(b="f", c=5.8, ind="a2", ai=2),
93+
dict(a=4, b="g", ind="a3", ai=3),
94+
dict(a=8, b="h", c=5.9, ai=4),
95+
dict(a=16, b="i", c=6.2, ind="a5", ai=5),
96+
]
97+
)
98+
sdf = StreamingDataFrame.read_df(df, chunksize=2)
99+
sorted_df = df.sort_values(by="a", na_position="first")
100+
res = sdf.sort_values(by="a", temp_file=name, na_position="first")
101+
res_df = res.to_df()
102+
self.assertEqualDataFrame(sorted_df, res_df)
102103

103104

104105
if __name__ == "__main__":

Diff for: _unittests/ut_df/test_pandas_groupbynan.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pandas
33
import numpy
44
from scipy.sparse.linalg import lsqr as sparse_lsqr
5-
from pyquickhelper.pycode import ExtTestCase, ignore_warnings
5+
from pandas_streaming.ext_test_case import ExtTestCase, ignore_warnings
66
from pandas_streaming.df import pandas_groupby_nan, numpy_types
77

88

0 commit comments

Comments
 (0)