-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserialize_report.py
142 lines (118 loc) · 4.52 KB
/
serialize_report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Optional, Union

if TYPE_CHECKING:
    from ydata_profiling.profile_report import ProfileReport

from ydata_profiling.config import Settings
from ydata_profiling.report.presentation.core import Root
from ydata_profiling.version import __version__
class SerializeReport:
    """Extend the report to be able to dump and load reports.

    The serialized payload is a pickled list of four items, in this order:
    the DataFrame hash, the config, the description set and the rendered
    report structure.
    """

    # Defaults for the state that the (de)serialization methods read and write.
    df = None
    config = None
    _df_hash: Optional[str] = None
    _report = None
    _description_set = None

    @property
    def df_hash(self) -> Optional[str]:
        """Return the hash of the DataFrame; this implementation always yields None.

        NOTE(review): presumably overridden by the concrete report class to
        return a real hash -- confirm against ProfileReport.
        """
        return None

    def dumps(self) -> bytes:
        """
        Serialize ProfileReport and return bytes for reproducing ProfileReport or Caching.

        Returns:
            Bytes which contains hash of DataFrame, config, _description_set and _report
        """
        import pickle

        # Note: _description_set and _report may be None if they haven't been computed
        payload = [
            self.df_hash,
            self.config,
            self._description_set,
            self._report,
        ]
        return pickle.dumps(payload)

    def loads(self, data: bytes) -> Union["ProfileReport", "SerializeReport"]:
        """
        Deserialize the serialized report

        The loaded description set and report are only adopted when the
        current object does not hold one yet (otherwise a warning is issued);
        the config and the stored DataFrame hash are always overwritten.

        Warning:
            The payload is unpickled. Only pass data that comes from a
            trusted source.

        Args:
            data: The bytes of a serialize ProfileReport object.

        Raises:
            ValueError: if the data cannot be unpickled, does not have the
                expected structure, or its DataFrame hash does not match the
                DataFrame of the current report.

        Returns:
            self
        """
        import pickle

        # SECURITY: pickle.loads can execute arbitrary code while decoding.
        # The type checks below run only *after* unpickling, so they do not
        # protect against a malicious payload -- never load untrusted data.
        try:
            (
                df_hash,
                loaded_config,
                loaded_description_set,
                loaded_report,
            ) = pickle.loads(data)
        except Exception as e:
            # Covers both unpickling errors and a payload of the wrong arity.
            raise ValueError("Failed to load data") from e

        payload_is_valid = all(
            (
                df_hash is None or isinstance(df_hash, str),
                isinstance(loaded_config, Settings),
                loaded_description_set is None
                or isinstance(loaded_description_set, dict),
                loaded_report is None or isinstance(loaded_report, Root),
            )
        )
        if not payload_is_valid:
            raise ValueError(
                "Failed to load data: file may be damaged or from an incompatible version"
            )

        # Loading is allowed into an empty report (no DataFrame attached) or
        # into a report whose DataFrame hash equals the serialized one.
        if df_hash != self.df_hash and self.df is not None:
            raise ValueError("DataFrame does not match with the current ProfileReport.")

        # Set description_set and report if they are None, or raise a warning.
        if self._description_set is None:
            self._description_set = loaded_description_set
        else:
            warnings.warn(
                "The description set of current ProfileReport is not None. It won't be loaded."
            )
        if self._report is None:
            self._report = loaded_report
        else:
            warnings.warn(
                "The report of current ProfileReport is not None. It won't be loaded."
            )

        # overwrite config
        self.config = loaded_config

        # warn if version not equal
        if loaded_description_set is not None:
            # Look the version up defensively: a description set without the
            # expected keys must produce the mismatch warning, not a KeyError
            # raised after this object has already been partially updated.
            package_info = loaded_description_set.get("package")
            if isinstance(package_info, dict):
                loaded_version = package_info.get("ydata_profiling_version", "unknown")
            else:
                loaded_version = "unknown"

            if loaded_version != __version__:
                warnings.warn(
                    f"The package version specified in the loaded data is not equal to the version installed. "
                    f"Currently running on ydata-profiling {__version__} , while loaded data is generated by ydata_profiling, {loaded_version}."
                )

        # set df_hash
        self._df_hash = df_hash

        return self

    def dump(self, output_file: Union[Path, str]) -> None:
        """
        Dump ProfileReport to file

        Args:
            output_file: Destination path. Its suffix is always replaced by
                ".pp" before writing.
        """
        if not isinstance(output_file, Path):
            output_file = Path(str(output_file))

        target = output_file.with_suffix(".pp")
        target.write_bytes(self.dumps())

    def load(
        self, load_file: Union[Path, str]
    ) -> Union["ProfileReport", "SerializeReport"]:
        """
        Load ProfileReport from file

        Warning:
            The file content is unpickled. Only load files that come from a
            trusted source.

        Args:
            load_file: Path of the file to read. It is used as given; no
                suffix is added.

        Raises:
            ValueError: if the file content cannot be deserialized or the
                DataFrame does not match with the current ProfileReport

        Returns:
            self
        """
        if not isinstance(load_file, Path):
            load_file = Path(str(load_file))

        self.loads(load_file.read_bytes())
        return self