Skip to content

Commit

Permalink
refactor the class DataFrameFetcher, minor changes in 'ml/config', 'ml/vae/models/dataset.py'
Browse files (browse the repository at this point in the history)
  • Loading branch information
Hanna Imshenetska authored and Hanna Imshenetska committed Sep 24, 2024
1 parent abff71a commit 8c25386
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/syngen/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-0.9.30rc17
+0.9.30rc19
7 changes: 5 additions & 2 deletions src/syngen/ml/config/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,12 @@ def _load_source(self) -> Tuple[pd.DataFrame, Dict]:
 Return dataframe and schema of original data
 """
 if self.loader is not None:
-return DataFrameFetcher(
+dataframe_fetcher = DataFrameFetcher(
 loader=self.loader,
 table_name=self.table_name
-).fetch_data()
+)
+self.original_schema = dataframe_fetcher.original_schema
+return dataframe_fetcher.fetch_data()
 else:
 data_loader = DataLoader(self.source)
 self.original_schema = data_loader.original_schema
Expand Down Expand Up @@ -347,6 +349,7 @@ def _set_up_size(self):
 """
 if self.size is None:
 data_loader = DataLoader(self.paths["input_data_path"])
+data = pd.DataFrame()
 if data_loader.has_existed_path:
 data, schema = data_loader.load_data()
 elif self.loader:
Expand Down
1 change: 1 addition & 0 deletions src/syngen/ml/data_loaders/dataframe_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class DataFrameFetcher:
 """
 loader: Callable[[str], pd.DataFrame]
 table_name: str
+original_schema = None
 
 def fetch_data(self) -> Tuple[pd.DataFrame, Dict]:
 try:
Expand Down
6 changes: 4 additions & 2 deletions src/syngen/ml/vae/models/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,13 +133,15 @@ def _cast_to_numeric(self):
 except ValueError:
 continue
 if self.cast_to_integer:
+columns = [f"'{item}'" for item in self.cast_to_integer]
 logger.info(
-f"The columns - {', '.join(self.cast_to_integer)} "
+f"The columns - {', '.join(columns)} "
 "have been cast to the 'integer' data type"
 )
 if self.cast_to_float:
+columns = [f"'{item}'" for item in self.cast_to_float]
 logger.info(
-f"The columns - {', '.join(self.cast_to_float)} "
+f"The columns - {', '.join(columns)} "
 "have been cast to the 'float' data type"
 )

Expand Down

0 comments on commit 8c25386

Please sign in to comment.