remove redundant lines

automl · Oct 26, 2021 · 4b72887 · 4b72887
1 parent 4da0f38
commit 4b72887
Showing 1 changed file with 0 additions and 56 deletions.
diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py
@@ -483,59 +483,3 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame:
             self.object_dtype_mapping = {column: X[column].dtype for column in X.columns}
         self.logger.debug(f"Infer Objects: {self.object_dtype_mapping}")
         return X
-
-    def impute_nan_in_categories(self, X: pd.DataFrame) -> pd.DataFrame:
-        """
-        impute missing values before encoding,
-        remove once sklearn natively supports
-        it in ordinal encoding. Sklearn issue:
-        "https://github.com/scikit-learn/scikit-learn/issues/17123)"
-
-        Arguments:
-            X (pd.DataFrame):
-                data to be interpreted.
-
-        Returns:
-            pd.DataFrame
-        """
-
-        # To be on the safe side, map always to the same missing
-        # value per column
-        if not hasattr(self, 'dict_nancol_to_missing'):
-            self.dict_missing_value_per_col: typing.Dict[str, typing.Any] = {}
-
-        # First make sure that we do not alter the type of the column which cause:
-        # TypeError: '<' not supported between instances of 'int' and 'str'
-        # in the encoding
-        for column in self.enc_columns:
-            if X[column].isna().any():
-                if column not in self.dict_missing_value_per_col:
-                    try:
-                        float(X[column].dropna().values[0])
-                        can_cast_as_number = True
-                    except Exception:
-                        can_cast_as_number = False
-                    if can_cast_as_number:
-                        # In this case, we expect to have a number as category
-                        # it might be string, but its value represent a number
-                        missing_value: typing.Union[str, int] = '-1' if isinstance(X[column].dropna().values[0],
-                                                                                   str) else -1
-                    else:
-                        missing_value = 'Missing!'
-
-                    # Make sure this missing value is not seen before
-                    # Do this check for categorical columns
-                    # else modify the value
-                    if hasattr(X[column], 'cat'):
-                        while missing_value in X[column].cat.categories:
-                            if isinstance(missing_value, str):
-                                missing_value += '0'
-                            else:
-                                missing_value += missing_value
-                    self.dict_missing_value_per_col[column] = missing_value
-
-                # Convert the frame in place
-                X[column].cat.add_categories([self.dict_missing_value_per_col[column]],
-                                             inplace=True)
-                X.fillna({column: self.dict_missing_value_per_col[column]}, inplace=True)
-        return X