Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update featureset spec for featureset source #34668

Merged
merged 5 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class FeaturesetSpecMetadataSchema(YamlFileSchema):
source = fields.Nested(SourceMetadataSchema, required=True)
feature_transformation_code = fields.Nested(FeatureTransformationCodeMetadataSchema, required=False)
features = fields.List(NestedField(FeatureSchema), required=True, allow_none=False)
index_columns = fields.List(NestedField(DataColumnSchema), required=True, allow_none=False)
index_columns = fields.List(NestedField(DataColumnSchema), required=False)
source_lookback = fields.Nested(DelayMetadataSchema, required=False)
temporal_join_lookback = fields.Nested(DelayMetadataSchema, required=False)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
class SourceMetadataSchema(metaclass=PatchedSchemaMeta):
type = fields.Str(required=True)
path = fields.Str(required=False)
timestamp_column = fields.Nested(TimestampColumnMetadataSchema, required=True)
timestamp_column = fields.Nested(TimestampColumnMetadataSchema, required=False)
source_delay = fields.Nested(DelayMetadataSchema, required=False)
source_process_code = fields.Nested(SourceProcessCodeSchema, load_only=True, required=False)
dict = fields.Dict(keys=fields.Str(), values=fields.Str(), data_key="kwargs", load_only=True, required=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY
from azure.ai.ml.entities._feature_store_entity.data_column import DataColumn
from azure.ai.ml.entities._util import load_from_dict
from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationErrorType, ValidationException

from .delay_metadata import DelayMetadata
from .feature import Feature
Expand All @@ -31,7 +32,7 @@ def __init__(
source: SourceMetadata,
feature_transformation_code: Optional[FeatureTransformationCodeMetadata] = None,
features: List[Feature],
index_columns: List[DataColumn],
index_columns: Optional[List[DataColumn]] = None,
source_lookback: Optional[DelayMetadata] = None,
temporal_join_lookback: Optional[DelayMetadata] = None,
**_kwargs: Any,
Expand Down Expand Up @@ -74,6 +75,16 @@ def _load(
res: FeaturesetSpecMetadata = load_from_dict(
FeaturesetSpecMetadataSchema, yaml_data, context, "", unknown=INCLUDE, **kwargs
)
if res.source.type != "featureset":
anyalee0221 marked this conversation as resolved.
Show resolved Hide resolved
if not res.index_columns:
msg = f"You need to provide index_columns for {res.source.type} feature source."
raise ValidationException(
message=msg,
no_personal_data_message=msg,
error_type=ValidationErrorType.INVALID_VALUE,
target=ErrorTarget.FEATURE_SET,
error_category=ErrorCategory.USER_ERROR,
)
return res

def _to_dict(self) -> Dict:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,35 @@ def __init__(
self,
*,
type: str,
timestamp_column: TimestampColumnMetadata,
timestamp_column: Optional[TimestampColumnMetadata] = None,
path: Optional[str] = None,
source_delay: Optional[DelayMetadata] = None,
source_process_code: Optional[SourceProcessCodeMetadata] = None,
dict: Optional[Dict] = None,
**kwargs: Any,
):
if type != "featureset":
if not timestamp_column:
msg = f"You need to provide timestamp_column for {type} feature source."
raise ValidationException(
message=msg,
no_personal_data_message=msg,
error_type=ValidationErrorType.INVALID_VALUE,
target=ErrorTarget.FEATURE_SET,
error_category=ErrorCategory.USER_ERROR,
)

if type != "custom":
if type == "featureset":
if not path:
msg = f"You need to provide path for featureset feature source."
raise ValidationException(
message=msg,
no_personal_data_message=msg,
error_type=ValidationErrorType.INVALID_VALUE,
target=ErrorTarget.FEATURE_SET,
error_category=ErrorCategory.USER_ERROR,
)
if not (path and not dict and not source_process_code):
msg = f"Cannot provide source_process_code or kwargs for {type} feature source."
raise ValidationException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,19 @@ def test_feature_set_spec_load(self) -> None:
assert len(fspec.features) == 3
assert len(fspec.source.kwargs.keys()) == 3

spec_path = "./tests/test_configs/feature_set/featureset_source_spec"
featureset_spec_contents = read_feature_set_metadata(path=spec_path)
featureset_spec_yaml_path = Path(spec_path, "FeatureSetSpec.yaml")
fspec = FeaturesetSpecMetadata._load(featureset_spec_contents, featureset_spec_yaml_path)

assert fspec.feature_transformation_code is not None
assert fspec.source is not None
assert fspec.source.timestamp_column is None
assert len(fspec.features) == 3
assert fspec.index_columns is None
assert fspec.source.source_delay is None
assert fspec.source.timestamp_column is None

def test_feature_set_spec_load_failure(self) -> None:
spec_path = "./tests/test_configs/feature_set/invalid_spec1"
featureset_spec_contents = read_feature_set_metadata(path=spec_path)
Expand All @@ -48,3 +61,21 @@ def test_feature_set_spec_load_failure(self) -> None:
featureset_spec_yaml_path = Path(spec_path, "FeatureSetSpec.yaml")
with pytest.raises(ValidationException):
FeaturesetSpecMetadata._load(featureset_spec_contents, featureset_spec_yaml_path)

spec_path = "./tests/test_configs/feature_set/invalid_spec3"
featureset_spec_contents = read_feature_set_metadata(path=spec_path)
featureset_spec_yaml_path = Path(spec_path, "FeatureSetSpec.yaml")
with pytest.raises(ValidationException):
FeaturesetSpecMetadata._load(featureset_spec_contents, featureset_spec_yaml_path)

spec_path = "./tests/test_configs/feature_set/invalid_spec4"
featureset_spec_contents = read_feature_set_metadata(path=spec_path)
featureset_spec_yaml_path = Path(spec_path, "FeatureSetSpec.yaml")
with pytest.raises(ValidationException):
FeaturesetSpecMetadata._load(featureset_spec_contents, featureset_spec_yaml_path)

spec_path = "./tests/test_configs/feature_set/invalid_spec5"
featureset_spec_contents = read_feature_set_metadata(path=spec_path)
featureset_spec_yaml_path = Path(spec_path, "FeatureSetSpec.yaml")
with pytest.raises(ValidationException):
FeaturesetSpecMetadata._load(featureset_spec_contents, featureset_spec_yaml_path)
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
source:
type: featureset
path: azureml://subscriptions/my_sub/resourcegroups/my_rg/workspaces/my_fs/featuresets/source_feature_set_name/versions/version1
feature_transformation_code:
path: ./code
transformer_class: driver_hourly_transform.DriverHourlyTransformer
features:
- name: conv_rate
type: double
- name: acc_rate
type: double
- name: avg_daily_trips
type: double
source_lookback:
days: 30
hours: 0
minutes: 0
temporal_join_lookback:
days: 2
hours: 0
minutes: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
source:
anyalee0221 marked this conversation as resolved.
Show resolved Hide resolved
type: featureset
feature_transformation_code:
path: ./code
transformer_class: driver_hourly_transform.DriverHourlyTransformer
features:
- name: conv_rate
type: double
- name: acc_rate
type: double
- name: avg_daily_trips
type: double
source_lookback:
days: 30
hours: 0
minutes: 0
temporal_join_lookback:
days: 2
hours: 0
minutes: 0

Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
source:
anyalee0221 marked this conversation as resolved.
Show resolved Hide resolved
type: custom
kwargs:
k1: v1
k2: v2
k3: v3
source_process_code:
path: ./source_process_code
process_class: source_process.MyDataSourceLoader
feature_transformation_code:
path: ./code
transformer_class: driver_hourly_transform.DriverHourlyTransformer
features:
- name: conv_rate
type: double
- name: acc_rate
type: double
- name: avg_daily_trips
type: double
index_columns:
- name: driver_id
type: long
source_lookback:
days: 30
hours: 0
minutes: 0
temporal_join_lookback:
days: 2
hours: 0
minutes: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
source:
anyalee0221 marked this conversation as resolved.
Show resolved Hide resolved
type: custom
kwargs:
k1: v1
k2: v2
k3: v3
timestamp_column:
name: timestamp
format: "%Y-%m-%d %H:%M:%S"
source_process_code:
path: ./source_process_code
process_class: source_process.MyDataSourceLoader
feature_transformation_code:
path: ./code
transformer_class: driver_hourly_transform.DriverHourlyTransformer
features:
- name: conv_rate
type: double
- name: acc_rate
type: double
- name: avg_daily_trips
type: double
source_lookback:
days: 30
hours: 0
minutes: 0
temporal_join_lookback:
days: 2
hours: 0
minutes: 0
Loading