Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Show DEVICE_MEMORY in show-gpus for AWS & Lambda. #1825

Merged
merged 4 commits into from
Apr 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions sky/adaptors/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ def wrapper(*args, **kwargs):
googleapiclient = _googleapiclient
google = _google
except ImportError:
raise ImportError('Fail to import dependencies for GCP.'
'Try pip install "skypilot[gcp]"') from None
raise ImportError('Failed to import dependencies for GCP. '
'Try: pip install "skypilot[gcp]"') from None
return func(*args, **kwargs)

return wrapper
Expand Down
22 changes: 17 additions & 5 deletions sky/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2938,8 +2938,16 @@ def show_gpus(
To show all accelerators, including less common ones and their detailed
information, use ``sky show-gpus --all``.

NOTE: If region is not specified, the price displayed for each instance type
is the lowest across all regions for both on-demand and spot instances.
Definitions of certain fields:

* ``DEVICE_MEM``: Memory of a single device; does not depend on the device
count of the instance (VM).

* ``HOST_MEM``: Memory of the host instance (VM).
Comment on lines +2943 to +2946
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Do we need the double backticks (``) here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this doesn't look good in the -h output, but it is what's needed for the rst/Sphinx docs.


If ``--region`` is not specified, the price displayed for each instance
type is the lowest across all regions for both on-demand and spot
instances. There may be multiple regions with the same lowest price.
"""
# validation for the --region flag
if region is not None and cloud is None:
Expand Down Expand Up @@ -3012,8 +3020,9 @@ def _output():
'QTY',
'CLOUD',
'INSTANCE_TYPE',
'DEVICE_MEM',
'vCPUs',
'HOST_MEMORY',
'HOST_MEM',
'HOURLY_PRICE',
'HOURLY_SPOT_PRICE',
]
Expand All @@ -3032,7 +3041,9 @@ def _output():
cpu_str = str(int(cpu_count))
else:
cpu_str = f'{cpu_count:.1f}'
mem_str = f'{item.memory:.0f}GB' if not pd.isna(
device_memory_str = (f'{item.device_memory:.0f}GB' if
not pd.isna(item.device_memory) else '-')
Comment on lines +3044 to +3045
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the definition of device memory? IIRC, it is the amount of memory in a single device and does not depend on the device count, right? Then what about TPUs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some definitions to the -h help text. What do you think? For TPUs, we can defer adding documentation since we don't have that info in the catalog.

host_memory_str = f'{item.memory:.0f}GB' if not pd.isna(
item.memory) else '-'
price_str = f'$ {item.price:.3f}' if not pd.isna(
item.price) else '-'
Expand All @@ -3044,8 +3055,9 @@ def _output():
item.accelerator_count,
item.cloud,
instance_type_str,
device_memory_str,
cpu_str,
mem_str,
host_memory_str,
price_str,
spot_price_str,
]
Expand Down
23 changes: 20 additions & 3 deletions sky/clouds/service_catalog/common.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Common utilities for service catalog."""
import ast
import hashlib
import os
import time
Expand Down Expand Up @@ -31,6 +32,7 @@ class InstanceTypeInfo(NamedTuple):
- accelerator_name: Canonical name of the accelerator. E.g. `V100`.
- accelerator_count: Number of accelerators offered by this instance type.
- cpu_count: Number of vCPUs offered by this instance type.
- device_memory: Device memory in GiB.
- memory: Instance memory in GiB.
- price: Regular instance price per hour (cheapest across all regions).
- spot_price: Spot instance price per hour (cheapest across all regions).
Expand All @@ -41,6 +43,7 @@ class InstanceTypeInfo(NamedTuple):
accelerator_name: str
accelerator_count: int
cpu_count: Optional[float]
device_memory: Optional[float]
memory: Optional[float]
price: float
spot_price: float
Expand Down Expand Up @@ -178,8 +181,8 @@ def _get_candidate_str(loc: str, all_loc: List[str]) -> str:

def _get_all_supported_regions_str() -> str:
all_regions: List[str] = sorted(df['Region'].unique().tolist())
return \
f'\nList of supported {cloud_name} regions: {", ".join(all_regions)!r}'
return (f'\nList of supported {cloud_name} regions: '
f'{", ".join(all_regions)!r}')

validated_region, validated_zone = region, zone

Expand Down Expand Up @@ -435,12 +438,25 @@ def list_accelerators_impl(
"""
if gpus_only:
df = df[~df['GpuInfo'].isna()]
df = df.copy() # avoid column assignment warning

try:
gpu_info_df = df['GpuInfo'].apply(ast.literal_eval)
df['DeviceMemoryGiB'] = gpu_info_df.apply(
lambda row: row['Gpus'][0]['MemoryInfo']['SizeInMiB']) / 1024.0
except ValueError:
# TODO(zongheng,woosuk): GCP/Azure catalogs do not have well-formed
# GpuInfo fields. So the above will throw:
# ValueError: malformed node or string: <_ast.Name object at ..>
df['DeviceMemoryGiB'] = None

df = df[[
'InstanceType',
'AcceleratorName',
'AcceleratorCount',
'vCPUs',
'MemoryGiB',
'DeviceMemoryGiB', # device memory
'MemoryGiB', # host memory
'Price',
'SpotPrice',
'Region',
Expand Down Expand Up @@ -470,6 +486,7 @@ def make_list_from_df(rows):
row['AcceleratorName'],
row['AcceleratorCount'],
row['vCPUs'],
row['DeviceMemoryGiB'],
row['MemoryGiB'],
row['Price'],
row['SpotPrice'],
Expand Down