-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
287 add local morans i #305
Changes from 7 commits
4a7cb09
d5622c4
9349d44
be36976
35d9f81
82fc034
d3e9a5d
46f2458
19a473a
be7b724
059fa5a
c9b0dd4
46d5ba4
181952f
5107003
b7193c5
e4a036b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Local Moran's I | ||
|
||
::: eis_toolkit.exploratory_analyses.local_morans_i |
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,64 @@ | ||||||||||||||||||
import geopandas as gpd | ||||||||||||||||||
import libpysal | ||||||||||||||||||
import numpy as np | ||||||||||||||||||
from beartype import beartype | ||||||||||||||||||
from beartype.typing import Literal, Union | ||||||||||||||||||
from esda.moran import Moran_Local | ||||||||||||||||||
|
||||||||||||||||||
from eis_toolkit import exceptions | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
@beartype | ||||||||||||||||||
def _local_morans_i( | ||||||||||||||||||
gdf: gpd.GeoDataFrame, column: str, weight_type: Literal["queen", "knn"], k: Union[int, None], permutations: int | ||||||||||||||||||
) -> gpd.GeoDataFrame: | ||||||||||||||||||
|
||||||||||||||||||
if weight_type == "queen": | ||||||||||||||||||
w = libpysal.weights.Queen.from_dataframe(gdf) | ||||||||||||||||||
elif weight_type == "knn": | ||||||||||||||||||
w = libpysal.weights.KNN.from_dataframe(gdf, k=k) | ||||||||||||||||||
else: | ||||||||||||||||||
raise ValueError("Invalid weight_type. Use 'queen' or 'knn'.") | ||||||||||||||||||
|
||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I know, we should row-standardize the weights to minimize bias. I am not sure if it is default/automatically done, but you could add this row to be sure:
Suggested change
|
||||||||||||||||||
if len(gdf[column]) != len(w.weights): | ||||||||||||||||||
raise ValueError("Dimension mismatch between data and weights matrix.") | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this mismatch happen? Anyway, change the exception to some |
||||||||||||||||||
|
||||||||||||||||||
moran_loc = Moran_Local(gdf[column], w, permutations=permutations) | ||||||||||||||||||
|
||||||||||||||||||
gdf[f"{column}_local_moran_I"] = moran_loc.Is | ||||||||||||||||||
gdf[f"{column}_p_value"] = moran_loc.p_sim | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The name of this column could be |
||||||||||||||||||
|
||||||||||||||||||
gdf[f"{column}_p_value"].fillna(value=np.nan, inplace=True) | ||||||||||||||||||
|
||||||||||||||||||
return gdf | ||||||||||||||||||
|
||||||||||||||||||
|
||||||||||||||||||
@beartype | ||||||||||||||||||
def local_morans_i( | ||||||||||||||||||
gdf: gpd.GeoDataFrame, | ||||||||||||||||||
column: str, | ||||||||||||||||||
weight_type: Literal["queen", "knn"] = "queen", | ||||||||||||||||||
k: Union[int, None] = 2, | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would make remove the option to have I am not entirely sure about this, but I would increase the default value of |
||||||||||||||||||
permutations: int = 999, | ||||||||||||||||||
) -> gpd.GeoDataFrame: | ||||||||||||||||||
"""Execute Local Moran's I calculation for the area. | ||||||||||||||||||
|
||||||||||||||||||
Args: | ||||||||||||||||||
gdf: The geodataframe that contains the area to be examined with local morans I. | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A bit nitpicky comment, but I would change the wording here from "area" to "data".
Suggested change
|
||||||||||||||||||
column: The column to be used in the analysis. | ||||||||||||||||||
weight_type: The type of spatial weights matrix to be used. Defaults to "queen". | ||||||||||||||||||
k: Number of nearest neighbors for the KNN weights matrix. Defaults to 2. | ||||||||||||||||||
permutations: Number of permutations for significance testing. Defaults to 999. | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should add checks for the |
||||||||||||||||||
|
||||||||||||||||||
Returns: | ||||||||||||||||||
Geodataframe containing the calculations. | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
|
||||||||||||||||||
Raises: | ||||||||||||||||||
EmptyDataFrameException if input geodataframe is empty. | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
""" | ||||||||||||||||||
if gdf.shape[0] == 0: | ||||||||||||||||||
raise exceptions.EmptyDataFrameException("Geodataframe is empty.") | ||||||||||||||||||
|
||||||||||||||||||
calculations = _local_morans_i(gdf, column, weight_type, k, permutations) | ||||||||||||||||||
|
||||||||||||||||||
return calculations |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good that you added a notebook for example! However, I think we could use here our geochemical example data that is now in GitHub in the master branch. I think it's preferable to demonstrate using our geological data if convenient and possible, and I believe one of the primary uses for this Local Moran's I might be to inspect spatial autocorrelation of point geochemical data. So the dataset is called "IOCG_CLB_Till_Geochem_reg_511p.gpkg" and should be found directly under the remote folder. For the target column, any of the mineral columns will probably do. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The notebook's data is "geographical" as I was not comfortable with using example data I do not fully understand. Hence, I used GDP of African countries to demonstrate the correctness of the Local Moran's I. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah that's absolutely fine. I have also used similar example datasets for some functions. This improvement just came to my mind, since we added the geochem data recently to master and I have personally used Local Moran's I for similar data exploration in my previous job. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But in the context of EIS it would be better to use the geochem data. I just do not know what columns (maybe any) and which parameters for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep. I think you could use for example "Li_ppm_551", so concentration of Lithium. |
Large diffs are not rendered by default.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I recall correctly, we haven't made unit tests so far like they have been implemented here. Meaning that the target values are calculated during the test using the underlying library functions. You know more about testing @nialov , could you comment this? |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import geopandas as gpd | ||
import libpysal | ||
import numpy as np | ||
import pytest | ||
from esda.moran import Moran_Local | ||
|
||
from eis_toolkit import exceptions | ||
from eis_toolkit.exploratory_analyses.local_morans_i import local_morans_i | ||
|
||
|
||
def test_local_morans_i_queen_correctness(): | ||
"""Test Local Moran's I Queen correctness.""" | ||
|
||
permutations = 999 | ||
|
||
column = "gdp_md_est" | ||
data = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) | ||
gdf = gpd.GeoDataFrame(data) | ||
|
||
w = libpysal.weights.Queen.from_dataframe(gdf) | ||
|
||
moran_loc = Moran_Local(gdf[column], w, permutations=permutations) | ||
|
||
result = local_morans_i(gdf=gdf, column=column, weight_type="queen", permutations=permutations) | ||
|
||
np.testing.assert_allclose(result[f"{column}_local_moran_I"], moran_loc.Is, rtol=0.1, atol=0.1) | ||
np.testing.assert_allclose(result[f"{column}_p_value"], moran_loc.p_sim, rtol=0.1, atol=0.1) | ||
|
||
|
||
def test_local_morans_i_knn_correctness(): | ||
"""Test Local Moran's I KNN correctness.""" | ||
|
||
k = 3 | ||
permutations = 999 | ||
|
||
column = "gdp_md_est" | ||
data = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) | ||
gdf = gpd.GeoDataFrame(data) | ||
|
||
w = libpysal.weights.KNN.from_dataframe(gdf, k=k) | ||
moran_loc = Moran_Local(gdf[column], w, permutations=permutations) | ||
|
||
result = local_morans_i(gdf, column, "knn", k=k, permutations=permutations) | ||
|
||
np.testing.assert_allclose(result[f"{column}_local_moran_I"], moran_loc.Is, rtol=0.1, atol=0.1) | ||
np.testing.assert_allclose(result[f"{column}_p_value"], moran_loc.p_sim, rtol=0.1, atol=0.1) | ||
|
||
|
||
def test_empty_geodataframe(): | ||
"""Test Local Moran's I raises EmptyDataFrameException.""" | ||
|
||
empty_gdf = gpd.GeoDataFrame() | ||
|
||
# Use pytest.raises to check the expected exception | ||
with pytest.raises(exceptions.EmptyDataFrameException): | ||
local_morans_i(empty_gdf, column="value", weight_type="queen", k=2, permutations=999) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use
InvalidParameterValueException
from our custom exceptions here.