filling module

Module for filling surface depressions.

Depression

The class for storing depression info.

Source code in lidar/filling.py
class Depression:
    """The class for storing depression info."""

    def __init__(
        self,
        id,
        count,
        size,
        volume,
        meanDepth,
        maxDepth,
        minElev,
        bndElev,
        perimeter,
        major_axis,
        minor_axis,
        elongatedness,
        eccentricity,
        orientation,
        area_bbox_ratio,
    ):
        self.id = id
        self.count = count
        self.size = size
        self.volume = volume
        self.meanDepth = meanDepth
        self.maxDepth = maxDepth
        self.minElev = minElev
        self.bndElev = bndElev
        self.perimeter = perimeter
        self.major_axis = major_axis
        self.minor_axis = minor_axis
        self.elongatedness = elongatedness
        self.eccentricity = eccentricity
        self.orientation = orientation
        self.area_bbox_ratio = area_bbox_ratio
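
A minimal usage sketch, assuming only that the class is importable from lidar.filling; the attribute values below are hypothetical placeholders. Depression is a plain record that get_dep_props fills in and write_dep_csv reads back out.

from lidar.filling import Depression

# Hypothetical values; Depression simply stores them as attributes.
dep = Depression(
    id=1, count=250, size=25000.0, volume=12000.0, meanDepth=0.48,
    maxDepth=1.2, minElev=320.5, bndElev=321.7, perimeter=800.0,
    major_axis=250.0, minor_axis=120.0, elongatedness=2.08,
    eccentricity=0.87, orientation=45.0, area_bbox_ratio=0.6,
)
print(dep.id, dep.size, dep.maxDepth)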

ExtractSinks(in_dem, min_size, out_dir, filled_dem=None, engine='whitebox', keep_files=True)

Extract sinks (e.g., maximum depression extent) from a DEM.

Parameters:

in_dem (str): File path to the input DEM. Required.
min_size (int): The minimum number of pixels to be considered as a sink. Required.
out_dir (str): File path to the output directory. Required.
filled_dem (str, optional): File path to the filled DEM. Defaults to None.
engine (str, optional): The depression-filling engine, either "whitebox" or "richdem". Defaults to "whitebox".
keep_files (bool, optional): Whether to keep the intermediate files. Defaults to True.

Returns:

str: File path to the output sink raster (sink.tif).

Source code in lidar/filling.py
def ExtractSinks(
    in_dem, min_size, out_dir, filled_dem=None, engine="whitebox", keep_files=True
):
    """Extract sinks (e.g., maximum depression extent) from a DEM.

    Args:
        in_dem (str): File path to the input DEM.
        min_size (int): The minimum number of pixels to be considered as a sink.
        out_dir (str): File path to the output directory.
        filled_dem (str, optional): File path to the filled DEM. Defaults to None.
        engine (str, optional): The depression-filling engine, either "whitebox" or "richdem". Defaults to "whitebox".
        keep_files (bool, optional): Whether to keep the intermediate files. Defaults to True.

    Returns:
        str: File path to the output sink raster (sink.tif).
    """
    start_time = time.time()

    out_dem = os.path.join(out_dir, "dem.tif")
    out_dem_diff = os.path.join(out_dir, "dem_diff.tif")
    out_sink = os.path.join(out_dir, "sink.tif")
    out_region = os.path.join(out_dir, "region.tif")
    out_depth = os.path.join(out_dir, "depth.tif")
    out_csv_file = os.path.join(out_dir, "regions_info.csv")
    out_vec_file = os.path.join(out_dir, "regions.shp")

    basename = os.path.splitext(os.path.basename(in_dem))[0]
    out_gpkg = os.path.join(out_dir, basename + ".gpkg")

    # out_gpkg = os.path.join(out_dir, "regions.gpkg")
    if filled_dem is None:
        out_dem_filled = os.path.join(out_dir, "dem_filled.tif")
    else:
        out_dem_filled = filled_dem
    # create output folder if nonexistent
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # load the dem and get dem info
    print("Loading data ...")
    dem = rd.LoadGDAL(in_dem)
    no_data = dem.no_data
    projection = dem.projection
    geotransform = dem.geotransform
    cell_size = np.round(geotransform[1], decimals=2)

    # get min and max elevation of the dem
    # max_elev = float(np.max(dem[dem != no_data]))
    max_elev = float(np.nanmax(dem))
    # min_elev = float(np.min(dem[dem > 0]))
    min_elev = float(np.nanmin(dem))
    print(
        "min = {:.2f}, max = {:.2f}, no_data = {}, cell_size = {}".format(
            min_elev, max_elev, no_data, cell_size
        )
    )

    # depression filling
    if filled_dem is None:
        print("Depression filling ...")
        if engine == "richdem":
            dem_filled = rd.FillDepressions(dem, in_place=False)
            dem_filled[np.isnan(dem)] = np.nan
        elif engine == "whitebox":
            wbt = whitebox.WhiteboxTools()
            wbt.verbose = False
            wbt.fill_depressions_wang_and_liu(
                os.path.abspath(in_dem), os.path.abspath(out_dem_filled)
            )
            dem_filled = rd.LoadGDAL(out_dem_filled)

    else:
        dem_filled = rd.LoadGDAL(filled_dem)
    dem_diff = dem_filled - dem
    dem_diff.no_data = 0

    if filled_dem is None:
        print("Saving filled dem ...")
        rd.SaveGDAL(out_dem_filled, dem_filled)
    rd.SaveGDAL(out_dem_diff, dem_diff)

    # nb_labels is the total number of objects. 0 represents background object.
    print("Region grouping ...")
    label_objects, nb_labels = regionGroup(dem_diff, min_size, no_data)
    dem_diff[label_objects == 0] = 0
    depth = np2rdarray(
        dem_diff, no_data=0, projection=projection, geotransform=geotransform
    )
    rd.SaveGDAL(out_depth, depth)
    del dem_diff, depth

    print("Computing properties ...")
    # objects = measure.regionprops(label_objects, dem, coordinates='xy')
    objects = measure.regionprops(label_objects, dem)
    dep_list = get_dep_props(objects, cell_size)
    write_dep_csv(dep_list, out_csv_file)
    del objects, dep_list

    # convert numpy to richdem data format
    region = np2rdarray(
        label_objects, no_data=0, projection=projection, geotransform=geotransform
    )
    del label_objects

    region[np.isnan(dem)] = 0

    print("Saving sink dem ...")
    sink = np.copy(dem)
    sink[region == 0] = 0
    sink = np2rdarray(sink, no_data=0, projection=projection, geotransform=geotransform)
    rd.SaveGDAL(out_sink, sink)
    # del sink

    print("Saving refined dem ...")
    dem_refined = dem_filled
    dem_refined[region > 0] = dem[region > 0]
    dem_refined = np2rdarray(
        dem_refined, no_data=no_data, projection=projection, geotransform=geotransform
    )
    dem_refined[np.isnan(dem)] = np.nan
    rd.SaveGDAL(out_dem, dem_refined)
    rd.SaveGDAL(out_region, region)
    del dem_refined, region, dem

    print("Converting raster to vector ...")
    polygonize(out_region, out_vec_file)

    gdf = join_csv_to_gdf(out_vec_file, out_csv_file, "id", "region-id")
    gdf.drop(columns=["id"], inplace=True)
    gdf.to_file(out_gpkg, driver="GPKG")

    if not keep_files:
        for file in [
            out_dem,
            out_dem_diff,
            out_depth,
            out_sink,
            out_dem_filled,
            out_region,
            out_csv_file,
        ]:
            if os.path.exists(file):
                os.remove(file)

        out_vec_file_dbf = os.path.splitext(out_vec_file)[0] + ".dbf"
        out_vec_file_shx = os.path.splitext(out_vec_file)[0] + ".shx"
        out_vec_file_prj = os.path.splitext(out_vec_file)[0] + ".prj"
        for file in [
            out_vec_file,
            out_vec_file_dbf,
            out_vec_file_shx,
            out_vec_file_prj,
        ]:
            if os.path.exists(file):
                os.remove(file)

    end_time = time.time()
    print("Total run time:\t\t\t {:.4f} s\n".format(end_time - start_time))

    return out_sink
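
A minimal usage sketch; the input path is a hypothetical placeholder. The function writes its rasters, regions_info.csv, regions.shp, and a GeoPackage into out_dir and returns the path to sink.tif.

import os
from lidar.filling import ExtractSinks

in_dem = "data/dem.tif"                       # hypothetical input DEM
out_dir = os.path.join(os.getcwd(), "output")

# Ignore depressions smaller than 1000 pixels.
sink_path = ExtractSinks(in_dem, min_size=1000, out_dir=out_dir)
print(sink_path)                              # .../output/sink.tif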

extract_sinks_by_bbox(bbox, filename, min_size=10, tmp_dir=None, mask=None, crs='EPSG:5070', kernel_size=3, resolution=10, to_cog=False, keep_files=True, ignore_warnings=True)

Extract sinks from a DEM within a bounding box.

Parameters:

bbox (list): The bounding box in the form of [minx, miny, maxx, maxy]. Required.
filename (str): The output depression file name. Required.
min_size (int, optional): The minimum number of pixels to be considered as a sink. Defaults to 10.
tmp_dir (str, optional): The temporary directory. Defaults to None, i.e., a "tmp" folder under the current working directory.
mask (str, optional): The mask file path. Defaults to None.
crs (str, optional): The coordinate reference system. Defaults to "EPSG:5070".
kernel_size (int, optional): The kernel size for smoothing the DEM. Defaults to 3.
resolution (int, optional): The resolution of the DEM. Defaults to 10.
to_cog (bool, optional): Whether to convert the output to COG. Defaults to False.
keep_files (bool, optional): Whether to keep the intermediate files. Defaults to True.
ignore_warnings (bool, optional): Whether to ignore warnings. Defaults to True.
Source code in lidar/filling.py
def extract_sinks_by_bbox(
    bbox,
    filename,
    min_size=10,
    tmp_dir=None,
    mask=None,
    crs="EPSG:5070",
    kernel_size=3,
    resolution=10,
    to_cog=False,
    keep_files=True,
    ignore_warnings=True,
):
    """Extract sinks from a DEM by HUC8.

    Args:
        bbox (list): The bounding box in the form of [minx, miny, maxx, maxy].
        filename (str): The output depression file name.
        min_size (int, optional): The minimum number of pixels to be considered as a sink. Defaults to 10.
        tmp_dir (str, optional): The temporary directory. Defaults to None, i.e., a "tmp" folder under the current working directory.
        mask (str, optional): The mask file path. Defaults to None.
        crs (str, optional): The coordinate reference system. Defaults to "EPSG:5070".
        kernel_size (int, optional): The kernel size for smoothing the DEM. Defaults to 3.
        resolution (int, optional): The resolution of the DEM. Defaults to 10.
        to_cog (bool, optional): Whether to convert the output to COG. Defaults to False.
        keep_files (bool, optional): Whether to keep the intermediate files. Defaults to True.
        ignore_warnings (bool, optional): Whether to ignore warnings. Defaults to True.
    """
    import shutil
    import warnings

    if ignore_warnings:
        warnings.filterwarnings("ignore")

    start_time = time.time()

    if not filename.endswith(".shp"):
        filename = filename + ".shp"

    filename = os.path.abspath(filename)

    if tmp_dir is None:
        tmp_dir = os.path.join(os.getcwd(), "tmp")

    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    merge = os.path.join(tmp_dir, "mosaic.tif")
    clip = os.path.join(tmp_dir, "clip.tif")
    reproj = os.path.join(tmp_dir, "reproj.tif")
    image = os.path.join(tmp_dir, "image.tif")
    median = os.path.join(tmp_dir, "median.tif")
    regions = os.path.join(tmp_dir, "regions.shp")
    regions_info = os.path.join(tmp_dir, "regions_info.csv")

    try:
        download_ned_by_bbox(bbox, out_dir=tmp_dir)

        if not os.path.exists(merge):
            print("Merging NED tiles ...")
            mosaic(tmp_dir, merge)

        if mask is not None:
            clip_image(merge, mask, clip)
        else:
            clip = merge

        reproject_image(clip, reproj, crs)
        resample(reproj, image, resolution)
        MedianFilter(image, kernel_size, median)
        if to_cog:
            image_to_cog(median, median)
        ExtractSinks(median, min_size, tmp_dir)
        join_tables(regions, regions_info, filename)

        for file in [merge, clip, reproj, image]:
            if os.path.exists(file):
                os.remove(file)

        if not keep_files:
            shutil.rmtree(tmp_dir)
    except Exception as e:
        print(e)
        return None

    end_time = time.time()
    print("Total run time:\t\t\t {:.4f} s\n".format(end_time - start_time))

extract_sinks_by_huc8(huc8, min_size=10, filename=None, tmp_dir=None, wbd=None, crs='EPSG:5070', kernel_size=3, resolution=10, keep_files=True, error_file=None, ignore_warnings=True)

Extract sinks from a DEM by HUC8.

Parameters:

huc8 (str): The HUC8 code, e.g., 01010002. Required.
min_size (int, optional): The minimum number of pixels to be considered as a sink. Defaults to 10.
filename (str, optional): The output depression file name. Defaults to None, i.e., using the HUC8 code.
tmp_dir (str, optional): The temporary directory. Defaults to None, i.e., a folder named after the HUC8 code under the current working directory.
wbd (str | GeoDataFrame, optional): The watershed boundary file. Defaults to None.
crs (str, optional): The coordinate reference system. Defaults to "EPSG:5070".
kernel_size (int, optional): The kernel size for smoothing the DEM. Defaults to 3.
resolution (int, optional): The resolution of the DEM. Defaults to 10.
keep_files (bool, optional): Whether to keep the intermediate files. Defaults to True.
error_file (str, optional): File path to which the IDs of failed HUC8 units are appended. Defaults to None.
ignore_warnings (bool, optional): Whether to ignore warnings. Defaults to True.
Source code in lidar/filling.py
def extract_sinks_by_huc8(
    huc8,
    min_size=10,
    filename=None,
    tmp_dir=None,
    wbd=None,
    crs="EPSG:5070",
    kernel_size=3,
    resolution=10,
    keep_files=True,
    error_file=None,
    ignore_warnings=True,
):
    """Extract sinks from a DEM by HUC8.

    Args:
        huc8 (str): The HUC8 code, e.g., 01010002
        min_size (int, optional): The minimum number of pixels to be considered as a sink. Defaults to 10.
        filename (str, optional): The output depression file name. Defaults to None, i.e., using the HUC8 code.
        tmp_dir (str, optional): The temporary directory. Defaults to None, i.e., a folder named after the HUC8 code under the current working directory.
        wbd (str | GeoDataFrame, optional): The watershed boundary file. Defaults to None.
        crs (str, optional): The coordinate reference system. Defaults to "EPSG:5070".
        kernel_size (int, optional): The kernel size for smoothing the DEM. Defaults to 3.
        resolution (int, optional): The resolution of the DEM. Defaults to 10.
        keep_files (bool, optional): Whether to keep the intermediate files. Defaults to True.
        error_file (str, optional): File path to which the IDs of failed HUC8 units are appended. Defaults to None.
        ignore_warnings (bool, optional): Whether to ignore warnings. Defaults to True.
    """
    import shutil
    import warnings
    import geopandas as gpd

    if ignore_warnings:
        warnings.filterwarnings("ignore")

    start_time = time.time()

    if filename is None:
        filename = huc8

    if not filename.endswith(".shp"):
        filename = filename + ".shp"

    filename = os.path.abspath(filename)

    if tmp_dir is None:
        tmp_dir = os.path.join(os.getcwd(), huc8)

    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)

    merge = os.path.join(tmp_dir, "mosaic.tif")
    mask = os.path.join(tmp_dir, "mask.geojson")
    clip = os.path.join(tmp_dir, "clip.tif")
    reproj = os.path.join(tmp_dir, "reproj.tif")
    image = os.path.join(tmp_dir, "image.tif")
    median = os.path.join(tmp_dir, "median.tif")
    regions = os.path.join(tmp_dir, "regions.shp")
    regions_info = os.path.join(tmp_dir, "regions_info.csv")

    try:
        download_ned_by_huc(huc8, out_dir=tmp_dir)

        if wbd is None:
            print("Downloading WBD ...")
            hu8_url = "https://drive.google.com/file/d/1AVBPVVAzsLs8dnF_bCvFvGMCAEgaPthh/view?usp=sharing"
            output = os.path.join(tmp_dir, "WBDHU8_CONUS.zip")
            wbd = download_file(hu8_url, output=output, unzip=False)

        if isinstance(wbd, str):
            print("Reading WBD ...")
            gdf = gpd.read_file(wbd)
        elif isinstance(wbd, gpd.GeoDataFrame):
            gdf = wbd
        else:
            raise ValueError("shp_path must be a filepath or a GeoDataFrame.")

        selected = gdf[gdf["huc8"] == huc8].copy()
        selected.to_crs(epsg=4326, inplace=True)
        selected.to_file(mask)

        if not os.path.exists(merge):
            print("Merging NED tiles ...")
            mosaic(tmp_dir, merge)
        clip_image(merge, mask, clip)
        reproject_image(clip, reproj, crs)
        resample(reproj, image, resolution)
        MedianFilter(image, kernel_size, median)
        ExtractSinks(median, min_size, tmp_dir)
        join_tables(regions, regions_info, filename)

        for file in [merge, mask, clip, reproj, image]:
            if os.path.exists(file):
                os.remove(file)

        if not keep_files:
            shutil.rmtree(tmp_dir)
    except Exception as e:
        if error_file is not None:
            with open(error_file, "a") as f:
                f.write(huc8 + "\n")

        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
        print(e)
        return None

    end_time = time.time()
    print("Total run time:\t\t\t {:.4f} s\n".format(end_time - start_time))

extract_sinks_by_huc8_batch(huc_ids=None, min_size=10, out_dir=None, tmp_dir=None, wbd=None, crs='EPSG:5070', kernel_size=3, resolution=10, keep_files=False, reverse=False, error_file=None, ignore_warnings=True, ignored_ids=[], overwrite=False)

Extract sinks from NED for a batch of HUC8 watersheds.

Parameters:

huc_ids (list, optional): A list of HUC8 codes, e.g., ["01010002"]. Defaults to None, i.e., all HUC8 units listed in examples/data/huc8.csv are processed.
min_size (int, optional): The minimum number of pixels to be considered as a sink. Defaults to 10.
out_dir (str, optional): The output directory for the depression shapefiles. Defaults to None, i.e., the current working directory.
tmp_dir (str, optional): The temporary directory. Defaults to None.
wbd (str | GeoDataFrame, optional): The watershed boundary file. Defaults to None.
crs (str, optional): The coordinate reference system. Defaults to "EPSG:5070".
kernel_size (int, optional): The kernel size for smoothing the DEM. Defaults to 3.
resolution (int, optional): The resolution of the DEM. Defaults to 10.
keep_files (bool, optional): Whether to keep the intermediate files. Defaults to False.
reverse (bool, optional): Whether to reverse the HUC8 list. Defaults to False.
error_file (str, optional): File path to which the IDs of failed HUC8 units are appended. Defaults to None.
ignore_warnings (bool, optional): Whether to ignore warnings. Defaults to True.
ignored_ids (list, optional): A list of HUC8 codes to skip. Defaults to [].
overwrite (bool, optional): Whether to overwrite existing files. Defaults to False.
Source code in lidar/filling.py
def extract_sinks_by_huc8_batch(
    huc_ids=None,
    min_size=10,
    out_dir=None,
    tmp_dir=None,
    wbd=None,
    crs="EPSG:5070",
    kernel_size=3,
    resolution=10,
    keep_files=False,
    reverse=False,
    error_file=None,
    ignore_warnings=True,
    ignored_ids=[],
    overwrite=False,
):
    """Extract sinks from NED by HUC8.

    Args:
        huc8 (str): The HUC8 code, e.g., 01010002
        min_size (int, optional): The minimum number of pixels to be considered as a sink. Defaults to 10.
        filename (str, optional): The output depression file name. Defaults to None, e,g., using the HUC8 code.
        tmp_dir (str, optional): The temporary directory. Defaults to None, e.g., using the current directory.
        wbd (str | GeoDataFrame, optional): The watershed boundary file. Defaults to None.
        crs (str, optional): The coordinate reference system. Defaults to "EPSG:5070".
        kernel_size (int, optional): The kernel size for smoothing the DEM. Defaults to 3.
        resolution (int, optional): The resolution of the DEM. Defaults to 10.
        keep_files (bool, optional): Whether to keep the intermediate files. Defaults to True.
        reverse (bool, optional): Whether to reverse the HUC8 list. Defaults to False.
        error_file (_type_, optional): The file to save the error IDs. Defaults to None.
        ignore_warnings (bool, optional): Whether to ignore warnings. Defaults to True.
        overwrite (bool, optional): Whether to overwrite the existing files. Defaults to False.
    """
    import pandas as pd

    if huc_ids is None:
        url = "https://raw.githubusercontent.com/giswqs/lidar/master/examples/data/huc8.csv"
        df = pd.read_csv(url, dtype=str)
        huc_ids = df["huc8_id"].tolist()

    if not isinstance(huc_ids, list):
        huc_ids = [huc_ids]

    if reverse:
        huc_ids = huc_ids[::-1]

    if out_dir is None:
        out_dir = os.getcwd()

    for index, huc8 in enumerate(huc_ids):
        print(f"Processing {index+1}:{len(huc_ids)}: {huc8} ...")
        if huc8 in ignored_ids:
            continue
        filename = os.path.join(out_dir, str(huc8) + ".shp")
        if not os.path.exists(filename) or (os.path.exists(filename) and overwrite):
            extract_sinks_by_huc8(
                huc8,
                min_size,
                filename,
                tmp_dir,
                wbd,
                crs,
                kernel_size,
                resolution,
                keep_files,
                error_file,
                ignore_warnings,
            )
        else:
            print(f"File already exists: {filename}")

get_dep_props(objects, resolution)

Computes depression attributes.

Parameters:

objects (list): The labeled region objects returned by skimage.measure.regionprops. Required.
resolution (float): The spatial resolution of the image. Required.

Returns:

list: A list of depression objects with attributes.

Source code in lidar/filling.py
def get_dep_props(objects, resolution):
    """Computes depression attributes.

    Args:
        objects (list): The labeled region objects returned by skimage.measure.regionprops.
        resolution (float): The spatial resolution of the image.

    Returns:
        list: A list of depression objects with attributes.
    """
    dep_list = []

    for object in objects:
        unique_id = object.label
        count = object.area
        size = count * pow(resolution, 2)  # depression size
        min_elev = float(object.min_intensity)  # depression min elevation
        max_elev = float(object.max_intensity)  # depression max elevation
        max_depth = max_elev - min_elev  # depression max depth
        mean_depth = float(
            (max_elev * count - np.sum(object.intensity_image)) / count
        )  # depression mean depth
        volume = mean_depth * count * pow(resolution, 2)  # depression volume
        perimeter = object.perimeter * resolution
        major_axis = object.major_axis_length * resolution
        minor_axis = object.minor_axis_length * resolution
        if minor_axis == 0:
            minor_axis = resolution
        elongatedness = major_axis * 1.0 / minor_axis
        eccentricity = object.eccentricity
        orientation = np.degrees(object.orientation)  # convert radians to degrees
        area_bbox_ratio = object.extent

        dep_list.append(
            Depression(
                unique_id,
                count,
                size,
                volume,
                mean_depth,
                max_depth,
                min_elev,
                max_elev,
                perimeter,
                major_axis,
                minor_axis,
                elongatedness,
                eccentricity,
                orientation,
                area_bbox_ratio,
            )
        )

    return dep_list
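
A minimal sketch on a synthetic DEM, mirroring how ExtractSinks pairs region labels with the original elevations; the toy arrays and output path are illustrative only.

import numpy as np
from skimage import measure
from lidar.filling import get_dep_props, write_dep_csv

# Toy DEM (meters) with one rectangular depression.
dem = np.full((20, 20), 100.0)
dem[5:10, 5:12] = 98.5
dem[7, 8] = 97.0                              # deepest cell

labels = np.zeros(dem.shape, dtype=int)
labels[5:10, 5:12] = 1                        # region id 1 marks the depression

objects = measure.regionprops(labels, dem)    # intensity image = elevations
deps = get_dep_props(objects, resolution=10.0)
print(deps[0].size, deps[0].maxDepth)         # 3500.0 (m^2), 1.5 (m)
write_dep_csv(deps, "regions_info.csv")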

image_to_cog(source, dst_path=None, profile='deflate', **kwargs)

Converts an image to a COG file.

Parameters:

source (str): A dataset path, URL, or rasterio.io.DatasetReader object. Required.
dst_path (str, optional): An output dataset path or PathLike object. Defaults to None.
profile (str, optional): COG profile. More at https://cogeotiff.github.io/rio-cogeo/profile. Defaults to "deflate".

Raises:

ImportError: If rio-cogeo is not installed.
FileNotFoundError: If the source file could not be found.

Source code in lidar/filling.py
def image_to_cog(source, dst_path=None, profile="deflate", **kwargs):
    """Converts an image to a COG file.

    Args:
        source (str): A dataset path, URL or rasterio.io.DatasetReader object.
        dst_path (str, optional): An output dataset path or PathLike object. Defaults to None.
        profile (str, optional): COG profile. More at https://cogeotiff.github.io/rio-cogeo/profile. Defaults to "deflate".

    Raises:
        ImportError: If rio-cogeo is not installed.
        FileNotFoundError: If the source file could not be found.
    """
    try:
        from rio_cogeo.cogeo import cog_translate
        from rio_cogeo.profiles import cog_profiles

    except ImportError:
        raise ImportError(
            "The rio-cogeo package is not installed. Please install it with `pip install rio-cogeo` or `conda install rio-cogeo -c conda-forge`."
        )

    if not source.startswith("http"):
        source = check_file_path(source)

        if not os.path.exists(source):
            raise FileNotFoundError("The provided input file could not be found.")

    if dst_path is None:
        if not source.startswith("http"):
            dst_path = os.path.splitext(source)[0] + "_cog.tif"
        else:
            dst_path = temp_file_path(extension=".tif")

    dst_path = check_file_path(dst_path)

    dst_profile = cog_profiles.get(profile)
    cog_translate(source, dst_path, dst_profile, **kwargs)
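
A minimal usage sketch; it requires the optional rio-cogeo dependency, and the paths are placeholders.

from lidar.filling import image_to_cog

# Writes a Cloud Optimized GeoTIFF next to the (hypothetical) input.
image_to_cog("median.tif", dst_path="median_cog.tif")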

np2rdarray(in_array, no_data, projection, geotransform)

Converts a numpy array to a richDEM rdarray.

Parameters:

in_array (array): The input numpy array. Required.
no_data (float): The no_data value of the array. Required.
projection (str): The projection of the image. Required.
geotransform (tuple): The geotransform of the image. Required.

Returns:

object: The richDEM array.

Source code in lidar/filling.py
def np2rdarray(in_array, no_data, projection, geotransform):
    """Converts an numpy array to rdarray.

    Args:
        in_array (array): The input numpy array.
        no_data (float): The no_data value of the array.
        projection (str): The projection of the image.
        geotransform (tuple): The geotransform of the image.

    Returns:
        object: The richDEM array.
    """
    out_array = rd.rdarray(in_array, no_data=no_data)
    out_array.projection = projection
    out_array.geotransform = geotransform
    return out_array
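
A minimal sketch of the typical round trip: read a raster with richdem, derive a plain numpy array, then re-attach the spatial metadata so the result can be written back; the paths are hypothetical.

import numpy as np
import richdem as rd
from lidar.filling import np2rdarray

dem = rd.LoadGDAL("dem.tif")          # hypothetical input raster
derived = np.asarray(dem) * 0.0       # stand-in for any derived numpy array

out = np2rdarray(
    derived,
    no_data=dem.no_data,
    projection=dem.projection,
    geotransform=dem.geotransform,
)
rd.SaveGDAL("derived.tif", out)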

polygonize(img, shp_path)

Converts a raster image to vector.

Parameters:

img (str): File path to the input image. Required.
shp_path (str): File path to the output shapefile. Required.
Source code in lidar/filling.py
def polygonize(img, shp_path):
    """Converts a raster image to vector.

    Args:
        img (str): File path to the input image.
        shp_path (str): File path to the output shapefile.
    """
    # mapping between gdal type and ogr field type
    type_mapping = {
        gdal.GDT_Byte: ogr.OFTInteger,
        gdal.GDT_UInt16: ogr.OFTInteger,
        gdal.GDT_Int16: ogr.OFTInteger,
        gdal.GDT_UInt32: ogr.OFTInteger,
        gdal.GDT_Int32: ogr.OFTInteger,
        gdal.GDT_Float32: ogr.OFTReal,
        gdal.GDT_Float64: ogr.OFTReal,
        gdal.GDT_CInt16: ogr.OFTInteger,
        gdal.GDT_CInt32: ogr.OFTInteger,
        gdal.GDT_CFloat32: ogr.OFTReal,
        gdal.GDT_CFloat64: ogr.OFTReal,
    }

    ds = gdal.Open(img)
    prj = ds.GetProjection()
    srcband = ds.GetRasterBand(1)

    dst_layername = "Shape"
    drv = ogr.GetDriverByName("ESRI Shapefile")
    dst_ds = drv.CreateDataSource(shp_path)
    srs = osr.SpatialReference(wkt=prj)

    dst_layer = dst_ds.CreateLayer(dst_layername, srs=srs)
    # raster_field = ogr.FieldDefn('id', type_mapping[srcband.DataType])
    raster_field = ogr.FieldDefn("id", type_mapping[gdal.GDT_Int32])
    dst_layer.CreateField(raster_field)
    gdal.Polygonize(srcband, srcband, dst_layer, 0, [], callback=None)
    del img, ds, srcband, dst_ds, dst_layer
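
A minimal usage sketch; region.tif is the labeled raster that ExtractSinks writes, and the paths are placeholders.

from lidar.filling import polygonize

# Each labeled region becomes a polygon; the label value is stored in the "id" field.
polygonize("region.tif", "regions.shp")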

regionGroup(img_array, min_size, no_data)

Identifies regions based on a region-growing method.

Parameters:

img_array (array): The numpy array containing the image. Required.
min_size (int): The minimum number of pixels to be considered as a depression. Required.
no_data (float): The no_data value of the image. Required.

Returns:

tuple: The labeled objects and the total number of labels.

Source code in lidar/filling.py
def regionGroup(img_array, min_size, no_data):
    """IdentifIies regions based on region growing method

    Args:
        img_array (array): The numpy array containing the image.
        min_size (int): The minimum number of pixels to be considered as a depression.
        no_data (float): The no_data value of the image.

    Returns:
        tuple: The labelled objects and total number of labels.
    """
    img_array[img_array == no_data] = 0
    label_objects, nb_labels = ndimage.label(img_array)
    sizes = np.bincount(label_objects.ravel())
    mask_sizes = sizes > min_size
    mask_sizes[0] = 0
    image_cleaned = mask_sizes[label_objects]
    label_objects, nb_labels = ndimage.label(image_cleaned)
    # nb_labels is the total number of objects. 0 represents background object.
    return label_objects, nb_labels
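
A minimal sketch on a toy array. Note that the function replaces no_data values in its input in place, so a copy is passed here as a precaution.

import numpy as np
from lidar.filling import regionGroup

# Toy depth-difference grid: nonzero cells mark filled depressions.
diff = np.zeros((30, 30))
diff[5:8, 5:8] = 0.4        # 9-cell region, dropped by the size filter
diff[15:25, 15:25] = 1.0    # 100-cell region, kept

labels, n = regionGroup(diff.copy(), min_size=10, no_data=0)
print(n)                    # 1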

write_dep_csv(dep_list, csv_file)

Saves the depression list info to a CSV file.

Parameters:

dep_list (list): A list of depression objects with attributes. Required.
csv_file (str): File path to the output CSV file. Required.
Source code in lidar/filling.py
def write_dep_csv(dep_list, csv_file):
    """Saves the depression list info to a CSV file.

    Args:
        dep_list (list): A list of depression objects with attributes.
        csv_file (str): File path to the output CSV file.
    """
    csv = open(csv_file, "w")
    header = ",".join(
        [
            "region-id",
            "count",
            "area",
            "volume",
            "avg-depth",
            "max-depth",
            "min-elev",
            "max-elev",
            "perimeter",
            "major-axis",
            "minor-axis",
            "elongatedness",
            "eccentricity",
            "orientation",
            "area-bbox-ratio",
        ]
    )

    csv.write(header + "\n")
    for dep in dep_list:
        line = "{},{},{:.2f},{:.2f},{:.2f},{:.2f},{:.2f},{:.2f}, {:.2f},{:.2f},{:.2f},{:.2f},{:.2f},{:.2f},{:.2f}".format(
            dep.id,
            dep.count,
            dep.size,
            dep.volume,
            dep.meanDepth,
            dep.maxDepth,
            dep.minElev,
            dep.bndElev,
            dep.perimeter,
            dep.major_axis,
            dep.minor_axis,
            dep.elongatedness,
            dep.eccentricity,
            dep.orientation,
            dep.area_bbox_ratio,
        )
        csv.write(line + "\n")
    csv.close()
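
A minimal sketch writing a single hand-built record; the values are hypothetical, and in normal use dep_list comes straight from get_dep_props.

from lidar.filling import Depression, write_dep_csv

dep = Depression(1, 35, 3500.0, 2100.0, 0.6, 1.5, 97.0, 98.5,
                 240.0, 70.0, 50.0, 1.4, 0.7, 30.0, 1.0)
write_dep_csv([dep], "regions_info.csv")   # hypothetical output path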