# Source code for matplotcheck.vector

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib
import shapely

from .base import PlotTester


class VectorTester(PlotTester):
    """A PlotTester for spatial vector plots.

    Parameters
    ----------
    ax: ``matplotlib.axes.Axes`` object
        Axes to test. It is handed unchanged to the ``PlotTester``
        initializer.

    """

    def __init__(self, ax):
        """Initialize the vector tester"""
        super().__init__(ax)

    """ Check Data """

    def _convert_length(self, arr, n):
        """Helper function for 'get_points_by_attributes' and
        'get_lines_by_attributes'
        takes an array of either length 1 or n.
        If array is length 1: array of array's only element repeating n times
        is returned
        If array is length n: original array is returned
        Else: function raises value error

        Parameters
        ----------
        arr: array
            A numpy array of either length 1 or n
        n: int
            length of return array

        Returns
        -------
        array of length n
        """
        if len(arr) == 1:
            return list(arr) * n
        elif len(arr) == n:
            return arr
        else:
            raise ValueError("Input array length is not: 1 or {0}".format(n))

[docs]    def get_points_by_attributes(self):
        """Returns a sorted list of lists where each list contains tuples of
        xycoords for points of
        the same attributes: color, marker, and markersize

        Returns
        -------
        sorted list where each list represents all points with the same color.
        each point is represented by a tuple with its coordinates.
        """
        points_dataframe = pd.DataFrame(
            columns=["offset", "color", "msize", "mstyle"]
        )
        for c in (
            coll
            for coll in self.ax.collections
            if type(coll) == matplotlib.collections.PathCollection
        ):
            colors, sizes = (
                [tuple(color) for color in c.get_facecolors()],
                c.get_sizes(),
            )
            styles, offsets = (
                [tuple(tuple(v) for v in p.vertices) for p in c.get_paths()],
                [tuple(o) for o in c.get_offsets()],
            )
            n = len(offsets)
            colors, sizes, styles = (
                self._convert_length(colors, n),
                self._convert_length(sizes, n),
                self._convert_length(styles, n),
            )
            points_dataframe = points_dataframe.append(
                pd.DataFrame(
                    {
                        "offset": offsets,
                        "color": colors,
                        "msize": sizes,
                        "mstyle": styles,
                    }
                ),
                ignore_index=True,
            )

        points_grouped = [
            [data["offset"][i] for i in data.index]
            for c, data in points_dataframe.groupby(
                ["color", "mstyle", "msize"], sort=False
            )
        ]
        return sorted([sorted(p) for p in points_grouped])

[docs]    def assert_points_grouped_by_type(
        self, data_exp, sort_column, m="Point attributes not accurate by type"
    ):
        """Asserts that the points on Axes ax display attributes based on their
        type with error message m
        attributes tested are: color, marker, and markersize

        Parameters
        ----------
        data_exp: Geopandas Dataframe with Point objects in column 'geometry'
            an additional column with title sort_column, denotes a category for
            each point
        sort_column: string of column label in dataframe data_exp.
            this column contains values expressing which points belong to which
            group
        m: string error message if assertion is not met
        """

        groups = self.get_points_by_attributes()
        grouped_exp = [
            [(data.geometry[i].x, data.geometry[i].y) for i in data.index]
            for c, data in data_exp.groupby([sort_column], sort=False)
        ]
        np.testing.assert_equal(
            groups, sorted([sorted(p) for p in grouped_exp]), m
        )

[docs]    def sort_collection_by_markersize(self):
        """Returns a pandas dataframe of points in collections on Axes ax.

        Returns
        --------
        pandas dataframe with columns x, y, point_size. Each row reprsents a
        point on Axes ax with location x,y and markersize pointsize
        """
        df = pd.DataFrame(columns=("x", "y", "markersize"))
        for c in self.ax.collections:
            if isinstance(c, matplotlib.collections.PathCollection):
                offsets, markersizes = c.get_offsets(), c.get_sizes()
                x_data, y_data = (
                    [offset[0] for offset in offsets],
                    [offset[1] for offset in offsets],
                )
                if len(markersizes) == 1:
                    markersize = [markersizes[0]] * len(offsets)
                    df2 = pd.DataFrame(
                        {"x": x_data, "y": y_data, "markersize": markersize}
                    )
                    df = df.append(df2)
                elif len(markersizes) == len(offsets):
                    df2 = pd.DataFrame(
                        {"x": x_data, "y": y_data, "markersize": markersizes}
                    )
                    df = df.append(df2)
        df = df.sort_values(by="markersize").reset_index(drop=True)
        return df

[docs]    def assert_collection_sorted_by_markersize(self, df_expected, sort_column):
        """Asserts a collection of points vary in size by column expressed in
        sort_column

        Parameters
        ----------
        df_expected: geopandas dataframe with geometry column of expected point
        locations
        sort_column: column title from df_expected that points are expected to
        be sorted by
            if None, assertion is passed
        """
        df = self.sort_collection_by_markersize()
        df_expected = df_expected.sort_values(by=sort_column).reset_index(
            drop=True
        )
        np.testing.assert_almost_equal(
            np.array(df.x),
            np.array([p.x for p in df_expected.geometry]),
            decimal=6,
            err_msg="Markersize not based on {0} values".format(sort_column),
        )
        np.testing.assert_almost_equal(
            np.array(df.y),
            np.array([p.y for p in df_expected.geometry]),
            decimal=6,
            err_msg="Markersize not based on {0} values".format(sort_column),
        )

[docs]    def get_points(self):
        """Returns a Pandas dataframe with all x, y values for points on axis.

        Returns
        -------
        output: DataFrame with columns 'x' and 'y'. Each row represents one
        points coordinates.
        """
        points = self.get_xy(points_only=True).sort_values(by=["x", "y"])
        points.reset_index(inplace=True, drop=True)
        return points

[docs]    def assert_points(self, points_expected, m="Incorrect Point Data"):
        """
        Asserts the point data in Axes ax is equal to points_expected data
        with error message m.
        If points_expected not a GeoDataFrame, test fails.

        Parameters
        ----------
        points_expected : GeoDataFrame
        GeoDataFrame with the expected points for the axis.
        m : string (default = "Incorrect Point Data")
        String error message if assertion is not met.
        """
        if isinstance(points_expected, gpd.geodataframe.GeoDataFrame):
            points = self.get_points()
            xy_expected = pd.DataFrame(columns=["x", "y"])
            xy_expected["x"] = points_expected.geometry.x
            xy_expected["y"] = points_expected.geometry.y
            xy_expected = xy_expected.sort_values(by=["x", "y"])
            xy_expected.reset_index(inplace=True, drop=True)
            # Fix for failure if more than points were plotted in matplotlib
            if len(points) != len(xy_expected):
                # Checks if there are extra 0, 0 coords in the DataFrame
                # returned from self.get_points and removes them.
                points_zeros = (points["x"] == 0) & (points["y"] == 0)
                if points_zeros.any():
                    expected_zeros = (xy_expected["x"] == 0) & (
                        xy_expected["y"] == 0
                    )
                    keep = expected_zeros.sum()
                    zeros_index_vals = points_zeros.index[
                        points_zeros.tolist()
                    ]
                    for i in range(keep):
                        points_zeros.at[zeros_index_vals[i]] = False
                    points = points[~points_zeros].reset_index(drop=True)
                else:
                    raise AssertionError(
                        "points_expected's length does not match the stored"
                        "data's length."
                    )
            try:
                pd.testing.assert_frame_equal(left=points, right=xy_expected)
            except AssertionError:
                raise AssertionError(m)
        else:
            raise ValueError(
                "points_expected is not expected type: GeoDataFrame"
            )

    # Lines

    def _convert_multilines(self, df, column_title):
        """Helper function for get_lines_by_attribute
        converts a pandas dataframe containing a column of LineString and
        MultiLinestring objects
        to a pandas dataframe where each row represents a single line. Line
        segment values are converted
        to a list of tuples.

        Parameters
        ---------
        df: pandas Dataframe containing a column of LineString and
        MultiLinestring objects
        column_title: string of column title which holds LineString and
        MultLinestring objects

        Returns
        -------
        Dataframe where each row represents a single line.
        Line segments values are converted to a list of tuples in column
        column_title
        """
        dfout = df.copy()
        for i, row in dfout.iterrows():
            seg = row[column_title]
            if type(seg) == shapely.geometry.linestring.LineString:
                dfout.at[i, column_title] = list(seg.coords)
            elif type(seg) == shapely.geometry.multilinestring.MultiLineString:
                dfout.at[i, column_title] = list(seg[0].coords)
                for j in range(1, len(seg)):
                    new_row = row.copy()
                    new_row[column_title] = list(seg[j].coords)
                    dfout = dfout.append(new_row).reset_index(drop=True)
            else:
                raise ValueError(
                    "Segment is not of either expected type: MultiLinestring, "
                    "LineString"
                )
        return dfout

    def _convert_linestyle(self, ls):
        """helper function for get_lines_by_attributes.
            converts linestyle to a tuple of (offset, onoffseq) to get hashable
            datatypes

        Parameters
        ----------
        ls: linesytle from a LineCollection retreived by get_linestyle()

        Returns
        -------
        tuple containing (offset, onoffseq) of linestyle
        """
        onoffseq = ls[1]
        if onoffseq:
            onoffseq = tuple(ls[1])
        return (ls[0], onoffseq)

[docs]    def get_lines(self):
        """Returns a dataframe with all lines on ax

        Returns
        -------
        output: DataFrame with column 'lines'. Each row represents one line
        segment. Its value in 'lines' is a list of tuples representing the
        line segment.
        """
        lines = [
            [tuple(coords) for coords in seg]
            for c in self.ax.collections
            if type(c) == matplotlib.collections.LineCollection
            for seg in c.get_segments()
        ]
        return pd.DataFrame({"lines": lines})

[docs]    def get_lines_by_collection(self):
        """Returns a sorted list of list where each list contains line segments
        from the same collections

        Returns
        -------
        sorted list where each list represents all lines from the same
        collection
        """
        lines_grouped = [
            [[tuple(coords) for coords in seg] for seg in c.get_segments()]
            for c in self.ax.collections
            if type(c) == matplotlib.collections.LineCollection
        ]
        return sorted([sorted(lines) for lines in lines_grouped])

[docs]    def get_lines_by_attributes(self):
        """Returns a sorted list of lists where each list contains line
        segments of the same attributes:
        color, linewidth, and linestyle

        Returns
        ------
        sorted list where each list represents all lines with the same
        attributes
        """
        lines_dataframe = pd.DataFrame(
            columns=["seg", "color", "lwidth", "lstyle"]
        )
        for c in (
            coll
            for coll in self.ax.collections
            if type(coll) == matplotlib.collections.LineCollection
        ):
            segs = [[tuple(coords) for coords in s] for s in c.get_segments()]
            colors, widths, styles = (
                [tuple(color) for color in c.get_colors()],
                c.get_linewidth(),
                [self._convert_linestyle(ls) for ls in c.get_linestyle()],
            )
            n = len(segs)
            colors, widths, styles = (
                self._convert_length(colors, n),
                self._convert_length(widths, n),
                self._convert_length(styles, n),
            )
            lines_dataframe = lines_dataframe.append(
                pd.DataFrame(
                    {
                        "seg": segs,
                        "color": colors,
                        "lwidth": widths,
                        "lstyle": styles,
                    }
                ),
                ignore_index=True,
            )

        lines_grouped = [
            [data["seg"][i] for i in data.index]
            for c, data in lines_dataframe.groupby(
                ["color", "lwidth", "lstyle"], sort=False
            )
        ]
        return sorted([sorted(lines) for lines in lines_grouped])

[docs]    def assert_lines(self, lines_expected, m="Incorrect Line Data"):
        """Asserts the line data in Axes ax is equal to lines_expected with
        error message m.
        If line_expected is None or an empty list, assertion is passed

        Parameters
        ----------
        lines_expected: Geopandas Dataframe with a geometry column consisting
        of MultilineString and LineString objects
        m: string error message if assertion is not met
        """
        if type(lines_expected) == gpd.geodataframe.GeoDataFrame:
            lines_expected = lines_expected[
                ~lines_expected["geometry"].is_empty
            ].reset_index(drop=True)
            fig, ax_exp = plt.subplots()
            lines_expected.plot(ax=ax_exp)
            lines_exp = VectorTester(ax=ax_exp).get_lines()
            plt.close(fig)
            np.testing.assert_equal(
                sorted(self.get_lines().lines), sorted(lines_exp.lines), m
            )
        elif not lines_expected:
            pass
        else:
            raise ValueError(
                "lines_expected is not expected type: GeoDataFrame"
            )

[docs]    def assert_lines_grouped_by_type(
        self,
        lines_expected,
        sort_column,
        m="Line attributes not accurate by type",
    ):
        """Asserts that the lines on Axes ax display like attributes based on
        their type with error message m
        attributes tested are: color, linewidth, linestyle

        Parameters
        ----------
        lines_expected: Geopandas Dataframe with geometry column consisting of
        MultiLineString and LineString objects
        sort_column: string of column title in lines_expected that contains
        types lines are expected to be grouped by
        m: string error message if assertion is not met
        """
        if type(lines_expected) == gpd.geodataframe.GeoDataFrame:
            groups = self.get_lines_by_attributes()
            lines_expected = lines_expected[
                ~lines_expected["geometry"].is_empty
            ].reset_index(drop=True)
            fig, ax_exp = plt.subplots()
            for typ, data in lines_expected.groupby(sort_column):
                data.plot(ax=ax_exp)
            grouped_exp = [
                [[tuple(coords) for coords in seg] for seg in c.get_segments()]
                for c in ax_exp.collections
                if type(c) == matplotlib.collections.LineCollection
            ]
            grouped_exp = sorted([sorted(lines) for lines in grouped_exp])
            plt.close(fig)
            np.testing.assert_equal(groups, grouped_exp, m)
        elif lines_expected is None:
            pass
        else:
            raise ValueError(
                "lines_expected is not of expected type: GeoDataFrame"
            )

    """ Check Polygons """

[docs]    def get_polygons(self):
        """Returns all polygons on Axes ax as a sorted list of polygons where
        each polygon is a list of coord tuples

        Returns
        -------
        output: sorted list of polygons. Each polygon is a list tuples. Each
        tuple is a coordinate.
        """
        output = [
            [tuple(coords) for coords in path.vertices]
            for c in self.ax.collections
            if type(c) == matplotlib.collections.PatchCollection
            for path in c.get_paths()
        ]
        return sorted(output)

    def _convert_multipolygons(self, series):
        """Helper function for assert_polygons
        converts a pandas series of Polygon and MultiPolygon objects to a list
        of lines,
        where each line is a list of coord tuples for the exterior

        Parameters
        ----------
        series: series where each entry is a Polygon or MultiPolygon

        Returns
        -------
        list of lines where each line is a list of coord tuples for the
        exterior polygon
        """
        output = []
        for entry in series:
            if type(entry) == shapely.geometry.multipolygon.MultiPolygon:
                for poly in entry:
                    output += [list(poly.exterior.coords)]
            if type(entry) == shapely.geometry.polygon.Polygon:
                output += [list(entry.exterior.coords)]
        return output

[docs]    def assert_polygons(
        self, polygons_expected, dec=None, m="Incorrect Polygon Data"
    ):
        """Asserts the polygon data in Axes ax is equal to polygons_expected to
        decimal place dec with error message m
        If polygons_expected is am empty list or None, assertion is passed.

        Parameters
        ----------
        polygons_expected : List or GeoDataFrame
            List of polygons expected to be founds on Axes ax or a GeoDataFrame
            containing the expected polygons.
        dec : int (Optional)
            Int stating the desired decimal precision. If None, polygons must
            be exact.
        m : string (default = "Incorrect Polygon Data")
            String error message if assertion is not met.
        """
        if len(polygons_expected) != 0:
            if isinstance(polygons_expected, list):
                if len(polygons_expected[0]) == 0:
                    raise ValueError(
                        "Empty list or GeoDataFrame passed into assert_"
                        "polygons."
                    )
            if isinstance(polygons_expected, gpd.geodataframe.GeoDataFrame):
                polygons_expected = self._convert_multipolygons(
                    polygons_expected["geometry"]
                )
            polygons = self.get_polygons()
            if dec:
                assert len(polygons_expected) == len(polygons), m
                polygons_expected = sorted(polygons_expected)
                for i in range(len(polygons)):
                    np.testing.assert_almost_equal(
                        polygons[i],
                        polygons_expected[i],
                        decimal=dec,
                        err_msg=m,
                    )
            else:
                np.testing.assert_equal(polygons, sorted(polygons_expected), m)
        else:
            raise ValueError(
                "Empty list or GeoDataFrame passed into assert_polygons."
            )