Source code for matplotcheck.vector

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib
import shapely

from .base import PlotTester


class VectorTester(PlotTester):
    """A PlotTester for spatial vector plots.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes object holding the plot to be tested.
    """

    def __init__(self, ax):
        """Initialize the vector tester."""
        super(VectorTester, self).__init__(ax)

    # ------------------------------------------------------------------
    # Check Data
    # ------------------------------------------------------------------

    def _convert_length(self, arr, n):
        """Broadcast a length-1 sequence to length ``n``.

        Helper for ``get_points_by_attributes`` and
        ``get_lines_by_attributes``.

        Parameters
        ----------
        arr : sequence
            Sequence of either length 1 or length ``n``.
        n : int
            Required length of the returned sequence.

        Returns
        -------
        sequence of length ``n``
            If ``arr`` has length 1, a list repeating its only element ``n``
            times; if ``arr`` already has length ``n``, ``arr`` itself.

        Raises
        ------
        ValueError
            If ``arr`` has any other length.
        """
        if len(arr) == 1:
            return list(arr) * n
        if len(arr) == n:
            return arr
        raise ValueError("Input array length is not: 1 or {0}".format(n))

    def get_points_by_attributes(self):
        """Group plotted points by color, marker and markersize.

        Only ``matplotlib.collections.PathCollection`` objects on the Axes
        are inspected.

        Returns
        -------
        list of list of tuple
            Sorted list of groups. Each group is a sorted list of (x, y)
            offsets for the points sharing the same face color, marker path
            and marker size. An empty list is returned when there are no
            path collections.
        """
        frames = []
        for coll in self.ax.collections:
            if not isinstance(coll, matplotlib.collections.PathCollection):
                continue
            offsets = [tuple(o) for o in coll.get_offsets()]
            n = len(offsets)
            # Everything is converted to tuples so the values are hashable
            # and can be used as groupby keys below.
            colors = [tuple(color) for color in coll.get_facecolors()]
            styles = [
                tuple(tuple(v) for v in p.vertices) for p in coll.get_paths()
            ]
            frames.append(
                pd.DataFrame(
                    {
                        "offset": offsets,
                        "color": self._convert_length(colors, n),
                        "msize": self._convert_length(coll.get_sizes(), n),
                        "mstyle": self._convert_length(styles, n),
                    }
                )
            )

        if not frames:
            return []

        # DataFrame.append no longer exists in pandas 2.x; concatenate once.
        points_dataframe = pd.concat(frames, ignore_index=True)
        points_grouped = [
            list(data["offset"])
            for _, data in points_dataframe.groupby(
                ["color", "mstyle", "msize"], sort=False
            )
        ]
        return sorted([sorted(p) for p in points_grouped])

    def assert_points_grouped_by_type(
        self, data_exp, sort_column, m="Point attributes not accurate by type"
    ):
        """Assert that points are styled consistently within each category.

        The attribute groups returned by ``get_points_by_attributes`` (color,
        marker, markersize) are compared with the groups obtained by grouping
        ``data_exp`` on ``sort_column``.

        Parameters
        ----------
        data_exp : GeoDataFrame
            Expected data with Point objects in column 'geometry' and a
            column ``sort_column`` giving the category of each point.
        sort_column : str
            Label of the column in ``data_exp`` that says which group each
            point belongs to.
        m : str
            Error message used if the assertion is not met.
        """
        groups = self.get_points_by_attributes()
        grouped_exp = [
            [(point.x, point.y) for point in data.geometry]
            for _, data in data_exp.groupby([sort_column], sort=False)
        ]
        np.testing.assert_equal(
            groups, sorted([sorted(p) for p in grouped_exp]), m
        )

    def sort_collection_by_markersize(self):
        """Return all plotted points ordered by marker size.

        Only ``matplotlib.collections.PathCollection`` objects are used. A
        collection whose number of sizes is neither 1 nor the number of
        offsets is skipped.

        Returns
        -------
        pandas.DataFrame
            Columns 'x', 'y' and 'markersize'; one row per point, sorted by
            'markersize' with a fresh 0..n-1 index.
        """
        frames = []
        for coll in self.ax.collections:
            if not isinstance(coll, matplotlib.collections.PathCollection):
                continue
            offsets, markersizes = coll.get_offsets(), coll.get_sizes()
            n = len(offsets)
            if len(markersizes) == 1:
                # A single size applies to every point of the collection.
                sizes = [markersizes[0]] * n
            elif len(markersizes) == n:
                sizes = markersizes
            else:
                continue
            frames.append(
                pd.DataFrame(
                    {
                        "x": [offset[0] for offset in offsets],
                        "y": [offset[1] for offset in offsets],
                        "markersize": sizes,
                    }
                )
            )

        # DataFrame.append no longer exists in pandas 2.x; concatenate once.
        if frames:
            df = pd.concat(frames)
        else:
            df = pd.DataFrame(columns=["x", "y", "markersize"])
        return df.sort_values(by="markersize").reset_index(drop=True)

    def assert_collection_sorted_by_markersize(self, df_expected, sort_column):
        """Assert that marker sizes follow the values in ``sort_column``.

        The plotted points ordered by marker size must have the same
        coordinates (to 6 decimals) as ``df_expected`` ordered by
        ``sort_column``.

        Parameters
        ----------
        df_expected : GeoDataFrame
            Expected point locations in its geometry column.
        sort_column : str or None
            Column of ``df_expected`` the marker sizes are expected to be
            based on. If None, the assertion is passed.
        """
        if sort_column is None:
            return

        df = self.sort_collection_by_markersize()
        df_expected = df_expected.sort_values(by=sort_column).reset_index(
            drop=True
        )
        err_msg = "Markersize not based on {0} values".format(sort_column)
        np.testing.assert_almost_equal(
            np.array(df.x),
            np.array([p.x for p in df_expected.geometry]),
            decimal=6,
            err_msg=err_msg,
        )
        np.testing.assert_almost_equal(
            np.array(df.y),
            np.array([p.y for p in df_expected.geometry]),
            decimal=6,
            err_msg=err_msg,
        )

    def get_points(self):
        """Return all x, y values for points on the axis.

        Returns
        -------
        pandas.DataFrame
            The result of ``self.get_xy(points_only=True)`` sorted by 'x'
            then 'y', with a fresh 0..n-1 index. Each row represents one
            point's coordinates.
        """
        points = self.get_xy(points_only=True).sort_values(by=["x", "y"])
        return points.reset_index(drop=True)

    def assert_points(self, points_expected, m="Incorrect Point Data"):
        """Assert the point data on the Axes equals ``points_expected``.

        Parameters
        ----------
        points_expected : GeoDataFrame
            GeoDataFrame with the expected points for the axis.
        m : str (default = "Incorrect Point Data")
            Error message used if the point data does not match.

        Raises
        ------
        ValueError
            If ``points_expected`` is not a GeoDataFrame.
        AssertionError
            If the number of points differs and no (0, 0) points are plotted,
            or if the point coordinates differ.
        """
        if not isinstance(points_expected, gpd.geodataframe.GeoDataFrame):
            raise ValueError(
                "points_expected is not expected type: GeoDataFrame"
            )

        points = self.get_points()
        xy_expected = pd.DataFrame(columns=["x", "y"])
        xy_expected["x"] = points_expected.geometry.x
        xy_expected["y"] = points_expected.geometry.y
        xy_expected = xy_expected.sort_values(by=["x", "y"]).reset_index(
            drop=True
        )

        # Fix for failure if more points than expected were plotted in
        # matplotlib.
        if len(points) != len(xy_expected):
            # Check for extra (0, 0) coords in the DataFrame returned from
            # self.get_points and remove them, keeping only as many as
            # are actually expected.
            points_zeros = (points["x"] == 0) & (points["y"] == 0)
            if not points_zeros.any():
                raise AssertionError(
                    "points_expected's length does not match the stored "
                    "data's length."
                )
            expected_zeros = (xy_expected["x"] == 0) & (xy_expected["y"] == 0)
            keep = int(expected_zeros.sum())
            zeros_index_vals = points_zeros.index[points_zeros.tolist()]
            # Slicing caps `keep` at the number of plotted zero points.
            for label in zeros_index_vals[:keep]:
                points_zeros.at[label] = False
            points = points[~points_zeros].reset_index(drop=True)

        try:
            pd.testing.assert_frame_equal(left=points, right=xy_expected)
        except AssertionError as err:
            raise AssertionError(m) from err

    # ------------------------------------------------------------------
    # Lines
    # ------------------------------------------------------------------

    def _convert_multilines(self, df, column_title):
        """Expand MultiLineString rows so that each row holds a single line.

        Helper for ``get_lines_by_attributes``.

        Parameters
        ----------
        df : pandas.DataFrame
            DataFrame containing a column of LineString and MultiLineString
            objects.
        column_title : str
            Title of the column holding the LineString and MultiLineString
            objects.

        Returns
        -------
        pandas.DataFrame
            Copy of ``df`` in which every value of ``column_title`` is a list
            of coordinate tuples. The first part of a MultiLineString stays in
            its original row; each further part becomes a new row appended at
            the end (other columns copied from the original row). The index
            is reset only when rows were appended.

        Raises
        ------
        ValueError
            If a value is neither a LineString nor a MultiLineString.
        """
        first_parts = []
        extra_rows = []
        for _, row in df.iterrows():
            seg = row[column_title]
            if isinstance(seg, shapely.geometry.linestring.LineString):
                parts = [list(seg.coords)]
            elif isinstance(
                seg, shapely.geometry.multilinestring.MultiLineString
            ):
                # Multi-part geometries are accessed through ``.geoms``;
                # direct indexing is not available in Shapely 2.x.
                parts = [list(part.coords) for part in seg.geoms]
            else:
                raise ValueError(
                    "Segment is not of either expected type: MultiLinestring, "
                    "LineString"
                )
            first_parts.append(parts[0])
            for part in parts[1:]:
                new_row = row.copy()
                new_row[column_title] = part
                extra_rows.append(new_row)

        dfout = df.copy()
        # An explicit object Series keeps each coordinate list as one cell.
        dfout[column_title] = pd.Series(
            first_parts, index=dfout.index, dtype=object
        )
        if extra_rows:
            dfout = pd.concat(
                [dfout, pd.DataFrame(extra_rows)], ignore_index=True
            )
        return dfout

    def _convert_linestyle(self, ls):
        """Convert a linestyle to a hashable ``(offset, onoffseq)`` tuple.

        Helper for ``get_lines_by_attributes``.

        Parameters
        ----------
        ls : sequence
            One linestyle entry from a LineCollection's ``get_linestyle()``;
            ``ls[0]`` is the offset and ``ls[1]`` the on/off sequence.

        Returns
        -------
        tuple
            ``(offset, onoffseq)`` where a truthy ``onoffseq`` is converted
            to a tuple and a falsy one is returned unchanged.
        """
        onoffseq = ls[1]
        if onoffseq:
            onoffseq = tuple(onoffseq)
        return (ls[0], onoffseq)

    def get_lines(self):
        """Return a dataframe with all lines on the Axes.

        Returns
        -------
        pandas.DataFrame
            DataFrame with a single column 'lines'. Each row represents one
            line segment of a LineCollection as a list of coordinate tuples.
        """
        lines = []
        for coll in self.ax.collections:
            if type(coll) is not matplotlib.collections.LineCollection:
                continue
            for seg in coll.get_segments():
                lines.append([tuple(coords) for coords in seg])
        return pd.DataFrame({"lines": lines})

    def get_lines_by_collection(self):
        """Return line segments grouped by the collection they belong to.

        Returns
        -------
        list
            Sorted list of groups, one per LineCollection on the Axes. Each
            group is a sorted list of segments; each segment is a list of
            coordinate tuples.
        """
        lines_grouped = [
            [[tuple(coords) for coords in seg] for seg in coll.get_segments()]
            for coll in self.ax.collections
            if type(coll) is matplotlib.collections.LineCollection
        ]
        return sorted([sorted(lines) for lines in lines_grouped])

    def get_lines_by_attributes(self):
        """Group line segments by color, linewidth and linestyle.

        Returns
        -------
        list
            Sorted list of groups. Each group is a sorted list of the line
            segments sharing the same attributes. An empty list is returned
            when there are no line collections.
        """
        frames = []
        for coll in self.ax.collections:
            if type(coll) is not matplotlib.collections.LineCollection:
                continue
            segs = [
                [tuple(coords) for coords in s] for s in coll.get_segments()
            ]
            n = len(segs)
            colors = [tuple(color) for color in coll.get_colors()]
            styles = [
                self._convert_linestyle(ls) for ls in coll.get_linestyle()
            ]
            frames.append(
                pd.DataFrame(
                    {
                        "seg": segs,
                        "color": self._convert_length(colors, n),
                        "lwidth": self._convert_length(
                            coll.get_linewidth(), n
                        ),
                        "lstyle": self._convert_length(styles, n),
                    }
                )
            )

        if not frames:
            return []

        # DataFrame.append no longer exists in pandas 2.x; concatenate once.
        lines_dataframe = pd.concat(frames, ignore_index=True)
        lines_grouped = [
            list(data["seg"])
            for _, data in lines_dataframe.groupby(
                ["color", "lwidth", "lstyle"], sort=False
            )
        ]
        return sorted([sorted(lines) for lines in lines_grouped])

    def assert_lines(self, lines_expected, m="Incorrect Line Data"):
        """Assert the line data on the Axes equals ``lines_expected``.

        The expected lines are plotted on a temporary figure and the
        resulting segments are compared with those on the tested Axes. If
        ``lines_expected`` is None or an empty list, the assertion is passed.

        Parameters
        ----------
        lines_expected : GeoDataFrame
            Geometry column consisting of MultiLineString and LineString
            objects. Empty geometries are ignored.
        m : str
            Error message used if the assertion is not met.

        Raises
        ------
        ValueError
            If ``lines_expected`` is truthy but not a GeoDataFrame.
        """
        if isinstance(lines_expected, gpd.geodataframe.GeoDataFrame):
            lines_expected = lines_expected[
                ~lines_expected["geometry"].is_empty
            ].reset_index(drop=True)
            fig, ax_exp = plt.subplots()
            try:
                lines_expected.plot(ax=ax_exp)
                lines_exp = VectorTester(ax=ax_exp).get_lines()
            finally:
                # Always release the scratch figure, even if plotting fails.
                plt.close(fig)
            np.testing.assert_equal(
                sorted(self.get_lines().lines), sorted(lines_exp.lines), m
            )
        elif not lines_expected:
            return
        else:
            raise ValueError(
                "lines_expected is not expected type: GeoDataFrame"
            )

    def assert_lines_grouped_by_type(
        self,
        lines_expected,
        sort_column,
        m="Line attributes not accurate by type",
    ):
        """Assert that lines of the same type share the same attributes.

        Attributes tested are color, linewidth and linestyle. Each group of
        ``lines_expected`` (grouped on ``sort_column``) is plotted on a
        temporary figure and the per-collection segments are compared with
        the attribute groups found on the tested Axes. If ``lines_expected``
        is None, the assertion is passed.

        Parameters
        ----------
        lines_expected : GeoDataFrame
            Geometry column consisting of MultiLineString and LineString
            objects. Empty geometries are ignored.
        sort_column : str
            Title of the column in ``lines_expected`` containing the types
            the lines are expected to be grouped by.
        m : str
            Error message used if the assertion is not met.

        Raises
        ------
        ValueError
            If ``lines_expected`` is neither a GeoDataFrame nor None.
        """
        if isinstance(lines_expected, gpd.geodataframe.GeoDataFrame):
            groups = self.get_lines_by_attributes()
            lines_expected = lines_expected[
                ~lines_expected["geometry"].is_empty
            ].reset_index(drop=True)
            fig, ax_exp = plt.subplots()
            try:
                for _, data in lines_expected.groupby(sort_column):
                    data.plot(ax=ax_exp)
                grouped_exp = [
                    [
                        [tuple(coords) for coords in seg]
                        for seg in coll.get_segments()
                    ]
                    for coll in ax_exp.collections
                    if type(coll) is matplotlib.collections.LineCollection
                ]
            finally:
                # Always release the scratch figure, even if plotting fails.
                plt.close(fig)
            grouped_exp = sorted([sorted(lines) for lines in grouped_exp])
            np.testing.assert_equal(groups, grouped_exp, m)
        elif lines_expected is None:
            return
        else:
            raise ValueError(
                "lines_expected is not of expected type: GeoDataFrame"
            )

    # ------------------------------------------------------------------
    # Check Polygons
    # ------------------------------------------------------------------

    def get_polygons(self):
        """Return all polygons on the Axes.

        Only ``matplotlib.collections.PatchCollection`` objects are used.

        Returns
        -------
        list
            Sorted list of polygons. Each polygon is a list of coordinate
            tuples taken from the vertices of one path.
        """
        output = []
        for coll in self.ax.collections:
            if type(coll) is not matplotlib.collections.PatchCollection:
                continue
            for path in coll.get_paths():
                output.append([tuple(coords) for coords in path.vertices])
        return sorted(output)

    def _convert_multipolygons(self, series):
        """Convert Polygon and MultiPolygon objects to exterior coord lists.

        Helper for ``assert_polygons``.

        Parameters
        ----------
        series : iterable
            Series where each entry is a Polygon or MultiPolygon. Entries of
            any other type are ignored.

        Returns
        -------
        list
            One list of exterior coordinate tuples per polygon; every part of
            a MultiPolygon contributes its own list.
        """
        output = []
        for entry in series:
            if isinstance(entry, shapely.geometry.multipolygon.MultiPolygon):
                # Multi-part geometries are iterated through ``.geoms``;
                # direct iteration is not available in Shapely 2.x.
                output.extend(
                    list(poly.exterior.coords) for poly in entry.geoms
                )
            elif isinstance(entry, shapely.geometry.polygon.Polygon):
                output.append(list(entry.exterior.coords))
        return output

    def assert_polygons(
        self, polygons_expected, dec=None, m="Incorrect Polygon Data"
    ):
        """Assert the polygon data on the Axes equals ``polygons_expected``.

        Parameters
        ----------
        polygons_expected : list or GeoDataFrame
            List of polygons expected to be found on the Axes, or a
            GeoDataFrame containing the expected polygons.
        dec : int (Optional)
            Desired decimal precision. If None, polygons must be exact.
        m : str (default = "Incorrect Polygon Data")
            Error message used if the assertion is not met.

        Raises
        ------
        ValueError
            If ``polygons_expected`` is None, is empty, or is a list whose
            first element is empty.
        AssertionError
            If the polygons on the Axes do not match ``polygons_expected``.
        """
        if polygons_expected is None or len(polygons_expected) == 0:
            raise ValueError(
                "Empty list or GeoDataFrame passed into assert_polygons."
            )
        if (
            isinstance(polygons_expected, list)
            and len(polygons_expected[0]) == 0
        ):
            raise ValueError(
                "Empty list or GeoDataFrame passed into assert_polygons."
            )
        if isinstance(polygons_expected, gpd.geodataframe.GeoDataFrame):
            polygons_expected = self._convert_multipolygons(
                polygons_expected["geometry"]
            )

        polygons = self.get_polygons()
        polygons_expected = sorted(polygons_expected)

        # Compare against None so that dec=0 is honoured as a precision.
        if dec is None:
            np.testing.assert_equal(polygons, polygons_expected, m)
            return

        # Explicit raise: a bare assert would be stripped under ``python -O``.
        if len(polygons_expected) != len(polygons):
            raise AssertionError(m)
        for actual, expected in zip(polygons, polygons_expected):
            np.testing.assert_almost_equal(
                actual, expected, decimal=dec, err_msg=m
            )