# Source code for matplotcheck.base

"""
matplotcheck.base
=================

Base plot checking class and methods that should apply to all plots
whether they are spatial or not.

"""

import numpy as np
import matplotlib
from matplotlib.backend_bases import RendererBase
import math
from scipy import stats
import pandas as pd
import numbers
import geopandas as gpd


class InvalidPlotError(Exception):
    """Exception subclass that adds no behavior of its own.

    Presumably raised when a plot cannot be checked; nothing in the visible
    part of this module raises it, so confirm against callers.
    """


[docs]class PlotTester(object):
    """
    Object to grab elements from Matplotlib plots
    Temporarily removing parameters and returns as it's breaking sphinx

    Parameters
    ----------
    axis : mpl axis object

    """

    def __init__(self, ax):
        """Store the Matplotlib axes that this tester's methods inspect.

        Parameters
        ----------
        ax : mpl axis object
            Axes kept as ``self.ax``; every check in this class reads from it.
        """
        self.ax = ax

    def _is_line(self):
        """Boolean expressing if ax contains scatter points.
        If plot contains scatter points and lines return True.

        Returns
        -------
        is_line : boolean
            True if Axes ax is a line plot, False if not
        """

        if self.ax.lines:
            for line in self.ax.lines:
                if (
                    not line.get_linestyle()
                    or not line.get_linewidth()
                    or line.get_linewidth() > 0
                ):
                    return True

    def _is_scatter(self):
        """Boolean expressing if ax contains scatter points.
        If plot contains scatter points as well as lines, functions will return
        true.

        Returns
        -------
        is_scatter : boolean
            True if Axes ax is a scatter plot, False if not
        """
        if self.ax.collections:
            return True
        elif self.ax.lines:
            for line in self.ax.lines:
                if (
                    line.get_linestyle() == "None"
                    or line.get_linewidth() == "None"
                    or line.get_linewidth() == 0
                ):
                    return True
        return False

[docs]    def assert_string_contains(
        self,
        string,
        strings_expected,
        message_default="String does not contain expected string: {0}",
        message_or="String does not contain at least one of: {0}",
    ):
        """Asserts that `string` contains the expected strings from
        `strings_expected`.

        Parameters
        ----------
        strings_expected : list
            Any string in `strings_expected` must be in the title for the
            assertion to pass. If there is a list of strings in
            `strings_expected`, at least one of the strings in that list must
            be in the title for the assertion to pass. For example, if
            ``strings_expected=['a', 'b', 'c']``, then ``'a'`` AND ``'b'`` AND
            ``'c'`` must be in the title for the assertion to pass.
            Alternatively, if ``strings_expected=['a', 'b', ['c', 'd']]``, then
            ``'a'`` AND ``'b'`` AND (at least one of: ``'c'``, ``'d'``) must be
            in the title for the assertion to pass. Case insensitive.
        message_default : string
            The error message to be displayed if the `string` does not contain
            a string in strings_expected. If `message` contains ``'{0}'``, it
            will be replaced with the first expected string not found in the
            label.
        message_or : string
            Similar to `message_default`, `message_or` is the error message to
            be displated if `string` does not contain at least one of
            the strings in an inner list in `strings_expected`. If `message`
            contains ``'{0}'``, it will be replaced with the first failing
            inner list in `strings_expected`.

        Raises
        -------
        AssertionError
            if `string` does not contain expected strings
        """
        # Assertion passes if strings_expected == [] or
        # strings_expected == None
        if not strings_expected:
            return

        string = string.lower().replace(" ", "")

        if isinstance(strings_expected, str):
            strings_expected = [strings_expected]

        for check in strings_expected:
            if isinstance(check, str):
                if not check.lower().replace(" ", "") in string:
                    raise AssertionError(message_default.format(check))
            elif isinstance(check, list):
                if not any(
                    [c.lower().replace(" ", "") in string for c in check]
                ):
                    if len(check) == 1:
                        raise AssertionError(message_default.format(check[0]))
                    else:
                        raise AssertionError(message_or.format(check))
            else:
                raise ValueError(
                    "str_lst must be a list of: lists or strings."
                )

[docs]    def assert_plot_type(
        self, plot_type=None, message="Plot is not of type {0}"
    ):
        """Asserts Axes `ax` contains the type of plot specified in `plot_type`.
        if `plot_type` is ``None``, assertion is passed.

        Parameters
        ----------
        plot_type: string
            String specifying the expected plot type. Options:
            `scatter`, `bar`, `line`
        message : string
            The error message to be displayed if Plot does not match
            `plot_type`. If `message` contains ``'{0}'``, it will be replaced
            with the epected plot type.

        Raises
        -------
        AssertionError
            if Plot does not match `plot_type`
        """
        if plot_type:
            if plot_type == "scatter":
                assert self._is_scatter(), message.format(plot_type)
            elif plot_type == "bar":
                assert self.ax.patches, message.format(plot_type)
            elif plot_type == "line":
                assert self._is_line(), message.format(plot_type)
            else:
                raise ValueError(
                    "Plot_type to test must be either: scatter, bar or line"
                )

    """ TITLES TESTS/HELPER FUNCTIONS """

[docs]    def get_titles(self):
        """Returns the suptitle (Figure title) and axes title of `ax`.

        Returns
        -------
        suptitle : string
            Figure title of the Figure that the `ax` object is on. If figure
            title is ``None``, this is an empty string.
        title : string
            Title on the axes. If title is ``None``, this is an empty string.
        """
        fig, suptitle = self.ax.get_figure(), ""
        if fig._suptitle:
            suptitle += fig._suptitle.get_text()
        return suptitle, self.ax.get_title()

[docs]    def assert_title_contains(
        self,
        strings_expected,
        title_type="either",
        message_default="Title does not contain expected string: {0}",
        message_or="Title does not contain at least one of: {0}",
        message_no_title="Expected title is not displayed",
    ):
        """Asserts that title defined by `title_type` contains the expected
        strings from `strings_expected`.

        Parameters
        ----------
        strings_expected : list
            Any string in `strings_expected` must be in the title for the
            assertion to pass. If there is a list of strings in
            `strings_expected`, at least one of the strings in that list must
            be in the title for the assertion to pass. For example, if
            ``strings_expected=['a', 'b', 'c']``, then ``'a'`` AND ``'b'`` AND
            ``'c'`` must be in the title for the assertion to pass.
            Alternatively, if ``strings_expected=['a', 'b', ['c', 'd']]``, then
            ``'a'`` AND ``'b'`` AND (at least one of: ``'c'``, ``'d'``) must be
            in the title for the assertion to pass. Case insensitive.
        title_type : string
            One of the following strings ["figure", "axes", "either"]
            `figure`: only the figure title (suptitle) will be tested
            'axes': only the axes title (suptitle) will be tested
            'either': either the figure title or axes title will pass this
            assertion.
            The combined title will be tested.
        message_default : string
            The error message to be displayed if the axis label does not
            contain a string in strings_expected. If `message` contains
            ``'{0}'``, it will be replaced with the first expected string not
            found in the label.
        message_or : string
            Similar to `message_default`, `message_or` is the error message to
            be displated if the axis label does not contain at least one of
            the strings in an inner list in `strings_expected`. If `message`
            contains ``'{0}'``, it will be replaced with the first failing
            inner list in `strings_expected`.
        message_no_title : string
            The error message to be displayed if the expected title is not
            displayed.

        Raises
        -------
        AssertionError
            if title does not contain expected strings
        """
        suptitle, axtitle = self.get_titles()
        if title_type == "either":
            title = axtitle + suptitle
        elif title_type == "figure":
            title = suptitle
        elif title_type == "axes":
            title = axtitle
        else:
            raise ValueError(
                "title_type must be one of the following "
                + '["figure", "axes", "either"]'
            )

        assert title, message_no_title

        self.assert_string_contains(
            title,
            strings_expected,
            message_default=message_default,
            message_or=message_or,
        )

    """CAPTION TEST/HELPER FUNCTIONS """

[docs]    def get_caption(self):
        """Returns the text that is located in the bottom right, just below the
        right side of ax
        If no text is found in location, ``None`` is returned.

        Returns
        -------
        caption : string
            the text that is found in bottom right, ``None`` if no text is
            found
        """
        caption = None
        ax_position = self.ax.get_position()
        for tex in self.ax.get_figure().texts:
            tex_position = tex.get_position()
            if (
                ax_position.ymin - 0.1 < tex_position[1] < ax_position.ymin
            ) and (
                ax_position.xmax - 0.5 < tex_position[0] < ax_position.xmax
            ):
                caption = tex
                break
        if isinstance(caption, matplotlib.text.Text):
            caption = caption.get_text()
        return caption

[docs]    def assert_caption_contains(
        self,
        strings_expected,
        message_default="Caption does not contain expected string: {0}",
        message_or="Caption does not contain at least one of: {0}",
        message_no_caption="No caption exists in appropriate location",
    ):
        """
        Asserts that caption contains expected strings from `strings_expected`.

        Parameters
        ----------
        strings_expected : list
            Any string in `strings_expected` must be in the title for the
            assertion to pass. If there is a list of strings in
            `strings_expected`, at least one of the strings in that list must
            be in the title for the assertion to pass. For example, if
            ``strings_expected=['a', 'b', 'c']``, then ``'a'`` AND ``'b'`` AND
            ``'c'`` must be in the title for the assertion to pass.
            Alternatively, if ``strings_expected=['a', 'b', ['c', 'd']]``, then
            ``'a'`` AND ``'b'`` AND (at least one of: ``'c'``, ``'d'``) must be
            in the title for the assertion to pass. Case insensitive.
        message_default : string
            The error message to be displayed if the axis label does not
            contain a string in strings_expected. If `message` contains
            ``'{0}'``, it will be replaced with the first expected string
            not found in the label.
        message_or : string
            Similar to `message_default`, `message_or` is the error message to
            be displated if the axis label does not contain at least one of
            the strings in an inner list in `strings_expected`. If `message`
            contains ``'{0}'``, it will be replaced with the first failing
            inner list in `strings_expected`.
        message_no_caption : string
            The error message to be displayed if no caption exists in the
            appropriate location.

        Raises
        -------
        AssertionError
            if caption does not contain strings matching `strings_expected`
        """
        caption = self.get_caption()
        if strings_expected is None:
            return

        assert caption, message_no_caption

        self.assert_string_contains(
            caption,
            strings_expected,
            message_default=message_default,
            message_or=message_or,
        )

    """ AXIS TEST/HELPER FUNCTIONS """

[docs]    def assert_axis_off(self, message="Axis lines are displayed on plot"):
        """Asserts one of the three cases holds true with error message m:
        1) axis have been turned off
        2) both x and y axis have visibility set to false
        3) both x and y axis ticks have been set to empty lists

        Parameters
        ----------
        message : string
            The error message to be displayed if the assertion is not met.

        Raises
        ----------
        AssertionError
            with message `m` if axis lines are displayed on plot
        """
        flag = False
        # Case 1: check if axis have been turned off
        if not self.ax.axison:
            flag = True
        # Case 2: Check if both axis visibilities set to false
        elif not self.ax.xaxis._visible and not self.ax.yaxis._visible:
            flag = True
        # Case 3: Check if both axis ticks are set to empty lists
        elif (
            self.ax.xaxis.get_gridlines() == []
            and self.ax.yaxis.get_gridlines() == []
        ):
            flag = True

        assert flag, message

[docs]    def assert_axis_label_contains(
        self,
        axis="x",
        strings_expected=None,
        message_default="{1}-axis label does not contain expected string: {0}",
        message_or="{1}-axis label does not contain at least one of: {0}",
        message_not_displayed="Expected {0} axis label is not displayed",
    ):
        """
        Asserts that the axis label contains the expected strings from
        `strings_expected`. Tests x or y axis based on 'axis' param.

        Parameters
        ----------
        axis : string
            One of the following ['x','y'] stated which axis label to be tested
        strings_expected : list
            Any string in `strings_expected` must be in the axis label for the
            assertion to pass. If there is a list of strings in
            `strings_expected`, at least one of the strings in that list must
            be in the axis label for the assertion to pass. For example, if
            ``strings_expected=['a', 'b', 'c']``, then ``'a'`` AND ``'b'`` AND
            ``'c'`` must be in the title for the assertion to pass.
            Alternatively, if ``strings_expected=['a', 'b', ['c', 'd']]``, then
            ``'a'`` AND ``'b'`` AND (at least one of: ``'c'``, ``'d'``) must be
            in the title for the assertion to pass. Case insensitive.
        message_default : string
            The error message to be displayed if the axis label does not
            contain a string in strings_expected. If `message` contains
            ``'{1}'``, it will be replaced with `axis`. If `message` contains
            ``'{0}'``, it will be replaced with the first expected string not
            found in the label.
        message_or : string
            Similar to `message_default`, `message_or` is the error message to
            be displated if the axis label does not contain at least one of
            the strings in an inner list in `strings_expected`. If `message`
            contains ``'{1}'``, it will be replaced with `axis`. If `message`
            contains ``'{0}'``, it will be replaced with the first failing
            inner list in `strings_expected`.
        message_not_displayed : string
            The error message to be displayed if the expected axis label is not
            displayed. If `message_not_displayed` contains ``'{0}'``, it will
            be replaced with `axis`.

        Raises
        ----------
        AssertionError
            if axis label does not contain expected strings
        """
        # Retrieve appropriate axis label, error if axis param is not x or y
        if axis == "x":
            label = self.ax.get_xlabel()
        elif axis == "y":
            label = self.ax.get_ylabel()
        else:
            raise ValueError('axis must be one of the following ["x", "y"]')

        # Check that axis label contains the expected strings in lst
        if strings_expected is None:
            return
        assert label, "Expected {0} axis label is not displayed".format(axis)

        message_default = message_default.replace("{1}", axis)
        message_or = message_or.replace("{1}", axis)
        self.assert_string_contains(
            label,
            strings_expected,
            message_default=message_default,
            message_or=message_or,
        )

[docs]    def assert_lims(
        self,
        lims_expected,
        axis="x",
        message="Incorrect limits on the {0} axis",
    ):
        """Assert the lims of ax match lims_expected. Tests x or y axis based on
        'axis' param

        Parameters
        ---------
        lims_expected : list of numbers (float or int)
            List of length 2 containing expected min and max vals for axis
            limits
        axis : string
            From ['x','y'], which axis to be tested
        message : string
            The error message to be displayed if the limits of ax do not match
            the expected limits. If `message` contains ``'{0}'``, it will be
            replaced with `axis`.

        Raises
        ----------
        AssertionError
            if `lims_expected` does not match the limits of ax
        """
        # Get axis limit values
        if axis == "x":
            lims = [int(xlim) for xlim in self.ax.get_xlim()]
        elif axis == "y":
            lims = [int(ylim) for ylim in self.ax.get_ylim()]
        else:
            raise ValueError(
                "axis must be one of the following string ['x', 'y']"
            )

        # Check retrieved limits against expected min and max values
        assert np.array_equal(lims, lims_expected), message.format(axis)

[docs]    def assert_lims_range(
        self,
        lims_range,
        axis="x",
        message_min="Incorrect min limit on the {0} axis",
        message_max="Incorrect max limit on the {0} axis",
    ):
        """Asserts axis limits fall within lims_range (INCLUSIVE).

        Parameters
        ----------
        lims_range: tuple of tuples.
            if axis == 'x': first tuple is range the left x limit must be in,
            second tuple is the range the right x limit must be in
            if axis == 'y': first tuple is range the bottom y limit must be in,
            second tuple is the range the top x limit must be in
        axis: string
            from list ['x','y'] declaring which axis to be tested
        message_min : string
            The error message to be displayed if the limits of ax do not fall
            within the expected limit minimum. If `message` contains ``'{0}'``,
            it will be replaced with `axis`.
        message_max : string
            The error message to be displayed if the limits of ax do not fall
            within the expected limit maximum. If `message` contains ``'{0}'``,
            it will be replaced with the specified `axis` (i.e. it will be
            replaced with 'x' or 'y').

        Raises
        ----------
        AssertionError
            if axis limits does not fall within `lims_range`
        """
        # Get ax axis limits
        if axis == "x":
            lims = self.ax.get_xlim()
        elif axis == "y":
            lims = self.ax.get_ylim()
        else:
            raise ValueError(
                "axis must be one of the following string ['x', 'y']"
            )
        # Check if the min falls with in lims_range[0]
        assert (
            lims_range[0][0] <= lims[0] <= lims_range[0][1]
        ), message_min.format(axis)
        # Check if the max falls with in lims_range[1]
        assert (
            lims_range[1][0] <= lims[1] <= lims_range[1][1]
        ), message_max.format(axis)

[docs]    def assert_equal_xlims_ylims(
        self, message="xlims and ylims are not equal"
    ):
        """Assert the x and y lims of Axes ax are exactly equal to each other

        Parameters
        ---------
        message : string
            The error message to be displayed if the x limits and y limits are
            equal.

        Raises
        ----------
        AssertionError
            with message `m` if limits are not equal

        """
        xlims = self.ax.get_xlim()
        ylims = self.ax.get_ylim()
        assert np.array_equal(xlims, ylims), message

    """ LEGEND TESTS """

[docs]    def get_legends(self):
        """Retrieve the list of legends on ax

        Returns
        -------
        legends : list
            List of matplotlib.legend.Legend objects
        """
        return self.ax.findobj(match=matplotlib.legend.Legend)

[docs]    def assert_legend_titles(
        self,
        titles_exp,
        message="Legend title does not contain expected string: {0}",
        message_num_titles="I was expecting {0} legend titles but instead "
        + "found {1}",
    ):
        """Asserts legend titles contain expected text in titles_exp list.

        Parameters
        ----------
        titles_exp : list of strings
            Each string is expected be be in one legend title. The number of
            strings is equal to the number of expected legends.
        message : string
            The error message to be displayed if the legend titles do not match
            the expected strings. If `message` contains ``'{0}'``,
            it will be replaced with the first expected string that does not
            exist in the legend title.
        message_num_titles : string
            The error message to be displayed if there exist a different number
            of legend titles than expected. If `message_num_titles` contains
            ``'{0}'`` it will be replaced with the number of titles found. If
            `message_num_titles` contains ``'{1}'`` it will be replaced with
            the expected number of titles.

        Raises
        -------
        AssertionError
            if legend titles do not contain expected text
        """
        legends = self.get_legends()

        # Test number of legends - edge case when a student might have two
        # legends rather than 2

        num_legends = len(legends)
        num_exp_legends = len(titles_exp)

        assert num_legends == num_exp_legends, message_num_titles.format(
            num_legends, num_exp_legends
        )

        # Check that each expected legend title is in a legend title in ax
        titles = [leg.get_title().get_text().lower() for leg in legends]

        for title_exp in titles_exp:
            assert any(title_exp.lower() in s for s in titles), message.format(
                title_exp
            )

[docs]    def assert_legend_labels(
        self,
        labels_exp,
        message="Legend does not have expected labels",
        message_no_legend="Legend does not exist",
        message_num_labels="I was expecting {0} legend entries, but found "
        + "{1}. Are there extra labels in your legend?",
    ):
        """Asserts legends on ax have the correct entry labels

        Parameters
        ----------
        labels_exp : list of strings
            Each string is an expected legend entry label. Checks that
            the legend entry labels match exactly (except for case).
        message : string
            The error message to be displayed if the expected labels are not
            found.
        message_no_legend : string
            The error message to be displayed if no legend is found.
        message_num_labels: string
            The error message to be displayed if there exist a different number
            of legend labels than expected. If `message_num_labels` contains
            ``'{0}'`` it will be replaced with the number of labels found. If
            `message_num_labels` contains ``'{1}'`` it will be replaced with
            the expected number of labels.


        Raises
        -------
        AssertionError
            if legend labeles do not match `labels_exp`

        Notes
        -----
        If there are multiple legends, it combines all the legend labels into
        one set and checks that set against the list labels_exp
        """
        legends = self.get_legends()
        assert legends, message_no_legend

        # Lowercase both the expected and actual legend labels
        legend_texts = [
            t.get_text().lower() for leg in legends for t in leg.get_texts()
        ]
        labels_exp = [label.lower() for label in labels_exp]

        num_exp_labs = len(labels_exp)
        num_actual_labs = len(legend_texts)
        assert num_actual_labs == num_exp_labs, message_num_labels.format(
            num_exp_labs, num_actual_labs
        )
        assert set(legend_texts) == set(labels_exp), message

[docs]    def assert_legend_no_overlay_content(
        self, message="Legend overlays plot window"
    ):
        """Asserts that each legend does not overlay plot window

        Parameters
        ----------
        message : string
            The error message to be displayed if the legend overlays the plot
            window.

        Raises
        -------
        AssertionError
            with message `m` if legend does not overlay plot window
        """
        # RendererBase() is needed to get extent, otherwise raises an error
        plot_extent = self.ax.get_window_extent(RendererBase()).get_points()
        legends = self.get_legends()
        for leg in legends:
            # RendererBase() is needed to get extent, otherwise raises error
            leg_extent = leg.get_window_extent(RendererBase()).get_points()
            legend_left = leg_extent[1][0] < plot_extent[0][0]
            legend_right = leg_extent[0][0] > plot_extent[1][0]
            legend_below = leg_extent[1][1] < plot_extent[0][1]
            assert legend_left or legend_right or legend_below, message

[docs]    def legends_overlap(self, b1, b2):
        """Helper function for assert_no_legend_overlap.
        True if points of window extents for b1 and b2 overlap, False otherwise

        Parameters
        ----------
        b1 : list of lists
            2x2 array containg numbers, bounding box of window extents
        b2 : list of lists
            2x2 array containg numbers, bounding box of window extents

        Returns
        -------
        overlap : boolean
            True if bounding boxes b1 and b2 overlap
        """
        x_overlap = (b1[0][0] <= b2[1][0] and b1[0][0] >= b2[0][0]) or (
            b1[1][0] <= b2[1][0] and b1[1][0] >= b2[0][0]
        )
        y_overlap = (b1[0][1] <= b2[1][1] and b1[0][1] >= b2[0][1]) or (
            b1[1][1] <= b2[1][1] and b1[1][1] >= b2[0][1]
        )
        return x_overlap and y_overlap

[docs]    def assert_no_legend_overlap(self, message="Legends overlap eachother"):
        """When multiple legends on ax, asserts that there are no two legends
        in ax that overlap each other

        Parameters
        ----------
        message : string
            The error message to be displayed if two legends overlap.

        Raises
        -------
        AssertionError
            with message `m` if legends overlap
        """
        legends = self.get_legends()
        n = len(legends)
        for i in range(n - 1):
            # Get extent of first legend in check, RendererBase() avoids error
            leg_extent1 = (
                legends[i].get_window_extent(RendererBase()).get_points()
            )
            for j in range(i + 1, n):
                # Get extent of second legend in check
                leg_extent2 = (
                    legends[j].get_window_extent(RendererBase()).get_points()
                )
                assert not self.legends_overlap(
                    leg_extent1, leg_extent2
                ), message

    """ BASIC PLOT DATA FUNCTIONS """

[docs]    def get_xy(self, points_only=False):
        """Returns a pandas dataframe with columns "x" and "y" holding the x
        and y coords on Axes `ax`

        Parameters
        ----------
        ax : matplotlib.axes.Axes
            Matplotlib Axes object to be tested
        points_only : boolean
            Set ``True`` to check only points, set ``False`` to check all data
            on plot.

        Returns
        -------
        df : pandas.DataFrame
            Pandas dataframe with columns "x" and "y" containing the x and y
            coords of each point on Axes `ax`
        """
        if points_only:
            xy_coords = [
                val
                for line in self.ax.lines
                if (
                    line.get_linestyle() == "None"
                    or line.get_linewidth() == "None"
                )
                for val in line.get_xydata()
            ]  # .plot()
            xy_coords += [
                val
                for c in self.ax.collections
                if type(c) != matplotlib.collections.PolyCollection
                for val in c.get_offsets()
            ]  # .scatter()

        else:
            xy_coords = [
                val for line in self.ax.lines for val in line.get_xydata()
            ]  # .plot()
            xy_coords += [
                val for c in self.ax.collections for val in c.get_offsets()
            ]  # .scatter()
            xy_coords += [
                [(p.get_x() + (p.get_width() / 2)), p.get_height()]
                for p in self.ax.patches
            ]  # .bar()

        xy_data = pd.DataFrame(data=xy_coords, columns=["x", "y"]).dropna()

        # crop to limits
        lims = self.ax.get_xlim()
        xy_data = xy_data[xy_data["x"] >= lims[0]]
        xy_data = xy_data[xy_data["x"] <= lims[1]].reset_index(drop=True)

        return xy_data

[docs]    def assert_xydata(
        self,
        xy_expected,
        xcol=None,
        ycol=None,
        points_only=False,
        xlabels=False,
        tolerance=0,
        message="Incorrect data values",
    ):
        """Asserts that the x and y data of Axes `ax` matches `xy_expected`
        with error message `message`. If ``xy_expected = None``,
        assertion is passed.

        Parameters
        ----------
        xy_expected : pandas or geopandas dataframe
            (Required) DataFrame contains data expected to be on the plot
            (axis object)
        xcol : string
            (Required for non geopandas objects) Title of column in
            `xy_expected` containing values along `x_axis`.
            If `xy_expected` contains this data in 'geometry', set to ``None``.
        ycol : String
            (Required for non geopandas objects) The y column name of
            xy_expected which represents values along the`y_axis` in a plot.
            If `xy_expected` contains this data in 'geometry' set to ``None``.
        points_only : boolean,
            Set ``True`` to check only points, set ``False`` tp check all data
            on plot.
        xlabels : boolean
            Set ``True`` if using x axis labels rather than x data. Instead of
            comparing numbers in the x-column to expected, compares numbers or
            text in x labels to expected.
        tolerance : float
            A non-zero value of tol_rel allows an absolute tolerance when
            checking the data. For example, a tolerance of 0.1 would
            check that the actual data is within 0.1 units of the actual data.
            Note that the units for datetime data is always days.
        message : string
            The error message to be displayed if the xy-data does not match
            `xy_expected`


        Raises
        -------
        AssertionError
            with message `message`, if x and y data of Axes `ax` does not match
            `xy_expected`
        """
        if xy_expected is None:
            return
        elif not isinstance(xy_expected, pd.DataFrame):
            raise ValueError(
                "xy_expected must be of type: pandas dataframe or Geopandas "
                + "Dataframe"
            )

        # If xy_expected is a GeoDataFrame, then we make is a normal DataFrame
        # with the coordinates of the geometry in that GeoDataFrame as the x
        # and y data
        if isinstance(xy_expected, gpd.geodataframe.GeoDataFrame) and not xcol:
            xy_expected = pd.DataFrame(
                data={
                    "x": [p.x for p in xy_expected.geometry],
                    "y": [p.y for p in xy_expected.geometry],
                }
            ).dropna()
            xcol, ycol = "x", "y"

        if xlabels:
            self.assert_xlabel_ydata(
                xy_expected, xcol=xcol, ycol=ycol, message=message
            )
            return
        xy_data = self.get_xy(points_only=points_only)

        # Make sure the data are sorted the same
        xy_data, xy_expected = (
            xy_data.sort_values(by="x"),
            xy_expected.sort_values(by=xcol),
        )

        if tolerance > 0:
            np.testing.assert_allclose(
                xy_data["x"],
                xy_expected[xcol],
                atol=tolerance,
                err_msg=message,
            )
            np.testing.assert_allclose(
                xy_data["y"],
                xy_expected[ycol],
                atol=tolerance,
                err_msg=message,
            )

        else:
            """We use `assert_array_max_ulp()` to compare the
            two datasets because it is able to account for small errors in
            floating point numbers, and it scales nicely between extremely
            small or large numbers. Because of the way that matplotlib stores
            datetime data, this is essential for comparing high-precision
            datetime data (i.e. millisecond or lower).

            We catch this error and raise our own that is more relevant to
            the assertion being run."""
            try:
                np.testing.assert_array_max_ulp(
                    xy_data["x"].to_numpy(dtype=np.float64),
                    xy_expected[xcol].to_numpy(dtype=np.float64),
                    5,
                )
            except AssertionError:
                # xy_data and xy_expected do not contain the same data
                raise AssertionError(message)
            except ValueError:
                # xy_data and xy_expected do not have the same shape
                raise ValueError(
                    "xy_data and xy_expected do not have the same shape"
                )
            try:
                np.testing.assert_array_max_ulp(
                    xy_data["y"].to_numpy(dtype=np.float64),
                    xy_expected[ycol].to_numpy(dtype=np.float64),
                    5,
                )

            except AssertionError:
                # xy_data and xy_expected do not contain the same data
                raise AssertionError(message)
            except ValueError:
                # xy_data and xy_expected do not have the same shape
                raise ValueError(
                    "xy_data and xy_expected do not have the same shape"
                )

[docs]    def assert_xlabel_ydata(
        self, xy_expected, xcol, ycol, message="Incorrect Data"
    ):
        """Asserts that the numbers in x labels and y values in Axes `ax` match
        `xy_expected`.

        Parameters
        ----------
        xy_expected : pandas.DataFrame
            Pandas DataFrame that contains data
        xcol : string
            Column title containing xaxis data
        ycol : string
            Column title containing yaxis data
        message : string
            The error message to be displayed if data in the x-labels and
            y-values do not match `xy_expected`.

        Raises
        -------
        AssertionError
            with message `m` if legends overlap

        Notes
        -----
        This is only testing the numbers in x-axis labels.
        """
        x_data = [
            "".join(c for c in label.get_text())
            for label in self.ax.xaxis.get_majorticklabels()
        ]
        y_data = self.get_xy()["y"]
        xy_data = pd.DataFrame(data={"x": x_data, "y": y_data})

        # If we expect x-values to be numbers
        if all([isinstance(i, numbers.Number) for i in xy_expected[xcol]]):
            x_is_numeric = True
            try:
                x_data_numeric = [float(i) for i in xy_data["x"]]
            except ValueError:
                raise AssertionError(message)
            else:
                xy_data["x"] = x_data_numeric

        # If we expect x-values to be strings
        else:
            # If we expect x-values to be numeric strings
            if all([s.isnumeric() for s in xy_expected[xcol]]):
                # We attempt to convert numeric strings to numbers
                try:
                    x_expected = [float(s) for s in xy_expected[xcol]]
                    x_data = [float(s) for s in xy_data["x"]]
                except ValueError:
                    x_is_numeric = False
                else:
                    x_is_numeric = True
                    xy_expected[xcol] = x_expected
                    xy_data["x"] = x_data
            # We expect x-values to be non-numeric strings
            else:
                x_is_numeric = False

        # Testing x-data
        if x_is_numeric:
            try:
                np.testing.assert_array_max_ulp(
                    np.array(xy_data["x"]),
                    np.array(xy_expected[xcol]),
                )
            except AssertionError:
                raise AssertionError(message)
        else:
            np.testing.assert_equal(
                np.array(xy_data["x"]), np.array(xy_expected[xcol]), message
            )

        # Testing y-data
        try:
            np.testing.assert_array_max_ulp(
                np.array(xy_data["y"]), np.array(xy_expected[ycol])
            )
        except AssertionError:
            raise AssertionError(message)

    # LINE TESTS/HELPER FUNCTIONS

[docs]    def get_slope_yintercept(self, path_verts):
        """Returns the y-intercept of line based on the average slope of the
        line

        Parameters
        ----------
        path_verts : list
            List of verticies that make a line on Axes `ax`

        Returns
        -------
        slope : float
            The average slope of the line defined by `path_verts`
        y_intercept : float
            The y intercept of the line defined by `path_verts`
        """
        slopes = [
            (path_verts[i + 1, 1] - path_verts[i, 1])
            / (path_verts[i + 1, 0] - path_verts[i, 0])
            for i in range(len(path_verts) - 1)
        ]
        slope = sum(slopes) / len(slopes)
        return slope, path_verts[0, 1] - (path_verts[0, 0] * slope)

[docs]    def assert_line(
        self,
        slope_exp,
        intercept_exp,
        check_coverage=True,
        message_no_line="Expected line not displayed",
        message_data="Line does not cover data set",
    ):
        """Asserts that there exists a line on Axes `ax` with slope `slope_exp`
        and y-intercept `intercept_exp` and

        Parameters
        ----------
        slope_exp : float
            Expected slope of line
        intercept_exp : float
            Expeted y intercept of line
        check_coverage : boolean (default = True)
            If `check_coverage` is `True`, function will check that the goes at
            least from x coordinate `min_val` to x coordinate `max_val`. If the
            line does not cover the entire dataset, and `AssertionError` with
            be thrown with message `message_data`.
        message_no_line : string
            The error message to be displayed if the line does not exist.
        message_data : string
            The error message to be displayed if the line exists but does not
            cover the dataset, and if `check_coverage` is `True`.

        Raises
        -------
        AssertionError
            with message `message_no_line` or `message_data` if no line exists
            that covers the dataset.
        """
        flag_exist = False

        if check_coverage:
            flag_length = False
            xy = self.get_xy(points_only=True)
            min_val, max_val = min(xy["x"]), max(xy["x"])

        for line in self.ax.lines:
            # Here we will get the verticies for the line and reformat them in

            # the way that get_slope_yintercept() expects
            data = line.get_data()
            path_verts = np.column_stack((data[0], data[1]))

            slope, y_intercept = self.get_slope_yintercept(path_verts)
            if math.isclose(slope, slope_exp, abs_tol=1e-4) and math.isclose(
                y_intercept, intercept_exp, abs_tol=1e-4
            ):
                flag_exist = True
                line_x_vals = [coord[0] for coord in path_verts]

                # This check ensures that the minimum and maximum values of the
                # line are within or very close to the minimum and maximum
                # values in the pandas dataframe provided. This accounts for
                # small errors sometimes found in matplotlib plots.
                if check_coverage:
                    if (
                        math.isclose(min(line_x_vals), min_val, abs_tol=1e-4)
                        or min(line_x_vals) <= min_val
                    ) and (
                        math.isclose(max(line_x_vals), max_val, abs_tol=1e-4)
                        or max(line_x_vals) >= max_val
                    ):
                        flag_length = True
                        break

        assert flag_exist, message_no_line
        if check_coverage:
            assert flag_length, message_data

[docs]    def assert_lines_of_type(self, line_types, check_coverage=True):
        """Asserts each line of type in `line_types` exist on `ax`

        Parameters
        ----------
        line_types : string or list of strings
            Acceptable strings in line_types are as follows
            ``['linear-regression', 'onetoone']``.
        check_coverage : boolean (default = True)
            If `check_coverage` is `True`, function will check that the goes at
            least from x coordinate `min_val` to x coordinate `max_val`. If the
            line does not cover the entire dataset, and `AssertionError` with
            be thrown with message `message_data`.

        Raises
        -------
        AssertionError
            if at least one line of type in `line_types` does not exist on `ax`

        Notes
        -----
            If `line_types` is empty, assertion is passed.
        """
        if isinstance(line_types, str):
            line_types = [line_types]

        for line_type in line_types:
            if line_type == "linear-regression":
                xy = self.get_xy(points_only=True)
                # Check that there is xy data for this line. Some one-to-one
                # lines do not produce xy data.
                if xy.empty:
                    raise AssertionError(
                        "linear-regression line not displayed properly"
                    )
                slope_exp, intercept_exp, _, _, _ = stats.linregress(
                    xy.x, xy.y
                )
            elif line_type == "onetoone":
                slope_exp, intercept_exp = 1, 0
            else:
                raise ValueError(
                    "each string in line_types must be from the following "
                    + '["linear-regression","onetoone"]'
                )

            self.assert_line(
                slope_exp,
                intercept_exp,
                message_no_line="{0} line not displayed properly".format(
                    line_type
                ),
                message_data="{0} line does not cover dataset".format(
                    line_type
                ),
                check_coverage=check_coverage,
            )

    # HISTOGRAM FUNCTIONS

[docs]    def get_num_bins(self):
        """Gets the number of bins in histogram with a unique x-position.

        Returns
        -------
        Int :
            Returns the number of bins with a unique x-position. For a normal
            histogram, this is just the number of bins. If there are two
            overlapping or stacked histograms in the same
            `matplotlib.axis.Axis` object, then this returns the number of bins
            with unique edges."""
        x_data = self.get_xy()["x"]
        unique_x_data = list(set(x_data))
        num_bins = len(unique_x_data)

        return num_bins

[docs]    def assert_num_bins(
        self,
        num_bins,
        message="Expected {0} bins in histogram, instead found {1}.",
    ):
        """Asserts number of bins is `num_bins`.

        Parameters
        ----------
        num_bins : int
            Number of bins expected.
        message : string
            The error message to be displayed if plot does not contain
            `num_bins`. If `message` contains ``'{0}'`` it will be replaced
            with expected number of bins. If `message` contains ``'{1}'``, it
            will be replaced with the number of bins found.

        Raises
        -------
        AssertionError
            if plot does not contain the expected number of bins
        """

        num_bins_found = self.get_num_bins()

        assert num_bins == num_bins_found, message.format(
            num_bins, num_bins_found
        )

[docs]    def get_bin_values(self):
        """Returns the value of each bin in a histogram (i.e. the height of each
        bar in a histogram.)

        Returns
        -------
        Int :
            The number of bins in the histogram"""

        bin_values = self.get_xy()["y"].tolist()

        return bin_values

[docs]    def get_bin_midpoints(self):
        """Returns the mid point value of each bin in a histogram

        Returns
        -------
        Int :
            The number of bins in the histogram"""

        bin_midpoints = self.get_xy()["x"].tolist()

        return bin_midpoints

[docs]    def assert_bin_values(
        self,
        bin_values,
        tolerance=0,
        message="Did not find expected bin values in plot",
    ):
        """Asserts that the values of histogram bins match `bin_values`.

        Parameters
        ----------
        bin_values : list
            A list of numbers representing the expected values of each
            consecutive bin (i.e. the heights of the bars in the histogram).
        tolerance : float
            A non-zero value of tol_abs allows an absolute tolerance when
            checking the bin values. For example, an absolute tolerance of 1
            checks that the actual bin values do not differ from the expected
            bin values by more than 1.
        message : string
            The error message to be displayed if the bin values do not match
            `bin_values`

        Raises
        ------
        AssertionError
            if the Values of histogram bins do not match `bin_values`


        Notes
        -----
            `bin_values` can be difficult to know. The easiest way to obtain
            them may be to create a histogram with your expected data, create a
            `PlotTester` object, and use ``get_bin_values()``.
            ``get_bin_values()`` will return exactly the type of list required
            for `bin_values` in this method.
        """
        expected_bin_values = bin_values
        plot_bin_values = self.get_bin_values()

        if tolerance > 0:
            try:
                np.testing.assert_allclose(
                    plot_bin_values,
                    expected_bin_values,
                    atol=tolerance,
                    err_msg=message,
                )
            except AssertionError:
                raise AssertionError(message)
        else:
            """We use `assert_array_max_ulp()` to compare the
            two datasets because it is able to account for small errors in
            floating point numbers, and it scales nicely between extremely
            small or large numbers. We catch this error and throw our own so
            that we can use our own message."""
            try:
                np.testing.assert_array_max_ulp(
                    np.array(plot_bin_values), np.array(expected_bin_values)
                )
            except AssertionError:
                raise AssertionError(message)

[docs]    def assert_bin_midpoints(
        self,
        bin_midpoints,
        message="Did not find expected bin midpoints in plot",
    ):
        """
        Asserts that the middle values of histogram bins match `bin_midpoints`.

        Parameters
        ----------
        bin_midpoints : list
            A list of numbers representing the expected middles of bin values
            covered by each consecutive bin (i.e. the midpoint of the bars in
            the histogram).
        message : string
            The error message to be displayed if the bin mid point values do
            not match `bin_midpoints`

        Raises
        ------
        AssertionError
            if the Values of histogram bins do not match `bin_midpoints`
        """

        plot_bin_midpoints = self.get_bin_midpoints()

        if not isinstance(bin_midpoints, list):
            raise ValueError(
                "Need to submit a list for expected bin midpoints."
            )

        if len(plot_bin_midpoints) != len(bin_midpoints):
            raise ValueError("Bin midpoints lists lengths do no match.")

        try:
            np.testing.assert_array_max_ulp(
                np.array(plot_bin_midpoints), np.array(bin_midpoints)
            )
        except AssertionError:
            raise AssertionError(message)