Skip to main content

Pandas-stubs — how we enhanced pandas with type annotations

Picture of Joanna Sendorek

Joanna Sendorek

Picture of Zbigniew Królikowski

Zbigniew Królikowski

Aug 26, 2021|6 min read
Pandas-stubs — how we enhanced pandas with type annotations
1from pyspark.sql import DataFrame
2import pandas as pd
3
4spark_df: DataFrame = ...
5pandas_df: pd.DataFrame = ...
1from typing import List, Any
2
3def flatten_lists(list_of_lists: List[List[Any]]) -> List[Any]:
4 return [inner for outer in list_of_lists for inner in outer]
1class Series(base.IndexOpsMixin, generic.NDFrame):
2 def __init__(self, data: Optional[Any] = ..., index: Optional[Any] = ..., dtype: Optional[Dtype] = ..., name: Optional[Any] = ..., copy: bool = ..., fastpath: bool = ...) -> None: ...
3 def __len__(self) -> int: ...
4 @overload
5 def __getitem__(self, key: Label) -> Any: ...
6 @overload
7 def __getitem__(self, key: slice) -> Series: ...
8 def __setitem__(self, key: Label, value: Scalar) -> None:
9 # and so on...
Image Alt

1CoercibleIntoDataFrame = Union[Dict[str, Scalar], Dict[str, Series],
2 Dict[str, Tuple[Scalar, ...]],
3 Dict[str, Iterable[Scalar]]]
1setup(
2 name="pandas-stubs",
3 package_dir={"": src_path},
4 version=version,
5 description="Type annotations for Pandas",
6 long_description=(open("README.md").read()
7 if os.path.exists("README.md") else ""),
8 long_description_content_type='text/markdown',
9 url="https://github.com/VirtusLab/pandas-stubs",
10 packages=list(list_packages()),
11 package_data={"": ["*.pyi", "py.typed"]},
1def test_types_set_index() -> None:
2 df = pd.DataFrame(data={'col1': [1, 2, 3, 4], 'col2': ['a', 'b', 'c', 'd']},
3 index=[5, 1, 3, 2])
4 res: pd.DataFrame = df.set_index('col1')
5 res2: pd.DataFrame = df.set_index('col1', drop=False)
6 res3: pd.DataFrame = df.set_index('col1', append=True)
7 res4: pd.DataFrame = df.set_index('col1', verify_integrity=True)
8 res5: pd.DataFrame = df.set_index(['col1', 'col2'])
9 res6: None = df.set_index('col1', inplace=True)
1 pd.concat({5: pd.DataFrame([1, 2, 3]), "b": pd.DataFrame([4, 5, 6]),
2 "c": pd.Series(["a", 2])},
3 axis=1, ignore_index=2)
11: error: Dict entry 0 has incompatible type "int": "DataFrame"; expected "DataFrame": "NDFrame"
21: error: Dict entry 1 has incompatible type "str": "DataFrame"; expected "DataFrame": "NDFrame"
31: error: Dict entry 2 has incompatible type "str": "Series"; expected "DataFrame": "NDFrame"
4
1pd.DataFrame.from_dict(data={'col_1': {'a': 1}, 2: {3: 1, 'b': 's2'}}, orient=2, columns={1, 2})
2
3error: Argument "orient" to "from_dict" of "DataFrame" has incompatible type "Literal[2]";
4 expected "Union[Literal['index'], Literal['columns']]"
5error: Argument "columns" to "from_dict" of "DataFrame" has incompatible type "Set[int]";
6 expected "Optional[Sequence[str]]"
7
Image Alt

Image Alt

Image Alt

1setup(
2 name="openai",
3 description="Python client library for the OpenAI API",
4 version=version_contents["VERSION"],
5 install_requires=[
6 "requests>=2.20", # to get the patch for CVE-2018-18074
7 "tqdm", # Needed for progress bars
8 "pandas>=1.2.3", # Needed for CLI fine-tuning data preparation tool
9 "pandas-stubs>=1.1.0.11", # Needed for type hints for mypy
10 "openpyxl>=3.0.7", # Needed for CLI fine-tuning data preparation tool xlsx format
11],
Image Alt

Subscribe to our newsletter and never miss an article