Series

import pandas as pd

# The same four dates appear twice, each time paired with a different value.
df = pd.DataFrame(
    {
        "Date": ["2015-05-08", "2015-05-07", "2015-05-06", "2015-05-05"] * 2,
        "Data": [5, 8, 6, 1, 50, 100, 60, 120],
    }
)

def makechat(row):
    """Return a copy of *row* with an added ``'chat'`` entry.

    Args:
        row: A pandas Series (one DataFrame row) that has a ``Date`` entry.

    Returns:
        A new Series holding every entry of *row* plus ``'chat'``, set to
        ``"<Date> is the date"``. The result has one more label than the
        input.
    """
    # Work on a copy: pandas documents that mutating the object handed to a
    # user-defined function can give unexpected results.
    result = row.copy()
    result['chat'] = f"{row.Date} is the date"
    return result

# NOTE: this call raises ValueError("Function did not transform") (see the
# traceback below). makechat adds a 'chat' label, so the result's index no
# longer equals the input's, which is the check transform() enforces.
# Use df.apply(makechat, axis=1) when the function changes the row's shape.
df.transform(makechat, axis=1)

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[5], line 1
----> 1 df.transform(makechat, axis=1)

File ~/opt/miniconda3/envs/ThinkBayes2/lib/python3.11/site-packages/pandas/core/frame.py:10139, in DataFrame.transform(self, func, axis, *args, **kwargs)
  10136 from pandas.core.apply import frame_apply
  10138 op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs)
> 10139 result = op.transform()
  10140 assert isinstance(result, DataFrame)
  10141 return result

File ~/opt/miniconda3/envs/ThinkBayes2/lib/python3.11/site-packages/pandas/core/apply.py:229, in Apply.transform(self)
    227 if obj._get_axis_number(axis) == 1:
    228     assert not is_series
--> 229     return obj.T.transform(func, 0, *args, **kwargs).T
    231 if is_list_like(func) and not is_dict_like(func):
    232     func = cast(list[AggFuncTypeBase], func)

File ~/opt/miniconda3/envs/ThinkBayes2/lib/python3.11/site-packages/pandas/core/frame.py:10139, in DataFrame.transform(self, func, axis, *args, **kwargs)
  10136 from pandas.core.apply import frame_apply
  10138 op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs)
> 10139 result = op.transform()
  10140 assert isinstance(result, DataFrame)
  10141 return result

File ~/opt/miniconda3/envs/ThinkBayes2/lib/python3.11/site-packages/pandas/core/apply.py:267, in Apply.transform(self)
    260 # error: Argument 1 to "__get__" of "AxisProperty" has incompatible type
    261 # "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy,
    262 # DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
    263 # Series]"
    264 if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
    265     obj.index  # type: ignore[arg-type]
    266 ):
--> 267     raise ValueError("Function did not transform")
    269 return result

ValueError: Function did not transform

A Series can be considered as a type of dictionary, or as a generalised 1D NumPy array.

# The index holds arbitrary labels here (2, 5, 3, 7), not positions.
data = pd.Series([0.25, 0.5, 0.375, 1.0], index = [2,5,3,7])

# Looks up the label 5, which is the second entry (0.5), not position 5.
data[5]

0.5

The constructor is pd.Series(data, index). data can be a scalar (which is then repeated once for each index label), a list or NumPy array (in which case the index defaults to a 0-based integer sequence), or a dictionary (in which case the index defaults to the dictionary keys).

pd.Series([2,4,6])

0    2
1    4
2    6
dtype: int64

pd.Series(5, index=[100, 200, 300])

100    5
200    5
300    5
dtype: int64

# Built from a dict: the keys (2, 1, 3) become the index.
s = pd.Series({2:'a', 1:'b', 3:'c'})

# Looks up the key 1, giving 'b'.
s[1]

'b'

In each case the index can be explicitly set to control the order, or the subset of keys used. For example, here we don’t use everything in the dictionary, and we specify the order.

pd.Series({2:'a', 1:'b', 3:'c'}, index=[1,2])

1    b
2    a
dtype: object

DataFrames

A DataFrame can be considered as either a specialization of a dictionary or a generalization of a NumPy array, or as a sequence of aligned Series. ‘Aligned’ means sharing the same index.

# Population figures keyed by state name.
population_dict = {
    'California': 39538223,
    'Texas': 29145505,
    'Florida': 21538187,
    'New York': 20201249,
    'Pennsylvania': 13002700,
}
population = pd.Series(population_dict)
population

California      39538223
Texas           29145505
Florida         21538187
New York        20201249
Pennsylvania    13002700
dtype: int64

# Area figures keyed by state name, in the same key order as population_dict.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'Florida': 170312,
    'New York': 141297,
    'Pennsylvania': 119280,
}
area = pd.Series(area_dict)
area

California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
dtype: int64

# Combine the two Series into one DataFrame: each dict key becomes a column
# name, and the Series' shared index becomes the row index.
states = pd.DataFrame({'population': population, 'area': area})
states

	population	area
California	39538223	423967
Texas	29145505	695662
Florida	21538187	170312
New York	20201249	141297
Pennsylvania	13002700	119280

A DataFrame can be thought of as a generalisation of a 2D NumPy array, where both the rows and columns have a generalised index for accessing the data.

states.columns

Index(['population', 'area'], dtype='object')

states.index

Index(['California', 'Texas', 'Florida', 'New York', 'Pennsylvania'], dtype='object')

states['area']

California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
Name: area, dtype: int64

For a 2D NumPy array, data[0] returns the first row. For a DataFrame, data['col0'] returns the column labelled 'col0' (here, the first column).

# from a list of dicts: each dict becomes one row and its keys become the
# column names (see the output below)
data = [{'a': i, 'b': 2*i} for i in range(3)]
pd.DataFrame(data)

	a	b
0	0	0
1	1	2
2	2	4