Module music_df.crop_df
Functions
def crop_df(music_df: pandas.DataFrame,
start_i: typing.Any | None = None,
start_time: float | None = None,
end_i: typing.Any | None = None,
end_time: float | None = None,
infer_barlines_if_no_barlines_found: bool = True) ‑> pandas.DataFrame-
Expand source code
def crop_df( music_df: pd.DataFrame, start_i: Any | None = None, start_time: float | None = None, end_i: Any | None = None, end_time: float | None = None, infer_barlines_if_no_barlines_found: bool = True, ) -> pd.DataFrame: """ Crop `music_df`, handling time signatures and barlines gracefully. Included in the returned dataframe will be the barline and time signature that precede the first note. The barline will have its onset preserved, while the time signature will be moved to align with the barline if it is not already. Args: music_df: dataframe. Note that we assume that the dataframe is sorted (see music_df.sort_df) start_i: index of the first row to include in the cropped dataframe. It is assumed that this index points to a note. At most one of `start_i` or `start_time` should be provided. start_time: onset time of the first note to include in the cropped dataframe. At most one of `start_i` or `start_time` should be provided. end_i: index of the last row to include in the cropped dataframe. It is assumed that this index points to a note. Note that end_i is inclusive (like label-based indexing in Pandas). At most one of `end_i` or `end_time` should be provided. end_time: not implemented. infer_barlines_if_no_barlines_found: if True, infer barlines if no barlines are found. # Examples: First define an example dataframe with time signature, note, and bar events: >>> music_df = pd.DataFrame( ... { ... "onset": [0.0, 0, 1, 2, 4, 5, 6, 8, 8, 9, 10, 11, 12, 13], ... "release": [ ... None, ... 4.0, ... 4.0, ... 3.0, ... 8.0, ... 8, ... 7, ... None, ... 11, ... 11, ... 10.5, ... 14, ... 14, ... 13.5, ... ], ... "type": ["time_signature", "bar", "note", "note", "bar", "note", "note"] ... * 2, ... } ... 
) >>> music_df onset release type 0 0.0 NaN time_signature 1 0.0 4.0 bar 2 1.0 4.0 note 3 2.0 3.0 note 4 4.0 8.0 bar 5 5.0 8.0 note 6 6.0 7.0 note 7 8.0 NaN time_signature 8 8.0 11.0 bar 9 9.0 11.0 note 10 10.0 10.5 note 11 11.0 14.0 bar 12 12.0 14.0 note 13 13.0 13.5 note >>> crop_df(music_df, start_time=2.0) # doctest: +ELLIPSIS onset release type 0 0.0 NaN time_signature 1 0.0 4.0 bar 3 2.0 3.0 note 4 ... >>> crop_df(music_df, start_time=4) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE onset release type 0 4.0 NaN time_signature 4 4.0 8.0 bar 5 5.0 8.0 note 6 ... >>> crop_df(music_df, start_time=10.9) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE onset release type 7 11.0 NaN time_signature 11 11.0 14.0 bar 12 12.0 14.0 note 13 ... >>> crop_df(music_df, start_i=12) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE onset release type 7 11.0 NaN time_signature 11 11.0 14.0 bar 12 12.0 14.0 note 13 13.0 13.5 note Note that end_i is inclusive (like label-based indexing in Pandas): >>> crop_df(music_df, end_i=3) # doctest: +NORMALIZE_WHITESPACE onset release type 0 0.0 NaN time_signature 1 0.0 4.0 bar 2 1.0 4.0 note 3 2.0 3.0 note end_time is not yet implemented (releases being unsorted makes it a wee bit complicated and I don't yet need it) >>> crop_df(music_df, start_time=6, end_time=10.5) Traceback (most recent call last): NotImplementedError >>> crop_df(music_df, start_time=6, end_i=10) # doctest: +NORMALIZE_WHITESPACE onset release type 0 4.0 NaN time_signature 4 4.0 8.0 bar 6 6.0 7.0 note 7 8.0 NaN time_signature 8 8.0 11.0 bar 9 9.0 11.0 note 10 10.0 10.5 note """ if all(x is None for x in (start_i, start_time, end_i, end_time)): LOGGER.warning("Nothing to crop, returning dataframe unchanged") return music_df if start_i is not None and start_time is not None: raise ValueError if end_i is not None and end_time is not None: raise ValueError if start_time is not None: notes_df = music_df[music_df.type == "note"] start_loc_i = get_idx_to_item_geq(notes_df.onset.values, 
start_time) start_i = notes_df.index[start_loc_i] if start_i is not None: start_i = int(start_i) # TODO: (Malcolm 2023-09-29) what to do in case of no time signature or bar? prev_time_sig = last_time_signature_before(music_df, start_i) if infer_barlines_if_no_barlines_found and "bar" not in music_df["type"].values: music_df = infer_barlines(music_df, keep_old_index=True) start_i = music_df[music_df["index"] == start_i].index[0] if end_i is not None: end_i = music_df[music_df["index"] == end_i].index[0] music_df = music_df.drop("index", axis=1) prev_bar = last_bar_before(music_df, start_i) if prev_time_sig.onset != prev_bar.onset: prev_time_sig = prev_time_sig.copy() prev_time_sig["onset"] = prev_bar.onset # Concatenate prev_time_sig series, prev_bar series, and music_df dataframe: music_df = pd.concat( [ prev_time_sig.to_frame().T.astype(music_df.dtypes), prev_bar.to_frame().T.astype(music_df.dtypes), music_df.loc[start_i:], ] ) if end_time is not None: raise NotImplementedError # We can not use the efficient search because releases may not be in sorted # order. # However: the below will potentially include releases that go beyond the end # time # Get last value of df.release <= end_time notes_df = music_df[music_df.type == "note"] end_i = notes_df.release[notes_df.release <= end_time].index[-1] if end_i is not None: end_i = int(end_i) music_df = music_df.loc[:end_i] return music_dfCrop
Crop `music_df`, handling time signatures and barlines gracefully. Included in the returned dataframe will be the barline and time signature that precede the first note. The barline will have its onset preserved, while the time signature will be moved to align with the barline if it is not already.
Args
music_df- dataframe. Note that we assume that the dataframe is sorted (see music_df.sort_df)
start_i- index of the first row to include in the cropped dataframe. It is assumed that this index points to a note. At most one of `start_i` or `start_time` should be provided.
start_time- onset time of the first note to include in the cropped dataframe. At most one of `start_i` or `start_time` should be provided.
end_i- index of the last row to include in the cropped dataframe. It is assumed that this index points to a note. Note that end_i is inclusive (like label-based indexing in Pandas). At most one of `end_i` or `end_time` should be provided.
end_time- not implemented.
infer_barlines_if_no_barlines_found- if True, infer barlines if no barlines are found.
Examples:
First define an example dataframe with time signature, note, and bar events:
>>> music_df = pd.DataFrame(
...     {
...         "onset": [0.0, 0, 1, 2, 4, 5, 6, 8, 8, 9, 10, 11, 12, 13],
...         "release": [
...             None,
...             4.0,
...             4.0,
...             3.0,
...             8.0,
...             8,
...             7,
...             None,
...             11,
...             11,
...             10.5,
...             14,
...             14,
...             13.5,
...         ],
...         "type": ["time_signature", "bar", "note", "note", "bar", "note", "note"]
...         * 2,
...     }
... )
>>> music_df
    onset  release            type
0     0.0      NaN  time_signature
1     0.0      4.0             bar
2     1.0      4.0            note
3     2.0      3.0            note
4     4.0      8.0             bar
5     5.0      8.0            note
6     6.0      7.0            note
7     8.0      NaN  time_signature
8     8.0     11.0             bar
9     9.0     11.0            note
10   10.0     10.5            note
11   11.0     14.0             bar
12   12.0     14.0            note
13   13.0     13.5            note

>>> crop_df(music_df, start_time=2.0)  # doctest: +ELLIPSIS
    onset  release            type
0     0.0      NaN  time_signature
1     0.0      4.0             bar
3     2.0      3.0            note
4 ...

>>> crop_df(music_df, start_time=4)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    onset  release            type
0     4.0      NaN  time_signature
4     4.0      8.0             bar
5     5.0      8.0            note
6 ...

>>> crop_df(music_df, start_time=10.9)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    onset  release            type
7    11.0      NaN  time_signature
11   11.0     14.0             bar
12   12.0     14.0            note
13 ...

>>> crop_df(music_df, start_i=12)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    onset  release            type
7    11.0      NaN  time_signature
11   11.0     14.0             bar
12   12.0     14.0            note
13   13.0     13.5            note

Note that end_i is inclusive (like label-based indexing in Pandas):
>>> crop_df(music_df, end_i=3)  # doctest: +NORMALIZE_WHITESPACE
   onset  release            type
0    0.0      NaN  time_signature
1    0.0      4.0             bar
2    1.0      4.0            note
3    2.0      3.0            note

end_time is not yet implemented (releases being unsorted makes it a wee bit complicated and I don't yet need it):
>>> crop_df(music_df, start_time=6, end_time=10.5)
Traceback (most recent call last):
NotImplementedError

>>> crop_df(music_df, start_time=6, end_i=10)  # doctest: +NORMALIZE_WHITESPACE
    onset  release            type
0     4.0      NaN  time_signature
4     4.0      8.0             bar
6     6.0      7.0            note
7     8.0      NaN  time_signature
8     8.0     11.0             bar
9     9.0     11.0            note
10   10.0     10.5            note

def last_bar_before(music_df: pandas.DataFrame, i: Any) ‑> pandas.Series-
Expand source code
def last_bar_before(music_df: pd.DataFrame, i: Any) -> pd.Series:
    """
    Look up the last "bar" item before index `i` in `music_df`.

    Thin wrapper that forwards to `_last_item_before` with the item type fixed
    to "bar"; the lookup semantics and error behavior are those of that helper.

    Args:
        music_df: dataframe to search.
        i: index label that the returned item must precede.

    Returns:
        Whatever `_last_item_before` returns for the "bar" item type.
    """
    bar_row = _last_item_before(music_df, i, "bar")
    return bar_row
Expand source code
def last_time_signature_before(music_df: pd.DataFrame, i: Any) -> pd.Series:
    """
    Look up the last "time_signature" item before index `i` in `music_df`.

    Thin wrapper that forwards to `_last_item_before` with the item type fixed
    to "time_signature". Behavior is undefined if `i` is not in
    `music_df.index`.

    As the second example shows, the row at `i` itself is not a candidate: the
    first row has no time signature "before" it even though it is one.

    >>> music_df = pd.DataFrame(
    ...     {
    ...         "onset": [0, 4, 8, 12],
    ...         "type": ["time_signature"] * 4,
    ...     },
    ...     index=[str(x) for x in range(4)],
    ... )
    >>> last_time_signature_before(music_df, "2")
    onset                 4
    type     time_signature
    Name: 1, dtype: object
    >>> last_time_signature_before(music_df, "0")
    Traceback (most recent call last):
    ValueError: No time_signature before index 0
    """
    time_sig_row = _last_item_before(music_df, i, "time_signature")
    return time_sig_row
Behavior is undefined if `i` is not in `music_df.index`.

>>> music_df = pd.DataFrame(
...     {
...         "onset": [0, 4, 8, 12],
...         "type": ["time_signature"] * 4,
...     },
...     index=[str(x) for x in range(4)],
... )
>>> last_time_signature_before(music_df, "2")
onset                 4
type     time_signature
Name: 1, dtype: object
>>> last_time_signature_before(music_df, "0")
Traceback (most recent call last):
ValueError: No time_signature before index 0