Module music_df.chord_df
Functions
def add_chord_pcs(chord_df: pandas.DataFrame,
inplace: bool = False,
rn_pc_cache: CacheDict[tuple[str, str], list[int] | str] | None = None) ‑> pandas.DataFrame-
Expand source code
def add_chord_pcs(
    chord_df: pd.DataFrame,
    inplace: bool = False,
    rn_pc_cache: CacheDict[tuple[str, str], list[int] | str] | None = None,
) -> pd.DataFrame:
    """
    Adds a column 'chord_pcs' to the chord_df with the pcs of each chord.

    Missing values in the 'key' column are forward-filled first, so a key only
    has to be written on the row where it changes.

    Parameters
    ----------
    chord_df : pd.DataFrame
        Must contain 'key' and 'rn' columns.
    inplace : bool
        If False (the default), a copy is modified and returned and the input
        is left untouched.
    rn_pc_cache : mapping, optional
        Looked up as ``rn_pc_cache[rn, key]`` for every row. When omitted,
        ``get_rn_pc_cache(case_matters=True, hex_str=True)`` is used.

    Returns
    -------
    pd.DataFrame
        The frame with the filled 'key' column and the new 'chord_pcs' column.

    >>> chord_df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... key,rn
    ... b,i
    ... ,V
    ... f#,iv
    ... ,i
    ... ,V/V
    ... '''
    ...     )
    ... )
    >>> add_chord_pcs(chord_df)  # doctest: +NORMALIZE_WHITESPACE
      key   rn chord_pcs
    0   b    i       b26
    1   b    V       6a1
    2  f#   iv       b26
    3  f#    i       691
    4  f#  V/V       803

    >>> chord_df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... key,rn
    ... b,Im
    ... ,VM
    ... f#,IVm
    ... ,Im
    ... ,VM/V
    ... '''
    ...     )
    ... )
    >>> add_chord_pcs(  # doctest: +NORMALIZE_WHITESPACE
    ...     chord_df,
    ...     rn_pc_cache=get_rn_pc_cache(rn_format="rnbert", hex_str=True),
    ... )
      key    rn chord_pcs
    0   b    Im       b26
    1   b    VM       6a1
    2  f#   IVm       b26
    3  f#    Im       691
    4  f#  VM/V       803
    """
    if not inplace:
        chord_df = chord_df.copy()
    chord_df["key"] = chord_df["key"].ffill()
    if rn_pc_cache is None:
        rn_pc_cache = get_rn_pc_cache(case_matters=True, hex_str=True)
    # Explicit row-wise lookup rather than DataFrame.apply(axis=1): for an empty
    # frame apply() hands back a DataFrame instead of a Series, which cannot be
    # assigned to a single column.
    pcs = [rn_pc_cache[rn, key] for rn, key in zip(chord_df["rn"], chord_df["key"])]
    chord_df["chord_pcs"] = pd.Series(pcs, index=chord_df.index)
    return chord_df
>>> chord_df = pd.read_csv( ... io.StringIO( ... ''' ... key,rn ... b,i ... ,V ... f#,iv ... ,i ... ,V/V ... ''' ... ) ... ) >>> add_chord_pcs(chord_df) key rn chord_pcs 0 b i b26 1 b V 6a1 2 f# iv b26 3 f# i 691 4 f# V/V 803>>> chord_df = pd.read_csv( ... io.StringIO( ... ''' ... key,rn ... b,Im ... ,VM ... f#,IVm ... ,Im ... ,VM/V ... ''' ... ) ... ) >>> add_chord_pcs( ... chord_df, ... rn_pc_cache=get_rn_pc_cache(rn_format="rnbert", hex_str=True), ... ) key rn chord_pcs 0 b Im b26 1 b VM 6a1 2 f# IVm b26 3 f# Im 691 4 f# VM/V 803 def add_key_pcs(key_df: pandas.DataFrame,
inplace: bool = False,
key_pc_cache: CacheDict[str, list[int] | str] | None = None) ‑> pandas.DataFrame-
Expand source code
def add_key_pcs(
    key_df: pd.DataFrame,
    inplace: bool = False,
    key_pc_cache: CacheDict[str, list[int] | str] | None = None,
) -> pd.DataFrame:
    """
    Adds a column 'key_pcs' to the df with the pcs of each key.

    Gaps in the 'key' column are forward-filled before the lookup. Unless
    `inplace` is True the input frame is copied and left unchanged. When no
    cache is supplied, ``get_key_pc_cache(hex_str=True)`` provides the lookup.

    >>> key_df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... key
    ... b
    ... NaN
    ... f#
    ... '''
    ...     )
    ... )
    >>> add_key_pcs(key_df)  # doctest: +NORMALIZE_WHITESPACE
      key  key_pcs
    0   b  b12467a
    1   b  b12467a
    2  f#  689b125
    """
    target = key_df if inplace else key_df.copy()
    target["key"] = target["key"].ffill()
    lookup = get_key_pc_cache(hex_str=True) if key_pc_cache is None else key_pc_cache
    target["key_pcs"] = target["key"].apply(lambda name: lookup[name])
    return target
>>> key_df = pd.read_csv( ... io.StringIO( ... ''' ... key ... b ... NaN ... f# ... ''' ... ) ... ) >>> add_key_pcs(key_df) key key_pcs 0 b b12467a 1 b b12467a 2 f# 689b125 def assert_valid_chord_df(chord_df: pandas.DataFrame, **kwargs) ‑> None-
Expand source code
def assert_valid_chord_df(chord_df: pd.DataFrame, **kwargs) -> None:
    """
    Validate a chord_df and raise ValueError if invalid.

    Thin convenience wrapper: runs validate_chord_df and then calls
    ``raise_if_invalid()`` on the result.

    Parameters
    ----------
    chord_df : pd.DataFrame
        The DataFrame to validate.
    **kwargs
        Arguments passed to validate_chord_df.

    Raises
    ------
    ValueError
        If the chord_df is invalid.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "onset": [0.0, 1.0],
    ...         "key": ["C", "G"],
    ...         "degree": ["I", "V"],
    ...         "quality": ["M", "M"],
    ...         "inversion": [0, 1],
    ...     }
    ... )
    >>> assert_valid_chord_df(df)  # No error

    >>> df_bad = pd.DataFrame(
    ...     {
    ...         "onset": [0.0],
    ...         "key": ["X"],
    ...         "degree": ["I"],
    ...         "quality": ["M"],
    ...         "inversion": [0],
    ...     }
    ... )
    >>> assert_valid_chord_df(df_bad)  # doctest: +NORMALIZE_WHITESPACE
    Traceback (most recent call last):
        ...
    ValueError: chord_df validation failed:
     - Invalid key value(s): ['X'] at row(s) [0]
    """
    validate_chord_df(chord_df, **kwargs).raise_if_invalid()
Parameters
chord_df:pd.DataFrame- The DataFrame to validate.
**kwargs- Arguments passed to validate_chord_df.
Raises
ValueError- If the chord_df is invalid.
Examples
>>> df = pd.DataFrame( ... { ... "onset": [0.0, 1.0], ... "key": ["C", "G"], ... "degree": ["I", "V"], ... "quality": ["M", "M"], ... "inversion": [0, 1], ... } ... ) >>> assert_valid_chord_df(df) # No error>>> df_bad = pd.DataFrame( ... { ... "onset": [0.0], ... "key": ["X"], ... "degree": ["I"], ... "quality": ["M"], ... "inversion": [0], ... } ... ) >>> assert_valid_chord_df(df_bad) Traceback (most recent call last): ... ValueError: chord_df validation failed: - Invalid key value(s): ['X'] at row(s) [0] def drop_harmony_columns(df: pandas.DataFrame) ‑> pandas.DataFrame-
Expand source code
def drop_harmony_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return `df` without any column whose name hints at a harmonic annotation.

    If, for example, we are re-labeling a score, we want to drop all columns
    that may have chord annotations to be sure they don't leak through. A
    column is dropped when its lower-cased name contains any of the substrings
    listed below. The input frame itself is not modified.
    """
    harmony_substrings = (
        "key",
        "degree",
        "mode",
        "rn",
        "chord",
        "harmony",
        "bass",
        "alteration",
        "inversion",
        "quality",
        "root",
    )

    def looks_harmonic(column_name) -> bool:
        lowered = column_name.lower()
        return any(fragment in lowered for fragment in harmony_substrings)

    return df.drop(columns=[name for name in df.columns if looks_harmonic(name)])
def extract_chord_df_from_music_df(music_df: pandas.DataFrame,
null_chord_token: str = 'na',
columns: Iterable[str] = ('key', 'onset', 'degree', 'quality', 'inversion'),
release_col: str = 'release') ‑> pandas.DataFrame-
Expand source code
def extract_chord_df_from_music_df(
    music_df: pd.DataFrame,
    null_chord_token: str = "na",
    columns: Iterable[str] = ("key", "onset", "degree", "quality", "inversion"),
    release_col: str = "release",
) -> pd.DataFrame:
    """
    Collapse a note-level music_df into one row per chord.

    A new chord starts on every row where any of `columns` (other than
    "onset") differs from the previous row. Rows whose first listed column is
    missing or equal to `null_chord_token` are then discarded.

    Parameters
    ----------
    music_df : pd.DataFrame
        Must contain every column named in `columns`.
    null_chord_token : str
        Value of the first column in `columns` that marks an unlabelled row.
    columns : Iterable[str]
        Columns copied to the result; must include "onset".
    release_col : str
        If this column exists in `music_df`, the result gets a column of the
        same name: each chord is released at the next chord's onset, and the
        last chord at the maximum release found in `music_df`.

    Returns
    -------
    pd.DataFrame
        One row per chord with a fresh 0-based index. It has no rows when
        nothing in `music_df` is labelled.

    >>> music_df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... type,pitch,key,onset,release,degree,quality,inversion
    ... bar,,,0.0,4.0,,,
    ... note,60,C,0.0,1.0,I,M,0.0
    ... note,64,C,1.0,2.0,I,M,0.0
    ... note,62,C,2.0,3.0,V,M,1.0
    ... note,67,C,3.0,4.0,V,M,0.0
    ... note,66,G,4.0,6.0,V,M,0.0
    ... '''
    ...     )
    ... )
    >>> extract_chord_df_from_music_df(music_df)  # doctest: +NORMALIZE_WHITESPACE
      key  onset degree quality  inversion  release
    0   C    0.0      I       M        0.0      2.0
    1   C    2.0      V       M        1.0      3.0
    2   C    3.0      V       M        0.0      4.0
    3   G    4.0      V       M        0.0      6.0
    """
    columns = list(columns)
    assert all(col in music_df.columns for col in columns), (
        f"music_df must have the following columns: {columns}"
    )
    chord_change_masks = [
        music_df[col] != music_df[col].shift(1) for col in columns if col != "onset"
    ]
    chord_change_mask = np.logical_or.reduce(chord_change_masks)
    chord_df = music_df.loc[chord_change_mask, columns].copy()
    chord_df = chord_df.loc[
        (chord_df[columns[0]] != null_chord_token) & (~chord_df[columns[0]].isna())
    ]
    chord_df = chord_df.reset_index(drop=True)
    if release_col in music_df.columns:
        chord_df[release_col] = chord_df["onset"].shift(-1)
        # With no chords left there is no "last row"; writing to label -1 would
        # silently append a bogus row, so only patch the final release when
        # there is one.
        if not chord_df.empty:
            # We use .max() rather than .iloc[-1] because the last item in
            # music_df may not have a release time, e.g. if it is a bar.
            chord_df.loc[chord_df.index[-1], release_col] = music_df[release_col].max()
    return chord_df
) >>> extract_chord_df_from_music_df(music_df) key onset degree quality inversion release 0 C 0.0 I M 0.0 2.0 1 C 2.0 V M 1.0 3.0 2 C 3.0 V M 0.0 4.0 3 G 4.0 V M 0.0 6.0 def extract_key_df_from_music_df(music_df_or_chord_df: pandas.DataFrame,
null_key_token: str = 'na',
columns: Iterable[str] = ('key', 'onset'),
release_col: str = 'release') ‑> pandas.DataFrame-
Expand source code
def extract_key_df_from_music_df(
    music_df_or_chord_df: pd.DataFrame,
    null_key_token: str = "na",
    columns: Iterable[str] = ("key", "onset"),
    release_col: str = "release",
) -> pd.DataFrame:
    """
    Extract one row per key region.

    This is really just a simple wrapper for extract_chord_df_from_music_df
    with different default parameters: only "key" and "onset" are tracked, so
    a new row starts only where the key changes.

    >>> music_df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... type,pitch,key,onset,release,degree,quality,inversion
    ... bar,,,0.0,4.0,,,
    ... note,60,C,0.0,1.0,I,M,0.0
    ... note,64,C,1.0,2.0,I,M,0.0
    ... note,62,C,2.0,3.0,V,M,1.0
    ... note,67,C,3.0,4.0,V,M,0.0
    ... note,66,G,4.0,6.0,V,M,0.0
    ... note,65,F,8.0,10.0,V,M,0.0
    ... '''
    ...     )
    ... )
    >>> extract_key_df_from_music_df(music_df)  # doctest: +NORMALIZE_WHITESPACE
      key  onset  release
    0   C    0.0      4.0
    1   G    4.0      8.0
    2   F    8.0     10.0

    It should work on chord_df as well as music_df:

    >>> chord_df = extract_chord_df_from_music_df(music_df)
    >>> extract_key_df_from_music_df(chord_df)  # doctest: +NORMALIZE_WHITESPACE
      key  onset  release
    0   C    0.0      4.0
    1   G    4.0      8.0
    2   F    8.0     10.0
    """
    key_df = extract_chord_df_from_music_df(
        music_df_or_chord_df,
        null_chord_token=null_key_token,
        columns=columns,
        release_col=release_col,
    )
    return key_df
>>> music_df = pd.read_csv( ... io.StringIO( ... ''' ... type,pitch,key,onset,release,degree,quality,inversion ... bar,,,0.0,4.0,,, ... note,60,C,0.0,1.0,I,M,0.0 ... note,64,C,1.0,2.0,I,M,0.0 ... note,62,C,2.0,3.0,V,M,1.0 ... note,67,C,3.0,4.0,V,M,0.0 ... note,66,G,4.0,6.0,V,M,0.0 ... note,65,F,8.0,10.0,V,M,0.0 ... ''' ... ) ... ) >>> extract_key_df_from_music_df(music_df) key onset release 0 C 0.0 4.0 1 G 4.0 8.0 2 F 8.0 10.0It should work on chord_df as well as music_df:
>>> chord_df = extract_chord_df_from_music_df(music_df) >>> extract_key_df_from_music_df(chord_df) key onset release 0 C 0.0 4.0 1 G 4.0 8.0 2 F 8.0 10.0 def get_quality_for_merging(quality: pandas.Series | str) ‑> pandas.Series | str-
Expand source code
def get_quality_for_merging(quality: pd.Series | str) -> pd.Series | str:
    """
    Strip every "7" from a chord quality so it can be merged into a label.

    Accepts either a single quality string or a Series of quality strings and
    returns the same kind of object.
    """
    # (Malcolm 2024-04-18) possibly we want to do further processing, e.g.
    #   - remove 6 from augmented 6 chords "aug6" quality and otherwise simplify
    #   - only display the quality when it contradicts the expected value for the
    #       scale (this of course would require a lot more coding)
    if not isinstance(quality, str):
        return quality.str.replace("7", "")
    return quality.replace("7", "")
annotation_col: str,
onset_col: str = 'onset',
pitch_col: str = 'pitch',
fill_value: str = '',
inplace: bool = True) ‑> pandas.DataFrame-
Expand source code
def get_unique_annotations_per_onset(
    df: pd.DataFrame,
    annotation_col: str,
    onset_col: str = "onset",
    pitch_col: str = "pitch",
    fill_value: str = "",
    inplace: bool = True,
) -> pd.DataFrame:
    """
    Keep each onset's annotation only on its lowest-pitched row.

    Every other row sharing that onset gets `fill_value` in `annotation_col`.
    Note that `inplace` defaults to True, so the input frame is modified unless
    the caller asks for a copy.

    Rather than using this function, we can just sort the dataframe and then
    use keep_new_elements_only. This has the virtue of only showing each
    annotation when it first occurs rather than on every new onset.

    >>> df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... onset,pitch,rn
    ... 0.0,60,IM
    ... 0.0,64,IM
    ... 0.5,62,IM
    ... 1.0,67,VM6
    ... 1.0,59,VM6
    ... '''
    ...     )
    ... )
    >>> get_unique_annotations_per_onset(df, "rn")  # doctest: +NORMALIZE_WHITESPACE
       onset  pitch   rn
    0    0.0     60   IM
    1    0.0     64
    2    0.5     62   IM
    3    1.0     67
    4    1.0     59  VM6
    """
    frame = df if inplace else df.copy()
    assert annotation_col in frame.columns, f"annotation_col {annotation_col} not in df"
    # Index label of the lowest pitch within each onset group.
    lowest_note_rows = frame.groupby(onset_col)[pitch_col].idxmin()
    not_lowest = ~frame.index.isin(lowest_note_rows)
    frame.loc[not_lowest, annotation_col] = fill_value
    return frame
>>> df = pd.read_csv( ... io.StringIO( ... ''' ... onset,pitch,rn ... 0.0,60,IM ... 0.0,64,IM ... 0.5,62,IM ... 1.0,67,VM6 ... 1.0,59,VM6 ... ''' ... ) ... ) >>> get_unique_annotations_per_onset(df, "rn") # doctest: +NORMALIZE_WHITESPACE onset pitch rn 0 0.0 60 IM 1 0.0 64 2 0.5 62 IM 3 1.0 67 4 1.0 59 VM6 def inversion_number_to_figure(inversion_number: int,
quality: str,
triad_inversions_mapping: Mapping[int, str] | None = None,
seventh_chord_inversions_mapping: Mapping[int, str] | None = None) ‑> str-
Expand source code
def inversion_number_to_figure(
    inversion_number: int,
    quality: str,
    triad_inversions_mapping: Mapping[int, str] | None = None,
    seventh_chord_inversions_mapping: Mapping[int, str] | None = None,
) -> str:
    """
    Convert a number indicating the inversion (0-indexed) to a figured-bass
    figure.

    Quality is required to distinguish between triads on the one hand and 7th
    chords and augmented 6ths on the other. A NaN inversion, or the unknown
    quality "x", gives an empty figure; an inversion number missing from the
    relevant mapping gives "?". The mappings default to the module-level
    TRIAD_INVERSIONS and SEVENTH_CHORD_INVERSIONS.

    >>> inversion_number_to_figure(0, "M")
    ''
    >>> inversion_number_to_figure(1, "m")
    '6'
    >>> inversion_number_to_figure(0, "m7")
    '7'
    >>> inversion_number_to_figure(3, "Mm7")
    '42'
    """
    as_float = float(inversion_number)
    if isnan(as_float):
        return ""
    inversion_number = int(as_float)

    # If the chord is a 7th or augmented 6th, we use 7th chord inversions. (Since
    # we only have integers to indicate 1st, 2nd inversion etc., we can't
    # distinguish German and Italian 6th chords.)
    uses_seventh_figures = "7" in quality or quality == "aug6"
    if uses_seventh_figures:
        figures = (
            SEVENTH_CHORD_INVERSIONS
            if seventh_chord_inversions_mapping is None
            else seventh_chord_inversions_mapping
        )
        return figures.get(inversion_number, "?")

    # If the quality is unknown we ignore the inversion
    if quality == "x":
        return ""

    # Otherwise, assume to be a triad
    figures = (
        TRIAD_INVERSIONS if triad_inversions_mapping is None else triad_inversions_mapping
    )
    return figures.get(inversion_number, "?")
Quality is required to distinguish between triads on the one hand and 7th chords and augmented 6ths on the other.
>>> inversion_number_to_figure(0, "M") '' >>> inversion_number_to_figure(1, "m") '6' >>> inversion_number_to_figure(0, "m7") '7' >>> inversion_number_to_figure(3, "Mm7") '42' def keep_new_elements_only(series: pandas.Series, fill_element='', ignore_falsy: bool = True)-
Expand source code
def keep_new_elements_only(
    series: pd.Series, fill_element="", ignore_falsy: bool = True
):
    """
    Blank out every element that merely repeats the one before it.

    Returns a copy in which only the first element of each run is kept and
    everything else (including missing values) is replaced by `fill_element`.
    With `ignore_falsy`, falsy entries such as "" are treated as gaps: they
    are blanked and do not interrupt the run they sit in.

    >>> s = pd.Series(list("aaabbcddde"))
    >>> keep_new_elements_only(s)  # doctest: +NORMALIZE_WHITESPACE
    0    a
    1
    2
    3    b
    4
    5    c
    6    d
    7
    8
    9    e
    dtype: object

    >>> s = pd.Series([float("nan"), "a", "", "a", float("nan"), "b", ""])
    >>> keep_new_elements_only(s)  # doctest: +NORMALIZE_WHITESPACE
    0
    1    a
    2
    3
    4
    5    b
    6
    dtype: object
    """
    # NOTE(review): this listing was recovered from a whitespace-collapsed page;
    # the forward-fill is assumed to belong to the `ignore_falsy` branch —
    # confirm against the repository.
    out = series.copy()
    if ignore_falsy:
        falsy = ~series.astype(bool)
        out[falsy] = float("nan")
        out = out.ffill()
    differs_from_previous = out.ne(out.shift(1))
    is_present = out.notna()
    out[~(differs_from_previous & is_present)] = fill_element
    return out
chord_df: pandas.DataFrame,
columns_to_add: Iterable[str] = ('key', 'degree', 'quality', 'inversion'),
null_chord_token: str = 'na') ‑> pandas.DataFrame-
Expand source code
def label_music_df_with_chord_df(
    music_df: pd.DataFrame,
    chord_df: pd.DataFrame,
    columns_to_add: Iterable[str] = ("key", "degree", "quality", "inversion"),
    null_chord_token: str = "na",
) -> pd.DataFrame:
    """
    Copy chord annotations from `chord_df` onto the rows of `music_df`.

    Each row of `music_df` receives the annotation of the latest chord whose
    onset is at or before the row's onset (a backward as-of merge on "onset",
    so both frames must be sorted by onset). Any of `columns_to_add` already
    present in `music_df` are replaced. Rows whose "type" is not "note" are
    then blanked: text columns get `null_chord_token`, other columns get NaN.

    Parameters
    ----------
    music_df : pd.DataFrame
        Must contain "onset" and "type" columns.
    chord_df : pd.DataFrame
        Must contain "onset" and every column in `columns_to_add`.
    columns_to_add : Iterable[str]
        Annotation columns to transfer. Any iterable works, including a
        one-shot generator.
    null_chord_token : str
        Placeholder written into text columns of non-note rows.

    Returns
    -------
    pd.DataFrame
        A new, labelled frame; the inputs are not modified.

    >>> music_df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... type,pitch,onset,release
    ... bar,,0.0,4.0
    ... note,60,0.0,1.0
    ... note,64,1.0,2.0
    ... note,62,2.0,3.0
    ... note,67,3.0,4.0
    ... bar,,4.0,8.0
    ... note,66,4.0,6.0
    ... note,67,6.0,8.0
    ... '''
    ...     )
    ... )
    >>> chord_df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... onset,key,degree,quality,inversion
    ... 0.0,C,I,M,0.0
    ... 3.0,C,V,M,1.0
    ... 5.0,G,V,M,0.0
    ... 7.0,G,I,M,0.0
    ... '''
    ...     )
    ... )
    >>> label_music_df_with_chord_df(music_df, chord_df)  # doctest: +NORMALIZE_WHITESPACE
       type  pitch  onset  release key degree quality  inversion
    0   bar    NaN    0.0      4.0  na     na      na        NaN
    1  note   60.0    0.0      1.0   C      I       M        0.0
    2  note   64.0    1.0      2.0   C      I       M        0.0
    3  note   62.0    2.0      3.0   C      I       M        0.0
    4  note   67.0    3.0      4.0   C      V       M        1.0
    5   bar    NaN    4.0      8.0  na     na      na        NaN
    6  note   66.0    4.0      6.0   C      V       M        1.0
    7  note   67.0    6.0      8.0   G      V       M        0.0
    """
    # Materialize once: the names are needed three times below, and a one-shot
    # iterable would be exhausted after the first pass.
    columns_to_add = list(columns_to_add)
    out = pd.merge_asof(
        music_df.drop(columns=[c for c in columns_to_add if c in music_df.columns]),
        chord_df[["onset"] + columns_to_add],
        on="onset",
        direction="backward",
    )
    nonnote_mask = out["type"] != "note"
    for col in columns_to_add:
        # Text columns may be plain object dtype or a dedicated pandas string
        # dtype; both take the null token rather than NaN.
        holds_text = pd.api.types.is_object_dtype(
            out[col]
        ) or pd.api.types.is_string_dtype(out[col])
        out.loc[nonnote_mask, col] = null_chord_token if holds_text else float("nan")
    return out
) >>> label_music_df_with_chord_df(music_df, chord_df) type pitch onset release key degree quality inversion 0 bar NaN 0.0 4.0 na na na NaN 1 note 60.0 0.0 1.0 C I M 0.0 2 note 64.0 1.0 2.0 C I M 0.0 3 note 62.0 2.0 3.0 C I M 0.0 4 note 67.0 3.0 4.0 C V M 1.0 5 bar NaN 4.0 8.0 na na na NaN 6 note 66.0 4.0 6.0 C V M 1.0 7 note 67.0 6.0 8.0 G V M 0.0 def merge_annotations(df: pandas.DataFrame,
degree_col: str = 'degree',
primary_degree_col: str = 'primary_degree',
primary_alteration_col: str = 'primary_alteration',
secondary_degree_col: str = 'secondary_degree',
secondary_alteration_col: str = 'secondary_alteration',
inversion_col: str = 'inversion',
quality_col: str = 'quality',
include_key: bool = True,
key_col: str = 'key') ‑> pandas.Series-
Expand source code
def merge_annotations(
    df: pd.DataFrame,
    degree_col: str = "degree",
    primary_degree_col: str = "primary_degree",
    primary_alteration_col: str = "primary_alteration",
    secondary_degree_col: str = "secondary_degree",
    secondary_alteration_col: str = "secondary_alteration",
    inversion_col: str = "inversion",
    quality_col: str = "quality",
    include_key: bool = True,
    key_col: str = "key",
) -> pd.Series:
    """
    Build one display label per row, shown only where the annotation changes.

    The label is the Roman numeral assembled from degree, quality (with "7"
    removed) and inversion figure, optionally prefixed by "<key>." on rows
    where the key changes. Rows that repeat the previous annotation, and rows
    holding the "na" token, yield an empty string. The input frame is copied,
    not modified.

    We rely on the dataframe being sorted to only show new annotations.

    If `degree_col` is absent, the four split-degree columns must already be
    present; otherwise they are derived from `degree_col`.

    >>> df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... type,key,degree,inversion,quality
    ... bar,,,,
    ... note,C,I,0,M
    ... note,C,V,1,M
    ... bar,na,na,,na
    ... note,C,I,0,M
    ... note,G,V,1,M
    ... '''
    ...     )
    ... )
    >>> merge_annotations(df)  # doctest: +NORMALIZE_WHITESPACE
    0
    1     C.IM
    2      VM6
    3
    4       IM
    5    G.VM6
    dtype: object

    Testing that V in C is not the same as V in a.

    >>> df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... type,key,degree,inversion,quality
    ... bar,,,,
    ... note,a,V,0,M
    ... note,C,V,0,M
    ... '''
    ...     )
    ... )
    >>> merge_annotations(df)  # doctest: +NORMALIZE_WHITESPACE
    0
    1    a.VM
    2    C.VM
    dtype: object
    >>> merge_annotations(df, include_key=False)  # doctest: +NORMALIZE_WHITESPACE
    0
    1    VM
    2    VM
    dtype: object
    """
    df = df.copy()
    # Turn the numeric inversion into a figured-bass suffix, row by row.
    df["inversion_figure"] = df.apply(
        lambda row: inversion_number_to_figure(row[inversion_col], row[quality_col]),
        axis=1,
    )
    df["quality_for_merging"] = get_quality_for_merging(df[quality_col])
    if degree_col not in df.columns:
        # No joined degree column: the split columns must be supplied directly.
        assert all(
            col in df.columns
            for col in [
                primary_degree_col,
                primary_alteration_col,
                secondary_degree_col,
                secondary_alteration_col,
            ]
        )
    else:
        df = single_degree_to_split_degrees(
            df,
            degree_col=degree_col,
            inplace=True,
            primary_degree_col=primary_degree_col,
            primary_alteration_col=primary_alteration_col,
            secondary_degree_col=secondary_degree_col,
            secondary_alteration_col=secondary_alteration_col,
        )
    # Re-join the split degrees, this time with quality and figure, into "rn".
    df = split_degrees_to_single_degree(
        df,
        inversion_col="inversion_figure",
        quality_col="quality_for_merging",
        primary_degree_col=primary_degree_col,
        primary_alteration_col=primary_alteration_col,
        secondary_degree_col=secondary_degree_col,
        secondary_alteration_col=secondary_alteration_col,
        output_col="rn",
        inplace=True,
    )
    # I was using ":" as the separator character but it is a special
    # value in humdrum even when escaped.
    rn_with_nan = df["rn"].replace("na", float("nan"))
    df["rn"] = keep_new_elements_only(rn_with_nan)
    keys = keep_new_elements_only(df[key_col].replace("na", float("nan")) + ".")
    # When key is shown but rn was filtered out (same as previous), restore rn
    key_shown = keys.astype(bool)
    rn_empty = df["rn"] == ""
    rn_available = ~rn_with_nan.ffill().isna()
    restore_mask = key_shown & rn_empty & rn_available
    df.loc[restore_mask, "rn"] = rn_with_nan.ffill()[restore_mask]
    if include_key:
        return keys + df["rn"]
    # Rebuilt from the raw values, so this result carries a fresh default index.
    return pd.Series(df["rn"].values)
>>> df = pd.read_csv( ... io.StringIO( ... ''' ... type,key,degree,inversion,quality ... bar,,,, ... note,C,I,0,M ... note,C,V,1,M ... bar,na,na,,na ... note,C,I,0,M ... note,G,V,1,M ... ''' ... ) ... ) >>> merge_annotations(df) # doctest: +NORMALIZE_WHITESPACE 0 1 C.IM 2 VM6 3 4 IM 5 G.VM6 dtype: objectTesting that V in C is not the same as V in a.
>>> df = pd.read_csv( ... io.StringIO( ... ''' ... type,key,degree,inversion,quality ... bar,,,, ... note,a,V,0,M ... note,C,V,0,M ... ''' ... ) ... ) >>> merge_annotations(df) # doctest: +NORMALIZE_WHITESPACE 0 1 a.VM 2 C.VM dtype: object >>> merge_annotations(df, include_key=False) # doctest: +NORMALIZE_WHITESPACE 0 1 VM 2 VM dtype: object def single_degree_to_split_degrees(df: pandas.DataFrame,
degree_col: str = 'degree',
primary_degree_col: str = 'primary_degree',
primary_alteration_col: str = 'primary_alteration',
secondary_degree_col: str = 'secondary_degree',
secondary_alteration_col: str = 'secondary_alteration',
null_alteration_char: str = '_',
null_chord_token: str = 'na',
inplace: bool = True) ‑> pandas.DataFrame-
Expand source code
def single_degree_to_split_degrees(
    df: pd.DataFrame,
    degree_col: str = "degree",
    primary_degree_col: str = "primary_degree",
    primary_alteration_col: str = "primary_alteration",
    secondary_degree_col: str = "secondary_degree",
    secondary_alteration_col: str = "secondary_alteration",
    null_alteration_char: str = "_",
    null_chord_token: str = "na",
    inplace: bool = True,
) -> pd.DataFrame:
    """
    Split a joined degree such as "#VI/bII" into four columns.

    The text before the first "/" is the primary degree and the text after it
    the secondary degree; a missing secondary degree is treated as "I". Leading
    "b"/"#" characters of each part go to the alteration column, with
    `null_alteration_char` standing for "no alteration". Rows whose degree is
    missing or equal to `null_chord_token` get `null_chord_token` in all four
    new columns, whether or not any row in the frame has a secondary degree.

    Note that `inplace` defaults to True, so `df` itself gains the columns
    unless a copy is requested.

    >>> df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... degree
    ... I
    ... IV
    ... V
    ... I
    ... '''
    ...     )
    ... )
    >>> single_degree_to_split_degrees(df)  # doctest: +NORMALIZE_WHITESPACE
      degree primary_degree primary_alteration secondary_degree secondary_alteration
    0      I              I                  _                I                    _
    1     IV             IV                  _                I                    _
    2      V              V                  _                I                    _
    3      I              I                  _                I                    _

    >>> df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... degree
    ... I
    ... VII/V
    ... V/bVII
    ... #VI/bII
    ... bVI
    ... na
    ... '''
    ...     )
    ... )
    >>> single_degree_to_split_degrees(df)  # doctest: +NORMALIZE_WHITESPACE
        degree primary_degree primary_alteration secondary_degree secondary_alteration
    0        I              I                  _                I                    _
    1    VII/V            VII                  _                V                    _
    2   V/bVII              V                  _              VII                    b
    3  #VI/bII             VI                  #               II                    b
    4      bVI             VI                  b                I                    _
    5       na             na                 na               na                   na
    """
    if not inplace:
        df = df.copy()

    def parse_part(part: pd.Series, alteration_col: str, degree_name: str) -> pd.DataFrame:
        # Leading run of b/# is the alteration; everything after is the degree.
        parsed = part.str.extract(r"([b#]*)(.*)").rename(
            columns={0: alteration_col, 1: degree_name}
        )
        parsed[alteration_col] = (
            parsed[alteration_col]
            .fillna(null_alteration_char)
            .replace("", null_alteration_char)
        )
        return parsed

    splits = df[degree_col].str.split("/", n=1, expand=True)
    null_mask = (df[degree_col] == null_chord_token) | (df[degree_col].isna())

    primary = parse_part(splits[0], primary_alteration_col, primary_degree_col)
    primary.loc[null_mask, :] = null_chord_token
    df[primary_degree_col] = primary[primary_degree_col]
    df[primary_alteration_col] = primary[primary_alteration_col]

    if splits.shape[1] == 1:
        # There are no secondary degrees
        df[secondary_degree_col] = "I"
        df[secondary_alteration_col] = null_alteration_char
        # Null rows must still be null here, exactly as in the branch below.
        df.loc[null_mask, [secondary_degree_col, secondary_alteration_col]] = (
            null_chord_token
        )
    else:
        secondary = parse_part(
            splits[1], secondary_alteration_col, secondary_degree_col
        )
        # Rows without a "/" have no secondary part: default them to "I".
        secondary[secondary_degree_col] = secondary[secondary_degree_col].fillna("I")
        secondary.loc[null_mask, :] = null_chord_token
        df[secondary_degree_col] = secondary[secondary_degree_col]
        df[secondary_alteration_col] = secondary[secondary_alteration_col]
    return df
primary_degree_col: str = 'primary_degree',
primary_alteration_col: str = 'primary_alteration',
secondary_degree_col: str = 'secondary_degree',
secondary_alteration_col: str = 'secondary_alteration',
inversion_col: str | None = None,
quality_col: str | None = None,
null_alteration_char: str = '_',
output_col: str = 'degree',
null_chord_token: str = 'na',
inplace: bool = True) ‑> pandas.DataFrame-
Expand source code
def split_degrees_to_single_degree(
    df: pd.DataFrame,
    primary_degree_col: str = "primary_degree",
    primary_alteration_col: str = "primary_alteration",
    secondary_degree_col: str = "secondary_degree",
    secondary_alteration_col: str = "secondary_alteration",
    inversion_col: str | None = None,
    quality_col: str | None = None,
    null_alteration_char: str = "_",
    output_col: str = "degree",
    null_chord_token: str = "na",
    inplace: bool = True,
) -> pd.DataFrame:
    """
    Join split degree columns back into a single Roman-numeral string.

    The result is written to `output_col` as
    ``<primary alteration><primary degree>[quality][inversion]/<secondary
    alteration><secondary degree>``, with `null_alteration_char` removed and a
    trailing "/I" dropped. When `quality_col` / `inversion_col` are given, their
    values are inserted (missing values count as empty strings); they must hold
    strings. If the frame has a "type" column, every row whose type is not
    "note" gets `null_chord_token`.

    Note that `inplace` defaults to True, so `df` itself gains `output_col`
    unless a copy is requested.

    >>> df = pd.read_csv(
    ...     io.StringIO(
    ...         '''
    ... type,primary_degree,primary_alteration,secondary_degree,secondary_alteration,quality,inversion
    ... bar,na,na,na,na,na,na
    ... note,I,_,I,_,M,
    ... note,VII,#,V,_,d,43
    ... bar,,,,
    ... note,V,_,VII,b,M,6
    ... note,VI,#,II,b,m,64
    ... note,VI,b,I,_,M,
    ... '''
    ...     )
    ... )
    >>> split_degrees_to_single_degree(df)["degree"]  # doctest: +NORMALIZE_WHITESPACE
    0         na
    1          I
    2     #VII/V
    3         na
    4     V/bVII
    5    #VI/bII
    6        bVI
    Name: degree, dtype: object
    >>> split_degrees_to_single_degree(  # doctest: +NORMALIZE_WHITESPACE
    ...     df, inversion_col="inversion", quality_col="quality", output_col="rn"
    ... )["rn"]
    0            na
    1            IM
    2     #VIId43/V
    3            na
    4      VM6/bVII
    5    #VIm64/bII
    6          bVIM
    Name: rn, dtype: object
    """
    if not inplace:
        df = df.copy()
    quality_part = df[quality_col].fillna("") if quality_col is not None else ""
    inversion_part = df[inversion_col].fillna("") if inversion_col is not None else ""
    df[output_col] = (
        df[primary_alteration_col]
        + df[primary_degree_col]
        + quality_part
        + inversion_part
        + "/"
        + df[secondary_alteration_col]
        + df[secondary_degree_col]
    )
    # Literal removal: the placeholder is a plain character, not a pattern, and
    # the default of `regex` differs between pandas versions.
    df[output_col] = df[output_col].str.replace(null_alteration_char, "", regex=False)
    # Remove "/I" from the end of Roman numerals
    df[output_col] = df[output_col].str.replace(r"/I$", "", regex=True)
    if "type" in df.columns:
        df.loc[df["type"] != "note", output_col] = null_chord_token
    return df
) >>> split_degrees_to_single_degree(df)["degree"] 0 na 1 I 2 #VII/V 3 na 4 V/bVII 5 #VI/bII 6 bVI Name: degree, dtype: object >>> split_degrees_to_single_degree( ... df, inversion_col="inversion", quality_col="quality", output_col="rn" ... )["rn"] 0 na 1 IM 2 #VIId43/V 3 na 4 VM6/bVII 5 #VIm64/bII 6 bVIM Name: rn, dtype: object def validate_chord_df(chord_df: pandas.DataFrame,
*,
format: Literal['joined', 'split', 'rn', 'auto'] = 'auto',
null_chord_token: str = 'na',
null_alteration_char: str = '_',
strict: bool = False,
check_index: bool = True,
check_values: bool = True,
check_types: bool = True,
allow_enharmonic_keys: bool = False) ‑> ValidationResult-
Expand source code
def validate_chord_df(
    chord_df: pd.DataFrame,
    *,
    format: Literal["joined", "split", "rn", "auto"] = "auto",
    null_chord_token: str = DEFAULT_NULL_CHORD_TOKEN,
    null_alteration_char: str = "_",
    strict: bool = False,
    check_index: bool = True,
    check_values: bool = True,
    check_types: bool = True,
    allow_enharmonic_keys: bool = False,
) -> ValidationResult:
    """
    Validate a chord_df DataFrame.

    Problems are collected rather than raised; call ``raise_if_invalid()`` on
    the result (or use assert_valid_chord_df) to turn them into a ValueError.

    Parameters
    ----------
    chord_df : pd.DataFrame
        The DataFrame to validate.
    format : {"joined", "split", "rn", "auto"}
        The expected format. If "auto", detect from columns.
    null_chord_token : str
        Token used for null/rest chords (default "na").
    null_alteration_char : str
        Character used for no alteration in split format (default "_").
    strict : bool
        If True, treat warnings as errors and fail on unrecognized columns.
    check_index : bool
        If True, validate that index is a RangeIndex starting at 0.
    check_values : bool
        If True, validate column values (keys, degrees, inversions, etc.).
    check_types : bool
        If True, validate column types.
    allow_enharmonic_keys : bool
        If True, accept any key matching the pattern [A-Ga-g](#*|b*) instead of
        validating against the predefined MAJOR_KEYS and MINOR_KEYS lists.

    Returns
    -------
    ValidationResult
        Object containing validation results.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "onset": [0.0, 1.0],
    ...         "key": ["C", "C"],
    ...         "degree": ["I", "V/V"],
    ...         "quality": ["M", "M"],
    ...         "inversion": [0, 0],
    ...     }
    ... )
    >>> result = validate_chord_df(df)
    >>> result.is_valid
    True
    >>> result.format_detected
    'joined'

    >>> df_bad = pd.DataFrame(
    ...     {
    ...         "onset": [0.0],
    ...         "key": ["X"],
    ...         "degree": ["I"],
    ...         "quality": ["M"],
    ...         "inversion": [0],
    ...     }
    ... )
    >>> result = validate_chord_df(df_bad)
    >>> result.is_valid
    False
    >>> "Invalid key" in result.errors[0].message
    True
    """
    # NOTE(review): the nesting of the checks below was reconstructed from a
    # whitespace-collapsed listing — confirm against the repository before
    # relying on which checks sit under `check_values`.

    # Each _check_* helper appends its findings to the list it is handed.
    errors: list[ValidationError] = []
    warnings: list[ValidationError] = []

    # Detect format
    if format == "auto":
        format_detected = _detect_format(chord_df)
    else:
        format_detected = format

    # Check index
    if check_index:
        _check_index(chord_df, errors)

    # Check columns
    _check_columns(chord_df, format_detected, errors)

    # Check types
    if check_types and format_detected != "unknown":
        _check_types(chord_df, format_detected, errors)

    # Check values
    if check_values and format_detected != "unknown":
        if format_detected == "rn":
            _check_key_values_rn_format(
                chord_df, null_chord_token, errors, allow_enharmonic_keys
            )
            # TODO(validation): rn content validation could be added in the future.
            # Currently skipped because rn format varies by cache (e.g., "V" vs "VM").
        else:
            _check_key_values(chord_df, null_chord_token, errors, allow_enharmonic_keys)
            if format_detected == "joined":
                _check_degree_format(chord_df, null_chord_token, errors)
            else:
                _check_alteration_values(
                    chord_df, null_chord_token, null_alteration_char, errors
                )
            _check_inversion_values(chord_df, errors)
        # Temporal findings are collected as warnings, not errors.
        _check_temporal_consistency(chord_df, warnings)
        _check_chord_pcs_values(chord_df, errors)

    if strict:
        _check_unrecognized_columns(chord_df, format_detected, errors)
        # Strict mode promotes every warning to an error.
        errors.extend(warnings)
        warnings = []

    is_valid = len(errors) == 0
    return ValidationResult(
        is_valid=is_valid,
        errors=errors,
        warnings=warnings,
        format_detected=format_detected,
    )
Parameters
chord_df:pd.DataFrame- The DataFrame to validate.
format:{"joined", "split", "rn", "auto"}- The expected format. If "auto", detect from columns.
null_chord_token:str- Token used for null/rest chords (default "na").
null_alteration_char:str- Character used for no alteration in split format (default "_").
strict:bool- If True, treat warnings as errors and fail on unrecognized columns.
check_index:bool- If True, validate that index is a RangeIndex starting at 0.
check_values:bool- If True, validate column values (keys, degrees, inversions, etc.).
check_types:bool- If True, validate column types.
allow_enharmonic_keys:bool- If True, accept any key matching the pattern `[A-Ga-g](#*|b*)` instead of validating against the predefined MAJOR_KEYS and MINOR_KEYS lists.
Returns
ValidationResult- Object containing validation results.
Examples
>>> df = pd.DataFrame( ... { ... "onset": [0.0, 1.0], ... "key": ["C", "C"], ... "degree": ["I", "V/V"], ... "quality": ["M", "M"], ... "inversion": [0, 0], ... } ... ) >>> result = validate_chord_df(df) >>> result.is_valid True >>> result.format_detected 'joined'>>> df_bad = pd.DataFrame( ... { ... "onset": [0.0], ... "key": ["X"], ... "degree": ["I"], ... "quality": ["M"], ... "inversion": [0], ... } ... ) >>> result = validate_chord_df(df_bad) >>> result.is_valid False >>> "Invalid key" in result.errors[0].message True
Classes
class ValidationError (column: str | None,
row_index: int | None,
message: str,
severity: Literal['error', 'warning'] = 'error')-
Expand source code
@dataclass
class ValidationError:
    """A single finding (error or warning) produced while validating a chord_df."""

    # Column the finding refers to; None is allowed (presumably for findings
    # that are not tied to one column — confirm against the checkers).
    column: str | None
    # Row the finding refers to; None is allowed (presumably for frame-level
    # findings — confirm against the checkers).
    row_index: int | None
    # Human-readable description; this is the text raise_if_invalid reports.
    message: str
    # Either "error" or "warning"; defaults to "error".
    severity: Literal["error", "warning"] = "error"
Instance variables
var column : str | Nonevar message : strvar row_index : int | Nonevar severity : Literal['error', 'warning']
class ValidationResult (is_valid: bool,
errors: list[ValidationError] = <factory>,
warnings: list[ValidationError] = <factory>,
format_detected: Literal['joined', 'split', 'rn', 'unknown'] = 'unknown')-
Expand source code
@dataclass
class ValidationResult:
    """
    Outcome of validating a chord_df.

    Holds the overall verdict, the collected errors and warnings, and the
    format the frame was validated as.
    """

    is_valid: bool
    errors: list[ValidationError] = field(default_factory=list)
    warnings: list[ValidationError] = field(default_factory=list)
    format_detected: Literal["joined", "split", "rn", "unknown"] = "unknown"

    def raise_if_invalid(self) -> None:
        """Raise ValueError listing every error message; do nothing if valid."""
        if self.is_valid:
            return
        bullet_lines = "\n".join(f" - {problem.message}" for problem in self.errors)
        raise ValueError("chord_df validation failed:\n" + bullet_lines)
<factory>, warnings: list[music_df.chord_df.ValidationError] = <factory>, format_detected: Literal['joined', 'split', 'rn', 'unknown'] = 'unknown') Instance variables
var errors : list[ValidationError]var format_detected : Literal['joined', 'split', 'rn', 'unknown']var is_valid : boolvar warnings : list[ValidationError]
Methods
def raise_if_invalid(self) ‑> None-
Expand source code
def raise_if_invalid(self) -> None:
    """Raise ValueError listing every error message; do nothing if valid."""
    if self.is_valid:
        return
    bullet_lines = "\n".join(f" - {problem.message}" for problem in self.errors)
    raise ValueError("chord_df validation failed:\n" + bullet_lines)