Skip to content

Commit 1ca1984

Browse files
committed
Beat sync with allin1 and pytsmod; chord-chroma denoising
1 parent 1a5c9bd commit 1ca1984

File tree

117 files changed

+340
-747
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

117 files changed

+340
-747
lines changed

__pycache__/predict.cpython-39.pyc

4.4 KB
Binary file not shown.
10 Bytes
Binary file not shown.
10 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 Bytes
Binary file not shown.
10 Bytes
Binary file not shown.
10 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 Bytes
Binary file not shown.

audiocraft/models/musicgen copy.py

-484
This file was deleted.

audiocraft/models/musicgen.py

+52-1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,30 @@ def generate_continuation(self, prompt: torch.Tensor, prompt_sample_rate: int,
252252
if return_tokens:
253253
return self.generate_audio(tokens), tokens
254254
return self.generate_audio(tokens)
255+
256+
def generate_continuation_with_audio_token(
        self, prompt,
        descriptions: tp.Optional[tp.List[tp.Optional[str]]] = None,
        progress: bool = False,
        return_tokens: bool = False,
) -> tp.Union[torch.Tensor, tp.Tuple[torch.Tensor, torch.Tensor]]:
    """Generate samples that continue a prompt given as audio tokens.

    The prompt is handed to `_generate_tokens` unchanged;
    `_prepare_tokens_and_attributes` is called without a prompt, so this
    method performs no encoding of the prompt itself.

    Args:
        prompt: Prompt tokens to continue. `len(prompt)` is used as the batch
            size when `descriptions` is None.
            NOTE(review): presumably a [B, K, T] token tensor - confirm against callers.
        descriptions (list of str, optional): A list of strings used as text conditioning.
            Defaults to None, in which case one `None` description per prompt item is used.
        progress (bool, optional): Flag to display progress of the generation process.
            Defaults to False.
        return_tokens (bool, optional): If True, also return the generated tokens.
            Defaults to False.

    Returns:
        The output of `generate_audio` for the generated tokens, or a tuple
        `(audio, tokens)` when `return_tokens` is True.
    """
    if descriptions is None:
        descriptions = [None] * len(prompt)

    # No prompt is given to the helper, so it must not produce prompt tokens.
    attributes, prompt_tokens = self._prepare_tokens_and_attributes(descriptions, None)
    assert prompt_tokens is None, "expected no prompt tokens when no prompt is passed"

    # The caller-supplied tokens are used directly as the prompt.
    tokens = self._generate_tokens(attributes, prompt, progress)
    audio = self.generate_audio(tokens)
    if return_tokens:
        return audio, tokens
    return audio
255279

256280
def generate_continuation_with_audio_chroma(self, prompt: torch.Tensor, prompt_sample_rate: int, melody_wavs: MelodyType,
257281
melody_sample_rate: int, descriptions: tp.Optional[tp.List[tp.Optional[str]]] = None,
@@ -298,7 +322,7 @@ def generate_continuation_with_audio_chroma(self, prompt: torch.Tensor, prompt_s
298322
return self.generate_audio(tokens), tokens
299323
return self.generate_audio(tokens)
300324

301-
def generate_continuation_with_audio_tokens(self, prompt, melody_wavs: MelodyType,
325+
def generate_continuation_with_audio_tokens_and_audio_chroma(self, prompt, melody_wavs: MelodyType,
302326
melody_sample_rate: int, descriptions: tp.Optional[tp.List[tp.Optional[str]]] = None,
303327
progress: bool = False, return_tokens: bool = False) \
304328
-> tp.Union[torch.Tensor, tp.Tuple[torch.Tensor, torch.Tensor]]:
@@ -369,6 +393,33 @@ def generate_continuation_with_text_chroma(self, prompt: torch.Tensor, prompt_sa
369393
if return_tokens:
370394
return self.generate_audio(tokens), tokens
371395
return self.generate_audio(tokens)
396+
397+
def generate_continuation_with_audio_tokens_and_text_chroma(
        self, prompt, descriptions: tp.List[str],
        chord_texts: tp.Union[tp.List[str], str],
        progress: bool = False,
        bpm: tp.Union[float, int, tp.List[float], tp.List[int]] = 120,
        meter: tp.Optional[tp.Union[int, tp.List[int]]] = 4,
        return_tokens: bool = False,
) -> tp.Union[torch.Tensor, tp.Tuple[torch.Tensor, torch.Tensor]]:
    """Generate samples continuing a token prompt, conditioned on text and chord text.

    The prompt is handed to `_generate_tokens` unchanged;
    `_prepare_tokens_and_attributes` is called without a prompt and only
    builds the conditioning attributes.

    Args:
        prompt: Prompt tokens to continue.
            NOTE(review): presumably a [B, K, T] token tensor - confirm against callers.
        descriptions (list of str): A list of strings used as text conditioning.
        chord_texts (list of str or str): Chord text(s) passed to
            `_prepare_tokens_and_attributes` as its `melody_wavs` argument.
            A single string is wrapped into a one-element list.
        progress (bool, optional): Flag to display progress of the generation process.
            Defaults to False.
        bpm (float, int or list, optional): Forwarded to `_prepare_tokens_and_attributes`.
            Defaults to 120. NOTE(review): presumably tempo per sample - confirm.
        meter (int or list of int, optional): Forwarded to `_prepare_tokens_and_attributes`.
            Defaults to 4. NOTE(review): presumably beats per bar - confirm.
        return_tokens (bool, optional): If True, also return the generated tokens.
            Defaults to False.

    Returns:
        The output of `generate_audio` for the generated tokens, or a tuple
        `(audio, tokens)` when `return_tokens` is True.
    """
    if isinstance(chord_texts, str):
        chord_texts = [chord_texts]

    # The helper's prompt-token output is discarded: the caller-supplied
    # tokens are used as the prompt instead.
    attributes, _ = self._prepare_tokens_and_attributes(
        descriptions=descriptions, prompt=None,
        melody_wavs=chord_texts, bpm=bpm, meter=meter)

    tokens = self._generate_tokens(attributes, prompt, progress)
    audio = self.generate_audio(tokens)
    if return_tokens:
        return audio, tokens
    return audio
372423

373424
def generate_with_text_chroma(self, descriptions: tp.List[str], chord_texts: tp.Union[tp.List[str],str],
374425
progress: bool = False, bpm: tp.Union[float,int,tp.List[float],tp.List[int]] = 120, meter: tp.Optional[tp.Union[int,tp.List[int]]] = 4,
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

audiocraft/modules/chord_chroma.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ def __init__(self, device, sample_rate, max_duration, chroma_len, n_chroma, winh
3939

4040
self.chords = chords.Chords()
4141
self.device = device
42+
43+
self.denoise_window_size = 7
44+
self.denoise_threshold = 0.5
45+
4246
self.model = BTC_model(config=self.config.model).to(device)
4347
if os.path.isfile(self.model_file):
4448
checkpoint = torch.load(self.model_file)
@@ -122,7 +126,7 @@ def forward(self, wavs:torch.Tensor) -> torch.Tensor:
122126
break
123127

124128
strlines = ''.join(lines)
125-
129+
# print(lines)
126130
chroma = []
127131

128132
count = 0
@@ -149,6 +153,19 @@ def forward(self, wavs:torch.Tensor) -> torch.Tensor:
149153
chroma.append(multihot)
150154
count += 1
151155

152-
chromas.append(torch.stack(chroma, dim=0))
153-
156+
chroma = torch.stack(chroma, dim=0)
157+
158+
# Denoising chroma
159+
kernel = torch.ones(self.denoise_window_size)/self.denoise_window_size
160+
161+
filtered_signals = []
162+
for i in range(chroma.shape[-1]):
163+
filtered_signals.append(torch.nn.functional.conv1d(chroma[...,i].unsqueeze(0),
164+
kernel.unsqueeze(0).unsqueeze(0).to(chroma.device),
165+
padding=(self.denoise_window_size - 1) // 2))
166+
filtered_signals = torch.stack(filtered_signals, dim=-1)
167+
filtered_signals = filtered_signals > self.denoise_threshold
168+
169+
chromas.append(filtered_signals.squeeze(0))
170+
154171
return torch.stack(chromas, dim=0).to(self.device)

audiocraft/modules/conditioners.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,8 @@ def _compute_wav_embedding(self, wav: torch.Tensor, sample_rate: int) -> torch.T
842842
if wav.shape[-1] == 1:
843843
# print("1515151")
844844
return self._extract_chroma(wav)
845-
stems = self._get_stemmed_wav(wav, sample_rate)
845+
# stems = wav # Not separating the tracks
846+
stems = self._get_stemmed_wav(wav, sample_rate)
846847
chroma = self._extract_chroma(stems)
847848
# print("2727272")
848849
return chroma
Binary file not shown.
Binary file not shown.
10 Bytes
Binary file not shown.
10 Bytes
Binary file not shown.
10 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 Bytes
Binary file not shown.
Binary file not shown.
Binary file not shown.

bpm_detector.py

-177
This file was deleted.

cog.yaml

+6-17
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ build:
99
# a list of ubuntu apt packages to install
1010
system_packages:
1111
- "ffmpeg"
12-
- "portaudio19-dev"
12+
# - "portaudio19-dev"
1313
# - "libgl1-mesa-glx"
1414
# - "libglib2.0-0"
1515

@@ -26,7 +26,6 @@ build:
2626
- "hydra_colorlog"
2727
- "julius"
2828
- "num2words"
29-
- "numpy"
3029
- "sentencepiece"
3130
- "spacy==3.5.2"
3231
- "torchaudio>=2.0.0"
@@ -43,23 +42,13 @@ build:
4342
- "tensorboard>=1.15"
4443
- "pyaudio==0.2.13"
4544
- "git+https://github.com/CPJKU/madmom"
46-
- "BeatNet==1.1.0"
47-
- "contourpy==1.0.7"
48-
- "cycler==0.11.0"
49-
- "fonttools==4.38.0"
50-
- "kiwisolver==1.4.4"
51-
- "matplotlib==3.6.3"
52-
# - "numpy==1.24.2"
53-
- "packaging==23.0"
54-
- "Pillow==9.4.0"
55-
- "pyparsing==3.0.9"
56-
- "python-dateutil==2.8.2"
57-
- "PyWavelets==1.4.1"
58-
- "scipy==1.10.0"
59-
- "six==1.16.0"
45+
- "ninja"
46+
- "allin1"
47+
- "pytsmod"
6048

6149
# commands run after the environment is setup
62-
# run:
50+
run:
51+
- "pip3 install natten -f https://shi-labs.com/natten/wheels/cu117/torch2.0.0/index.html"
6352
# - "apt-get update && apt-get install -y ffmpeg"
6453
# - "apt-get install unzip"
6554
# - "python -m pip install pip --upgrade"

0 commit comments

Comments
 (0)