SIGN IN SIGN UP
2noise / ChatTTS UNCLAIMED

A generative speech model for daily dialogue.

0 0 6 Python
import argparse
import datetime
import os
import zipfile
from io import BytesIO
import requests
chattts_service_host = os.environ.get("CHATTTS_SERVICE_HOST", "127.0.0.1")
chattts_service_port = os.environ.get("CHATTTS_SERVICE_PORT", "9900")
CHATTTS_URL = f"http://{chattts_service_host}:{chattts_service_port}/generate_voice"
def parse_arguments():
parser = argparse.ArgumentParser(description="HTTP client for ChatTTS service")
parser.add_argument(
"--text", type=str, nargs="+", required=True, help="Text to synthesize"
)
parser.add_argument(
"--audio_seed", type=int, required=True, help="Audio generation seed"
)
parser.add_argument(
"--text_seed", type=int, required=True, help="Text generation seed"
)
parser.add_argument(
"--stream", type=bool, default=False, help="Enable/disable streaming"
)
parser.add_argument("--lang", type=str, default=None, help="Language code for text")
parser.add_argument(
"--skip_refine_text", type=bool, default=True, help="Skip text refinement"
)
parser.add_argument(
"--refine_text_only", type=bool, default=False, help="Only refine text"
)
parser.add_argument(
"--use_decoder", type=bool, default=True, help="Use decoder during inference"
)
parser.add_argument(
"--do_text_normalization",
type=bool,
default=True,
help="Enable text normalization",
)
parser.add_argument(
"--do_homophone_replacement",
type=bool,
default=False,
help="Enable homophone replacement",
)
parser.add_argument(
"--tgt",
type=str,
default="./output",
help="Target directory to save output files",
)
parser.add_argument(
"--filename",
type=str,
default="test.mp3",
help="Target directory to save output files",
)
# Refinement text parameters
parser.add_argument(
"--refine_prompt", type=str, default="", help="Prompt for text refinement"
)
parser.add_argument(
"--refine_top_P",
type=float,
default=0.7,
help="Top P value for text refinement",
)
parser.add_argument(
"--refine_top_K", type=int, default=20, help="Top K value for text refinement"
)
parser.add_argument(
"--refine_temperature",
type=float,
default=0.7,
help="Temperature for text refinement",
)
parser.add_argument(
"--refine_repetition_penalty",
type=float,
default=1.0,
help="Repetition penalty for text refinement",
)
parser.add_argument(
"--refine_max_new_token",
type=int,
default=384,
help="Max new tokens for text refinement",
)
parser.add_argument(
"--refine_min_new_token",
type=int,
default=0,
help="Min new tokens for text refinement",
)
parser.add_argument(
"--refine_show_tqdm",
type=bool,
default=True,
help="Show progress bar for text refinement",
)
parser.add_argument(
"--refine_ensure_non_empty",
type=bool,
default=True,
help="Ensure non-empty output",
)
parser.add_argument(
"--refine_stream_batch",
type=int,
default=24,
help="Stream batch size for refinement",
)
# Infer code parameters
parser.add_argument(
"--infer_prompt", type=str, default="[speed_5]", help="Prompt for inference"
)
parser.add_argument(
"--infer_top_P", type=float, default=0.1, help="Top P value for inference"
)
parser.add_argument(
"--infer_top_K", type=int, default=20, help="Top K value for inference"
)
parser.add_argument(
"--infer_temperature", type=float, default=0.3, help="Temperature for inference"
)
parser.add_argument(
"--infer_repetition_penalty",
type=float,
default=1.05,
help="Repetition penalty for inference",
)
parser.add_argument(
"--infer_max_new_token",
type=int,
default=2048,
help="Max new tokens for inference",
)
parser.add_argument(
"--infer_min_new_token",
type=int,
default=0,
help="Min new tokens for inference",
)
parser.add_argument(
"--infer_show_tqdm",
type=bool,
default=True,
help="Show progress bar for inference",
)
parser.add_argument(
"--infer_ensure_non_empty",
type=bool,
default=True,
help="Ensure non-empty output",
)
parser.add_argument(
"--infer_stream_batch",
type=bool,
default=True,
help="Stream batch for inference",
)
parser.add_argument(
"--infer_spk_emb",
type=str,
default=None,
help="Speaker embedding for inference",
)
return parser.parse_args()
def main():
args = parse_arguments()
# Main infer params
body = {
"text": args.text,
"stream": args.stream,
"lang": args.lang,
"filename": args.filename,
"skip_refine_text": args.skip_refine_text,
"refine_text_only": args.refine_text_only,
"use_decoder": args.use_decoder,
"audio_seed": args.audio_seed,
"text_seed": args.text_seed,
"do_text_normalization": args.do_text_normalization,
"do_homophone_replacement": args.do_homophone_replacement,
}
# Refinement text parameters
params_refine_text = {
"prompt": args.refine_prompt,
"top_P": args.refine_top_P,
"top_K": args.refine_top_K,
"temperature": args.refine_temperature,
"repetition_penalty": args.refine_repetition_penalty,
"max_new_token": args.refine_max_new_token,
"min_new_token": args.refine_min_new_token,
"show_tqdm": args.refine_show_tqdm,
"ensure_non_empty": args.refine_ensure_non_empty,
"stream_batch": args.refine_stream_batch,
}
body["params_refine_text"] = params_refine_text
# Infer code parameters
params_infer_code = {
"prompt": args.infer_prompt,
"top_P": args.infer_top_P,
"top_K": args.infer_top_K,
"temperature": args.infer_temperature,
"repetition_penalty": args.infer_repetition_penalty,
"max_new_token": args.infer_max_new_token,
"min_new_token": args.infer_min_new_token,
"show_tqdm": args.infer_show_tqdm,
"ensure_non_empty": args.infer_ensure_non_empty,
"stream_batch": args.infer_stream_batch,
"spk_emb": args.infer_spk_emb,
}
body["params_infer_code"] = params_infer_code
try:
response = requests.post(CHATTTS_URL, json=body)
response.raise_for_status()
with zipfile.ZipFile(BytesIO(response.content), "r") as zip_ref:
tgt = args.tgt
# filename=args.filename
os.makedirs(tgt, exist_ok=True)
zip_ref.extractall(tgt)
print(f"Extracted files:{tgt}/{filename}")
# print(tgt)
except requests.exceptions.RequestException as e:
print(f"Request Error: {e}")
if __name__ == "__main__":
main()