tencentcloud.CiMediaSpeechRecognitionTemplate
Provides a resource to create a CI media_speech_recognition_template.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as tencentcloud from "@pulumi/tencentcloud";
const mediaSpeechRecognitionTemplate = new tencentcloud.CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate", {
bucket: "terraform-ci-1308919341",
speechRecognition: {
channelNum: "1",
convertNumMode: "0",
engineModelType: "16k_zh",
filterDirty: "0",
filterModal: "1",
filterPunc: "0",
outputFileType: "txt",
resTextFormat: "1",
speakerDiarization: "1",
speakerNumber: "0",
},
});
import pulumi
import pulumi_tencentcloud as tencentcloud
media_speech_recognition_template = tencentcloud.CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate",
bucket="terraform-ci-1308919341",
speech_recognition={
"channel_num": "1",
"convert_num_mode": "0",
"engine_model_type": "16k_zh",
"filter_dirty": "0",
"filter_modal": "1",
"filter_punc": "0",
"output_file_type": "txt",
"res_text_format": "1",
"speaker_diarization": "1",
"speaker_number": "0",
})
package main
import (
"github.com/pulumi/pulumi-terraform-provider/sdks/go/tencentcloud/tencentcloud"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := tencentcloud.NewCiMediaSpeechRecognitionTemplate(ctx, "mediaSpeechRecognitionTemplate", &tencentcloud.CiMediaSpeechRecognitionTemplateArgs{
Bucket: pulumi.String("terraform-ci-1308919341"),
SpeechRecognition: &tencentcloud.CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs{
ChannelNum: pulumi.String("1"),
ConvertNumMode: pulumi.String("0"),
EngineModelType: pulumi.String("16k_zh"),
FilterDirty: pulumi.String("0"),
FilterModal: pulumi.String("1"),
FilterPunc: pulumi.String("0"),
OutputFileType: pulumi.String("txt"),
ResTextFormat: pulumi.String("1"),
SpeakerDiarization: pulumi.String("1"),
SpeakerNumber: pulumi.String("0"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Tencentcloud = Pulumi.Tencentcloud;
return await Deployment.RunAsync(() =>
{
var mediaSpeechRecognitionTemplate = new Tencentcloud.CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate", new()
{
Bucket = "terraform-ci-1308919341",
SpeechRecognition = new Tencentcloud.Inputs.CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
{
ChannelNum = "1",
ConvertNumMode = "0",
EngineModelType = "16k_zh",
FilterDirty = "0",
FilterModal = "1",
FilterPunc = "0",
OutputFileType = "txt",
ResTextFormat = "1",
SpeakerDiarization = "1",
SpeakerNumber = "0",
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.tencentcloud.CiMediaSpeechRecognitionTemplate;
import com.pulumi.tencentcloud.CiMediaSpeechRecognitionTemplateArgs;
import com.pulumi.tencentcloud.inputs.CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var mediaSpeechRecognitionTemplate = new CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate", CiMediaSpeechRecognitionTemplateArgs.builder()
.bucket("terraform-ci-1308919341")
.speechRecognition(CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs.builder()
.channelNum("1")
.convertNumMode("0")
.engineModelType("16k_zh")
.filterDirty("0")
.filterModal("1")
.filterPunc("0")
.outputFileType("txt")
.resTextFormat("1")
.speakerDiarization("1")
.speakerNumber("0")
.build())
.build());
}
}
resources:
mediaSpeechRecognitionTemplate:
type: tencentcloud:CiMediaSpeechRecognitionTemplate
properties:
bucket: terraform-ci-1308919341
speechRecognition:
channelNum: '1'
convertNumMode: '0'
engineModelType: 16k_zh
filterDirty: '0'
filterModal: '1'
filterPunc: '0'
outputFileType: txt
resTextFormat: '1'
speakerDiarization: '1'
speakerNumber: '0'
Create CiMediaSpeechRecognitionTemplate Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new CiMediaSpeechRecognitionTemplate(name: string, args: CiMediaSpeechRecognitionTemplateArgs, opts?: CustomResourceOptions);
@overload
def CiMediaSpeechRecognitionTemplate(resource_name: str,
args: CiMediaSpeechRecognitionTemplateArgs,
opts: Optional[ResourceOptions] = None)
@overload
def CiMediaSpeechRecognitionTemplate(resource_name: str,
opts: Optional[ResourceOptions] = None,
bucket: Optional[str] = None,
speech_recognition: Optional[CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs] = None,
ci_media_speech_recognition_template_id: Optional[str] = None,
name: Optional[str] = None)
func NewCiMediaSpeechRecognitionTemplate(ctx *Context, name string, args CiMediaSpeechRecognitionTemplateArgs, opts ...ResourceOption) (*CiMediaSpeechRecognitionTemplate, error)
public CiMediaSpeechRecognitionTemplate(string name, CiMediaSpeechRecognitionTemplateArgs args, CustomResourceOptions? opts = null)
public CiMediaSpeechRecognitionTemplate(String name, CiMediaSpeechRecognitionTemplateArgs args)
public CiMediaSpeechRecognitionTemplate(String name, CiMediaSpeechRecognitionTemplateArgs args, CustomResourceOptions options)
type: tencentcloud:CiMediaSpeechRecognitionTemplate
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args CiMediaSpeechRecognitionTemplateArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
CiMediaSpeechRecognitionTemplate Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
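For example, the speech_recognition input from Example Usage can also be built with the typed argument class. This is a minimal sketch (the bucket value is the placeholder from the example above, and it assumes the CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs class is exported at the package top level, as referenced in the constructor signature above):
import pulumi_tencentcloud as tencentcloud
# Same template as the dictionary-literal example, using the typed args class.
template = tencentcloud.CiMediaSpeechRecognitionTemplate("mediaSpeechRecognitionTemplate",
    bucket="terraform-ci-1308919341",
    speech_recognition=tencentcloud.CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs(
        channel_num="1",
        engine_model_type="16k_zh",
        output_file_type="txt",
        res_text_format="1"))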
The CiMediaSpeechRecognitionTemplate resource accepts the following input properties:
- Bucket string
- bucket name.
- SpeechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- CiMediaSpeechRecognitionTemplateId string
- ID of the resource.
- Name string
- The template name only supports Chinese, English, numbers, _, - and *.
- Bucket string
- bucket name.
- SpeechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- audio configuration.
- CiMediaSpeechRecognitionTemplateId string
- ID of the resource.
- Name string
- The template name only supports Chinese, English, numbers, _, - and *.
- bucket String
- bucket name.
- speechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- ciMediaSpeechRecognitionTemplateId String
- ID of the resource.
- name String
- The template name only supports Chinese, English, numbers, _, - and *.
- bucket string
- bucket name.
- speechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- ciMediaSpeechRecognitionTemplateId string
- ID of the resource.
- name string
- The template name only supports Chinese, English, numbers, _, - and *.
- bucket str
- bucket name.
- speech_recognition CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- audio configuration.
- ci_media_speech_recognition_template_id str
- ID of the resource.
- name str
- The template name only supports Chinese, English, numbers, _, - and *.
- bucket String
- bucket name.
- speechRecognition Property Map
- audio configuration.
- ciMediaSpeechRecognitionTemplateId String
- ID of the resource.
- name String
- The template name only supports Chinese, English, numbers, _, - and *.
Outputs
All input properties are implicitly available as output properties. Additionally, the CiMediaSpeechRecognitionTemplate resource produces the following output properties:
- Id string
- The provider-assigned unique ID for this managed resource.
- Id string
- The provider-assigned unique ID for this managed resource.
- id String
- The provider-assigned unique ID for this managed resource.
- id string
- The provider-assigned unique ID for this managed resource.
- id str
- The provider-assigned unique ID for this managed resource.
- id String
- The provider-assigned unique ID for this managed resource.
Look up Existing CiMediaSpeechRecognitionTemplate Resource
Get an existing CiMediaSpeechRecognitionTemplate resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: CiMediaSpeechRecognitionTemplateState, opts?: CustomResourceOptions): CiMediaSpeechRecognitionTemplate
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
bucket: Optional[str] = None,
ci_media_speech_recognition_template_id: Optional[str] = None,
name: Optional[str] = None,
speech_recognition: Optional[CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs] = None) -> CiMediaSpeechRecognitionTemplate
func GetCiMediaSpeechRecognitionTemplate(ctx *Context, name string, id IDInput, state *CiMediaSpeechRecognitionTemplateState, opts ...ResourceOption) (*CiMediaSpeechRecognitionTemplate, error)
public static CiMediaSpeechRecognitionTemplate Get(string name, Input<string> id, CiMediaSpeechRecognitionTemplateState? state, CustomResourceOptions? opts = null)
public static CiMediaSpeechRecognitionTemplate get(String name, Output<String> id, CiMediaSpeechRecognitionTemplateState state, CustomResourceOptions options)
resources:
  _:
    type: tencentcloud:CiMediaSpeechRecognitionTemplate
    get:
      id: ${id}
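A minimal lookup sketch in Python (the resource name and ID are placeholders; the ID is assumed to follow the bucket#templateId form described in the Import section below):
import pulumi
import pulumi_tencentcloud as tencentcloud
# Look up an existing template by its provider ID and re-export its name.
existing = tencentcloud.CiMediaSpeechRecognitionTemplate.get("existingTemplate",
    "terraform-ci-1308919341#t1d794430f2f1f4350b11e905ce2c6167e")
pulumi.export("templateName", existing.name)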
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Bucket string
- bucket name.
- CiMediaSpeechRecognitionTemplateId string
- ID of the resource.
- Name string
- The template name only supports Chinese, English, numbers, _, - and *.
- SpeechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- Bucket string
- bucket name.
- CiMediaSpeechRecognitionTemplateId string
- ID of the resource.
- Name string
- The template name only supports Chinese, English, numbers, _, - and *.
- SpeechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- audio configuration.
- bucket String
- bucket name.
- ciMediaSpeechRecognitionTemplateId String
- ID of the resource.
- name String
- The template name only supports Chinese, English, numbers, _, - and *.
- speechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- bucket string
- bucket name.
- ciMediaSpeechRecognitionTemplateId string
- ID of the resource.
- name string
- The template name only supports Chinese, English, numbers, _, - and *.
- speechRecognition CiMediaSpeechRecognitionTemplateSpeechRecognition
- audio configuration.
- bucket str
- bucket name.
- ci_media_speech_recognition_template_id str
- ID of the resource.
- name str
- The template name only supports Chinese, English, numbers, _, - and *.
- speech_recognition CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- audio configuration.
- bucket String
- bucket name.
- ciMediaSpeechRecognitionTemplateId String
- ID of the resource.
- name String
- The template name only supports Chinese, English, numbers, _, - and *.
- speechRecognition Property Map
- audio configuration.
Supporting Types
CiMediaSpeechRecognitionTemplateSpeechRecognition, CiMediaSpeechRecognitionTemplateSpeechRecognitionArgs
- ChannelNum string
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- EngineModelType string
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- ConvertNumMode string
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- FilterDirty string
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- FilterModal string
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- FilterPunc string
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- OutputFileType string
- Output file type; valid values are txt and srt. The default is txt.
- ResTextFormat string
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- SpeakerDiarization string
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- SpeakerNumber string
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- ChannelNum string
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- EngineModelType string
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- ConvertNumMode string
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- FilterDirty string
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- FilterModal string
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- FilterPunc string
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- OutputFileType string
- Output file type; valid values are txt and srt. The default is txt.
- ResTextFormat string
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- SpeakerDiarization string
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- SpeakerNumber string
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- channelNum String
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- engineModelType String
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- convertNumMode String
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- filterDirty String
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- filterModal String
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- filterPunc String
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- outputFileType String
- Output file type; valid values are txt and srt. The default is txt.
- resTextFormat String
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- speakerDiarization String
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- speakerNumber String
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- channelNum string
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- engineModelType string
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- convertNumMode string
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- filterDirty string
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- filterModal string
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- filterPunc string
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- outputFileType string
- Output file type; valid values are txt and srt. The default is txt.
- resTextFormat string
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- speakerDiarization string
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- speakerNumber string
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- channel_num str
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- engine_model_type str
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- convert_num_mode str
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- filter_dirty str
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- filter_modal str
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- filter_punc str
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- output_file_type str
- Output file type; valid values are txt and srt. The default is txt.
- res_text_format str
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- speaker_diarization str
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- speaker_number str
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
- channelNum String
- Number of voice channels: 1 means mono (non-telephone engine model types support mono only); 2 means dual channel (only the 8k_zh engine model supports dual channel, which should correspond to the two sides of the call).
- engineModelType String
- Engine model type, divided into telephone and non-telephone scenarios. Telephone scenarios: 8k_zh: 8k telephone Mandarin Chinese general (can be used for dual-channel audio); 8k_zh_s: 8k telephone Mandarin Chinese speaker separation (mono audio only); 8k_en: 8k telephone English. Non-telephone scenarios: 16k_zh: 16k Mandarin Chinese; 16k_zh_video: 16k audio and video field; 16k_en: 16k English; 16k_ca: 16k Cantonese; 16k_ja: 16k Japanese; 16k_zh_edu: Chinese education; 16k_en_edu: English education; 16k_zh_medical: medical; 16k_th: Thai; 16k_zh_dialect: multi-dialect (supports 23 dialects).
- convertNumMode String
- Whether to intelligently convert Chinese numbers to Arabic numerals (currently supported by the Mandarin Chinese engines): 0 means no conversion (Chinese numbers are output directly); 1 means intelligent conversion to Arabic numerals according to the scene; 3 means math-related number conversion is enabled. The default value is 0.
- filterDirty String
- Whether to filter profanity (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means filter; 2 means replace profanity with *. The default value is 0.
- filterModal String
- Whether to filter modal particles (currently supported by the Mandarin Chinese engines): 0 means do not filter; 1 means partial filtering; 2 means strict filtering. The default value is 0.
- filterPunc String
- Whether to filter punctuation (currently supported by the Mandarin Chinese engines): 0 means no filtering; 1 means filter end-of-sentence punctuation; 2 means filter all punctuation. The default value is 0.
- outputFileType String
- Output file type; valid values are txt and srt. The default is txt.
- resTextFormat String
- Form in which the recognition result is returned: 0 means recognition result text with segment-level timestamps; 1 means word-level detailed recognition results without punctuation, including speech rate values (a list of word-level timestamps, typically used for subtitle generation); 2 means word-level detailed recognition results including punctuation and speech rate values.
- speakerDiarization String
- Whether to enable speaker separation: 0 means disabled; 1 means enabled (only supported for 8k_zh, 16k_zh and 16k_zh_video with mono audio). The default value is 0. Note: for 8k telephone scenarios it is recommended to distinguish the two parties with dual channels; setting ChannelNum=2 is sufficient, and speaker separation does not need to be enabled.
- speakerNumber String
- Number of speakers to separate (must be used together with speaker separation enabled). Value range: 0-10, where 0 means automatic separation (currently supports up to 6 speakers) and 1-10 specifies the number of speakers to separate. The default value is 0.
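These options interact: speaker separation applies only to mono audio on the 8k_zh, 16k_zh and 16k_zh_video engines, while 8k telephone audio should use two channels instead. A minimal sketch of a template that enables speaker separation (the resource name and bucket are placeholders reused from Example Usage):
import pulumi_tencentcloud as tencentcloud
# Hypothetical template with speaker separation on a mono 16k Mandarin engine;
# for 8k telephone audio, set channel_num="2" instead of enabling diarization.
diarized = tencentcloud.CiMediaSpeechRecognitionTemplate("diarizedTemplate",
    bucket="terraform-ci-1308919341",
    speech_recognition={
        "channel_num": "1",
        "engine_model_type": "16k_zh",
        "res_text_format": "2",
        "output_file_type": "srt",
        "speaker_diarization": "1",
        "speaker_number": "0",  # automatic separation, currently up to 6 speakers
    })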
Import
ci media_speech_recognition_template can be imported using the bucket#templateId, e.g.
$ pulumi import tencentcloud:index/ciMediaSpeechRecognitionTemplate:CiMediaSpeechRecognitionTemplate media_speech_recognition_template terraform-ci-xxxxxx#t1d794430f2f1f4350b11e905ce2c6167e
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- tencentcloud tencentcloudstack/terraform-provider-tencentcloud
- License
- Notes
- This Pulumi package is based on the tencentcloud Terraform Provider.