Skip to content

Schemas

Models module of Wordcab Transcribe.

AudioRequest

Bases: BaseRequest

Request model for the ASR audio file and url endpoint.

Source code in src/wordcab_transcribe/models.py
class AudioRequest(BaseRequest):
    """Request model for the ASR audio file and url endpoint."""

    # The only field added on top of those inherited from BaseRequest;
    # it is optional and defaults to False.
    multi_channel: bool = False

    class Config:
        """Pydantic config class."""

        # Sample request body attached to the model's JSON schema under the
        # "example" key. `vocab` is filled in for illustration only.
        json_schema_extra = {
            "example": {
                "offset_start": None,
                "offset_end": None,
                "num_speakers": -1,
                "diarization": False,
                "source_lang": "en",
                "timestamps": "s",
                "vocab": [
                    "custom company name",
                    "custom product name",
                    "custom co-worker name",
                ],
                "word_timestamps": False,
                "internal_vad": False,
                "repetition_penalty": 1.2,
                "compression_ratio_threshold": 2.4,
                "log_prob_threshold": -1.0,
                "no_speech_threshold": 0.6,
                "condition_on_previous_text": True,
                "multi_channel": False,
            }
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample request body attached to the model's JSON schema under the
    # "example" key. `vocab` is filled in for illustration only.
    json_schema_extra = {
        "example": {
            "offset_start": None,
            "offset_end": None,
            "num_speakers": -1,
            "diarization": False,
            "source_lang": "en",
            "timestamps": "s",
            "vocab": [
                "custom company name",
                "custom product name",
                "custom co-worker name",
            ],
            "word_timestamps": False,
            "internal_vad": False,
            "repetition_penalty": 1.2,
            "compression_ratio_threshold": 2.4,
            "log_prob_threshold": -1.0,
            "no_speech_threshold": 0.6,
            "condition_on_previous_text": True,
            "multi_channel": False,
        }
    }

AudioResponse

Bases: BaseResponse

Response model for the ASR audio file and url endpoint.

Source code in src/wordcab_transcribe/models.py
class AudioResponse(BaseResponse):
    """Response model for the ASR audio file and url endpoint."""

    # Required (no default); added on top of the fields inherited from
    # BaseResponse.
    multi_channel: bool

    class Config:
        """Pydantic config class."""

        # Sample response body attached to the model's JSON schema under
        # the "example" key: two utterances plus the echoed request options
        # and the per-stage process times.
        json_schema_extra = {
            "example": {
                "utterances": [
                    {
                        "text": "Hello World!",
                        "start": 0.345,
                        "end": 1.234,
                        "speaker": 0,
                    },
                    {
                        "text": "Wordcab is awesome",
                        "start": 1.234,
                        "end": 2.678,
                        "speaker": 1,
                    },
                ],
                "audio_duration": 2.678,
                "offset_start": None,
                "offset_end": None,
                "num_speakers": -1,
                "diarization": False,
                "source_lang": "en",
                "timestamps": "s",
                "vocab": [
                    "custom company name",
                    "custom product name",
                    "custom co-worker name",
                ],
                "word_timestamps": False,
                "internal_vad": False,
                "repetition_penalty": 1.2,
                "compression_ratio_threshold": 2.4,
                "log_prob_threshold": -1.0,
                "no_speech_threshold": 0.6,
                "condition_on_previous_text": True,
                "process_times": {
                    "total": 2.678,
                    "transcription": 2.439,
                    "diarization": None,
                    "post_processing": 0.239,
                },
                "multi_channel": False,
            }
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample response body attached to the model's JSON schema under the
    # "example" key: two utterances plus the echoed request options and
    # the per-stage process times.
    json_schema_extra = {
        "example": {
            "utterances": [
                {
                    "text": "Hello World!",
                    "start": 0.345,
                    "end": 1.234,
                    "speaker": 0,
                },
                {
                    "text": "Wordcab is awesome",
                    "start": 1.234,
                    "end": 2.678,
                    "speaker": 1,
                },
            ],
            "audio_duration": 2.678,
            "offset_start": None,
            "offset_end": None,
            "num_speakers": -1,
            "diarization": False,
            "source_lang": "en",
            "timestamps": "s",
            "vocab": [
                "custom company name",
                "custom product name",
                "custom co-worker name",
            ],
            "word_timestamps": False,
            "internal_vad": False,
            "repetition_penalty": 1.2,
            "compression_ratio_threshold": 2.4,
            "log_prob_threshold": -1.0,
            "no_speech_threshold": 0.6,
            "condition_on_previous_text": True,
            "process_times": {
                "total": 2.678,
                "transcription": 2.439,
                "diarization": None,
                "post_processing": 0.239,
            },
            "multi_channel": False,
        }
    }

BaseRequest

Bases: BaseModel

Base request model for the API.

Source code in src/wordcab_transcribe/models.py
class BaseRequest(BaseModel):
    """Base request model for the API."""

    # Every field is optional: each one declares a default.
    offset_start: Union[float, None] = None
    offset_end: Union[float, None] = None
    # NOTE(review): -1 presumably means "let the service decide" — confirm.
    num_speakers: int = -1
    diarization: bool = False
    source_lang: str = "en"
    timestamps: Timestamps = Timestamps.seconds
    vocab: Union[List[str], None] = None
    word_timestamps: bool = False
    internal_vad: bool = False
    repetition_penalty: float = 1.2
    compression_ratio_threshold: float = 2.4
    log_prob_threshold: float = -1.0
    no_speech_threshold: float = 0.6
    condition_on_previous_text: bool = True

    @field_validator("vocab")
    @classmethod
    def validate_each_vocab_value(
        cls, value: Union[List[str], None]
    ) -> Union[List[str], None]:
        """Normalize and validate the `vocab` field.

        Args:
            value: The submitted vocabulary, or None when omitted.

        Returns:
            None when `value` is None or an empty list, otherwise `value`
            unchanged.

        Raises:
            ValueError: If any item of `value` is not a string.
        """
        # An empty list carries no vocabulary; collapse it to None so that
        # downstream code only has one "no vocab" representation.
        if not value:
            return None

        if not all(isinstance(item, str) for item in value):
            raise ValueError("`vocab` must be a list of strings.")

        return value

    class Config:
        """Pydantic config class."""

        # Sample request body attached to the model's JSON schema under the
        # "example" key. `vocab` is filled in for illustration only.
        json_schema_extra = {
            "example": {
                "offset_start": None,
                "offset_end": None,
                "num_speakers": -1,
                "diarization": False,
                "source_lang": "en",
                "timestamps": "s",
                "vocab": [
                    "custom company name",
                    "custom product name",
                    "custom co-worker name",
                ],
                "word_timestamps": False,
                "internal_vad": False,
                "repetition_penalty": 1.2,
                "compression_ratio_threshold": 2.4,
                "log_prob_threshold": -1.0,
                "no_speech_threshold": 0.6,
                "condition_on_previous_text": True,
            }
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample request body attached to the model's JSON schema under the
    # "example" key. `vocab` is filled in for illustration only.
    json_schema_extra = {
        "example": {
            "offset_start": None,
            "offset_end": None,
            "num_speakers": -1,
            "diarization": False,
            "source_lang": "en",
            "timestamps": "s",
            "vocab": [
                "custom company name",
                "custom product name",
                "custom co-worker name",
            ],
            "word_timestamps": False,
            "internal_vad": False,
            "repetition_penalty": 1.2,
            "compression_ratio_threshold": 2.4,
            "log_prob_threshold": -1.0,
            "no_speech_threshold": 0.6,
            "condition_on_previous_text": True,
        }
    }

validate_each_vocab_value(value)

Validate the value of each vocab field.

Source code in src/wordcab_transcribe/models.py
@field_validator("vocab")
@classmethod
def validate_each_vocab_value(
    cls, value: Union[List[str], None]
) -> Union[List[str], None]:
    """Normalize and validate the `vocab` field.

    Args:
        value: The submitted vocabulary, or None when omitted.

    Returns:
        None when `value` is None or an empty list, otherwise `value`
        unchanged.

    Raises:
        ValueError: If any item of `value` is not a string.
    """
    # An empty list carries no vocabulary; collapse it to None so that
    # downstream code only has one "no vocab" representation.
    if not value:
        return None

    if not all(isinstance(item, str) for item in value):
        raise ValueError("`vocab` must be a list of strings.")

    return value

BaseResponse

Bases: BaseModel

Base response model, not meant to be used directly.

Source code in src/wordcab_transcribe/models.py
class BaseResponse(BaseModel):
    """Base response model, not meant to be used directly."""

    # Every field is required: none declares a default, including the
    # nullable ones (offset_start, offset_end, vocab).
    utterances: List[Utterance]
    audio_duration: float
    offset_start: Union[float, None]
    offset_end: Union[float, None]
    num_speakers: int
    diarization: bool
    source_lang: str
    # Plain str here, whereas BaseRequest types this field as Timestamps.
    timestamps: str
    vocab: Union[List[str], None]
    word_timestamps: bool
    internal_vad: bool
    repetition_penalty: float
    compression_ratio_threshold: float
    log_prob_threshold: float
    no_speech_threshold: float
    condition_on_previous_text: bool
    process_times: ProcessTimes

CortexError

Bases: BaseModel

Error model for the Cortex API.

Source code in src/wordcab_transcribe/models.py
class CortexError(BaseModel):
    """Error model for the Cortex API."""

    # Single required field carrying the error text.
    message: str

    class Config:
        """Pydantic config class."""

        # Sample error body attached to the model's JSON schema under the
        # "example" key.
        json_schema_extra = {
            "example": {
                "message": "Error message here",
            }
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample error body attached to the model's JSON schema under the
    # "example" key.
    json_schema_extra = {
        "example": {
            "message": "Error message here",
        }
    }

CortexPayload

Bases: BaseModel

Request object for Cortex endpoint.

Source code in src/wordcab_transcribe/models.py
class CortexPayload(BaseModel):
    """Request object for Cortex endpoint."""

    # The only required field; restricted to these two literal values.
    url_type: Literal["audio_url", "youtube"]
    # Everything below is optional and has a default.
    url: Optional[str] = None
    api_key: Optional[str] = None
    offset_start: Optional[float] = None
    offset_end: Optional[float] = None
    num_speakers: Optional[int] = -1
    diarization: Optional[bool] = False
    multi_channel: Optional[bool] = False
    source_lang: Optional[str] = "en"
    timestamps: Optional[Timestamps] = Timestamps.seconds
    vocab: Union[List[str], None] = None
    word_timestamps: Optional[bool] = False
    internal_vad: Optional[bool] = False
    repetition_penalty: Optional[float] = 1.2
    compression_ratio_threshold: Optional[float] = 2.4
    log_prob_threshold: Optional[float] = -1.0
    no_speech_threshold: Optional[float] = 0.6
    condition_on_previous_text: Optional[bool] = True
    job_name: Optional[str] = None
    # NOTE(review): presumably a health-check flag that short-circuits
    # processing — confirm against the endpoint handler.
    ping: Optional[bool] = False

    class Config:
        """Pydantic config class."""

        # Sample payload attached to the model's JSON schema under the
        # "example" key; it uses the "youtube" url_type.
        json_schema_extra = {
            "example": {
                "url_type": "youtube",
                "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
                "api_key": "1234567890",
                "offset_start": None,
                "offset_end": None,
                "num_speakers": -1,
                "diarization": False,
                "multi_channel": False,
                "source_lang": "en",
                "timestamps": "s",
                "vocab": [
                    "custom company name",
                    "custom product name",
                    "custom co-worker name",
                ],
                "word_timestamps": False,
                "internal_vad": False,
                "repetition_penalty": 1.2,
                "compression_ratio_threshold": 2.4,
                "log_prob_threshold": -1.0,
                "no_speech_threshold": 0.6,
                "condition_on_previous_text": True,
                "job_name": "job_abc123",
                "ping": False,
            }
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample payload attached to the model's JSON schema under the
    # "example" key; it uses the "youtube" url_type.
    json_schema_extra = {
        "example": {
            "url_type": "youtube",
            "url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "api_key": "1234567890",
            "offset_start": None,
            "offset_end": None,
            "num_speakers": -1,
            "diarization": False,
            "multi_channel": False,
            "source_lang": "en",
            "timestamps": "s",
            "vocab": [
                "custom company name",
                "custom product name",
                "custom co-worker name",
            ],
            "word_timestamps": False,
            "internal_vad": False,
            "repetition_penalty": 1.2,
            "compression_ratio_threshold": 2.4,
            "log_prob_threshold": -1.0,
            "no_speech_threshold": 0.6,
            "condition_on_previous_text": True,
            "job_name": "job_abc123",
            "ping": False,
        }
    }

CortexUrlResponse

Bases: AudioResponse

Response model for the audio_url type of the Cortex endpoint.

Source code in src/wordcab_transcribe/models.py
class CortexUrlResponse(AudioResponse):
    """Response model for the audio_url type of the Cortex endpoint."""

    # Added on top of the AudioResponse fields: job_name is required,
    # request_id is optional and defaults to None.
    job_name: str
    request_id: Optional[str] = None

    class Config:
        """Pydantic config class."""

        # Sample response body attached to the model's JSON schema under
        # the "example" key, including the two Cortex-specific fields.
        json_schema_extra = {
            "example": {
                "utterances": [
                    {
                        "speaker": 0,
                        "start": 0.0,
                        "end": 1.0,
                        "text": "Hello World!",
                    },
                    {
                        "speaker": 0,
                        "start": 1.0,
                        "end": 2.0,
                        "text": "Wordcab is awesome",
                    },
                ],
                "audio_duration": 2.0,
                "offset_start": None,
                "offset_end": None,
                "num_speakers": -1,
                "diarization": False,
                "source_lang": "en",
                "timestamps": "s",
                "vocab": [
                    "custom company name",
                    "custom product name",
                    "custom co-worker name",
                ],
                "word_timestamps": False,
                "internal_vad": False,
                "repetition_penalty": 1.2,
                "compression_ratio_threshold": 2.4,
                "log_prob_threshold": -1.0,
                "no_speech_threshold": 0.6,
                "condition_on_previous_text": True,
                "process_times": {
                    "total": 2.678,
                    "transcription": 2.439,
                    "diarization": None,
                    "post_processing": 0.239,
                },
                "multi_channel": False,
                "job_name": "job_name",
                "request_id": "request_id",
            }
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample response body attached to the model's JSON schema under the
    # "example" key, including the two Cortex-specific fields.
    json_schema_extra = {
        "example": {
            "utterances": [
                {
                    "speaker": 0,
                    "start": 0.0,
                    "end": 1.0,
                    "text": "Hello World!",
                },
                {
                    "speaker": 0,
                    "start": 1.0,
                    "end": 2.0,
                    "text": "Wordcab is awesome",
                },
            ],
            "audio_duration": 2.0,
            "offset_start": None,
            "offset_end": None,
            "num_speakers": -1,
            "diarization": False,
            "source_lang": "en",
            "timestamps": "s",
            "vocab": [
                "custom company name",
                "custom product name",
                "custom co-worker name",
            ],
            "word_timestamps": False,
            "internal_vad": False,
            "repetition_penalty": 1.2,
            "compression_ratio_threshold": 2.4,
            "log_prob_threshold": -1.0,
            "no_speech_threshold": 0.6,
            "condition_on_previous_text": True,
            "process_times": {
                "total": 2.678,
                "transcription": 2.439,
                "diarization": None,
                "post_processing": 0.239,
            },
            "multi_channel": False,
            "job_name": "job_name",
            "request_id": "request_id",
        }
    }

CortexYoutubeResponse

Bases: YouTubeResponse

Response model for the YouTube type of the Cortex endpoint.

Source code in src/wordcab_transcribe/models.py
class CortexYoutubeResponse(YouTubeResponse):
    """Response model for the YouTube type of the Cortex endpoint."""

    # Added on top of the YouTubeResponse fields: job_name is required,
    # request_id is optional and defaults to None.
    job_name: str
    request_id: Optional[str] = None

    class Config:
        """Pydantic config class."""

        # Sample response body attached to the model's JSON schema under
        # the "example" key, including the two Cortex-specific fields.
        json_schema_extra = {
            "example": {
                "utterances": [
                    {
                        "speaker": 0,
                        "start": 0.0,
                        "end": 1.0,
                        "text": "Never gonna give you up!",
                    },
                    {
                        "speaker": 0,
                        "start": 1.0,
                        "end": 2.0,
                        "text": "Never gonna let you down!",
                    },
                ],
                "audio_duration": 2.0,
                "offset_start": None,
                "offset_end": None,
                "num_speakers": -1,
                "diarization": False,
                "source_lang": "en",
                "timestamps": "s",
                "vocab": [
                    "custom company name",
                    "custom product name",
                    "custom co-worker name",
                ],
                "word_timestamps": False,
                "internal_vad": False,
                "repetition_penalty": 1.2,
                "compression_ratio_threshold": 2.4,
                "log_prob_threshold": -1.0,
                "no_speech_threshold": 0.6,
                "condition_on_previous_text": True,
                "process_times": {
                    "total": 2.678,
                    "transcription": 2.439,
                    "diarization": None,
                    "post_processing": 0.239,
                },
                "video_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
                "job_name": "job_name",
                "request_id": "request_id",
            }
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample response body attached to the model's JSON schema under the
    # "example" key, including the two Cortex-specific fields.
    json_schema_extra = {
        "example": {
            "utterances": [
                {
                    "speaker": 0,
                    "start": 0.0,
                    "end": 1.0,
                    "text": "Never gonna give you up!",
                },
                {
                    "speaker": 0,
                    "start": 1.0,
                    "end": 2.0,
                    "text": "Never gonna let you down!",
                },
            ],
            "audio_duration": 2.0,
            "offset_start": None,
            "offset_end": None,
            "num_speakers": -1,
            "diarization": False,
            "source_lang": "en",
            "timestamps": "s",
            "vocab": [
                "custom company name",
                "custom product name",
                "custom co-worker name",
            ],
            "word_timestamps": False,
            "internal_vad": False,
            "repetition_penalty": 1.2,
            "compression_ratio_threshold": 2.4,
            "log_prob_threshold": -1.0,
            "no_speech_threshold": 0.6,
            "condition_on_previous_text": True,
            "process_times": {
                "total": 2.678,
                "transcription": 2.439,
                "diarization": None,
                "post_processing": 0.239,
            },
            "video_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "job_name": "job_name",
            "request_id": "request_id",
        }
    }

DiarizationOutput

Bases: BaseModel

Diarization output model for the API.

Source code in src/wordcab_transcribe/models.py
class DiarizationOutput(BaseModel):
    """Diarization output model for the API."""

    # Required list of DiarizationSegment tuples.
    segments: List[DiarizationSegment]

DiarizationRequest

Bases: BaseModel

Request model for the diarize endpoint.

Source code in src/wordcab_transcribe/models.py
class DiarizationRequest(BaseModel):
    """Request model for the diarize endpoint."""

    # All three fields are required (no defaults).
    audio: TensorShare
    # NOTE(review): presumably the audio length in seconds — confirm unit.
    duration: float
    num_speakers: int

DiarizationSegment

Bases: NamedTuple

Diarization segment model for the API.

Source code in src/wordcab_transcribe/models.py
class DiarizationSegment(NamedTuple):
    """Diarization segment model for the API."""

    # Positional order is (start, end, speaker).
    start: float
    end: float
    speaker: int

MultiChannelSegment

Bases: NamedTuple

Multi-channel segment model for the API.

Source code in src/wordcab_transcribe/models.py
class MultiChannelSegment(NamedTuple):
    """Multi-channel segment model for the API."""

    # Positional order is (start, end, text, words, speaker).
    start: float
    end: float
    text: str
    words: List[Word]
    speaker: int

MultiChannelTranscriptionOutput

Bases: BaseModel

Multi-channel transcription output model for the API.

Source code in src/wordcab_transcribe/models.py
class MultiChannelTranscriptionOutput(BaseModel):
    """Multi-channel transcription output model for the API."""

    # Required list of MultiChannelSegment tuples.
    segments: List[MultiChannelSegment]

PongResponse

Bases: BaseModel

Response model for the ping endpoint.

Source code in src/wordcab_transcribe/models.py
class PongResponse(BaseModel):
    """Response model for the ping endpoint."""

    # Single required field; the schema example below uses "pong".
    message: str

    class Config:
        """Pydantic config class."""

        # Sample response body attached to the model's JSON schema under
        # the "example" key.
        json_schema_extra = {
            "example": {
                "message": "pong",
            },
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample response body attached to the model's JSON schema under the
    # "example" key.
    json_schema_extra = {
        "example": {
            "message": "pong",
        },
    }

ProcessTimes

Bases: BaseModel

The execution times of the different processes.

Source code in src/wordcab_transcribe/models.py
class ProcessTimes(BaseModel):
    """The execution times of the different processes."""

    # Each timing is optional and defaults to None.
    # NOTE(review): values are presumably seconds — confirm unit.
    total: Union[float, None] = None
    transcription: Union[float, None] = None
    diarization: Union[float, None] = None
    post_processing: Union[float, None] = None

Timestamps

Bases: str, Enum

Timestamps enum for the API.

Source code in src/wordcab_transcribe/models.py
class Timestamps(str, Enum):
    """Timestamps enum for the API."""

    # The `str` mixin makes each member compare equal to its string value
    # (e.g. Timestamps.seconds == "s").
    seconds = "s"
    milliseconds = "ms"
    hour_minute_second = "hms"

Token

Bases: BaseModel

Token model for authentication.

Source code in src/wordcab_transcribe/models.py
class Token(BaseModel):
    """Token model for authentication."""

    # Both fields are required strings.
    access_token: str
    token_type: str

TokenData

Bases: BaseModel

TokenData model for authentication.

Source code in src/wordcab_transcribe/models.py
class TokenData(BaseModel):
    """TokenData model for authentication."""

    # Optional; defaults to None when no username is supplied.
    username: Optional[str] = None

TranscribeRequest

Bases: BaseModel

Request model for the transcribe endpoint.

Source code in src/wordcab_transcribe/models.py
class TranscribeRequest(BaseModel):
    """Request model for the transcribe endpoint."""

    # Every field is required: unlike BaseRequest, none declares a default.
    # `audio` accepts either one TensorShare or a list of them.
    audio: Union[TensorShare, List[TensorShare]]
    compression_ratio_threshold: float
    condition_on_previous_text: bool
    internal_vad: bool
    log_prob_threshold: float
    no_speech_threshold: float
    repetition_penalty: float
    source_lang: str
    # Nullable but still required.
    vocab: Union[List[str], None]

TranscriptionOutput

Bases: BaseModel

Transcription output model for the API.

Source code in src/wordcab_transcribe/models.py
class TranscriptionOutput(BaseModel):
    """Transcription output model for the API."""

    # Required list of Segment objects (Segment is defined outside this
    # listing).
    segments: List[Segment]

UrlSchema

Bases: BaseModel

Request model for the add_url endpoint.

Source code in src/wordcab_transcribe/models.py
class UrlSchema(BaseModel):
    """Request model for the add_url endpoint."""

    # Both fields are required. `task` is restricted to these two literal
    # values; `url` is typed as HttpUrl.
    task: Literal["transcription", "diarization"]
    url: HttpUrl

Utterance

Bases: BaseModel

Utterance model for the API.

Source code in src/wordcab_transcribe/models.py
class Utterance(BaseModel):
    """Utterance model for the API."""

    text: str
    # NOTE(review): float or str, presumably because some Timestamps
    # formats are rendered as strings — confirm against the formatter.
    start: Union[float, str]
    end: Union[float, str]
    # Optional; both default to None.
    speaker: Union[int, None] = None
    words: Union[List[Word], None] = None

Word

Bases: BaseModel

Word model for the API.

Source code in src/wordcab_transcribe/models.py
class Word(BaseModel):
    """Word model for the API."""

    # All four fields are required (no defaults).
    word: str
    start: float
    end: float
    probability: float

YouTubeResponse

Bases: BaseResponse

Response model for the ASR YouTube endpoint.

Source code in src/wordcab_transcribe/models.py
class YouTubeResponse(BaseResponse):
    """Response model for the ASR YouTube endpoint."""

    # Required (no default); added on top of the fields inherited from
    # BaseResponse.
    video_url: str

    class Config:
        """Pydantic config class."""

        # Sample response body attached to the model's JSON schema under
        # the "example" key.
        json_schema_extra = {
            "example": {
                "utterances": [
                    {
                        "speaker": 0,
                        "start": 0.0,
                        "end": 1.0,
                        "text": "Never gonna give you up!",
                    },
                    {
                        "speaker": 0,
                        "start": 1.0,
                        "end": 2.0,
                        "text": "Never gonna let you down!",
                    },
                ],
                "audio_duration": 2.0,
                "offset_start": None,
                "offset_end": None,
                "num_speakers": -1,
                "diarization": False,
                "source_lang": "en",
                "timestamps": "s",
                "vocab": [
                    "custom company name",
                    "custom product name",
                    "custom co-worker name",
                ],
                "word_timestamps": False,
                "internal_vad": False,
                "repetition_penalty": 1.2,
                "compression_ratio_threshold": 2.4,
                "log_prob_threshold": -1.0,
                "no_speech_threshold": 0.6,
                "condition_on_previous_text": True,
                "process_times": {
                    "total": 2.678,
                    "transcription": 2.439,
                    "diarization": None,
                    "post_processing": 0.239,
                },
                "video_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            }
        }

Config

Pydantic config class.

Source code in src/wordcab_transcribe/models.py
class Config:
    """Pydantic config class."""

    # Sample response body attached to the model's JSON schema under the
    # "example" key.
    json_schema_extra = {
        "example": {
            "utterances": [
                {
                    "speaker": 0,
                    "start": 0.0,
                    "end": 1.0,
                    "text": "Never gonna give you up!",
                },
                {
                    "speaker": 0,
                    "start": 1.0,
                    "end": 2.0,
                    "text": "Never gonna let you down!",
                },
            ],
            "audio_duration": 2.0,
            "offset_start": None,
            "offset_end": None,
            "num_speakers": -1,
            "diarization": False,
            "source_lang": "en",
            "timestamps": "s",
            "vocab": [
                "custom company name",
                "custom product name",
                "custom co-worker name",
            ],
            "word_timestamps": False,
            "internal_vad": False,
            "repetition_penalty": 1.2,
            "compression_ratio_threshold": 2.4,
            "log_prob_threshold": -1.0,
            "no_speech_threshold": 0.6,
            "condition_on_previous_text": True,
            "process_times": {
                "total": 2.678,
                "transcription": 2.439,
                "diarization": None,
                "post_processing": 0.239,
            },
            "video_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
        }
    }

Last update: 2023-10-12
Created: 2023-10-12