textGenerationStream.ts
import type { TextGenerationInput } from "@huggingface/tasks";
import { getProviderHelper } from "../../lib/getProviderHelper";
import type { BaseArgs, Options } from "../../types";
import { innerStreamingRequest } from "../../utils/request";

export interface TextGenerationStreamToken {
/** Token ID from the model tokenizer */
id: number;
/** Token text */
text: string;
/** Logprob */
logprob: number;
/**
* Is the token a special token
* Can be used to ignore tokens when concatenating
*/
special: boolean;
}

export interface TextGenerationStreamPrefillToken {
/** Token ID from the model tokenizer */
id: number;
/** Token text */
text: string;
/**
* Logprob
* Optional since the logprob of the first token cannot be computed
*/
logprob?: number;
}

export interface TextGenerationStreamBestOfSequence {
/** Generated text */
generated_text: string;
/** Generation finish reason */
finish_reason: TextGenerationStreamFinishReason;
/** Number of generated tokens */
generated_tokens: number;
/** Sampling seed if sampling was activated */
seed?: number;
/** Prompt tokens */
prefill: TextGenerationStreamPrefillToken[];
/** Generated tokens */
tokens: TextGenerationStreamToken[];
}

export type TextGenerationStreamFinishReason =
/** number of generated tokens == `max_new_tokens` */
| "length"
/** the model generated its end of sequence token */
| "eos_token"
/** the model generated a text included in `stop_sequences` */
| "stop_sequence";
export interface TextGenerationStreamDetails {
/** Generation finish reason */
finish_reason: TextGenerationStreamFinishReason;
/** Number of generated tokens */
generated_tokens: number;
/** Sampling seed if sampling was activated */
seed?: number;
/** Prompt tokens */
prefill: TextGenerationStreamPrefillToken[];
	/** Generated tokens */
tokens: TextGenerationStreamToken[];
/** Additional sequences when using the `best_of` parameter */
best_of_sequences?: TextGenerationStreamBestOfSequence[];
}

export interface TextGenerationStreamOutput {
index?: number;
/** Generated token, one at a time */
token: TextGenerationStreamToken;
/**
* Complete generated text
* Only available when the generation is finished
*/
generated_text: string | null;
/**
* Generation details
* Only available when the generation is finished
*/
details: TextGenerationStreamDetails | null;
}

/**
 * Use to continue text from a prompt. Same as `textGeneration`, but returns an async generator that can be read one token at a time.
*/
export async function* textGenerationStream(
args: BaseArgs & TextGenerationInput,
options?: Options
): AsyncGenerator<TextGenerationStreamOutput> {
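	// Resolve the provider-specific helper; falls back to Hugging Face Inference when no provider is set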
const providerHelper = getProviderHelper(args.provider ?? "hf-inference", "text-generation");
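	// Delegate to the shared streaming helper, which yields one output chunk per generated token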
yield* innerStreamingRequest<TextGenerationStreamOutput>(args, providerHelper, {
...options,
task: "text-generation",
});
}
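
// A minimal usage sketch (not part of the original file): streams tokens to
// stdout as they arrive. The access token and model id below are hypothetical
// placeholders; any valid token and text-generation model id should work.
//
// for await (const output of textGenerationStream({
// 	accessToken: "hf_...",
// 	model: "HuggingFaceH4/zephyr-7b-beta",
// 	inputs: "The quick brown fox",
// 	parameters: { max_new_tokens: 50 },
// })) {
// 	// Skip special tokens (e.g. end-of-sequence) when concatenating
// 	if (!output.token.special) {
// 		process.stdout.write(output.token.text);
// 	}
// 	// `details` is only populated on the final chunk of the stream
// 	if (output.details) {
// 		console.log(`\nFinished: ${output.details.finish_reason}`);
// 	}
// }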