Spike GPT
* https://github.com/ridgerchu/SpikeGPT
* https://huggingface.co/ridger/SpikeGPT-OpenWebText-216M

Clone the repository and the pre-trained 216M OpenWebText model:

{pre}
git clone https://github.com/ridgerchu/SpikeGPT.git
git clone https://huggingface.co/ridger/SpikeGPT-OpenWebText-216M
{/pre}

Create a virtual environment and install the dependencies:

{pre}
python3 -m venv spike_env
source ./spike_env/bin/activate
pip install -r requirements.txt
{/pre}

Open run.py and change the run device from CUDA to CPU if you do not have CUDA, and replace the model name with MODEL_NAME = 'SpikeGPT-OpenWebText-216M/SpikeGPT-216M'.
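For reference, after those two edits the relevant lines near the top of run.py should read as follows (both values also appear in the full listing at the end of this page):

{pre}
args.RUN_DEVICE = "cpu"  # was "cuda"; use "cpu" when no CUDA device is available
MODEL_NAME = 'SpikeGPT-OpenWebText-216M/SpikeGPT-216M'
{/pre}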
Next, edit src/utils.py and replace the softmax line: out is already a tensor there, so torch.tensor(out) triggers a copy-construct warning, and clone().detach() is the recommended form.

{pre}
nano /home/x/SpikeGPT/src/utils.py

# Replace
probs = F.softmax(torch.tensor(out), dim=-1)
# with
probs = F.softmax(out.clone().detach(), dim=-1)
{/pre}

Running the model also prints a FutureWarning:

{pre}
SpikeGPT/src/model_run.py:42: FutureWarning: You are using `torch.load` with `weights_only=False`
{/pre}

To fix it, change the torch.load call on that line to:

{pre}
w = torch.load(args.MODEL_NAME + '.pth', map_location='cpu', weights_only=True)
{/pre}
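To confirm the checkpoint loads cleanly after this change, a quick check from the SpikeGPT directory can help. This is a minimal sketch, assuming the checkpoint is a plain state dict of tensors (which the working weights_only=True fix above implies); check_load.py is a hypothetical helper, not part of the repository:

{pre}
# check_load.py -- minimal sanity check (hypothetical helper)
import torch

# weights_only=True restricts unpickling to tensors and plain containers,
# which also silences the FutureWarning shown above
w = torch.load('SpikeGPT-OpenWebText-216M/SpikeGPT-216M.pth',
               map_location='cpu', weights_only=True)
print(f'Loaded {len(w)} entries from the checkpoint')
{/pre}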
Now run the model:

{pre}
python3 run.py
{/pre}

The full modified run.py, with an interactive chat loop added at the end:

{pre}
########################################################################################################
# Run with python3 run.py
# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
########################################################################################################

import numpy as np
import math, os, sys, types, time, gc
import torch
from src.utils import TOKENIZER
import matplotlib.ticker as ticker

try:
    os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1]
except:
    pass
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_tf32 = True
np.set_printoptions(precision=4, suppress=True, linewidth=200)
args = types.SimpleNamespace()

########################################################################################################
# Step 1: set model & config (use v4 to run your trained-from-scratch models. v4 and v4neo are compatible)
########################################################################################################

args.RUN_DEVICE = "cpu"  # 'cuda' // 'cpu' (already fast)
args.FLOAT_MODE = "fp32"  # fp16 (good for GPU, does not work for CPU) // fp32 (good for CPU) // bf16 (less accurate, but works for CPU)

# if args.RUN_DEVICE == "cuda":
#     os.environ["RWKV_RUN_BACKEND"] = 'nvfuser'  # !!!BUGGY!!! wrong output
os.environ["RWKV_JIT_ON"] = '1'  # '1' or '0'. very useful for GPU/CPU fp32, but might be harmful for GPU fp16. please benchmark !!!

# For BookCorpus Pre-trained model
# TOKEN_MODE = "char"
# WORD_NAME = "vocab_book"
# UNKNOWN_CHAR = ' '
# vocab_size = 77

# For 216M OpenWebText Pre-trained model
TOKEN_MODE = "pile"
WORD_NAME = [
    "20B_tokenizer.json",
    "20B_tokenizer.json",
]  # [vocab, vocab] for Pile model
UNKNOWN_CHAR = None
vocab_size = 50277

MODEL_NAME = 'SpikeGPT-OpenWebText-216M/SpikeGPT-216M'
n_layer = 18
n_embd = 768
ctx_len = 1024

args.MODEL_NAME = MODEL_NAME
args.n_layer = n_layer
args.n_embd = n_embd
args.ctx_len = ctx_len
args.vocab_size = vocab_size
args.head_qk = 0
args.pre_ffn = 0
args.grad_cp = 0
args.my_pos_emb = 0
os.environ["RWKV_RUN_DEVICE"] = args.RUN_DEVICE

########################################################################################################
# Step 2: set prompt & sampling stuffs
########################################################################################################

context = ''
NUM_TRIALS = 1
LENGTH_PER_TRIAL = 333
TEMPERATURE = 1.5
top_p = 0.7
top_p_newline = 0.9  # only used in TOKEN_MODE = char

DEBUG_DEBUG = False  # True False --> show softmax output

########################################################################################################

print(f'\nUsing {args.RUN_DEVICE.upper()}. Loading {MODEL_NAME}...')
from src.model_run import RWKV_RNN

model = RWKV_RNN(args)

print(f'\nOptimizing speed...')
# out, _ = model.forward([187], None, None, None)
# print(out)
gc.collect()
torch.cuda.empty_cache()

print(f'\nLoading tokenizer {WORD_NAME}...')
tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR)
if TOKEN_MODE == "pile":
    assert tokenizer.tokenizer.decode([187]) == '\n'

########################################################################################################

def generate_response(context, model, tokenizer, ctx_len, temperature, top_p, top_p_newline, debug_debug):
    if tokenizer.charMode:
        context = tokenizer.refine_context(context)
        ctx = [tokenizer.stoi.get(s, tokenizer.UNKNOWN_CHAR) for s in context]
    else:
        ctx = tokenizer.tokenizer.encode(context)
    src_len = len(ctx)
    src_ctx = ctx.copy()

    init_state = None
    init_out = None
    state = None
    mem1 = None
    mem2 = None
    out = None

    for TRIAL in range(1 if debug_debug else NUM_TRIALS):
        ctx = src_ctx.copy()
        if TRIAL == 0:
            for i in range(src_len):
                x = ctx[: i + 1]
                if i == src_len - 1:
                    init_out, init_state, mem1, mem2 = model.forward(x, init_state, mem1, mem2)
                else:
                    init_state, mem1, mem2 = model.forward(x, init_state, mem1, mem2, preprocess_only=True)
            gc.collect()
            torch.cuda.empty_cache()

        out_last = src_len
        for i in range(src_len, src_len + (1 if debug_debug else LENGTH_PER_TRIAL)):
            x = ctx[: i + 1]
            x = x[-ctx_len:]

            if i == src_len:
                out = init_out.clone()
                state = init_state.clone()
            else:
                out, state, mem1, mem2 = model.forward(x, state, mem1, mem2)
            if debug_debug:
                print("model", np.array(x), "==>", np.array(out), np.max(out.cpu().numpy()), np.min(out.cpu().numpy()))
            if TOKEN_MODE == "pile":
                out[0] = -999999999  # disable <|endoftext|>

            ttt = tokenizer.sample_logits(
                out,
                x,
                ctx_len,
                temperature=temperature,
                top_p_usual=top_p,
                top_p_newline=top_p_newline,
            )
            ttt = int(ttt)
            ctx += [ttt]

            if tokenizer.charMode:
                char = tokenizer.itos[ttt]
                print(char, end="", flush=True)
            else:
                char = tokenizer.tokenizer.decode(ctx[out_last:])
                if '\ufffd' not in char:  # is valid utf8 string?
                    print(char, end="", flush=True)
                    out_last = i + 1
        print("\n")

print("\nInteractive inference mode. Type 'exit' to quit.\n")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'exit':
        break
    context += f"You: {user_input}\nBot: "
    generate_response(context, model, tokenizer, ctx_len, TEMPERATURE, top_p, top_p_newline, DEBUG_DEBUG)
    context += "\n"
    print(("-" * 50) + '\n')
{/pre}
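If you do have a CUDA GPU, the script's own comments point at the two settings to flip. A sketch of the GPU configuration, taken directly from those comments (not tested in this walkthrough):

{pre}
args.RUN_DEVICE = "cuda"   # 'cuda' // 'cpu'
args.FLOAT_MODE = "fp16"   # fp16 is good for GPU but does not work on CPU
{/pre}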