Spike GPT
SpikeGPT code and the pre-trained 216M OpenWebText checkpoint:

* https://github.com/ridgerchu/SpikeGPT
* https://huggingface.co/ridger/SpikeGPT-OpenWebText-216M

Clone both repositories:

{pre}
git clone https://github.com/ridgerchu/SpikeGPT.git
git clone https://huggingface.co/ridger/SpikeGPT-OpenWebText-216M
{/pre}

Create a virtual environment and install the dependencies (run this from inside the cloned SpikeGPT directory, where requirements.txt lives):

{pre}
python3 -m venv spike_env
source ./spike_env/bin/activate
pip install -r requirements.txt
{/pre}

Open run.py and, if you do not have CUDA, change the run device from CUDA to CPU (args.RUN_DEVICE = "cpu"). In the same file, replace the model name with MODEL_NAME = 'SpikeGPT-OpenWebText-216M/SpikeGPT-216M'. The path is relative to where run.py is executed, so the downloaded SpikeGPT-OpenWebText-216M folder must sit inside the SpikeGPT directory (or adjust the path accordingly).

Next, patch src/utils.py so the softmax no longer re-wraps the logits with torch.tensor():

{pre}
nano /home/x/SpikeGPT/src/utils.py
# Replace
probs = F.softmax(torch.tensor(out), dim=-1)
# with
probs = F.softmax(out.clone().detach(), dim=-1)
{/pre}
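Why the edit works: at that point out is already a tensor, and torch.tensor(out) copy-constructs a new tensor from it, which makes PyTorch warn that sourceTensor.clone().detach() should be used instead. The two forms give the same probabilities. A minimal stand-alone sketch (the random vector here is just a stand-in for the real logits):

{pre}
import torch
import torch.nn.functional as F

out = torch.randn(8)  # stand-in for the logits the model actually returns

# Old form: copy-constructing from an existing tensor triggers a UserWarning.
probs_old = F.softmax(torch.tensor(out), dim=-1)

# New form: the recommended equivalent, no warning.
probs_new = F.softmax(out.clone().detach(), dim=-1)

print(torch.allclose(probs_old, probs_new))  # True -- identical probabilities
{/pre}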
Running the script at this point produces a deprecation warning from the checkpoint loader: SpikeGPT/src/model_run.py:42: FutureWarning: You are using `torch.load` with `weights_only=False`. Silence it by editing that line in src/model_run.py so the checkpoint is loaded with weights_only=True:

{pre}
w = torch.load(args.MODEL_NAME + '.pth', map_location='cpu', weights_only=True)
{/pre}
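Passing weights_only=True restricts torch.load to unpickling tensors and simple containers of tensors, which is exactly what the FutureWarning nudges toward and avoids executing arbitrary pickle code from a checkpoint. A self-contained illustration (the tiny state dict and the demo.pth file are made up for this example; run.py loads args.MODEL_NAME + '.pth' the same way):

{pre}
import collections
import torch

# Save a tiny stand-in state dict, then reload it the way run.py loads the checkpoint.
sd = collections.OrderedDict(w=torch.zeros(2, 2))
torch.save(sd, "demo.pth")

# weights_only=True allows only tensors/containers of tensors to be unpickled,
# silencing the FutureWarning and refusing to run arbitrary pickle code.
w = torch.load("demo.pth", map_location="cpu", weights_only=True)
print(list(w.keys()))  # ['w']
{/pre}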
Finally, start the chat loop with python3 run.py. The full run.py used here, adapted for interactive, chat-style inference, is:

{pre}
########################################################################################################
# Run with python3 run.py
# The RWKV Language Model - https://github.com/BlinkDL/RWKV-LM
########################################################################################################

import numpy as np
import math, os, sys, types, time, gc
import torch
from src.utils import TOKENIZER
import matplotlib.ticker as ticker

try:
    os.environ["CUDA_VISIBLE_DEVICES"] = sys.argv[1]
except:
    pass
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.allow_tf32 = True
torch.backends.cuda.matmul.allow_tf32 = True
np.set_printoptions(precision=4, suppress=True, linewidth=200)
args = types.SimpleNamespace()

########################################################################################################
# Step 1: set model & config (use v4 to run your trained-from-scratch models. v4 and v4neo are compatible)
########################################################################################################

args.RUN_DEVICE = "cpu"   # 'cuda' // 'cpu' (already fast)
args.FLOAT_MODE = "fp32"  # fp16 (good for GPU, does not work for CPU) // fp32 (good for CPU) // bf16 (less accurate, but works for CPU)

# if args.RUN_DEVICE == "cuda":
#     os.environ["RWKV_RUN_BACKEND"] = 'nvfuser'  # !!!BUGGY!!! wrong output
os.environ["RWKV_JIT_ON"] = '1'  # '1' or '0'. very useful for GPU/CPU fp32, but might be harmful for GPU fp16. please benchmark !!!

#For BookCorpus Pre-trained model
# TOKEN_MODE = "char"
# WORD_NAME = "vocab_book"
# UNKNOWN_CHAR = ' '
# vocab_size = 77

#For 216M OpenWebText Pre-trained model
TOKEN_MODE = "pile"
WORD_NAME = [
    "20B_tokenizer.json",
    "20B_tokenizer.json",
]  # [vocab, vocab] for Pile model
UNKNOWN_CHAR = None
vocab_size = 50277

MODEL_NAME = 'SpikeGPT-OpenWebText-216M/SpikeGPT-216M'
n_layer = 18
n_embd = 768
ctx_len = 1024

args.MODEL_NAME = MODEL_NAME
args.n_layer = n_layer
args.n_embd = n_embd
args.ctx_len = ctx_len
args.vocab_size = vocab_size
args.head_qk = 0
args.pre_ffn = 0
args.grad_cp = 0
args.my_pos_emb = 0
os.environ["RWKV_RUN_DEVICE"] = args.RUN_DEVICE

########################################################################################################
# Step 2: set prompt & sampling stuffs
########################################################################################################

context = ''
NUM_TRIALS = 1
LENGTH_PER_TRIAL = 333
TEMPERATURE = 1.5
top_p = 0.7
top_p_newline = 0.9  # only used in TOKEN_MODE = char

DEBUG_DEBUG = False  # True False --> show softmax output

########################################################################################################

print(f'\nUsing {args.RUN_DEVICE.upper()}. Loading {MODEL_NAME}...')
from src.model_run import RWKV_RNN

model = RWKV_RNN(args)

print(f'\nOptimizing speed...')
# out, _ = model.forward([187], None, None, None)
# print(out)
gc.collect()
torch.cuda.empty_cache()

print(f'\nLoading tokenizer {WORD_NAME}...')
tokenizer = TOKENIZER(WORD_NAME, UNKNOWN_CHAR=UNKNOWN_CHAR)
if TOKEN_MODE == "pile":
    assert tokenizer.tokenizer.decode([187]) == '\n'

########################################################################################################

def generate_response(context, model, tokenizer, ctx_len, temperature, top_p, top_p_newline, debug_debug):
    if tokenizer.charMode:
        context = tokenizer.refine_context(context)
        ctx = [tokenizer.stoi.get(s, tokenizer.UNKNOWN_CHAR) for s in context]
    else:
        ctx = tokenizer.tokenizer.encode(context)
    src_len = len(ctx)
    src_ctx = ctx.copy()

    init_state = None
    init_out = None
    state = None
    mem1 = None
    mem2 = None
    out = None

    for TRIAL in range(1 if debug_debug else NUM_TRIALS):
        ctx = src_ctx.copy()
        if TRIAL == 0:
            for i in range(src_len):
                x = ctx[: i + 1]
                if i == src_len - 1:
                    init_out, init_state, mem1, mem2 = model.forward(x, init_state, mem1, mem2)
                else:
                    init_state, mem1, mem2 = model.forward(x, init_state, mem1, mem2, preprocess_only=True)
            gc.collect()
            torch.cuda.empty_cache()

        out_last = src_len
        for i in range(src_len, src_len + (1 if debug_debug else LENGTH_PER_TRIAL)):
            x = ctx[: i + 1]
            x = x[-ctx_len:]

            if i == src_len:
                out = init_out.clone()
                state = init_state.clone()
            else:
                out, state, mem1, mem2 = model.forward(x, state, mem1, mem2)
            if debug_debug:
                print("model", np.array(x), "==>", np.array(out), np.max(out.cpu().numpy()), np.min(out.cpu().numpy()))
            if TOKEN_MODE == "pile":
                out[0] = -999999999  # disable <|endoftext|>

            ttt = tokenizer.sample_logits(
                out,
                x,
                ctx_len,
                temperature=temperature,
                top_p_usual=top_p,
                top_p_newline=top_p_newline,
            )
            ttt = int(ttt)
            ctx += [ttt]

            if tokenizer.charMode:
                char = tokenizer.itos[ttt]
                print(char, end="", flush=True)
            else:
                char = tokenizer.tokenizer.decode(ctx[out_last:])
                if '\ufffd' not in char:  # is valid utf8 string?
                    print(char, end="", flush=True)
                    out_last = i + 1
        print("\n")

print("\nInteractive inference mode. Type 'exit' to quit.\n")
while True:
    user_input = input("You: ")
    if user_input.lower() == 'exit':
        break
    context += f"You: {user_input}\nBot: "
    generate_response(context, model, tokenizer, ctx_len, TEMPERATURE, top_p, top_p_newline, DEBUG_DEBUG)
    context += "\n"
    print(("-" * 50) + '\n')
{/pre}
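The TEMPERATURE and top_p values near the top of the script control how the next token is picked; the actual sampling lives in tokenizer.sample_logits inside src/utils.py. The sketch below is not that code, just a generic temperature + top-p (nucleus) sampler, using the same defaults (1.5 and 0.7), to show what the two knobs do:

{pre}
import torch
import torch.nn.functional as F

def sample_top_p(logits, temperature=1.5, top_p=0.7):
    """Generic temperature + nucleus sampling (illustrative only, not SpikeGPT's exact code)."""
    probs = F.softmax(logits / temperature, dim=-1)
    sorted_probs, sorted_idx = torch.sort(probs, descending=True)
    cumulative = torch.cumsum(sorted_probs, dim=-1)
    # Drop every token outside the smallest prefix whose cumulative probability reaches top_p,
    # but always keep at least the most likely token.
    drop = cumulative >= top_p
    drop[1:] = drop[:-1].clone()
    drop[0] = False
    sorted_probs[drop] = 0.0
    sorted_probs /= sorted_probs.sum()
    return sorted_idx[torch.multinomial(sorted_probs, 1)].item()

logits = torch.randn(50277)   # fake logits over the 50277-token Pile vocabulary
print(sample_top_p(logits))   # id of the sampled token
{/pre}

Raising TEMPERATURE flattens the distribution (more adventurous text), while lowering top_p narrows sampling to fewer high-probability tokens; both can be changed at the top of run.py without touching anything else.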