Bitsec (subnet 60)

Vulnerability History

Date         High Risk   Low Risk
2024-12-20   6           0

Audit Report Details

Lines of Code: 14825
Open: 9
Resolved: 0
🚨 High Risk Vulnerabilities
⚠️ Low Risk Vulnerabilities

Vulnerable Code:

---
File: /coding/api/__init__.py
---

from openai import *

---
File: /coding/api/cleaners.py
---

from detect_secrets.core import scan
from detect_secrets.settings import default_settings

FIM_PREFIXES = ["<fim_prefix>", "[PREFIX]", "<PRE>", "<|fim_begin|>"]
FIM_ENDS = ["<fim_middle>", "[SUFFIX]", "<SUF>", "<|fim_end|>"]
FIM_HOLES = ["<fim_suffix>"]

def clean_fixes(text):
    for prefix in FIM_PREFIXES:
        text = text.replace(prefix, "")
    for end in FIM_ENDS:
        text = text.replace(end, "")
    for hole in FIM_HOLES:
        text = text.replace(hole, "<|fim_hole|>")
    return text

def remove_secret_lines(multiline_string):
    # Split the input string into individual lines
    lines = multiline_string.split('\n')

    # Initialize a list to hold lines without secrets
    clean_lines = []

    # Scan each line for secrets
    with default_settings() as settings:
        settings.disable_plugins(
            'Base64HighEntropyString',
            'HexHighEntropyString'
        )
        for line in lines:
            is_secret = False
            for secret in scan.scan_line(line):
                is_secret = True
                break  # Exit the inner loop if a secret is found

            # If no secret is found, add the line to clean_lines
            if not is_secret:
                clean_lines.append(line)

    # Join the clean lines back into a single string
    return '\n'.join(clean_lines)

def remove_generate_prompt(string):
    """
    Cleaner to remove the blocks that are used by continue.dev when running `Generate Code`
    """
    blocks = ["<|im_start|>user\n", "<|im_end|>\n", "<|im_start|>assistant\n", "Sure! Here's the entire rewritten code block:\n```python\n"]
    for block in blocks:
        string = string.replace(block, "")

    return string

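Together these three functions form the prompt-sanitization pass that openai.py applies before a prompt is forwarded to a miner. A minimal sketch of that composition (the sample prompt is made up):

    raw = "<fim_prefix>def add(a, b):<fim_suffix>\n    return a + b<fim_middle>"
    cleaned = remove_generate_prompt(remove_secret_lines(clean_fixes(raw)))
    # clean_fixes strips the FIM prefix/end markers and rewrites "<fim_suffix>"
    # to "<|fim_hole|>"; remove_secret_lines drops any line flagged by
    # detect-secrets; remove_generate_prompt removes continue.dev scaffolding.
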
---
File: /coding/api/code.py
---

# The MIT License (MIT)
# Copyright © 2021 Yuma Rao
# Copyright © 2023 Opentensor Foundation
# Copyright © 2023 Opentensor Technologies Inc

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import bittensor as bt
from typing import List, Optional, Union, Any, Dict
from bittensor.subnets import SubnetsAPI
from coding.protocol import StreamCodeSynapse

class CodeAPI(SubnetsAPI):
    def __init__(self, wallet: "bt.wallet"):
        super().__init__(wallet)
        self.netuid = 45
        self.name = "code"

    def prepare_synapse(self, query: str, documents: List[Any]) -> StreamCodeSynapse:
        return StreamCodeSynapse(query=query, documents=documents)

    def process_responses(
        self, responses: List[Union["bt.StreamCodeSynapse", Any]]
    ) -> List[int]:
        outputs = []
        for response in responses:
            if response.dendrite.status_code != 200:
                continue
            # Collect every successful completion before returning.
            outputs.append(response.completion)
        return outputs

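CodeAPI packages a query into a StreamCodeSynapse and filters miner responses by HTTP status. A minimal construction sketch (the wallet names are placeholders):

    import bittensor as bt
    from coding.api.code import CodeAPI

    wallet = bt.wallet(name="my_wallet", hotkey="default")  # placeholder wallet
    api = CodeAPI(wallet)
    synapse = api.prepare_synapse(query="write a fizzbuzz function", documents=[])
    # After querying axons with this synapse, process_responses keeps only
    # completions whose dendrite returned HTTP status 200.
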
---
File: /coding/api/completion.py
---

import time
import json

from http import HTTPStatus
from typing import AsyncGenerator, AsyncIterator, Union

from coding.api.protocol import (
    ChatCompletionRequest,
    ChatCompletionResponseStreamChoice,
    ChatCompletionStreamResponse,
    DeltaMessage,
    ErrorResponse,
    ChatCompletionResponse,
    CompletionRequest,
    CompletionResponseStreamChoice,
    CompletionStreamResponse,
    CompletionResponse,
    CompletionResponseChoice,
    UsageInfo
)

def create_streaming_error_response(
        message: str,
        err_type: str = "BadRequestError",
        status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> str:
    # Build the error payload from the ErrorResponse model in coding.api.protocol.
    json_str = json.dumps({
        "error": ErrorResponse(message=message,
                               type=err_type,
                               code=status_code.value).model_dump()
    })
    return json_str

async def chat_completion_stream_generator(
        request: ChatCompletionRequest,
        result_generator: AsyncIterator
) -> Union[ErrorResponse, AsyncGenerator[str, None]]:

    model_name = request.model
    created_time = int(time.time())
    chunk_object_type = "chat.completion.chunk"
    first_iteration = True

    try:
        async for res in result_generator:
            if not isinstance(res, str):
                break
            if first_iteration:
                role = request.messages[-1].role
                choice_data = ChatCompletionResponseStreamChoice(
                    index=0,
                    delta=DeltaMessage(role=role),
                    logprobs=None,
                    finish_reason=None)
                chunk = ChatCompletionStreamResponse(
                    id="",
                    object=chunk_object_type,
                    created=created_time,
                    choices=[choice_data],
                    model=model_name)
                data = chunk.model_dump_json(exclude_unset=True)
                yield f"data: {data}\n\n"

                first_iteration = False

            choice_data = ChatCompletionResponseStreamChoice(
                index=0,
                delta=DeltaMessage(content=res),
                logprobs=None,
                finish_reason="stop",
                stop_reason="")
            chunk = ChatCompletionStreamResponse(
                id="",
                object=chunk_object_type,
                created=created_time,
                choices=[choice_data],
                model=model_name)
            data = chunk.model_dump_json(exclude_unset=True,
                                         exclude_none=True)
            yield f"data: {data}\n\n"
    except ValueError as e:
        data = create_streaming_error_response(str(e))
        yield f"data: {data}\n\n"
    print("DONE")
    yield "data: [DONE]\n\n"


async def chat_completion(
        request: ChatCompletionRequest,
        result_generator: AsyncIterator
) -> Union[ErrorResponse, ChatCompletionResponse]:
    completion = ""
    async for chunk in result_generator:
        completion += chunk

    return ChatCompletionResponse(
        id="",
        object="chat.completion",
        created=int(time.time()),
        model=request.model,
        choices=[ChatCompletionResponseStreamChoice(
            index=0,
            delta=DeltaMessage(content=completion),
            logprobs=None,
            finish_reason="stop",
            stop_reason="")])

async def completion_stream_generator(
        request: CompletionRequest,
        result_generator: AsyncIterator
) -> Union[ErrorResponse, AsyncGenerator[str, None]]:

    model_name = request.model
    created_time = int(time.time())
    chunk_object_type = "chat.completion.chunk"
    first_iteration = True

    try:
        async for res in result_generator:
            if not isinstance(res, str):
                break
            if first_iteration:
                choice_data = CompletionResponseStreamChoice(
                    index=0,
                    text="",
                    logprobs=None,
                    finish_reason=None)
                chunk = CompletionStreamResponse(
                    choices=[choice_data],
                    model=model_name)
                data = chunk.model_dump_json(exclude_unset=True)
                yield f"data: {data}\n\n"

                first_iteration = False
            choice_data = CompletionResponseStreamChoice(
                index=0,
                text=res,
                logprobs=None,
                finish_reason=None)
            chunk = CompletionStreamResponse(
                id="",
                object=chunk_object_type,
                created=created_time,
                choices=[choice_data],
                model=model_name)
            data = chunk.model_dump_json(exclude_unset=True,
                                         exclude_none=True)
            yield f"data: {data}\n\n"
    except ValueError as e:
        data = create_streaming_error_response(str(e))
        yield f"data: {data}\n\n"
    yield "data: [DONE]\n\n"


async def completion(
        request: CompletionRequest,
        result_generator: AsyncIterator
) -> Union[ErrorResponse, CompletionResponse]:
    completion = ""
    async for chunk in result_generator:
        completion += chunk

    return CompletionResponse(
        model=request.model,
        usage=UsageInfo(),  # usage is required by the model; token counts default to 0
        choices=[CompletionResponseChoice(
            index=0,
            text=completion,
            finish_reason="stop",
            stop_reason="")])

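Both stream generators frame their output as Server-Sent Events: each chunk is a "data: {json}" line followed by a blank line, with a final "data: [DONE]" sentinel. A minimal client-side sketch of parsing that framing for the chat stream (the line source is a stand-in for any async iterator of decoded lines, e.g. httpx's Response.aiter_lines()):

    import json

    async def read_chat_stream(lines):
        # lines: hypothetical async iterator of decoded SSE lines.
        async for line in lines:
            if not line.startswith("data: "):
                continue  # skip blank separator lines
            payload = line[len("data: "):]
            if payload == "[DONE]":
                break  # end-of-stream sentinel emitted above
            chunk = json.loads(payload)
            delta = chunk["choices"][0]["delta"]
            if "content" in delta:
                yield delta["content"]
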
---
File: /coding/api/get_query_axons.py
---

# The MIT License (MIT)
# Copyright © 2021 Yuma Rao
# Copyright © 2023 Opentensor Foundation
# Copyright © 2023 Opentensor Technologies Inc

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
import numpy as np
import random
import bittensor as bt


async def ping_uids(dendrite, metagraph, uids, timeout=3):
    """
    Pings a list of UIDs to check their availability on the Bittensor network.

    Args:
        dendrite (bittensor.dendrite): The dendrite instance to use for pinging nodes.
        metagraph (bittensor.metagraph): The metagraph instance containing network information.
        uids (list): A list of UIDs (unique identifiers) to ping.
        timeout (int, optional): The timeout in seconds for each ping. Defaults to 3.

    Returns:
        tuple: A tuple containing two lists:
            - The first list contains UIDs that were successfully pinged.
            - The second list contains UIDs that failed to respond.
    """
    axons = [metagraph.axons[uid] for uid in uids]
    try:
        responses = await dendrite(
            axons,
            bt.Synapse(),  # TODO: potentially get the synapses available back?
            deserialize=False,
            timeout=timeout,
        )
        successful_uids = [
            uid
            for uid, response in zip(uids, responses)
            if response.dendrite.status_code == 200
        ]
        failed_uids = [
            uid
            for uid, response in zip(uids, responses)
            if response.dendrite.status_code != 200
        ]
    except Exception as e:
        bt.logging.error(f"Dendrite ping failed: {e}")
        successful_uids = []
        failed_uids = uids
    bt.logging.debug(f"ping() successful uids: {successful_uids}")
    bt.logging.debug(f"ping() failed uids: {failed_uids}")
    return successful_uids, failed_uids

async def get_query_api_nodes(dendrite, metagraph, n=0.1, timeout=3):
    """
    Fetches the available API nodes to query for the particular subnet.

    Args:
        dendrite (bittensor.dendrite): The dendrite instance to use for querying nodes.
        metagraph (bittensor.metagraph): The metagraph instance containing network information.
        n (float, optional): The fraction of top nodes to consider based on stake. Defaults to 0.1.
        timeout (int, optional): The timeout in seconds for pinging nodes. Defaults to 3.

    Returns:
        list: A list of UIDs representing the available API nodes.
    """
    bt.logging.debug(
        f"Fetching available API nodes for subnet {metagraph.netuid}"
    )
    vtrust_uids = [
        uid.item()
        for uid in metagraph.uids
        if metagraph.validator_trust[uid] > 0
    ]
    top_uids = np.where(metagraph.S > np.quantile(metagraph.S, 1 - n))[0].tolist()
    init_query_uids = set(top_uids).intersection(set(vtrust_uids))
    query_uids, _ = await ping_uids(
        dendrite, metagraph, list(init_query_uids), timeout=timeout
    )
    bt.logging.debug(
        f"Available API node UIDs for subnet {metagraph.netuid}: {query_uids}"
    )
    if len(query_uids) > 3:
        query_uids = random.sample(query_uids, 3)
    return query_uids


async def get_query_api_axons(
    wallet, metagraph=None, n=0.1, timeout=3, uids=None
):
    """
    Retrieves the axons of query API nodes based on their availability and stake.

    Args:
        wallet (bittensor.wallet): The wallet instance to use for querying nodes.
        metagraph (bittensor.metagraph, optional): The metagraph instance containing network information.
        n (float, optional): The fraction of top nodes to consider based on stake. Defaults to 0.1.
        timeout (int, optional): The timeout in seconds for pinging nodes. Defaults to 3.
        uids (Union[List[int], int], optional): The specific UID(s) of the API node(s) to query. Defaults to None.

    Returns:
        list: A list of axon objects for the available API nodes.
    """
    dendrite = bt.dendrite(wallet=wallet)

    if metagraph is None:
        metagraph = bt.metagraph(netuid=21)

    if uids is not None:
        query_uids = [uids] if isinstance(uids, int) else uids
    else:
        query_uids = await get_query_api_nodes(
            dendrite, metagraph, n=n, timeout=timeout
        )
    return [metagraph.axons[uid] for uid in query_uids]

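Selection therefore happens in two stages: intersect the top-stake UIDs with validator-trusted UIDs, then keep only the ones that answer a ping (capped at three, chosen at random). A minimal usage sketch (the wallet name is a placeholder):

    import asyncio
    import bittensor as bt
    from coding.api.get_query_axons import get_query_api_axons

    async def main():
        wallet = bt.wallet(name="my_wallet")  # placeholder wallet
        # With metagraph=None this falls back to netuid 21, per the default above.
        axons = await get_query_api_axons(wallet)
        print(axons)

    asyncio.run(main())
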
---
File: /coding/api/loggers.py
---

import httpx

class CallCountManager:
    def __init__(self, url, key):
        self.url = url
        self.key = key
        self.headers = {
            "Content-Type": "application/json"
        }

    async def add(self):
        async with httpx.AsyncClient() as client:
            response = await client.get(f"{self.url}/counter/add", params={"api_key": self.key}, headers=self.headers)
            response.raise_for_status()
            return response.json()

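openai.py fires this counter as a fire-and-forget asyncio task on every request. The equivalent standalone call (URL and key are placeholders):

    import asyncio
    from coding.api.loggers import CallCountManager

    counter = CallCountManager(url="https://stats.example.com", key="my-api-key")
    asyncio.run(counter.add())  # GET {url}/counter/add?api_key={key}
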
---
File: /coding/api/openai.py
---

import os
import httpx
import dotenv
import logging
import asyncio
import argparse
import bittensor as bt
from cachetools.func import ttl_cache
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse

from coding.protocol import StreamCodeSynapse
from coding.api.loggers import CallCountManager
from coding.api.protocol import CompletionRequest, ChatCompletionRequest
from coding.api.completion import completion, chat_completion, chat_completion_stream_generator, completion_stream_generator
from coding.api.cleaners import clean_fixes, remove_secret_lines, remove_generate_prompt

dotenv.load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


parser = argparse.ArgumentParser(
    description="Run the FastAPI server with configurable constants."
)
parser.add_argument(
    "--wallet", type=str, default="test_validator", help="Name of the wallet"
)  # TODO change to validator
parser.add_argument("--hotkey", type=str, default="default", help="Name of the hotkey")
parser.add_argument(
    "--network", type=str, default="ws://127.0.0.1:9946", help="Network address"
)  # TODO change to finney
parser.add_argument(
    "--netuid", type=int, default=1, help="NetUID value"
)  # TODO change to real
parser.add_argument(
    "--stat_api_url", type=str, default=None, help="Url of the statistics API"
)
parser.add_argument(
    "--stat_api_key", type=str, default=None, help="Key for the statistics API"
)
args = parser.parse_args()

WALLET_NAME = args.wallet
HOTKEY_NAME = args.hotkey
NETWORK = args.network
NETUID = args.netuid

STAT_API_URL = os.getenv("STAT_API_URL", args.stat_api_url)
STAT_API_KEY = os.getenv("STAT_API_KEY", args.stat_api_key)
CALL_COUNTER = None

if STAT_API_URL and STAT_API_KEY:
    CALL_COUNTER = CallCountManager(url=STAT_API_URL, key=STAT_API_KEY)


subtensor = None
subnet = None
wallet = None
dendrite = None


@asynccontextmanager
async def lifespan(app: FastAPI):
    global subtensor
    global subnet
    global dendrite
    global wallet
    app.requests_client = httpx.AsyncClient()
    subtensor = bt.subtensor(network=NETWORK)
    subnet = subtensor.metagraph(netuid=NETUID)
    wallet = bt.wallet(name=WALLET_NAME, hotkey=HOTKEY_NAME)
    dendrite = bt.dendrite(wallet=wallet)
    yield
    await app.requests_client.aclose()


@ttl_cache(maxsize=100, ttl=60 * 60)
def get_top_miner_uid():
    global subtensor
    global subnet
    subtensor = bt.subtensor(network=NETWORK)
    subnet = subtensor.metagraph(netuid=NETUID)
    return int(subnet.I.argmax())


async def forward(uid, synapse, timeout=25):
    global dendrite
    response = await dendrite(
        axons=subnet.axons[uid],
        synapse=synapse,
        deserialize=False,
        timeout=timeout,
        streaming=True,
    )
    return response

app = FastAPI(
    lifespan=lifespan,
    docs_url="/",
    redoc_url=None,
)


@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    if CALL_COUNTER:
        asyncio.create_task(CALL_COUNTER.add())
    if not request.attachments:
        request.attachments = []
    if not request.files:
        request.files = []
    try:
        generator = await forward(
            0, StreamCodeSynapse(messages=request.messages, attachments=request.attachments, files=request.files, uid=0)
        )
        if request.stream:
            return StreamingResponse(chat_completion_stream_generator(request, generator), media_type="text/event-stream")
        else:
            return JSONResponse(content=(await chat_completion(request, generator)).model_dump())
    except httpx.ReadTimeout:
        raise HTTPException(408) from None
    except Exception as e:
        raise HTTPException(500) from None


async def collect_async_gen(gen):
    return [item async for item in gen]


@app.post("/completions")
@app.post("/v1/completions")
async def completions(request: CompletionRequest):
    if CALL_COUNTER:
        asyncio.create_task(CALL_COUNTER.add())
    if isinstance(request.prompt, list):
        request.prompt = " ".join(request.prompt)
    # remove any fim prefix/suffixes
    request.prompt = remove_generate_prompt(remove_secret_lines(clean_fixes(request.prompt)))
    try:
        # generator = await forward(
        #     get_top_miner_uid(), StreamCodeSynapse(query=clean_deepseek(request.prompt))
        # )
        generator = await forward(
            0, StreamCodeSynapse(query=request.prompt, uid=0)
        )

        if request.stream:
            return StreamingResponse(completion_stream_generator(request, generator), media_type="text/event-stream")
        else:
            return JSONResponse(content=(await completion(request, generator)).model_dump())
    except httpx.ReadTimeout:
        raise HTTPException(408) from None
    except Exception as e:
        print(e)
        raise HTTPException(500) from None


@app.get("/models")
@app.get("/v1/models")
async def models():
    try:
        return "code"
    except httpx.ReadTimeout:
        raise HTTPException(408) from None
    except Exception:
        raise HTTPException(500) from None


if __name__ == "__main__":
    import uvicorn

    log_config = uvicorn.config.LOGGING_CONFIG
    log_config["loggers"]["uvicorn"]["level"] = "DEBUG"
    log_config["loggers"]["uvicorn.error"]["level"] = "DEBUG"
    log_config["loggers"]["uvicorn.access"]["level"] = "DEBUG"
    uvicorn.run("coding.api.openai:app", host="0.0.0.0", port=9990, reload=False)

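The server exposes OpenAI-compatible routes on port 9990, so any OpenAI-style HTTP client can talk to it. A minimal request sketch against a locally running instance (the host and message payload are illustrative; ChatMessage's exact fields live in coding.schemas):

    import httpx

    resp = httpx.post(
        "http://localhost:9990/v1/chat/completions",
        json={
            "model": "code",
            "stream": False,
            "messages": [{"role": "user", "content": "write hello world in python"}],
        },
        timeout=30,
    )
    print(resp.json())  # the ChatCompletionResponse serialized via model_dump()
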
---
File: /coding/api/protocol.py
---

import time
import uuid
from typing import List, Literal, Optional, Union, Any

from pydantic import (
    BaseModel,
    Field,
)
from coding.schemas import ChatMessage


def random_uuid() -> str:
    return uuid.uuid4().hex

class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: Optional[Literal["stop", "length"]] = None
    stop_reason: Union[None, int, str] = None


class ChatCompletionResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}")
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseChoice]

class CompletionResponseChoice(BaseModel):
    index: int
    text: str
    finish_reason: Optional[Literal["stop", "length"]] = None
    stop_reason: Union[None, int, str] = Field(
        default=None,
        description=(
            "The stop string or token id that caused the completion "
            "to stop, None if the completion finished for some other reason "
            "including encountering the EOS token"),
    )

class ErrorResponse(BaseModel):
    object: str = "error"
    message: str
    type: str
    param: Optional[str] = None
    code: int


class UsageInfo(BaseModel):
    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: Optional[int] = 0

class ResponseFormat(BaseModel):
    # type must be "json_object" or "text"
    type: Literal["text", "json_object"] = "text"

class CompletionRequest(BaseModel):
    model: str
    # a string, array of strings, array of tokens, or array of token arrays
    prompt: Union[List[int], List[List[int]], str, List[str]]
    stream: Optional[bool] = False

class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    stream: Optional[bool] = True
    attachments: Union[List[Any], None] = []
    files: Union[List[Any], None] = []


class CompletionResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"cmpl-{random_uuid()}")
    object: str = "text_completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseChoice]
    usage: UsageInfo


class CompletionResponseStreamChoice(BaseModel):
    index: int
    text: str
    finish_reason: Optional[Literal["stop", "length"]] = None
    stop_reason: Union[None, int, str] = Field(
        default=None,
        description=(
            "The stop string or token id that caused the completion "
            "to stop, None if the completion finished for some other reason "
            "including encountering the EOS token"),
    )


class CompletionStreamResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"cmpl-{random_uuid()}")
    object: str = "text_completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseStreamChoice]
    usage: Optional[UsageInfo] = Field(default=None)

class DeltaMessage(BaseModel):
    role: Optional[str] = None
    content: Optional[str] = None


class ChatCompletionResponseStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[Literal["stop", "length"]] = None
    stop_reason: Union[None, int, str] = None


class ChatCompletionStreamResponse(BaseModel):
    id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}")
    object: str = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseStreamChoice]
    usage: Optional[UsageInfo] = Field(default=None)

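These pydantic models mirror the OpenAI wire format, with ids and timestamps auto-filled by field defaults. A small construction sketch:

    from coding.api.protocol import (
        CompletionResponse,
        CompletionResponseChoice,
        UsageInfo,
    )

    resp = CompletionResponse(
        model="code",
        usage=UsageInfo(),  # token counts default to 0
        choices=[CompletionResponseChoice(index=0, text="print('hi')",
                                          finish_reason="stop")],
    )
    print(resp.model_dump_json())  # id and created come from default factories
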
---
File: /coding/api/testing.py
---


---
File: /coding/base/utils/weight_utils.py
---

import numpy as np
from typing import Tuple, List
import bittensor

U32_MAX = 4294967295
U16_MAX = 65535

def normalize_max_weight(
    x: np.ndarray, limit: float = 0.1
) -> np.ndarray:
    r"""Normalizes the numpy array x so that sum(x) = 1 and the max value is not greater than the limit.
    Args:
        x (:obj:`np.ndarray`):
            Array to be max_value normalized.
        limit: float:
            Max value after normalization.
    Returns:
        y (:obj:`np.ndarray`):
            Normalized x array.
    """
    epsilon = 1e-7  # For numerical stability after normalization

    weights = x.copy()
    values = np.sort(weights)

    if x.sum() == 0 or len(x) * limit <= 1:
        return np.ones_like(x) / x.size
    else:
        estimation = values / values.sum()

        if estimation.max() <= limit:
            return weights / weights.sum()

        # Find the cumulative sum and sorted array
        cumsum = np.cumsum(estimation, 0)

        # Determine the index of cutoff
        estimation_sum = np.array(
            [(len(values) - i - 1) * estimation[i] for i in range(len(values))]
        )
        n_values = (estimation / (estimation_sum + cumsum + epsilon) < limit).sum()

        # Determine the cutoff based on the index
        cutoff_scale = (limit * cumsum[n_values - 1] - epsilon) / (
            1 - (limit * (len(estimation) - n_values))
        )
        cutoff = cutoff_scale * values.sum()

        # Applying the cutoff
        weights[weights > cutoff] = cutoff

        y = weights / weights.sum()

        return y


def convert_weights_and_uids_for_emit(
    uids: np.ndarray, weights: np.ndarray
) -> Tuple[List[int], List[int]]:
    r"""Converts weights into an integer u16 representation for emission on chain.
    Args:
        uids (:obj:`np.ndarray`):
            Array of uids as destinations for passed weights.
        weights (:obj:`np.ndarray`):
            Array of weights.
    Returns:
        weight_uids (List[int]):
            Uids as a list.
        weight_vals (List[int]):
            Weights as a list.
    """
    # Checks.
    weights = weights.tolist()
    uids = uids.tolist()
    if np.min(weights) < 0:
        raise ValueError(
            "Passed weight is negative cannot exist on chain {}".format(weights)
        )
    if np.min(uids) < 0:
        raise ValueError("Passed uid is negative cannot exist on chain {}".format(uids))
    if len(uids) != len(weights):
        raise ValueError(
            "Passed weights and uids must have the same length, got {} and {}".format(
                len(uids), len(weights)
            )
        )
    if np.sum(weights) == 0:
        return [], []  # Nothing to set on chain.
    else:
        max_weight = float(np.max(weights))
        weights = [
            float(value) / max_weight for value in weights
        ]  # max-upscale values (max_weight = 1).

    weight_vals = []
    weight_uids = []
    for weight_i, uid_i in zip(weights, uids):
        uint16_val = round(
            float(weight_i) * int(U16_MAX)
        )  # convert to int representation.

        # Filter zeros
        if uint16_val != 0:
            weight_vals.append(uint16_val)
            weight_uids.append(uid_i)

    return weight_uids, weight_vals


def process_weights_for_netuid(
    uids,
    weights: np.ndarray,
    netuid: int,
    subtensor: "bittensor.subtensor",
    metagraph: "bittensor.metagraph" = None,
    exclude_quantile: int = 0,
) -> np.ndarray:
    print("process_weights_for_netuid()")
    print("weights", weights)
    print("netuid", netuid)
    print("subtensor", subtensor)
    print("metagraph", metagraph)

    # Get latest metagraph from chain if metagraph is None.
    if metagraph is None:
        metagraph = subtensor.metagraph(netuid)

    # Cast weights to floats.
    if not isinstance(weights, np.ndarray) or weights.dtype != np.float32:
        weights = weights.astype(np.float32)

    # Network configuration parameters from the subtensor.
    # These parameters determine the range of acceptable weights for each neuron.
    quantile = exclude_quantile / U16_MAX
    min_allowed_weights = subtensor.min_allowed_weights(netuid=netuid)
    max_weight_limit = subtensor.max_weight_limit(netuid=netuid)
    print("quantile", quantile)
    print("min_allowed_weights", min_allowed_weights)
    print("max_weight_limit", max_weight_limit)

    # Find all non-zero weights.
    non_zero_weight_idx = np.argwhere(weights > 0).squeeze()
    non_zero_weight_uids = uids[non_zero_weight_idx]
    non_zero_weights = weights[non_zero_weight_idx]
    if non_zero_weights.size == 0 or metagraph.n < min_allowed_weights:
        bittensor.logging.warning("No non-zero weights, returning all ones.")
        final_weights = np.ones((metagraph.n)) / metagraph.n
        print("final_weights", final_weights)
        return np.arange(len(final_weights)), final_weights

    elif non_zero_weights.size < min_allowed_weights:
        bittensor.logging.warning(
            "Fewer non-zero weights than min allowed weights, boosting to minimum non-zero weights."
        )
        weights = (
            np.ones((metagraph.n)) * 1e-5
        )  # creating minimum even non-zero weights
        weights[non_zero_weight_idx] += non_zero_weights
        print("final_weights", weights)
        normalized_weights = normalize_max_weight(
            x=weights, limit=max_weight_limit
        )
        return np.arange(len(normalized_weights)), normalized_weights

    print("non_zero_weights", non_zero_weights)

    # Compute the exclude quantile and find the weights in the lowest quantile
    max_exclude = max(0, len(non_zero_weights) - min_allowed_weights) / len(
        non_zero_weights
    )
    exclude_quantile = min([quantile, max_exclude])
    lowest_quantile = np.quantile(non_zero_weights, exclude_quantile)
    print("max_exclude", max_exclude)
    print("exclude_quantile", exclude_quantile)
    print("lowest_quantile", lowest_quantile)

    # Exclude all weights below the allowed quantile.
    condition = non_zero_weights >= lowest_quantile
    non_zero_weight_uids = non_zero_weight_uids[condition]
    non_zero_weights = non_zero_weights[condition]
    print("non_zero_weight_uids", non_zero_weight_uids)
    print("non_zero_weights", non_zero_weights)

    # Normalize weights and return.
    normalized_weights = normalize_max_weight(
        x=non_zero_weights, limit=max_weight_limit
    )
    print("final_weights", normalized_weights)

    return non_zero_weight_uids, normalized_weights

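convert_weights_and_uids_for_emit max-upscales the weights so the largest becomes 1.0, maps each onto the u16 range, and drops zero entries. A small worked example:

    import numpy as np
    from coding.base.utils.weight_utils import convert_weights_and_uids_for_emit

    uids = np.array([0, 1, 2])
    weights = np.array([0.0, 0.5, 1.0])
    # Max-upscaling leaves [0.0, 0.5, 1.0]; round(0.5 * 65535) = 32768 and
    # round(1.0 * 65535) = 65535; the zero weight for uid 0 is filtered out.
    print(convert_weights_and_uids_for_emit(uids, weights))  # ([1, 2], [32768, 65535])
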
---
File: /coding/base/__init__.py
---


---
File: /coding/base/miner.py
---

# The MIT License (MIT)
# Copyright © 2024 Yuma Rao

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import time
import argparse
import asyncio
import threading
import bittensor as bt

from typing import Union
from traceback import print_exception
from coding.base.neuron import BaseNeuron
from coding.utils.config import add_miner_args
from coding.protocol import StreamCodeSynapse, HFModelSynapse


class BaseMinerNeuron(BaseNeuron):
    """
    Base class for Bittensor miners.
    """

    @classmethod
    def add_args(cls, parser: argparse.ArgumentParser):
        super().add_args(parser)
        add_miner_args(cls, parser)

    def __init__(self, config=None):
        super().__init__(config=config)

        # Warn if allowing incoming requests from anyone.
        if not self.config.blacklist.force_validator_permit:
            bt.logging.warning(
                "You are allowing non-validators to send requests to your miner. This is a security risk."
            )
        if self.config.blacklist.allow_non_registered:
            bt.logging.warning(
                "You are allowing non-registered entities to send requests to your miner. This is a security risk."
            )

        # The axon handles request processing, allowing validators to send this miner requests.
        self.axon = bt.axon(wallet=self.wallet, config=self.config)

        # Attach the functions that determine how each request is serviced.
        bt.logging.info("Attaching forward function to miner axon.")
        for forward_capability in self.forward_capabilities:
            forward_fn = forward_capability['forward']
            blacklist_fn = forward_capability['blacklist']
            priority_fn = forward_capability['priority']
            self.axon.attach(
                forward_fn=forward_fn,
                blacklist_fn=blacklist_fn,
                priority_fn=priority_fn,
            )
        bt.logging.info(f"Axon created: {self.axon}")

        # Instantiate runners
        self.should_exit: bool = False
        self.is_running: bool = False
        self.thread: threading.Thread = None
        self.lock = asyncio.Lock()

    def run(self):
        """
        Initiates and manages the main loop for the miner on the Bittensor network. The main loop handles graceful shutdown on keyboard interrupts and logs unforeseen errors.

        This function performs the following primary tasks:
        1. Checks for registration on the Bittensor network.
        2. Starts the miner's axon, making it active on the network.
        3. Periodically resynchronizes with the chain, updating the metagraph with the latest network state and setting weights.

        The miner continues its operations until `should_exit` is set to True or an external interruption occurs.
        During each epoch of its operation, the miner waits for new blocks on the Bittensor network, updates its
        knowledge of the network (metagraph), and sets its weights. This process ensures the miner remains active
        and up-to-date with the network's latest state.

        Note:
            - The function leverages the global configurations set during the initialization of the miner.
            - The miner's axon serves as its interface to the Bittensor network, handling incoming and outgoing requests.

        Raises:
            KeyboardInterrupt: If the miner is stopped by a manual interruption.
            Exception: For unforeseen errors during the miner's operation, which are logged for diagnosis.
        """

        # Check that the miner is registered on the network.
        self.sync()

        # Serve passes the axon information to the network + netuid we are hosting on.
        # This will auto-update if the axon port or external IP has changed.
        bt.logging.info(
            f"Serving miner axon {self.axon} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
        )
        self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor)

        # Start the miner's axon, making it active on the network.
        self.axon.start()

        bt.logging.info(f"Miner starting at block: {self.block}")

        # This loop maintains the miner's operations until intentionally stopped.
        try:
            while not self.should_exit:
                while (
                    self.block - self.metagraph.last_update[self.uid]
                    < self.config.neuron.epoch_length
                ):
                    # Wait before checking again.
                    time.sleep(1)

                    # Check if we should exit.
                    if self.should_exit:
                        break

                # Sync metagraph and potentially set weights.
                self.sync()
                self.step += 1

        # If someone intentionally stops the miner, it'll safely terminate operations.
        except KeyboardInterrupt:
            self.axon.stop()
            bt.logging.success("Miner killed by keyboard interrupt.")
            exit()

        # In case of unforeseen errors, the miner will log the error and continue operations.
        except Exception as err:
            bt.logging.error("Error during mining", str(err))
            bt.logging.debug(print_exception(type(err), err, err.__traceback__))
            self.should_exit = True

    def run_in_background_thread(self):
        """
        Starts the miner's operations in a separate background thread.
        This is useful for non-blocking operations.
        """
        if not self.is_running:
            bt.logging.debug("Starting miner in background thread.")
            self.should_exit = False
            self.thread = threading.Thread(target=self.run, daemon=True)
            self.thread.start()
            self.is_running = True
            bt.logging.debug("Started")

    def stop_run_thread(self):
        """
        Stops the miner's operations that are running in the background thread.
        """
        if self.is_running:
            bt.logging.debug("Stopping miner in background thread.")
            self.should_exit = True
            self.thread.join(5)
            self.is_running = False
            bt.logging.debug("Stopped")

    def __enter__(self):
        """
        Starts the miner's operations in a background thread upon entering the context.
        This method facilitates the use of the miner in a 'with' statement.
        """
        self.run_in_background_thread()

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Stops the miner's background operations upon exiting the context.
        This method facilitates the use of the miner in a 'with' statement.

        Args:
            exc_type: The type of the exception that caused the context to be exited.
                      None if the context was exited without an exception.
            exc_value: The instance of the exception that caused the context to be exited.
                       None if the context was exited without an exception.
            traceback: A traceback object encoding the stack trace.
                       None if the context was exited without an exception.
        """
        self.stop_run_thread()

    def resync_metagraph(self):
        """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph."""
        # bt.logging.info("resync_metagraph()")

        # Sync the metagraph.
        self.metagraph.sync(subtensor=self.subtensor)
        self.last_block_sync = self.block

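Since __enter__ starts run() in a daemon thread and __exit__ stops it, a concrete miner is normally driven as a context manager. A minimal sketch, assuming a Miner subclass of BaseMinerNeuron that defines forward_capabilities (the subclass name is hypothetical):

    import time

    if __name__ == "__main__":
        with Miner() as miner:  # hypothetical BaseMinerNeuron subclass
            while True:
                time.sleep(1)  # run() loops in the background thread
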
---
File: /coding/base/neuron.py
---

# The MIT License (MIT)
# Copyright © 2024 Yuma Rao

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import sys
import copy
import json

import bittensor as bt

from abc import ABC, abstractmethod

# Sync calls set weights and also resyncs the metagraph.
from coding.utils.config import check_config, add_args, config
from coding.utils.misc import ttl_get_block
from coding import __spec_version__ as spec_version

from coding.mock import MockSubtensor, MockMetagraph


class BaseNeuron(ABC):
    """
    Base class for Bittensor miners. This class is abstract and should be inherited by a subclass. It contains the core logic for all neurons: validators and miners.

    In addition to creating a wallet, subtensor, and metagraph, this class also handles the synchronization of the network state via a basic checkpointing mechanism based on epoch length.
    """

    @classmethod
    def check_config(cls, config: "bt.Config"):
        check_config(cls, config)

    @classmethod
    def add_args(cls, parser):
        add_args(cls, parser)

    @classmethod
    def _config(cls):
        return config(cls)

    subtensor: "bt.subtensor"
    wallet: "bt.wallet"
    metagraph: "bt.metagraph"
    spec_version: int = spec_version

    @property
    def block(self):
        return ttl_get_block(self)

    def __init__(self, config=None):
        print("start", flush=True)
        base_config = copy.deepcopy(config or BaseNeuron._config())
        self.config = self._config()
        self.config.merge(base_config)
        self.check_config(self.config)

        # Set up logging with the provided configuration and directory.
        bt.logging(config=self.config, logging_dir=self.config.full_path)

        # If a GPU is required, set the device to cuda:N (e.g. cuda:0)
        self.device = self.config.neuron.device

        # Log the configuration for reference.
        bt.logging.info(self.config)

        # Build Bittensor objects
        # These are core Bittensor classes to interact with the network.
        bt.logging.info("Setting up bittensor objects.")

        # The wallet holds the cryptographic key pairs for the miner.
        if self.config.mock:
            self.wallet = bt.MockWallet(config=self.config)
            self.subtensor = MockSubtensor(self.config.netuid, wallet=self.wallet)
            self.metagraph = MockMetagraph(netuid=self.config.netuid, subtensor=self.subtensor)
        else:
            self.wallet = bt.wallet(config=self.config)
            self.subtensor = bt.subtensor(config=self.config)
            self.metagraph = self.subtensor.metagraph(self.config.netuid)

        bt.logging.info(f"Wallet: {self.wallet}")
        bt.logging.info(f"Subtensor: {self.subtensor}")
        bt.logging.info(f"Metagraph: {self.metagraph}")

        # Check if the miner is registered on the Bittensor network before proceeding further.
        self.check_registered()

        # Each miner gets a unique identity (UID) in the network for differentiation.
        self.uid = self.metagraph.hotkeys.index(self.wallet.hotkey.ss58_address)
        bt.logging.info(
            f"Running neuron on subnet: {self.config.netuid} with uid {self.uid} using network: {self.subtensor.chain_endpoint}"
        )
        self.last_block_sync = self.block
        self.step = 0


    @abstractmethod
    def forward(self, synapse: bt.Synapse) -> bt.Synapse:
        ...

    @abstractmethod
    def run(self):
        ...

    def sync(self):
        """
        Wrapper for synchronizing the state of the network for the given miner or validator.
        """
        # Ensure miner or validator hotkey is still registered on the network.
        self.check_registered()

        if self.should_sync_metagraph():
            self.resync_metagraph()

        if self.should_set_weights():
            self.set_weights()

        # Always save state.
        self.save_state()

    def check_registered(self):
        # --- Check for registration.
        try:
            if not self.subtensor.is_hotkey_registered(
                netuid=self.config.netuid,
                hotkey_ss58=self.wallet.hotkey.ss58_address,
            ):
                bt.logging.error(
                    f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}."
                    f" Please register the hotkey using `btcli subnets register` before trying again"
                )
                sys.exit()
        except json.decoder.JSONDecodeError:
            bt.logging.error(
                f"JSONDecodeError encountered while checking registration for wallet: {self.wallet} on netuid {self.config.netuid}."
            )
            # Handle the error or continue without exiting

    def should_sync_metagraph(self):
        """
        Check if enough epoch blocks have elapsed since the last checkpoint to sync.
        """
        return (
            self.block - self.last_block_sync
        ) > self.config.neuron.epoch_length

    def should_set_weights(self) -> bool:
        # Don't set weights on initialization.
        if self.step == 0:
            return False

        # Check if enough epoch blocks have elapsed since the last epoch.
        if self.config.neuron.disable_set_weights:
            return False

        # If the neuron has a validator permit, we assume it's running the validator code. If it is a dual-permit neuron, we also check that it has a set_weights method (only true if it is running the validator neuron).
        if not self.metagraph.validator_permit[self.uid] or not hasattr(
            self, "set_weights"
        ):
            return False

        # Define appropriate logic for when to set weights.
        return (
            self.block - self.metagraph.last_update[self.uid]
        ) > self.config.neuron.epoch_length

    def save_state(self):
        pass

    def load_state(self):
        bt.logging.debug(
            "load_state() not implemented for this neuron. You can implement this function to load model checkpoints or other useful data."
        )

1368---
1369File: /coding/base/validator.py
1370---
1371
1372# The MIT License (MIT)
1373# Copyright © 2024 Yuma Rao
1374
1375# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
1376# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
1377# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
1378# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
1379
1380# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
1381# the Software.
1382
1383# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
1384# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1385# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
1386# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1387# DEALINGS IN THE SOFTWARE.
1388
1389import os
1390import sys
1391import copy
1392import asyncio
1393import argparse
1394import threading
1395import bittensor as bt
1396import numpy as np
1397
1398from traceback import print_exception
1399
1400from coding.mock import MockDendrite
1401from coding.base.neuron import BaseNeuron
1402from coding.utils.config import add_validator_args
1403from coding.utils.exceptions import MaxRetryError
1404from coding.utils.uids import get_hotkey_from_uid, get_uid_from_hotkey
1405
1406class BaseValidatorNeuron(BaseNeuron):
1407 """
1408 Base class for Bittensor validators. Your validator should inherit from this class.
1409 """
1410
1411 @classmethod
1412 def add_args(cls, parser: argparse.ArgumentParser):
1413 super().add_args(parser)
1414 add_validator_args(cls, parser)
1415
1416 def __init__(self, config=None):
1417 super().__init__(config=config)
1418 self.load_state()
1419 # Save a copy of the hotkeys to local memory.
1420 self.hotkeys = copy.deepcopy(self.metagraph.hotkeys)
1421
1422 # Dendrite lets us send messages to other nodes (axons) in the network.
1423 if self.config.mock:
1424 self.dendrite = MockDendrite(wallet=self.wallet)
1425 else:
1426 self.dendrite = bt.dendrite(wallet=self.wallet)
1427 bt.logging.info(f"Dendrite: {self.dendrite}")
1428
1429 # Set up initial scoring weights for validation
1430 bt.logging.info("Building validation weights.")
1431 self.scores = np.zeros(
1432 self.metagraph.n
1433 )
1434 # Init sync with the network. Updates the metagraph.
1435 self.sync()
1436
1437 # Serve axon to enable external connections.
1438 if not self.config.neuron.axon_off:
1439 self.serve_axon()
1440 else:
1441 bt.logging.warning("axon off, not serving ip to chain.")
1442
1443 # Create asyncio event loop to manage async tasks.
1444 self.loop = asyncio.get_event_loop()
1445
1446 # Instantiate runners
1447 self.should_exit: bool = False
1448 self.is_running: bool = False
1449 self.thread: threading.Thread = None
1450 self.lock = asyncio.Lock()
1451
1452 def serve_axon(self):
1453 """Serve axon to enable external connections."""
1454
1455 bt.logging.info("serving ip to chain...")
1456 try:
1457 self.axon = bt.axon(wallet=self.wallet, config=self.config)
1458
1459 try:
1460 self.axon.attach(
1461 forward_fn=self._forward,
1462 blacklist_fn=self.blacklist,
1463 priority_fn=self.priority,
1464 )
1465 self.axon.serve(
1466 netuid=self.config.netuid,
1467 subtensor=self.subtensor,
1468 )
1469 except Exception as e:
1470 bt.logging.error(f"Failed to serve Axon with exception: {e}")
1471
1472 except Exception as e:
1473 bt.logging.error(f"Failed to create Axon initialize with exception: {e}")
1474
1475 def run(self):
1476 """
1477 Initiates and manages the main loop for the miner on the Bittensor network. The main loop handles graceful shutdown on keyboard interrupts and logs unforeseen errors.
1478
1479 This function performs the following primary tasks:
1480 1. Check for registration on the Bittensor network.
1481 2. Continuously forwards queries to the miners on the network, rewarding their responses and updating the scores accordingly.
1482 3. Periodically resynchronizes with the chain; updating the metagraph with the latest network state and setting weights.
1483
1484 The essence of the validator's operations is in the forward function, which is called every step. The forward function is responsible for querying the network and scoring the responses.
1485
1486 Note:
1487 - The function leverages the global configurations set during the initialization of the miner.
1488 - The miner's axon serves as its interface to the Bittensor network, handling incoming and outgoing requests.
1489
1490 Raises:
1491 KeyboardInterrupt: If the miner is stopped by a manual interruption.
1492 Exception: For unforeseen errors during the miner's operation, which are logged for diagnosis.
1493 """
1494
1495 # Check that validator is registered on the network.
1496
1497 try:
1498 self.sync()
1499 except Exception as e: # Broken pipe handling
1500 bt.logging.error("Error while syncing, killing self to restart", str(e))
1501 bt.logging.debug(print_exception(type(e), e, e.__traceback__))
1502 sys.exit(1)
1503 if not self.config.neuron.axon_off:
1504 try:
1505 bt.logging.info(
1506 f"Running validator {self.axon} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
1507 )
1508 # serve the axon
1509 self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor)
1510 self.axon.start()
1511 except Exception as e:
1512 bt.logging.error(f"Failed to serve and then start Axon with exception: {e}")
1513 else:
1514 bt.logging.info(
1515 f"Running validator on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
1516 )
1517
1518 bt.logging.info(f"Validator starting at block: {self.block}")
1519
1520 # This loop maintains the validator's operations until intentionally stopped.
1521 try:
1522 while True:
1523 bt.logging.info(f"step({self.step}) block({self.block})")
1524
1525 forward_timeout = self.config.neuron.forward_max_time
1526 try:
1527 tasks = [self.loop.create_task(asyncio.run(self.forward(synapse=None))) for _ in range(self.config.neuron.num_concurrent_forwards)]
1528 self.loop.run_until_complete(
1529 asyncio.wait_for(asyncio.gather(*tasks), timeout=forward_timeout)
1530 )
1531 except MaxRetryError as e:
1532 bt.logging.error(f"MaxRetryError: {e}")
1533 continue
1534 except asyncio.TimeoutError as e:
1535 bt.logging.error(
1536 f"Forward timeout: Task execution exceeded {forward_timeout} seconds and was cancelled.: {e}"
1537 )
1538 continue
1539 except Exception as e: # TODO this wasnt here previously, but any errors were cancelling the forward loop so i added it
1540 bt.logging.error("Error during validation", str(e))
1541 bt.logging.debug(print_exception(type(e), e, e.__traceback__))
1542 sys.exit(1)
1543
1544 # Check if we should exit.
1545 if self.should_exit:
1546 break
1547
1548 # Sync metagraph and potentially set weights.
1549 self.sync()
1550 if self.step is None:
1551 self.step = 0
1552 self.step += 1
1553
1554 # If someone intentionally stops the validator, it'll safely terminate operations.
1555 except KeyboardInterrupt:
1556 self.axon.stop()
1557 bt.logging.success("Validator killed by keyboard interrupt.")
1558 sys.exit()
1559
1560 # In case of unforeseen errors, the validator will log the error and quit
1561 except Exception as err:
1562 bt.logging.error("Error during validation", str(err))
1563 bt.logging.debug(print_exception(type(err), err, err.__traceback__))
1564 # self.should_exit = True
1565 sys.exit()
1566
1567
1568 def run_in_background_thread(self):
1569 """
1570 Starts the validator's operations in a background thread upon entering the context.
1571 This method facilitates the use of the validator in a 'with' statement.
1572 """
1573 if not self.is_running:
1574 bt.logging.debug("Starting validator in background thread.")
1575 self.should_exit = False
1576 self.thread = threading.Thread(target=self.run, daemon=True)
1577 self.thread.start()
1578 self.is_running = True
1579 bt.logging.debug("Started")
1580
1581 def stop_run_thread(self):
1582 """
1583 Stops the validator's operations that are running in the background thread.
1584 """
1585 if self.is_running:
1586 bt.logging.debug("Stopping validator in background thread.")
1587 self.should_exit = True
1588 self.thread.join(5)
1589 self.is_running = False
1590 bt.logging.debug("Stopped")
1591
1592 def __enter__(self):
1593 self.run_in_background_thread()
1594 return self
1595
1596 def __exit__(self, exc_type, exc_value, traceback):
1597 """
1598 Stops the validator's background operations upon exiting the context.
1599 This method facilitates the use of the validator in a 'with' statement.
1600
1601 Args:
1602 exc_type: The type of the exception that caused the context to be exited.
1603 None if the context was exited without an exception.
1604 exc_value: The instance of the exception that caused the context to be exited.
1605 None if the context was exited without an exception.
1606 traceback: A traceback object encoding the stack trace.
1607 None if the context was exited without an exception.
1608 """
1609 if self.is_running:
1610 bt.logging.debug("Stopping validator in background thread.")
1611 self.should_exit = True
1612 self.thread.join(5)
1613 self.is_running = False
1614 bt.logging.debug("Stopped")
1615
1616 def set_weights(self):
1617 """
1618 Sets the validator weights to the metagraph hotkeys based on the scores it has received from the miners. The weights determine the trust and incentive level the validator assigns to miner nodes on the network.
1619 """
1620 # check to be sure self.scores is not all 0's
1621 if np.all(self.scores == 0):
1622 bt.logging.warning("self.scores is all 0's, skipping set_weights.")
1623 return
1624 # Check if self.scores contains any NaN values and log a warning if it does.
1625 for _ in range(1):
1626 raw_weights = np.divide(self.scores, np.sum(self.scores, axis=0))
1627
1628 # Process the raw weights to final_weights via subtensor limitations.
1629 (
1630 processed_weight_uids,
1631 processed_weights,
1632 ) = bt.utils.weight_utils.process_weights_for_netuid(
1633 uids=self.metagraph.uids,
1634 weights=raw_weights,
1635 netuid=self.config.netuid,
1636 subtensor=self.subtensor,
1637 metagraph=self.metagraph,
1638 )
1639 print("processed_weights", processed_weights)
1640 print("processed_weight_uids", processed_weight_uids)
1641
1642 # Convert to uint16 weights and uids.
1643 (
1644 uint_uids,
1645 uint_weights,
1646 ) = bt.utils.weight_utils.convert_weights_and_uids_for_emit(
1647 uids=processed_weight_uids, weights=processed_weights
1648 )
1649 print("uint_weights", uint_weights)
1650 print("uint_uids", uint_uids)
1651 # Set the weights on chain via our subtensor connection.
1652 result, msg = self.subtensor.set_weights(
1653 wallet=self.wallet,
1654 netuid=self.config.netuid,
1655 uids=uint_uids,
1656 weights=uint_weights,
1657 wait_for_finalization=False,
1658 wait_for_inclusion=False,
1659 version_key=self.spec_version,
1660 )
1661 if result is True:
1662 bt.logging.info("Successfully set weights on chain.")
1663 return
1664 else:
1665 bt.logging.error(f"set_weights failed: {msg}")
1666
1667 def resync_metagraph(self):
1668 """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph."""
1669 bt.logging.info("resync_metagraph()")
1670
1671 # Copies state of metagraph before syncing.
1672 previous_metagraph = copy.deepcopy(self.metagraph)
1673
1674 # Sync the metagraph.
1675 self.metagraph.sync(subtensor=self.subtensor)
1676
1677 # Check if the metagraph axon info has changed.
1678 if previous_metagraph.axons == self.metagraph.axons:
1679 return
1680
1681 bt.logging.info(
1682 "Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages"
1683 )
1684 # Zero out all hotkeys that have been replaced.
1685 for uid, hotkey in enumerate(self.hotkeys):
1686 if hotkey != self.metagraph.hotkeys[uid]:
1687 self.scores[uid] = 0 # hotkey has been replaced
1688
1689 # Check to see if the metagraph has changed size.
1690 # If so, we need to add new hotkeys and moving averages.
1691 if len(self.hotkeys) < len(self.metagraph.hotkeys):
1692 # Update the size of the moving average scores.
1693 new_moving_average = np.zeros(self.metagraph.n)
1694 min_len = min(len(self.hotkeys), len(self.scores))
1695 new_moving_average[:min_len] = self.scores[:min_len]
1696 self.scores = new_moving_average
1697
1698 # Update the hotkeys.
1699 self.hotkeys = copy.deepcopy(self.metagraph.hotkeys)
1700
1701 def update_scores(self):
1702 """Performs exponential moving average on the scores based on the rewards received from the miners."""
1703 if not self.finetune_results:
1704 return
1705 latest_competition_id = max(self.finetune_results.keys())
1706 finetune_scores = np.zeros(self.metagraph.n)
1707 for tracker in self.finetune_results[latest_competition_id].trackers:
1708 finetune_scores[tracker.uid] = tracker.score
1709
1710 max_score = np.max(finetune_scores)
1711 threshold = max_score - 0.1 # within 0.1 of max score
1712 finetune_scores[finetune_scores < threshold] = 0
1713 self.scores = finetune_scores
1714 bt.logging.info(f"Updated moving avg scores: {self.scores}")
1715
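A worked example of the thresholding above, using hypothetical tracker scores:

import numpy as np

scores = np.array([0.92, 0.85, 0.60])  # hypothetical tracker scores
threshold = scores.max() - 0.1         # 0.82: only scores within 0.1 of the max survive
scores[scores < threshold] = 0         # -> [0.92, 0.85, 0.0]
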
1716 def save_state(self):
1717 """Saves the state of the validator to a file."""
1718 bt.logging.info("Saving validator state.")
1719
1720 # Convert finetune_results to a numpy array of tuples for saving
1721 finetune_items = np.array(list(self.finetune_results.items()), dtype=object)
1722
1723 # Save the state of the validator to file.
1724 np.savez(
1725 self.config.neuron.full_path + "/state.npz",
1726 step=self.step,
1727 scores=self.scores,
1728 hotkeys=self.hotkeys,
1729 finetune_items=finetune_items,
1730 )
1731
1732 def load_state(self):
1733 """Loads the state of the validator from a file."""
1734 bt.logging.info("Loading validator state.")
1735
1736 state_path = self.config.neuron.full_path + "/state.npz"
1737
1738 # Check if the state file exists before loading.
1739 if not os.path.exists(state_path):
1740 bt.logging.warning("State file not found. Loading default state.")
1741 self.step = None
1742 self.scores = None
1743 self.hotkeys = None
1744 self.finetune_results = {}
1745 return
1746
1747 # Load the state of the validator from file.
1748 state = np.load(state_path, allow_pickle=True)
1749
1750 # Set attributes, using default values if they don't exist in the state file.
1751 self.step = state["step"].item() if "step" in state else None
1752 self.scores = state["scores"] if "scores" in state else None
1753 self.hotkeys = state["hotkeys"] if "hotkeys" in state else None
1754
1755 # Convert finetune_items back to dictionary
1756 self.finetune_results = {}
1757 if "finetune_items" in state:
1758 for key, value in state["finetune_items"]:
1759 self.finetune_results[key] = value
1760
1761
1762
1763
1764
1765
1766---
1767File: /coding/datasets/prompts/bigcodebench.py
1768---
1769
1770DATA_SYNTH_PROMPT = """
1771Based on the following simple example, write more complex scenarios and invoke multiple Python libraries
1772to solve each problem.
1773 The written intent should align with a more specific and practical scenario, but should still be easy to
1774 assert functional correctness on.
1775For each scenario, write a single Python function with the rewritten intent.
1776Please include requirements and terminal-based input-output examples in the function docstring.
1777The function should contain complex logic like if-else statements and loops.
1778You have to use more than three Python libraries for a scenario. Write imports and variable definitions
1779outside the function.
1780Try to avoid using web APIs if possible.
1781If there are any constants (e.g. strings and numeric values) used in the functions, you need to declare
1782them before the function.
1783If data is used, you need to provide sample data in the comment.
1784Try to return values for correctness assertion.
1785Each programming scenario and intent should be separated by a newline.
1786Generate two examples with two scenarios from the following simple example:
1787```python
1788def count_char(char, word):
1789 \"\"\"Counts the characters in word\"\"\"
1790 return word.count(char) # If you want to do it manually try a for loop
1791```
1792
1793
1794Scenario 1:
1795```python
1796import re
1797from collections import Counter
1798from itertools import chain
1799import pandas as pd
1800import numpy as np
1801import random
1802import string
1803
1804# Constants
1805COMMON_WORDS = ["the", "be", "to", "of", "and", "a", "in", "that", "have", "I"]
1806THRESHOLD_FREQUENCY = 5
1807
1808
1809def analyze_text_corpus(corpus):
1810 \"\"\"
1811 Analyzes a list of text documents for word frequency, rare words, and token length statistics.
1812
1813 Parameters:
1814 - corpus (List[str]): A list of text documents, where each document is a single string.
1815
1816 Requirements:
1817 - re
1818 - collections
1819 - itertools
1820 - pandas
1821 - numpy
1822 - random
1823 - string
1824
1825 Example:
1826 >>> corpus = [
1827 ... "The quick brown fox jumps over the lazy dog.",
1828 ... "To be or not to be, that is the question.",
1829 ... "A journey of a thousand miles begins with a single step."
1830 ... ]
1831 >>> result = analyze_text_corpus(corpus)
1832 >>> print(result)
1833 {
1834 'most_common_words': [('the', 3), ('be', 2)],
1835 'rare_words': ['journey', 'thousand', 'begins'],
1836 'token_length_stats': {
1837 'mean': 4.0,
1838 'std_dev': 1.58,
1839 'median': 4
1840 }
1841 }
1842
1843 Returns:
1844 dict: A dictionary containing the most common words, rare words, and token length statistics.
1845 \"\"\"
1846
1847 # Tokenize and filter common words
1848 all_tokens = [re.findall(r'\\b\\w+\\b', doc.lower()) for doc in corpus]
1849 flattened_tokens = list(chain.from_iterable(all_tokens))
1850 filtered_tokens = [word for word in flattened_tokens if word not in COMMON_WORDS]
1851
1852 # Word frequency analysis
1853 word_counts = Counter(filtered_tokens)
1854 most_common_words = word_counts.most_common(5)
1855 rare_words = [word for word, count in word_counts.items() if count < THRESHOLD_FREQUENCY]
1856
1857 # Token length analysis
1858 token_lengths = [len(token) for token in flattened_tokens]
1859 token_length_series = pd.Series(token_lengths)
1860 token_length_stats = {
1861 'mean': np.round(token_length_series.mean(), 2),
1862 'std_dev': np.round(token_length_series.std(), 2),
1863 'median': int(token_length_series.median())
1864 }
1865
1866 return {
1867 'most_common_words': most_common_words,
1868 'rare_words': rare_words,
1869 'token_length_stats': token_length_stats
1870 }
1871```
1872Scenario 2:
1873```python
1874import re
1875from collections import Counter
1876from itertools import chain
1877import pandas as pd
1878import numpy as np
1879import random
1880import string
1881# Sample dataset for product data analysis
1882# Commented data format for input to function
1883# products = [
1884# {"name": "Laptop", "price": 899.99, "category": "Electronics"},
1885# {"name": "Book", "price": 14.99, "category": "Education"},
1886# {"name": "Smartphone", "price": 699.99, "category": "Electronics"},
1887# {"name": "Pen", "price": 1.99, "category": "Stationery"},
1888# {"name": "Notebook", "price": 2.99, "category": "Stationery"},
1889# {"name": "Headphones", "price": 199.99, "category": "Electronics"},
1890# ]
1891
1892def product_category_statistics(products):
1893 \"\"\"
1894 Processes product information to analyze average prices, identify top categories,
1895 and group products by category based on price ranges.
1896
1897 Parameters:
1898 - products (List[dict]): A list of dictionaries with keys 'name', 'price', and 'category'
1899
1900 Requirements:
1901 - collections
1902 - pandas
1903 - numpy
1904 - random
1905 - string
1906
1907 Example:
1908 >>> products = [
1909 ... {"name": "Laptop", "price": 899.99, "category": "Electronics"},
1910 ... {"name": "Book", "price": 14.99, "category": "Education"},
1911 ... {"name": "Smartphone", "price": 699.99, "category": "Electronics"},
1912 ... {"name": "Pen", "price": 1.99, "category": "Stationery"},
1913 ... {"name": "Notebook", "price": 2.99, "category": "Stationery"},
1914 ... {"name": "Headphones", "price": 199.99, "category": "Electronics"},
1915 ... ]
1916 >>> result = product_category_statistics(products)
1917 >>> print(result)
1918 {
1919 'average_price_by_category': {'Electronics': 599.99, 'Education': 14.99, 'Stationery': 2.49},
1920 'top_category': 'Electronics',
1921 'products_in_price_ranges': {
1922 'low': ['Pen', 'Notebook'],
1923 'mid': ['Book', 'Headphones'],
1924 'high': ['Smartphone', 'Laptop']
1925 }
1926 }
1927
1928 Returns:
1929 dict: A dictionary containing average prices, the top category, and products grouped by price ranges.
1930 \"\"\"
1931
1932 # DataFrame creation
1933 df = pd.DataFrame(products)
1934
1935 # Average price by category
1936 avg_price_by_category = df.groupby("category")["price"].mean().round(2).to_dict()
1937
1938 # Top category by product count
1939 category_counts = Counter(df['category'])
1940 top_category = category_counts.most_common(1)[0][0]
1941
1942 # Price range grouping
1943 price_ranges = {'low': [], 'mid': [], 'high': []}
1944 for _, row in df.iterrows():
1945 if row["price"] < 10:
1946 price_ranges['low'].append(row["name"])
1947 elif 10 <= row["price"] < 100:
1948 price_ranges['mid'].append(row["name"])
1949 else:
1950 price_ranges['high'].append(row["name"])
1951
1952 return {
1953 'average_price_by_category': avg_price_by_category,
1954 'top_category': top_category,
1955 'products_in_price_ranges': price_ranges
1956 }
1957```
1958
1959 The examples above are illustrations.
1960
1961Generate five complex scenarios based on the following simple example:
1962"""
1963
1964
1965
1966---
1967File: /coding/datasets/__init__.py
1968---
1969
1970from .base import Dataset
1971
1972from .bigcodebench import BigCodeBenchDataset
1973from .thestack import TheStackDataset
1974from .pip import PipDataset
1975from .swe import SWEBenchDataset
1976
1977class DatasetManager:
1978 def __init__(self, config = None):
1979 self._datasets = None
1980 self.config = config
1981
1982 @property
1983 def datasets(self):
1984 if self._datasets is None:
1985 self._datasets = {  # NOTE: BigCodeBenchDataset is imported above but not registered here (it requires a config)
1986 TheStackDataset.name: TheStackDataset(),
1987 PipDataset.name: PipDataset(),
1988 SWEBenchDataset.name: SWEBenchDataset()
1989 }
1990 return self._datasets
1991
1992
1993
1994---
1995File: /coding/datasets/base.py
1996---
1997
1998# The MIT License (MIT)
1999# Copyright © 2024 Yuma Rao
2000# Copyright © 2023 Opentensor Foundation
2001# Copyright © 2024 Macrocosmos
2002
2003# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
2004# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
2005# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2006# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2007
2008# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
2009# the Software.
2010
2011# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
2012# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2013# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2014# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2015# DEALINGS IN THE SOFTWARE.
2016
2017import time
2018import random
2019import functools
2020from abc import ABC, abstractmethod
2021from typing import Dict
2022import bittensor as bt
2023
2024from coding.schemas.context import Context
2025from coding.helpers.selector import Selector
2026from coding.utils.exceptions import MaxRetryError
2027
2028class Dataset(ABC):
2029 """Base class for datasets."""
2030
2031 max_tries: int = 10
2032
2033 @abstractmethod
2034 def search(self, name):
2035 ...
2036
2037 @abstractmethod
2038 def random(self, name):
2039 ...
2040
2041 @abstractmethod
2042 def get(self, name):
2043 ...
2044
2045 def next(
2046 self, method: str = "random", selector: Selector = Selector(), **kwargs
2047 ) -> Context:
2048 tries = 1
2049 t0 = time.time()
2050
2051 while True:
2052 info = {}
2053 if method == "random":
2054 info = self.random(selector=selector, **kwargs)
2055 elif method == "search":
2056 info = self.search(selector=selector, **kwargs)
2057 elif method == "get":
2058 info = self.get(selector=selector, **kwargs)
2059 else:
2060 raise ValueError(f"Unknown dataset get method {method!r}")
2061
2062 if info:
2063 break
2064
2065 bt.logging.debug(
2066 f"Could not find any samples which meet {self.__class__.__name__} requirements after {tries} tries. Retrying... ({self.max_tries - tries} tries remaining.)"
2067 )
2068
2069 tries += 1
2070 if tries >= self.max_tries:
2071 raise MaxRetryError(
2072 f"Could not find any samples which meet {self.__class__.__name__} requirements after {tries} tries."
2073 )
2074
2075 info["source"] = self.__class__.__name__
2076 info["stats"] = {
2077 "fetch_time": time.time() - t0,
2078 "num_tries": tries,
2079 "fetch_method": method,
2080 "next_kwargs": kwargs,
2081 }
2082 return Context(**info)
2083
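To make the retry contract of next() concrete, here is a minimal hypothetical subclass; a falsy return from random() triggers another attempt, up to max_tries:

class ToyDataset(Dataset):
    """Minimal sketch of the Dataset contract; the content is illustrative."""
    name = "toy"

    def random(self, selector=None, **kwargs):
        # Returning a non-empty dict ends the retry loop in next(); {} retries.
        return {"title": "toy", "content": "print('hello')"}

    def search(self, name):
        ...

    def get(self, name):
        ...
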
2084
2085---
2086File: /coding/datasets/bigcodebench.py
2087---
2088
2089import re
2090import os
2091import bittensor as bt
2092from pydantic import BaseModel
2093from datasets import load_dataset
2094from langchain_openai import ChatOpenAI
2095
2096from .base import Dataset
2097from .prompts.bigcodebench import DATA_SYNTH_PROMPT
2098
2099class BigCodeBenchDataset(Dataset):
2100 name = "bigcodebench"
2101
2102 def __init__(
2103 self,
2104 config
2105 ):
2106 self.config = config
2107 self.instruct_ds = load_dataset(
2108 "bigcode/self-oss-instruct-sc2-instructions", split="train", streaming=True
2109 ).shuffle()
2110 self.instruct_iterset = iter(self.instruct_ds)
2111
2112 self.llm = ChatOpenAI(
2113 base_url=self.config.neuron.model_url,
2114 model_name=self.config.neuron.model_id,
2115 api_key=self.config.neuron.vllm_api_key,
2116 temperature=0.7,
2117 # max_tokens=12000
2118 )
2119
2120 self.buffer = []
2121
2122 def random(
2123 self,
2124 **kwargs,
2125 ):
2126 return self.get(
2127 **kwargs,
2128 )
2129
2130 def get(
2131 self,
2132 **kwargs,
2133 ):
2134 count = 0
2135 while len(self.buffer) == 0 and count < 10:
2136 count += 1
2137 row = next(self.instruct_iterset)
2138 seed = row["seed"]
2139 response = self.llm.invoke(DATA_SYNTH_PROMPT + "\n" + seed).content
2140
2141 # Extract all Python code blocks from the content, including those with a newline after 'python'
2142 code_blocks = re.findall(r"```python\s*(.*?)```", response, re.DOTALL)
2143
2144 self.buffer.extend(code_blocks)
2145
2146 content = self.buffer.pop(0)  # NOTE: raises IndexError if no code blocks were extracted after 10 attempts
2147
2148 return {
2149 "title": "",
2150 "topic": "",
2151 "subtopic": "",
2152 "content": content,
2153 "internal_links": [],
2154 "external_links": [],
2155 "source": "GitHub",
2156 "tags": [],
2157 "extras": {},
2158 }
2159
2160 def search(
2161 self,
2162 ):
2163 pass
2164
2165
2166---
2167File: /coding/datasets/pip.py
2168---
2169
2170# The MIT License (MIT)
2171# Copyright © 2024 Yuma Rao
2172# Copyright © 2023 Opentensor Foundation
2173# Copyright © 2024 Macrocosmos
2174
2175# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
2176# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
2177# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2178# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2179
2180# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
2181# the Software.
2182
2183# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
2184# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2185# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2186# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2187# DEALINGS IN THE SOFTWARE.
2188
2189import io
2190import os
2191import math
2192import random
2193import tarfile
2194import requests
2195
2196from typing import List
2197from pydantic import BaseModel
2198
2199from .base import Dataset
2200from coding.schemas import Context
2201from coding.schemas import File
2202from coding.helpers.selector import Selector
2203
2204
2205 def fetch_pip_repo_contents(repo_name: str, size_limit: int = 10 * 1024 * 1024) -> List[File]:
2206 """
2207 Fetch the contents of a pip repository as a list of file objects.
2208
2209 Parameters:
2210 - repo_name: The name of the pip repository.
2211 - size_limit: The maximum allowable size of the tarball in bytes.
2212
2213 Returns:
2214 - A list of FileObject instances representing the files in the repository.
2215
2216 Raises:
2217 - ValueError if the tarball size exceeds the specified limit or if there are issues fetching data.
2218 """
2219 # Fetch the latest release metadata from PyPI
2220 pypi_url = f"https://pypi.org/pypi/{repo_name}/json"
2221 response = requests.get(pypi_url)
2222 if response.status_code != 200:
2223 raise ValueError(f"Could not fetch repository data for {repo_name}")
2224
2225 data = response.json()
2226 latest_version = data["info"]["version"]
2227 tarball_url = data["releases"][latest_version][-1]["url"]
2228
2229 # Get the size of the tarball without downloading it
2230 head_response = requests.head(tarball_url)
2231 if head_response.status_code != 200:
2232 raise ValueError(f"Could not fetch tarball metadata for {repo_name}")
2233
2234 content_length = int(head_response.headers.get('Content-Length', 0))
2235 if content_length > size_limit:
2236 raise ValueError(f"Tarball size ({content_length} bytes) exceeds the limit of {size_limit} bytes")
2237
2238 # Download the tarball of the latest release
2239 tarball_response = requests.get(tarball_url)
2240 if tarball_response.status_code != 200:
2241 raise ValueError(f"Could not fetch tarball for {repo_name}")
2242
2243 # Read the tarball contents
2244 tarball_file = io.BytesIO(tarball_response.content)
2245 tar = tarfile.open(fileobj=tarball_file)
2246
2247 file_objects = []
2248 for member in tar.getmembers():
2249 if member.isfile():
2250 f = tar.extractfile(member)
2251 if f is not None:
2252 contents = f.read().decode('utf-8')
2253 # split the name to remove the package name
2254 file_objects.append(File(path='/'.join(member.name.split('/')[1:]), contents=contents))
2255
2256 return file_objects
2257
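A usage sketch for the helper above (network access assumed; "requests" is just an example package):

files = fetch_pip_repo_contents("requests")
for f in files[:3]:
    print(f.path, len(f.contents))
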
2258def get_pip_repo_size(repo_name: str) -> int:
2259 """
2260 Get the size of the latest tarball for a given pip repository.
2261
2262 Parameters:
2263 - repo_name: The name of the pip repository.
2264
2265 Returns:
2266 - The size of the latest tarball in bytes.
2267
2268 Raises:
2269 - ValueError if the repository data or tarball metadata cannot be fetched.
2270 """
2271
2272 # Fetch the latest release metadata from PyPI
2273 pypi_url = f"https://pypi.org/pypi/{repo_name}/json"
2274 response = requests.get(pypi_url)
2275 if response.status_code != 200:
2276 raise ValueError(f"Could not fetch repository data for {repo_name}")
2277
2278 data = response.json()
2279 latest_version = data["info"]["version"]
2280 tarball_url = data["releases"][latest_version][-1]["url"]
2281
2282 # Get the size of the tarball without downloading it
2283 head_response = requests.head(tarball_url)
2284 if head_response.status_code != 200:
2285 raise ValueError(f"Could not fetch tarball metadata for {repo_name}")
2286
2287 content_length = int(head_response.headers.get('Content-Length', 0))
2288
2289 return content_length
2290
2291def get_total_pip_packages():
2292 url = "https://libraries.io/api/search"
2293 params = {
2294 "platforms": "pypi",
2295 "sort": "dependents_count",
2296 "per_page": 1, # Get only one result to find out the total count
2297 "api_key": os.getenv('LIBRARIES_API_KEY', '45cc24a495c25a68a052e3f99af9a05a') # TODO remove the api key
2298 }
2299
2300 response = requests.get(url, params=params)
2301 response.raise_for_status()
2302 total_packages = int(response.headers.get('total', 0))
2303 return total_packages
2304
2305def get_random_packages(n=100):
2306 url = "https://libraries.io/api/search"
2307 total_packages = get_total_pip_packages()
2308 total_pages = math.ceil(total_packages / n)
2309 random_offset = random.randint(0, max(total_pages - n, 0))  # guard against a negative upper bound
2310
2311 params = {
2312 "platforms": "pypi",
2313 "sort": "dependents_count",
2314 "per_page": n,
2315 "offset": random_offset,
2316 "api_key": os.getenv('LIBRARIES_API_KEY', '45cc24a495c25a68a052e3f99af9a05a') # TODO remove the api key
2317 }
2318
2319 response = requests.get(url, params=params)
2320 response.raise_for_status()
2321 data = response.json()
2322
2323 return [package["name"] for package in data]
2324
2325
2326
2327
2328class PipDataset(Dataset):
2329 name = "pip"
2330 def __init__(
2331 self,
2332 seed=None,
2333 ):
2334 if seed is None:
2335 seed = random.randint(0, 1000)
2336 self.seed = seed
2337
2338 def get(self, n=100, selector: Selector = None):
2339 for _ in range(300):
2340
2341 packages = get_random_packages(n=n)
2342 package_name = selector(packages)
2343 if get_pip_repo_size(package_name) >= 10 * 1024 * 1024: # skip packages larger than 10MB
2344 continue
2345 return dict(
2346 title = package_name,
2347 source = "pip",
2348 # files= fetch_pip_repo_contents(package_name)
2349 )
2350 raise Exception("Failed to find a valid pip package")
2351
2352 def search(
2353 self, query, selector: Selector = None, **kwargs
2354 ):
2355 pass
2356
2357 def random(self, n=100, selector: Selector = None, **kwargs):
2358 return self.get(n=n, selector=selector)
2359
2360
2361
2362---
2363File: /coding/datasets/swe.py
2364---
2365
2366# The MIT License (MIT)
2367# Copyright © 2024 Yuma Rao
2368# Copyright © 2023 Opentensor Foundation
2369# Copyright © 2024 Macrocosmos
2370
2371# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
2372# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
2373# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2374# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2375
2376# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
2377# the Software.
2378
2379# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
2380# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2381# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2382# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2383# DEALINGS IN THE SOFTWARE.
2384
2385import os
2386import random
2387import requests
2388
2389from fastcore.xtras import obj2dict
2390from swebench.collect.build_dataset import create_instance
2391
2392from .base import Dataset
2393from coding.helpers.selector import Selector
2394from coding.helpers.swebench import Repo as SWERepo
2395
2396
2397def get_package_stats(package_name: str):
2398 package_url = f"https://pypi.org/pypi/{package_name}/json"
2399 package_github = None
2400 response = requests.get(package_url)
2401 if response.status_code != 200:
2402 raise Exception(f"Failed to get package data from URL: {package_url}")
2403 response = response.json()
2404 if "info" in response:
2405 if (
2406 "Source" in response["info"]["project_urls"]
2407 and "github" in response["info"]["project_urls"]["Source"]
2408 ):
2409 package_github = response["info"]["project_urls"]["Source"]
2410 elif (
2411 "Homepage" in response["info"]["project_urls"]
2412 and "github" in response["info"]["project_urls"]["Homepage"]
2413 ):
2414 package_github = response["info"]["project_urls"]["Homepage"]
2415 if not package_github:
2416 raise Exception(f"No github link found for package: {package_name}")
2417
2418 return {
2419 "name": package_name,
2420 "url": package_url,
2421 "github": package_github,
2422 }
2423
2424
2425def get_top_pip_packages():
2426 response = requests.get(
2427 "https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json"
2428 )
2429 packages = [row["project"] for row in response.json()["rows"]]
2430 return packages
2431
2432
2433class SWEBenchDataset(Dataset):
2434 name = "swebench"
2435
2436 def __init__(
2437 self,
2438 ):
2439 pass
2440
2441 def get(self, n=100, selector: Selector = Selector()) -> dict:
2442 package_name = selector(get_top_pip_packages())
2443 package_info = get_package_stats(package_name)
2444 token = os.environ.get("GITHUB_TOKEN", None)
2445 if not token:
2446 raise Exception("GITHUB_TOKEN not set")
2447 repo = SWERepo(
2448 package_info["github"].split("/")[-2],
2449 package_info["github"].split("/")[-1],
2450 token,
2451 )
2452
2453 # Check repo size before proceeding
2454 if repo.size > 1024 * 1024 * 1024: # 1GB in bytes
2455 raise Exception(f"Repository {package_info['github']} is too large (>1GB)")
2456
2457 valid_pull = None
2458 err_count = 0
2459 pulls = list(repo.get_all_pulls(state="closed"))
2460 random.shuffle(pulls)
2461 for pull in pulls:
2462 try:
2463 if valid_pull or err_count > 5:
2464 break
2465 resolved_issues = repo.extract_resolved_issues(pull)
2466 setattr(pull, "resolved_issues", resolved_issues)
2467 if len(resolved_issues) > 0:
2468 valid_pull = obj2dict(pull)
2469 except Exception:
2470 err_count += 1
2471
2472 if not valid_pull:
2473 raise Exception(f"Could not get a valid SWE pull for {package_info['github']}")
2474 pull_data = create_instance(repo, valid_pull)
2475 diff_text = pull_data["patch"]
2476 return {
2477 "topic": pull_data["problem_statement"],
2478 "title": f'{package_info["github"].split("/")[-2]}/{package_info["github"].split("/")[-1]}',
2479 "content": diff_text,
2480 "extras": dict(pull_number=pull_data["pull_number"], base_commit=pull_data["base_commit"]),
2481 }
2482 def search(self, query, selector: Selector = None, **kwargs):
2483 pass
2484
2485 def random(self, n=100, selector: Selector = None, **kwargs):
2486 return self.get(n=n, selector=selector)
2487
2488
2489---
2490File: /coding/datasets/thestack.py
2491---
2492
2493# The MIT License (MIT)
2494# Copyright © 2024 Yuma Rao
2495# Copyright © 2023 Opentensor Foundation
2496# Copyright © 2024 Macrocosmos
2497
2498# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
2499# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
2500# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2501# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2502
2503# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
2504# the Software.
2505
2506# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
2507# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2508# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2509# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2510# DEALINGS IN THE SOFTWARE.
2511
2512import os
2513import re
2514import boto3
2515import random
2516import itertools
2517import numpy as np
2518from smart_open import open
2519 from datasets import load_dataset, interleave_datasets  # HF's Dataset import was shadowed by the base-class import below
2520
2521from .base import Dataset
2522from coding.schemas import Context
2523from coding.helpers.selector import Selector
2524
2525LANGUAGES = {
2526 "C++": {
2527 "keywords": [
2528 "auto",
2529 "break",
2530 "case",
2531 "char",
2532 "const",
2533 "continue",
2534 "default",
2535 "do",
2536 "double",
2537 "else",
2538 "enum",
2539 "extern",
2540 "float",
2541 "for",
2542 "goto",
2543 "if",
2544 "int",
2545 "long",
2546 "register",
2547 "return",
2548 "short",
2549 "signed",
2550 "sizeof",
2551 "static",
2552 "struct",
2553 "switch",
2554 "typedef",
2555 "union",
2556 "unsigned",
2557 "void",
2558 "volatile",
2559 "while",
2560 ],
2561 "libraries": [
2562 "iostream",
2563 "fstream",
2564 "string",
2565 "vector",
2566 "map",
2567 "set",
2568 "algorithm",
2569 "cmath",
2570 "cstdio",
2571 "cstdlib",
2572 "ctime",
2573 "cstring",
2574 "cassert",
2575 "cctype",
2576 "cerrno",
2577 "cfloat",
2578 "ciso646",
2579 "climits",
2580 "clocale",
2581 "cmath",
2582 "csetjmp",
2583 "csignal",
2584 "cstdarg",
2585 "cstddef",
2586 "cstdio",
2587 "cstdlib",
2588 "cstring",
2589 "ctime",
2590 "cwchar",
2591 "cwctype",
2592 "complex",
2593 "deque",
2594 "exception",
2595 "fstream",
2596 "functional",
2597 "iomanip",
2598 "ios",
2599 "iosfwd",
2600 "iostream",
2601 "istream",
2602 "iterator",
2603 "limits",
2604 "list",
2605 "locale",
2606 "map",
2607 "memory",
2608 "new",
2609 "numeric",
2610 "ostream",
2611 "queue",
2612 "set",
2613 "sstream",
2614 "stack",
2615 "stdexcept",
2616 "streambuf",
2617 "string",
2618 "typerow",
2619 "utility",
2620 "valarray",
2621 "vector",
2622 ],
2623 "comments": ["//", "/*", "*/"],
2624 "multiline_comments": [("/*", "*/")],
2625 },
2626 "Dockerfile": {
2627 "keywords": [
2628 "from",
2629 "maintainer",
2630 "run",
2631 "cmd",
2632 "expose",
2633 "env",
2634 "add",
2635 "copy",
2636 "entrypoint",
2637 "volume",
2638 "user",
2639 "workdir",
2640 "onbuild",
2641 ],
2642 "libraries": [],
2643 "comments": ["#"],
2644 "multiline_comments": [],
2645 },
2646 "HTML": {
2647 "keywords": [
2648 "div",
2649 "span",
2650 "input",
2651 "ul",
2652 "body",
2653 "tag",
2654 "html",
2655 "head",
2656 "title",
2657 "meta",
2658 "link",
2659 "script",
2660 "style",
2661 "a",
2662 "img",
2663 "table",
2664 "label",
2665 ],
2666 "libraries": [],
2667 "comments": ["<!--", "-->"],
2668 "multiline_comments": [("<!--", "-->")],
2669 },
2670 "Java": {
2671 "keywords": [
2672 "abstract",
2673 "assert",
2674 "boolean",
2675 "break",
2676 "byte",
2677 "case",
2678 "catch",
2679 "char",
2680 "class",
2681 "continue",
2682 "default",
2683 "do",
2684 "double",
2685 "else",
2686 "enum",
2687 "extends",
2688 "final",
2689 "finally",
2690 "float",
2691 "for",
2692 "if",
2693 "implements",
2694 "import",
2695 "instanceof",
2696 "int",
2697 "interface",
2698 "long",
2699 "native",
2700 "new",
2701 "package",
2702 "private",
2703 "protected",
2704 "public",
2705 "return",
2706 "short",
2707 "static",
2708 "strictfp",
2709 "super",
2710 "switch",
2711 "synchronized",
2712 "this",
2713 "throw",
2714 "throws",
2715 "transient",
2716 "try",
2717 "void",
2718 "volatile",
2719 "while",
2720 ],
2721 "libraries": [
2722 "java.awt",
2723 "java.awt.event",
2724 "java.io",
2725 "java.lang",
2726 "java.math",
2727 "java.net",
2728 "java.text",
2729 "java.util",
2730 "javax.swing",
2731 ],
2732 "comments": ["//", "/*", "*/", "*"],
2733 "multiline_comments": [("/*", "*/")],
2734 },
2735 "JavaScript": {
2736 "keywords": [
2737 "abstract",
2738 "arguments",
2739 "boolean",
2740 "break",
2741 "byte",
2742 "case",
2743 "catch",
2744 "char",
2745 "class",
2746 "const",
2747 "continue",
2748 "debugger",
2749 "default",
2750 "delete",
2751 "do",
2752 "double",
2753 "else",
2754 "enum",
2755 "eval",
2756 "export",
2757 "extends",
2758 "false",
2759 "final",
2760 "finally",
2761 "float",
2762 "for",
2763 "function",
2764 "goto",
2765 "if",
2766 "implements",
2767 "import",
2768 "in",
2769 "instanceof",
2770 "int",
2771 "interface",
2772 "let",
2773 "long",
2774 "native",
2775 "module.exports" "new",
2776 "null",
2777 "package",
2778 "private",
2779 "protected",
2780 "public",
2781 "return",
2782 "short",
2783 "static",
2784 "super",
2785 "switch",
2786 "synchronized",
2787 "this",
2788 "throw",
2789 "throws",
2790 "transient",
2791 "true",
2792 "try",
2793 "typeof",
2794 "var",
2795 "void",
2796 "volatile",
2797 "while",
2798 "with",
2799 "yield",
2800 ],
2801 "libraries": [
2802 "react",
2803 "express",
2804 "mongoose",
2805 "axios",
2806 "redux",
2807 "react-redux",
2808 "react-router-dom",
2809 "react-dom",
2810 "react-scripts",
2811 "material-ui",
2812 ],
2813 "comments": ["//", "/*", "*/"],
2814 "multiline_comments": [("/*", "*/")],
2815 },
2816 "Python": {
2817 "keywords": [
2818 "False",
2819 "None",
2820 "True",
2821 "and",
2822 "as",
2823 "assert",
2824 "break",
2825 "class",
2826 "continue",
2827 "def",
2828 "del",
2829 "elif",
2830 "else",
2831 "except",
2832 "finally",
2833 "for",
2834 "from",
2835 "global",
2836 "if",
2837 "import",
2838 "in",
2839 "is",
2840 "lambda",
2841 "nonlocal",
2842 "not",
2843 "or",
2844 "pass",
2845 "raise",
2846 "return",
2847 "try",
2848 "while",
2849 "with",
2850 "yield",
2851 ],
2852 "libraries": [
2853 "numpy",
2854 "pandas",
2855 "matplotlib",
2856 "seaborn",
2857 "scipy",
2858 "sklearn",
2859 "tensorflow",
2860 "keras",
2861 "pytorch",
2862 "django",
2863 "flask",
2864 "requests",
2865 "bs4",
2866 "selenium",
2867 "pyautogui",
2868 "pyperclip",
2869 "pyinputplus",
2870 "pillow",
2871 ],
2872 "comments": ["#"],
2873 "multiline_comments": [('"""', '"""'), ("'''", "'''")],
2874 },
2875 "SQL": {
2876 "keywords": [
2877 "add",
2878 "all",
2879 "alter",
2880 "and",
2881 "any",
2882 "as",
2883 "asc",
2884 "backup",
2885 "between",
2886 "case",
2887 "check",
2888 "column",
2889 "constraint",
2890 "create",
2891 "database",
2892 "default",
2893 "delete",
2894 "desc",
2895 "distinct",
2896 "drop",
2897 "exec",
2898 "exists",
2899 "foreign",
2900 "from",
2901 "full",
2902 "group",
2903 "having",
2904 "in",
2905 "index",
2906 "inner",
2907 "insert",
2908 "into",
2909 "is",
2910 "join",
2911 "key",
2912 "left",
2913 "like",
2914 "limit",
2915 "not",
2916 "null",
2917 "on",
2918 "or",
2919 "order",
2920 "outer",
2921 "primary",
2922 "procedure",
2923 "right",
2924 "rownum",
2925 "select",
2926 "set",
2927 "table",
2928 "top",
2929 "truncate",
2930 "union",
2931 "unique",
2932 "update",
2933 "values",
2934 "view",
2935 "where",
2936 ],
2937 "comments": ["--", "/*", "*/"],
2938 },
2939 "Shell": {
2940 "keywords": [
2941 "alias",
2942 "bg",
2943 "bind",
2944 "break",
2945 "builtin",
2946 "caller",
2947 "cd",
2948 "command",
2949 "compgen",
2950 "complete",
2951 "continue",
2952 "declare",
2953 "dirs",
2954 "disown",
2955 "echo",
2956 "enable",
2957 "eval",
2958 "exec",
2959 "exit",
2960 "export",
2961 "false",
2962 "fc",
2963 "fg",
2964 "getopts",
2965 "hash",
2966 "help",
2967 "history",
2968 "jobs",
2969 "kill",
2970 "let",
2971 "local",
2972 "logout",
2973 "popd",
2974 "printf",
2975 "pushd",
2976 "pwd",
2977 "read",
2978 "readonly",
2979 "return",
2980 "set",
2981 "shift",
2982 "shopt",
2983 "source",
2984 "suspend",
2985 "test",
2986 "times",
2987 "trap",
2988 "true",
2989 "type",
2990 "typeset",
2991 "ulimit",
2992 "umask",
2993 "unalias",
2994 "unset",
2995 "wait",
2996 ],
2997 "comments": ["#"],
2998 "multiline_comments": [(":'", "'")],
2999 },
3000}
3001
3002
3003def convert_to_python3(code: str) -> str:
3004 """
3005 Convert Python 2/3 code to Python 3 code.
3006
3007 Args:
3008 - code (str): A string containing Python 2/3 code.
3009
3010 Returns:
3011 - str: A string containing Python 3 code.
3012 """
3013
3014 def replace_print_statement(match):
3015 return f"print({match.group(1)})"
3016
3017 code = re.sub(r"print (.*)", replace_print_statement, code)  # naive: only rewrites space-separated print statements
3018
3019 # Replace xrange with range
3020 code = code.replace("xrange", "range")
3021
3022 return code
3023
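For example, the naive conversion above turns a Python 2 print statement into a call:

legacy = "for i in xrange(3):\n    print i"
print(convert_to_python3(legacy))
# for i in range(3):
#     print(i)
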
3024
3025def process_repo_row(row):
3026 for file in row["files"]:
3027 blob_id = file["blob_id"]
3028 src_encoding = file["src_encoding"]
3029 session = boto3.Session(
3030 aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
3031 aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
3032 )
3033 s3 = session.client("s3")
3034 s3_url = f"s3://softwareheritage/content/{blob_id}"
3035
3036 with open(
3037 s3_url, "rb", compression=".gz", transport_params={"client": s3}
3038 ) as fin:
3039 file["content"] = fin.read().decode(src_encoding)
3040
3041 return row
3042
3043
3044def process_row(row):
3045 blob_id = row["blob_id"]
3046 src_encoding = row["src_encoding"]
3047 session = boto3.Session(
3048 aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
3049 aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
3050 )
3051 s3 = session.client("s3")
3052 s3_url = f"s3://softwareheritage/content/{blob_id}"
3053
3054 with open(s3_url, "rb", compression=".gz", transport_params={"client": s3}) as fin:
3055 content = fin.read().decode(src_encoding)
3056
3057 row["code"] = content
3058 return row
3059
3060
3061def filter_comments(code, language):
3062 if language not in LANGUAGES:
3063 return code
3064 # Filter out multiline comments
3065 if "multiline_comments" in LANGUAGES[language]:
3066 for start_tag, end_tag in LANGUAGES[language]["multiline_comments"]:
3067 code = re.sub(
3068 rf"{re.escape(start_tag)}.*?{re.escape(end_tag)}",
3069 "",
3070 code,
3071 flags=re.DOTALL,
3072 )
3073
3074 # Filter out single-line comments
3075 lines = []
3076 for line in code.splitlines():
3077 if any(
3078 line.strip().startswith(symbol)
3079 for symbol in LANGUAGES[language]["comments"]
3080 ):
3081 continue
3082 lines.append(line.lower())
3083
3084 return "\n".join(lines)
3085
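A quick illustration of filter_comments on Python input; note that it also lowercases the surviving lines:

sample = "# setup\nVALUE = 1\n'''module docstring'''\nprint(VALUE)"
print(filter_comments(sample, "Python"))
# value = 1
#
# print(value)
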
3086
3087class TheStackDataset(Dataset):
3088 name = "thestack"
3089
3090 def __init__(
3091 self,
3092 seed=None,
3093 languages=None,
3094 ):
3095
3096 if seed is None:
3097 seed = random.randint(0, 1000)
3098 self.seed = seed
3099
3100 if languages is None:
3101 languages = list(LANGUAGES.keys())
3102 self.languages = languages
3103
3104 # self.dataset = cache_dataset(dataset_id=dataset_id, seed=seed)
3105 datasets = []
3106 for language in [
3107 "Python",
3108 # "JavaScript",
3109 # "TypeScript",
3110 # "Go",
3111 # "Java",
3112 # "C++",
3113 # "C",
3114 # "SQL",
3115 # "Shell",
3116 ]:
3117 datasets.append(
3118 load_dataset(
3119 "bigcode/the-stack-v2",
3120 language,
3121 split="train",
3122 streaming=True,
3123 )
3124 )
3125 # shuffle the datasets
3126 datasets = [dataset.shuffle() for dataset in datasets]
3127
3128 self.stack_dataset = interleave_datasets(datasets)
3129 self.stack_dataset = self.stack_dataset.shuffle()
3130 self.stack_dataset = self.stack_dataset.map(lambda row: process_row(row))
3131 self.stack_iterset = iter(self.stack_dataset)
3132
3133 self.stack_repo_dataset = load_dataset(
3134 "bigcode/the-stack-v2-train-smol-ids", split="train", streaming=True
3135 )
3136 self.stack_repo_dataset = self.stack_repo_dataset.shuffle()
3137 self.stack_repo_iterset = iter(self.stack_repo_dataset)
3138
3139 def random(
3140 self,
3141 min_lines=10,
3142 max_lines=3000,
3143 selector: Selector = None,
3144 include_sibling_docs=False,
3145 min_sibling_docs=1,
3146 **kwargs,
3147 ):
3148 return self.get(
3149 min_lines,
3150 max_lines,
3151 selector,
3152 include_sibling_docs,
3153 min_sibling_docs,
3154 **kwargs,
3155 )
3156
3157 def get(
3158 self,
3159 min_lines=25,
3160 max_lines=3000,
3161 selector: Selector = None,
3162 include_sibling_docs=False,
3163 min_sibling_docs=1,
3164 **kwargs,
3165 ):
3166 content = None
3167 if include_sibling_docs:
3168 row = next(self.stack_repo_iterset)
3169 if not row["gha_language"]:
3170 row["gha_language"] = ""
3171 else:
3172 row = next(self.stack_iterset)
3173 if not (min_lines <= len(row["code"].splitlines()) <= max_lines):
3174 return None
3175 content = row["code"]
3176
3177 sibling_docs = []
3178 if include_sibling_docs:
3179 if (
3180 row["num_files"] < min_sibling_docs
3181 or row["num_files"] > 15 # TODO modify this eventually to be different
3182 or len(row["files"]) < 2
3183 ):
3184 return None
3185 row = process_repo_row(row)
3186 randindex = random.randint(1, len(row["files"]) - 1)
3187 # choose all but the random index
3188 for file in row["files"][:randindex] + row["files"][randindex + 1 :]:
3189 sibling_docs.append(
3190 Context(
3191 title=file["path"],
3192 content=file["content"],
3193 topic=row["gha_language"],
3194 )
3195 )
3196 content = row["files"][randindex]["content"]
3197
3198 if ("language" in row and row["language"] == "Python") or (
3199 "gha_language" in row and row["gha_language"] == "Python"
3200 ):
3201 content = convert_to_python3(content)
3202
3203 if len(content.splitlines()) < min_lines or len(content.splitlines()) > max_lines:
3204 return None
3205
3206 for sibling_doc in sibling_docs:
3207 if len(sibling_doc.content.splitlines()) < min_lines or len(sibling_doc.content.splitlines()) > max_lines:
3208 return None
3209
3210 return {
3211 "title": row["repo_name"], # name of the repo
3212 "topic": (
3213 row["language"] if "language" in row else row["gha_language"]
3214 ), # language of the code
3215 "subtopic": "",
3216 "content": filter_comments(
3217 content, row["language"] if "language" in row else row["gha_language"]
3218 ),
3219 "internal_links": [row["repo_name"]],
3220 "external_links": [],
3221 "source": "GitHub",
3222 "tags": [
3223 row["language"] if "language" in row else row["gha_language"],
3224 row["repo_name"],
3225 "",
3226 ],
3227 "extras": {
3228 "sibling_docs": sibling_docs,
3229 },
3230 }
3231
3232 def search(
3233 self,
3234 query,
3235 column="path",
3236 min_lines=5,
3237 max_lines=100,
3238 selector: Selector = None,
3239 **kwargs,
3240 ):
3241 mask = np.array(self.dataset[column]) == query  # NOTE: self.dataset is never set (the cache at line 3104 is commented out), so this raises AttributeError
3242 filtered_dataset = iter(self.dataset.select(np.where(mask)[0]))
3243
3244 return [
3245 {
3246 "title": row["repo_name"], # name of the repo
3247 "topic": row["language"], # language of the code
3248 "subtopic": row["path"],
3249 "content": (
3250 convert_to_python3(filter_comments(row["code"], row["language"]))
3251 if row["language"] == "Python"
3252 else filter_comments(row["code"], row["language"])
3253 ),
3254 "internal_links": [row["repo_name"], row["path"], row["language"]],
3255 "external_links": [], # TODO complete
3256 "source": "GitHub",
3257 "tags": [row["language"], row["repo_name"], row["path"]],
3258 "extras": {"size": row["size"], "license": row["license"]},
3259 }
3260 for row in filtered_dataset
3261 ]
3262
3263 def extract_keywords(self, code, language, field):
3264 matches = set()
3265
3266 # check which keywords and libraries are present in the code
3267 for keyword in LANGUAGES[language].get(field, []):
3268 if re.search(r"\b" + keyword + r"\b", code):
3269 matches.add(keyword)
3270
3271 return matches
3272
3273 def get_special_contents(self, code, language, remove_comments=True):
3274 if remove_comments:
3275 code = filter_comments(code, language)
3276
3277 present_libraries = self.extract_keywords(code, language, "libraries")
3278 present_keywords = self.extract_keywords(code, language, "keywords")
3279
3280 return present_keywords, present_libraries
3281
3282
3283
3284---
3285File: /coding/finetune/llm/__init__.py
3286---
3287
3288
3289
3290
3291---
3292File: /coding/finetune/llm/app.py
3293---
3294
3295import os
3296import asyncio
3297from fastapi import FastAPI, HTTPException, Depends
3298from pydantic import BaseModel
3299from typing import Optional, Dict, List
3300from dotenv import load_dotenv
3301
3302# ------------------------------
3303# LangChain-based LLM Imports
3304# ------------------------------
3305from langchain_anthropic import ChatAnthropic
3306from langchain_google_genai import ChatGoogleGenerativeAI
3307from langchain_openai import ChatOpenAI, OpenAIEmbeddings
3308
3309load_dotenv("../../../.env")
3310
3311if not os.getenv("LLM_AUTH_KEY"):
3312 raise ValueError("LLM_AUTH_KEY environment variable not set")
3313
3314
3315# ------------------------------
3316# Global Variables
3317# ------------------------------
3318token_usage: Dict[str, int] = {}
3319current_key: Optional[str] = None
3320
3321# FastAPI App
3322app = FastAPI()
3323
3324
3325# ------------------------------
3326# Pydantic Models
3327# ------------------------------
3328class InitRequest(BaseModel):
3329 key: str
3330
3331class LLMRequest(BaseModel):
3332 query: str
3333 llm_name: str
3334
3335class LLMResponse(BaseModel):
3336 result: str
3337 total_tokens: int
3338
3339class EmbeddingRequest(BaseModel):
3340 query: str
3341
3342class EmbeddingResponse(BaseModel):
3343 vector: List[float]
3344
3345
3346# ------------------------------
3347# Auth Dependency
3348# ------------------------------
3349 async def verify_auth(auth_key: str = Depends(lambda: os.getenv("LLM_AUTH_KEY"))):  # NOTE: reads the server's own env var, never a client credential, so no caller is actually authenticated
3350 if not auth_key:
3351 raise HTTPException(
3352 status_code=500,
3353 detail="LLM_AUTH_KEY environment variable not set"
3354 )
3355 return auth_key
3356
3357
3358# ------------------------------
3359# Initialize / Reset / Count
3360# ------------------------------
3361@app.post("/init")
3362async def init_key(request: InitRequest, auth_key: str = Depends(verify_auth)):
3363 """Initialize token tracking for a new key and set as current."""
3364 global current_key
3365 if request.key not in token_usage:
3366 token_usage[request.key] = 0
3367 current_key = request.key
3368 return {"message": f"Set active key to {request.key}"}
3369
3370@app.post("/reset")
3371async def reset_count(auth_key: str = Depends(verify_auth)):
3372 """Reset token count for current key."""
3373 global current_key
3374 if not current_key:
3375 raise HTTPException(
3376 status_code=400,
3377 detail="No active key. Call /init endpoint first."
3378 )
3379 token_usage[current_key] = 0
3380 return {"message": f"Reset token count for key {current_key}"}
3381
3382@app.get("/count")
3383async def get_count(auth_key: str = Depends(verify_auth)):
3384 """Get current token count."""
3385 global current_key
3386 if not current_key:
3387 raise HTTPException(
3388 status_code=400,
3389 detail="No active key. Call /init endpoint first."
3390 )
3391 return {"key": current_key, "count": token_usage[current_key]}
3392
3393
3394# ------------------------------
3395# Helper: Async LLM Invoker
3396# ------------------------------
3397async def ainvoke_with_retry(llm, query: str, max_retries: int = 50, initial_delay: int = 1):
3398 """
3399 Invoke the LLM asynchronously with exponential backoff on rate-limit or server errors.
3400 Returns the response if successful; raises Exception after max_retries.
3401 """
3402 delay = initial_delay
3403 last_exception = None
3404
3405 for attempt in range(max_retries):
3406 try:
3407 response = await llm.ainvoke(query)
3408 return response
3409 except Exception as e:
3410 # Check if it's a rate-limit or server error
3411 if "429" in str(e) or "529" in str(e):
3412 last_exception = e
3413 if attempt < max_retries - 1:
3414 # Exponential backoff
3415 await asyncio.sleep(delay)
3416 delay *= 2
3417 else:
3418 # Retries exhausted
3419 raise
3420 else:
3421 # Some other error - don't keep retrying
3422 raise
3423
3424 # If we exit the loop without returning, raise whatever last exception we had
3425 if last_exception:
3426 raise last_exception
3427 else:
3428 raise HTTPException(status_code=500, detail="Unknown error invoking LLM")
3429
3430
3431# ------------------------------
3432# Call LLM
3433# ------------------------------
3434@app.post("/call", response_model=LLMResponse)
3435async def call_llm(request: LLMRequest):
3436 """Call one of the registered LLMs. If repeated failures, fallback to 'gpt-4o'."""
3437 global current_key, token_usage
3438
3439 # Models dictionary
3440 models = {
3441 "gpt-4o": ChatOpenAI(model="gpt-4o", max_tokens=16384),
3442 "gpt-3.5-turbo": ChatOpenAI(model="gpt-3.5-turbo", max_tokens=16384),
3443 "gpt-4o-mini": ChatOpenAI(model="gpt-4o-mini", max_tokens=16384),
3444 "claude-3-5-sonnet": ChatAnthropic(model="claude-3-5-sonnet-latest", max_tokens=8912),
3445 "gemini-2.0-flash-exp": ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", max_tokens=8912),
3446 }
3447
3448 try:
3449 if not current_key:
3450 # If no key was initialized, default to "test" so code doesn't break
3451 current_key = "test"
3452 token_usage[current_key] = 0
3453
3454 # Try to retrieve requested model; fallback to "gpt-4o" if not found
3455 requested_llm = models.get(request.llm_name, models["gpt-4o"])
3456 fallback_llm = models["gpt-4o"]
3457
3458 # --- Step 1: Try the requested LLM ---
3459 try:
3460 response = await ainvoke_with_retry(requested_llm, request.query)
3461 except Exception:
3462 # If the requested LLM fails after max retries, fallback
3463 response = await ainvoke_with_retry(fallback_llm, request.query)
3464
3465 # Extract tokens from usage metadata (some LLMs may not provide it)
3466 tokens = (response.usage_metadata or {}).get("total_tokens", 0)
3467
3468 # Update token usage
3469 token_usage[current_key] += tokens
3470
3471 return LLMResponse(
3472 result=response.content,
3473 total_tokens=token_usage[current_key]
3474 )
3475 except Exception as e:
3476 raise HTTPException(status_code=500, detail=str(e))
3477
3478
3479# ------------------------------
3480# Embeddings
3481# ------------------------------
3482@app.post("/embed", response_model=EmbeddingResponse)
3483async def get_embeddings(request: EmbeddingRequest):
3484 """
3485 Returns embeddings vector for the given input query.
3486 """
3487 embedder = OpenAIEmbeddings(model="text-embedding-3-small")
3488 try:
3489 # embed_query is often synchronous in many libraries; if there's an async version, use that instead.
3490 vector = embedder.embed_query(request.query)
3491 return EmbeddingResponse(vector=vector)
3492 except Exception as e:
3493 raise HTTPException(status_code=500, detail=str(e))
3494
3495
3496# ------------------------------
3497# Run via Uvicorn
3498# ------------------------------
3499if __name__ == "__main__":
3500 import uvicorn
3501 uvicorn.run(app, host="0.0.0.0", port=25000)
3502
3503
3504
3505---
3506File: /coding/finetune/llm/client.py
3507---
3508
3509import os
3510import requests
3511
3512class LLMClient:
3513 def __init__(self, base_url: str = f"http://{os.getenv('DOCKER_HOST_IP', 'localhost')}:25000"):
3514 """Initialize LLM client with API server URL"""
3515 self.base_url = base_url.rstrip("/")
3516
3517 def __call__(self, query: str, llm_name: str) -> tuple[str, int]:
3518 """
3519 Call LLM API endpoint
3520
3521 Args:
3522 query (str): The prompt/query to send to the LLM
3523 llm_name (str): Name of LLM model to use (e.g. "gpt-4", "claude-3-sonnet")
3524
3525 Returns:
3526 tuple[str, int]: (Generated response text, Total tokens used for this key)
3527
3528 Raises:
3529 requests.exceptions.RequestException: If API call fails
3530 """
3531 payload = {"query": query, "llm_name": llm_name}
3532
3533 response = requests.post(f"{self.base_url}/call", json=payload)
3534 response.raise_for_status()
3535
3536 result = response.json()
3537 return result["result"], result["total_tokens"]
3538
3539 def embed(self, query: str) -> list[float]:
3540 """
3541 Get embeddings for text using the embedding API endpoint
3542
3543 Args:
3544 query (str): The text to get embeddings for
3545
3546 Returns:
3547 list[float]: Vector embedding of the input text
3548
3549 Raises:
3550 requests.exceptions.RequestException: If API call fails
3551 """
3552 payload = {"query": query}
3553
3554 response = requests.post(f"{self.base_url}/embed", json=payload)
3555 response.raise_for_status()
3556
3557 result = response.json()
3558 return result["vector"]
3559
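A usage sketch for the client above, assuming the FastAPI server from app.py is reachable on port 25000:

client = LLMClient()
answer, tokens_used = client("Summarize PEP 8 in one sentence.", "gpt-4o-mini")
vector = client.embed("hello world")
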
3560
3561---
3562File: /coding/finetune/llm/manager.py
3563---
3564
3565import os
3566import requests
3567from typing import Optional, Dict, Any
3568from urllib.parse import urljoin
3569
3570class LLMManager:
3571
3572 """Manager for interacting with LLM API endpoints"""
3573
3574 def __init__(self, base_url: str = f"http://localhost:25000"):
3575 """
3576 Initialize LLM manager
3577
3578 Args:
3579 base_url: Base URL of LLM API server
3580
3581 Raises:
3582 ValueError: If LLM_AUTH_KEY environment variable is not set
3583 """
3584 self.base_url = base_url.rstrip('/')
3585 self.auth_key = os.getenv("LLM_AUTH_KEY")
3586 if not self.auth_key:
3587 raise ValueError("LLM_AUTH_KEY environment variable not set")
3588 self.current_key: Optional[str] = None
3589
3590 def _make_request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
3591 """
3592 Make HTTP request to API endpoint
3593
3594 Args:
3595 method: HTTP method (get, post, etc)
3596 endpoint: API endpoint path
3597 **kwargs: Additional arguments passed to requests
3598
3599 Returns:
3600 Dict containing API response
3601
3602 Raises:
3603 requests.exceptions.RequestException: If request fails
3604 """
3605 url = urljoin(f"{self.base_url}/", endpoint.lstrip('/'))
3606 headers = kwargs.pop('headers', {})
3607 headers['Authorization'] = self.auth_key
3608
3609 response = requests.request(
3610 method,
3611 url,
3612 headers=headers,
3613 **kwargs
3614 )
3615 response.raise_for_status()
3616 return response.json()
3617
3618 def init_key(self, key: str) -> Dict[str, str]:
3619 """
3620 Initialize token tracking for a key
3621
3622 Args:
3623 key: Key to initialize
3624
3625 Returns:
3626 Dict containing initialization status
3627 """
3628 result = self._make_request(
3629 'post',
3630 'init',
3631 json={'key': key}
3632 )
3633 self.current_key = key
3634 return result
3635
3636 def reset_count(self) -> Dict[str, str]:
3637 """
3638 Reset token count for current key
3639
3640 Returns:
3641 Dict containing reset status
3642 """
3643 return self._make_request('post', 'reset')
3644
3645 def get_count(self) -> Dict[str, Any]:
3646 """
3647 Get current token count
3648
3649 Returns:
3650 Dict containing current key and count
3651 """
3652 return self._make_request('get', 'count')
3653
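A sketch of the manager's token-tracking flow (the key name is hypothetical; LLM_AUTH_KEY must be set in the environment):

manager = LLMManager()
manager.init_key("validator-1")   # hypothetical key
print(manager.get_count())        # {"key": "validator-1", "count": 0}
manager.reset_count()
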
3654
3655
3656
3657---
3658File: /coding/finetune/swe-server/runner.py
3659---
3660
3661import os
3662import submission
3663
3664swe_instance = submission.SWE()
3665
3666def run_swe(repo_location, issue_description):
3667 return swe_instance(repo_location, issue_description)
3668
3669if __name__ == "__main__":
3670 repo_location = "/app/repo"
3671 issue_description = os.getenv("ISSUE_DESCRIPTION")
3672 result = run_swe(repo_location, issue_description)
3673 print("Patch: ", result.model_dump())
3674
3675
3676---
3677File: /coding/finetune/swe-server/server.py
3678---
3679
3680from fastapi import FastAPI, HTTPException
3681from pydantic import BaseModel
3682import submission
3683
3684app = FastAPI()
3685
3686 # Instantiate the SWE class from submission.py
3687
3688swe_instance = submission.SWE()
3689
3690class CallRequest(BaseModel):
3691 repo_location: str
3692 issue_description: str
3693
3694@app.post("/call")
3695async def call_swe(request: CallRequest) -> dict:
3696 try:
3697 # Run the SWE object with the given inputs
3698 result = swe_instance(request.repo_location, request.issue_description)
3699 return {"result": result.model_dump()}
3700 except Exception as e:
3701 raise HTTPException(status_code=500, detail=str(e))
3702
3703
3704if __name__ == "__main__":
3705 import uvicorn
3706
3707 uvicorn.run(app, host="0.0.0.0", port=3000)
3708
3709
3710
3711---
3712File: /coding/finetune/swe-server/swebase.py
3713---
3714
3715import os
3716import requests
3717from pydantic import BaseModel
3718from abc import ABC, abstractmethod
3719
3720class Edit(BaseModel):
3721 file_name: str
3722 line_number: int
3723 line_content: str
3724 new_line_content: str
3725
3726class Patch(BaseModel):
3727 edits: list[Edit]
3728
3729 # If the host IP is localhost this will fail from inside the container; the Docker host IP must be used instead
3730class LLMClient:
3731 def __init__(self, base_url: str = f"http://{os.getenv('HOST_IP', 'localhost')}:25000"):
3732 """Initialize LLM client with API server URL"""
3733 self.base_url = base_url.rstrip("/")
3734
3735 def __call__(self, query: str, llm_name: str) -> tuple[str, int]:
3736 """
3737 Call LLM API endpoint
3738
3739 Args:
3740 query (str): The prompt/query to send to the LLM
3741 llm_name (str): Name of LLM model to use (e.g. "gpt-4", "claude-3-sonnet")
3742
3743 Returns:
3744 tuple[str, int]: (Generated response text, Total tokens used for this key)
3745
3746 Raises:
3747 requests.exceptions.RequestException: If API call fails
3748 """
3749 payload = {"query": query, "llm_name": llm_name}
3750
3751 response = requests.post(f"{self.base_url}/call", json=payload)
3752 response.raise_for_status()
3753
3754 result = response.json()
3755 return result["result"], result["total_tokens"]
3756
3757 def embed(self, query: str) -> list[float]:
3758 """
3759 Get embeddings for text using the embedding API endpoint
3760
3761 Args:
3762 query (str): The text to get embeddings for
3763
3764 Returns:
3765 list[float]: Vector embedding of the input text
3766
3767 Raises:
3768 requests.exceptions.RequestException: If API call fails
3769 """
3770 payload = {"query": query}
3771
3772 response = requests.post(f"{self.base_url}/embed", json=payload)
3773 response.raise_for_status()
3774
3775 result = response.json()
3776 return result["vector"]
3777
3778class SWEBase(ABC):
3779 def __init__(self):
3780 self.llm = LLMClient()
3781
3782 @abstractmethod
3783 def __call__(self, repo_location: str, issue_description: str) -> Patch:
3784 pass
3785
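runner.py and server.py both expect a submission module exposing an SWE class built on this interface. A minimal illustrative stub (not a real miner submission; the llm_name comes from the docstring's examples):

class SWE(SWEBase):
    def __call__(self, repo_location: str, issue_description: str) -> Patch:
        # Ask the proxied LLM for a suggestion (unused in this stub)
        answer, tokens_used = self.llm(f"Suggest a fix for: {issue_description}", "gpt-4")
        # A real submission would translate the answer into concrete Edits
        return Patch(edits=[])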
3786
3787
3788---
3789File: /coding/finetune/__init__.py
3790---
3791
3792from .pipeline import FinetunePipeline
3793
3794ALLOWED_MODULES = [
3795 "ast",
3796 "sentencetransformers",
3797 "networkx",
3798 "grep-ast",
3799 "tree-sitter",
3800 "tree-sitter-languages",
3801 "rapidfuzz",
3802 "llama-index",
3803 "pydantic",
3804 "numpy",
3805 "ruamel.yaml",
3806 "json"
3807]
3808
3809
3810---
3811File: /coding/finetune/dockerutil.py
3812---
3813
3814import os
3815import ast
3816import json
3817import docker
3818import tempfile
3819import threading
3820from pathlib import Path
3821
3822from coding.constants import COMPETITION_ID
3823from ..helpers.git import GitRepo
3824
3825def exec_container_with_timeout(container, command, timeout):
3826 """
3827 Executes a command in a Docker container with a timeout.
3828
3829 Args:
3830 container: The Docker container object.
3831 command: The command to execute.
3832 timeout: Timeout in seconds.
3833
3834 Returns:
3835 Tuple of exec result and logs.
3836
3837 Raises:
3838 TimeoutError: If the command takes longer than the timeout.
3839 """
3840 exec_result = None
3841 logs = None
3842 exception = None
3843
3844 def target():
3845 nonlocal exec_result, logs, exception
3846 try:
3847 exec_result, logs = container.exec_run(command)
3848 except Exception as e:
3849 exception = e
3850
3851 thread = threading.Thread(target=target)
3852 thread.start()
3853 thread.join(timeout)
3854
3855 if thread.is_alive():
3856 # Kill the container if the timeout is exceeded
3857 try:
3858 container.kill()
3859 except Exception as kill_exception:
3860 raise RuntimeError(
3861 f"Failed to kill the container after timeout: {kill_exception}"
3862 )
3863
3864 raise TimeoutError(
3865 f"The command '{command}' exceeded the timeout of {timeout} seconds and the container was killed."
3866 )
3867
3868 if exception:
3869 raise exception
3870
3871 return exec_result, logs
3872
3873def build_docker_container(logic_files: dict, hotkey: str, repo_files: dict) -> str:
3874 """
3875 Builds a Docker container for evaluating model logic.
3876
3877 Args:
3878 logic_files (dict): Dictionary mapping filenames to file contents
3879 hotkey (str): Unique identifier for the logic
3880 repo_files (dict): Dictionary mapping filenames to file contents to copy to repo
3882
3883 Returns:
3884 str: ID of the built container
3885 """
3886 # Initialize Docker client
3887 client = docker.from_env()
3888
3889 # Create temporary directory to store files
3890 with tempfile.TemporaryDirectory() as temp_dir:
3891 # Write logic files to temp directory
3892 for filename, content in logic_files.items():
3893 file_path = os.path.join(temp_dir, filename)
3894 # Create all parent directories
3895 os.makedirs(os.path.dirname(file_path), exist_ok=True)
3896 # Create the file and write content
3897 with open(file_path, "w", encoding="latin-1") as f:
3898 f.write(content)
3899
3900 # Write repo files to repo path
3901 for filename, content in repo_files.items():
3902 file_path = os.path.join(temp_dir, "repo", filename)
3903 # Create all parent directories
3904 os.makedirs(os.path.dirname(file_path), exist_ok=True)
3905 # Create the file and write content
3906 with open(file_path, "w", encoding="latin-1") as f:
3907 f.write(content)
3908
3909 # Copy Dockerfile and server files
3910 swe_server_path = Path(__file__).parent / "swe-server"
3911 for item in swe_server_path.glob("*"):
3912 if item.is_file():
3913 dest_path = os.path.join(temp_dir, item.name)
3914 with open(item, "rb") as src, open(dest_path, "wb") as dst:
3915 dst.write(src.read())
3916 elif item.is_dir():
3917 dest_dir = os.path.join(temp_dir, item.name)
3918 os.system(f"cp -r {item} {dest_dir}")
3919
3920 # Build the container
3921 try:
3922 image, logs = client.images.build(
3923 path=temp_dir, tag=f"swe-logic-{str(hotkey)}-{COMPETITION_ID}".lower(), rm=True
3924 )
3925 return image.id
3926
3927 except docker.errors.BuildError as e:
3928 print(f"Error building container: {str(e)}")
3929 raise
3930 except docker.errors.APIError as e:
3931 print(f"Docker API error: {str(e)}")
3932 raise
3933
3934def run_docker_container(
3935 image_id: str, repo: GitRepo, hotkey: str, issue_description: str
3936) -> dict:
3937 """
3938 Runs a Docker container for evaluating model logic.
3939
3940 Args:
3941 image_id (str): ID of the Docker image to run
3942 repo (GitRepo): Git repository object containing code to evaluate
3943 hotkey (str): Unique identifier for the logic
3944 issue_description (str): Description of the issue to fix
3945
3946 Returns:
3947 dict: The patch output from the container
3948 """
3949 # Initialize Docker client
3950 client = docker.from_env()
3951
3952 container_name = f"swe-logic-{str(hotkey)}-{COMPETITION_ID}".lower()
3953
3954 try:
3955 # Remove any existing container with the same name
3956 try:
3957 existing = client.containers.get(container_name)
3958 existing.remove(force=True)
3959 except docker.errors.NotFound:
3960 pass
3961
3962 container = client.containers.create(
3963 image=image_id,
3964 name=container_name,
3965 detach=True,
3966 ports={"3000/tcp": 3000},
3967 extra_hosts={"host.docker.internal": "host-gateway"},
3968 environment={"HOST_IP": os.getenv("HOST_IP", "localhost"), "ISSUE_DESCRIPTION": issue_description},
3969 # environment={"HOST_IP": "host.docker.internal"},
3970 # auto_remove=True # Container will be automatically removed when stopped
3971 )
3972
3973 # Start the container
3974 container.start()
3976
3977 # Wait for container to finish and get logs
3978 result = container.wait()
3979 logs = container.logs().decode('utf-8')
3980 print("===== CONTAINER LOGS =====")
3981 print(logs)
3982 print("===== CONTAINER LOGS =====")
3983 # Parse the patch from the logs
3984 patch_line = next(line for line in reversed(logs.split('\n')) if line.startswith('Patch:'))
3985 try:
3986 # First try parsing as JSON
3987 patch_dict = json.loads(patch_line.replace('Patch:', '').strip())
3988 except json.JSONDecodeError:
3989 # Fall back to safely evaluating as literal Python dict
3990 patch_dict = ast.literal_eval(patch_line.replace('Patch:', '').strip())
3991
3992 # Cleanup container
3993 try:
3994 container.stop(timeout=1)
3995 container.remove(force=True)
3996 except Exception:
3997 pass
3998
3999 return patch_dict
4000
4001 except docker.errors.APIError as e:
4002 print(f"Docker API error: {str(e)}")
4003 raise
4004
4005
4006def run_docker_container_from_base(
4007 container_name: str, repo: GitRepo, hotkey: str, issue_description: str, logic_files: dict
4008) -> dict:
4009 """
4010 Runs a Docker container for evaluating model logic.
4011
4012 Args:
4013 container_name (str): Name of the Docker container to run
4014 repo (GitRepo): Git repository object containing code to evaluate
4015 hotkey (str): Unique identifier for the logic
4016 issue_description (str): Description of the issue to fix
4017
4018 Returns:
4019 dict: The patch output from the container
4020 """
4021 # Initialize Docker client
4022 client = docker.from_env()
4023 # container_name = f"swe-logic-{str(hotkey)}-{COMPETITION_ID}".lower()
4024 with tempfile.TemporaryDirectory() as temp_dir:
4025 code_dir = os.path.join(temp_dir, "code")
4026 os.makedirs(code_dir)
4027
4028 # Write logic files to code directory
4029 for filename, content in logic_files.items():
4030 file_path = os.path.join(code_dir, filename)
4031 # Create all parent directories
4032 os.makedirs(os.path.dirname(file_path), exist_ok=True)
4033 # Create the file and write content
4034 with open(file_path, "w", encoding="latin-1") as f:
4035 f.write(content)
4036
4037 # Write repo files to repo path
4038 for filename, content in repo.files.items():
4039 file_path = os.path.join(temp_dir, "repo", filename)
4040 # Create all parent directories
4041 os.makedirs(os.path.dirname(file_path), exist_ok=True)
4042 # Create the file and write content
4043 with open(file_path, "w", encoding="latin-1") as f:
4044 f.write(content)
4045
4046 # Copy Dockerfile and server files
4047 swe_server_path = Path(__file__).parent / "swe-server"
4048 for item in swe_server_path.glob("*"):
4049 if item.is_file():
4050 dest_path = os.path.join(code_dir, item.name)
4051 with open(item, "rb") as src, open(dest_path, "wb") as dst:
4052 dst.write(src.read())
4053 elif item.is_dir():
4054 dest_dir = os.path.join(code_dir, item.name)
4055 os.system(f"cp -r {item} {dest_dir}")
4056
4057 try:
4058 # Remove any existing container with the same name
4059 try:
4060 existing = client.containers.get(container_name)
4061 existing.remove(force=True)
4062 except docker.errors.NotFound:
4063 pass
4064
4065 container = client.containers.create(
4066 image="brokespace/swe-server:latest",
4067 name=container_name,
4068 detach=True,
4069 # ports={"3000/tcp": 3000},
4070 extra_hosts={"host.docker.internal": "host-gateway"},
4071 environment={"HOST_IP": os.getenv("HOST_IP", "localhost"), "ISSUE_DESCRIPTION": issue_description},
4072 command="sleep infinity"
4073 )
4074
4075 # Start the container
4076 container.start()
4077
4078 # Copy files from temp_dir into container
4079 os.system(f"docker cp {temp_dir}/. {container_name}:/app/")
4080
4081 # Execute runner.py in container
4082 exec_result, logs = exec_container_with_timeout(container, "python3 -u /app/code/runner.py", 600)
4083 logs = logs.decode('utf-8')
4084 # Parse the patch from the logs
4085 patch_line = next(line for line in reversed(logs.split('\n')) if line.startswith('Patch:'))
4086 try:
4087 # First try parsing as JSON
4088 patch_dict = json.loads(patch_line.replace('Patch:', '').strip())
4089 except json.JSONDecodeError:
4090 # Fall back to safely evaluating as literal Python dict
4091 patch_dict = ast.literal_eval(patch_line.replace('Patch:', '').strip())
4092
4093 return patch_dict
4094
4095 except docker.errors.APIError as e:
4096 print(f"Docker API error: {str(e)}")
4097 raise
4098
4099 finally:
4100 # Cleanup container
4101 try:
4102 container.stop(timeout=1)
4103 except Exception:
4104 pass
4105
4106 try:
4107 container.remove(force=True)
4108 except Exception:
4109 pass
4110
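Both run_* helpers above recover the patch through the same log contract: the container prints one line beginning with "Patch:", which is parsed first as JSON and then as a Python literal. A worked sketch of that contract:

import ast
import json

logs = "setup...\nPatch: {'edits': [{'file_name': 'a.py', 'line_number': 1, 'line_content': 'x', 'new_line_content': 'y'}]}\n"
patch_line = next(line for line in reversed(logs.split("\n")) if line.startswith("Patch:"))
raw = patch_line.replace("Patch:", "").strip()
try:
    patch_dict = json.loads(raw)        # fails here: JSON requires double quotes
except json.JSONDecodeError:
    patch_dict = ast.literal_eval(raw)  # the Python-literal fallback succeeds
print(patch_dict["edits"][0]["file_name"])  # -> a.py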
4111
4112
4113---
4114File: /coding/finetune/evaluate.py
4115---
4116
4117from transformers import AutoTokenizer, AutoModelForCausalLM
4118
4119
4120def evaluate(
4121 model: AutoModelForCausalLM,
4122 tokenizer: AutoTokenizer,
4123 renderer: callable,
4124 query: str,
4125) -> str:
4126 messages = [{"role": "user", "content": query}]
4127 prompt = renderer(messages)
4128 inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
4129 outputs = model.generate(**inputs, max_new_tokens=4096)
4130 response = tokenizer.decode(outputs[0], skip_special_tokens=False)
4131 response = response[len(prompt) :].strip()
4132 special_tokens = tokenizer.all_special_tokens
4133 for token in special_tokens:
4134 response = response.replace(token, "")
4135 return response.strip()
4136
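evaluate expects renderer to turn the chat message list into the exact prompt string fed to the tokenizer. One plausible renderer (an assumption, not mandated by this file) is the tokenizer's own chat template:

def chat_template_renderer(tokenizer):
    def renderer(messages):
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    return renderer

# evaluate(model, tokenizer, chat_template_renderer(tokenizer), "What is 2+2?")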
4137
4138
4139---
4140File: /coding/finetune/model.py
4141---
4142
4143import os
4144import time
4145import shutil
4146import psutil
4147import random
4148import asyncio
4149import requests
4150from tqdm import tqdm
4151import bittensor as bt
4152from transformers import AutoConfig
4153from langchain_openai import ChatOpenAI
4154from sglang.utils import terminate_process
4155from coding.utils.shell import execute_shell_command
4156
4157MODEL_DIR = "~/.cache/huggingface/hub"
4158
4159def is_phi_model(model_name: str):
4160 config = AutoConfig.from_pretrained(model_name)
4161 return "phi3" in config.model_type.lower()
4162
4163
4164# Delete the model from the huggingface cache when we're done serving it so we don't run out of disk space
4165def delete_model_from_hf_cache(model_name: str):
4166 # Determine the cache directory
4167 cache_dir = os.path.expanduser(MODEL_DIR)
4168
4169 # Format the directory name based on the model name
4170 model_cache_dir = os.path.join(cache_dir, f"models--{model_name.replace('/', '--')}")
4171
4172 # Check if the directory exists and delete it
4173 if os.path.exists(model_cache_dir):
4174 try:
4175 shutil.rmtree(model_cache_dir)
4176 bt.logging.debug(f"Finetune: Model has been removed from the HF cache.")
4177 except Exception as e:
4178 bt.logging.error(f"Finetune: Error deleting model from HF cache: {e}")
4179 else:
4180 bt.logging.debug(f"Finetune: Model not found in the cache, could not delete")
4181
4182def wait_for_server(base_url: str, server_process, timeout: int = None) -> None:
4183 """Wait for the server to be ready by polling the /v1/models endpoint.
4184
4185 Args:
4186 base_url: The base URL of the server
4187 server_process: The server process to monitor while polling
4188 timeout: Maximum time to wait in seconds. None means wait forever.
4189 """
4190 start_time = time.time()
4191 procutil = psutil.Process(int(server_process.pid))
4192 while True:
4193 try:
4194 if timeout and time.time() - start_time > timeout:
4195 bt.logging.error(f"Finetune: Server did not become ready within timeout period")
4196 raise TimeoutError("Server did not become ready within timeout period")
4197
4198 # Use psutil to monitor the process
4199 if not procutil.is_running(): # Check if process is still running
4200 bt.logging.error(f"Finetune: Server process terminated unexpectedly, check VRAM usage")
4201 raise Exception("Server process terminated unexpectedly, potentially VRAM usage issue")
4202 if server_process.poll() is not None:
4203 bt.logging.error(f"Finetune: Server process terminated with code {server_process.poll()}")
4204 raise Exception(f"Server process terminated with code {server_process.poll()}")
4205
4206 response = requests.get(
4207 f"{base_url}/v1/models",
4208 headers={"Authorization": "Bearer None"},
4209 )
4210 if response.status_code == 200:
4211 time.sleep(5)
4212 break
4213
4214 except requests.exceptions.RequestException:
4215 time.sleep(1)
4216
4217
4218class ModelServer:
4219 def __init__(self, model_name: str):
4220 self.model_path = f"{model_name}"
4221 self.model_name = model_name
4222 # random port between 12000 and 15999
4223 self.port = random.randint(12000, 15999)
4224 self.server_process = None
4225 self.start_server()
4226
4227
4228 def invoke(self, messages: list[dict]):
4229 return self.llm.invoke(messages).content
4230
4231 async def ainvoke(self, messages: list[dict]):
4232 response = await self.llm.ainvoke(messages)
4233 return response.content
4234
4235 async def _invoke_batch_async(self, message_batches, batch_size=10):
4236 """Async function to process all batches."""
4237 results = []
4238 for i in tqdm(range(0, len(message_batches), batch_size), desc="Processing batches"):
4239 batch = message_batches[i : i + batch_size]
4240 # Schedule all tasks in this batch concurrently
4241 tasks = [self.llm.ainvoke(messages) for messages in batch]
4242 # Wait for them all
4243 responses = await asyncio.gather(*tasks)
4244 # Collect results
4245 results.extend(response.content for response in responses)
4246 return results
4247
4248 def invoke_batch(self, message_batches, batch_size=10):
4249 return asyncio.run(self._invoke_batch_async(message_batches, batch_size))
4250
4251 def start_server(self):
4252 if not is_phi_model(self.model_name):
4253 self.server_process = execute_shell_command(
4254 f"""
4255 {os.getcwd()}/.venvsglang/bin/python -m sglang.launch_server \
4256 --model {self.model_name} \
4257 --model-path {self.model_path} \
4258 --port {self.port} \
4259 --host 0.0.0.0 \
4260 --quantization fp8 \
4261 --mem-fraction-static 0.6 \
4262 --context-length 8096 \
4263 --disable-cuda-graph
4264 """,
4265 self.model_name
4266 )
4267 else:
4268 self.server_process = execute_shell_command(
4269 f"""
4270 {os.getcwd()}/.venvsglang/bin/python -m sglang.launch_server \
4271 --model {self.model_name} \
4272 --model-path {self.model_path} \
4273 --port {self.port} \
4274 --host 0.0.0.0 \
4275 --quantization fp8 \
4276 --mem-fraction-static 0.6 \
4277 --context-length 8096 \
4278 --attention-backend triton
4279 """,
4280 self.model_name
4281 )
4282 # Wait for the server to be ready
4283 try:
4284 wait_for_server(f"http://localhost:{self.port}", self.server_process, timeout=60*15)
4285 except Exception as e:
4286 terminate_process(self.server_process)
4287 self.server_process.kill()
4288 bt.logging.error(f"Finetune: Server did not become ready within timeout period")
4289
4290 if not is_phi_model(self.model_name):
4291 self.server_process = execute_shell_command(
4292 f"""
4293 {os.getcwd()}/.venvsglang/bin/python -m sglang.launch_server \
4294 --model {self.model_name} \
4295 --model-path {self.model_path} \
4296 --port {self.port} \
4297 --host 0.0.0.0 \
4298 --mem-fraction-static 0.6 \
4299 --context-length 8096 \
4300 --disable-cuda-graph
4301 """,
4302 self.model_name
4303 )
4304 else:
4305 self.server_process = execute_shell_command(
4306 f"""
4307 {os.getcwd()}/.venvsglang/bin/python -m sglang.launch_server \
4308 --model {self.model_name} \
4309 --model-path {self.model_path} \
4310 --port {self.port} \
4311 --host 0.0.0.0 \
4312 --mem-fraction-static 0.6 \
4313 --context-length 8096 \
4314 --attention-backend triton
4315 """,
4316 self.model_name
4317 )
4318
4319 try:
4320 wait_for_server(f"http://localhost:{self.port}", self.server_process, timeout=60*15)
4321 except Exception as e:
4322 # the model may be a Phi-family model; retry with the triton attention backend
4323 terminate_process(self.server_process)
4324 self.server_process.kill()
4325 self.server_process = execute_shell_command(
4326 f"""
4327 {os.getcwd()}/.venvsglang/bin/python -m sglang.launch_server \
4328 --model {self.model_name} \
4329 --model-path {self.model_path} \
4330 --port {self.port} \
4331 --host 0.0.0.0 \
4332 --mem-fraction-static 0.6 \
4333 --context-length 8096 \
4334 --attention-backend triton
4335 """,
4336 self.model_name
4337 )
4338 try:
4339 wait_for_server(f"http://localhost:{self.port}", self.server_process, timeout=60*15)
4340 except Exception as e:
4341 bt.logging.error(f"Finetune: Server did not become ready within timeout period")
4342 self.server_process.kill()
4343 self.cleanup()
4344 raise Exception(f"Error running model {e}")
4345
4346
4347 self.llm = ChatOpenAI(
4348 api_key="None",
4349 base_url=f"http://localhost:{self.port}/v1",
4350 model=self.model_name,
4351 )
4352
4353 def cleanup(self):
4354 try:
4355 if self.server_process:
4356 try:
4357 terminate_process(self.server_process)
4358 except Exception:
4359 pass
4360 self.server_process = None
4361 delete_model_from_hf_cache(self.model_name)
4363 except Exception as e:
4364 pass
4365
4366 def __del__(self):
4367 self.cleanup()
4368
4369 def __enter__(self):
4370 return self
4371
4372 def __exit__(self, exc_type, exc_val, exc_tb):
4373 self.cleanup()
4374
4375if __name__ == "__main__":
4376 # Test the model server with a simple prompt
4377 model_name = "MistralAI/Mistral-7B-Instruct-v0.1"
4378 server = ModelServer(model_name)
4379
4380 try:
4381 # Test basic invoke
4382 query = "What is 2+2?"
4383 response = server.invoke(query)
4384 print("Basic invoke test:")
4385 print(f"Response: {response}\n")
4386
4387 # Test batch invoke
4388 queries = [f"What is {i}+{i}?" for i in range(3)]
4389 responses = server.invoke_batch(queries, batch_size=2)
4390 print("Batch invoke test:")
4391 for i, response in enumerate(responses):
4392 print(f"Batch {i} response: {response}")
4393
4394 except Exception as e:
4395 print(f"Error during testing: {e}")
4396 finally:
4397 server.cleanup()
4398
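Since __enter__/__exit__ are defined, the server can also be driven as a context manager, which guarantees cleanup even when invocation raises (requires a GPU and the .venvsglang environment; the model id is illustrative):

with ModelServer("mistralai/Mistral-7B-Instruct-v0.1") as server:
    print(server.invoke("What is 2+2?"))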
4399
4400---
4401File: /coding/finetune/pipeline.py
4402---
4403
4404import os
4405import pickle
4406import argparse
4407import traceback
4408import bittensor as bt
4409from typing import List
4410from pydantic import BaseModel
4411from .tracker import gather_all_logics
4412from concurrent.futures import ThreadPoolExecutor, as_completed
4413
4414from .dockerutil import run_docker_container_from_base
4415
4416from coding.schemas import Patch
4417from coding.schemas.context import Context
4418from coding.constants import COMPETITION_ID
4419from coding.rewards.codesim import CodeSimModel
4420from coding.schemas.tracking import TrackingInfo
4421from coding.constants import COMPETITION_ID, ALLOWED_MODULES, NUM_ALLOWED_CHARACTERS, ALLOWED_IMPORTS
4422
4423from coding.tasks.swe import SWEBenchTask
4424from coding.datasets.swe import SWEBenchDataset
4425from coding.finetune.llm.manager import LLMManager
4426from coding.helpers.codeanal import verify_code_usage
4427from coding.utils.config import config as util_config
4428from coding.utils.config import add_validator_args
4429
4430
4431
4432
4433
4434class FinetuneEventResults(BaseModel):
4435 trackers: List[TrackingInfo]
4436 competition_id: int = COMPETITION_ID
4437
4438 def __state_dict__(self):
4439 return {
4440 "trackers": [tracker.model_dump() for tracker in self.trackers],
4441 "competition_id": COMPETITION_ID,
4442 }
4443
4444 def public_state_dict(self):
4445 trackers = [tracker.model_dump() for tracker in self.trackers]
4446 for tracker in trackers:
4447 tracker["model"] = None
4448 return {
4449 "trackers": trackers,
4450 "competition_id": COMPETITION_ID,
4451 }
4452
4453
4454
4455def generate_swe_tasks(ds: SWEBenchDataset, n: int = 1000, code_scorer = None) -> List[SWEBenchTask]:
4456 tasks = []
4457 while len(tasks) < n:
4458 try:
4459 tasks.append(SWEBenchTask(llm=None, context=Context(**ds.get()), code_scorer=code_scorer))
4460 except Exception as e:
4461 bt.logging.error(f"Error generating task: {e}")
4462 print(traceback.format_exc())
4463 return tasks
4464
4465
4466def bittensor_injector(self):
4467 self.wallet = bt.wallet(config=self.config)
4468 self.dendrite = bt.dendrite(wallet=self.wallet)
4469 self.subtensor = bt.subtensor(config=self.config)
4470 self.metagraph = self.subtensor.metagraph(self.config.netuid)
4471
4472
4473def verify_logic(logic: dict) -> tuple[bool, str]:
4474 # Dictionary mapping modules to allowed functions/imports
4475 allowed_modules = ALLOWED_MODULES.copy()
4476
4477 # Define allowed file extensions
4478 allowed_extensions = {'.yaml', '.py', '.txt', '.json'}
4479
4480 for module in logic:
4481 # Handle folder paths by taking first component
4482 module_name = module.split("/")[0].split(".")[0]
4483 if module_name not in allowed_modules:
4484 allowed_modules.append(module_name)
4485
4486 for key, value in logic.items():
4487 if value:
4488 # Check if the file extension is allowed
4489 file_extension = key.split('.')[-1]
4490 if f".{file_extension}" not in allowed_extensions:
4491 return False, f"File extension .{file_extension} is not allowed."
4492
4493 # Create expanded allowed modules list that includes submodules and specific imports
4494 expanded_allowed = set()
4495 for mod in allowed_modules:
4496 expanded_allowed.add(mod)
4497 # If module is allowed, all its submodules are allowed
4498 for used_mod in value.split():
4499 if used_mod.startswith(f"{mod}."):
4500 expanded_allowed.add(used_mod)
4501 # Check for specific allowed imports like "from os import getenv"
4502 usage_pass, usage_msg = verify_code_usage(value, list(expanded_allowed), ALLOWED_IMPORTS)
4503 if not usage_pass:
4504 return False, usage_msg
4505
4506 total_chars = 0
4507 for key, value in logic.items():
4508 # Include full folder path in character count
4509 total_chars += len(key) + len(value)
4510
4511 if total_chars > NUM_ALLOWED_CHARACTERS:
4512 return (
4513 False,
4514 f"Total characters: {total_chars} exceeds the limit of {NUM_ALLOWED_CHARACTERS}",
4515 )
4516
4517 return True, "Logic is valid"
4518
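A small worked example of the gate above, assuming "ast" is in the ALLOWED_MODULES imported from coding.constants (as it is in coding/finetune/__init__.py); file names and contents are illustrative:

logic = {"solution.py": "import ast\n\ndef solve():\n    return ast.parse('x = 1')\n"}
print(verify_logic(logic))                  # expected: (True, "Logic is valid")
print(verify_logic({"run.sh": "echo hi"}))  # (False, "File extension .sh is not allowed.")
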
4519class FinetunePipeline:
4520 def __init__(
4521 self, config, tracking_logics: List[TrackingInfo] = None,
4522 ):
4523 self.config = config
4524 try:
4525 bittensor_injector(self)
4526 except Exception as e:
4527 bt.logging.error(f"Error injecting bittensor: {e}")
4528 print(traceback.format_exc())
4529 self.code_sim_model = CodeSimModel()
4530 self.trackers = []
4531 self.dataset = SWEBenchDataset()
4532 self.load_results()
4533 self.llm_manager = LLMManager()
4534
4535 if tracking_logics is None:
4536 self.load_logics()
4537 else:
4538 self.tracking_logics = tracking_logics
4539
4540 self.load_tasks()
4541 self.load_completed_trackers()
4542 # Register cleanup to be called when the object is deleted
4543 # self._finalizer = weakref.finalize(self, self.cleanup)
4544
4545 def load_completed_trackers(self):
4546 if os.path.exists(f"{self.config.neuron.full_path}/completed_trackers_{COMPETITION_ID}.pkl"):
4547 with open(f"{self.config.neuron.full_path}/completed_trackers_{COMPETITION_ID}.pkl", "rb") as f:
4548 self.completed_trackers = pickle.load(f)
4549 else:
4550 self.completed_trackers = []
4551
4552 def store_completed_trackers(self):
4553 with open(f"{self.config.neuron.full_path}/completed_trackers_{COMPETITION_ID}.pkl", "wb") as f:
4554 pickle.dump(self.completed_trackers, f)
4555
4556 def load_tasks(self):
4557 if os.path.exists(f"{self.config.neuron.full_path}/tasks_{COMPETITION_ID}.pkl"):
4558 with open(f"{self.config.neuron.full_path}/tasks_{COMPETITION_ID}.pkl", "rb") as f:
4559 self.tasks = pickle.load(f)[:self.config.neuron.finetune_test_size]
4560 for task in self.tasks:
4561 task.code_scorer = self.code_sim_model
4562 else:
4563 self.tasks = generate_swe_tasks(self.dataset, self.config.neuron.finetune_test_size, code_scorer=self.code_sim_model)
4564 self.store_tasks()
4565
4566 def load_results(self):
4567 results_file = f"{self.config.neuron.full_path}/results_{COMPETITION_ID}.pkl"
4568 if os.path.exists(results_file):
4569 with open(results_file, "rb") as f:
4570 saved_results = pickle.load(f)
4571 self.trackers = saved_results.get("trackers", [])
4572
4573 def store_logics(self):
4574 with open(f"{self.config.neuron.full_path}/logics_{COMPETITION_ID}.pkl", "wb") as f:
4575 pickle.dump(self.tracking_logics, f)
4576
4577 def load_logics(self):
4578 if os.path.exists(f"{self.config.neuron.full_path}/logics_{COMPETITION_ID}.pkl"):
4579 with open(f"{self.config.neuron.full_path}/logics_{COMPETITION_ID}.pkl", "rb") as f:
4580 self.tracking_logics = pickle.load(f)
4581 else:
4582 self.tracking_logics = gather_all_logics(self)
4583 self.store_logics()
4584
4585 @property
4586 def results(self) -> FinetuneEventResults:
4587 return FinetuneEventResults(
4588 trackers=self.trackers
4589 )
4590
4591 # TODO: record time taken, and handle the race condition caused by parallel execution;
4592 # reusing the same docker container for each task (with the task repo files copied over) needs to change
4593 def evaluate(self) -> FinetuneEventResults:
4594 # gather all logics
4595 bt.logging.info("Gathering all logics...")
4596 bt.logging.info(f"Gathered {len(self.tracking_logics)} logics.")
4597
4598 bt.logging.info("Verifying and building docker containers for each logic...")
4599 for tracker in self.tracking_logics:
4600 bt.logging.info(f"Verifying logic for hotkey {tracker.hotkey}...")
4601 pass_logic, pass_msg = verify_logic(tracker.logic)
4602 if not pass_logic:
4603 bt.logging.info(
4604 f"Logic failed verification: {pass_msg} on tracker {tracker.hotkey}"
4605 )
4606 tracker.logic = {}
4607 continue
4608 bt.logging.info(f"Logic for hotkey {tracker.hotkey} passed verification.")
4609
4610 bt.logging.info(f"Beginning evaluation of {len(self.tasks)} tasks...")
4611 for tracker_idx, tracking_logic in enumerate(self.tracking_logics):
4612 bt.logging.info(f"Processing tracker {tracker_idx + 1}/{len(self.tracking_logics)}")
4613 # Skip if no logic provided
4614 if not tracking_logic.logic:
4615 bt.logging.info(f"No logic provided for tracker {tracking_logic.hotkey}, skipping...")
4616 tracking_logic.score = 0
4617 self.trackers.append(tracking_logic)
4618 continue
4619
4620 previous_tracker = next((tracker for tracker in self.trackers if str(tracker.logic) == str(tracking_logic.logic)), None)
4621 if previous_tracker is not None:
4622 bt.logging.info(f"Finetune: Using previously evaluated score for hotkey: {tracking_logic.hotkey}")
4623 tracking_logic.score = previous_tracker.score
4624 if tracking_logic.hotkey != previous_tracker.hotkey:
4625 self.trackers.append(tracking_logic)
4626 continue
4627
4628 # Otherwise, evaluate the logic
4629 bt.logging.info(f"Initializing LLM key for hotkey {tracking_logic.hotkey}...")
4630 self.llm_manager.init_key(tracking_logic.hotkey)
4631 bt.logging.info(f"Starting docker container for hotkey {tracking_logic.hotkey}...")
4632 scores = []
4633 # Create a thread pool to process tasks in parallel
4634 bt.logging.info("Starting thread pool for task processing...")
4635 with ThreadPoolExecutor() as executor:
4636 bt.logging.info("Thread pool started.")
4637 def process_task(task_data):
4638 bt.logging.info(f"Processing task...")
4639 task_idx, task = task_data
4640 try:
4641 bt.logging.info(f"Making request to container for hotkey {tracking_logic.hotkey}, task index {task_idx}...")
4642 result = run_docker_container_from_base(
4643 f"swe-logic-{str(tracking_logic.hotkey)}-{COMPETITION_ID}-{task_idx}".lower(),
4644 task.repo,
4645 tracking_logic.hotkey,
4646 task.query,
4647 tracking_logic.logic
4648 )
4649 patch = Patch(**result)
4650 bt.logging.info(f"Scoring response for hotkey {tracking_logic.hotkey}, task index {task_idx}...")
4651 # TODO in the next comp uncomment the below
4652 # score = task.score(patch, self.llm_manager.get_count())
4653 score = task.score(patch, 1)
4654 self.llm_manager.reset_count()
4655 bt.logging.info(f"Score for hotkey {tracking_logic.hotkey}, task index {task_idx}: {score}")
4656 return score
4657 except Exception as e:
4658 bt.logging.error(f"Request failed for hotkey {tracking_logic.hotkey}, task index {task_idx}: {e}")
4659 print(traceback.format_exc())
4660 return 0
4661
4662 # Keep track of active futures and tasks
4663 active_futures = {}
4664 task_queue = list(enumerate(self.tasks))
4665 task_idx = 0
4666
4667 # Start initial batch of 8 tasks
4668 bt.logging.info("Starting initial batch of 8 tasks...")
4669 while len(active_futures) < 8 and task_queue:
4670 task_data = task_queue.pop(0)
4671 future = executor.submit(process_task, task_data)
4672 active_futures[future] = task_data
4673
4674 bt.logging.info(f"Initial batch submitted, active futures: {len(active_futures)}")
4675 # Process remaining tasks as others complete
4676 while active_futures:
4677 completed_future = next(as_completed(active_futures))
4678 task_data = active_futures.pop(completed_future)
4679
4680 # Get score from completed task
4681 score = completed_future.result()
4682 scores.append(score)
4683 bt.logging.info(f"Average score for hotkey {tracking_logic.hotkey}: {sum(scores) / len(scores)}")
4684
4685 # Start next task if any remain
4686 if task_queue:
4687 task_data = task_queue.pop(0)
4688 future = executor.submit(process_task, task_data)
4689 active_futures[future] = task_data
4690
4691 task_idx += 1
4692 bt.logging.info(f"Completed task {task_idx}/{len(self.tasks)} for hotkey {tracking_logic.hotkey}")
4693 tracking_logic.score = sum(scores) / len(scores)
4694 self.trackers.append(tracking_logic)
4695 self.store_results()
4696
4697 bt.logging.info(f"Cleaning up container for hotkey {tracking_logic.hotkey}...")
4698 bt.logging.info(f"Final score for hotkey {tracking_logic.hotkey}: {tracking_logic.score}")
4699
4700 bt.logging.info("Evaluation complete!")
4701 self.store_results()
4702
4703 return self.results
4704 def __str__(self):
4705 return f"{self.__class__.__name__}(scores={[tracker.score for tracker in self.trackers]!r}, models={self.tracking_logics!r})"
4706
4707 def __repr__(self):
4708 return self.__str__()
4709
4710 def __state_dict__(self):
4711 return {
4712 "scores": [tracker.score for tracker in self.trackers],
4713 "tracking_logics": [model.model_dump() for model in self.tracking_logics],
4714 }
4715
4716 @staticmethod
4717 def start(
4718 config, code_sim_model: CodeSimModel = None
4719 ) -> FinetuneEventResults:
4720 if code_sim_model is None:
4721 code_sim_model = CodeSimModel()
4722 pipeline = FinetunePipeline(config)
4723 result = pipeline.evaluate()
4724 pipeline.cleanup() # Ensure cleanup is called after evaluation
4725 return result
4726
4727 def store_tasks(self):
4728 with open(f"{self.config.neuron.full_path}/tasks_{COMPETITION_ID}.pkl", "wb") as f:
4729 for task in self.tasks:
4730 task.code_scorer = None
4731 pickle.dump(self.tasks, f)
4732
4733 def store_results(self):
4734 results_file = f"{self.config.neuron.full_path}/results_{COMPETITION_ID}.pkl"
4735 temp_file = results_file + ".tmp"
4736
4737 # Write to a temp file first
4738 with open(temp_file, "wb") as f:
4739 pickle.dump({"trackers": self.trackers}, f)
4740
4741 # Replace the old file with the new
4742 os.replace(temp_file, results_file)
4743
4744 @staticmethod
4745 def generate_tasks(config) -> List[SWEBenchTask]:
4746 dataset = SWEBenchDataset()
4747 code_scorer = CodeSimModel()
4748 tasks = generate_swe_tasks(dataset, config.neuron.finetune_test_size, code_scorer=code_scorer)
4749 with open(f"{config.neuron.full_path}/tasks_{COMPETITION_ID}.pkl", "wb") as f:
4750 for task in tasks:
4751 task.code_scorer = None
4752 pickle.dump(tasks, f)
4753
4754 @staticmethod
4755 def tasks_exist(config):
4756 return os.path.exists(f"{config.neuron.full_path}/tasks_{COMPETITION_ID}.pkl")
4757
4758 def cleanup(self):
4759 """
4760 Delete the tasks file and any other task files
4761 """
4763 # check if tasks_*.pkl exists and delete it if it does
4764 for file in os.listdir(self.config.neuron.full_path):
4765 if file.startswith("tasks_") and file.endswith(".pkl"):
4766 os.remove(os.path.join(self.config.neuron.full_path, file))
4767 if file.startswith("results_") and file.endswith(".pkl"):
4768 os.remove(os.path.join(self.config.neuron.full_path, file))
4769
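A hedged end-to-end driver for the pipeline (config is assumed to be an already-built validator config from coding.utils.config; both helpers are defined above):

if not FinetunePipeline.tasks_exist(config):
    FinetunePipeline.generate_tasks(config)

results = FinetunePipeline.start(config)
print(results.public_state_dict())  # trackers with the model field stripped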
4770
4771
4772---
4773File: /coding/finetune/score.py
4774---
4775
4776import bittensor as bt
4777from typing import List, Any
4778from huggingface_hub import model_info
4779from concurrent.futures import ProcessPoolExecutor
4780
4781from coding.tasks.task import Task
4782from coding.finetune.evaluate import evaluate
4783from coding.finetune.model import ModelServer
4784from coding.rewards.codesim import CodeSimModel
4785
4786
4787def cleanup_code_sim_model(self):
4788 try:
4789 import torch
4790 from accelerate.utils import release_memory
4791
4792 torch.cuda.empty_cache()
4793 with torch.no_grad():
4794 self.code_sim_model.code_scorer._model.cpu()
4795 release_memory(self.code_sim_model.code_scorer._model)
4796 del self.code_sim_model.code_scorer._model
4797
4798 with torch.no_grad():
4799 self.code_sim_model.code_scorer._tokenizer.cpu()
4800 release_memory(self.code_sim_model.code_scorer._tokenizer)
4801 del self.code_sim_model.code_scorer._tokenizer
4802
4803 del self.code_sim_model
4804 except Exception as e:
4805 pass
4806
4807def validate_model_info(model_name: str) -> bool:
4808 try:
4809 miner_model_info = model_info(model_name)
4810 license = miner_model_info.card_data['license']
4811 total_size = miner_model_info.safetensors.total
4812 return license in ["apache-2.0", "cc-by-nc-4.0", "mit"] and total_size < 10000000000
4813 except Exception as e:
4814 bt.logging.info(f"Error validating model {model_name}: {e}")
4815 return False
4816
4817def score(self, model_name: str, tasks: List[Task]) -> float:
4818 """
4819 Calculate the average score across multiple tasks for a given model.
4820
4821 Args:
4822 model_name (str): Name or path of the model to evaluate
4827 tasks (List[Task]): List of Task objects to evaluate the model on. Task must be of the FIM type.
4828
4829 Returns:
4830 float: Average score across all tasks, where each task score is between 0 and 1
4831
4832 The function:
4833 1. Validates the model info
4834 2. Loads the model and tokenizer
4835 3. For each task:
4836 - Evaluates the model's response on the task query
4837 - Calculates a score for that response
4838 4. Cleans up model resources
4839 5. Returns mean score across all tasks
4840 """
4841
4842 if not validate_model_info(model_name):
4843 bt.logging.info(f"Model {model_name} is not valid. It must have a valid license and be less than 10B parameters.")
4844 return 0.0
4845
4846 model_server = None
4847 try:
4848 model_server = ModelServer(model_name)
4849 except Exception as e:
4850 bt.logging.info(f"Error loading model {model_name}: {e}")
4851 try:
4852 model_server.cleanup()
4853 except Exception as e:
4854 pass
4855 return 0.0
4856
4857 scores = []
4858 responses = []
4859 try:
4860 # Create list of queries
4861 queries = [task.query for task in tasks]
4862
4863 # Make parallel calls using asyncio
4864 responses = model_server.invoke_batch(queries)
4865 model_server.cleanup()
4866 del model_server
4867 self.code_sim_model = CodeSimModel()
4868 # Get references
4869 references = [task.reference for task in tasks]
4870 scores = self.code_sim_model.similarity_batch(references, responses)
4871 return sum(scores) / len(scores)
4872 except Exception as e:
4873 bt.logging.info(f"Error evaluating model: {e}")
4874 try:
4875 model_server.cleanup()
4876 except Exception as e:
4877 pass
4878 return 0.0
4879 finally:
4880 cleanup_code_sim_model(self)
4881
4882
4883
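A sketch of the license/size gate in isolation (this performs a network call to the Hugging Face Hub; the model id is illustrative):

print(validate_model_info("mistralai/Mistral-7B-Instruct-v0.2"))
# True only if the card license is apache-2.0, cc-by-nc-4.0, or mit
# and the safetensors parameter count is under 10B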
4884
4885
4886---
4887File: /coding/finetune/tracker.py
4888---
4889
4890from typing import List
4891
4892from coding.protocol import LogicSynapse
4893from coding.schemas.tracking import TrackingInfo
4894from coding.utils.uids import get_miner_uids, get_hotkey_from_uid
4895
4896def gather_all_logics(validator) -> List[TrackingInfo]:
4897 uids = get_miner_uids(validator)
4898 axons = [validator.metagraph.axons[uid] for uid in uids]
4899 synapse = LogicSynapse()
4900 responses = []
4901 for axon in axons:
4902 try:
4903 responses.append(validator.dendrite.query(axons=[axon], synapse=synapse, timeout=45, deserialize=False)[0])
4904 except Exception as e:
4905 print("Error querying axon", axon, e)
4906 responses.append(synapse)
4907 return [
4908 TrackingInfo(
4909 logic=synapse.logic,
4910 block=validator.metagraph.block,
4911 hotkey=get_hotkey_from_uid(validator, uids[i]),
4912 uid=uids[i],
4913 score=0.0,
4914 )
4915 for i, synapse in enumerate(responses)
4916 ]
4917
4918
4919
4920---
4921File: /coding/helpers/__init__.py
4922---
4923
4924from .selector import Selector
4925from .parser import *
4926from .cosine import *
4927from .forwards import *
4928from .fim import *
4929
4930
4931---
4932File: /coding/helpers/codeanal.py
4933---
4934
4935import ast
4936from typing import List, Dict
4937
4938def verify_code_usage(code: str, allowed_modules: List[str], allowed_imports: Dict[str, List[str]]) -> tuple[bool, str]:
4939 try:
4940 tree = ast.parse(code)
4941 imported_modules = set()
4942 imported_names = {} # Track what names were imported from each module
4943 for node in ast.walk(tree):
4944 if isinstance(node, ast.Import):
4945 for alias in node.names:
4946 # Only block import if module is in allowed_imports but used without restrictions
4947 if alias.name in allowed_imports and not allowed_imports[alias.name]:
4948 return False, f"Disallowed unrestricted use of module: {alias.name}"
4949 if alias.name not in allowed_modules and alias.name not in allowed_imports:
4950 return False, f"Disallowed module: {alias.name}"
4951 imported_modules.add(alias.name)
4952 elif isinstance(node, ast.ImportFrom):
4953 if node.module not in allowed_modules and node.module not in allowed_imports:
4954 return False, f"Disallowed module: {node.module}"
4955 # Track imported names from restricted modules
4956 if node.module in allowed_imports:
4957 imported_names[node.module] = set()
4958 for alias in node.names:
4959 if alias.name not in allowed_imports[node.module]:
4960 return False, f"Disallowed import {alias.name} from module {node.module}"
4961 imported_names[node.module].add(alias.name)
4962 imported_modules.add(node.module)
4963 elif isinstance(node, ast.Call):
4964 if isinstance(node.func, ast.Attribute):
4965 # Check if attribute access like os.getenv is allowed
4966 if isinstance(node.func.value, ast.Name):
4967 module_name = node.func.value.id
4968 # Only check restricted functions if module was imported and has restrictions
4969 if module_name in imported_modules and module_name in allowed_imports:
4970 if node.func.attr not in allowed_imports[module_name]:
4971 return False, f"Disallowed function {module_name}.{node.func.attr}"
4972 elif isinstance(node.func, ast.Name):
4973 if node.func.id == 'eval' or node.func.id == 'exec':
4974 return False, f"Dangerous built-in function call: {node.func.id}"
4975 elif isinstance(node, ast.Expr) and isinstance(node.value, ast.Call):
4976 if isinstance(node.value.func, ast.Name):
4977 if node.value.func.id in ['eval', 'exec']:
4978 return False, f"Dangerous built-in function call: {node.value.func.id}"
4979 return True, "Code is safe"
4980 except Exception as e:
4981 return False, f"Error during parsing: {e}"
4982
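Two worked examples of the AST gate (the allow-lists are illustrative):

code = "import os\nprint(os.getenv('HOME'))"
print(verify_code_usage(code, [], {"os": ["getenv"]}))
# -> (True, "Code is safe"): os is restricted, but only os.getenv is used

print(verify_code_usage("eval('1+1')", ["os"], {}))
# -> (False, "Dangerous built-in function call: eval")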
4983
4984---
4985File: /coding/helpers/cosine.py
4986---
4987
4988import numpy as np
4989from sklearn.metrics.pairwise import cosine_similarity
4990
4991def cosim(model, text1: str, text2: str) -> float:
4992 # Load the pre-trained sentence transformer model
4993
4994 # Embed the texts
4995 embeddings = model.encode([text1, text2])
4996
4997 # Calculate cosine similarity
4998 similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
4999
5000 return similarity
5001
5002def normalize_cosim(value, min_value=0.5, max_value=1.0, exponent=1.3):
5003 """
5004 Exponentially normalize the cosine similarity value to a range of 0 to 1.
5005
5006 Parameters:
5007 value (float): The cosine similarity value to be normalized.
5008 min_value (float): The minimum value of the original range. Default is 0.5.
5009 max_value (float): The maximum value of the original range. Default is 1.0.
5010 exponent (float): The exponent to be used for the normalization. Default is 1.3.
5011
5012 Returns:
5013 float: The exponentially normalized value in the range of 0 to 1, or 0 if the result is invalid.
5014 """
5015 if min_value == max_value:
5016 raise ValueError("min_value and max_value must be different")
5017
5018 # First normalize linearly
5019 linear_normalized_value = (value - min_value) / (max_value - min_value)
5020
5021 # Check for invalid linear_normalized_value (e.g., NaN or out of bounds)
5022 if np.isnan(linear_normalized_value) or linear_normalized_value < 0 or linear_normalized_value > 1:
5023 return 0
5024
5025 # Then apply the exponential transformation
5026 exponential_normalized_value = np.power(linear_normalized_value, exponent)
5027
5028 return exponential_normalized_value
5029
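A worked example with the defaults (min_value 0.5, max_value 1.0, exponent 1.3):

# cosim = 0.80 -> linear (0.80 - 0.5) / 0.5 = 0.60 -> 0.60 ** 1.3 ≈ 0.515
print(round(normalize_cosim(0.80), 3))  # 0.515
print(normalize_cosim(0.40))            # below min_value, so it clamps to 0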
5030
5031---
5032File: /coding/helpers/fim.py
5033---
5034
5035import random
5036from typing import Tuple
5037
5038def insert_fim_hole(code: str) -> Tuple[str, str]:
5039 lines = code.splitlines()
5040 if len(lines) < 2:
5041 return code, ""
5042
5043 # Determine the maximum possible size of the hole (between 1 and 15 lines)
5044 max_hole_size = min(15, len(lines))
5045
5046 # Randomly select the start index and the size of the hole
5047 start_index = random.randint(0, len(lines) - 1)
5048 hole_size = random.randint(1, max_hole_size)
5049
5050 # Ensure the hole does not exceed the bounds of the code
5051 end_index = min(start_index + hole_size - 1, len(lines) - 1)
5052
5053 # Extract the selected lines
5054 replaced_lines = lines[start_index:end_index + 1]
5055
5056 # Replace the selected lines with "<|fim_hole|>"
5057 lines[start_index:end_index + 1] = ["<|fim_hole|>"]
5058
5059 # Reconstruct the code
5060 new_code = "\n".join(lines)
5061
5062 return new_code, "\n".join(replaced_lines)
5063
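A quick illustration of the hole insertion (output depends on the RNG; seeding makes it reproducible):

import random

random.seed(0)
snippet = "a = 1\nb = 2\nc = 3\nd = 4"
with_hole, removed = insert_fim_hole(snippet)
print(with_hole)  # some contiguous lines replaced by "<|fim_hole|>"
print(removed)    # the lines that were cut out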
5064
5065---
5066File: /coding/helpers/forwards.py
5067---
5068
5069import json
5070import time
5071import traceback
5072import bittensor as bt
5073from starlette.types import Send
5074from typing import List, Any, Dict
5075from langchain_core.runnables.base import RunnableSequence
5076
5077
5078async def string_forward(string, send: Send):
5079 await send(
5080 {
5081 "type": "http.response.body",
5082 "body": string,
5083 "more_body": False,
5084 }
5085 )
5086
5087async def chain_forward(
5088 self,
5089 query: str,
5090 files: List[Any],
5091 extra_info: Dict[str, Any],
5092 init_time: float,
5093 timeout_threshold: float,
5094 chain: RunnableSequence,
5095 chain_formatter: Dict[str, str],
5096 send: Send,
5097 ):
5098 buffer = []
5099 temp_completion = "" # for wandb logging
5100 timeout_reached = False
5101 try:
5102 # Langchain built in streaming. 'astream' also available for async
5103 for token in chain.stream(chain_formatter):
5104 if not isinstance(token, str):
5105 token = token.content
5106 buffer.append(token)
5107
5108 if time.time() - init_time > timeout_threshold:
5109 bt.logging.debug(f"⏰ Timeout reached, stopping streaming")
5110 timeout_reached = True
5111 break
5112
5113 if (
5114 "broken_file" not in extra_info
5115 and len(buffer) == self.config.neuron.streaming_batch_size
5116 ):
5117 joined_buffer = "".join(buffer)
5118 temp_completion += joined_buffer
5119 bt.logging.debug(f"Streamed tokens: {repr(joined_buffer)}")
5120
5121 await send(
5122 {
5123 "type": "http.response.body",
5124 "body": joined_buffer,
5125 "more_body": True,
5126 }
5127 )
5128 buffer = []
5129
5130 if (
5131 buffer and not timeout_reached
5132 ): # Don't send the last buffer of data if timeout.
5133 body = "".join(buffer)
5134 await send(
5135 {
5136 "type": "http.response.body",
5137 "body": body,
5138 "more_body": False,
5139 }
5140 )
5141 except Exception as e:
5142 bt.logging.error(f"Error in forward: {e}, - {traceback.format_exc()}")
5143 if self.config.neuron.stop_on_forward_exception:
5144 self.should_exit = True
5145
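A minimal harness for string_forward, with a hand-rolled callable standing in for Starlette's Send (sketch only; in production the ASGI server supplies send):

import asyncio

async def fake_send(message):
    print(message["type"], message.get("more_body"), message["body"])

asyncio.run(string_forward(b"hello world", fake_send))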
5146
5147---
5148File: /coding/helpers/git.py
5149---
5150
5151import os
5152import shutil
5153import tempfile
5154import weakref
5155from git import Repo
5156
5157class GitRepo:
5158 def __init__(self, repo_name: str, commit_hash: str):
5159 """
5160 Initialize a Git repository object that manages cloning and cleanup.
5161
5162 Args:
5163 repo_name (str): Name/URL of the repository to clone
5164 commit_hash (str): Specific commit hash to checkout
5165
5166 Raises:
5167 git.exc.GitCommandError: If repository does not exist or other git error occurs
5168 """
5169 self.repo_name = repo_name
5170 self.commit_hash = commit_hash
5171 self.temp_dir = tempfile.mkdtemp()
5172 self.repo = None
5173 self._initialize_repo()
5174
5175 def _initialize_repo(self):
5176 """Initialize/reinitialize the git repository"""
5177 if self.temp_dir and os.path.exists(self.temp_dir) and os.listdir(self.temp_dir):
5178 self._finalizer = weakref.finalize(self, self._cleanup)
5179 return
5180 # Ensure repo name includes full GitHub URL if not already
5181 if not self.repo_name.startswith(('http://', 'https://', 'git://')):
5182 self.repo_name = f"https://github.com/{self.repo_name}"
5183
5184 # Clone repo with minimal history and specific commit
5185 self.repo = Repo.clone_from(
5186 self.repo_name,
5187 self.temp_dir,
5188 depth=1, # Only get most recent commit
5189 no_single_branch=True, # Allow fetching specific commit
5190 no_tags=True # Don't fetch any tags
5191 )
5192 # Fetch only the specific commit
5193 self.repo.git.fetch('origin', self.commit_hash, depth=1)
5194 self.repo.git.checkout(self.commit_hash)
5195 # Register cleanup to be called when object is deleted
5196 self._finalizer = weakref.finalize(self, self._cleanup)
5197
5198 def __getstate__(self):
5199 """Called when pickling - return state without repo objects"""
5200 state = self.__dict__.copy()
5201 # Remove unpicklable objects
5202 state['repo'] = None
5203 state['_finalizer'] = None
5204 return state
5205
5206 def __setstate__(self, state):
5207 """Called when unpickling - restore state and reinitialize repo"""
5208 self.__dict__.update(state)
5209 if self.temp_dir is None:
5210 self.temp_dir = tempfile.mkdtemp()
5211 self._initialize_repo()
5212
5213 def _cleanup(self):
5214 """
5215 Clean up the temporary directory containing the cloned repository.
5216 """
5217 try:
5218 if self.temp_dir and os.path.exists(self.temp_dir):
5219 shutil.rmtree(self.temp_dir)
5220 except Exception as e:
5221 print(f"Error during cleanup: {str(e)}")
5222
5223 @property
5224 def path(self) -> str:
5225 """
5226 Get the path to the cloned repository.
5227
5228 Returns:
5229 str: Path to the repository directory
5230 """
5231 return self.temp_dir
5232
5233 @property
5234 def files(self) -> dict[str, str]:
5235 logic = {}
5236 # Read all files in the cloned repository
5237 for root, dirs, files in os.walk(self.path):
5238 # Skip __pycache__ directories
5239 if '__pycache__' in dirs:
5240 dirs.remove('__pycache__')
5241
5242 # Get relative path from the repository root
5243 rel_path = os.path.relpath(root, self.path)
5244
5245 # Process all files in current directory
5246 for filename in files:
5247 # Skip __pycache__ files
5248 if '__pycache__' in filename:
5249 continue
5250
5251 file_path = os.path.join(root, filename)
5252 # Get the relative path for the logic dict key
5253 if rel_path == '.':
5254 logic_key = filename
5255 else:
5256 logic_key = os.path.join(rel_path, filename)
5257
5258 with open(file_path, 'r', encoding='latin-1') as f:
5259 logic[logic_key] = f.read()
5260 return logic
5261
5262 def __enter__(self):
5263 return self
5264
5265 def __exit__(self, exc_type, exc_val, exc_tb):
5266 self._cleanup()
5267
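A usage sketch (this clones over the network; the repository and commit are illustrative):

with GitRepo("octocat/Hello-World", "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d") as repo:
    print(repo.path)             # temporary checkout directory
    print(list(repo.files)[:5])  # first few relative file paths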
5268
5269---
5270File: /coding/helpers/parser.py
5271---
5272
5273import re
5274
5275def extract_python_code(markdown_string):
5276 """
5277 Extracts Python code blocks from a Markdown string.
5278
5279 Parameters:
5280 markdown_string (str): The Markdown string to extract Python code from.
5281
5282 Returns:
5283 list of str: A list of extracted Python code blocks.
5284 """
5285 # Regular expression to match Python code blocks
5286 python_code_pattern = re.compile(r'```python\n(.*?)\n```', re.DOTALL)
5287
5288 # Find all Python code blocks
5289 python_code_blocks = python_code_pattern.findall(markdown_string)
5290
5291 return python_code_blocks
5292
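A worked example of the extraction:

md = "Intro\n```python\nprint('hi')\n```\nOutro"
print(extract_python_code(md))  # ["print('hi')"]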
5293
5294---
5295File: /coding/helpers/rewrite.py
5296---
5297
5298import random
5299
5300REWRITE_REASONS = [
5301 "more concise",
5302 "more verbose",
5303 "more pythonic",
5304 "more efficient",
5305 "more readable",
5306 "more correct",
5307 "more efficient",
5308 "a little different",
5309 "super concise",
5310 "super verbose",
5311 "super pythonic",
5312 "super efficient",
5313 "super readable",
5314 "super correct",
5315]
5316
5317def rewrite_code(code: str, model: str) -> str:
5318 res = model.invoke(f"Rewrite the following code to be {random.choice(REWRITE_REASONS)}, make sure it does the same thing though: {code}").content
5319
5320 if "```" in res:
5321 start = res.find("```") + 3 # Skip the backticks and newline
5322 start = res.find("\n", start) + 1
5323
5324 end = res.rfind("```")
5325 res = res[start:end].strip()
5326 return res
5327
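A worked trace of the fence-stripping slice in rewrite_code (the model call is omitted; res is a canned response):

res = "Here you go:\n```python\nx = 1\n```"
start = res.find("```") + 3        # index just past the opening backticks
start = res.find("\n", start) + 1  # skip the language tag line
end = res.rfind("```")             # the closing fence
print(res[start:end].strip())      # -> x = 1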
5328
5329
5330---
5331File: /coding/helpers/selector.py
5332---
5333
5334# The MIT License (MIT)
5335# Copyright © 2024 Yuma Rao
5336# Copyright © 2023 Opentensor Foundation
5337# Copyright © 2024 Macrocosmos
5338
5339# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
5340# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
5341# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
5342# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5343
5344# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
5345# the Software.
5346
5347# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
5348# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
5349# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
5350# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
5351# DEALINGS IN THE SOFTWARE.
5352
5353import random
5354
5355
5356class Selector:
5357 def __init__(self, seed=None):
5358 self.seed = seed
5359 self.rng = random.Random(seed)
5360
5361 def __call__(self, items, weights=None):
5362 return self.rng.choices(items, weights=weights)[0]
5363
5364
5365class PageRankSelector(Selector):
5366 """Preferentially chooses the items at the top of the list, under the assumption that they are more important."""
5367
5368 def __init__(self, seed=None, alpha=0.85):
5369 super().__init__(seed)
5370 self.alpha = alpha
5371
5372 def __call__(self, items):
5373 weights = [self.alpha**i for i in range(len(items))]
5374 return self.rng.choices(items, weights=weights)[0]
5375
5376
5377class SimilaritySelector(Selector):
5378 """Chooses the item most similar to the query."""
5379
5380 def __init__(self, seed=None, similarity_fn=None):
5381 super().__init__(seed)
5382 self.similarity_fn = similarity_fn
5383
5384 def __call__(self, query, items):
5385 return max(items, key=lambda item: self.similarity_fn(query, item))
5386
5387
5388class TopSelector(Selector):
5389 """Chooses the top item."""
5390
5391 def __init__(self, seed=None):
5392 super().__init__(seed)
5393
5394 def __call__(self, items):
5395 return items[0]
5396
5397
5398if __name__ == "__main__":
5399 selector = Selector(seed=42)
5400 items = range(10)
5401 item = selector(items)
5402
5403 assert item in items, "Selector should return one of the items"
5404
5405
5406
5407---
5408File: /coding/helpers/swebench.py
5409---
5410
5411from __future__ import annotations
5412
5413import re
5414import requests
5415import time
5416
5417from bs4 import BeautifulSoup
5418from ghapi.core import GhApi
5419from fastcore.net import HTTP404NotFoundError, HTTP403ForbiddenError
5420from typing import Callable, Iterator, Optional
5421from unidiff import PatchSet
5422
5423
5424class Repo:
5425 def __init__(self, owner: str, name: str, token: Optional[str] = None):
5426 """
5427 Init to retrieve target repository and create ghapi tool
5428
5429 Args:
5430 owner (str): owner of target repository
5431 name (str): name of target repository
5432 token (str): github token
5433 """
5434 self.owner = owner
5435 self.name = name
5436 self.token = token
5437 self.api = GhApi(token=token)
5438 self.repo = self.call_api(self.api.repos.get, owner=owner, repo=name)
5439 self.size = self.repo.size if self.repo else 0
5440
5441 def call_api(self, func: Callable, **kwargs) -> dict|None:
5442 """
5443 API call wrapper with rate limit handling (checks every 5 minutes if rate limit is reset)
5444
5445 Args:
5446 func (callable): API function to call
5447 **kwargs: keyword arguments to pass to API function
5448 Return:
5449 values (dict): response object of `func`
5450 """
5451 for _ in range(10):
5452 try:
5453 values = func(**kwargs)
5454 return values
5455 except HTTP403ForbiddenError:
5456 for _ in range(10):
5457 rl = self.api.rate_limit.get()
5458 if rl.resources.core.remaining > 0:
5459 break
time.sleep(60 * 5) # wait five minutes before re-checking, as the docstring promises; this sleep was missing
5460 except HTTP404NotFoundError as e:
5461 return None
5462
5463 def extract_resolved_issues(self, pull: dict) -> list[str]:
5464 """
5465 Extract list of issues referenced by a PR
5466
5467 Args:
5468 pull (dict): PR dictionary object from GitHub
5469 Return:
5470 resolved_issues (list): list of issue numbers referenced by PR
5471 """
5472 # Define 1. issue number regex pattern 2. comment regex pattern 3. keywords
5473 issues_pat = re.compile(r"(\w+)\s+\#(\d+)")
5474 comments_pat = re.compile(r"(?s)<!--.*?-->")
5475 keywords = {
5476 "close",
5477 "closes",
5478 "closed",
5479 "fix",
5480 "fixes",
5481 "fixed",
5482 "resolve",
5483 "resolves",
5484 "resolved",
5485 "complete",
5486 "completed",
5487 "finish",
5488 "finishes",
5489 "finished",
5490 }
5491
5492 # Construct text to search over for issue numbers from PR body and commit messages
5493 text = pull.title if pull.title else ""
5494 text += "\n" + (pull.body if pull.body else "")
5495 commits = self.get_all_loop(
5496 self.api.pulls.list_commits, pull_number=pull.number, quiet=True
5497 )
5498 commit_messages = [commit.commit.message for commit in commits]
5499 commit_text = "\n".join(commit_messages) if commit_messages else ""
5500 text += "\n" + commit_text
5501 # Remove comments from text
5502 text = comments_pat.sub("", text)
5503 # Look for issue numbers in text via scraping <keyword, number> patterns
5504 references = dict(issues_pat.findall(text))
5505 resolved_issues = list()
5506 if references:
5507 for word, issue_num in references.items():
5508 if word.lower() in keywords:
5509 resolved_issues.append(issue_num)
5510 return resolved_issues
5511
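# Editor's note: a runnable sketch of the <keyword> #<number> scraping used by
# extract_resolved_issues above, without the GitHub API:
#   dict(re.findall(r"(\w+)\s+\#(\d+)", "fixes #101 and closes #202"))
#   == {"fixes": "101", "closes": "202"}  -> both pass the keyword filter
# Note the dict is keyed by keyword, so "fixes #1 ... fixes #2" keeps only #2.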
5512 def get_all_loop(
5513 self,
5514 func: Callable,
5515 per_page: int = 100,
5516 num_pages: Optional[int] = None,
5517 quiet: bool = False,
5518 start_page = 1,
5519 **kwargs,
5520 ) -> Iterator:
5521 """
5522 Return all values from a paginated API endpoint.
5523
5524 Args:
5525 func (callable): API function to call
5526 per_page (int): number of values to return per page
5527 num_pages (int): number of pages to return
5528 quiet (bool): whether to print progress
start_page (int): page number to start from
5529 **kwargs: keyword arguments to pass to API function
5530 """
5531 page = start_page
5532 args = {
5533 "owner": self.owner,
5534 "repo": self.name,
5535 "per_page": per_page,
5536 **kwargs,
5537 }
5538 for _ in range(10):
5539 try:
5540 # Get values from API call
5541 values = func(**args, page=page)
5542 yield from values
5543 if len(values) == 0:
5544 break
5545 if not quiet:
5546 rl = self.api.rate_limit.get()
print(f"Processed page {page} ({per_page} values per page). Rate limit remaining: {rl.resources.core.remaining}")
5547 if num_pages is not None and page >= num_pages:
5548 break
5549 page += 1
5550 except Exception:
5551 # Rate limit handling: wait for the limit to reset, checking periodically
5552 for _ in range(10):
5553 rl = self.api.rate_limit.get()
5554 if rl.resources.core.remaining > 0:
5555 break
time.sleep(60 * 5) # back off before re-checking; the busy loop previously hammered the rate-limit endpoint
5556
5557 def get_all_issues(
5558 self,
5559 per_page: int = 100,
5560 num_pages: Optional[int] = None,
5561 direction: str = "desc",
5562 sort: str = "created",
5563 state: str = "closed",
5564 quiet: bool = False,
5565 ) -> Iterator:
5566 """
5567 Wrapper for API call to get all issues from repo
5568
5569 Args:
5570 per_page (int): number of issues to return per page
5571 num_pages (int): number of pages to return
5572 direction (str): direction to sort issues
5573 sort (str): field to sort issues by
5574 state (str): state of issues to look for
5575 quiet (bool): whether to print progress
5576 """
5577 issues = self.get_all_loop(
5578 self.api.issues.list_for_repo,
5579 num_pages=num_pages,
5580 per_page=per_page,
5581 direction=direction,
5582 sort=sort,
5583 state=state,
5584 quiet=quiet,
5585 )
5586 return issues
5587
5588 def get_all_pulls(
5589 self,
5590 per_page: int = 100,
5591 num_pages: Optional[int] = None,
5592 direction: str = "desc",
5593 sort: str = "created",
5594 state: str = "closed",
5595 quiet: bool = False,
5596 start_page = 1,
5597 ) -> Iterator:
5598 """
5599 Wrapper for API call to get all PRs from repo
5600
5601 Args:
5602 per_page (int): number of PRs to return per page
5603 num_pages (int): number of pages to return
5604 direction (str): direction to sort PRs
5605 sort (str): field to sort PRs by
5606 state (str): state of PRs to look for
5607 quiet (bool): whether to print progress
5608 """
5609 pulls = self.get_all_loop(
5610 self.api.pulls.list,
5611 num_pages=num_pages,
5612 direction=direction,
5613 per_page=per_page,
5614 sort=sort,
5615 state=state,
5616 quiet=quiet,
5617 start_page=start_page,
5618 )
5619 return pulls
5620
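# Editor's note: a hedged usage sketch for the Repo wrapper above. It needs
# network access, an imported os module, and a GitHub token; "octocat" and
# "Hello-World" are only illustrative names:
#   repo = Repo("octocat", "Hello-World", token=os.environ.get("GITHUB_TOKEN"))
#   for pull in repo.get_all_pulls(num_pages=1):
#       print(pull.number, repo.extract_resolved_issues(pull))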
5621
5622def extract_problem_statement_and_hints(pull: dict, repo: Repo) -> tuple[str, str]:
5623 """
5624 Extract problem statement from issues associated with a pull request
5625
5626 Args:
5627 pull (dict): PR dictionary object from GitHub
5628 repo (Repo): Repo object
5629 Return:
5630 text (str): problem statement
5631 hints (str): hints
5632 """
5633 if repo.name == "django":
5634 return extract_problem_statement_and_hints_django(pull, repo)
5635 text = ""
5636 all_hint_texts = list()
5637 for issue_number in pull["resolved_issues"]:
5638 issue = repo.call_api(
5639 repo.api.issues.get,
5640 owner=repo.owner,
5641 repo=repo.name,
5642 issue_number=issue_number,
5643 )
5644 if issue is None:
5645 continue
5646 title = issue.title if issue.title else ""
5647 body = issue.body if issue.body else ""
5648 text += f"{title}\n{body}\n"
5649 issue_number = issue.number
5650 hint_texts = _extract_hints(pull, repo, issue_number)
5651 hint_text = "\n".join(hint_texts)
5652 all_hint_texts.append(hint_text)
5653 return text, "\n".join(all_hint_texts) if all_hint_texts else ""
5654
5655
5656def _extract_hints(pull: dict, repo: Repo, issue_number: int) -> list[str]:
5657 """
5658 Extract hints from comments associated with a pull request (before first commit)
5659
5660 Args:
5661 pull (dict): PR dictionary object from GitHub
5662 repo (Repo): Repo object
5663 issue_number (int): issue number
5664 Return:
5665 hints (list): list of hints
5666 """
5667 # Get all commits in PR
5668 commits = repo.get_all_loop(
5669 repo.api.pulls.list_commits, pull_number=pull["number"], quiet=True
5670 )
5671 commits = list(commits)
5672 if len(commits) == 0:
5673 # If there are no commits, return no hints
5674 return []
5675 # Get time of first commit in PR
5676 commit_time = commits[0].commit.author.date # str
5677 commit_time = time.mktime(time.strptime(commit_time, "%Y-%m-%dT%H:%M:%SZ"))
5678 # Get all comments in PR
5679 all_comments = repo.get_all_loop(
5680 repo.api.issues.list_comments, issue_number=issue_number, quiet=True
5681 )
5682 all_comments = list(all_comments)
5683 # Iterate through all comments, only keep comments created before first commit
5684 comments = list()
5685 for comment in all_comments:
5686 comment_time = time.mktime(
5687 time.strptime(comment.updated_at, "%Y-%m-%dT%H:%M:%SZ")
5688 ) # use updated_at instead of created_at
5689 if comment_time < commit_time:
5690 comments.append(comment)
5691 else:
5692 break
5693 # only include information available before the first commit was created
5694 # Keep text from comments
5695 comments = [comment.body for comment in comments]
5696 return comments
5697
5698
5699def extract_patches(pull: dict, repo: Repo) -> tuple[str, str]:
5700 """
5701 Get patch and test patch from PR
5702
5703 Args:
5704 pull (dict): PR dictionary object from GitHub
5705 repo (Repo): Repo object
5706 Return:
5707 patch_change_str (str): gold patch
5708 patch_test_str (str): test patch
5709 """
5710 patch = requests.get(pull["diff_url"]).text
5711 patch_test = ""
5712 patch_fix = ""
5713 for patched_file in PatchSet(patch): # each item is a file-level patch, not a single hunk
5714 if any(
5715 test_word in patched_file.path for test_word in
5716 ['test', 'tests', 'e2e', 'testing']
5717 ):
5718 patch_test += str(patched_file)
5719 else:
5720 patch_fix += str(patched_file)
5721 return patch_fix, patch_test
5722
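# Editor's note: the substring test above routes e.g. "tests/test_core.py"
# into the test patch and "pkg/core.py" into the gold patch; being a substring
# check, it also matches paths such as "contest/" that merely contain "test".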
5723
5724### MARK: Repo Specific Parsing Functions ###
5725def extract_problem_statement_and_hints_django(
5726 pull: dict, repo: Repo
5727 ) -> tuple[str, list[tuple[str, float]]]:
5728 """
5729 Get problem statement and hints from issues associated with a pull request
5730
5731 Args:
5732 pull (dict): PR dictionary object from GitHub
5733 repo (Repo): Repo object
5734 Return:
5735 text (str): problem statement
5736 hints (str): hints
5737 """
5738 text = ""
5739 all_hints_text = list()
5740 for issue_number in pull["resolved_issues"]:
5741 url = f"https://code.djangoproject.com/ticket/{issue_number}"
5742 resp = requests.get(url)
5743 if resp.status_code != 200:
5744 continue
5745 soup = BeautifulSoup(resp.text, "html.parser")
5746
5747 # Get problem statement (title + body)
5748 issue_desc = soup.find("div", {"id": "ticket"})
5749 title = issue_desc.find("h1", class_="searchable").get_text()
5750 title = re.sub(r"\s+", " ", title).strip()
5751 body = issue_desc.find("div", class_="description").get_text()
5752 body = re.sub(r"\n+", "\n", body)
5753 body = re.sub(r" ", "\t", body)
5754 body = re.sub(r"[ ]{2,}", " ", body).strip()
5755 text += f"{title}\n{body}\n"
5756
5757 # Get time of first commit in PR
5758 commits = repo.get_all_loop(
5759 repo.api.pulls.list_commits, pull_number=pull["number"], quiet=True
5760 )
5761 commits = list(commits)
5762 if len(commits) == 0:
5763 continue
5764 commit_time = commits[0].commit.author.date
5765 commit_time = time.mktime(time.strptime(commit_time, "%Y-%m-%dT%H:%M:%SZ"))
5766
5767 # Get all comments before first commit
5768 comments_html = soup.find("div", {"id": "changelog"})
5769 div_blocks = comments_html.find_all("div", class_="change")
5770 # Loop through each div block
5771 for div_block in div_blocks:
5772 # Find the comment text and timestamp
5773 comment_resp = div_block.find("div", class_="comment")
5774 timestamp_resp = div_block.find("a", class_="timeline")
5775 if comment_resp is None or timestamp_resp is None:
5776 continue
5777
5778 comment_text = re.sub(r"\s+", " ", comment_resp.text).strip()
5779 timestamp = timestamp_resp["title"]
5780 if timestamp.startswith("See timeline at "):
5781 timestamp = timestamp[len("See timeline at ") :]
5782 if "/" in timestamp:
5783 timestamp = time.mktime(time.strptime(timestamp, "%m/%d/%y %H:%M:%S"))
5784 elif "," in timestamp:
5785 timestamp = time.mktime(time.strptime(timestamp, "%b %d, %Y, %I:%M:%S %p"))
5786 else:
5787 raise ValueError(f"Timestamp format not recognized: {timestamp}")
5788
5789 # Append the comment and timestamp as a tuple to the comments list
5790 if timestamp < commit_time:
5791 all_hints_text.append((comment_text, timestamp))
5792
5793 return text, all_hints_text
5794
5795
5796
5797---
5798File: /coding/miners/finetune.py
5799---
5800
5801from coding.protocol import HFModelSynapse
5802
5803def miner_process(self, synapse: HFModelSynapse) -> HFModelSynapse:
5804 """
5805 The miner process function is called every time the miner receives a request. This function should contain the main logic of the miner.
5806 """
5807 synapse.model_name = "microsoft/Phi-3-mini-128k-instruct"
5808 synapse.competition_id = 1
5809
5810 return synapse
5811
5812
5813
5814---
5815File: /coding/miners/openai_miner.py
5816---
5817
5818import os
5819import time
5820import bittensor as bt
5821from starlette.types import Send
5822from functools import partial
5823from typing import Dict, Awaitable
5824from langchain_openai import OpenAI
5825from dotenv import load_dotenv, find_dotenv
5826from langchain.prompts import PromptTemplate
5827from langchain_core.output_parsers import StrOutputParser
5828from langchain_core.runnables.base import RunnableSequence
5829
5830from coding.protocol import StreamCodeSynapse
5831
5832
5833def miner_init(self):
5834 """
5835 Initializes the miner. This function is called once when the miner is created.
5836 """
5837 _ = load_dotenv(find_dotenv())
5838 api_key = os.environ.get("OPENAI_API_KEY", "EMPTY")
5839 # Set openai key and other args
5840 self.model = OpenAI(
5841 api_key=api_key,
5842 model_name=self.config.neuron.model_id,
5843 max_tokens=2048,
5844 temperature=0.7,
5845 )
5846
5847def miner_process(self, synapse: StreamCodeSynapse) -> Awaitable:
5848 """
5849 The miner process function is called every time the miner receives a request. This function should contain the main logic of the miner.
5850 """
5851 async def _forward(
5852 self,
5853 query: str,
5854 init_time: float,
5855 timeout_threshold: float,
5856 chain: RunnableSequence,
5857 chain_formatter: Dict[str, str],
5858 send: Send,
5859 ):
5860 buffer = []
5861 temp_completion = "" # for wandb logging
5862 timeout_reached = False
5863
5864 try:
5865 # Langchain built in streaming. 'astream' also available for async
5866 for token in chain.stream(chain_formatter):
5867 buffer.append(token)
5868
5869 if time.time() - init_time > timeout_threshold:
5870 bt.logging.debug(f"⏰ Timeout reached, stopping streaming")
5871 timeout_reached = True
5872 break
5873
5874 if len(buffer) == self.config.neuron.streaming_batch_size:
5875 joined_buffer = "".join(buffer)
5876 temp_completion += joined_buffer
5877 bt.logging.debug(f"Streamed tokens: {joined_buffer}")
5878
5879 await send(
5880 {
5881 "type": "http.response.body",
5882 "body": joined_buffer.encode("utf-8"),
5883 "more_body": True,
5884 }
5885 )
5886 buffer = []
5887
5888 if (
5889 buffer and not timeout_reached
5890 ): # Don't send the last buffer of data if timeout.
5891 joined_buffer = "".join(buffer)
5892 await send(
5893 {
5894 "type": "http.response.body",
5895 "body": joined_buffer.encode("utf-8"),
5896 "more_body": False,
5897 }
5898 )
5899
5900 except Exception as e:
5901 bt.logging.error(f"Error in forward: {e}")
5902 if self.config.neuron.stop_on_forward_exception:
5903 self.should_exit = True
5904
5905 bt.logging.debug(f"📧 Query received, forwarding synapse: {synapse}")
5906
5907 prompt = PromptTemplate.from_template(
5908 "{query}"
5909 )
5910 chain = prompt | self.model | StrOutputParser()
5911
5912 query = synapse.query
5913
5914 chain_formatter = {"query": query}
5915
5916 init_time = time.time()
5917 timeout_threshold = synapse.timeout
5918
5919 token_streamer = partial(
5920 _forward,
5921 self,
5922 query,
5923 init_time,
5924 timeout_threshold,
5925 chain,
5926 chain_formatter,
5927 )
5928 return synapse.create_streaming_response(token_streamer)
5929
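# Editor's note: tokens are flushed in batches of neuron.streaming_batch_size;
# when the timeout fires, the partially filled final buffer is deliberately
# dropped, so clients receive only the batches sent before the deadline.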
5930
5931---
5932File: /coding/miners/qwen_mistral_miner.py
5933---
5934
5935import json
5936import time
5937import traceback
5938import bittensor as bt
5939from typing import Awaitable
5940from functools import partial
5941from langchain.prompts import PromptTemplate
5942from langchain_openai import OpenAI, ChatOpenAI
5943from coding.protocol import StreamCodeSynapse
5944from coding.helpers import chain_forward, string_forward
5945
5946
5947def parse_diff(diff_string):
5948 lines = diff_string.splitlines()
5949 file_diffs = {}
5950 current_file = None
5951 diff_content = []
5952 is_diff_block = False
5953
5954 for line in lines:
5955 if "diff --git" in line:
5956 if current_file and diff_content:
5957 file_diffs[current_file] = "\n".join(diff_content)
5958 current_file = line.split()[-1]
5959 diff_content = []
5960 is_diff_block = False
5961 elif line.startswith("---") or line.startswith("+++"):
5962 # Ignore these lines, as they indicate the old/new file path
5963 continue
5964 elif line.startswith("@@"):
5965 is_diff_block = True
5966 continue
5967 elif is_diff_block:
5968 diff_content.append(line)
5969
5970 if current_file and diff_content:
5971 file_diffs[current_file] = "\n".join(diff_content)
5972
5973 return file_diffs
5974
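# Editor's note: a minimal worked example for parse_diff above. The key is the
# last whitespace-separated token of the "diff --git" line, and only lines
# after an "@@" hunk header are kept:
_demo_diff = (
    "diff --git a/foo.py b/foo.py\n"
    "--- a/foo.py\n"
    "+++ b/foo.py\n"
    "@@ -1 +1 @@\n"
    "-print('old')\n"
    "+print('new')\n"
)
# parse_diff(_demo_diff) == {"b/foo.py": "-print('old')\n+print('new')"}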
5975
5976def miner_init(self):
5977 """
5978 Initializes the miner. This function is called once when the miner is created.
5979 """
5980
5981 def model_factory(
5982 api_base="http://localhost:8000/v1",
5983 model_name=self.config.neuron.model_id,
5984 max_tokens=4096,
5985 temperature=0.7,
5986 top_p=1.0,
5987 chat=False,
5988 ):
5989 if chat:
5990 return ChatOpenAI(
5991 openai_api_base=api_base,
5992 openai_api_key="EMPTY",
5993 model_name=model_name,
5994 max_tokens=max_tokens,
5995 temperature=temperature,
5996 top_p=top_p,
5997 streaming=True,
5998 )
5999 return OpenAI(
6000 openai_api_base=api_base,
6001 openai_api_key="EMPTY",
6002 model_name=model_name,
6003 max_tokens=max_tokens,
6004 temperature=temperature,
6005 top_p=top_p,
6006 streaming=True,
6007 )
6008
6009 self.model_factory = model_factory
6010
6011 self.model = model_factory(chat=True)
6012 self.mistral = model_factory(
6013 api_base="http://localhost:8001/v1",
6014 model_name="thesven/Mistral-7B-Instruct-v0.3-GPTQ",
6015 chat=True,
6016 )
6017
6018
6019def miner_process(self, synapse: StreamCodeSynapse) -> Awaitable:
6020 """
6021 The miner process function is called every time the miner receives a request. This function should contain the main logic of the miner.
6022 """
6023
6024 extra_info = {}
6025 stop = None
6026 chain = None
6027 chain_formatter = None
6028 query = synapse.query
6029 if synapse.messages:
6030 query = synapse.messages[-1].content # previously this assignment was immediately overwritten by synapse.query
6032
6033 bt.logging.debug(f"📧 Query received, forwarding synapse: {synapse}")
6034 if "<|fim_hole|>" in synapse.query and not synapse.files:
6035 chain = self.model_factory(chat=False)
6036 chain_formatter = f"<fim_prefix>{synapse.query.replace('<|fim_hole|>', '<fim_suffix>')}<fim_middle>"
6037 stop = [
6038 "<fim_prefix>",
6039 "<fim_suffix>",
6040 "<fim_middle>",
6041 "//",
6042 "<|end▁of▁sentence|>",
6043 "\n\n",
6044 "\r\n\r\n",
6045 "/src/",
6046 "#- coding: utf-8",
6047 "```",
6048 "\ndef",
6049 "\nclass",
6050 '\n"""#',
6051 ]
6052 elif synapse.messages and synapse.files:
6053 chain = self.model
filestring = "" # was missing, causing a NameError on the += below
6054 for file in synapse.files:
6055 file.content = file.content.replace("}", "}}").replace("{", "{{")
6056 filestring += f"#{file.path}\n{file.content}\n"
6057 chain_formatter = synapse.messages + [
6058 {"role": "user", "content": f"{filestring}\n{synapse.query}"}
6059 ]
6060 elif synapse.messages:
6061 chain = self.model
6062 synapse.messages[0].role = "user"
6063 chain_formatter = [msg.dict() for msg in synapse.messages]
6064 elif "The following issue is:\n\n" in synapse.query:
6065 # this is a SWE-Bench style task
6066 prompt = synapse.query + "\n"
6067 for file in synapse.files:
6068 prompt += f"#Filename: {file.path}\n{file.content}\n"
6069 prompt += "Respond only with the patch, only modify the files you have been provided."
6070 model_res = (
6071 self.mistral.invoke([{"role": "user", "content": prompt[0:15000]}])
6072 .content.replace("<patch>", "")
6073 .replace("</patch>", "")
6074 .replace("b/", "")
6075 .replace("a/", "")
6076 )
6077 if "```" in model_res:
6078 model_res = model_res.split("```")[1]
6079 model_res = json.dumps(parse_diff(model_res))
6080 return synapse.create_streaming_response(partial(string_forward, model_res))
6081 elif synapse.files and "<|fim_hole|>" in synapse.query:
6082 chain = self.model_factory(chat=False)
6083 string = ""
6084 for file in synapse.files:
6085 if "path" not in file:
6086 file.path = ""
6087 string += f"<file_sep>{file.path}\n{file.content}\n"
6088 chain_formatter = (
6089 string
6090 + "<fim_prefix>"
6091 + synapse.query.replace("<|fim_hole|>", "<fim_suffix>")
+ "<fim_middle>" # close the prompt with the middle token, matching the FIM branch above
6092 )
6093 elif "write code to" in synapse.query:
6094 string = ""
6095 chain = self.mistral
6096 for file in synapse.files:
6097 if "path" not in file:
6098 file.path = ""
6099 string += f"{file.path}\n{file.content}\n"
6100 if string:
6101 string += "Using the above files, and responding only with python code \n" # was a bare string literal with no effect
6102 chain_formatter = string + synapse.query
6103 else:
6104 chain = self.model
6105 chain_formatter = synapse.query
6106 if stop:
6107 chain = chain.bind(stop=stop) # bind to the chain actually used; rebinding self.model left `chain` untouched
6108 if not chain:
6109 prompt = PromptTemplate.from_template("{query}")
6110 chain = prompt | self.model
6111
6112 init_time = time.time()
6113 timeout_threshold = synapse.timeout
6114
6115 streamer = partial(
6116 chain_forward,
6117 self,
6118 synapse.query,
6119 synapse.files,
6120 extra_info,
6121 init_time,
6122 timeout_threshold,
6123 chain,
6124 chain_formatter,
6125 )
6126 return synapse.create_streaming_response(streamer)
6127
6128
6129
6130---
6131File: /coding/miners/swe.py
6132---
6133
6134import os
6135from coding.protocol import LogicSynapse
6136
6137def miner_process(self, synapse: LogicSynapse) -> LogicSynapse:
6138 """
6139 The miner process function is called every time the miner receives a request. This function should contain the main logic of the miner.
6140 """
6141 logic = {}
6142 test_submission_dir = ""  # TODO: point this at the submission directory; os.walk("") yields nothing as-is
6143
6144 # Read all files in test-submission directory
6145 for root, dirs, files in os.walk(test_submission_dir):
6146 # Skip __pycache__ directories
6147 if '__pycache__' in dirs:
6148 dirs.remove('__pycache__')
6149
6150 # Get relative path from test_submission_dir
6151 rel_path = os.path.relpath(root, test_submission_dir)
6152
6153 # Process all files in current directory
6154 for filename in files:
6155 # Skip __pycache__ files
6156 if '__pycache__' in filename:
6157 continue
6158
6159 file_path = os.path.join(root, filename)
6160 # Get the relative path for the logic dict key
6161 if rel_path == '.':
6162 logic_key = filename
6163 else:
6164 logic_key = os.path.join(rel_path, filename)
6165
6166 with open(file_path, 'r', encoding='latin-1') as f:
6167 logic[logic_key] = f.read()
6168 synapse.logic = logic
6169 return synapse
6170
6171
6172
6173---
6174File: /coding/rewards/__init__.py
6175---
6176
6177from .reward import (
6178 BaseRewardModel,
6179 RewardResult,
6180 RewardEvent,
6181 BatchRewardOutput,
6182 RewardModelTypeEnum,
6183)
6184# from .pipeline import RewardPipeline
6185from .diffsim import DiffSimModel
6186from .codesim import CodeSimModel
6187from .speed import SpeedModel
6188from .validcode import ValidCodeModel
6189
6190
6191
6192---
6193File: /coding/rewards/codesim.py
6194---
6195
6196import time
6197import numpy as np
6198from typing import List
6199from code_bert_score import BERTScorer
6200
6201from .reward import (
6202 BaseRewardModel,
6203 BatchRewardOutput,
6204 RewardModelTypeEnum,
6205)
6206from coding.helpers.cosine import normalize_cosim
6207
6208
6209class CodeSimModel(BaseRewardModel):
6210 @property
6211 def name(self) -> str:
6212 return "codesim"
6213
6214 def __init__(self, code_scorer=None, **kwargs):
6215 super().__init__()
6216 if code_scorer is None:
6217 self.code_scorer = BERTScorer(lang="python")
6218 else:
6219 self.code_scorer = code_scorer
6220
6221 def similarity(self, reference: str, completion: str) -> float:
6222 if not reference:
6223 return 0
6224 if not completion:
6225 return 0
6226 P, R, F1 = self.code_scorer.score([completion], [reference])
6227 score = F1.tolist()[0]
6228 return normalize_cosim(score)
6229
6230 def similarity_batch(self, references: str|list, completions: List[str]) -> List[float]:
6231 if not references or not completions:
6232 return [0] * len(completions)
6233
6234 # Filter out None or empty strings and keep track of their indices
6235 valid_completions = [(idx, comp) for idx, comp in enumerate(completions) if comp]
6236 if not valid_completions:
6237 return [0] * len(completions)
6238
6239 # Unzip the indices and valid completions
6240 indices, filtered_completions = zip(*valid_completions)
6241
6242 if not isinstance(references, list):
6243 references = [references] * len(filtered_completions)
6244
6245 # Score only the valid completions
6246 P, R, F1 = self.code_scorer.score(filtered_completions, references)
6247 scores = F1.tolist()
6248
6249 # Initialize a result list with zeros for all completions
6250 result_scores = [0] * len(completions)
6251
6252 # Place the normalized scores back in their original positions
6253 for idx, score in zip(indices, scores):
6254 if score > 1:
6255 score = 1.0
6256 result_scores[idx] = normalize_cosim(score)
6257
6258 return result_scores
6259
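# Editor's note: a worked trace of the index bookkeeping above. For
# completions = ["", "x = 1", None, "y = 2"], only indices 1 and 3 are scored;
# their normalized F1 values are scattered back into a zero-initialized list,
# giving [0, s1, 0, s3]. Empty or None completions always score 0.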
6260 def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
6261 """
6262 Get the score between a reference string and a list of completion strings.
6263 """
6264
6265 t0 = time.time()
6266 rewards = self.similarity_batch(reference, completions)
6267 total_time = time.time() - t0
6268 timings = [total_time] * len(completions) # Assuming equal distribution of time for each completion
6269
6270 output = BatchRewardOutput(
6271 rewards=rewards,
6272 timings=timings,
6273 extra_info={}
6274 )
6275
6276 return output
6277
6278
6279---
6280File: /coding/rewards/debugrun.py
6281---
6282
6283
6284
6285
6286---
6287File: /coding/rewards/diffsim.py
6288---
6289
6290import time
6291import difflib
6292from typing import List
6293from .reward import (
6294 BaseRewardModel,
6295 BatchRewardOutput,
6296 RewardModelTypeEnum,
6297)
6298from coding.helpers.cosine import normalize_cosim
6299
6300
6301class DiffSimModel(BaseRewardModel):
6302 @property
6303 def name(self) -> str:
6304 return "diffsim"
6305
6306 def __init__(self):
6307 super().__init__()
6308
6309 def similarity(self, reference: str, completion: str) -> float:
6310 if not completion:
6311 return 0
6312 sequence_matcher = difflib.SequenceMatcher(None, reference, completion)
6313 score = sequence_matcher.ratio()
6314 return normalize_cosim(score)
6315
6316 def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
6317 """
6318 Get the similarity score between the reference and each completion.
6319 """
6320 rewards = []
6321 timings = []
6322 for completion in completions:
6323 t0 = time.time()
6324 rewards.append(self.similarity(reference, completion))
6325 timings.append(time.time() - t0)
6326 output = BatchRewardOutput(
6327 rewards=rewards,
6328 timings=timings,
6329 extra_info={}
6330 )
6331
6332 return output
6333
6334
6335
6336---
6337File: /coding/rewards/pipeline.py
6338---
6339
6340# The MIT License (MIT)
6341# Copyright © 2024 Yuma Rao
6342# Copyright © 2023 Opentensor Foundation
6343# Copyright © 2024 Macrocosmos
6344# Copyright © 2024 Brokespace
6345
6346
6347# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6348# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
6349# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
6350# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6351
6352# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
6353# the Software.
6354
6355# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6356# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
6357# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
6358# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
6359# DEALINGS IN THE SOFTWARE.
6360
6361from typing import List, Callable, Any
6362
6363from coding.tasks import TASKS
6364from .reward import BaseRewardModel
6365from .codesim import CodeSimModel
6366from .speed import SpeedModel
6367from .diffsim import DiffSimModel
6368from .validcode import ValidCodeModel
6369
6370 REWARD_MODELS = {
6371 "codesim": CodeSimModel,
6372 "diffsim": DiffSimModel,
6373 "speed": SpeedModel,
6374 "validcode": ValidCodeModel,
6375 "self": None,
6376 }
6377
6378
6379class RewardPipeline:
6380 def __init__(self, selected_tasks: List[str], device: str, code_scorer):
6381 self.selected_tasks = selected_tasks
6382 self.device = device
6383 self.code_scorer = code_scorer
6384 self.validate_tasks()
6385 self.load_reward_pipeline()
6386
6387 def __getitem__(self, __key: str) -> BaseRewardModel:
6388 return self.reward_models.get(__key)
6389
6390 def get(self, __key: str) -> BaseRewardModel:
6391 return self.reward_models.get(__key)
6392
6393 def keys(self) -> List[str]: #TODO this might not be the right return type
6394 return self.reward_models.keys()
6395
6396 def __repr__(self):
6397 return f"RewardPipeline({self.reward_models})"
6398
6399 def validate_tasks(self):
6400 for task in self.selected_tasks:
6401 if task not in TASKS:
6402 raise ValueError(
6403 f"Task {task} not supported. Please choose from {TASKS.keys()}"
6404 )
6405 # Check that the reward_definition and penalty_definition are lists of dictionaries whose weights sum to one
6406 self._check_weights(task, "reward_definition", expected_weight=1)
6407 self._check_weights(task, "penalty_definition", expected_weight=None)
6408
6409 def _check_weights(self, task, definition, expected_weight):
6410 total_weight = 0
6411
6412 model_infos = getattr(TASKS[task], definition)
6413
6414 for model_info in model_infos:
6415 if not isinstance(model_info, dict):
6416 raise ValueError(
6417 f"{definition} model {model_info} is not a dictionary."
6418 )
6419 if "weight" not in model_info:
6420 raise ValueError(
6421 f"{definition} model {model_info} does not have a weight."
6422 )
6423
6424 weight = model_info["weight"]
6425 if not isinstance(weight, (float, int)):
6426 raise ValueError(
6427 f"{definition} model {model_info} weight is not a float."
6428 )
6429 if not 0 <= weight <= 1:
6430 raise ValueError(
6431 f"{definition} model {model_info} weight is not between 0 and 1."
6432 )
6433
6434 total_weight += weight
6435
6436 if (
6437 model_infos
6438 and expected_weight is not None
6439 and total_weight != expected_weight
6440 ):
6441 raise ValueError(
6442 f"{definition} model {model_infos} weights do not sum to {expected_weight} (sum={total_weight})"
6443 )
6444
6445 def load_reward_pipeline(self):
6446 """Dynamically loads the reward models required by the selected tasks so that we only use the necessary resources."""
6447 active_reward_models = []
6448
6449 for task in self.selected_tasks:
6450 active_reward_models += TASKS[task].reward_definition
6451 active_reward_models += TASKS[task].penalty_definition
6452
6453 # Instantiate only the required reward models
6454 reward_models = {}
6455 for model in active_reward_models:
6456 name = model.get("name")
6457 if not name:
6458 raise ValueError(f"Reward model {model} does not have a name. ")
6459 if name not in REWARD_MODELS.keys():
6460 raise ValueError(
6461 f"Reward model {name} not supported. Please choose from {REWARD_MODELS.keys()}"
6462 )
6463 elif name in reward_models: # Prevents duplicate reward models
6464 continue
6465
6466 cls = REWARD_MODELS[name]
6467 if name == "self":
6468 reward_models[name] = "self"
6469 continue
6470 params = {k: v for k, v in model.items() if k not in ["name", "weight"]}
6471 reward_models[name] = cls(device=self.device, code_scorer=self.code_scorer, **params)
6472
6473 self.reward_models = reward_models
6474
6475
6476---
6477File: /coding/rewards/reward.py
6478---
6479
6480# The MIT License (MIT)
6481# Copyright © 2024 Yuma Rao
6482# Copyright © 2023 Opentensor Foundation
6483# Copyright © 2024 Macrocosmos
6484# Copyright © 2024 Broke
6485
6486
6487# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6488# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
6489# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
6490# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6491
6492# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
6493# the Software.
6494
6495# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6496# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
6497# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
6498# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
6499# DEALINGS IN THE SOFTWARE.
6500
6501import time
6502import numpy as np
6503from enum import Enum
6504from abc import ABC, abstractmethod
6505from dataclasses import dataclass
6506from typing import List, Any, Union, Dict
6507
6508class RewardModelTypeEnum(Enum):
6509 WEIGHTED_REWARD = "reward"
6510 FILTER_REWARD = "filter"
6511 PENALTY = "penalty"
6512
6513@dataclass
6514class RewardEvent(ABC):
6515 """Contains rewards for all the responses in a batch"""
6516
6517 model_name: str
6518 rewards: Any
6519 rewards_normalized: Any
6520 timings: Any
6521 model_type: RewardModelTypeEnum
6522 batch_time: float
6523 extra_info: dict
6524
6525 # implement custom asdict to return a dict with the same keys as the dataclass using the model name
6526 def asdict(self) -> dict:
6527 return {
6528 f"{self.model_name}_raw_{self.model_type.value}": self.rewards.tolist(),
6529 f"{self.model_name}_{self.model_type.value}": self.rewards_normalized,
6530 f"{self.model_name}_{self.model_type.value}_timings": self.timings,
6531 f"{self.model_name}_{self.model_type.value}_batch_time": self.batch_time,
6532 f"{self.model_name}_{self.model_type.value}_extra_info": self.extra_info,
6533 }
6534
6535
6536class RewardResult:
6537 def __init__(self, reward_pipeline, task, response_event, device):
6538 """Passes the responses through the reward models and calculates the total reward
6539
6540 Args:
6541 reward_pipeline (RewardPipeline): List of all loaded/active reward models
6542 task (Task): Task instance which contains reward_definition (list of reward model requirements) and a reference answer (str)
6543 response_event (DendriteResponseEvent): Network responses to the prompt
6544 device (str): Device to run the reward models on
6545 """
6546 self.reward_pipeline = reward_pipeline
6547 self.task = task
6548 self.response_event = response_event
6549 self.device = device
6550 self.task_rewards = task.reward_definition
6551 self.task_penalties = task.penalty_definition
6552 self.reward_events = self.reward_responses(
6553 reference=task.reference,
6554 models=self.task_rewards,
6555 reward_type=RewardModelTypeEnum.WEIGHTED_REWARD,
6556 task=task,
6557 )
6558 self.penalty_events = self.reward_responses(
6559 reference=task.reference,
6560 models=self.task_penalties,
6561 reward_type=RewardModelTypeEnum.PENALTY,
6562 task=task,
6563 )
6564 self.rewards = self.total_reward()
6565
6566 def __state_dict__(self):
6567 state = {"rewards": self.rewards.tolist(), "highest_reward": self.rewards.max(), "average_reward": self.rewards.mean(), "task_name": self.task.name}
6568 for event in self.reward_events + self.penalty_events:
6569 state.update(event.asdict())
6570 return state
6571
6572 def reward_responses(
6573 self, reference: Union[str, List[str], Dict], models: List[dict], reward_type: RewardModelTypeEnum, task
6574 ) -> List[RewardEvent]:
6575 """Calculates the rewards for the responses given the task and returns a RewardEvent for each reward model
6576 reward_events: List[RewardEvent] = [
6577 RewardEvent(model_name='codesim', rewards=np.zeros(50), timings=np.zeros(50), ...),
6578 RewardEvent(model_name='speed', rewards=np.zeros(50), timings=np.zeros(50), ...),
6579 ]
6580 """
6581 reward_events = []
6582 ref = reference
6583 for reward_info in models:
6584 # Select the reward model from preloaded reward model pipeline
6585 reward_model = self.reward_pipeline.get(reward_info["name"])
6586 if not reward_model:
6587 raise ValueError(
6588 f"Reward model {reward_info['name']} not supported. Please choose from {self.reward_pipeline.keys()}"
6589 )
6590 if isinstance(reference, dict):
6591 ref = reference.get(reward_info["name"])
6592
6593 if reward_model == "self":
6594 reward_event = self.task.reward_apply(self.response_event, reward_type=reward_type)
6595 else:
6596 # Compute the rewards for the responses given the prompt
6597 reward_event = reward_model.apply(
6598 ref, self.response_event, reward_type=reward_type, task=task
6599 )
6600 reward_events.append(reward_event)
6601
6602 return reward_events
6603
6604 def total_reward(self):
6605 """Combines the rewards from all the reward models into a single reward tensor"""
6606 # Compute the rewards for the responses given the prompt
6607 rewards = np.zeros_like(self.response_event.uids, dtype=np.float64)
6608 for event in self.reward_events:
6609 for reward_info in filter(lambda x: x["name"] == event.model_name, self.task_rewards):
6610 rewards += reward_info["weight"] * event.rewards
6611
6612 for event in self.penalty_events:
6613 for reward_info in filter(lambda x: x["name"] == event.model_name, self.task_penalties):
6614 rewards *= 1 - reward_info["weight"] * event.rewards
6615
6616 return rewards
6617
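# Editor's note: a worked example of the combination rule above, under a
# hypothetical task with reward_definition [codesim w=0.8, speed w=0.2] and
# penalty_definition [validcode w=0.5]: with event rewards codesim=0.9,
# speed=0.5, validcode=0.6, the total is
# (0.8 * 0.9 + 0.2 * 0.5) * (1 - 0.5 * 0.6) = 0.82 * 0.7 = 0.574.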
6618 def __str__(self):
6619 return f"{self.__class__.__name__}(rewards={self.rewards!r}, reward_events={self.reward_events!r}, penalty_events={self.penalty_events!r})"
6620
6621@dataclass
6622class BatchRewardOutput():
6623 rewards: Any
6624 timings: Any
6625 extra_info: dict
6626
6627 def __post_init__(self):
6628 self.rewards = np.asarray(self.rewards)
6629 self.timings = np.asarray(self.timings)
6630 if self.rewards.shape != self.timings.shape:
6631 raise ValueError(
6632 f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}"
6633 )
6634
6635 self.rewards_normalized = (self.rewards - self.rewards.min()) / (
6636 self.rewards.max() - self.rewards.min() + 1e-6
6637 )
6638 self.rewards_normalized = self.rewards_normalized.tolist()
6639
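# Editor's note: a worked min-max normalization example for __post_init__
# above: rewards = [0.2, 0.5, 0.8] normalizes to roughly [0.0, 0.5, 1.0] via
# (x - 0.2) / (0.6 + 1e-6); the 1e-6 term only prevents division by zero when
# every reward is identical.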
6640
6641class BaseRewardModel(ABC):
6642 @property
6643 @abstractmethod
6644 def name(self) -> str:
6645 ...
6646
6647 @abstractmethod
6648 def __init__(self, **kwargs):
6649 pass
6650
6651 @abstractmethod
6652 def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
6653 pass
6654
6655 def apply(self, reference: str, response_event, reward_type, task) -> RewardEvent:
6656 t0 = time.time()
6657 if self.name == "speed":
6658 batch_rewards_output = self.reward(response_event.timings)
6659 elif self.name == "validcode":
6660 if "<|fim_hole|>" in task.query:
6661 batch_rewards_output = self.reward(task.context.content, [task.query.replace("<|fim_hole|>", completion) for completion in response_event.completions], task.context.topic)
6662 else:
6663 batch_rewards_output = self.reward(task.context.content, response_event.completions, task.context.topic)
6664 # elif self.name == "debugrun": #TODO remove
6665 # batch_rewards_output = self.reward(task, response_event)
6666 else:
6667 batch_rewards_output = self.reward(reference, response_event.completions)
6668 batch_rewards_time = time.time() - t0
6669
6670 return RewardEvent(
6671 model_name=self.name,
6672 rewards=batch_rewards_output.rewards,
6673 rewards_normalized=batch_rewards_output.rewards_normalized,
6674 model_type=reward_type,
6675 batch_time=batch_rewards_time,
6676 extra_info=batch_rewards_output.extra_info,
6677 timings=batch_rewards_output.timings,
6678 )
6679
6680 def __repr__(self):
6681 return f"{self.__class__.__name__}(name={self.name})"
6682
6683
6684
6685---
6686File: /coding/rewards/speed.py
6687---
6688
6689import math
6690import time
6691from typing import List
6692from .reward import (
6693 BaseRewardModel,
6694 BatchRewardOutput,
6695)
6696
6697
6698class SpeedModel(BaseRewardModel):
6699 @property
6700 def name(self) -> str:
6701 return "speed"
6702
6703 def __init__(self, ideal_time: float = 12.0, decay_rate: float = 1.0, **kwargs):
6704 super().__init__()
6705 self.ideal_time = ideal_time
6706 self.decay_rate = decay_rate
6707
6708 def score_time(self, time_taken: float) -> float:
6709 """
6710 Calculates a score from 0 to 1 based on how fast an event occurs.
6711 The score decreases exponentially as the time taken increases beyond the ideal time.
6712
6713 :param time_taken: Time taken for the event in seconds.
6714 :param ideal_time: Ideal time for the event in seconds.
6715 :return: Score between 0 and 1.
6716 """
6717 if time_taken <= 0 or self.ideal_time <= 0:
6718 raise ValueError("Time taken and ideal time must be positive values.")
6719
6720 # Calculate the score using an exponential decay function
6721 score = math.exp(-self.decay_rate * (time_taken - self.ideal_time) / self.ideal_time)
6722
6723 # Ensure the score is between 0 and 1
6724 return max(0, min(1, score))
6725
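# Editor's note: worked values for the decay above with the defaults
# ideal_time=12, decay_rate=1: time_taken=12 -> exp(0) = 1.0;
# time_taken=24 -> exp(-1) ~ 0.37; time_taken=6 -> exp(0.5) ~ 1.65,
# clamped back to 1.0 by the max/min on the return line.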
6726 def reward(self, times) -> BatchRewardOutput:
6727 """Get the score between two strings.
6728 """
6729
6730 rewards = []
6731 timings = []
6732
6733 for time_taken in times:
6734 t0 = time.time()
6735 rewards.append(self.score_time(time_taken))
6736 timings.append(time.time() - t0)
6737
6738 output = BatchRewardOutput(
6739 rewards=rewards,
6740 timings=timings,
6741 extra_info={"ideal_time": self.ideal_time},
6742 )
6743
6744 return output
6745
6746
6747---
6748File: /coding/rewards/validcode.py
6749---
6750
6751import ast
6752import time
6753import autopep8
6754from typing import List
6755from .reward import (
6756 BaseRewardModel,
6757 BatchRewardOutput,
6758 RewardModelTypeEnum,
6759)
6760
6761def fix_python_spacing(code_str):
6762 fixed_code = autopep8.fix_code(code_str)
6763 return fixed_code
6764
6765
6766class ValidCodeModel(BaseRewardModel):
6767 @property
6768 def name(self) -> str:
6769 return "validcode"
6770
6771 def __init__(self, **kwargs):
6772 super().__init__()
6773
6774 def score(self, reference: str, completions: List[str], language: str) -> List[float]:
6775 """
6776 Get the score between a reference string and a list of completion strings.
6777 """
6778 scores = []
6779 if language != "Python":
6780 return [0] * len(completions)
6781 for completion in completions:
6782 # Check if reference is valid python code
6783 try:
6784 ast.parse(reference)
6785 except SyntaxError:
6786 scores.append(0) # Invalid reference code, so we don't penalize the completion
6787 continue
6788 try:
6789 ast.parse(fix_python_spacing(completion))
6790 scores.append(0) # Valid Python code: no penalty
6791 except SyntaxError:
6792 scores.append(0.6) # Invalid Python code: apply a 0.6 penalty
6793 return scores
6794
6795 def reward(self, reference: str, completions: List[str], language: str) -> BatchRewardOutput:
6796 """
6797 Get the score between a reference string and a list of completion strings.
6798 """
6799
6800 t0 = time.time()
6801 rewards = self.score(reference, completions, language)
6802 total_time = time.time() - t0
6803 timings = [total_time] * len(
6804 completions
6805 ) # Assuming equal distribution of time for each completion
6806
6807 output = BatchRewardOutput(rewards=rewards, timings=timings, extra_info={})
6808
6809 return output
6810
6811
6812
6813---
6814File: /coding/schemas/__init__.py
6815---
6816
6817from .context import Context
6818from .file import File
6819from .package import *
6820from .chat import *
6821from .swe import *
6822
6823
6824---
6825File: /coding/schemas/chat.py
6826---
6827
6828from strenum import StrEnum
6829from pydantic import BaseModel
6830
6831class ChatRole(StrEnum):
6832 """The role identifying who sent a chat message"""
6833
6834 SYSTEM = "system"
6835 ASSISTANT = "assistant"
6836 USER = "user"
6837
6838class ChatMessage(BaseModel):
6839 role: ChatRole
6840 content: str
6841
6842
6843---
6844File: /coding/schemas/context.py
6845---
6846
6847# The MIT License (MIT)
6848# Copyright © 2024 Yuma Rao
6849# Copyright © 2023 Opentensor Foundation
6850# Copyright © 2024 Macrocosmos
6851# Copyright © 2024 Broke
6852
6853
6854# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6855# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
6856# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
6857# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6858
6859# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
6860# the Software.
6861
6862# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6863# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
6864# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
6865# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
6866# DEALINGS IN THE SOFTWARE.
6867
6868 from typing import List, Any, Dict, Optional
6869from pydantic import BaseModel
6870
6871from .file import File
6872from .chat import ChatMessage
6873
6874class Context(BaseModel):
6875 title: str = ""
6876 topic: str = ""
6877 content: str = ""
6878 internal_links: List[str] = []
6879 external_links: List[str] = []
6880 source: str = ""
6881 tags: Optional[List[str]] = None
6882 extras: Optional[Dict[str, Any]] = None
6883 files: Optional[List[File]] = None
6884 messages: List[ChatMessage] = []
6885
6886
6887
6888---
6889File: /coding/schemas/file.py
6890---
6891
6892from pydantic import BaseModel
6893
6894class File(BaseModel):
6895 path: str
6896 content: str
6897
6898
6899---
6900File: /coding/schemas/model.py
6901---
6902
6903from pydantic import BaseModel
6904
6905class Model(BaseModel):
6906 model_name: str
6907 # prompt_tokens: dict
6908 # hash: str
6909 competition_id: int
6910 block: int
6911
6912
6913 def to_compressed_str(self) -> str:
6914 return f"{self.model_name}-{self.competition_id}-{self.block}"
6915
6916 @classmethod
6917 def from_compressed_str(cls, compressed_str: str) -> "Model":
6918 model_name, competition_id, block = compressed_str.rsplit("-", 2) # rsplit: model names may themselves contain dashes
6919 return cls(model_name=model_name, competition_id=int(competition_id), block=int(block))
6920
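# Editor's note: a round-trip sketch for the compressed form, with a made-up
# model name; rsplit("-", 2) keeps dashes inside the name intact:
#   m = Model(model_name="org/some-model-7b", competition_id=1, block=42)
#   m.to_compressed_str()                       # "org/some-model-7b-1-42"
#   Model.from_compressed_str("org/some-model-7b-1-42") == m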
6921
6922
6923
6924
6925---
6926File: /coding/schemas/package.py
6927---
6928
6929import re
6930import random
6931import string
6932from typing import List, Dict
6933from pydantic import BaseModel
6934
6935from .file import File
6936
6937class Package(BaseModel):
6938 files: List[File]
6939
6940 def update_file(self, new_file: File):
6941 for i, file in enumerate(self.files):
6942 if file.path == new_file.path:
6943 self.files[i] = new_file
6944 return
6945 raise ValueError(f"File with path {new_file.path} not found in package.")
6946
6947
6948class ObscurePackage(Package):
6949 mapping: Dict = {}
6950
6951 def obscure_package(self):
6952 mapping = {}
6953
6954 # Obscure file paths
6955 for file in self.files:
6956 new_path = self._generate_random_string(len(file.path))+".py"
6957 mapping[file.path] = new_path
6958 file.path = new_path
6959
6960 # Obscure classes and contents
6961 for file in self.files:
6962 file.content, class_mapping = self._obscure_classes(file.content)
6963 mapping.update(class_mapping)
6964 file.content = self._obscure_contents(file.content, mapping)
6965
6966 self.mapping = mapping
6967
6968 def undo_obscure_package(self):
6969 if hasattr(self, 'mapping'):
6970 # Undo obscuring file paths
6971 reverse_mapping = {v: k for k, v in self.mapping.items()}
6972 for file in self.files:
6973 if file.path in reverse_mapping:
6974 file.path = reverse_mapping[file.path]
6975
6976 # Undo obscuring classes and contents
6977 for file in self.files:
6978 file.content = self._undo_obscure_contents(file.content, reverse_mapping)
6979 file.content = self._undo_obscure_classes(file.content, reverse_mapping)
6980
6981 del self.mapping
6982
6983 def obscure_string(self, script: str):
6984 if not hasattr(self, 'mapping'):
6985 raise ValueError("Package must be obscured before obscuring a script string.")
6986
6987 script, class_mapping = self._obscure_classes(script)
6988 script = self._obscure_contents(script, self.mapping)
6989 script = self._obscure_contents(script, class_mapping)
6990 return script
6991
6992 def undo_obscure_string(self, script: str):
6993 if not hasattr(self, 'mapping'):
6994 raise ValueError("Package must be obscured before undoing obscuring a script string.")
6995
6996 reverse_mapping = {v: k for k, v in self.mapping.items()}
6997 script = self._undo_obscure_contents(script, reverse_mapping)
6998 script = self._undo_obscure_classes(script, reverse_mapping)
6999 return script
7000
7001 def _generate_random_string(self, length):
7002 return ''.join(random.choices(string.ascii_letters + string.digits, k=length))
7003
7004 def _generate_random_class_name(self, length):
7005 return ''.join(random.choices(string.ascii_uppercase, k=1) + random.choices(string.ascii_lowercase, k=length-1))
7006
7007
7008 def _obscure_contents(self, contents, mapping):
7009 for original, obscure in mapping.items():
7010 contents = re.sub(r'\b' + re.escape(original) + r'\b', obscure, contents)
7011 return contents
7012
7013 def _undo_obscure_contents(self, contents, reverse_mapping):
7014 for obscure, original in reverse_mapping.items():
7015 contents = re.sub(r'\b' + re.escape(obscure) + r'\b', original, contents)
7016 return contents
7017
7018 def _obscure_classes(self, contents):
7019 class_pattern = r'\bclass\s+(\w+)'
7020 class_names = re.findall(class_pattern, contents)
7021 class_mapping = {}
7022 for class_name in class_names:
7023 new_class_name = self._generate_random_class_name(len(class_name))
7024 class_mapping[class_name] = new_class_name
7025 contents = re.sub(r'\b' + re.escape(class_name) + r'\b', new_class_name, contents)
7026 return contents, class_mapping
7027
7028 def _undo_obscure_classes(self, contents, reverse_mapping):
7029 class_names = list(reverse_mapping.keys())
7030 for obscure_name in class_names:
7031 original_name = reverse_mapping[obscure_name]
7032 contents = re.sub(r'\b' + re.escape(obscure_name) + r'\b', original_name, contents)
7033 return contents
7034
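# Editor's note: a minimal obscure/undo round trip for the class above:
#   pkg = ObscurePackage(files=[File(path="models.py", content="class Greeter:\n    pass\n")])
#   pkg.obscure_package()        # path and class name swapped for random identifiers
#   pkg.undo_obscure_package()   # reverse mapping restores the originals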
7035
7036---
7037File: /coding/schemas/swe.py
7038---
7039
7040from pydantic import BaseModel
7041
7042
7043class Edit(BaseModel):
7044 file_name: str
7045 line_number: int
7046 line_content: str
7047 new_line_content: str
7048
7049class Patch(BaseModel):
7050 edits: list[Edit]
7051
7052
7053---
7054File: /coding/schemas/tracking.py
7055---
7056
7057from typing import List
7058from pydantic import BaseModel
7059
7060from .model import Model
7061
7062class TrackingInfo(BaseModel):
7063 logic: dict
7064 block: int
7065 hotkey: str
7066 uid: int
7067 score: float = 0.0
7068
7069
7070
7071---
7072File: /coding/tasks/__init__.py
7073---
7074
7075# The MIT License (MIT)
7076# Copyright © 2024 Yuma Rao
7077# Copyright © 2023 Opentensor Foundation
7078# Copyright © 2024 Macrocosmos
7079# Copyright © 2024 Broke
7080
7081
7082# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
7083# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
7084# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
7085# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7086
7087# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
7088# the Software.
7089
7090# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
7091# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
7092# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
7093# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
7094# DEALINGS IN THE SOFTWARE.
7095
7096import random
7097from typing import Callable
7098
7099from .task import Task
7100from .swe import SWEBenchTask
7101# from .debug import DebugTask
7102from .fim import FillInMiddleTask
7103from .repofile import RepoFileTask
7104from .repo import RepoCompletionTask
7105from .completion import CompletionTask
7106from .bigcodebench import BigCodeBenchTask
7107from .organic_convo import OrganicConvoTask
7109
7110TASKS = {
7111 RepoCompletionTask.name: RepoCompletionTask,
7112 FillInMiddleTask.name: FillInMiddleTask,
7113 CompletionTask.name: CompletionTask,
7114 RepoFileTask.name: RepoFileTask,
7115 # DebugTask.name: DebugTask,
7116 SWEBenchTask.name: SWEBenchTask,
7117}
7118
7119from coding.schemas import Context
7120from coding.helpers import Selector
7121from coding.protocol import StreamCodeSynapse
7122from coding.datasets import TheStackDataset, PipDataset, SWEBenchDataset, DatasetManager
7123
7124TASK_REGISTRY = {
7125 RepoCompletionTask.name: [TheStackDataset.name],
7126 FillInMiddleTask.name: [TheStackDataset.name],
7127 CompletionTask.name: [TheStackDataset.name],
7128 RepoFileTask.name: [TheStackDataset.name],
7129 # DebugTask.name: [PipDataset.name],
7130 SWEBenchTask.name: [SWEBenchDataset.name],
7131}
7132
7133
7134def create_task(
7135 llm,
7136 task_name: str,
7137 selector: Selector = random.choice,
7138 repl = None,
7139 code_scorer: Callable = None,
7140 dataset_manager: DatasetManager = None
7141) -> Task:
7142 """Create a task from the given task name and LLM pipeline.
7143
7144 Args:
7145 llm (Pipeline): Pipeline to use for text generation
7146 task_name (str): Name of the task to create
7147 selector (Selector, optional): Selector function to choose a dataset. Defaults to random.choice.
7148
7149 Raises:
7150 ValueError: If task_name is not a valid alias for a task, or if the task is not a subclass of Task
7151 ValueError: If no datasets are available for the given task
7152 ValueError: If the dataset for the given task is not found
7153
7154 Returns:
7155 Task: Task instance
7156 """
7157 task = TASKS.get(task_name, None)
7158 if task is None or not issubclass(task, Task):
7159 raise ValueError(f"Task {task_name} not found")
7160
7161 dataset_choices = TASK_REGISTRY.get(task_name, None)
7162 if not dataset_choices: # also handles None when the task has no registry entry
7163 raise ValueError(f"No datasets available for task {task_name}")
7164 dataset_name = selector(dataset_choices)
7165 dataset = dataset_manager.datasets.get(dataset_name, None)
7166 if dataset is None:
7167 raise ValueError(f"Dataset {dataset_name} not found")
7168 return task(llm=llm, context=dataset.next(**dict(task.dataset_options)), repl=repl, code_scorer=code_scorer)
7169
7170
7171def create_organic_task(
7172 llm,
7173 synapse: StreamCodeSynapse,
7174) -> Task:
7175 """Create a task from the given synapse and LLM pipeline."""
7176
7177 return OrganicConvoTask(
7178 llm=llm,
7179 context=Context(messages=synapse.messages, files=synapse.files),
7180 )
7181
7182
7183
7184---
7185File: /coding/tasks/bigcodebench.py
7186---
7187
7188import re
7189from pydantic import BaseModel
7190from typing import Callable, List, Dict
7191
7192from .task import Task
7193from coding.schemas import Context
7194
7195
7196class BigCodeInstruction(BaseModel):
7197 imports: list[str]
7198 description: str
7199 parameters: dict
7200 returns: str
7201 example: str
7202 requirements: list[str]
7203 signature: str
7204 code: str
7205
7206 @property
7207 def prompt(self) -> str:
7208 imports = "\n".join(f"import {pkg}" for pkg in self.imports)
7209 return f"""
7210write a function {self.signature} to:
7211{self.description}
7212
7213The function should output with:
7214{self.returns}
7215
7216You should start with:
7217```
7218{imports}
7219{self.signature} ```
7220"""
7221
7222
7223def bigcode_splitter(prompt: str) -> BigCodeInstruction:
7224 """
7225 Parse a BigCodeBench prompt string into a structured BigCodeInstruction.
7226 """
7227 # Extracting each section using regex
7228 imports = re.findall(r"import (.+)", prompt)
7229
7230 description_match = re.search(r'\"\"\"(.+?)Parameters:', prompt, re.DOTALL)
7231 description = description_match.group(1).strip() if description_match else ""
7232
7233 parameters_match = re.search(r'Parameters:\s*(.+?)Requirements:', prompt, re.DOTALL)
7234 parameters_raw = parameters_match.group(1).strip() if parameters_match else ""
7235 parameters = parse_parameters(parameters_raw)
7236
7237 requirements = re.findall(r"- (\w+)", prompt.split("Requirements:")[1].split("Example:")[0])
7238
7239 example_match = re.search(r'Example:\s+(.+?)Returns:', prompt, re.DOTALL)
7240 example = example_match.group(1).strip() if example_match else ""
7241
7242 returns_match = re.search(r'Returns:\s*(.+?)\"\"\"', prompt, re.DOTALL)
7243 returns = returns_match.group(1).strip() if returns_match else ""
7244
7245 signature_match = re.search(r'def (.+?):', prompt)
7246 signature = f'def {signature_match.group(1)}' if signature_match else ""
7247
7248 # Extract the full code including the definition
7249 # TODO ensure to include the imports
7250 code_match = re.search(r'(def .+?:\s*.+)', prompt, re.DOTALL)
7251 code = code_match.group(1).strip() if code_match else ""
7252
7253 # Create the BigCodeInstruction instance
7254 instruction = BigCodeInstruction(
7255 imports=imports,
7256 description=description,
7257 parameters=parameters,
7258 returns=returns,
7259 example=example,
7260 code=code,
7261 requirements=requirements,
7262 signature=signature
7263 )
7264
7265 # Return the formatted prompt
7266 return instruction
7267
7268def parse_parameters(params_raw: str) -> Dict:
7269 """
7270 Parse the parameters section into a dictionary.
7271 """
7272 parameters = {}
7273 for param_line in params_raw.splitlines():
7274 param_line = param_line.strip()
7275 if param_line:
7276 # Example format: "- corpus (List[str]): A list of text documents"
7277 match = re.match(r'- (\w+) \(([^)]+)\): (.+)', param_line)
7278 if match:
7279 param_name, param_type, param_desc = match.groups()
7280 parameters[param_name] = {"type": param_type, "description": param_desc}
7281 return parameters
7282
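For reference, feeding one line of the documented format through parse_parameters (a sketch, assuming the module import path matches the file path above):

    from coding.tasks.bigcodebench import parse_parameters

    params = parse_parameters("- corpus (List[str]): A list of text documents")
    # -> {"corpus": {"type": "List[str]", "description": "A list of text documents"}}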
7283
7284class BigCodeBenchTask(Task):
7285 name: str = "bigcodebench"
7286 desc: str = "Complete the code to match the given instructions"
7287 goal: str = "to complete the code to match the given instructions"
7288 reward_definition: List[dict] = [
7289 dict(name="codesim", weight=0.8),
7290 dict(name="speed", weight=0.2, ideal_time=4.5)
7291 ]
7292 penalty_definition: List = [
7293 ]
7294 cleaning_pipeline: List = [
7295 ]
7296 dataset_options: Dict = {}
7297 attachments = []
7298 messages = []
7299 files = []
7300
7301 def __init__(self, llm: Callable | None = None, context: Context | None = None, **kwargs):
7302 self.context = context
7303 instruction = bigcode_splitter(context.content)
7304 self.query = instruction.prompt
7305 self.reference = context.content
7306 self.topic = context.title
7307 self.subtopic = context.topic
7308 self.tags = context.tags
7309
7310
7311---
7312File: /coding/tasks/completion.py
7313---
7314
7315import ast
7316import random
7317from typing import Callable, List, Dict
7318
7319from .task import Task
7320from coding.schemas import Context
7321from coding.helpers.fim import insert_fim_hole
7322from coding.helpers.rewrite import rewrite_code
7323
7324def extract_random_function(code):
7325 """
7326 Takes a string of Python code, finds a random function within it,
7327 and returns the function signature and body as separate strings.
7328
7329 Parameters:
7330 code (str): The Python code as a string.
7331
7332 Returns:
7333 tuple: A tuple containing the function signature and function body as separate strings.
7334 """
7335 random.seed(None)
7336 try:
7337 tree = ast.parse(code)
7338 except SyntaxError:
7339 return None, None
7340
7341 functions = [node for node in tree.body if isinstance(node, ast.FunctionDef)]
7342
7343 if not functions:
7344 return None, None
7345
7346 selected_function = random.choice(functions)
7347
7348 # Construct the function signature
7349 args = [arg.arg for arg in selected_function.args.args]
7350 args_str = ", ".join(args)
7351 func_signature = f"def {selected_function.name}({args_str}):"
7352
7353 # Extract the function body (excluding the signature)
7354 # `ast.get_source_segment` gives us the entire function, so we need to split it.
7355 full_function = ast.get_source_segment(code, selected_function)
7356 func_body = full_function.split(":", 1)[-1].strip() # Split at the first colon and remove leading/trailing whitespace
7357
7358 return func_signature, func_body
7359
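A quick sketch of extract_random_function on a one-function snippet (assuming the module is importable as coding.tasks.completion):

    from coding.tasks.completion import extract_random_function

    sig, body = extract_random_function("def add(a, b):\n    return a + b\n")
    # sig  -> "def add(a, b):"
    # body -> "return a + b"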
7360class CompletionTask(Task):
7361 name: str = "completion"
7362 desc: str = "code completion"
7363 goal: str = "complete the code "
7364 reward_definition: List[dict] = [
7365 dict(name="codesim", weight=0.8),
7366 dict(name="speed", weight=0.2, ideal_time=1.5)
7367 ]
7368 penalty_definition: List = [
7369 dict(name="validcode", weight=1)
7370 ]
7371 cleaning_pipeline: List = [
7372 ] # TODO remove markdown wrappings
7373 dataset_options: Dict = {}
7374 attachments = []
7375 messages = []
7376 files = []
7377
7378 def __init__(self, llm: Callable, context: Context, **kwargs):
7379 self.context = context
7380 context.content = rewrite_code(context.content, llm)
7381
7382 func_signature, func_body = extract_random_function(context.content) # TODO handle comments
7383 if func_signature is None or func_body is None:
7384 self.query, self.reference = insert_fim_hole(context.content)
7385 else:
7386 self.query = (
7387 func_signature + "<|fim_hole|>" # we want them to complete that area, pretending it's a hole
7388 )
7389 self.reference = func_body
7390
7391 self.topic = context.title
7392 self.subtopic = context.topic
7393 self.tags = context.tags
7394
7395
7396---
7397File: /coding/tasks/fim.py
7398---
7399
7400import random
7401from typing import Callable, List, Dict
7402
7403from .task import Task
7404from coding.schemas import Context
7405from coding.helpers.rewrite import rewrite_code
7406
7407def make_hole(text, chunk_size=5):
7408 lines = text.splitlines()
7409 total_lines = len(lines)
7410
7411 if chunk_size >= total_lines:
7412 return '<|fim_hole|>', text
7413
7414 start_index = random.randint(0, total_lines - chunk_size)
7415 end_index = start_index + chunk_size
7416
7417 hole = '\n'.join(lines[start_index:end_index])
7418 new_lines = lines[:start_index] + ['<|fim_hole|>'] + lines[end_index:]
7419
7420 return '\n'.join(new_lines), hole
7421
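A sketch of make_hole on a 20-line input (import path assumed from the file path above); the hole position is random:

    from coding.tasks.fim import make_hole

    text = "\n".join(f"line {i}" for i in range(20))
    query, hole = make_hole(text, chunk_size=5)
    # `query` is `text` with a random 5-line span replaced by <|fim_hole|>;
    # `hole` is the removed span.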
7422class FillInMiddleTask(Task):
7423 name: str = "fim"
7424 desc: str = "fill in the middle of the code"
7425 goal: str = "to fill in the blanks in the code"
7426 reward_definition: List[dict] = [
7427 dict(name="codesim", weight=0.8),
7428 dict(name="speed", weight=0.2, ideal_time=1.5)
7429 ]
7430 penalty_definition: List = [
7431 dict(name="validcode", weight=1)
7432 ]
7433 cleaning_pipeline: List = [
7434 ] # TODO remove markdown wrappings
7435 dataset_options: Dict = {}
7436 attachments = []
7437 messages = []
7438 files = []
7439
7440 def __init__(self, llm: Callable, context: Context, **kwargs):
7441 self.context = context
7442 context.content = rewrite_code(context.content, llm)
7443 fim_query, hole = make_hole(context.content)
7444 self.query = (
7445 fim_query
7446 )
7447 self.reference = hole
7448
7449 self.topic = context.title
7450 self.subtopic = context.topic
7451 self.tags = context.tags
7452
7453
7454---
7455File: /coding/tasks/organic_convo.py
7456---
7457
7458import ast
7459import random
7460from typing import Callable, List, Dict
7461
7462from .task import Task
7463from coding.schemas import Context, ChatMessage, File
7464
7465def complete_conversation(llm: Callable, messages: List[ChatMessage], files: List[File], **kwargs):
7466 if not messages:
7467 raise ValueError("No messages provided")
7468 additional_context = ""
7469 if files:
7470 additional_context += "\n\nUse the following files as context for your response: \n"
7471 for file in files:
7472 if not getattr(file, "path", None): # `"path" not in file` raises on pydantic models
7473 file.path = ""
7474 file.content = file.content.replace("}", "}}").replace("{", "{{")
7475 additional_context += f"#{file.path}\n{file.content}\n"
7476 messages[-1].content += additional_context
7477 response = llm.invoke([msg.dict() for msg in messages]).content
7478 return response
7479
7480
7481class OrganicConvoTask(Task):
7482 name: str = "organic_convo"
7483 desc: str = "organic conversation task"
7484 goal: str = "respond correctly to the conversation"
7485 reward_definition: List[dict] = [
7486 dict(name="codesim", weight=0.8), # TODO using code similarity might not work for responses, but it should be fine? maybe do rogue or difflib
7487 dict(name="speed", weight=0.2, ideal_time=2.5)
7488 ]
7489 penalty_definition: List = []
7490 cleaning_pipeline: List = [
7491 ] # TODO remove markdown wrappings
7492 dataset_options: Dict = {}
7493 attachments = []
7494 messages = []
7495 files = []
7496
7497
7498 def __init__(self, llm: Callable, context: Context, **kwargs):
7499 self.context = context
7500
7501 self.query = None
7502 self.messages = context.messages
7503 self.files = context.files
7504 self.reference = complete_conversation(llm, self.messages, self.files)
7505
7506 self.topic = context.title
7507 self.subtopic = context.topic
7508 self.tags = context.tags
7509
7510
7511---
7512File: /coding/tasks/repo.py
7513---
7514
7515import ast
7516import random
7517from typing import Callable, List, Dict, Tuple
7518
7519from .task import Task
7520from coding.schemas import Context, File
7521from coding.helpers.fim import insert_fim_hole
7522from coding.helpers.rewrite import rewrite_code
7523
7524def delete_function_body_and_following(code: str) -> Tuple[str, str]:
7525 """
7526 Takes in some code, randomly finds a function, deletes the body of that function and anything after it.
7527
7528 Returns the function definition alongside the deleted body of the function.
7529 """
7530 random.seed(None)
7531
7532 class FunctionBodyRemover(ast.NodeTransformer):
7533 def __init__(self, target_func_name):
7534 self.target_func_name = target_func_name
7535 self.body = None
7536 self.stop_processing = False
7537
7538 def visit_FunctionDef(self, node):
7539 if self.stop_processing:
7540 return None
7541 if node.name == self.target_func_name:
7542 self.body = ast.unparse(node.body) if node.body else ""
7543 node.body = [] # Remove the function body
7544 self.stop_processing = True # Stop after we modify the targeted function
7545 return node
7546
7547 # Parse the code into an AST
7548 try:
7549 tree = ast.parse(code)
7550 except Exception as e:
7551 return None, None
7552
7553 # Randomly select a function to delete the body from
7554 functions = [node for node in tree.body if isinstance(node, ast.FunctionDef)]
7555 if not functions:
7556 return None, None
7557
7558 target_func = random.choice(functions)
7559
7560 # Remove the body of the target function
7561 remover = FunctionBodyRemover(target_func.name)
7562 remover.visit(tree)
7563
7564 # If the body was not captured, return an empty string
7565 if remover.body is None or remover.body.strip() == "":
7566 return None, None
7567
7568 # Find the function definition line in the original code
7569 func_def_start = code.find(f'def {target_func.name}')
7570
7571 if func_def_start == -1:
7572 return None, None
7573
7574 # Extract just the function definition line
7575 func_def_end = code.find(":", func_def_start) + 1
7576 function_definition = code[func_def_start:func_def_end]
7577
7578 if function_definition.strip() == "":
7579 return None, None
7580
7581 if not function_definition or not remover.body:
7582 return None, None
7583
7584 return function_definition, remover.body
7585
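For illustration, a sketch assuming coding.tasks.repo is importable (note that ast.unparse requires Python 3.9+):

    from coding.tasks.repo import delete_function_body_and_following

    definition, body = delete_function_body_and_following(
        "def greet(name):\n    return 'hello ' + name\n"
    )
    # definition -> "def greet(name):"
    # body       -> "return 'hello ' + name"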
7586
7587class RepoCompletionTask(Task):
7588 name: str = "repo"
7589 desc: str = "repository level code completion"
7590 goal: str = "complete the code given the context of the rest of the repo"
7591 reward_definition: List[dict] = [
7592 dict(name="codesim", weight=0.8),
7593 dict(name="speed", weight=0.2, ideal_time=2.5)
7594 ]
7595 penalty_definition: List = [
7596 dict(name="validcode", weight=1)
7597 ]
7598 cleaning_pipeline: List = [
7599 ] # TODO remove markdown wrappings
7600 dataset_options: Dict = dict(include_sibling_docs=True)
7601 attachments = []
7602 messages = []
7603 files = []
7604
7605 def __init__(self, llm: Callable, context: Context, **kwargs):
7606 self.context = context
7607 context.content = rewrite_code(context.content, llm)
7608
7609 if context.topic == "Python":
7610 mod_code, correct_body = delete_function_body_and_following(context.content)
7611 if mod_code is not None and correct_body is not None:
7612 self.query = mod_code + "<|fim_hole|>"
7613 self.reference = correct_body
7614 else:
7615 self.query, self.reference = insert_fim_hole(context.content)
7616 else:
7617 self.query, self.reference = insert_fim_hole(context.content)
7618 # rewrite every file
7619 for file in context.extras['sibling_docs']:
7620 file.content = rewrite_code(file.content, llm)
7621 self.files = [File(path=cont.title, content=cont.content) for cont in context.extras['sibling_docs']] # Filter the info sent to the miners
7622
7623 self.topic = context.title
7624 self.subtopic = context.topic
7625 self.tags = context.tags
7626
7627
7628---
7629File: /coding/tasks/repofile.py
7630---
7631
7632from typing import Callable, List, Dict
7633
7634from .task import Task
7635from coding.schemas import Context, File
7636from coding.helpers.rewrite import rewrite_code
7637class RepoFileTask(Task):
7638 name: str = "repofile"
7639 desc: str = "repository level file creation"
7640 goal: str = "write the python module that completes the code"
7641 reward_definition: List[dict] = [
7642 dict(name="codesim", weight=0.8), # TODO compare functions and objects to the closest as they might be out of order
7643 dict(name="speed", weight=0.2, ideal_time=3)
7644 ]
7645 penalty_definition: List = [
7646 dict(name="validcode", weight=1)
7647 ]
7648 cleaning_pipeline: List = [] # TODO remove markdown wrappings
7649 dataset_options: Dict = dict(include_sibling_docs=True)
7650 attachments = []
7651 messages = []
7652 files = []
7653
7654 def __init__(self, llm: Callable, context: Context, **kwargs):
7655 self.context = context
7656
7657 self.query = (
7658 "write code to " + llm.invoke(f'Summarize what is happening in this code: {context.content}').content
7659 )
7660 # rewrite every file
7661 for file in context.extras['sibling_docs']:
7662 file.content = rewrite_code(file.content, llm)
7663 self.files = [File(path=cont.title, content=cont.content) for cont in context.extras['sibling_docs']] # Filter the info sent to the miners
7664 self.reference = context.content
7665
7666 self.topic = context.title
7667 self.subtopic = context.topic
7668 self.tags = context.tags
7669
7670
7671---
7672File: /coding/tasks/swe.py
7673---
7674
7675import re
7676import bittensor as bt
7677from pydantic import BaseModel
7678from typing import Callable, List, Dict
7679from code_bert_score import BERTScorer
7680
7681from .task import Task
7682from coding.helpers.git import GitRepo
7683from coding.rewards.codesim import CodeSimModel
7684from coding.schemas import Context, Patch, Edit
7685
7686class PatchChunk(BaseModel):
7687 file_name: str
7688 start_index: int
7689 end_index: int
7690 content: str
7691 new_content: str
7692
7693def parse_diff(diff_text: str, no_title=False) -> Patch:
7694 diff_pattern = r"^diff --git a\/(.+?) b\/(.+?)$"
7695 line_change_pattern = r"^@@ -(\d+),\d+ \+(\d+),\d+ @@"
7696 edits = []
7697
7698 current_file = None
7699 old_file_line_num = 0
7700 new_file_line_num = 0
7701
7702 for line in diff_text.splitlines():
7703 diff_match = re.match(diff_pattern, line)
7704 if diff_match:
7705 current_file = diff_match.group(2)
7706 old_file_line_num = 0
7707 new_file_line_num = 0
7708 continue
7709 elif no_title and current_file is None: # "" is falsy, so `not current_file` would keep skipping every later line
7710 current_file = ""
7711 old_file_line_num = 0
7712 new_file_line_num = 0
7713 # no continue: this line may itself be a hunk header and still needs parsing below
7714
7715 line_change_match = re.match(line_change_pattern, line)
7716
7717 if line_change_match:
7718 old_file_line_num = int(line_change_match.group(1))
7719 new_file_line_num = int(line_change_match.group(2))
7720 continue
7721
7722 if line.startswith("+") and not line.startswith("+++"):
7723 # Line added in new file
7724 edits.append(
7725 Edit(
7726 file_name=current_file,
7727 line_number=new_file_line_num,
7728 line_content="",
7729 new_line_content=line[1:].strip(),
7730 )
7731 )
7732 new_file_line_num += 1
7733 elif line.startswith("-") and not line.startswith("---"):
7734 # Line removed from old file
7735 edits.append(
7736 Edit(
7737 file_name=current_file,
7738 line_number=old_file_line_num,
7739 line_content=line[1:].strip(),
7740 new_line_content="",
7741 )
7742 )
7743 old_file_line_num += 1
7744 elif line.startswith(" "):
7745 # Context lines (lines present in both old and new files)
7746 old_file_line_num += 1
7747 new_file_line_num += 1
7748
7749 return Patch(edits=edits)
7750
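A minimal sketch of parse_diff on a one-hunk diff (assuming the module import below):

    from coding.tasks.swe import parse_diff

    diff = (
        "diff --git a/app.py b/app.py\n"
        "@@ -1,2 +1,2 @@\n"
        "-print('old')\n"
        "+print('new')\n"
    )
    patch = parse_diff(diff)
    # patch.edits -> one removal at old line 1 and one addition at new line 1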
7751
7752# TODO ensure chunks within 2 lines of each other are grouped together
7753def chunk_patch(patch: Patch) -> List[PatchChunk]:
7754 chunks = []
7755 current_chunk = []
7756 current_file = None
7757
7758 # Group edits by file and line number
7759 file_edits = {}
7760 for edit in patch.edits:
7761 if edit.file_name not in file_edits:
7762 file_edits[edit.file_name] = {}
7763 if edit.line_number not in file_edits[edit.file_name]:
7764 file_edits[edit.file_name][edit.line_number] = []
7765 file_edits[edit.file_name][edit.line_number].append(edit)
7766
7767 # Process each file's edits
7768 for file_name, line_edits in file_edits.items():
7769 current_chunk = []
7770 prev_line = None
7771
7772 # Sort line numbers
7773 for line_num in sorted(line_edits.keys()):
7774 if prev_line is None or line_num <= prev_line + 1:
7775 current_chunk.extend(line_edits[line_num])
7776 else:
7777 # Create chunk for previous group
7778 if current_chunk:
7779 start_idx = current_chunk[0].line_number
7780 end_idx = current_chunk[-1].line_number
7781 content = "\n".join(e.line_content for e in current_chunk if e.line_content)
7782 new_content = "\n".join(e.new_line_content for e in current_chunk if e.new_line_content)
7783 chunks.append(PatchChunk(
7784 file_name=file_name,
7785 start_index=start_idx,
7786 end_index=end_idx,
7787 content=content,
7788 new_content=new_content
7789 ))
7790 current_chunk = line_edits[line_num]
7791 prev_line = line_num
7792
7793 # Add final chunk for this file
7794 if current_chunk:
7795 start_idx = current_chunk[0].line_number
7796 end_idx = current_chunk[-1].line_number
7797 content = "\n".join(e.line_content for e in current_chunk if e.line_content)
7798 new_content = "\n".join(e.new_line_content for e in current_chunk if e.new_line_content)
7799 chunks.append(PatchChunk(
7800 file_name=file_name,
7801 start_index=start_idx,
7802 end_index=end_idx,
7803 content=content,
7804 new_content=new_content
7805 ))
7806
7807 return chunks
7808
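Continuing the sketch above, chunk_patch groups edits with consecutive line numbers in the same file into a single PatchChunk:

    from coding.tasks.swe import parse_diff, chunk_patch

    patch = parse_diff(
        "diff --git a/app.py b/app.py\n"
        "@@ -1,2 +1,2 @@\n"
        "-print('old')\n"
        "+print('new')\n"
    )
    chunks = chunk_patch(patch)
    # -> a single chunk for app.py covering line 1, holding old and new content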
7809class SWEBenchTask(Task):
7810 name: str = "swebench"
7811 desc: str = "given a GitHub issue, correctly solve it"
7812 goal: str = "return the valid patch"
7813 reward_definition: List[dict] = [
7814 dict(name="speed", weight=0.1, ideal_time=25),
7815 dict(name="self", weight=0.9),
7816 ]
7817 penalty_definition: List = []
7818 cleaning_pipeline: List = [] # TODO remove markdown wrappings
7819 dataset_options: Dict = {}
7820 attachments = []
7821 messages = []
7822 files = []
7823
7824 def __init__(
7825 self, llm: Callable, context: Context, code_scorer: Callable = None, **kwargs
7826 ):
7827 self.repo = GitRepo(context.title, context.extras["base_commit"])
7828 if code_scorer is None:
7829 self.code_scorer = CodeSimModel()
7830 else:
7831 self.code_scorer = code_scorer
7832 self.context = context
7833 self.patch: Patch = parse_diff(context.content)
7834 self.query = context.topic
7835 # self.repo = context.title
7836 self.base_commit = context.extras["base_commit"]
7837 self.pull_number = context.extras["pull_number"]
7838 self.topic = context.title
7839 self.subtopic = context.topic
7840 self.tags = context.tags
7841
7842 def score(self, patch: Patch, token_count: int):
7843 bt.logging.info("Scoring patch")
7844 num_valid_lines = len(self.patch.edits)
7845 num_miner_lines = len(patch.edits)
7846
7847 # Checking to see if the miner changed more than what was needed
7848 lines_over_percent = 1
7849
7850 if num_valid_lines > 20:
7851 if num_miner_lines / num_valid_lines > 3:
7852 lines_over_percent -= ((num_miner_lines - (num_valid_lines * 2)) / num_valid_lines) * 0.1
7853 else:
7854 if num_miner_lines / num_valid_lines > 7:
7855 lines_over_percent -= ((num_miner_lines - (num_valid_lines * 2)) / num_valid_lines) * 0.1
7856
7857 if lines_over_percent <= 0:
7858 return 0
7859
7860 valid_num_lines = {} # file name -> num lines
7861 miner_num_lines = {}
7862
7863 for edit in self.patch.edits:
7864 if edit.file_name not in valid_num_lines:
7865 valid_num_lines[edit.file_name] = 0
7866 valid_num_lines[edit.file_name] += 1
7867
7868 for edit in patch.edits: # count the miner's edits in their own loop, not inside the loop over the valid patch
7869 miner_num_lines[edit.file_name] = miner_num_lines.get(edit.file_name, 0) + 1
7870
7871
7872 # see which lines in valid patch are in miner patch and find percent
7873 # miner can edit extra lines but not less
7874 total_valid_lines = 0
7875 lines_in_miner = 0
7876 for file_name in valid_num_lines:
7877 if file_name in miner_num_lines:
7878 valid_lines = [
7879 edit.line_number
7880 for edit in self.patch.edits
7881 if edit.file_name == file_name
7882 ]
7883 miner_lines = [
7884 edit.line_number
7885 for edit in patch.edits
7886 if edit.file_name == file_name
7887 ]
7888 lines_in_miner += len(set(valid_lines) & set(miner_lines))
7889 total_valid_lines += len(set(valid_lines))
7890 percent_lines_in_miner = lines_in_miner / total_valid_lines if total_valid_lines > 0 else 0
7891
7892
7893
7894 # Group edits into chunks by consecutive line numbers
7895 valid_chunks = chunk_patch(self.patch)
7896 miner_chunks = chunk_patch(patch)
7897
7898 chunk_score = 0
7899 total_chunk_score = 0
7900 # find chunks that share an index in the same file
7901 for valid_chunk in valid_chunks:
7902 exists = False
7903 for miner_chunk in miner_chunks:
7904 if (
7905 miner_chunk.file_name == valid_chunk.file_name
7906 and abs(miner_chunk.start_index - valid_chunk.start_index) <= 10
7907 ):
7908 chunk_score += self.code_scorer.similarity(
7909 miner_chunk.new_content, valid_chunk.new_content
7910 )
7911 total_chunk_score += 1
7912 exists = True
7913 break
7914 if not exists:
7915 total_chunk_score += 1
7916
7917 chunk_percent = chunk_score / total_chunk_score if total_chunk_score > 0 else 0 # guard against an empty valid patch
7918 score = ((5 * percent_lines_in_miner + 5 * chunk_percent) / 10) * lines_over_percent
7919
7920 return score
7921
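The final score averages line coverage and chunk similarity, then scales by the over-editing factor; a worked example with illustrative values:

    # Illustrative values, not real miner output.
    percent_lines_in_miner = 0.8   # miner touched 8 of the 10 valid lines
    chunk_percent = 0.9            # average chunk similarity
    lines_over_percent = 1.0       # no over-editing penalty
    score = ((5 * percent_lines_in_miner + 5 * chunk_percent) / 10) * lines_over_percent
    assert abs(score - 0.85) < 1e-9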
7922
7923
7924---
7925File: /coding/tasks/task.py
7926---
7927
7928# The MIT License (MIT)
7929# Copyright © 2024 Yuma Rao
7930# Copyright © 2023 Opentensor Foundation
7931# Copyright © 2024 Macrocosmos
7932# Copyright © 2024 Broke
7933
7934
7935# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
7936# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
7937# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
7938# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7939
7940# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
7941# the Software.
7942
7943# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
7944# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
7945# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
7946# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
7947# DEALINGS IN THE SOFTWARE.
7948
7949from abc import ABC
7950from dataclasses import dataclass, field
7951from typing import List, Union, Any, Dict, Callable
7952
7953from coding.schemas import Context, File
7954
7955
7956@dataclass
7957class Task(ABC):
7958 name: str
7959 desc: str
7960 goal: str
7961 query: str
7962 topic: str
7963 subtopic: str
7964 tags: List[str]
7965 context: Context
7966 reward_definition: List[dict]
7967 timeout: int = 12
7968 attachments: List[Any] = field(default_factory=list) # default_factory must be a callable, not a list instance
7969 files: List[File] = field(default_factory=list)
7970 penalty_definition: List[dict] = None
7971 dataset_options: Dict = field(default_factory=dict)
7972 reward_threshold: float = 0.0
7973 reference: Union[str, List[str], Dict] = ""
7974 criteria: str = ""
7975 delimiter: str = ""
7976 complete: bool = False
7977 static_reference: bool = False
7978 static_query: bool = False
7979 reference_prompt: str = ""
7980 query_system_prompt: str = ""
7981 query_prompt: str = ""
7982 llm: Callable = None
7983 code_scorer: Callable = None
7984 extra_info: Dict = field(default_factory=dict)
7985
7986 def __str__(self):
7987 return f"{self.__class__.__name__}(name={self.name!r}, desc={self.desc!r}, goal={self.goal!r}, query={self.query!r}, reference={self.reference!r}, topic={self.topic!r}, subtopic={self.subtopic!r}, tags={self.tags!r})"
7988
7989 def __repr__(self):
7990 return str(self)
7991
7992 def __state_dict__(self, full=False):
7993 state = {
7994 "task": self.name,
7995 "desc": self.desc,
7996 "goal": self.goal,
7997 "query": self.query,
7998 "query_time": getattr(self, "query_time", 0),
7999 "reference": self.reference,
8000 "reference_time": getattr(self, "reference_time", 0),
8001 "topic": self.topic,
8002 "subtopic": self.subtopic,
8003 "context_time": self.context.stats.get("fetch_time", 0.0),
8004 }
8005 if full:
8006 state.update(dict(self.context))
8007
8008 return state
8009
8010
8011---
8012File: /coding/utils/__init__.py
8013---
8014
8015# from . import config
8016from . import misc
8017from . import uids
8018
8019
8020---
8021File: /coding/utils/config.py
8022---
8023
8024# The MIT License (MIT)
8025# Copyright © 2023 Yuma Rao
8026# Copyright © 2023 Opentensor Foundation
8027
8028# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8029# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8030# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8031# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8032
8033# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8034# the Software.
8035
8036# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8037# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8038# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8039# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8040# DEALINGS IN THE SOFTWARE.
8041
8042import os
8043import subprocess
8044import argparse
8045import bittensor as bt
8046from .logging import setup_events_logger
8047
8048from coding.tasks import TASKS
8049
8050def is_cuda_available():
8051 try:
8052 output = subprocess.check_output(["nvidia-smi", "-L"], stderr=subprocess.STDOUT)
8053 if "NVIDIA" in output.decode("utf-8"):
8054 return "cuda"
8055 except Exception:
8056 pass
8057 try:
8058 output = subprocess.check_output(["nvcc", "--version"]).decode("utf-8")
8059 if "release" in output:
8060 return "cuda"
8061 except Exception:
8062 pass
8063 return "cpu"
8064
8065def check_config(cls, config: "bt.Config"):
8066 r"""Checks/validates the config namespace object."""
8067 bt.logging.check_config(config)
8068
8069 full_path = os.path.expanduser(
8070 "{}/{}/{}/netuid{}/{}".format(
8071 config.logging.logging_dir, # TODO: change from ~/.bittensor/miners to ~/.bittensor/neurons
8072 config.wallet.name,
8073 config.wallet.hotkey,
8074 config.netuid,
8075 config.neuron.name,
8076 )
8077 )
8078
8079 config.neuron.full_path = os.path.expanduser(full_path)
8080 if not os.path.exists(config.neuron.full_path):
8081 os.makedirs(config.neuron.full_path, exist_ok=True)
8082
8083 if not config.neuron.dont_save_events:
8084 # Add custom event logger for the events.
8085 events_logger = setup_events_logger(
8086 config.neuron.full_path, config.neuron.events_retention_size
8087 )
8088 bt.logging.register_primary_logger(events_logger.name)
8089
8090
8091def add_args(cls, parser):
8092 """
8093 Adds relevant arguments to the parser for operation.
8094 """
8095 parser.add_argument("--netuid", type=int, help="Subnet netuid", default=45)
8096 parser.add_argument(
8097 "--neuron.device",
8098 type=str,
8099 help="Device to run on.",
8100 default=is_cuda_available(),
8101 )
8102 parser.add_argument(
8103 "--neuron.epoch_length",
8104 type=int,
8105 help="The default epoch length (how often we set weights, measured in 12 second blocks).",
8106 default=100,
8107 )
8108
8109 parser.add_argument(
8110 "--mock",
8111 action="store_true",
8112 help="Mock neuron and all network components.",
8113 default=False,
8114 )
8115
8116 parser.add_argument(
8117 "--neuron.events_retention_size",
8118 type=str,
8119 help="Events retention size.",
8120 default=2 * 1024 * 1024 * 1024, # 2 GB
8121 )
8122
8123 parser.add_argument(
8124 "--neuron.dont_save_events",
8125 action="store_true",
8126 help="If set, we don't save events to a log file.",
8127 default=False,
8128 )
8129
8130 parser.add_argument(
8131 "--neuron.tasks",
8132 type=str,
8133 nargs="+",
8134 help="The tasks to use for the validator.",
8135 default=list(TASKS.keys()),
8136 )
8137
8138 parser.add_argument(
8139 "--neuron.task_weights",
8140 type=int,
8141 nargs="+",
8142 help="The weights for sampling of each task.",
8143 default=[0,0,0,0,0,1]
8144 )
8145
8146 parser.add_argument(
8147 "--neuron.percent_organic_score",
8148 type=float,
8149 help="The percent of organic synapses to score",
8150 default=0.25,
8151 )
8152
8153def add_miner_args(cls, parser):
8154 """Add miner specific arguments to the parser."""
8155
8156 parser.add_argument(
8157 "--miner.name",
8158 type=str,
8159 help="The name of the miner to load",
8160 default="miner",
8161 )
8162
8163 parser.add_argument(
8164 "--neuron.model_id",
8165 type=str,
8166 help="The model to use for the validator.",
8167 default="gpt-3.5-turbo-1106",
8168 )
8169
8170 parser.add_argument(
8171 "--neuron.name",
8172 type=str,
8173 help="Trials for this neuron go in neuron.root / (wallet_cold - wallet_hot) / neuron.name. ",
8174 default="miner",
8175 )
8176
8177 parser.add_argument(
8178 "--blacklist.force_validator_permit",
8179 action="store_true",
8180 help="If set, we will force incoming requests to have a permit.",
8181 default=True,
8182 )
8183
8184 parser.add_argument(
8185 "--blacklist.allow_non_registered",
8186 action="store_true",
8187 help="If set, miners will accept queries from non-registered entities. (Dangerous!)",
8188 default=False,
8189 )
8190
8191 parser.add_argument(
8192 "--neuron.streaming_batch_size",
8193 type=int,
8194 default=12,
8195 help="Batch size in tokens for streaming forward calls.",
8196 )
8197
8198
8199
8200def add_validator_args(cls, parser):
8201 """Add validator specific arguments to the parser."""
8202
8203 parser.add_argument(
8204 "--neuron.name",
8205 type=str,
8206 help="Trials for this neuron go in neuron.root / (wallet_cold - wallet_hot) / neuron.name. ",
8207 default="validator",
8208 )
8209
8210 parser.add_argument(
8211 "--neuron.timeout",
8212 type=float,
8213 help="The timeout for each forward call in seconds.",
8214 default=10,
8215 )
8216
8217 parser.add_argument(
8218 "--neuron.num_concurrent_forwards",
8219 type=int,
8220 help="The number of concurrent forwards running at any time.",
8221 default=1, # TODO increase
8222 )
8223
8224 parser.add_argument(
8225 "--neuron.sample_size",
8226 type=int,
8227 help="The number of miners to query in a single step.",
8228 default=50, # TODO decrease?
8229 )
8230
8231 parser.add_argument(
8232 "--neuron.disable_set_weights",
8233 action="store_true",
8234 help="Disables setting weights.",
8235 default=False,
8236 )
8237
8238 parser.add_argument(
8239 "--neuron.moving_average_alpha",
8240 type=float,
8241 help="Moving average alpha parameter, how much to add of the new observation.",
8242 default=0.05,
8243 )
8244
8245 parser.add_argument(
8246 "--wandb.project_name",
8247 type=str,
8248 help="The name of the project where you are sending the new run.",
8249 default="gen42",
8250 )
8251
8252 parser.add_argument(
8253 "--wandb.on",
8254 type=bool,
8255 default=True,
8256 help="Enable wandb logging.",
8257 )
8258
8259 parser.add_argument(
8260 "--wandb.entity",
8261 type=str,
8262 default="gen42",
8263 help="Wandb entity to log to.",
8264 )
8265
8266 parser.add_argument(
8267 "--neuron.axon_off",
8268 "--axon_off",
8269 action="store_true",
8270 # Note: the validator needs to serve an Axon with their IP or they may
8271 # be blacklisted by the firewall of serving peers on the network.
8272 help="Set this flag to not attempt to serve an Axon.",
8273 default=False,
8274 )
8275
8276 parser.add_argument(
8277 "--neuron.vpermit_tao_limit",
8278 type=int,
8279 help="The maximum number of TAO allowed to query a validator with a vpermit.",
8280 default=4096,
8281 )
8282
8283 parser.add_argument(
8284 "--neuron.model_id",
8285 type=str,
8286 help="The name of the LLM to be used for the validator.",
8287 default="Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4",
8288 )
8289
8290 parser.add_argument(
8291 "--neuron.model_url",
8292 type=str,
8293 help="The OpenAI-compatible model URL to be used for the validator",
8294 default="http://localhost:8028/v1",
8295 )
8296
8297 parser.add_argument(
8298 "--neuron.vllm_api_key",
8299 type=str,
8300 help="The API key for the OpenAI-compatible endpoint used by the validator",
8301 default="EMPTY",
8302 )
8303
8304 parser.add_argument(
8305 "--neuron.forward_max_time",
8306 type=int,
8307 help="Max time to wait for a forward call to complete in seconds.",
8308 default=120,
8309 )
8310
8311 parser.add_argument(
8312 "--neuron.finetune_gpu_id",
8313 type=int,
8314 help="The gpu to use for finetuning.",
8315 default=0,
8316 )
8317
8318 parser.add_argument(
8319 "--neuron.finetune_test_size",
8320 type=int,
8321 help="The number of finetune tasks to generate and score with.",
8322 default=100,
8323 )
8324
8325
8326
8327
8328def config(cls):
8329 """
8330 Returns the configuration object specific to this miner or validator after adding relevant arguments.
8331 """
8332 parser = argparse.ArgumentParser()
8333 bt.wallet.add_args(parser)
8334 bt.subtensor.add_args(parser)
8335 bt.logging.add_args(parser)
8336 bt.axon.add_args(parser)
8337 if cls is not None:
8338 cls.add_args(parser)
8339 bt.trace() # TODO: gate trace/debug logging behind a config flag
8340 bt.debug()
8341 return bt.config(parser)
8342
8343
8344
8345---
8346File: /coding/utils/exceptions.py
8347---
8348
8349# The MIT License (MIT)
8350# Copyright © 2024 Yuma Rao
8351# Copyright © 2023 Opentensor Foundation
8352# Copyright © 2024 Macrocosmos
8353# Copyright © 2024 Broke
8354
8355
8356# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8357# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8358# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8359# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8360
8361# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8362# the Software.
8363
8364# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8365# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8366# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8367# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8368# DEALINGS IN THE SOFTWARE.
8369
8370class MaxRetryError(Exception):
8371 """Exception raised when the maximum number of retries is exceeded."""
8372
8373 def __init__(self, message="Maximum number of retries exceeded"):
8374 self.message = message
8375 super().__init__(self.message)
8376
8377
8378---
8379File: /coding/utils/logging.py
8380---
8381
8382# The MIT License (MIT)
8383# Copyright © 2024 Yuma Rao
8384# Copyright © 2023 Opentensor Foundation
8385# Copyright © 2024 Macrocosmos
8386# Copyright © 2024 Brokespace
8387
8388
8389# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8390# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8391# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8392# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8393
8394# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8395# the Software.
8396
8397# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8398# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8399# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8400# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8401# DEALINGS IN THE SOFTWARE.
8402
8403import os
8404import copy
8405import wandb
8406import coding
8407import logging
8408import bittensor as bt
8409from logging.handlers import RotatingFileHandler
8410
8411
8412EVENTS_LEVEL_NUM = 38
8413DEFAULT_LOG_BACKUP_COUNT = 10
8414
8415def setup_events_logger(full_path, events_retention_size):
8416 logging.addLevelName(EVENTS_LEVEL_NUM, "EVENT")
8417
8418 logger = logging.getLogger("event")
8419 logger.setLevel(EVENTS_LEVEL_NUM)
8420
8421 def event(self, message, *args, **kws):
8422 if self.isEnabledFor(EVENTS_LEVEL_NUM):
8423 self._log(EVENTS_LEVEL_NUM, message, args, **kws)
8424
8425 logging.Logger.event = event
8426
8427 formatter = logging.Formatter(
8428 "%(asctime)s | %(levelname)s | %(message)s",
8429 datefmt="%Y-%m-%d %H:%M:%S",
8430 )
8431
8432 file_handler = RotatingFileHandler(
8433 os.path.join(full_path, "events.log"),
8434 maxBytes=events_retention_size,
8435 backupCount=DEFAULT_LOG_BACKUP_COUNT,
8436 )
8437 file_handler.setFormatter(formatter)
8438 file_handler.setLevel(EVENTS_LEVEL_NUM)
8439 logger.addHandler(file_handler)
8440
8441 return logger
8442
8443
8444def should_reinit_wandb(self):
8445 # Check if wandb run needs to be rolled over.
8446 return (
8447 not self.config.wandb.off
8448 and self.step
8449 and self.step % self.config.wandb.run_step_length == 0
8450 )
8451
8452
8453def init_wandb(self, reinit=False):
8454 """Starts a new wandb run."""
8455 uid = self.metagraph.hotkeys.index(self.wallet.hotkey.ss58_address)
8456 spec_version = str(coding.__spec_version__)
8457 tags = [
8458 self.wallet.hotkey.ss58_address,
8459 coding.__version__,
8460 str(coding.__spec_version__),
8461 f"netuid_{self.metagraph.netuid}",
8462 ]
8463
8464 if self.config.mock:
8465 tags.append("mock")
8466 for task in self.active_tasks:
8467 tags.append(task)
8468 if self.config.neuron.disable_set_weights:
8469 tags.append("disable_set_weights")
8470
8471 wandb_config = {
8472 key: copy.deepcopy(self.config.get(key, None))
8473 for key in ("neuron", "reward", "netuid", "wandb")
8474 }
8475 wandb_config["neuron"].pop("full_path", None)
8476
8477 self.wandb = wandb.init(
8478 anonymous="allow",
8479 reinit=reinit,
8480 project=self.config.wandb.project_name if self.config.netuid == 45 else self.config.wandb.project_name + "testnet",
8481 entity=self.config.wandb.entity,
8482 config=wandb_config,
8483 mode="offline" if self.config.wandb.offline else "online",
8484 dir=self.config.neuron.full_path,
8485 tags=tags,
8486 notes=self.config.wandb.notes,
8487 name=f"{uid}-{spec_version}",
8488 )
8489 bt.logging.success(f"Started a new wandb run <blue> {self.wandb.name} </blue>")
8490
8491
8492def reinit_wandb(self):
8493 """Reinitializes wandb, rolling over the run."""
8494 self.wandb.finish()
8495 init_wandb(self, reinit=True)
8496
8497
8498def log_event(self, event):
8499 if self.config.netuid != 45 and self.config.netuid != 171:
8500 return
8501
8502 if not self.config.wandb.on:
8503 return
8504
8505 if not getattr(self, "wandb", None):
8506 init_wandb(self)
8507
8508 # Log the event to wandb.
8509 self.wandb.log(event)
8510
8511
8512---
8513File: /coding/utils/misc.py
8514---
8515
8516# The MIT License (MIT)
8517# Copyright © 2024 Yuma Rao
8518# Copyright © 2023 Opentensor Foundation
8519# Copyright © 2024 Macrocosmos
8520# Copyright © 2024 Broke
8521
8522
8523# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8524# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8525# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8526# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8527
8528# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8529# the Software.
8530
8531# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8532# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8533# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8534# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8535# DEALINGS IN THE SOFTWARE.
8536
8537import time
8538import math
8539import hashlib as rpccheckhealth
8540from math import floor
8541from typing import Callable, Any
8542from functools import lru_cache, update_wrapper
8543
8544
8545# LRU Cache with TTL
8546def ttl_cache(maxsize: int = 128, typed: bool = False, ttl: int = -1):
8547 """
8548 Decorator that creates a cache of the most recently used function calls with a time-to-live (TTL) feature.
8549 The cache evicts the least recently used entries if the cache exceeds the `maxsize` or if an entry has
8550 been in the cache longer than the `ttl` period.
8551
8552 Args:
8553 maxsize (int): Maximum size of the cache. Once the cache grows to this size, subsequent entries
8554 replace the least recently used ones. Defaults to 128.
8555 typed (bool): If set to True, arguments of different types will be cached separately. For example,
8556 f(3) and f(3.0) will be treated as distinct calls with distinct results. Defaults to False.
8557 ttl (int): The time-to-live for each cache entry, measured in seconds. If set to a non-positive value,
8558 the TTL is set to a very large number, effectively making the cache entries permanent. Defaults to -1.
8559
8560 Returns:
8561 Callable: A decorator that can be applied to functions to cache their return values.
8562
8563 The decorator is useful for caching results of functions that are expensive to compute and are called
8564 with the same arguments frequently within short periods of time. The TTL feature helps in ensuring
8565 that the cached values are not stale.
8566
8567 Example:
8568 @ttl_cache(ttl=10)
8569 def get_data(param):
8570 # Expensive data retrieval operation
8571 return data
8572 """
8573 if ttl <= 0:
8574 ttl = 65536
8575 hash_gen = _ttl_hash_gen(ttl)
8576
8577 def wrapper(func: Callable) -> Callable:
8578 @lru_cache(maxsize, typed)
8579 def ttl_func(ttl_hash, *args, **kwargs):
8580 return func(*args, **kwargs)
8581
8582 def wrapped(*args, **kwargs) -> Any:
8583 th = next(hash_gen)
8584 return ttl_func(th, *args, **kwargs)
8585
8586 return update_wrapper(wrapped, func)
8587
8588 return wrapper
8589
8590
8591def _ttl_hash_gen(seconds: int):
8592 """
8593 Internal generator function used by the `ttl_cache` decorator to generate a new hash value at regular
8594 time intervals specified by `seconds`.
8595
8596 Args:
8597 seconds (int): The number of seconds after which a new hash value will be generated.
8598
8599 Yields:
8600 int: A hash value that represents the current time interval.
8601
8602 This generator is used to create time-based hash values that enable the `ttl_cache` to determine
8603 whether cached entries are still valid or if they have expired and should be recalculated.
8604 """
8605 start_time = time.time()
8606 while True:
8607 yield floor((time.time() - start_time) / seconds)
8608
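A runnable sketch of the TTL behavior (timing-based, so it can in principle flake exactly at a hash boundary):

    import time
    from coding.utils.misc import ttl_cache

    calls = {"n": 0}

    @ttl_cache(maxsize=8, ttl=1)
    def counted():
        calls["n"] += 1
        return calls["n"]

    assert counted() == counted()  # second call served from the cache
    time.sleep(1.1)                # cross the 1-second TTL boundary
    assert counted() == 2          # new ttl_hash -> recomputed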
8609
8610# 12 seconds updating block.
8611@ttl_cache(maxsize=1, ttl=12)
8612def ttl_get_block(self) -> int:
8613 """
8614 Retrieves the current block number from the blockchain. This method is cached with a time-to-live (TTL)
8615 of 12 seconds, meaning that it will only refresh the block number from the blockchain at most every 12 seconds,
8616 reducing the number of calls to the underlying blockchain interface.
8617
8618 Returns:
8619 int: The current block number on the blockchain.
8620
8621 This method is useful for applications that need to access the current block number frequently and can
8622 tolerate a delay of up to 12 seconds for the latest information. By using a cache with TTL, the method
8623 efficiently reduces the workload on the blockchain interface.
8624
8625 Example:
8626 current_block = ttl_get_block(self)
8627
8628 Note: self here is the miner or validator instance
8629 """
8630 return self.subtensor.get_current_block()
8631
8632
8633
8634---
8635File: /coding/utils/shell.py
8636---
8637
8638import shlex
8639import subprocess
8640import bittensor as bt
8641from threading import Thread
8642
8643def execute_shell_command(command: str, model_name: str) -> subprocess.Popen:
8644 """
8645 Execute a shell command and stream the output to the caller in real-time.
8646 The subprocess will be terminated after 5 hours.
8647
8648 Args:
8649 command: shell command as a string (can include \\ line continuations); model_name: model identifier used to filter and redact stderr log lines
8650 Returns:
8651 subprocess.Popen: The process handle for further interaction.
8652 """
8653 # Replace \ newline with space and split using shlex
8654 command = command.replace("\\\n", " ").replace("\\", " ")
8655 parts = shlex.split(command) # Handles quoted strings correctly
8656
8657 try:
8658 # Run the process
8659 process = subprocess.Popen(
8660 parts, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
8661 )
8662
8663 def stream_output(stream, stream_name):
8664 for line in iter(stream.readline, ''):
8665 line = line.rstrip('\n')
8666 if stream_name == "STDERR":
8667 # only print lines that relate to the model or loading status
8668 if model_name in line or "shard" in line:
8669 redacted_line = line.replace(model_name, "[REDACTED]")
8670 bt.logging.debug(f"{stream_name}: {redacted_line}")
8671
8672 # Uncomment this if you want STDOUT logging as well:
8673 # else:
8674 # print(f"{stream_name}: {line}")
8675
8676 stream.close()
8677
8678 # Stream both stdout and stderr
8679 Thread(target=stream_output, args=(process.stdout, "STDOUT")).start()
8680 Thread(target=stream_output, args=(process.stderr, "STDERR")).start()
8681
8682 # Start a timer thread to kill the process after 5 hours
8683 def kill_after_timeout():
8684 import time
8685 time.sleep(5 * 60 * 60) # Sleep for 5 hours
8686 if process.poll() is None: # If process is still running
8687 process.terminate()
8688 bt.logging.debug("Process terminated after 5-hour timeout")
8689
8690 Thread(target=kill_after_timeout, daemon=True).start()
8691
8692 return process
8693 except Exception as e:
8694 print(f"Error executing command: {command}. Exception: {e}")
8695 raise
8696
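A usage sketch; the command and model name below are illustrative only:

    from coding.utils.shell import execute_shell_command

    proc = execute_shell_command(
        "python -m http.server 8000",
        model_name="example-model",
    )
    # stderr lines mentioning the model are logged with the name redacted;
    # the process is auto-terminated after 5 hours, or earlier by hand:
    proc.terminate()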
8697
8698---
8699File: /coding/utils/uids.py
8700---
8701
8702# The MIT License (MIT)
8703# Copyright © 2024 Yuma Rao
8704# Copyright © 2023 Opentensor Foundation
8705# Copyright © 2024 Macrocosmos
8706# Copyright © 2024 Brokespace
8707
8708
8709# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8710# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8711# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8712# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8713
8714# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8715# the Software.
8716
8717# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8718# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8719# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8720# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8721# DEALINGS IN THE SOFTWARE.
8722
8723import random
8724import numpy as np
8725import bittensor as bt
8726from typing import List
8727
8728
8729def check_uid_availability(
8730 metagraph: "bt.metagraph.Metagraph",
8731 uid: int,
8732 vpermit_tao_limit: int,
8733 coldkeys: set = None,
8734 ips: set = None,
8735) -> bool:
8736 """Check if uid is available. The UID should be available if it is serving and has less than vpermit_tao_limit stake
8737 Args:
8738 metagraph (:obj: bt.metagraph.Metagraph): Metagraph object
8739 uid (int): uid to be checked
8740 vpermit_tao_limit (int): Validator permit tao limit
8741 coldkeys (set): Set of coldkeys to exclude
8742 ips (set): Set of ips to exclude
8743 Returns:
8744 bool: True if uid is available, False otherwise
8745 """
8746 # Filter non serving axons.
8747 if not metagraph.axons[uid].is_serving:
8748 # bt.logging.debug(f"uid: {uid} is not serving")
8749 return False
8750
8751 # Filter validator permit > 1024 stake.
8752 if metagraph.validator_permit[uid] and metagraph.S[uid] > vpermit_tao_limit:
8753 bt.logging.debug(
8754 f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}"
8755 )
8756 return False
8757
8758 if coldkeys and metagraph.axons[uid].coldkey in coldkeys:
8759 return False
8760
8761 if ips and metagraph.axons[uid].ip in ips:
8762 return False
8763
8764 # Available otherwise.
8765 return True
8766
8767def get_random_uids(
8768 self, k: int, exclude: List[int] = None
8769) -> np.ndarray:
8770 """Returns k available random uids from the metagraph.
8771 Args:
8772 k (int): Number of uids to return.
8773 exclude (List[int]): List of uids to exclude from the random sampling.
8774 Returns:
8775 uids (np.ndarray): Randomly sampled available uids.
8776 Notes:
8777 If `k` is larger than the number of available `uids`, set `k` to the number of available `uids`.
8778 """
8779 candidate_uids = []
8780 avail_uids = []
8781
8782 for uid in range(self.metagraph.n.item()):
8783 uid_is_available = check_uid_availability(
8784 self.metagraph, uid, self.config.neuron.vpermit_tao_limit
8785 )
8786 uid_is_not_excluded = exclude is None or uid not in exclude
8787
8788 if uid_is_available:
8789 avail_uids.append(uid)
8790 if uid_is_not_excluded:
8791 candidate_uids.append(uid)
8792 # If k is larger than the number of available uids, set k to the number of available uids.
8793 k = min(k, len(avail_uids))
8794 # Check if candidate_uids contains enough for querying; if not, grab all available uids
8795 available_uids = candidate_uids
8796 if len(candidate_uids) < k:
8797 available_uids += random.sample(
8798 [uid for uid in avail_uids if uid not in candidate_uids],
8799 k - len(candidate_uids),
8800 )
8801 uids = np.array(random.sample(available_uids, k))
8802 return uids
8803
8804def get_miner_hotkeys(self) -> List[str]:
8805 hotkeys = []
8806 for uid in range(self.metagraph.n.item()):
8807 if check_uid_availability(self.metagraph, uid, self.config.neuron.vpermit_tao_limit):
8808 hotkeys.append(self.metagraph.axons[uid].hotkey)
8809 return hotkeys
8810
8811def get_uid_from_hotkey(self, hotkey: str) -> int:
8812 for uid in range(self.metagraph.n.item()):
8813 if self.metagraph.axons[uid].hotkey == hotkey:
8814 return uid
8815 return None
8816
8817def get_hotkey_from_uid(self, uid: int) -> str:
8818 return self.metagraph.axons[uid].hotkey
8819
8820def get_miner_uids(self) -> List[int]:
8821 return [uid for uid in range(self.metagraph.n.item()) if check_uid_availability(self.metagraph, uid, self.config.neuron.vpermit_tao_limit)]
8822
8823
8824
8825---
8826File: /coding/validator/__init__.py
8827---
8828
8829from .forward import forward
8830from .reward import reward
8831
8832
8833
8834---
8835File: /coding/validator/forward.py
8836---
8837
8838from time import sleep
8839import bittensor as bt
8840from datetime import datetime, timezone, timedelta
8841
8842from coding.utils.logging import log_event
8843from coding.finetune import FinetunePipeline
8844from coding.protocol import StreamCodeSynapse
8845from coding.rewards.codesim import CodeSimModel
8846from coding.constants import COMPETITION_END_DATE, COMPETITION_ID
8847
8848
8849
8850async def forward(self, synapse: StreamCodeSynapse):
8851 """
8852 The forward function is called by the validator every time step.
8853
8854 It is responsible for querying the network and scoring the responses.
8855
8856 Args:
8857 self (bittensor.neuron.Neuron): the neuron object which contains all the necessary state for the validator; synapse (StreamCodeSynapse): the incoming request handled by this forward pass.
8858
8859 """
8860 bt.logging.info("🚀 Starting forward loop...")
8861 if not FinetunePipeline.tasks_exist(self.config) and COMPETITION_ID not in self.finetune_results:
8862 FinetunePipeline.generate_tasks(self.config)
8863
8864 eastern = timezone(timedelta(hours=-5)) # EST is UTC-5
8865 end_time = datetime.strptime(COMPETITION_END_DATE, "%Y-%m-%d").replace(hour=18, tzinfo=eastern)
8866 if datetime.now(eastern) > end_time:
8867 if COMPETITION_ID not in self.finetune_results and not hasattr(self, 'finetune_eval_future'):
8868 finetune_pipeline = FinetunePipeline(
8869 config=self.config,
8870 )
8871 self.finetune_eval_future = self.executor.submit(finetune_pipeline.evaluate)
8872 # Check if evaluation is complete
8873 if hasattr(self, 'finetune_eval_future') and self.finetune_eval_future.done():
8874 self.finetune_results[COMPETITION_ID] = self.finetune_eval_future.result()
8875 delattr(self, 'finetune_eval_future') # Remove the future after getting results
8876
8877 self.update_scores()
8878
8879 log_event(
8880 self,
8881 {
8882 "step": self.step,
8883 **(self.finetune_results[COMPETITION_ID].__state_dict__() if COMPETITION_ID in self.finetune_results else {}),
8884 },
8885 )
8886 sleep(30)
8887
8888
8889
8890---
8891File: /coding/validator/reward.py
8892---
8893
8894# The MIT License (MIT)
8895# Copyright © 2024 Yuma Rao
8896# Copyright © 2023 Opentensor Foundation
8897# Copyright © 2024 Macrocosmos
8898# Copyright © 2024 Broke
8899
8900
8901# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8902# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8903# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8904# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8905
8906# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8907# the Software.
8908
8909# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8910# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8911# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8912# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8913# DEALINGS IN THE SOFTWARE.
8914
8915import numpy as np
8916from typing import List
8917
8918
8919def reward(query: int, response: int) -> float:
8920 """
8921 Reward the miner response to the dummy request. This method returns a reward
8922 value for the miner, which is used to update the miner's score.
8923
8924 Returns:
8925 - float: The reward value for the miner.
8926 """
8927
8928 return 1.0 if response == query * 2 else 0
8929
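The dummy reward pays out only when the response equals twice the query; a sketch, assuming the module import below:

    from coding.validator.reward import reward

    assert reward(3, 6) == 1.0
    assert reward(3, 7) == 0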
8930
8931def get_rewards(
8932 self,
8933 query: int,
8934 responses: List[float],
8935) -> np.ndarray:
8936 """
8937 Returns an array of rewards for the given query and responses.
8938
8939 Args:
8940 - query (int): The query sent to the miner.
8941 - responses (List[float]): A list of responses from the miner.
8942
8943 Returns:
8944 - np.ndarray: An array of rewards for the given query and responses.
8945 """
8946 # Get all the reward results by iteratively calling your reward() function.
8947 # Cast response to int as the reward function expects an int type for response.
8948
8949 # Remove any None values
8950 responses = [response for response in responses if response is not None]
8951 return np.array(
8952 [reward(query, int(response)) for response in responses]
8953 )
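For reference, a short usage sketch of the two template functions above. It assumes `reward` and `get_rewards` are in scope; since the function body never touches `self`, `None` is passed for it purely for illustration:

```python
# A response is scored 1.0 when it equals query * 2, else 0.0.
print(reward(5, 10))  # 1.0
print(reward(5, 7))   # 0.0

# get_rewards drops None responses, casts the rest to int, and scores them.
rewards = get_rewards(None, query=5, responses=[10.0, 7.0, None])
print(rewards)  # [1. 0.]
```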
8954
8955
8956
8957---
8958File: /coding/__init__.py
8959---
8960
8961__version__ = "0.1.3"
8962version_split = __version__.split(".")
8963__spec_version__ = (
8964 (1000 * int(version_split[0]))
8965 + (10 * int(version_split[1]))
8966 + (1 * int(version_split[2]))
8967)
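A quick worked example of the spec-version arithmetic above. Note that the 1000/10/1 weighting leaves little headroom for the minor and patch components, so distinct versions can collide:

```python
def spec_version(version: str) -> int:
    # Same weighting as above: 1000 * major + 10 * minor + 1 * patch.
    major, minor, patch = (int(part) for part in version.split("."))
    return (1000 * major) + (10 * minor) + patch

print(spec_version("0.1.3"))   # 13
print(spec_version("0.0.13"))  # 13 -- collides with "0.1.3"
```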
8968
8969# Import all submodules.
8970from . import protocol
8971from . import base
8972# from . import validator
8973from . import api
8974
8975
8976
8977---
8978File: /coding/constants.py
8979---
8980
8981COMPETITION_ID = 4
8982
8983COMPETITION_END_DATE = "2025-01-29"
8984
8985ALLOWED_MODULES = [
8986 "langchain_community",
8987 "langchain_openai",
8988 "ast",
8989 "sentence_transformers",
8990 "networkx",
8991 "grep_ast",
8992 "tree_sitter",
8993 "tree_sitter_languages",
8994 "rapidfuzz",
8995 "llama_index",
8996 "pydantic",
8997 "numpy",
8998 "ruamel.yaml",
8999 "json",
9000 "libcst",
9001 "schemas.swe",
9002 "abc",
9003 "coding.finetune.llm.client",
9004 "coding.schemas.swe",
9005 "requests",
9006 "difflib",
9007 "logging",
9008 "time",
9009 "datetime",
9010 "random",
9011 "sklearn",
9012 "argparse",
9013 "uuid",
9014 "pandas",
9015 "numpy",
9016 "tqdm",
9017 "collections",
9018 "platform",
9019 "re",
9020 "traceback",
9021 "typing",
9022 "resource",
9023 "concurrent",
9024 "io",
9025 "tokenize",
9026 "pathlib",
9027 "threading",
9028 "jsonlines",
9029 "tiktoken",
9030 "openai",
9031 "anthropic",
9032 "google",
9033 "langchain_anthropic",
9034 "langchain_google_genai",
9035 "langchain_core",
9036 "langchain_community",
9037]
9038
9039ALLOWED_IMPORTS = {
9040 'os': ['getenv', 'path', 'environ', 'makedirs', 'rm', 'walk', 'sep', 'remove'],
9041}
9042
9043NUM_ALLOWED_CHARACTERS = 1000000
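These constants only have teeth with an enforcement step, and the repository's actual checker is not shown in this listing. The following is a hedged sketch of how such an import allowlist could be enforced with the standard `ast` module; `check_imports` and its signature are illustrative, not the project's API (a full checker would also reject sources longer than NUM_ALLOWED_CHARACTERS):

```python
import ast

def check_imports(source: str, allowed_modules, allowed_imports) -> list:
    """Return a list of import statements that violate the allowlists."""
    violations = []
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Import):
            for alias in node.names:
                root = alias.name.split(".")[0]
                if root not in allowed_modules and root not in allowed_imports:
                    violations.append(f"import {alias.name}")
        elif isinstance(node, ast.ImportFrom):
            root = (node.module or "").split(".")[0]
            if root in allowed_modules:
                continue  # whole module is allowed
            names = allowed_imports.get(root)
            if names is None or any(a.name not in names for a in node.names):
                violations.append(f"from {node.module} import ...")
    return violations

src = "import subprocess\nfrom os import getenv"
print(check_imports(src, ["json"], {"os": ["getenv"]}))
# ['import subprocess'] -- getenv is on the os allowlist, subprocess is not
```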
9044
9045
9046
9047---
9048File: /coding/dendrite.py
9049---
9050
9051# The MIT License (MIT)
9052# Copyright © 2024 Yuma Rao
9053# Copyright © 2023 Opentensor Foundation
9054# Copyright © 2024 Macrocosmos
9055# Copyright © 2024 Brokespace
9056
9057
9058# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
9059# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
9060# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9061# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9062
9063# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
9064# the Software.
9065
9066# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
9067# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
9068# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
9069# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
9070# DEALINGS IN THE SOFTWARE.
9071
9072import bittensor as bt
9073from typing import List, Any
9074
9075
9076class DendriteResponseEvent:
9077 def __init__(
9078 self, responses: List[bt.Synapse], uids, timeout: float, axons: List[Any]
9079 ):
9080 self.uids = uids
9081 self.completions = []
9082 self.status_messages = []
9083 self.status_codes = []
9084 self.timings = []
9086 self.axons = axons
9087 for synapse in responses:
9088 self.completions.append(synapse.completion)
9089 self.status_messages.append(synapse.dendrite.status_message)
9090
9091 if len(synapse.completion) == 0 and synapse.dendrite.status_code == 200:
9092 synapse.dendrite.status_code = 204
9093
9094 self.status_codes.append(synapse.dendrite.status_code)
9095
9096 if (synapse.dendrite.process_time) and (
9097 synapse.dendrite.status_code == 200
9098 or synapse.dendrite.status_code == 204
9099 ):
9100 self.timings.append(synapse.dendrite.process_time)
9101 elif synapse.dendrite.status_code == 408:
9102 self.timings.append(timeout)
9103 else:
9104 self.timings.append(0) # situation where miner is not alive
9105
9115 self.miner_hotkeys = [axon.hotkey for axon in axons]
9116
9117 def __state_dict__(self):
9118 return {
9119 "uids": self.uids.tolist(),
9120 "completions": self.completions,
9121 "timings": self.timings,
9122 "status_messages": self.status_messages,
9123 "status_codes": self.status_codes,
9124 "miner_hotkeys": self.miner_hotkeys,
9125 }
9126
9127 def __repr__(self):
9128 return f"DendriteResponseEvent(uids={self.uids}, completions={self.completions}, timings={self.timings}, status_messages={self.status_messages}, status_codes={self.status_codes}, miner_hotkeys={self.hotkeys})"
9129
9130
9131
9132---
9133File: /coding/mock.py
9134---
9135
9136# The MIT License (MIT)
9137# Copyright © 2024 Yuma Rao
9138# Copyright © 2023 Opentensor Foundation
9139# Copyright © 2024 Macrocosmos
9140# Copyright © 2024 Brokespace
9141
9142
9143# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
9144# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
9145# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9146# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9147
9148# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
9149# the Software.
9150
9151# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
9152# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
9153# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
9154# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
9155# DEALINGS IN THE SOFTWARE.
9156
9157import time
9158
9159import asyncio
9160import random
9161import bittensor as bt
9162
9163from typing import List
9164
9165
9166class MockSubtensor(bt.MockSubtensor):
9167 def __init__(self, netuid, n=16, wallet=None, network="mock"):
9168 super().__init__(network=network)
9169
9170 if not self.subnet_exists(netuid):
9171 self.create_subnet(netuid)
9172
9173 # Register ourself (the validator) as a neuron at uid=0
9174 if wallet is not None:
9175 self.force_register_neuron(
9176 netuid=netuid,
9177 hotkey=wallet.hotkey.ss58_address,
9178 coldkey=wallet.coldkey.ss58_address,
9179 balance=100000,
9180 stake=100000,
9181 )
9182
9183 # Register n mock neurons who will be miners
9184 for i in range(1, n + 1):
9185 self.force_register_neuron(
9186 netuid=netuid,
9187 hotkey=f"miner-hotkey-{i}",
9188 coldkey="mock-coldkey",
9189 balance=100000,
9190 stake=100000,
9191 )
9192
9193
9194class MockMetagraph(bt.metagraph):
9195 def __init__(self, netuid=1, network="mock", subtensor=None):
9196 super().__init__(netuid=netuid, network=network, sync=False)
9197
9198 if subtensor is not None:
9199 self.subtensor = subtensor
9200 self.sync(subtensor=subtensor)
9201
9202 for axon in self.axons:
9203 axon.ip = "127.0.0.0"
9204 axon.port = 8091
9205
9206 bt.logging.info(f"Metagraph: {self}")
9207 bt.logging.info(f"Axons: {self.axons}")
9208
9209
9210class MockDendrite(bt.dendrite):
9211 """
9212 Replaces a real bittensor network request with a mock request that just returns some static response for all axons that are passed and adds some random delay.
9213 """
9214
9215 def __init__(self, wallet):
9216 super().__init__(wallet)
9217
9218 async def forward(
9219 self,
9220 axons: List[bt.axon],
9221 synapse: bt.Synapse = bt.Synapse(),
9222 timeout: float = 12,
9223 deserialize: bool = True,
9224 run_async: bool = True,
9225 streaming: bool = False,
9226 ):
9227 if streaming:
9228 raise NotImplementedError("Streaming not implemented yet.")
9229
9230 async def query_all_axons(streaming: bool):
9231 """Queries all axons for responses."""
9232
9233 async def single_axon_response(i, axon):
9234 """Queries a single axon for a response."""
9235
9236 start_time = time.time()
9237 s = synapse.copy()
9238 # Attach some more required data so it looks real
9239 s = self.preprocess_synapse_for_request(axon, s, timeout)
9240 # We just want to mock the response, so we'll just fill in some data
9241 process_time = random.random() # simulated latency in [0, 1); the 408 branch below is unreachable for any timeout >= 1
9242 if process_time < timeout:
9243 # Update the status code and status message of the dendrite to match the axon
9244 # TODO (developer): replace with your own expected synapse data
9245 s.dummy_output = s.dummy_input * 2
9246 s.dendrite.status_code = 200
9247 s.dendrite.status_message = "OK"
9248 s.dendrite.process_time = str(process_time) # set on the copy, not the shared input synapse
9249 else:
9250 s.dummy_output = 0
9251 s.dendrite.status_code = 408
9252 s.dendrite.status_message = "Timeout"
9253 s.dendrite.process_time = str(timeout)
9255
9256 # Return the updated synapse object after deserializing if requested
9257 if deserialize:
9258 return s.deserialize()
9259 else:
9260 return s
9261
9262 return await asyncio.gather(
9263 *(
9264 single_axon_response(i, target_axon)
9265 for i, target_axon in enumerate(axons)
9266 )
9267 )
9268
9269 return await query_all_axons(streaming)
9270
9271 def __str__(self) -> str:
9272 """
9273 Returns a string representation of the Dendrite object.
9274
9275 Returns:
9276 str: The string representation of the Dendrite object in the format "dendrite(<user_wallet_address>)".
9277 """
9278 return "MockDendrite({})".format(self.keypair.ss58_address)
9279
9280
9281
9282---
9283File: /coding/protocol.py
9284---
9285
9286# The MIT License (MIT)
9287# Copyright © 2024 Yuma Rao
9288# Copyright © 2023 Opentensor Foundation
9289# Copyright © 2024 Macrocosmos
9290# Copyright © 2024 Broke
9291
9292
9293# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
9294# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
9295# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9296# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9297
9298# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
9299# the Software.
9300
9301# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
9302# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
9303# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
9304# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
9305# DEALINGS IN THE SOFTWARE.
9306
9307import json
9308import pydantic
9309import bittensor as bt
9310
9311from starlette.responses import StreamingResponse
9312from typing import List, AsyncIterator, Any, Optional
9313
9314from coding.schemas import ChatMessage, File
9315from coding.constants import COMPETITION_ID
9316
9317
9318class LogicSynapse(bt.Synapse):
9319 """
9320 LogicSynapse is a Synapse that is used to get the logic of the miner.
9321
9322 Attributes:
9323 logic (dict): A dictionary where the key is a filename and the value is the file contents
9324 """
9325 logic: dict = pydantic.Field(
9326 {},
9327 title="logic",
9328 description="A dictionary where the key is a filename and the value is the file contents",
9329 )
9330
9331class HFModelSynapse(bt.Synapse):
9332 """
9333 HFModelSynapse is a Synapse that is used to get the HF model name that this miner published to HF
9334
9335 Attributes:
9336 model_name (Optional[str]): The HF model name that this miner published to HF
9337 prompt_tokens (Optional[dict]): Dictionary containing FIM prompt tokens (currently commented out below):
9338 - "prefix": the prefix of the prompt
9339 - "middle": the middle of the prompt
9340 - "suffix": the suffix of the prompt
9341
9342 """
9343 model_name: Optional[str] = ""
9344 competition_id: Optional[int] = COMPETITION_ID
9345 # prompt_tokens: Optional[dict] = None
9346
9347
9348class StreamCodeSynapse(bt.StreamingSynapse):
9349 """
9350 StreamCodeSynapse is a specialized implementation of the `StreamingSynapse` tailored for coding prompts within
9351 the Bittensor network. This class is intended to interact with a streaming response that contains a sequence of tokens,
9352 which represent parts of a code completion or chat reply.
9353
9354 As a developer, when using or extending the `StreamCodeSynapse` class, you should be primarily focused on the structure
9355 and behavior of the prompts you are working with. The class has been designed to seamlessly handle the streaming,
9356 decoding, and accumulation of tokens that represent these prompts.
9357
9358 Attributes:
9359 - `query` (str): The query to be answered by the miner.
9360 - `script` (str): A Python script that is being worked with, if any.
9361 - `messages` (List[ChatMessage]): Chat messages that accompany the query.
9362 - `attachments` (List[Any]): Attachments to be sent alongside the query.
9363 - `files` (List[File]): Files relevant to the query.
9364 - `uid` (int): The uid of the miner that the task is sent to.
9365 - `completion` (str): Stores the processed result of the streaming tokens. As tokens are streamed, decoded, and
9366 processed, they are accumulated in the completion attribute. This represents the "final"
9367 product or result of the streaming process.
9368
9369 Methods:
9370 - `process_streaming_response`: This method asynchronously processes the incoming streaming response by decoding
9371 the tokens and accumulating them in the `completion` attribute.
9372
9373 - `deserialize`: Converts the `completion` attribute into its desired data format, in this case, a string.
9374
9375 - `extract_response_json`: Extracts relevant JSON data from the response, useful for gaining insights on the response's
9376 metadata or for debugging purposes.
9377
9378 Note: While you can directly use the `StreamCodeSynapse` class, it's designed to be extensible. Thus, you can create
9379 subclasses to further customize behavior for specific scenarios or requirements.
9382 """
9383
9388 query: str = pydantic.Field(
9389 "",
9390 title="query",
9391 description="The query",
9392 )
9393
9394 script: str = pydantic.Field(
9395 "",
9396 title="script",
9397 description="A python script that is being worked with",
9398 )
9399
9400 messages: List[ChatMessage] = pydantic.Field(
9401 [],
9402 title="messages",
9403 description="A list of messages",
9404 )
9405
9406 attachments: List[Any] = pydantic.Field(
9407 [],
9408 title="attachments",
9409 description="Attachments to be sent alongside the query",
9410 )
9411
9412 completion: str = pydantic.Field(
9413 "",
9414 title="Completion",
9415 description="Completion status of the current CodeSynapse object. This attribute is mutable and can be updated.",
9416 )
9417
9418 files: List[File] = pydantic.Field(
9419 [],
9420 title="Files",
9421 description="Files",
9422 )
9423
9424 uid: int = pydantic.Field(
9425 9999,
9426 title="UID",
9427 description="Miner uid to send task to",
9428 )
9429
9430 async def process_streaming_response(
9431 self, response: StreamingResponse
9432 ) -> AsyncIterator[str]:
9433 """
9434 `process_streaming_response` is an asynchronous method designed to process the incoming streaming response from the
9435 Bittensor network. It's the heart of the StreamCodeSynapse class, ensuring that streaming tokens, which represent
9436 prompts or messages, are decoded and appropriately managed.
9437
9438 As the streaming response is consumed, the tokens are decoded from their 'utf-8' encoded format, split based on
9439 newline characters, and concatenated into the `completion` attribute. This accumulation of decoded tokens in the
9440 `completion` attribute allows for a continuous and coherent accumulation of the streaming content.
9441
9442 Args:
9443 response: The streaming response object containing the content chunks to be processed. Each chunk in this
9444 response is expected to be a set of tokens that can be decoded and split into individual messages or prompts.
9445 """
9446 if self.completion is None:
9447 self.completion = ""
9448
9449 async for chunk in response.content.iter_any():
9450 tokens = chunk.decode("utf-8")
9451
9452 try:
9453 data = json.loads(tokens)
9454 if isinstance(data, dict) or isinstance(data, list):
9455 # Process the dictionary data as needed
9456 self.completion = self.completion + json.dumps(data)
9457 yield json.dumps(data)
9458 else:
9459 self.completion = self.completion + tokens
9460 yield tokens
9461 except json.JSONDecodeError:
9462 self.completion = self.completion + tokens
9463 yield tokens
9464 # if self.completion is None: #TODO remove this once confirm that above works
9465 # self.completion = ""
9466
9467 # async for chunk in response.content.iter_any():
9468 # tokens = chunk.decode("utf-8")
9469
9470 # self.completion = self.completion + "".join([t for t in tokens if t])
9471 # yield tokens
9472
9473 def deserialize(self) -> str:
9474 """
9475 Deserializes the response by returning the completion attribute.
9476
9477 Returns:
9478 str: The completion result.
9479 """
9480 return self.completion
9481
9482 def extract_response_json(self, response: StreamingResponse) -> dict:
9483 """
9484 `extract_response_json` is a method that performs the crucial task of extracting pertinent JSON data from the given
9485 response. The method is especially useful when you need a detailed insight into the streaming response's metadata
9486 or when debugging response-related issues.
9487
9488 Beyond just extracting the JSON data, the method also processes and structures the data for easier consumption
9489 and understanding. For instance, it extracts specific headers related to dendrite and axon, offering insights
9490 about the Bittensor network's internal processes. The method ultimately returns a dictionary with a structured
9491 view of the extracted data.
9492
9493 Args:
9494 response: The response object from which to extract the JSON data. This object typically includes headers and
9495 content which can be used to glean insights about the response.
9496
9497 Returns:
9498 dict: A structured dictionary containing:
9499 - Basic response metadata such as name, timeout, total_size, and header_size.
9500 - Dendrite and Axon related information extracted from headers.
9501 - The query and attachments pertaining to the current StreamCodeSynapse instance.
9502 - The accumulated completion.
9503 """
9504 headers = {
9505 k.decode("utf-8"): v.decode("utf-8")
9506 for k, v in response.__dict__["_raw_headers"]
9507 }
9508
9509 def extract_info(prefix):
9510 return {
9511 key.split("_")[-1]: value
9512 for key, value in headers.items()
9513 if key.startswith(prefix)
9514 }
9515
9516 return {
9517 "name": headers.get("name", ""),
9518 "timeout": float(headers.get("timeout", 0)),
9519 "total_size": int(headers.get("total_size", 0)),
9520 "header_size": int(headers.get("header_size", 0)),
9521 "dendrite": extract_info("bt_header_dendrite"),
9522 "axon": extract_info("bt_header_axon"),
9523 "query": self.query,
9524 "attachments": self.attachments,
9525 "completion": self.completion,
9526 }
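One detail of `extract_info` worth spelling out: because it keeps only the last underscore-separated token of each matching header key, `bt_header_dendrite_status_code` collapses to the key `code` and `bt_header_dendrite_process_time` to `time` (any two headers sharing a final token would therefore collide). A small standalone illustration with invented header values:

```python
headers = {
    "bt_header_dendrite_status_code": "200",
    "bt_header_dendrite_process_time": "0.42",
    "bt_header_axon_ip": "127.0.0.1",
    "name": "StreamCodeSynapse",
}

def extract_info(prefix: str) -> dict:
    # Same logic as the method above: keep keys with the prefix,
    # reduced to their final underscore-separated token.
    return {
        key.split("_")[-1]: value
        for key, value in headers.items()
        if key.startswith(prefix)
    }

print(extract_info("bt_header_dendrite"))  # {'code': '200', 'time': '0.42'}
print(extract_info("bt_header_axon"))      # {'ip': '127.0.0.1'}
```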
9527
9528
9529
9530---
9531File: /contrib/CODE_REVIEW_DOCS.md
9532---
9533
9534# Code Review
9535### Conceptual Review
9536
9537A review can be a conceptual review, where the reviewer leaves a comment
9538 * `Concept (N)ACK`, meaning "I do (not) agree with the general goal of this pull
9539 request",
9540 * `Approach (N)ACK`, meaning `Concept ACK`, but "I do (not) agree with the
9541 approach of this change".
9542
9543A `NACK` needs to include a rationale why the change is not worthwhile.
9544NACKs without accompanying reasoning may be disregarded.
9545After conceptual agreement on the change, code review can be provided. A review
9546begins with `ACK BRANCH_COMMIT`, where `BRANCH_COMMIT` is the top of the PR
9547branch, followed by a description of how the reviewer did the review. The
9548following language is used within pull request comments:
9549
9550 - "I have tested the code", involving change-specific manual testing in
9551 addition to running the unit, functional, or fuzz tests, and in case it is
9552 not obvious how the manual testing was done, it should be described;
9553 - "I have not tested the code, but I have reviewed it and it looks
9554 OK, I agree it can be merged";
9555 - A "nit" refers to a trivial, often non-blocking issue.
9556
9557### Code Review
9558Project maintainers reserve the right to weigh the opinions of peer reviewers
9559using common sense judgement and may also weigh based on merit. Reviewers that
9560have demonstrated a deeper commitment and understanding of the project over time
9561or who have clear domain expertise may naturally have more weight, as one would
9562expect in all walks of life.
9563
9564Where a patch set affects consensus-critical code, the bar will be much
9565higher in terms of discussion and peer review requirements, keeping in mind that
9566mistakes could be very costly to the wider community. This includes refactoring
9567of consensus-critical code.
9568
9569Where a patch set proposes to change the Bittensor consensus, it must have been
9570discussed extensively on the discord server and other channels, be accompanied by a widely
9571discussed BIP and have a generally widely perceived technical consensus of being
9572a worthwhile change based on the judgement of the maintainers.
9573
9574### Finding Reviewers
9575
9576As most reviewers are themselves developers with their own projects, the review
9577process can be quite lengthy, and some amount of patience is required. If you find
9578that you've been waiting for a pull request to be given attention for several
9579months, there may be a number of reasons for this, some of which you can do something
9580about:
9581
9582 - It may be because of a feature freeze due to an upcoming release. During this time,
9583 only bug fixes are taken into consideration. If your pull request is a new feature,
9584 it will not be prioritized until after the release. Wait for the release.
9585 - It may be because the changes you are suggesting do not appeal to people. Rather than
9586 nits and critique, which require effort and means they care enough to spend time on your
9587 contribution, thundering silence is a good sign of widespread (mild) dislike of a given change
9588 (people tend to assume that if a proposal had merit, *others* would have spoken up for it). Don't take
9589 that personally, though! Instead, take another critical look at what you are suggesting
9590 and see if it: changes too much, is too broad, doesn't adhere to the
9591 [developer notes](DEVELOPMENT_WORKFLOW.md), is dangerous or insecure, is messily written, etc.
9592 Identify and address any of the issues you find. Then ask e.g. on IRC if someone could give
9593 their opinion on the concept itself.
9594 - It may be because your code is too complex for all but a few people, and those people
9595 may not have realized your pull request even exists. A great way to find people who
9596 are qualified and care about the code you are touching is the
9597 [Git Blame feature](https://docs.github.com/en/github/managing-files-in-a-repository/managing-files-on-github/tracking-changes-in-a-file). Simply
9598 look up who last modified the code you are changing and see if you can find
9599 them and give them a nudge. Don't be incessant about the nudging, though.
9600 - Finally, if all else fails, ask on IRC or elsewhere for someone to give your pull request
9601 a look. If you think you've been waiting for an unreasonably long time (say,
9602 more than a month) for no particular reason (a few lines changed, etc.),
9603 this is totally fine. Try to return the favor when someone else is asking
9604 for feedback on their code, and the universe balances out.
9605 - Remember that the best thing you can do while waiting is give review to others!
9606
9607
9608---
9609File: /contrib/CONTRIBUTING.md
9610---
9611
9612# Contributing to Bittensor Subnet Development
9613
9614The following is a set of guidelines for contributing to the Bittensor ecosystem. These are **HIGHLY RECOMMENDED** guidelines, but not hard-and-fast rules. Use your best judgment, and feel free to propose changes to this document in a pull request.
9615
9616## Table Of Contents
96171. [How Can I Contribute?](#how-can-i-contribute)
9618 1. [Communication Channels](#communication-channels)
9619 1. [Code Contribution General Guidelines](#code-contribution-general-guidelines)
9620 1. [Pull Request Philosophy](#pull-request-philosophy)
9621 1. [Pull Request Process](#pull-request-process)
9622 1. [Addressing Feedback](#addressing-feedback)
9623 1. [Squashing Commits](#squashing-commits)
9624 1. [Refactoring](#refactoring)
9625 1. [Peer Review](#peer-review)
9626 1. [Suggesting Features](#suggesting-enhancements-and-features)
9627
9628
9629## How Can I Contribute?
9630TODO(developer): Define your desired contribution procedure.
9631
9632## Communication Channels
9633TODO(developer): Place your communication channels here
9634
9635> Please follow the Bittensor Subnet [style guide](./STYLE.md) regardless of your contribution type.
9636
9637Here is a high-level summary:
9638- Code consistency is crucial; adhere to established programming language conventions.
9639- Use `black` to format your Python code; it ensures readability and consistency.
9640- Write concise Git commit messages; summarize changes in ~50 characters.
9641- Follow these six commit rules:
9642 - Atomic Commits: Focus on one task or fix per commit.
9643 - Subject and Body Separation: Use a blank line to separate the subject from the body.
9644 - Subject Line Length: Keep it under 50 characters for readability.
9645 - Imperative Mood: Write subject line as if giving a command or instruction.
9646 - Body Text Width: Wrap text manually at 72 characters.
9647 - Body Content: Explain what changed and why, not how.
9648- Make use of your commit messages to simplify project understanding and maintenance.
9649
9650> For clear examples of each of the commit rules, see the style guide's [rules](./STYLE.md#the-six-rules-of-a-great-commit) section.
9651
9652### Code Contribution General Guidelines
9653
9654> Review the Bittensor Subnet [style guide](./STYLE.md) and [development workflow](./DEVELOPMENT_WORKFLOW.md) before contributing.
9655
9656
9657#### Pull Request Philosophy
9658
9659Patchsets and enhancements should always be focused. A pull request could add a feature, fix a bug, or refactor code, but it should not contain a mixture of these. Please also avoid 'super' pull requests which attempt to do too much, are overly large, or overly complex as this makes review difficult.
9660
9661Specifically, pull requests must adhere to the following criteria:
9662- Contain fewer than 50 files. PRs with 50 or more files will be closed.
9663- If a PR introduces a new feature, it *must* include corresponding tests.
9664- Other PRs (bug fixes, refactoring, etc.) should ideally also have tests, as they provide proof of concept and prevent regression.
9665- Categorize your PR properly by using GitHub labels. This aids in the review process by informing reviewers about the type of change at a glance.
9666- Make sure your code includes adequate comments. These should explain why certain decisions were made and how your changes work.
9667- If your changes are extensive, consider breaking your PR into smaller, related PRs. This makes your contributions easier to understand and review.
9668- Be active in the discussion about your PR. Respond promptly to comments and questions to help reviewers understand your changes and speed up the acceptance process.
9669
9670Generally, all pull requests must:
9671
9672 - Have a clear use case, fix a demonstrable bug or serve the greater good of the project (e.g. refactoring for modularisation).
9673 - Be well peer-reviewed.
9674 - Follow code style guidelines.
9675 - Not break the existing test suite.
9676 - Where bugs are fixed, where possible, there should be unit tests demonstrating the bug and also proving the fix.
9677 - Change relevant comments and documentation when behaviour of code changes.
9678
9679#### Pull Request Process
9680
9681Please follow these steps to have your contribution considered by the maintainers:
9682
9683*Before* creating the PR:
96841. Read the [development workflow](./DEVELOPMENT_WORKFLOW.md) defined for this repository to understand our workflow.
96852. Ensure your PR meets the criteria stated in the 'Pull Request Philosophy' section.
96863. Include relevant tests for any fixed bugs or new features as stated in the [testing guide](./TESTING.md).
96874. Ensure your commit messages are clear and concise. Include the issue number if applicable.
96885. If you have multiple commits, rebase them into a single commit using `git rebase -i`.
96896. Explain what your changes do and why you think they should be merged in the PR description consistent with the [style guide](./STYLE.md).
9690
9691*After* creating the PR:
96921. Verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing after you submit your pull request.
96932. Label your PR using GitHub's labeling feature. The labels help categorize the PR and streamline the review process.
96943. Document your code with comments that provide a clear understanding of your changes. Explain any non-obvious parts of your code or design decisions you've made.
96954. If your PR has extensive changes, consider splitting it into smaller, related PRs. This reduces the cognitive load on the reviewers and speeds up the review process.
9696
9697Please be responsive and participate in the discussion on your PR! This aids in clarifying any confusion or concerns and leads to quicker resolution and merging of your PR.
9698
9699> Note: If your changes are not ready for merge but you want feedback, create a draft pull request.
9700
9701Following these criteria will aid in quicker review and potential merging of your PR.
9702While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted.
9703
9704When you are ready to submit your changes, create a pull request:
9705
9706> **Always** follow the [style guide](./STYLE.md) and [development workflow](./DEVELOPMENT_WORKFLOW.md) before submitting pull requests.
9707
9708After you submit a pull request, it will be reviewed by the maintainers. They may ask you to make changes. Please respond to any comments and push your changes as a new commit.
9709
9710> Note: Be sure to merge the latest from "upstream" before making a pull request:
9711
9712```bash
9713git remote add upstream https://github.com/opentensor/bittensor.git # TODO(developer): replace with your repo URL
9714git fetch upstream
9715git merge upstream/<your-branch-name>
9716git push origin <your-branch-name>
9717```
9718
9719#### Addressing Feedback
9720
9721After submitting your pull request, expect comments and reviews from other contributors. You can add more commits to your pull request by committing them locally and pushing to your fork.
9722
9723You are expected to reply to any review comments before your pull request is merged. You may update the code or reject the feedback if you do not agree with it, but you should express so in a reply. If there is outstanding feedback and you are not actively working on it, your pull request may be closed.
9724
9725#### Squashing Commits
9726
9727If your pull request contains fixup commits (commits that change the same line of code repeatedly) or too fine-grained commits, you may be asked to [squash](https://git-scm.com/docs/git-rebase#_interactive_mode) your commits before it will be reviewed. The basic squashing workflow is shown below.
9728
9729 git checkout your_branch_name
9730 git rebase -i HEAD~n
9731 # n is normally the number of commits in the pull request.
9732 # Set commits (except the one in the first line) from 'pick' to 'squash', save and quit.
9733 # On the next screen, edit/refine commit messages.
9734 # Save and quit.
9735 git push -f # (force push to GitHub)
9736
9737Please update the resulting commit message, if needed. It should read as a coherent message. In most cases, this means not just listing the interim commits.
9738
9739If your change contains a merge commit, the above workflow may not work and you will need to remove the merge commit first. See the next section for details on how to rebase.
9740
9741Please refrain from creating several pull requests for the same change. Use the pull request that is already open (or was created earlier) to amend changes. This preserves the discussion and review that happened earlier for the respective change set.
9742
9743The length of time required for peer review is unpredictable and will vary from pull request to pull request.
9744
9745#### Refactoring
9746
9747Refactoring is a necessary part of any software project's evolution. The following guidelines cover refactoring pull requests for the project.
9748
9749There are three categories of refactoring: code-only moves, code style fixes, and code refactoring. In general, refactoring pull requests should not mix these three kinds of activities in order to make refactoring pull requests easy to review and uncontroversial. In all cases, refactoring PRs must not change the behaviour of code within the pull request (bugs must be preserved as is).
9750
9751Project maintainers aim for a quick turnaround on refactoring pull requests, so where possible keep them short, uncomplex and easy to verify.
9752
9753Pull requests that refactor the code should not be made by new contributors. It requires a certain level of experience to know where the code belongs and to understand the full ramifications (including the rebase effort for open pull requests). Trivial pull requests or pull requests that refactor the code with no clear benefits may be immediately closed by the maintainers to reduce unnecessary review workload.
9754
9755#### Peer Review
9756
9757Anyone may participate in peer review which is expressed by comments in the pull request. Typically reviewers will review the code for obvious errors, as well as test out the patch set and opine on the technical merits of the patch. Project maintainers take into account the peer review when determining if there is consensus to merge a pull request (remember that discussions may have taken place elsewhere, not just on GitHub). The following language is used within pull-request comments:
9758
9759- ACK means "I have tested the code and I agree it should be merged";
9760- NACK means "I disagree this should be merged", and must be accompanied by sound technical justification. NACKs without accompanying reasoning may be disregarded;
9761- utACK means "I have not tested the code, but I have reviewed it and it looks OK, I agree it can be merged";
9762- Concept ACK means "I agree in the general principle of this pull request";
9763- Nit refers to trivial, often non-blocking issues.
9764
9765Reviewers should include the commit(s) they have reviewed in their comments. This can be done by copying the commit SHA1 hash.
9766
9767A pull request that changes consensus-critical code is considerably more involved than a pull request that adds a feature to the wallet, for example. Such patches must be reviewed and thoroughly tested by several reviewers who are knowledgeable about the changed subsystems. Where new features are proposed, it is helpful for reviewers to try out the patch set on a test network and indicate that they have done so in their review. Project maintainers will take this into consideration when merging changes.
9768
9769For a more detailed description of the review process, see the [Code Review Guidelines](CODE_REVIEW_DOCS.md).
9770
9771> **Note:** If you find a **Closed** issue that seems like it is the same thing that you're experiencing, open a new issue and include a link to the original issue in the body of your new one.
9772
9773#### How Do I Submit A (Good) Bug Report?
9774
9775Please track bugs as GitHub issues.
9776
9777Explain the problem and include additional details to help maintainers reproduce the problem:
9778
9779* **Use a clear and descriptive title** for the issue to identify the problem.
9780* **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you started the application, e.g. which command exactly you used in the terminal, or how you started Bittensor otherwise. When listing steps, **don't just say what you did, but explain how you did it**. For example, if you ran with a set of custom configs, explain if you used a config file or command line arguments.
9781* **Provide specific examples to demonstrate the steps**. Include links to files or GitHub projects, or copy/pasteable snippets, which you use in those examples. If you're providing snippets in the issue, use [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines).
9782* **Describe the behavior you observed after following the steps** and point out what exactly is the problem with that behavior.
9783* **Explain which behavior you expected to see instead and why.**
9784* **Include screenshots and animated GIFs** which show you following the described steps and clearly demonstrate the problem. You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux.
9785* **If you're reporting that Bittensor crashed**, include a crash report with a stack trace from the operating system. On macOS, the crash report will be available in `Console.app` under "Diagnostic and usage information" > "User diagnostic reports". Include the crash report in the issue in a [code block](https://help.github.com/articles/markdown-basics/#multiple-lines), a [file attachment](https://help.github.com/articles/file-attachments-on-issues-and-pull-requests/), or put it in a [gist](https://gist.github.com/) and provide a link to that gist.
9786* **If the problem is related to performance or memory**, include a CPU profile capture with your report; if you're using a GPU, include a GPU profile capture as well. Look into the [PyTorch Profiler](https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html) to inspect the memory usage of your model.
9787* **If the problem wasn't triggered by a specific action**, describe what you were doing before the problem happened and share more information using the guidelines below.
9788
9789Provide more context by answering these questions:
9790
9791* **Did the problem start happening recently** (e.g. after updating to a new version) or was this always a problem?
9792* If the problem started happening recently, **can you reproduce the problem in an older version of Bittensor?**
9793* **Can you reliably reproduce the issue?** If not, provide details about how often the problem happens and under which conditions it normally happens.
9794
9795Include details about your configuration and environment:
9796
9797* **Which version of Bittensor Subnet are you using?**
9798* **What commit hash are you on?** You can get the exact commit hash by checking `git log` and pasting the full commit hash.
9799* **What's the name and version of the OS you're using**?
9800* **Are you running Bittensor Subnet in a virtual machine?** If so, which VM software are you using and which operating systems and versions are used for the host and the guest?
9801* **Are you running Bittensor Subnet in a dockerized container?** If so, have you made sure that your docker container contains your latest changes and is up to date with Master branch?
9802
9803### Suggesting Enhancements and Features
9804
9805This section guides you through submitting an enhancement suggestion, including completely new features and minor improvements to existing functionality. Following these guidelines helps maintainers and the community understand your suggestion :pencil: and find related suggestions :mag_right:.
9806
9807When you are creating an enhancement suggestion, please [include as many details as possible](#how-do-i-submit-a-good-enhancement-suggestion). Fill in [the template](https://bit.ly/atom-behavior-pr), including the steps that you imagine you would take if the feature you're requesting existed.
9808
9809#### Before Submitting An Enhancement Suggestion
9810
9811* **Check the [debugging guide](./DEBUGGING.md)** for tips — you might discover that the enhancement is already available. Most importantly, check if you're using the latest version of the project first.
9812
9813#### How Do I Submit A (Good) Feature Suggestion
9814
9815* **Use a clear and descriptive title** for the issue to identify the problem.
9816* **Provide a step-by-step description of the suggested enhancement** in as many details as possible.
9817* **Provide specific examples to demonstrate the steps**. Include copy/pasteable snippets which you use in those examples, as [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines).
9818* **Describe the current behavior** and **explain which behavior you expected to see instead** and why.
9819* **Include screenshots and animated GIFs** which help you demonstrate the steps or point out the part of the project which the suggestion is related to. You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux.
9820* **Explain why this enhancement would be useful** to most users.
9821* **List some other text editors or applications where this enhancement exists.**
9822* **Specify the name and version of the OS you're using.**
9823
9824Thank you for considering contributing to Bittensor! Any help is greatly appreciated along this journey to incentivize open and permissionless intelligence.
9825
9826
9827
9828---
9829File: /contrib/DEVELOPMENT_WORKFLOW.md
9830---
9831
9832# Bittensor Subnet Development Workflow
9833
9834This is a highly advisable workflow to follow to keep your subnet project organized and foster ease of contribution.
9835
9836## Table of contents
9837
9838- [Bittensor Subnet Development Workflow](#bittensor-subnet-development-workflow)
9839 - [Main Branches](#main-branches)
9840 - [Development Model](#development-model)
9841 - [Feature Branches](#feature-branches)
9842 - [Release Branches](#release-branches)
9843 - [Hotfix Branches](#hotfix-branches)
9844 - [Git Operations](#git-operations)
9845 - [Creating a Feature Branch](#creating-a-feature-branch)
9846 - [Merging Feature Branch into Staging](#merging-feature-branch-into-staging)
9847 - [Creating a Release Branch](#creating-a-release-branch)
9848 - [Finishing a Release Branch](#finishing-a-release-branch)
9849 - [Creating a Hotfix Branch](#creating-a-hotfix-branch)
9850 - [Finishing a Hotfix Branch](#finishing-a-hotfix-branch)
9851 - [Continuous Integration (CI) and Continuous Deployment (CD)](#continuous-integration-ci-and-continuous-deployment-cd)
9852 - [Versioning and Release Notes](#versioning-and-release-notes)
9853 - [Pending Tasks](#pending-tasks)
9854
9855## Main Branches
9856
9857Bittensor's codebase consists of two main branches: **main** and **staging**.
9858
9859**main**
9860- This is Bittensor's live production branch, which should only be updated by the core development team. This branch is protected, so refrain from pushing or merging into it unless authorized.
9861
9862**staging**
9863- This branch is continuously updated and is where you propose and merge changes. It's essentially Bittensor's active development branch.
9864
9865## Development Model
9866
9867### Feature Branches
9868
9869- Branch off from: `staging`
9870- Merge back into: `staging`
9871- Naming convention: `feature/<ticket>/<descriptive-sentence>`
9872
9873Feature branches are used to develop new features for upcoming or future releases. They exist as long as the feature is in development, but will eventually be merged into `staging` or discarded. Always delete your feature branch after merging to avoid unnecessary clutter.
9874
9875### Release Branches
9876
9877- Branch off from: `staging`
9878- Merge back into: `staging` and then `main`
9879- Naming convention: `release/<version>/<descriptive-message>/<creator's-name>`
9880
9881Release branches support the preparation of a new production release, allowing for minor bug fixes and preparation of metadata (version number, configuration, etc). All new features should be merged into `staging` and wait for the next big release.
9882
9883### Hotfix Branches
9884
9885General workflow:
9886
9887- Branch off from: `main` or `staging`
9888- Merge back into: `staging` then `main`
9889- Naming convention: `hotfix/<version>/<descriptive-message>/<creator's-name>`
9890
9891Hotfix branches are meant for quick fixes in the production environment. When a critical bug in a production version must be resolved immediately, a hotfix branch is created.
9892
9893## Git Operations
9894
9895#### Creating a Feature Branch
9896
98971. Branch from the **staging** branch.
9898 1. Command: `git checkout -b feature/my-feature staging`
9899
9900> Rebase frequently with the updated staging branch so you do not face big conflicts before submitting your pull request. Remember, syncing your changes with other developers could also help you avoid big conflicts.
9901
9902#### Merging Feature Branch into Staging
9903
9904In other words, integrate your changes into a branch that will be tested and prepared for release.
9905
99061. Switch branch to staging: `git checkout staging`
99072. Merging feature branch into staging: `git merge --no-ff feature/my-feature`
99083. Pushing changes to staging: `git push origin staging`
99094. Delete feature branch: `git branch -d feature/my-feature` (alternatively, this can be done via the GitHub web UI)
9910
9911This operation is done by GitHub when merging a PR.
9912
9913So, what you have to keep in mind is:
9914- Open the PR against the `staging` branch.
9915- After merging a PR you should delete your feature branch. This will be strictly enforced.
9916
9917#### Creating a Release Branch
9918
99191. Create branch from staging: `git checkout -b release/3.4.0/descriptive-message/creator's_name staging`
99202. Updating version with major or minor: `./scripts/update_version.sh major|minor`
99213. Commit file changes with new version: `git commit -a -m "Updated version to 3.4.0"`
9922
9923
9924#### Finishing a Release Branch
9925
9926This involves releasing stable code and generating a new version for bittensor.
9927
99281. Switch branch to main: `git checkout main`
99292. Merge release branch into main: `git merge --no-ff release/3.4.0/optional-descriptive-message`
99303. Tag changeset: `git tag -a v3.4.0 -m "Releasing v3.4.0: some comment about it"`
99314. Push changes to main: `git push origin main`
99325. Push tags to origin: `git push origin --tags`
9933
9934To keep the changes made in the __release__ branch, we need to merge those back into `staging`:
9935
9936- Switch branch to staging: `git checkout staging`.
9937- Merging release branch into staging: `git merge --no-ff release/3.4.0/optional-descriptive-message`
9938
9939This step may well lead to a merge conflict (probably even, since we have changed the version number). If so, fix it and commit.
9940
9941
9942#### Creating a Hotfix Branch
99431. Create branch from main: `git checkout -b hotfix/3.3.4/descriptive-message/creator's-name main`
99442. Update patch version: `./scripts/update_version.sh patch`
99453. Commit file changes with new version: `git commit -a -m "Updated version to 3.3.4"`
99464. Fix the bug and commit the fix: `git commit -m "Fixed critical production issue X"`
9947
9948#### Finishing a Hotfix Branch
9949
9950Finishing a hotfix branch involves merging the bugfix into both `main` and `staging`.
9951
99521. Switch branch to main: `git checkout main`
99532. Merge hotfix into main: `git merge --no-ff hotfix/3.3.4/optional-descriptive-message`
99543. Tag new version: `git tag -a v3.3.4 -m "Releasing v3.3.4: descriptive comment about the hotfix"`
99554. Push changes to main: `git push origin main`
99565. Push tags to origin: `git push origin --tags`
99576. Switch branch to staging: `git checkout staging`
99587. Merge hotfix into staging: `git merge --no-ff hotfix/3.3.4/descriptive-message/creator's-name`
99598. Push changes to origin/staging: `git push origin staging`
99609. Delete hotfix branch: `git branch -d hotfix/3.3.4/optional-descriptive-message`
9961
9962The one exception to the rule here is that, **when a release branch currently exists, the hotfix changes need to be merged into that release branch, instead of** `staging`. Back-merging the bugfix into the __release__ branch will eventually result in the bugfix being merged into `staging` too, when the release branch is finished. (If work in `staging` immediately requires this bugfix and cannot wait for the release branch to be finished, you may safely merge the bugfix into `staging` now already as well.)
9963
9964Finally, we remove the temporary branch:
9965
9966- `git branch -d hotfix/3.3.4/optional-descriptive-message`

9967## Continuous Integration (CI) and Continuous Deployment (CD)
9968
9969Continuous Integration (CI) is a software development practice where members of a team integrate their work frequently. Each integration is verified by an automated build and test process to detect integration errors as quickly as possible.
9970
9971Continuous Deployment (CD) is a software engineering approach in which software functionalities are delivered frequently through automated deployments.
9972
9973- **CircleCI jobs**: Create jobs in CircleCI to automate merging staging into main and cutting a release version (needed to release code), as well as building and testing Bittensor (needed to merge PRs).
9974
9975> It is highly recommended to set up your own CircleCI pipeline for your subnet.
9976
9977## Versioning and Release Notes
9978
9979Semantic versioning helps keep track of the different versions of the software. When code is merged into main, generate a new version.
9980
9981Release notes provide documentation for each version released to the users, highlighting the new features, improvements, and bug fixes. When merged into main, generate GitHub release and release notes.
9982
9983## Pending Tasks
9984
9985Follow these steps when you are contributing to the bittensor subnet:
9986
9987- Determine if main and staging are different
9988- Determine what is in staging that is not merged yet
9989 - Document not released developments
9990 - When merged into staging, generate information about what's merged into staging but not released.
9991 - When merged into main, generate GitHub release and release notes.
9992- CircleCI jobs
9993 - Merge staging into main and release version (needed to release code)
9994 - Build and Test Bittensor (needed to merge PRs)
9995
9996This document can be improved as the Bittensor project continues to develop and change.
9997
9998
9999
10000---
10001File: /contrib/STYLE.md
10002---
10003
10004# Style Guide
10005
10006A project’s long-term success rests (among other things) on its maintainability, and a maintainer has few tools more powerful than his or her project’s log. It’s worth taking the time to learn how to care for one properly. What may be a hassle at first soon becomes habit, and eventually a source of pride and productivity for all involved.
10007
10008Most programming languages have well-established conventions as to what constitutes idiomatic style, i.e. naming, formatting and so on. There are variations on these conventions, of course, but most developers agree that picking one and sticking to it is far better than the chaos that ensues when everybody does their own thing.
10009
10010# Table of Contents
100111. [Code Style](#code-style)
100122. [Naming Conventions](#naming-conventions)
100133. [Git Commit Style](#git-commit-style)
100144. [The Six Rules of a Great Commit](#the-six-rules-of-a-great-commit)
10015 - [1. Atomic Commits](#1-atomic-commits)
10016 - [2. Separate Subject from Body with a Blank Line](#2-separate-subject-from-body-with-a-blank-line)
10017 - [3. Limit the Subject Line to 50 Characters](#3-limit-the-subject-line-to-50-characters)
10018 - [4. Use the Imperative Mood in the Subject Line](#4-use-the-imperative-mood-in-the-subject-line)
10019 - [5. Wrap the Body at 72 Characters](#5-wrap-the-body-at-72-characters)
10020 - [6. Use the Body to Explain What and Why vs. How](#6-use-the-body-to-explain-what-and-why-vs-how)
100215. [Tools Worth Mentioning](#tools-worth-mentioning)
10022 - [Using `--fixup`](#using---fixup)
10023 - [Interactive Rebase](#interactive-rebase)
100246. [Pull Request and Squashing Commits Caveats](#pull-request-and-squashing-commits-caveats)
10025
10026
10027### Code style
10028
10029#### General Style
10030Python's official style guide is PEP 8, which provides conventions for writing code for the main Python distribution. Here are some key points:
10031
10032- `Indentation:` Use 4 spaces per indentation level.
10033
10034- `Line Length:` Limit all lines to a maximum of 79 characters.
10035
10036- `Blank Lines:` Surround top-level function and class definitions with two blank lines. Method definitions inside a class are surrounded by a single blank line.
10037
10038- `Imports:` Imports should usually be on separate lines and should be grouped in the following order:
10039
10040 - Standard library imports.
10041 - Related third party imports.
10042 - Local application/library specific imports.
10043- `Whitespace:` Avoid extraneous whitespace in the following situations:
10044
10045 - Immediately inside parentheses, brackets or braces.
10046 - Immediately before a comma, semicolon, or colon.
10047 - Immediately before the open parenthesis that starts the argument list of a function call.
10048- `Comments:` Comments should be complete sentences and should be used to clarify code and are not a substitute for poorly written code.
10049
10050#### For Python
10051
10052- `List Comprehensions:` Use list comprehensions for concise and readable creation of lists.
10053
10054- `Generators:` Use generators when dealing with large amounts of data to save memory.
10055
10056- `Context Managers:` Use context managers (with statement) for resource management.
10057
10058- `String Formatting:` Use f-strings for formatting strings in Python 3.6 and above.
10059
10060- `Error Handling:` Use exceptions for error handling whenever possible.
10061
10062#### More details
10063
10064Use `black` to format your Python code before committing for consistency across such a large pool of contributors. Black's code [style](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#code-style) ensures consistent and opinionated code formatting. It automatically formats your Python code according to the Black style guide, enhancing code readability and maintainability.
10065
10066Key Features of Black:
10067
10068 Consistency: Black enforces a single, consistent coding style across your project, eliminating style debates and allowing developers to focus on code logic.
10069
10070 Readability: By applying a standard formatting style, Black improves code readability, making it easier to understand and collaborate on projects.
10071
10072 Automation: Black automates the code formatting process, saving time and effort. It eliminates the need for manual formatting and reduces the likelihood of inconsistencies.
10073
10074### Naming Conventions
10075
10076- `Classes:` Class names should normally use the CapWords Convention.
10077- `Functions and Variables:` Function names should be lowercase, with words separated by underscores as necessary to improve readability. Variable names follow the same convention as function names.
10078
10079- `Constants:` Constants are usually defined on a module level and written in all capital letters with underscores separating words.
10080
10081- `Non-public Methods and Instance Variables:` Use a single leading underscore (_). This is a weak "internal use" indicator.
10082
10083- `Strongly "private" methods and variables:` Use a double leading underscore (__). This triggers name mangling in Python.
10084
10085
10086### Git commit style
10087
10088Here’s a model Git commit message when contributing:
10089```
10090Summarize changes in around 50 characters or less
10091
10092More detailed explanatory text, if necessary. Wrap it to about 72
10093characters or so. In some contexts, the first line is treated as the
10094subject of the commit and the rest of the text as the body. The
10095blank line separating the summary from the body is critical (unless
10096you omit the body entirely); various tools like `log`, `shortlog`
10097and `rebase` can get confused if you run the two together.
10098
10099Explain the problem that this commit is solving. Focus on why you
10100are making this change as opposed to how (the code explains that).
10101Are there side effects or other unintuitive consequences of this
10102change? Here's the place to explain them.
10103
10104Further paragraphs come after blank lines.
10105
10106 - Bullet points are okay, too
10107
10108 - Typically a hyphen or asterisk is used for the bullet, preceded
10109 by a single space, with blank lines in between, but conventions
10110 vary here
10111
10112If you use an issue tracker, put references to them at the bottom,
10113like this:
10114
10115Resolves: #123
10116See also: #456, #789
10117```
10118
10119
## The Six Rules of a Great Commit
10121
10122#### 1. Atomic Commits
10123An “atomic” change revolves around one task or one fix.
10124
10125Atomic Approach
10126 - Commit each fix or task as a separate change
10127 - Only commit when a block of work is complete
10128 - Commit each layout change separately
10129 - Joint commit for layout file, code behind file, and additional resources
10130
10131Benefits
10132
10133- Easy to roll back without affecting other changes
10134- Easy to make other changes on the fly
10135- Easy to merge features to other branches
10136
10137#### Avoid trivial commit messages
10138
10139Commit messages like "fix", "fix2", or "fix3" don't provide any context or clear understanding of what changes the commit introduces. Here are some examples of good vs. bad commit messages:
10140
10141**Bad Commit Message:**
10142
10143 $ git commit -m "fix"
10144
10145**Good Commit Message:**
10146
10147 $ git commit -m "Fix typo in README file"
10148
> **Caveat**: When working with new features, an atomic commit will often consist of multiple files, since a layout file, code-behind file, and additional resources may have been added/modified. You don’t want to commit all of these separately, because if you had to roll back the application to a state before the feature was added, it would involve multiple commit entries, and that can get confusing.
10150
10151#### 2. Separate subject from body with a blank line
10152
10153Not every commit requires both a subject and a body. Sometimes a single line is fine, especially when the change is so simple that no further context is necessary.
10154
10155For example:
10156
10157 Fix typo in introduction to user guide
10158
Nothing more need be said; if the reader wonders what the typo was, she can simply take a look at the change itself, i.e. use `git show` or `git diff` or `git log -p`.
10160
If you’re committing something like this at the command line, it’s easy to use the `-m` option to `git commit`:

    $ git commit -m "Fix typo in introduction to user guide"
10164
10165However, when a commit merits a bit of explanation and context, you need to write a body. For example:
10166
10167 Derezz the master control program
10168
10169 MCP turned out to be evil and had become intent on world domination.
10170 This commit throws Tron's disc into MCP (causing its deresolution)
10171 and turns it back into a chess game.
10172
Commit messages with bodies are not so easy to write with the `-m` option. You’re better off writing the message in a proper text editor. [See Pro Git](https://git-scm.com/book/en/v2/Customizing-Git-Git-Configuration).
10174
10175In any case, the separation of subject from body pays off when browsing the log. Here’s the full log entry:
10176
10177 $ git log
10178 commit 42e769bdf4894310333942ffc5a15151222a87be
10179 Author: Kevin Flynn <[email protected]>
10180 Date: Fri Jan 01 00:00:00 1982 -0200
10181
10182 Derezz the master control program
10183
10184 MCP turned out to be evil and had become intent on world domination.
10185 This commit throws Tron's disc into MCP (causing its deresolution)
10186 and turns it back into a chess game.
10187
10188
10189#### 3. Limit the subject line to 50 characters
1019050 characters is not a hard limit, just a rule of thumb. Keeping subject lines at this length ensures that they are readable, and forces the author to think for a moment about the most concise way to explain what’s going on.
10191
GitHub’s UI is fully aware of these conventions. It will warn you if you go past the 50 character limit, and it will truncate any subject line longer than 72 characters with an ellipsis, so keeping the subject to 50 characters is best practice.
10193
10194#### 4. Use the imperative mood in the subject line
10195Imperative mood just means “spoken or written as if giving a command or instruction”. A few examples:
10196
10197 Clean your room
10198 Close the door
10199 Take out the trash
10200
Each of the six rules you’re reading about right now is written in the imperative (“Wrap the body at 72 characters”, etc.).
10202
10203The imperative can sound a little rude; that’s why we don’t often use it. But it’s perfect for Git commit subject lines. One reason for this is that Git itself uses the imperative whenever it creates a commit on your behalf.
10204
10205For example, the default message created when using git merge reads:
10206
10207 Merge branch 'myfeature'
10208
10209And when using git revert:
10210
10211 Revert "Add the thing with the stuff"
10212
10213 This reverts commit cc87791524aedd593cff5a74532befe7ab69ce9d.
10214
10215Or when clicking the “Merge” button on a GitHub pull request:
10216
10217 Merge pull request #123 from someuser/somebranch
10218
10219So when you write your commit messages in the imperative, you’re following Git’s own built-in conventions. For example:
10220
10221 Refactor subsystem X for readability
10222 Update getting started documentation
10223 Remove deprecated methods
10224 Release version 1.0.0
10225
10226Writing this way can be a little awkward at first. We’re more used to speaking in the indicative mood, which is all about reporting facts. That’s why commit messages often end up reading like this:
10227
10228 Fixed bug with Y
10229 Changing behavior of X
10230
10231And sometimes commit messages get written as a description of their contents:
10232
10233 More fixes for broken stuff
10234 Sweet new API methods
10235
10236To remove any confusion, here’s a simple rule to get it right every time.
10237
10238**A properly formed Git commit subject line should always be able to complete the following sentence:**
10239
10240 If applied, this commit will <your subject line here>
10241
10242For example:
10243
10244 If applied, this commit will refactor subsystem X for readability
10245 If applied, this commit will update getting started documentation
10246 If applied, this commit will remove deprecated methods
10247 If applied, this commit will release version 1.0.0
10248 If applied, this commit will merge pull request #123 from user/branch
10249
10250#### 5. Wrap the body at 72 characters
10251Git never wraps text automatically. When you write the body of a commit message, you must mind its right margin, and wrap text manually.
10252
10253The recommendation is to do this at 72 characters, so that Git has plenty of room to indent text while still keeping everything under 80 characters overall.
10254
10255A good text editor can help here. It’s easy to configure Vim, for example, to wrap text at 72 characters when you’re writing a Git commit.
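
To see the arithmetic at work, Python's standard `textwrap` module can reflow a paragraph at 72 columns (a toy illustration, not part of this repository):

```python
import textwrap

body = (
    "Explain the problem that this commit is solving, and focus on why "
    "the change is being made rather than how the code implements it."
)

# Wrapping at 72 columns leaves room for git log's default indent while
# keeping every line under 80 characters overall.
print(textwrap.fill(body, width=72))
```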
10256
10257#### 6. Use the body to explain what and why vs. how
10258This [commit](https://github.com/bitcoin/bitcoin/commit/eb0b56b19017ab5c16c745e6da39c53126924ed6) from Bitcoin Core is a great example of explaining what changed and why:
10259
10260```
10261commit eb0b56b19017ab5c16c745e6da39c53126924ed6
10262Author: Pieter Wuille <[email protected]>
10263Date: Fri Aug 1 22:57:55 2014 +0200
10264
10265 Simplify serialize.h's exception handling
10266
10267 Remove the 'state' and 'exceptmask' from serialize.h's stream
10268 implementations, as well as related methods.
10269
10270 As exceptmask always included 'failbit', and setstate was always
10271 called with bits = failbit, all it did was immediately raise an
10272 exception. Get rid of those variables, and replace the setstate
10273 with direct exception throwing (which also removes some dead
10274 code).
10275
10276 As a result, good() is never reached after a failure (there are
10277 only 2 calls, one of which is in tests), and can just be replaced
10278 by !eof().
10279
10280 fail(), clear(n) and exceptions() are just never called. Delete
10281 them.
10282```
10283
10284Take a look at the [full diff](https://github.com/bitcoin/bitcoin/commit/eb0b56b19017ab5c16c745e6da39c53126924ed6) and just think how much time the author is saving fellow and future committers by taking the time to provide this context here and now. If he didn’t, it would probably be lost forever.
10285
10286In most cases, you can leave out details about how a change has been made. Code is generally self-explanatory in this regard (and if the code is so complex that it needs to be explained in prose, that’s what source comments are for). Just focus on making clear the reasons why you made the change in the first place—the way things worked before the change (and what was wrong with that), the way they work now, and why you decided to solve it the way you did.
10287
10288The future maintainer that thanks you may be yourself!
10289
10290
10291
10292#### Tools worth mentioning
10293
10294##### Using `--fixup`
10295
10296If you've made a commit and then realize you've missed something or made a minor mistake, you can use the `--fixup` option.
10297
10298For example, suppose you've made a commit with a hash `9fceb02`. Later, you realize you've left a debug statement in your code. Instead of making a new commit titled "remove debug statement" or "fix", you can do the following:
10299
10300 $ git commit --fixup 9fceb02
10301
10302This will create a new commit to fix the issue, with a message like "fixup! The original commit message".
10303
10304##### Interactive Rebase
10305
Interactive rebase, or `rebase -i`, can be used to squash these fixup commits into the original commits they're fixing, which cleans up your commit history. You can use the `--autosquash` option to automatically squash any commits marked as "fixup" into their target commits.
10307
10308For example:
10309
10310 $ git rebase -i --autosquash HEAD~5
10311
10312This command starts an interactive rebase for the last 5 commits (`HEAD~5`). Any commits marked as "fixup" will be automatically moved to squash with their target commits.
10313
10314The benefit of using `--fixup` and interactive rebase is that it keeps your commit history clean and readable. It groups fixes with the commits they are related to, rather than having a separate "fix" commit that might not make sense to other developers (or even to you) in the future.
10315
10316
10317---
10318
10319#### Pull Request and Squashing Commits Caveats
10320
10321While atomic commits are great for development and for understanding the changes within the branch, the commit history can get messy when merging to the main branch. To keep a cleaner and more understandable commit history in our main branch, we encourage squashing all the commits of a PR into one when merging.
10322
10323This single commit should provide an overview of the changes that the PR introduced. It should follow the guidelines for atomic commits (an atomic commit is complete, self-contained, and understandable) but on the scale of the entire feature, task, or fix that the PR addresses. This approach combines the benefits of atomic commits during development with a clean commit history in our main branch.
10324
10325Here is how you can squash commits:
10326
10327```bash
10328git rebase -i HEAD~n
10329```
10330
10331where `n` is the number of commits to squash. After running the command, replace `pick` with `squash` for the commits you want to squash into the previous commit. This will combine the commits and allow you to write a new commit message.
10332
10333In this context, an atomic commit message could look like:
10334
10335```
10336Add feature X
10337
10338This commit introduces feature X which does A, B, and C. It adds
10339new files for layout, updates the code behind the file, and introduces
10340new resources. This change is important because it allows users to
10341perform task Y more efficiently.
10342
10343It includes:
10344- Creation of new layout file
10345- Updates in the code-behind file
10346- Addition of new resources
10347
10348Resolves: #123
10349```
10350
In your PRs, remember to detail what the PR introduces or fixes. This helps reviewers understand the context and the reasons behind the changes.
10352
10353
10354
10355---
10356File: /docs/miners/finetuning.md
10357---
10358
10359# SWE Finetuning
10360
10361## Task Outline
10362
The task is to create a patch that fixes an issue in the repository. You will be provided the location of a repository and a description of the issue. Both the repository and the issue are real, and your patch will be graded against the real patch that resolved the issue.
10364
10365### What is a patch?
10366
A patch is a list of edits to the repository. Each edit modifies a single line of a file and contains the file name, line number, current line content, and new line content, as defined in the `Edit` and `Patch` classes below.
10368
```python
from pydantic import BaseModel

class Edit(BaseModel):
    file_name: str
    line_number: int
    line_content: str
    new_line_content: str

class Patch(BaseModel):
    edits: list[Edit]
```
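
For instance, a single-edit patch that rewrites one line of a hypothetical file could be built like this (the file name and line contents are illustrative):

```python
# Illustrative only: replace line 3 of a hypothetical file.
patch = Patch(edits=[
    Edit(
        file_name="coding/example.py",
        line_number=3,
        line_content="    return a - b",
        new_line_content="    return a + b",
    )
])
```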
10379
10380## Things available to you
10381
10382### Packages
10383
You will have access to the modules listed in `ALLOWED_MODULES` in the `coding/constants.py` file, along with specific imports from certain packages, defined in the `ALLOWED_IMPORTS` dictionary in the same file.
10385
10386### Size Limits
10387
10388You will have access to the `NUM_ALLOWED_CHARACTERS` variable in the `coding/constants.py` file. This is the maximum number of characters that can be used in your submission.
10389
10390### LLM Models
10391
10392You will have access to the following LLM models:
10393
10394- "gpt-4o"
10395- "gpt-3.5-turbo"
10396- "gpt-4o-mini"
10397- "claude-3-5-sonnet"
10398- "gemini-2.0-flash-exp"
10399
10400You will also have access to the following embedding models:
10401
10402- "text-embedding-3-small"
10403
10404#### How to use the models
10405
10406You can use the models by calling the `llm` property of the `SWEBase` class. For example:
10407
```python
# Note: swebase.py lives under coding/finetune/swe-server/; the hyphenated
# directory name cannot appear in a normal import statement, so this snippet
# assumes the swe-server directory is on sys.path so the module can be
# imported directly.
from swebase import SWEBase

swe = SWEBase()
response, tokens = swe.llm("gpt-4o", "What is the capital of France?")
embeddings = swe.llm.embed("What is the capital of France?")
```
10415
10416#### Reminders
10417
- The server that hosts your code does not allow internet access. Do not try to use the internet, as your requests will fail.
10419
10420## Submission
10421
10422Locate the `coding/miners/swe.py` file. This is where your miner will go to grab your submission.
10423
Your submission must define a class `SWE` that inherits from `SWEBase`. It will be called with a `repo_location` and an `issue_description`.
10425
10426The `SWE` class must return a `Patch` object. This will be used to evaluate your submission.
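
A minimal sketch of what a submission could look like is below. How the class is invoked is assumed here to be via `__call__`, and the file and line in the returned patch are placeholders; both are assumptions for illustration, not part of the spec above.

```python
from swebase import SWEBase  # see the import note in "How to use the models"

# Patch and Edit are the pydantic models from "What is a patch?" above.

class SWE(SWEBase):
    def __call__(self, repo_location: str, issue_description: str) -> Patch:
        # Ask one of the allowed models for a suggested one-line fix; a real
        # submission would first inspect the repository at repo_location.
        response, tokens = self.llm(
            "gpt-4o-mini",
            f"Suggest a one-line fix for this issue:\n{issue_description}",
        )
        # Placeholder edit: a real submission must locate the correct file,
        # line number, and existing line content in the repository.
        return Patch(edits=[
            Edit(
                file_name="example.py",
                line_number=1,
                line_content="old line",
                new_line_content=response,
            )
        ])
```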
10427
10428## Testing
10429
10430Use the notebook `notebooks/sample-swe-task.ipynb` to test your submission.
10431
10432You need to verify your logic using the notebook `notebooks/logic-verification.ipynb`.
10433
10434
10435
10436---
10437File: /docs/miners/quickstart.md
10438---
10439
10440# Quickstart to Mining
10441
10442## Installation
10443
10444
This repository requires python3.9 or higher. To install it, clone this repository and run the [install.sh](./install.sh) script, or install manually:
10446```bash
10447git clone https://github.com/brokespace/code
10448cd code
10449python -m pip install --use-deprecated=legacy-resolver -r requirements.txt
10450python -m pip install --use-deprecated=legacy-resolver -e .
python -m pip uninstall uvloop # because it causes issues with threading/loops
10452```
10453
10454
10455## How to Run
10456You can use the following command to run a miner or a validator.
10457
10458```bash
10459python <SCRIPT_PATH>
10460 --netuid 45
10461 --subtensor.network <finney/local/test>
10462 --neuron.device cuda
10463 --wallet.name <your wallet> # Must be created using the bittensor-cli
10464 --wallet.hotkey <your hotkey> # Must be created using the bittensor-cli
10465 --logging.debug # Run in debug mode, alternatively --logging.trace for trace mode
10466 --axon.port # VERY IMPORTANT: set the port to be one of the open TCP ports on your machine
10467```
10468
10469where `SCRIPT_PATH` is either:
1. neurons/miner.py
2. neurons/validator.py
10472
For ease of use, you can also run the scripts with PM2. To install PM2:
10474**On Linux**:
10475```bash
10476sudo apt update && sudo apt install jq && sudo apt install npm && sudo npm install pm2 -g && pm2 update
10477```
10478
10479Example of running an openai miner:
10480
10481```bash
10482pm2 start neurons/miner.py --interpreter python3 --name miner -- --netuid XY --subtensor.network finney --wallet.name coldkey --wallet.hotkey hotkey --neuron.model_id gpt4 --axon.port 8091 --logging.debug --miner.name openai
10483```
10484
10485## Subnet Wallet Registration
10486Register your wallet on the subnet:
10487```
10488btcli s register --subtensor.network finney --netuid 45
10489```
10490
10491Testnet:
10492```
10493btcli s register --subtensor.network test --netuid 171
10494```
10495
10496
10497# Testnet
We highly recommend that you run your miners on testnet before deploying on main. This will give you an opportunity to debug your systems and ensure that you will not lose valuable immunity time. The testnet for this subnet is **netuid 171**.
10499
In order to run on testnet, you will need to go through the same hotkey registration procedure as on main, but using **testtao**. If you do not have any, ask for some in the community Discord.
10501
10502To run:
10503
10504```bash
10505pm2 start neurons/miner.py --interpreter python3 --name miner -- --netuid 171 --subtensor.network test --wallet.name test_coldkey --wallet.hotkey test_hotkey --neuron.model_id gpt4 --axon.port 8091 --logging.debug --miner.name openai
10506```
10507
10508
10509# Ramping up
10510
10511
10512## Tasks
10513
10514A list of the provided tasks can be seen [here](./tasks.md). Tasks are scored equally based on speed and similarity to the answer.
10515
10516## Sample Miners
10517
There are some sample miners you can use; check them out [here](./sample-miners.md).
10519
10520
10521## Helpful Tips
10522
- It is suggested that you play around with mining on testnet before going to mainnet.
- If you encounter issues with btcli, it is recommended to use btcli v7.1.2 (https://github.com/opentensor/bittensor/commits/release/7.1.2/).
10525
10526
10527
10528
10529---
10530File: /docs/miners/sample-miners.md
10531---
10532
10533# Sample Miners
10534
10535
10536## Qwen Mistral Miner
10537
To get started with this miner you will want two models, `CodeQwen` and `Mistral`. This setup requires quite a bit of VRAM; I would suggest a minimum of 24 GB.
10539
10540
10541
### Starting the LLMs
10543
Either use Python or Docker to start the LLMs. If using Docker, you will need the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/1.13.5/install-guide.html).
10545
10546#### Using Python
Create a venv for vLLM; this venv must not be the same one you use to run the miner.
10548
10549```bash
10550python -m venv vllm
10551source vllm/bin/activate
10552pip install vllm
10553```
10554
Then start the LLMs:
10556
10557```bash
10558pm2 start --name "mistral" "vllm serve thesven/Mistral-7B-Instruct-v0.3-GPTQ --max-model-len 4096 --quantization gptq --dtype half --gpu-memory-utilization 0.40 --port 8001"
10559```
10560
10561```bash
10562pm2 start --name "qwen" "vllm serve Qwen/CodeQwen1.5-7B-AWQ --max-model-len 4096 --quantization awq --dtype half --gpu-memory-utilization 0.40 --port 8000"
10563```
10564
10565#### Using Docker
10566
The commands below will run vLLM on `device=0` (GPU 0); be sure to modify that if you want to run on a different GPU.
10568
10569```bash
10570sudo docker run -d -p 8000:8000 --gpus device=0 --ipc host --name codeqwen docker.io/vllm/vllm-openai:latest --model Qwen/CodeQwen1.5-7B-AWQ --max-model-len 8096 --dtype half --gpu-memory-utilization 0.4
10571```
10572
10573```bash
10574sudo docker run -d -p 8001:8001 --gpus device=0 --ipc host --name mistral-instruct docker.io/vllm/vllm-openai:latest --model thesven/Mistral-7B-Instruct-v0.3-GPTQ --max-model-len 8912 --dtype half --gpu-memory-utilization 0.40
10575```
10576
10577
10578### Starting the Miner
10579
Exit the vLLM venv, then either create a new venv or use your default Python interpreter.
10581
```bash
10583pm2 start neurons/miner.py --interpreter python3 --name miner -- --netuid 45 --subtensor.network finney --wallet.name coldkey --wallet.hotkey hotkey --neuron.model_id Qwen/CodeQwen1.5-7B-AWQ --axon.port 8091 --logging.debug --miner.name qwen_mistral
10584```
10585
10586
10587---
10588File: /docs/miners/tasks.md
10589---
10590
10591# Coding Tasks
10592
10593### 1. Completion
10594
The goal of this task is to complete the given code. You will be provided a function's name followed by "<|fim_hole|>", and you must complete the function body.
10596
The only protocol being sent is `query`; the expected response is the completed function body.
10598
10599
10600### 2. Debugging
10601
10602This task is under development.
10603
10604### 3. Fill-In-The-Middle (FIM)
10605
The goal of this task is to fill in the middle of the given code. You will be provided a portion of code with a chunk missing; the chunk to be filled in is marked with "<|fim_hole|>". You should return the code that belongs in that spot.
10607
The only protocol being sent is `query`; the expected response is the code to be placed at the "<|fim_hole|>" marker.
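
For illustration, a FIM query and a matching response might look like the following (a hypothetical snippet, not taken from the validator):

```python
# A hypothetical FIM query: the body of add() has been cut out.
query = (
    "def add(a, b):\n"
    "<|fim_hole|>\n"
    "print(add(1, 2))\n"
)

# The expected response is only the missing chunk, not the whole file.
expected_response = "    return a + b"
```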
10609
10610
10611### 4. Organic Convo
10612
This task is dynamic: it is sent at random, using input from a user of the frontend. You will be sent a conversation from the chat frontend and are expected to return a good response.

You will be provided `messages` and potentially some `files`. You must return an appropriate response given the messages and files.
10616
10617
10618### 5. Repo
10619
In this task you will be sent a `query` containing the majority of the code from a file in a given repo; alongside that, you will be given `files` containing the other files in the repo. Your goal is to use those files to complete the missing code in the query file.
10621
10622
10623### 6. Repo File
10624
In this task you will be given a `query` containing a summary of what a Python file did, and `files` containing some other files from the same repo. You are to write the entire Python file given the summary and files.
10626
10627### 7. SWE Task
10628
10629In this task you are given `files` and a `query` of the style:
10630
10631```
10632Given the following issue and files, please return a patch file that would fix the issue. An example of what you should return is
10633<patch> diff --git a/example.txt b/example.txt
10634index e69de29..d95f3ad 100644
10635--- a/example.txt
10636+++ b/example.txt
10637@@ -1,3 +1,3 @@
10638-Hello, world!
10639+Hello, universe!
10640
10641 This is a simple text file.
10642-The end.
10643+Goodbye, world! </patch>
10644The following issue is:\n\n
10645
10646<INSERT ISSUE HERE>
10647```
10648
You must return a JSON-encoded dictionary where the key is the filename and the value is the patch for that file. It does not have to be perfect, as it will be parsed and specific line numbers will be compared.
10650
The above prompt, when fed into an LLM alongside the files, should produce output that can be parsed and returned immediately with the following code:
10652
```python
def parse_diff(diff_string):
    lines = diff_string.splitlines()
    file_diffs = {}
    current_file = None
    diff_content = []
    is_diff_block = False

    for line in lines:
        if "diff --git" in line:
            if current_file and diff_content:
                file_diffs[current_file] = "\n".join(diff_content)
            # Use the post-image path, stripping diff's "b/" prefix so the
            # key is the plain filename (str.removeprefix needs Python 3.9+).
            current_file = line.split()[-1].removeprefix("b/")
            diff_content = []
            is_diff_block = False
        elif line.startswith("---") or line.startswith("+++"):
            # Ignore these lines, as they indicate the old/new file path
            continue
        elif line.startswith("@@"):
            is_diff_block = True
            continue
        elif is_diff_block:
            diff_content.append(line)

    if current_file and diff_content:
        file_diffs[current_file] = "\n".join(diff_content)

    return file_diffs
```
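
As a quick sanity check, the function can be exercised on the example diff from the prompt above; keys are filenames and values are the hunk bodies that follow each `@@` header:

```python
import json

diff = """diff --git a/example.txt b/example.txt
index e69de29..d95f3ad 100644
--- a/example.txt
+++ b/example.txt
@@ -1,3 +1,3 @@
-Hello, world!
+Hello, universe!

 This is a simple text file.
-The end.
+Goodbye, world!"""

print(json.dumps(parse_diff(diff)))
# -> {"example.txt": "-Hello, world!\n+Hello, universe!\n\n This is a simple text file.\n-The end.\n+Goodbye, world!"}
```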
10682
10683
10684---
10685File: /docs/stream_tutorial/client.py
10686---
10687
10688import argparse
10689import asyncio
10690import bittensor as bt
10691
10692from protocol import StreamPrompting
10693
10694"""
This assumes that you have:
1. Registered your miner on the chain (finney/test)
2. Started serving your miner on an open port (e.g. 12345)
10698
10699Steps:
10700- Instantiate your synapse subclass with the relevant information. E.g. messages, roles, etc.
10701- Instantiate your wallet and a dendrite client
10702- Query the dendrite client with your synapse object
10703- Iterate over the async generator to extract the yielded tokens on the server side
10704"""
10705
10706
10707async def query_synapse(my_uid, wallet_name, hotkey, network, netuid):
10708 syn = StreamPrompting(
10709 roles=["user"],
10710 messages=[
10711 "hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
10712 ],
10713 )
10714
10715 # create a wallet instance with provided wallet name and hotkey
10716 wallet = bt.wallet(name=wallet_name, hotkey=hotkey)
10717
10718 # instantiate the metagraph with provided network and netuid
10719 metagraph = bt.metagraph(
10720 netuid=netuid, network=network, sync=True, lite=False
10721 )
10722
10723 # Grab the axon you're serving
10724 axon = metagraph.axons[my_uid]
10725
10726 # Create a Dendrite instance to handle client-side communication.
10727 dendrite = bt.dendrite(wallet=wallet)
10728
10729 async def main():
10730 responses = await dendrite(
10731 [axon], syn, deserialize=False, streaming=True
10732 )
10733
10734 for resp in responses:
10735 i = 0
10736 async for chunk in resp:
10737 i += 1
10738 if i % 5 == 0:
10739 print()
10740 if isinstance(chunk, list):
10741 print(chunk[0], end="", flush=True)
10742 else:
10743 # last object yielded is the synapse itself with completion filled
10744 synapse = chunk
10745 break
10746
10747 # Run the main function with asyncio
10748 await main()
10749
10750
10751if __name__ == "__main__":
10752 parser = argparse.ArgumentParser(
10753 description="Query a Bittensor synapse with given parameters."
10754 )
10755
10756 # Adding arguments
10757 parser.add_argument(
10758 "--my_uid",
10759 type=int,
10760 required=True,
10761 help="Your unique miner ID on the chain",
10762 )
10763 parser.add_argument(
10764 "--netuid", type=int, required=True, help="Network Unique ID"
10765 )
10766 parser.add_argument(
10767 "--wallet_name", type=str, default="default", help="Name of the wallet"
10768 )
10769 parser.add_argument(
10770 "--hotkey", type=str, default="default", help="Hotkey for the wallet"
10771 )
10772 parser.add_argument(
10773 "--network",
10774 type=str,
10775 default="test",
10776 help='Network type, e.g., "test" or "mainnet"',
10777 )
10778
10779 # Parse arguments
10780 args = parser.parse_args()
10781
10782 # Running the async function with provided arguments
10783 asyncio.run(
10784 query_synapse(
10785 args.my_uid,
10786 args.wallet_name,
10787 args.hotkey,
10788 args.network,
10789 args.netuid,
10790 )
10791 )
10792
10793
10794
10795---
10796File: /docs/stream_tutorial/config.py
10797---
10798
10799import bittensor as bt
10800import argparse
10801import os
10802
10803
10804def check_config(cls, config: "bt.Config"):
10805 bt.axon.check_config(config)
10806 bt.logging.check_config(config)
10807 full_path = os.path.expanduser(
10808 "{}/{}/{}/{}".format(
10809 config.logging.logging_dir,
10810 config.wallet.get("name", bt.defaults.wallet.name),
10811 config.wallet.get("hotkey", bt.defaults.wallet.hotkey),
10812 config.miner.name,
10813 )
10814 )
10815 config.miner.full_path = os.path.expanduser(full_path)
10816 if not os.path.exists(config.miner.full_path):
10817 os.makedirs(config.miner.full_path)
10818
10819
10820def get_config() -> "bt.Config":
10821 parser = argparse.ArgumentParser()
10822 parser.add_argument(
10823 "--axon.port", type=int, default=8098, help="Port to run the axon on."
10824 )
10825 # Subtensor network to connect to
10826 parser.add_argument(
10827 "--subtensor.network",
10828 default="finney",
10829 help="Bittensor network to connect to.",
10830 )
10831 # Chain endpoint to connect to
10832 parser.add_argument(
10833 "--subtensor.chain_endpoint",
10834 default="wss://entrypoint-finney.opentensor.ai:443",
10835 help="Chain endpoint to connect to.",
10836 )
10837 # Adds override arguments for network and netuid.
10838 parser.add_argument(
10839 "--netuid", type=int, default=1, help="The chain subnet uid."
10840 )
10841
10842 parser.add_argument(
10843 "--miner.root",
10844 type=str,
10845 help="Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ",
10846 default="~/.bittensor/miners/",
10847 )
10848 parser.add_argument(
10849 "--miner.name",
10850 type=str,
10851 help="Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ",
10852 default="Bittensor Miner",
10853 )
10854
10855 # Run config.
10856 parser.add_argument(
10857 "--miner.blocks_per_epoch",
 type=int,
10859 help="Blocks until the miner repulls the metagraph from the chain",
10860 default=100,
10861 )
10862
10863 # Switches.
10864 parser.add_argument(
10865 "--miner.no_serve",
10866 action="store_true",
 help="If True, the miner doesn't serve the axon.",
10868 default=False,
10869 )
10870 parser.add_argument(
10871 "--miner.no_start_axon",
10872 action="store_true",
 help="If True, the miner doesn't start the axon.",
10874 default=False,
10875 )
10876
10877 # Mocks.
10878 parser.add_argument(
10879 "--miner.mock_subtensor",
10880 action="store_true",
10881 help="If True, the miner will allow non-registered hotkeys to mine.",
10882 default=False,
10883 )
10884
10885 # Adds subtensor specific arguments i.e. --subtensor.chain_endpoint ... --subtensor.network ...
10886 bt.subtensor.add_args(parser)
10887
10888 # Adds logging specific arguments i.e. --logging.debug ..., --logging.trace .. or --logging.logging_dir ...
10889 bt.logging.add_args(parser)
10890
10891 # Adds wallet specific arguments i.e. --wallet.name ..., --wallet.hotkey ./. or --wallet.path ...
10892 bt.wallet.add_args(parser)
10893
10894 # Adds axon specific arguments i.e. --axon.port ...
10895 bt.axon.add_args(parser)
10896
10897 # Activating the parser to read any command-line inputs.
10898 # To print help message, run python3 template/miner.py --help
10899 config = bt.config(parser)
10900
10901 # Logging captures events for diagnosis or understanding miner's behavior.
10902 config.full_path = os.path.expanduser(
10903 "{}/{}/{}/netuid{}/{}".format(
10904 config.logging.logging_dir,
10905 config.wallet.name,
10906 config.wallet.hotkey,
10907 config.netuid,
10908 "miner",
10909 )
10910 )
10911 # Ensure the directory for logging exists, else create one.
10912 if not os.path.exists(config.full_path):
10913 os.makedirs(config.full_path, exist_ok=True)
10914 return config
10915
10916
10917
10918---
10919File: /docs/stream_tutorial/miner.py
10920---
10921
10922import copy
10923import time
10924import asyncio
10925import argparse
10926import threading
10927import traceback
10928from abc import ABC, abstractmethod
10929from functools import partial
10930from starlette.types import Send
10931
10932import bittensor as bt
10933from transformers import GPT2Tokenizer
10934from typing import List, Dict, Tuple, Union, Callable, Awaitable
10935
10936from protocol import StreamPrompting
10937from config import get_config, check_config
10938
10939
10940class StreamMiner(ABC):
10941 def __init__(self, config=None, axon=None, wallet=None, subtensor=None):
10942 # Setup base config from Miner.config() and merge with subclassed config.
10943 base_config = copy.deepcopy(config or get_config())
10944 self.config = self.config()
10945 self.config.merge(base_config)
10946
10947 check_config(StreamMiner, self.config)
10948 bt.logging.info(self.config) # TODO: duplicate print?
10949
10950 self.prompt_cache: Dict[str, Tuple[str, int]] = {}
10951
10952 # Activating Bittensor's logging with the set configurations.
10953 bt.logging.set_config(config=self.config.logging)
10954
10955 # Wallet holds cryptographic information, ensuring secure transactions and communication.
10956 self.wallet = wallet or bt.wallet(config=self.config)
10957 bt.logging.info(f"Wallet {self.wallet}")
10958
10959 # subtensor manages the blockchain connection, facilitating interaction with the Bittensor blockchain.
10960 self.subtensor = subtensor or bt.subtensor(config=self.config)
10961 bt.logging.info(f"Subtensor: {self.subtensor}")
10962 bt.logging.info(
10963 f"Running miner for subnet: {self.config.netuid} on network: {self.subtensor.chain_endpoint} with config:"
10964 )
10965
10966 # metagraph provides the network's current state, holding state about other participants in a subnet.
10967 self.metagraph = self.subtensor.metagraph(self.config.netuid)
10968 bt.logging.info(f"Metagraph: {self.metagraph}")
10969
10970 if self.wallet.hotkey.ss58_address not in self.metagraph.hotkeys:
10971 bt.logging.error(
10972 f"\nYour validator: {self.wallet} if not registered to chain connection: {self.subtensor} \nRun btcli register and try again. "
10973 )
10974 exit()
10975 else:
10976 # Each miner gets a unique identity (UID) in the network for differentiation.
10977 self.my_subnet_uid = self.metagraph.hotkeys.index(
10978 self.wallet.hotkey.ss58_address
10979 )
10980 bt.logging.info(f"Running miner on uid: {self.my_subnet_uid}")
10981
 # The axon handles request processing, allowing validators to send requests to this process.
10983 self.axon = axon or bt.axon(
10984 wallet=self.wallet, port=self.config.axon.port
10985 )
 # Attach the functions that are called when servicing a request.
10987 bt.logging.info(f"Attaching forward function to axon.")
10988 print(f"Attaching forward function to axon. {self._prompt}")
10989 self.axon.attach(
10990 forward_fn=self._prompt,
10991 )
10992 bt.logging.info(f"Axon created: {self.axon}")
10993
10994 # Instantiate runners
10995 self.should_exit: bool = False
10996 self.is_running: bool = False
10997 self.thread: threading.Thread = None
10998 self.lock = asyncio.Lock()
10999 self.request_timestamps: Dict = {}
11000
11001 @abstractmethod
11002 def config(self) -> "bt.Config":
11003 ...
11004
11005 @classmethod
11006 @abstractmethod
11007 def add_args(cls, parser: argparse.ArgumentParser):
11008 ...
11009
11010 def _prompt(self, synapse: StreamPrompting) -> StreamPrompting:
11011 """
11012 A wrapper method around the `prompt` method that will be defined by the subclass.
11013
11014 This method acts as an intermediary layer to perform pre-processing before calling the
11015 actual `prompt` method implemented in the subclass. Specifically, it checks whether a
11016 prompt is in cache to avoid reprocessing recent requests. If the prompt is not in the
11017 cache, the subclass `prompt` method is called.
11018
11019 Args:
11020 synapse (StreamPrompting): The incoming request object encapsulating the details of the request.
11021
11022 Returns:
11023 StreamPrompting: The response object to be sent back in reply to the incoming request, essentially
11024 the filled synapse request object.
11025
11026 Raises:
11027 ValueError: If the prompt is found in the cache indicating it was sent recently.
11028
11029 Example:
11030 This method is not meant to be called directly but is invoked internally when a request
11031 is received, and it subsequently calls the `prompt` method of the subclass.
11032 """
11033 return self.prompt(synapse)
11034
11035 @abstractmethod
11036 def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
11037 """
11038 Abstract method to handle and respond to incoming requests to the miner.
11039
11040 Subclasses should implement this method to define their custom logic for processing and
11041 responding to requests. This method is designed to be overridden, and its behavior will
11042 be dependent on the specific implementation provided in the subclass.
11043
11044 Args:
11045 synapse (StreamPrompting): The incoming request object encapsulating the details
11046 of the request. This must contain `messages` and `roles` as fields.
11047
11048 Returns:
11049 StreamPrompting: The response object that should be sent back in reply to the
11050 incoming request. This is essentially the filled synapse request object.
11051
11052 Example:
11053 class CustomMiner(Miner):
11054 def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
11055 # Custom logic to process and respond to the request.
11056 synapse.completion = "The meaning of life is 42."
11057 return synapse
11058 """
11059 ...
11060
11061 def run(self):
11062 """
11063 Runs the miner logic. This method starts the miner's operations, including
11064 listening for incoming requests and periodically updating the miner's knowledge
11065 of the network graph.
11066 """
11067 if not self.subtensor.is_hotkey_registered(
11068 netuid=self.config.netuid,
11069 hotkey_ss58=self.wallet.hotkey.ss58_address,
11070 ):
11071 bt.logging.error(
11072 f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}"
11073 f"Please register the hotkey using `btcli subnets register` before trying again"
11074 )
11075 exit()
11076
11077 # Serve passes the axon information to the network + netuid we are hosting on.
 # This will auto-update if the axon port or external IP has changed.
11079 bt.logging.info(
11080 f"Serving axon {StreamPrompting} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
11081 )
11082 self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor)
11083
11084 # Start starts the miner's axon, making it active on the network.
11085 bt.logging.info(
11086 f"Starting axon server on port: {self.config.axon.port}"
11087 )
11088 self.axon.start()
11089
11090 # --- Run until should_exit = True.
11091 self.last_epoch_block = self.subtensor.get_current_block()
11092 bt.logging.info(f"Miner starting at block: {self.last_epoch_block}")
11093
11094 # This loop maintains the miner's operations until intentionally stopped.
11095 bt.logging.info(f"Starting main loop")
11096 step = 0
11097 try:
11098 while not self.should_exit:
11099 start_epoch = time.time()
11100
11101 # --- Wait until next epoch.
11102 current_block = self.subtensor.get_current_block()
11103 while (
11104 current_block - self.last_epoch_block
11105 < self.config.miner.blocks_per_epoch
11106 ):
 # --- Wait for next block.
11108 time.sleep(1)
11109 current_block = self.subtensor.get_current_block()
11110
11111 # --- Check if we should exit.
11112 if self.should_exit:
11113 break
11114
11115 # --- Update the metagraph with the latest network state.
11116 self.last_epoch_block = self.subtensor.get_current_block()
11117
11118 metagraph = self.subtensor.metagraph(
11119 netuid=self.config.netuid,
11120 lite=True,
11121 block=self.last_epoch_block,
11122 )
11123 log = (
11124 f"Step:{step} | "
11125 f"Block:{metagraph.block.item()} | "
11126 f"Stake:{metagraph.S[self.my_subnet_uid]} | "
11127 f"Rank:{metagraph.R[self.my_subnet_uid]} | "
11128 f"Trust:{metagraph.T[self.my_subnet_uid]} | "
11129 f"Consensus:{metagraph.C[self.my_subnet_uid] } | "
11130 f"Incentive:{metagraph.I[self.my_subnet_uid]} | "
11131 f"Emission:{metagraph.E[self.my_subnet_uid]}"
11132 )
11133 bt.logging.info(log)
11134
11135 step += 1
11136
11137 # If someone intentionally stops the miner, it'll safely terminate operations.
11138 except KeyboardInterrupt:
11139 self.axon.stop()
11140 bt.logging.success("Miner killed by keyboard interrupt.")
11141 exit()
11142
11143 # In case of unforeseen errors, the miner will log the error and continue operations.
11144 except Exception as e:
11145 bt.logging.error(traceback.format_exc())
11146
11147 def run_in_background_thread(self):
11148 """
11149 Starts the miner's operations in a separate background thread.
11150 This is useful for non-blocking operations.
11151 """
11152 if not self.is_running:
11153 bt.logging.debug("Starting miner in background thread.")
11154 self.should_exit = False
11155 self.thread = threading.Thread(target=self.run, daemon=True)
11156 self.thread.start()
11157 self.is_running = True
11158 bt.logging.debug("Started")
11159
11160 def stop_run_thread(self):
11161 """
11162 Stops the miner's operations that are running in the background thread.
11163 """
11164 if self.is_running:
11165 bt.logging.debug("Stopping miner in background thread.")
11166 self.should_exit = True
11167 self.thread.join(5)
11168 self.is_running = False
11169 bt.logging.debug("Stopped")
11170
11171 def __enter__(self):
11172 """
11173 Starts the miner's operations in a background thread upon entering the context.
11174 This method facilitates the use of the miner in a 'with' statement.
11175 """
11176 self.run_in_background_thread()
11177
11178 def __exit__(self, exc_type, exc_value, traceback):
11179 """
11180 Stops the miner's background operations upon exiting the context.
11181 This method facilitates the use of the miner in a 'with' statement.
11182
11183 Args:
11184 exc_type: The type of the exception that caused the context to be exited.
11185 None if the context was exited without an exception.
11186 exc_value: The instance of the exception that caused the context to be exited.
11187 None if the context was exited without an exception.
11188 traceback: A traceback object encoding the stack trace.
11189 None if the context was exited without an exception.
11190 """
11191 self.stop_run_thread()
11192
11193
11194class StreamingTemplateMiner(StreamMiner):
11195 def config(self) -> "bt.Config":
11196 """
11197 Returns the configuration object specific to this miner.
11198
11199 Implement and extend this method to provide custom configurations for the miner.
11200 Currently, it sets up a basic configuration parser.
11201
11202 Returns:
11203 bt.Config: A configuration object with the miner's operational parameters.
11204 """
11205 parser = argparse.ArgumentParser(description="Streaming Miner Configs")
11206 self.add_args(parser)
11207 return bt.config(parser)
11208
11209 def add_args(cls, parser: argparse.ArgumentParser):
11210 """
11211 Adds custom arguments to the command line parser.
11212
11213 Developers can introduce additional command-line arguments specific to the miner's
11214 functionality in this method. These arguments can then be used to configure the miner's operation.
11215
11216 Args:
11217 parser (argparse.ArgumentParser):
11218 The command line argument parser to which custom arguments should be added.
11219 """
11220 pass
11221
11222 def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
11223 """
11224 Generates a streaming response for the provided synapse.
11225
11226 This function serves as the main entry point for handling streaming prompts. It takes
11227 the incoming synapse which contains messages to be processed and returns a streaming
11228 response. The function uses the GPT-2 tokenizer and a simulated model to tokenize and decode
11229 the incoming message, and then sends the response back to the client token by token.
11230
11231 Args:
11232 synapse (StreamPrompting): The incoming StreamPrompting instance containing the messages to be processed.
11233
11234 Returns:
11235 StreamPrompting: The streaming response object which can be used by other functions to
11236 stream back the response to the client.
11237
11238 Usage:
11239 This function can be extended and customized based on specific requirements of the
11240 miner. Developers can swap out the tokenizer, model, or adjust how streaming responses
11241 are generated to suit their specific applications.
11242 """
11243 bt.logging.trace("HI. PROMPT()")
11244 tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
11245
11246 # Simulated function to decode token IDs into strings. In a real-world scenario,
11247 # this can be replaced with an actual model inference step.
11248 def model(ids):
11249 return (tokenizer.decode(id) for id in ids)
11250
11251 async def _prompt(text: str, send: Send):
11252 """
11253 Asynchronously processes the input text and sends back tokens as a streaming response.
11254
11255 This function takes an input text, tokenizes it using the GPT-2 tokenizer, and then
11256 uses the simulated model to decode token IDs into strings. It then sends each token
11257 back to the client as a streaming response, with a delay between tokens to simulate
11258 the effect of real-time streaming.
11259
11260 Args:
11261 text (str): The input text message to be processed.
11262 send (Send): An asynchronous function that allows sending back the streaming response.
11263
11264 Usage:
11265 This function can be adjusted based on the streaming requirements, speed of
11266 response, or the model being used. Developers can also introduce more sophisticated
11267 processing steps or modify how tokens are sent back to the client.
11268 """
11269 bt.logging.trace("HI. _PROMPT()")
11270 input_ids = tokenizer(
11271 text, return_tensors="pt"
11272 ).input_ids.squeeze()
11273 buffer = []
11274 bt.logging.debug(f"Input text: {text}")
11275 bt.logging.debug(f"Input ids: {input_ids}")
11276
11277 N = 3 # Number of tokens to send back to the client at a time
11278 for token in model(input_ids):
11279 bt.logging.trace(f"appending token: {token}")
11280 buffer.append(token)
11281 # If buffer has N tokens, send them back to the client.
11282 if len(buffer) == N:
11283 time.sleep(0.1)
11284 joined_buffer = "".join(buffer)
11285 bt.logging.debug(f"sedning tokens: {joined_buffer}")
11286 await send(
11287 {
11288 "type": "http.response.body",
11289 "body": joined_buffer.encode("utf-8"),
11290 "more_body": True,
11291 }
11292 )
11293 bt.logging.debug(f"Streamed tokens: {joined_buffer}")
11294 buffer = [] # Clear the buffer for next batch of tokens
11295
11296 # Send any remaining tokens in the buffer
11297 if buffer:
11298 joined_buffer = "".join(buffer)
11299 await send(
11300 {
11301 "type": "http.response.body",
11302 "body": joined_buffer.encode("utf-8"),
11303 "more_body": False, # No more tokens to send
11304 }
11305 )
11306 bt.logging.trace(f"Streamed tokens: {joined_buffer}")
11307
11308 message = synapse.messages[0]
11309 bt.logging.trace(f"message in _prompt: {message}")
11310 token_streamer = partial(_prompt, message)
11311 bt.logging.trace(f"token streamer: {token_streamer}")
11312 return synapse.create_streaming_response(token_streamer)
11313
11314
11315# This is the main function, which runs the miner.
11316if __name__ == "__main__":
11317 with StreamingTemplateMiner():
11318 while True:
11319 time.sleep(1)
11320
11321
11322
11323---
11324File: /docs/stream_tutorial/protocol.py
11325---
11326
11327import pydantic
11328import bittensor as bt
11329
11330from abc import ABC, abstractmethod
11331from typing import List, Union, Callable, Awaitable
11332from starlette.responses import StreamingResponse
11333
11334
11335class StreamPrompting(bt.StreamingSynapse):
11336 """
11337 StreamPrompting is a specialized implementation of the `StreamingSynapse` tailored for prompting functionalities within
11338 the Bittensor network. This class is intended to interact with a streaming response that contains a sequence of tokens,
11339 which represent prompts or messages in a certain scenario.
11340
11341 As a developer, when using or extending the `StreamPrompting` class, you should be primarily focused on the structure
11342 and behavior of the prompts you are working with. The class has been designed to seamlessly handle the streaming,
11343 decoding, and accumulation of tokens that represent these prompts.
11344
11345 Attributes:
11346 - `roles` (List[str]): A list of roles involved in the prompting scenario. This could represent different entities
11347 or agents involved in the conversation or use-case. They are immutable to ensure consistent
11348 interaction throughout the lifetime of the object.
11349
11350 - `messages` (List[str]): These represent the actual prompts or messages in the prompting scenario. They are also
11351 immutable to ensure consistent behavior during processing.
11352
11353 - `completion` (str): Stores the processed result of the streaming tokens. As tokens are streamed, decoded, and
11354 processed, they are accumulated in the completion attribute. This represents the "final"
11355 product or result of the streaming process.
11356 - `required_hash_fields` (List[str]): A list of fields that are required for the hash.
11357
11358 Methods:
11359 - `process_streaming_response`: This method asynchronously processes the incoming streaming response by decoding
11360 the tokens and accumulating them in the `completion` attribute.
11361
11362 - `deserialize`: Converts the `completion` attribute into its desired data format, in this case, a string.
11363
11364 - `extract_response_json`: Extracts relevant JSON data from the response, useful for gaining insights on the response's
11365 metadata or for debugging purposes.
11366
11367 Note: While you can directly use the `StreamPrompting` class, it's designed to be extensible. Thus, you can create
11368 subclasses to further customize behavior for specific prompting scenarios or requirements.
11369 """
11370
11371 roles: List[str] = pydantic.Field(
11372 ...,
11373 title="Roles",
 description="A list of roles in the StreamPrompting scenario. Immutable.",
11375 allow_mutation=False,
11376 )
11377
11378 messages: List[str] = pydantic.Field(
11379 ...,
11380 title="Messages",
11381 description="A list of messages in the StreamPrompting scenario. Immutable.",
11382 allow_mutation=False,
11383 )
11384
11385 required_hash_fields: List[str] = pydantic.Field(
11386 ["messages"],
11387 title="Required Hash Fields",
11388 description="A list of required fields for the hash.",
11389 allow_mutation=False,
11390 )
11391
11392 completion: str = pydantic.Field(
11393 "",
11394 title="Completion",
11395 description="Completion status of the current StreamPrompting object. This attribute is mutable and can be updated.",
11396 )
11397
11398 async def process_streaming_response(self, response: StreamingResponse):
11399 """
11400 `process_streaming_response` is an asynchronous method designed to process the incoming streaming response from the
11401 Bittensor network. It's the heart of the StreamPrompting class, ensuring that streaming tokens, which represent
11402 prompts or messages, are decoded and appropriately managed.
11403
11404 As the streaming response is consumed, the tokens are decoded from their 'utf-8' encoded format, split based on
11405 newline characters, and concatenated into the `completion` attribute. This accumulation of decoded tokens in the
11406 `completion` attribute allows for a continuous and coherent accumulation of the streaming content.
11407
11408 Args:
11409 response: The streaming response object containing the content chunks to be processed. Each chunk in this
11410 response is expected to be a set of tokens that can be decoded and split into individual messages or prompts.
11411 """
11412 if self.completion is None:
11413 self.completion = ""
11414 bt.logging.debug(
11415 "Processing streaming response (StreamingSynapse base class)."
11416 )
11417 async for chunk in response.content.iter_any():
11418 bt.logging.debug(f"Processing chunk: {chunk}")
11419 tokens = chunk.decode("utf-8").split("\n")
11420 for token in tokens:
11421 bt.logging.debug(f"--processing token: {token}")
11422 if token:
11423 self.completion += token
11424 bt.logging.debug(f"yielding tokens {tokens}")
11425 yield tokens
11426
11427 def deserialize(self) -> str:
11428 """
11429 Deserializes the response by returning the completion attribute.
11430
11431 Returns:
11432 str: The completion result.
11433 """
11434 return self.completion
11435
11436 def extract_response_json(self, response: StreamingResponse) -> dict:
11437 """
11438 `extract_response_json` is a method that performs the crucial task of extracting pertinent JSON data from the given
11439 response. The method is especially useful when you need a detailed insight into the streaming response's metadata
11440 or when debugging response-related issues.
11441
11442 Beyond just extracting the JSON data, the method also processes and structures the data for easier consumption
11443 and understanding. For instance, it extracts specific headers related to dendrite and axon, offering insights
11444 about the Bittensor network's internal processes. The method ultimately returns a dictionary with a structured
11445 view of the extracted data.
11446
11447 Args:
11448 response: The response object from which to extract the JSON data. This object typically includes headers and
11449 content which can be used to glean insights about the response.
11450
11451 Returns:
11452 dict: A structured dictionary containing:
11453 - Basic response metadata such as name, timeout, total_size, and header_size.
11454 - Dendrite and Axon related information extracted from headers.
11455 - Roles and Messages pertaining to the current StreamPrompting instance.
11456 - The accumulated completion.
11457 """
11458 headers = {
11459 k.decode("utf-8"): v.decode("utf-8")
11460 for k, v in response.__dict__["_raw_headers"]
11461 }
11462
11463 def extract_info(prefix):
11464 return {
11465 key.split("_")[-1]: value
11466 for key, value in headers.items()
11467 if key.startswith(prefix)
11468 }
11469
11470 return {
11471 "name": headers.get("name", ""),
11472 "timeout": float(headers.get("timeout", 0)),
11473 "total_size": int(headers.get("total_size", 0)),
11474 "header_size": int(headers.get("header_size", 0)),
11475 "dendrite": extract_info("bt_header_dendrite"),
11476 "axon": extract_info("bt_header_axon"),
11477 "roles": self.roles,
11478 "messages": self.messages,
11479 "completion": self.completion,
11480 }
11481
11482
11483
11484---
11485File: /docs/stream_tutorial/README.md
11486---
11487
11488# Bittensor Streaming Tutorial
This document is intended as a developer-friendly walkthrough of integrating streaming into your bittensor application.
11490
11491If you prefer to jump right into a complete stand-alone example, see:
11492- `miner.py`
11493- `protocol.py`
11494- `client.py`
11495
11496Start your miner:
11497```bash
11498python miner.py --netuid 8 --wallet.name default --wallet.hotkey miner --subtensor.network test --axon.port 10000 --logging.trace
11499```
11500
11501Run the client:
11502```bash
11503python client.py --netuid 8 --my_uid 1 --network test
11504```
11505
11506## Overview
11507This tutorial is designed to show you how to use the streaming API to integrate into your application. It will cover the following topics:
11508- writing your streaming protocol (inherits from bittensor.StreamingSynapse)
11509- writing your streaming server (uses your streaming protocol)
11510- writing your streaming client (uses your streaming protocol)
11511
11512### Defining your streaming protocol
11513When designing your protocol, it is helpful to look at `bittensor.StreamingSynapse` for reference. Below is a condensed snippet of the abstract methods that you will need to implement in your subclass.
11514
11515You will need to implement two methods:
11516
11517- `process_streaming_response`
11518- `extract_response_json`
11519
11520These two methods are the core of your streaming protocol. The first, `process_streaming_response`, is called as the response is being streamed from the network and is responsible for handling the streaming response, such as parsing and accumulating data. The second, `extract_response_json`, is called after the response has been processed and is responsible for retrieving structured data to be post-processed by the dendrite in the bittensor core code.
11521
11522```python
11523class StreamingSynapse(bittensor.Synapse, ABC):
11524 ...
11525 class BTStreamingResponse(_StreamingResponse):
11526 ...
11527 @abstractmethod
11528 async def process_streaming_response(self, response: Response):
11529 """
11530 Abstract method that must be implemented by the subclass.
11531 This method should provide logic to handle the streaming response, such as parsing and accumulating data.
11532 It is called as the response is being streamed from the network, and should be implemented to handle the specific
11533 streaming data format and requirements of the subclass.
11534
11535 Args:
11536 response: The response object to be processed, typically containing chunks of data.
11537 """
11538 ...
11539
11540 @abstractmethod
11541 def extract_response_json(self, response: Response) -> dict:
11542 """
11543 Abstract method that must be implemented by the subclass.
11544 This method should provide logic to extract JSON data from the response, including headers and content.
11545 It is called after the response has been processed and is responsible for retrieving structured data
11546 that can be used by the application.
11547
11548 Args:
11549 response: The response object from which to extract JSON data.
11550 """
11551 ...
11552 ...
11553```
11554
11555See the full reference code at the bittensor [repo](https://github.com/opentensor/bittensor/blob/master/bittensor/stream.py).
11556
11557
11558#### Create your protocol
11559Let's walk through how to create a protocol using the bittensor.StreamingSynapse class.
11560```python
11561class MyStreamingSynapse(bt.StreamingSynapse):
11562 # define your expected data fields here as pydantic field objects
11563 # This allows you to control what information is passed along the network
11564 messages: List[str] = pydantic.Field(
11565 ..., # this ellipsis (...) indicates the object is required
11566 title="Messages", # What is the name of this field?
11567 description="A list of messages in the Prompting scenario. Immutable.",
11568 allow_mutation=False, # disallow modification of this field after creation
11569 )
11570 completion: str = pydantic.Field(
11571 "",
11572 title="Completion",
11573 )
11574 # add fields as necessary
11575 ...
11576
11577 # This method controls how your synapse is deserialized from the network
11578 # E.g. you can extract whatever information you want to receive at the final
11579 # yield in the async generator returned by the server, without receiving
11580 # the entire synapse object itself.
11581 # In this example, we just want the completion string at the end.
11582 def deserialize(self) -> str:
11583 return self.completion
11584
11585 # implement your `process_streaming_response` logic to actually yield objects to the streamer
11586 # this effectively defines the async generator that you'll receive on the client side
11587 async def process_streaming_response(self, response: MyStreamingSynapse):
11588 # this is an example of how you might process a streaming response
11589 # iterate over the response content and yield each line
11590 async for chunk in response.content.iter_any():
11591 tokens = chunk.decode("utf-8").split("\n")
11592 yield tokens
11593
11594 # implement `extract_response_json` to extract the JSON data from the response headers
11595 # this will be dependent on the data you are streaming and how you want to structure it
11596 # it MUST conform to the following format expected by the bittensor dendrite:
11597 """
11598 {
11599 # METADATA AND HEADERS
11600 "name": ...,
11601 "timeout": float(...),
11602 "total_size": int(...),
11603 "header_size": int(...),
11604 "dendrite": ...,
11605 "axon": ...,
11606 # YOUR FIELDS
11607 "messages": self.messages,
11608 ...
11609 }
11610 """
11611 def extract_response_json(self, response: MyStreamingSynapse) -> dict:
11612 # iterate over the response headers and extract the necessary data
11613 headers = {
11614 k.decode("utf-8"): v.decode("utf-8")
11615 for k, v in response.__dict__["_raw_headers"]
11616 }
11617 # helper function to extract data from headers
11618 def extract_info(prefix):
11619 return {
11620 key.split("_")[-1]: value
11621 for key, value in headers.items()
11622 if key.startswith(prefix)
11623 }
11624 # return the extracted data in the expected format
11625 return {
11626 "name": headers.get("name", ""),
11627 "timeout": float(headers.get("timeout", 0)),
11628 "total_size": int(headers.get("total_size", 0)),
11629 "header_size": int(headers.get("header_size", 0)),
11630 "dendrite": extract_info("bt_header_dendrite"), # dendrite info
11631 "axon": extract_info("bt_header_axon"), # axon info
11632 "messages": self.messages, # field object
11633 }
11634```
11635
11636[Here](https://github.com/opentensor/text-prompting/blob/main/prompting/protocol.py#L131) is a full example implementation of a streaming protocol based on the text-prompting network.
11637
11638Please read the docstrings provided; they can be very helpful!
11639
11640### Writing the server
11641Great! Now that we have our protocol defined, let's see how to define our server.
11642This will generate the tokens to be streamed in this prompting example.
11643
11644For brevity we will not be building a full miner, but inspecting the central components.
11645```python
11646class MyStreamPromptingMiner(bt.Miner):
11647 ... # any relevant methods you'd need for your miner
11648
11649 # define your server forward here
11650 # NOTE: It is crucial that your typehints are correct and reflect your streaming protocol object
11651 # otherwise the axon will reject adding your route to the server.
11652 def forward(self, synapse: MyStreamingSynapse) -> MyStreamingSynapse:
11653 # Let's use a GPT2 tokenizer for this toy example
11654 tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
11655
11656 # Simulated function to decode token IDs into strings. In a real-world scenario,
11657 # this can be replaced with an actual model inference step.
11658 def model(ids):
11659 return (tokenizer.decode(id) for id in ids)
11660
11661 # This function is called asynchronously to process the input text and send back tokens
11662 # as a streaming response. It essentially produces the async generator that will be
11663 # consumed by the client with an `async for` loop.
11664 async def _forward(text: str, send: Send):
11665 # `text` may be the input prompt to your model in a real-world scenario.
11666 # let's tokenize them into IDs for the sake of this example.
11667 input_ids = tokenizer(text, return_tensors="pt").input_ids.squeeze()
11668
11669 # You may want to buffer your tokens before sending them back to the client.
11670 # this can be useful so we aren't flooding the client with individual tokens
11671 # and allows you more fine-grained control over how much data is sent back
11672 # with each yield.
11673 N = 3 # Number of tokens to send back to the client at a time
11674 buffer = []
11675 # Iterate over the tokens and send the generated tokens back to the client
11676 # when we have sufficient (N) tokens in the buffer.
11677 for token in model(input_ids):
11678 buffer.append(token) # Add token to buffer
11679
11680 # If buffer has N tokens, send them back to the client.
11681 if len(buffer) == N:
11682 joined_buffer = "".join(buffer)
11683 # Send the tokens back to the client
11684 # This is the core of the streaming response and the format
11685 # is important. The `send` function is provided by the ASGI server
11686 # and is responsible for sending the response back to the client.
11687 # This buffer will be received by the client as a single chunk of
11688 # data, which can then be split into individual tokens!
11689 await send(
11690 {
11691 "type": "http.response.body",
11692 "body": joined_buffer.encode("utf-8"),
11693 "more_body": True,
11694 }
11695 )
11696 buffer = [] # Clear the buffer for next batch of tokens
11697
11698 # Create a streaming response object using the `_forward` function
11699 # It is useful to wrap your _forward function in a partial function
11700 # to pass in the text argument lazily.
11701 token_streamer = partial(_forward, synapse.messages[0])
11702 # Return the streaming response object, which is an instance of the
11703 # `BTStreamingResponse` class.
11704 return synapse.create_streaming_response(token_streamer)
11705```
11706
11707#### Complete Example
11708Here is a full example for reference:
11709> This inherits from the prompting (text-prompting) miner base class.
11710> Take a look at the `prompting/baseminer/miner.py` file [here](https://github.com/opentensor/text-prompting/blob/main/prompting/baseminer/miner.py) for more details.
11711
11712```python
11713class StreamingTemplateMiner(prompting.Miner):
11714 def config(self) -> "bt.Config":
11715 """
11716 Returns the configuration object specific to this miner.
11717
11718 Implement and extend this method to provide custom configurations for the miner.
11719 Currently, it sets up a basic configuration parser.
11720
11721 Returns:
11722 bt.Config: A configuration object with the miner's operational parameters.
11723 """
11724 parser = argparse.ArgumentParser(description="Streaming Miner Configs")
11725 self.add_args(parser)
11726 return bt.config(parser)
11727
11728 def add_args(cls, parser: argparse.ArgumentParser):
11729 """
11730 Adds custom arguments to the command line parser.
11731
11732 Developers can introduce additional command-line arguments specific to the miner's
11733 functionality in this method. These arguments can then be used to configure the miner's operation.
11734
11735 Args:
11736 parser (argparse.ArgumentParser):
11737 The command line argument parser to which custom arguments should be added.
11738 """
11739 pass
11740
11741 def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
11742 """
11743 Generates a streaming response for the provided synapse.
11744
11745 This function serves as the main entry point for handling streaming prompts. It takes
11746 the incoming synapse which contains messages to be processed and returns a streaming
11747 response. The function uses the GPT-2 tokenizer and a simulated model to tokenize and decode
11748 the incoming message, and then sends the response back to the client token by token.
11749
11750 Args:
11751 synapse (StreamPrompting): The incoming StreamPrompting instance containing the messages to be processed.
11752
11753 Returns:
11754 StreamPrompting: The streaming response object which can be used by other functions to
11755 stream back the response to the client.
11756
11757 Usage:
11758 This function can be extended and customized based on specific requirements of the
11759 miner. Developers can swap out the tokenizer, model, or adjust how streaming responses
11760 are generated to suit their specific applications.
11761 """
11762 bt.logging.trace("In outer PROMPT()")
11763 tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
11764
11765 # Simulated function to decode token IDs into strings. In a real-world scenario,
11766 # this can be replaced with an actual model inference step.
11767 def model(ids):
11768 return (tokenizer.decode(id) for id in ids)
11769
11770 async def _prompt(text: str, send: Send):
11771 """
11772 Asynchronously processes the input text and sends back tokens as a streaming response.
11773
11774 This function takes an input text, tokenizes it using the GPT-2 tokenizer, and then
11775 uses the simulated model to decode token IDs into strings. It then sends each token
11776 back to the client as a streaming response, with a delay between tokens to simulate
11777 the effect of real-time streaming.
11778
11779 Args:
11780 text (str): The input text message to be processed.
11781 send (Send): An asynchronous function that allows sending back the streaming response.
11782
11783 Usage:
11784 This function can be adjusted based on the streaming requirements, speed of
11785 response, or the model being used. Developers can also introduce more sophisticated
11786 processing steps or modify how tokens are sent back to the client.
11787 """
11788 bt.logging.trace("In inner _PROMPT()")
11789 input_ids = tokenizer(text, return_tensors="pt").input_ids.squeeze()
11790 buffer = []
11791 bt.logging.debug(f"Input text: {text}")
11792 bt.logging.debug(f"Input ids: {input_ids}")
11793
11794 N = 3 # Number of tokens to send back to the client at a time
11795 for token in model(input_ids):
11796 bt.logging.trace(f"appending token: {token}")
11797 buffer.append(token)
11798 # If buffer has N tokens, send them back to the client.
11799 if len(buffer) == N:
11800 time.sleep(0.1)
11801 joined_buffer = "".join(buffer)
11802 bt.logging.debug(f"sending tokens: {joined_buffer}")
11803 await send(
11804 {
11805 "type": "http.response.body",
11806 "body": joined_buffer.encode("utf-8"),
11807 "more_body": True,
11808 }
11809 )
11810 bt.logging.debug(f"Streamed tokens: {joined_buffer}")
11811 buffer = [] # Clear the buffer for next batch of tokens
11812
11813 # Send any remaining tokens in the buffer
11814 if buffer:
11815 joined_buffer = "".join(buffer)
11816 await send(
11817 {
11818 "type": "http.response.body",
11819 "body": joined_buffer.encode("utf-8"),
11820 "more_body": False, # No more tokens to send
11821 }
11822 )
11823 bt.logging.trace(f"Streamed tokens: {joined_buffer}")
11824
11825 message = synapse.messages[0]
11826 bt.logging.trace(f"message in _prompt: {message}")
11827 token_streamer = partial(_prompt, message)
11828 bt.logging.trace(f"token streamer: {token_streamer}")
11829 return synapse.create_streaming_response(token_streamer)
11830```
11831
11832### Writing the client
11833Excellent! Now that we have defined our server, we can define our client.
11834
11835This assumes you have:
118361. Registered your miner on the chain (`finney`/`test`)
118372. Started serving your miner on an open port (e.g. `12345`)
11838
11839Steps:
11840- Instantiate your synapse subclass with the relevant information. E.g. `messages`, `roles`, etc.
11841- Instantiate your wallet and a dendrite client
11842- Query the dendrite client with your synapse object
11843- Iterate over the async generator to extract the yielded tokens on the server side
11844
11845```python
11846
11847# Import bittensor
11848import bittensor as bt
11849
11850# Create your streaming synapse subclass object to house the request body
11851syn = MyStreamingSynapse(
11852 roles=["user"],
11853 messages=["hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."]
11854)
11855
11856# Create a wallet instance that must be registered on the network
11857wallet = bt.wallet(name="default", hotkey="default")
11858
11859# Instantiate the metagraph
11860metagraph = bt.metagraph(
11861 netuid=8, network="test", sync=True, lite=False
11862)
11863
11864# Grab the axon you're serving
11865my_uid = 1
11866axon = metagraph.axons[my_uid]
11867
11868# Create a Dendrite instance to handle client-side communication.
11869dendrite = bt.dendrite(wallet=wallet)
11870
11871
11872# This is an async function, so we can use the `await` keyword when querying the server with the dendrite object.
11873async def main():
11874 # Send a request to the Axon using the Dendrite, passing in a MyStreamingSynapse
11875 # instance with roles and messages. The response is awaited, as the Dendrite
11876 # communicates asynchronously with the Axon. Returns a list of async generators.
11877 responses = await dendrite(
11878 [axon],
11879 syn,
11880 deserialize=False,
11881 streaming=True
11882 )
11883
11884 # Now that we have our responses we want to iterate over the yielded tokens
11885 # iterate over the async generator to extract the yielded tokens on server side
11886 for resp in responses:
11887 i = 0
11888 async for chunk in resp:
11889 i += 1
11890 if i % 5 == 0:
11891 print()
11892 if isinstance(chunk, list):
11893 print(chunk[0], end="", flush=True)
11894 else:
11895 # last object yielded is the synapse itself with completion filled
11896 synapse = chunk
11897 break
11898
11899 # The synapse object contains the completion attribute which contains the
11900 # accumulated tokens from the streaming response.
11901
11902if __name__ == "__main__":
11903 import asyncio  # run the main function with asyncio
11904 asyncio.run(main())
11905
11906```
11907There you have it!
11908
11909### Complete example
11910If you would like to see a complete standalone example that only depends on bittensor>=6.2.0, look below:
11911
11912- client.py
11913- streaming_miner.py
11915
11916# client.py
11917```python
11918# Import bittensor and the text-prompting packages
11919import bittensor as bt
11920import prompting
11921
11922# Create a StreamPrompting synapse object to house the request body
11923syn = prompting.protocol.StreamPrompting(
11924 roles=["user"],
11925 messages=["hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."])
11927
11928# create a wallet instance that must be registered on the network
11929wallet = bt.wallet(name="default", hotkey="default")
11931
11932# instantiate the metagraph
11933metagraph = bt.metagraph(
11934 netuid=8, network="test", sync=True, lite=False
11935)
11937
11938# Grab the axon you're serving
11939axon = metagraph.axons[62]
11941
11942# Create a Dendrite instance to handle client-side communication.
11943d = bt.dendrite(wallet=wallet)
11945
11946
11947async def main():
11948
11949 # Send a request to the Axon using the Dendrite, passing in a StreamPrompting
11950 # instance with roles and messages. The response is awaited, as the Dendrite
11951 # communicates asynchronously with the Axon. Returns a list of async generators.
11952 responses = await d(
11953 [axon],
11954 syn,
11955 deserialize=False,
11956 streaming=True
11957 )
11959
11960 # iterate over the async generator to extract the yielded tokens on server side
11961 for resp in responses:
11962 i = 0
11963 async for chunk in resp:
11964 i += 1
11965 if i % 5 == 0:
11966 print()
11967 if isinstance(chunk, list):
11968 print(chunk[0], end="", flush=True)
11969 else:
11970 # last object yielded is the synapse itself with completion filled
11971 synapse = chunk
11972 break
11973
11974if __name__ == "__main__":
11975 import asyncio
11976 asyncio.run(main())
11977```
11978
11979
11980
11981---
11982File: /docs/validators/quickstart.md
11983---
11984
11985# Quickstart
11986
11987
11988## Dependencies
11989
11990You will need the following:
11991
11992- A system with at least 12 GB of VRAM
11993- Python >= 3.10
11994- An OpenAI API key
11995- An Anthropic API key
11996- A Google Gemini API key
11997- A GitHub token
11998- A wandb account
11999
12003## Installation
12004
12005This repository requires Python 3.11. Follow the commands below to install it if you do not already have it.
12006
12007ONLY RUN THE FOLLOWING COMMANDS IF YOU DO NOT HAVE PYTHON INSTALLED
12008```bash
12009sudo add-apt-repository ppa:deadsnakes/ppa
12010sudo apt update
12011sudo apt install python3.11 python3.11-venv
12012```
12013
12014Ensure that your python version is 3.11 before continuing:
12015```bash
12016python3 --version
12017```
12018
12019If the above doesn't return `python3.11`, try the command `python3.11` instead. If `python3.11` works, use it in place of every `python3` command below.
12020
12021YOU WILL GET SOME ERRORS ABOUT THE PYTHON VERSION; IGNORE THEM.
12022
12023After ensuring you have python run the following commands:
12024```bash
12025git clone https://github.com/brokespace/code
12026cd code
12027python3 -m venv .venv
12028source .venv/bin/activate
12029python3 -m pip install --use-deprecated=legacy-resolver -r requirements.txt
12030python3 -m pip install --use-deprecated=legacy-resolver -e .
12031python3 -m pip uninstall uvloop # b/c it causes issues with threading/loops
12032```
12033
12034
12035#### Setup your dotenv
12036
12037Copy `.env.example` to `.env` (`cp .env.example .env`). Then edit the `.env` file with the GitHub token you generate below.
12038
12039#### Get a Github Token
12040
12041We require a GitHub token. To get one, follow the instructions [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens), or use the steps below.
12042
120431. Go to [GitHub](https://github.com)
120442. Open the top right menu and select `Settings`
120453. Go to the bottom left and select `Developer Settings`
120464. Go to either `Tokens (classic)` or `Fine-grained tokens`
120475. Generate a new token and place it in the `.env`
12048
12049#### Get an OpenAI Key
12050
12051To use OpenAI's services, you need to obtain an API key. Follow the steps below to get your OpenAI API key:
12052
120531. Go to the [OpenAI website](https://www.openai.com/).
120542. Sign up for an account if you don't already have one, or log in if you do.
120553. Navigate to the API section of your account.
120564. Generate a new API key.
120575. Copy the API key and store it in a secure location.
12058
12059Once you have your OpenAI API key, add it to your `.env` file like this:
12060
12061```
12062OPENAI_API_KEY=<your openai api key>
12063```
12064
12065#### Get a Claude API Key
12066
12067Place the API key in the `.env` file like this:
12068
12069```
12070ANTHROPIC_API_KEY=<your anthropic api key>
12071```
12072
12073#### Get a Gemini API Key
12074
12075Place the API key in the `.env` file like this:
12076
12077```
12078GOOGLE_API_KEY=<your gemini api key>
12079```
12080
12081
12082#### Setup Docker Server
12083
12084Set up the Docker server to host the miner submissions.
12085
12086[Docker Server Quickstart](./swe.md)
12087
12088#### Setup LLM Server
12089
12090Start the server:
12091
12092```bash
12093source .venv/bin/activate
12094cd coding/finetune/llm
12095pm2 start --name llm-server.25000 "gunicorn app:app --workers 5 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:25000 --timeout 800"
12096```
12097
12098Ensure that port 25000 is open on your machine and accessible from the Docker server.
12099
12100Ensure that ufw is enabled on your machine. Once it is, you can restrict the port so that it is only accessible from the Docker server by running the following commands:
12101
12102```bash
12103sudo ufw allow from <docker-server-ip> to any port 25000
12104sudo ufw deny 25000
12105sudo ufw reload
12106```
12107
12108
12109Test that the port is open by running the following command from the docker server:
12110
12111```bash
12112curl <validator-ip>:25000
12113```
12114
12115The command should return the response `{"detail":"Not Found"}`. If it does not, the port is not open or not accessible from the Docker server.
12116
12117#### Setup IP Addresses
12118
12119Set the IP addresses in the `.env` file like this:
12120
12121```
12122DOCKER_HOST_IP=<docker-server-ip>
12123HOST_IP=<validator-server-ip>
12124DOCKER_HOST=tcp://<docker-server-ip>:2375
12125```
12126
12127#### Setup LLM Auth Key
12128
12129Set the LLM auth key in the `.env` file like this:
12130
12131```
12132LLM_AUTH_KEY=<random auth key>
12133```
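
Putting it all together, the resulting `.env` should contain entries along these lines. The values are placeholders, and the exact variable names (in particular the one for the GitHub token) should be checked against `.env.example`:

```
OPENAI_API_KEY=<your openai api key>
ANTHROPIC_API_KEY=<your anthropic api key>
GOOGLE_API_KEY=<your gemini api key>
GITHUB_TOKEN=<your github token>
DOCKER_HOST_IP=<docker-server-ip>
HOST_IP=<validator-server-ip>
DOCKER_HOST=tcp://<docker-server-ip>:2375
LLM_AUTH_KEY=<random auth key>
```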
12134
12135#### Start the validator
12136
12137The wallet name and hotkey below must have been created using the bittensor-cli. `--axon.port` is VERY IMPORTANT: set it to an open TCP port on your machine. Use `--logging.trace` in place of `--logging.debug` for trace logging; `--wandb.on` defaults to true but can be disabled.
12138
12139```bash
12140source .venv/bin/activate
12141python3 scripts/start_validator.py \
12142 --netuid 45 \
12143 --subtensor.network <finney/local/test> \
12144 --neuron.device cuda \
12145 --wallet.name <your wallet> \
12146 --wallet.hotkey <your hotkey> \
12147 --logging.debug \
12148 --axon.port <open TCP port> \
12149 --wandb.on True
12150```
12151
12152
12153
12154
12155
12156---
12157File: /docs/validators/swe.md
12158---
12159
12160# SWE Start
12161
12162
12163## Remote Server Setup
12164
12165You should use a separate server from the one you run the validator on for this, to ensure security and avoid potential issues. I recommend a DigitalOcean droplet; a small one is fine, maybe 2-4 GB of RAM.
12166
12167### Setup Docker
12168
12169Install docker: https://docs.docker.com/engine/install/ubuntu/
12170
12171Next, configure remote access to the Docker daemon on `0.0.0.0:2375` (see https://docs.docker.com/engine/daemon/remote-access/#configuring-remote-access-with-daemonjson) by running the following commands:
12172
12173```bash
12174sudo systemctl edit docker.service
12175```
12176
12177Add the following to the override file that opens:
12178```bash
12179[Service]
12180ExecStart=
12181ExecStart=/usr/bin/dockerd -H fd:// -H tcp://0.0.0.0:2375
12182```
12183
12184```bash
12185sudo systemctl daemon-reload
12186sudo systemctl restart docker.service
12187```
12188
12189### Get Base Image
12190
12191```bash
12192docker pull brokespace/swe-server:latest
12193```
12194
12195### Configure UFW
12196
12197```bash
12198sudo ufw disable
12199```
12200
12201
12202### IPTables
12203```bash
12204sudo apt-get install iptables-persistent
12205```
12206
12207The order of the rules is important. Run the following commands to set up the rules:
12208
12209
12210To let Docker manage the iptables rules, update the file `/etc/docker/daemon.json` with the following content:
12211```json
12212{
12213 "iptables": true
12214}
12215```
12216```bash
12217sudo apt install ipset
12218```
12219
12220Create a file in `/etc/cron.monthly/dockerio` with the following content:
12221
12222MAKE SURE YOU SET THE IP OF THE SERVER YOU ARE RUNNING THE VALIDATOR ON IN THE IPTABLES RULES BELOW.
12223
12224```bash
12225#!/bin/bash
12226sudo iptables -F
12227sudo iptables -t nat -F
12228sudo iptables -t mangle -F
12229sudo iptables -t raw -F
12230
12231# Define the IP set name
12232IPSET_NAME="dockerio"
12233
12234# Check if the IP set exists; create it if it doesn't
12235if ! ipset list $IPSET_NAME &>/dev/null; then
12236 sudo ipset create $IPSET_NAME hash:ip
12237fi
12238
12239# Clear existing IPs in the set
12240sudo ipset flush $IPSET_NAME
12241
12242# Resolve required domains and add to ipset
12243for domain in registry-1.docker.io auth.docker.io cdn.docker.io; do
12244 for ip in $(dig +short $domain); do
12245 sudo ipset add $IPSET_NAME $ip
12246 done
12247done
12248
12249# Add iptables rules for the IP set
12250sudo iptables -A OUTPUT -m set --match-set $IPSET_NAME dst -p tcp --dport 443 -j ACCEPT
12251sudo iptables -A OUTPUT -m set --match-set $IPSET_NAME dst -p tcp --dport 80 -j ACCEPT
12252
12253# Restart Docker to apply changes
12254sudo systemctl restart docker
12255
12256sudo iptables -N DOCKER-USER
12257sudo iptables -A DOCKER-USER -p tcp --dport 3000 -j ACCEPT
12258sudo iptables -I DOCKER-USER 1 -p tcp --dport 3000 -j ACCEPT
12259sudo iptables -I DOCKER-USER 1 -p tcp --dport 25000 -j ACCEPT
12260# Allow forwarding from your host interface to the Docker bridge
12261sudo iptables -A FORWARD -p tcp -d 172.17.0.0/16 --dport 3000 -j ACCEPT
12262sudo iptables -A FORWARD -p tcp -s 172.17.0.0/16 --sport 3000 -j ACCEPT
12263sudo iptables -A INPUT -p tcp -s <ip-of-server-you-are-running-the-validator-on> --dport 2375 -j ACCEPT
12264sudo iptables -A OUTPUT -p tcp -s <ip-of-server-you-are-running-the-validator-on> --dport 2375 -j ACCEPT
12265sudo iptables -A INPUT -p tcp --dport 2375 -j DROP
12266sudo iptables -I OUTPUT 1 -p tcp --dport 25000 -j ACCEPT
12267sudo iptables -A INPUT -p tcp --sport 25000 -j ACCEPT
12268sudo iptables -A OUTPUT -p tcp --sport 25000 -j ACCEPT
12269
12270sudo iptables -A OUTPUT -p udp --dport 53 -j ACCEPT
12271sudo iptables -A INPUT -p udp --sport 53 -j ACCEPT
12272
12273sudo iptables -A INPUT -p tcp --sport 443 -j ACCEPT
12274sudo iptables -I OUTPUT 1 -p tcp --dport 3000 -j ACCEPT
12275sudo iptables -A OUTPUT -p tcp --dport 3000 -j ACCEPT
12276sudo iptables -I INPUT 1 -p tcp --dport 3000 -j ACCEPT
12277sudo iptables -A INPUT -p tcp --dport 3000 -j ACCEPT
12278
12279sudo iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
12280sudo iptables -A OUTPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
12281
12282
12283# Allow outgoing SSH traffic (port 22)
12284sudo iptables -A OUTPUT -p tcp --dport 22 -j ACCEPT
12285
12286
12287# sudo iptables -I OUTPUT 1 -p tcp --dport 25000 -j ACCEPT
12288
12289
12290# Allow incoming SSH traffic (port 22)
12291sudo iptables -A INPUT -p tcp --sport 22 -j ACCEPT
12292sudo iptables -A OUTPUT -j DROP
12293sudo iptables -A DOCKER-USER -j DROP
12294sudo iptables -A INPUT -p tcp --dport 2375 -j DROP
12295
12296sudo iptables-save | sudo tee /etc/iptables/rules.v4
12297sudo systemctl restart docker
12298
12299```
12300
12301Ensure the file is executable:
12302```bash
12303sudo chmod +x /etc/cron.monthly/dockerio
12304```
12305
12306Run it now:
12307
12308```bash
12309sudo /etc/cron.monthly/dockerio
12310```
12311
12312
12313
12314## Testing Docker Remote Access
12315
12316From the server you are running the validator on - NOT THE ONE YOU RAN THE ABOVE COMMANDS ON - run the following command:
12317
12318```bash
12319curl <docker-server-ip>:2375
12320```
12321
12322It should return `{"message":"page not found"}`.
12323
12324Next, to test further, run the following from the validator server:
12325
12326```bash
12327DOCKER_HOST=tcp://<docker-server-ip>:2375 docker run --rm brokespace/swe-server:latest bash -c "sleep 600"
12328```
12329
12330While that command is running you should be able to go onto the docker server and see the container running with the following command:
12331
12332```bash
12333docker ps
12334```
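
If you prefer a programmatic check from the validator server, here is a minimal sketch using the Docker SDK for Python (`pip install docker`); the `base_url` mirrors the `DOCKER_HOST` value from your `.env`:

```python
import docker

# Point the client at the remote daemon; same value as DOCKER_HOST in the .env.
client = docker.DockerClient(base_url="tcp://<docker-server-ip>:2375")

print(client.ping())  # True if the daemon is reachable
# While the sleep container from the test above is running, it should be listed here.
for container in client.containers.list():
    print(container.name, container.image.tags)
```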
12335
12336
12337
12338
12339---
12340File: /docs/api_deployment.md
12341---
12342
12343# Deploying the API for use
12344
12345Run the following command:
12346
12347```bash
12348python3 -m coding.api.openai --wallet <COLDKEY NAME> --hotkey <HOTKEY NAME> --network <NETWORK RUNNING ON> --netuid <UID OF THE NETWORK>
12349```
12350
12351For example, using the default network and netuid I could start it like so:
12352
12353```bash
12354python3 -m coding.api.openai --wallet coldkey --hotkey hotkey
12355```
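
Assuming the API exposes an OpenAI-compatible endpoint (take the actual host, port, and model name from the server's startup logs; `http://localhost:8000/v1` and `"code"` below are illustrative), a client sketch might look like this:

```python
# Hypothetical client sketch: assumes an OpenAI-compatible endpoint at
# http://localhost:8000/v1; check the server's startup logs for the real
# host, port, and model name.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")

stream = client.chat.completions.create(
    model="code",  # illustrative model name
    messages=[{"role": "user", "content": "Write a function that reverses a string"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
```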
12356
12357
12358
12359
12360---
12361File: /docs/FAQ.md
12362---
12363
12364# FAQ
12365
12366
12367## How do I determine how good my miner is?
12368
12369Check wandb - https://wandb.ai/gen42/gen42 - or run the `miner-average-score.ipynb` notebook in `/notebooks`.
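
If you want a quick number without the notebook, a minimal sketch using the wandb public API might look like the following; note that `"score"` is a placeholder summary key, so inspect a run in the wandb UI for the keys the validators actually log:

```python
import wandb

api = wandb.Api()
runs = api.runs("gen42/gen42")  # the project linked above

# "score" is a placeholder key; check a run's summary in the wandb UI
# for the metric name the validators actually log.
scores = [run.summary.get("score") for run in runs if run.summary.get("score") is not None]
if scores:
    print(f"average over {len(scores)} runs: {sum(scores) / len(scores):.4f}")
else:
    print("no runs logged a 'score' summary key")
```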
12370
12371## How do I know if my miner is working?
12372
12373Ensure you can curl it: `curl <miner-ip>:<miner-port>`.
12374
12375Ensure that you are seeing logs like "Received query" in your pm2 logs.
12376
12377Ensure that there are no errors in the logs; warnings are fine.
12378
12379## What are these pydantic errors?
12380
12381Just ignore them.
12382
12383## How can I remove debug logging?
12384
12385Edit `coding/utils/config.py` and remove line 301 `bt.debug()`.
12386
12387## How can I disable trace logging?
12388
12389Edit `coding/utils/config.py` and remove line 300 `bt.trace()`.
12390
12391## How is scoring done?
12392
12393Scoring depends on the task, but the primary route is the following (a sketch of the similarity step follows the list):
12394
123951. Get code from The Stack
123962. Rewrite the code with an LLM to ensure that lookups are not possible
123973. Grab a chunk from that code and erase it
123984. Provide the remaining code to the miner
123995. Compare the chunk to the miner's response using Cosine Similarity with CodeBERT
124006. Return the score
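
As a rough sketch of step 5 (not necessarily the validator's exact code), the comparison could be implemented like this, assuming the `microsoft/codebert-base` checkpoint and mean-pooled embeddings:

```python
import torch
from transformers import AutoModel, AutoTokenizer

# Assumes the microsoft/codebert-base checkpoint; the validator's actual
# model, pooling, and normalization may differ.
tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
model = AutoModel.from_pretrained("microsoft/codebert-base")

def embed(code: str) -> torch.Tensor:
    # Mean-pool the last hidden state into a single vector per snippet.
    inputs = tokenizer(code, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state  # shape: (1, seq_len, 768)
    return hidden.mean(dim=1).squeeze(0)

def score(erased_chunk: str, miner_response: str) -> float:
    # Cosine similarity in [-1, 1]; closer to 1 means a closer match.
    return torch.nn.functional.cosine_similarity(
        embed(erased_chunk), embed(miner_response), dim=0
    ).item()
```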
12401
12402
12403
12404---
12405File: /docs/running_on_mainnet.md
12406---
12407
12408# Running Subnet on Mainnet
12409
12410This tutorial shows how to use the bittensor `btcli` to create a subnetwork and connect your incentive mechanism to it.
12411
12412**IMPORTANT:** Before attempting to register on mainnet, we strongly recommend that you:
12413- First run [Running Subnet Locally](running_on_staging.md), and
12414- Then run [Running on the Testnet](running_on_testnet.md).
12415
12416Your incentive mechanisms running on the mainnet are open to anyone. They emit real TAO. Creating these mechanisms incurs a `lock_cost` in TAO.
12417
12418**DANGER**
12419- Do not expose your private keys.
12420- Only use your testnet wallet.
12421- Do not reuse the password of your mainnet wallet.
12422- Make sure your incentive mechanism is resistant to abuse.
12423
12424## Prerequisites
12425
12426Before proceeding further, make sure that you have installed Bittensor. See the below instructions:
12427
12428- [Install `bittensor`](https://github.com/opentensor/bittensor#install).
12429
12430After installing `bittensor`, proceed as below:
12431
12432## Steps
12433
12434## 1. Install your subnet template
12435
12436**NOTE: Skip this step if** you already did this during local testing and development.
12437
12438In your project directory:
12439
12440```bash
12441git clone https://github.com/opentensor/bittensor-subnet-template.git
12442```
12443
12444Next, `cd` into `bittensor-subnet-template` repo directory:
12445
12446```bash
12447cd bittensor-subnet-template
12448```
12449
12450Install the Bittensor subnet template package:
12451
12452```bash
12453python -m pip install -e . # Install your subnet template package
12454```
12455
12456## 2. Create wallets
12457
12458Create wallets for subnet owner, subnet validator and for subnet miner.
12459
12460This step creates local coldkey and hotkey pairs for your three identities: subnet owner, subnet validator and subnet miner.
12461
12462The owner will create and control the subnet. The owner must have at least 100 TAO before running the next steps.
12463
12464The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts.
12465
12466**NOTE**: You can also use existing wallets to register. Creating new keys is shown here for reference.
12467
12468Create a coldkey for the owner wallet:
12469
12470```bash
12471btcli wallet new_coldkey --wallet.name owner
12472```
12473
12474Create a coldkey and hotkey for the subnet miner wallet:
12475```bash
12476btcli wallet new_coldkey --wallet.name miner
12477```
12478
12479and
12480
12481```bash
12482btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
12483```
12484
12485Create a coldkey and hotkey for the subnet validator wallet:
12486
12487```bash
12488btcli wallet new_coldkey --wallet.name validator
12489```
12490
12491and
12492
12493```bash
12494btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default
12495```
12496
12497## 3. Getting the price of subnet creation
12498
12499Creating subnets on mainnet is competitive. The cost is determined by the rate at which new subnets are being registered onto the Bittensor blockchain.
12500
12501By default you must have at least 100 TAO on your owner wallet to create a subnet. However, the exact amount will fluctuate based on demand. The below command shows how to get the current price of creating a subnet.
12502
12503```bash
12504btcli subnet lock_cost
12505```
12506
12507The above command will show:
12508
12509```bash
12510>> Subnet lock cost: τ100.000000000
12511```
12512
12513## 4. Purchasing a slot
12514
12515Using your TAO balance, you can register your subnet to the mainchain. This will create a new subnet on the mainchain and give you the owner permissions to it. The below command shows how to purchase a slot.
12516
12517**NOTE**: Slots cost TAO to lock. You will get this TAO back when the subnet is deregistered.
12518
12519```bash
12520btcli subnet create
12521```
12522
12523Enter the owner wallet name. This gives permissions to the coldkey.
12524
12525```bash
12526>> Enter wallet name (default): owner # Enter your owner wallet name
12527>> Enter password to unlock key: # Enter your wallet password.
12528>> Register subnet? [y/n]: <y/n> # Select yes (y)
12529>> ⠇ 📡 Registering subnet...
12530✅ Registered subnetwork with netuid: 1 # Your subnet netuid will show here, save this for later.
12531```
12532
12533## 5. (Optional) Register keys
12534
12535**NOTE**: While this is not enforced, we recommend that subnet owners run a subnet validator and a subnet miner on the subnet to demonstrate proper use to the community.
12536
12537This step registers your subnet validator and subnet miner keys to the subnet giving them the **first two slots** on the subnet.
12538
12539Register your miner key to the subnet:
12540
12541```bash
12542btcli subnet recycle_register --netuid 45 --subtensor.network finney --wallet.name miner --wallet.hotkey default
12543```
12544
12545Follow the below prompts:
12546
12547```bash
12548>> Enter netuid [45] (45): # Enter the netuid of the subnet you just created.
12549>> Continue Registration?
12550 hotkey: ...
12551 coldkey: ...
12552 network: finney [y/n]: # Select yes (y)
12553>> ✅ Registered
12554```
12555
12556Next, register your validator key to the subnet:
12557
12558```bash
12559btcli subnet recycle_register --netuid 45 --subtensor.network finney --wallet.name validator --wallet.hotkey default
12560```
12561
12562Follow the below prompts:
12563
12564```bash
12565>> Enter netuid [45] (45): # Enter the netuid of the subnet you just created.
12566>> Continue Registration?
12567 hotkey: ...
12568 coldkey: ...
12569 network: finney [y/n]: # Select yes (y)
12570>> ✅ Registered
12571```
12572
12573## 6. Check that your keys have been registered
12574
12575Check that your subnet validator key has been registered:
12576
12577```bash
12578btcli wallet overview --wallet.name validator
12579```
12580
12581The output will be similar to the below:
12582
12583```bash
12584Subnet: 1
12585COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
12586miner default 0 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
125871 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
12588 Wallet balance: τ0.0
12589```
12590
12591Check that your subnet miner has been registered:
12592
12593```bash
12594btcli wallet overview --wallet.name miner
12595```
12596
12597The output will be similar to the below:
12598
12599```bash
12600Subnet: 1
12601COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
12602miner default 1 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
126031 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
12604 Wallet balance: τ0.0
12605```
12606
12607## 7. Run subnet miner and subnet validator
12608
12609Run the subnet miner:
12610
12611```bash
12612python neurons/miner.py --netuid 45 --wallet.name miner --wallet.hotkey default --logging.debug
12613```
12614
12615You will see the below terminal output:
12616
12617```bash
12618>> 2023-08-08 16:58:11.223 | INFO | Running miner for subnet: 45 on network: wss://entrypoint-finney.opentensor.ai:443 with config: ...
12619```
12620
12621Run the subnet validator:
12622
12623```bash
12624python neurons/validator.py --netuid 45 --wallet.name validator --wallet.hotkey default --logging.debug
12625```
12626
12627You will see the below terminal output:
12628
12629```bash
12630>> 2023-08-08 16:58:11.223 | INFO | Running validator for subnet: 45 on network: wss://entrypoint-finney.opentensor.ai:443 with config: ...
12631```
12632
12633## 8. Get emissions flowing
12634
12635Register to the root subnet using the `btcli`:
12636
12637```bash
12638btcli root register
12639```
12640
12641Then set your weights for the subnet:
12642
12643```bash
12644btcli root weights
12645```
12646
12647## 9. Stopping your nodes
12648
12649To stop your nodes, press CTRL + C in the terminal where the nodes are running.
12650
12651---
12652
12653
12654---
12655File: /docs/running_on_staging.md
12656---
12657
12658# Running Subnet Locally
12659
12660This tutorial will guide you through:
12661
12662- Setting up a local blockchain that is not connected to either Bittensor testchain or mainchain
12663- Creating a subnet
12664- Running your incentive mechanism on the subnet
12665
12666## Local blockchain vs local subtensor node
12667
12668Running a local blockchain is sometimes synonymously referred to as running on staging. This is **different** from running a local subtensor node that connects to the Bittensor mainchain.
12669
12670A local subtensor node will connect to the mainchain and sync with the mainchain, giving you your own access point to the mainchain.
12671
12672Running a local blockchain spins up two authority nodes locally, not connected to any other nodes or testchain or mainchain. This tutorial is for running a local blockchain.
12673
12674## Prerequisites
12675
12676Before proceeding further, make sure that you have installed Bittensor. See the below instructions:
12677
12678- [Install `bittensor`](https://github.com/opentensor/bittensor#install).
12679
12680After installing `bittensor`, proceed as below:
12681
12682## 1. Install Substrate dependencies
12683
12684Begin by installing the required dependencies for running a Substrate node.
12685
12686Update your system packages:
12687
12688```bash
12689sudo apt update
12690```
12691
12692Install additional required libraries and tools
12693
12694```bash
12695sudo apt install --assume-yes make build-essential git clang curl libssl-dev llvm libudev-dev protobuf-compiler
12696```
12697
12698## 2. Install Rust and Cargo
12699
12700Rust is the programming language used in Substrate development. Cargo is Rust's package manager.
12701
12702Install rust and cargo:
12703
12704```bash
12705curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
12706```
12707
12708Update your shell's source to include Cargo's path:
12709
12710```bash
12711source "$HOME/.cargo/env"
12712```
12713
12714## 3. Clone the subtensor repository
12715
12716This step fetches the subtensor codebase to your local machine.
12717
12718```bash
12719git clone https://github.com/opentensor/subtensor.git
12720```
12721
12722## 4. Setup Rust
12723
12724This step ensures that you have the nightly toolchain and the WebAssembly (wasm) compilation target. Note that this step will run the subtensor chain on your terminal directly, hence we advise that you run this as a background process using PM2 or other software.
12725
12726Update to the nightly version of Rust:
12727
12728```bash
12729./subtensor/scripts/init.sh
12730```
12731
12732## 5. Initialize
12733
12734These steps initialize your local subtensor chain in development mode. These commands will set up and run a local subtensor.
12735
12736Build the binary with the faucet feature enabled:
12737
12738```bash
12739cargo build --release --features pow-faucet
12740```
12741
12742**NOTE**: The `--features pow-faucet` option in the above is required if we want to use the command `btcli wallet faucet` [See the below Mint tokens step](#8-mint-tokens-from-faucet).
12743
12744Next, run the localnet script and turn off the attempt to build the binary (as we have already done this above):
12745
12746```bash
12747BUILD_BINARY=0 ./scripts/localnet.sh
12748```
12749
12750**NOTE**: Watch for any build or initialization outputs in this step. If you are building the project for the first time, this step will take a while to finish building, depending on your hardware.
12751
12752## 6. Install subnet template
12753
12754`cd` to your project directory and clone the bittensor subnet template repository:
12755
12756```bash
12757git clone https://github.com/opentensor/bittensor-subnet-template.git
12758```
12759
12760Navigate to the cloned repository:
12761
12762```bash
12763cd bittensor-subnet-template
12764```
12765
12766Install the bittensor-subnet-template Python package:
12767
12768```bash
12769python -m pip install -e .
12770```
12771
12772## 7. Set up wallets
12773
12774You will need wallets for the different roles, i.e., subnet owner, subnet validator and subnet miner, in the subnet.
12775
12776- The owner wallet creates and controls the subnet.
12777- The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts.
12778
12779Create a coldkey for the owner role:
12780
12781```bash
12782btcli wallet new_coldkey --wallet.name owner
12783```
12784
12785Set up the miner's wallets:
12786
12787```bash
12788btcli wallet new_coldkey --wallet.name miner
12789```
12790
12791```bash
12792btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
12793```
12794
12795Set up the validator's wallets:
12796
12797```bash
12798btcli wallet new_coldkey --wallet.name validator
12799```
12800```bash
12801btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default
12802```
12803
12804## 8. Mint tokens from faucet
12805
12806You will need tokens to initialize the incentive mechanism on the chain, as well as for registering the subnet.
12807
12808Run the following commands to mint faucet tokens for the owner and for the validator.
12809
12810Mint faucet tokens for the owner:
12811
12812```bash
12813btcli wallet faucet --wallet.name owner --subtensor.chain_endpoint ws://127.0.0.1:9946
12814```
12815
12816You will see:
12817
12818```bash
12819>> Balance: τ0.000000000 ➡ τ100.000000000
12820```
12821
12822Mint tokens for the validator:
12823
12824```bash
12825btcli wallet faucet --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946
12826```
12827
12828You will see:
12829
12830```bash
12831>> Balance: τ0.000000000 ➡ τ100.000000000
12832```
12833
12834## 9. Create a subnet
12835
12836The below commands establish a new subnet on the local chain. The cost will be exactly τ1000.000000000 for the first subnet you create, and you'll have to run the faucet several times to get enough tokens.
12837
12838```bash
12839btcli subnet create --wallet.name owner --subtensor.chain_endpoint ws://127.0.0.1:9946
12840```
12841
12842You will see:
12843
12844```bash
12845>> Your balance is: τ200.000000000
12846>> Do you want to register a subnet for τ1000.000000000? [y/n]:
12847>> Enter password to unlock key: [YOUR_PASSWORD]
12848>> ✅ Registered subnetwork with netuid: 1
12849```
12850
12851**NOTE**: The local chain will now have a default `netuid` of 1. The second registration will create a `netuid` 2 and so on, until you reach the subnet limit of 8. If you register more than 8 subnets, then a subnet with the least staked TAO will be replaced by the 9th subnet you register.
12852
12853## 10. Register keys
12854
12855Register your subnet validator and subnet miner on the subnet. This gives your two keys unique slots on the subnet. The subnet has a current limit of 128 slots.
12856
12857Register the subnet miner:
12858
12859```bash
12860btcli subnet register --wallet.name miner --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12861```
12862
12863Follow the below prompts:
12864
12865```bash
12866>> Enter netuid [1] (1): 1
12867>> Continue Registration? [y/n]: y
12868>> ✅ Registered
12869```
12870
12871Register the subnet validator:
12872
12873```bash
12874
12875btcli subnet register --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12876```
12877
12878Follow the below prompts:
12879
12880```
12881>> Enter netuid [1] (1): 1
12882>> Continue Registration? [y/n]: y
12883>> ✅ Registered
12884```
12885
12886## 11. Add stake
12887
12888This step bootstraps the incentives on your new subnet by adding stake into its incentive mechanism.
12889
12890```bash
12891btcli stake add --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12892```
12893
12894Follow the below prompts:
12895
12896```bash
12897>> Stake all Tao from account: 'validator'? [y/n]: y
12898>> Stake:
12899 τ0.000000000 ➡ τ100.000000000
12900```
12901
12902## 12. Validate key registrations
12903
12904Verify that both the miner and validator keys are successfully registered:
12905
12906```bash
12907btcli subnet list --subtensor.chain_endpoint ws://127.0.0.1:9946
12908```
12909
12910You will see a `2` entry under the `NEURONS` column for the `NETUID` of 1, indicating that you have registered a validator and a miner in this subnet:
12911
12912```bash
12913NETUID NEURONS MAX_N DIFFICULTY TEMPO CON_REQ EMISSION BURN(τ)
12914 1 2 256.00 10.00 M 1000 None 0.00% τ1.00000
12915 2 128
12916```
12917
12918See the subnet validator's registered details:
12919
12920```bash
12921btcli wallet overview --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946
12922```
12923
12924You will see:
12925
12926```
12927Subnet: 1
12928COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
12929miner default 0 True 100.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
129301 1 2 τ100.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
12931 Wallet balance: τ0.0
12932```
12933
12934See the subnet miner's registered details:
12935
12936```bash
12937btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946
12938```
12939
12940You will see:
12941
12942```bash
12943Subnet: 1
12944COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
12945miner default 1 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
129461 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
12947 Wallet balance: τ0.0
12948
12949```
12950
12951## 13. Run subnet miner and subnet validator
12952
12953Run the subnet miner and subnet validator. Make sure to specify your subnet parameters.
12954
12955Run the subnet miner:
12956
12957```bash
12958python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug
12959```
12960
12961Run the subnet validator:
12962
12963```bash
12964python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name validator --wallet.hotkey default --logging.debug
12965```
12966
12967## 14. Set weights for your subnet
12968
12969Register a validator on the root subnet and boost your subnet to set weights for it. This is a necessary step to ensure that the subnet is able to receive emissions.
12970
12971### Register your validator on the root subnet
12972
12973```bash
12974btcli root register --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12975```
12976
12977### Boost your subnet on the root subnet
12978```bash
12979btcli root boost --netuid 1 --increase 1 --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12980```
12981
12982## 15. Verify your incentive mechanism
12983
12984After a few blocks the subnet validator will set weights. This indicates that the incentive mechanism is active. Then, after a subnet tempo elapses (360 blocks, or 72 minutes at 12 seconds per block), you will see your incentive mechanism beginning to distribute TAO to the subnet miner.
12985
12986```bash
12987btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946
12988```
12989
12990## Ending your session
12991
12992To halt your nodes:
12993```bash
12994# Press CTRL + C keys in the terminal.
12995```
12996
12997---
12998
12999
13000
13001---
13002File: /docs/running_on_testnet.md
13003---
13004
13005# Running Subnet on Testnet
13006
13007This tutorial shows how to use the Bittensor testnet to create a subnet and run your incentive mechanism on it.
13008
13009**IMPORTANT:** We strongly recommend that you first run [Running Subnet Locally](running_on_staging.md) before running on the testnet. Incentive mechanisms running on the testnet are open to anyone, and although these mechanisms do not emit real TAO, they cost you test TAO, which you must first obtain.
13010
13011**DANGER**
13012- Do not expose your private keys.
13013- Only use your testnet wallet.
13014- Do not reuse the password of your mainnet wallet.
13015- Make sure your incentive mechanism is resistant to abuse.
13016
13017## Prerequisites
13018
13019Before proceeding further, make sure that you have installed Bittensor. See the below instructions:
13020
13021- [Install `bittensor`](https://github.com/opentensor/bittensor#install).
13022
13023After installing `bittensor`, proceed as below:
13024
13025## 1. Install Bittensor subnet template
13026
13027**NOTE: Skip this step if** you already did this during local testing and development.
13028
13029`cd` into your project directory and clone the bittensor-subnet-template repo:
13030
13031```bash
13032git clone https://github.com/opentensor/bittensor-subnet-template.git
13033```
13034
13035Next, `cd` into bittensor-subnet-template repo directory:
13036
13037```bash
13038cd bittensor-subnet-template # Enter the repo directory
13039```
13040
13041Install the bittensor-subnet-template package:
13042
13043```bash
13044python -m pip install -e .
13045```
13046
13047## 2. Create wallets
13048
13049Create wallets for subnet owner, subnet validator and for subnet miner.
13050
13051This step creates local coldkey and hotkey pairs for your three identities: subnet owner, subnet validator and subnet miner.
13052
13053The owner will create and control the subnet. The owner must have at least 100 testnet TAO before running the next steps.
13054
13055The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts.
13056
13057Create a coldkey for your owner wallet:
13058
13059```bash
13060btcli wallet new_coldkey --wallet.name owner
13061```
13062
13063Create a coldkey and hotkey for your miner wallet:
13064
13065```bash
13066btcli wallet new_coldkey --wallet.name miner
13067```
13068
13069and
13070
13071```bash
13072btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
13073```
13074
13075Create a coldkey and hotkey for your validator wallet:
13076
13077```bash
13078btcli wallet new_coldkey --wallet.name validator
13079```
13080
13081and
13082
13083```bash
13084btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default
13085```
13086
13087## 3. Get the price of subnet creation
13088
13089Creating subnets on the testnet is competitive. The cost is determined by the rate at which new subnets are being registered onto the chain.
13090
13091By default you must have at least 100 testnet TAO in your owner wallet to create a subnet. However, the exact amount will fluctuate based on demand. The below command shows how to get the current price of creating a subnet.
13092
13093```bash
13094btcli subnet lock_cost --subtensor.network test
13095```
13096
13097The above command will show:
13098
13099```bash
13100>> Subnet lock cost: τ100.000000000
13101```
13102
13103## 4. (Optional) Get faucet tokens
13104
13105The faucet is disabled on the testnet. Hence, if you don't have sufficient faucet tokens, ask the [Bittensor Discord community](https://discord.com/channels/799672011265015819/830068283314929684) for faucet tokens.
13106
13107## 5. Purchase a slot
13108
Using the test TAO from the previous step, you can register your subnet on the testnet. This creates a new subnet on the testnet and gives you owner permissions over it.
13110
13111The below command shows how to purchase a slot.
13112
13113**NOTE**: Slots cost TAO to lock. You will get this TAO back when the subnet is deregistered.
13114
13115```bash
13116btcli subnet create --subtensor.network test
13117```
13118
13119Enter the owner wallet name which gives permissions to the coldkey:
13120
13121```bash
13122>> Enter wallet name (default): owner # Enter your owner wallet name
13123>> Enter password to unlock key: # Enter your wallet password.
13124>> Register subnet? [y/n]: <y/n> # Select yes (y)
13125>> ⠇ 📡 Registering subnet...
13126✅ Registered subnetwork with netuid: 1 # Your subnet netuid will show here, save this for later.
13127```
13128
13129## 6. Register keys
13130
13131This step registers your subnet validator and subnet miner keys to the subnet, giving them the **first two slots** on the subnet.
13132
13133Register your miner key to the subnet:
13134
13135```bash
13136btcli subnet recycle_register --netuid 171 --subtensor.network test --wallet.name miner --wallet.hotkey default
13137```
13138
13139Follow the below prompts:
13140
```bash
>> Enter netuid [171] (171): # Enter the netuid shown when you created your subnet (this example uses 171).
>> Continue Registration?
  hotkey:     ...
  coldkey:    ...
  network:    test [y/n]: # Select yes (y)
>> ✅ Registered
```
13149
13150Next, register your validator key to the subnet:
13151
13152```bash
13153btcli subnet recycle_register --netuid 171 --subtensor.network test --wallet.name validator --wallet.hotkey default
13154```
13155
13156Follow the prompts:
13157
```bash
>> Enter netuid [171] (171): # Enter the netuid shown when you created your subnet (this example uses 171).
>> Continue Registration?
  hotkey:     ...
  coldkey:    ...
  network:    test [y/n]: # Select yes (y)
>> ✅ Registered
```
13166
13167## 7. Check that your keys have been registered
13168
13169This step returns information about your registered keys.
13170
13171Check that your validator key has been registered:
13172
13173```bash
13174btcli wallet overview --wallet.name validator --subtensor.network test
13175```
13176
13177The above command will display the below:
13178
13179```bash
13180Subnet: 1
13181COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
validator  default  0      True   0.00000  0.00000  0.00000  0.00000     0.00000    0.00000    0            0.00000  14  none  5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
131831 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
13184 Wallet balance: τ0.0
13185```
13186
13187Check that your miner has been registered:
13188
13189```bash
13190btcli wallet overview --wallet.name miner --subtensor.network test
13191```
13192
13193The above command will display the below:
13194
13195```bash
13196Subnet: 1
13197COLDKEY HOTKEY UID ACTIVE STAKE(τ) RANK TRUST CONSENSUS INCENTIVE DIVIDENDS EMISSION(ρ) VTRUST VPERMIT UPDATED AXON HOTKEY_SS58
13198miner default 1 True 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 0 0.00000 14 none 5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
131991 1 2 τ0.00000 0.00000 0.00000 0.00000 0.00000 0.00000 ρ0 0.00000
13200 Wallet balance: τ0.0
13201```
13202
13203## 8. Run subnet miner and subnet validator
13204
13205Run the subnet miner:
13206
13207```bash
13208python neurons/miner.py --netuid 171 --subtensor.network test --wallet.name miner --wallet.hotkey default --logging.debug
13209```
13210
13211You will see the below terminal output:
13212
13213```bash
>> 2023-08-08 16:58:11.223 | INFO | Running miner for subnet: 171 on network: test with config: ...
13215```
13216
13217Next, run the subnet validator:
13218
13219```bash
13220python neurons/validator.py --netuid 171 --subtensor.network test --wallet.name validator --wallet.hotkey default --logging.debug
13221```
13222
13223You will see the below terminal output:
13224
13225```bash
>> 2023-08-08 16:58:11.223 | INFO | Running validator for subnet: 171 on network: test with config: ...
13227```
13228
13229
13230## 9. Get emissions flowing
13231
13232Register to the root network using the `btcli`:
13233
13234```bash
13235btcli root register --subtensor.network test
13236```
13237
13238Then set your weights for the subnet:
13239
13240```bash
13241btcli root weights --subtensor.network test
13242```
13243
13244## 10. Stopping your nodes
13245
13246To stop your nodes, press CTRL + C in the terminal where the nodes are running.
13247
13248
13249
13250---
13251File: /docs/vscode_completion.md
13252---
13253
13254# Getting Code Completion in VSCODE
13255
13256## Installing the extension
13257
132581. Open VS Code.
132592. Press `Ctrl+Shift+X` to open the Extensions view.
132603. Search for `Continue.dev` and install it.
132614. Restart VS Code.
13262
13263## Configuring the extension
13264
132651. Run the keybinding `Ctrl+Shift+P` to open the Command Palette.
132662. Type `Continue.dev: Open config.json` and press `Enter`.
132673. This will open the `config.json` file in your workspace.
13268
Now add the following configuration (merged into the top level of your `config.json`):

```json
{
  "models": [
    {
      "title": "Code",
      "model": "code",
      "contextLength": 8000,
      "provider": "openai",
      "apiKey": "EMPTY",
      "apiBase": "http://0.0.0.0:8000/v1"
    }
  ],
  "tabAutocompleteModel": {
    "title": "Code",
    "model": "code",
    "contextLength": 8000,
    "provider": "openai",
    "apiKey": "EMPTY",
    "apiBase": "http://0.0.0.0:8000/v1"
  }
}
```
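
To sanity-check the connection (assuming the subnet API is serving on port 8000 as configured above), you can query the OpenAI-compatible models endpoint:

```bash
curl http://0.0.0.0:8000/v1/models
```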
13292
13293
13294
13295---
13296File: /neurons/__init__.py
13297---
13298
13299
13300
13301
13302---
13303File: /neurons/miner.py
13304---
13305
13306# The MIT License (MIT)
13307# Copyright © 2023 Yuma Rao
13308# Copyright © 2024 Broke
13309
13310# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
13311# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
13312# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
13313# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
13314
13315# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
13316# the Software.
13317
13318# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
13319# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
13320# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13321# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
13322# DEALINGS IN THE SOFTWARE.
13323
13324import time
13325import typing
13326import traceback
13327import importlib
13328import bittensor as bt
13329
13330from typing import Awaitable
13331
13332# Bittensor Miner Template:
13333import coding
13334
13335# import base miner class which takes care of most of the boilerplate
13336from coding.base.miner import BaseMinerNeuron
13337from coding.utils.config import config as util_config
13338from coding.protocol import StreamCodeSynapse, LogicSynapse
13339from coding.miners.swe import miner_process as miner_process_swe
13340
13341class Miner(BaseMinerNeuron):
13342 """
13343 Your miner neuron class. You should use this class to define your miner's behavior. In particular, you should replace the forward function with your own logic. You may also want to override the blacklist and priority functions according to your needs.
13344
13345 This class inherits from the BaseMinerNeuron class, which in turn inherits from BaseNeuron. The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior.
13346
    This class provides reasonable default behavior for a miner, such as blacklisting unrecognized hotkeys, prioritizing requests based on stake, and forwarding requests to the forward function. If you need custom behavior, override the relevant methods.
    """
13349
13350 def __init__(self, config=None):
13351 if not config:
13352 config = util_config(self)
13353 self.forward_capabilities = [
13354 {'forward': self.forward, 'blacklist': self.blacklist, 'priority': self.priority},
13355 {'forward': self.forward_swe, 'blacklist': self.blacklist_swe, 'priority': self.priority_swe},
13356 ]
13357 super().__init__(config=config)
        miner_name = f"coding.miners.{config.miner.name}_miner"
13359 miner_module = importlib.import_module(miner_name)
13360
13361 self.miner_init = miner_module.miner_init
13362 self.miner_process = miner_module.miner_process
13363
13364 self.miner_init(self)
13365
13366 async def forward_swe(
13367 self, synapse: LogicSynapse
13368 ) -> LogicSynapse:
13369 return miner_process_swe(self, synapse)
13370
13371 async def blacklist_swe(
13372 self, synapse: LogicSynapse
13373 ) -> typing.Tuple[bool, str]:
13374 return await self.blacklist(synapse)
13375
13376 async def priority_swe(
13377 self, synapse: LogicSynapse
13378 ) -> float:
13379 return await self.priority(synapse)
13380
13381 def forward(
13382 self, synapse: StreamCodeSynapse
13383 ) -> StreamCodeSynapse:
        """
        Processes an incoming StreamCodeSynapse by delegating to the miner module
        configured via `config.miner.name`.

        Args:
            synapse (StreamCodeSynapse): The synapse object containing the request data.

        Returns:
            StreamCodeSynapse: The synapse object with the miner's response attached.

        The default implementation simply calls the configured miner module's
        miner_process function; replace that module's logic with whatever is
        appropriate for your miner's intended operation.
        """
        response = synapse  # Fall back to the unmodified synapse if processing fails
        try:
            response = self.miner_process(self, synapse)
        except Exception:
            bt.logging.error(
                "An error occurred while processing the synapse: ",
                traceback.format_exc(),
            )
        return response
13405
13406 async def blacklist(
13407 self, synapse: StreamCodeSynapse
13408 ) -> typing.Tuple[bool, str]:
13409 """
13410 Determines whether an incoming request should be blacklisted and thus ignored. Your implementation should
13411 define the logic for blacklisting requests based on your needs and desired security parameters.
13412
13413 Blacklist runs before the synapse data has been deserialized (i.e. before synapse.data is available).
13414 The synapse is instead contructed via the headers of the request. It is important to blacklist
13415 requests before they are deserialized to avoid wasting resources on requests that will be ignored.
13416
13417 Args:
13418 synapse (template.protocol.Dummy): A synapse object constructed from the headers of the incoming request.
13419
13420 Returns:
13421 Tuple[bool, str]: A tuple containing a boolean indicating whether the synapse's hotkey is blacklisted,
13422 and a string providing the reason for the decision.
13423
13424 This function is a security measure to prevent resource wastage on undesired requests. It should be enhanced
13425 to include checks against the metagraph for entity registration, validator status, and sufficient stake
13426 before deserialization of synapse data to minimize processing overhead.
13427
13428 Example blacklist logic:
13429 - Reject if the hotkey is not a registered entity within the metagraph.
13430 - Consider blacklisting entities that are not validators or have insufficient stake.
13431
13432 In practice it would be wise to blacklist requests from entities that are not validators, or do not have
13433 enough stake. This can be checked via metagraph.S and metagraph.validator_permit. You can always attain
13434 the uid of the sender via a metagraph.hotkeys.index( synapse.dendrite.hotkey ) call.
13435
13436 Otherwise, allow the request to be processed further.
13437 """
        try:
            if synapse.dendrite is None or synapse.dendrite.hotkey is None:
                bt.logging.warning("Received a request without a dendrite or hotkey.")
                return True, "Missing dendrite or hotkey"
            if (
                synapse.dendrite.hotkey
                == "5Fy7c6skhxBifdPPEs3TyytxFc7Rq6UdLqysNPZ5AMAUbRQx"
            ):
                return False, "Subnet owner hotkey"
            # TODO(developer): Define how miners should blacklist requests.
            if (
                not self.config.blacklist.allow_non_registered
                and synapse.dendrite.hotkey not in self.metagraph.hotkeys
            ):
                # Ignore requests from un-registered entities.
                bt.logging.trace(
                    f"Blacklisting un-registered hotkey {synapse.dendrite.hotkey}"
                )
                return True, "Unrecognized hotkey"

            # Look up the uid only after confirming the hotkey is registered;
            # .index() raises ValueError for unknown hotkeys.
            uid = self.metagraph.hotkeys.index(synapse.dendrite.hotkey)
            if self.config.blacklist.force_validator_permit:
                # If the config is set to force validator permit, then we should only allow requests from validators.
                if not self.metagraph.validator_permit[uid]:
                    bt.logging.warning(
                        f"Blacklisting a request from non-validator hotkey {synapse.dendrite.hotkey}"
                    )
                    return True, "Non-validator hotkey"

            bt.logging.trace(
                f"Not Blacklisting recognized hotkey {synapse.dendrite.hotkey}"
            )
            return False, "Hotkey recognized!"
        except Exception:
            return True, "Errored out the blacklist function, blacklisting the hotkey"
13473
13474 async def priority(
13475 self, synapse: StreamCodeSynapse
13476 ) -> float:
13477 """
13478 The priority function determines the order in which requests are handled. More valuable or higher-priority
13479 requests are processed before others. You should design your own priority mechanism with care.
13480
13481 This implementation assigns priority to incoming requests based on the calling entity's stake in the metagraph.
13482
13483 Args:
13484 synapse (template.protocol.Dummy): The synapse object that contains metadata about the incoming request.
13485
13486 Returns:
13487 float: A priority score derived from the stake of the calling entity.
13488
        Miners may receive messages from multiple entities at once. This function determines which request should be
13490 processed first. Higher values indicate that the request should be processed first. Lower values indicate
13491 that the request should be processed later.
13492
13493 Example priority logic:
13494 - A higher stake results in a higher priority value.
13495 """
13496 if synapse.dendrite is None or synapse.dendrite.hotkey is None:
13497 bt.logging.warning("Received a request without a dendrite or hotkey.")
13498 return 0.0
13499 try:
13500 caller_uid = self.metagraph.hotkeys.index(
13501 synapse.dendrite.hotkey
13502 ) # Get the caller index.
13503 priority = float(
13504 self.metagraph.S[caller_uid]
13505 ) # Return the stake as the priority.
13506 bt.logging.trace(
13507 f"Prioritizing {synapse.dendrite.hotkey} with value: {priority}"
13508 )
13509 return priority
        except Exception:
            # Lookup failed (e.g. unknown hotkey); fall back to a minimal default priority
            return 1
13512
13513
13514# This is the main function, which runs the miner.
13515if __name__ == "__main__":
13516 with Miner() as miner:
13517 while True:
13518 bt.logging.info(f"Miner running... {time.time()}")
13519 time.sleep(5)
13520
13521
13522
13523---
13524File: /neurons/validator.py
13525---
13526
13527# The MIT License (MIT)
13528# Copyright © 2023 Yuma Rao
13529# Copyright © 2024 Broke
13530
13531# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
13532# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
13533# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
13534# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
13535
13536# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
13537# the Software.
13538
13539# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
13540# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
13541# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13542# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
13543# DEALINGS IN THE SOFTWARE.
13544import dotenv
13545
13546dotenv.load_dotenv()
13547
13548import sys
13549import time
13550import random
13551import asyncio
13552import threading
13553
13554import bittensor as bt
13555from typing import Awaitable, Tuple
13556from code_bert_score import BERTScorer
13557from langchain_openai import ChatOpenAI
13558from concurrent.futures import ThreadPoolExecutor
13559from coding.validator import forward
13560from coding.rewards.pipeline import RewardPipeline
13561from coding.protocol import StreamCodeSynapse
13562
13563# import base validator class which takes care of most of the boilerplate
13564from coding.utils.config import config as util_config
13565from coding.base.validator import BaseValidatorNeuron
13566
13567class Validator(BaseValidatorNeuron):
13568 """
13569 Your validator neuron class. You should use this class to define your validator's behavior. In particular, you should replace the forward function with your own logic.
13570
13571 This class inherits from the BaseValidatorNeuron class, which in turn inherits from BaseNeuron. The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior.
13572
13573 This class provides reasonable default behavior for a validator such as keeping a moving average of the scores of the miners and using them to set weights at the end of each epoch. Additionally, the scores are reset for new hotkeys at the end of each epoch.
13574 """
13575
13576 def __init__(self, config=None):
13577 if not config:
13578 config = util_config(self)
13579 self.finetune_results = {}
13580 super(Validator, self).__init__(config=config)
13581
13582 bt.logging.info("load_state()")
13583 self.load_state()
13584
13585 self.active_tasks = [
13586 task
13587 for task, p in zip(
13588 self.config.neuron.tasks, self.config.neuron.task_weights
13589 )
13590 if p > 0
13591 ]
13592 self.executor = ThreadPoolExecutor()
13593 # Load the reward pipeline
13594 self.reward_pipeline = RewardPipeline(
13595 selected_tasks=self.active_tasks,
13596 device=self.device,
13597 code_scorer=None,
13598 )
13599
13600 def _forward(
13601 self, synapse: StreamCodeSynapse
    ) -> StreamCodeSynapse:  # TODO remove this since it's duplicate code; could be handled better
13605 """
13606 forward method that is called when the validator is queried with an axon
13607 """
13608 return forward(self, synapse)
13609 # # response = forward_organic_synapse(self, synapse=synapse)
13610
13611 # def _run():
13612 # asyncio.run(forward(self, synapse))
13613
13614 # if random.random() < self.config.neuron.percent_organic_score:
13615 # try:
13616 # loop = asyncio.get_running_loop()
13617 # loop.create_task(forward(self, synapse))
13618 # except RuntimeError: # No event loop running
13619 # threading.Thread(target=_run).start()
13620 # # return the response
13621 # return response
13622
13623 async def forward(self, synapse: StreamCodeSynapse) -> Awaitable:
13624 """
13625 Validator forward pass. Consists of:
13626 - Generating the query
13627 - Querying the miners
13628 - Getting the responses
13629 - Rewarding the miners
13630 - Updating the scores
13631 """
13632 return forward(self, synapse)
13633
13634 # TODO make it so that the only thing accepted is the subnet owners hotkey + the validators coldkey
13635 async def blacklist(self, synapse: StreamCodeSynapse) -> Tuple[bool, str]:
13636 """
13637 Determines whether an incoming request should be blacklisted and thus ignored. Your implementation should
13638 define the logic for blacklisting requests based on your needs and desired security parameters.
13639
13640 Blacklist runs before the synapse data has been deserialized (i.e. before synapse.data is available).
13641 The synapse is instead contructed via the headers of the request. It is important to blacklist
13642 requests before they are deserialized to avoid wasting resources on requests that will be ignored.
13643
13644 Args:
13645 synapse (template.protocol.Dummy): A synapse object constructed from the headers of the incoming request.
13646
13647 Returns:
13648 Tuple[bool, str]: A tuple containing a boolean indicating whether the synapse's hotkey is blacklisted,
13649 and a string providing the reason for the decision.
13650
13651 This function is a security measure to prevent resource wastage on undesired requests. It should be enhanced
13652 to include checks against the metagraph for entity registration, validator status, and sufficient stake
13653 before deserialization of synapse data to minimize processing overhead.
13654
13655 Example blacklist logic:
13656 - Reject if the hotkey is not a registered entity within the metagraph.
13657 - Consider blacklisting entities that are not validators or have insufficient stake.
13658
13659 In practice it would be wise to blacklist requests from entities that are not validators, or do not have
13660 enough stake. This can be checked via metagraph.S and metagraph.validator_permit. You can always attain
13661 the uid of the sender via a metagraph.hotkeys.index( synapse.dendrite.hotkey ) call.
13662
13663 Otherwise, allow the request to be processed further.
13664 """
        if synapse.dendrite is None or synapse.dendrite.hotkey is None:
            bt.logging.warning("Received a request without a dendrite or hotkey.")
            return True, "Missing dendrite or hotkey"
        if synapse.dendrite.hotkey == "5Fy7c6skhxBifdPPEs3TyytxFc7Rq6UdLqysNPZ5AMAUbRQx":
            return False, "Subnet owner hotkey"
        return True, "Blacklisted"
13668
13669 async def priority(self, synapse: StreamCodeSynapse) -> float:
13670 """
13671 The priority function determines the order in which requests are handled. More valuable or higher-priority
13672 requests are processed before others. You should design your own priority mechanism with care.
13673
13674 This implementation assigns priority to incoming requests based on the calling entity's stake in the metagraph.
13675
13676 Args:
13677 synapse (template.protocol.Dummy): The synapse object that contains metadata about the incoming request.
13678
13679 Returns:
13680 float: A priority score derived from the stake of the calling entity.
13681
        Miners may receive messages from multiple entities at once. This function determines which request should be
13683 processed first. Higher values indicate that the request should be processed first. Lower values indicate
13684 that the request should be processed later.
13685
13686 Example priority logic:
13687 - A higher stake results in a higher priority value.
13688 """
13689 if synapse.dendrite is None or synapse.dendrite.hotkey is None:
13690 bt.logging.warning("Received a request without a dendrite or hotkey.")
13691 return 0.0
13692
13693 # TODO(developer): Define how miners should prioritize requests.
13694 caller_uid = self.metagraph.hotkeys.index(
13695 synapse.dendrite.hotkey
13696 ) # Get the caller index.
13697 priority = float(
13698 self.metagraph.S[caller_uid]
13699 ) # Return the stake as the priority.
13700 bt.logging.trace(
13701 f"Prioritizing {synapse.dendrite.hotkey} with value: {priority}"
13702 )
13703 return priority
13704
13705
13706# The main function parses the configuration and runs the validator.
13707if __name__ == "__main__":
13708 with Validator() as validator:
13709 while True:
13710 if not validator.thread.is_alive():
13711 bt.logging.error("Child thread has exited, terminating parent thread.")
13712 sys.exit(1) # Exit the parent thread if the child thread dies
13713 bt.logging.info(f"Validator running... {time.time()}")
13714 time.sleep(5)
13715
13716
13717
13718---
13719File: /notebooks/example_submission/diff.py
13720---
13721
13722from difflib import unified_diff
13723from typing import Dict
13724from swebase import Patch, Edit
13725
13726def create_patch(original_files: Dict[str, str], edited_files: Dict[str, str]) -> Patch:
13727 """
13728 Create a Patch object by comparing original and edited file contents
13729
13730 Args:
13731 original_files (Dict[str, str]): Dictionary mapping filenames to original file contents
13732 edited_files (Dict[str, str]): Dictionary mapping filenames to edited file contents
13733
13734 Returns:
13735 Patch: Patch object containing the edits
13736 """
13737 edits = []
13738
13739 # Process each edited file
13740 for filename in edited_files:
13741 if filename not in original_files:
13742 continue
13743
13744 # Split files into lines
13745 original_lines = original_files[filename].splitlines()
13746 edited_lines = edited_files[filename].splitlines()
13747
13748 # Generate diff
13749 diff = list(unified_diff(
13750 original_lines,
13751 edited_lines,
13752 lineterm='',
13753 ))
13754
13755 print(f"Diff for {filename}:")
13756 for d in diff:
13757 print(d)
13758
        # Parse diff to create Edit objects
        line_num = 0
        j = 0
        while j < len(diff):
            line = diff[j]
            if line.startswith('---') or line.startswith('+++'):
                # File header lines emitted by unified_diff; nothing to parse
                pass
            elif line.startswith('@@'):
                # Parse the line numbers from the @@ line
                # Format is @@ -start,length +start,length @@
                parts = line.split(' ')
                if len(parts) >= 2:
                    old_range = parts[1]  # Get the -start,length part
                    line_num = int(old_range.split(',')[0][1:])  # Extract start number after '-'
            elif line.startswith('-'):
                # unified_diff marks removals as '-<content>' with no space after the marker
                old_content = line[1:]
                # Check if the next line is an addition (i.e. this is a modification)
                if j + 1 < len(diff) and diff[j + 1].startswith('+'):
                    new_content = diff[j + 1][1:]
                    edits.append(
                        Edit(
                            file_name=filename,
                            line_number=line_num,
                            line_content=old_content,
                            new_line_content=new_content
                        )
                    )
                    j += 1  # Skip the next line since we handled it
                line_num += 1
            elif line.startswith('+'):
                # A pure addition; an empty line_content marks an insertion
                edits.append(
                    Edit(
                        file_name=filename,
                        line_number=line_num,
                        line_content="",
                        new_line_content=line[1:]
                    )
                )
            else:
                # Context line: advance the original-file line counter
                line_num += 1
            j += 1
    return Patch(edits=edits)
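
A minimal usage sketch with hypothetical file contents:

```python
# One file whose second line was modified
original = {"app.py": "import sys\nx = 1\nprint(x)\n"}
edited = {"app.py": "import sys\nx = 2\nprint(x)\n"}

patch = create_patch(original, edited)
for e in patch.edits:
    print(e.file_name, e.line_number, repr(e.line_content), "->", repr(e.new_line_content))
```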
13810
13811
13812---
13813File: /notebooks/example_submission/files.py
13814---
13815
13816import os
from typing import Dict

def load_directory(directory: str) -> Dict[str, str]:
13820 # Create repo_files dict from task.repo.path
13821 repo_files = {}
13822
13823 # Walk through all files in repo path
13824 for root, dirs, files in os.walk(directory):
13825 # Skip __pycache__ directories
13826 if '__pycache__' in dirs:
13827 dirs.remove('__pycache__')
13828
13829 # Get relative path from repo root
13830 rel_path = os.path.relpath(root, directory)
13831
13832 # Process all files
13833 for filename in files:
13834 # Skip __pycache__ files
13835 if '__pycache__' in filename:
13836 continue
13837
13838 file_path = os.path.join(root, filename)
13839
13840 # Get the relative path for the repo_files dict key
13841 if rel_path == '.':
13842 repo_key = filename
13843 else:
13844 repo_key = os.path.join(rel_path, filename)
13845
13846 # Read file contents
13847 with open(file_path, 'r', encoding='latin-1') as f:
13848 repo_files[repo_key] = f.read()
13849 return repo_files
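
A quick usage sketch (hypothetical path):

```python
repo_files = load_directory("/path/to/repo")
for name in sorted(repo_files)[:5]:
    print(name, len(repo_files[name]), "chars")
```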
13850
13851
13852---
13853File: /notebooks/example_submission/fix.py
13854---
13855
13856from typing import List, Dict
13857FIX_PROMPT = """
13858Given the following file and the issue, rewrite the file to fix the issue. If no issue is found, respond with nothing.
13859
13860File: {file}
13861
13862Issue: {issue}
13863"""
13864
13865
13866def fix(files: Dict[str, str], file_names: List[str], issue: str, llm) -> Dict[str, str]:
13867 fixed_files = {}
13868 for file_name in file_names:
13869 prompt = FIX_PROMPT.format(file=files[file_name], issue=issue)
13870 response, _ = llm(prompt, "gpt-4o")
13871
13872 # Extract code block if present
13873 if "```python" in response:
13874 start = response.find("```python") + len("```python")
13875 end = response.find("```", start)
13876 response = response[start:end]
13877 elif "```" in response:
13878 start = response.find("```") + len("```")
13879 end = response.find("```", start)
13880 response = response[start:end]
13881
13882 if response:
13883 fixed_files[file_name] = response.strip()
13884
13885 return fixed_files
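
A minimal usage sketch (hypothetical file contents; `llm` is assumed to be the `LLMClient` from `swebase.py`, as in `submission.py`):

```python
from swebase import LLMClient

llm = LLMClient()
files = {"math_utils.py": "def add(a, b):\n    return a - b\n"}
fixed = fix(files, ["math_utils.py"], "add() subtracts instead of adding", llm)
print(fixed.get("math_utils.py"))
```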
13886
13887
13888---
13889File: /notebooks/example_submission/search.py
13890---
13891
13892import ast
13893from typing import List
13894
13895SEARCH_PROMPT = """
13896Given the following file names, find the file that contains the code that is relevant to the issue.
13897
13898{file_names}
13899
13900Issue: {issue}
13901
13902Your response should be a python list of file names.
13903"""
13904
def search(file_names: List[str], issue: str, llm) -> List[str]:
13906 prompt = SEARCH_PROMPT.format(file_names=file_names, issue=issue)
13907 response, _ = llm(prompt, "gpt-4o")
13908
13909 # Extract code block if present
13910 if "```python" in response:
13911 start = response.find("```python") + len("```python")
13912 end = response.find("```", start)
13913 response = response[start:end]
13914 elif "```" in response:
13915 start = response.find("```") + len("```")
13916 end = response.find("```", start)
13917 response = response[start:end]
13918
    # Clean and parse the response
    response = response.strip()
    try:
        # Safely evaluate the string as a Python literal (ast is imported at module level)
        files = ast.literal_eval(response)
        if not isinstance(files, list):
            files = [files]
    except (ValueError, SyntaxError):
        # Fall back to basic string parsing if literal_eval fails
        files = response.replace("[", "").replace("]", "").replace("'", "").replace("\"", "").split(",")
        files = [f.strip() for f in files if f.strip()]
13931
13932 return files
13933
13934
13935---
13936File: /notebooks/example_submission/submission.py
13937---
13938
13939from fix import fix
13940from search import search
13941from diff import create_patch
13942from files import load_directory
13943from swebase import SWEBase, Patch
13944
13945
13946class SWE(SWEBase):
    def __call__(self, repo_location: str, issue_description: str) -> Patch:
        print(f"Loading files from directory: {repo_location}")
        files = load_directory(repo_location)
        print(f"Loaded {len(files)} files")

        # search() expects a list of file names, so load the repo first
        print(f"Searching for relevant files for issue: {issue_description}")
        file_names = search(list(files.keys()), issue_description, self.llm)
        print(f"Found relevant files: {file_names}")
13955
13956 print("Fixing files...")
13957 fixed_files = fix(files, file_names, issue_description, self.llm)
13958 print(f"Fixed {len(fixed_files)} files")
13959
13960 print("Creating patch...")
13961 patch = create_patch(files, fixed_files)
13962 print("Patch created")
13963 return patch
13964
13965
13966
13967---
13968File: /notebooks/example_submission/swebase.py
13969---
13970
13971import os
13972import requests
13973from pydantic import BaseModel
13974from abc import ABC, abstractmethod
13975from langchain_openai import ChatOpenAI
13976
13977class Edit(BaseModel):
13978 file_name: str
13979 line_number: int
13980 line_content: str
13981 new_line_content: str
13982
13983class Patch(BaseModel):
13984 edits: list[Edit]
13985
13986class LLMClient:
13987 def __init__(self, base_url: str = f"http://{os.getenv('HOST_IP', 'localhost')}:25000"):
13988 """Initialize LLM client with API server URL"""
13989 self.base_url = base_url.rstrip("/")
13990 self.use_server = True
        try:
            # Test connection to the server
            requests.get(self.base_url, timeout=5)
        except requests.exceptions.RequestException:
            # If the server is not available, fall back to the local ChatOpenAI
            # client (imported at module level)
            self.use_server = False
        self.chat_models = {}
13999
14000 def __call__(self, query: str, llm_name: str) -> tuple[str, int]:
14001 """
14002 Call LLM API endpoint or local ChatOpenAI
14003
14004 Args:
14005 query (str): The prompt/query to send to the LLM
14006 llm_name (str): Name of LLM model to use (e.g. "gpt-4", "claude-3-sonnet")
14007
14008 Returns:
14009 tuple[str, int]: (Generated response text, Total tokens used for this key)
14010
14011 Raises:
14012 requests.exceptions.RequestException: If API call fails when using server
14013 """
14014 if self.use_server:
14015 payload = {"query": query, "llm_name": llm_name}
14016 response = requests.post(f"{self.base_url}/call", json=payload)
14017 response.raise_for_status()
14018 result = response.json()
14019 return result["result"], result["total_tokens"]
14020 else:
14021 # Use local ChatOpenAI
14022 if llm_name not in self.chat_models:
14023 self.chat_models[llm_name] = ChatOpenAI(model_name=llm_name)
14024 response = self.chat_models[llm_name].invoke(query)
14025 # ChatOpenAI doesn't provide token count, so return -1
14026 return response.content, -1
14027
14028 def embed(self, query: str) -> list[float]:
14029 """
14030 Get embeddings for text using the embedding API endpoint or local embeddings
14031
14032 Args:
14033 query (str): The text to get embeddings for
14034
14035 Returns:
14036 list[float]: Vector embedding of the input text
14037
14038 Raises:
14039 requests.exceptions.RequestException: If API call fails when using server
14040 """
14041 if self.use_server:
14042 payload = {"query": query}
14043 response = requests.post(f"{self.base_url}/embed", json=payload)
14044 response.raise_for_status()
14045 result = response.json()
14046 return result["vector"]
14047 else:
14048 # Use local embeddings
14049 from langchain_openai import OpenAIEmbeddings
14050 embeddings = OpenAIEmbeddings()
14051 return embeddings.embed_query(query)
14052
14053class SWEBase(ABC):
14054 def __init__(self):
14055 self.llm = LLMClient()
14056
14057 @abstractmethod
14058 def __call__(self, repo_location: str, issue_description: str) -> Patch:
14059 pass
14060
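A minimal usage sketch for `LLMClient` (a hypothetical example; it assumes either the API server at `http://HOST_IP:25000` is reachable, or `OPENAI_API_KEY` is set so the local `ChatOpenAI` fallback works):

```python
client = LLMClient()

# Route a prompt to a named model; tokens is -1 when the local fallback is used
answer, tokens = client("Explain a unified diff in one sentence.", "gpt-4o")

# Embed a snippet of code for similarity search
vector = client.embed("def add(a, b): return a + b")
print(answer, tokens, len(vector))
```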
14061
14062
14063---
14064File: /scripts/check_compatibility.sh
14065---
14066
14067#!/bin/bash
14068
14069if [ -z "$1" ]; then
14070 echo "Please provide a Python version as an argument."
14071 exit 1
14072fi
14073
14074python_version="$1"
14075all_passed=true
14076
14077GREEN='\033[0;32m'
14078YELLOW='\033[0;33m'
14079RED='\033[0;31m'
14080NC='\033[0m' # No Color
14081
14082check_compatibility() {
14083 all_supported=0
14084
14085 while read -r requirement; do
14086 # Skip lines starting with git+
14087 if [[ "$requirement" == git+* ]]; then
14088 continue
14089 fi
14090
14091 package_name=$(echo "$requirement" | awk -F'[!=<>]' '{print $1}' | awk -F'[' '{print $1}') # Strip off brackets
14092 echo -n "Checking $package_name... "
14093
14094 url="https://pypi.org/pypi/$package_name/json"
14095 response=$(curl -s $url)
14096 status_code=$(curl -s -o /dev/null -w "%{http_code}" $url)
14097
14098 if [ "$status_code" != "200" ]; then
14099 echo -e "${RED}Information not available for $package_name. Failure.${NC}"
14100 all_supported=1
14101 continue
14102 fi
14103
14104 classifiers=$(echo "$response" | jq -r '.info.classifiers[]')
14105 requires_python=$(echo "$response" | jq -r '.info.requires_python')
14106
14107 base_version="Programming Language :: Python :: ${python_version%%.*}"
14108 specific_version="Programming Language :: Python :: $python_version"
14109
14110 if echo "$classifiers" | grep -q "$specific_version" || echo "$classifiers" | grep -q "$base_version"; then
14111 echo -e "${GREEN}Supported${NC}"
14112 elif [ "$requires_python" != "null" ]; then
14113 if echo "$requires_python" | grep -Eq "==$python_version|>=$python_version|<=$python_version"; then
14114 echo -e "${GREEN}Supported${NC}"
14115 else
14116 echo -e "${RED}Not compatible with Python $python_version due to constraint $requires_python.${NC}"
14117 all_supported=1
14118 fi
14119 else
14120 echo -e "${YELLOW}Warning: Specific version not listed, assuming compatibility${NC}"
14121 fi
14122 done < requirements.txt
14123
14124 return $all_supported
14125}
14126
14127echo "Checking compatibility for Python $python_version..."
14128check_compatibility
14129if [ $? -eq 0 ]; then
14130 echo -e "${GREEN}All requirements are compatible with Python $python_version.${NC}"
14131else
14132 echo -e "${RED}All requirements are NOT compatible with Python $python_version.${NC}"
14133 all_passed=false
14134fi
14135
14136echo ""
14137if $all_passed; then
14138 echo -e "${GREEN}All tests passed.${NC}"
14139else
14140 echo -e "${RED}All tests did not pass.${NC}"
14141 exit 1
14142fi
14143
14144
14145
14146---
14147File: /scripts/check_requirements_changes.sh
14148---
14149
14150#!/bin/bash
14151
14152# Check if requirements files have changed in the last commit
if git diff --name-only HEAD~1 | grep -E 'requirements\.txt'; then
14154 echo "Requirements files have changed. Running compatibility checks..."
14155 echo 'export REQUIREMENTS_CHANGED="true"' >> $BASH_ENV
14156else
14157 echo "Requirements files have not changed. Skipping compatibility checks..."
14158 echo 'export REQUIREMENTS_CHANGED="false"' >> $BASH_ENV
14159fi
14160
14161
14162
14163---
14164File: /scripts/docker-firewall.py
14165---
14166
14167import docker
14168import subprocess
14169import time
14170
14171def run_command(command):
14172 """Run a shell command and return its output."""
14173 result = subprocess.run(command, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
14174 if result.returncode != 0:
14175 print(f"Error running command '{command}': {result.stderr.strip()}")
14176 return result.stdout.strip()
14177
14178def get_container_ip(container):
14179 """Get the IP address of a container."""
14180 try:
14181 return container.attrs['NetworkSettings']['IPAddress']
14182 except KeyError:
14183 return None
14184
def add_iptables_rule(ip):
    """Add iptables rules to restrict a container's traffic to port 25000."""
    # `iptables -C` signals "rule exists" via its exit status and prints nothing,
    # so check the return code rather than stdout.
    check = subprocess.run(
        f"iptables -C FORWARD -s {ip} -p tcp --dport 25000 -j ACCEPT",
        shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
    )
    if check.returncode == 0:
        return  # Rule already exists

    # Add the rules
    run_command(f"iptables -A FORWARD -s {ip} -p tcp --dport 25000 -j ACCEPT")
    run_command(f"iptables -A FORWARD -s {ip} -j DROP")
    print(f"Added iptables rules for IP: {ip}")
14196
14197def monitor_containers():
14198 """Monitor Docker containers and apply iptables rules dynamically."""
14199 client = docker.from_env()
14200 applied_ips = set()
14201
14202 while True:
14203 try:
14204 containers = client.containers.list()
14205 for container in containers:
14206 if "swe" in container.name:
14207 ip = get_container_ip(container)
14208 if ip and ip not in applied_ips:
14209 add_iptables_rule(ip)
14210 applied_ips.add(ip)
14211
14212 # Clean up rules for stopped containers
14213 active_ips = {get_container_ip(c) for c in containers if "swe" in c.name}
14214 removed_ips = applied_ips - active_ips
14215 for ip in removed_ips:
14216 run_command(f"iptables -D FORWARD -s {ip} -p tcp --dport 25000 -j ACCEPT")
14217 run_command(f"iptables -D FORWARD -s {ip} -j DROP")
14218 print(f"Removed iptables rules for IP: {ip}")
14219 applied_ips.remove(ip)
14220
14221 except Exception as e:
14222 print(f"Error: {e}")
14223
14224 time.sleep(5) # Check every 5 seconds
14225
14226if __name__ == "__main__":
14227 monitor_containers()
14228
14229
14230
14231---
14232File: /scripts/install_staging.sh
14233---
14234
14235#!/bin/bash
14236
14237# Section 1: Build/Install
14238# This section is for first-time setup and installations.
14239
14240install_dependencies() {
14241 # Function to install packages on macOS
14242 install_mac() {
14243 which brew > /dev/null
14244 if [ $? -ne 0 ]; then
14245 echo "Installing Homebrew..."
14246 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
14247 fi
14248 echo "Updating Homebrew packages..."
14249 brew update
14250 echo "Installing required packages..."
14251 brew install make llvm curl libssl protobuf tmux
14252 }
14253
14254 # Function to install packages on Ubuntu/Debian
14255 install_ubuntu() {
14256 echo "Updating system packages..."
14257 sudo apt update
14258 echo "Installing required packages..."
14259 sudo apt install --assume-yes make build-essential git clang curl libssl-dev llvm libudev-dev protobuf-compiler tmux
14260 }
14261
14262 # Detect OS and call the appropriate function
14263 if [[ "$OSTYPE" == "darwin"* ]]; then
14264 install_mac
14265 elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
14266 install_ubuntu
14267 else
14268 echo "Unsupported operating system."
14269 exit 1
14270 fi
14271
14272 # Install rust and cargo
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y  # -y for a non-interactive install
14274
14275 # Update your shell's source to include Cargo's path
14276 source "$HOME/.cargo/env"
14277}
14278
14279# Call install_dependencies only if it's the first time running the script
14280if [ ! -f ".dependencies_installed" ]; then
14281 install_dependencies
14282 touch .dependencies_installed
14283fi
14284
14285
14286# Section 2: Test/Run
14287# This section is for running and testing the setup.
14288
14289# Create a coldkey for the owner role
14290wallet=${1:-owner}
14291
14292# Logic for setting up and running the environment
14293setup_environment() {
14294 # Clone subtensor and enter the directory
14295 if [ ! -d "subtensor" ]; then
14296 git clone https://github.com/opentensor/subtensor.git
14297 fi
14298 cd subtensor
14299 git pull
14300
14301 # Update to the nightly version of rust
14302 ./scripts/init.sh
14303
14304 cd ../bittensor-subnet-template
14305
14306 # Install the bittensor-subnet-template python package
14307 python -m pip install -e .
14308
14309 # Create and set up wallets
14310 # This section can be skipped if wallets are already set up
14311 if [ ! -f ".wallets_setup" ]; then
14312 btcli wallet new_coldkey --wallet.name $wallet --no_password --no_prompt
14313 btcli wallet new_coldkey --wallet.name miner --no_password --no_prompt
14314 btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default --no_prompt
14315 btcli wallet new_coldkey --wallet.name validator --no_password --no_prompt
14316 btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default --no_prompt
14317 touch .wallets_setup
14318 fi
14319
14320}
14321
14322# Call setup_environment every time
14323setup_environment
14324
14325## Setup localnet
14326# assumes we are in the bittensor-subnet-template/ directory
14327# Initialize your local subtensor chain in development mode. This command will set up and run a local subtensor network.
14328cd ../subtensor
14329
14330# Start a new tmux session and create a new pane, but do not switch to it
echo "FEATURES='pow-faucet runtime-benchmarks' BT_DEFAULT_TOKEN_WALLET=$(cat ~/.bittensor/wallets/$wallet/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+') bash scripts/localnet.sh" > setup_and_run.sh # overwrite rather than append on re-runs
14332chmod +x setup_and_run.sh
14333tmux new-session -d -s localnet -n 'localnet'
14334tmux send-keys -t localnet 'bash ../subtensor/setup_and_run.sh' C-m
14335
14336# Notify the user
14337echo ">> localnet.sh is running in a detached tmux session named 'localnet'"
14338echo ">> You can attach to this session with: tmux attach-session -t localnet"
14339
14340# Register a subnet (this needs to be run each time we start a new local chain)
14341btcli subnet create --wallet.name $wallet --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
14342
14343# Transfer tokens to miner and validator coldkeys
14344export BT_MINER_TOKEN_WALLET=$(cat ~/.bittensor/wallets/miner/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+')
14345export BT_VALIDATOR_TOKEN_WALLET=$(cat ~/.bittensor/wallets/validator/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+')
14346
14347btcli wallet transfer --subtensor.network ws://127.0.0.1:9946 --wallet.name $wallet --dest $BT_MINER_TOKEN_WALLET --amount 1000 --no_prompt
14348btcli wallet transfer --subtensor.network ws://127.0.0.1:9946 --wallet.name $wallet --dest $BT_VALIDATOR_TOKEN_WALLET --amount 10000 --no_prompt
14349
14350# Register wallet hotkeys to subnet
14351btcli subnet register --wallet.name miner --netuid 1 --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
14352btcli subnet register --wallet.name validator --netuid 1 --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
14353
14354# Add stake to the validator
14355btcli stake add --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --amount 10000 --no_prompt
14356
14357# Ensure both the miner and validator keys are successfully registered.
14358btcli subnet list --subtensor.chain_endpoint ws://127.0.0.1:9946
14359btcli wallet overview --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
14360btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
14361
14362cd ../bittensor-subnet-template
14363
14364
14365# Check if inside a tmux session
14366if [ -z "$TMUX" ]; then
14367 # Start a new tmux session and run the miner in the first pane
14368 tmux new-session -d -s bittensor -n 'miner' 'python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug'
14369
14370 # Split the window and run the validator in the new pane
14371 tmux split-window -h -t bittensor:miner 'python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name validator --wallet.hotkey default --logging.debug'
14372
14373 # Attach to the new tmux session
14374 tmux attach-session -t bittensor
14375else
14376 # If already in a tmux session, create two panes in the current window
14377 tmux split-window -h 'python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug'
    tmux split-window -v -t 0 'python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name validator --wallet.hotkey default --logging.debug'
14379fi
14380
14381
14382
14383---
14384File: /scripts/start_validator.py
14385---
14386
14387"""
14388This script runs a validator process and automatically updates it when a new version is released.
14389Command-line arguments will be forwarded to validator (`neurons/validator.py`), so you can pass
14390them like this:
14391 python3 scripts/start_validator.py --wallet.name=my-wallet
14392Auto-updates are enabled by default and will make sure that the latest version is always running
14393by pulling the latest version from git and upgrading python packages. This is done periodically.
14394Local changes may prevent the update, but they will be preserved.
14395
14396The script will use the same virtual environment as the one used to run it. If you want to run
14397validator within virtual environment, run this auto-update script from the virtual environment.
14398
14399Pm2 is required for this script. This script will start a pm2 process using the name provided by
14400the --pm2_name argument.
14401"""
14402
14403import argparse
14404import logging
14405import subprocess
14406import sys
14407import os
14408import time
14409from datetime import timedelta
14410from shlex import split
14411from typing import List
14412import datetime
14413
14414log = logging.getLogger(__name__)
14415UPDATES_CHECK_TIME = timedelta(minutes=1)
14416
14417
14418def get_version() -> str:
14419 """Extract the version as current git commit hash"""
14420 result = subprocess.run(
14421 split("git rev-parse HEAD"),
14422 check=True,
14423 capture_output=True,
14424 cwd=os.getcwd(),
14425 )
14426 commit = result.stdout.decode().strip()
14427 assert len(commit) == 40, f"Invalid commit hash: {commit}"
14428 return commit[:8]
14429
14430
14431def start_validator_process(pm2_name: str, args: List[str], current_version: str = "0") -> subprocess.Popen:
14432 """
14433 Spawn a new python process running neurons.validator.
    `sys.executable` ensures that the same python interpreter is used as the one
14435 used to run this auto-updater.
14436 """
14437 assert sys.executable, "Failed to get python executable"
14438
14439 # First check if process already exists and delete it
14440 try:
14441 subprocess.run(("pm2", "delete", pm2_name), cwd=os.getcwd(), check=True)
14442 except subprocess.CalledProcessError:
14443 # Process doesn't exist, which is fine
14444 pass
14445
14446 log.info("Starting validator process with pm2, name: %s", pm2_name)
14447 process = subprocess.Popen(
14448 (
14449 "pm2",
14450 "start",
14451 "--interpreter",
14452 "python3",
14453 "--name",
14454 pm2_name,
14455 "neurons/validator.py",
14456 "--",
14457 *args,
14458 ),
14459 cwd=os.getcwd(),
14460 )
14461 process.pm2_name = pm2_name
14462 log.info("Started validator process with pm2, name: %s, version: %s", pm2_name, current_version)
14463
14464 return process
14465
14466def stop_validator_process(process: subprocess.Popen) -> None:
14467 """Stop the validator process"""
14468 subprocess.run(("pm2", "delete", process.pm2_name), cwd=os.getcwd(), check=True)
14469
14470
14471def pull_latest_version() -> None:
14472 """
14473 Pull the latest version from git.
14474 This uses `git pull --rebase`, so if any changes were made to the local repository,
14475 this will try to apply them on top of origin's changes. This is intentional, as we
14476 don't want to overwrite any local changes. However, if there are any conflicts,
14477 this will abort the rebase and return to the original state.
14478 The conflicts are expected to happen rarely since validator is expected
14479 to be used as-is.
14480 """
14481 try:
14482 subprocess.run(split("git pull --rebase --autostash"), check=True, cwd=os.getcwd())
14483 except subprocess.CalledProcessError as exc:
14484 log.error("Failed to pull, reverting: %s", exc)
14485
14486 subprocess.run(split("git rebase --abort"), check=True, cwd=os.getcwd())
14487
14488
14489def upgrade_packages() -> None:
14490 """
14491 Upgrade python packages by running `pip install --upgrade -r requirements.txt`.
14492 Notice: this won't work if some package in `requirements.txt` is downgraded.
14493 Ignored as this is unlikely to happen.
14494 """
14495 log.info("Upgrading requirements")
14496 try:
14497 subprocess.run(
14498 split(f"{sys.executable} -m pip install --use-deprecated=legacy-resolver -r requirements.txt"),
14499 check=True,
14500 cwd=os.getcwd(),
14501 )
14502 except subprocess.CalledProcessError as exc:
14503 log.error("Failed to upgrade packages, proceeding anyway. %s", exc)
14504
14505 log.info("Upgrading packages")
14506 try:
14507 subprocess.run(
14508 split(f"{sys.executable} -m pip install -e ."),
14509 check=True,
14510 cwd=os.getcwd(),
14511 )
14512 except subprocess.CalledProcessError as exc:
14513 log.error("Failed to upgrade packages, proceeding anyway. %s", exc)
14514
14515
14516def main(pm2_name: str, args: List[str]) -> None:
14517 """
14518 Run the validator process and automatically update it when a new version is released.
14519 This will check for updates every `UPDATES_CHECK_TIME` and update the validator
14520 if a new version is available. Update is performed as simple `git pull --rebase`.
14521 """
14522
14523 validator = start_validator_process(pm2_name, args)
14524 current_version = get_version()
14525
14526 log.info("Current version: %s", current_version)
14527
14528 try:
14529 while True:
14530 try:
14531 pull_latest_version()
14532 latest_version = get_version()
14533 log.info("Latest version: %s", latest_version)
14534
14535 if latest_version != current_version:
14536 log.info(
14537 "Upgraded to latest version: %s -> %s",
14538 current_version,
14539 latest_version,
14540 )
14541 upgrade_packages()
14542 current_version = get_version()
14543 stop_validator_process(validator)
14544 validator = start_validator_process(pm2_name, args, current_version)
14545 current_version = latest_version
14546
14547 time.sleep(UPDATES_CHECK_TIME.total_seconds())
            except Exception:
                # Log and keep the auto-update loop alive; a bare `except: pass`
                # would also swallow KeyboardInterrupt.
                log.exception("Update check failed, retrying")
14550 finally:
14551 stop_validator_process(validator)
14552
14553
14554if __name__ == "__main__":
14555 logging.basicConfig(
14556 level=logging.INFO,
14557 format="%(asctime)s %(levelname)s %(message)s",
14558 handlers=[logging.StreamHandler(sys.stdout)],
14559 )
14560
14561 parser = argparse.ArgumentParser(
14562 description="Automatically update and restart the validator process when a new version is released.",
        epilog="Example usage: python start_validator.py --pm2_name 'sn45vali' --wallet.name 'wallet1' --wallet.hotkey 'key123'",
14564 )
14565
14566 parser.add_argument("--pm2_name", default="sn45vali", help="Name of the PM2 process.")
14567
14568 flags, extra_args = parser.parse_known_args()
14569
14570 main(flags.pm2_name, extra_args)
14571
14572
14573
14574
14575---
14576File: /verify/generate.py
14577---
14578
14579from substrateinterface import Keypair
14580from os import getenv, environ
14581from datetime import datetime
14582import bittensor
14583
14584# Hardcode or set the environment variable WALLET_PASS to the password for the wallet
14585# environ["WALLET_PASS"] = ""
14586
14587
14588def main(args):
14589 wallet = bittensor.wallet(name=args.name)
14590 keypair = wallet.coldkey
14591
14592 timestamp = datetime.now()
14593 timezone = timestamp.astimezone().tzname()
14594
14595 message = f"On {timestamp} {timezone} {args.message}"
14596 signature = keypair.sign(data=message)
14597
    file_contents = f"{message}\n\tSigned by: {keypair.ss58_address}\n\tSignature: {signature.hex()}"
    print(file_contents)
    with open("message_and_signature.txt", "w") as f:
        f.write(file_contents)

    print("Signature generated and saved to message_and_signature.txt")
14603
14604
14605if __name__ == "__main__":
14606 import argparse
14607
14608 parser = argparse.ArgumentParser(description="Generate a signature")
14609 parser.add_argument("--message", help="The message to sign", type=str)
14610 parser.add_argument("--name", help="The wallet name", type=str)
14611 args = parser.parse_args()
14612
14613 main(args)
14614
14615
14616
14617---
14618File: /verify/verify.py
14619---
14620
14621from substrateinterface import Keypair
14622from binascii import unhexlify
14623
14624
14625def main(args):
14626 file_data = open(args.file).read()
14627 file_split = file_data.split("\n\t")
14628
14629 address_line = file_split[1]
14630 address_prefix = "Signed by: "
14631 if address_line.startswith(address_prefix):
14632 address = address_line[len(address_prefix) :]
14633 else:
14634 address = address_line
14635
14636 keypair = Keypair(ss58_address=address, ss58_format=42)
14637
14638 message = file_split[0]
14639
14640 signature_line = file_split[2]
14641 signature_prefix = "Signature: "
14642 if signature_line.startswith(signature_prefix):
14643 signature = signature_line[len(signature_prefix) :]
14644 else:
14645 signature = signature_line
14646
14647 real_signature = unhexlify(signature.encode())
14648
14649 if not keypair.verify(data=message, signature=real_signature):
14650 raise ValueError(f"Invalid signature for address={address}")
14651 else:
14652 print(f"Signature verified, signed by {address}")
14653
14654
14655if __name__ == "__main__":
14656 import argparse
14657
14658 parser = argparse.ArgumentParser(description="Verify a signature")
14659 parser.add_argument("--file", help="The file containing the message and signature")
14660 args = parser.parse_args()
14661 main(args)
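
Taken together, a typical round-trip with these two scripts (assuming a local wallet named `owner`) looks like:

```bash
# Sign a message with the owner coldkey; set WALLET_PASS or enter the
# password when prompted, per the note in generate.py
python verify/generate.py --name owner --message "I control this coldkey"

# Verify the saved message and signature
python verify/verify.py --file message_and_signature.txt
```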
14662
14663
14664
---
File: /README.md
---

# **Gen42 - Code Generation on Bittensor** <!-- omit in toc -->

<!-- ### Decentralizing Code Generation -->

<!-- [Discord](https://discord.gg/code) • [Network](https://taostats.io/) • [Research](https://bittensor.com/whitepaper) -->

<!-- </div> -->

<!-- --- -->

# Introduction

Gen42 leverages the Bittensor network to provide decentralized code generation services. Our focus is on creating robust, scalable tools for code-based Q&A and code completion, powered by open-source large language models.

:link:**Useful Links:** <br>

- [Gen42 Home](https://www.gen42.ai)
- [Gen42 Chat](https://chat.gen42.ai)
- [Gen42 API](http://api.gen42.ai)


### Products

#### Chat App

We provide a chat frontend that allows users to interact with our subnet. The primary offering of this app is code-based Q&A.

#### Code Completion
<!--
Code completion has exploded in recent years, tools like [Github Copilot](https://github.com/features/copilot) are extremely popular but lack in some manners. -->

<!-- Our subnet aims to compete with Copilot by offering code completion hosted on Bittensor through [Continue.dev](https://continue.dev/). Unlike Copilot we will not be relying on OpenAI. Our miners will be running open-source code-focused LLMs which have proven to be faster and smarter than the product Copilot uses (GPT Codex). -->

<!-- With an unoptimized miner we have already found that -->

We provide an OpenAI-compatible API that can be used with [continue.dev](https://continue.dev/). For information on getting started, visit [Gen42](https://www.gen42.ai).
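
Because the API is OpenAI-compatible, a standard OpenAI client should work against it. A minimal sketch follows, assuming the `openai` Python package; the base URL path, model name, and API-key requirement are illustrative assumptions, not documented values.

```python
# Minimal sketch: the exact base URL, model name, and auth are assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://api.gen42.ai/v1", api_key="YOUR_KEY")
response = client.chat.completions.create(
    model="gen42",  # hypothetical model identifier
    messages=[{"role": "user", "content": "Write a Python function that reverses a string."}],
)
print(response.choices[0].message.content)
```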

---


## Mining and Validating

#### Validators

To get started as a validator, follow the [Validator Quickstart Guide](./docs/validators/quickstart.md).

#### Miners

To begin mining, refer to the [Miner Quickstart Guide](./docs/miners/quickstart.md).


##### Disclaimer

This repo is a fork of Subnet 1, [Prompting](https://github.com/macrocosm-os/prompting/tree/main). Credit for the amazing code goes to them; they did a wonderful job.


---
File: /setup.py
---

# The MIT License (MIT)
# Copyright © 2023 Yuma Rao
# TODO(developer): Set your name
# Copyright © 2023 <your name>

# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
# the Software.

# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

import re
import os
import codecs
import pathlib
from os import path
from io import open
from setuptools import setup, find_packages
from pkg_resources import parse_requirements


def read_requirements(path):
    with open(path, "r") as f:
        requirements = f.read().splitlines()
        processed_requirements = []

        for req in requirements:
            # For git or other VCS links
            if req.startswith("git+") or "@" in req:
                pkg_name = re.search(r"(#egg=)([\w\-_]+)", req)
                if pkg_name:
                    processed_requirements.append(pkg_name.group(2))
                else:
                    # You may decide to raise an exception here,
                    # if you want to ensure every VCS link has an #egg=<package_name> at the end
                    continue
            else:
                processed_requirements.append(req)
    return processed_requirements
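
# Example (illustrative, not from the repo): given a requirements.txt of
#   torch==2.0.1
#   git+https://github.com/example/repo.git#egg=example-pkg
# read_requirements returns ["torch==2.0.1", "example-pkg"]; a VCS link
# without an #egg= fragment is silently skipped.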


requirements = read_requirements("requirements.txt")
here = path.abspath(path.dirname(__file__))

with open(path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = f.read()

# loading version from coding/__init__.py
with codecs.open(
    os.path.join(here, "coding/__init__.py"), encoding="utf-8"
) as init_file:
    version_match = re.search(
        r"^__version__ = ['\"]([^'\"]*)['\"]", init_file.read(), re.M
    )
    version_string = version_match.group(1)
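# Illustrative: a coding/__init__.py line such as  __version__ = "1.2.3"
# would yield version_string == "1.2.3" from the regex above.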

setup(
    name="coding",
    version=version_string,
    description="Code Generation Subnet",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/brokespace/code",
    author="brokespace",
    packages=find_packages(),
    include_package_data=True,
    author_email="",
    license="MIT",
    python_requires=">=3.8",
    install_requires=requirements,
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Topic :: Software Development :: Build Tools",
        # Pick your license as you wish
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Topic :: Scientific/Engineering",
        "Topic :: Scientific/Engineering :: Mathematics",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Software Development",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
)